2017-04-16 55 views
0

我的類中有一個字符列表。序列化和反序列化通常按預期工作,但如果列表中包含需要用字節順序標記(BOM)來描述的字符,就會出問題。示例char代碼是56256。因此,我寫了一個簡單的測試來重現這個問題。標題:序列化和反序列化字符(char)

/// <summary>
/// Serializes a single <see cref="char"/> (code 56256, i.e. U+DBC0) with Json.NET through a
/// UTF-8 <see cref="StreamWriter"/>, reads it back through a <see cref="StreamReader"/>, and
/// asserts that the value read equals the value written.
/// </summary>
[Test]
public void Utf8CharSerializeAndDeserializeShouldEqual()
{
    const ushort charCode = 56256;
    char utfChar = (char)charCode;

    using (var stream = new MemoryStream())
    {
        // leaveOpen = true so the MemoryStream can be rewound and read after the writer is disposed.
        using (var textWriter = new StreamWriter(stream, Encoding.UTF8, 1024, true))
        {
            new JsonSerializer().Serialize(textWriter, utfChar);
        }

        stream.Position = 0;

        using (var textReader = new StreamReader(stream, true))
        using (var jsonReader = new JsonTextReader(textReader))
        {
            char deserializedChar = new JsonSerializer().Deserialize<char>(jsonReader);

            Console.WriteLine($"{(int)utfChar}, {(int)deserializedChar}");
            Assert.AreEqual(utfChar, deserializedChar);
            Assert.AreEqual((int)utfChar, (int)deserializedChar);
        }
    }
}

當char代碼不需要BOM時,測試可以正常通過。例如65(A)就能通過該測試。

+0

你不能用StreamWriter寫入任意碼點。當然,BOM是比較特殊的;但寫入一個代理項(surrogate)也沒什麼不同。如果你*必須*用StreamWriter寫JSON(爲什麼?),那麼至少做一個實驗,看看JSON序列化器是如何保存它的 –

+0

當然,我們的API支持流寫入,不過我們已經用一個小技巧(hack)修改了代碼。Json.NET支持StreamWriter。我有一個字符列表(List),序列化的字符與反序列化後的字符不相等。這難道不是一個重要的問題嗎? –

+0

我不明白你說的「需要描述字節順序標記」或「不需要BOM」是什麼意思。碼點56256(即U+DBC0)是高位代理(high surrogate)碼點;它與字節順序標記沒有任何關係。 –

回答

1

你的問題與Json.NET無關。問題在於U+DBC0(十進制56256)是無效的Unicode字符,而且正如documentation所解釋的,你的StreamWriter所使用的Encoding.UTF8不會編碼這樣的字符:

Encoding.UTF8返回一個UTF8Encoding對象,該對象使用替換回退,將每個無法編碼的字符串以及每個無法解碼的字節替換爲問號(「?」)字符。

爲了證實這一點,如果你在測試示例中用new UTF8Encoding(true, true)取代Encoding.UTF8,就會得到以下異常:

EncoderFallbackException: Unable to translate Unicode character \uDBC0 at index 1 to specified code page. 

如果你要序列化無效的Unicode char值,就需要手動對它們進行編碼,例如使用下面的代碼將其轉換爲字節數組:

/// <summary>
/// Converts <see cref="char"/> values to and from raw bytes without using a text encoding.
/// Each <see cref="char"/> is stored as two bytes, low-order byte first, so every 16-bit
/// value (including an unpaired surrogate such as U+DBC0) is preserved exactly.
/// </summary>
public static partial class TextExtensions
{
    /// <summary>Splits <paramref name="c"/> into its low-order and high-order bytes.</summary>
    private static void ToBytesWithoutEncoding(char c, out byte lower, out byte upper)
    {
        var u = (uint)c;
        // Truncation to the low 8 bits is intentional, hence unchecked.
        lower = unchecked((byte)u);
        upper = unchecked((byte)(u >> 8));
    }

    /// <summary>Returns the two bytes of <paramref name="c"/>, low-order byte first.</summary>
    /// <param name="c">The character to convert.</param>
    /// <returns>A new two-element array: <c>{ low, high }</c>.</returns>
    public static byte[] ToByteArrayWithoutEncoding(this char c)
    {
        byte lower, upper;
        ToBytesWithoutEncoding(c, out lower, out upper);
        return new byte[] { lower, upper };
    }

    /// <summary>
    /// Converts every character in <paramref name="list"/> to two bytes (low-order byte first),
    /// in enumeration order.
    /// </summary>
    /// <param name="list">The characters to convert; may be <c>null</c>.</param>
    /// <returns>
    /// <c>null</c> when <paramref name="list"/> is <c>null</c>; otherwise a new array of
    /// <c>list.Count * 2</c> bytes.
    /// </returns>
    /// <exception cref="OverflowException"><c>list.Count * 2</c> does not fit in an <see cref="int"/>.</exception>
    public static byte[] ToByteArrayWithoutEncoding(this ICollection<char> list)
    {
        if (list == null)
        {
            return null;
        }

        var bytes = new byte[checked(list.Count * 2)];
        int to = 0;
        foreach (var c in list)
        {
            ToBytesWithoutEncoding(c, out bytes[to], out bytes[to + 1]);
            to += 2;
        }

        return bytes;
    }

    /// <summary>Reads one character from the first two bytes of <paramref name="bytes"/>.</summary>
    /// <param name="bytes">The source bytes; may be <c>null</c>.</param>
    /// <returns>The decoded character, following the rules of the two-argument overload.</returns>
    public static char ToCharWithoutEncoding(this byte[] bytes)
    {
        return bytes.ToCharWithoutEncoding(0);
    }

    /// <summary>
    /// Reads one character from <paramref name="bytes"/> starting at <paramref name="position"/>,
    /// treating the byte at <paramref name="position"/> as the low-order byte and the following
    /// byte as the high-order byte.
    /// </summary>
    /// <param name="bytes">The source bytes; may be <c>null</c>.</param>
    /// <param name="position">Zero-based index of the low-order byte.</param>
    /// <returns>
    /// <c>'\0'</c> when <paramref name="bytes"/> is <c>null</c> or <paramref name="position"/> is at or
    /// past the end; the low-order byte alone when only one byte is available; otherwise the
    /// character formed from both bytes.
    /// </returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="bytes"/> is not <c>null</c> and <paramref name="position"/> is negative.
    /// </exception>
    public static char ToCharWithoutEncoding(this byte[] bytes, int position)
    {
        if (bytes == null)
        {
            return default(char);
        }

        if (position < 0)
        {
            throw new ArgumentOutOfRangeException(nameof(position), position, "Position must be non-negative.");
        }

        // Work with the number of bytes remaining instead of computing position + 1 up front:
        // position + 1 wraps to a negative number when position == int.MaxValue, which would
        // pass a "< bytes.Length" check and then index outside the array.
        int available = bytes.Length - position;
        if (available <= 0)
        {
            return default(char);
        }

        int value = bytes[position];
        if (available > 1)
        {
            value |= bytes[position + 1] << 8;
        }

        return (char)value;
    }

    /// <summary>
    /// Decodes <paramref name="bytes"/> into characters, two bytes per character (low-order byte
    /// first). A trailing unpaired byte becomes a character whose high-order byte is zero.
    /// </summary>
    /// <param name="bytes">The source bytes; may be <c>null</c>.</param>
    /// <returns><c>null</c> when <paramref name="bytes"/> is <c>null</c>; otherwise a new list.</returns>
    public static List<char> ToCharListWithoutEncoding(this byte[] bytes)
    {
        if (bytes == null)
        {
            return null;
        }

        // Round up so an odd trailing byte still gets its own slot.
        var chars = new List<char>(bytes.Length / 2 + bytes.Length % 2);
        for (int from = 0; from < bytes.Length; from += 2)
        {
            chars.Add(bytes.ToCharWithoutEncoding(from));
        }

        return chars;
    }
}

然後修改您的測試方法如下:

/// <summary>
/// Test entry point: runs the byte-array based round trip for char code 56256 (U+DBC0).
/// </summary>
[Test]
    public void Utf8JsonCharSerializeAndDeserializeShouldEqualFixed()
    {
        // Marked [Test] so the runner discovers it; the char overload does the actual work.
        Utf8JsonCharSerializeAndDeserializeShouldEqualFixed((char)56256);
    }

    /// <summary>
    /// Serializes <paramref name="utfChar"/> as a raw two-byte array with Json.NET through a strict
    /// UTF-8 writer, reads it back, and asserts the decoded character equals the input.
    /// </summary>
    /// <param name="utfChar">The character to round-trip.</param>
    public void Utf8JsonCharSerializeAndDeserializeShouldEqualFixed(char utfChar)
    {
        byte[] payload;

        using (var output = new MemoryStream())
        {
            using (var textWriter = new StreamWriter(output, new UTF8Encoding(true, true), 1024))
            {
                byte[] encoded = utfChar.ToByteArrayWithoutEncoding();
                new JsonSerializer().Serialize(textWriter, encoded);
            }

            // The writer has been disposed (and flushed) by now; ToArray still returns the written bytes.
            payload = output.ToArray();
        }

        using (var input = new MemoryStream(payload))
        using (var textReader = new StreamReader(input, true))
        using (var jsonReader = new JsonTextReader(textReader))
        {
            byte[] decoded = new JsonSerializer().Deserialize<byte[]>(jsonReader);
            char deserializedChar = decoded.ToCharWithoutEncoding();

            Assert.AreEqual(utfChar, deserializedChar);
            Assert.AreEqual((int)utfChar, (int)deserializedChar);
        }
    }

或者,如果你的某個容器類中有List<char>屬性,可以創建以下轉換器:

/// <summary>
/// Json.NET converter for <c>List&lt;char&gt;</c> that writes the characters as a raw byte array
/// (two bytes per character) and rebuilds the list from that byte array when reading.
/// </summary>
public class CharListConverter : JsonConverter
{
    /// <summary>Returns <c>true</c> only for exactly <c>List&lt;char&gt;</c>.</summary>
    public override bool CanConvert(Type objectType) => typeof(List<char>) == objectType;

    /// <summary>
    /// Reads a byte array from <paramref name="reader"/> and converts it to a character list;
    /// a JSON null token yields <c>null</c>.
    /// </summary>
    public override object ReadJson(JsonReader reader, Type objectType, object existingValue, JsonSerializer serializer)
    {
        if (reader.TokenType != JsonToken.Null)
        {
            byte[] raw = serializer.Deserialize<byte[]>(reader);
            return raw.ToCharListWithoutEncoding();
        }

        return null;
    }

    /// <summary>
    /// Converts <paramref name="value"/> (cast to <c>ICollection&lt;char&gt;</c>) to a byte array
    /// and hands it to <paramref name="serializer"/> for writing.
    /// </summary>
    public override void WriteJson(JsonWriter writer, object value, JsonSerializer serializer)
    {
        byte[] raw = ((ICollection<char>)value).ToByteArrayWithoutEncoding();
        serializer.Serialize(writer, raw);
    }
}

並按如下方式應用它:

/// <summary>
/// Example container type showing how to attach <see cref="CharListConverter"/> to a
/// <c>List&lt;char&gt;</c> property.
/// </summary>
public class RootObject 
{ 
    // The attribute routes (de)serialization of this property through CharListConverter,
    // which converts the list to and from a byte array.
    [JsonConverter(typeof(CharListConverter))] 
    public List<char> Characters { get; set; } 
} 

在這兩種情況下, Json.NET會將字節數組編碼爲Base64。