Skip to content

Commit 5ba8a1f

Browse files
authored
Merge pull request #81 from AzureCosmosDB/feature/unicode-json
Fixing unicode output from Utf8JsonWriter
2 parents 69efd3e + 424bba9 commit 5ba8a1f

File tree

3 files changed

+62
-13
lines changed

3 files changed

+62
-13
lines changed

Extensions/Json/Cosmos.DataTransfer.JsonExtension.UnitTests/JsonSinkTests.cs

+41-2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
using Cosmos.DataTransfer.Interfaces;
1+
using Cosmos.DataTransfer.Interfaces;
22
using Microsoft.Extensions.Logging.Abstractions;
33
using Newtonsoft.Json;
44

@@ -86,7 +86,6 @@ public async Task WriteAsync_WithSourceDates_PreservesDateFormats()
8686
Assert.IsTrue(outputData.Any(o => o.Id == 3 && o.Created == randomTime));
8787
}
8888

89-
9089
[TestMethod]
9190
public async Task WriteAsync_WithDateArray_PreservesDateFormats()
9291
{
@@ -119,5 +118,45 @@ public async Task WriteAsync_WithDateArray_PreservesDateFormats()
119118
Assert.AreEqual(DateTime.UnixEpoch, outputData?.Single().Dates?.ElementAt(2));
120119
}
121120

121+
/// <summary>
/// Writes a single item whose field names and values contain non-ASCII text,
/// then checks that the file on disk holds those characters literally and that
/// the content deserializes back to the same strings.
/// </summary>
[TestMethod]
public async Task WriteAsync_WithUnicode_PreservesOriginalCharacters()
{
    var sink = new JsonFileSink();

    const string japanese = "炎ギ因";
    const string emoji = "✉✔✈";
    const string text = "XXX";

    // One record mixing non-ASCII text in an array, in a plain value, and in a property name.
    var fields = new Dictionary<string, object?>
    {
        ["Id"] = 1,
        ["Array"] = new[] { japanese, emoji, text },
        ["Emoji"] = emoji,
        [japanese] = text,
    };
    var items = new List<DictionaryDataItem> { new DictionaryDataItem(fields) };

    string outputPath = $"{DateTime.UtcNow:yy-MM-dd}_MixedUnicodeOutput.json";
    var settings = new Dictionary<string, string>
    {
        ["FilePath"] = outputPath,
    };
    var config = TestHelpers.CreateConfig(settings);

    await sink.WriteAsync(items.ToAsyncEnumerable(), config, new JsonFileSource(), NullLogger.Instance);

    string writtenJson = await File.ReadAllTextAsync(outputPath);

    // The raw file text must contain the original characters themselves.
    Assert.IsTrue(writtenJson.Contains(japanese));
    Assert.IsTrue(writtenJson.Contains(emoji));

    // Round-trip through Newtonsoft to confirm the values also parse back unchanged.
    var roundTripped = JsonConvert.DeserializeObject<List<TestDataObject>>(writtenJson);
    var onlyItem = roundTripped?.Single();
    var arrayValues = onlyItem?.Array;

    Assert.AreEqual(japanese, arrayValues?.ElementAt(0));
    Assert.AreEqual(emoji, arrayValues?.ElementAt(1));
    Assert.AreEqual(emoji, onlyItem?.Emoji);
    Assert.AreEqual(text, onlyItem?.炎ギ因);
}
122161
}
123162
}
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
1-
namespace Cosmos.DataTransfer.JsonExtension.UnitTests
1+
namespace Cosmos.DataTransfer.JsonExtension.UnitTests
22
{
33
/// <summary>
/// Target type the JSON sink tests deserialize written output into.
/// Property names match the field names the tests write.
/// </summary>
public class TestDataObject
{
    /// <summary>Numeric identifier of the record.</summary>
    public int Id { get; set; }

    /// <summary>Optional name value.</summary>
    public string? Name { get; set; }

    /// <summary>Optional single timestamp.</summary>
    public DateTime? Created { get; set; }

    /// <summary>Optional list of timestamps.</summary>
    public List<DateTime>? Dates { get; set; }

    /// <summary>Optional list of strings; the Unicode test fills it with mixed-script text.</summary>
    public List<string>? Array { get; set; }

    /// <summary>Optional string; the Unicode test stores symbol characters here.</summary>
    public string? Emoji { get; set; }

    /// <summary>Optional string exposed under a non-ASCII property name.</summary>
    public string? 炎ギ因 { get; set; }
}
1013
}

Interfaces/Cosmos.DataTransfer.Interfaces/DataItemJsonConverter.cs

+17-10
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
using System.Collections;
22
using System.Text;
3+
using System.Text.Encodings.Web;
34
using System.Text.Json;
45

56
namespace Cosmos.DataTransfer.Interfaces;
@@ -18,11 +19,11 @@ public static string AsJsonString(this IDataItem dataItem, bool indented, bool i
1819
return Encoding.UTF8.GetString(bytes);
1920
}
2021

21-
public static void WriteDataItem(Utf8JsonWriter writer, IDataItem item, bool includeNullFields, string? objectName = null)
22+
public static void WriteDataItem(Utf8JsonWriter writer, IDataItem item, bool includeNullFields, JsonEncodedText? objectName = null)
2223
{
2324
if (objectName != null)
2425
{
25-
writer.WriteStartObject(objectName);
26+
writer.WriteStartObject(objectName.Value);
2627
}
2728
else
2829
{
@@ -40,22 +41,23 @@ public static void WriteDataItem(Utf8JsonWriter writer, IDataItem item, bool inc
4041

4142
private static void WriteFieldValue(Utf8JsonWriter writer, string fieldName, object? fieldValue, bool includeNullFields)
4243
{
44+
var propertyName = GetAsUnescaped(fieldName);
4345
if (fieldValue == null)
4446
{
4547
if (includeNullFields)
4648
{
47-
writer.WriteNull(fieldName);
49+
writer.WriteNull(propertyName);
4850
}
4951
}
5052
else
5153
{
5254
if (fieldValue is IDataItem child)
5355
{
54-
WriteDataItem(writer, child, includeNullFields, fieldName);
56+
WriteDataItem(writer, child, includeNullFields, propertyName);
5557
}
5658
else if (fieldValue is not string && fieldValue is IEnumerable children)
5759
{
58-
writer.WriteStartArray(fieldName);
60+
writer.WriteStartArray(propertyName);
5961
foreach (object arrayItem in children)
6062
{
6163
if (arrayItem is IDataItem arrayChild)
@@ -76,30 +78,35 @@ private static void WriteFieldValue(Utf8JsonWriter writer, string fieldName, obj
7678
}
7779
else
7880
{
79-
writer.WriteStringValue(arrayItem.ToString());
81+
writer.WriteStringValue(GetAsUnescaped(arrayItem.ToString()!));
8082
}
8183
}
8284
writer.WriteEndArray();
8385
}
8486
else if (TryGetNumber(fieldValue, out var number))
8587
{
86-
writer.WriteNumber(fieldName, number);
88+
writer.WriteNumber(propertyName, number);
8789
}
8890
else if (fieldValue is bool boolean)
8991
{
90-
writer.WriteBoolean(fieldName, boolean);
92+
writer.WriteBoolean(propertyName, boolean);
9193
}
9294
else if (fieldValue is DateTime date)
9395
{
94-
writer.WriteString(fieldName, date.ToString("O"));
96+
writer.WriteString(propertyName, date.ToString("O"));
9597
}
9698
else
9799
{
98-
writer.WriteString(fieldName, fieldValue.ToString());
100+
writer.WriteString(propertyName, GetAsUnescaped(fieldValue.ToString()!));
99101
}
100102
}
101103
}
102104

105+
/// <summary>
/// Pre-encodes <paramref name="text"/> as JSON text using
/// <see cref="JavaScriptEncoder.UnsafeRelaxedJsonEscaping"/>, so the writer
/// receives an already-encoded value instead of applying its own escaping.
/// </summary>
/// <param name="text">Property name or string value to encode.</param>
/// <returns>The encoded text, ready to pass to <see cref="Utf8JsonWriter"/> overloads.</returns>
private static JsonEncodedText GetAsUnescaped(string text) =>
    JsonEncodedText.Encode(text, JavaScriptEncoder.UnsafeRelaxedJsonEscaping);
109+
103110
private static bool TryGetNumber(object x, out double number)
104111
{
105112
if (x is float f)

0 commit comments

Comments
 (0)