Skip to content

Commit 7eb9c90

Browse files
authored
Merge pull request AzureCosmosDB#116 from adambarath/main
Fix for issue: CosmosDB to CosmosDB doesn't migrate "Id" field AzureCosmosDB#100
2 parents b25e819 + 39c5664 commit 7eb9c90

File tree

5 files changed

+115
-12
lines changed

5 files changed

+115
-12
lines changed

Extensions/Cosmos/Cosmos.DataTransfer.CosmosExtension.UnitTests/CosmosDataSinkExtensionTests.cs

+78
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
using Cosmos.DataTransfer.Interfaces;
2+
using Microsoft.VisualStudio.TestTools.UnitTesting;
23

34
namespace Cosmos.DataTransfer.CosmosExtension.UnitTests
45
{
@@ -54,5 +55,82 @@ public void BuildDynamicObjectTree_WithNestedArrays_WorksCorrectly()
5455

5556
Assert.AreEqual("sub2-1", secondSubArray[0].id);
5657
}
58+
59+
[TestMethod]
public void BuildDynamicObjectTree_WithAnyCaseIds_UsesSourceIdValue()
{
    // Arrange: one value per casing variant of the "id" field name.
    int numericValue = Random.Shared.Next();
    string lowerValue = Guid.NewGuid().ToString();
    string upperValue = Guid.NewGuid().ToString();
    string mixedValue = Guid.NewGuid().ToString();
    string reversedValue = Guid.NewGuid().ToString();

    // Each case is a single-field item; with preserveMixedCaseIds disabled the
    // source value is expected to surface as the lowercase string "id" property.
    var cases = new (string FieldName, object Value, string ExpectedId)[]
    {
        ("id", numericValue, numericValue.ToString()),
        ("id", lowerValue, lowerValue),
        ("ID", upperValue, upperValue),
        ("Id", mixedValue, mixedValue),
        ("iD", reversedValue, reversedValue),
    };

    foreach (var (fieldName, value, expectedId) in cases)
    {
        var sourceFields = new Dictionary<string, object?>()
        {
            { fieldName, value },
        };
        var dataItem = new CosmosDictionaryDataItem(sourceFields);

        // Act
        dynamic tree = dataItem.BuildDynamicObjectTree(requireStringId: true, preserveMixedCaseIds: false)!;

        // Assert
        Assert.AreEqual(expectedId, tree.id);
    }
}
104+
105+
[TestMethod]
public void BuildDynamicObjectTree_WithPreservedMixedCaseIds_PassesThroughSourceValues()
{
    int lowercaseIdValue = Random.Shared.Next();
    string uppercaseIdValue = Guid.NewGuid().ToString();
    string mixedCaseIdValue = Guid.NewGuid().ToString();

    // Scenario 1: the source has "id", "ID" and "Id" side by side.
    // All three are expected on the result, with the lowercase "id" converted to a string.
    var allCasings = new CosmosDictionaryDataItem(new Dictionary<string, object?>()
    {
        ["id"] = lowercaseIdValue,
        ["ID"] = uppercaseIdValue,
        ["Id"] = mixedCaseIdValue,
    });

    dynamic withLowercaseId = allCasings.BuildDynamicObjectTree(requireStringId: true, preserveMixedCaseIds: true)!;

    Assert.AreEqual(lowercaseIdValue.ToString(), withLowercaseId.id);
    Assert.AreEqual(uppercaseIdValue, withLowercaseId.ID);
    Assert.AreEqual(mixedCaseIdValue, withLowercaseId.Id);

    // Scenario 2: the source has only "ID" and "Id".
    // Both are expected to pass through untouched, alongside a non-blank "id".
    var noLowercaseId = new CosmosDictionaryDataItem(new Dictionary<string, object?>()
    {
        ["ID"] = uppercaseIdValue,
        ["Id"] = mixedCaseIdValue,
    });

    dynamic withGeneratedId = noLowercaseId.BuildDynamicObjectTree(requireStringId: true, preserveMixedCaseIds: true)!;

    Assert.AreEqual(uppercaseIdValue, withGeneratedId.ID);
    Assert.AreEqual(mixedCaseIdValue, withGeneratedId.Id);
    string? generatedId = withGeneratedId.id;
    Assert.IsNotNull(generatedId);
    Assert.IsFalse(string.IsNullOrWhiteSpace(generatedId));
}
57135
}
58136
}

Extensions/Cosmos/Cosmos.DataTransfer.CosmosExtension/CosmosDataSinkExtension.cs

+4-1
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,10 @@ void ReportCount(int i)
9393
}
9494
}
9595

96-
var convertedObjects = dataItems.Select(di => di.BuildDynamicObjectTree(requireStringId: true, ignoreNullValues: settings.IgnoreNullValues)).Where(o => o != null).OfType<ExpandoObject>();
96+
var convertedObjects = dataItems
97+
.Select(di => di.BuildDynamicObjectTree(requireStringId: true, ignoreNullValues: settings.IgnoreNullValues, preserveMixedCaseIds: settings.PreserveMixedCaseIds))
98+
.Where(o => o != null)
99+
.OfType<ExpandoObject>();
97100
var batches = convertedObjects.Buffer(settings.BatchSize);
98101
var retry = GetRetryPolicy(settings.MaxRetryCount, settings.InitialRetryDurationMs);
99102
await foreach (var batch in batches.WithCancellation(cancellationToken))

Extensions/Cosmos/Cosmos.DataTransfer.CosmosExtension/CosmosSinkSettings.cs

+1
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ public class CosmosSinkSettings : CosmosSettingsBase, IDataExtensionSettings
1414
public bool UseAutoscaleForCreatedContainer { get; set; } = true;
1515
public bool IsServerlessAccount { get; set; } = false;
1616
public bool UseSharedThroughput { get; set; } = false;
17+
public bool PreserveMixedCaseIds { get; set; } = false;
1718
public DataWriteMode WriteMode { get; set; } = DataWriteMode.Insert;
1819
public bool IgnoreNullValues { get; set; } = false;
1920
public List<string>? PartitionKeyPaths { get; set; }

Extensions/Cosmos/README.md

+2-1
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ Or with RBAC:
4444
}
4545
```
4646

47-
Sink requires an additional `PartitionKeyPath` parameter which is used when creating the container if it does not exist. To use hierarchical partition keys, instead use the `PartitionKeyPaths` setting to supply an array of up to 3 paths. It also supports an optional `RecreateContainer` parameter (`false` by default) to delete and then recreate the container to ensure only newly imported data is present. The optional `BatchSize` parameter (100 by default) sets the number of items to accumulate before inserting. `ConnectionMode` can be set to either `Gateway` (default) or `Direct` to control how the client connects to the CosmosDB service. For situations where a container is created as part of the transfer operation `CreatedContainerMaxThroughput` (in RUs) and `UseAutoscaleForCreatedContainer` provide the initial throughput settings which will be in effect when executing the transfer. To instead use shared throughput that has been provisioned at the database level, set the `UseSharedThroughput` parameter to `true`. The optional `WriteMode` parameter specifies the type of data write to use: `InsertStream`, `Insert`, `UpsertStream`, or `Upsert`. The `IsServerlessAccount` parameter specifies whether the target account uses Serverless instead of Provisioned throughput, which affects the way containers are created. Additional parameters allow changing the behavior of the Cosmos client appropriate to your environment.
47+
Sink requires an additional `PartitionKeyPath` parameter which is used when creating the container if it does not exist. To use hierarchical partition keys, instead use the `PartitionKeyPaths` setting to supply an array of up to 3 paths. It also supports an optional `RecreateContainer` parameter (`false` by default) to delete and then recreate the container to ensure only newly imported data is present. The optional `BatchSize` parameter (100 by default) sets the number of items to accumulate before inserting. `ConnectionMode` can be set to either `Gateway` (default) or `Direct` to control how the client connects to the CosmosDB service. For situations where a container is created as part of the transfer operation, `CreatedContainerMaxThroughput` (in RUs) and `UseAutoscaleForCreatedContainer` provide the initial throughput settings which will be in effect when executing the transfer. To instead use shared throughput that has been provisioned at the database level, set the `UseSharedThroughput` parameter to `true`. The optional `WriteMode` parameter specifies the type of data write to use: `InsertStream`, `Insert`, `UpsertStream`, or `Upsert`. The `IsServerlessAccount` parameter specifies whether the target account uses Serverless instead of Provisioned throughput, which affects the way containers are created. Additional parameters allow changing the behavior of the Cosmos client appropriate to your environment. When the optional `PreserveMixedCaseIds` parameter (`false` by default) is set to `true`, differently cased `id` fields (e.g. `Id` or `ID`) are not renamed to `id`; they are written through without modification, and a separate lowercase `id` field, as required by Cosmos, is generated if the item does not already contain one.
4848

4949
### Sink
5050

@@ -62,6 +62,7 @@ Sink requires an additional `PartitionKeyPath` parameter which is used when crea
6262
"CreatedContainerMaxThroughput": 1000,
6363
"UseAutoscaleForCreatedContainer": true,
6464
"WriteMode": "InsertStream",
65+
"PreserveMixedCaseIds": false,
6566
"IsServerlessAccount": false,
6667
"UseSharedThroughput": false
6768
}

Interfaces/Cosmos.DataTransfer.Interfaces/DataItemExtensions.cs

+30-10
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,10 @@ public static class DataItemExtensions
99
/// </summary>
1010
/// <param name="source"></param>
1111
/// <param name="requireStringId">If true, adds a new GUID "id" field to any top level items where one is not already present.</param>
12+
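/// <param name="preserveMixedCaseIds">If true, only an exactly lowercase "id" field counts as the required "id"; differently cased variants (e.g. "Id", "ID") are passed through unchanged, and when <paramref name="requireStringId"/> is also true a new GUID "id" is added if no lowercase "id" is present.</param>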
1213
/// <returns>A dynamic object containing the entire data structure.</returns>
1314
/// <remarks>The returned ExpandoObject can be used directly as an IDictionary.</remarks>
14-
public static ExpandoObject? BuildDynamicObjectTree(this IDataItem? source, bool requireStringId = false, bool ignoreNullValues = false)
15+
public static ExpandoObject? BuildDynamicObjectTree(this IDataItem? source, bool requireStringId = false, bool ignoreNullValues = false, bool preserveMixedCaseIds = false)
1516
{
1617
if (source == null)
1718
{
@@ -20,20 +21,25 @@ public static class DataItemExtensions
2021

2122
var fields = source.GetFieldNames().ToList();
2223
var item = new ExpandoObject();
23-
24+
2425
/*
2526
* If the item contains a lowercase id field, we can take it as is.
26-
* If we have an uppercase Id or ID field, but no lowercase id, we will rename it to id.
27+
* If we have an uppercase Id or ID field, but no lowercase id, we will rename it to id, unless `preserveMixedCaseIds` is set to true.
28+
* If `preserveMixedCaseIds` is set to true, any differently cased "id" fields will be passed through as normal properties with no casing change, and a separate "id" will be generated only if the item has no lowercase "id" field.
2729
* Then it can be used i.e. as CosmosDB primary key, when `requireStringId` is set to true.
2830
*/
2931
var containsLowercaseIdField = fields.Contains("id", StringComparer.CurrentCulture);
3032
var containsAnyIdField = fields.Contains("id", StringComparer.CurrentCultureIgnoreCase);
31-
32-
if (requireStringId && !containsAnyIdField)
33+
34+
if (requireStringId)
3335
{
34-
item.TryAdd("id", Guid.NewGuid().ToString());
36+
bool mismatchedIdCasing = preserveMixedCaseIds && !containsLowercaseIdField;
37+
if (!containsAnyIdField || mismatchedIdCasing)
38+
{
39+
item.TryAdd("id", Guid.NewGuid().ToString());
40+
}
3541
}
36-
42+
3743
foreach (string field in fields)
3844
{
3945
object? value = source.GetValue(field);
@@ -43,10 +49,24 @@ public static class DataItemExtensions
4349
}
4450

4551
var fieldName = field;
46-
if (string.Equals(field, "id", StringComparison.CurrentCultureIgnoreCase) && requireStringId && !containsLowercaseIdField)
52+
if (requireStringId && string.Equals(field, "id", StringComparison.CurrentCultureIgnoreCase))
4753
{
48-
value = value?.ToString();
49-
fieldName = "id";
54+
if (preserveMixedCaseIds)
55+
{
56+
if (string.Equals(field, "id", StringComparison.CurrentCulture))
57+
{
58+
value = value?.ToString();
59+
}
60+
}
61+
else if (!containsLowercaseIdField)
62+
{
63+
value = value?.ToString();
64+
fieldName = "id";
65+
}
66+
else
67+
{
68+
value = value?.ToString();
69+
}
5070
}
5171
else if (value is IDataItem child)
5272
{

0 commit comments

Comments
 (0)