Skip to content

Commit 4347fb8

Browse files
authored
Merge pull request #52 from AzureCosmosDB/develop
Various fixes
2 parents 53d7064 + a58a08a commit 4347fb8

File tree

7 files changed

+147
-15
lines changed

7 files changed

+147
-15
lines changed

.github/workflows/dotnet-build-test.yml

+4-4
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
name: .NET
1+
name: Build and Test all .NET projects
22

33
on:
44
push:
@@ -8,9 +8,9 @@ on:
88

99
jobs:
1010
build:
11-
11+
name: Build and test .NET projects
1212
runs-on: ubuntu-latest
13-
13+
container: mcr.microsoft.com/dotnet/sdk:6.0
1414
steps:
1515
- uses: actions/checkout@v3
1616
- name: Setup .NET
@@ -28,4 +28,4 @@ jobs:
2828
uses: actions/upload-artifact@v3
2929
with:
3030
name: debug-build
31-
path: /home/runner/work/azure-documentdb-datamigrationtool/azure-documentdb-datamigrationtool/Core/Cosmos.DataTransfer.Core/bin/Debug/net6.0 #path/to/artifact/ # or path/to/artifact
31+
path: /home/runner/work/data-migration-desktop-tool/data-migration-desktop-tool/Core/Cosmos.DataTransfer.Core/bin/Debug/net6.0 #path/to/artifact/ # or path/to/artifact

.github/workflows/validate.yml

+4-3
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
name: Validate all .NET projects
22
on:
3-
pull_request:
4-
branches:
5-
- main
3+
# pull_request:
4+
# branches:
5+
# - main
6+
workflow_dispatch:
67
jobs:
78
build-test:
89
name: Build and test .NET projects

ExampleConfigs.md

+123
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
# Example `migrationsettings.json` Files
2+
3+
## JSON to Cosmos-NoSQL
4+
```json
5+
{
6+
"Source": "json",
7+
"Sink": "cosmos-nosql",
8+
"SourceSettings": {
9+
"FilePath": "https://mytestfiles.local/sales-data.json"
10+
},
11+
"SinkSettings": {
12+
"ConnectionString": "AccountEndpoint=https://...",
13+
"Database": "myDb",
14+
"Container": "myContainer",
15+
"PartitionKeyPath": "/id",
16+
"RecreateContainer": true,
17+
"WriteMode": "Insert",
18+
"CreatedContainerMaxThroughput": 5000,
19+
"IsServerlessAccount": false
20+
}
21+
}
22+
```
23+
24+
## Cosmos-NoSQL to JSON
25+
```json
26+
{
27+
"Source": "Cosmos-NoSql",
28+
"Sink": "JSON",
29+
"SourceSettings":
30+
{
31+
"ConnectionString": "AccountEndpoint=https://...",
32+
"Database":"cosmicworks",
33+
"Container":"customers",
34+
"IncludeMetadataFields": true
35+
},
36+
"SinkSettings":
37+
{
38+
"FilePath": "c:\\data\\cosmicworks\\customers.json",
39+
"Indented": true
40+
}
41+
}
42+
```
43+
44+
## MongoDB to Cosmos-NoSQL
45+
```json
46+
{
47+
"Source": "mongodb",
48+
"Sink": "cosmos-nosql",
49+
"SourceSettings": {
50+
"ConnectionString": "mongodb://...",
51+
"DatabaseName": "sales",
52+
"Collection": "person"
53+
},
54+
"SinkSettings": {
55+
"ConnectionString": "AccountEndpoint=https://...",
56+
"Database": "users",
57+
"Container": "migrated",
58+
"PartitionKeyPath": "/id",
59+
"ConnectionMode": "Direct",
60+
"WriteMode": "UpsertStream",
61+
"CreatedContainerMaxThroughput": 8000,
62+
"UseAutoscaleForCreatedContainer": false
63+
}
64+
}
65+
```
66+
67+
## SqlServer to AzureTableAPI
68+
```json
69+
{
70+
"Source": "SqlServer",
71+
"Sink": "AzureTableApi",
72+
"SourceSettings": {
73+
"ConnectionString": "Server=...",
74+
"QueryText": "SELECT Id, Date, Amount FROM dbo.Payments WHERE Status = 'open'"
75+
},
76+
"SinkSettings": {
77+
"ConnectionString": "DefaultEndpointsProtocol=https;AccountName=...",
78+
"Table": "payments",
79+
"RowKeyFieldName": "Id"
80+
}
81+
}
82+
```
83+
84+
## Cosmos-NoSQL to SqlServer
85+
```json
86+
{
87+
"Source": "cosmos-nosql",
88+
"Sink": "sqlserver",
89+
"SourceSettings":
90+
{
91+
"ConnectionString": "AccountEndpoint=https://...",
92+
"Database":"operations",
93+
"Container":"alerts",
94+
"PartitionKeyValue": "jan",
95+
"Query": "SELECT a.name, a.description, a.number, a.id, a.isSet FROM a"
96+
},
97+
"SinkSettings":
98+
{
99+
"ConnectionString": "Server=...",
100+
"TableName": "Import",
101+
"ColumnMappings": [
102+
{
103+
"ColumnName": "Name"
104+
},
105+
{
106+
"ColumnName": "Description"
107+
},
108+
{
109+
"ColumnName": "Count",
110+
"SourceFieldName": "number"
111+
},
112+
{
113+
"ColumnName": "Id"
114+
},
115+
{
116+
"ColumnName": "IsSet",
117+
"AllowNull": false,
118+
"DefaultValue": false
119+
}
120+
]
121+
}
122+
}
123+
```

Extensions/Cosmos/Cosmos.DataTransfer.CosmosExtension/CosmosDataSinkExtension.cs

+8-2
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
using System.Globalization;
55
using System.Reflection;
66
using System.Text;
7+
using System.Text.RegularExpressions;
78
using Cosmos.DataTransfer.Interfaces;
89
using Microsoft.Azure.Cosmos;
910
using Microsoft.Extensions.Configuration;
@@ -33,8 +34,8 @@ public async Task WriteAsync(IAsyncEnumerable<IDataItem> dataItems, IConfigurati
3334

3435
var entryAssembly = Assembly.GetEntryAssembly();
3536
bool isShardedImport = false;
36-
string sourceName = dataSource.DisplayName;
37-
string sinkName = DisplayName;
37+
string sourceName = StripSpecialChars(dataSource.DisplayName);
38+
string sinkName = StripSpecialChars(DisplayName);
3839
string userAgentString = string.Format(CultureInfo.InvariantCulture, "{0}-{1}-{2}-{3}{4}",
3940
entryAssembly == null ? "dtr" : entryAssembly.GetName().Name,
4041
Assembly.GetExecutingAssembly().GetName().Version,
@@ -119,6 +120,11 @@ void ReportCount(int i)
119120
logger.LogInformation("Added {AddedCount} total records in {TotalSeconds}s", addedCount, $"{timer.ElapsedMilliseconds / 1000.0:F2}");
120121
}
121122

123+
/// <summary>
/// Removes every character from <paramref name="displayName"/> that is not matched by the
/// regex word-character class (<c>\w</c>), returning only the remaining characters.
/// </summary>
/// <param name="displayName">
/// Name to sanitize. WriteAsync passes the data source's DisplayName and this sink's own
/// DisplayName through this method (presumably so they are safe to embed in the
/// user-agent string built right after; confirm against the full method).
/// </param>
/// <returns>The input with all non-word characters deleted; no replacement character is inserted.</returns>
private static string StripSpecialChars(string displayName)
124+
{
125+
// "[^\w]" matches any single character outside the \w class; each match is replaced with "".
return Regex.Replace(displayName, "[^\\w]", "", RegexOptions.Compiled);
126+
}
127+
122128
private static AsyncRetryPolicy GetRetryPolicy(int maxRetryCount, int initialRetryDuration)
123129
{
124130
int retryDelayBaseMs = initialRetryDuration / 2;

Extensions/Cosmos/README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ Source supports an optional `IncludeMetadataFields` parameter (`false` by defaul
2626
}
2727
```
2828

29-
Sink requires an additional `PartitionKeyPath` parameter which is used when creating the container if it does not exist. It also supports an optional `RecreateContainer` parameter (`false` by default) to delete and then recreate the container to ensure only newly imported data is present. The optional `BatchSize` parameter (100 by default) sets the number of items to accumulate before inserting. The optional `WriteMode` parameter specifies the type of data write to use: `InsertStream`, `Insert`, `UpsertStream`, or `Upsert`. The `IsServerlessAccount` parameter specifies whether the target account uses Serverless instead of Provisioned throughput, which affects the way containers are created. Additional parameters allow changing the behavior of the Cosmos client appropriate to your environment.
29+
Sink requires an additional `PartitionKeyPath` parameter which is used when creating the container if it does not exist. It also supports an optional `RecreateContainer` parameter (`false` by default) to delete and then recreate the container to ensure only newly imported data is present. The optional `BatchSize` parameter (100 by default) sets the number of items to accumulate before inserting. `ConnectionMode` can be set to either `Gateway` (default) or `Direct` to control how the client connects to the Cosmos DB service. When a container is created as part of the transfer operation, `CreatedContainerMaxThroughput` (in RU/s) and `UseAutoscaleForCreatedContainer` set the initial throughput that will be in effect while the transfer executes. The optional `WriteMode` parameter specifies the type of data write to use: `InsertStream`, `Insert`, `UpsertStream`, or `Upsert`. The `IsServerlessAccount` parameter specifies whether the target account uses Serverless instead of Provisioned throughput, which affects the way containers are created. Additional parameters allow changing the behavior of the Cosmos client appropriate to your environment.
3030

3131
### Sink
3232

Extensions/SqlServer/Cosmos.DataTransfer.SqlServerExtension/SqlServerDataSinkExtension.cs

+5-3
Original file line numberDiff line numberDiff line change
@@ -36,11 +36,12 @@ public async Task WriteAsync(IAsyncEnumerable<IDataItem> dataItems, IConfigurati
3636
bulkCopy.ColumnMappings.Add(new SqlBulkCopyColumnMapping(dbColumn.ColumnName, dbColumn.ColumnName));
3737
}
3838

39+
var dataTable = new DataTable();
40+
dataTable.Columns.AddRange(dataColumns.Values.ToArray());
41+
3942
var batches = dataItems.Buffer(settings.BatchSize);
4043
await foreach (var batch in batches.WithCancellation(cancellationToken))
4144
{
42-
var dataTable = new DataTable();
43-
dataTable.Columns.AddRange(dataColumns.Values.ToArray());
4445
foreach (var item in batch)
4546
{
4647
var fieldNames = item.GetFieldNames().ToList();
@@ -77,6 +78,7 @@ public async Task WriteAsync(IAsyncEnumerable<IDataItem> dataItems, IConfigurati
7778
dataTable.Rows.Add(row);
7879
}
7980
await bulkCopy.WriteToServerAsync(dataTable, cancellationToken);
81+
dataTable.Clear();
8082
}
8183

8284
await transaction.CommitAsync(cancellationToken);
@@ -91,4 +93,4 @@ public async Task WriteAsync(IAsyncEnumerable<IDataItem> dataItems, IConfigurati
9193
await connection.CloseAsync();
9294
}
9395
}
94-
}
96+
}

README.md

+2-2
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ The Azure Cosmos DB Desktop Data Migration Tool is an open-source project contai
2626

2727
## Quick Installation
2828

29-
To use the tool, download the latest zip file for your platform (win-x64, mac-x64, or linux-x64) from [Releases](https://github.com/AzureCosmosDB/data-migration-desktop-tool/releases) and extract all files to your desired install location. To begin a data transfer operation, first populate the `migrationsettings.json` file with appropriate settings for your data source and sink (see [detailed instructions](#using-the-command-line) below), and then run the application from a command line: `dmt.exe` on Windows or `dmt` on other platforms.
29+
To use the tool, download the latest zip file for your platform (win-x64, mac-x64, or linux-x64) from [Releases](https://github.com/AzureCosmosDB/data-migration-desktop-tool/releases) and extract all files to your desired install location. To begin a data transfer operation, first populate the `migrationsettings.json` file with appropriate settings for your data source and sink (see [detailed instructions](#using-the-command-line) below or [review examples](ExampleConfigs.md)), and then run the application from a command line: `dmt.exe` on Windows or `dmt` on other platforms.
3030

3131
## Extension documentation
3232

@@ -177,7 +177,7 @@ This tutorial outlines how to use the Azure Cosmos DB Desktop Data Migration Too
177177
}
178178
}
179179
```
180-
> **Note**: **migrationsettings.json** can also be configured to execute multiple data transfer operations with a single run command. To do this, include an `Operations` property consisting of an array of objects that include `SourceSettings` and `SinkSettings` properties using the same format as those shown above for single operations.
180+
> **Note**: **migrationsettings.json** can also be configured to execute multiple data transfer operations with a single run command. To do this, include an `Operations` property consisting of an array of objects that include `SourceSettings` and `SinkSettings` properties using the same format as those shown above for single operations. Additional details and examples can be found in [this blog post](https://codemindinterface.com/2023/03/cosmos-tool-operations/).
181181

182182
4. Execute the program using the following command:
183183

0 commit comments

Comments
 (0)