Skip to content

Commit 681c9a4

Browse files
authored
Merge pull request #66 from AzureCosmosDB/feature/csv-extension
Adding CSV extension
2 parents 7cf7b3b + d5fb723 commit 681c9a4

19 files changed

+467
-4
lines changed

.github/actions/build-with-plugins/action.yml

+15
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,21 @@ runs:
136136
-p:PublishReadyToRun=false \
137137
-p:PublishTrimmed=false \
138138
-p:Version=${{ inputs.build-version }}
139+
- name: Build CSV Extension
140+
shell: bash
141+
run: |
142+
dotnet publish \
143+
Extensions/Csv/Cosmos.DataTransfer.CsvExtension/Cosmos.DataTransfer.CsvExtension.csproj \
144+
--configuration Release \
145+
--output ${{ inputs.platform-short }}/Extensions \
146+
--self-contained false \
147+
--runtime ${{ inputs.runtime }} \
148+
-p:PublishSingleFile=false \
149+
-p:DebugType=embedded \
150+
-p:EnableCompressionInSingleFile=true \
151+
-p:PublishReadyToRun=false \
152+
-p:PublishTrimmed=false \
153+
-p:Version=${{ inputs.build-version }}
139154
- name: Upload package
140155
uses: actions/upload-artifact@v3
141156
with:

CosmosDbDataMigrationTool.sln

+24-4
Original file line numberDiff line numberDiff line change
@@ -64,18 +64,27 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "AzureBlob", "AzureBlob", "{
6464
EndProject
6565
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "AwsS3", "AwsS3", "{502197E4-F554-4B5B-9235-FBFE7E49EBEF}"
6666
EndProject
67-
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Cosmos.DataTransfer.AwsS3Storage", "Extensions\AwsS3\Cosmos.DataTransfer.AwsS3Storage\Cosmos.DataTransfer.AwsS3Storage.csproj", "{8BA59E9C-0B45-426F-A672-61D40C3C4FB7}"
67+
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Cosmos.DataTransfer.AwsS3Storage", "Extensions\AwsS3\Cosmos.DataTransfer.AwsS3Storage\Cosmos.DataTransfer.AwsS3Storage.csproj", "{8BA59E9C-0B45-426F-A672-61D40C3C4FB7}"
6868
EndProject
69-
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Cosmos.DataTransfer.AzureBlobStorage", "Extensions\AzureBlob\Cosmos.DataTransfer.AzureBlobStorage\Cosmos.DataTransfer.AzureBlobStorage.csproj", "{60ACD837-40BD-4596-832A-139CCBFA7EFE}"
69+
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Cosmos.DataTransfer.AzureBlobStorage", "Extensions\AzureBlob\Cosmos.DataTransfer.AzureBlobStorage\Cosmos.DataTransfer.AzureBlobStorage.csproj", "{60ACD837-40BD-4596-832A-139CCBFA7EFE}"
7070
EndProject
7171
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "CognitiveSearch", "CognitiveSearch", "{F745B535-C483-4894-8BA9-657DB1913D0B}"
7272
ProjectSection(SolutionItems) = preProject
7373
Extensions\CognitiveSearch\README.md = Extensions\CognitiveSearch\README.md
7474
EndProjectSection
7575
EndProject
76-
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Cosmos.DataTransfer.CognitiveSearchExtension", "Extensions\CognitiveSearch\Cosmos.DataTransfer.CognitiveSearchExtension\Cosmos.DataTransfer.CognitiveSearchExtension.csproj", "{37DBC3CB-F8F6-48F8-BFBA-07A27D2E9DD9}"
76+
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Cosmos.DataTransfer.CognitiveSearchExtension", "Extensions\CognitiveSearch\Cosmos.DataTransfer.CognitiveSearchExtension\Cosmos.DataTransfer.CognitiveSearchExtension.csproj", "{37DBC3CB-F8F6-48F8-BFBA-07A27D2E9DD9}"
7777
EndProject
78-
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Cosmos.DataTransfer.CognitiveSearchExtension.UnitTests", "Extensions\CognitiveSearch\Cosmos.DataTransfer.CognitiveSearchExtension.UnitTests\Cosmos.DataTransfer.CognitiveSearchExtension.UnitTests.csproj", "{B55A183D-E4D4-4095-8569-D361A6AA1D10}"
78+
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Cosmos.DataTransfer.CognitiveSearchExtension.UnitTests", "Extensions\CognitiveSearch\Cosmos.DataTransfer.CognitiveSearchExtension.UnitTests\Cosmos.DataTransfer.CognitiveSearchExtension.UnitTests.csproj", "{B55A183D-E4D4-4095-8569-D361A6AA1D10}"
79+
EndProject
80+
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Csv", "Csv", "{39930280-DA29-4814-837B-FA7F252EB3EC}"
81+
ProjectSection(SolutionItems) = preProject
82+
Extensions\Csv\README.md = Extensions\Csv\README.md
83+
EndProjectSection
84+
EndProject
85+
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Cosmos.DataTransfer.CsvExtension", "Extensions\Csv\Cosmos.DataTransfer.CsvExtension\Cosmos.DataTransfer.CsvExtension.csproj", "{6A3FB90C-B837-4724-A406-214D4CEA686F}"
86+
EndProject
87+
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Cosmos.DataTransfer.CsvExtension.UnitTests", "Extensions\Csv\Cosmos.DataTransfer.CsvExtension.UnitTests\Cosmos.DataTransfer.CsvExtension.UnitTests.csproj", "{40AD8890-BD78-48F5-AE76-2C2FC6F15B7E}"
7988
EndProject
8089
Global
8190
GlobalSection(SolutionConfigurationPlatforms) = preSolution
@@ -155,6 +164,14 @@ Global
155164
{B55A183D-E4D4-4095-8569-D361A6AA1D10}.Debug|Any CPU.Build.0 = Debug|Any CPU
156165
{B55A183D-E4D4-4095-8569-D361A6AA1D10}.Release|Any CPU.ActiveCfg = Release|Any CPU
157166
{B55A183D-E4D4-4095-8569-D361A6AA1D10}.Release|Any CPU.Build.0 = Release|Any CPU
167+
{6A3FB90C-B837-4724-A406-214D4CEA686F}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
168+
{6A3FB90C-B837-4724-A406-214D4CEA686F}.Debug|Any CPU.Build.0 = Debug|Any CPU
169+
{6A3FB90C-B837-4724-A406-214D4CEA686F}.Release|Any CPU.ActiveCfg = Release|Any CPU
170+
{6A3FB90C-B837-4724-A406-214D4CEA686F}.Release|Any CPU.Build.0 = Release|Any CPU
171+
{40AD8890-BD78-48F5-AE76-2C2FC6F15B7E}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
172+
{40AD8890-BD78-48F5-AE76-2C2FC6F15B7E}.Debug|Any CPU.Build.0 = Debug|Any CPU
173+
{40AD8890-BD78-48F5-AE76-2C2FC6F15B7E}.Release|Any CPU.ActiveCfg = Release|Any CPU
174+
{40AD8890-BD78-48F5-AE76-2C2FC6F15B7E}.Release|Any CPU.Build.0 = Release|Any CPU
158175
EndGlobalSection
159176
GlobalSection(SolutionProperties) = preSolution
160177
HideSolutionNode = FALSE
@@ -183,6 +200,9 @@ Global
183200
{F745B535-C483-4894-8BA9-657DB1913D0B} = {A8A1CEAB-2D82-460C-9B86-74ABD17CD201}
184201
{37DBC3CB-F8F6-48F8-BFBA-07A27D2E9DD9} = {F745B535-C483-4894-8BA9-657DB1913D0B}
185202
{B55A183D-E4D4-4095-8569-D361A6AA1D10} = {F745B535-C483-4894-8BA9-657DB1913D0B}
203+
{39930280-DA29-4814-837B-FA7F252EB3EC} = {A8A1CEAB-2D82-460C-9B86-74ABD17CD201}
204+
{6A3FB90C-B837-4724-A406-214D4CEA686F} = {39930280-DA29-4814-837B-FA7F252EB3EC}
205+
{40AD8890-BD78-48F5-AE76-2C2FC6F15B7E} = {39930280-DA29-4814-837B-FA7F252EB3EC}
186206
EndGlobalSection
187207
GlobalSection(ExtensibilityGlobals) = postSolution
188208
SolutionGuid = {662B3F27-70D8-45E6-A1C0-1438A9C8A542}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
<Project Sdk="Microsoft.NET.Sdk">
2+
3+
<PropertyGroup>
4+
<TargetFramework>net6.0</TargetFramework>
5+
<ImplicitUsings>enable</ImplicitUsings>
6+
<Nullable>enable</Nullable>
7+
8+
<IsPackable>false</IsPackable>
9+
</PropertyGroup>
10+
11+
<ItemGroup>
12+
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.1.0" />
13+
<PackageReference Include="MSTest.TestAdapter" Version="2.2.8" />
14+
<PackageReference Include="MSTest.TestFramework" Version="2.2.8" />
15+
<PackageReference Include="coverlet.collector" Version="3.1.2" />
16+
</ItemGroup>
17+
18+
<ItemGroup>
19+
<ProjectReference Include="..\..\Json\Cosmos.DataTransfer.JsonExtension.UnitTests\Cosmos.DataTransfer.JsonExtension.UnitTests.csproj" />
20+
<ProjectReference Include="..\Cosmos.DataTransfer.CsvExtension\Cosmos.DataTransfer.CsvExtension.csproj" />
21+
</ItemGroup>
22+
23+
<ItemGroup>
24+
<None Update="Data\NoHeaders.csv">
25+
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
26+
</None>
27+
<None Update="Data\SimpleData.csv">
28+
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
29+
</None>
30+
</ItemGroup>
31+
32+
</Project>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
using System.Globalization;
2+
using Cosmos.DataTransfer.JsonExtension;
3+
using Cosmos.DataTransfer.JsonExtension.UnitTests;
4+
using Microsoft.Extensions.Logging.Abstractions;
5+
using Newtonsoft.Json.Linq;
6+
7+
namespace Cosmos.DataTransfer.CsvExtension.UnitTests;
8+
9+
[TestClass]
10+
public class CsvFileRoundTripTests
11+
{
12+
[TestMethod]
13+
public async Task WriteAsync_fromReadAsync_ProducesIdenticalFile()
14+
{
15+
var input = new CsvFileSource();
16+
var output = new CsvFileSink();
17+
18+
const string fileIn = "Data/SimpleData.csv";
19+
const string fileOut = $"{nameof(WriteAsync_fromReadAsync_ProducesIdenticalFile)}_out.csv";
20+
21+
var sourceConfig = TestHelpers.CreateConfig(new Dictionary<string, string>
22+
{
23+
{ "FilePath", fileIn },
24+
});
25+
var sinkConfig = TestHelpers.CreateConfig(new Dictionary<string, string>
26+
{
27+
{ "FilePath", fileOut },
28+
});
29+
30+
await output.WriteAsync(input.ReadAsync(sourceConfig, NullLogger.Instance), sinkConfig, input, NullLogger.Instance);
31+
32+
var originalText = await File.ReadAllLinesAsync(fileIn);
33+
var finalText = await File.ReadAllLinesAsync(fileOut);
34+
for (var index = 0; index < originalText.Length; index++)
35+
{
36+
var a = originalText.ElementAtOrDefault(index);
37+
var b = finalText.ElementAtOrDefault(index);
38+
Assert.AreEqual(a, b, $"Different text at row {index}");
39+
}
40+
41+
CollectionAssert.AreEquivalent(originalText, finalText);
42+
}
43+
44+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
using Cosmos.DataTransfer.JsonExtension.UnitTests;
2+
using Microsoft.Extensions.Logging.Abstractions;
3+
4+
namespace Cosmos.DataTransfer.CsvExtension.UnitTests;
5+
6+
[TestClass]
7+
public class CsvFileSourceTests
8+
{
9+
[TestMethod]
10+
public async Task ReadAsync_WithSimpleFile_ReadsRows()
11+
{
12+
CsvFileSource extension = new();
13+
var config = TestHelpers.CreateConfig(new Dictionary<string, string>
14+
{
15+
{ "FilePath", "Data/SimpleData.csv" }
16+
});
17+
18+
int counter = 0;
19+
int lastId = -1;
20+
await foreach (var dataItem in extension.ReadAsync(config, NullLogger.Instance))
21+
{
22+
counter++;
23+
CollectionAssert.AreEquivalent(new[] { "id", "name", "description", "count" }, dataItem.GetFieldNames().ToArray());
24+
object? value = dataItem.GetValue("id");
25+
Assert.IsNotNull(value);
26+
Assert.IsNotNull(dataItem.GetValue("name"));
27+
var current = Int32.Parse(value.ToString());
28+
Assert.IsTrue(current > lastId);
29+
lastId = current;
30+
}
31+
32+
Assert.AreEqual(4, counter);
33+
}
34+
35+
[TestMethod]
36+
public async Task ReadAsync_WithNoHeaders_ReadsRows()
37+
{
38+
CsvFileSource extension = new();
39+
var config = TestHelpers.CreateConfig(new Dictionary<string, string>
40+
{
41+
{ "FilePath", "Data/NoHeaders.csv" },
42+
{ "HasHeader", "false" },
43+
{ "ColumnNameFormat", "myColumn{0}" },
44+
});
45+
46+
int counter = 0;
47+
await foreach (var dataItem in extension.ReadAsync(config, NullLogger.Instance))
48+
{
49+
counter++;
50+
CollectionAssert.AreEquivalent(new[] { "myColumn0", "myColumn1", "myColumn2", "myColumn3" }, dataItem.GetFieldNames().ToArray());
51+
Assert.IsNotNull(dataItem.GetValue("myColumn0"));
52+
}
53+
54+
Assert.AreEqual(3, counter);
55+
}
56+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
1,One,Lorem ipsum dolor sit amet,1
2+
2,Two,consectetur adipiscing elit,2
3+
3,Three,"sed do, eiusmod tempor",3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
id,name,description,count
2+
1,One,Lorem ipsum dolor sit amet,1
3+
2,Two,consectetur adipiscing elit,2
4+
3,Three,sed do eiusmod tempor,3
5+
4,Four,Ut enim ad minim veniam,4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
global using Microsoft.VisualStudio.TestTools.UnitTesting;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
<Project Sdk="Microsoft.NET.Sdk">
2+
3+
<PropertyGroup>
4+
<TargetFramework>net6.0</TargetFramework>
5+
<ImplicitUsings>enable</ImplicitUsings>
6+
<Nullable>enable</Nullable>
7+
</PropertyGroup>
8+
9+
<ItemGroup>
10+
<PackageReference Include="CsvHelper" Version="30.0.1" />
11+
<PackageReference Include="Microsoft.Extensions.Configuration.Binder" Version="6.0.0" />
12+
<PackageReference Include="System.ComponentModel.Composition" Version="6.0.0" />
13+
</ItemGroup>
14+
15+
<ItemGroup>
16+
<ProjectReference Include="..\..\..\Interfaces\Cosmos.DataTransfer.Common\Cosmos.DataTransfer.Common.csproj" />
17+
<ProjectReference Include="..\..\..\Interfaces\Cosmos.DataTransfer.Interfaces\Cosmos.DataTransfer.Interfaces.csproj" />
18+
<ProjectReference Include="..\..\AwsS3\Cosmos.DataTransfer.AwsS3Storage\Cosmos.DataTransfer.AwsS3Storage.csproj" />
19+
<ProjectReference Include="..\..\AzureBlob\Cosmos.DataTransfer.AzureBlobStorage\Cosmos.DataTransfer.AzureBlobStorage.csproj" />
20+
</ItemGroup>
21+
22+
<Target Name="PublishToExtensionsFolder" AfterTargets="Build" Condition=" '$(Configuration)' == 'Debug' ">
23+
<Exec Command="dotnet publish --configuration $(Configuration) --no-build -p:PublishProfile=PublishToExtensionsFolder" />
24+
</Target>
25+
</Project>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
using Cosmos.DataTransfer.AwsS3Storage;
2+
using Cosmos.DataTransfer.Common;
3+
using Cosmos.DataTransfer.Interfaces;
4+
using System.ComponentModel.Composition;
5+
6+
namespace Cosmos.DataTransfer.CsvExtension;
7+
8+
[Export(typeof(IDataSinkExtension))]
9+
public class CsvAwsS3Sink : CompositeSinkExtension<AwsS3DataSink, CsvFormatWriter>
10+
{
11+
public override string DisplayName => "CSV-AWSS3";
12+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
using Cosmos.DataTransfer.AzureBlobStorage;
2+
using Cosmos.DataTransfer.Common;
3+
using Cosmos.DataTransfer.Interfaces;
4+
using System.ComponentModel.Composition;
5+
6+
namespace Cosmos.DataTransfer.CsvExtension;
7+
8+
[Export(typeof(IDataSinkExtension))]
9+
public class CsvAzureBlobSink : CompositeSinkExtension<AzureBlobDataSink, CsvFormatWriter>
10+
{
11+
public override string DisplayName => "CSV-AzureBlob";
12+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
using Cosmos.DataTransfer.Common;
2+
using Cosmos.DataTransfer.Interfaces;
3+
using System.ComponentModel.Composition;
4+
using Cosmos.DataTransfer.AwsS3Storage;
5+
using Cosmos.DataTransfer.AzureBlobStorage;
6+
7+
namespace Cosmos.DataTransfer.CsvExtension;
8+
9+
[Export(typeof(IDataSinkExtension))]
10+
public class CsvFileSink : CompositeSinkExtension<FileDataSink, CsvFormatWriter>
11+
{
12+
public override string DisplayName => "CSV-File";
13+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
using System.ComponentModel.Composition;
2+
using Cosmos.DataTransfer.Common;
3+
using Cosmos.DataTransfer.Interfaces;
4+
5+
namespace Cosmos.DataTransfer.CsvExtension;
6+
7+
[Export(typeof(IDataSourceExtension))]
8+
public class CsvFileSource : CompositeSourceExtension<FileDataSource, CsvFormatReader>
9+
{
10+
public override string DisplayName => "CSV-File";
11+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
using System.Globalization;
2+
using System.Runtime.CompilerServices;
3+
using Cosmos.DataTransfer.CsvExtension.Settings;
4+
using Cosmos.DataTransfer.Interfaces;
5+
using CsvHelper;
6+
using CsvHelper.Configuration;
7+
using Microsoft.Extensions.Configuration;
8+
using Microsoft.Extensions.Logging;
9+
10+
namespace Cosmos.DataTransfer.CsvExtension;
11+
12+
public class CsvFormatReader : IFormattedDataReader
13+
{
14+
public IEnumerable<IDataExtensionSettings> GetSettings()
15+
{
16+
yield return new CsvReaderSettings();
17+
}
18+
19+
public async IAsyncEnumerable<IDataItem> ParseDataAsync(IComposableDataSource sourceExtension, IConfiguration config, ILogger logger, [EnumeratorCancellation] CancellationToken cancellationToken = default)
20+
{
21+
var settings = config.Get<CsvReaderSettings>();
22+
settings.Validate();
23+
24+
var data = sourceExtension.ReadSourceAsync(config, logger, cancellationToken);
25+
await foreach (var source in data.WithCancellation(cancellationToken))
26+
{
27+
if (source == null)
28+
continue;
29+
30+
using var textReader = new StreamReader(source, leaveOpen: true);
31+
using var reader = new CsvReader(textReader, new CsvConfiguration(CultureInfo.InvariantCulture)
32+
{
33+
HasHeaderRecord = settings.HasHeader,
34+
Delimiter = settings.Delimiter,
35+
});
36+
37+
if (settings.HasHeader)
38+
{
39+
await reader.ReadAsync();
40+
reader.ReadHeader();
41+
}
42+
43+
int rowCount = 0;
44+
while (await reader.ReadAsync())
45+
{
46+
rowCount++;
47+
var values = new Dictionary<string, object?>();
48+
for (int i = 0; i < reader.Parser.Count; i++)
49+
{
50+
var value = reader.GetField(i);
51+
var columnName = string.Format(settings.ColumnNameFormat ?? "{0}", i);
52+
if (settings.HasHeader)
53+
{
54+
var header = reader.HeaderRecord?.ElementAtOrDefault(i);
55+
columnName = header;
56+
}
57+
if (columnName != null)
58+
{
59+
values[columnName] = value;
60+
}
61+
else
62+
{
63+
logger.LogWarning("Missing column name for Value: {value} in row {rowNumber}", value, rowCount);
64+
}
65+
}
66+
yield return new DictionaryDataItem(values);
67+
}
68+
}
69+
}
70+
}

0 commit comments

Comments
 (0)