Skip to content

Commit f8a331d

Browse files
authored
Merge pull request #34 from HiroyukiSakoh/feature/cognitivesearch
add cognitive search extension
2 parents 4347fb8 + 97159cd commit f8a331d

19 files changed

+518
-0
lines changed

.github/actions/build-with-plugins/action.yml

+15
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,21 @@ runs:
118118
-p:PublishReadyToRun=false \
119119
-p:PublishTrimmed=false \
120120
-p:Version=0.0.${{ github.run_number }}
121+
- name: Build Cognitive Search Extension
122+
shell: bash
123+
run: |
124+
dotnet publish \
125+
Extensions/CognitiveSearch/Cosmos.DataTransfer.CognitiveSearchExtension/Cosmos.DataTransfer.CognitiveSearchExtension.csproj \
126+
--configuration Release \
127+
--output ${{ inputs.platform-short }}/Extensions \
128+
--self-contained false \
129+
--runtime ${{ inputs.runtime }} \
130+
-p:PublishSingleFile=false \
131+
-p:DebugType=embedded \
132+
-p:EnableCompressionInSingleFile=true \
133+
-p:PublishReadyToRun=false \
134+
-p:PublishTrimmed=false \
135+
-p:Version=0.0.${{ github.run_number }}
121136
- name: Upload package
122137
uses: actions/upload-artifact@v3
123138
with:

CosmosDbDataMigrationTool.sln

+20
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,15 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Cosmos.DataTransfer.AwsS3St
6868
EndProject
6969
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Cosmos.DataTransfer.AzureBlobStorage", "Extensions\AzureBlob\Cosmos.DataTransfer.AzureBlobStorage\Cosmos.DataTransfer.AzureBlobStorage.csproj", "{60ACD837-40BD-4596-832A-139CCBFA7EFE}"
7070
EndProject
71+
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "CognitiveSearch", "CognitiveSearch", "{F745B535-C483-4894-8BA9-657DB1913D0B}"
72+
ProjectSection(SolutionItems) = preProject
73+
Extensions\CognitiveSearch\README.md = Extensions\CognitiveSearch\README.md
74+
EndProjectSection
75+
EndProject
76+
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Cosmos.DataTransfer.CognitiveSearchExtension", "Extensions\CognitiveSearch\Cosmos.DataTransfer.CognitiveSearchExtension\Cosmos.DataTransfer.CognitiveSearchExtension.csproj", "{37DBC3CB-F8F6-48F8-BFBA-07A27D2E9DD9}"
77+
EndProject
78+
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Cosmos.DataTransfer.CognitiveSearchExtension.UnitTests", "Extensions\CognitiveSearch\Cosmos.DataTransfer.CognitiveSearchExtension.UnitTests\Cosmos.DataTransfer.CognitiveSearchExtension.UnitTests.csproj", "{B55A183D-E4D4-4095-8569-D361A6AA1D10}"
79+
EndProject
7180
Global
7281
GlobalSection(SolutionConfigurationPlatforms) = preSolution
7382
Debug|Any CPU = Debug|Any CPU
@@ -138,6 +147,14 @@ Global
138147
{60ACD837-40BD-4596-832A-139CCBFA7EFE}.Debug|Any CPU.Build.0 = Debug|Any CPU
139148
{60ACD837-40BD-4596-832A-139CCBFA7EFE}.Release|Any CPU.ActiveCfg = Release|Any CPU
140149
{60ACD837-40BD-4596-832A-139CCBFA7EFE}.Release|Any CPU.Build.0 = Release|Any CPU
150+
{37DBC3CB-F8F6-48F8-BFBA-07A27D2E9DD9}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
151+
{37DBC3CB-F8F6-48F8-BFBA-07A27D2E9DD9}.Debug|Any CPU.Build.0 = Debug|Any CPU
152+
{37DBC3CB-F8F6-48F8-BFBA-07A27D2E9DD9}.Release|Any CPU.ActiveCfg = Release|Any CPU
153+
{37DBC3CB-F8F6-48F8-BFBA-07A27D2E9DD9}.Release|Any CPU.Build.0 = Release|Any CPU
154+
{B55A183D-E4D4-4095-8569-D361A6AA1D10}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
155+
{B55A183D-E4D4-4095-8569-D361A6AA1D10}.Debug|Any CPU.Build.0 = Debug|Any CPU
156+
{B55A183D-E4D4-4095-8569-D361A6AA1D10}.Release|Any CPU.ActiveCfg = Release|Any CPU
157+
{B55A183D-E4D4-4095-8569-D361A6AA1D10}.Release|Any CPU.Build.0 = Release|Any CPU
141158
EndGlobalSection
142159
GlobalSection(SolutionProperties) = preSolution
143160
HideSolutionNode = FALSE
@@ -163,6 +180,9 @@ Global
163180
{502197E4-F554-4B5B-9235-FBFE7E49EBEF} = {A8A1CEAB-2D82-460C-9B86-74ABD17CD201}
164181
{8BA59E9C-0B45-426F-A672-61D40C3C4FB7} = {502197E4-F554-4B5B-9235-FBFE7E49EBEF}
165182
{60ACD837-40BD-4596-832A-139CCBFA7EFE} = {9627A42A-BEB0-4A39-B49C-C3C6D54E705A}
183+
{F745B535-C483-4894-8BA9-657DB1913D0B} = {A8A1CEAB-2D82-460C-9B86-74ABD17CD201}
184+
{37DBC3CB-F8F6-48F8-BFBA-07A27D2E9DD9} = {F745B535-C483-4894-8BA9-657DB1913D0B}
185+
{B55A183D-E4D4-4095-8569-D361A6AA1D10} = {F745B535-C483-4894-8BA9-657DB1913D0B}
166186
EndGlobalSection
167187
GlobalSection(ExtensibilityGlobals) = postSolution
168188
SolutionGuid = {662B3F27-70D8-45E6-A1C0-1438A9C8A542}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
using Cosmos.DataTransfer.Interfaces;
2+
using System.Text.Json;
3+
4+
namespace Cosmos.DataTransfer.CognitiveSearchExtension.UnitTests
5+
{
6+
/// <summary>
/// Unit tests for <see cref="CognitiveSearchDataItem"/>, driven by the JSON fixtures
/// under the <c>Data</c> folder (copied to the test output directory by the project file).
/// </summary>
[TestClass]
public class CognitiveSearchDataItemTests
{
    /// <summary>
    /// Reads and parses one JSON fixture. The caller owns the returned document and must
    /// dispose it; the <see cref="JsonElement"/> values handed to the item under test are
    /// only valid while the document is alive.
    /// </summary>
    /// <param name="path">Fixture path, relative to the test working directory.</param>
    private static async Task<JsonDocument> LoadDocumentAsync(string path)
    {
        return JsonDocument.Parse(await File.ReadAllTextAsync(path));
    }

    [TestMethod]
    public async Task GetFieldNames_WithFlatObject_ReportsCorrectNames()
    {
        using var json = await LoadDocumentAsync("Data/IdName.json");
        var item = new CognitiveSearchDataItem(json.RootElement);

        var fields = item.GetFieldNames().ToList();

        Assert.AreEqual(2, fields.Count);
        CollectionAssert.Contains(fields, "id");
        CollectionAssert.Contains(fields, "name");
    }

    [TestMethod]
    public async Task GetValue_WithFlatObject_ReturnsValidValues()
    {
        using var json = await LoadDocumentAsync("Data/IdName.json");
        var item = new CognitiveSearchDataItem(json.RootElement);

        Assert.AreEqual(1m, item.GetValue("id"));
        Assert.AreEqual("One", item.GetValue("name"));
    }

    [TestMethod]
    public async Task GetFieldNames_WithNestedObject_ReportsParentAndChildNames()
    {
        using var json = await LoadDocumentAsync("Data/Nested.json");
        var item = new CognitiveSearchDataItem(json.RootElement);

        var fields = item.GetFieldNames().ToList();

        Assert.AreEqual(3, fields.Count);
        CollectionAssert.Contains(fields, "id");
        CollectionAssert.Contains(fields, "name");
        CollectionAssert.Contains(fields, "child");

        // A nested JSON object must surface as its own IDataItem.
        var child = item.GetValue("child") as IDataItem;
        Assert.IsNotNull(child);
        var childFields = child.GetFieldNames().ToList();
        Assert.AreEqual(2, childFields.Count);
        CollectionAssert.Contains(childFields, "type");
        CollectionAssert.Contains(childFields, "data");
    }

    [TestMethod]
    public async Task GetValue_WithMixedValueTypes_ReturnsValidTypedValues()
    {
        using var json = await LoadDocumentAsync("Data/MixedTypes.json");
        var item = new CognitiveSearchDataItem(json.RootElement);

        Assert.AreEqual(2m, item.GetValue("id"));
        Assert.AreEqual("Matt", item.GetValue("name"));

        var otherNames = item.GetValue("otherNames") as IEnumerable<object>;
        Assert.IsNotNull(otherNames);
        // Materialize once so the sequence is not re-enumerated by every assertion.
        var names = otherNames.ToList();
        Assert.AreEqual(3, names.Count);
        Assert.IsTrue(names.All(n => n is string));

        var mixedValue = item.GetValue("mixed") as IEnumerable<object>;
        Assert.IsNotNull(mixedValue);
        var mixed = mixedValue.ToList();
        Assert.AreEqual(5, mixed.Count);
        Assert.AreEqual(1m, mixed[0]);
        Assert.AreEqual(true, mixed[1]);
        Assert.AreEqual(3m, mixed[2]);
        Assert.AreEqual("four", mixed[3]);
        Assert.IsInstanceOfType(mixed[4], typeof(IDataItem));
    }
}
91+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
<Project Sdk="Microsoft.NET.Sdk">
2+
3+
<PropertyGroup>
4+
<TargetFramework>net6.0</TargetFramework>
5+
<ImplicitUsings>enable</ImplicitUsings>
6+
<Nullable>enable</Nullable>
7+
8+
<IsPackable>false</IsPackable>
9+
</PropertyGroup>
10+
11+
<ItemGroup>
12+
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.1.0" />
13+
<PackageReference Include="MSTest.TestAdapter" Version="2.2.8" />
14+
<PackageReference Include="MSTest.TestFramework" Version="2.2.8" />
15+
<PackageReference Include="coverlet.collector" Version="3.1.2" />
16+
</ItemGroup>
17+
18+
<ItemGroup>
19+
<ProjectReference Include="..\Cosmos.DataTransfer.CognitiveSearchExtension\Cosmos.DataTransfer.CognitiveSearchExtension.csproj" />
20+
</ItemGroup>
21+
22+
<ItemGroup>
23+
<None Update="Data\IdName.json">
24+
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
25+
</None>
26+
<None Update="Data\MixedTypes.json">
27+
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
28+
</None>
29+
<None Update="Data\Nested.json">
30+
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
31+
</None>
32+
</ItemGroup>
33+
34+
</Project>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
{
2+
"id": 1,
3+
"name": "One"
4+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
{
2+
"id": 2,
3+
"name": "Matt",
4+
"otherNames": [
5+
"One",
6+
"Two",
7+
"Three"
8+
],
9+
"mixed": [
10+
1,
11+
true,
12+
3,
13+
"four",
14+
{
15+
"letter": "E",
16+
"number": 6
17+
}
18+
]
19+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
{
2+
"id": 1,
3+
"name": "One",
4+
"child": {
5+
"type": "Key",
6+
"data": "Value"
7+
}
8+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
global using Microsoft.VisualStudio.TestTools.UnitTesting;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
using Cosmos.DataTransfer.Interfaces;
2+
using System.Text.Json;
3+
4+
namespace Cosmos.DataTransfer.CognitiveSearchExtension
5+
{
6+
/// <summary>
/// <see cref="IDataItem"/> implementation that exposes the properties of a
/// System.Text.Json <c>JsonElement</c> as named fields.
/// </summary>
public class CognitiveSearchDataItem : IDataItem
{
    // Property that is hidden from GetFieldNames.
    // NOTE(review): presumably the relevance score attached to search results - confirm.
    private const string SearchScoreField = "@search.score";

    /// <summary>The JSON element wrapped by this item.</summary>
    public JsonElement JsonElement { get; }

    /// <summary>Wraps <paramref name="jsonElement"/> without copying it.</summary>
    /// <param name="jsonElement">
    /// Element to expose. <see cref="GetFieldNames"/> and <see cref="GetValue"/> rely on
    /// object enumeration / property lookup, so it is expected to be a JSON object.
    /// </param>
    public CognitiveSearchDataItem(JsonElement jsonElement)
    {
        JsonElement = jsonElement;
    }

    /// <summary>
    /// Returns the names of all properties of the wrapped object, except
    /// <c>@search.score</c>.
    /// </summary>
    public IEnumerable<string> GetFieldNames()
    {
        return JsonElement.EnumerateObject()
            .Select(prop => prop.Name)
            .Where(name => name != SearchScoreField);
    }

    /// <summary>
    /// Returns the value of the property named <paramref name="fieldName"/> converted to a
    /// CLR value, or <c>null</c> when the property does not exist or is JSON null.
    /// </summary>
    /// <param name="fieldName">Name of the property to read.</param>
    public object? GetValue(string fieldName)
    {
        return JsonElement.TryGetProperty(fieldName, out JsonElement value)
            ? GetTypedValue(value)
            : null;
    }

    /// <summary>
    /// Maps a JSON element to a CLR value: null, number, string, bool, a list of converted
    /// elements for arrays, and a nested <see cref="CognitiveSearchDataItem"/> otherwise.
    /// </summary>
    private static object? GetTypedValue(JsonElement jsonElement)
    {
        return jsonElement.ValueKind switch
        {
            JsonValueKind.Null => null,
            JsonValueKind.Number => GetNumber(jsonElement),
            JsonValueKind.String => jsonElement.GetString(),
            JsonValueKind.True => true,
            JsonValueKind.False => false,
            JsonValueKind.Array => jsonElement.EnumerateArray().Select(item => GetTypedValue(item)).ToList(),
            _ => new CognitiveSearchDataItem(jsonElement)
        };
    }

    /// <summary>
    /// Converts a JSON number. <c>decimal</c> is preferred; numbers that do not fit in a
    /// decimal (for example <c>1e30</c>) fall back to <c>double</c> instead of throwing,
    /// and as a last resort the raw JSON text is returned.
    /// </summary>
    private static object GetNumber(JsonElement number)
    {
        if (number.TryGetDecimal(out decimal asDecimal))
        {
            return asDecimal;
        }

        if (number.TryGetDouble(out double asDouble))
        {
            return asDouble;
        }

        return number.GetRawText();
    }
}
44+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
using Azure;
2+
using Azure.Search.Documents;
3+
using Azure.Search.Documents.Indexes;
4+
using Azure.Search.Documents.Models;
5+
using Cosmos.DataTransfer.CognitiveSearchExtension.Settings;
6+
using Cosmos.DataTransfer.Interfaces;
7+
using Microsoft.Extensions.Configuration;
8+
using Microsoft.Extensions.Logging;
9+
using System.ComponentModel.Composition;
10+
using System.Diagnostics;
11+
using System.Dynamic;
12+
13+
namespace Cosmos.DataTransfer.CognitiveSearchExtension
14+
{
15+
/// <summary>
/// Data sink that writes <see cref="IDataItem"/> instances to a search index through
/// the Azure.Search.Documents client, in batches.
/// </summary>
[Export(typeof(IDataSinkExtension))]
public class CognitiveSearchDataSinkExtension : IDataSinkExtension
{
    /// <summary>Name under which this sink is exposed.</summary>
    public string DisplayName => "CognitiveSearch";

    /// <summary>
    /// Converts every incoming item to an <see cref="ExpandoObject"/>, groups the results
    /// into batches of <c>settings.BatchSize</c> and submits each batch using the
    /// configured index action. Per-batch and total success/failure counts are logged,
    /// and every failed document is logged as a warning.
    /// </summary>
    /// <param name="dataItems">Items to index.</param>
    /// <param name="config">Configuration bound to <see cref="CognitiveSearchDataSinkSettings"/>.</param>
    /// <param name="dataSource">Source extension; not used by this sink.</param>
    /// <param name="logger">Receives progress and failure messages.</param>
    /// <param name="cancellationToken">Cancels enumeration and the indexing calls.</param>
    /// <exception cref="InvalidOperationException">
    /// The configuration could not be bound to settings, or the configured index action
    /// is not one of Upload, Delete, Merge or MergeOrUpload.
    /// </exception>
    public async Task WriteAsync(IAsyncEnumerable<IDataItem> dataItems, IConfiguration config, IDataSourceExtension dataSource, ILogger logger, CancellationToken cancellationToken = default)
    {
        // Get<T>() yields null when nothing can be bound; fail with a clear message
        // instead of a NullReferenceException on the next line.
        var settings = config.Get<CognitiveSearchDataSinkSettings>()
            ?? throw new InvalidOperationException($"No {nameof(CognitiveSearchDataSinkSettings)} could be read from the sink configuration.");
        settings.Validate();

        var indexClient = new SearchIndexClient(new Uri(settings.Endpoint!), new AzureKeyCredential(settings.ApiKey!));
        var searchClient = indexClient.GetSearchClient(settings.Index);

        var convertedObjects = dataItems.Select(di => BuildObject(di)).Where(o => o != null).OfType<ExpandoObject>();
        var batches = convertedObjects.Buffer(settings.BatchSize);

        int totalSucceededCount = 0;
        int totalFailedCount = 0;
        var timer = Stopwatch.StartNew();
        await foreach (var batch in batches.WithCancellation(cancellationToken))
        {
            var actions = settings.IndexAction switch
            {
                IndexActionType.Upload => IndexDocumentsBatch.Upload(batch),
                IndexActionType.Delete => IndexDocumentsBatch.Delete(batch),
                IndexActionType.Merge => IndexDocumentsBatch.Merge(batch),
                IndexActionType.MergeOrUpload => IndexDocumentsBatch.MergeOrUpload(batch),
                _ => throw new InvalidOperationException($"Unsupported index action '{settings.IndexAction}'.")
            };

            var result = await searchClient.IndexDocumentsAsync(actions, cancellationToken: cancellationToken);

            // Scan the results once; both counts and the warnings derive from it.
            var results = result.Value.Results;
            var failures = results.Where(r => !r.Succeeded).ToList();
            var failedCount = failures.Count;
            var succeededCount = results.Count - failedCount;
            totalSucceededCount += succeededCount;
            totalFailedCount += failedCount;

            logger.LogInformation("Succeeded {Succeeded}, Failed {Failed} documents indexed after {TotalSeconds}s", succeededCount, failedCount, $"{timer.ElapsedMilliseconds / 1000.0:F2}");
            foreach (var failure in failures)
            {
                logger.LogWarning("Key:{Key},Status:{Status},ErrorMessage:{ErrorMessage}", failure.Key, failure.Status, failure.ErrorMessage);
            }
        }

        logger.LogInformation("Succeeded {Succeeded}, Failed {Failed} documents indexed in {TotalSeconds}s", totalSucceededCount, totalFailedCount, $"{timer.ElapsedMilliseconds / 1000.0:F2}");
    }

    /// <summary>
    /// Copies every field of <paramref name="source"/> into a new
    /// <see cref="ExpandoObject"/>, converting nested items and arrays on the way.
    /// </summary>
    /// <returns>The converted object, or <c>null</c> when <paramref name="source"/> is null.</returns>
    private static ExpandoObject? BuildObject(IDataItem? source)
    {
        if (source == null)
        {
            return null;
        }

        var item = new ExpandoObject();
        foreach (string field in source.GetFieldNames().ToList())
        {
            // TryAdd keeps the first value if a field name is reported twice.
            item.TryAdd(field, ConvertValue(source.GetValue(field)));
        }

        return item;
    }

    /// <summary>
    /// Converts one field value: nested items become <see cref="ExpandoObject"/>s, object
    /// sequences become arrays whose elements are converted recursively (so items inside
    /// nested arrays are converted too), and every other value is passed through.
    /// </summary>
    private static object? ConvertValue(object? value)
    {
        return value switch
        {
            IDataItem child => BuildObject(child),
            IEnumerable<object?> array => array.Select(element => ConvertValue(element)).ToArray(),
            _ => value
        };
    }
}
95+
}

0 commit comments

Comments
 (0)