Skip to content

Commit 77a81c8

Browse files
committed
Setting up Mongo Vector as an extension of the primary Mongo extension
1 parent 3bd4579 commit 77a81c8

17 files changed

+160
-10
lines changed

CosmosDbDataMigrationTool.sln

+7
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,8 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "PostgreSQL", "PostgreSQL",
103103
Extensions\PostgreSQL\README.md = Extensions\PostgreSQL\README.md
104104
EndProjectSection
105105
EndProject
106+
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Cosmos.DataTransfer.MongoExtension", "Extensions\Mongo\Cosmos.DataTransfer.MongoExtension\Cosmos.DataTransfer.MongoExtension.csproj", "{31BC84E1-55E5-45AA-BFAC-90732F20588B}"
107+
EndProject
106108
Global
107109
GlobalSection(SolutionConfigurationPlatforms) = preSolution
108110
Debug|Any CPU = Debug|Any CPU
@@ -193,6 +195,10 @@ Global
193195
{85820167-DB94-458B-B09B-9E823996C692}.Debug|Any CPU.Build.0 = Debug|Any CPU
194196
{85820167-DB94-458B-B09B-9E823996C692}.Release|Any CPU.ActiveCfg = Release|Any CPU
195197
{85820167-DB94-458B-B09B-9E823996C692}.Release|Any CPU.Build.0 = Release|Any CPU
198+
{31BC84E1-55E5-45AA-BFAC-90732F20588B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
199+
{31BC84E1-55E5-45AA-BFAC-90732F20588B}.Debug|Any CPU.Build.0 = Debug|Any CPU
200+
{31BC84E1-55E5-45AA-BFAC-90732F20588B}.Release|Any CPU.ActiveCfg = Release|Any CPU
201+
{31BC84E1-55E5-45AA-BFAC-90732F20588B}.Release|Any CPU.Build.0 = Release|Any CPU
196202
EndGlobalSection
197203
GlobalSection(SolutionProperties) = preSolution
198204
HideSolutionNode = FALSE
@@ -226,6 +232,7 @@ Global
226232
{40AD8890-BD78-48F5-AE76-2C2FC6F15B7E} = {39930280-DA29-4814-837B-FA7F252EB3EC}
227233
{85820167-DB94-458B-B09B-9E823996C692} = {1B927C5F-50FC-42A6-BAF6-B00E6D760543}
228234
{1B927C5F-50FC-42A6-BAF6-B00E6D760543} = {A8A1CEAB-2D82-460C-9B86-74ABD17CD201}
235+
{31BC84E1-55E5-45AA-BFAC-90732F20588B} = {F18E789A-D32D-48D3-B75F-1196D7215F74}
229236
EndGlobalSection
230237
GlobalSection(ExtensibilityGlobals) = postSolution
231238
SolutionGuid = {662B3F27-70D8-45E6-A1C0-1438A9C8A542}

Extensions/Mongo/Cosmos.DataTransfer.MongoVectorExtension/Context.cs Extensions/Mongo/Cosmos.DataTransfer.MongoExtension/Context.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
using MongoDB.Driver;
33
using MongoDB.Driver.Core.Events;
44

5-
namespace Cosmos.DataTransfer.MongoVectorExtension;
5+
namespace Cosmos.DataTransfer.MongoExtension;
66
public class Context
77
{
88
private readonly IMongoDatabase database = null!;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
<Project Sdk="Microsoft.NET.Sdk">
2+
3+
<PropertyGroup>
4+
<TargetFramework>net6.0</TargetFramework>
5+
<ImplicitUsings>enable</ImplicitUsings>
6+
<Nullable>enable</Nullable>
7+
<OutputType>Exe</OutputType>
8+
</PropertyGroup>
9+
10+
<ItemGroup>
11+
<PackageReference Include="Microsoft.Extensions.Configuration.Binder" Version="6.0.0" />
12+
<PackageReference Include="MongoDB.Driver" Version="2.19.1" />
13+
<PackageReference Include="System.ComponentModel.Composition" Version="6.0.0" />
14+
</ItemGroup>
15+
16+
<ItemGroup>
17+
<ProjectReference Include="..\..\..\Interfaces\Cosmos.DataTransfer.Interfaces\Cosmos.DataTransfer.Interfaces.csproj" />
18+
</ItemGroup>
19+
20+
<Target Name="PublishToExtensionsFolder" AfterTargets="Build" Condition=" '$(Configuration)' == 'Debug' ">
21+
<Exec Command="dotnet publish --configuration $(Configuration) --no-build -p:PublishProfile=PublishToExtensionsFolder" />
22+
</Target>
23+
24+
</Project>

Extensions/Mongo/Cosmos.DataTransfer.MongoVectorExtension/IRepository.cs Extensions/Mongo/Cosmos.DataTransfer.MongoExtension/IRepository.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
using System.Linq.Expressions;
22

3-
namespace Cosmos.DataTransfer.MongoVectorExtension;
3+
namespace Cosmos.DataTransfer.MongoExtension;
44

55
public interface IRepository<TDocument>
66
{

Extensions/Mongo/Cosmos.DataTransfer.MongoVectorExtension/MongoDataItem.cs Extensions/Mongo/Cosmos.DataTransfer.MongoExtension/MongoDataItem.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
using Cosmos.DataTransfer.Interfaces;
22
using MongoDB.Bson;
33

4-
namespace Cosmos.DataTransfer.MongoVectorExtension;
4+
namespace Cosmos.DataTransfer.MongoExtension;
55
public class MongoDataItem : IDataItem
66
{
77
private readonly BsonDocument record;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
using System.ComponentModel.Composition;
2+
using Cosmos.DataTransfer.Interfaces;
3+
using Cosmos.DataTransfer.MongoExtension.Settings;
4+
using Microsoft.Extensions.Configuration;
5+
using Microsoft.Extensions.Logging;
6+
using MongoDB.Bson;
7+
8+
namespace Cosmos.DataTransfer.MongoExtension;
9+
[Export(typeof(IDataSinkExtension))]
10+
public class MongoDataSinkExtension : IDataSinkExtensionWithSettings
11+
{
12+
public string DisplayName => "MongoDB";
13+
14+
public async Task WriteAsync(IAsyncEnumerable<IDataItem> dataItems, IConfiguration config, IDataSourceExtension dataSource, ILogger logger, CancellationToken cancellationToken = default)
15+
{
16+
var settings = config.Get<MongoSinkSettings>();
17+
settings.Validate();
18+
19+
if (!string.IsNullOrEmpty(settings.ConnectionString) && !string.IsNullOrEmpty(settings.DatabaseName) && !string.IsNullOrEmpty(settings.Collection))
20+
{
21+
var context = new Context(settings.ConnectionString, settings.DatabaseName);
22+
var repo = context.GetRepository<BsonDocument>(settings.Collection);
23+
24+
var batchSize = settings.BatchSize ?? 1000;
25+
26+
var objects = new List<BsonDocument>();
27+
int itemCount = 0;
28+
await foreach (var item in dataItems.WithCancellation(cancellationToken))
29+
{
30+
var dict = item.BuildDynamicObjectTree();
31+
objects.Add(new BsonDocument(dict));
32+
itemCount++;
33+
34+
if (objects.Count == batchSize)
35+
{
36+
await repo.AddRange(objects);
37+
logger.LogInformation("Added {ItemCount} items to collection '{Collection}'", itemCount, settings.Collection);
38+
objects.Clear();
39+
}
40+
}
41+
42+
if (objects.Any())
43+
{
44+
await repo.AddRange(objects);
45+
}
46+
47+
if (itemCount > 0)
48+
logger.LogInformation("Added {ItemCount} total items to collection '{Collection}'", itemCount, settings.Collection);
49+
else
50+
logger.LogWarning("No items added to collection '{Collection}'", settings.Collection);
51+
}
52+
}
53+
54+
public IEnumerable<IDataExtensionSettings> GetSettings()
55+
{
56+
yield return new MongoSinkSettings();
57+
}
58+
}

Extensions/Mongo/Cosmos.DataTransfer.MongoVectorExtension/MongoVectorDataSourceExtension.cs Extensions/Mongo/Cosmos.DataTransfer.MongoExtension/MongoDataSourceExtension.cs

+4-4
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,16 @@
11
using System.ComponentModel.Composition;
22
using System.Runtime.CompilerServices;
33
using Cosmos.DataTransfer.Interfaces;
4-
using Cosmos.DataTransfer.MongoVectorExtension.Settings;
4+
using Cosmos.DataTransfer.MongoExtension.Settings;
55
using Microsoft.Extensions.Configuration;
66
using Microsoft.Extensions.Logging;
77
using MongoDB.Bson;
88

9-
namespace Cosmos.DataTransfer.MongoVectorExtension;
9+
namespace Cosmos.DataTransfer.MongoExtension;
1010
[Export(typeof(IDataSourceExtension))]
11-
internal class MongoVectorDataSourceExtension : IDataSourceExtensionWithSettings
11+
internal class MongoDataSourceExtension : IDataSourceExtensionWithSettings
1212
{
13-
public string DisplayName => $"MongoDB-Vector{ExtensionExtensions.BetaExtensionTag}";
13+
public string DisplayName => "MongoDB";
1414

1515
public async IAsyncEnumerable<IDataItem> ReadAsync(IConfiguration config, ILogger logger, [EnumeratorCancellation] CancellationToken cancellationToken = default)
1616
{

Extensions/Mongo/Cosmos.DataTransfer.MongoVectorExtension/MongoRepository.cs Extensions/Mongo/Cosmos.DataTransfer.MongoExtension/MongoRepository.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
using System.Linq.Expressions;
22
using MongoDB.Driver;
33

4-
namespace Cosmos.DataTransfer.MongoVectorExtension;
4+
namespace Cosmos.DataTransfer.MongoExtension;
55

66
public class MongoRepository<TDocument> : IRepository<TDocument>
77
{
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Console.WriteLine("Starting Mongo extension");
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
<?xml version="1.0" encoding="utf-8"?>
2+
<!--
3+
https://go.microsoft.com/fwlink/?LinkID=208121.
4+
-->
5+
<Project>
6+
<PropertyGroup Condition=" '$(Configuration)' == 'Debug' ">
7+
<Configuration>Debug</Configuration>
8+
<Platform>Any CPU</Platform>
9+
<PublishDir>..\..\..\Core\Cosmos.DataTransfer.Core\bin\Debug\net6.0\Extensions</PublishDir>
10+
<PublishProtocol>FileSystem</PublishProtocol>
11+
<_TargetId>Folder</_TargetId>
12+
<TargetFramework>net6.0</TargetFramework>
13+
<SelfContained>false</SelfContained>
14+
</PropertyGroup>
15+
<PropertyGroup Condition=" '$(Configuration)' != 'Debug' ">
16+
<Configuration>Release</Configuration>
17+
<Platform>Any CPU</Platform>
18+
<PublishDir>..\..\..\Core\Cosmos.DataTransfer.Core\bin\Release\net6.0\Extensions</PublishDir>
19+
<PublishProtocol>FileSystem</PublishProtocol>
20+
<_TargetId>Folder</_TargetId>
21+
<TargetFramework>net6.0</TargetFramework>
22+
<SelfContained>false</SelfContained>
23+
</PropertyGroup>
24+
</Project>

Extensions/Mongo/Cosmos.DataTransfer.MongoVectorExtension/Settings/MongoBaseSettings.cs Extensions/Mongo/Cosmos.DataTransfer.MongoExtension/Settings/MongoBaseSettings.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
using Cosmos.DataTransfer.Interfaces;
33
using Cosmos.DataTransfer.Interfaces.Manifest;
44

5-
namespace Cosmos.DataTransfer.MongoVectorExtension.Settings;
5+
namespace Cosmos.DataTransfer.MongoExtension.Settings;
66
public class MongoBaseSettings : IDataExtensionSettings
77
{
88
[Required]
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
using System.ComponentModel.DataAnnotations;
2+
3+
namespace Cosmos.DataTransfer.MongoExtension.Settings;
4+
public class MongoSinkSettings : MongoBaseSettings
5+
{
6+
[Required]
7+
public string? Collection { get; set; }
8+
9+
public int? BatchSize { get; set; }
10+
}

Extensions/Mongo/Cosmos.DataTransfer.MongoVectorExtension/Settings/MongoSourceSettings.cs Extensions/Mongo/Cosmos.DataTransfer.MongoExtension/Settings/MongoSourceSettings.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
namespace Cosmos.DataTransfer.MongoVectorExtension.Settings;
1+
namespace Cosmos.DataTransfer.MongoExtension.Settings;
22
public class MongoSourceSettings : MongoBaseSettings
33
{
44
public string? Collection { get; set; }

Extensions/Mongo/Cosmos.DataTransfer.MongoVectorExtension/Cosmos.DataTransfer.MongoVectorExtension.csproj

+1
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
<ItemGroup>
1818
<ProjectReference Include="..\..\..\Interfaces\Cosmos.DataTransfer.Interfaces\Cosmos.DataTransfer.Interfaces.csproj" />
19+
<ProjectReference Include="..\Cosmos.DataTransfer.MongoExtension\Cosmos.DataTransfer.MongoExtension.csproj" />
1920
</ItemGroup>
2021

2122
<Target Name="PublishToExtensionsFolder" AfterTargets="Build" Condition=" '$(Configuration)' == 'Debug' ">

Extensions/Mongo/Cosmos.DataTransfer.MongoVectorExtension/MongoVectorDataSinkExtension.cs

+1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
using Azure;
33
using Azure.AI.OpenAI;
44
using Cosmos.DataTransfer.Interfaces;
5+
using Cosmos.DataTransfer.MongoExtension;
56
using Cosmos.DataTransfer.MongoVectorExtension.Settings;
67
using Microsoft.Extensions.Configuration;
78
using Microsoft.Extensions.Logging;

Extensions/Mongo/Cosmos.DataTransfer.MongoVectorExtension/Settings/MongoVectorSinkSettings.cs

+1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
using System.ComponentModel.DataAnnotations;
2+
using Cosmos.DataTransfer.MongoExtension.Settings;
23

34
namespace Cosmos.DataTransfer.MongoVectorExtension.Settings;
45
public class MongoVectorSinkSettings : MongoBaseSettings

Extensions/Mongo/README.md

+23
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,28 @@ Source and sink settings require both `ConnectionString` and `DatabaseName` para
2020

2121
### Sink
2222

23+
```json
24+
{
25+
"ConnectionString": "",
26+
"DatabaseName": "",
27+
"Collection": ""
28+
}
29+
```
30+
31+
# MongoDB Vector Extension (Beta)
32+
33+
The MongoDB Vector extension is a sink-only extension that builds on the MongoDB extension by providing additional capabilities for generating embeddings using Azure OpenAI APIs.
34+
35+
> **Note**: When specifying the MongoDB Vector extension as the Sink property in configuration, utilize the name **MongoDB-Vector(beta)**.
36+
37+
## Settings
38+
39+
The settings are based on the MongoDB extension settings with additional parameters for generating embeddings.
40+
41+
### Additional Sink Settings
42+
43+
The sink settings require the following additional parameters:
44+
2345
- `GenerateEmbedding`: If set to true, the sink will generate embeddings for the records before writing them to the database. The sink requires the `OpenAIUrl`, `OpenAIKey`, and `OpenAIDeploymentModel` parameters to be set. The following parameters are required when this is true:
2446
- `OpenAIUrl`: The URL of the OpenAI API
2547
- `OpenAIKey`: The API key for the OpenAI API
@@ -41,3 +63,4 @@ Source and sink settings require both `ConnectionString` and `DatabaseName` para
4163
"DestPropEmbedding": ""
4264
}
4365
```
66+

0 commit comments

Comments
 (0)