+import path from "path";
 import models from "./models.ts";
 import {
     IAgentRuntime,
     ITextGenerationService,
     ModelProviderName,
     ServiceType,
 } from "./types.ts";
+import fs from "fs";
+import { EmbeddingModel, FlagEmbedding } from "fastembed";

 /**
  * Send a message to the OpenAI API for embedding.
@@ -20,21 +23,35 @@ export async function embed(runtime: IAgentRuntime, input: string) {
         runtime.character.modelProvider !== ModelProviderName.OPENAI &&
         runtime.character.modelProvider !== ModelProviderName.OLLAMA
     ) {
-        const service = runtime.getService<ITextGenerationService>(
-            ServiceType.TEXT_GENERATION
-        );
+
+        // make sure to trim tokens to 8192
+
+        const embeddingModel = await FlagEmbedding.init({
+            model: EmbeddingModel.BGEBaseEN
+        });

-        const instance = service?.getInstance();
+        const embedding: number[] = await embeddingModel.queryEmbed(input);
+        console.log("Embedding dimensions: ", embedding.length);
+        return embedding;

-        if (instance) {
-            return await instance.getEmbeddingResponse(input);
-        }
+        // commented out the text generation service that uses llama
+        // const service = runtime.getService<ITextGenerationService>(
+        //     ServiceType.TEXT_GENERATION
+        // );
+
+        // const instance = service?.getInstance();
+
+        // if (instance) {
+        //     return await instance.getEmbeddingResponse(input);
+        // }
     }
+
+    // TODO: Fix retrieveCachedEmbedding
     // Check if we already have the embedding in the lore
-    // const cachedEmbedding = await retrieveCachedEmbedding(runtime, input);
-    // if (cachedEmbedding) {
-    //     return cachedEmbedding;
-    // }
+    const cachedEmbedding = await retrieveCachedEmbedding(runtime, input);
+    if (cachedEmbedding) {
+        return cachedEmbedding;
+    }

     const requestOptions = {
         method: "POST",
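As context for this hunk: a minimal standalone sketch of the fastembed path added above, with a stand-in for the `// make sure to trim tokens to 8192` TODO. The `FlagEmbedding.init`/`queryEmbed` calls mirror the diff; the `trimToApproxTokenLimit` helper and its characters-per-token heuristic are hypothetical, not part of this change.

```ts
import { EmbeddingModel, FlagEmbedding } from "fastembed";

// Hypothetical stand-in for the token trim: cap input by characters, using a
// rough ~4-chars-per-token heuristic. A real implementation would count
// tokens with the model's own tokenizer.
const MAX_INPUT_TOKENS = 8192;
const APPROX_CHARS_PER_TOKEN = 4;

function trimToApproxTokenLimit(input: string): string {
    return input.slice(0, MAX_INPUT_TOKENS * APPROX_CHARS_PER_TOKEN);
}

async function localEmbed(input: string): Promise<number[]> {
    // BGE-base-en outputs 768-dimensional vectors, which lines up with the
    // `length: 768` squash applied to the OpenAI request in the next hunk.
    const embeddingModel = await FlagEmbedding.init({
        model: EmbeddingModel.BGEBaseEN,
    });
    return await embeddingModel.queryEmbed(trimToApproxTokenLimit(input));
}
```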
@@ -48,7 +65,8 @@ export async function embed(runtime: IAgentRuntime, input: string) {
         body: JSON.stringify({
             input,
             model: embeddingModel,
-            length: 1536,
+            length: 768, // we are squashing dimensions to 768 for OpenAI, even though the model supports 1536
+            // -- this is ok for matryoshka embeddings, but long-term we might want to support 1536
         }),
     };
     try {
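On the `length: 768` comment: matryoshka-style embeddings are trained so that a prefix of the vector is itself a usable embedding, so squashing 1536 dimensions down to 768 amounts to truncating and re-normalizing. A hedged sketch of doing that client-side, in case the provider does not perform the reduction server-side (`squashEmbedding` is a hypothetical helper, not part of this diff):

```ts
// Keep the first `dims` components of a matryoshka-style embedding and
// L2-renormalize so cosine similarity remains well-scaled.
function squashEmbedding(vec: number[], dims: number): number[] {
    const head = vec.slice(0, dims);
    const norm = Math.sqrt(head.reduce((sum, x) => sum + x * x, 0)) || 1;
    return head.map((x) => x / norm);
}

// Usage: squash a 1536-dim OpenAI vector to 768 so it matches the
// 768-dim BGEBaseEN vectors from the local path.
// const squashed = squashEmbedding(openAiEmbedding, 768);
```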