@@ -189,7 +189,7 @@ export class LlamaService extends Service {
         const modelName = "model.gguf";
         this.modelPath = path.join(
             process.env.LLAMALOCAL_PATH?.trim() ?? "./",
-            modelName
+            modelName,
         );
         this.ollamaModel = process.env.OLLAMA_MODEL;
     }
@@ -202,7 +202,7 @@ export class LlamaService extends Service {
     private async ensureInitialized() {
         if (!this.modelInitialized) {
             elizaLogger.info(
-                "Model not initialized, starting initialization..."
+                "Model not initialized, starting initialization...",
             );
             await this.initializeModel();
         } else {
@@ -217,16 +217,16 @@ export class LlamaService extends Service {

             const systemInfo = await si.graphics();
             const hasCUDA = systemInfo.controllers.some((controller) =>
-                controller.vendor.toLowerCase().includes("nvidia")
+                controller.vendor.toLowerCase().includes("nvidia"),
             );

             if (hasCUDA) {
                 elizaLogger.info(
-                    "LlamaService: CUDA detected, using GPU acceleration"
+                    "LlamaService: CUDA detected, using GPU acceleration",
                 );
             } else {
                 elizaLogger.warn(
-                    "LlamaService: No CUDA detected - local response will be slow"
+                    "LlamaService: No CUDA detected - local response will be slow",
                 );
             }

@@ -238,7 +238,7 @@ export class LlamaService extends Service {
             elizaLogger.info("Creating JSON schema grammar...");
             const grammar = new LlamaJsonSchemaGrammar(
                 this.llama,
-                jsonSchemaGrammar as GbnfJsonSchema
+                jsonSchemaGrammar as GbnfJsonSchema,
             );
             this.grammar = grammar;

@@ -257,21 +257,21 @@ export class LlamaService extends Service {
         } catch (error) {
             elizaLogger.error(
                 "Model initialization failed. Deleting model and retrying:",
-                error
+                error,
             );
             try {
                 elizaLogger.info(
-                    "Attempting to delete and re-download model..."
+                    "Attempting to delete and re-download model...",
                 );
                 await this.deleteModel();
                 await this.initializeModel();
             } catch (retryError) {
                 elizaLogger.error(
                     "Model re-initialization failed:",
-                    retryError
+                    retryError,
                 );
                 throw new Error(
-                    `Model initialization failed after retry: ${retryError.message}`
+                    `Model initialization failed after retry: ${retryError.message}`,
                 );
             }
         }
@@ -294,7 +294,7 @@ export class LlamaService extends Service {
                                 response.headers.location
                             ) {
                                 elizaLogger.info(
-                                    `Following redirect to: ${response.headers.location}`
+                                    `Following redirect to: ${response.headers.location}`,
                                 );
                                 downloadModel(response.headers.location);
                                 return;
@@ -303,24 +303,24 @@ export class LlamaService extends Service {
                             if (response.statusCode !== 200) {
                                 reject(
                                     new Error(
-                                        `Failed to download model: HTTP ${response.statusCode}`
-                                    )
+                                        `Failed to download model: HTTP ${response.statusCode}`,
+                                    ),
                                 );
                                 return;
                             }

                             totalSize = Number.parseInt(
                                 response.headers["content-length"] || "0",
-                                10
+                                10,
                             );
                             elizaLogger.info(
-                                `Downloading model: Hermes-3-Llama-3.1-8B.Q8_0.gguf`
+                                `Downloading model: Hermes-3-Llama-3.1-8B.Q8_0.gguf`,
                             );
                             elizaLogger.info(
-                                `Download location: ${this.modelPath}`
+                                `Download location: ${this.modelPath}`,
                             );
                             elizaLogger.info(
-                                `Total size: ${(totalSize / 1024 / 1024).toFixed(2)} MB`
+                                `Total size: ${(totalSize / 1024 / 1024).toFixed(2)} MB`,
                             );

                             response.pipe(file);
@@ -336,7 +336,7 @@ export class LlamaService extends Service {
                                           ).toFixed(1)
                                         : "0.0";
                                 const dots = ".".repeat(
-                                    Math.floor(Number(progress) / 5)
+                                    Math.floor(Number(progress) / 5),
                                 );
                                 progressString = `Downloading model: [${dots.padEnd(20, " ")}] ${progress}%`;
                                 elizaLogger.progress(progressString);
@@ -353,17 +353,17 @@ export class LlamaService extends Service {
                                 fs.unlink(this.modelPath, () => {});
                                 reject(
                                     new Error(
-                                        `Model download failed: ${error.message}`
-                                    )
+                                        `Model download failed: ${error.message}`,
+                                    ),
                                 );
                             });
                         })
                         .on("error", (error) => {
                             fs.unlink(this.modelPath, () => {});
                             reject(
                                 new Error(
-                                    `Model download request failed: ${error.message}`
-                                )
+                                    `Model download request failed: ${error.message}`,
+                                ),
                             );
                         });
                 };
@@ -393,7 +393,7 @@ export class LlamaService extends Service {
         stop: string[],
         frequency_penalty: number,
         presence_penalty: number,
-        max_tokens: number
+        max_tokens: number,
     ): Promise<any> {
         await this.ensureInitialized();
         return new Promise((resolve, reject) => {
@@ -418,7 +418,7 @@ export class LlamaService extends Service {
         stop: string[],
         frequency_penalty: number,
         presence_penalty: number,
-        max_tokens: number
+        max_tokens: number,
     ): Promise<string> {
         await this.ensureInitialized();

@@ -460,7 +460,7 @@ export class LlamaService extends Service {
                         message.frequency_penalty,
                         message.presence_penalty,
                         message.max_tokens,
-                        message.useGrammar
+                        message.useGrammar,
                     );
                     message.resolve(response);
                 } catch (error) {
@@ -509,14 +509,17 @@ export class LlamaService extends Service {
         frequency_penalty: number,
         presence_penalty: number,
         max_tokens: number,
-        useGrammar: boolean
+        useGrammar: boolean,
     ): Promise<any | string> {
+        context = context +=
+            "\nIMPORTANT: Escape any quotes in any string fields with a backslash so the JSON is valid.";
+
         const ollamaModel = process.env.OLLAMA_MODEL;
         if (ollamaModel) {
             const ollamaUrl =
                 process.env.OLLAMA_SERVER_URL || "http://localhost:11434";
             elizaLogger.info(
-                `Using Ollama API at ${ollamaUrl} with model ${ollamaModel}`
+                `Using Ollama API at ${ollamaUrl} with model ${ollamaModel}`,
             );

             const response = await fetch(`${ollamaUrl}/api/generate`, {
@@ -538,7 +541,7 @@ export class LlamaService extends Service {

             if (!response.ok) {
                 throw new Error(
-                    `Ollama request failed: ${response.statusText}`
+                    `Ollama request failed: ${response.statusText}`,
                 );
             }

@@ -552,11 +555,12 @@ export class LlamaService extends Service {
552
555
}
553
556
554
557
const session = new LlamaChatSession ( {
555
- contextSequence : this . sequence
558
+ contextSequence : this . sequence ,
556
559
} ) ;
557
560
558
- const wordsToPunishTokens = wordsToPunish
559
- . flatMap ( ( word ) => this . model ! . tokenize ( word ) ) ;
561
+ const wordsToPunishTokens = wordsToPunish . flatMap ( ( word ) =>
562
+ this . model ! . tokenize ( word ) ,
563
+ ) ;
560
564
561
565
const repeatPenalty : LlamaChatSessionRepeatPenalty = {
562
566
punishTokensFilter : ( ) => wordsToPunishTokens ,
@@ -566,11 +570,12 @@ export class LlamaService extends Service {
         };

         const response = await session.prompt(context, {
-            onTextChunk(chunk) { // stream the response to the console as it's being generated
+            onTextChunk(chunk) {
+                // stream the response to the console as it's being generated
                 process.stdout.write(chunk);
             },
             temperature: Number(temperature),
-            repeatPenalty: repeatPenalty
+            repeatPenalty: repeatPenalty,
         });

         if (!response) {
@@ -612,7 +617,7 @@ export class LlamaService extends Service {
             const embeddingModel =
                 process.env.OLLAMA_EMBEDDING_MODEL || "mxbai-embed-large";
             elizaLogger.info(
-                `Using Ollama API for embeddings with model ${embeddingModel} (base: ${ollamaModel})`
+                `Using Ollama API for embeddings with model ${embeddingModel} (base: ${ollamaModel})`,
             );

             const response = await fetch(`${ollamaUrl}/api/embeddings`, {
@@ -626,7 +631,7 @@ export class LlamaService extends Service {

             if (!response.ok) {
                 throw new Error(
-                    `Ollama embeddings request failed: ${response.statusText}`
+                    `Ollama embeddings request failed: ${response.statusText}`,
                 );
             }

@@ -644,7 +649,7 @@ export class LlamaService extends Service {
             const embeddingModel =
                 process.env.OLLAMA_EMBEDDING_MODEL || "mxbai-embed-large";
             elizaLogger.info(
-                `Using Ollama API for embeddings with model ${embeddingModel} (base: ${this.ollamaModel})`
+                `Using Ollama API for embeddings with model ${embeddingModel} (base: ${this.ollamaModel})`,
             );

             const response = await fetch(`${ollamaUrl}/api/embeddings`, {
@@ -671,7 +676,7 @@ export class LlamaService extends Service {
             const ollamaUrl =
                 process.env.OLLAMA_SERVER_URL || "http://localhost:11434";
             elizaLogger.info(
-                `Using Ollama API at ${ollamaUrl} with model ${ollamaModel}`
+                `Using Ollama API at ${ollamaUrl} with model ${ollamaModel}`,
             );

             const response = await fetch(`${ollamaUrl}/api/generate`, {
@@ -706,7 +711,7 @@ export class LlamaService extends Service {
             const embeddingModel =
                 process.env.OLLAMA_EMBEDDING_MODEL || "mxbai-embed-large";
             elizaLogger.info(
-                `Using Ollama API for embeddings with model ${embeddingModel} (base: ${ollamaModel})`
+                `Using Ollama API for embeddings with model ${embeddingModel} (base: ${ollamaModel})`,
            );

             const response = await fetch(`${ollamaUrl}/api/embeddings`, {
@@ -720,7 +725,7 @@ export class LlamaService extends Service {

             if (!response.ok) {
                 throw new Error(
-                    `Ollama embeddings request failed: ${response.statusText}`
+                    `Ollama embeddings request failed: ${response.statusText}`,
                 );
             }

@@ -736,8 +741,9 @@ export class LlamaService extends Service {
         const tokens = this.model!.tokenize(prompt);

         // tokenize the words to punish
-        const wordsToPunishTokens = wordsToPunish
-            .flatMap((word) => this.model!.tokenize(word));
+        const wordsToPunishTokens = wordsToPunish.flatMap((word) =>
+            this.model!.tokenize(word),
+        );

         const repeatPenalty: LlamaContextSequenceRepeatPenalty = {
             punishTokens: () => wordsToPunishTokens,
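
Aside from the `context` append, the restructured `wordsToPunishTokens` expressions, and the split `onTextChunk` comment, every hunk above only adds a trailing comma to the last item of a multi-line argument or property list. That is the style Prettier emits with `trailingComma: "all"`, its default since version 3. As a minimal sketch, and only an assumption about the project's formatter settings rather than its actual configuration, the same shape can be reproduced programmatically:

// Sketch: reproduce the trailing-comma formatting seen in this diff.
// Assumes Prettier >= 3 is installed; the snippet and options are illustrative only.
import * as prettier from "prettier";

const source = [
    "throw new Error(",
    "    `Model initialization failed after retry: ${retryError.message}`",
    ");",
].join("\n");

async function main(): Promise<void> {
    // prettier.format() is async in Prettier 3 and returns the formatted source.
    const formatted = await prettier.format(source, {
        parser: "typescript",
        tabWidth: 4,
        trailingComma: "all", // emits the trailing commas added throughout this diff
    });
    console.log(formatted);
}

main().catch(console.error);

Because the collapsed call would exceed the default 80-column print width, the output should keep the call broken across lines and add a trailing comma after the template literal, matching the `+` lines above.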