@@ -11,6 +11,8 @@ import {
11
11
GbnfJsonSchema ,
12
12
getLlama ,
13
13
Llama ,
14
+ LlamaChatSession ,
15
+ LlamaChatSessionRepeatPenalty ,
14
16
LlamaContext ,
15
17
LlamaContextSequence ,
16
18
LlamaContextSequenceRepeatPenalty ,
@@ -549,49 +551,28 @@ export class LlamaService extends Service {
549
551
throw new Error ( "Model not initialized." ) ;
550
552
}
551
553
552
- const tokens = this . model ! . tokenize ( context ) ;
554
+ const session = new LlamaChatSession ( {
555
+ contextSequence : this . sequence
556
+ } ) ;
553
557
554
- // tokenize the words to punish
555
558
const wordsToPunishTokens = wordsToPunish
556
559
. map ( ( word ) => this . model ! . tokenize ( word ) )
557
560
. flat ( ) ;
558
561
559
- const repeatPenalty : LlamaContextSequenceRepeatPenalty = {
560
- punishTokens : ( ) => wordsToPunishTokens ,
562
+ const repeatPenalty : LlamaChatSessionRepeatPenalty = {
563
+ punishTokensFilter : ( ) => wordsToPunishTokens ,
561
564
penalty : 1.2 ,
562
565
frequencyPenalty : frequency_penalty ,
563
566
presencePenalty : presence_penalty ,
564
567
} ;
565
568
566
- const responseTokens : Token [ ] = [ ] ;
567
-
568
- for await ( const token of this . sequence . evaluate ( tokens , {
569
+ const response = await session . prompt ( context , {
570
+ onTextChunk ( chunk ) { // stream the response to the console as it's being generated
571
+ process . stdout . write ( chunk ) ;
572
+ } ,
569
573
temperature : Number ( temperature ) ,
570
- repeatPenalty : repeatPenalty ,
571
- grammarEvaluationState : useGrammar ? this . grammar : undefined ,
572
- yieldEogToken : false ,
573
- } ) ) {
574
- const current = this . model . detokenize ( [ ...responseTokens , token ] ) ;
575
- if ( [ ...stop ] . some ( ( s ) => current . includes ( s ) ) ) {
576
- elizaLogger . info ( "Stop sequence found" ) ;
577
- break ;
578
- }
579
-
580
- responseTokens . push ( token ) ;
581
- process . stdout . write ( this . model ! . detokenize ( [ token ] ) ) ;
582
- if ( useGrammar ) {
583
- if ( current . replaceAll ( "\n" , "" ) . includes ( "}```" ) ) {
584
- elizaLogger . info ( "JSON block found" ) ;
585
- break ;
586
- }
587
- }
588
- if ( responseTokens . length > max_tokens ) {
589
- elizaLogger . info ( "Max tokens reached" ) ;
590
- break ;
591
- }
592
- }
593
-
594
- const response = this . model ! . detokenize ( responseTokens ) ;
574
+ repeatPenalty : repeatPenalty
575
+ } ) ;
595
576
596
577
if ( ! response ) {
597
578
throw new Error ( "Response is undefined" ) ;
0 commit comments