Skip to content

Commit 35d857e

Browse files
zoe27 and odilitime
authored
fix: fix the chat stuck in infinite loop (#1755)
* fix the chat stuck in infinite loop * perfect the PR and keep the temperature and wordsToPunish in the generate response * Update README.md * Trigger CI checks --------- Co-authored-by: Odilitime <janesmith@airmail.cc>
1 parent aac570b commit 35d857e

File tree

1 file changed

+13
-32
lines changed
  • packages/plugin-node/src/services

1 file changed

+13
-32
lines changed

packages/plugin-node/src/services/llama.ts

+13-32
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ import {
1111
GbnfJsonSchema,
1212
getLlama,
1313
Llama,
14+
LlamaChatSession,
15+
LlamaChatSessionRepeatPenalty,
1416
LlamaContext,
1517
LlamaContextSequence,
1618
LlamaContextSequenceRepeatPenalty,
@@ -549,49 +551,28 @@ export class LlamaService extends Service {
549551
throw new Error("Model not initialized.");
550552
}
551553

552-
const tokens = this.model!.tokenize(context);
554+
const session = new LlamaChatSession({
555+
contextSequence: this.sequence
556+
});
553557

554-
// tokenize the words to punish
555558
const wordsToPunishTokens = wordsToPunish
556559
.map((word) => this.model!.tokenize(word))
557560
.flat();
558561

559-
const repeatPenalty: LlamaContextSequenceRepeatPenalty = {
560-
punishTokens: () => wordsToPunishTokens,
562+
const repeatPenalty: LlamaChatSessionRepeatPenalty = {
563+
punishTokensFilter: () => wordsToPunishTokens,
561564
penalty: 1.2,
562565
frequencyPenalty: frequency_penalty,
563566
presencePenalty: presence_penalty,
564567
};
565568

566-
const responseTokens: Token[] = [];
567-
568-
for await (const token of this.sequence.evaluate(tokens, {
569+
const response = await session.prompt(context, {
570+
onTextChunk(chunk) { // stream the response to the console as it's being generated
571+
process.stdout.write(chunk);
572+
},
569573
temperature: Number(temperature),
570-
repeatPenalty: repeatPenalty,
571-
grammarEvaluationState: useGrammar ? this.grammar : undefined,
572-
yieldEogToken: false,
573-
})) {
574-
const current = this.model.detokenize([...responseTokens, token]);
575-
if ([...stop].some((s) => current.includes(s))) {
576-
elizaLogger.info("Stop sequence found");
577-
break;
578-
}
579-
580-
responseTokens.push(token);
581-
process.stdout.write(this.model!.detokenize([token]));
582-
if (useGrammar) {
583-
if (current.replaceAll("\n", "").includes("}```")) {
584-
elizaLogger.info("JSON block found");
585-
break;
586-
}
587-
}
588-
if (responseTokens.length > max_tokens) {
589-
elizaLogger.info("Max tokens reached");
590-
break;
591-
}
592-
}
593-
594-
const response = this.model!.detokenize(responseTokens);
574+
repeatPenalty: repeatPenalty
575+
});
595576

596577
if (!response) {
597578
throw new Error("Response is undefined");

0 commit comments

Comments
 (0)