Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: resolve chat getting stuck in an infinite loop #1755

Merged
merged 11 commits into from
Jan 12, 2025
45 changes: 13 additions & 32 deletions packages/plugin-node/src/services/llama.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ import {
GbnfJsonSchema,
getLlama,
Llama,
LlamaChatSession,
LlamaChatSessionRepeatPenalty,
LlamaContext,
LlamaContextSequence,
LlamaContextSequenceRepeatPenalty,
Expand Down Expand Up @@ -549,49 +551,28 @@ export class LlamaService extends Service {
throw new Error("Model not initialized.");
}

const tokens = this.model!.tokenize(context);
const session = new LlamaChatSession({
contextSequence: this.sequence
});

// tokenize the words to punish
const wordsToPunishTokens = wordsToPunish
.map((word) => this.model!.tokenize(word))
.flat();

const repeatPenalty: LlamaContextSequenceRepeatPenalty = {
punishTokens: () => wordsToPunishTokens,
const repeatPenalty: LlamaChatSessionRepeatPenalty = {
punishTokensFilter: () => wordsToPunishTokens,
penalty: 1.2,
frequencyPenalty: frequency_penalty,
presencePenalty: presence_penalty,
};

const responseTokens: Token[] = [];

for await (const token of this.sequence.evaluate(tokens, {
const response = await session.prompt(context, {
onTextChunk(chunk) { // stream the response to the console as it's being generated
process.stdout.write(chunk);
},
temperature: Number(temperature),
repeatPenalty: repeatPenalty,
grammarEvaluationState: useGrammar ? this.grammar : undefined,
yieldEogToken: false,
})) {
const current = this.model.detokenize([...responseTokens, token]);
if ([...stop].some((s) => current.includes(s))) {
elizaLogger.info("Stop sequence found");
break;
}

responseTokens.push(token);
process.stdout.write(this.model!.detokenize([token]));
if (useGrammar) {
if (current.replaceAll("\n", "").includes("}```")) {
elizaLogger.info("JSON block found");
break;
}
}
if (responseTokens.length > max_tokens) {
elizaLogger.info("Max tokens reached");
break;
}
}

const response = this.model!.detokenize(responseTokens);
repeatPenalty: repeatPenalty
});

if (!response) {
throw new Error("Response is undefined");
Expand Down
Loading