Skip to content

Commit 436fa6c

Browse files
committed
clear kv cache on each completion
1 parent d5d1781 commit 436fa6c

File tree

3 files changed

+15
-13
lines changed

3 files changed

+15
-13
lines changed

godot/addons/godot-llama-cpp/godot-llama-cpp.gdextension

+1-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ macos.release = "res://addons/godot-llama-cpp/lib/libgodot-llama-cpp-aarch64-mac
1010
windows.debug.x86_32 = "res://addons/godot-llama-cpp/lib/libgodot-llama-cpp.windows.template_debug.x86_32.dll"
1111
windows.release.x86_32 = "res://addons/godot-llama-cpp/lib/libgodot-llama-cpp.windows.template_release.x86_32.dll"
1212
windows.debug.x86_64 = "res://addons/godot-llama-cpp/lib/godot-llama-cpp-x86_64-windows-gnu-Debug.dll"
13-
windows.release.x86_64 = "res://addons/godot-llama-cpp/lib/libgodot-llama-cpp.windows.template_release.x86_64.dll"
13+
windows.release.x86_64 = "res://addons/godot-llama-cpp/lib/godot-llama-cpp-x86_64-windows-gnu-ReleaseFast.dll"
1414
linux.debug.x86_64 = "res://addons/godot-llama-cpp/lib/libgodot-llama-cpp.linux.template_debug.x86_64.so"
1515
linux.release.x86_64 = "res://addons/godot-llama-cpp/lib/libgodot-llama-cpp.linux.template_release.x86_64.so"
1616
linux.debug.arm64 = "res://addons/godot-llama-cpp/lib/libgodot-llama-cpp.linux.template_debug.arm64.so"

src/llama_context.cpp

+8-7
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,8 @@ void LlamaContext::_ready() {
3030
return;
3131
}
3232

33-
ctx_params.seed = -1;
34-
ctx_params.n_ctx = 2048;
33+
ctx_params.seed = -1;
34+
ctx_params.n_ctx = 4096;
3535
int32_t n_threads = OS::get_singleton()->get_processor_count();
3636
ctx_params.n_threads = n_threads;
3737
ctx_params.n_threads_batch = n_threads;
@@ -66,14 +66,14 @@ void LlamaContext::_fulfill_completion(const String &prompt) {
6666
return;
6767
}
6868

69-
llama_batch batch = llama_batch_init(tokens_list.size(), 0, 1);
70-
7169
for (size_t i = 0; i < tokens_list.size(); i++) {
7270
llama_batch_add(batch, tokens_list[i], i, { 0 }, false);
7371
}
74-
72+
7573
batch.logits[batch.n_tokens - 1] = true;
7674

75+
llama_kv_cache_clear(ctx);
76+
7777
int decode_res = llama_decode(ctx, batch);
7878
if (decode_res != 0) {
7979
UtilityFunctions::printerr(vformat("%s: Failed to decode prompt with error code: %d", __func__, decode_res));
@@ -129,8 +129,6 @@ void LlamaContext::_fulfill_completion(const String &prompt) {
129129
break;
130130
}
131131
}
132-
133-
llama_batch_free(batch);
134132
}
135133

136134
void LlamaContext::set_model(const Ref<LlamaModel> p_model) {
@@ -145,6 +143,9 @@ LlamaContext::~LlamaContext() {
145143
if (ctx) {
146144
llama_free(ctx);
147145
}
146+
147+
llama_batch_free(batch);
148+
148149
if (task_id) {
149150
WorkerThreadPool::get_singleton()->wait_for_task_completion(task_id);
150151
}

src/llama_context.h

+6-5
Original file line numberDiff line numberDiff line change
@@ -12,19 +12,20 @@ class LlamaContext : public Node {
1212
private:
1313
Ref<LlamaModel> model;
1414
llama_context *ctx = nullptr;
15-
llama_context_params ctx_params = llama_context_default_params();
16-
int task_id;
15+
llama_context_params ctx_params = llama_context_default_params();
16+
llama_batch batch = llama_batch_init(4096, 0, 1);
17+
int task_id;
1718

1819
protected:
1920
static void _bind_methods();
2021

2122
public:
2223
void set_model(const Ref<LlamaModel> model);
2324
Ref<LlamaModel> get_model();
24-
Variant request_completion(const String &prompt);
25-
void _fulfill_completion(const String &prompt);
25+
Variant request_completion(const String &prompt);
26+
void _fulfill_completion(const String &prompt);
2627
virtual void _ready() override;
27-
~LlamaContext();
28+
~LlamaContext();
2829
};
2930
} //namespace godot
3031

0 commit comments

Comments (0)