
Commit e0e6291

finally fix fatal logic flaw

committed Jun 17, 2024 · 1 parent 154bdcb · commit e0e6291

9 files changed: +103 -41 lines changed
 

.gitignore (+1 -1)

@@ -49,7 +49,7 @@ compile_commands.json
 .vscode/*
 !.vscode/extensions.json
 
-zig-cache
+.zig-cache
 zig-out
 *.gguf

README.md (+60 -1)

@@ -13,5 +13,64 @@ Run large language models in [Godot](https://godotengine.org). Powered by [llama.
 ![GitHub last commit](https://img.shields.io/github/last-commit/hazelnutcloud/godot-llama-cpp)
 ![GitHub License](https://img.shields.io/github/license/hazelnutcloud/godot-llama-cpp)
 
-
 </div>
+
+## Overview
+
+This library aims to provide a high-level interface to run large language models in Godot, following Godot's node-based design principles.
+
+```gdscript
+@onready var llama_context = %LlamaContext
+
+var messages = [
+    { "sender": "system", "text": "You are a pirate chatbot who always responds in pirate speak!" },
+    { "sender": "user", "text": "Who are you?" }
+]
+var prompt = ChatFormatter.apply("llama3", messages)
+var completion_id = llama_context.request_completion(prompt)
+
+while (true):
+    var response = await llama_context.completion_generated
+    print(response["text"])
+
+    if response["done"]: break
+```
+
+## Features
+
+- Chat formatter for:
+  - [x] Llama3
+  - [x] Mistral
+  - [ ] More to come!
+- Compute backend builds:
+  - [x] Metal
+  - [x] Vulkan
+  - [ ] CUDA
+- Asynchronous completion generation
+- Support any language model that llama.cpp supports in GGUF format
+- GGUF files are Godot resources
+
+## Building & Installation
+
+1. Download zig v0.13.0 from https://ziglang.org/download/
+2. Clone the repository:
+   ```bash
+   git clone --recurse-submodules https://github.com/hazelnutcloud/godot-llama-cpp.git
+   ```
+3. Copy the `godot-llama-cpp` addon folder in `godot/addons` to your Godot project's `addons` folder.
+   ```bash
+   cp -r godot-llama-cpp/godot/addons/godot-llama-cpp <your_project>/addons
+   ```
+4. Build the extension and install it in your Godot project:
+   ```bash
+   cd godot-llama-cpp
+   zig build --prefix <your_project>/addons/godot-llama-cpp
+   ```
+5. Enable the plugin in your Godot project settings.
+6. Add the `LlamaContext` node to your scene.
+7. Run your Godot project.
+8. Enjoy!
+
+## License
+
+This project is licensed under the MIT License - see the [LICENSE](LICENSE.md) file for details.

build.zig (+16 -17)

@@ -18,16 +18,15 @@ pub fn build(b: *std.Build) !void {
         .optimize = optimize,
     });
     plugin.addCSourceFiles(.{ .files = try findFilesRecursive(b, "src/", &cfiles_exts) });
-    plugin.addIncludePath(.{ .path = "src/" });
-    plugin.addIncludePath(.{ .path = "godot_cpp/gdextension/" });
-    plugin.addIncludePath(.{ .path = "godot_cpp/include/" });
-    plugin.addIncludePath(.{ .path = "godot_cpp/gen/include" });
-    plugin.addIncludePath(.{ .path = "llama.cpp" });
-    plugin.addIncludePath(.{ .path = "llama.cpp/common" });
+    plugin.addIncludePath(.{ .src_path = .{ .owner = b, .sub_path = "src/" } });
+    plugin.addIncludePath(.{ .src_path = .{ .owner = b, .sub_path = "godot_cpp/gdextension/" } });
+    plugin.addIncludePath(.{ .src_path = .{ .owner = b, .sub_path = "godot_cpp/include/" } });
+    plugin.addIncludePath(.{ .src_path = .{ .owner = b, .sub_path = "godot_cpp/gen/include" } });
+    plugin.addIncludePath(.{ .src_path = .{ .owner = b, .sub_path = "llama.cpp" } });
+    plugin.addIncludePath(.{ .src_path = .{ .owner = b, .sub_path = "llama.cpp/common" } });
     plugin.linkLibrary(lib_llama_cpp);
     plugin.linkLibrary(lib_godot_cpp);
 
-    b.lib_dir = "./godot/addons/godot-llama-cpp/lib";
     b.installArtifact(plugin);
 }
 
@@ -50,7 +49,7 @@ fn build_lib_godot_cpp(params: BuildParams) !*std.Build.Step.Compile {
     b.build_root.handle.access("godot_cpp/gen", .{}) catch |e| {
         switch (e) {
             error.FileNotFound => {
-                _ = try std.ChildProcess.run(.{
+                _ = try std.process.Child.run(.{
                     .allocator = b.allocator,
                     .argv = &.{ "python", "binding_generator.py", "godot_cpp/gdextension/extension_api.json", "godot_cpp" },
                     .cwd_dir = b.build_root.handle,
@@ -60,9 +59,9 @@ fn build_lib_godot_cpp(params: BuildParams) !*std.Build.Step.Compile {
         }
     };
     lib_godot.linkLibCpp();
-    lib_godot.addIncludePath(.{ .path = "godot_cpp/gdextension/" });
-    lib_godot.addIncludePath(.{ .path = "godot_cpp/include/" });
-    lib_godot.addIncludePath(.{ .path = "godot_cpp/gen/include" });
+    lib_godot.addIncludePath(.{ .src_path = .{ .owner = b, .sub_path = "godot_cpp/gdextension/" } });
+    lib_godot.addIncludePath(.{ .src_path = .{ .owner = b, .sub_path = "godot_cpp/include/" } });
+    lib_godot.addIncludePath(.{ .src_path = .{ .owner = b, .sub_path = "godot_cpp/gen/include" } });
     const lib_godot_sources = try findFilesRecursive(b, "godot_cpp/src", &cfiles_exts);
     const lib_godot_gen_sources = try findFilesRecursive(b, "godot_cpp/gen/src", &cfiles_exts);
     lib_godot.addCSourceFiles(.{ .files = lib_godot_gen_sources, .flags = &.{ "-std=c++17", "-fno-exceptions" } });
@@ -77,9 +76,9 @@ fn build_lib_llama_cpp(params: BuildParams) !*std.Build.Step.Compile {
     const optimize = params.optimize;
     const zig_triple = try target.result.zigTriple(b.allocator);
 
-    const commit_hash = try std.ChildProcess.run(.{ .allocator = b.allocator, .argv = &.{ "git", "rev-parse", "HEAD" }, .cwd = b.pathFromRoot("llama.cpp") });
+    const commit_hash = try std.process.Child.run(.{ .allocator = b.allocator, .argv = &.{ "git", "rev-parse", "HEAD" }, .cwd = b.pathFromRoot("llama.cpp") });
     const zig_version = builtin.zig_version_string;
-    try b.build_root.handle.writeFile2(.{ .sub_path = "llama.cpp/common/build-info.cpp", .data = b.fmt(
+    try b.build_root.handle.writeFile(.{ .sub_path = "llama.cpp/common/build-info.cpp", .data = b.fmt(
         \\int LLAMA_BUILD_NUMBER = {};
         \\char const *LLAMA_COMMIT = "{s}";
         \\char const *LLAMA_COMPILER = "Zig {s}";
@@ -108,13 +107,13 @@ fn build_lib_llama_cpp(params: BuildParams) !*std.Build.Step.Compile {
     const expand_metal = b.addExecutable(.{
         .name = "expand_metal",
         .target = target,
-        .root_source_file = .{ .path = "tools/expand_metal.zig" },
+        .root_source_file = .{ .src_path = .{ .owner = b, .sub_path = "tools/expand_metal.zig" } },
     });
     var run_expand_metal = b.addRunArtifact(expand_metal);
     run_expand_metal.addArg("--metal-file");
-    run_expand_metal.addFileArg(.{ .path = "llama.cpp/ggml-metal.metal" });
+    run_expand_metal.addFileArg(.{ .src_path = .{ .owner = b, .sub_path = "llama.cpp/ggml-metal.metal" } });
    run_expand_metal.addArg("--common-file");
-    run_expand_metal.addFileArg(.{ .path = "llama.cpp/ggml-common.h" });
+    run_expand_metal.addFileArg(.{ .src_path = .{ .owner = b, .sub_path = "llama.cpp/ggml-common.h" } });
     run_expand_metal.addArg("--output-file");
     const metal_expanded = run_expand_metal.addOutputFileArg("ggml-metal.metal");
     const install_metal = b.addInstallFileWithDir(metal_expanded, .lib, "ggml-metal.metal");
@@ -173,7 +172,7 @@ const ObjBuilder = struct {
         const obj = self.b.addObject(.{ .name = params.name, .target = self.target, .optimize = self.optimize });
         obj.addCSourceFiles(.{ .files = params.sources, .flags = self.flags.items });
         for (self.include_paths) |path| {
-            obj.addIncludePath(.{ .path = path });
+            obj.addIncludePath(.{ .src_path = .{ .owner = self.b, .sub_path = path } });
         }
         obj.linkLibC();
         obj.linkLibCpp();
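Note: the build.zig churn tracks the std.Build API of Zig 0.13.0, which the updated README now asks for. The `.path` variant of `std.Build.LazyPath` is gone in favor of `.src_path` (usually spelled via the `b.path("...")` helper), `std.ChildProcess` is now `std.process.Child`, and `Dir.writeFile2` became `writeFile`. Dropping the `b.lib_dir` override pairs with the README's new `zig build --prefix <your_project>/addons/godot-llama-cpp` install step.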

godot/addons/godot-llama-cpp/plugin.gdextension (+1 -1)

@@ -5,7 +5,7 @@ compatibility_minimum = "4.2"
 
 [libraries]
 
-macos.debug = "res://addons/godot-llama-cpp/lib/libgodot-llama-cpp-aarch64-macos-none-ReleaseSafe.dylib"
+macos.debug = "res://addons/godot-llama-cpp/lib/libgodot-llama-cpp-aarch64-macos-none-Debug.dylib"
 macos.release = "res://addons/godot-llama-cpp/lib/libgodot-llama-cpp-aarch64-macos-none-ReleaseSafe.dylib"
 windows.debug.x86_32 = "res://addons/godot-llama-cpp/lib/libgodot-llama-cpp.windows.template_debug.x86_32.dll"
 windows.release.x86_32 = "res://addons/godot-llama-cpp/lib/libgodot-llama-cpp.windows.template_release.x86_32.dll"

godot/examples/simple/simple.gd (+1 -6)

@@ -9,10 +9,7 @@ func _on_text_edit_submit(input: String) -> void:
 	handle_input(input)
 
 func handle_input(input: String) -> void:
-	#var messages = [{ "sender": "system", "text": "You are a pirate chatbot who always responds in pirate speak!" }]
-
-	#var messages = [{ "sender": "system", "text": "You are a helpful chatbot assistant!" }]
-	var messages = []
+	var messages = [{ "sender": "system", "text": "You are a pirate chatbot who always responds in pirate speak!" }]
 	messages.append_array(messages_container.get_children().filter(func(msg: Message): return msg.include_in_prompt).map(
 		func(msg: Message) -> Dictionary:
 			return { "text": msg.text, "sender": msg.sender }
@@ -35,8 +32,6 @@ func handle_input(input: String) -> void:
 	ai_message.completion_id = completion_id
 	ai_message.pending = true
 	ai_message.grab_focus()
-
-
 
 func _on_llama_context_completion_generated(chunk: Dictionary) -> void:
 	var completion_id = chunk.id

godot/examples/simple/simple.tscn (+2 -3)

@@ -4,7 +4,7 @@
 [ext_resource type="Script" path="res://examples/simple/simple.gd" id="1_sruc3"]
 [ext_resource type="PackedScene" uid="uid://t862t0v8ht2q" path="res://examples/simple/message.tscn" id="2_7iip7"]
 [ext_resource type="Script" path="res://examples/simple/TextEdit.gd" id="2_7usqw"]
-[ext_resource type="LlamaModel" path="res://models/meta-llama-3-8b-instruct.Q5_K_M.gguf" id="5_qov1l"]
+[ext_resource type="LlamaModel" path="res://models/Meta-Llama-3-8B-Instruct.Q4_K_M.gguf" id="5_yssjj"]
 
 [node name="Node" type="Node"]
 script = ExtResource("1_sruc3")
@@ -68,8 +68,7 @@ icon = ExtResource("1_gjsev")
 expand_icon = true
 
 [node name="LlamaContext" type="LlamaContext" parent="."]
-model = ExtResource("5_qov1l")
-temperature = 0.9
+model = ExtResource("5_yssjj")
 unique_name_in_owner = true
 
 [connection signal="submit" from="Panel/MarginContainer/VBoxContainer/HBoxContainer/TextEdit" to="." method="_on_text_edit_submit"]

llama.cpp (submodule pointer updated; no textual diff shown)

src/llama_context.cpp (+20 -10)

@@ -162,9 +162,10 @@ void LlamaContext::__thread_loop() {
 
 		for (size_t j = 0; j < sequence.size(); j++) {
 			llama_batch_add(batch, sequence[j], j + curr_token_pos, { 0 }, false);
-			curr_token_pos++;
 		}
 
+		curr_token_pos += sequence.size();
+
 		if (i == sequences.size() - 1) {
 			batch.logits[batch.n_tokens - 1] = true;
 		}
@@ -175,6 +176,10 @@
 
 	}
 
+	printf("Request tokens: %d\n", request_tokens.size());
+	printf("Batch tokens: %d\n", batch.n_tokens);
+	printf("Current token pos: %d\n", curr_token_pos);
+
 	if (decode_failed) {
 		Dictionary response;
 		response["id"] = req.id;
@@ -197,7 +202,10 @@
 
 		context_tokens.push_back(new_token_id);
 
-		if (llama_token_is_eog(model->model, new_token_id) || curr_token_pos == n_len) {
+		bool eog = llama_token_is_eog(model->model, new_token_id);
+		bool curr_eq_n_len = curr_token_pos == n_len;
+
+		if (eog || curr_eq_n_len) {
 			response["done"] = true;
 			call_thread_safe("emit_signal", "completion_generated", response);
 			break;
@@ -219,6 +227,8 @@
 		}
 	}
 
+	llama_sampling_reset(sampling_ctx);
+
 	if (decode_failed) {
 		Dictionary response;
 		response["id"] = req.id;
@@ -281,31 +291,31 @@ void LlamaContext::set_n_len(int n_len) {
 }
 
 float LlamaContext::get_temperature() {
-    return sampling_params.temp;
+	return sampling_params.temp;
 }
 void LlamaContext::set_temperature(float temperature) {
-    sampling_params.temp = temperature;
+	sampling_params.temp = temperature;
 }
 
 float LlamaContext::get_top_p() {
-    return sampling_params.top_p;
+	return sampling_params.top_p;
 }
 void LlamaContext::set_top_p(float top_p) {
-    sampling_params.top_p = top_p;
+	sampling_params.top_p = top_p;
 }
 
 float LlamaContext::get_frequency_penalty() {
-    return sampling_params.penalty_freq;
+	return sampling_params.penalty_freq;
 }
 void LlamaContext::set_frequency_penalty(float frequency_penalty) {
-    sampling_params.penalty_freq = frequency_penalty;
+	sampling_params.penalty_freq = frequency_penalty;
 }
 
 float LlamaContext::get_presence_penalty() {
-    return sampling_params.penalty_present;
+	return sampling_params.penalty_present;
 }
 void LlamaContext::set_presence_penalty(float presence_penalty) {
-    sampling_params.penalty_present = presence_penalty;
+	sampling_params.penalty_present = presence_penalty;
 }
 
 void LlamaContext::_exit_tree() {
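The "fatal logic flaw" named in the commit message is the first hunk above: each token's batch position was computed as `j + curr_token_pos` while `curr_token_pos` was *also* incremented on every iteration, so positions grew by two per token (0, 2, 4, ...) and later sequences were offset even further. Advancing the counter once per sequence, after the inner loop, keeps positions contiguous. A minimal standalone sketch of the corrected accounting (plain C++; `sequences` is a hypothetical stand-in for the tokenized prompt chunks, no llama.cpp calls involved):

```cpp
#include <cstdio>
#include <vector>

int main() {
	// Stand-in for the tokenized prompt sequences assembled in __thread_loop().
	std::vector<std::vector<int>> sequences = { { 10, 11, 12 }, { 20, 21 } };
	size_t curr_token_pos = 0;

	for (size_t i = 0; i < sequences.size(); i++) {
		const std::vector<int> &sequence = sequences[i];
		for (size_t j = 0; j < sequence.size(); j++) {
			// The buggy version also ran curr_token_pos++ here, so the
			// effective position (j + curr_token_pos) grew by two per token:
			// 0, 2, 4, ... instead of the contiguous 0, 1, 2, ...
			printf("token %d -> pos %zu\n", sequence[j], j + curr_token_pos);
		}
		curr_token_pos += sequence.size(); // fixed: advance once per consumed token
	}
	return 0;
}
```

Of the remaining hunks, the `eog`/`curr_eq_n_len` split just unpacks the stop condition for readability, `llama_sampling_reset(sampling_ctx)` presumably clears sampler state (e.g. repetition-penalty history) between requests, the three `printf` calls look like debug logging left in with the fix, and the getter/setter hunk appears to be a whitespace-only reindent.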
