Commit a243391 (merge of parents c7cbd01 and 4b8a1e9)

7 files changed: +142 −31

build.zig (+20 −3)

@@ -39,7 +39,6 @@ pub fn build(b: *std.Build) !void {
     const lib_godot_gen_sources = try findFilesRecursive(b, "godot_cpp/gen/src", &cfiles_exts);
     lib_godot.addCSourceFiles(.{ .files = lib_godot_gen_sources, .flags = &.{ "-std=c++17", "-fno-exceptions" } });
     lib_godot.addCSourceFiles(.{ .files = lib_godot_sources, .flags = &.{ "-std=c++17", "-fno-exceptions" } });
-    // try objs.append(lib_godot);

     // llama.cpp
     const commit_hash = try std.ChildProcess.run(.{ .allocator = b.allocator, .argv = &.{ "git", "rev-parse", "HEAD" }, .cwd = b.pathFromRoot("llama.cpp") });
@@ -54,7 +53,13 @@ pub fn build(b: *std.Build) !void {

     var flags = std.ArrayList([]const u8).init(b.allocator);
     if (target.result.abi != .msvc) try flags.append("-D_GNU_SOURCE");
-    if (target.result.os.tag == .macos) try flags.appendSlice(&.{ "-D_DARWIN_C_SOURCE", "-DGGML_USE_METAL", "-DGGML_USE_ACCELERATE", "-DACCELERATE_USE_LAPACK", "-DACCELERATE_LAPACK_ILP64" }) else try flags.append("-DGGML_USE_VULKAN");
+    if (target.result.os.tag == .macos) try flags.appendSlice(&.{
+        "-D_DARWIN_C_SOURCE",
+        "-DGGML_USE_METAL",
+        "-DGGML_USE_ACCELERATE",
+        "-DACCELERATE_USE_LAPACK",
+        "-DACCELERATE_LAPACK_ILP64",
+    }) else try flags.append("-DGGML_USE_VULKAN");
     try flags.append("-D_XOPEN_SOURCE=600");

     var cflags = std.ArrayList([]const u8).init(b.allocator);
@@ -200,6 +205,17 @@ pub fn build(b: *std.Build) !void {
             .link_lib_cpp = false,
             .flags = cflags.items,
         });
+        const airCommand = b.addSystemCommand(&.{ "xcrun", "-sdk", "macosx", "metal", "-O3", "-c" });
+        airCommand.addFileArg(.{ .path = "llama.cpp/ggml-metal.metal" });
+        airCommand.addArg("-o");
+        const air = airCommand.addOutputFileArg("ggml-metal.air");
+
+        const libCommand = b.addSystemCommand(&.{ "xcrun", "-sdk", "macosx", "metallib" });
+        libCommand.addFileArg(air);
+        libCommand.addArg("-o");
+        const lib = libCommand.addOutputFileArg("default.metallib");
+        const libInstall = b.addInstallLibFile(lib, "default.metallib");
+        b.getInstallStep().dependOn(&libInstall.step);
         try objs.append(ggml_metal);
     } else {
         const ggml_vulkan = buildObj(.{
@@ -235,7 +251,8 @@ pub fn build(b: *std.Build) !void {
         extension.linkFramework("MetalKit");
         extension.linkFramework("Foundation");
         extension.linkFramework("Accelerate");
-        b.installFile("llama.cpp/ggml-metal.metal", b.pathJoin(&.{ std.fs.path.basename(b.lib_dir), "ggml-metal.metal" }));
+        // b.installFile("llama.cpp/ggml-metal.metal", b.pathJoin(&.{ std.fs.path.basename(b.lib_dir), "ggml-metal.metal" }));
+        // b.installFile("llama.cpp/ggml-common.h", b.pathJoin(&.{ std.fs.path.basename(b.lib_dir), "ggml-common.h" }));
     } else {
         if (target.result.os.tag == .windows) {
             const vk_path = b.graph.env_map.get("VK_SDK_PATH") orelse @panic("VK_SDK_PATH not set");

godot/main.gd (−4)

@@ -7,10 +7,6 @@ extends Node
 func _on_button_pressed():
     handle_submit()

-#func _unhandled_key_input(event: InputEvent) -> void:
-    #if (event.is_action_released("submit_form") and input.has_focus()):
-        #handle_submit()
-
 func handle_submit():
     print(input.text)
     Llama.request_completion(input.text)

godot/main.tscn (+3 −2)

@@ -57,11 +57,12 @@ texture = ExtResource("1_ojdoj")
 expand_mode = 4

 [node name="ScrollContainer" type="ScrollContainer" parent="CenterContainer/VBoxContainer"]
-custom_minimum_size = Vector2(0, 60)
+custom_minimum_size = Vector2(2.08165e-12, 150)
 layout_mode = 2
 horizontal_scroll_mode = 0

 [node name="Panel" type="PanelContainer" parent="CenterContainer/VBoxContainer/ScrollContainer"]
+custom_minimum_size = Vector2(2.08165e-12, 2.08165e-12)
 layout_mode = 2
 size_flags_horizontal = 3
 size_flags_vertical = 3
@@ -74,7 +75,7 @@ theme_override_constants/margin_right = 20

 [node name="Output" type="Label" parent="CenterContainer/VBoxContainer/ScrollContainer/Panel/MarginContainer"]
 unique_name_in_owner = true
-custom_minimum_size = Vector2(200, 0)
+custom_minimum_size = Vector2(200, 2.08165e-12)
 layout_mode = 2
 theme_override_colors/font_color = Color(0.101961, 0.0823529, 0.0627451, 1)
 text = "Ask me anything!"

src/llama_context.cpp (+66 −7)

@@ -14,11 +14,41 @@ void LlamaContext::_bind_methods() {
     ClassDB::bind_method(D_METHOD("set_model", "model"), &LlamaContext::set_model);
     ClassDB::bind_method(D_METHOD("get_model"), &LlamaContext::get_model);
     ClassDB::add_property("LlamaContext", PropertyInfo(Variant::OBJECT, "model", PROPERTY_HINT_RESOURCE_TYPE, "LlamaModel"), "set_model", "get_model");
+
+    ClassDB::bind_method(D_METHOD("get_seed"), &LlamaContext::get_seed);
+    ClassDB::bind_method(D_METHOD("set_seed", "seed"), &LlamaContext::set_seed);
+    ClassDB::add_property("LlamaContext", PropertyInfo(Variant::INT, "seed"), "set_seed", "get_seed");
+
+    ClassDB::bind_method(D_METHOD("get_n_ctx"), &LlamaContext::get_n_ctx);
+    ClassDB::bind_method(D_METHOD("set_n_ctx", "n_ctx"), &LlamaContext::set_n_ctx);
+    ClassDB::add_property("LlamaContext", PropertyInfo(Variant::INT, "n_ctx"), "set_n_ctx", "get_n_ctx");
+
+    ClassDB::bind_method(D_METHOD("get_n_threads"), &LlamaContext::get_n_threads);
+    ClassDB::bind_method(D_METHOD("set_n_threads", "n_threads"), &LlamaContext::set_n_threads);
+    ClassDB::add_property("LlamaContext", PropertyInfo(Variant::INT, "n_threads"), "set_n_threads", "get_n_threads");
+
+    ClassDB::bind_method(D_METHOD("get_n_threads_batch"), &LlamaContext::get_n_threads_batch);
+    ClassDB::bind_method(D_METHOD("set_n_threads_batch", "n_threads_batch"), &LlamaContext::set_n_threads_batch);
+    ClassDB::add_property("LlamaContext", PropertyInfo(Variant::INT, "n_threads_batch"), "set_n_threads_batch", "get_n_threads_batch");
+
     ClassDB::bind_method(D_METHOD("request_completion", "prompt"), &LlamaContext::request_completion);
     ClassDB::bind_method(D_METHOD("_fulfill_completion", "prompt"), &LlamaContext::_fulfill_completion);
+
     ADD_SIGNAL(MethodInfo("completion_generated", PropertyInfo(Variant::STRING, "completion"), PropertyInfo(Variant::BOOL, "is_final")));
 }

+LlamaContext::LlamaContext() {
+    batch = llama_batch_init(4096, 0, 1);
+
+    ctx_params = llama_context_default_params();
+    ctx_params.seed = -1;
+    ctx_params.n_ctx = 4096;
+
+    int32_t n_threads = OS::get_singleton()->get_processor_count();
+    ctx_params.n_threads = n_threads;
+    ctx_params.n_threads_batch = n_threads;
+}
+
 void LlamaContext::_ready() {
     // TODO: remove this and use runtime classes once godot 4.3 lands, see https://github.com/godotengine/godot/pull/82554
     if (Engine::get_singleton()->is_editor_hint()) {
@@ -30,12 +60,6 @@ void LlamaContext::_ready() {
         return;
     }

-    ctx_params.seed = -1;
-    ctx_params.n_ctx = 4096;
-    int32_t n_threads = OS::get_singleton()->get_processor_count();
-    ctx_params.n_threads = n_threads;
-    ctx_params.n_threads_batch = n_threads;
-
     ctx = llama_new_context_with_model(model->model, ctx_params);
     if (ctx == NULL) {
         UtilityFunctions::printerr(vformat("%s: Failed to initialize llama context, null ctx", __func__));
@@ -44,6 +68,14 @@ void LlamaContext::_ready() {
     UtilityFunctions::print(vformat("%s: Context initialized", __func__));
 }

+PackedStringArray LlamaContext::_get_configuration_warnings() const {
+    PackedStringArray warnings;
+    if (model == NULL) {
+        warnings.push_back("Model resource property not defined");
+    }
+    return warnings;
+}
+
 Variant LlamaContext::request_completion(const String &prompt) {
     UtilityFunctions::print(vformat("%s: Requesting completion for prompt: %s", __func__, prompt));
     if (task_id) {
@@ -134,11 +166,38 @@ void LlamaContext::_fulfill_completion(const String &prompt) {
 void LlamaContext::set_model(const Ref<LlamaModel> p_model) {
     model = p_model;
 }
-
 Ref<LlamaModel> LlamaContext::get_model() {
     return model;
 }

+int LlamaContext::get_seed() {
+    return ctx_params.seed;
+}
+void LlamaContext::set_seed(int seed) {
+    ctx_params.seed = seed;
+}
+
+int LlamaContext::get_n_ctx() {
+    return ctx_params.n_ctx;
+}
+void LlamaContext::set_n_ctx(int n_ctx) {
+    ctx_params.n_ctx = n_ctx;
+}
+
+int LlamaContext::get_n_threads() {
+    return ctx_params.n_threads;
+}
+void LlamaContext::set_n_threads(int n_threads) {
+    ctx_params.n_threads = n_threads;
+}
+
+int LlamaContext::get_n_threads_batch() {
+    return ctx_params.n_threads_batch;
+}
+void LlamaContext::set_n_threads_batch(int n_threads_batch) {
+    ctx_params.n_threads_batch = n_threads_batch;
+}
+
 LlamaContext::~LlamaContext() {
     if (ctx) {
         llama_free(ctx);
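
Together with the header changes below, this replaces the hardcoded setup in _ready() with inspector-editable properties: the constructor seeds ctx_params with defaults, and _ready() consumes whatever values are set by the time the node enters the tree. A minimal GDScript sketch of driving the new surface — the node setup, values, and callback name are illustrative assumptions, not part of this commit:

extends Node

func make_context(model: LlamaModel) -> LlamaContext:
    var llama := LlamaContext.new()
    llama.model = model          # required; the editor now warns when this is unset
    # These were hardcoded in _ready() before this commit; they are now plain
    # properties, applied when the node creates its llama.cpp context.
    llama.seed = 1234            # constructor default is -1 (random)
    llama.n_ctx = 2048           # constructor default is 4096
    llama.n_threads = 4          # constructor default is the OS processor count
    llama.n_threads_batch = 4
    llama.completion_generated.connect(_on_completion_generated)
    add_child(llama)             # triggers _ready(), which builds the context
    return llama

func _on_completion_generated(completion: String, is_final: bool) -> void:
    if not is_final:
        print(completion)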

src/llama_context.h (+15 −2)

@@ -12,8 +12,8 @@ class LlamaContext : public Node {
 private:
     Ref<LlamaModel> model;
     llama_context *ctx = nullptr;
-    llama_context_params ctx_params = llama_context_default_params();
-    llama_batch batch = llama_batch_init(4096, 0, 1);
+    llama_context_params ctx_params;
+    llama_batch batch;
     int task_id;

 protected:
@@ -22,9 +22,22 @@ class LlamaContext : public Node {
 public:
     void set_model(const Ref<LlamaModel> model);
     Ref<LlamaModel> get_model();
+
     Variant request_completion(const String &prompt);
     void _fulfill_completion(const String &prompt);
+
+    int get_seed();
+    void set_seed(int seed);
+    int get_n_ctx();
+    void set_n_ctx(int n_ctx);
+    int get_n_threads();
+    void set_n_threads(int n_threads);
+    int get_n_threads_batch();
+    void set_n_threads_batch(int n_threads_batch);
+
+    virtual PackedStringArray _get_configuration_warnings() const override;
     virtual void _ready() override;
+    LlamaContext();
     ~LlamaContext();
 };
 } //namespace godot

src/llama_model.cpp (+21 −4)

@@ -5,22 +5,39 @@

 using namespace godot;

+void LlamaModel::_bind_methods() {
+    ClassDB::bind_method(D_METHOD("load_model", "path"), &LlamaModel::load_model);
+
+    ClassDB::bind_method(D_METHOD("get_n_gpu_layers"), &LlamaModel::get_n_gpu_layers);
+    ClassDB::bind_method(D_METHOD("set_n_gpu_layers", "n"), &LlamaModel::set_n_gpu_layers);
+    ClassDB::add_property("LlamaModel", PropertyInfo(Variant::INT, "n_gpu_layers"), "set_n_gpu_layers", "get_n_gpu_layers");
+}
+
+LlamaModel::LlamaModel() {
+    model_params = llama_model_default_params();
+}
+
 void LlamaModel::load_model(const String &path) {
     if (model) {
         llama_free_model(model);
     }
-    llama_model_params model_params = llama_model_default_params();
-    model_params.n_gpu_layers = 99; // offload all layers to the GPU
+
     model = llama_load_model_from_file(path.utf8().get_data(), model_params);
+
     if (model == NULL) {
         UtilityFunctions::printerr(vformat("%s: Unable to load model from %s", __func__, path));
         return;
     }
+
     UtilityFunctions::print(vformat("%s: Model loaded from %s", __func__, path));
 }

-void LlamaModel::_bind_methods() {
-    ClassDB::bind_method(D_METHOD("load_model", "path"), &LlamaModel::load_model);
+int LlamaModel::get_n_gpu_layers() {
+    return model_params.n_gpu_layers;
+}
+
+void LlamaModel::set_n_gpu_layers(int n) {
+    model_params.n_gpu_layers = n;
 }

 LlamaModel::~LlamaModel() {

src/llama_model.h (+17 −9)

@@ -6,17 +6,25 @@

 namespace godot {

-class LlamaModel : public Resource {
-    GDCLASS(LlamaModel, Resource)
+class LlamaModel : public Resource {
+    GDCLASS(LlamaModel, Resource)

-protected:
-    static void _bind_methods();
+private:
+    llama_model_params model_params;

-public:
-    llama_model *model = nullptr;
-    void load_model( const String &path );
-    ~LlamaModel();
-};
+protected:
+    static void _bind_methods();
+
+public:
+    llama_model *model = nullptr;
+    void load_model(const String &path);
+
+    int get_n_gpu_layers();
+    void set_n_gpu_layers(int n);
+
+    LlamaModel();
+    ~LlamaModel();
+};

 } //namespace godot
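
Because model_params now lives on the resource and is initialized once in the constructor, n_gpu_layers must be set before load_model() is called, since load_model() reads model_params at call time; the old code hardcoded 99 ("offload all layers to the GPU") inside load_model() itself. A short GDScript sketch under those assumptions — the path and layer count are placeholders:

extends Node

func _ready() -> void:
    var model := LlamaModel.new()
    model.n_gpu_layers = 33  # illustrative; previously fixed at 99
    # Hypothetical on-disk path: llama.cpp opens the file directly with C I/O,
    # so a filesystem path is needed rather than a res:// path.
    model.load_model("/path/to/model.gguf")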
