Commit a243391 (merge of parents c7cbd01 and 4b8a1e9)

7 files changed: +142 −31

build.zig (+20 −3)

@@ -39,7 +39,6 @@ pub fn build(b: *std.Build) !void {
     const lib_godot_gen_sources = try findFilesRecursive(b, "godot_cpp/gen/src", &cfiles_exts);
     lib_godot.addCSourceFiles(.{ .files = lib_godot_gen_sources, .flags = &.{ "-std=c++17", "-fno-exceptions" } });
     lib_godot.addCSourceFiles(.{ .files = lib_godot_sources, .flags = &.{ "-std=c++17", "-fno-exceptions" } });
-    // try objs.append(lib_godot);

     // llama.cpp
     const commit_hash = try std.ChildProcess.run(.{ .allocator = b.allocator, .argv = &.{ "git", "rev-parse", "HEAD" }, .cwd = b.pathFromRoot("llama.cpp") });
@@ -54,7 +53,13 @@ pub fn build(b: *std.Build) !void {

     var flags = std.ArrayList([]const u8).init(b.allocator);
     if (target.result.abi != .msvc) try flags.append("-D_GNU_SOURCE");
-    if (target.result.os.tag == .macos) try flags.appendSlice(&.{ "-D_DARWIN_C_SOURCE", "-DGGML_USE_METAL", "-DGGML_USE_ACCELERATE", "-DACCELERATE_USE_LAPACK", "-DACCELERATE_LAPACK_ILP64" }) else try flags.append("-DGGML_USE_VULKAN");
+    if (target.result.os.tag == .macos) try flags.appendSlice(&.{
+        "-D_DARWIN_C_SOURCE",
+        "-DGGML_USE_METAL",
+        "-DGGML_USE_ACCELERATE",
+        "-DACCELERATE_USE_LAPACK",
+        "-DACCELERATE_LAPACK_ILP64",
+    }) else try flags.append("-DGGML_USE_VULKAN");
     try flags.append("-D_XOPEN_SOURCE=600");

     var cflags = std.ArrayList([]const u8).init(b.allocator);
@@ -200,6 +205,17 @@ pub fn build(b: *std.Build) !void {
             .link_lib_cpp = false,
             .flags = cflags.items,
         });
+        const airCommand = b.addSystemCommand(&.{ "xcrun", "-sdk", "macosx", "metal", "-O3", "-c" });
+        airCommand.addFileArg(.{ .path = "llama.cpp/ggml-metal.metal" });
+        airCommand.addArg("-o");
+        const air = airCommand.addOutputFileArg("ggml-metal.air");
+
+        const libCommand = b.addSystemCommand(&.{ "xcrun", "-sdk", "macosx", "metallib" });
+        libCommand.addFileArg(air);
+        libCommand.addArg("-o");
+        const lib = libCommand.addOutputFileArg("default.metallib");
+        const libInstall = b.addInstallLibFile(lib, "default.metallib");
+        b.getInstallStep().dependOn(&libInstall.step);
         try objs.append(ggml_metal);
     } else {
         const ggml_vulkan = buildObj(.{
@@ -235,7 +251,8 @@ pub fn build(b: *std.Build) !void {
         extension.linkFramework("MetalKit");
         extension.linkFramework("Foundation");
         extension.linkFramework("Accelerate");
-        b.installFile("llama.cpp/ggml-metal.metal", b.pathJoin(&.{ std.fs.path.basename(b.lib_dir), "ggml-metal.metal" }));
+        // b.installFile("llama.cpp/ggml-metal.metal", b.pathJoin(&.{ std.fs.path.basename(b.lib_dir), "ggml-metal.metal" }));
+        // b.installFile("llama.cpp/ggml-common.h", b.pathJoin(&.{ std.fs.path.basename(b.lib_dir), "ggml-common.h" }));
     } else {
         if (target.result.os.tag == .windows) {
             const vk_path = b.graph.env_map.get("VK_SDK_PATH") orelse @panic("VK_SDK_PATH not set");

godot/main.gd (−4)

@@ -7,10 +7,6 @@ extends Node
 func _on_button_pressed():
     handle_submit()

-#func _unhandled_key_input(event: InputEvent) -> void:
-    #if (event.is_action_released("submit_form") and input.has_focus()):
-        #handle_submit()
-
 func handle_submit():
     print(input.text)
     Llama.request_completion(input.text)

godot/main.tscn (+3 −2)

@@ -57,11 +57,12 @@ texture = ExtResource("1_ojdoj")
 expand_mode = 4

 [node name="ScrollContainer" type="ScrollContainer" parent="CenterContainer/VBoxContainer"]
-custom_minimum_size = Vector2(0, 60)
+custom_minimum_size = Vector2(2.08165e-12, 150)
 layout_mode = 2
 horizontal_scroll_mode = 0

 [node name="Panel" type="PanelContainer" parent="CenterContainer/VBoxContainer/ScrollContainer"]
+custom_minimum_size = Vector2(2.08165e-12, 2.08165e-12)
 layout_mode = 2
 size_flags_horizontal = 3
 size_flags_vertical = 3
@@ -74,7 +75,7 @@ theme_override_constants/margin_right = 20

 [node name="Output" type="Label" parent="CenterContainer/VBoxContainer/ScrollContainer/Panel/MarginContainer"]
 unique_name_in_owner = true
-custom_minimum_size = Vector2(200, 0)
+custom_minimum_size = Vector2(200, 2.08165e-12)
 layout_mode = 2
 theme_override_colors/font_color = Color(0.101961, 0.0823529, 0.0627451, 1)
 text = "Ask me anything!"

src/llama_context.cpp (+66 −7)

@@ -14,11 +14,41 @@ void LlamaContext::_bind_methods() {
     ClassDB::bind_method(D_METHOD("set_model", "model"), &LlamaContext::set_model);
     ClassDB::bind_method(D_METHOD("get_model"), &LlamaContext::get_model);
     ClassDB::add_property("LlamaContext", PropertyInfo(Variant::OBJECT, "model", PROPERTY_HINT_RESOURCE_TYPE, "LlamaModel"), "set_model", "get_model");
+
+    ClassDB::bind_method(D_METHOD("get_seed"), &LlamaContext::get_seed);
+    ClassDB::bind_method(D_METHOD("set_seed", "seed"), &LlamaContext::set_seed);
+    ClassDB::add_property("LlamaContext", PropertyInfo(Variant::INT, "seed"), "set_seed", "get_seed");
+
+    ClassDB::bind_method(D_METHOD("get_n_ctx"), &LlamaContext::get_n_ctx);
+    ClassDB::bind_method(D_METHOD("set_n_ctx", "n_ctx"), &LlamaContext::set_n_ctx);
+    ClassDB::add_property("LlamaContext", PropertyInfo(Variant::INT, "n_ctx"), "set_n_ctx", "get_n_ctx");
+
+    ClassDB::bind_method(D_METHOD("get_n_threads"), &LlamaContext::get_n_threads);
+    ClassDB::bind_method(D_METHOD("set_n_threads", "n_threads"), &LlamaContext::set_n_threads);
+    ClassDB::add_property("LlamaContext", PropertyInfo(Variant::INT, "n_threads"), "set_n_threads", "get_n_threads");
+
+    ClassDB::bind_method(D_METHOD("get_n_threads_batch"), &LlamaContext::get_n_threads_batch);
+    ClassDB::bind_method(D_METHOD("set_n_threads_batch", "n_threads_batch"), &LlamaContext::set_n_threads_batch);
+    ClassDB::add_property("LlamaContext", PropertyInfo(Variant::INT, "n_threads_batch"), "set_n_threads_batch", "get_n_threads_batch");
+
     ClassDB::bind_method(D_METHOD("request_completion", "prompt"), &LlamaContext::request_completion);
     ClassDB::bind_method(D_METHOD("_fulfill_completion", "prompt"), &LlamaContext::_fulfill_completion);
+
     ADD_SIGNAL(MethodInfo("completion_generated", PropertyInfo(Variant::STRING, "completion"), PropertyInfo(Variant::BOOL, "is_final")));
 }

+LlamaContext::LlamaContext() {
+    batch = llama_batch_init(4096, 0, 1);
+
+    ctx_params = llama_context_default_params();
+    ctx_params.seed = -1;
+    ctx_params.n_ctx = 4096;
+
+    int32_t n_threads = OS::get_singleton()->get_processor_count();
+    ctx_params.n_threads = n_threads;
+    ctx_params.n_threads_batch = n_threads;
+}
+
 void LlamaContext::_ready() {
     // TODO: remove this and use runtime classes once godot 4.3 lands, see https://github.com/godotengine/godot/pull/82554
     if (Engine::get_singleton()->is_editor_hint()) {
@@ -30,12 +60,6 @@ void LlamaContext::_ready() {
         return;
     }

-    ctx_params.seed = -1;
-    ctx_params.n_ctx = 4096;
-    int32_t n_threads = OS::get_singleton()->get_processor_count();
-    ctx_params.n_threads = n_threads;
-    ctx_params.n_threads_batch = n_threads;
-
     ctx = llama_new_context_with_model(model->model, ctx_params);
     if (ctx == NULL) {
         UtilityFunctions::printerr(vformat("%s: Failed to initialize llama context, null ctx", __func__));
@@ -44,6 +68,14 @@ void LlamaContext::_ready() {
     UtilityFunctions::print(vformat("%s: Context initialized", __func__));
 }

+PackedStringArray LlamaContext::_get_configuration_warnings() const {
+    PackedStringArray warnings;
+    if (model == NULL) {
+        warnings.push_back("Model resource property not defined");
+    }
+    return warnings;
+}
+
 Variant LlamaContext::request_completion(const String &prompt) {
     UtilityFunctions::print(vformat("%s: Requesting completion for prompt: %s", __func__, prompt));
     if (task_id) {
@@ -134,11 +166,38 @@ void LlamaContext::_fulfill_completion(const String &prompt) {
 void LlamaContext::set_model(const Ref<LlamaModel> p_model) {
     model = p_model;
 }
-
 Ref<LlamaModel> LlamaContext::get_model() {
     return model;
 }

+int LlamaContext::get_seed() {
+    return ctx_params.seed;
+}
+void LlamaContext::set_seed(int seed) {
+    ctx_params.seed = seed;
+}
+
+int LlamaContext::get_n_ctx() {
+    return ctx_params.n_ctx;
+}
+void LlamaContext::set_n_ctx(int n_ctx) {
+    ctx_params.n_ctx = n_ctx;
+}
+
+int LlamaContext::get_n_threads() {
+    return ctx_params.n_threads;
+}
+void LlamaContext::set_n_threads(int n_threads) {
+    ctx_params.n_threads = n_threads;
+}
+
+int LlamaContext::get_n_threads_batch() {
+    return ctx_params.n_threads_batch;
+}
+void LlamaContext::set_n_threads_batch(int n_threads_batch) {
+    ctx_params.n_threads_batch = n_threads_batch;
+}
+
 LlamaContext::~LlamaContext() {
     if (ctx) {
         llama_free(ctx);
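
Together with the header changes below, this replaces the hardcoded setup in _ready() with inspector-editable properties: the constructor seeds ctx_params with defaults, and _ready() consumes whatever values are set by the time the node enters the tree. A minimal GDScript sketch of driving the new surface — the node setup, values, and callback name are illustrative assumptions, not part of this commit:

extends Node

func make_context(model: LlamaModel) -> LlamaContext:
    var llama := LlamaContext.new()
    llama.model = model          # required; the editor now warns when this is unset
    # These were hardcoded in _ready() before this commit; they are now plain
    # properties, applied when the node creates its llama.cpp context.
    llama.seed = 1234            # constructor default is -1 (random)
    llama.n_ctx = 2048           # constructor default is 4096
    llama.n_threads = 4          # constructor default is the OS processor count
    llama.n_threads_batch = 4
    llama.completion_generated.connect(_on_completion_generated)
    add_child(llama)             # triggers _ready(), which builds the context
    return llama

func _on_completion_generated(completion: String, is_final: bool) -> void:
    if not is_final:
        print(completion)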

src/llama_context.h (+15 −2)

@@ -12,8 +12,8 @@ class LlamaContext : public Node {
 private:
     Ref<LlamaModel> model;
     llama_context *ctx = nullptr;
-    llama_context_params ctx_params = llama_context_default_params();
-    llama_batch batch = llama_batch_init(4096, 0, 1);
+    llama_context_params ctx_params;
+    llama_batch batch;
     int task_id;

 protected:
@@ -22,9 +22,22 @@ class LlamaContext : public Node {
 public:
     void set_model(const Ref<LlamaModel> model);
     Ref<LlamaModel> get_model();
+
     Variant request_completion(const String &prompt);
     void _fulfill_completion(const String &prompt);
+
+    int get_seed();
+    void set_seed(int seed);
+    int get_n_ctx();
+    void set_n_ctx(int n_ctx);
+    int get_n_threads();
+    void set_n_threads(int n_threads);
+    int get_n_threads_batch();
+    void set_n_threads_batch(int n_threads_batch);
+
+    virtual PackedStringArray _get_configuration_warnings() const override;
     virtual void _ready() override;
+    LlamaContext();
     ~LlamaContext();
 };
 } //namespace godot

src/llama_model.cpp (+21 −4)

@@ -5,22 +5,39 @@

 using namespace godot;

+void LlamaModel::_bind_methods() {
+    ClassDB::bind_method(D_METHOD("load_model", "path"), &LlamaModel::load_model);
+
+    ClassDB::bind_method(D_METHOD("get_n_gpu_layers"), &LlamaModel::get_n_gpu_layers);
+    ClassDB::bind_method(D_METHOD("set_n_gpu_layers", "n"), &LlamaModel::set_n_gpu_layers);
+    ClassDB::add_property("LlamaModel", PropertyInfo(Variant::INT, "n_gpu_layers"), "set_n_gpu_layers", "get_n_gpu_layers");
+}
+
+LlamaModel::LlamaModel() {
+    model_params = llama_model_default_params();
+}
+
 void LlamaModel::load_model(const String &path) {
     if (model) {
         llama_free_model(model);
     }
-    llama_model_params model_params = llama_model_default_params();
-    model_params.n_gpu_layers = 99; // offload all layers to the GPU
+
     model = llama_load_model_from_file(path.utf8().get_data(), model_params);
+
     if (model == NULL) {
         UtilityFunctions::printerr(vformat("%s: Unable to load model from %s", __func__, path));
         return;
     }
+
     UtilityFunctions::print(vformat("%s: Model loaded from %s", __func__, path));
 }

-void LlamaModel::_bind_methods() {
-    ClassDB::bind_method(D_METHOD("load_model", "path"), &LlamaModel::load_model);
+int LlamaModel::get_n_gpu_layers() {
+    return model_params.n_gpu_layers;
+}
+
+void LlamaModel::set_n_gpu_layers(int n) {
+    model_params.n_gpu_layers = n;
 }

 LlamaModel::~LlamaModel() {

src/llama_model.h (+17 −9)

@@ -6,17 +6,25 @@

 namespace godot {

-class LlamaModel : public Resource {
-    GDCLASS(LlamaModel, Resource)
+class LlamaModel : public Resource {
+    GDCLASS(LlamaModel, Resource)

-protected:
-    static void _bind_methods();
+private:
+    llama_model_params model_params;

-public:
-    llama_model *model = nullptr;
-    void load_model( const String &path );
-    ~LlamaModel();
-};
+protected:
+    static void _bind_methods();
+
+public:
+    llama_model *model = nullptr;
+    void load_model(const String &path);
+
+    int get_n_gpu_layers();
+    void set_n_gpu_layers(int n);
+
+    LlamaModel();
+    ~LlamaModel();
+};

 } //namespace godot
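
Because model_params now lives on the resource and is initialized once in the constructor, n_gpu_layers must be set before load_model() is called, since load_model() reads model_params at call time; the old code hardcoded 99 ("offload all layers to the GPU") inside load_model() itself. A short GDScript sketch under those assumptions — the path and layer count are placeholders:

extends Node

func _ready() -> void:
    var model := LlamaModel.new()
    model.n_gpu_layers = 33  # illustrative; previously fixed at 99
    # Hypothetical on-disk path: llama.cpp opens the file directly with C I/O,
    # so a filesystem path is needed rather than a res:// path.
    model.load_model("/path/to/model.gguf")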
