@@ -21,6 +21,22 @@ void LlamaContext::_bind_methods() {
ClassDB::bind_method(D_METHOD("set_seed", "seed"), &LlamaContext::set_seed);
ClassDB::add_property("LlamaContext", PropertyInfo(Variant::INT, "seed"), "set_seed", "get_seed");

+ ClassDB::bind_method(D_METHOD("get_temperature"), &LlamaContext::get_temperature);
+ ClassDB::bind_method(D_METHOD("set_temperature", "temperature"), &LlamaContext::set_temperature);
+ ClassDB::add_property("LlamaContext", PropertyInfo(Variant::FLOAT, "temperature"), "set_temperature", "get_temperature");
+
+ ClassDB::bind_method(D_METHOD("get_top_p"), &LlamaContext::get_top_p);
+ ClassDB::bind_method(D_METHOD("set_top_p", "top_p"), &LlamaContext::set_top_p);
+ ClassDB::add_property("LlamaContext", PropertyInfo(Variant::FLOAT, "top_p"), "set_top_p", "get_top_p");
+
+ ClassDB::bind_method(D_METHOD("get_frequency_penalty"), &LlamaContext::get_frequency_penalty);
+ ClassDB::bind_method(D_METHOD("set_frequency_penalty", "frequency_penalty"), &LlamaContext::set_frequency_penalty);
+ ClassDB::add_property("LlamaContext", PropertyInfo(Variant::FLOAT, "frequency_penalty"), "set_frequency_penalty", "get_frequency_penalty");
+
+ ClassDB::bind_method(D_METHOD("get_presence_penalty"), &LlamaContext::get_presence_penalty);
+ ClassDB::bind_method(D_METHOD("set_presence_penalty", "presence_penalty"), &LlamaContext::set_presence_penalty);
+ ClassDB::add_property("LlamaContext", PropertyInfo(Variant::FLOAT, "presence_penalty"), "set_presence_penalty", "get_presence_penalty");
+
ClassDB::bind_method(D_METHOD("get_n_ctx"), &LlamaContext::get_n_ctx);
ClassDB::bind_method(D_METHOD("set_n_ctx", "n_ctx"), &LlamaContext::set_n_ctx);
ClassDB::add_property("LlamaContext", PropertyInfo(Variant::INT, "n_ctx"), "set_n_ctx", "get_n_ctx");
@@ -106,13 +122,13 @@ void LlamaContext::__thread_loop() {
shared_prefix_idx = std::min(context_tokens.size(), request_tokens.size());
}

- bool rm_success = llama_kv_cache_seq_rm(ctx, 0, shared_prefix_idx, -1);
+ bool rm_success = llama_kv_cache_seq_rm(ctx, -1, shared_prefix_idx, -1);
if (!rm_success) {
UtilityFunctions::printerr(vformat("%s: Failed to remove tokens from kv cache", __func__));
Dictionary response;
response["id"] = req.id;
response["error"] = "Failed to remove tokens from kv cache";
- call_deferred("emit_signal", "completion_generated", response);
+ call_thread_safe("emit_signal", "completion_generated", response);
continue;
}
context_tokens.erase(context_tokens.begin() + shared_prefix_idx, context_tokens.end());
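Changing the first argument from 0 to -1 widens the removal: in llama.cpp, a negative seq_id matches every sequence and a negative p1 means "to the end of the cache", so the call now evicts everything past the shared prefix regardless of which sequence it was decoded under. Annotated:

// llama_kv_cache_seq_rm(ctx, seq_id, p0, p1)
//   seq_id < 0 -> match any sequence (previously only seq 0 was cleared)
//   p1 < 0     -> remove up to the end of the cache
// i.e. drop [shared_prefix_idx, inf) across all sequences. Returns false if a
// partial range cannot be removed (e.g. on cache types that only support
// whole-sequence removal), which is the failure branch handled above.
bool rm_success = llama_kv_cache_seq_rm(ctx, -1, shared_prefix_idx, -1);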
@@ -128,6 +144,14 @@ void LlamaContext::__thread_loop() {
sequences.push_back(std::vector<llama_token>(request_tokens.begin() + i, request_tokens.begin() + std::min(i + batch_size, request_tokens.size())));
}

+ printf("Request tokens: \n");
+ for (auto sequence : sequences) {
+ for (auto token : sequence) {
+ printf("%s", llama_token_to_piece(ctx, token).c_str());
+ }
+ }
+ printf("\n");
+
int curr_token_pos = context_tokens.size();
bool decode_failed = false;
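One caveat with the new trace: printf writes to the process stdout, which the Godot editor's Output panel does not capture. A sketch of the same trace routed through godot-cpp's UtilityFunctions (already used in this file for errors), assuming the common.h overload of llama_token_to_piece that returns a std::string:

// Same debug trace, surfaced in Godot's Output panel instead of raw stdout:
String request_text;
for (const auto &sequence : sequences) {
for (const llama_token token : sequence) {
request_text += String(llama_token_to_piece(ctx, token).c_str());
}
}
UtilityFunctions::print(vformat("Request tokens: %s", request_text));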
@@ -155,7 +179,7 @@ void LlamaContext::__thread_loop() {
Dictionary response;
response["id"] = req.id;
response["error"] = "llama_decode() failed";
- call_deferred("emit_signal", "completion_generated", response);
+ call_thread_safe("emit_signal", "completion_generated", response);
continue;
}
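This is the recurring swap in the commit: call_deferred always queues the call for the next iteration of the main loop, whereas call_thread_safe (available since Godot 4.2) defers only when invoked from a thread other than the one that owns the object, and calls directly otherwise. From this worker thread both routes end up on the main thread, but call_thread_safe states the actual intent, thread-safe emission rather than delayed emission. The pattern in isolation:

// Worker thread -> main thread signal emission:
Dictionary response;
response["id"] = req.id;
response["error"] = "llama_decode() failed";
// Queued onto the main thread's message queue because we're off-thread here;
// would call straight through if we were already on the owning thread.
call_thread_safe("emit_signal", "completion_generated", response);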
@@ -171,17 +195,17 @@ void LlamaContext::__thread_loop() {
Dictionary response;
response["id"] = req.id;

+ context_tokens.push_back(new_token_id);
+
if (llama_token_is_eog(model->model, new_token_id) || curr_token_pos == n_len) {
response["done"] = true;
- call_deferred("emit_signal", "completion_generated", response);
+ call_thread_safe("emit_signal", "completion_generated", response);
break;
}

- context_tokens.push_back(new_token_id);
-
response["text"] = llama_token_to_piece(ctx, new_token_id).c_str();
response["done"] = false;
- call_deferred("emit_signal", "completion_generated", response);
+ call_thread_safe("emit_signal", "completion_generated", response);

llama_batch_clear(batch);
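Hoisting context_tokens.push_back above the end-of-generation check means the terminating token (EOG, or the token that hits the n_len cutoff) is now recorded as well; previously the break skipped it, leaving context_tokens one token short of what had actually been sampled. The per-token order after this change, in outline:

// Per sampled token:
// 1. append new_token_id to context_tokens (now unconditional)
// 2. if EOG or curr_token_pos == n_len: emit {done: true} and stop
// 3. else: emit {text: piece, done: false} and batch the token for decode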
@@ -199,11 +223,9 @@ void LlamaContext::__thread_loop() {
Dictionary response;
response["id"] = req.id;
response["error"] = "llama_decode() failed";
- call_deferred("emit_signal", "completion_generated", response);
+ call_thread_safe("emit_signal", "completion_generated", response);
continue;
}
-
- llama_sampling_reset(sampling_ctx);
}
}
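Dropping the per-request llama_sampling_reset means the sampler's repetition-penalty history (the prev ring in llama.cpp's common sampling context) now persists across requests, presumably deliberately, to match the KV cache prefix that is also retained across requests. The sampler lifecycle after this commit, assuming llama.cpp's common sampling API:

// llama_sampling_context lifecycle after this commit:
//   llama_sampling_init(params)   -- once, at context setup (not in this diff)
//   llama_sampling_sample/accept  -- per token, inside the loop above
//   llama_sampling_free(ctx)      -- once, in _exit_tree (added below)
// llama_sampling_reset() is no longer called between requests, so penalty
// history carries across requests the same way the KV cache does.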
@@ -258,6 +280,34 @@ void LlamaContext::set_n_len(int n_len) {
this->n_len = n_len;
}

+ float LlamaContext::get_temperature() {
+ return sampling_params.temp;
+ }
+ void LlamaContext::set_temperature(float temperature) {
+ sampling_params.temp = temperature;
+ }
+
+ float LlamaContext::get_top_p() {
+ return sampling_params.top_p;
+ }
+ void LlamaContext::set_top_p(float top_p) {
+ sampling_params.top_p = top_p;
+ }
+
+ float LlamaContext::get_frequency_penalty() {
+ return sampling_params.penalty_freq;
+ }
+ void LlamaContext::set_frequency_penalty(float frequency_penalty) {
+ sampling_params.penalty_freq = frequency_penalty;
+ }
+
+ float LlamaContext::get_presence_penalty() {
+ return sampling_params.penalty_present;
+ }
+ void LlamaContext::set_presence_penalty(float presence_penalty) {
+ sampling_params.penalty_present = presence_penalty;
+ }
+
void LlamaContext::_exit_tree() {
if (Engine::get_singleton()->is_editor_hint()) {
return;
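The accessors are thin wrappers over llama.cpp's common llama_sampling_params; note the exposed property names differ from the struct's field names for the two penalties. The relevant slice of the upstream struct, paraphrased from common/sampling.h (defaults are upstream's at the time of writing; verify against the vendored llama.cpp revision):

// llama_sampling_params (excerpt, paraphrased):
float temp = 0.80f;            // temperature; <= 0.0 selects greedy sampling
float top_p = 0.95f;           // nucleus sampling cutoff; 1.0 disables
float penalty_freq = 0.00f;    // OpenAI-style frequency penalty; 0.0 disables
float penalty_present = 0.00f; // OpenAI-style presence penalty; 0.0 disables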
@@ -275,5 +325,6 @@ void LlamaContext::_exit_tree() {
llama_free(ctx);
}

+ llama_sampling_free(sampling_ctx);
llama_backend_free();
}
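With the added free, _exit_tree now tears down the sampler alongside the context. The resulting order, for reference; the sampling context is independent of the llama_context, so freeing it after llama_free is fine, with the global backend shutdown kept last:

// _exit_tree teardown order after this commit:
// llama_free(ctx);                    // frees the context and its KV cache (guarded above)
// llama_sampling_free(sampling_ctx);  // added here: frees sampler state
// llama_backend_free();               // global backend shutdown, last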