@@ -30,6 +30,7 @@ void LlamaContext::_ready() {
		return;
	}

+	ctx_params.seed = -1;
	ctx_params.n_ctx = 2048;
	int32_t n_threads = OS::get_singleton()->get_processor_count();
	ctx_params.n_threads = n_threads;
@@ -45,9 +46,9 @@ void LlamaContext::_ready() {

Variant LlamaContext::request_completion(const String &prompt) {
	UtilityFunctions::print(vformat("%s: Requesting completion for prompt: %s", __func__, prompt));
-	if (task_id) {
-		WorkerThreadPool::get_singleton()->wait_for_task_completion(task_id);
-	}
+	if (task_id) {
+		WorkerThreadPool::get_singleton()->wait_for_task_completion(task_id);
+	}
	task_id = WorkerThreadPool::get_singleton()->add_task(Callable(this, "_fulfill_completion").bind(prompt));
	return OK;
}
@@ -65,9 +66,12 @@ void LlamaContext::_fulfill_completion(const String &prompt) {
		return;
	}

+	llama_batch batch = llama_batch_init(tokens_list.size(), 0, 1);
+
	for (size_t i = 0; i < tokens_list.size(); i++) {
		llama_batch_add(batch, tokens_list[i], i, { 0 }, false);
	}
+
	batch.logits[batch.n_tokens - 1] = true;

	int decode_res = llama_decode(ctx, batch);
@@ -79,6 +83,7 @@ void LlamaContext::_fulfill_completion(const String &prompt) {
	int n_cur = batch.n_tokens;
	int n_decode = 0;
	llama_model *llama_model = model->model;
+
	while (n_cur <= n_len) {
		// sample the next token
		{
@@ -121,9 +126,11 @@ void LlamaContext::_fulfill_completion(const String &prompt) {
		int decode_res = llama_decode(ctx, batch);
		if (decode_res != 0) {
			UtilityFunctions::printerr(vformat("%s: Failed to decode batch with error code: %d", __func__, decode_res));
-			return;
+			break;
		}
	}
+
+	llama_batch_free(batch);
}

void LlamaContext::set_model(const Ref<LlamaModel> p_model) {
@@ -138,7 +145,6 @@ LlamaContext::~LlamaContext() {
	if (ctx) {
		llama_free(ctx);
	}
-	llama_batch_free(batch);
	if (task_id) {
		WorkerThreadPool::get_singleton()->wait_for_task_completion(task_id);
	}
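
For context, here is the per-request batch lifecycle this diff settles on, as a minimal standalone sketch. The helper name `decode_prompt` is hypothetical; the calls are the same llama.cpp APIs the diff already uses (`llama_batch_add` comes from llama.cpp's common helpers, not the core C header):

#include <vector>
#include "llama.h"
#include "common.h" // llama_batch_add

// Hypothetical helper mirroring the prompt-decoding half of _fulfill_completion.
static bool decode_prompt(llama_context *ctx, const std::vector<llama_token> &tokens_list) {
	// Size the batch for the whole prompt: no embedding inputs (embd = 0),
	// a single sequence (n_seq_max = 1).
	llama_batch batch = llama_batch_init(tokens_list.size(), 0, 1);

	// Queue each prompt token at position i in sequence 0; only the last
	// token needs logits, since sampling starts from there.
	for (size_t i = 0; i < tokens_list.size(); i++) {
		llama_batch_add(batch, tokens_list[i], i, { 0 }, false);
	}
	batch.logits[batch.n_tokens - 1] = true;

	bool ok = llama_decode(ctx, batch) == 0;

	// Freed in the same scope that allocated it, which is why the diff swaps
	// the mid-loop `return` for `break` and drops the free from the destructor.
	llama_batch_free(batch);
	return ok;
}

Pairing llama_batch_init with llama_batch_free in one function bounds the batch's lifetime to the request, so an early decode failure can no longer leak it.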