@@ -15,6 +15,7 @@ import fs from "fs";
 import https from "https";
 import si from "systeminformation";
 import { wordsToPunish } from "./wordsToPunish.ts";
+import { prettyConsole } from "../index.ts";

 const __dirname = path.dirname(fileURLToPath(import.meta.url));

@@ -67,28 +68,25 @@ class LlamaService {
     private modelInitialized: boolean = false;

     private constructor() {
-        console.log("Constructing");
         this.llama = undefined;
         this.model = undefined;
         this.modelUrl =
             "https://huggingface.co/NousResearch/Hermes-3-Llama-3.1-8B-GGUF/resolve/main/Hermes-3-Llama-3.1-8B.Q8_0.gguf?download=true";
         const modelName = "model.gguf";
-        console.log("modelName", modelName);
         this.modelPath = path.join(__dirname, modelName);
-        try {
-            this.initializeModel();
-        } catch (error) {
-            console.error("Error initializing model", error);
+
+    }
+    private async ensureInitialized() {
+        if (!this.modelInitialized) {
+            await this.initializeModel();
         }
     }
-
     public static getInstance(): LlamaService {
         if (!LlamaService.instance) {
             LlamaService.instance = new LlamaService();
         }
         return LlamaService.instance;
     }
-
     async initializeModel() {
         try {
             await this.checkModel();
@@ -99,30 +97,26 @@ class LlamaService {
             );

             if (hasCUDA) {
-                console.log("**** CUDA detected");
+                console.log("**** LlamaService: CUDA detected");
             } else {
-                console.log(
-                    "**** No CUDA detected - local response will be slow"
+                console.warn(
+                    "**** LlamaService: No CUDA detected - local response will be slow"
                 );
             }

             this.llama = await getLlama({
                 gpu: "cuda",
             });
-            console.log("Creating grammar");
             const grammar = new LlamaJsonSchemaGrammar(
                 this.llama,
                 jsonSchemaGrammar as GbnfJsonSchema
             );
             this.grammar = grammar;
-            console.log("Loading model");
-            console.log("this.modelPath", this.modelPath);

             this.model = await this.llama.loadModel({
                 modelPath: this.modelPath,
             });
-            console.log("Model GPU support", this.llama.getGpuDeviceNames());
-            console.log("Creating context");
+
             this.ctx = await this.model.createContext({ contextSize: 8192 });
             this.sequence = this.ctx.getSequence();

@@ -139,11 +133,7 @@ class LlamaService {
     }

     async checkModel() {
-        console.log("Checking model");
         if (!fs.existsSync(this.modelPath)) {
-            console.log("this.modelPath", this.modelPath);
-            console.log("Model not found. Downloading...");
-
             await new Promise<void>((resolve, reject) => {
                 const file = fs.createWriteStream(this.modelPath);
                 let downloadedSize = 0;
@@ -157,14 +147,9 @@ class LlamaService {
                         if (isRedirect) {
                             const redirectUrl = response.headers.location;
                             if (redirectUrl) {
-                                console.log(
-                                    "Following redirect to:",
-                                    redirectUrl
-                                );
                                 downloadModel(redirectUrl);
                                 return;
                             } else {
-                                console.error("Redirect URL not found");
                                 reject(new Error("Redirect URL not found"));
                                 return;
                             }
@@ -191,7 +176,6 @@ class LlamaService {

                     response.on("end", () => {
                         file.end();
-                        console.log("\nModel downloaded successfully.");
                         resolve();
                     });
                 })
@@ -211,14 +195,13 @@ class LlamaService {
                 });
             });
         } else {
-            console.log("Model already exists.");
+            prettyConsole.warn("Model already exists.");
         }
     }

     async deleteModel() {
         if (fs.existsSync(this.modelPath)) {
             fs.unlinkSync(this.modelPath);
-            console.log("Model deleted.");
         }
     }

@@ -230,7 +213,7 @@ class LlamaService {
         presence_penalty: number,
         max_tokens: number
     ): Promise<any> {
-        console.log("Queueing message generateText");
+        await this.ensureInitialized();
         return new Promise((resolve, reject) => {
             this.messageQueue.push({
                 context,
@@ -255,13 +238,15 @@ class LlamaService {
         presence_penalty: number,
         max_tokens: number
     ): Promise<string> {
+        await this.ensureInitialized();
+
         return new Promise((resolve, reject) => {
             this.messageQueue.push({
                 context,
                 temperature,
                 stop,
-                frequency_penalty,
-                presence_penalty,
+                frequency_penalty: frequency_penalty ?? 1.0,
+                presence_penalty: presence_penalty ?? 1.0,
                 max_tokens,
                 useGrammar: false,
                 resolve,
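Review note: the `?? 1.0` defaults above only kick in when `frequency_penalty` or `presence_penalty` is `null` or `undefined`; an explicit `0` passed by a caller is preserved, unlike a `|| 1.0` fallback. A quick illustration with made-up values:

```ts
const presence_penalty: number | undefined = undefined;
const frequency_penalty = 0;

// Nullish coalescing: only null/undefined trigger the default.
console.log(presence_penalty ?? 1.0); // 1.0
console.log(frequency_penalty ?? 1.0); // 0   (explicit zero kept)
console.log(frequency_penalty || 1.0); // 1.0 (|| would override zero)
```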
@@ -286,7 +271,6 @@ class LlamaService {
             const message = this.messageQueue.shift();
             if (message) {
                 try {
-                    console.log("Processing message");
                     const response = await this.getCompletionResponse(
                         message.context,
                         message.temperature,
@@ -334,7 +318,7 @@ class LlamaService {
         };

         const responseTokens: Token[] = [];
-        console.log("Evaluating tokens");
+
         for await (const token of this.sequence.evaluate(tokens, {
             temperature: Number(temperature),
             repeatPenalty: repeatPenalty,
@@ -374,7 +358,6 @@ class LlamaService {
            // try parsing response as JSON
            try {
                jsonString = JSON.stringify(JSON.parse(response));
-               console.log("parsedResponse", jsonString);
            } catch {
                throw new Error("JSON string not found");
            }
@@ -384,20 +367,19 @@ class LlamaService {
                 if (!parsedResponse) {
                     throw new Error("Parsed response is undefined");
                 }
-                console.log("AI: " + parsedResponse.content);
                 await this.sequence.clearHistory();
                 return parsedResponse;
             } catch (error) {
                 console.error("Error parsing JSON:", error);
             }
         } else {
-            console.log("AI: " + response);
             await this.sequence.clearHistory();
             return response;
         }
     }

     async getEmbeddingResponse(input: string): Promise<number[] | undefined> {
+        await this.ensureInitialized();
         if (!this.model) {
             throw new Error("Model not initialized. Call initialize() first.");
         }
@@ -409,3 +391,4 @@ class LlamaService {
     }
 }

 export default LlamaService;
+
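The net effect of this diff is that model setup moves out of the constructor and behind a per-call `ensureInitialized()` guard: constructing the singleton stays cheap, and the first generation or embedding request pays the download/load cost. A minimal sketch of that pattern follows; the names (`LazyService`, `setup`, `generate`) are illustrative stand-ins, and only the guard logic mirrors the diff:

```ts
// Sketch of the lazy-initialization guard introduced in this commit.
class LazyService {
    private static instance: LazyService;
    private initialized = false;

    // Constructor stays cheap: no downloads or GPU probing here.
    private constructor() {}

    public static getInstance(): LazyService {
        if (!LazyService.instance) {
            LazyService.instance = new LazyService();
        }
        return LazyService.instance;
    }

    // Every public async entry point awaits this guard, so the first
    // caller triggers setup and later callers fall through immediately.
    private async ensureInitialized(): Promise<void> {
        if (!this.initialized) {
            await this.setup();
            this.initialized = true;
        }
    }

    private async setup(): Promise<void> {
        // ...expensive work (download model, create context)...
    }

    public async generate(prompt: string): Promise<string> {
        await this.ensureInitialized();
        return `echo: ${prompt}`;
    }
}
```

One caveat worth flagging in review: two concurrent first calls can both observe the flag as `false` and run setup twice. Caching the in-flight promise instead (e.g. `this.initPromise ??= this.setup()`) would make the guard race-free.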