1
1
// src/plugins/SttTtsPlugin.ts
2
2
3
- import { spawn } from ' child_process' ;
4
- import { ITranscriptionService } from ' @elizaos/core' ;
5
- import { Space , JanusClient , AudioDataWithUser } from ' agent-twitter-client' ;
3
+ import { spawn } from " child_process" ;
4
+ import { ITranscriptionService } from " @elizaos/core" ;
5
+ import { Space , JanusClient , AudioDataWithUser } from " agent-twitter-client" ;
6
6
7
7
interface PluginConfig {
8
8
openAiApiKey ?: string ; // for STT & ChatGPT
@@ -14,7 +14,7 @@ interface PluginConfig {
14
14
elevenLabsModel ?: string ; // e.g. "eleven_monolingual_v1"
15
15
systemPrompt ?: string ; // ex. "You are a helpful AI assistant"
16
16
chatContext ?: Array < {
17
- role : ' system' | ' user' | ' assistant' ;
17
+ role : " system" | " user" | " assistant" ;
18
18
content : string ;
19
19
} > ;
20
20
transcriptionService : ITranscriptionService ;
@@ -33,12 +33,12 @@ export class SttTtsPlugin implements Plugin {
33
33
private openAiApiKey ?: string ;
34
34
private elevenLabsApiKey ?: string ;
35
35
36
- private gptModel = ' gpt-3.5-turbo' ;
37
- private voiceId = ' 21m00Tcm4TlvDq8ikWAM' ;
38
- private elevenLabsModel = ' eleven_monolingual_v1' ;
39
- private systemPrompt = ' You are a helpful AI assistant.' ;
36
+ private gptModel = " gpt-3.5-turbo" ;
37
+ private voiceId = " 21m00Tcm4TlvDq8ikWAM" ;
38
+ private elevenLabsModel = " eleven_monolingual_v1" ;
39
+ private systemPrompt = " You are a helpful AI assistant." ;
40
40
private chatContext : Array < {
41
- role : ' system' | ' user' | ' assistant' ;
41
+ role : " system" | " user" | " assistant" ;
42
42
content : string ;
43
43
} > = [ ] ;
44
44
@@ -63,24 +63,26 @@ export class SttTtsPlugin implements Plugin {
63
63
private ttsQueue : string [ ] = [ ] ;
64
64
private isSpeaking = false ;
65
65
66
- onAttach ( space : Space ) {
67
- console . log ( ' [SttTtsPlugin] onAttach => space was attached' ) ;
66
+ onAttach ( _space : Space ) {
67
+ console . log ( " [SttTtsPlugin] onAttach => space was attached" ) ;
68
68
}
69
69
70
70
init ( params : { space : Space ; pluginConfig ?: Record < string , any > } ) : void {
71
71
console . log (
72
- ' [SttTtsPlugin] init => Space fully ready. Subscribing to events.' ,
72
+ " [SttTtsPlugin] init => Space fully ready. Subscribing to events."
73
73
) ;
74
74
75
75
this . space = params . space ;
76
- this . janus = ( this . space as any ) ?. janusClient as JanusClient | undefined ;
76
+ this . janus = ( this . space as any ) ?. janusClient as
77
+ | JanusClient
78
+ | undefined ;
77
79
78
80
const config = params . pluginConfig as PluginConfig ;
79
81
this . openAiApiKey = config ?. openAiApiKey ;
80
82
this . elevenLabsApiKey = config ?. elevenLabsApiKey ;
81
83
this . transcriptionService = config . transcriptionService ;
82
84
if ( config ?. gptModel ) this . gptModel = config . gptModel ;
83
- if ( typeof config ?. silenceThreshold === ' number' ) {
85
+ if ( typeof config ?. silenceThreshold === " number" ) {
84
86
this . silenceThreshold = config . silenceThreshold ;
85
87
}
86
88
if ( config ?. voiceId ) {
@@ -95,24 +97,24 @@ export class SttTtsPlugin implements Plugin {
95
97
if ( config ?. chatContext ) {
96
98
this . chatContext = config . chatContext ;
97
99
}
98
- console . log ( ' [SttTtsPlugin] Plugin config =>' , config ) ;
100
+ console . log ( " [SttTtsPlugin] Plugin config =>" , config ) ;
99
101
100
102
// Listen for mute events
101
103
this . space . on (
102
- ' muteStateChanged' ,
104
+ " muteStateChanged" ,
103
105
( evt : { userId : string ; muted : boolean } ) => {
104
- console . log ( ' [SttTtsPlugin] Speaker muteStateChanged =>' , evt ) ;
106
+ console . log ( " [SttTtsPlugin] Speaker muteStateChanged =>" , evt ) ;
105
107
if ( evt . muted ) {
106
108
this . handleMute ( evt . userId ) . catch ( ( err ) =>
107
- console . error ( ' [SttTtsPlugin] handleMute error =>' , err ) ,
109
+ console . error ( " [SttTtsPlugin] handleMute error =>" , err )
108
110
) ;
109
111
} else {
110
112
this . speakerUnmuted . set ( evt . userId , true ) ;
111
113
if ( ! this . pcmBuffers . has ( evt . userId ) ) {
112
114
this . pcmBuffers . set ( evt . userId , [ ] ) ;
113
115
}
114
116
}
115
- } ,
117
+ }
116
118
) ;
117
119
}
118
120
@@ -157,22 +159,22 @@ export class SttTtsPlugin implements Plugin {
157
159
const view = new DataView ( buffer ) ;
158
160
159
161
// RIFF chunk descriptor
160
- this . writeString ( view , 0 , ' RIFF' ) ;
162
+ this . writeString ( view , 0 , " RIFF" ) ;
161
163
view . setUint32 ( 4 , 36 + dataSize , true ) ; // file size - 8
162
- this . writeString ( view , 8 , ' WAVE' ) ;
164
+ this . writeString ( view , 8 , " WAVE" ) ;
163
165
164
166
// fmt sub-chunk
165
- this . writeString ( view , 12 , ' fmt ' ) ;
166
- view . setUint32 ( 16 , 16 , true ) ; // Subchunk1Size (16 for PCM)
167
- view . setUint16 ( 20 , 1 , true ) ; // AudioFormat (1 = PCM)
167
+ this . writeString ( view , 12 , " fmt " ) ;
168
+ view . setUint32 ( 16 , 16 , true ) ; // Subchunk1Size (16 for PCM)
169
+ view . setUint16 ( 20 , 1 , true ) ; // AudioFormat (1 = PCM)
168
170
view . setUint16 ( 22 , numChannels , true ) ; // NumChannels
169
- view . setUint32 ( 24 , sampleRate , true ) ; // SampleRate
170
- view . setUint32 ( 28 , byteRate , true ) ; // ByteRate
171
- view . setUint16 ( 32 , blockAlign , true ) ; // BlockAlign
172
- view . setUint16 ( 34 , 16 , true ) ; // BitsPerSample (16)
171
+ view . setUint32 ( 24 , sampleRate , true ) ; // SampleRate
172
+ view . setUint32 ( 28 , byteRate , true ) ; // ByteRate
173
+ view . setUint16 ( 32 , blockAlign , true ) ; // BlockAlign
174
+ view . setUint16 ( 34 , 16 , true ) ; // BitsPerSample (16)
173
175
174
176
// data sub-chunk
175
- this . writeString ( view , 36 , ' data' ) ;
177
+ this . writeString ( view , 36 , " data" ) ;
176
178
view . setUint32 ( 40 , dataSize , true ) ;
177
179
178
180
// Write PCM samples
@@ -199,11 +201,11 @@ export class SttTtsPlugin implements Plugin {
199
201
this . pcmBuffers . set ( userId , [ ] ) ;
200
202
201
203
if ( ! chunks . length ) {
202
- console . log ( ' [SttTtsPlugin] No audio chunks for user =>' , userId ) ;
204
+ console . log ( " [SttTtsPlugin] No audio chunks for user =>" , userId ) ;
203
205
return ;
204
206
}
205
207
console . log (
206
- `[SttTtsPlugin] Flushing STT buffer for user=${ userId } , chunks=${ chunks . length } ` ,
208
+ `[SttTtsPlugin] Flushing STT buffer for user=${ userId } , chunks=${ chunks . length } `
207
209
) ;
208
210
209
211
const totalLen = chunks . reduce ( ( acc , c ) => acc + c . length , 0 ) ;
@@ -221,14 +223,19 @@ export class SttTtsPlugin implements Plugin {
221
223
const sttText = await this . transcriptionService . transcribe ( wavBuffer ) ;
222
224
223
225
if ( ! sttText || ! sttText . trim ( ) ) {
224
- console . log ( '[SttTtsPlugin] No speech recognized for user =>' , userId ) ;
226
+ console . log (
227
+ "[SttTtsPlugin] No speech recognized for user =>" ,
228
+ userId
229
+ ) ;
225
230
return ;
226
231
}
227
232
console . log ( `[SttTtsPlugin] STT => user=${ userId } , text="${ sttText } "` ) ;
228
233
229
234
// GPT answer
230
235
const replyText = await this . askChatGPT ( sttText ) ;
231
- console . log ( `[SttTtsPlugin] GPT => user=${ userId } , reply="${ replyText } "` ) ;
236
+ console . log (
237
+ `[SttTtsPlugin] GPT => user=${ userId } , reply="${ replyText } "`
238
+ ) ;
232
239
233
240
// Use the standard speak method with queue
234
241
await this . speakText ( replyText ) ;
@@ -242,7 +249,7 @@ export class SttTtsPlugin implements Plugin {
242
249
if ( ! this . isSpeaking ) {
243
250
this . isSpeaking = true ;
244
251
this . processTtsQueue ( ) . catch ( ( err ) => {
245
- console . error ( ' [SttTtsPlugin] processTtsQueue error =>' , err ) ;
252
+ console . error ( " [SttTtsPlugin] processTtsQueue error =>" , err ) ;
246
253
} ) ;
247
254
}
248
255
}
@@ -260,7 +267,7 @@ export class SttTtsPlugin implements Plugin {
260
267
const pcm = await this . convertMp3ToPcm ( ttsAudio , 48000 ) ;
261
268
await this . streamToJanus ( pcm , 48000 ) ;
262
269
} catch ( err ) {
263
- console . error ( ' [SttTtsPlugin] TTS streaming error =>' , err ) ;
270
+ console . error ( " [SttTtsPlugin] TTS streaming error =>" , err ) ;
264
271
}
265
272
}
266
273
this . isSpeaking = false ;
@@ -271,20 +278,20 @@ export class SttTtsPlugin implements Plugin {
271
278
*/
272
279
private async askChatGPT ( userText : string ) : Promise < string > {
273
280
if ( ! this . openAiApiKey ) {
274
- throw new Error ( ' [SttTtsPlugin] No OpenAI API key for ChatGPT' ) ;
281
+ throw new Error ( " [SttTtsPlugin] No OpenAI API key for ChatGPT" ) ;
275
282
}
276
- const url = ' https://api.openai.com/v1/chat/completions' ;
283
+ const url = " https://api.openai.com/v1/chat/completions" ;
277
284
const messages = [
278
- { role : ' system' , content : this . systemPrompt } ,
285
+ { role : " system" , content : this . systemPrompt } ,
279
286
...this . chatContext ,
280
- { role : ' user' , content : userText } ,
287
+ { role : " user" , content : userText } ,
281
288
] ;
282
289
283
290
const resp = await fetch ( url , {
284
- method : ' POST' ,
291
+ method : " POST" ,
285
292
headers : {
286
293
Authorization : `Bearer ${ this . openAiApiKey } ` ,
287
- ' Content-Type' : ' application/json' ,
294
+ " Content-Type" : " application/json" ,
288
295
} ,
289
296
body : JSON . stringify ( {
290
297
model : this . gptModel ,
@@ -295,14 +302,14 @@ export class SttTtsPlugin implements Plugin {
295
302
if ( ! resp . ok ) {
296
303
const errText = await resp . text ( ) ;
297
304
throw new Error (
298
- `[SttTtsPlugin] ChatGPT error => ${ resp . status } ${ errText } ` ,
305
+ `[SttTtsPlugin] ChatGPT error => ${ resp . status } ${ errText } `
299
306
) ;
300
307
}
301
308
302
309
const json = await resp . json ( ) ;
303
- const reply = json . choices ?. [ 0 ] ?. message ?. content || '' ;
304
- this . chatContext . push ( { role : ' user' , content : userText } ) ;
305
- this . chatContext . push ( { role : ' assistant' , content : reply } ) ;
310
+ const reply = json . choices ?. [ 0 ] ?. message ?. content || "" ;
311
+ this . chatContext . push ( { role : " user" , content : userText } ) ;
312
+ this . chatContext . push ( { role : " assistant" , content : reply } ) ;
306
313
return reply . trim ( ) ;
307
314
}
308
315
@@ -311,14 +318,14 @@ export class SttTtsPlugin implements Plugin {
311
318
*/
312
319
private async elevenLabsTts ( text : string ) : Promise < Buffer > {
313
320
if ( ! this . elevenLabsApiKey ) {
314
- throw new Error ( ' [SttTtsPlugin] No ElevenLabs API key' ) ;
321
+ throw new Error ( " [SttTtsPlugin] No ElevenLabs API key" ) ;
315
322
}
316
323
const url = `https://api.elevenlabs.io/v1/text-to-speech/${ this . voiceId } ` ;
317
324
const resp = await fetch ( url , {
318
- method : ' POST' ,
325
+ method : " POST" ,
319
326
headers : {
320
- ' Content-Type' : ' application/json' ,
321
- ' xi-api-key' : this . elevenLabsApiKey ,
327
+ " Content-Type" : " application/json" ,
328
+ " xi-api-key" : this . elevenLabsApiKey ,
322
329
} ,
323
330
body : JSON . stringify ( {
324
331
text,
@@ -329,7 +336,7 @@ export class SttTtsPlugin implements Plugin {
329
336
if ( ! resp . ok ) {
330
337
const errText = await resp . text ( ) ;
331
338
throw new Error (
332
- `[SttTtsPlugin] ElevenLabs TTS error => ${ resp . status } ${ errText } ` ,
339
+ `[SttTtsPlugin] ElevenLabs TTS error => ${ resp . status } ${ errText } `
333
340
) ;
334
341
}
335
342
const arrayBuf = await resp . arrayBuffer ( ) ;
@@ -341,37 +348,37 @@ export class SttTtsPlugin implements Plugin {
341
348
*/
342
349
private convertMp3ToPcm (
343
350
mp3Buf : Buffer ,
344
- outRate : number ,
351
+ outRate : number
345
352
) : Promise < Int16Array > {
346
353
return new Promise ( ( resolve , reject ) => {
347
- const ff = spawn ( ' ffmpeg' , [
348
- '-i' ,
349
- ' pipe:0' ,
350
- '-f' ,
351
- ' s16le' ,
352
- ' -ar' ,
354
+ const ff = spawn ( " ffmpeg" , [
355
+ "-i" ,
356
+ " pipe:0" ,
357
+ "-f" ,
358
+ " s16le" ,
359
+ " -ar" ,
353
360
outRate . toString ( ) ,
354
- ' -ac' ,
355
- '1' ,
356
- ' pipe:1' ,
361
+ " -ac" ,
362
+ "1" ,
363
+ " pipe:1" ,
357
364
] ) ;
358
365
let raw = Buffer . alloc ( 0 ) ;
359
366
360
- ff . stdout . on ( ' data' , ( chunk : Buffer ) => {
367
+ ff . stdout . on ( " data" , ( chunk : Buffer ) => {
361
368
raw = Buffer . concat ( [ raw , chunk ] ) ;
362
369
} ) ;
363
- ff . stderr . on ( ' data' , ( ) => {
370
+ ff . stderr . on ( " data" , ( ) => {
364
371
// ignoring ffmpeg logs
365
372
} ) ;
366
- ff . on ( ' close' , ( code ) => {
373
+ ff . on ( " close" , ( code ) => {
367
374
if ( code !== 0 ) {
368
375
reject ( new Error ( `ffmpeg error code=${ code } ` ) ) ;
369
376
return ;
370
377
}
371
378
const samples = new Int16Array (
372
379
raw . buffer ,
373
380
raw . byteOffset ,
374
- raw . byteLength / 2 ,
381
+ raw . byteLength / 2
375
382
) ;
376
383
resolve ( samples ) ;
377
384
} ) ;
@@ -387,7 +394,7 @@ export class SttTtsPlugin implements Plugin {
387
394
*/
388
395
private async streamToJanus (
389
396
samples : Int16Array ,
390
- sampleRate : number ,
397
+ sampleRate : number
391
398
) : Promise < void > {
392
399
// TODO: Check if better than 480 fixed
393
400
const FRAME_SIZE = Math . floor ( sampleRate * 0.01 ) ; // 10ms frames => 480 @48kHz
@@ -408,25 +415,25 @@ export class SttTtsPlugin implements Plugin {
408
415
409
416
public setSystemPrompt ( prompt : string ) {
410
417
this . systemPrompt = prompt ;
411
- console . log ( ' [SttTtsPlugin] setSystemPrompt =>' , prompt ) ;
418
+ console . log ( " [SttTtsPlugin] setSystemPrompt =>" , prompt ) ;
412
419
}
413
420
414
421
/**
415
422
* Change the GPT model at runtime (e.g. "gpt-4", "gpt-3.5-turbo", etc.).
416
423
*/
417
424
public setGptModel ( model : string ) {
418
425
this . gptModel = model ;
419
- console . log ( ' [SttTtsPlugin] setGptModel =>' , model ) ;
426
+ console . log ( " [SttTtsPlugin] setGptModel =>" , model ) ;
420
427
}
421
428
422
429
/**
423
430
* Add a message (system, user or assistant) to the chat context.
424
431
* E.g. to store conversation history or inject a persona.
425
432
*/
426
- public addMessage ( role : ' system' | ' user' | ' assistant' , content : string ) {
433
+ public addMessage ( role : " system" | " user" | " assistant" , content : string ) {
427
434
this . chatContext . push ( { role, content } ) ;
428
435
console . log (
429
- `[SttTtsPlugin] addMessage => role=${ role } , content=${ content } ` ,
436
+ `[SttTtsPlugin] addMessage => role=${ role } , content=${ content } `
430
437
) ;
431
438
}
432
439
@@ -435,11 +442,11 @@ export class SttTtsPlugin implements Plugin {
435
442
*/
436
443
public clearChatContext ( ) {
437
444
this . chatContext = [ ] ;
438
- console . log ( ' [SttTtsPlugin] clearChatContext => done' ) ;
445
+ console . log ( " [SttTtsPlugin] clearChatContext => done" ) ;
439
446
}
440
447
441
448
cleanup ( ) : void {
442
- console . log ( ' [SttTtsPlugin] cleanup => releasing resources' ) ;
449
+ console . log ( " [SttTtsPlugin] cleanup => releasing resources" ) ;
443
450
this . pcmBuffers . clear ( ) ;
444
451
this . speakerUnmuted . clear ( ) ;
445
452
this . ttsQueue = [ ] ;
0 commit comments