@@ -370,6 +370,163 @@ export class DirectClient {
370
370
}
371
371
}
372
372
) ;
373
+
374
// POST /:agentId/speak
//
// Runs `req.body.text` through the addressed agent, then sends the text of
// the agent's reply to the ElevenLabs text-to-speech API and returns the
// resulting audio to the caller as `audio/mpeg`.
//
// Request body:
//   text     (required) message to send to the agent
//   roomId   (optional) defaults to "default-room-<agentId>"
//   userId   (optional) defaults to "user"
//   userName (optional) forwarded to runtime.ensureConnection
//   name     (optional) forwarded to runtime.ensureConnection
//
// Environment:
//   ELEVENLABS_VOICE_ID, ELEVENLABS_XI_API_KEY        required
//   ELEVENLABS_MODEL_ID, ELEVENLABS_VOICE_STABILITY,
//   ELEVENLABS_VOICE_SIMILARITY_BOOST,
//   ELEVENLABS_VOICE_STYLE,
//   ELEVENLABS_VOICE_USE_SPEAKER_BOOST                optional
//
// Responses:
//   200 audio/mpeg body on success
//   400 plain text when `text` is missing
//   404 plain text when no agent matches `agentId` by id or character name
//   500 plain text when the model produced no response, or JSON
//       `{ error, details }` for any other failure
this.app.post("/:agentId/speak", async (req, res) => {
    const agentId = req.params.agentId;
    const roomId = stringToUuid(
        req.body.roomId ?? "default-room-" + agentId
    );
    const userId = stringToUuid(req.body.userId ?? "user");
    const text = req.body.text;

    if (!text) {
        res.status(400).send("No text provided");
        return;
    }

    let runtime = this.agents.get(agentId);

    // If no runtime is registered under this id, fall back to a
    // case-insensitive match on the character name.
    if (!runtime) {
        runtime = Array.from(this.agents.values()).find(
            (a) => a.character.name.toLowerCase() === agentId.toLowerCase()
        );
    }

    if (!runtime) {
        res.status(404).send("Agent not found");
        return;
    }

    try {
        // Process message through agent (same as /message endpoint)
        await runtime.ensureConnection(
            userId,
            roomId,
            req.body.userName,
            req.body.name,
            "direct"
        );

        const messageId = stringToUuid(Date.now().toString());

        const content: Content = {
            text,
            attachments: [],
            source: "direct",
            inReplyTo: undefined,
        };

        const userMessage = {
            content,
            userId,
            roomId,
            agentId: runtime.agentId,
        };

        const memory: Memory = {
            id: messageId,
            agentId: runtime.agentId,
            userId,
            roomId,
            content,
            createdAt: Date.now(),
        };

        // Persist the incoming user message before composing state.
        await runtime.messageManager.createMemory(memory);

        const state = await runtime.composeState(userMessage, {
            agentName: runtime.character.name,
        });

        const context = composeContext({
            state,
            template: messageHandlerTemplate,
        });

        const response = await generateMessageResponse({
            runtime: runtime,
            context,
            modelClass: ModelClass.LARGE,
        });

        // Bail out before persisting anything: storing a memory whose
        // content is empty would leave a bogus entry in the conversation.
        if (!response) {
            res.status(500).send("No response from generateMessageResponse");
            return;
        }

        // Save the agent's response to memory, authored by the agent.
        const responseMessage = {
            ...userMessage,
            userId: runtime.agentId,
            content: response,
        };

        await runtime.messageManager.createMemory(responseMessage);

        await runtime.evaluate(memory, state);

        // Messages produced by actions are not used by this endpoint;
        // only the primary response text is spoken.
        await runtime.processActions(
            memory,
            [responseMessage],
            state,
            async () => [memory]
        );

        // Get the text to convert to speech
        const textToSpeak = response.text;

        // Convert to speech using ElevenLabs
        const voiceId = process.env.ELEVENLABS_VOICE_ID;
        const apiKey = process.env.ELEVENLABS_XI_API_KEY;

        // Without a voice id the URL below would end in "/undefined".
        if (!voiceId) {
            throw new Error("ELEVENLABS_VOICE_ID not configured");
        }

        if (!apiKey) {
            throw new Error("ELEVENLABS_XI_API_KEY not configured");
        }

        const elevenLabsApiUrl = `https://api.elevenlabs.io/v1/text-to-speech/${voiceId}`;

        const speechResponse = await fetch(elevenLabsApiUrl, {
            method: "POST",
            headers: {
                "Content-Type": "application/json",
                "xi-api-key": apiKey,
            },
            body: JSON.stringify({
                text: textToSpeak,
                model_id:
                    process.env.ELEVENLABS_MODEL_ID ||
                    "eleven_multilingual_v2",
                voice_settings: {
                    stability: parseFloat(
                        process.env.ELEVENLABS_VOICE_STABILITY || "0.5"
                    ),
                    similarity_boost: parseFloat(
                        process.env.ELEVENLABS_VOICE_SIMILARITY_BOOST || "0.9"
                    ),
                    style: parseFloat(
                        process.env.ELEVENLABS_VOICE_STYLE || "0.66"
                    ),
                    use_speaker_boost:
                        process.env.ELEVENLABS_VOICE_USE_SPEAKER_BOOST ===
                        "true",
                },
            }),
        });

        if (!speechResponse.ok) {
            throw new Error(
                `ElevenLabs API error: ${speechResponse.statusText}`
            );
        }

        const audioBuffer = await speechResponse.arrayBuffer();

        // The audio is fully buffered and sent in one piece, so the
        // framing headers are left to res.send(). Forcing
        // "Transfer-Encoding: chunked" here would conflict with the
        // Content-Length that res.send() sets for a Buffer body.
        res.set({
            "Content-Type": "audio/mpeg",
        });

        res.send(Buffer.from(audioBuffer));
    } catch (error: unknown) {
        console.error(
            "Error processing message or generating speech:",
            error
        );
        res.status(500).json({
            error: "Error processing message or generating speech",
            // Thrown values are not guaranteed to be Error instances.
            details: error instanceof Error ? error.message : String(error),
        });
    }
});
373
530
}
374
531
375
532
// agent/src/index.ts:startAgent calls this
0 commit comments