@@ -445,6 +445,163 @@ export class DirectClient {
445
445
}
446
446
}
447
447
) ;
448
+
449
/**
 * POST /:agentId/speak
 *
 * Runs the posted `text` through the agent (same pipeline as the /message
 * endpoint) and replies with the agent's answer synthesized to audio by the
 * ElevenLabs text-to-speech API.
 *
 * Request body: `text` (required), `roomId`, `userId`, `userName`, `name`.
 * Responses:
 *   200 audio/mpeg — synthesized speech for the agent's reply
 *   400            — no `text` in the body
 *   404            — no agent matches `agentId` by id or by character name
 *   500            — generation, configuration, or ElevenLabs failure
 */
this.app.post("/:agentId/speak", async (req, res) => {
    const agentId = req.params.agentId;
    const roomId = stringToUuid(req.body.roomId ?? "default-room-" + agentId);
    const userId = stringToUuid(req.body.userId ?? "user");
    const text = req.body.text;

    if (!text) {
        res.status(400).send("No text provided");
        return;
    }

    let runtime = this.agents.get(agentId);

    // No runtime registered under that id: fall back to a case-insensitive
    // match on the character name.
    if (!runtime) {
        runtime = Array.from(this.agents.values()).find(
            (a) => a.character.name.toLowerCase() === agentId.toLowerCase()
        );
    }

    if (!runtime) {
        res.status(404).send("Agent not found");
        return;
    }

    try {
        // Check the speech configuration before doing any agent work, so a
        // misconfigured server fails without writing memories or spending a
        // model generation.
        const apiKey = process.env.ELEVENLABS_XI_API_KEY;
        if (!apiKey) {
            throw new Error("ELEVENLABS_XI_API_KEY not configured");
        }

        const voiceId = process.env.ELEVENLABS_VOICE_ID;
        if (!voiceId) {
            throw new Error("ELEVENLABS_VOICE_ID not configured");
        }

        // Process message through agent (same as /message endpoint)
        await runtime.ensureConnection(
            userId,
            roomId,
            req.body.userName,
            req.body.name,
            "direct"
        );

        const messageId = stringToUuid(Date.now().toString());

        const content: Content = {
            text,
            attachments: [],
            source: "direct",
            inReplyTo: undefined,
        };

        const userMessage = {
            content,
            userId,
            roomId,
            agentId: runtime.agentId,
        };

        const memory: Memory = {
            id: messageId,
            agentId: runtime.agentId,
            userId,
            roomId,
            content,
            createdAt: Date.now(),
        };

        await runtime.messageManager.createMemory(memory);

        const state = await runtime.composeState(userMessage, {
            agentName: runtime.character.name,
        });

        const context = composeContext({
            state,
            template: messageHandlerTemplate,
        });

        const response = await generateMessageResponse({
            runtime: runtime,
            context,
            modelClass: ModelClass.LARGE,
        });

        // Bail out before persisting anything: storing a memory whose
        // content is empty would leave a broken entry in the room history.
        if (!response) {
            res.status(500).send("No response from generateMessageResponse");
            return;
        }

        // save response to memory
        const responseMessage = {
            ...userMessage,
            userId: runtime.agentId,
            content: response,
        };

        await runtime.messageManager.createMemory(responseMessage);

        await runtime.evaluate(memory, state);

        // Actions still run for their side effects; this endpoint only
        // speaks the primary response, so follow-up messages handed to the
        // callback are not collected.
        await runtime.processActions(
            memory,
            [responseMessage],
            state,
            async () => [memory]
        );

        // Get the text to convert to speech
        const textToSpeak = response.text;
        if (!textToSpeak) {
            throw new Error("Agent response contained no text to speak");
        }

        // Convert to speech using ElevenLabs
        const elevenLabsApiUrl = `https://api.elevenlabs.io/v1/text-to-speech/${voiceId}`;

        const speechResponse = await fetch(elevenLabsApiUrl, {
            method: "POST",
            headers: {
                "Content-Type": "application/json",
                "xi-api-key": apiKey,
            },
            body: JSON.stringify({
                text: textToSpeak,
                model_id: process.env.ELEVENLABS_MODEL_ID || "eleven_multilingual_v2",
                voice_settings: {
                    stability: parseFloat(process.env.ELEVENLABS_VOICE_STABILITY || "0.5"),
                    similarity_boost: parseFloat(process.env.ELEVENLABS_VOICE_SIMILARITY_BOOST || "0.9"),
                    style: parseFloat(process.env.ELEVENLABS_VOICE_STYLE || "0.66"),
                    use_speaker_boost: process.env.ELEVENLABS_VOICE_USE_SPEAKER_BOOST === "true",
                },
            }),
        });

        if (!speechResponse.ok) {
            throw new Error(
                `ElevenLabs API error: ${speechResponse.status} ${speechResponse.statusText}`
            );
        }

        const audioBuffer = await speechResponse.arrayBuffer();

        // The whole clip is already buffered, so let res.send() frame the
        // body itself. Forcing "Transfer-Encoding: chunked" here would
        // conflict with the Content-Length that res.send() sets for a Buffer.
        res.set("Content-Type", "audio/mpeg");
        res.send(Buffer.from(audioBuffer));
    } catch (error) {
        console.error("Error processing message or generating speech:", error);
        res.status(500).json({
            error: "Error processing message or generating speech",
            // Thrown values are not guaranteed to be Error instances.
            details: error instanceof Error ? error.message : String(error),
        });
    }
});
448
605
}
449
606
450
607
// agent/src/index.ts:startAgent calls this
0 commit comments