@@ -14,6 +14,7 @@ import os from "os";
14
14
import path from "path" ;
15
15
import { fileURLToPath } from "url" ;
16
16
import { promisify } from "util" ;
17
+ import { createClient , DeepgramClient } from "@deepgram/sdk" ;
17
18
18
19
// const __dirname = path.dirname(new URL(import.meta.url).pathname); #compatibility issues with windows
19
20
const __filename = fileURLToPath ( import . meta. url ) ;
@@ -25,17 +26,23 @@ export class TranscriptionService
25
26
extends Service
26
27
implements ITranscriptionService
27
28
{
29
+ private runtime : IAgentRuntime | null = null ;
28
30
static serviceType : ServiceType = ServiceType . TRANSCRIPTION ;
29
31
private CONTENT_CACHE_DIR : string ;
30
32
private DEBUG_AUDIO_DIR : string ;
31
33
private TARGET_SAMPLE_RATE = 16000 ; // Common sample rate for speech recognition
32
34
private isCudaAvailable : boolean = false ;
33
35
private openai : OpenAI | null = null ;
36
+ private deepgram ?: DeepgramClient ;
34
37
35
38
private queue : { audioBuffer : ArrayBuffer ; resolve : Function } [ ] = [ ] ;
36
39
private processing : boolean = false ;
37
40
38
- async initialize ( _runtime : IAgentRuntime ) : Promise < void > { }
41
/**
 * Stores the agent runtime and, when a `DEEPGRAM_API_KEY` setting is
 * present, creates the Deepgram client used for transcription.
 *
 * @param _runtime - Agent runtime the `DEEPGRAM_API_KEY` setting is read from.
 */
async initialize(_runtime: IAgentRuntime): Promise<void> {
    this.runtime = _runtime;
    const deepgramKey = _runtime.getSetting("DEEPGRAM_API_KEY");
    // `deepgram` is declared as an optional field (`DeepgramClient | undefined`),
    // so it is left undefined (not null) when no key is configured.
    this.deepgram = deepgramKey ? createClient(deepgramKey) : undefined;
}
39
46
40
47
constructor ( ) {
41
48
super ( ) ;
@@ -194,8 +201,12 @@ export class TranscriptionService
194
201
while ( this . queue . length > 0 ) {
195
202
const { audioBuffer, resolve } = this . queue . shift ( ) ! ;
196
203
let result : string | null = null ;
197
-
198
- if ( this . openai ) {
204
+ if ( this . deepgram ) {
205
+ console . log (
206
+ "%%%%%%%&&&&%%%%%%%&&&&%%%%%%%&&&&%%%%%%%&&&&%%%%%%%&&&&%%%%%%%&&&&%%%%%%%&&&&%%%%%%%&&&&%%%%%%%&&&&%%%%%%%&&&&%%%%%%%&&&&"
207
+ ) ;
208
+ result = await this . transcribeWithDeepgram ( audioBuffer ) ;
209
+ } else if ( this . openai ) {
199
210
result = await this . transcribeWithOpenAI ( audioBuffer ) ;
200
211
} else {
201
212
result = await this . transcribeLocally ( audioBuffer ) ;
@@ -207,6 +218,23 @@ export class TranscriptionService
207
218
this . processing = false ;
208
219
}
209
220
221
/**
 * Transcribes an audio buffer through Deepgram's prerecorded API.
 *
 * @param audioBuffer - Audio bytes to send to Deepgram.
 * @returns The transcript of the first alternative on the first channel, or
 *          `null` when no Deepgram client is configured, the request fails,
 *          or the response contains no transcript.
 */
private async transcribeWithDeepgram(
    audioBuffer: ArrayBuffer
): Promise<string | null> {
    // The client only exists when initialize() found an API key.
    if (!this.deepgram) {
        return null;
    }

    try {
        const buffer = Buffer.from(audioBuffer);
        const { result, error } =
            await this.deepgram.listen.prerecorded.transcribeFile(buffer, {
                model: "nova-2",
                language: "en-US",
                smart_format: true,
            });

        // The SDK reports request failures through `error` instead of throwing.
        if (error) {
            console.error("Deepgram transcription failed:", error);
            return null;
        }

        // Channels or alternatives may be missing or empty; do not index blindly.
        return (
            result?.results?.channels?.[0]?.alternatives?.[0]?.transcript ??
            null
        );
    } catch (err: unknown) {
        // Return null rather than letting an exception escape into the
        // queue-processing loop that awaits this method.
        console.error("Deepgram transcription threw:", err);
        return null;
    }
}
237
+
210
238
private async transcribeWithOpenAI (
211
239
audioBuffer : ArrayBuffer
212
240
) : Promise < string | null > {
0 commit comments