Skip to content

Commit 296b6a8

Browse files
committed Dec 13, 2024
integrate deepgram
1 parent 1f6013f commit 296b6a8

File tree

2 files changed

+34
-28
lines changed

2 files changed

+34
-28
lines changed
 

‎packages/client-discord/src/voice.ts

+3-25
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,6 @@ import {
4747
discordVoiceHandlerTemplate,
4848
} from "./templates.ts";
4949
import { getWavHeader } from "./utils.ts";
50-
import { createClient, DeepgramClient } from "@deepgram/sdk";
5150

5251
// These values are chosen for compatibility with picovoice components
5352
const DECODE_FRAME_SIZE = 1024;
@@ -139,7 +138,6 @@ export class AudioMonitor {
139138
}
140139

141140
export class VoiceManager extends EventEmitter {
142-
private deepgram?: DeepgramClient;
143141
private processingVoice: boolean = false;
144142
private transcriptionTimeout: NodeJS.Timeout | null = null;
145143
private userStates: Map<
@@ -165,9 +163,6 @@ export class VoiceManager extends EventEmitter {
165163
super();
166164
this.client = client.client;
167165
this.runtime = client.runtime;
168-
169-
const deepgramKey = this.runtime.getSetting("DEEPGRAM_API_KEY");
170-
this.deepgram = deepgramKey ? createClient(deepgramKey) : null;
171166
}
172167

173168
async handleVoiceStateUpdate(oldState: VoiceState, newState: VoiceState) {
@@ -583,26 +578,9 @@ export class VoiceManager extends EventEmitter {
583578

584579
let transcriptionText: string;
585580

586-
if (this.deepgram) {
587-
const response =
588-
await this.deepgram.listen.prerecorded.transcribeFile(
589-
wavBuffer,
590-
{
591-
model: "nova-2",
592-
language: "en-US",
593-
smart_format: true,
594-
}
595-
);
596-
transcriptionText =
597-
response.result.results.channels[0].alternatives[0]
598-
.transcript;
599-
} else {
600-
transcriptionText = await this.runtime
601-
.getService<ITranscriptionService>(
602-
ServiceType.TRANSCRIPTION
603-
)
604-
.transcribe(wavBuffer);
605-
}
581+
transcriptionText = await this.runtime
582+
.getService<ITranscriptionService>(ServiceType.TRANSCRIPTION)
583+
.transcribe(wavBuffer);
606584

607585
function isValidTranscription(text: string): boolean {
608586
if (!text || text.includes("[BLANK_AUDIO]")) return false;

‎packages/plugin-node/src/services/transcription.ts

+31-3
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import os from "os";
1414
import path from "path";
1515
import { fileURLToPath } from "url";
1616
import { promisify } from "util";
17+
import { createClient, DeepgramClient } from "@deepgram/sdk";
1718

1819
// const __dirname = path.dirname(new URL(import.meta.url).pathname); #compatibility issues with windows
1920
const __filename = fileURLToPath(import.meta.url);
@@ -25,17 +26,23 @@ export class TranscriptionService
2526
extends Service
2627
implements ITranscriptionService
2728
{
29+
private runtime: IAgentRuntime | null = null;
2830
static serviceType: ServiceType = ServiceType.TRANSCRIPTION;
2931
private CONTENT_CACHE_DIR: string;
3032
private DEBUG_AUDIO_DIR: string;
3133
private TARGET_SAMPLE_RATE = 16000; // Common sample rate for speech recognition
3234
private isCudaAvailable: boolean = false;
3335
private openai: OpenAI | null = null;
36+
private deepgram?: DeepgramClient;
3437

3538
private queue: { audioBuffer: ArrayBuffer; resolve: Function }[] = [];
3639
private processing: boolean = false;
3740

38-
async initialize(_runtime: IAgentRuntime): Promise<void> {}
41+
/**
 * Stores the agent runtime and, when the `DEEPGRAM_API_KEY` setting is
 * present, creates the Deepgram client.
 *
 * @param _runtime - Agent runtime whose settings are read for the API key.
 */
async initialize(_runtime: IAgentRuntime): Promise<void> {
    this.runtime = _runtime;
    const deepgramKey = _runtime.getSetting("DEEPGRAM_API_KEY");
    // The field is declared `deepgram?: DeepgramClient`, so the "no client"
    // state must be `undefined`; assigning `null` does not type-check under
    // strictNullChecks. Truthiness checks on the field behave the same.
    this.deepgram = deepgramKey ? createClient(deepgramKey) : undefined;
}
3946

4047
constructor() {
4148
super();
@@ -194,8 +201,12 @@ export class TranscriptionService
194201
while (this.queue.length > 0) {
195202
const { audioBuffer, resolve } = this.queue.shift()!;
196203
let result: string | null = null;
197-
198-
if (this.openai) {
204+
if (this.deepgram) {
205+
console.log(
206+
"%%%%%%%&&&&%%%%%%%&&&&%%%%%%%&&&&%%%%%%%&&&&%%%%%%%&&&&%%%%%%%&&&&%%%%%%%&&&&%%%%%%%&&&&%%%%%%%&&&&%%%%%%%&&&&%%%%%%%&&&&"
207+
);
208+
result = await this.transcribeWithDeepgram(audioBuffer);
209+
} else if (this.openai) {
199210
result = await this.transcribeWithOpenAI(audioBuffer);
200211
} else {
201212
result = await this.transcribeLocally(audioBuffer);
@@ -207,6 +218,23 @@ export class TranscriptionService
207218
this.processing = false;
208219
}
209220

221+
/**
 * Transcribes an audio buffer through Deepgram's prerecorded endpoint
 * using the `nova-2` model with `en-US` and smart formatting.
 *
 * @param audioBuffer - Audio bytes to transcribe.
 * @returns The transcript of the first alternative of the first channel,
 *   or `null` when no Deepgram client is configured, the request fails,
 *   or the response carries no transcript.
 */
private async transcribeWithDeepgram(
    audioBuffer: ArrayBuffer
): Promise<string | null> {
    // `deepgram` is optional; capture it once so the guard narrows the type.
    const client = this.deepgram;
    if (!client) {
        return null;
    }

    try {
        // The SDK reports failures through `error` rather than by throwing,
        // in which case `result` is not populated.
        const { result, error } =
            await client.listen.prerecorded.transcribeFile(
                Buffer.from(audioBuffer),
                {
                    model: "nova-2",
                    language: "en-US",
                    smart_format: true,
                }
            );

        if (error) {
            console.error("Deepgram transcription failed:", error);
            return null;
        }

        // Channels/alternatives may be empty; avoid throwing on a missing index.
        return (
            result?.results?.channels?.[0]?.alternatives?.[0]?.transcript ??
            null
        );
    } catch (error: unknown) {
        // Return null instead of propagating so a single failed request
        // cannot leave the caller's pending promise unresolved.
        console.error("Deepgram transcription failed:", error);
        return null;
    }
}
237+
210238
private async transcribeWithOpenAI(
211239
audioBuffer: ArrayBuffer
212240
): Promise<string | null> {

0 commit comments

Comments
 (0)