Skip to content

Commit 241296e

Browse files
authored
Merge pull request #1625 from elizaOS/tcm-transcription-config
feat: Select a transcription provider based on the character settings.
2 parents 64cfc07 + bd66835 commit 241296e

File tree

2 files changed

+45
-9
lines changed

2 files changed

+45
-9
lines changed

packages/core/src/types.ts

+7
Original file line number | Diff line number | Diff line change
@@ -784,6 +784,7 @@ export type Character = {
784784
solana?: any[];
785785
[key: string]: any[];
786786
};
787+
transcription?: TranscriptionProvider;
787788
};
788789

789790
/** Optional client-specific config */
@@ -1312,3 +1313,9 @@ export interface ActionResponse {
13121313
export interface ISlackService extends Service {
13131314
client: any;
13141315
}
1316+
1317+
export enum TranscriptionProvider {
1318+
OpenAI = "openai",
1319+
Deepgram = "deepgram",
1320+
Local = "local",
1321+
}

packages/plugin-node/src/services/transcription.ts

+38 -9
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@ import {
33
IAgentRuntime,
44
ITranscriptionService,
55
settings,
6+
TranscriptionProvider,
67
} from "@elizaos/core";
78
import { Service, ServiceType } from "@elizaos/core";
89
import { exec } from "child_process";
@@ -32,16 +33,39 @@ export class TranscriptionService
3233
private DEBUG_AUDIO_DIR: string;
3334
private TARGET_SAMPLE_RATE = 16000; // Common sample rate for speech recognition
3435
private isCudaAvailable: boolean = false;
36+
private transcriptionProvider: TranscriptionProvider;
37+
private deepgram: DeepgramClient | null = null;
3538
private openai: OpenAI | null = null;
36-
private deepgram?: DeepgramClient;
3739

3840
private queue: { audioBuffer: ArrayBuffer; resolve: Function }[] = [];
3941
private processing: boolean = false;
4042

4143
async initialize(_runtime: IAgentRuntime): Promise<void> {
4244
this.runtime = _runtime;
43-
const deepgramKey = this.runtime.getSetting("DEEPGRAM_API_KEY");
44-
this.deepgram = deepgramKey ? createClient(deepgramKey) : null;
45+
46+
let transcriptionProvider = TranscriptionProvider.Local;
47+
48+
switch (this.runtime.character.settings.transcription) {
49+
case TranscriptionProvider.Deepgram: {
50+
const deepgramKey = this.runtime.getSetting("DEEPGRAM_API_KEY");
51+
if (deepgramKey) {
52+
this.deepgram = createClient(deepgramKey);
53+
transcriptionProvider = TranscriptionProvider.Deepgram;
54+
}
55+
break;
56+
}
57+
case TranscriptionProvider.OpenAI: {
58+
const openAIKey = this.runtime.getSetting("OPENAI_API_KEY");
59+
if (openAIKey) {
60+
this.openai = new OpenAI({
61+
apiKey: openAIKey,
62+
});
63+
transcriptionProvider = TranscriptionProvider.OpenAI;
64+
}
65+
break;
66+
}
67+
}
68+
this.transcriptionProvider = transcriptionProvider;
4569
}
4670

4771
constructor() {
@@ -201,12 +225,17 @@ export class TranscriptionService
201225
while (this.queue.length > 0) {
202226
const { audioBuffer, resolve } = this.queue.shift()!;
203227
let result: string | null = null;
204-
if (this.deepgram) {
205-
result = await this.transcribeWithDeepgram(audioBuffer);
206-
} else if (this.openai) {
207-
result = await this.transcribeWithOpenAI(audioBuffer);
208-
} else {
209-
result = await this.transcribeLocally(audioBuffer);
228+
229+
switch (this.transcriptionProvider) {
230+
case TranscriptionProvider.Deepgram:
231+
result = await this.transcribeWithDeepgram(audioBuffer);
232+
break;
233+
case TranscriptionProvider.OpenAI:
234+
result = await this.transcribeWithOpenAI(audioBuffer);
235+
break;
236+
default:
237+
result = await this.transcribeLocally(audioBuffer);
238+
break;
210239
}
211240

212241
resolve(result);

0 commit comments

Comments
 (0)