Skip to content

Commit cfd1f48

Browse files
bfontes and wtfsayo authored
plugin-tts: enhance TTS generation flow and caching (#2506)
* refactor(tts-generation): enhance TTS generation flow and caching
* refactor(tts-generation): add error handling for language detection

---------

Co-authored-by: Sayo <hi@sayo.wtf>
1 parent adace37 commit cfd1f48

File tree

1 file changed

+63
-87
lines changed

1 file changed

+63
-87
lines changed

packages/plugin-tts/src/index.ts

+63 -87
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@ import * as fs from "fs";
1414
import { Buffer } from "buffer";
1515
import * as path from "path";
1616
import * as process from "process";
17-
import { detect } from 'langdetect';
17+
import { detect } from 'langdetect';
1818

1919
const generateTTS = async (prompt: string, voice: string, runtime: IAgentRuntime) => {
2020
process.env["FAL_KEY"] =
@@ -27,26 +27,24 @@ const generateTTS = async (prompt: string, voice: string, runtime: IAgentRuntime
2727
input: {
2828
input: prompt,
2929
voice: voice
30-
},
30+
},
3131
logs: true,
3232
onQueueUpdate: (update) => {
3333
if (update.status === "IN_PROGRESS") {
3434
update.logs
3535
.map((log) => log.message)
36-
.forEach(elizaLogger.log);
36+
.forEach(elizaLogger.debug);
3737
}
3838
},
3939
});
4040

41-
elizaLogger.log(
41+
elizaLogger.debug(
4242
"Generation request successful, received response:",
4343
response
4444
);
4545

46-
return {success: true,
47-
data: response.data};
48-
}
49-
catch (error) {
46+
return { success: true, data: response.data };
47+
} catch (error) {
5048
elizaLogger.error("TTS generation error:", error);
5149
return {
5250
success: false,
@@ -67,9 +65,9 @@ const TTSGeneration: Action = {
6765
],
6866
description: "Generate a tts audio based on a text prompt",
6967
validate: async (runtime: IAgentRuntime, _message: Memory) => {
70-
elizaLogger.log("Validating TTS action");
68+
elizaLogger.debug("Validating TTS action");
7169
const FalApiKey = runtime.getSetting("FAL_API_KEY");
72-
elizaLogger.log("FAL_API_KEY present:", !!FalApiKey);
70+
elizaLogger.debug("FAL_API_KEY present:", !!FalApiKey);
7371
return !!FalApiKey;
7472
},
7573
handler: async (
@@ -79,120 +77,98 @@ const TTSGeneration: Action = {
7977
_options: any,
8078
callback: HandlerCallback
8179
) => {
82-
elizaLogger.log("TTS request:", message);
83-
80+
elizaLogger.debug("TTS request:", message);
8481
// Clean up the prompt by removing mentions and commands
8582
const TTSPrompt = message.content.text
8683
.replace(/<@\d+>/g, "") // Remove mentions
87-
.replace(/generate TTS|create TTS|make TTS|render TTS/gi, "") // Remove commands
84+
.replace(/generate TTS|create TTS|make TTS|render TTS/gi, "")
8885
.trim();
8986

9087
if (!TTSPrompt || TTSPrompt.length < 3) {
91-
callback({
92-
text: "Please input a word at least of length 3",
93-
});
88+
callback({ text: "Please input a word at least of length 3" });
9489
return;
9590
}
9691

97-
elizaLogger.log("TTS prompt:", TTSPrompt);
92+
elizaLogger.debug("TTS prompt:", TTSPrompt);
9893

9994
callback({
100-
text: `I'll generate a audio based on your prompt: "${TTSPrompt}". This might take a few seconds...`,
95+
text: `I'll generate an audio based on your prompt: "${TTSPrompt}". This might take a few seconds...`,
10196
});
10297

103-
const language = detect(TTSPrompt);
104-
const voice_subject = VOICE_MAP[language[0].lang];
105-
const target_voice = getRandomVoice(voice_subject).fullName;
98+
let target_voice;
99+
try {
100+
const language = detect(TTSPrompt);
101+
if (language && language.length > 0) {
102+
const voice_subject = VOICE_MAP[language[0].lang];
103+
target_voice = getRandomVoice(voice_subject).fullName;
104+
} else {
105+
throw new Error("Language detection failed, no language detected.");
106+
}
107+
} catch (error) {
108+
elizaLogger.error("Language detection error:", error);
109+
const defaultVoice = VOICE_MAP['en'];
110+
target_voice = getRandomVoice(defaultVoice).fullName;
111+
}
106112

107-
elizaLogger.log("Starting TTS generation with prompt:", prompt, "and voice:", target_voice);
113+
elizaLogger.debug("Starting TTS generation with prompt:", TTSPrompt, "and voice:", target_voice);
108114

109115
try {
110116
const result = await generateTTS(TTSPrompt, target_voice, runtime);
111117

112118
if (result.success && result.data.audio.url) {
113-
// Download the Audio file
114-
const response = await fetch(result.data.audio.url);
115-
const arrayBuffer = await response.arrayBuffer();
116-
const TTSFileName = `content_cache/tts_${result.data.audio.file_name}`;
117-
118-
// ensure the directory is existed
119-
const directoryPath = path.dirname(TTSFileName);
120-
if (!fs.existsSync(directoryPath)) {
121-
fs.mkdirSync(directoryPath, { recursive: true });
119+
const cachedFile = `content_cache/tts_${result.data.audio.file_name}`;
120+
if (fs.existsSync(cachedFile)) {
121+
elizaLogger.debug("Using cached audio:", cachedFile);
122+
} else {
123+
const response = await fetch(result.data.audio.url);
124+
const arrayBuffer = await response.arrayBuffer();
125+
126+
const directoryPath = path.dirname(cachedFile);
127+
if (!fs.existsSync(directoryPath)) {
128+
fs.mkdirSync(directoryPath, { recursive: true });
129+
}
130+
131+
fs.writeFileSync(cachedFile, Buffer.from(arrayBuffer));
132+
elizaLogger.debug("Audio Duration:", result.data.audio.duration);
122133
}
123134

124-
// Save Audio file
125-
fs.writeFileSync(TTSFileName, Buffer.from(arrayBuffer));
126-
127-
elizaLogger.log("Audio Duration:", result.data.audio.duration);
128-
callback(
129-
{
130-
text: "TTS Success! Here's your generated audio!",
131-
attachments: [
132-
{
133-
id: crypto.randomUUID(),
134-
url: result.data.audio.url,
135-
title: "TTS Generation",
136-
source: "TTSGeneration",
137-
description: TTSPrompt,
138-
text: TTSPrompt,
139-
},
140-
],
141-
},
142-
[TTSFileName]
143-
); // Add the audio file to the attachments
144-
} else {
145135
callback({
146-
text: `TTS generation failed: ${result.error}`,
147-
error: true,
148-
});
136+
text: "TTS Success! Here's your generated audio!",
137+
attachments: [
138+
{
139+
id: crypto.randomUUID(),
140+
url: result.data.audio.url,
141+
title: "TTS Generation",
142+
source: "TTSGeneration",
143+
description: TTSPrompt,
144+
text: TTSPrompt,
145+
},
146+
],
147+
}, [cachedFile]);
148+
} else {
149+
callback({ text: `TTS generation failed: ${result.error}`, error: true });
149150
}
150151
} catch (error) {
151152
elizaLogger.error(`Failed to generate TTS. Error: ${error}`);
152-
callback({
153-
text: `TTS generation failed: ${error.message}`,
154-
error: true,
155-
});
153+
callback({ text: `TTS generation failed: ${error.message}`, error: true });
156154
}
157155
},
158156
examples: [
159157
[
160-
{
161-
user: "{{user1}}",
162-
content: {
163-
text: "Generate a TTS of prompt: Hello world!",
164-
},
165-
},
166-
{
167-
user: "{{agentName}}",
168-
content: {
169-
text: "I'll call a TTS to generate an audio based on your input prompt",
170-
action: "CREATE_TTS",
171-
},
172-
},
158+
{ user: "{{user1}}", content: { text: "Generate a TTS of prompt: Hello world!" } },
159+
{ user: "{{agentName}}", content: { text: "I'll call a TTS to generate an audio based on your input prompt", action: "CREATE_TTS" } },
173160
],
174161
[
175-
{
176-
user: "{{user1}}",
177-
content: {
178-
text: "Please do TTS to a prompt: Sam is busy now",
179-
},
180-
},
181-
{
182-
user: "{{agentName}}",
183-
content: {
184-
text: "Ok, please wait for the tts generation~",
185-
action: "AUDIO_CREATE",
186-
},
187-
},
162+
{ user: "{{user1}}", content: { text: "Please do TTS to a prompt: Sam is busy now" } },
163+
{ user: "{{agentName}}", content: { text: "Ok, please wait for the tts generation~", action: "AUDIO_CREATE" } },
188164
],
189165
],
190-
} as Action;
166+
};
191167

192168
export const TTSGenerationPlugin: Plugin = {
193169
name: "TTSGeneration",
194170
description: "Generate TTS using PlayAI tts (v3)",
195171
actions: [TTSGeneration],
196172
evaluators: [],
197173
providers: [],
198-
};
174+
};

0 commit comments

Comments
 (0)