Skip to content

Commit c6cbc4f

Browse files
committed
init commit of working ts
1 parent 2ec20b7 commit c6cbc4f

9 files changed

+497
-124
lines changed

nodemon.json

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"watch": ["src"],
3+
"ext": "ts",
4+
"ignore": ["src/**/*.spec.ts"],
5+
"exec": "node --loader ts-node/esm src/index.ts"
6+
}

package-lock.json

+293-73
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

+13-5
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,37 @@
11
{
2-
"name": "ruby",
2+
"name": "bgent-discord-voice",
33
"version": "1.0.0",
44
"description": "",
5-
"main": "main.js",
5+
"main": "dist/index.js",
6+
"type": "module",
7+
"types": "dist/index.d.ts",
68
"scripts": {
7-
"test": "echo \"Error: no test specified\" && exit 1"
9+
"build": "tsc",
10+
"start": "node --loader ts-node/esm src/index.ts",
11+
"watch": "tsc --watch",
12+
"dev": "nodemon"
813
},
914
"author": "",
1015
"license": "MIT",
1116
"dependencies": {
1217
"@discordjs/voice": "^0.16.1",
1318
"@supabase/supabase-js": "^2.39.7",
1419
"@types/fluent-ffmpeg": "^2.1.24",
15-
"bgent": "^0.0.22",
20+
"bgent": "^0.0.32",
1621
"discord.js": "^14.14.1",
1722
"dotenv": "^16.3.1",
1823
"ffmpeg-static": "^5.2.0",
1924
"libsodium-wrappers": "^0.7.13",
25+
"nodemon": "^3.1.0",
2026
"openai": "^4.24.1",
2127
"opusscript": "^0.0.8",
2228
"prism-media": "^1.3.5",
2329
"uuid-by-string": "^4.0.0",
2430
"ws": "^8.16.0"
2531
},
2632
"devDependencies": {
27-
"@types/node": "^20.10.7"
33+
"@types/node": "^20.10.7",
34+
"ts-node": "^10.9.2",
35+
"typescript": "^5.4.2"
2836
}
2937
}

src/discordClient.ts

+10-8
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ import { BaseGuildVoiceChannel, ChannelType, Client, GatewayIntentBits, Guild, G
33
import { EventEmitter } from "events";
44
import prism from "prism-media";
55
import { Readable, pipeline } from "stream";
6-
import settings from "./settings";
6+
import settings from "./settings.ts";
77

88
// These values are chosen for compatibility with picovoice components
99
const DECODE_FRAME_SIZE = 1024;
@@ -39,9 +39,9 @@ export default class DiscordClient extends EventEmitter {
3939
});
4040
this.client.login(this.apiToken);
4141
this.client.on('voiceStateUpdate', (oldState, newState) => {
42-
if (newState.member.user.bot) return;
42+
if (newState?.member?.user.bot) return;
4343
if (newState.channelId != null && newState.channelId != oldState.channelId) {
44-
this.joinChannel(newState.channel);
44+
this.joinChannel(newState.channel as BaseGuildVoiceChannel);
4545
}
4646
});
4747
this.client.on('guildCreate', (guild) => {
@@ -62,12 +62,12 @@ export default class DiscordClient extends EventEmitter {
6262
private async scanGuild(guild: Guild) {
6363
// Iterate through all voice channels fetching the largest one with at least one connected member
6464
const channels = (await guild.channels.fetch())
65-
.filter(channel => channel.type == ChannelType.GuildVoice);
66-
var chosenChannel: BaseGuildVoiceChannel = null;
65+
.filter(channel => channel?.type == ChannelType.GuildVoice);
66+
let chosenChannel: BaseGuildVoiceChannel | null = null;
6767

6868
for (const [id, channel] of channels) {
6969
const voiceChannel = channel as BaseGuildVoiceChannel;
70-
if (voiceChannel.members.size > 0 && (chosenChannel == null || voiceChannel.members.size > chosenChannel.members.size)) {
70+
if (voiceChannel.members.size > 0 && (chosenChannel === null || voiceChannel.members.size > chosenChannel.members.size)) {
7171
chosenChannel = voiceChannel;
7272
}
7373
}
@@ -93,14 +93,15 @@ export default class DiscordClient extends EventEmitter {
9393

9494
connection.receiver.speaking.on('start', (userId) => {
9595
const user = channel.members.get(userId);
96-
if (user.user.bot) return;
96+
if(!user) return;
97+
if (user?.user.bot) return;
9798
this.monitorMember(user, channel);
9899
this.streams.get(userId)?.emit('speakingStarted');
99100
});
100101

101102
connection.receiver.speaking.on('end', async (userId) => {
102103
const user = channel.members.get(userId);
103-
if (user.user.bot) return;
104+
if (user?.user.bot) return;
104105
this.streams.get(userId)?.emit('speakingStopped');
105106
});
106107
}
@@ -109,6 +110,7 @@ export default class DiscordClient extends EventEmitter {
109110
const userId = member.id;
110111
const userName = member.displayName;
111112
const connection = getVoiceConnection(member.guild.id);
113+
if(!connection) return;
112114
const receiveStream = connection.receiver.subscribe(userId, {
113115
autoDestroy: true,
114116
emitClose: true

src/elevenlabs.ts

+6-6
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import { Readable } from "stream";
22
import { WebSocket } from "ws";
3-
import settings from "./settings";
4-
import { prependWavHeader } from "./util";
3+
import settings from "./settings.ts";
4+
import { prependWavHeader } from "./util.ts";
55

66
export async function textToSpeechStreaming(text: string): Promise<Readable> {
77
console.log("11 TTS: " + text);
@@ -33,10 +33,10 @@ export async function textToSpeechStreaming(text: string): Promise<Readable> {
3333
throw new Error(`Received status ${status} from Eleven Labs API: ${errorBodyString}`);
3434
}
3535

36-
let reader = response.body.getReader();
36+
let reader = response?.body?.getReader();
3737
let readable = new Readable({
3838
read() {
39-
reader.read().then(({ done, value }) => {
39+
reader?.read().then(({ done, value }) => {
4040
if (done) {
4141
this.push(null);
4242
} else {
@@ -173,7 +173,7 @@ export class ElevenLabsConverter extends Readable {
173173
private inputEnded: boolean = false;
174174
private outputEnded: boolean = false;
175175
private startTime: number;
176-
private openTime: number;
176+
private openTime: number | undefined
177177
private buffers: Buffer[] = [];
178178
private draining: boolean = false;
179179
private firstDataTime: number = -1;
@@ -231,7 +231,7 @@ export class ElevenLabsConverter extends Readable {
231231
} else if (response.audio) {
232232
if (this.firstDataTime == -1) {
233233
this.firstDataTime = Date.now();
234-
console.log(`First audio packet received after ${this.firstDataTime - this.openTime}ms`);
234+
console.log(`First audio packet received after ${this.firstDataTime - (this.openTime || 0)}ms`);
235235
}
236236
let audioChunk = Buffer.from(response.audio, 'base64');
237237
console.log(`Received audio chunk of length ${audioChunk.length}`);

src/main.ts src/index.ts

+150-16
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
import { SupabaseClient, createClient } from "@supabase/supabase-js";
2-
import { BgentRuntime, Message } from "bgent";
2+
import { BgentRuntime, Content, Message, State, composeContext, embeddingZeroVector, messageHandlerTemplate, parseJSONObjectFromText } from "bgent";
33
import { UUID } from 'crypto';
44
import dotenv from "dotenv";
55
import { Readable } from "stream";
66
import getUuid from 'uuid-by-string';
7-
import { AudioMonitor } from "./audioMonitor";
8-
import DiscordClient from "./discordClient";
9-
import { textToSpeech } from "./elevenlabs";
10-
import { speechToText } from "./speechtotext";
7+
import { AudioMonitor } from "./audioMonitor.ts";
8+
import DiscordClient from "./discordClient.ts";
9+
import { textToSpeech } from "./elevenlabs.ts";
10+
import { speechToText } from "./speechtotext.ts";
1111
import { BaseGuildVoiceChannel } from "discord.js";
1212

1313
enum ResponseType {
@@ -29,6 +29,138 @@ enum ResponseType {
2929
RESPONSE_AUDIO = 3
3030
}
3131

32+
33+
/**
 * Handle an incoming message: store it, ask the model for a reply, store the
 * reply, and run any actions attached to the reply.
 *
 * @param runtime The bgent runtime used for storage, completion, evaluation and actions.
 * @param message The message to handle.
 * @param state Optional agent state. It is only forwarded to the evaluation of
 *   the incoming message; a fresh state is always composed afterwards.
 * @returns The response content. Falls back to `{ content: '', action: 'IGNORE' }`
 *   when none of the three completion attempts yields a reply attributed to the agent.
 */
async function handleMessage(
  runtime: BgentRuntime,
  message: Message,
  state?: State
) {
  const _saveRequestMessage = async (message: Message, state: State) => {
    // Read everything this helper needs from its own `message` argument.
    // It runs before the outer `const { senderId, room_id, ... }` below is
    // initialised, so reaching for those outer bindings from here throws a
    // ReferenceError (temporal dead zone).
    const { content: senderContent, senderId, room_id } = message

    // we run evaluation here since some evals could be modulo based, and we should run on every message
    if ((senderContent as Content).content) {
      const { data: data2, error } = await runtime.supabase
        .from('messages')
        .select('*')
        .eq('user_id', senderId)
        .eq('room_id', room_id)
        .order('created_at', { ascending: false })

      if (error) {
        console.log('error', error)
        // TODO: dont need this recall
      } else if (data2.length > 0 && data2[0].content === message.content) {
        // NOTE(review): this is a strict-equality check between a fetched row
        // value and the in-memory content; if both are objects it can never
        // match — confirm the column type before relying on this dedupe.
        console.log('already saved', data2)
      } else {
        await runtime.messageManager.createMemory({
          user_ids: [message.senderId, message.agentId, ...message.userIds],
          user_id: senderId!,
          content: senderContent,
          room_id,
          embedding: embeddingZeroVector
        })
      }
      // NOTE(review): `state` may be undefined here when the caller did not
      // supply one (see the cast at the call site) — confirm evaluate() copes.
      await runtime.evaluate(message, state)
    }
  }

  await _saveRequestMessage(message, state as State)
  // Always recompose the state after the request has been stored.
  state = (await runtime.composeState(message)) as State

  const context = composeContext({
    state,
    template: messageHandlerTemplate
  })

  if (runtime.debugMode) {
    console.log(context, 'Response Context')
  }

  let responseContent: Content | null = null
  const { senderId, room_id, userIds: user_ids, agentId } = message

  for (let triesLeft = 3; triesLeft > 0; triesLeft--) {
    console.log(context)
    const response = await runtime.completion({
      context,
      stop: []
    })

    // Fire-and-forget audit log: deliberately not awaited, failures are only logged.
    runtime.supabase
      .from('logs')
      .insert({
        body: { message, context, response },
        user_id: senderId,
        room_id,
        user_ids: user_ids!,
        agent_id: agentId!,
        type: 'main_completion'
      })
      .then(({ error }) => {
        if (error) {
          console.error('error', error)
        }
      })

    const parsedResponse = parseJSONObjectFromText(
      response
    ) as unknown as Content | null

    // Accept the completion only when it is attributed to the agent. The `?.`
    // on `parsedResponse` lets an unparseable completion consume a retry
    // instead of throwing out of the loop.
    if (
      (parsedResponse?.user as string)?.includes(
        (state as State).agentName as string
      )
    ) {
      responseContent = {
        content: parsedResponse!.content,
        action: parsedResponse!.action
      }
      break
    }
  }

  if (!responseContent) {
    responseContent = {
      content: '',
      action: 'IGNORE'
    }
  }

  const _saveResponseMessage = async (
    message: Message,
    state: State,
    responseContent: Content
  ) => {
    const { agentId, userIds, room_id } = message

    responseContent.content = responseContent.content?.trim()

    // Only non-empty replies are stored and evaluated.
    if (responseContent.content) {
      await runtime.messageManager.createMemory({
        user_ids: userIds!,
        user_id: agentId!,
        content: responseContent,
        room_id,
        embedding: embeddingZeroVector
      })
      await runtime.evaluate(message, { ...state, responseContent })
    } else {
      console.warn('Empty response, skipping')
    }
  }

  await _saveResponseMessage(message, state, responseContent)
  await runtime.processActions(message, responseContent)

  return responseContent
}
163+
32164
// Add this function to fetch the bot's name
33165
async function fetchBotName(botToken: string) {
34166
const url = 'https://discord.com/api/v10/users/@me';
@@ -159,13 +291,6 @@ const supabase = createClient(
159291

160292
const discordClient = new DiscordClient();
161293

162-
const runtime = new BgentRuntime({
163-
supabase,
164-
token: process.env.OPENAI_API_KEY,
165-
serverUrl: 'https://api.openai.com/v1',
166-
evaluators: [],
167-
actions: [],
168-
});
169294

170295
/**
171296
* Listens on an audio stream and responds with an audio stream.
@@ -198,9 +323,9 @@ async function respondToSpokenAudio(userId: string, userName: string, channelId:
198323
const sstService = speechToText;
199324
const text = await sstService(inputBuffer);
200325
if (requestedResponseType == ResponseType.SPOKEN_TEXT) {
201-
return Readable.from(text);
326+
return Readable.from(text as string);
202327
} else {
203-
return await respondToText(userId, userName, channelId, text, requestedResponseType);
328+
return await respondToText(userId, userName, channelId, text as string, requestedResponseType);
204329
}
205330
}
206331
/**
@@ -214,7 +339,7 @@ async function respondToText(userId: string, userName: string, channelId: string
214339

215340
const userIdUUID = getUuid(userId) as UUID;
216341

217-
const agentId = getUuid(process.env.DISCORD_APPLICATION_ID) as UUID;
342+
const agentId = getUuid(process.env.DISCORD_APPLICATION_ID as string) as UUID;
218343

219344
await ensureUserExists(supabase, agentId, null, process.env.DISCORD_TOKEN);
220345
await ensureUserExists(supabase, userIdUUID, userName);
@@ -230,7 +355,16 @@ async function respondToText(userId: string, userName: string, channelId: string
230355
room_id,
231356
} as unknown as Message;
232357

233-
const response = await runtime.handleMessage(message)
358+
359+
const runtime = new BgentRuntime({
360+
supabase,
361+
token: process.env.OPENAI_API_KEY as string,
362+
serverUrl: 'https://api.openai.com/v1',
363+
evaluators: [],
364+
actions: [],
365+
});
366+
367+
const response = await handleMessage(runtime, message)
234368

235369
if (requestedResponseType == ResponseType.RESPONSE_TEXT) {
236370
return Readable.from(response.content);

src/settings.ts

+1
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ dotenv.config();
5353
// import from env
5454
for (const key in settings) {
5555
if (process.env[key]) {
56+
// @ts-expect-error - we know this key exists
5657
settings[key] = process.env[key]!;
5758
}
5859
}

src/speechtotext.ts

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import OpenAI from "openai";
2-
import settings from "./settings";
3-
import { getWavHeader } from "./util";
2+
import settings from "./settings.ts";
3+
import { getWavHeader } from "./util.ts";
44

55
var openAI = new OpenAI({
66
apiKey: settings.OPENAI_KEY

0 commit comments

Comments
 (0)