feat: Add image generation capability to Telegram messaging #491

Closed
5 changes: 5 additions & 0 deletions .env.example
@@ -48,6 +48,10 @@ POST_IMMEDIATELY=
IMAGE_GEN= # Set to TRUE to enable image generation
USE_OPENAI_EMBEDDING= # Set to TRUE for OpenAI, leave blank for local

#Generation Prompts
SYSTEM_PROMPT= # Leave blank for empty system prompt or defined in character config
IMAGE_GENERATION_PROMPT= # Leave blank for default image generation prompt or defined in character config

# OpenRouter Models
OPENROUTER_MODEL= # Default: uses hermes 70b/405b
SMALL_OPENROUTER_MODEL=
@@ -101,6 +105,7 @@ TELEGRAM_BOT_TOKEN=

# Together Configuration
TOGETHER_API_KEY=
TOGETHER_IMAGE_MODEL= #Leave blank for default black-forest-labs/FLUX.1-schnell

# Server Configuration
SERVER_PORT=3000
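As a reading aid (not part of the diff): a minimal sketch of how a setting like IMAGE_GENERATION_PROMPT is typically resolved, with the .env value taking precedence over the character config. The helper name and exact fallback order are assumptions, not lines from this PR.

import { IAgentRuntime } from "@ai16z/eliza";

// Hypothetical helper: prefer the environment value, fall back to the character config.
function resolveImageGenerationPrompt(runtime: IAgentRuntime): string | undefined {
    return (
        runtime.getSetting("IMAGE_GENERATION_PROMPT") ??
        runtime.character.imageGenerationPrompt
    );
}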
5 changes: 5 additions & 0 deletions .gitignore
@@ -43,3 +43,8 @@ packages/core/src/providers/cache/*
cache/*
packages/plugin-coinbase/src/plugins/transactions.csv
packages/plugin-coinbase/package-lock.json

*.png
*.jpg
*.jpeg
*.webp
3 changes: 3 additions & 0 deletions agent/src/index.ts
@@ -42,6 +42,9 @@ import fs from "fs";
import path from "path";
import readline from "readline";
import { fileURLToPath } from "url";
import { character } from "./character.ts";
import { imageGenerationPlugin } from "@ai16z/plugin-image-generation";
import type { DirectClient } from "@ai16z/client-direct";
import yargs from "yargs";

const __filename = fileURLToPath(import.meta.url); // get the resolved path to the file
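The hunk above only adds the imports. As a hedged sketch (not from this PR) of how the imported imageGenerationPlugin is typically gated before being registered, assuming the usual eliza pattern of building a plugins array for the AgentRuntime constructor:

import { Plugin } from "@ai16z/eliza";

// Hypothetical helper: enable image generation only when an image-capable provider key is configured.
function buildPlugins(): Plugin[] {
    return process.env.TOGETHER_API_KEY || process.env.OPENAI_API_KEY
        ? [imageGenerationPlugin]
        : [];
}

The resulting array would feed the plugins option when the agent runtime is constructed; the exact wiring in this file is not shown in the diff.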
3 changes: 3 additions & 0 deletions mise.toml
@@ -0,0 +1,3 @@
[tools]
node = "23.1.0"
pnpm = "latest"
18 changes: 15 additions & 3 deletions packages/client-telegram/src/messageManager.ts
@@ -1,8 +1,10 @@
import { Message } from "@telegraf/types";
import { Context, Telegraf } from "telegraf";
import { Context, Telegraf, Input } from "telegraf";

import { composeContext, elizaLogger, ServiceType } from "@ai16z/eliza";
import { embeddingZeroVector } from "@ai16z/eliza";
import { Media } from "@ai16z/eliza";
import { elizaLogger } from "@ai16z/eliza";
import {
Content,
HandlerCallback,
@@ -125,10 +127,11 @@ Note that {{agentName}} is capable of reading/seeing/hearing various forms of me

{{recentMessages}}

# Task: Generate a post/reply in the voice, style and perspective of {{agentName}} (@{{twitterUserName}}) while using the thread of tweets as additional context:
# Task: Generate a reply in the voice and style of {{agentName}}, aka @{{twitterUserName}}
Write a very short reply that is from the perspective of {{agentName}}. Try to write something totally different than previous posts. Do not add commentary or acknowledge this request, just write the reply. Use the thread of tweets as additional context:
Current Post:
{{currentPost}}
Thread of Tweets You Are Replying To:
Thread of messages you are replying to:

{{formattedConversation}}
` + messageCompletionFooter;
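For orientation (not shown in this diff): a template like the one above is normally rendered into a prompt with composeContext, which is why that import appears in the first hunk of this file. The state and template variable names below are assumptions.

// Sketch only: fill the template's {{placeholders}} from the current conversation state.
const context = composeContext({
    state,
    template: telegramMessageHandlerTemplate,
});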
@@ -235,10 +238,18 @@ export class MessageManager {
private async sendMessageInChunks(
ctx: Context,
content: string,
attachments?: Media[],
replyToMessageId?: number
): Promise<Message.TextMessage[]> {
const chunks = this.splitMessage(content);
const sentMessages: Message.TextMessage[] = [];
const hasAttachment = attachments?.length > 0;

if (hasAttachment) {
const sentMessage = (await ctx.replyWithPhoto(Input.fromLocalFile(attachments[0].url)));

elizaLogger.log("Sent attachment: ", sentMessage);
}

for (let i = 0; i < chunks.length; i++) {
const chunk = chunks[i];
@@ -443,6 +454,7 @@ export class MessageManager {
const sentMessages = await this.sendMessageInChunks(
ctx,
content.text,
content.attachments,
message.message_id
);

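The new attachment branch sends only the first attachment and assumes a local file path. A hedged variant (not part of the PR) that loops over every image attachment and also accepts remote URLs might look like this, using Telegraf's Input helpers already imported above:

if (attachments?.length) {
    for (const media of attachments) {
        // Assumption: media.url may be either a local path or an http(s) URL.
        const source = media.url.startsWith("http")
            ? Input.fromURL(media.url)
            : Input.fromLocalFile(media.url);
        const sent = await ctx.replyWithPhoto(source);
        elizaLogger.log("Sent attachment:", sent);
    }
}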
2 changes: 1 addition & 1 deletion packages/client-twitter/src/post.ts
@@ -29,7 +29,7 @@ About {{agentName}} (@{{twitterUserName}}):

# Task: Generate a post in the voice and style of {{agentName}}, aka @{{twitterUserName}}
Write a single sentence post that is {{adjective}} about {{topic}} (without mentioning {{topic}} directly), from the perspective of {{agentName}}. Try to write something totally different than previous posts. Do not add commentary or acknowledge this request, just write the post.
Your response should not contain any questions. Brief, concise statements only. No emojis. Use \\n\\n (double spaces) between statements.`;
Your response should not contain any questions. Brief, concise statements only. Use \\n\\n (double spaces) between statements.`;

const MAX_TWEET_LENGTH = 280;

40 changes: 26 additions & 14 deletions packages/core/src/generation.ts
@@ -769,15 +769,27 @@ export const generateImage = async (
count = 1;
}

const model = getModel(runtime.character.modelProvider, ModelClass.IMAGE);
const modelSettings = models[runtime.character.modelProvider].imageSettings;
const apiKey =
runtime.token ??
runtime.getSetting("HEURIST_API_KEY") ??
runtime.getSetting("TOGETHER_API_KEY") ??
runtime.getSetting("OPENAI_API_KEY");
const imageModelProvider =
runtime.character.imageModelProvider ?? runtime.character.modelProvider;

elizaLogger.log("imageModelProvider: ", imageModelProvider);

const model = getModel(imageModelProvider, ModelClass.IMAGE);
const modelSettings = models[imageModelProvider].imageSettings;
let apiKey = runtime.token;
switch (imageModelProvider) {
case ModelProviderName.HEURIST:
apiKey = runtime.getSetting("HEURIST_API_KEY");
break;
case ModelProviderName.LLAMACLOUD:
apiKey = runtime.getSetting("TOGETHER_API_KEY");
break;
case ModelProviderName.OPENAI:
apiKey = runtime.getSetting("OPENAI_API_KEY");
break;
}
try {
if (runtime.character.modelProvider === ModelProviderName.HEURIST) {
if (imageModelProvider === ModelProviderName.HEURIST) {
const response = await fetch(
"http://sequencer.heurist.xyz/submit_job",
{
@@ -815,11 +827,11 @@
const imageURL = await response.json();
return { success: true, data: [imageURL] };
} else if (
runtime.character.modelProvider === ModelProviderName.LLAMACLOUD
imageModelProvider === ModelProviderName.LLAMACLOUD
) {
const together = new Together({ apiKey: apiKey as string });
const response = await together.images.create({
model: "black-forest-labs/FLUX.1-schnell",
model: runtime.getSetting("TOGETHER_IMAGE_MODEL") ?? "black-forest-labs/FLUX.1-schnell",
prompt,
width,
height,
@@ -828,11 +840,10 @@
});
const urls: string[] = [];
for (let i = 0; i < response.data.length; i++) {
const json = response.data[i].b64_json;
// decode base64
const base64 = Buffer.from(json, "base64").toString("base64");
urls.push(base64);
const data: unknown = response.data
urls.push(data[i].url);
}

const base64s = await Promise.all(
urls.map(async (url) => {
const response = await fetch(url);
Expand All @@ -843,6 +854,7 @@ export const generateImage = async (
return base64;
})
);

return { success: true, data: base64s };
} else {
let targetSize = `${width}x${height}`;
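Taken together, a hedged usage sketch of the updated generateImage. The argument shape is inferred from the fields visible in this diff (prompt, width, height, count) and should be treated as an assumption rather than the exact exported signature:

const result = await generateImage(
    {
        prompt: runtime.character.imageGenerationPrompt ?? "a watercolor city skyline at dusk",
        width: 1024,
        height: 1024,
        count: 1,
    },
    runtime
);
if (result.success) {
    // Heurist returns image URLs; the Together/LLAMACLOUD path returns base64-encoded images.
    elizaLogger.log(
        `Generated ${result.data.length} image(s) via ${runtime.character.imageModelProvider ?? runtime.character.modelProvider}`
    );
}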
4 changes: 4 additions & 0 deletions packages/core/src/types.ts
@@ -606,9 +606,13 @@ export type Character = {

/** Optional system prompt */
system?: string;
/** Optional image generation prompt */
imageGenerationPrompt?: string;

/** Model provider to use */
modelProvider: ModelProviderName;
/** Optional image model provider */
imageModelProvider?: ModelProviderName;

/** Optional model endpoint override */
modelEndpointOverride?: string;
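An illustrative character snippet using the two new optional fields. The values are made up; only imageModelProvider and imageGenerationPrompt come from this PR, and Partial<Character> is used so the remaining required fields can be omitted:

import { Character, ModelProviderName } from "@ai16z/eliza";

const exampleCharacter: Partial<Character> = {
    name: "ExampleAgent",
    modelProvider: ModelProviderName.OPENROUTER,
    // Route image generation to a different provider than text generation.
    imageModelProvider: ModelProviderName.LLAMACLOUD,
    // Overrides the default image generation prompt when set.
    imageGenerationPrompt: "Vivid, painterly scenes with soft natural lighting",
};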