Skip to content

Commit e54e396

Browse files
author
Tom Daniel
committed
add comments and remove obsolete code
1 parent 2deb3a0 commit e54e396

File tree

1 file changed

+54
-36
lines changed
  • packages/plugin-image-generation/src

1 file changed

+54
-36
lines changed

packages/plugin-image-generation/src/index.ts

+54-36
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,11 @@ import { generateCaption, generateImage, settings } from "@ai16z/eliza";
1515
import fs from "fs";
1616
import path from "path";
1717

18+
/**
19+
* Template for generating image descriptions in the agent's voice.
20+
* Uses various context elements like knowledge, bio, and recent posts
21+
* to maintain consistent character voice.
22+
*/
1823
const imagePromptTemplate = `# Knowledge
1924
{{knowledge}}
2025
@@ -28,11 +33,21 @@ About {{agentName}}:
2833
{{recentPosts}}
2934
3035
# Task: Generate an image description in the voice and style of {{agentName}} according to the previous <user_message>.
31-
Write a two sentence image description that considers the <user_message> and may also include {{adjective}} about {{topic}} (without mentioning {{topic}} directly), from the perspective of {{agentName}}. Try to write something totally different than previous posts. Do not add commentary or acknowledge this request, just write the description of the image to be generated.
36+
Write a short image description that considers the <user_message> complemented by {{adjective}} about {{topic}} from the perspective of {{agentName}}. Try to write something totally different than previous posts. Do not add commentary or acknowledge this request, just write the description of the image to be generated.
3237
Your response should not contain any questions. Brief, concise statements only. No emojis. Use \\n\\n (double spaces) between statements.`;
3338

39+
/**
40+
* Instructions given to the text model so that it rewrites a message into a detailed, high-quality image generation prompt
41+
* that will produce visually appealing images.
42+
*/
3443
const imageGenerationPrompt = "You are an AI assistant specialized in crafting effective prompts for image generation. Your task is to analyze a user's message and create a comprehensive, natural-language prompt that will guide an image generation algorithm to produce high-quality, visually appealing images.\n\nBegin by analyzing the content of the user's message. Follow these steps:\n\n1. List out key elements from the user's message, categorizing them to ensure comprehensive coverage:\n * Topic: The main subject or scene with specific details\n * Material: The medium or style (e.g., digital painting, 3D render)\n * Style: The artistic direction (e.g., fantasy, vaporwave)\n * Artist: Specific artists to influence the visual style\n * Webpage Influence: Art platforms like ArtStation or DeviantArt for quality enhancement\n * Sharpness: Terms like \"sharp focus\" or \"highly detailed\" for clarity\n * Extra Details: Descriptors to enhance atmosphere (e.g., cinematic, dystopian)\n * Shade and Color: Color-related keywords to control mood (e.g., moody lighting)\n * Lighting and Brightness: Specific lighting styles (e.g., dramatic shadows)\n * Camera Angle: Perspective and framing (e.g., close-up, wide shot, aerial view)\n * Composition: Layout guidance (e.g., rule of thirds, centered, dynamic)\n * Time Period: Temporal context if relevant\n * Cultural Elements: Any specific cultural influences\n * Textures: Surface quality descriptions\n * Weather/Atmosphere: Environmental conditions if applicable\n * Negative Prompts: Elements to exclude from the image\n\n2. Brainstorm complementary elements that would enhance the user's vision:\n * Suggest fitting artists and styles if not specified\n * Consider atmospheric elements that would strengthen the concept\n * Identify potential technical aspects that would improve the result\n * Note any elements that should be avoided to maintain the desired look\n\n3. 
Construct your final prompt by:\n * Leading with the most important scene/subject details from the user's message\n * Incorporating all relevant technical and stylistic elements\n * Grouping related concepts together naturally\n * Maintaining clear, flowing language throughout\n * Adding complementary details that enhance but don't alter the core concept\n * Concluding with negative prompts separated by a \"Negative:\" marker\n\nRemember:\n- Preserve ALL specific details from the user's original message\n- Don't force details into a rigid template\n- Create a cohesive, readable description\n- Keep the focus on the user's core concept while enhancing it with technical and artistic refinements\n\nYour output should contain ONLY the final prompt text, with no additional explanations, tags, or formatting.";
3544

45+
/**
46+
* Saves a base64-encoded image to the local filesystem
47+
* @param base64Data - The base64-encoded image data
48+
* @param filename - Name to use for the saved file (without extension)
49+
* @returns The full filepath where the image was saved
50+
*/
3651
export function saveBase64Image(base64Data: string, filename: string): string {
3752
// Create generatedImages directory if it doesn't exist
3853
const imageDir = path.join(process.cwd(), "generatedImages");
@@ -55,6 +70,12 @@ export function saveBase64Image(base64Data: string, filename: string): string {
5570
return filepath;
5671
}
5772

73+
/**
74+
* Saves an image from a Heurist URL to the local filesystem
75+
* @param imageUrl - URL of the image to download and save
76+
* @param filename - Name to use for the saved file (without extension)
77+
* @returns Promise resolving to the full filepath where the image was saved
78+
*/
5879
export async function saveHeuristImage(
5980
imageUrl: string,
6081
filename: string
@@ -82,8 +103,13 @@ export async function saveHeuristImage(
82103
return filepath;
83104
}
84105

106+
/**
107+
* Action definition for image generation capability
108+
* Handles generating images based on user prompts while maintaining agent personality
109+
*/
85110
const imageGeneration: Action = {
86111
name: "GENERATE_IMAGE",
112+
// Alternative action names that should trigger image generation
87113
similes: [
88114
"IMAGE_GENERATION",
89115
"IMAGE_GEN",
@@ -96,6 +122,10 @@ const imageGeneration: Action = {
96122
"MAKE_A",
97123
],
98124
description: "Generate an image to go along with the message.",
125+
126+
/**
127+
* Validates that at least one of the supported provider API keys is configured for image generation
128+
*/
99129
validate: async (runtime: IAgentRuntime, message: Memory) => {
100130
const anthropicApiKeyOk = !!runtime.getSetting("ANTHROPIC_API_KEY");
101131
const togetherApiKeyOk = !!runtime.getSetting("TOGETHER_API_KEY");
@@ -105,6 +135,14 @@ const imageGeneration: Action = {
105135

106136
return anthropicApiKeyOk || togetherApiKeyOk || heuristApiKeyOk;
107137
},
138+
139+
/**
140+
* Main handler for image generation:
141+
* 1. Generates an image description in the agent's voice
142+
* 2. Converts that description into an optimized image generation prompt
143+
* 3. Generates the image
144+
* 4. Saves and returns the result
145+
*/
108146
handler: async (
109147
runtime: IAgentRuntime,
110148
message: Memory,
@@ -119,31 +157,32 @@ const imageGeneration: Action = {
119157
imagePromptTemplate,
120158
});
121159

160+
// Generate the initial image description in the agent's voice (also reused below as the caption)
122161
const agentImagePrompt = await generateText({
123162
runtime,
124163
context: `${agentContext}\n\n<user_message>${message.content.text}</user_message>`,
125164
modelClass: ModelClass.SMALL,
126165
});
127166

128-
elizaLogger.log("Agent image prompt:", agentImagePrompt);
129-
167+
elizaLogger.log("Agent prompt & caption for image: ", agentImagePrompt);
130168

131-
//state = (await runtime.composeState(message)) as State;
132169
const userId = runtime.agentId;
133170
elizaLogger.log("User ID:", userId);
134171

172+
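// Build the context used to turn the agent's description into an optimized image generation prompt. NOTE(review): `+` binds tighter than `??`, so the <user_message> block is appended only when falling back to imageGenerationPrompt; confirm this is intended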
135173
const context = runtime.character.system ??
136174
settings.SYSTEM_PROMPT ?? imageGenerationPrompt + `\n\nHere is the user's message:\n<user_message> ${agentImagePrompt} </user_message>`;
137175

176+
// Generate the technical prompt for the image generation model
138177
const imagePrompt = await generateText({
139178
runtime,
140179
context,
141180
modelClass: ModelClass.SMALL,
142181
});
143-
elizaLogger.log("Image prompt received:", imagePrompt);
144182

145183
const res: { image: string; caption: string }[] = [];
146184

185+
// Generate the actual image
147186
elizaLogger.log("Generating image with prompt:", imagePrompt);
148187
const images = await generateImage(
149188
{
@@ -155,6 +194,7 @@ const imageGeneration: Action = {
155194
runtime
156195
);
157196

197+
// Process and save generated images
158198
if (images.success && images.data && images.data.length > 0) {
159199
elizaLogger.log(
160200
"Image generation successful, number of images:",
@@ -173,46 +213,19 @@ const imageGeneration: Action = {
173213

174214
elizaLogger.log(`Processing image ${i + 1}:`, filename);
175215

176-
//just dont even add a caption or a description just have it generate & send
177-
/*
178-
try {
179-
const imageService = runtime.getService(ServiceType.IMAGE_DESCRIPTION);
180-
if (imageService && typeof imageService.describeImage === 'function') {
181-
const caption = await imageService.describeImage({ imageUrl: filepath });
182-
captionText = caption.description;
183-
captionTitle = caption.title;
184-
}
185-
} catch (error) {
186-
elizaLogger.error("Caption generation failed, using default caption:", error);
187-
}*/
188-
189-
//const caption = "...";
190-
/*= await generateCaption(
191-
{
192-
imageUrl: image,
193-
},
194-
runtime
195-
);*/
196-
197-
res.push({ image: filepath, caption: agentImagePrompt }); //caption.title });
198-
199-
elizaLogger.log(
200-
`Generated caption for image ${i + 1}:`,
201-
agentImagePrompt //caption.title
202-
);
203-
//res.push({ image: image, caption: caption.title });
216+
res.push({ image: filepath, caption: agentImagePrompt });
204217

205218
callback(
206219
{
207-
text: agentImagePrompt, //caption.description,
220+
text: agentImagePrompt,
208221
attachments: [
209222
{
210223
id: crypto.randomUUID(),
211224
url: filepath,
212225
title: "Generated image",
213226
source: "imageGeneration",
214-
description: imagePrompt, //caption.title,
215-
text: agentImagePrompt, //caption.description,
227+
description: imagePrompt,
228+
text: agentImagePrompt,
216229
},
217230
],
218231
},
@@ -228,6 +241,8 @@ const imageGeneration: Action = {
228241
elizaLogger.error("Image generation failed or returned no data.");
229242
}
230243
},
244+
245+
// Example interactions that should trigger image generation
231246
examples: [
232247
// TODO: We want to generate images in more abstract ways, not just when asked to generate an image
233248

@@ -312,6 +327,9 @@ const imageGeneration: Action = {
312327
],
313328
} as Action;
314329

330+
/**
331+
* Plugin definition for image generation functionality
332+
*/
315333
export const imageGenerationPlugin: Plugin = {
316334
name: "imageGeneration",
317335
description: "Generate images",

0 commit comments

Comments
 (0)