Skip to content

Commit 1f2b4b6

Browse files
author
Tom Daniel
committed
add comments and remove obsolete code
1 parent 3f0a431 commit 1f2b4b6

File tree

1 file changed

+54
-36
lines changed
  • packages/plugin-image-generation/src

1 file changed

+54
-36
lines changed

packages/plugin-image-generation/src/index.ts

+54-36
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,11 @@ import fs from "fs";
1616
import path from "path";
1717
import { validateImageGenConfig } from "./enviroment";
1818

19+
/**
20+
* Template for generating image descriptions in the agent's voice.
21+
* Uses various context elements like knowledge, bio, and recent posts
22+
* to maintain consistent character voice.
23+
*/
1924
const imagePromptTemplate = `# Knowledge
2025
{{knowledge}}
2126
@@ -29,11 +34,21 @@ About {{agentName}}:
2934
{{recentPosts}}
3035
3136
# Task: Generate an image description in the voice and style of {{agentName}} according to the previous <user_message>.
32-
Write a two sentence image description that considers the <user_message> and may also include {{adjective}} about {{topic}} (without mentioning {{topic}} directly), from the perspective of {{agentName}}. Try to write something totally different than previous posts. Do not add commentary or acknowledge this request, just write the description of the image to be generated.
37+
Write a short image description that considers the <user_message> complemented by {{adjective}} about {{topic}} from the perspective of {{agentName}}. Try to write something totally different than previous posts. Do not add commentary or acknowledge this request, just write the description of the image to be generated.
3338
Your response should not contain any questions. Brief, concise statements only. No emojis. Use \\n\\n (double spaces) between statements.`;
3439

40+
/**
41+
* Prompt for the image generation AI to create detailed, high-quality prompts
42+
* that will produce visually appealing images.
43+
*/
3544
const imageGenerationPrompt = "You are an AI assistant specialized in crafting effective prompts for image generation. Your task is to analyze a user's message and create a comprehensive, natural-language prompt that will guide an image generation algorithm to produce high-quality, visually appealing images.\n\nBegin by analyzing the content of the user's message. Follow these steps:\n\n1. List out key elements from the user's message, categorizing them to ensure comprehensive coverage:\n * Topic: The main subject or scene with specific details\n * Material: The medium or style (e.g., digital painting, 3D render)\n * Style: The artistic direction (e.g., fantasy, vaporwave)\n * Artist: Specific artists to influence the visual style\n * Webpage Influence: Art platforms like ArtStation or DeviantArt for quality enhancement\n * Sharpness: Terms like \"sharp focus\" or \"highly detailed\" for clarity\n * Extra Details: Descriptors to enhance atmosphere (e.g., cinematic, dystopian)\n * Shade and Color: Color-related keywords to control mood (e.g., moody lighting)\n * Lighting and Brightness: Specific lighting styles (e.g., dramatic shadows)\n * Camera Angle: Perspective and framing (e.g., close-up, wide shot, aerial view)\n * Composition: Layout guidance (e.g., rule of thirds, centered, dynamic)\n * Time Period: Temporal context if relevant\n * Cultural Elements: Any specific cultural influences\n * Textures: Surface quality descriptions\n * Weather/Atmosphere: Environmental conditions if applicable\n * Negative Prompts: Elements to exclude from the image\n\n2. Brainstorm complementary elements that would enhance the user's vision:\n * Suggest fitting artists and styles if not specified\n * Consider atmospheric elements that would strengthen the concept\n * Identify potential technical aspects that would improve the result\n * Note any elements that should be avoided to maintain the desired look\n\n3. 
Construct your final prompt by:\n * Leading with the most important scene/subject details from the user's message\n * Incorporating all relevant technical and stylistic elements\n * Grouping related concepts together naturally\n * Maintaining clear, flowing language throughout\n * Adding complementary details that enhance but don't alter the core concept\n * Concluding with negative prompts separated by a \"Negative:\" marker\n\nRemember:\n- Preserve ALL specific details from the user's original message\n- Don't force details into a rigid template\n- Create a cohesive, readable description\n- Keep the focus on the user's core concept while enhancing it with technical and artistic refinements\n\nYour output should contain ONLY the final prompt text, with no additional explanations, tags, or formatting.";
3645

46+
/**
47+
* Saves a base64-encoded image to the local filesystem
48+
* @param base64Data - The base64-encoded image data
49+
* @param filename - Name to use for the saved file (without extension)
50+
* @returns The full filepath where the image was saved
51+
*/
3752
export function saveBase64Image(base64Data: string, filename: string): string {
3853
// Create generatedImages directory if it doesn't exist
3954
const imageDir = path.join(process.cwd(), "generatedImages");
@@ -56,6 +71,12 @@ export function saveBase64Image(base64Data: string, filename: string): string {
5671
return filepath;
5772
}
5873

74+
/**
75+
* Saves an image from a Heurist URL to the local filesystem
76+
* @param imageUrl - URL of the image to download and save
77+
* @param filename - Name to use for the saved file (without extension)
78+
* @returns Promise resolving to the full filepath where the image was saved
79+
*/
5980
export async function saveHeuristImage(
6081
imageUrl: string,
6182
filename: string
@@ -83,8 +104,13 @@ export async function saveHeuristImage(
83104
return filepath;
84105
}
85106

107+
/**
108+
* Action definition for image generation capability
109+
* Handles generating images based on user prompts while maintaining agent personality
110+
*/
86111
const imageGeneration: Action = {
87112
name: "GENERATE_IMAGE",
113+
// Alternative action names that should trigger image generation
88114
similes: [
89115
"IMAGE_GENERATION",
90116
"IMAGE_GEN",
@@ -97,6 +123,10 @@ const imageGeneration: Action = {
97123
"MAKE_A",
98124
],
99125
description: "Generate an image to go along with the message.",
126+
127+
/**
128+
* Validates the image generation config, then passes when at least one of the Anthropic, Together, or Heurist API key checks succeeds
129+
*/
100130
validate: async (runtime: IAgentRuntime, _message: Memory) => {
101131
await validateImageGenConfig(runtime);
102132

@@ -108,6 +138,14 @@ const imageGeneration: Action = {
108138

109139
return anthropicApiKeyOk || togetherApiKeyOk || heuristApiKeyOk;
110140
},
141+
142+
/**
143+
* Main handler for image generation:
144+
* 1. Generates an image description in the agent's voice
145+
* 2. Converts that description into an optimized image generation prompt
146+
* 3. Generates the image
147+
* 4. Saves and returns the result
148+
*/
111149
handler: async (
112150
runtime: IAgentRuntime,
113151
message: Memory,
@@ -122,31 +160,32 @@ const imageGeneration: Action = {
122160
imagePromptTemplate,
123161
});
124162

163+
// Generate the initial prompt in agent's voice
125164
const agentImagePrompt = await generateText({
126165
runtime,
127166
context: `${agentContext}\n\n<user_message>${message.content.text}</user_message>`,
128167
modelClass: ModelClass.SMALL,
129168
});
130169

131-
elizaLogger.log("Agent image prompt:", agentImagePrompt);
132-
170+
elizaLogger.log("Agent prompt & caption for image: ", agentImagePrompt);
133171

134-
//state = (await runtime.composeState(message)) as State;
135172
const userId = runtime.agentId;
136173
elizaLogger.log("User ID:", userId);
137174

175+
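// Build the context for the prompt-optimizing model (character system prompt, SYSTEM_PROMPT setting, or imageGenerationPrompt).
// NOTE(review): `+` binds tighter than `??`, so the agent's description (<user_message>) is appended only when imageGenerationPrompt is the fallback - confirm this is intended.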
138176
const context = runtime.character.system ??
139177
settings.SYSTEM_PROMPT ?? imageGenerationPrompt + `\n\nHere is the user's message:\n<user_message> ${agentImagePrompt} </user_message>`;
140178

179+
// Generate the technical prompt for the image generation model
141180
const imagePrompt = await generateText({
142181
runtime,
143182
context,
144183
modelClass: ModelClass.SMALL,
145184
});
146-
elizaLogger.log("Image prompt received:", imagePrompt);
147185

148186
const res: { image: string; caption: string }[] = [];
149187

188+
// Generate the actual image
150189
elizaLogger.log("Generating image with prompt:", imagePrompt);
151190
const images = await generateImage(
152191
{
@@ -158,6 +197,7 @@ const imageGeneration: Action = {
158197
runtime
159198
);
160199

200+
// Process and save generated images
161201
if (images.success && images.data && images.data.length > 0) {
162202
elizaLogger.log(
163203
"Image generation successful, number of images:",
@@ -176,46 +216,19 @@ const imageGeneration: Action = {
176216

177217
elizaLogger.log(`Processing image ${i + 1}:`, filename);
178218

179-
//just dont even add a caption or a description just have it generate & send
180-
/*
181-
try {
182-
const imageService = runtime.getService(ServiceType.IMAGE_DESCRIPTION);
183-
if (imageService && typeof imageService.describeImage === 'function') {
184-
const caption = await imageService.describeImage({ imageUrl: filepath });
185-
captionText = caption.description;
186-
captionTitle = caption.title;
187-
}
188-
} catch (error) {
189-
elizaLogger.error("Caption generation failed, using default caption:", error);
190-
}*/
191-
192-
//const caption = "...";
193-
/*= await generateCaption(
194-
{
195-
imageUrl: image,
196-
},
197-
runtime
198-
);*/
199-
200-
res.push({ image: filepath, caption: agentImagePrompt }); //caption.title });
201-
202-
elizaLogger.log(
203-
`Generated caption for image ${i + 1}:`,
204-
agentImagePrompt //caption.title
205-
);
206-
//res.push({ image: image, caption: caption.title });
219+
res.push({ image: filepath, caption: agentImagePrompt });
207220

208221
callback(
209222
{
210-
text: agentImagePrompt, //caption.description,
223+
text: agentImagePrompt,
211224
attachments: [
212225
{
213226
id: crypto.randomUUID(),
214227
url: filepath,
215228
title: "Generated image",
216229
source: "imageGeneration",
217-
description: imagePrompt, //caption.title,
218-
text: agentImagePrompt, //caption.description,
230+
description: imagePrompt,
231+
text: agentImagePrompt,
219232
},
220233
],
221234
},
@@ -231,6 +244,8 @@ const imageGeneration: Action = {
231244
elizaLogger.error("Image generation failed or returned no data.");
232245
}
233246
},
247+
248+
// Example interactions that should trigger image generation
234249
examples: [
235250
// TODO: We want to generate images in more abstract ways, not just when asked to generate an image
236251

@@ -315,6 +330,9 @@ const imageGeneration: Action = {
315330
],
316331
} as Action;
317332

333+
/**
334+
* Plugin definition for image generation functionality
335+
*/
318336
export const imageGenerationPlugin: Plugin = {
319337
name: "imageGeneration",
320338
description: "Generate images",

0 commit comments

Comments
 (0)