make image generation prompt a config

Tom Daniel · Tom Daniel · commit 2deb3a0b5df5 · 2024-11-25T12:11:38.000Z
diff --git a/.env.example b/.env.example
@@ -38,6 +38,10 @@ POST_INTERVAL_MAX= #180 #Default
 #USE IMAGE GEN
 IMAGE_GEN= #TRUE
 
+#Generation Prompts
+SYSTEM_PROMPT= #Leave blank for an empty system prompt or to use the one defined in the character config
+IMAGE_GENERATION_PROMPT= #Leave blank to use the default image generation prompt or the one defined in the character config
+
 #Leave blank to use local embeddings
 USE_OPENAI_EMBEDDING=  #TRUE
 
diff --git a/packages/core/src/types.ts b/packages/core/src/types.ts
@@ -332,6 +332,7 @@ export type Character = {
     id?: UUID; // optional UUID which can be passed down to identify the character
     name: string;
     system?: string;
+    imageGenerationPrompt?: string;
     modelProvider: ModelProviderName;
     imageModelProvider?: ModelProviderName;
     modelEndpointOverride?: string;
diff --git a/packages/plugin-image-generation/src/index.ts b/packages/plugin-image-generation/src/index.ts
@@ -10,7 +10,7 @@ import {
     Plugin,
     State,
 } from "@ai16z/eliza";
-import { generateCaption, generateImage } from "@ai16z/eliza";
+import { generateCaption, generateImage, settings } from "@ai16z/eliza";
 
 import fs from "fs";
 import path from "path";
@@ -31,6 +31,8 @@ About {{agentName}}:
 Write a two sentence image description that considers the <user_message> and may also include {{adjective}} about {{topic}} (without mentioning {{topic}} directly), from the perspective of {{agentName}}. Try to write something totally different than previous posts. Do not add commentary or acknowledge this request, just write the description of the image to be generated.
 Your response should not contain any questions. Brief, concise statements only. No emojis. Use \\n\\n (double spaces) between statements.`;
 
+/** Built-in default instructions for turning a message into a detailed image-generation prompt; the action handler appends the user's message to this text. */ const imageGenerationPrompt = "You are an AI assistant specialized in crafting effective prompts for image generation. Your task is to analyze a user's message and create a comprehensive, natural-language prompt that will guide an image generation algorithm to produce high-quality, visually appealing images.\n\nBegin by analyzing the content of the user's message. Follow these steps:\n\n1. List out key elements from the user's message, categorizing them to ensure comprehensive coverage:\n   * Topic: The main subject or scene with specific details\n   * Material: The medium or style (e.g., digital painting, 3D render)\n   * Style: The artistic direction (e.g., fantasy, vaporwave)\n   * Artist: Specific artists to influence the visual style\n   * Webpage Influence: Art platforms like ArtStation or DeviantArt for quality enhancement\n   * Sharpness: Terms like \"sharp focus\" or \"highly detailed\" for clarity\n   * Extra Details: Descriptors to enhance atmosphere (e.g., cinematic, dystopian)\n   * Shade and Color: Color-related keywords to control mood (e.g., moody lighting)\n   * Lighting and Brightness: Specific lighting styles (e.g., dramatic shadows)\n   * Camera Angle: Perspective and framing (e.g., close-up, wide shot, aerial view)\n   * Composition: Layout guidance (e.g., rule of thirds, centered, dynamic)\n   * Time Period: Temporal context if relevant\n   * Cultural Elements: Any specific cultural influences\n   * Textures: Surface quality descriptions\n   * Weather/Atmosphere: Environmental conditions if applicable\n   * Negative Prompts: Elements to exclude from the image\n\n2. Brainstorm complementary elements that would enhance the user's vision:\n   * Suggest fitting artists and styles if not specified\n   * Consider atmospheric elements that would strengthen the concept\n   * Identify potential technical aspects that would improve the result\n   * Note any elements that should be avoided to maintain the desired look\n\n3. 
Construct your final prompt by:\n   * Leading with the most important scene/subject details from the user's message\n   * Incorporating all relevant technical and stylistic elements\n   * Grouping related concepts together naturally\n   * Maintaining clear, flowing language throughout\n   * Adding complementary details that enhance but don't alter the core concept\n   * Concluding with negative prompts separated by a \"Negative:\" marker\n\nRemember:\n- Preserve ALL specific details from the user's original message\n- Don't force details into a rigid template\n- Create a cohesive, readable description\n- Keep the focus on the user's core concept while enhancing it with technical and artistic refinements\n\nYour output should contain ONLY the final prompt text, with no additional explanations, tags, or formatting.";
+
 export function saveBase64Image(base64Data: string, filename: string): string {
     // Create generatedImages directory if it doesn't exist
     const imageDir = path.join(process.cwd(), "generatedImages");
@@ -119,7 +121,7 @@ const imageGeneration: Action = {
 
         const agentImagePrompt = await generateText({
             runtime,
-            context: `${agentContext}\n\n<user message>${message.content.text}</user message>`,
+            context: `${agentContext}\n\n<user_message>${message.content.text}</user_message>`,
             modelClass: ModelClass.SMALL,
         });
 
@@ -130,7 +132,8 @@ const imageGeneration: Action = {
         const userId = runtime.agentId;
         elizaLogger.log("User ID:", userId);
 
-        const context = `You are an AI assistant specialized in crafting effective prompts for image generation. Your task is to analyze a user's message and create a comprehensive, natural-language prompt that will guide an image generation algorithm to produce high-quality, visually appealing images.\n\nHere is the user's message:\n<user_message> ${agentImagePrompt} </user_message>\n\nBegin by analyzing the content of the user's message. Follow these steps:\n\n1. List out key elements from the user's message, categorizing them to ensure comprehensive coverage:\n   * Topic: The main subject or scene with specific details\n   * Material: The medium or style (e.g., digital painting, 3D render)\n   * Style: The artistic direction (e.g., fantasy, vaporwave)\n   * Artist: Specific artists to influence the visual style\n   * Webpage Influence: Art platforms like ArtStation or DeviantArt for quality enhancement\n   * Sharpness: Terms like "sharp focus" or "highly detailed" for clarity\n   * Extra Details: Descriptors to enhance atmosphere (e.g., cinematic, dystopian)\n   * Shade and Color: Color-related keywords to control mood (e.g., moody lighting)\n   * Lighting and Brightness: Specific lighting styles (e.g., dramatic shadows)\n   * Camera Angle: Perspective and framing (e.g., close-up, wide shot, aerial view)\n   * Composition: Layout guidance (e.g., rule of thirds, centered, dynamic)\n   * Time Period: Temporal context if relevant\n   * Cultural Elements: Any specific cultural influences\n   * Textures: Surface quality descriptions\n   * Weather/Atmosphere: Environmental conditions if applicable\n   * Negative Prompts: Elements to exclude from the image\n\n2. 
Brainstorm complementary elements that would enhance the user's vision:\n   * Suggest fitting artists and styles if not specified\n   * Consider atmospheric elements that would strengthen the concept\n   * Identify potential technical aspects that would improve the result\n   * Note any elements that should be avoided to maintain the desired look\n\n3. Construct your final prompt by:\n   * Leading with the most important scene/subject details from the user's message\n   * Incorporating all relevant technical and stylistic elements\n   * Grouping related concepts together naturally\n   * Maintaining clear, flowing language throughout\n   * Adding complementary details that enhance but don't alter the core concept\n   * Concluding with negative prompts separated by a "Negative:" marker\n\nRemember:\n- Preserve ALL specific details from the user's original message\n- Don't force details into a rigid template\n- Create a cohesive, readable description\n- Keep the focus on the user's core concept while enhancing it with technical and artistic refinements\n\nYour output should contain ONLY the final prompt text, with no additional explanations, tags, or formatting.`;
+        const imagePromptInstructions = runtime.character.imageGenerationPrompt || settings.IMAGE_GENERATION_PROMPT || imageGenerationPrompt; // character config, then env setting, then built-in default; `||` (not `??`) so a blank setting falls through to the default
+        const context = `${imagePromptInstructions}\n\nHere is the user's message:\n<user_message> ${agentImagePrompt} </user_message>`; // the message is appended whichever instructions were chosen (previously `+` bound tighter than `??`, so overrides dropped it)
 
         const imagePrompt = await generateText({
             runtime,