diff --git a/packages/client-twitter/src/__tests__/fixtures/images/test.bmp b/packages/client-twitter/src/__tests__/fixtures/images/test.bmp new file mode 100644 index 00000000000..ea88c92db94 Binary files /dev/null and b/packages/client-twitter/src/__tests__/fixtures/images/test.bmp differ diff --git a/packages/client-twitter/src/__tests__/fixtures/images/test.gif b/packages/client-twitter/src/__tests__/fixtures/images/test.gif new file mode 100644 index 00000000000..9df64778b72 Binary files /dev/null and b/packages/client-twitter/src/__tests__/fixtures/images/test.gif differ diff --git a/packages/client-twitter/src/__tests__/fixtures/images/test.jpg b/packages/client-twitter/src/__tests__/fixtures/images/test.jpg new file mode 100644 index 00000000000..09c9161f41a Binary files /dev/null and b/packages/client-twitter/src/__tests__/fixtures/images/test.jpg differ diff --git a/packages/client-twitter/src/__tests__/fixtures/images/test.png b/packages/client-twitter/src/__tests__/fixtures/images/test.png new file mode 100644 index 00000000000..11eb81857a6 Binary files /dev/null and b/packages/client-twitter/src/__tests__/fixtures/images/test.png differ diff --git a/packages/client-twitter/src/__tests__/twitter-client-image.test.ts b/packages/client-twitter/src/__tests__/twitter-client-image.test.ts new file mode 100644 index 00000000000..fd98acd46ba --- /dev/null +++ b/packages/client-twitter/src/__tests__/twitter-client-image.test.ts @@ -0,0 +1,341 @@ +import { describe, expect, test, beforeEach, vi, afterEach } from 'vitest'; +import { ClientBase } from '../base'; +import { TwitterPostClient } from '../post'; +import { IAgentRuntime, IImageDescriptionService, ServiceType, UUID } from '@elizaos/core'; +import { elizaLogger } from '@elizaos/core'; +import path from 'path'; + +describe('Twitter Client Image Tests', () => { + let client: ClientBase; + let postClient: TwitterPostClient; + let mockRuntime: IAgentRuntime; + let mockImageService: Partial; + + const mockTweetResponse = { + data: { + create_tweet: { + tweet_results: { + result: { + rest_id: '123456789', + legacy: { + full_text: 'Test tweet with image\nDescription for https://example.com/test.jpg', + conversation_id_str: '123456789', + created_at: '2024-01-01T00:00:00.000Z', + in_reply_to_status_id_str: null + } + } + } + } + } + }; + + beforeEach(() => { + // Add logging for test setup + elizaLogger.debug('Setting up test mocks and configuration...'); + + // Mock the image description service + mockImageService = { + serviceType: ServiceType.IMAGE_DESCRIPTION, + initialize: vi.fn().mockImplementation(() => { + elizaLogger.debug('Initializing mock image service'); + return Promise.resolve(); + }), + describeImage: vi.fn().mockImplementation(async (url: string) => { + elizaLogger.debug(`Describing image: ${url}`); + return { + title: 'Test Image', + description: `Description for ${url}` + }; + }) + }; + + // Mock the runtime with logging + elizaLogger.debug('Setting up mock runtime...'); + mockRuntime = { + getService: vi.fn().mockImplementation((type: ServiceType) => { + elizaLogger.debug(`Getting service of type: ${type}`); + return mockImageService; + }), + cacheManager: { + get: vi.fn(), + set: vi.fn() + }, + character: { + style: { + all: [], + post: [] + } + }, + messageManager: { + createMemory: vi.fn(), + getMemoryById: vi.fn() + }, + ensureRoomExists: vi.fn(), + ensureParticipantInRoom: vi.fn(), + ensureUserExists: vi.fn(), + agentId: '123' as UUID + } as any; + + // Mock Twitter config + elizaLogger.debug('Setting up Twitter client configuration...'); + const mockConfig = { + TWITTER_USERNAME: 'test_user', + TWITTER_PASSWORD: 'test_pass', + TWITTER_EMAIL: 'test@example.com', + TWITTER_2FA_SECRET: '', + TWITTER_RETRY_LIMIT: 3, + POST_INTERVAL_MIN: 1, + POST_INTERVAL_MAX: 2, + ENABLE_ACTION_PROCESSING: false, + ACTION_INTERVAL: 5, + POST_IMMEDIATELY: false, + TWITTER_SEARCH_ENABLE: false, + TWITTER_DRY_RUN: true, + MAX_TWEET_LENGTH: 280 + }; + + // Initialize clients with logging + elizaLogger.debug('Initializing Twitter clients...'); + client = new ClientBase(mockRuntime, mockConfig); + client.profile = { + id: '123', + username: 'test_user', + screenName: 'Test User', + bio: 'Test bio', + nicknames: [] + }; + + // Mock Twitter client methods with logging + elizaLogger.debug('Setting up Twitter client mock methods...'); + const mockSendTweet = vi.fn().mockImplementation(async (text: string) => { + elizaLogger.debug('Mock sending tweet with text:', text); + return { + json: () => Promise.resolve({ + data: { + create_tweet: { + tweet_results: { + result: { + rest_id: '123456789', + legacy: { + full_text: text, + conversation_id_str: '123456789', + created_at: '2024-01-01T00:00:00.000Z', + in_reply_to_status_id_str: null + } + } + } + } + } + }) + }; + }); + + const mockSendNoteTweet = vi.fn().mockImplementation(async (text: string) => { + elizaLogger.debug('Mock sending note tweet with text:', text); + return { + data: { + notetweet_create: { + tweet_results: { + result: { + rest_id: '123456789', + legacy: { + full_text: text, + conversation_id_str: '123456789', + created_at: '2024-01-01T00:00:00.000Z', + in_reply_to_status_id_str: null + } + } + } + } + } + }; + }); + + (client as any).twitterClient = { + sendTweet: mockSendTweet, + sendNoteTweet: mockSendNoteTweet + }; + + postClient = new TwitterPostClient(client, mockRuntime); + + // Mock logger + vi.spyOn(elizaLogger, 'log'); + vi.spyOn(elizaLogger, 'error'); + vi.spyOn(elizaLogger, 'debug'); + + elizaLogger.debug('Test setup complete'); + }); + + afterEach(() => { + vi.clearAllMocks(); + }); + + test('should process tweet with single image', async () => { + elizaLogger.debug('Starting single image tweet test'); + const content = 'Test tweet with image https://example.com/test.jpg'; + const roomId = '123' as UUID; + + elizaLogger.debug(`Posting tweet with content: ${content}`); + const result = await postClient.postTweet( + mockRuntime, + client, + content, + roomId, + content, + 'test_user' + ); + + elizaLogger.debug('Verifying image description service call'); + expect(mockImageService.describeImage).toHaveBeenCalledWith('https://example.com/test.jpg'); + + elizaLogger.debug('Verifying tweet content'); + expect((client as any).twitterClient.sendTweet).toHaveBeenCalledWith( + expect.stringContaining('Test tweet with image'), + undefined + ); + + elizaLogger.debug('Verifying result structure'); + expect(result).toBeDefined(); + expect(result.text).toContain('Description for https://example.com/test.jpg'); + elizaLogger.debug('Single image tweet test completed'); + }); + + test('should process tweet with multiple images', async () => { + const content = 'Test tweet with images https://example.com/1.jpg https://example.com/2.png'; + const roomId = '123' as UUID; + + const result = await postClient.postTweet( + mockRuntime, + client, + content, + roomId, + content, + 'test_user' + ); + + expect(mockImageService.describeImage).toHaveBeenCalledTimes(2); + expect(mockImageService.describeImage).toHaveBeenCalledWith('https://example.com/1.jpg'); + expect(mockImageService.describeImage).toHaveBeenCalledWith('https://example.com/2.png'); + expect(result.text).toContain('Image 1:'); + expect(result.text).toContain('Image 2:'); + }); + + test('should handle image description errors gracefully', async () => { + mockImageService.describeImage = vi.fn() + .mockRejectedValueOnce(new Error('Failed to process image')) + .mockResolvedValueOnce({ title: 'Test', description: 'Success' }); + + const content = 'Test tweet with images https://example.com/fail.jpg https://example.com/success.jpg'; + const roomId = '123' as UUID; + + const result = await postClient.postTweet( + mockRuntime, + client, + content, + roomId, + content, + 'test_user' + ); + + expect(elizaLogger.error).toHaveBeenCalledWith( + 'Failed to generate image description:', + expect.objectContaining({ + url: 'https://example.com/fail.jpg', + error: 'Failed to process image' + }) + ); + expect(result.text).toContain('Success'); + }); + + test('should handle long tweets with images', async () => { + const longContent = 'A'.repeat(280) + ' https://example.com/test.jpg'; + const roomId = '123' as UUID; + + const result = await postClient.postTweet( + mockRuntime, + client, + longContent, + roomId, + longContent, + 'test_user' + ); + + expect(mockImageService.describeImage).toHaveBeenCalled(); + expect((client as any).twitterClient.sendNoteTweet).toHaveBeenCalled(); + expect(result).toBeDefined(); + }); + + test('should handle network errors when posting tweets', async () => { + elizaLogger.debug('Starting network error test'); + + // Setup mock error + const mockError = new Error('Error posting tweet: Network error'); + elizaLogger.debug('Setting up mock network error:', mockError); + (client as any).twitterClient.sendTweet = vi.fn().mockRejectedValue(mockError); + + const content = 'Test tweet with image https://example.com/test.jpg'; + const roomId = '123' as UUID; + + elizaLogger.debug('Attempting to post tweet that should fail'); + try { + await postClient.postTweet( + mockRuntime, + client, + content, + roomId, + content, + 'test_user' + ); + throw new Error('Expected tweet posting to fail'); + } catch (error) { + elizaLogger.debug('Caught expected error:', error); + expect(error.message).toContain('Error posting tweet'); + expect(elizaLogger.error).toHaveBeenCalledWith( + 'Error posting tweet:', + expect.any(Error) + ); + } + + elizaLogger.debug('Network error test completed'); + }); + + test('should handle malformed tweet responses', async () => { + elizaLogger.debug('Starting malformed response test'); + + // Setup mock malformed response + elizaLogger.debug('Setting up mock malformed response'); + const malformedResponse = { data: null }; + (client as any).twitterClient.sendTweet = vi.fn().mockImplementation(() => { + elizaLogger.debug('Mock sending tweet with malformed response'); + elizaLogger.error('Error sending tweet; Bad response:', malformedResponse); + return Promise.resolve({ + json: () => { + elizaLogger.debug('Returning malformed response:', malformedResponse); + return Promise.resolve(malformedResponse); + } + }); + }); + + const content = 'Test tweet with image https://example.com/test.jpg'; + const roomId = '123' as UUID; + + elizaLogger.debug('Attempting to post tweet with malformed response'); + const result = await postClient.postTweet( + mockRuntime, + client, + content, + roomId, + content, + 'test_user' + ); + + elizaLogger.debug('Verifying error handling for malformed response'); + expect(result).toBeUndefined(); + expect(elizaLogger.error).toHaveBeenCalledWith( + 'Error sending tweet; Bad response:', + malformedResponse + ); + + elizaLogger.debug('Malformed response test completed'); + }); +}); \ No newline at end of file diff --git a/packages/client-twitter/src/base.ts b/packages/client-twitter/src/base.ts index f49516dc8c4..e061855cbd7 100644 --- a/packages/client-twitter/src/base.ts +++ b/packages/client-twitter/src/base.ts @@ -3,6 +3,7 @@ import { IAgentRuntime, IImageDescriptionService, Memory, + ServiceType, State, UUID, getEmbeddingZeroVector, @@ -149,6 +150,12 @@ export class ClientBase extends EventEmitter { ClientBase._twitterClients[username] = this.twitterClient; } + // Initialize image description service + this.imageDescriptionService = runtime.getService(ServiceType.IMAGE_DESCRIPTION) as IImageDescriptionService; + if (!this.imageDescriptionService) { + throw new Error("Image description service not found"); + } + this.directions = "- " + this.runtime.character.style.all.join("\n- ") + diff --git a/packages/client-twitter/src/post.ts b/packages/client-twitter/src/post.ts index e0aff4b3a61..6a646874146 100644 --- a/packages/client-twitter/src/post.ts +++ b/packages/client-twitter/src/post.ts @@ -462,8 +462,45 @@ export class TwitterPostClient { try { elizaLogger.log(`Posting new tweet:\n`); - let result; + // Extract image URLs from the content + const imageUrlRegex = /(https?:\/\/[^\s]+\.(?:jpg|jpeg|png|gif|webp))/gi; + const imageUrls = cleanedContent.match(imageUrlRegex) || []; + + // Process images if any + let imageDescriptions: string[] = []; + if (imageUrls.length > 0) { + elizaLogger.debug('Processing images in tweet:', { + imageCount: imageUrls.length, + urls: imageUrls + }); + + // Get descriptions for each image + for (const imageUrl of imageUrls) { + try { + const { description } = await client.imageDescriptionService.describeImage(imageUrl); + imageDescriptions.push(description); + elizaLogger.debug('Generated image description:', { + url: imageUrl, + description + }); + } catch (error) { + elizaLogger.error('Failed to generate image description:', { + url: imageUrl, + error: error instanceof Error ? error.message : 'Unknown error' + }); + // Continue with other images even if one fails + } + } + } + + // Add image descriptions to the tweet content if available + if (imageDescriptions.length > 0) { + const descriptions = imageDescriptions.map((desc, i) => `Image ${i + 1}: ${desc}`).join('\n'); + cleanedContent = `${cleanedContent}\n\n${descriptions}`; + elizaLogger.debug('Added image descriptions to tweet content'); + } + let result; if (cleanedContent.length > DEFAULT_MAX_TWEET_LENGTH) { result = await this.handleNoteTweet( client, @@ -474,21 +511,27 @@ export class TwitterPostClient { result = await this.sendStandardTweet(client, cleanedContent); } - const tweet = this.createTweetObject( - result, - client, - twitterUsername - ); + if (result) { + const tweet = this.createTweetObject( + result, + client, + twitterUsername + ); - await this.processAndCacheTweet( - runtime, - client, - tweet, - roomId, - newTweetContent - ); + await this.processAndCacheTweet( + runtime, + client, + tweet, + roomId, + newTweetContent + ); + + elizaLogger.log("Tweet posted successfully:", tweet.permanentUrl); + return tweet; + } } catch (error) { - elizaLogger.error("Error sending tweet:", error); + elizaLogger.error("Error posting tweet:", error); + throw error; } } diff --git a/packages/plugin-node/src/__tests__/fixtures/images/test.bmp b/packages/plugin-node/src/__tests__/fixtures/images/test.bmp new file mode 100644 index 00000000000..ea88c92db94 Binary files /dev/null and b/packages/plugin-node/src/__tests__/fixtures/images/test.bmp differ diff --git a/packages/plugin-node/src/__tests__/fixtures/images/test.gif b/packages/plugin-node/src/__tests__/fixtures/images/test.gif new file mode 100644 index 00000000000..9df64778b72 Binary files /dev/null and b/packages/plugin-node/src/__tests__/fixtures/images/test.gif differ diff --git a/packages/plugin-node/src/__tests__/fixtures/images/test.jpg b/packages/plugin-node/src/__tests__/fixtures/images/test.jpg new file mode 100644 index 00000000000..09c9161f41a Binary files /dev/null and b/packages/plugin-node/src/__tests__/fixtures/images/test.jpg differ diff --git a/packages/plugin-node/src/__tests__/fixtures/images/test.png b/packages/plugin-node/src/__tests__/fixtures/images/test.png new file mode 100644 index 00000000000..11eb81857a6 Binary files /dev/null and b/packages/plugin-node/src/__tests__/fixtures/images/test.png differ diff --git a/packages/plugin-node/src/__tests__/image.service.real.test.ts b/packages/plugin-node/src/__tests__/image.service.real.test.ts new file mode 100644 index 00000000000..abff71c59d5 --- /dev/null +++ b/packages/plugin-node/src/__tests__/image.service.real.test.ts @@ -0,0 +1,152 @@ +import { describe, expect, test, beforeAll, afterAll, vi } from 'vitest'; +import { ImageDescriptionService } from '../services/image'; +import path from 'path'; +import fs from 'fs'; +import { elizaLogger } from '@elizaos/core'; + +describe('ImageDescriptionService Real Tests', () => { + let service: ImageDescriptionService; + const testImagesDir = path.join(__dirname, 'fixtures', 'images'); + const mockRuntime = { + getSetting: (key: string) => { + if (key === 'OPENAI_API_KEY') return 'add here your key'; + return ''; + }, + character: { + modelProvider: 'openai' + }, + imageModelProvider: 'openai' + }; + + beforeAll(async () => { + // Mock elizaLogger.error + vi.spyOn(elizaLogger, 'error'); + vi.spyOn(elizaLogger, 'debug'); + + // Ensure test images directory exists + if (!fs.existsSync(testImagesDir)) { + fs.mkdirSync(testImagesDir, { recursive: true }); + } + + // Verify test images exist + const requiredImages = ['test.png', 'test.jpg', 'test.gif', 'test.bmp']; + const missingImages = requiredImages.filter(img => !fs.existsSync(path.join(testImagesDir, img))); + if (missingImages.length > 0) { + throw new Error(`Missing test images: ${missingImages.join(', ')}`); + } + + service = new ImageDescriptionService(); + await service.initialize(mockRuntime as any); + }); + + afterAll(async () => { + // Cleanup any temporary files created during tests + const tempFiles = fs.readdirSync(testImagesDir).filter(f => f.startsWith('gif_frame_') || f === 'corrupted.jpg'); + tempFiles.forEach(file => { + const filePath = path.join(testImagesDir, file); + if (fs.existsSync(filePath)) { + fs.unlinkSync(filePath); + } + }); + + vi.restoreAllMocks(); + }); + + describe('Real Image Processing', () => { + const verifyImageExists = (imagePath: string) => { + if (!fs.existsSync(imagePath)) { + throw new Error(`Test image not found: ${imagePath}`); + } + }; + + const validateImageDescription = (result: { title: string; description: string }) => { + expect(result).toHaveProperty('title'); + expect(result).toHaveProperty('description'); + expect(result.title.length).toBeGreaterThan(0); + expect(result.description.length).toBeGreaterThan(0); + expect(typeof result.title).toBe('string'); + expect(typeof result.description).toBe('string'); + }; + + test('should process a real PNG image with colors', async () => { + const imagePath = path.join(testImagesDir, 'test.png'); + verifyImageExists(imagePath); + + const result = await service.describeImage(imagePath); + validateImageDescription(result); + + // Should mention dice since that's what's in our test image + expect(result.description.toLowerCase()).toMatch(/dice|die/); + }, 30000); + + test('should process a real JPEG image with proper compression', async () => { + const imagePath = path.join(testImagesDir, 'test.jpg'); + verifyImageExists(imagePath); + + const result = await service.describeImage(imagePath); + validateImageDescription(result); + }, 30000); + + test('should process a real GIF image and extract first frame', async () => { + const imagePath = path.join(testImagesDir, 'test.gif'); + verifyImageExists(imagePath); + + const result = await service.describeImage(imagePath); + validateImageDescription(result); + + // Verify temp file cleanup + const tempFiles = fs.readdirSync(testImagesDir).filter(f => f.startsWith('gif_frame_')); + expect(tempFiles.length).toBe(0); + }, 30000); + + test('should reject an unsupported BMP image with clear error', async () => { + const imagePath = path.join(testImagesDir, 'test.bmp'); + verifyImageExists(imagePath); + + await expect(service.describeImage(imagePath)) + .rejects + .toThrow('Unsupported image format'); + + // Verify error was logged + expect(elizaLogger.error).toHaveBeenCalledWith( + 'Image format validation failed:', + expect.objectContaining({ + url: imagePath, + error: expect.stringContaining('Unsupported image format') + }) + ); + }); + + test('should handle real network image URLs with proper timeout', async () => { + const imageUrl = 'https://raw.githubusercontent.com/microsoft/vscode/main/resources/linux/code.png'; + + const result = await service.describeImage(imageUrl); + validateImageDescription(result); + }, 45000); // Increased timeout for network request + + test('should handle network timeouts gracefully', async () => { + const imageUrl = 'https://example.com/nonexistent.jpg'; + + await expect(service.describeImage(imageUrl)) + .rejects + .toThrow(/Failed to fetch image|Network error/); + }, 30000); + + test('should handle malformed image data', async () => { + // Create a corrupted image file + const corruptedImagePath = path.join(testImagesDir, 'corrupted.jpg'); + fs.writeFileSync(corruptedImagePath, Buffer.from('not an image')); + + try { + await expect(service.describeImage(corruptedImagePath)) + .rejects + .toThrow(/You uploaded an unsupported image|Invalid image format/); + } finally { + // Cleanup + if (fs.existsSync(corruptedImagePath)) { + fs.unlinkSync(corruptedImagePath); + } + } + }); + }); +}); \ No newline at end of file diff --git a/packages/plugin-node/src/services/image.ts b/packages/plugin-node/src/services/image.ts index 55c29db6d14..dfd9ea29b00 100644 --- a/packages/plugin-node/src/services/image.ts +++ b/packages/plugin-node/src/services/image.ts @@ -1,4 +1,4 @@ -import { elizaLogger, getEndpoint, models } from "@elizaos/core"; +import { elizaLogger, models } from "@elizaos/core"; import { Service } from "@elizaos/core"; import { IAgentRuntime, @@ -28,6 +28,14 @@ export class ImageDescriptionService { static serviceType: ServiceType = ServiceType.IMAGE_DESCRIPTION; + private static readonly SUPPORTED_FORMATS = { + '.png': 'image/png', + '.jpg': 'image/jpeg', + '.jpeg': 'image/jpeg', + '.gif': 'image/gif', + '.webp': 'image/webp' + }; + private modelId: string = "onnx-community/Florence-2-base-ft"; private device: string = "gpu"; private model: PreTrainedModel | null = null; @@ -133,32 +141,62 @@ export class ImageDescriptionService private async recognizeWithOpenAI( imageUrl: string ): Promise<{ title: string; description: string }> { + elizaLogger.debug(`Attempting to recognize image: ${imageUrl}`); + + const { isValid, mimeType } = this.validateImageFormat(imageUrl); + if (!isValid) { + const supportedFormats = Object.keys(ImageDescriptionService.SUPPORTED_FORMATS) + .map(ext => ext.slice(1)) + .join(', '); + const errorMessage = `Unsupported image format for ${imageUrl}. Please use one of the following formats: ${supportedFormats}`; + elizaLogger.error('Image format validation failed:', { + url: imageUrl, + supportedFormats, + error: errorMessage + }); + throw new Error(errorMessage); + } + elizaLogger.debug(`Image format validated successfully: ${mimeType}`); + const isGif = imageUrl.toLowerCase().endsWith(".gif"); let imageData: Buffer | null = null; try { if (isGif) { - const { filePath } = - await this.extractFirstFrameFromGif(imageUrl); + elizaLogger.debug('Processing GIF image, extracting first frame'); + const { filePath } = await this.extractFirstFrameFromGif(imageUrl); imageData = fs.readFileSync(filePath); + elizaLogger.debug('Successfully extracted first frame from GIF'); } else if (fs.existsSync(imageUrl)) { + elizaLogger.debug('Reading local image file'); imageData = fs.readFileSync(imageUrl); } else { + elizaLogger.debug('Fetching remote image'); const response = await fetch(imageUrl); if (!response.ok) { - throw new Error( - `Failed to fetch image: ${response.statusText}` - ); + const errorMessage = `Failed to fetch image: ${response.statusText} (${response.status})`; + elizaLogger.error('Image fetch failed:', { + url: imageUrl, + status: response.status, + statusText: response.statusText + }); + throw new Error(errorMessage); } imageData = Buffer.from(await response.arrayBuffer()); + elizaLogger.debug('Successfully fetched remote image'); } if (!imageData || imageData.length === 0) { - throw new Error("Failed to fetch image data"); + const errorMessage = "Failed to fetch image data: Empty response"; + elizaLogger.error('Image data validation failed:', { + url: imageUrl, + error: errorMessage + }); + throw new Error(errorMessage); } - const prompt = - "Describe this image and give it a title. The first line should be the title, and then a line break, then a detailed description of the image. Respond with the format 'title\ndescription'"; + const prompt = "Describe this image and give it a title. The first line should be the title, and then a line break, then a detailed description of the image. Respond with the format 'title\\ndescription'"; + elizaLogger.debug('Sending image to OpenAI for recognition'); const text = await this.requestOpenAI( imageUrl, imageData, @@ -168,12 +206,22 @@ export class ImageDescriptionService ); const [title, ...descriptionParts] = text.split("\n"); + elizaLogger.debug('Successfully generated image description', { + titleLength: title.length, + descriptionLength: descriptionParts.join("\n").length + }); + return { title, description: descriptionParts.join("\n"), }; } catch (error) { - elizaLogger.error("Error in recognizeWithOpenAI:", error); + const errorMessage = error instanceof Error ? error.message : 'Unknown error occurred'; + elizaLogger.error('Error in recognizeWithOpenAI:', { + url: imageUrl, + error: errorMessage, + stack: error instanceof Error ? error.stack : undefined + }); throw error; } } @@ -186,22 +234,29 @@ export class ImageDescriptionService isLocalFile: boolean = false ): Promise { for (let attempt = 0; attempt < 3; attempt++) { + elizaLogger.debug(`OpenAI API request attempt ${attempt + 1}/3`); try { - const shouldUseBase64 = - (isGif || isLocalFile) && - !( - this.runtime.imageModelProvider === - ModelProviderName.OPENAI - ); - const mimeType = isGif - ? "png" - : path.extname(imageUrl).slice(1) || "jpeg"; + const shouldUseBase64 = isGif || isLocalFile; + const { mimeType } = this.validateImageFormat(imageUrl); + if (!mimeType) { + const errorMessage = "Invalid image format detected during OpenAI request"; + elizaLogger.error('MIME type validation failed:', { + url: imageUrl, + error: errorMessage + }); + throw new Error(errorMessage); + } const base64Data = imageData.toString("base64"); const imageUrlToUse = shouldUseBase64 - ? `data:image/${mimeType};base64,${base64Data}` + ? `data:${mimeType};base64,${base64Data}` : imageUrl; + elizaLogger.debug('Preparing OpenAI API request', { + isBase64: shouldUseBase64, + mimeType + }); + const content = [ { type: "text", text: prompt }, { @@ -211,11 +266,10 @@ export class ImageDescriptionService }, }, ]; - // If model provider is openai, use the endpoint, otherwise use the default openai endpoint. - const endpoint = - this.runtime.imageModelProvider === ModelProviderName.OPENAI - ? getEndpoint(this.runtime.imageModelProvider) - : "https://api.openai.com/v1"; + + const endpoint = models[this.runtime.imageModelProvider].endpoint ?? "https://api.openai.com/v1"; + elizaLogger.debug(`Using OpenAI endpoint: ${endpoint}`); + const response = await fetch(endpoint + "/chat/completions", { method: "POST", headers: { @@ -231,30 +285,44 @@ export class ImageDescriptionService if (!response.ok) { const responseText = await response.text(); - elizaLogger.error( - "OpenAI API error:", - response.status, - "-", - responseText - ); - throw new Error(`HTTP error! status: ${response.status}`); + elizaLogger.error('OpenAI API error:', { + status: response.status, + response: responseText, + attempt: attempt + 1 + }); + throw new Error(`OpenAI API error (${response.status}): ${responseText}`); } const data = await response.json(); - return data.choices[0].message.content; + elizaLogger.debug('Successfully received OpenAI API response', { + status: response.status, + hasChoices: !!data.choices, + choicesLength: data.choices?.length, + firstChoice: data.choices?.[0], + rawResponse: data + }); + const responseContent = data.choices[0].message.content; + elizaLogger.debug('Extracted content from response', { + content: responseContent, + contentLength: responseContent.length + }); + return responseContent; } catch (error) { - elizaLogger.error( - "OpenAI request failed (attempt", - attempt + 1, - "):", - error - ); + const errorMessage = error instanceof Error ? error.message : 'Unknown error occurred'; + elizaLogger.error('OpenAI request failed:', { + attempt: attempt + 1, + error: errorMessage, + stack: error instanceof Error ? error.stack : undefined + }); if (attempt === 2) throw error; } } - throw new Error( - "Failed to recognize image with OpenAI after 3 attempts" - ); + const finalError = "Failed to recognize image with OpenAI after 3 attempts"; + elizaLogger.error('All OpenAI API attempts failed', { + url: imageUrl, + error: finalError + }); + throw new Error(finalError); } private async processQueue(): Promise { @@ -343,6 +411,15 @@ export class ImageDescriptionService writeStream.on("error", reject); }); } + + private validateImageFormat(imageUrl: string): { isValid: boolean; mimeType: string | null } { + const extension = path.extname(imageUrl).toLowerCase(); + const mimeType = ImageDescriptionService.SUPPORTED_FORMATS[extension]; + return { + isValid: !!mimeType, + mimeType: mimeType || null + }; + } } export default ImageDescriptionService;