1
1
// TODO: Replace with the Vercel AI SDK and support all providers
2
- import Anthropic from "@anthropic-ai/sdk" ;
3
2
import { Buffer } from 'buffer' ;
4
3
import Together from "together-ai" ;
4
+ import { IAgentRuntime } from "../core/types" ;
5
+ import { getModel , ImageGenModel } from "../core/imageGenModels.ts" ;
6
+ import OpenAI from "openai" ;
5
7
6
8
export const generateImage = async ( data : {
7
- apiKey : string ,
8
9
prompt : string ,
9
10
width : number ,
10
11
height : number ,
11
- steps ?: number ,
12
12
count ?: number
13
- } ) : Promise < {
13
+ } , runtime : IAgentRuntime ) : Promise < {
14
14
success : boolean ,
15
15
data ?: string [ ] ,
16
16
error ?: any
17
17
} > => {
18
- const { apiKey, prompt, width, height } = data ;
19
- let { steps, count } = data ;
20
- if ( ! steps ) {
21
- steps = 4 ;
22
- }
18
+ const { prompt, width, height } = data ;
19
+ let { count } = data ;
23
20
if ( ! count ) {
24
21
count = 1 ;
25
22
}
26
23
24
+ const imageGenModel = runtime . imageGenModel ;
25
+ const model = getModel ( imageGenModel ) ;
26
+ const apiKey = imageGenModel === ImageGenModel . TogetherAI ? runtime . getSetting ( "TOGETHER_API_KEY" ) : runtime . getSetting ( "OPENAI_API_KEY" ) ;
27
+
27
28
try {
29
+ if ( imageGenModel === ImageGenModel . TogetherAI ) {
28
30
const together = new Together ( { apiKey } ) ;
29
31
const response = await together . images . create ( {
30
32
model : "black-forest-labs/FLUX.1-schnell" ,
31
33
prompt,
32
34
width,
33
35
height,
34
- steps,
36
+ steps : model . steps ,
35
37
n : count ,
36
38
} ) ;
37
39
const urls : string [ ] = [ ] ;
@@ -48,63 +50,37 @@ export const generateImage = async (data: {
48
50
base64 = "data:image/jpeg;base64," + base64 ;
49
51
return base64 ;
50
52
} ) ) ;
51
- return { success : true , data : base64s } ;
53
+ return { success : true , data : base64s } ;
54
+ } else {
55
+ let targetSize = `${ width } x${ height } ` ;
56
+ if ( targetSize !== "1024x1024" && targetSize !== "1792x1024" && targetSize !== "1024x1792" ) {
57
+ targetSize = "1024x1024" ;
58
+ }
59
+ const openai = new OpenAI ( { apiKey } ) ;
60
+ const response = await openai . images . generate ( {
61
+ model : model . subModel ,
62
+ prompt,
63
+ size : targetSize as "1024x1024" | "1792x1024" | "1024x1792" ,
64
+ n : count ,
65
+ response_format : "b64_json" ,
66
+ } ) ;
67
+ const base64s = response . data . map ( ( image ) => `data:image/png;base64,${ image . b64_json } ` ) ;
68
+ return { success : true , data : base64s } ;
69
+ }
52
70
} catch ( error ) {
53
71
console . error ( error ) ;
54
72
return { success : false , error : error } ;
55
73
}
56
74
} ;
57
75
58
- export const generateCaption = async ( data : { apiKey : string , imageUrl : string } ) => {
59
- const { apiKey, imageUrl } = data ;
60
-
61
- try {
62
- const anthropic = new Anthropic ( {
63
- apiKey,
64
- } ) ;
65
-
66
- const base64Data = imageUrl . replace ( / ^ d a t a : i m a g e \/ \w + ; b a s e 6 4 , / , "" ) ;
67
- const buffer = Buffer . from ( base64Data , 'base64' ) ;
68
- const imageType = detectImageType ( buffer ) ;
69
-
70
- if ( ! imageType ) {
71
- throw new Error ( "Invalid image data" ) ;
72
- }
73
-
74
- const response = await anthropic . messages . create ( {
75
- model : "claude-3-5-sonnet-20240620" ,
76
- max_tokens : 8192 ,
77
- temperature : 0 ,
78
- messages : [
79
- {
80
- role : "user" ,
81
- content : [
82
- { type : "text" , text : "What do you see in this image? Generate a caption for it! Keep it short, max one phrase. Caption:" } ,
83
- //@ts -ignore
84
- { type : "image" , source : { data : base64Data , media_type : `image/${ imageType } ` , type : "base64" } }
85
- ]
86
- } ,
87
- ] ,
88
- tools : [ ] ,
89
- } ) ;
90
-
91
- const responseContent = ( ( response . content [ 0 ] as any ) . text as string ) . replace ( "Caption:" , "" ) . trim ( ) ;
92
- return { success : true , caption : responseContent } ;
93
- } catch ( error ) {
94
- console . error ( error ) ;
95
- return { success : false , error : error , caption : "" } ;
96
- }
97
- }
98
-
99
- function detectImageType ( buffer : Buffer ) : string | null {
100
- if ( buffer [ 0 ] === 0xFF && buffer [ 1 ] === 0xD8 && buffer [ 2 ] === 0xFF ) {
101
- return 'jpeg' ;
102
- } else if ( buffer [ 0 ] === 0x89 && buffer [ 1 ] === 0x50 && buffer [ 2 ] === 0x4E && buffer [ 3 ] === 0x47 ) {
103
- return 'png' ;
104
- } else if ( buffer [ 0 ] === 0x47 && buffer [ 1 ] === 0x49 && buffer [ 2 ] === 0x46 ) {
105
- return 'gif' ;
106
- } else if ( buffer [ 0 ] === 0x42 && buffer [ 1 ] === 0x4D ) {
107
- return 'bmp' ;
108
- }
109
- return null ;
76
/**
 * Produces a title and description for an image by delegating to the
 * runtime's image description service.
 *
 * The image reference is passed to `describeImage` as-is. Both fields of the
 * service response are whitespace-trimmed before being returned.
 *
 * No errors are caught here: a rejection from the service, or a response
 * missing either field, surfaces to the caller as a rejected promise.
 *
 * @param data - Wrapper holding `imageUrl`, the image reference to describe.
 * @param runtime - Agent runtime exposing `imageDescriptionService`.
 * @returns The trimmed `title` and `description` reported by the service.
 */
export const generateCaption = async (
    data: { imageUrl: string },
    runtime: IAgentRuntime
): Promise<{ title: string; description: string }> => {
    // Read the input first so a bad `data` argument fails before the service is touched.
    const imageRef = data.imageUrl;
    const described = await runtime.imageDescriptionService.describeImage(imageRef);

    // Trim title before description, matching the original evaluation order.
    const title = described.title.trim();
    const description = described.description.trim();
    return { title, description };
};
0 commit comments