@@ -58,9 +58,9 @@ Then, define the inference parameters by setting the appropriate prompt / seed /
58
58
prompt = " a futuristic castle surrounded by a forest, mountains in the background"
59
59
seed = 42
60
60
sdxl.set_inference_steps(50 , first_step = 0 )
61
- sdxl.set_self_attention_guidance(
62
- enable = True , scale = 0.75
63
- ) # Enable self-attention guidance to enhance the quality of the generated images
61
+
62
+ # Enable self-attention guidance to enhance the quality of the generated images
63
+ sdxl.set_self_attention_guidance( enable = True , scale = 0.75 )
64
64
65
65
# ... Inference process
66
66
@@ -76,10 +76,10 @@ with no_grad(): # Disable gradient calculation for memory-efficient inference
76
76
)
77
77
time_ids = sdxl.default_time_ids
78
78
79
- manual_seed(seed = seed )
79
+ manual_seed(seed)
80
80
81
- # Using a higher latents inner dim to improve resolution of generated images
82
- x = torch.randn( size = ( 1 , 4 , 256 , 256 ), device = sdxl.device, dtype = sdxl.dtype)
81
+ # SDXL typically generates 1024x1024 images; here we use a higher resolution (2048x2048).
82
+ x = sdxl.init_latents(( 2048 , 2048 )).to( sdxl.device, sdxl.dtype)
83
83
84
84
# Diffusion process
85
85
for step in sdxl.steps:
@@ -131,8 +131,8 @@ predicted_image.save("vanilla_sdxl.png")
131
131
132
132
manual_seed(seed=seed)
133
133
134
- # Using a higher latents inner dim to improve resolution of generated images
135
- x = torch.randn(size=(1, 4, 256, 256), device= sdxl.device, dtype= sdxl.dtype)
134
+ # SDXL typically generates 1024x1024 images; here we use a higher resolution (2048x2048).
135
+ x = sdxl.init_latents((2048, 2048)).to( sdxl.device, sdxl.dtype)
136
136
137
137
# Diffusion process
138
138
for step in sdxl.steps:
@@ -213,8 +213,8 @@ manager.add_loras("scifi-lora", tensors=scifi_lora_weights)
213
213
214
214
manual_seed(seed=seed)
215
215
216
- # Using a higher latents inner dim to improve resolution of generated images
217
- x = torch.randn(size=(1, 4, 256, 256), device= sdxl.device, dtype= sdxl.dtype)
216
+ # SDXL typically generates 1024x1024 images; here we use a higher resolution (2048x2048).
217
+ x = sdxl.init_latents((2048, 2048)).to( sdxl.device, sdxl.dtype)
218
218
219
219
# Diffusion process
220
220
for step in sdxl.steps:
@@ -304,8 +304,8 @@ manager.add_loras("pixel-art-lora", load_from_safetensors("pixel-art-xl-v1.1.saf
304
304
305
305
manual_seed(seed=seed)
306
306
307
- # Using a higher latents inner dim to improve resolution of generated images
308
- x = torch.randn(size=(1, 4, 256, 256), device= sdxl.device, dtype= sdxl.dtype)
307
+ # SDXL typically generates 1024x1024 images; here we use a higher resolution (2048x2048).
308
+ x = sdxl.init_latents((2048, 2048)).to( sdxl.device, sdxl.dtype)
309
309
310
310
# Diffusion process
311
311
for step in sdxl.steps:
@@ -440,7 +440,7 @@ with torch.no_grad():
440
440
ip_adapter.set_clip_image_embedding(clip_image_embedding)
441
441
442
442
manual_seed(seed=seed)
443
- x = torch.randn(size=(1, 4, 128, 128), device= sdxl.device, dtype= sdxl.dtype)
443
+ x = sdxl.init_latents((1024, 1024)).to( sdxl.device, sdxl.dtype)
444
444
445
445
# Diffusion process
446
446
for step in sdxl.steps:
@@ -578,7 +578,7 @@ with torch.no_grad():
578
578
t2i_adapter.set_condition_features(features=t2i_adapter.compute_condition_features(condition))
579
579
580
580
manual_seed(seed=seed)
581
- x = torch.randn(size=(1, 4, 128, 128), device= sdxl.device, dtype= sdxl.dtype)
581
+ x = sdxl.init_latents((1024, 1024)).to( sdxl.device, sdxl.dtype)
582
582
583
583
# Diffusion process
584
584
for step in sdxl.steps:
0 commit comments