In [1]:
# importing
import torch

# for parameters ui
import ipywidgets as widgets
# for saving results
import shutil
from datetime import datetime
# for image_grid
from PIL import Image
# for prompt embeddings
from compel import Compel, ReturnedEmbeddingsType

# Hugging Face cache directory (passed as cache_dir to from_pretrained).
# Raw string: "\H" is not a valid escape sequence, and Python 3.12+ emits a
# SyntaxWarning for it in a normal string literal. The value is unchanged.
CACHE_DIR = r"D:\HuggingFaceCache"

# import models, schedulers and etc
from diffusers import StableDiffusionXLPipeline, StableDiffusionXLImg2ImgPipeline
from diffusers import EulerDiscreteScheduler, DDIMScheduler, LMSDiscreteScheduler

In [2]:
# ui: build every input widget first, then render each group in one display call
style = {'description_width': 'initial'}

# prompts (two positive and two negative text areas, plus the Compel toggle)
prompt1_text_area = widgets.Textarea(
    value='',
    placeholder='Type positive...',
    description='Prompt1:',
    style=style,
)
prompt2_text_area = widgets.Textarea(
    value='',
    placeholder='Type positive...',
    description='Prompt2:',
    style=style,
)
negative_prompt1_text_area = widgets.Textarea(
    value='',
    placeholder='Type negative...',
    description='Negative Prompt1:',
    style=style,
)
negative_prompt2_text_area = widgets.Textarea(
    value='',
    placeholder='Type negative...',
    description='Negative Prompt2:',
    style=style,
)
use_compel_checkbox = widgets.Checkbox(
    value=False,
    description="Use Compel",
    indent=False,
    style=style,
)
display(
    prompt1_text_area,
    prompt2_text_area,
    negative_prompt1_text_area,
    negative_prompt2_text_area,
    use_compel_checkbox,
)

# inference properties
num_inference_steps_slider = widgets.IntSlider(
    value=40, min=10, max=100, step=5,
    description="Num inference steps:",
    style=style,
)
width_slider = widgets.IntSlider(
    value=768, min=512, max=1024, step=64,
    description="Width:",
    style=style,
)
height_slider = widgets.IntSlider(
    value=768, min=512, max=1024, step=64,
    description="Height:",
    style=style,
)
guidance_scale_slider = widgets.FloatSlider(
    value=7.5, min=0, max=10, step=0.25,
    description="Guidance scale:",
    style=style,
)
seed_slider = widgets.IntSlider(
    value=12345, min=0, max=1000000, step=1,
    description="Seed:",
    style=style,
)
high_noise_frac_slider = widgets.FloatSlider(
    value=0.8, min=0, max=1, step=0.05,
    description="High noise frac:",
    style=style,
)
display(
    num_inference_steps_slider,
    width_slider,
    height_slider,
    guidance_scale_slider,
    seed_slider,
    high_noise_frac_slider,
)

# refiner toggle
use_refiner_checkbox = widgets.Checkbox(
    value=False,
    description="Use refiner",
    indent=False,
    style=style,
)
display(use_refiner_checkbox)



Textarea(value='', description='Prompt1:', placeholder='Type positive...', style=TextStyle(description_width='…

Textarea(value='', description='Prompt2:', placeholder='Type positive...', style=TextStyle(description_width='…

Textarea(value='', description='Negative Prompt1:', placeholder='Type negative...', style=TextStyle(descriptio…

Textarea(value='', description='Negative Prompt2:', placeholder='Type negative...', style=TextStyle(descriptio…

Checkbox(value=False, description='Use Compel', indent=False, style=CheckboxStyle(description_width='initial')…

IntSlider(value=40, description='Num inference steps:', min=10, step=5, style=SliderStyle(description_width='i…

IntSlider(value=768, description='Width:', max=1024, min=512, step=64, style=SliderStyle(description_width='in…

IntSlider(value=768, description='Height:', max=1024, min=512, step=64, style=SliderStyle(description_width='i…

FloatSlider(value=7.5, description='Guidance scale:', max=10.0, step=0.25, style=SliderStyle(description_width…

IntSlider(value=12345, description='Seed:', max=1000000, style=SliderStyle(description_width='initial'))

FloatSlider(value=0.8, description='High noise frac:', max=1.0, step=0.05, style=SliderStyle(description_width…

Checkbox(value=False, description='Use refiner', indent=False, style=CheckboxStyle(description_width='initial'…

In [3]:
# choose checkpoints, pipeline/scheduler classes and weight-loading options

# checkpoints (Hugging Face repo ids)
base_model = "stabilityai/stable-diffusion-xl-base-1.0"
refiner_model = "stabilityai/stable-diffusion-xl-refiner-1.0"

# classes used to build the pipelines and the scheduler
base_pipeline_type = StableDiffusionXLPipeline
refiner_pipeline_type = StableDiffusionXLImg2ImgPipeline
scheduler_type = LMSDiscreteScheduler

# options forwarded to from_pretrained() when the pipelines are loaded
torch_dtype = torch.float16
variant = "fp16"
use_safetensors = True
# safety_checker = None

In [4]:
# load the base pipeline, swap in the chosen scheduler, move it to CUDA and init Compel
base_pipe = base_pipeline_type.from_pretrained(
    base_model,
    cache_dir=CACHE_DIR,
    torch_dtype=torch_dtype,
    variant=variant,
    use_safetensors=use_safetensors,
)

# Build the scheduler from the loaded pipeline's scheduler config; the
# `scheduler` name is kept because the refiner cell reads it as well.
base_pipe.scheduler = scheduler = scheduler_type.from_config(base_pipe.scheduler.config)
# base_pipe.unet = torch.compile(base_pipe.unet, mode="reduce-overhead", fullgraph=True)  # not for windows

# base pipeline to CUDA
if not torch.cuda.is_available():
    print("CUDA IS NOT AVAILABLE")
else:
    print("Base->CUDA...")
    base_pipe = base_pipe.to("cuda")

# if using torch < 2.0
# pipe.enable_xformers_memory_efficient_attention()

# Compel init (TODO: maybe this should move to the "generate image" cell).
# One Compel per text encoder; only the second one also returns pooled embeddings.
base_compel_1, base_compel_2 = (
    Compel(
        tokenizer=tokenizer,
        text_encoder=text_encoder,
        returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
        requires_pooled=wants_pooled,
    )
    for tokenizer, text_encoder, wants_pooled in (
        (base_pipe.tokenizer, base_pipe.text_encoder, False),
        (base_pipe.tokenizer_2, base_pipe.text_encoder_2, True),
    )
)

Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

Base->CUDA...


In [5]:
# generate image (base pipeline)

# variables from ui
prompt = str(prompt1_text_area.value)
prompt_2 = str(prompt2_text_area.value)
negative_prompt = str(negative_prompt1_text_area.value)
negative_prompt_2 = str(negative_prompt2_text_area.value)
print(f"{prompt}\n{prompt_2}\n{negative_prompt}\n{negative_prompt_2}")

use_compel = use_compel_checkbox.value
use_refiner = use_refiner_checkbox.value
num_inference_steps = num_inference_steps_slider.value
guidance_scale = guidance_scale_slider.value
width = width_slider.value
height = height_slider.value

# Hand latents to the refiner only when it is enabled. Without the refiner the
# base must run the full schedule: passing denoising_end would stop it early
# and decode a still-noisy image.
output_type = "latent" if use_refiner else "pil"
denoising_end = high_noise_frac_slider.value if use_refiner else None

# Always (re)define the embedding names so a previous run's tensors never leak
# into this one and later cells can reference them safely.
base_positive_prompt_embeds_1 = base_positive_prompt_embeds_2 = None
base_negative_prompt_embeds_1 = base_negative_prompt_embeds_2 = None
base_positive_prompt_pooled = base_negative_prompt_pooled = None
base_positive_prompt_embeds = base_negative_prompt_embeds = None

if use_compel:
    # Mirror the diffusers fallback: an empty second prompt reuses the first
    # one instead of encoding an empty string with the second text encoder.
    compel_prompt_2 = prompt_2 or prompt
    compel_negative_prompt_2 = negative_prompt_2 or negative_prompt

    # set embeds
    base_positive_prompt_embeds_1 = base_compel_1(prompt)
    base_positive_prompt_embeds_2, base_positive_prompt_pooled = base_compel_2(compel_prompt_2)
    base_negative_prompt_embeds_1 = base_compel_1(negative_prompt)
    base_negative_prompt_embeds_2, base_negative_prompt_pooled = base_compel_2(compel_negative_prompt_2)

    # Pad the conditioning tensors to ensure that they all have the same length
    (base_positive_prompt_embeds_2, base_negative_prompt_embeds_2) = base_compel_2.pad_conditioning_tensors_to_same_length(
        [base_positive_prompt_embeds_2, base_negative_prompt_embeds_2]
    )

    # Concatenate the conditioning tensors of both text encoders along the feature axis
    base_positive_prompt_embeds = torch.cat((base_positive_prompt_embeds_1, base_positive_prompt_embeds_2), dim=-1)
    base_negative_prompt_embeds = torch.cat((base_negative_prompt_embeds_1, base_negative_prompt_embeds_2), dim=-1)

    # Embeddings replace the text prompts entirely.
    base_prompt_kwargs = dict(
        prompt_embeds=base_positive_prompt_embeds,
        pooled_prompt_embeds=base_positive_prompt_pooled,
        negative_prompt_embeds=base_negative_prompt_embeds,
        negative_pooled_prompt_embeds=base_negative_prompt_pooled,
    )
else:
    # Plain text prompts. `prompt` is passed as-is (an empty string is a valid
    # prompt, whereas None with no embeddings makes the pipeline raise); the
    # optional prompts become None when empty so the pipeline defaults apply.
    base_prompt_kwargs = dict(
        prompt=prompt,
        prompt_2=prompt_2 or None,
        negative_prompt=negative_prompt or None,
        negative_prompt_2=negative_prompt_2 or None,
    )

# Seeded generator on the device the pipeline was moved to (CUDA when available).
generator_device = "cuda" if torch.cuda.is_available() else "cpu"
generator = torch.Generator(generator_device).manual_seed(seed_slider.value)

# base
base_output = base_pipe(**base_prompt_kwargs,
                        num_inference_steps=num_inference_steps,
                        generator=generator,
                        guidance_scale=guidance_scale,
                        output_type=output_type,
                        denoising_end=denoising_end,
                        width=width,
                        height=height)

torch.cuda.empty_cache()

Batman, cinematic light, high resolution 3D render

2d, low resolution, low quality, deformed face, deformed eyes, deformed hands, deformed fingers, rubbery skin, latex



  0%|          | 0/32 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [14]:
# TODO: when the refiner is enabled, optionally move base_pipe to the CPU and
# show the unrefined image with postprocess_latent(base_pipe, base_output);
# otherwise show base_output.images[0].

# Release cached GPU memory before the next stage. Guarded so the cell does not
# fail on a machine without CUDA (ipc_collect needs an initialised CUDA runtime),
# matching the availability checks in the pipeline-loading cells.
if torch.cuda.is_available():
    torch.cuda.empty_cache()
    torch.cuda.ipc_collect()
    
# utility functions
def image_grid(imgs, rows, cols):
    """Paste equally sized images into a single rows x cols contact sheet.

    Args:
        imgs: sequence of PIL images; the first image's size is used as the
            cell size for every tile.
        rows: number of grid rows.
        cols: number of grid columns.

    Returns:
        A new RGB image of size (cols * w, rows * h) with the images laid out
        row by row, left to right.

    Raises:
        AssertionError: if len(imgs) != rows * cols.
    """
    assert len(imgs) == rows*cols, f"expected {rows*cols} images, got {len(imgs)}"

    w, h = imgs[0].size
    grid = Image.new('RGB', size=(cols*w, rows*h))

    for i, img in enumerate(imgs):
        # Row-major placement: index -> (row, column) -> pixel offset.
        row, col = divmod(i, cols)
        grid.paste(img, box=(col*w, row*h))
    return grid

def postprocess_latent(pipe, latent):
    """Decode a pipeline's latent output into a single PIL image.

    Args:
        pipe: pipeline object exposing `vae` (with `decode` and
            `config.scaling_factor`) and `image_processor.postprocess`.
        latent: pipeline output whose `.images` attribute holds the latents
            (presumably produced with output_type="latent" — confirm at the call site).

    Returns:
        The first image returned by `pipe.image_processor.postprocess(..., output_type="pil")`.
    """
    # Decoding is inference only: disable autograd so no graph or intermediate
    # activations are kept while the VAE runs.
    with torch.no_grad():
        vae_output = pipe.vae.decode(
            latent.images / pipe.vae.config.scaling_factor, return_dict=False
        )[0].detach()
    return pipe.image_processor.postprocess(vae_output, output_type="pil")[0]

In [None]:
# refiner

# Only run the refiner when it was requested in the UI: the base stage emits
# latents and stops at the high-noise fraction only in that case.
if use_refiner_checkbox.value:
    # Reuse the base VAE and second text encoder so they are not loaded twice.
    refiner_pipe = refiner_pipeline_type.from_pretrained(refiner_model, cache_dir=CACHE_DIR,
                                                         torch_dtype=torch_dtype,
                                                         variant=variant,
                                                         use_safetensors=use_safetensors,
                                                         vae=base_pipe.vae,
                                                         text_encoder_2=base_pipe.text_encoder_2)

    # Give the refiner its own scheduler instance (same class as the base);
    # schedulers keep per-run state, so one object should not serve two pipelines.
    refiner_pipe.scheduler = scheduler_type.from_config(refiner_pipe.scheduler.config)
    # refiner_pipe.unet = torch.compile(refiner_pipe.unet, mode="reduce-overhead", fullgraph=True)  # not for windows

    # refiner pipeline to CUDA
    if torch.cuda.is_available():
        print("Refiner->CUDA...")
        refiner_pipe = refiner_pipe.to("cuda")
    else:
        print("CUDA IS NOT AVAILABLE")

    if use_compel:
        # The refiner only has the second text encoder, so it needs that
        # encoder's embeddings alone, not the base pipeline's concatenation
        # of both encoders (which has a different feature width).
        refiner_prompt_kwargs = dict(
            prompt_embeds=base_positive_prompt_embeds_2,
            pooled_prompt_embeds=base_positive_prompt_pooled,
            negative_prompt_embeds=base_negative_prompt_embeds_2,
            negative_pooled_prompt_embeds=base_negative_prompt_pooled,
        )
    else:
        # Plain text prompts; empty optional prompts become None so the
        # pipeline defaults apply.
        refiner_prompt_kwargs = dict(
            prompt=prompt,
            prompt_2=prompt_2 or None,
            negative_prompt=negative_prompt or None,
            negative_prompt_2=negative_prompt_2 or None,
        )

    # Continue denoising from the point where the base pipeline stopped.
    image_refined = refiner_pipe(**refiner_prompt_kwargs,
                                 num_inference_steps=num_inference_steps,
                                 generator=generator,
                                 guidance_scale=guidance_scale,
                                 denoising_start=high_noise_frac_slider.value,
                                 image=base_output.images,
                                 # original_size=(height, width),
                                 # target_size=(height, width)
                                 ).images[0]

    torch.cuda.empty_cache()
    display(image_refined)
else:
    print("Refiner is disabled; skipping the refiner stage.")