In [1]:
%load_ext autoreload
%autoreload 2
import os
# Expose only GPU 0 to CUDA. Set here, before torch is imported in the next cell.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
# Machine-specific cache location.
# NOTE(review): HF_CACHE does not appear to be one of the variables documented
# by huggingface_hub (e.g. HF_HOME, HF_HUB_CACHE) — confirm which code reads it.
os.environ["HF_CACHE"] = "/tmp/wendler/hf_cache"
import sys
# Make the parent directory and the NewtonRaphsonInversion checkout importable
# (presumably for the `src`, `main` and `SDLens` imports further down — verify).
sys.path.append("..")
sys.path.append("../NewtonRaphsonInversion")

In [2]:
import webdataset as wds
import logging
import torch
import io

def url_to_dataloader(url, num_workers=4, batch_size=16):
    """Build a WebDataset loader that yields batches of ``(latents, captions)``.

    Shards are split across nodes and workers, expanded into samples, filtered,
    shuffled through a 5000-sample buffer and batched inside the pipeline.

    Args:
        url: Shard specification handed to ``wds.SimpleShardList``.
        num_workers: Number of worker processes used by ``wds.WebLoader``.
        batch_size: Samples per batch. Incomplete trailing batches are dropped
            (``partial=False``).

    Returns:
        A ``wds.WebLoader`` whose items are ``(image, txt)`` batches, where
        ``image`` comes from each sample's ``latent.pt`` entry converted to
        float32 on CPU and ``txt`` is the UTF-8 decoded caption.
    """
    def log_and_continue(exn):
        """Call in an exception handler to ignore any exception, issue a warning, and continue."""
        logging.warning(f'Handling webdataset error ({repr(exn)}). Ignoring.')
        return True

    def has_latent_and_caption(sample):
        """Keep only samples that carry both fields used downstream.

        ``wds.rename`` below raises on a sample that lacks ``txt``, which would
        abort iteration, so caption-less samples are dropped here together
        with latent-less ones.
        """
        return 'latent.pt' in sample and 'txt' in sample

    def load_latent(z):
        """Deserialize a ``latent.pt`` payload into a float32 CPU tensor."""
        # NOTE(security): torch.load unpickles its input, so only point this
        # loader at trusted shards. Consider weights_only=True if the stored
        # objects are plain tensors and the installed torch supports it.
        return torch.load(io.BytesIO(z), map_location='cpu').to(torch.float32)

    def decode_caption(raw):
        """Decode the raw caption bytes as UTF-8."""
        return raw.decode("utf-8")

    pipeline = [
        wds.SimpleShardList(url),
        wds.split_by_node,
        wds.split_by_worker,
        wds.tarfile_to_samples(handler=log_and_continue),
        wds.select(has_latent_and_caption),
        wds.shuffle(bufsize=5000, initial=1000),
        wds.rename(image="latent.pt", txt="txt"),
        wds.map_dict(image=load_latent, txt=decode_caption),
        wds.to_tuple("image", "txt"),
        wds.batched(batch_size, partial=False),
    ]

    dataset = wds.DataPipeline(*pipeline)

    # Batching and shuffling already happen inside the pipeline, so the loader
    # itself is told not to batch or shuffle again.
    loader = wds.WebLoader(
        dataset, batch_size=None, shuffle=False, num_workers=num_workers,
    )
    return loader

In [3]:
# Ten shards (000000.tar .. 000009.tar) given as a brace pattern. The absolute
# path is machine-specific — adjust it when running elsewhere.
url = "/share/datasets/datasets/laicoyo/{000000..000009}.tar"
# Uses the url_to_dataloader defaults (num_workers=4, batch_size=16).
loader = url_to_dataloader(url)

In [None]:
# Pull one batch to sanity-check the loader. Index 0 holds the "image"
# (latent) component and index 1 the "txt" component, per wds.to_tuple above.
d = next(iter(loader))
# NOTE(review): this echoes the entire batch; a shape or a short slice may be
# easier to read.
d

In [None]:
from src.sdxl_inversion_pipeline import SDXLDDIMPipeline
from main import ImageEditorDemo

In [10]:
from SDLens import HookedStableDiffusionXLImg2ImgPipeline
from src.config import RunConfig
from ipywidgets import Text, VBox
import PIL
from src.euler_scheduler import MyEulerAncestralDiscreteScheduler
from diffusers.pipelines.auto_pipeline import AutoPipelineForImage2Image
from src.sdxl_inversion_pipeline import SDXLDDIMPipeline
from PIL import Image
from diffusers.utils.torch_utils import randn_tensor
from IPython.display import display

# Hugging Face model id used by the notebook. Code below compares it against
# "stabilityai/stable-diffusion-xl-base-1.0" to pick resolution-dependent settings.
model = "stabilityai/sdxl-turbo"

def inversion_callback(pipe, step, timestep, callback_kwargs):
    """No-op step-end hook, passed as ``callback_on_step_end`` when inverting.

    ``pipe``, ``step`` and ``timestep`` are ignored; ``callback_kwargs`` is
    returned unchanged.
    """
    return callback_kwargs


def inference_callback(pipe, step, timestep, callback_kwargs):
    """No-op step-end hook, passed as ``callback_on_step_end`` when editing.

    ``pipe``, ``step`` and ``timestep`` are ignored; ``callback_kwargs`` is
    returned unchanged.
    """
    return callback_kwargs

class ImageEditorDemo:
    """Invert one latent, then regenerate it under arbitrary prompts.

    Construction installs a fixed, seeded list of noise tensors on the
    schedulers of both pipelines and immediately inverts ``latents`` with
    ``pipe_inversion``. ``edit`` afterwards runs ``pipe_inference`` starting
    from the stored inverted latent.

    Attributes:
        latents: The latents passed to the constructor.
        last_latent: Inverted latent that ``edit`` starts from.
        original_latent: Same object as ``last_latent`` after construction.
        inv_hp: Values forwarded to ``invert_latents`` as ``inv_hp``.
        edit_cfg: Guidance scale ``edit`` uses when none is given.
    """

    def __init__(self, pipe_inversion, pipe_inference, latents, prompts, cfg, edit_cfg=1.2):
        """Configure both pipelines and invert ``latents``.

        The pipelines are used as given; they are not moved to any device here.

        Args:
            pipe_inversion: Pipeline providing ``invert_latents``, ``unet``,
                ``scheduler`` and ``scheduler_inference``.
            pipe_inference: Pipeline that ``edit`` calls; must provide
                ``scheduler``.
            latents: Tensor to invert. Its last three dimensions define the
                shape of the scheduler noise (assumed to be channels, height,
                width — confirm against the dataset).
            prompts: Prompt(s) forwarded to ``invert_latents``.
            cfg: Run configuration; ``num_inversion_steps``,
                ``num_inference_steps``, ``guidance_scale`` and
                ``inversion_max_step`` are read from it.
            edit_cfg: Default guidance scale for ``edit``.
        """
        self.pipe_inversion = pipe_inversion
        self.pipe_inference = pipe_inference
        self.load_image = True

        # One batch-size-1 noise tensor per inversion step. The shape follows
        # the latents actually being inverted instead of a resolution inferred
        # from a module-level model name, and the tensors are created for the
        # UNet's own device instead of a hard-coded "cuda:0", so this class no
        # longer depends on notebook globals or on a CUDA device by itself.
        g_cpu = torch.Generator().manual_seed(7865)
        latents_size = (1, *latents.shape[-3:])
        unet = pipe_inversion.unet
        print(unet.dtype)
        noise = [
            randn_tensor(latents_size, dtype=unet.dtype, device=unet.device, generator=g_cpu)
            for _ in range(cfg.num_inversion_steps)
        ]
        print(noise[0].shape)

        # All three schedulers receive the same noise list.
        pipe_inversion.scheduler.set_noise_list(noise)
        pipe_inference.scheduler.set_noise_list(noise)
        pipe_inversion.scheduler_inference.set_noise_list(noise)
        pipe_inversion.set_progress_bar_config(disable=True)
        pipe_inference.set_progress_bar_config(disable=True)

        self.cfg = cfg
        self.pipe_inversion.cfg = cfg
        self.pipe_inference.cfg = cfg
        self.inv_hp = [2, 0.1, 0.2]  # niter, alpha, lr; 2, 0.1, 0.2 is the default
        self.edit_cfg = edit_cfg

        self.latents = latents
        self.last_latent = self.invert(latents, prompts)
        self.original_latent = self.last_latent

    def invert(self, latents, base_prompts):
        """Invert ``latents`` under ``base_prompts`` with the inversion pipeline.

        Args:
            latents: Latents forwarded to ``invert_latents``.
            base_prompts: Prompt(s) forwarded to ``invert_latents``.

        Returns:
            Element ``[0][0]`` of whatever ``invert_latents`` returns
            (presumably the inverted latent — confirm against the pipeline).
        """
        max_step = self.cfg.inversion_max_step
        result = self.pipe_inversion.invert_latents(
            prompt=base_prompts,
            num_inversion_steps=self.cfg.num_inversion_steps,
            num_inference_steps=self.cfg.num_inference_steps,
            latents=latents,
            guidance_scale=self.cfg.guidance_scale,
            callback_on_step_end=inversion_callback,
            strength=max_step,
            denoising_start=1.0 - max_step,
            inv_hp=self.inv_hp,
        )
        return result[0][0]

    def edit(self, target_prompt, guidance_scale=None):
        """Generate an image for ``target_prompt`` from the stored inverted latent.

        Args:
            target_prompt: Prompt passed to the inference pipeline.
            guidance_scale: Guidance scale to use; falls back to
                ``self.edit_cfg`` when ``None``.

        Returns:
            The first image produced by the pipeline, resized to 512x512.
        """
        if guidance_scale is None:
            guidance_scale = self.edit_cfg
        max_step = self.cfg.inversion_max_step
        image = self.pipe_inference(
            prompt=target_prompt,
            num_inference_steps=self.cfg.num_inference_steps,
            negative_prompt="",
            callback_on_step_end=inference_callback,
            image=self.last_latent,
            strength=max_step,
            denoising_start=1.0 - max_step,
            guidance_scale=guidance_scale,
        ).images[0]
        return image.resize((512, 512))

In [None]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# The same model check used to be repeated for the config and for loading;
# evaluate it once.
is_sdxl_base = model == "stabilityai/stable-diffusion-xl-base-1.0"

# SDXL base runs at 1024px with 20 steps; any other model (e.g. sdxl-turbo)
# at 512px with 4 steps.
image_size = (1024, 1024) if is_sdxl_base else (512, 512)
num_steps = 20 if is_sdxl_base else 4
config = RunConfig(num_inference_steps=num_steps,
                   num_inversion_steps=num_steps,
                   guidance_scale=0.0,
                   inversion_max_step=0.6)  # 4,4,0,0.6 is default settings; 0.6 and 0.7 look the same

dtype = torch.float32
scheduler_class = MyEulerAncestralDiscreteScheduler

if is_sdxl_base:
    load_kwargs = dict(torch_dtype=dtype,
                       device_map="balanced",
                       variant=("fp16" if dtype == torch.float16 else None))
    pipe_inversion = SDXLDDIMPipeline.from_pretrained(model, **load_kwargs)
    pipe_inference = HookedStableDiffusionXLImg2ImgPipeline.from_pretrained(model, **load_kwargs)
    if dtype == torch.float32:
        pipe_inversion.text_encoder_2.to(dtype)
        pipe_inference.text_encoder_2.to(dtype)
else:
    # The first cell sets HF_CACHE, not HF_HOME, so indexing os.environ["HF_HOME"]
    # raises KeyError unless the shell exported it. Prefer HF_HOME when present,
    # fall back to HF_CACHE, and otherwise let from_pretrained use its default
    # cache (cache_dir=None).
    hf_cache_dir = os.environ.get("HF_HOME") or os.environ.get("HF_CACHE")
    # torch_dtype is passed so that changing `dtype` above affects this branch too.
    load_kwargs = dict(torch_dtype=dtype,
                       use_safetensors=True,
                       safety_checker=None,
                       cache_dir=hf_cache_dir)
    pipe_inversion = SDXLDDIMPipeline.from_pretrained(model, **load_kwargs).to(device)
    pipe_inference = HookedStableDiffusionXLImg2ImgPipeline.from_pretrained(model, **load_kwargs).to(device)

# Swap in the custom scheduler on both pipelines. The inversion pipeline also
# gets a second scheduler built from the inference pipeline's scheduler config.
pipe_inference.scheduler            = scheduler_class.from_config(pipe_inference.scheduler.config)
pipe_inversion.scheduler            = scheduler_class.from_config(pipe_inversion.scheduler.config)
pipe_inversion.scheduler_inference  = scheduler_class.from_config(pipe_inference.scheduler.config)

In [None]:
# Shape of the latent batch fetched earlier. `latents` is only assigned in the
# next cell, so referencing it here fails on a fresh kernel (Restart & Run All);
# `d[0]` is the latent component of the batch pulled from the same loader.
d[0].shape

In [None]:
# Handle to a display area that is filled with the edited image at the end.
h = display(display_id='my-display')

# Fetch one batch and multiply the latents by the VAE's configured scaling
# factor (presumably the dataset stores unscaled VAE latents — TODO confirm).
latents, prompts = next(iter(loader))
latents *= pipe_inference.vae.config.scaling_factor

# Invert the first sample of the batch under its own caption. The latent is
# moved to the `device` chosen when the pipelines were loaded rather than
# unconditionally to CUDA.
editor = ImageEditorDemo(pipe_inversion, pipe_inference, latents[0].unsqueeze(0).to(device), prompts[0], config, edit_cfg=1.2)
print(prompts[0])

# Regenerate from the inverted latent with the same caption and show the result.
h.display(editor.edit(prompts[0]))