In [None]:
%pip install --quiet --upgrade diffusers transformers accelerate invisible_watermark mediapy

In [None]:
# Run-time switches read by the later cells.
# When True, the LCM LoRA and LCMScheduler are loaded and sampling uses 4 steps instead of 50.
use_latent_consistency: bool = True
# When True, the SDXL refiner pipeline is also loaded and the base output is passed through it.
use_refiner: bool = False

In [None]:
import mediapy as media
import random
import sys
import torch

from diffusers import DiffusionPipeline, LCMScheduler

# Load the SDXL base pipeline, requesting the fp16 safetensors variant
# and float16 as the torch dtype.
pipe = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    variant="fp16",
    use_safetensors=True,
    torch_dtype=torch.float16,
)

In [None]:
if not use_refiner:
  # Base pipeline only: move it to the GPU.
  pipe = pipe.to("cuda")
else:
  # The refiner is built around the base pipeline's second text encoder and VAE
  # (the same objects are passed in rather than loaded again), then moved to the GPU.
  refiner = DiffusionPipeline.from_pretrained(
      "stabilityai/stable-diffusion-xl-refiner-1.0",
      vae=pipe.vae,
      text_encoder_2=pipe.text_encoder_2,
      variant="fp16",
      use_safetensors=True,
      torch_dtype=torch.float16,
  ).to("cuda")

  # With the refiner on the GPU, the base pipeline uses model CPU offload instead of
  # a plain .to("cuda") -- presumably to keep peak GPU memory down; confirm for the target GPU.
  pipe.enable_model_cpu_offload()

In [None]:
# Choose the sampling parameters consumed by the generation cell.
if not use_latent_consistency:
  # Default scheduler: guidance scale 5 with 50 inference steps.
  guidance_scale, num_inference_steps = 5, 50
else:
  # Attach the LCM LoRA weights and rebuild the scheduler as an LCMScheduler
  # from the current scheduler's config.
  pipe.load_lora_weights("latent-consistency/lcm-lora-sdxl")
  pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)

  # LCM settings: guidance scale 1 with only 4 inference steps.
  guidance_scale, num_inference_steps = 1, 4

In [None]:
# Text conditioning for this run.
prompt = "a photo of Pikachu fine dining with a view to the Eiffel Tower"
negative_prompt = "3d, cartoon, anime, (deformed eyes, nose, ears, nose), bad anatomy, ugly"
# NOTE(review): with guidance_scale = 1 (the LCM setting) diffusers presumably disables
# classifier-free guidance, in which case negative_prompt has no effect -- confirm.

# Draw a fresh seed on every run; it is printed below so a result can be reproduced.
seed = random.randint(0, sys.maxsize)

# A single seeded generator is shared by every sampling call in this cell. Previously
# only the base pipeline received it, so the refiner stage sampled from unseeded
# randomness and the printed seed could not reproduce a refined image.
generator = torch.Generator("cuda").manual_seed(seed)

# Base pass: emits latents when the refiner will consume them, PIL images otherwise.
images = pipe(
    prompt = prompt,
    negative_prompt = negative_prompt,
    guidance_scale = guidance_scale,
    num_inference_steps = num_inference_steps,
    output_type = "latent" if use_refiner else "pil",
    generator = generator,
    ).images

if use_refiner:
  # Refiner pass over the base latents, continuing from the same seeded generator.
  images = refiner(
      prompt = prompt,
      negative_prompt = negative_prompt,
      image = images,
      generator = generator,
      ).images

# Report the inputs, display the result inline, and save the first image to disk.
print(f"Prompt:\t{prompt}\nSeed:\t{seed}")
media.show_images(images)
images[0].save("output.jpg")