In [None]:
# Run nvidia-smi to check which GPU (if any) this runtime exposes
!nvidia-smi

# Install the required packages

In [None]:
!pip install accelerate transformers safetensors opencv-python diffusers -qq

In [None]:
# Load SDXL model and ControlNet's canny model
from diffusers import ControlNetModel, StableDiffusionXLControlNetPipeline, AutoencoderKL
from diffusers.utils import load_image
from PIL import Image
import torch
import numpy as np
import cv2

# Strength of the ControlNet conditioning applied during generation
controlnet_conditioning_scale = 0.5  # recommended for good generalization

# All three components below are loaded with the same half-precision dtype
half_precision = torch.float16

# ControlNet checkpoint conditioned on Canny edges, for SDXL
controlnet = ControlNetModel.from_pretrained(
    "diffusers/controlnet-canny-sdxl-1.0-mid", torch_dtype=half_precision
)

# Separately published VAE checkpoint, passed to the pipeline via `vae=`
vae = AutoencoderKL.from_pretrained(
    "madebyollin/sdxl-vae-fp16-fix", torch_dtype=half_precision
)

# SDXL base pipeline assembled from the ControlNet and VAE loaded above
pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    vae=vae,
    controlnet=controlnet,
    torch_dtype=half_precision,
)
# Turn on model CPU offloading for the pipeline
pipe.enable_model_cpu_offload()

In [None]:
# Fetch the Hugging Face logo that serves as the first conditioning picture
hf_logo_url = "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd_controlnet/hf-logo.png"
image = load_image(hf_logo_url)
image

In [None]:
# Turn the loaded picture into a Canny edge map for the ControlNet.
# NOTE: `image` is both the input and the output of this cell, so re-running
# it on its own feeds the edge map (not the loaded picture) back into Canny.
edge_map = cv2.Canny(np.array(image), 100, 200)
# Repeat the edge map along a new last axis to get an H x W x 3 array
edge_rgb = np.stack([edge_map, edge_map, edge_map], axis=2)
image = Image.fromarray(edge_rgb)
image

In [None]:
# Generate an image from the prompt, conditioned on the Canny edge map
prompt = "aerial view, a futuristic research complex in a bright foggy jungle, hard lighting"
negative_prompt = "low quality, bad quality, sketches"

controlnet_conditioning_scale = 0.5  # recommended for good generalization

# Seed the sampler so this cell gives the same picture on every run; the
# later generation cells in this notebook use the same seed.
generator = torch.manual_seed(1025)

images = pipe(
    prompt,
    negative_prompt=negative_prompt,
    image=image,
    controlnet_conditioning_scale=controlnet_conditioning_scale,
    generator=generator,
).images

images[0]

In [None]:
from PIL import Image

# This function is used to show multiple images
def image_grid(imgs, rows, cols):
    """Paste ``imgs`` into a single ``rows`` x ``cols`` grid image.

    Images are placed left to right, top to bottom. Every cell has the size
    of the first image, and each image is pasted at the top-left corner of
    its cell without being resized.

    Args:
        imgs: Sequence of PIL images; must hold exactly ``rows * cols`` items.
        rows: Number of grid rows.
        cols: Number of grid columns.

    Returns:
        A new RGB image of size ``(cols * w, rows * h)``, where ``(w, h)`` is
        the size of ``imgs[0]``.

    Raises:
        AssertionError: If ``len(imgs)`` is not ``rows * cols``.
    """
    assert len(imgs) == rows * cols, (
        f"expected {rows * cols} images for a {rows}x{cols} grid, got {len(imgs)}"
    )

    w, h = imgs[0].size
    grid = Image.new("RGB", size=(cols * w, rows * h))

    for i, img in enumerate(imgs):
        # Row-major placement: index i lands in row i // cols, column i % cols
        row, col = divmod(i, cols)
        grid.paste(img, box=(col * w, row * h))
    return grid

# Try other line-drawing inputs

In [None]:
from diffusers import StableDiffusionControlNetPipeline, ControlNetModel
from diffusers.utils import load_image
import torch

# Download the source picture for the next experiment and keep it untouched
source_url = "https://ice.frostsky.com/2023/08/26/3995bd36b16e2c65d5e7a98ad04264d2.png"
image_original = load_image(source_url)
image_original

In [None]:
# Compute the Canny edge map of `image_original`; the result goes to `image`
edge_map = cv2.Canny(np.array(image_original), 100, 200)
# Repeat the edge map along a new last axis to get an H x W x 3 array
edge_rgb = np.stack([edge_map, edge_map, edge_map], axis=2)
image = Image.fromarray(edge_rgb)
image

In [None]:
# Generate four candidates from the Canny edge map with a fixed seed
prompt = "a handsome cartoon boy, yellow hair, red eyes, red clothes"
negative_prompt = "lowres, bad anatomy, bad hands, text, error, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality, normal quality, jpeg artifacts, signature, watermark, username, blurry"
num_images = 4  # must equal rows * cols of the grid shown below

controlnet_conditioning_scale = 0.6  # a little higher than the 0.5 used in the earlier cells
generator = torch.manual_seed(1025)

images = pipe(
    [prompt] * num_images,
    num_inference_steps=50,
    negative_prompt=[negative_prompt] * num_images,
    image=image,
    controlnet_conditioning_scale=controlnet_conditioning_scale,
    generator=generator,
).images

# Show all four results side by side in a 2 x 2 grid
image_grid(images, 2, 2)

In [None]:
# Landscape example: download the source picture and keep it untouched
landscape_url = "https://ice.frostsky.com/2023/08/26/67c64c8c5be560da05eba8042beb7e54.jpeg"
image_original = load_image(landscape_url)
image_original


In [None]:
# Compute the Canny edge map of `image_original`; the result goes to `image`
edge_map = cv2.Canny(np.array(image_original), 100, 200)
# Repeat the edge map along a new last axis to get an H x W x 3 array
edge_rgb = np.stack([edge_map, edge_map, edge_map], axis=2)
image = Image.fromarray(edge_rgb)
image

In [None]:
# Generate four candidates from the Canny edge map with a fixed seed
prompt = "a high-quality, detailed, and professional image"
negative_prompt = "lowres, bad anatomy, bad hands, text, error, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality, normal quality, jpeg artifacts, signature, watermark, username, blurry"
num_images = 4  # must equal rows * cols of the grid shown below

controlnet_conditioning_scale = 0.5  # recommended for good generalization
generator = torch.manual_seed(1025)

images = pipe(
    [prompt] * num_images,
    num_inference_steps=50,
    negative_prompt=[negative_prompt] * num_images,
    image=image,
    controlnet_conditioning_scale=controlnet_conditioning_scale,
    generator=generator,
).images

# Show all four results side by side in a 2 x 2 grid
image_grid(images, 2, 2)