In [None]:
import sys
sys.path.append("../../")

import numpy as np
import matplotlib.pyplot as plt
import lpips
import torch
from skimage.metrics import peak_signal_noise_ratio as psnr
from skimage.metrics import structural_similarity as ssim
from diffusers.training_utils import free_memory
from accelerate.utils import set_seed

from inversions.unet_based.nti import SDInversionPipeline, SDXLInversionPipeline, CustomDDIMInversionScheduler, image2latents, latents2image
from inversions.utils import pil2tensor

In [None]:
# Fix the random seed before any other object is constructed.
set_seed(666)

# Input image and the text prompt passed to every pipeline below.
image_path = "../../demo/alley.jpg"
prompt = "A narrow alley way with a building in the background."

# Loading / sampling settings shared by all model sections.
device = "cuda"
variant = "fp16"
torch_dtype = torch.float16
num_inference_steps = 50

# LPIPS perceptual metric using the AlexNet backbone.
lpips_loss = lpips.LPIPS(net="alex")

## Stable Diffusion v1.5

In [None]:
# Load the Stable Diffusion v1.5 checkpoint with the custom DDIM inversion scheduler.
# NOTE(review): model_id is an absolute, machine-specific path.
model_id = "/home/ailab/model_weights/stable-diffusion/stable-diffusion-v1-5/"
scheduler = CustomDDIMInversionScheduler.from_pretrained(model_id, subfolder="scheduler")
pipe = SDInversionPipeline.from_pretrained(
    model_id,
    scheduler=scheduler,
    torch_dtype=torch_dtype,
    variant=variant,
)
pipe.to(device)

# The same prompt, guidance scale and step count are passed to both the
# inversion call and the reconstruction call.
sampling_kwargs = dict(
    prompt=prompt,
    guidance_scale=7.5,
    num_inference_steps=num_inference_steps,
)

# Invert the input image.
inv_result = pipe.inverse(image=image_path, **sampling_kwargs)

# VAE-only round trip (encode, then decode) of the image returned by the
# inversion; used as a comparison point in the metrics cell.
ori_image = inv_result.ori_image
vae_latent = image2latents(pipe, ori_image)
vae_recon = latents2image(pipe, vae_latent)

# Reconstruct starting from the inverted latent zT, feeding back the
# unconditional embeddings returned by the inversion.
recon_image = pipe(
    latents=inv_result.zT,
    uncond_embeds=inv_result.uncond_embeds,
    **sampling_kwargs,
).images[0]

# Drop the references to the pipeline and the inversion result, then let
# free_memory() release what it can before the next model is loaded.
del pipe, inv_result
free_memory()

In [None]:
# Convert each PIL image to an array once; the arrays feed PSNR, SSIM and the plot.
ori_arr = np.array(ori_image)
vae_arr = np.array(vae_recon)
recon_arr = np.array(recon_image)

# NOTE(review): LPIPS expects inputs scaled to [-1, 1] unless normalize=True is
# passed; confirm the output range of pil2tensor.

# Original vs. VAE encode/decode round trip.
vae_psnr = psnr(ori_arr, vae_arr)
vae_ssim = ssim(ori_arr, vae_arr, win_size=11, channel_axis=2)
vae_lpips = lpips_loss(pil2tensor(ori_image), pil2tensor(vae_recon)).item()
print(f"[VAE Reconstruction] PSNR: {vae_psnr:.2f}, SSIM: {vae_ssim:.4f}, LPIPS: {vae_lpips:.4f}")

# Original vs. reconstruction from the inverted latent.
psnr_score = psnr(ori_arr, recon_arr)
ssim_score = ssim(ori_arr, recon_arr, win_size=11, channel_axis=2)
lpips_score = lpips_loss(pil2tensor(ori_image), pil2tensor(recon_image)).item()
print(f"[NTI] PSNR: {psnr_score:.2f}, SSIM: {ssim_score:.4f}, LPIPS: {lpips_score:.4f}")

# Side-by-side view: original, VAE round trip, NTI reconstruction.
fig, axs = plt.subplots(1, 3, figsize=(15, 5))
panels = (("Origin", ori_arr), ("VAE Recon.", vae_arr), ("NTI", recon_arr))
for ax, (title, arr) in zip(axs, panels):
    ax.set_title(title)
    ax.imshow(arr)
plt.show()

## Stable Diffusion v2.1

In [None]:
# Load the Stable Diffusion v2.1 checkpoint with the custom DDIM inversion scheduler.
# NOTE(review): model_id is an absolute, machine-specific path.
model_id = "/home/ailab/model_weights/stable-diffusion/stable-diffusion-v2-1/"
scheduler = CustomDDIMInversionScheduler.from_pretrained(model_id, subfolder="scheduler")
pipe = SDInversionPipeline.from_pretrained(
    model_id,
    scheduler=scheduler,
    torch_dtype=torch_dtype,
    variant=variant,
)
pipe.to(device)

# The same prompt, guidance scale and step count are passed to both the
# inversion call and the reconstruction call.
sampling_kwargs = dict(
    prompt=prompt,
    guidance_scale=7.5,
    num_inference_steps=num_inference_steps,
)

# Invert the input image.
inv_result = pipe.inverse(image=image_path, **sampling_kwargs)

# VAE-only round trip (encode, then decode) of the image returned by the
# inversion; used as a comparison point in the metrics cell.
ori_image = inv_result.ori_image
vae_latent = image2latents(pipe, ori_image)
vae_recon = latents2image(pipe, vae_latent)

# Reconstruct starting from the inverted latent zT, feeding back the
# unconditional embeddings returned by the inversion.
recon_image = pipe(
    latents=inv_result.zT,
    uncond_embeds=inv_result.uncond_embeds,
    **sampling_kwargs,
).images[0]

# Drop the references to the pipeline and the inversion result, then let
# free_memory() release what it can before the next model is loaded.
del pipe, inv_result
free_memory()

In [None]:
# Convert each PIL image to an array once; the arrays feed PSNR, SSIM and the plot.
ori_arr = np.array(ori_image)
vae_arr = np.array(vae_recon)
recon_arr = np.array(recon_image)

# NOTE(review): LPIPS expects inputs scaled to [-1, 1] unless normalize=True is
# passed; confirm the output range of pil2tensor.

# Original vs. VAE encode/decode round trip.
vae_psnr = psnr(ori_arr, vae_arr)
vae_ssim = ssim(ori_arr, vae_arr, win_size=11, channel_axis=2)
vae_lpips = lpips_loss(pil2tensor(ori_image), pil2tensor(vae_recon)).item()
print(f"[VAE Reconstruction] PSNR: {vae_psnr:.2f}, SSIM: {vae_ssim:.4f}, LPIPS: {vae_lpips:.4f}")

# Original vs. reconstruction from the inverted latent.
psnr_score = psnr(ori_arr, recon_arr)
ssim_score = ssim(ori_arr, recon_arr, win_size=11, channel_axis=2)
lpips_score = lpips_loss(pil2tensor(ori_image), pil2tensor(recon_image)).item()
print(f"[NTI] PSNR: {psnr_score:.2f}, SSIM: {ssim_score:.4f}, LPIPS: {lpips_score:.4f}")

# Side-by-side view: original, VAE round trip, NTI reconstruction.
fig, axs = plt.subplots(1, 3, figsize=(15, 5))
panels = (("Origin", ori_arr), ("VAE Recon.", vae_arr), ("NTI", recon_arr))
for ax, (title, arr) in zip(axs, panels):
    ax.set_title(title)
    ax.imshow(arr)
plt.show()

## SDXL

In [None]:
# Load the SDXL base checkpoint with the custom DDIM inversion scheduler.
# NOTE(review): model_id is an absolute, machine-specific path.
model_id = "/home/ailab/model_weights/stable-diffusion/sdxl-base-v1.0/"
scheduler = CustomDDIMInversionScheduler.from_pretrained(model_id, subfolder="scheduler")
pipe = SDXLInversionPipeline.from_pretrained(
    model_id,
    scheduler=scheduler,
    torch_dtype=torch_dtype,
    variant=variant,
)
pipe.to(device)

# The same prompt, guidance scale and step count are passed to both the
# inversion call and the reconstruction call.
sampling_kwargs = dict(
    prompt=prompt,
    guidance_scale=5.0,
    num_inference_steps=num_inference_steps,
)

# Invert the input image.
inv_result = pipe.inverse(image=image_path, **sampling_kwargs)

# VAE-only round trip (encode, then decode) of the image returned by the
# inversion; used as a comparison point in the metrics cell.
ori_image = inv_result.ori_image
vae_latent = image2latents(pipe, ori_image)
vae_recon = latents2image(pipe, vae_latent)

# Reconstruct starting from the inverted latent zT, feeding back the
# unconditional embeddings returned by the inversion.
recon_image = pipe(
    latents=inv_result.zT,
    uncond_embeds=inv_result.uncond_embeds,
    **sampling_kwargs,
).images[0]

# Drop the references to the pipeline and the inversion result, then let
# free_memory() release what it can.
del pipe, inv_result
free_memory()

In [None]:
# Convert each PIL image to an array once; the arrays feed PSNR, SSIM and the plot.
ori_arr = np.array(ori_image)
vae_arr = np.array(vae_recon)
recon_arr = np.array(recon_image)

# NOTE(review): LPIPS expects inputs scaled to [-1, 1] unless normalize=True is
# passed; confirm the output range of pil2tensor.

# Original vs. VAE encode/decode round trip.
vae_psnr = psnr(ori_arr, vae_arr)
vae_ssim = ssim(ori_arr, vae_arr, win_size=11, channel_axis=2)
vae_lpips = lpips_loss(pil2tensor(ori_image), pil2tensor(vae_recon)).item()
print(f"[VAE Reconstruction] PSNR: {vae_psnr:.2f}, SSIM: {vae_ssim:.4f}, LPIPS: {vae_lpips:.4f}")

# Original vs. reconstruction from the inverted latent.
# The reconstruction comes from the NTI pipeline (it is sampled with the
# uncond_embeds returned by the inversion), so it is labelled "NTI" like the
# other model sections rather than "DDIM Inversion".
psnr_score = psnr(ori_arr, recon_arr)
ssim_score = ssim(ori_arr, recon_arr, win_size=11, channel_axis=2)
lpips_score = lpips_loss(pil2tensor(ori_image), pil2tensor(recon_image)).item()
print(f"[NTI] PSNR: {psnr_score:.2f}, SSIM: {ssim_score:.4f}, LPIPS: {lpips_score:.4f}")

# Side-by-side view: original, VAE round trip, NTI reconstruction.
fig, axs = plt.subplots(1, 3, figsize=(15, 5))
panels = (("Origin", ori_arr), ("VAE Recon.", vae_arr), ("NTI", recon_arr))
for ax, (title, arr) in zip(axs, panels):
    ax.set_title(title)
    ax.imshow(arr)
plt.show()