In [None]:
[colab]: <https://colab.research.google.com/assets/colab-badge.svg>
[colab-cin]: <https://colab.research.google.com/github/vanga/glid-3-xl/blob/master/notebooks/glid-3-xl.ipynb>

[![][colab]][colab-cin]

This notebook is a modified version of [this Kaggle notebook](https://www.kaggle.com/code/litevex/lite-s-latent-diffusion-v9-with-gradio), in an attempt to make it simpler and more configurable. It is built on top of the [glid-3-xl fork](https://github.com/vanga/glid-3-xl).


**Known issues**
* selecting clip_model other than the default + `jack` configuration leads to a tensor size mismatch error (carried over from the original notebook)
* ViT-L/14 may cause memory issues on GPUs other than A100.

In [None]:
!pip install -q omegaconf>=2.0.0 pytorch-lightning>=1.0.8 torch-fidelity einops
!sudo apt -y -qq install imagemagick 
!pip install -qq timm
!pip install -q gradio
!pip install -q git+https://github.com/openai/CLIP.git


In [None]:
# Root directory under which the setup cell clones the repositories and creates
# the `working` folder. It is defined again after the runtime restart, because
# the restart clears all Python variables.
base_dir = "/content/glid-3-xl"

In [None]:
%cd $base_dir
!git clone -qq https://github.com/CompVis/latent-diffusion
!git clone -qq https://github.com/CompVis/taming-transformers

!pip install -e -qq ./taming-transformers

%cd $base_dir/latent-diffusion
!git clone -qq https://github.com/Lin-Sinorodin/SwinIR_wrapper.git
!git clone https://github.com/vangap/glid-3-xl
!pip install -qq -e ./glid-3-xl

!pip install -qq git+https://github.com/lucidrains/DALLE-pytorch


!mkdir -p $base_dir/working

print("Restarting runtime, continue running next cells afterwards")

import os

os.kill(os.getpid(), 9)

In [None]:
import torch

# Root folder for everything this notebook downloads or writes.
base_dir = "/content/glid-3-xl"

# Checkpoint locations, all inside the cloned glid-3-xl repository.
model_base_dir = base_dir + "/latent-diffusion/glid-3-xl/checkpoints"
finetune_path = model_base_dir + "/finetune.pt"
base_path = model_base_dir + "/diffusion.pt"
ldm_first_stage_path = model_base_dir + "/kl-f8.pt"
inpaint_path = model_base_dir + "/inpaint.pt"
bert_path = model_base_dir + "/bert.pt"

clip_variant = 'ViT-L/14'

# Use the first CUDA device when one is available, otherwise fall back to CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')


# The options below must be chosen BEFORE pressing Run All
# (use run > restart to change them afterwards).

# Available models:
'''
"jack": This is the base model finetuned on a clean dataset of photographs by Jack000. It will produce better, higher resolution realistic images without watermarks,
but might not be as good at flat illustrations, some prompts and writing text

"base": This is the base 1.6B model released by CompVis trained on LAION-400M. It is better at illustrations but will sometimes produce blurry and watermarked images,
write text even if unwanted and follow the prompt less.

"inpaint": This is an inpainting model trained by jack0. If you use this, you have to set a mask image and use the Kaggle GUI.
The mask should be the image size and black for spots to fill in, and white for areas to keep. (also try to avoid antialiasing)
'''
which_model = "base" # one of: jack, base, inpaint

# Available GUIs:
'''
Kaggle: GUI using Jupyter Forms. It will show up in the notebook and have a small progress preview if you're generating a single image, but the layout is simpler,
there's no API or queue and you can't share it with others

Gradio: [Does not support the inpaint model] GUI using Gradio. It will give you a gradio.app link (as well as embed in the notebook) with a better layout
that you can share with others, as well as an inbuilt API, but there's no progress preview.
'''
which_gui = "gradio" # one of: kaggle, gradio

# Number of diffusion steps. Little is gained above 25; lower values may lose detail.
steps = 25

In [None]:
!mkdir -p $model_base_dir
%cd $model_base_dir
!wget –quiet https://dall-3.com/models/glid-3-xl/bert.pt
!wget –quiet https://dall-3.com/models/glid-3-xl/kl-f8.pt
!wget –quiet https://dall-3.com/models/glid-3-xl/diffusion.pt
!wget –quiet https://dall-3.com/models/glid-3-xl/finetune.pt
!wget –quiet https://dall-3.com/models/glid-3-xl/inpaint.pt

In [None]:
%cd $base_dir/latent-diffusion/
from SwinIR_wrapper.SwinIR_wrapper import SwinIR_SR
import urllib.request
import matplotlib.pyplot as plt

#@title Setup Super Resolution Model { run: "auto" }
pretrained_model = "real_sr x4" #@param ["real_sr x4", "classical_sr x2", "classical_sr x3", "classical_sr x4", "classical_sr x8", "lightweight x2", "lightweight x3", "lightweight x4"]

model_type, scale = pretrained_model.split(' ')
scale = int(scale[1])

# initialize super resolution model
sr = SwinIR_SR(model_type, scale)

print(f'Loaded {pretrained_model} successfully')

#### methods

In [None]:
import gc
import io
import math
import sys

sys.path.append(f"{base_dir}/latent-diffusion/glid-3-xl")

from PIL import Image, ImageOps
import requests
import torch
from torch import nn
from torch.nn import functional as F
from torchvision import transforms
from torchvision.transforms import functional as TF
from tqdm.notebook import tqdm

import numpy as np

from guided_diffusion.script_util import create_model_and_diffusion, model_and_diffusion_defaults
from sample_api import *

from dalle_pytorch import DiscreteVAE, VQGanVAE

from einops import rearrange
from math import log2, sqrt

import argparse
import pickle

import os

from encoders.modules import BERTEmbedder

import clip

# Build the run arguments for the selected model (removed a stray closing
# parenthesis that made this cell a SyntaxError).
args = Args(which_model)

# Load the models described by `args`; both results are later passed to do_run.
model_params, models = load_models(args)

In [None]:
import cv2
def swinUpscale(path, showLarger):
    """Sharpen the image at `path` in place using the super-resolution model `sr`.

    The image is upscaled with `sr.upscale` and then shrunk again, which makes
    the result look sharper than the original.

    Args:
        path: Path of the image file to read and overwrite.
        showLarger: If falsy, the upscaled image is resized to 0.25 of its size;
            otherwise it is resized to 0.5 of its size.

    Raises:
        FileNotFoundError: If the image at `path` cannot be read.
    """
    smallImg = cv2.imread(path, cv2.IMREAD_COLOR)
    # cv2.imread signals failure by returning None instead of raising.
    if smallImg is None:
        raise FileNotFoundError(f"Could not read image: {path}")
    hqImg = sr.upscale(smallImg)
    # now downscale again, so it looks sharp
    factor = 0.5 if showLarger else 0.25
    resized_image = cv2.resize(hqImg, (0, 0), fx=factor, fy=factor)
    cv2.imwrite(path, resized_image)
gc.collect()

#### Gradio

In [None]:
# import ipywidgets as widgets
import time
# from IPython.display import display
from IPython.display import clear_output
from IPython.display import Image as PImage
# from IPython.display import display as PDisplay
from os.path import exists
import shutil
import glob
import gradio as gr

# Folder that adv_run wipes, recreates and reads generated PNG files from.
# The path is relative, so it resolves against the working directory set below.
output_dir = "./output"

# Work from inside the cloned glid-3-xl repository.
%cd $base_dir/latent-diffusion/glid-3-xl
def adv_run(prompt,negative,init_image,skips,guidance,batches,amount_per_batch,width,height,clip_rerank,swin_input,show_large):
    """Gradio callback: run one generation and return the resulting image.

    Copies the UI values onto the shared `args` object, empties `output_dir`,
    calls `do_run`, optionally sharpens the results with `swinUpscale`, and
    returns a PIL image.

    Args:
        prompt: Text prompt (stored as `args.text`).
        negative: Negative prompt (stored as `args.negative`).
        init_image: Path of an optional init image, or None.
        skips: Value for `args.skip_timesteps`.
        guidance: Value for `args.guidance_scale`.
        batches: Value for `args.num_batches`.
        amount_per_batch: Value for `args.batch_size`.
        width: Value for `args.width`.
        height: Value for `args.height`.
        clip_rerank: Value for `args.clip_score`.
        swin_input: If true, run `swinUpscale` on the generated PNG files.
        show_large: Forwarded to `swinUpscale` as its `showLarger` argument.

    Returns:
        A montage grid of all PNGs when the batch size is greater than 1, the
        single `_progress_00000.png` image when it is 1, and None otherwise.

    Raises:
        subprocess.CalledProcessError: If the `montage` command fails.
    """
    import subprocess  # local import: only needed for the montage call

    args.text = prompt
    args.negative = negative
    args.init_image = init_image  # None means "no init image"
    args.skip_timesteps = skips
    args.guidance_scale = guidance
    args.num_batches = batches
    args.batch_size = amount_per_batch
    args.width = width
    args.height = height
    args.clip_score = clip_rerank

    # Start every run from an empty output directory.
    shutil.rmtree(output_dir, True)
    os.makedirs(output_dir, exist_ok=True)
    do_run(args, model_params, models)
    print(f"current working directory: {os.getcwd()}")

    if args.batch_size > 1:
        # Sorted to match the order in which a shell glob would list the files.
        png_files = sorted(glob.glob(f"{output_dir}/*.png"))
        if swin_input:
            for file in tqdm(png_files):
                swinUpscale(file, show_large)
        grid_path = f"{base_dir}/grid.png"
        # Build the grid from output_dir, the directory the images were just
        # read from above, rather than from a separate hard-coded folder.
        subprocess.run(
            ["montage", "-geometry", "+1+1", "-background", "black", *png_files, grid_path],
            check=True,
        )
        return Image.open(grid_path)

    if args.batch_size == 1:
        single_path = f"{output_dir}/_progress_00000.png"
        if swin_input:
            swinUpscale(single_path, show_large)
        return Image.open(single_path)

# Input widgets, listed in the same order as the parameters of adv_run.
init_image_input = gr.inputs.Image(shape=(256, 256), optional=True, type="filepath")
skip_slider = gr.inputs.Slider(0, steps, 1, default=0, label="Step Skips (required for init image)")
guidance_slider = gr.inputs.Slider(1, 15, 1, default=5)
batches_slider = gr.inputs.Slider(1, 32, 1, default=1)
batch_size_slider = gr.inputs.Slider(1, 16, 1, default=1)
width_slider = gr.inputs.Slider(16, 512, 16, default=256)
height_slider = gr.inputs.Slider(16, 512, 16, default=256)
rerank_checkbox = gr.inputs.Checkbox(default=False, label="Clip Rerank (for batch images)", optional=False)
swin_checkbox = gr.inputs.Checkbox(default=True, label="Increase sharpness using SwinIR", optional=False)
large_checkbox = gr.inputs.Checkbox(default=False, label="Show SwinIR results as 512x512 (less sharp)", optional=False)

iface = gr.Interface(
    fn=adv_run,
    inputs=[
        "text",  # prompt
        "text",  # negative prompt
        init_image_input,
        skip_slider,
        guidance_slider,
        batches_slider,
        batch_size_slider,
        width_slider,
        height_slider,
        rerank_checkbox,
        swin_checkbox,
        large_checkbox,
    ],
    outputs="image",
)
# Launch with a public share link and request queueing; debug=True keeps the cell running.
iface.launch(share=True, debug=True, inline=False, enable_queue=True)