In [1]:
from pathlib import Path
import shutil

# Hugging Face Hub repository the original checkpoint files are downloaded from.
HF_REPO_ID = "williamberman/vq-diffusion-orig"

# Checkpoint file names, shared by the hub repo, google drive and /content.
ITHQ_WEIGHTS_FILENAME = "ithq_learnable.pth"
ITHQ_VQVAE_WEIGHTS_FILENAME = "ithq_vqvae.pth"

# Where google drive gets mounted, and the "MyDrive" folder inside that mount.
G_DRIVE_MOUNT_PATH = "/content/drive"
G_DRIVE_PATH = Path(G_DRIVE_MOUNT_PATH, "MyDrive")

# Copies of the checkpoints kept in google drive.
G_DRIVE_ITHQ_WEIGHTS_PATH = G_DRIVE_PATH.joinpath(ITHQ_WEIGHTS_FILENAME)
G_DRIVE_ITHQ_VQVAE_WEIGHTS_PATH = G_DRIVE_PATH.joinpath(ITHQ_VQVAE_WEIGHTS_FILENAME)

# Final locations of the original checkpoint files. These end up as symlinks
# pointing either into the huggingface cache or into google drive.
ITHQ_WEIGHTS_PATH = Path("/content", ITHQ_WEIGHTS_FILENAME)
ITHQ_VQVAE_WEIGHTS_PATH = Path("/content", ITHQ_VQVAE_WEIGHTS_FILENAME)

# Output directory the converted diffusers model is written to.
CONVERTED_DIFFUSERS_MODEL_PATH = Path("/content/vq_diffusion_diffusers_dump")

In [11]:
# Download model weights.

# Skip this step if you already have weights downloaded and saved to your google drive

! pip install huggingface_hub

from huggingface_hub import hf_hub_download, scan_cache_dir

hf_hub_download(
    repo_id=HF_REPO_ID,
    filename=ITHQ_VQVAE_WEIGHTS_FILENAME, 
)

hf_hub_download(
    repo_id=HF_REPO_ID,
    filename=ITHQ_WEIGHTS_FILENAME, 
)

found_revision = None

for repo in scan_cache_dir().repos:
  if repo.repo_id == HF_REPO_ID:
    found_revision = next(iter(repo.revisions))

assert found_revision is not None, f"could not find repo {HF_REPO_ID}. Did you download it?"

found_vqvae_weights_file = None
found_weights_file = None

for file in found_revision.files:
  if file.file_name == ITHQ_VQVAE_WEIGHTS_FILENAME:
    found_vqvae_weights_file = file.file_path

  if file.file_name == ITHQ_WEIGHTS_FILENAME:
    found_weights_file = file.file_path

assert found_vqvae_weights_file and found_weights_file, f"found not find {ITHQ_VQVAE_WEIGHTS_FILENAME} and {ITHQ_WEIGHTS_FILENAME}"

ITHQ_VQVAE_WEIGHTS_PATH.symlink_to(found_vqvae_weights_file)
ITHQ_WEIGHTS_PATH.symlink_to(found_weights_file)

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


'/root/.cache/huggingface/hub/models--williamberman--vq-diffusion-orig/snapshots/2b4bc52dd4f1619287a67e318a1af3c0bfaaa04b/ithq_learnable.pth'

In [54]:
# Copy the downloaded model weights into your google drive.
#
# Not needed when the weights are already saved in your google drive.
#
# Also not needed when you would rather not mount google drive and just use the
# local weights. Be aware that a single subdirectory of google drive cannot be
# mounted on its own: mounting _will_ give this notebook full access to your
# google drive.

from google.colab import drive

drive.mount(G_DRIVE_MOUNT_PATH)

# (message before, source file, destination in drive, message after)
copy_jobs = (
  (
    "copying vqvae weights",
    found_vqvae_weights_file,
    G_DRIVE_ITHQ_VQVAE_WEIGHTS_PATH,
    "done copying vqvae weights",
  ),
  (
    "copying model weights",
    found_weights_file,
    G_DRIVE_ITHQ_WEIGHTS_PATH,
    "done copying model weights",
  ),
)

for start_message, source_file, drive_destination, done_message in copy_jobs:
  print(start_message)
  shutil.copy(str(source_file), str(drive_destination))
  print(done_message)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# Mount google drive, and symlink weights into expected location
#
# Run this step if you already have weights downloaded and saved to your google drive
#
# Skip this step if you don't want to mount your google drive and want to use local weights. 
# Note there is no way to mount an isolated subdirectory of your google drive and this 
# _will_ give this notebook full access to your google drive.

from google.colab import drive

drive.mount(G_DRIVE_MOUNT_PATH)

for link_path, drive_weights_path in (
  (ITHQ_VQVAE_WEIGHTS_PATH, G_DRIVE_ITHQ_VQVAE_WEIGHTS_PATH),
  (ITHQ_WEIGHTS_PATH, G_DRIVE_ITHQ_WEIGHTS_PATH),
):
  # Fail here with a clear message instead of creating a dangling symlink that
  # only surfaces later as a confusing error in the conversion step.
  assert drive_weights_path.is_file(), f"could not find {drive_weights_path}. Did you save the weights to your google drive?"

  # Replace a symlink left behind by an earlier run (or by the download cell)
  # so this cell can be re-run. A real file at this location is never removed;
  # symlink_to then raises FileExistsError exactly as before.
  if link_path.is_symlink():
    link_path.unlink()

  link_path.symlink_to(drive_weights_path)

Mounted at /content/drive


In [3]:
# clone diffusers, original VQ-Diffusion implementation, and notebook dependencies
#
# Each `!` line runs in its own subshell, so the `cd diffusers` below only affects
# the `git checkout` chained after it; later cells `cd diffusers` again themselves.
# The diffusers fork is switched to the vq-diffusion-ithq-vqvae branch, which is the
# checkout the conversion cell runs scripts/convert_vq_diffusion_to_diffusers.py from.
! git clone https://github.com/williamberman/diffusers.git && cd diffusers && git checkout vq-diffusion-ithq-vqvae
! git clone https://github.com/williamberman/VQ-Diffusion.git
# Provides the ithq.yaml / ithq_vqvae.yaml config files passed to the conversion script.
! git clone https://github.com/williamberman/vq-diffusion-notebook.git

Cloning into 'diffusers'...
remote: Enumerating objects: 7940, done.[K
remote: Counting objects: 100% (202/202), done.[K
remote: Compressing objects: 100% (121/121), done.[K
remote: Total 7940 (delta 109), reused 130 (delta 62), pack-reused 7738[K
Receiving objects: 100% (7940/7940), 7.16 MiB | 36.30 MiB/s, done.
Resolving deltas: 100% (5226/5226), done.
Branch 'vq-diffusion-ithq-vqvae' set up to track remote branch 'vq-diffusion-ithq-vqvae' from 'origin'.
Switched to a new branch 'vq-diffusion-ithq-vqvae'
Cloning into 'VQ-Diffusion'...
remote: Enumerating objects: 327, done.[K
remote: Counting objects: 100% (79/79), done.[K
remote: Compressing objects: 100% (52/52), done.[K
remote: Total 327 (delta 45), reused 43 (delta 27), pack-reused 248[K
Receiving objects: 100% (327/327), 2.38 MiB | 26.80 MiB/s, done.
Resolving deltas: 100% (152/152), done.
Cloning into 'vq-diffusion-notebook'...
remote: Enumerating objects: 11, done.[K
remote: Counting objects: 100% (11/11), done.[K
re

In [4]:
# Install python dependencies.
#
# Miniconda is installed into /usr/local and two separate conda envs are created:
# `vq-diffusion` for the original implementation (pinned torch==1.9.0) and `hf`
# for the editable diffusers checkout, so their dependencies do not clash.

# Download and run the Miniconda installer non-interactively.
# NOTE(review): per the installer's usage text, -b is batch mode, -f tolerates an
# already existing prefix and -p sets the install prefix — confirm if the version changes.
! wget https://repo.anaconda.com/miniconda/Miniconda3-py39_4.12.0-Linux-x86_64.sh 
! bash Miniconda3-py39_4.12.0-Linux-x86_64.sh -b -f -p /usr/local
! conda init bash

# Create a conda env named hf for huggingface dependencies.
! conda create -n hf python=3.9 -y

# Create a conda env named vq-diffusion for original vq-diffusion
! conda create -n vq-diffusion python=3.9 -y

# install original vq-diffusion dependencies
# Documented here + a few other undocumented ones
# https://github.com/microsoft/VQ-Diffusion/blob/3c98e77f721db7c787b76304fa2c96a36c7b00af/install_req.sh
#
# Every `!` line starts a fresh shell, so conda.sh is sourced and the env is
# activated inside the same command line as the pip install it should apply to.
# Only torch, timm and tensorboard are pinned; the other packages are unpinned.
! . "/usr/local/etc/profile.d/conda.sh" && conda activate vq-diffusion && \
    pip install torch==1.9.0 \
      torchvision \
      omegaconf \
      pytorch-lightning \
      timm==0.3.4 \
      tensorboard==1.15.0 \
      lmdb \
      tqdm \
      einops \
      ftfy \
      git+https://github.com/openai/DALL-E.git \
      opencv-python \
      regex

# install huggingface dependencies
# Editable install of the cloned diffusers checkout with its "dev" extra, plus
# OmegaConf, into the hf env.
! . "/usr/local/etc/profile.d/conda.sh" && conda activate hf && \
        cd diffusers && \
        pip install -e ".[dev]" && \
        pip install OmegaConf

--2022-10-11 06:32:41--  https://repo.anaconda.com/miniconda/Miniconda3-py39_4.12.0-Linux-x86_64.sh
Resolving repo.anaconda.com (repo.anaconda.com)... 104.16.130.3, 104.16.131.3, 2606:4700::6810:8303, ...
Connecting to repo.anaconda.com (repo.anaconda.com)|104.16.130.3|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 76607678 (73M) [application/x-sh]
Saving to: ‘Miniconda3-py39_4.12.0-Linux-x86_64.sh’


2022-10-11 06:32:41 (151 MB/s) - ‘Miniconda3-py39_4.12.0-Linux-x86_64.sh’ saved [76607678/76607678]

PREFIX=/usr/local
Unpacking payload ...
Collecting package metadata (current_repodata.json): - \ done
Solving environment: / - done

## Package Plan ##

  environment location: /usr/local

  added / updated specs:
    - _libgcc_mutex==0.1=main
    - _openmp_mutex==4.5=1_gnu
    - brotlipy==0.7.0=py39h27cfd23_1003
    - ca-certificates==2022.3.29=h06a4308_1
    - certifi==2021.10.8=py39h06a4308_2
    - cffi==1.15.0=py39hd667e15_1
    - charset-normalizer==

In [7]:
# convert original vq-diffusion to diffusers
print("converting vq diffusion to diffusers")

! . "/usr/local/etc/profile.d/conda.sh" && conda activate hf && \
        cd diffusers && \
        python ./scripts/convert_vq_diffusion_to_diffusers.py \
          --checkpoint_path /content/ithq_learnable.pth \
          --original_config_file /content/vq-diffusion-notebook/ithq.yaml \
          --vqvae_checkpoint_path /content/ithq_vqvae.pth \
          --vqvae_original_config_file /content/vq-diffusion-notebook/ithq_vqvae.yaml \
          --dump_path /content/vq_diffusion_diffusers_dump

print("done converting vq diffusion to diffusers")
print(f"converted model stored: {CONVERTED_DIFFUSERS_MODEL_PATH}")

converting vq diffusion to diffusers
loading checkpoints to cpu
loading vqvae, config: /content/vq-diffusion-notebook/ithq_vqvae.yaml, checkpoint: /content/ithq_vqvae.pth
done loading vqvae
loading transformer, config: /content/vq-diffusion-notebook/ithq.yaml, checkpoint: /content/ithq_learnable.pth, use ema: True
done loading transformer
loading CLIP text encoder
ftfy or spacy is not installed using BERT BasicTokenizer instead of ftfy.
Some weights of the model checkpoint at openai/clip-vit-base-patch32 were not used when initializing CLIPTextModel: ['vision_model.encoder.layers.0.self_attn.q_proj.weight', 'vision_model.encoder.layers.8.mlp.fc2.bias', 'vision_model.encoder.layers.9.mlp.fc2.weight', 'vision_model.encoder.layers.4.self_attn.q_proj.bias', 'vision_model.encoder.layers.5.self_attn.out_proj.weight', 'vision_model.encoder.layers.6.layer_norm2.weight', 'vision_model.encoder.layers.10.mlp.fc2.weight', 'vision_model.encoder.layers.2.self_attn.k_proj.weight', 'vision_model.encod