This tutorial accompanies the following blog post:
[Unconditional model](https://website.vincent-roger.fr/blog/deeplearning/python/2024/06/02/diffusers-unconditional_model).

Read it for more explanations and context.

In [1]:
from os.path import exists

import numpy as np
import torch
from diffusers import DDPMPipeline, DDPMScheduler
from diffusers.models import UNet2DModel
from torch.optim import Adam
from torch.utils.data import DataLoader
from tqdm import tqdm
from torch import GradScaler, autocast

from dlai_lib.diffusion_utilities import CustomDataset, transform
from diffusers_tutorials.tools.plotly import plot_generated_images

  from .autonotebook import tqdm as notebook_tqdm


# Hyperparameters

In [2]:
# Training hyperparameters
num_epochs = 32   # number of full passes over the dataset
batch_size = 100  # number of images per optimisation step

# Dataset

In [3]:
# Sprite dataset (images + labels) taken from the deeplearning.ai course
dataset = CustomDataset(
    "dlai_lib/sprites_1788_16x16.npy",
    "dlai_lib/sprite_labels_nc_1788_16x16.npy",
    transform,
    null_context=False,
)

# Shuffled mini-batches of `batch_size` images, loaded by one worker process into pinned memory
dataloader = DataLoader(
    dataset,
    pin_memory=True,
    num_workers=1,
    shuffle=True,
    batch_size=batch_size,
)

sprite shape: (89400, 16, 16, 3)
labels shape: (89400, 5)


# Model definition

In [4]:
# DDPM noise scheduler: the forward (noising) process is split into 500 timesteps
noise_scheduler = DDPMScheduler(num_train_timesteps=500)

# UNet used to predict the noise.
# Note: this model uses fewer parameters than the one from the deeplearning.ai course,
# as such a large model is not necessary for this task.
model = UNet2DModel(
    # Image geometry: 16x16 inputs, same number of channels in and out (e.g., 3 for RGB)
    sample_size=(16,16),
    in_channels=3,
    out_channels=3,
    # Architecture: two down blocks and two up blocks, two layers per block
    down_block_types=("DownBlock2D", "DownBlock2D"),
    up_block_types=("UpBlock2D", "UpBlock2D"),
    block_out_channels=(128, 64),
    layers_per_block=2,
)

In [5]:
# A CUDA-compatible GPU is recommended: both training and inference are much faster on it.
# Fall back to the CPU when no such GPU is available.
if torch.cuda.is_available():
    model = model.to("cuda")
else:
    model = model.to("cpu")

# Train or load the previously learned model

In [6]:

def train(unet: UNet2DModel, noise_scheduler: DDPMScheduler, dataloader: DataLoader, num_epochs: int, lr: float) -> np.ndarray:
    """Train the unet to predict the noise that the scheduler adds to images.

    For every batch, random Gaussian noise is added to the images at randomly
    drawn timesteps, and the unet is optimised with Adam to minimise the mean
    squared error between the noise it predicts and the noise that was added.
    The forward pass runs under ``autocast`` and the backward pass goes through
    a ``GradScaler`` (mixed precision).

    Parameters
    ----------
    unet : UNet2DModel
        The model unet to train. It is updated in place and left in training mode.
    noise_scheduler : DDPMScheduler
        Noise scheduler used to build the noisy images while training.
    dataloader : DataLoader
        The dataloader containing the images to reproduce. Each batch must be a
        pair whose first element is the image batch; the second element is ignored.
    num_epochs : int
        The number of epochs to train the unet.
    lr : float
        The learning rate to use to train the unet.

    Returns
    -------
    np.ndarray
        Array of shape ``(num_epochs,)`` holding, for each epoch, the loss
        averaged over the batches of that epoch.
    """
    # Loop-invariant values, looked up once instead of once per batch.
    device = unet.device
    num_train_timesteps = noise_scheduler.config.num_train_timesteps
    # Mixed precision must target the device that actually holds the model, which is
    # not necessarily CUDA just because a GPU is present on the machine.
    amp_device_type = "cuda" if device.type == "cuda" else "cpu"

    losses = np.zeros(num_epochs)

    optimizer = Adam(unet.parameters(), lr=lr)
    scaler = GradScaler(amp_device_type)  # For mixed precision
    unet.train()

    for epoch in range(num_epochs):
        epoch_loss = 0.0
        # The dataloader also provides targets, which are not used here
        for images, _ in tqdm(dataloader):
            optimizer.zero_grad()

            images = images.to(device, non_blocking=True)

            with autocast(amp_device_type):  # Mixed precision
                # Noise the model will have to recover
                noise = torch.randn(images.shape, device=device)

                # One random timestep per image, then the matching noisy image
                timesteps = torch.randint(0, num_train_timesteps, (images.shape[0],), device=device)
                noisy_images = noise_scheduler.add_noise(images, noise, timesteps)

                # Forward pass through the model
                predicted_noise = unet(noisy_images, timesteps).sample
                # Compute loss (mean squared error between actual and predicted noise)
                loss = torch.nn.functional.mse_loss(predicted_noise, noise)

            # Backward pass and optimization on the scaled loss
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
            epoch_loss += loss.item()

        epoch_loss /= len(dataloader)
        losses[epoch] = epoch_loss
        print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {epoch_loss}")

    # The per-epoch history was previously computed and then discarded.
    return losses

In [7]:
# Location where the trained unet is cached: written with `save_pretrained` after training
# and read back with `from_pretrained` on later runs.
# NOTE(review): `save_pretrained` presumably treats this path as a directory despite the
# ".pth" suffix — confirm against the diffusers documentation.
pre_trained_model = "weights/non_conditional_model.pth"

In [8]:
# Reuse the cached weights when they exist; otherwise train from scratch and cache the result.
if exists(pre_trained_model):
    # from_pretrained builds a brand new model object that has not been moved to the
    # accelerator, so put it on the same device as a freshly trained model would be.
    model = UNet2DModel.from_pretrained(pre_trained_model)
    model = model.to("cuda" if torch.cuda.is_available() else "cpu")
else:
    train(model, noise_scheduler, dataloader, num_epochs, lr=1e-4)
    model.save_pretrained(pre_trained_model)

# The model is only used for inference from here on: make sure both branches leave it
# in evaluation mode (train() leaves it in training mode).
model.eval()

  return F.conv2d(input, weight, bias, self.stride,
100%|██████████| 894/894 [00:36<00:00, 24.76it/s]


Epoch 1/32, Loss: 0.1434705962387374


100%|██████████| 894/894 [00:34<00:00, 25.75it/s]


Epoch 2/32, Loss: 0.09169128171732895


100%|██████████| 894/894 [00:36<00:00, 24.63it/s]


Epoch 3/32, Loss: 0.07975886966741458


100%|██████████| 894/894 [00:36<00:00, 24.31it/s]


Epoch 4/32, Loss: 0.07213670432450921


100%|██████████| 894/894 [00:36<00:00, 24.76it/s]


Epoch 5/32, Loss: 0.06651023365723874


100%|██████████| 894/894 [00:36<00:00, 24.44it/s]


Epoch 6/32, Loss: 0.06260828059625038


100%|██████████| 894/894 [00:36<00:00, 24.44it/s]


Epoch 7/32, Loss: 0.05992325459100656


100%|██████████| 894/894 [00:36<00:00, 24.57it/s]


Epoch 8/32, Loss: 0.05699232611096812


100%|██████████| 894/894 [00:36<00:00, 24.60it/s]


Epoch 9/32, Loss: 0.054222490384308014


100%|██████████| 894/894 [00:36<00:00, 24.52it/s]


Epoch 10/32, Loss: 0.05188901640133103


100%|██████████| 894/894 [00:36<00:00, 24.38it/s]


Epoch 11/32, Loss: 0.04968541018014786


100%|██████████| 894/894 [00:36<00:00, 24.56it/s]


Epoch 12/32, Loss: 0.04825622070022344


100%|██████████| 894/894 [00:36<00:00, 24.73it/s]


Epoch 13/32, Loss: 0.0466355055766451


100%|██████████| 894/894 [00:36<00:00, 24.50it/s]


Epoch 14/32, Loss: 0.045048584874044326


100%|██████████| 894/894 [00:36<00:00, 24.58it/s]


Epoch 15/32, Loss: 0.04375938282575317


100%|██████████| 894/894 [00:36<00:00, 24.76it/s]


Epoch 16/32, Loss: 0.0427069246489763


100%|██████████| 894/894 [00:36<00:00, 24.60it/s]


Epoch 17/32, Loss: 0.041544920546096445


100%|██████████| 894/894 [00:36<00:00, 24.69it/s]


Epoch 18/32, Loss: 0.040400704912741316


100%|██████████| 894/894 [00:36<00:00, 24.29it/s]


Epoch 19/32, Loss: 0.03943257022157345


100%|██████████| 894/894 [00:36<00:00, 24.65it/s]


Epoch 20/32, Loss: 0.03846677413139434


100%|██████████| 894/894 [00:36<00:00, 24.55it/s]


Epoch 21/32, Loss: 0.03776356819350381


100%|██████████| 894/894 [00:36<00:00, 24.52it/s]


Epoch 22/32, Loss: 0.03663826732785843


100%|██████████| 894/894 [00:36<00:00, 24.57it/s]


Epoch 23/32, Loss: 0.036021676583328104


100%|██████████| 894/894 [00:36<00:00, 24.51it/s]


Epoch 24/32, Loss: 0.03524558035910996


100%|██████████| 894/894 [00:36<00:00, 24.51it/s]


Epoch 25/32, Loss: 0.03494798392890817


100%|██████████| 894/894 [00:36<00:00, 24.40it/s]


Epoch 26/32, Loss: 0.03385997061061379


100%|██████████| 894/894 [00:36<00:00, 24.44it/s]


Epoch 27/32, Loss: 0.03360816345693881


100%|██████████| 894/894 [00:36<00:00, 24.57it/s]


Epoch 28/32, Loss: 0.0330493290964889


100%|██████████| 894/894 [00:36<00:00, 24.51it/s]


Epoch 29/32, Loss: 0.03239942944381528


100%|██████████| 894/894 [00:36<00:00, 24.54it/s]


Epoch 30/32, Loss: 0.03185163606666612


100%|██████████| 894/894 [00:36<00:00, 24.66it/s]


Epoch 31/32, Loss: 0.03153622766510819


100%|██████████| 894/894 [00:36<00:00, 24.57it/s]


Epoch 32/32, Loss: 0.030822471171717516


# Try some inferences with our model

In [9]:
# Create the DDPM pipeline
pipeline = DDPMPipeline(unet=model, scheduler=noise_scheduler)
pipeline.to("cuda" if torch.cuda.is_available() else "cpu")

DDPMPipeline {
  "_class_name": "DDPMPipeline",
  "_diffusers_version": "0.27.2",
  "scheduler": [
    "diffusers",
    "DDPMScheduler"
  ],
  "unet": [
    "diffusers",
    "UNet2DModel"
  ]
}

In [12]:
# Generate 16 images, denoising over as many steps as the scheduler was configured with (500)
generated_image = pipeline(
    num_inference_steps=noise_scheduler.config.num_train_timesteps,
    batch_size=16,
)

100%|██████████| 500/500 [00:02<00:00, 180.88it/s]


In [13]:
# Display the generated images with the tutorial's plotting helper.
# NOTE(review): the two 4s are presumably the number of rows and columns of the grid
# (4 x 4 = the 16 images generated) — confirm against plot_generated_images.
fig = plot_generated_images(generated_image.images, 4, 4)
fig.show()