In [None]:
!pip install diffusers==0.16.1 transformers accelerate datasets accelerate==0.21.0 huggingface_hub==0.19.4

Collecting diffusers==0.16.1
  Downloading diffusers-0.16.1-py3-none-any.whl.metadata (19 kB)
Collecting accelerate
  Downloading accelerate-0.21.0-py3-none-any.whl.metadata (17 kB)
Collecting huggingface_hub==0.19.4
  Downloading huggingface_hub-0.19.4-py3-none-any.whl.metadata (14 kB)
INFO: pip is looking at multiple versions of transformers to determine which version is compatible with other requirements. This could take a while.
Collecting transformers
  Downloading transformers-4.52.3-py3-none-any.whl.metadata (40 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.2/40.2 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Downloading transformers-4.52.1-py3-none-any.whl.metadata (38 kB)
  Downloading transformers-4.51.3-py3-none-any.whl.metadata (38 kB)
  Downloading transformers-4.51.2-py3-none-any.whl.metadata (38 kB)
  Downloading transformers-4.51.1-py3-none-any.whl.metadata (38 kB)
  Downloading transformers-4.51.0-py3-none-any.whl.metadata (38 kB)
  Dow

In [None]:
!pip uninstall -y jax jaxlib flax
!pip uninstall -y diffusers huggingface_hub accelerate
!pip install diffusers==0.16.1 huggingface_hub==0.19.4 accelerate==0.21.0

Found existing installation: jax 0.5.2
Uninstalling jax-0.5.2:
  Successfully uninstalled jax-0.5.2
Found existing installation: jaxlib 0.5.1
Uninstalling jaxlib-0.5.1:
  Successfully uninstalled jaxlib-0.5.1
Found existing installation: flax 0.10.6
Uninstalling flax-0.10.6:
  Successfully uninstalled flax-0.10.6
Found existing installation: diffusers 0.16.1
Uninstalling diffusers-0.16.1:
  Successfully uninstalled diffusers-0.16.1
Found existing installation: huggingface-hub 0.19.4
Uninstalling huggingface-hub-0.19.4:
  Successfully uninstalled huggingface-hub-0.19.4
Found existing installation: accelerate 0.21.0
Uninstalling accelerate-0.21.0:
  Successfully uninstalled accelerate-0.21.0
Collecting diffusers==0.16.1
  Using cached diffusers-0.16.1-py3-none-any.whl.metadata (19 kB)
Collecting huggingface_hub==0.19.4
  Using cached huggingface_hub-0.19.4-py3-none-any.whl.metadata (14 kB)
Collecting accelerate==0.21.0
  Using cached accelerate-0.21.0-py3-none-any.whl.metadata (17 kB)
Us

In [None]:
import os
from PIL import Image
from torch.utils.data import Dataset
from torchvision import transforms

class FaceDataset(Dataset):
    """Image-folder dataset that yields resized, normalized RGB tensors.

    Every ``.png`` file (extension matched case-insensitively) located directly
    inside ``image_dir`` becomes one sample. Paths are sorted so that index ``i``
    refers to the same file on every run; ``os.listdir`` on its own returns
    entries in arbitrary order.

    Args:
        image_dir: Directory containing the images.
        image_size: Side length in pixels that every image is resized to.
            Defaults to 64, the previously hard-coded value.
    """

    def __init__(self, image_dir, image_size=64):
        self.image_paths = self._list_images(image_dir)
        self.transform = transforms.Compose([
            transforms.Resize((image_size, image_size)),
            transforms.ToTensor(),
            # Normalize computes (x - 0.5) / 0.5 per channel.
            transforms.Normalize([0.5], [0.5]),
        ])

    @staticmethod
    def _list_images(image_dir):
        """Return the sorted full paths of the PNG files inside ``image_dir``."""
        return [
            os.path.join(image_dir, name)
            for name in sorted(os.listdir(image_dir))
            if name.lower().endswith(".png")
        ]

    def __len__(self):
        """Return the number of images found in the directory."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load image ``idx``, convert it to RGB and return the transformed tensor."""
        # The context manager releases the file handle as soon as the pixel data
        # has been copied by convert(), instead of waiting for garbage collection.
        with Image.open(self.image_paths[idx]) as img:
            image = img.convert("RGB")
        return self.transform(image)


In [None]:
import os
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import transforms
from accelerate import Accelerator
from diffusers import UNet2DModel, DDPMScheduler
# from dataset import FaceDataset  # Ensure this points to your custom dataset class

# ----- Config -----
# Training images are read from data_path; checkpoints are written below model_ckpt_dir.
# (Both look like Colab Google Drive mount paths — adjust when running elsewhere.)
data_path = "/content/drive/MyDrive/GAI_HW5/selected_10000/"
model_ckpt_dir = "/content/drive/MyDrive/GAI_HW5/model_v2"
os.makedirs(model_ckpt_dir, exist_ok=True)

batch_size = 64
learning_rate = 1e-4  # base AdamW learning rate; the LambdaLR warm-up scales it
num_epochs = 200
start_epoch = 0  # first epoch index of the training loop (0 = train from scratch); the resume code keyed on it is commented out

# ----- Accelerator & Device -----
# Accelerate is configured for fp16 mixed precision; the GradScaler/autocast lines in
# this cell's output appear to come from that setup.
accelerator = Accelerator(mixed_precision="fp16")
device = accelerator.device

# ----- Dataset & DataLoader -----
# Batches are reshuffled every epoch and loaded by 4 worker processes.
dataset = FaceDataset(data_path)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=4)

# ----- Model -----
# UNet for 64x64, 3-channel images with six down/up stages; attention blocks sit at
# the 3rd and 4th position of both the down path and the up path.
model = UNet2DModel(
    sample_size=64,
    in_channels=3,
    out_channels=3,
    layers_per_block=2,
    block_out_channels=(128, 256, 256, 512, 512, 1024),
    down_block_types=("DownBlock2D", "DownBlock2D", "AttnDownBlock2D", "AttnDownBlock2D", "DownBlock2D", "DownBlock2D"),
    up_block_types=("UpBlock2D", "UpBlock2D", "AttnUpBlock2D", "AttnUpBlock2D", "UpBlock2D", "UpBlock2D"),
)

# ----- Scheduler & Optimizer -----
# `scheduler` is the diffusion noise scheduler (1000 training timesteps);
# `lr_scheduler` is the learning-rate schedule.
scheduler = DDPMScheduler(num_train_timesteps=1000)
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)
# Linear warm-up: the LR multiplier grows from 1/500 to 1 over the first 500 scheduler
# steps and stays at 1 afterwards.
lr_scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda step: min((step + 1) / 500, 1))

# ----- Load checkpoint if exists -----
# NOTE: resuming is currently disabled. If re-enabled, be aware that it restores only
# the model and the optimizer; the training loop also saves the lr_scheduler state
# (lr_scheduler_epoch_N.pt), which is not reloaded here.
# if os.path.exists(f"{model_ckpt_dir}/epoch_{start_epoch}"):
#     print(f"[INFO] Loading model from epoch {start_epoch}")
#     model = UNet2DModel.from_pretrained(f"{model_ckpt_dir}/epoch_{start_epoch}")
#     model.to(device)
#     if os.path.exists(f"{model_ckpt_dir}/optimizer_epoch_{start_epoch}.pt"):
#         optimizer.load_state_dict(torch.load(f"{model_ckpt_dir}/optimizer_epoch_{start_epoch}.pt"))


# ----- Accelerator prepare -----
# From here on, use the objects returned by prepare() rather than the originals.
model, optimizer, dataloader, lr_scheduler = accelerator.prepare(model, optimizer, dataloader, lr_scheduler)

# ----- Training loop -----
# Noise-prediction training: each step corrupts a batch with random noise at random
# timesteps and trains the UNet to predict that noise (L1 loss).
log_every = 50    # print the loss every N steps within an epoch
save_every = 100  # write a checkpoint every N epochs

for epoch in range(start_epoch, num_epochs):
    model.train()
    for step, batch in enumerate(dataloader):
        batch = batch.to(device)
        noise = torch.randn_like(batch)
        # One independent timestep per image in the batch.
        timesteps = torch.randint(
            0, scheduler.config.num_train_timesteps, (batch.size(0),), device=device
        ).long()

        noisy_images = scheduler.add_noise(batch, noise, timesteps)
        noise_pred = model(noisy_images, timesteps).sample

        loss = nn.functional.l1_loss(noise_pred, noise)  # L1 Loss
        # Backward goes through Accelerate so that mixed-precision handling is applied.
        accelerator.backward(loss)

        optimizer.step()
        lr_scheduler.step()
        optimizer.zero_grad()

        if step % log_every == 0:
            print(f"Epoch {epoch} | Step {step} | Loss: {loss.item():.4f}")

    # Save model & optimizer
    if (epoch + 1) % save_every == 0:
        # Every process must reach this point before the main process writes files.
        accelerator.wait_for_everyone()
        if accelerator.is_main_process:
            # prepare() may wrap the model (e.g. for distributed training); the wrapper
            # does not expose save_pretrained, so save the underlying UNet instead.
            unwrapped_model = accelerator.unwrap_model(model)
            unwrapped_model.save_pretrained(f"{model_ckpt_dir}/epoch_{epoch+1}")
            torch.save(optimizer.state_dict(), f"{model_ckpt_dir}/optimizer_epoch_{epoch+1}.pt")
            torch.save(lr_scheduler.state_dict(), f"{model_ckpt_dir}/lr_scheduler_epoch_{epoch+1}.pt")
            print(f"[INFO] Saved model at epoch {epoch+1}")


  self.scaler = torch.cuda.amp.GradScaler(**kwargs)
  new_forward = torch.cuda.amp.autocast(dtype=torch.float16)(model_forward_func)


Epoch 0 | Step 0 | Loss: 0.8482
Epoch 0 | Step 50 | Loss: 0.5782
Epoch 0 | Step 100 | Loss: 0.2393
Epoch 0 | Step 150 | Loss: 0.1678
Epoch 1 | Step 0 | Loss: 0.1916
Epoch 1 | Step 50 | Loss: 0.1550
Epoch 1 | Step 100 | Loss: 0.1411
Epoch 1 | Step 150 | Loss: 0.1172
Epoch 2 | Step 0 | Loss: 0.1214
Epoch 2 | Step 50 | Loss: 0.1339
Epoch 2 | Step 100 | Loss: 0.1356
Epoch 2 | Step 150 | Loss: 0.1412
Epoch 3 | Step 0 | Loss: 0.1137
Epoch 3 | Step 50 | Loss: 0.1027
Epoch 3 | Step 100 | Loss: 0.0924
Epoch 3 | Step 150 | Loss: 0.0674
Epoch 4 | Step 0 | Loss: 0.0871
Epoch 4 | Step 50 | Loss: 0.0998
Epoch 4 | Step 100 | Loss: 0.1040
Epoch 4 | Step 150 | Loss: 0.0849
Epoch 5 | Step 0 | Loss: 0.0828
Epoch 5 | Step 50 | Loss: 0.0715
Epoch 5 | Step 100 | Loss: 0.0779
Epoch 5 | Step 150 | Loss: 0.0828
Epoch 6 | Step 0 | Loss: 0.0858
Epoch 6 | Step 50 | Loss: 0.0756
Epoch 6 | Step 100 | Loss: 0.0803
Epoch 6 | Step 150 | Loss: 0.0728
Epoch 7 | Step 0 | Loss: 0.1236
Epoch 7 | Step 50 | Loss: 0.0835
Epoc

In [None]:
import os
import torch
from diffusers import UNet2DModel, DDPMScheduler
from torchvision.utils import save_image
from tqdm import tqdm

# --- Load your trained model ---
model = UNet2DModel.from_pretrained("/content/drive/MyDrive/GAI_HW5/model_v2/epoch_200").to("cuda").eval()
scheduler = DDPMScheduler(num_train_timesteps=1000)

# --- Output directory ---
# A single variable is used both for creating the folder and for saving, so the images
# cannot end up in a different directory than the one that was created.
output_dir = "/content/drive/MyDrive/GAI_HW5/generated_images_v2"
os.makedirs(output_dir, exist_ok=True)

# --- Generate 10,000 images in batches ---
batch_size = 100
total = 10000
steps = (total + batch_size - 1) // batch_size  # ceiling division: keep a final partial batch

for step in tqdm(range(steps), desc="Generating images"):
    first_index = step * batch_size
    current_batch = min(batch_size, total - first_index)  # smaller only for the last batch

    # Start from pure Gaussian noise and denoise through every training timestep.
    x = torch.randn(current_batch, 3, 64, 64, device="cuda")

    with torch.no_grad():
        for t in reversed(range(scheduler.config.num_train_timesteps)):
            # The scheduler receives a CPU scalar timestep, the model a per-sample
            # timestep tensor on the GPU.
            t_scalar = torch.tensor(t, dtype=torch.long, device="cpu")  # for scheduler.step
            t_tensor = torch.full((current_batch,), t, device="cuda", dtype=torch.long)  # for model

            noise_pred = model(x, t_tensor).sample
            x = scheduler.step(noise_pred, t_scalar, x).prev_sample

    # Post-process and save
    x = (x.clamp(-1, 1) + 1) / 2  # [-1, 1] → [0, 1]
    for i in range(current_batch):
        save_image(x[i], os.path.join(output_dir, f"{first_index + i:05d}.png"))


Generating images: 100%|██████████| 100/100 [3:39:54<00:00, 131.95s/it]
