## Section 1: Setup and Imports

In [None]:
import os
import sys
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms



# Choose the compute backend: prefer Apple MPS, then CUDA, and fall back to CPU
device_name = (
    "mps"
    if torch.backends.mps.is_available()
    else "cuda" if torch.cuda.is_available() else "cpu"
)
device = torch.device(device_name)

print(f"Using device: {device}")

# Seed the torch and NumPy global RNGs so repeated runs start from the same state
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)

print(" Setup complete")


In [None]:
# Visualization helpers (provided - focus on the model!)
def plot_loss_curves(d_losses, g_losses, d_loss_reference=None):
    """Plot discriminator and generator loss histories side by side.

    Args:
        d_losses: Sequence of discriminator loss values (plotted against "Epoch").
        g_losses: Sequence of generator loss values (plotted against "Epoch").
        d_loss_reference: y-value of the dashed reference line drawn on the
            discriminator panel. Defaults to 2*ln(2) ~= 1.386, which is the
            value of a summed real + fake binary cross-entropy when the
            discriminator outputs 0.5 for every sample. Pass ln(2) ~= 0.693
            instead if the recorded loss is the average of the two terms.
    """
    if d_loss_reference is None:
        # 0.5 is the equilibrium *output* of D, not its loss:
        # -ln(0.5) per term, two terms (real + fake).
        d_loss_reference = 2.0 * float(np.log(2.0))

    fig, axes = plt.subplots(1, 2, figsize=(14, 5))
    d_ax, g_ax = axes

    d_ax.plot(d_losses, linewidth=2, color="navy", label="D Loss")
    d_ax.axhline(
        y=d_loss_reference,
        color="red",
        linestyle="--",
        alpha=0.5,
        label=f"Equilibrium ({d_loss_reference:.2f})",
    )
    d_ax.set_title("Discriminator Loss")

    g_ax.plot(g_losses, linewidth=2, color="darkgreen", label="G Loss")
    g_ax.set_title("Generator Loss")

    # Shared cosmetics for both panels
    for ax in (d_ax, g_ax):
        ax.set_xlabel("Epoch")
        ax.set_ylabel("Loss")
        ax.legend()
        ax.grid(alpha=0.3)

    plt.tight_layout()
    plt.show()


def visualize_generated_images(generator, num_samples=16, device="cpu", latent_dim=100):
    """Sample images from the generator and show them in a grid.

    Args:
        generator: Module called as ``generator(z)`` with ``z`` of shape
            (num_samples, latent_dim); its output is treated as a batch of
            channel-first images with values in [-1, 1].
        num_samples: Number of images to generate and display (>= 1).
        device: Device on which the noise tensor is created.
        latent_dim: Size of the noise vector fed to the generator.

    Raises:
        ValueError: If ``num_samples`` is smaller than 1.
    """
    if num_samples < 1:
        raise ValueError("num_samples must be at least 1")

    # Sample in eval mode, then put the module back in whatever mode it was in
    # so that calling this helper mid-training does not silently disable training.
    was_training = generator.training
    generator.eval()
    try:
        with torch.no_grad():
            z = torch.randn(num_samples, latent_dim, device=device)
            images = generator(z)
    finally:
        generator.train(was_training)

    # Denormalize from [-1, 1] to [0, 1] and move to CPU once for plotting
    images = torch.clamp((images + 1) / 2, 0, 1).cpu()

    # Near-square grid sized from num_samples (16 -> 4x4, same as before)
    n_cols = int(np.ceil(np.sqrt(num_samples)))
    n_rows = -(-num_samples // n_cols)  # ceiling division
    fig, axes = plt.subplots(
        n_rows, n_cols, figsize=(2 * n_cols, 2 * n_rows), squeeze=False
    )
    for i, ax in enumerate(axes.flat):
        ax.axis("off")
        if i >= num_samples:
            continue  # leave surplus grid cells blank
        img = images[i].permute(1, 2, 0).numpy()
        if img.shape[-1] == 1:
            # imshow rejects (H, W, 1); show single-channel output as grayscale
            ax.imshow(img[..., 0], cmap="gray", vmin=0, vmax=1)
        else:
            ax.imshow(img)

    plt.suptitle("Generated Images", fontsize=14, fontweight="bold")
    plt.tight_layout()
    plt.show()


# Confirmation message printed once the helper definitions in this cell have run
print("Visualization helpers loaded")


## TODO 1: Load CIFAR-10 Dataset

**What to do:**
- Define transforms: ToTensor() and Normalize to [-1, 1]
- Load CIFAR-10 training dataset
- Create DataLoader with batch_size=64 and shuffle=True

**Hints:**
- Use `transforms.Compose()` to chain operations
- Use `transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))` for [-1, 1] normalization
- Use `datasets.CIFAR10()` with `download=True`

In [None]:
# TODO 1: Load CIFAR-10 Dataset
# ToTensor scales pixels to [0, 1]; Normalize with mean=std=0.5 then maps each
# RGB channel to [-1, 1], matching the Tanh range of the generator output.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)

# Training split only; downloaded into ./data on first run
train_dataset = datasets.CIFAR10(
    root="./data", train=True, download=True, transform=transform
)

# Shuffled mini-batches for training
batch_size = 64
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Verify
print(f"Dataset size: {len(train_dataset)}")
print(f"Batches per epoch: {len(train_loader)}")
print(f"Image shape: {train_dataset[0][0].shape}")


## TODO 2: Build Generator Network

**Architecture:**
- Input: Noise vector (batch, 100)
- FC layer: 100 → 256 * 8 * 8 = 16,384
- Reshape to (256, 8, 8)
- ConvTranspose block 1: 8×8 → 16×16 (256 → 128 channels)
- ConvTranspose block 2: 16×16 → 32×32 (128 → 64 channels)
- Output layer: 64 → 3 channels with **Tanh** activation

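**Use:** `nn.ConvTranspose2d(in_channels, out_channels, kernel_size=4, stride=2, padding=1)` for the two upsampling blocks (each doubles height and width). The output layer must keep the 32×32 size, so use a size-preserving setting there, e.g. `kernel_size=3, stride=1, padding=1`.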

In [None]:
# TODO 2: Implement Generator
class Generator(nn.Module):
    """DCGAN-style generator: latent vector -> (num_channels, 32, 32) image in [-1, 1].

    Args:
        latent_dim: Size of the input noise vector.
        num_channels: Number of channels in the generated image.
    """

    def __init__(self, latent_dim=100, num_channels=3):
        super().__init__()

        # Project the noise vector to a 256 x 8 x 8 feature map (flattened)
        self.fc = nn.Linear(latent_dim, 256 * 8 * 8)

        self.main = nn.Sequential(
            # 256 x 8 x 8 -> 128 x 16 x 16
            nn.ConvTranspose2d(256, 128, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            # 128 x 16 x 16 -> 64 x 32 x 32
            nn.ConvTranspose2d(128, 64, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            # 64 x 32 x 32 -> num_channels x 32 x 32
            # (kernel 3 / stride 1 / padding 1 keeps the spatial size; no BatchNorm here)
            nn.ConvTranspose2d(64, num_channels, kernel_size=3, stride=1, padding=1),
            nn.Tanh(),
        )

    def forward(self, z):
        """Map noise ``z`` of shape (batch, latent_dim) to images (batch, C, 32, 32)."""
        features = self.fc(z)
        features = features.view(-1, 256, 8, 8)
        return self.main(features)


# Build the generator on the selected device and report how many parameters it has
generator = Generator(latent_dim=100, num_channels=3).to(device)
generator_param_count = sum(param.numel() for param in generator.parameters())
print(f"Generator created. Parameters: {generator_param_count:,}")


## TODO 3: Build Discriminator Network

**Architecture:**
- Input: Image (batch, 3, 32, 32)
- Conv block 1: 32×32 → 16×16 (3 → 64 channels)
- Conv block 2: 16×16 → 8×8 (64 → 128 channels)
- Conv block 3: 8×8 → 4×4 (128 → 256 channels)
- Flatten to (256 * 4 * 4) = 4,096 features
- FC: 4096 → 1024 → 1 with Sigmoid output

**Use:** `nn.Conv2d(in_channels, out_channels, kernel_size=4, stride=2, padding=1)`

In [None]:
# TODO 3: Implement Discriminator
class Discriminator(nn.Module):
    """DCGAN-style discriminator: (num_channels, 32, 32) image -> probability of "real".

    Args:
        num_channels: Number of channels in the input image.
    """

    def __init__(self, num_channels=3):
        super().__init__()

        self.main = nn.Sequential(
            # num_channels x 32 x 32 -> 64 x 16 x 16 (no BatchNorm on the first block)
            nn.Conv2d(num_channels, 64, kernel_size=4, stride=2, padding=1),
            nn.LeakyReLU(0.2, inplace=True),
            # 64 x 16 x 16 -> 128 x 8 x 8
            nn.Conv2d(64, 128, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(128),
            nn.LeakyReLU(0.2, inplace=True),
            # 128 x 8 x 8 -> 256 x 4 x 4
            nn.Conv2d(128, 256, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(256),
            nn.LeakyReLU(0.2, inplace=True),
        )

        # Flatten -> 4096 -> 1024 -> 1, squashed to (0, 1) by Sigmoid for BCELoss
        self.fc = nn.Sequential(
            nn.Flatten(),
            nn.Linear(256 * 4 * 4, 1024),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(1024, 1),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Score images ``x`` of shape (batch, C, 32, 32); returns shape (batch, 1)."""
        return self.fc(self.main(x))


# Build the discriminator on the selected device and report how many parameters it has
discriminator = Discriminator(num_channels=3).to(device)
discriminator_param_count = sum(
    param.numel() for param in discriminator.parameters()
)
print(f"Discriminator created. Parameters: {discriminator_param_count:,}")


## TODO 4: Weight Initialization

**DCGAN standard:**
- Conv/ConvTranspose: `Normal(mean=0, std=0.02)`
- BatchNorm: `Normal(mean=1, std=0.02)` weight, bias=0

**Use:**
- `nn.init.normal_(m.weight, 0.0, 0.02)`
- `nn.init.normal_(m.weight, 1.0, 0.02)` for BatchNorm

In [None]:
# TODO 4: Implement weight initialization
def initialize_weights(model):
    """Apply the DCGAN weight initialisation to ``model`` in place.

    Conv2d / ConvTranspose2d weights are drawn from Normal(0, 0.02).
    BatchNorm2d weights are drawn from Normal(1, 0.02) and their biases set to 0.
    All other modules (e.g. Linear) are left untouched.

    Args:
        model: Module whose sub-modules (``model.modules()``) are re-initialised.
    """
    for m in model.modules():
        if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
            nn.init.normal_(m.weight, 0.0, 0.02)
        elif isinstance(m, nn.BatchNorm2d):
            # weight/bias are None when the layer was built with affine=False
            if m.weight is not None:
                nn.init.normal_(m.weight, 1.0, 0.02)
            if m.bias is not None:
                nn.init.constant_(m.bias, 0.0)


# Re-initialise both networks (generator first, then discriminator)
for network in (generator, discriminator):
    initialize_weights(network)
print("✓ Weights initialized")


## TODO 5: Set Up Optimizers and Loss

**Configuration:**
- Adam optimizer with lr=0.0002, beta1=0.5, beta2=0.999
- BCELoss for real/fake classification

**Note:** beta1=0.5 is unusual but empirically works better for GANs than default 0.9

In [None]:
# TODO 5: Set up optimizers and loss
lr = 0.0002
beta1 = 0.5

# One Adam optimizer per network so each step updates only its own parameters.
# betas=(0.5, 0.999): lowered first-moment decay, as specified for this GAN.
optimizer_g = optim.Adam(generator.parameters(), lr=lr, betas=(beta1, 0.999))
optimizer_d = optim.Adam(discriminator.parameters(), lr=lr, betas=(beta1, 0.999))

# Binary cross-entropy on the discriminator's sigmoid output (real=1, fake=0)
criterion = nn.BCELoss()

print("✓ Optimizers and loss initialized")


## TODO 6: Implement Training Loop

**Discriminator step:**
1. Forward real images → target=1
2. Generate fake images (detached)
3. Forward fake images → target=0
4. Sum losses and backprop

**Generator step:**
1. Generate fake images (NOT detached)
2. Forward through discriminator
3. Try to fool D → target=1
4. Backprop generator only

In [None]:
# TODO 6: Training loop
num_epochs = 20
latent_dim = 100  # size of the noise vector fed to the generator
d_losses = []  # mean discriminator loss per epoch
g_losses = []  # mean generator loss per epoch

# Make sure both networks are in training mode (a previous call to
# visualize_generated_images may have left the generator in eval mode).
generator.train()
discriminator.train()

for epoch in range(num_epochs):
    epoch_d_loss = 0.0
    epoch_g_loss = 0.0

    for batch_idx, (real_images, _) in enumerate(train_loader):
        real_images = real_images.to(device)
        # Size of *this* batch (the last one may be smaller); kept under its own
        # name so the global `batch_size` setting is not overwritten.
        n_real = real_images.size(0)

        # ---- Discriminator step ----
        optimizer_d.zero_grad()

        # Real images should be classified as 1
        real_pred = discriminator(real_images)
        real_loss = criterion(real_pred, torch.ones_like(real_pred))

        # Fake images should be classified as 0; detach so no gradient
        # flows back into the generator during the discriminator update.
        z = torch.randn(n_real, latent_dim, device=device)
        fake_images = generator(z)
        fake_pred = discriminator(fake_images.detach())
        fake_loss = criterion(fake_pred, torch.zeros_like(fake_pred))

        d_loss = real_loss + fake_loss
        d_loss.backward()
        optimizer_d.step()

        # ---- Generator step ----
        optimizer_g.zero_grad()

        # Fresh fakes, NOT detached: the gradient must reach the generator.
        z = torch.randn(n_real, latent_dim, device=device)
        fake_images = generator(z)
        fooled_pred = discriminator(fake_images)
        # Target 1: the generator is rewarded when D calls its output real
        g_loss = criterion(fooled_pred, torch.ones_like(fooled_pred))
        g_loss.backward()
        optimizer_g.step()  # updates generator parameters only

        epoch_d_loss += d_loss.item()
        epoch_g_loss += g_loss.item()

        if (batch_idx + 1) % 100 == 0:
            print(
                f"Epoch {epoch+1}/{num_epochs}, Batch {batch_idx+1}/{len(train_loader)}"
            )

    # Record per-epoch averages so the loss curves have one point per epoch
    d_losses.append(epoch_d_loss / len(train_loader))
    g_losses.append(epoch_g_loss / len(train_loader))

    print(f"Epoch {epoch+1}/{num_epochs} complete")

print("✓ Training complete")


## TODO 7: Plot Loss Curves

**What to look for:**
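- D loss settles near 2·ln 2 ≈ 1.39 — the summed real + fake BCE when D outputs ≈ 0.5 for every image, i.e. it can no longer tell real from fake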
- G loss generally decreases
- No divergence or wild oscillations

In [None]:
# TODO 7: Plot loss curves
# Draws the values collected in d_losses / g_losses using the plot_loss_curves helper.
plot_loss_curves(d_losses, g_losses)


## TODO 8: Generate and Visualize Images

**Steps:**
1. Set generator to eval mode
2. Generate 16 samples with `torch.no_grad()`
3. Denormalize: `(generated + 1) / 2` and clamp to [0, 1]
4. Display as 4×4 grid

In [None]:
# TODO 8: Generate and visualize images
# Samples 16 images from the generator on the selected device and shows them
# via the visualize_generated_images helper.
visualize_generated_images(generator, num_samples=16, device=device)


## TODO 9: Analysis

Write your observations about:
1. Image quality and realism
2. Patterns and diversity
3. Loss curve behavior
4. Potential improvements

In [None]:
# TODO 9: Write your analysis
# TODO: Replace the bracketed placeholder in the string below with your observations
# (image quality, patterns and diversity, loss curve behavior, potential improvements).

analysis = """
ANALYSIS:

[Your analysis here]
"""

# Echo the written analysis as the cell output
print(analysis)
