# Lecture 07: Convolutional Neural Networks

**Slides:** `07_CNN.pdf`

## What you will learn
- CNN building blocks (conv/pool/fully-connected)
- Training a small CNN on CIFAR-10 with PyTorch
- Evaluating performance with a confusion matrix

## Notes
Includes pretrained weights so you can run the notebook quickly.

## How to use this notebook
1. Run the **Setup** cell below (it will detect the repository root and set paths).
2. Run cells top-to-bottom. If a cell takes too long, skim it and continue — the goal is to learn the workflow, not to optimize runtime.

In [None]:
# --- Setup (run this first) ---
from __future__ import annotations

import os
import sys
from pathlib import Path
from typing import Optional

def _find_repo_root(start: Optional[Path] = None) -> Path:
    """Locate the repository root at or above ``start``.

    The search begins at ``start`` (the current working directory when
    omitted) and climbs through its parents. The first directory that
    contains either ``pyproject.toml`` together with ``src``, or ``slides``
    together with ``notebooks``, is returned. When no directory qualifies,
    the resolved starting directory itself is returned.
    """
    origin = (start or Path.cwd()).resolve()
    # Each tuple lists entries that must all exist for a directory to count
    # as the repository root.
    marker_sets = (("pyproject.toml", "src"), ("slides", "notebooks"))
    for candidate in (origin, *origin.parents):
        if any(
            all((candidate / entry).exists() for entry in required)
            for required in marker_sets
        ):
            return candidate
    return origin

# Resolve the repository root once and make it the working directory, so that
# relative paths used by later cells resolve inside the repository.
PROJECT_ROOT = _find_repo_root()
os.chdir(PROJECT_ROOT)

# Make `import aml_course` work without installing the package.
SRC_DIR = PROJECT_ROOT / "src"
if str(SRC_DIR) not in sys.path:
    sys.path.insert(0, str(SRC_DIR))

# Common paths used in the course.
DATA_DIR = PROJECT_ROOT / "data"
FIGURES_DIR = PROJECT_ROOT / "pictures"
MODELS_DIR = PROJECT_ROOT / "models"

# Create the directories up front so later cells can write to them directly.
DATA_DIR.mkdir(parents=True, exist_ok=True)
FIGURES_DIR.mkdir(parents=True, exist_ok=True)
MODELS_DIR.mkdir(parents=True, exist_ok=True)

# The emoji prefixes were previously stored as mis-decoded UTF-8 bytes.
print(f"📁 Project root: {PROJECT_ROOT}")
print(f"📦 Data dir:     {DATA_DIR}")
print(f"🖼️  Figures dir:  {FIGURES_DIR}")
print(f"🤖 Models dir:   {MODELS_DIR}")


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Define the CNN architecture
class SimpleCNN(nn.Module):
    """Compact LeNet-style convolutional classifier for CIFAR-10 (32x32 RGB images).

    Layer layout (matches the provided pretrained weights in `./models`):
      conv(3→8, 5x5) → ReLU → maxpool(2x2)
      conv(8→16, 5x5) → ReLU → maxpool(2x2)
      flatten (16*5*5=400) → fc 120 → fc 84 → fc 10
    """

    def __init__(self):
        super().__init__()
        # Convolutional feature extractor; one pooling layer is reused after
        # both convolutions because it holds no parameters.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=8, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=8, out_channels=16, kernel_size=5)

        # Fully-connected head. For 32x32 inputs the feature map is 16x5x5
        # after two (5x5 conv, 2x2 pool) stages: 32 -> 28 -> 14 -> 10 -> 5.
        self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        """Return the unnormalized class scores (logits) for a batch of images."""
        relu = nn.functional.relu
        features = self.pool(relu(self.conv1(x)))
        features = self.pool(relu(self.conv2(features)))
        # Keep the batch dimension and collapse everything else.
        hidden = relu(self.fc1(features.flatten(start_dim=1)))
        hidden = relu(self.fc2(hidden))
        return self.fc3(hidden)


In [None]:
# Dataset and DataLoader setup
# Preprocessing applied to every image: convert it to a tensor, then normalize
# each of the three channels with mean 0.5 and std 0.5.
# (These are common quick defaults for CIFAR-10, not the measured dataset
# statistics; they rescale values in [0, 1] to [-1, 1].)
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])


In [None]:
# Load the CIFAR10 dataset (downloaded on first use).
# The root is the DATA_DIR defined in the Setup cell rather than a hard-coded
# "./data", so the download location no longer depends on the current
# working directory. str() keeps this compatible with torchvision versions
# that expect a string path.
train_dataset = datasets.CIFAR10(root=str(DATA_DIR), train=True,
                                 download=True, transform=transform)
test_dataset = datasets.CIFAR10(root=str(DATA_DIR), train=False,
                                download=True, transform=transform)

In [None]:
import matplotlib.pyplot as plt
# Display one training image as a sanity check. `.data` is indexed directly,
# so the `transform` pipeline above is presumably not applied here (raw pixel
# array) — verify against the torchvision CIFAR10 documentation.
plt.imshow(train_dataset.data[5])

In [None]:
# Mini-batch iterators. Training batches are small and reshuffled; the test
# set is read in order with large batches.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=64,
    shuffle=True,
    num_workers=2,
)
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=1000,
    shuffle=False,
    num_workers=2,
)


In [None]:
# Initialize the network.
# The parameters are freshly initialized here; the training / weight-loading
# cell further down replaces them with trained values.
model = SimpleCNN()

In [None]:
# Loss function and optimizer:
# cross-entropy on the class scores, minimized with SGD plus momentum.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(
    params=model.parameters(),
    lr=0.001,
    momentum=0.9,
)

In [None]:
# Device configuration: use the first CUDA GPU when one is available,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
# Move the model's parameters onto the selected device.
model.to(device)

In [None]:
# Training (optional)
#
# Training CIFAR-10 from scratch can take a while on CPU.
# By default we load pretrained weights shipped with this repository.
TRAIN_FROM_SCRATCH = False
EPOCHS = 5
LOG_EVERY = 200  # number of mini-batches between progress reports

if TRAIN_FROM_SCRATCH:
    model.train()
    for epoch in range(EPOCHS):
        running_loss = 0.0
        for i, (inputs, labels) in enumerate(train_loader, start=1):
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Report the mean loss over the last LOG_EVERY batches, then reset.
            running_loss += loss.item()
            if i % LOG_EVERY == 0:
                print(f"Epoch {epoch + 1}/{EPOCHS} | step {i} | loss {running_loss / LOG_EVERY:.4f}")
                running_loss = 0.0

    # Save weights for later reuse
    out_path = MODELS_DIR / f"simple_cnn_{device.type}.pth"
    torch.save(model.state_dict(), out_path)
    print(f"Saved weights to: {out_path}")

    # Leave the model in evaluation mode, exactly like the loading branch does.
    model.eval()
else:
    # Load pretrained weights (CPU / CUDA). The CPU file doubles as the
    # fallback when no CUDA-specific file exists.
    preferred = MODELS_DIR / ("simple_cnn_cuda.pth" if device.type == "cuda" else "simple_cnn_cpu.pth")
    fallback = MODELS_DIR / "simple_cnn_cpu.pth"

    weights_path = preferred if preferred.exists() else fallback
    if not weights_path.exists():
        raise FileNotFoundError(
            f"No pretrained weights found at {weights_path}. "
            "Set TRAIN_FROM_SCRATCH = True or place a weights file under the models directory."
        )
    print(f"Loading weights: {weights_path}")
    # NOTE(security): torch.load unpickles the file, so only load weights from
    # a trusted source. Recent PyTorch releases accept weights_only=True to
    # restrict what may be unpickled; consider passing it once the minimum
    # supported PyTorch version is known.
    model.load_state_dict(torch.load(weights_path, map_location=device))
    model.eval()


Tip: If you want to train longer or change hyperparameters, use `scripts/07_train_cnn.py` and save the weights under `./models/`.

In [None]:
import matplotlib.pyplot as plt

# Assuming model and test_loader are already defined and the model is loaded with trained parameters

# Function to evaluate the model
def evaluate_model(model, device, test_loader):
    """Compute the mean cross-entropy loss and the accuracy of ``model``.

    Args:
        model: Network that maps a batch of inputs to per-class scores.
        device: Device the batches are moved to before the forward pass.
        test_loader: Iterable yielding ``(data, target)`` batches.

    Returns:
        A ``(test_loss, accuracy)`` tuple: the cross-entropy averaged over all
        evaluated samples, and the percentage of correct predictions.

    Raises:
        ValueError: If ``test_loader`` yields no samples.
    """
    model.eval()  # Set the model to evaluation mode
    loss_sum = 0.0
    correct = 0
    total = 0

    with torch.no_grad():  # No need to track gradients
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            outputs = model(data)
            # Accumulate the *summed* per-sample loss. Adding up per-batch
            # means and dividing by the sample count (as done previously)
            # under-reports the loss by roughly the batch size.
            loss_sum += nn.functional.cross_entropy(outputs, target, reduction="sum").item()
            predicted = outputs.argmax(dim=1)
            total += target.size(0)
            correct += (predicted == target).sum().item()

    if total == 0:
        raise ValueError("test_loader yielded no samples to evaluate")

    # Normalize by the number of samples actually seen.
    test_loss = loss_sum / total
    accuracy = 100.0 * correct / total
    return test_loss, accuracy

In [None]:
# Evaluate the model on the test loader and report both metrics.
test_loss, accuracy = evaluate_model(model, device, test_loader)

# Loss is printed with four decimals, accuracy as a percentage with two.
print(f'Test Loss: {test_loss:.4f}, Accuracy: {accuracy:.2f}%')

In [None]:
import seaborn as sns
from sklearn.metrics import confusion_matrix
import numpy as np

# Assuming model, device, and test_loader are already defined and the model is loaded with trained parameters

def get_all_predictions(model, device, loader):
    """Run ``model`` over every batch in ``loader`` and gather the results.

    The model is switched to evaluation mode and gradients are disabled.
    For each batch, the predicted class is the index of the highest score.

    Returns:
        A ``(predictions, labels)`` pair of flat Python lists holding the
        predicted and the true class index for each sample, in loader order.
    """
    model.eval()
    predicted_classes, true_classes = [], []
    with torch.no_grad():
        for batch, batch_targets in loader:
            batch = batch.to(device)
            batch_targets = batch_targets.to(device)
            scores = model(batch)
            batch_preds = torch.max(scores, dim=1).indices
            # Bring results back to the CPU before converting to NumPy.
            predicted_classes.extend(batch_preds.cpu().numpy())
            true_classes.extend(batch_targets.cpu().numpy())
    return predicted_classes, true_classes

# Collect the predicted and the true class index for every test sample.
predictions, labels = get_all_predictions(model, device, test_loader)

# Confusion matrix of true labels (rows) against predictions (columns),
# then divide each row by its total so that every row sums to 1.
cm = confusion_matrix(labels, predictions)
cm_normalized = cm.astype(float) / cm.sum(axis=1, keepdims=True)

# Draw the normalized matrix as an annotated heatmap.
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(cm_normalized, annot=True, fmt=".2f", cmap="Blues", ax=ax)
ax.set_title("Normalized Confusion Matrix")
ax.set_ylabel("True Label")
ax.set_xlabel("Predicted Label")
plt.show()

