### Device Initialization

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

from torch.utils.data import DataLoader
from torch.utils.data import sampler


import torchvision.datasets as dset
import torchvision.transforms as T

import numpy as np

# Global configuration shared by every cell below.
USE_GPU = True
dtype = torch.float32  # All floating-point tensors in this notebook use float32.

# Use CUDA only when it was requested *and* is actually available; otherwise
# fall back to the CPU.
device = torch.device(
    'cuda' if (USE_GPU and torch.cuda.is_available()) else 'cpu'
)

# How often (in iterations) the training loss is reported.
print_every = 100
print('using device:', device)

using device: cuda


### Data Preparation

In [21]:
# Number of images (taken from the front of the CIFAR-10 training split) used
# for training; the remaining images of that split form the validation set.
NUM_TRAIN = 49000
# Minibatch size shared by the train / val / test loaders.
BATCH_SIZE = 64
# Directory the CIFAR-10 archive is downloaded to / read from.
DATA_ROOT = './data'

# The torchvision.transforms package provides tools for preprocessing data
# and for performing data augmentation; here we set up a transform to
# preprocess the data by subtracting the mean RGB value and dividing by the
# standard deviation of each RGB value; we've hardcoded the mean and std.
CIFAR10_MEAN = [0.4914, 0.4822, 0.4465]
CIFAR10_STD  = [0.2470, 0.2435, 0.2616]


# NOTE: the final step flattens every image into a 1-D vector, so the loaders
# yield batches of shape (N, C*H*W) rather than (N, C, H, W).
transform = T.Compose([
    T.ToTensor(),
    T.Normalize(CIFAR10_MEAN, CIFAR10_STD),
    T.Lambda(torch.flatten)
])

# We set up a Dataset object for each split (train / val / test); Datasets load
# training examples one at a time, so we wrap each Dataset in a DataLoader which
# iterates through the Dataset and forms minibatches. We divide the CIFAR-10
# training set into train and val sets by passing a Sampler object to the
# DataLoader telling how it should sample from the underlying Dataset.
cifar10_train = dset.CIFAR10(DATA_ROOT, train=True, download=True,
                             transform=transform)
loader_train = DataLoader(
    cifar10_train, batch_size=BATCH_SIZE,
    sampler=sampler.SubsetRandomSampler(range(NUM_TRAIN)))

# The validation split is the tail of the *training* archive, i.e. every index
# from NUM_TRAIN up to the end of the dataset (derived from its length rather
# than hard-coded).
cifar10_val = dset.CIFAR10(DATA_ROOT, train=True, download=True,
                           transform=transform)
loader_val = DataLoader(
    cifar10_val, batch_size=BATCH_SIZE,
    sampler=sampler.SubsetRandomSampler(range(NUM_TRAIN, len(cifar10_val))))

cifar10_test = dset.CIFAR10(DATA_ROOT, train=False, download=True,
                            transform=transform)
loader_test = DataLoader(cifar10_test, batch_size=BATCH_SIZE)

100%|██████████| 170M/170M [00:05<00:00, 33.1MB/s] 


In [25]:
class Flatten(nn.Module):
    """Collapse every dimension after the first into a single one.

    Input of shape (N, d1, d2, ...) becomes (N, d1*d2*...); for CIFAR-10
    images that is (N, 3, 32, 32) -> (N, 3072).
    """

    def forward(self, x):
        # `reshape` returns a view when possible (same as `view`) but also
        # accepts non-contiguous inputs, e.g. the result of `permute`, where
        # `view` would raise a RuntimeError.
        return x.reshape(x.shape[0], -1)

class Unflatten(nn.Module):
    """Reshape flat vectors back into image tensors.

    Input of shape (N, channels*height*width) becomes
    (N, channels, height, width). The defaults reproduce the CIFAR-10 layout,
    i.e. (N, 3072) -> (N, 3, 32, 32), so `Unflatten()` behaves as before.

    Args:
        channels: number of image channels (default 3).
        height: image height in pixels (default 32).
        width: image width in pixels (default 32).
    """

    def __init__(self, channels=3, height=32, width=32):
        super().__init__()
        self.channels = channels
        self.height = height
        self.width = width

    def forward(self, x):
        # `reshape` instead of `view` so non-contiguous inputs are accepted.
        return x.reshape(x.shape[0], self.channels, self.height, self.width)

    def extra_repr(self):
        # Shown inside print(model) so the target shape is visible.
        return f"channels={self.channels}, height={self.height}, width={self.width}"

### Training/Accuracy

In [26]:
def compute_accuracy(loader, model, device=device):
    """Return the fraction of samples in `loader` that `model` classifies correctly.

    The model is evaluated in eval mode with gradients disabled. Its previous
    train/eval mode is restored before returning, so calling this in the
    middle of a training loop does not leave the model stuck in eval mode.

    Args:
        loader: iterable yielding `(x, y)` minibatches of inputs and integer
            class labels.
        model: the `nn.Module` to evaluate; `model(x)` must return per-class
            scores with classes along dimension 1.
        device: device the minibatches are moved to before the forward pass.

    Returns:
        Accuracy as a float in [0, 1]; 0.0 if the loader yields no samples.
    """
    was_training = model.training
    model.eval()
    num_correct = 0
    num_samples = 0
    try:
        with torch.no_grad():
            for x, y in loader:
                x = x.to(device=device, dtype=dtype)
                y = y.to(device=device, dtype=torch.long)
                scores = model(x)
                _, preds = scores.max(1)
                num_correct += (preds == y).sum().item()
                num_samples += y.size(0)
    finally:
        # Put the model back into whatever mode the caller had it in.
        model.train(was_training)

    if num_samples == 0:
        # Empty loader: avoid ZeroDivisionError.
        return 0.0
    return num_correct / num_samples



In [19]:
def evaluate_model(loader, model, device=device):
    """Print the classification accuracy of `model` on the data in `loader`.

    The split name in the printed header is chosen from `loader.dataset.train`:
    a dataset built from the training archive is reported as the validation
    set, anything else as the test set.

    The model is evaluated in eval mode with gradients disabled, and its
    previous train/eval mode is restored afterwards.

    Args:
        loader: DataLoader whose dataset exposes a boolean `train` attribute
            and which yields `(x, y)` minibatches.
        model: the `nn.Module` to evaluate; `model(x)` must return per-class
            scores with classes along dimension 1.
        device: device the minibatches are moved to before the forward pass.
    """
    if loader.dataset.train:
        print('Checking accuracy on validation set')
    else:
        print('Checking accuracy on test set')
    num_correct = 0
    num_samples = 0
    was_training = model.training
    model.eval()  # set model to evaluation mode
    try:
        with torch.no_grad():
            for x, y in loader:
                x = x.to(device=device, dtype=dtype)  # move to device, e.g. GPU
                y = y.to(device=device, dtype=torch.long)
                scores = model(x)
                _, preds = scores.max(1)
                # .item() keeps the running total a plain Python int instead
                # of a tensor living on the device.
                num_correct += (preds == y).sum().item()
                num_samples += preds.size(0)
    finally:
        # Put the model back into whatever mode the caller had it in.
        model.train(was_training)

    # Guard against an empty loader (would otherwise divide by zero).
    acc = num_correct / num_samples if num_samples > 0 else 0.0
    print('Got %d / %d correct (%.2f)' % (num_correct, num_samples, 100 * acc))

In [4]:
import matplotlib.pyplot as plt

def _plot_history(history):
    """Plot logged training loss and validation accuracy on twin y-axes."""
    # Defensive: only plot points for which all three series were recorded.
    n = min(len(history["iter"]), len(history["train_loss"]), len(history["val_acc"]))
    if n == 0:
        print("No statistics collected; nothing to plot.")
        return

    it = history["iter"][:n]
    losses = history["train_loss"][:n]
    acc = [a * 100 for a in history["val_acc"][:n]]

    fig, ax1 = plt.subplots(figsize=(8, 5))

    # ---- Left axis: Loss ----
    loss_line, = ax1.plot(it, losses, color='tab:red', marker='o', label='Training Loss')
    ax1.set_xlabel("Iteration")
    ax1.set_ylabel("Training Loss", color='tab:red')
    ax1.tick_params(axis='y', labelcolor='tab:red')

    # ---- Right axis: Accuracy ----
    ax2 = ax1.twinx()
    acc_line, = ax2.plot(it, acc, color='tab:blue', marker='o', label='Validation Accuracy')
    ax2.set_ylabel("Validation Accuracy (%)", color='tab:blue')
    ax2.tick_params(axis='y', labelcolor='tab:blue')

    # ---- Title, Legend & Grid ----
    ax1.set_title("Training Loss & Validation Accuracy vs Iteration")
    ax1.legend(handles=[loss_line, acc_line], loc='best')
    ax1.grid(True)

    plt.show()


def train(model, optimizer, epochs=1, print_every=100):
    """
    Train a model on CIFAR-10 using the PyTorch Module API.

    Every `print_every` iterations the current minibatch loss and the
    accuracy on `loader_val` are printed and recorded. Training can be
    stopped early with Ctrl+C; whatever was recorded up to that point is
    still plotted and returned.

    Args:
        model: the `nn.Module` to train; it is moved to the global `device`.
        optimizer: optimizer already constructed over `model`'s parameters.
        epochs: number of passes over `loader_train`.
        print_every: logging / validation interval, in iterations.

    Returns:
        dict with equally long lists under the keys "iter" (global iteration
        number), "train_loss" (minibatch loss at that iteration) and
        "val_acc" (validation accuracy in [0, 1]).
    """
    model = model.to(device=device)

    history = {
        "iter": [],
        "train_loss": [],
        "val_acc": [],
    }

    global_iter = 0

    try:
        for e in range(epochs):
            print("=" * 60)
            print(f"Epoch {e + 1}/{epochs}")
            print("=" * 60)

            model.train()

            for t, (x, y) in enumerate(loader_train):
                global_iter += 1

                x = x.to(device=device, dtype=dtype)
                y = y.to(device=device, dtype=torch.long)

                scores = model(x)
                loss = F.cross_entropy(scores, y)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                if global_iter % print_every == 0:
                    loss_value = loss.item()

                    # Compute validation accuracy, then switch back to train
                    # mode: evaluation puts the model in eval mode, and the
                    # rest of the epoch must not run with dropout/batch-norm
                    # in inference behaviour.
                    val_acc = compute_accuracy(loader_val, model)
                    model.train()

                    # Record all three values together, only after the
                    # validation pass finished, so an interrupt during
                    # validation cannot leave the lists with unequal lengths.
                    history["iter"].append(global_iter)
                    history["train_loss"].append(loss_value)
                    history["val_acc"].append(val_acc)

                    print(f"[Iter {global_iter}] loss = {loss_value:.4f}")
                    print(f"   validation accuracy: {val_acc * 100:.2f}%")

            print()

    except KeyboardInterrupt:
        print("\n⚠️ Training interrupted by user. Plotting collected results...\n")

    finally:
        # Runs on normal completion, on Ctrl+C and on errors alike.
        _plot_history(history)

    return history


In [12]:
def train_model(model, optimizer, epochs=10):
    """Train `model` with `optimizer` via `train`, for `epochs` epochs.

    Convenience wrapper around `train`; the training history returned by
    `train` is discarded here, so call `train` directly if it is needed.

    Args:
        model: the model to train.
        optimizer: optimizer constructed over `model`'s parameters.
        epochs: number of training epochs (defaults to 10).
    """
    train(model, optimizer, epochs=epochs)

### Train model here!