# CSE 151B: Homework 2 Coding
## PyTorch Implementation

Using PyTorch’s `Sequential` model class, build a deep convolutional network to classify handwritten digits in MNIST.

You are only allowed to use the following in your model design:
- Linear Layers
- Conv2D
- MaxPool2D
- BatchNorm2D
- Dropout Layers
- ReLU and Softmax
- Flatten

Your goal is to build a model that achieves **test accuracy ≥ 0.985** with fewer than 1 million parameters.

**Warning**: The modules in your Sequential network should *only* consist of `nn` objects! That means you should not be using `torch.nn.functional` modules or lambda expressions in your Sequential block. Leaving functional/lambda expressions in your model code will result in no credit!

This notebook provides a skeleton layout for you. You may use whatever parts of this notebook you deem necessary; there is no need for you to adhere to the structure. However, during submission, you must carefully follow the zip file formatting as requested; see the bottom of the notebook.

In [7]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

In [8]:
def get_data_loaders(batch_size) -> tuple[DataLoader, DataLoader]:
    '''
    Return the training and testing MNIST dataloaders.
    '''
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])
    
    train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
    test_dataset = datasets.MNIST(root='./data', train=False, download=True, transform=transform)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    return train_loader, test_loader


In [9]:
def build_model(dropout_prob=0.5) -> nn.Module:
    model = nn.Sequential(
        # Input: (1, 28, 28)
        nn.Conv2d(1, 32, kernel_size=3, padding=1),     # (32, 28, 28)
        nn.BatchNorm2d(32),
        nn.ReLU(),
        nn.Conv2d(32, 64, kernel_size=3, padding=1),    # (64, 28, 28)
        nn.BatchNorm2d(64),
        nn.ReLU(),
        nn.MaxPool2d(2),                                # (64, 14, 14)
        nn.Dropout(dropout_prob),

        nn.Conv2d(64, 128, kernel_size=3, padding=1),   # (128, 14, 14)
        nn.BatchNorm2d(128),
        nn.ReLU(),
        nn.Conv2d(128, 128, kernel_size=3, padding=1),  # (128, 14, 14)
        nn.BatchNorm2d(128),
        nn.ReLU(),
        nn.MaxPool2d(2),                                # (128, 7, 7)
        nn.Dropout(dropout_prob),

        nn.Flatten(),                                   # (128*7*7) = 6272
        nn.Linear(6272, 256),
        nn.BatchNorm1d(256),
        nn.ReLU(),
        nn.Dropout(dropout_prob),
        nn.Linear(256, 10),
        nn.Softmax(dim=1)
    )
    return model


In [None]:
def check_params():
    model = build_model()
    print(f"Number of parameters: {sum(p.numel() for p in model.parameters())}")

In [11]:
def train(model, optimizer, criterion, train_loader, n_epochs=1):
    '''
    Train the model for `n_epochs` epochs. Returns none (model is modified in place).
    '''
    model.train()
    for epoch in range(n_epochs):
        running_loss = 0.0
        correct = 0
        total = 0

        for images, labels in train_loader:
            # Zero the gradients
            optimizer.zero_grad()

            # Forward pass
            outputs = model(images)

            # Compute loss
            loss = criterion(outputs, labels)

            # Backward pass and optimization
            loss.backward()
            optimizer.step()

            # Track statistics
            running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        epoch_loss = running_loss / len(train_loader)
        epoch_acc = correct / total
        print(f"Epoch [{epoch + 1}/{n_epochs}], Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.4f}")


In [12]:
def test(model, test_loader):
    '''
    Tests the model. Returns none (you should print the accuracy).
    '''
    model.eval()
    correct = 0
    total = 0

    with torch.no_grad():
        for images, labels in test_loader:
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = correct / total
    print(f"Test Accuracy: {accuracy:.4f}")


In [13]:
train_loader, test_loader = get_data_loaders(batch_size=64)

criterion = nn.CrossEntropyLoss()
dropout_values = [i / 10 for i in range(10)]

for p in dropout_values:
    print(f"\nTraining with dropout={p:.1f}")
    model = build_model(dropout_prob=p)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    
    train(model, optimizer, criterion, train_loader, n_epochs=5)
    test(model, test_loader)
    torch.save(model, f'hw2_dropout_{p:.1f}.pt')



Training with dropout=0.0
Epoch [1/5], Loss: 1.4923, Accuracy: 0.9784
Epoch [2/5], Loss: 1.4734, Accuracy: 0.9897
Epoch [3/5], Loss: 1.4706, Accuracy: 0.9916
Epoch [4/5], Loss: 1.4692, Accuracy: 0.9929
Epoch [5/5], Loss: 1.4685, Accuracy: 0.9933
Test Accuracy: 0.9927

Training with dropout=0.1
Epoch [1/5], Loss: 1.4947, Accuracy: 0.9778
Epoch [2/5], Loss: 1.4740, Accuracy: 0.9890
Epoch [3/5], Loss: 1.4716, Accuracy: 0.9910
Epoch [4/5], Loss: 1.4706, Accuracy: 0.9915
Epoch [5/5], Loss: 1.4690, Accuracy: 0.9928
Test Accuracy: 0.9918

Training with dropout=0.2
Epoch [1/5], Loss: 1.4987, Accuracy: 0.9736
Epoch [2/5], Loss: 1.4770, Accuracy: 0.9865
Epoch [3/5], Loss: 1.4735, Accuracy: 0.9890
Epoch [4/5], Loss: 1.4718, Accuracy: 0.9905
Epoch [5/5], Loss: 1.4710, Accuracy: 0.9910
Test Accuracy: 0.9948

Training with dropout=0.3
Epoch [1/5], Loss: 1.5038, Accuracy: 0.9704
Epoch [2/5], Loss: 1.4782, Accuracy: 0.9854
Epoch [3/5], Loss: 1.4744, Accuracy: 0.9885
Epoch [4/5], Loss: 1.4732, Accurac

In [16]:
# find your best model, and train it for 10 epochs
best_p = 0.2 # TODO: fill in your best probability
model = build_model(dropout_prob=best_p)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

train(model, optimizer, criterion, train_loader, n_epochs = 10)
test(model, test_loader)
torch.save(model, "hw2_model.pt")

Epoch [1/10], Loss: 1.4984, Accuracy: 0.9734
Epoch [2/10], Loss: 1.4760, Accuracy: 0.9872
Epoch [3/10], Loss: 1.4727, Accuracy: 0.9899
Epoch [4/10], Loss: 1.4710, Accuracy: 0.9911
Epoch [5/10], Loss: 1.4702, Accuracy: 0.9916
Epoch [6/10], Loss: 1.4692, Accuracy: 0.9927
Epoch [7/10], Loss: 1.4681, Accuracy: 0.9937
Epoch [8/10], Loss: 1.4680, Accuracy: 0.9937
Epoch [9/10], Loss: 1.4675, Accuracy: 0.9942
Epoch [10/10], Loss: 1.4670, Accuracy: 0.9946
Test Accuracy: 0.9951


# Submission Instructions

Zip all of your **code** and **model .pt files** into one file, and submit on Gradescope to the respective submission.