Draw on everything we've discussed to build and train a model to get the best performance on CIFAR that you can. Tentatively due on the 10th. Ideally you can set this up to run fairly easily using the code that you've developed so far, and can let that churn while you focus on your final project.

(This is a somewhat open-ended question - there is no 'right' answer here, but I am looking for some amount of thoroughness and breadth in your approach and in your discussion of it. One thing I'll note: it might be worth thinking about how you could experiment with different models to find a good one before launching a 'full' training run.)

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.transforms.functional as TF
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Subset, Dataset
import numpy as np
import matplotlib.pyplot as plt
import time
import random

In [2]:
# Seed every RNG this notebook uses (Python, NumPy, PyTorch) before anything
# else so each run starts from the same random state.
random.seed(0)
np.random.seed(0)
torch.manual_seed(0)

# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
# Turn on the cuDNN benchmark flag (cuDNN auto-selects convolution algorithms;
# NOTE(review): this can make GPU runs non-bit-reproducible despite the seeds).
torch.backends.cudnn.benchmark = True
print(f"Using device: {device}")

Using device: cuda


Load Data

In [3]:
# CIFAR-10 (32x32 RGB)
# Per-channel normalization constants (presumably CIFAR-10 training-set
# statistics - verify if the dataset or preprocessing changes).
normalize = transforms.Normalize(
    mean=[0.4914, 0.4822, 0.4465],
    std=[0.2023, 0.1994, 0.2010],
)
# Keep the images at their native 32x32 resolution. The model in this notebook
# is built for (3, 32, 32) inputs, so upsampling to 224x224 made its first
# Linear layer fail with a shape mismatch (and would cost ~49x more compute).
transform = transforms.Compose([transforms.ToTensor(), normalize])

cifar_train_ds = datasets.CIFAR10(root="./data", train=True, download=True, transform=transform)
cifar_test_ds = datasets.CIFAR10(root="./data", train=False, download=True, transform=transform)

# Shuffle only the training split; evaluation order does not matter, so the
# test loader uses a larger batch and a fixed order.
cifar_train_loader = DataLoader(cifar_train_ds, batch_size=256, shuffle=True, num_workers=0, pin_memory=True)
cifar_test_loader = DataLoader(cifar_test_ds, batch_size=512, shuffle=False, num_workers=0, pin_memory=True)

## Models

In [4]:
class ConvNetwork(nn.Module):
    """Small CNN classifier: two conv/BN/ReLU/dropout stages, a 2x2 max-pool, then an MLP head.

    Args:
        input_size: Shape of a single input sample as (channels, height, width).
        num_classes: Number of output logits.
        device: Device the module is moved to at the end of construction.
    """

    def __init__(self, input_size=(3, 32, 32), num_classes=10, device='cpu'):
        super().__init__()

        # Convolutional Layers (layer order/indices are kept stable so that
        # state_dict keys such as "conv_layers.0.weight" do not change).
        self.conv_layers = nn.Sequential(
            nn.Conv2d(input_size[0], 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.Dropout2d(0.25),
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.Dropout2d(0.25),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

        # Get flattened size with a dummy tensor. The probe runs in eval mode:
        # in training mode the all-zero dummy batch would be folded into the
        # BatchNorm running statistics before any real data is seen.
        self.conv_layers.eval()
        with torch.no_grad():
            dummy = torch.zeros(1, *input_size)
            flattened_dim = torch.flatten(self.conv_layers(dummy), 1).size(1)
        self.conv_layers.train()  # restore the default (training) mode

        # Fully Connected Layers
        self.fc_layers = nn.Sequential(
            nn.Linear(flattened_dim, 256),
            nn.ReLU(),
            nn.Linear(256, num_classes),
        )

        self.to(device)

    def forward(self, x):
        """Return class logits of shape (batch, num_classes) for a batch of images."""
        x = self.conv_layers(x)
        # flatten (rather than view) also handles non-contiguous activations.
        x = torch.flatten(x, 1)
        return self.fc_layers(x)

In [5]:
def _mean_loss(model, loader, criterion, device):
    """Return the per-sample mean of `criterion` over `loader` with the model in eval mode."""
    model.eval()
    loss_sum, seen = 0.0, 0
    with torch.no_grad():
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)
            batch_size = inputs.size(0)
            # criterion returns a batch mean; re-weight by batch size so a
            # smaller final batch does not skew the epoch average.
            loss_sum += criterion(model(inputs), labels).item() * batch_size
            seen += batch_size
    return loss_sum / seen if seen else float("nan")


def train_model(model, train_loader, test_loader, num_epochs=10, lr=0.001, plot=False, device='cpu'):
    """Train `model` with Adam and cross-entropy, evaluating on `test_loader` after every epoch.

    Args:
        model: Module mapping a batch of inputs to class logits.
        train_loader: Iterable of (inputs, labels) batches used for optimisation.
        test_loader: Iterable of (inputs, labels) batches used for the per-epoch evaluation loss.
        num_epochs: Number of passes over `train_loader`.
        lr: Adam learning rate (weight decay is fixed at 1e-4).
        plot: If True, show train/test loss curves with matplotlib when training ends.
        device: Device the model and every batch are moved to.

    Returns:
        Tuple `(model, train_losses, test_losses)`; each loss list has one
        per-sample mean loss per epoch.
    """
    model.to(device)
    train_losses, test_losses = [], []

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=0.0001)

    for epoch in range(num_epochs):
        model.train()
        running_loss, seen = 0.0, 0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            loss = criterion(model(inputs), labels)
            loss.backward()
            optimizer.step()

            batch_size = inputs.size(0)
            running_loss += loss.item() * batch_size
            seen += batch_size

        # Normalise by the samples actually processed, not len(dataset): the
        # two differ when the loader uses drop_last or a sampler.
        epoch_loss = running_loss / seen if seen else float("nan")
        train_losses.append(epoch_loss)

        epoch_test_loss = _mean_loss(model, test_loader, criterion, device)
        test_losses.append(epoch_test_loss)

        print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {epoch_loss:.4f}, Test Loss: {epoch_test_loss:.4f}")

    if plot:
        epochs = range(1, len(train_losses) + 1)
        plt.figure(figsize=(10, 5))
        plt.plot(epochs, train_losses, label='Train Loss')
        plt.plot(epochs, test_losses, label='Test Loss')
        plt.xlabel('Epochs')
        plt.ylabel('Loss')
        plt.title('Training and Test Loss over Epochs')
        plt.legend()
        plt.show()

    return model, train_losses, test_losses

def test_model(model, test_loader, device='cpu'):
    model.to(device)
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    accuracy = 100 * correct / total
    return accuracy

In [None]:
# Build the network for the shape the data pipeline actually produces, so the
# classifier head always matches the loader output. A hard-coded (3, 32, 32)
# breaks as soon as the transform resizes the images.
# NOTE(review): large inputs make the first Linear layer (and activation
# memory) very large with this architecture.
sample_shape = tuple(cifar_train_ds[0][0].shape)
cnn = ConvNetwork(input_size=sample_shape, device=device)
cnn, train_losses, test_losses = train_model(cnn, cifar_train_loader, cifar_test_loader, num_epochs=20, lr=0.001, plot=True, device=device)
final_test_accuracy = test_model(cnn, cifar_test_loader, device=device)

print(f"Final Test Accuracy: {final_test_accuracy:.2f}%")