In [121]:
from itertools import product

import pandas as pd

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torchvision.transforms import v2
from torch.utils.data import DataLoader

In [122]:
preprocess_transforms = [
    transforms.ToTensor(), 
    transforms.Normalize((0.5,), (0.5,))
]

augmentation_transforms = [
    transforms.RandomRotation(45),
    v2.RandomResizedCrop(size=(28, 28), antialias=True),
    v2.RandomHorizontalFlip(p=0.5),
]

train_indices = torch.randperm(60000)[:2000]
train_dataset = torch.utils.data.Subset(torchvision.datasets.FashionMNIST(
        root='./data', train=True, download=True, 
        transform=transforms.Compose(preprocess_transforms + augmentation_transforms)
    ),
    train_indices
)

test_indices = torch.randperm(10000)[:350]
test_dataset = torch.utils.data.Subset(torchvision.datasets.FashionMNIST(
        root='./data', train=False, download=True, 
        transform=transforms.Compose(preprocess_transforms)
    ),
    test_indices
)

In [123]:
batch_sizes = [16, 32, 64]
num_epochs = [5, 10, 20]
learning_rates = [1e-3, 1e-4]
dropouts = [0.0, 0.2]

optimizers = {'SGD': optim.SGD, 'Adam': optim.Adam, 'RMSprop': optim.RMSprop}

def get_data_loader(batch_size):
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    return train_loader, test_loader

In [124]:
def create_cnn(dropout=0.0, depth=3, h=64):
    layers = []
    width, height, dim = 28, 28, 1

    for i in range(depth):
        in_ch = 16 * (2 ** (i - 1)) if i > 0 else dim
        out_ch = 16 * (2 ** i)

        layers.append(nn.Conv2d(in_ch, out_ch, 3, padding='same'))
        layers.append(nn.ReLU())
        layers.append(nn.MaxPool2d(3))

    if dropout > 0:
        layers.append(nn.Dropout2d(dropout))

    layers.append(nn.Flatten())
    layers.append(nn.LazyLinear(h))
    layers.append(nn.ReLU())

    layers.append(nn.Linear(h, 10))
    layers.append(nn.Softmax())
    return nn.Sequential(*layers)

In [125]:
def train_and_evaluate(batch_size, optimizer_name, lr, dropout, max_epochs=50, verbose=False):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    train_loader, test_loader = get_data_loader(batch_size)
    model = create_cnn(dropout).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optimizers[optimizer_name](model.parameters(), lr=lr)

    best_acc = 0.0
    min_loss = float('inf')
    patience_counter = 0
    history = []

    for epoch in range(max_epochs):
        model.train()
        correct, total = 0, 0
        running_loss = 0.0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            _, predicted = torch.max(outputs, 1)
            loss.backward()
            optimizer.step()
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            running_loss += loss.item()
        train_acc = correct / total
        train_loss = running_loss / len(train_loader)

        model.eval()
        correct, total = 0, 0
        running_loss = 0.0
        with torch.no_grad():
            for images, labels in test_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                loss = criterion(outputs, labels)
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
                running_loss += loss.item()
        test_acc = correct / total
        test_loss = running_loss / len(test_loader)

        history.append((train_acc, train_loss, test_acc, test_loss))
        if verbose:
            print(f"{epoch + 1}: Train Acc {train_acc} Train Loss {train_loss} Test Acc {test_acc} Test Loss {test_loss}")

        if test_acc > best_acc:
            best_acc = test_acc

    return best_acc, history

In [126]:
param_grid = product(batch_sizes, num_epochs, learning_rates, dropouts, optimizers.keys())
results = []

for batch_size, epochs, lr, dropout, optimizer_name in param_grid:
    best_acc, history = train_and_evaluate(batch_size, optimizer_name, lr, dropout, max_epochs=epochs)
    res = (batch_size, epochs, lr, dropout, optimizer_name, best_acc, history)
    print(f"Batch: {res[0]}, Epochs: {res[1]}, Learning rate: {res[2]}, Dropout: {res[3]}, Opt: {res[4]} Acc: {res[5]:.4f}")
    results.append(res)

Batch: 16, Epochs: 5, Learning rate: 0.001, Dropout: 0.0, Opt: SGD Acc: 0.1371
Batch: 16, Epochs: 5, Learning rate: 0.001, Dropout: 0.0, Opt: Adam Acc: 0.4257
Batch: 16, Epochs: 5, Learning rate: 0.001, Dropout: 0.0, Opt: RMSprop Acc: 0.4543
Batch: 16, Epochs: 5, Learning rate: 0.001, Dropout: 0.2, Opt: SGD Acc: 0.1000
Batch: 16, Epochs: 5, Learning rate: 0.001, Dropout: 0.2, Opt: Adam Acc: 0.4400
Batch: 16, Epochs: 5, Learning rate: 0.001, Dropout: 0.2, Opt: RMSprop Acc: 0.4029
Batch: 16, Epochs: 5, Learning rate: 0.0001, Dropout: 0.0, Opt: SGD Acc: 0.0829
Batch: 16, Epochs: 5, Learning rate: 0.0001, Dropout: 0.0, Opt: Adam Acc: 0.2714
Batch: 16, Epochs: 5, Learning rate: 0.0001, Dropout: 0.0, Opt: RMSprop Acc: 0.2457
Batch: 16, Epochs: 5, Learning rate: 0.0001, Dropout: 0.2, Opt: SGD Acc: 0.0829
Batch: 16, Epochs: 5, Learning rate: 0.0001, Dropout: 0.2, Opt: Adam Acc: 0.2943
Batch: 16, Epochs: 5, Learning rate: 0.0001, Dropout: 0.2, Opt: RMSprop Acc: 0.2457
Batch: 16, Epochs: 10, Lea

In [127]:
df = pd.DataFrame(results, columns=['Batch', 'Epochs', 'Learning rate', 'Dropout', 'Opt', 'Acc', 'History'])
df.to_csv('data/c_5_lab_2_out.csv', index=False) 