<a href="https://colab.research.google.com/github/zihadbappy/Machine-Learning-Projects/blob/master/HW3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# CSIT 598 - Assignment 3: Deep Learning on MNIST
# Author: Your Name
# Date: Nov 2025

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import time
import matplotlib.pyplot as plt

# Set device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# MNIST Data Loading (with normalization)
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))  # MNIST mean & std
])

train_dataset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform)
test_dataset  = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
test_loader  = DataLoader(test_dataset, batch_size=1000, shuffle=False)

def train_and_evaluate(model, optimizer, epochs=15, model_name="model"):
    model = model.to(device)
    criterion = nn.CrossEntropyLoss()

    start_time = time.time()
    for epoch in range(epochs):
        model.train()
        for data, target in train_loader:
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()

        # Evaluate on test set every epoch
        model.eval()
        correct = 0
        with torch.no_grad():
            for data, target in test_loader:
                data, target = data.to(device), target.to(device)
                output = model(data)
                pred = output.argmax(dim=1, keepdim=True)
                correct += pred.eq(target.view_as(pred)).sum().item()
        acc = 100. * correct / len(test_loader.dataset)
        if (epoch+1) % 5 == 0 or epoch == epochs-1:
            print(f"{model_name} | Epoch {epoch+1:2d} | Test Accuracy: {acc:.2f}%")

    total_time = time.time() - start_time
    final_acc = acc
    print(f"{model_name} Final Accuracy: {final_acc:.2f}% | Time: {total_time:.1f}s\n")
    return final_acc, total_time

# ======================== Task A: MLP ========================

# A1: 3-layer MLP + ReLU + Adam
class MLP3_ReLU_Adam(nn.Module):
    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(
            nn.Flatten(),
            nn.Linear(28*28, 512), nn.ReLU(),
            nn.Linear(512, 256),   nn.ReLU(),
            nn.Linear(256, 10)
        )
    def forward(self, x): return self.net(x)

# A2: 3-layer MLP + Sigmoid + Adam
class MLP3_Sigmoid_Adam(nn.Module):
    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(
            nn.Flatten(),
            nn.Linear(28*28, 512), nn.Sigmoid(),
            nn.Linear(512, 256),   nn.Sigmoid(),
            nn.Linear(256, 10)
        )
    def forward(self, x): return self.net(x)

# A3: 3-layer MLP + ReLU + SGD
class MLP3_ReLU_SGD(nn.Module):
    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(
            nn.Flatten(),
            nn.Linear(28*28, 512), nn.ReLU(),
            nn.Linear(512, 256),   nn.ReLU(),
            nn.Linear(256, 10)
        )
    def forward(self, x): return self.net(x)

# A4: Best 5-layer MLP (ReLU + Adam)
class MLP5_Best(nn.Module):
    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(
            nn.Flatten(),
            nn.Linear(28*28, 1024), nn.ReLU(),
            nn.Linear(1024, 512),   nn.ReLU(),
            nn.Linear(512, 256),    nn.ReLU(),
            nn.Linear(256, 128),    nn.ReLU(),
            nn.Linear(128, 10)
        )
    def forward(self, x): return self.net(x)

print("=== Task A: MLP Experiments ===")
acc1, t1 = train_and_evaluate(MLP3_ReLU_Adam(), optim.Adam(MLP3_ReLU_Adam().parameters(), lr=0.001), model_name="A1: 3-layer ReLU+Adam")
acc2, t2 = train_and_evaluate(MLP3_Sigmoid_Adam(), optim.Adam(MLP3_Sigmoid_Adam().parameters(), lr=0.001), model_name="A2: 3-layer Sigmoid+Adam")
acc3, t3 = train_and_evaluate(MLP3_ReLU_SGD(), optim.SGD(MLP3_ReLU_SGD().parameters(), lr=0.01, momentum=0.9), model_name="A3: 3-layer ReLU+SGD")
acc4, t4 = train_and_evaluate(MLP5_Best(), optim.Adam(MLP5_Best().parameters(), lr=0.001), model_name="A4: 5-layer Best (ReLU+Adam)")

# ======================== Task B: CNN ========================

# B1: Simple 4-layer CNN
class SimpleCNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(1, 32, 3, padding=1), nn.ReLU(),
            nn.Conv2d(32, 64, 3, padding=1), nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(64, 128, 3, padding=1), nn.ReLU(),
            nn.Conv2d(128, 128, 3, padding=1), nn.ReLU(),
            nn.MaxPool2d(2),
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(128*7*7, 512), nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, 10)
        )
    def forward(self, x):
        x = self.features(x)
        x = self.classifier(x)
        return x

# B2: ResNet-18 (lightweight version works great on MNIST)
from torchvision.models import resnet18
class ResNet18_MNIST(nn.Module):
    def __init__(self):
        super().__init__()
        resnet = resnet18(num_classes=10)
        resnet.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=1, padding=3, bias=False)  # adapt to 28×28
        resnet.maxpool = nn.Identity()  # remove initial maxpool
        self.model = resnet
    def forward(self, x): return self.model(x)

print("\n=== Task B: CNN Experiments ===")
acc_cnn1, t_cnn1 = train_and_evaluate(SimpleCNN(), optim.Adam(SimpleCNN().parameters(), lr=0.001), epochs=15, model_name="B1: Simple 4-layer CNN")
acc_resnet, t_resnet = train_and_evaluate(ResNet18_MNIST(), optim.Adam(ResNet18_MNIST().parameters(), lr=0.001), epochs=12, model_name="B2: ResNet-18 (adapted)")

# Summary table (you can copy-paste into your report)
print("\n=== FINAL RESULTS SUMMARY ===")
print("Model                        | Test Acc (%) | Time (s)")
print("-----------------------------|--------------|---------")
print(f"A1: 3-layer ReLU+Adam        | {acc1:7.2f}     | {t1:6.1f}")
print(f"A2: 3-layer Sigmoid+Adam     | {acc2:7.2f}     | {t2:6.1f}")
print(f"A3: 3-layer ReLU+SGD         | {acc3:7.2f}     | {t3:6.1f}")
print(f"A4: 5-layer ReLU+Adam        | {acc4:7.2f}     | {t4:6.1f}")
print(f"B1: Simple 4-layer CNN       | {acc_cnn1:7.2f}     | {t_cnn1:6.1f}")
print(f"B2: ResNet-18 (adapted)      | {acc_resnet:7.2f}     | {t_resnet:6.1f}")

Using device: cuda


100%|██████████| 9.91M/9.91M [00:00<00:00, 18.6MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 496kB/s]
100%|██████████| 1.65M/1.65M [00:00<00:00, 4.67MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 8.34MB/s]


=== Task A: MLP Experiments ===
A1: 3-layer ReLU+Adam | Epoch  5 | Test Accuracy: 7.12%
A1: 3-layer ReLU+Adam | Epoch 10 | Test Accuracy: 7.12%
A1: 3-layer ReLU+Adam | Epoch 15 | Test Accuracy: 7.12%
A1: 3-layer ReLU+Adam Final Accuracy: 7.12% | Time: 202.2s

A2: 3-layer Sigmoid+Adam | Epoch  5 | Test Accuracy: 9.82%
A2: 3-layer Sigmoid+Adam | Epoch 10 | Test Accuracy: 9.82%
A2: 3-layer Sigmoid+Adam | Epoch 15 | Test Accuracy: 9.82%
A2: 3-layer Sigmoid+Adam Final Accuracy: 9.82% | Time: 199.7s

A3: 3-layer ReLU+SGD | Epoch  5 | Test Accuracy: 8.70%
A3: 3-layer ReLU+SGD | Epoch 10 | Test Accuracy: 8.70%
A3: 3-layer ReLU+SGD | Epoch 15 | Test Accuracy: 8.70%
A3: 3-layer ReLU+SGD Final Accuracy: 8.70% | Time: 201.3s

A4: 5-layer Best (ReLU+Adam) | Epoch  5 | Test Accuracy: 10.32%
A4: 5-layer Best (ReLU+Adam) | Epoch 10 | Test Accuracy: 10.32%
A4: 5-layer Best (ReLU+Adam) | Epoch 15 | Test Accuracy: 10.32%
A4: 5-layer Best (ReLU+Adam) Final Accuracy: 10.32% | Time: 200.7s


=== Task B: CNN