<a href="https://colab.research.google.com/github/yashc73080/CS462-Deep-Learning/blob/main/HW1/architecture_and_hyperparam_search.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import numpy as np
import random
import matplotlib.pyplot as plt

Loading and Preparing Data

In [2]:
# Fetch MNIST into ./data (downloading on first use); ToTensor is attached as the
# per-sample transform for both the training and the held-out test split.
trainset = torchvision.datasets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)
testset = torchvision.datasets.MNIST(
    root='./data',
    train=False,
    download=True,
    transform=transforms.ToTensor(),
)

100%|██████████| 9.91M/9.91M [00:00<00:00, 18.0MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 481kB/s]
100%|██████████| 1.65M/1.65M [00:00<00:00, 4.43MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 13.4MB/s]


In [3]:
def preprocess_data(dataset, device):
  """Scale, centre, and flatten a dataset's raw tensors and move them to `device`.

  Args:
    dataset: Object exposing `.data` (one sample per leading index) and
      `.targets` tensors.
    device: Device the returned tensors are moved to.

  Returns:
    A `(features, labels)` tuple. `features` is `dataset.data` divided by 255,
    shifted by -0.5, and flattened to one row per sample; `labels` is
    `dataset.targets`. Both live on `device`.
  """
  # Divide by 255 and subtract 0.5 (maps 0..255 inputs onto [-0.5, 0.5]).
  pixels = dataset.data / 255.0 - 0.5
  # Collapse every non-leading dimension into a single feature axis.
  num_samples = pixels.size(0)
  features = pixels.view(num_samples, -1)
  labels = dataset.targets
  return features.to(device), labels.to(device)

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU, and
# preprocess both splits onto that device up front.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
X_train, y_train = preprocess_data(dataset=trainset, device=device)
X_test, y_test = preprocess_data(dataset=testset, device=device)

In [None]:
# Sanity check: one row per training sample, one column per flattened pixel.
X_train.shape

torch.Size([60000, 784])

# Basic Model

$F(\vec{x}) = \text{softmax}(A\vec{x}+\vec{b})$

In [11]:
class BasicModel():
  """Single linear layer classifier, F(x) = softmax(A x + b).

  `forward` returns raw logits; the softmax is applied inside
  `nn.CrossEntropyLoss` during training, and predictions are the argmax over
  the logits.
  """

  def __init__(self, input=784, output=10, device='cpu'):
    """Create randomly initialised parameters A (output x input) and b (output).

    Args:
      input: Number of input features per sample.
      output: Number of classes.
      device: Fallback device. NOTE: it is only used when CUDA is unavailable;
        whenever CUDA is available the parameters are placed on the GPU
        regardless of this argument.
    """
    # Set device to CPU or GPU
    self.device = torch.device('cuda' if torch.cuda.is_available() else device)

    # Model parameters
    self.input = input
    self.output = output
    self.A = nn.Parameter(torch.randn(output, input, device=self.device), requires_grad=True) # shape (output, input)
    self.b = nn.Parameter(torch.randn(output, device=self.device), requires_grad=True)  # shape (output,)

  def forward(self, x):
    """Return the logits `x @ A.T + b` for a batch `x` of shape (batch, input)."""
    logits = x @ self.A.t() + self.b # CrossEntropyLoss applies softmax internally
    return logits

  def train(self, X_train, y_train, epochs=100, lr=0.7, batch_size=64):
    """Fit A and b with mini-batch SGD on a cross-entropy loss.

    Batches are taken in order (no shuffling). After every epoch the mean of
    the per-batch losses is printed; after the last epoch the accuracy on the
    full training set is printed.

    Args:
      X_train: Feature tensor, one row per sample.
      y_train: Class-index targets aligned with `X_train`.
      epochs: Number of full passes over the data.
      lr: SGD learning rate.
      batch_size: Number of samples per mini-batch (must be >= 1).

    Raises:
      ValueError: If `X_train` is empty or `batch_size` is not positive.
    """
    n = X_train.size(0)
    if n == 0:
      raise ValueError("X_train must contain at least one sample")
    if batch_size < 1:
      raise ValueError("batch_size must be a positive integer")

    self.loss_function = nn.CrossEntropyLoss()
    self.optimizer = optim.SGD([self.A, self.b], lr=lr)

    for epoch in range(epochs):
      total_loss = 0.0
      # Count the batches actually processed: the last batch may be partial,
      # so `n // batch_size` undercounts (and is 0 when n < batch_size).
      num_batches = 0

      # Process by batch for more efficiency
      for i in range(0, n, batch_size):
        x_batch = X_train[i:i+batch_size]
        y_batch = y_train[i:i+batch_size]

        logits = self.forward(x_batch)
        loss = self.loss_function(logits, y_batch)

        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        total_loss += loss.item()
        num_batches += 1

      print(f"Epoch {epoch+1}/{epochs}, Training Loss: {total_loss / num_batches:.4f}")

    # Evaluate on the full training set without tracking gradients.
    with torch.no_grad():
      logits = self.forward(X_train)
      preds = torch.argmax(logits, dim=1)
      acc = (preds == y_train).float().mean().item()
    print(f"Final Train Accuracy: {acc:.4f}")

  def test(self, X_test, y_test):
    """Print and return the classification accuracy on `(X_test, y_test)`."""
    with torch.no_grad():
      logits = self.forward(X_test)
      preds = torch.argmax(logits, dim=1)
      acc = (preds == y_test).float().mean().item()
    print(f"Test Accuracy: {acc:.4f}")
    return acc

In [12]:
# Seed PyTorch's RNG so the random initialisation of A and b (torch.randn in
# BasicModel.__init__) is the same on every Restart & Run All; without it each
# run starts from a different model and reports different losses/accuracy.
torch.manual_seed(0)

basic_model = BasicModel()
basic_model.train(X_train, y_train, epochs=150, lr=0.7, batch_size=64)
basic_test_acc = basic_model.test(X_test, y_test)

Epoch 1/150, Training Loss: 1.3964
Epoch 2/150, Training Loss: 0.7092
Epoch 3/150, Training Loss: 0.6307
Epoch 4/150, Training Loss: 0.5881
Epoch 5/150, Training Loss: 0.5598
Epoch 6/150, Training Loss: 0.5401
Epoch 7/150, Training Loss: 0.5247
Epoch 8/150, Training Loss: 0.5123
Epoch 9/150, Training Loss: 0.5019
Epoch 10/150, Training Loss: 0.4929
Epoch 11/150, Training Loss: 0.4849
Epoch 12/150, Training Loss: 0.4780
Epoch 13/150, Training Loss: 0.4721
Epoch 14/150, Training Loss: 0.4669
Epoch 15/150, Training Loss: 0.4625
Epoch 16/150, Training Loss: 0.4586
Epoch 17/150, Training Loss: 0.4551
Epoch 18/150, Training Loss: 0.4520
Epoch 19/150, Training Loss: 0.4493
Epoch 20/150, Training Loss: 0.4467
Epoch 21/150, Training Loss: 0.4444
Epoch 22/150, Training Loss: 0.4421
Epoch 23/150, Training Loss: 0.4400
Epoch 24/150, Training Loss: 0.4380
Epoch 25/150, Training Loss: 0.4361
Epoch 26/150, Training Loss: 0.4343
Epoch 27/150, Training Loss: 0.4327
Epoch 28/150, Training Loss: 0.4312
E