<a href="https://colab.research.google.com/github/yashc73080/CS462-Deep-Learning/blob/main/HW1/architecture_and_hyperparam_search.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import numpy as np
import random
import matplotlib.pyplot as plt

Loading and Preparing Data

In [None]:
# Download (if needed) and open both MNIST splits: training split first, then
# the held-out test split. Images are converted to tensors on access.
trainset, testset = (
    torchvision.datasets.MNIST(
        root='./data',
        train=is_train,
        download=True,
        transform=transforms.ToTensor(),
    )
    for is_train in (True, False)
)

100%|██████████| 9.91M/9.91M [00:00<00:00, 58.7MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 1.70MB/s]
100%|██████████| 1.65M/1.65M [00:00<00:00, 14.3MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 9.68MB/s]


In [None]:
def preprocess_data(dataset):
  """Scale, centre and flatten the images of a dataset.

  Reads `dataset.data` and `dataset.targets`. Pixel values are divided by
  255 and shifted by -0.5, then every sample is flattened into one row.

  Returns:
    A `(flattened_images, targets)` tuple; `targets` is `dataset.targets`
    passed through unchanged.
  """
  pixels = dataset.data / 255.0 - 0.5
  n_samples = pixels.size(0)
  # Collapse every per-sample dimension into a single feature axis.
  return pixels.view(n_samples, -1), dataset.targets

# Apply the same preprocessing to both splits (train first, then test).
(X_train, y_train), (X_test, y_test) = map(preprocess_data, (trainset, testset))

In [None]:
# Sanity check: one row per training image, one column per pixel.
X_train.shape

torch.Size([60000, 784])

# Basic Model

$F(\vec{x}) = \text{softmax}(A\vec{x}+\vec{b})$

In [None]:
class BasicModel():
  def __init__(self, input=784, output=10, device='cpu'):
    self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    self.input = input
    self.output = output
    self.A = nn.Parameter(torch.randn(output, input).to(device), requires_grad=True)
    self.b = nn.Parameter(torch.randn(output, 1).to(device), requires_grad=True)

  def forward(self, x):
    logits = x @ self.A.t() + self.b
    y_hat = nn.functional.softmax(logits, dim=1)
    return y_hat

  def train(self, X_train, y_train, X_test, y_test, epochs=100, lr=0.7):
    self.loss_function = nn.CrossEntropyLoss()
    self.optimizer = optim.SGD([self.A, self.b], lr=lr)

    for epoch in range(epochs):

      for x, y in zip(X_train, y_train):
        x = x.view(1, -1)
        prediction = self.forward(x)
        loss = self.loss_function(prediction, y)
        loss.backward()
        self.optimizer.step()
        self.optimizer.zero_grad()

      with torch.no_grad():
        predictions = self.forward(X_test)
        acc = (torch.argmax(predictions, dim=1) == y_test).float().mean()
      print(f"Epoch {epoch+1}/{epochs}, Loss: {loss.item():.4f}, Test Acc: {acc:.4f}")

  def test(self, X_test, y_test):
