In [2]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from load_mnist import load_mnist
import numpy as np
from utils import train_neural_network

# Assignment 1.1

In [3]:
# Run on the GPU when CUDA is usable, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print('Using device:', device)

Using device: cuda


## Data ingestion

In [4]:
# Read the MNIST splits via the project helper. The helper is expected to
# yield exactly four objects, in the order: training inputs, training labels,
# test inputs, test labels (NOTE(review): shapes/dtypes are defined by
# load_mnist and are not visible here).
(X_train, Y_train, X_test, Y_test) = load_mnist()

## Training and evaluating network

In [5]:
# Network architecture
input_size = 28 * 28  # one input feature per pixel of a 28x28 image
hidden_size_1, hidden_size_2 = 256, 128
num_classes = 10

# Optimisation settings
batch_size = 128
num_epochs = 100
learning_rate = 1e-3

In [6]:
# Training split: float32 inputs and int64 labels, moved to the selected
# device, wrapped in a dataset, and served in reshuffled mini-batches.
X_train_tensor = torch.tensor(X_train, dtype=torch.float32).to(device)
Y_train_tensor = torch.tensor(Y_train, dtype=torch.long).to(device)
train_dataset = TensorDataset(X_train_tensor, Y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Test split: same conversion, but batches keep the stored order.
X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(device)
Y_test_tensor = torch.tensor(Y_test, dtype=torch.long).to(device)
test_dataset = TensorDataset(X_test_tensor, Y_test_tensor)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [7]:
class NeuralNet(nn.Module):
    """Fully connected classifier with two ReLU hidden layers.

    The forward pass computes ``fc3(relu(fc2(relu(fc1(x)))))`` and returns the
    raw output of the last linear layer; no softmax is applied here.

    Constructing the model reseeds the global NumPy and PyTorch random number
    generators with 0 *before* any layer is created, so every construction
    with the same sizes starts from identical initial weights.

    Args:
        input_size: Number of features in each input sample.
        hidden_size_1: Width of the first hidden layer.
        hidden_size_2: Width of the second hidden layer.
        num_classes: Number of output units.
    """

    def __init__(self, input_size: int, hidden_size_1: int, hidden_size_2: int, num_classes: int):
        super(NeuralNet, self).__init__()
        # Seeding must happen before the layers are built: nn.Linear draws its
        # initial weights from PyTorch's RNG at construction time, and NumPy's
        # seed alone has no influence on that.
        np.random.seed(0)
        torch.manual_seed(0)
        self.fc1 = nn.Linear(input_size, hidden_size_1)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size_1, hidden_size_2)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(hidden_size_2, num_classes)

    def forward(self, x):
        """Map inputs to per-class scores.

        Args:
            x: Tensor whose last dimension has ``input_size`` entries.

        Returns:
            Tensor with ``num_classes`` entries in its last dimension.
        """
        out = self.fc1(x)
        out = self.relu1(out)
        out = self.fc2(out)
        out = self.relu2(out)
        out = self.fc3(out)
        return out
    
# Build the network from the configured layer sizes and place it on the
# selected device (Module.to returns the module itself).
model = NeuralNet(
    input_size=input_size,
    hidden_size_1=hidden_size_1,
    hidden_size_2=hidden_size_2,
    num_classes=num_classes,
)
model = model.to(device)

# Cross-entropy objective, minimised with plain SGD over all model parameters.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)

In [9]:
# Hand the model, loss, optimizer, both loaders, the epoch budget and the
# device to the project's training routine (arguments are positional because
# the helper's parameter names are not visible in this notebook).
train_neural_network(
    model,
    criterion,
    optimizer,
    train_loader,
    test_loader,
    num_epochs,
    device,
)

Epoch 1/100, Train Loss: 0.2957, Train Accuracy: 91.53%, Test Loss: 0.2884, Test Accuracy: 91.83%
Epoch 2/100, Train Loss: 0.2947, Train Accuracy: 91.55%, Test Loss: 0.2877, Test Accuracy: 91.87%
Epoch 3/100, Train Loss: 0.2935, Train Accuracy: 91.61%, Test Loss: 0.2860, Test Accuracy: 91.90%


KeyboardInterrupt: 