# Notebook for training the neural network (a small MLP classifier on MNIST)

## Import libraries

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import json
import os

## Check for GPU

In [2]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Using device: {device}")

Using device: cuda


## Define model

In [3]:
class SimpleMLP(nn.Module):
    """Fully connected classifier: Flatten -> (Linear -> ReLU) per hidden layer -> Linear.

    With the default arguments the network is the fixed 784 -> 32 -> 16 -> 10
    architecture, and every layer keeps the same position inside
    ``self.model``, so parameter names such as ``model.1.weight`` are
    unchanged.

    Args:
        input_dim: Number of features per sample after flattening.
        hidden_dims: Width of each hidden layer, in order. May be empty, in
            which case the network is a single linear layer.
        num_classes: Number of output logits.
    """

    def __init__(self, input_dim=28 * 28, hidden_dims=(32, 16), num_classes=10):
        super().__init__()
        layers = [nn.Flatten()]
        in_features = input_dim
        for width in hidden_dims:
            layers.append(nn.Linear(in_features, width))
            layers.append(nn.ReLU())
            in_features = width
        # No softmax here; CrossEntropyLoss expects raw logits
        layers.append(nn.Linear(in_features, num_classes))
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Return the raw (unnormalised) logits produced by ``self.model`` for ``x``."""
        return self.model(x)

## Load MNIST data

In [4]:
# Each MNIST image is passed through ToTensor as it is loaded.
transform = transforms.ToTensor()

# Training split first, then the test split; both are stored under ./data
# and downloaded if missing.
train_data, test_data = (
    datasets.MNIST(root='./data', train=is_train, download=True, transform=transform)
    for is_train in (True, False)
)

# Training batches are reshuffled on each pass; test batches keep a fixed order.
train_loader = DataLoader(dataset=train_data, batch_size=64, shuffle=True)
test_loader = DataLoader(dataset=test_data, batch_size=1000, shuffle=False)

100%|██████████| 9.91M/9.91M [00:02<00:00, 4.61MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 132kB/s]
100%|██████████| 1.65M/1.65M [00:01<00:00, 1.09MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 8.31MB/s]


## Training and evaluation

In [6]:
# Seed PyTorch's global RNG before the model is built so that the initial
# weights and the shuffled batch order are repeatable when this cell is
# re-run after a kernel restart. (Some GPU kernels can still introduce small
# run-to-run differences.)
SEED = 42
torch.manual_seed(SEED)

model = SimpleMLP().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
epochs = 20
for epoch in range(epochs):
    model.train()
    total_loss = 0
    for inputs, labels in train_loader:
        # Move the batch to the same device as the model.
        inputs, labels = inputs.to(device), labels.to(device)

        # Clear gradients left over from the previous step before backprop.
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # Reported value is the mean of the per-batch losses for this epoch.
    print(f"Epoch {epoch+1}/{epochs} - Loss: {total_loss/len(train_loader):.4f}")

# Evaluate accuracy
model.eval()
correct = 0
total = 0
with torch.no_grad():  # no gradients are needed for evaluation
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        # Predicted class = index of the largest logit in each row.
        _, predicted = torch.max(outputs, 1)
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

accuracy = 100 * correct / total
print(f"Test Accuracy: {accuracy:.2f}%")

Epoch 1/20 - Loss: 0.5458
Epoch 2/20 - Loss: 0.2530
Epoch 3/20 - Loss: 0.1960
Epoch 4/20 - Loss: 0.1601
Epoch 5/20 - Loss: 0.1366
Epoch 6/20 - Loss: 0.1203
Epoch 7/20 - Loss: 0.1082
Epoch 8/20 - Loss: 0.0982
Epoch 9/20 - Loss: 0.0918
Epoch 10/20 - Loss: 0.0858
Epoch 11/20 - Loss: 0.0797
Epoch 12/20 - Loss: 0.0753
Epoch 13/20 - Loss: 0.0699
Epoch 14/20 - Loss: 0.0661
Epoch 15/20 - Loss: 0.0620
Epoch 16/20 - Loss: 0.0588
Epoch 17/20 - Loss: 0.0550
Epoch 18/20 - Loss: 0.0533
Epoch 19/20 - Loss: 0.0498
Epoch 20/20 - Loss: 0.0485
Test Accuracy: 97.10%


## Export weights

In [7]:
def export_model_to_json(model, filename='model_weights.json'):
    """Write every named parameter of ``model`` to ``filename`` as JSON.

    The file holds a single JSON object that maps each parameter name (as
    reported by ``model.named_parameters()``) to its values as nested lists.

    Args:
        model: Module whose parameters are exported.
        filename: Path of the JSON file to create or overwrite.
    """
    serialised = {
        # Detach from autograd and copy to host memory before converting.
        param_name: tensor.detach().cpu().numpy().tolist()
        for param_name, tensor in model.named_parameters()
    }

    with open(filename, 'w') as out_file:
        json.dump(serialised, out_file)

# Name the output file once so the call and the message cannot drift apart.
weights_path = 'model_weights.json'
export_model_to_json(model, weights_path)
print(f"Model weights exported to {weights_path}")

Model weights exported to model_weights.json
