In [1]:
import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import TensorDataset, DataLoader
from tensorflow.keras.datasets import mnist

from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold

import matplotlib.pyplot as plt
%matplotlib inline

# Load Data

Normalize the images to a number from 0 to 1. Image has 3 channels (R,G,B) and each value in the channel can range from 0 to 255. Hence to normalize in 0-->1 range, we need to divide it by 255

# Model

In [2]:
# 1. Device config
device = torch.device('mps')

# 4. Neural Network Model (Simple Feedforward NN)
class NeuralNet(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(NeuralNet, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        out = x.view(-1, 28*28)  # Flatten image
        out = self.fc1(out)
        out = self.relu(out)
        out = self.fc2(out)
        return out

# Data

In [3]:
# 1. Load from Keras (already downloaded)
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# 2. Normalize and convert to tensors
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

# Apply transform manually to each image
X_train = torch.stack([transform(img.astype(np.uint8)) for img in X_train])
X_test = torch.stack([transform(img.astype(np.uint8)) for img in X_test])

y_train = torch.tensor(y_train, dtype=torch.long)
y_test = torch.tensor(y_test, dtype=torch.long)

# 3. Create TensorDatasets and DataLoaders
train_dataset = TensorDataset(X_train, y_train)
test_dataset = TensorDataset(X_test, y_test)

batch_size = 64
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Training

In [None]:
# 2. Hyperparameters
input_size = 784  # 28x28
hidden_size = 128
num_classes = 10
num_epochs = 10
learning_rate = 0.001


model = NeuralNet(input_size, hidden_size, num_classes).to(device)

# 5. Loss and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# 6. Training Loop
for epoch in range(num_epochs):
    for batch_idx, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [1/10], Loss: 0.0899
Epoch [2/10], Loss: 0.0881
Epoch [3/10], Loss: 0.0168
Epoch [4/10], Loss: 0.0164
Epoch [5/10], Loss: 0.0556
Epoch [6/10], Loss: 0.0121
Epoch [7/10], Loss: 0.0423
Epoch [8/10], Loss: 0.0912


# Evaluation

In [None]:
# 7. Testing the model
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print(f'Accuracy of the model on the 10,000 test images: {100 * correct / total:.2f}%')