In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import numpy as np
import random
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Fetch MNIST into ./data (download=True) as separate training and test splits.
trainset = torchvision.datasets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)
testset = torchvision.datasets.MNIST(
    root='./data',
    train=False,
    download=True,
    transform=transforms.ToTensor(),
)

100%|██████████| 9.91M/9.91M [00:00<00:00, 36.8MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 1.08MB/s]
100%|██████████| 1.65M/1.65M [00:00<00:00, 7.88MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 12.4MB/s]


In [None]:
def preprocess_data(dataset):
    """Turn a dataset's raw image tensor into centred, flattened rows.

    Args:
        dataset: object exposing a `.data` tensor (one image per leading
            index) and a `.targets` tensor of labels.

    Returns:
        A `(flattened_data, targets)` pair: the images divided by 255 and
        shifted down by 0.5, reshaped to one row per sample, together with
        the dataset's targets unchanged.
    """
    scaled = dataset.data / 255.0 - 0.5
    num_samples = scaled.shape[0]
    # -1 lets view() infer the per-sample length from the remaining dimensions.
    return scaled.view(num_samples, -1), dataset.targets

# Flattened, centred inputs and their labels for the training and test splits.
x_train, y_train = preprocess_data(dataset=trainset)
x_test, y_test = preprocess_data(dataset=testset)

In [None]:
# Sanity check: one row per training sample, one column per flattened pixel.
x_train.size()

torch.Size([60000, 784])

In [None]:
# Sanity check: one label per training sample.
y_train.size()

torch.Size([60000])

In [None]:
class MNISTNetwork(nn.Module):
    """Fully connected classifier with a single sigmoid hidden layer.

    The layer sizes are constructor arguments; the defaults reproduce the
    original fixed 784 -> 1000 -> 10 architecture, so `MNISTNetwork()` is
    unchanged for existing callers.

    Args:
        in_features: length of each flattened input vector.
        hidden_features: number of units in the hidden layer.
        out_features: number of output logits (one per class).
    """

    def __init__(self, in_features=784, hidden_features=1000, out_features=10):
        super().__init__()

        self.layer_1 = nn.Linear(in_features=in_features, out_features=hidden_features)  # bias=True is the default
        self.layer_2 = nn.Linear(in_features=hidden_features, out_features=out_features)

        self.activation_function = nn.Sigmoid()

    def forward(self, x):
        """Map a batch of flattened inputs to raw class logits.

        Softmax is intentionally not applied here; the caller (or the loss
        function) is responsible for turning logits into probabilities.
        """
        # Hidden layer: affine transform followed by the sigmoid non-linearity.
        hidden_nodes = self.activation_function(self.layer_1(x))

        # Output layer: affine transform only, returning unnormalised logits.
        logits = self.layer_2(hidden_nodes)

        return logits

In [None]:
# Untrained instance used for the forward-pass shape check in the next cells.
model = MNISTNetwork()

In [None]:
# Shape sanity check only: run the forward pass without autograd so the
# activations for the whole training set are not kept alive in a graph.
with torch.no_grad():
    y_pred = model(x_train)

In [None]:
# Sanity check: one row of logits per input sample.
y_pred.size()

torch.Size([60000, 10])

In [None]:
# Seed PyTorch's global RNG so that weight initialisation (and the batch
# shuffling that follows) is repeatable when the notebook is re-run in order.
torch.manual_seed(0)

# Fresh network, classification loss on raw logits, and plain SGD.
model = MNISTNetwork()
loss_function = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.2)

In [None]:
import torch.utils.data as data

In [None]:
# Training hyper-parameters used by the loop below.
batch_size = 1024
epochs = 10

# Pair every input row with its label and serve them in shuffled mini-batches.
train_dataset = data.TensorDataset(x_train, y_train)
train_loader = data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

In [None]:
# Make sure the model is in training mode even if an evaluation cell
# (which calls model.eval()) was executed before this one.
model.train()

for epoch in range(epochs):
    total_loss = 0.0   # sum of per-sample losses seen this epoch
    samples_seen = 0   # number of samples seen this epoch

    for x_batch, y_batch in train_loader:
        logits = model(x_batch)
        loss = loss_function(logits, y_batch)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # The loss is a mean over the batch; weight it by the batch size so a
        # shorter final batch does not count as much as a full one.
        batch_samples = x_batch.size(0)
        total_loss += loss.item() * batch_samples
        samples_seen += batch_samples

    avg_loss = total_loss / samples_seen  # true per-sample average over the epoch
    print(f"Epoch {epoch+1}, Average Loss: {avg_loss:.4f}")

Epoch 1, Average Loss: 0.3774
Epoch 2, Average Loss: 0.3674
Epoch 3, Average Loss: 0.3586
Epoch 4, Average Loss: 0.3517
Epoch 5, Average Loss: 0.3494
Epoch 6, Average Loss: 0.3409
Epoch 7, Average Loss: 0.3370
Epoch 8, Average Loss: 0.3321
Epoch 9, Average Loss: 0.3290
Epoch 10, Average Loss: 0.3246


In [None]:
# Switch to inference mode and score the whole test set without autograd.
model.eval()

with torch.no_grad():
    logits = model(x_test)
    probabilities = torch.nn.functional.softmax(logits, dim=1)

# Predicted label = class with the highest probability in each row.
predicted_classes = torch.argmax(probabilities, dim=1)

accuracy = (predicted_classes == y_test).float().mean()

print(f"Overall Accuracy: {accuracy.item() * 100:.2f}%")

print()

# Rows index the actual class, columns the predicted class.
confusion_matrix = np.zeros((10, 10), dtype=np.int32)

# Unbuffered scatter-add: every (actual, predicted) pair increments its cell,
# including repeated pairs, replacing a per-sample Python loop.
np.add.at(confusion_matrix, (y_test.numpy(), predicted_classes.numpy()), 1)

print("Confusion Matrix")
print(confusion_matrix)

Overall Accuracy: 91.23%

Confusion Matrix
[[ 962    0    3    2    0    3    6    1    3    0]
 [   0 1105    2    2    0    2    4    1   19    0]
 [  11   15  896   17   17    2   10   13   46    5]
 [   4    1   16  925    0   20    2   14   23    5]
 [   1    6    3    2  920    1   10    2   10   27]
 [  13    4    3   47   10  748   15    7   39    6]
 [  18    3    4    1   18   14  893    1    6    0]
 [   4   19   25    7   11    0    0  930    4   28]
 [   7   10    5   22    9   21   11    8  875    6]
 [  13    9    4   12   56    7    0   25   14  869]]


In [None]:
class MNISTNetwork(nn.Module):
    """Fully connected classifier with two ELU hidden layers.

    Note: this definition reuses the name of the earlier one-hidden-layer
    class in this notebook and therefore replaces it from this cell onward.

    The layer sizes are constructor arguments; the defaults reproduce the
    original fixed 784 -> 1000 -> 200 -> 10 architecture, so
    `MNISTNetwork()` is unchanged for existing callers.

    Args:
        in_features: length of each flattened input vector.
        hidden_features_1: number of units in the first hidden layer.
        hidden_features_2: number of units in the second hidden layer.
        out_features: number of output logits (one per class).
    """

    def __init__(self, in_features=784, hidden_features_1=1000, hidden_features_2=200, out_features=10):
        super().__init__()

        self.layer_1 = nn.Linear(in_features=in_features, out_features=hidden_features_1)
        self.layer_2 = nn.Linear(in_features=hidden_features_1, out_features=hidden_features_2)
        self.layer_3 = nn.Linear(in_features=hidden_features_2, out_features=out_features)

        # The same ELU module is applied after both hidden layers.
        self.activation_function = nn.ELU()

    def forward(self, x):
        """Map a batch of flattened inputs to raw class logits.

        Softmax is intentionally not applied here; the caller (or the loss
        function) is responsible for turning logits into probabilities.
        """
        # First hidden layer.
        hidden_nodes = self.activation_function(self.layer_1(x))

        # Second hidden layer.
        hidden_nodes = self.activation_function(self.layer_2(hidden_nodes))

        # Output layer: affine transform only, returning unnormalised logits.
        logits = self.layer_3(hidden_nodes)

        return logits

In [None]:
# Seed PyTorch's global RNG so that weight initialisation (and the batch
# shuffling that follows) is repeatable when the notebook is re-run in order.
torch.manual_seed(0)

# Fresh network, classification loss on raw logits, and plain SGD.
model = MNISTNetwork()
loss_function = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.2)

In [None]:
# Same batching configuration as the earlier experiment, rebuilt for this model.
batch_size = 1024
epochs = 10

train_dataset = data.TensorDataset(x_train, y_train)
train_loader = data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
)

In [None]:
# Make sure the model is in training mode even if an evaluation cell
# (which calls model.eval()) was executed before this one.
model.train()

for epoch in range(epochs):
    total_loss = 0.0   # sum of per-sample losses seen this epoch
    samples_seen = 0   # number of samples seen this epoch

    for x_batch, y_batch in train_loader:
        logits = model(x_batch)
        loss = loss_function(logits, y_batch)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # The loss is a mean over the batch; weight it by the batch size so a
        # shorter final batch does not count as much as a full one.
        batch_samples = x_batch.size(0)
        total_loss += loss.item() * batch_samples
        samples_seen += batch_samples

    avg_loss = total_loss / samples_seen  # true per-sample average over the epoch
    print(f"Epoch {epoch+1}, Average Loss: {avg_loss:.4f}")

Epoch 1, Average Loss: 1.1218
Epoch 2, Average Loss: 0.4965
Epoch 3, Average Loss: 0.3793
Epoch 4, Average Loss: 0.3412
Epoch 5, Average Loss: 0.3330
Epoch 6, Average Loss: 0.3059
Epoch 7, Average Loss: 0.2994
Epoch 8, Average Loss: 0.2815
Epoch 9, Average Loss: 0.2716
Epoch 10, Average Loss: 0.2575


In [None]:
# Switch to inference mode and score the whole test set without autograd.
model.eval()

with torch.no_grad():
    logits = model(x_test)
    probabilities = torch.nn.functional.softmax(logits, dim=1)

# Predicted label = class with the highest probability in each row.
predicted_classes = torch.argmax(probabilities, dim=1)

accuracy = (predicted_classes == y_test).float().mean()

print(f"Overall Accuracy: {accuracy.item() * 100:.2f}%")

print()

# Rows index the actual class, columns the predicted class.
confusion_matrix = np.zeros((10, 10), dtype=np.int32)

# Unbuffered scatter-add: every (actual, predicted) pair increments its cell,
# including repeated pairs, replacing a per-sample Python loop.
np.add.at(confusion_matrix, (y_test.numpy(), predicted_classes.numpy()), 1)

print("Confusion Matrix")
print(confusion_matrix)

Overall Accuracy: 92.55%

Confusion Matrix
[[ 963    0    0    1    0    3   11    1    1    0]
 [   0 1104    2    2    1    3    4    1   17    1]
 [  12    8  910   10    9    3   21    8   45    6]
 [   4    1   11  908    0   38    5    4   24   15]
 [   1    1    2    1  904    0   17    1    8   47]
 [   9    2    0   21    4  798   19    1   29    9]
 [   9    3    4    0    5    6  926    1    4    0]
 [   3   10   23    5    8    1    0  913    3   62]
 [   6    3    2   15    7   23   14    3  885   16]
 [  10    8    1    8   22    5    1    4    6  944]]
