## Prepare neural network and data loaders:

In [1]:
import numpy as np
import pandas as pd
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# Fetch the MNIST train and test splits into ./data; every image is
# converted to a tensor by the ToTensor transform:
train_dataset, test_dataset = (
    torchvision.datasets.MNIST(root='./data', train=is_train, download=True,
                               transform=transforms.ToTensor())
    for is_train in (True, False)
)

# Wrap each split in a loader that serves mini-batches of 64 samples.
# Only the training split is shuffled; the test split keeps its order.
train_loader, test_loader = (
    torch.utils.data.DataLoader(split, batch_size=64, shuffle=do_shuffle)
    for split, do_shuffle in ((train_dataset, True), (test_dataset, False))
)

# Build the nn model as a class/object:
class Net(nn.Module): # Class Net inherits from the nn.Module class
    """Fully connected classifier with two ReLU hidden layers.

    With the default arguments the architecture is 28*28 -> 128 -> 64 -> 10,
    i.e. one flattened 28x28 image in, one score (logit) per label out.

    Args:
        in_features: Number of values per flattened input sample.
        hidden1: Width of the first hidden layer.
        hidden2: Width of the second hidden layer.
        num_classes: Number of output scores per sample.
    """

    def __init__(self, in_features: int = 28 * 28, hidden1: int = 128,
                 hidden2: int = 64, num_classes: int = 10) -> None: # Specify architecture
        super().__init__()
        self.in_features = in_features # Remembered so forward() can flatten its input
        self.fc1 = nn.Linear(in_features, hidden1) # Input: in_features -> hidden1 hidden nodes
        self.fc2 = nn.Linear(hidden1, hidden2) # hidden1 hidden nodes -> hidden2 hidden nodes
        self.fc3 = nn.Linear(hidden2, num_classes) # Output: hidden2 hidden nodes -> num_classes labels

    def forward(self, x: torch.Tensor) -> torch.Tensor: # Specify activation
        """Return raw (un-normalized) scores of shape (N, num_classes).

        The input is flattened to rows of ``in_features`` values first, so any
        shape whose element count is a multiple of ``in_features`` is accepted.
        """
        # reshape (unlike view) also accepts non-contiguous tensors, e.g. a
        # transposed batch; it copies only when a view is impossible.
        x = x.reshape(-1, self.in_features) # Input layer
        x = F.relu(self.fc1(x)) # Relu layer 1
        x = F.relu(self.fc2(x)) # Relu layer 2
        x = self.fc3(x) # Output layer (no activation: raw scores)
        return x

L = 15 # Training length (a hyperparameter)

model = Net() # Fresh, untrained network

# Training objective and the rule used to update the weights:
criterion = torch.nn.CrossEntropyLoss() # Categorical cross entropy loss
optimizer = torch.optim.Adam(model.parameters()) # Adam over all model parameters, default settings


## Train model:

In [2]:

# Train model:
model.train() # Select training mode explicitly (relevant for layers such as dropout / batch norm)
for epoch in range(L): # L full passes over the training data
    for inputs, labels in train_loader: # Each batch is an (inputs, labels) pair
        optimizer.zero_grad() # Clear gradients left over from the previous batch
        outputs = model(inputs) # Use current model to make predictions
        loss = criterion(outputs, labels) # Evaluate current loss
        loss.backward() # Back-propagation: compute gradients of the loss
        optimizer.step() # Update the weights using those gradients


## Evaluate model:

In [3]:
y_hat = [] # Predicted labels, one entry per test sample
y_act = [] # Actual labels, in the same order as y_hat

model.eval() # Select inference mode (relevant for layers such as dropout / batch norm)
with torch.no_grad(): # No gradient tracking is needed for evaluation
    for inputs, labels in test_loader: # Iterate over data batches
        outputs = model(inputs) # Scores for every label, one row per sample
        predicted_values = outputs.argmax(dim=1) # Label with the highest score
        y_act += labels.tolist() # Add actual values
        y_hat += predicted_values.tolist() # Add predicted values

print( pd.crosstab(y_hat, y_act) ) # Confusion matrix: rows = predicted, columns = actual

# Fraction of samples whose predicted label equals the actual label:
acc = np.mean(np.array(y_hat) == np.array(y_act))
print('Accuracy: %d %%' % (100 * acc)) # Accuracy (%d truncates to a whole percent)

col_0    0     1     2    3    4    5    6     7    8    9
row_0                                                     
0      970     0     5    0    0    2    4     0    6    2
1        0  1121     1    0    0    0    2     4    1    2
2        0     2  1005    5    3    0    2     9    3    0
3        1     1     5  980    0    2    1     2    3    2
4        0     0     2    0  955    1    3     1    3    8
5        1     1     1   11    1  883    2     1   10    4
6        2     2     3    0    6    1  942     0    3    0
7        0     2     3    3    5    0    0  1001    0    6
8        4     6     6    2    2    1    2     3  938    2
9        2     0     1    9   10    2    0     7    7  983
Accuracy: 97 %
