In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision

import matplotlib.pyplot as plt
import numpy as np

%matplotlib inline

In [2]:
# Use the first CUDA GPU when PyTorch reports one, otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
device

device(type='cuda', index=0)

In [3]:
import os
from mnist_web import mnist

# Fetch the MNIST arrays, pointing the loader at ./mnist under the current working directory.
train_images, train_labels, test_images, test_labels = mnist(
    path=os.path.join(os.getcwd(), "mnist"),
)

# Wrap each NumPy array as a tensor and move it to the selected device in one step:
# images become float32, labels become int32.
# NOTE(review): later cells take argmax over dim 1 of the labels, so they are
# presumably one-hot encoded - confirm against the mnist_web loader.
X_train = torch.from_numpy(train_images).to(device=device, dtype=torch.float32)
y_train = torch.from_numpy(train_labels).to(device=device, dtype=torch.int32)
X_test = torch.from_numpy(test_images).to(device=device, dtype=torch.float32)
y_test = torch.from_numpy(test_labels).to(device=device, dtype=torch.int32)

In [4]:
class Neural_Network(nn.Module):
    """Fully connected classifier: 784 inputs, three 100-unit ReLU layers, 10 log-probability outputs."""

    def __init__(self):
        super().__init__()
        # Layers are created in fc1..fc4 order so parameter names and
        # seeded initialisation stay exactly as before.
        self.fc1 = nn.Linear(784, 100)
        self.fc2 = nn.Linear(100, 100)
        self.fc3 = nn.Linear(100, 100)
        self.fc4 = nn.Linear(100, 10)
        self.relu = nn.ReLU()
        self.log_softmax = nn.LogSoftmax(dim=1)

    def forward(self, X):
        """Map a 2-D batch ``X`` of shape (N, 784) to (N, 10) log-probabilities.

        The log-softmax is taken over dim 1, so each output row exponentiates
        to a probability distribution over the 10 classes.
        """
        hidden = X
        for layer in (self.fc1, self.fc2, self.fc3):
            hidden = self.relu(layer(hidden))
        logits = self.fc4(hidden)
        return self.log_softmax(logits)

In [None]:
# Seed the RNG so a fresh "Restart & Run All" starts from the same initial weights.
torch.manual_seed(0)

model = Neural_Network().to(device)

loss_fn = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9)

epochs = 1000

# NLLLoss needs class indices, so reduce the label matrix along dim 1 once,
# outside the loop: the labels never change between iterations.
# NOTE(review): assumes y_train rows are one-hot - confirm against the loader.
train_targets = torch.argmax(y_train, dim=1)

# Train step
model.train()

# Each "epoch" is one full-batch gradient step: the whole of X_train goes
# through the model in a single forward pass.
for i in range(epochs):

    optimizer.zero_grad()

    y_train_ = model(X_train)

    loss = loss_fn(y_train_, train_targets)

    if ((i + 1) % 10) == 0:
        print(f"Epoch {i + 1}: {loss.item()}")

    loss.backward()
    optimizer.step()

# This is the loss from the last forward pass, i.e. before the final optimizer step.
print(f"Loss after {epochs} epochs: {loss.item()}")

Epoch 10: 2.2804384231567383
Epoch 20: 2.152167558670044
Epoch 30: 1.1676325798034668
Epoch 40: 2.0325982570648193
Epoch 50: 1.5534710884094238
Epoch 60: 0.8273769021034241
Epoch 70: 0.47275230288505554
Epoch 80: 0.36580508947372437
Epoch 90: 0.30954164266586304
Epoch 100: 0.2746428847312927
Epoch 110: 0.24797435104846954
Epoch 120: 0.2275349497795105
Epoch 130: 0.2103096842765808
Epoch 140: 0.19541515409946442
Epoch 150: 0.18246617913246155
Epoch 160: 0.17103759944438934
Epoch 170: 0.1608562469482422
Epoch 180: 0.15165606141090393
Epoch 190: 0.1433696746826172
Epoch 200: 0.13589400053024292
Epoch 210: 0.12908203899860382
Epoch 220: 0.12278572469949722
Epoch 230: 0.11692002415657043
Epoch 240: 0.11147645115852356
Epoch 250: 0.10641425848007202
Epoch 260: 0.10169245302677155
Epoch 270: 0.09728709608316422
Epoch 280: 0.09316141903400421
Epoch 290: 0.0892731249332428
Epoch 300: 0.08560392260551453
Epoch 310: 0.08212511241436005
Epoch 320: 0.07883211225271225
Epoch 330: 0.07571693509817123

In [None]:
# Eval
model.eval()

with torch.no_grad():
    y_test_ = model(X_test)

# Compare predicted and true class indices for the whole test set at once
# rather than looping over samples in Python.
# NOTE(review): assumes y_test rows are one-hot - confirm against the loader.
true_classes = torch.argmax(y_test, dim=1)
predicted_classes = torch.argmax(y_test_, dim=1)
is_correct = predicted_classes == true_classes

# Keep `correct` / `incorrect` as plain lists of sample indices, as before.
correct = torch.nonzero(is_correct).flatten().tolist()
incorrect = torch.nonzero(~is_correct).flatten().tolist()

num_datapoints = y_test.shape[0]

print("Test Accuracy:", len(correct) / num_datapoints)

# plot some incorrect testing examples
if len(incorrect) >= 9:
    X_incorrect = X_test[incorrect].cpu()
    # Reuse the predictions computed above under no_grad. Re-running the model
    # here with a hard-coded .cuda() call would fail on CPU-only machines.
    y_incorrect = y_test_[incorrect].cpu()
    incorrect_labels = torch.argmax(y_incorrect, 1)
    correct_labels = torch.argmax(y_test[incorrect], 1).cpu()

    from random import sample

    # Pick 9 distinct misclassified samples at random for a 3x3 grid.
    examples = sample(range(len(incorrect)), 9)

    plt.figure(figsize=(10,10))
    for i in range(9):
        index = examples[i]
        plt.subplot(3,3,i+1)
        plt.tick_params(axis='both', labelsize=0, length=0)
        plt.grid(False)
        # Each sample is reshaped to 28x28 for display.
        plt.imshow(X_incorrect[index].reshape((28,28)), cmap='Greys')
        label = f"Guess: {incorrect_labels[index]}, Correct: {correct_labels[index]}"
        plt.xlabel(label)
    plt.show()