In [None]:
import numpy as np
import time
import matplotlib.pyplot as plt
from torchvision import datasets, transforms
from torch import nn
import torch

# Preprocessing pipeline: convert each image to a tensor, then normalise its
# single channel with mean 0.5 and standard deviation 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# MNIST training and held-out splits (downloaded on first use), each served
# through a shuffling loader that yields batches of 32 images.
trainset = datasets.MNIST('./dataset/MNIST/', train=True, download=True,
                          transform=transform)
valset = datasets.MNIST('./dataset/MNIST/', train=False, download=True,
                        transform=transform)
train_loader = torch.utils.data.DataLoader(dataset=trainset, batch_size=32,
                                           shuffle=True)
val_loader = torch.utils.data.DataLoader(dataset=valset, batch_size=32,
                                         shuffle=True)

# Network dimensions: flattened input vector, one hidden layer, ten classes.
input_size, hidden_size, output_size = 784, 300, 10

# Two bias-free linear layers with a sigmoid in between; the final
# log-softmax produces per-class log-probabilities along dim 1.
model = nn.Sequential(
    nn.Linear(in_features=input_size, out_features=hidden_size, bias=False),
    nn.Sigmoid(),
    nn.Linear(in_features=hidden_size, out_features=output_size, bias=False),
    nn.LogSoftmax(dim=1),
)
print(model)

# Negative log-likelihood on log-softmax outputs is the cross-entropy loss.
criterion = nn.NLLLoss()
# Plain stochastic gradient descent without momentum.
optimizer = torch.optim.SGD(params=model.parameters(), lr=0.01, momentum=0.0)

# Train the model for a fixed number of epochs with mini-batch SGD and record
# the mean training loss of every epoch in `losses` (one float per epoch).
epochs = 20
losses = []
for epoch in range(epochs):
    running_loss = 0
    for images, labels in train_loader:
        # Flatten each image in the batch into a single row vector so it
        # matches the input width of the first linear layer.
        images = images.view(images.shape[0], -1)

        # Gradients accumulate across backward() calls, so clear them first.
        optimizer.zero_grad()

        output = model(images)
        loss = criterion(output, labels)

        # Backpropagate the batch loss, then apply one optimizer update.
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Mean of the per-batch losses for this epoch. (The original used a
    # `for ... else` here; with no `break` in the loop the `else` always ran,
    # so a plain post-loop statement is equivalent and clearer.)
    epoch_loss = running_loss / len(train_loader)
    losses.append(float(epoch_loss))
    print(f"Epoch {epoch}, Training loss: {epoch_loss}")

# Measure classification accuracy over the whole validation loader, then plot
# the per-epoch training losses collected in `losses`.
#
# Every image of every batch is scored. The previous version computed the
# prediction outside its per-image loop, so only the last image of each batch
# was counted and `all_count` ended up as the number of batches.
correct_count, all_count = 0, 0
# Gradients are not needed for inference.
with torch.no_grad():
    for images, labels in val_loader:
        # Flatten each image in the batch into one row vector for the model.
        flat_images = images.view(images.shape[0], -1)
        logps = model(flat_images)
        # The model outputs log-probabilities; exp() is monotonic, so the
        # arg-max of the log-probabilities is the most probable class.
        pred_labels = logps.argmax(dim=1)
        correct_count += (pred_labels == labels).sum().item()
        all_count += labels.shape[0]

print("Number Of Images Tested =", all_count)
print("\nModel Accuracy =", (correct_count/all_count))
plt.xlabel('Epochs')
# `losses` holds the training loss per epoch, so label the axis accordingly.
plt.ylabel('Training Loss')
plt.plot(losses)
plt.show()

Sequential(
  (0): Linear(in_features=784, out_features=300, bias=False)
  (1): Sigmoid()
  (2): Linear(in_features=300, out_features=10, bias=False)
  (3): LogSoftmax(dim=1)
)
Epoch 0, Training loss: 1.2468972946484884
Epoch 1, Training loss: 0.5211230966409047
Epoch 2, Training loss: 0.41028766434987385
Epoch 3, Training loss: 0.365850011575222
Epoch 4, Training loss: 0.3407744847695033
Epoch 5, Training loss: 0.3237271421194077
Epoch 6, Training loss: 0.3112111658116182
Epoch 7, Training loss: 0.3007752120375633
Epoch 8, Training loss: 0.2918996914803982
Epoch 9, Training loss: 0.28396512301365534
Epoch 10, Training loss: 0.27619147540330885
Epoch 11, Training loss: 0.26929662137031557
Epoch 12, Training loss: 0.26270950037837026
Epoch 13, Training loss: 0.25621270189980666
Epoch 14, Training loss: 0.24978108696937562
Epoch 15, Training loss: 0.24382742386360964
Epoch 16, Training loss: 0.23770421476463477
Epoch 17, Training loss: 0.23195276439885298
Epoch 18, Training loss: 0.22596