# Application 4 - Handwritten Digits

In [1]:
import torch
import torch.nn as nn
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.autograd import Variable

In [2]:
# Network dimensions: 784 input features, 400 hidden units, one output per digit class (0-9).
input_size = 784
hidden_size = 400
output_size = 10

# Optimisation settings.
epochs = 10            # number of full passes over the training set
batch_size = 100       # samples fed to the network per iteration
learning_rate = 0.001  # step size handed to the optimizer

In [3]:
# MNIST training split; downloaded into ./data on first use, images converted to tensors.
train_dataset = datasets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)

# MNIST test split, same storage location and transform.
test_dataset = datasets.MNIST(
    root='./data',
    train=False,
    download=True,
    transform=transforms.ToTensor(),
)

In [4]:
# Training batches are reshuffled every epoch so the optimizer sees the
# samples in a different order on each pass.
train_loader = torch.utils.data.DataLoader(dataset      = train_dataset,
                                           batch_size   = batch_size,
                                           shuffle      = True)

# Evaluation only sums correct predictions over the whole split, so the order
# is irrelevant to the result; keep it fixed so per-batch outputs are
# reproducible between runs.
test_loader  = torch.utils.data.DataLoader(dataset      = test_dataset,
                                           batch_size   = batch_size,
                                           shuffle      = False)

In [5]:
class Net(nn.Module):
    """Fully connected classifier: Linear -> ReLU -> Linear -> ReLU -> Linear.

    The final layer returns raw scores (no activation is applied to them here).
    """

    def __init__(self, input_size, hidden_size, output_size):
        """Create the three linear layers and initialise the hidden-layer weights.

        Args:
            input_size: number of features in each input row.
            hidden_size: width of both hidden layers.
            output_size: number of scores produced per input row.
        """
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, output_size)
        self.relu = nn.ReLU()
        self.init_weights()

    def init_weights(self):
        """Apply Kaiming-normal initialisation to the weights of fc1 and fc2.

        fc3 and all biases keep whatever nn.Linear assigned at construction.
        """
        for layer in (self.fc1, self.fc2):
            nn.init.kaiming_normal_(layer.weight)

    def forward(self, x):
        """Return the output-layer scores for the batch ``x``."""
        hidden = self.relu(self.fc1(x))
        hidden = self.relu(self.fc2(hidden))
        return self.fc3(hidden)

In [6]:
# Build the network and, when a CUDA device is available, move it onto the GPU.
CUDA = torch.cuda.is_available()
net = Net(input_size, hidden_size, output_size)
if CUDA:
    net = net.cuda()

# CrossEntropyLoss applies (log-)softmax internally, so the network itself
# returns raw scores and needs no explicit Softmax layer.
criterion = nn.CrossEntropyLoss()

# Adam over every parameter of the network, using the configured learning rate.
optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)

In [7]:
# Train the network: one optimizer step per mini-batch, one summary line per epoch.
for epoch in range(epochs):
    correct_train = 0   # correctly classified training samples this epoch (Python int)
    running_loss = 0    # sum of the per-batch losses this epoch
    for images, labels in train_loader:
        # Flatten every image in the batch into a single row of 28*28 = 784 values.
        images = images.view(-1, 28*28)

        if CUDA:
            images = images.cuda()
            labels = labels.cuda()

        outputs = net(images)
        # Index of the highest score in each row is the predicted class.
        _, predicted = torch.max(outputs.data, 1)

        # `predicted` and `labels` live on the same device, so they can be
        # compared directly; .item() turns the count into a Python int.
        # (The previous GPU-only branch called the non-existent `labels.cup()`
        # and raised AttributeError whenever CUDA was available.)
        correct_train += (predicted == labels).sum().item()

        # Difference between the actual and predicted (loss function)
        loss = criterion(outputs, labels)
        running_loss += loss.item()

        # Clear gradients left over from the previous step
        optimizer.zero_grad()

        # Backpropagation
        loss.backward()

        # Update the weights
        optimizer.step()

    # Mean batch loss and accuracy over the whole training set for this epoch.
    print('Epoch [{}/{}], Training Loss: {:.3f}, Training Accuracy: {:.3f}%'.format(epoch+1, epochs, running_loss/len(train_loader), (100*correct_train/len(train_dataset))))

print("DONE TRAINING!")

Epoch [1/10], Training Loss: 0.235, Training Accuracy: 93.113%
Epoch [2/10], Training Loss: 0.085, Training Accuracy: 97.393%
Epoch [3/10], Training Loss: 0.053, Training Accuracy: 98.343%
Epoch [4/10], Training Loss: 0.038, Training Accuracy: 98.753%
Epoch [5/10], Training Loss: 0.029, Training Accuracy: 99.087%
Epoch [6/10], Training Loss: 0.022, Training Accuracy: 99.278%
Epoch [7/10], Training Loss: 0.020, Training Accuracy: 99.320%
Epoch [8/10], Training Loss: 0.016, Training Accuracy: 99.457%
Epoch [9/10], Training Loss: 0.017, Training Accuracy: 99.427%
Epoch [10/10], Training Loss: 0.013, Training Accuracy: 99.605%
DONE TRAINING!


In [8]:
# Evaluate the trained network on the test split; gradients are not needed here.
with torch.no_grad():
    correct_prediction = 0   # correctly classified test samples (Python int)
    for images, labels in test_loader:
        if CUDA:
            images = images.cuda()
            labels = labels.cuda()

        # Flatten every image in the batch into a single row of 28*28 = 784 values.
        images = images.view(-1, 28*28)
        outputs = net(images)

        # Index of the highest score in each row is the predicted class.
        _, predicted = torch.max(outputs.data, 1)

        # Same accumulation on CPU and GPU: compare on-device, then convert the
        # count to a Python int. Previously the CUDA branch accumulated a tensor
        # while the CPU branch accumulated an int, so the printed accuracy was
        # formatted differently depending on the device.
        correct_prediction += (predicted == labels).sum().item()

    print('Accuracy of the network on the 10000 test images: {} %'.format(100 * correct_prediction / len(test_dataset)))

Accuracy of the network on the 10000 test images: 97.65 %
