In [None]:
import numpy as np
import matplotlib.pyplot as plt
import torch 
from torchvision import datasets
from torchvision.transforms import ToTensor

In [None]:
# Use the GPU when CUDA is usable; otherwise stay on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

Download the MNIST dataset from its online source, which is made available through the `torchvision` module.

It consists of a training set with 60'000 entries and a test set with 10'000 entries.

In [None]:
# Both splits are stored under ./data and converted to tensors on access.
# Only the training split asks for a download.
train_data = datasets.MNIST(root='data', train=True, transform=ToTensor(), download=True)
test_data = datasets.MNIST(root='data', train=False, transform=ToTensor())

In [None]:
## Print the two dataset objects to get a summary of each split

print(train_data, test_data)

## Shape of the raw image tensor stored in each split

print(train_data.data.shape)
print(test_data.data.shape)

## Visualization of dataset


In [None]:
# Show the first raw training image, titled with its integer label.
plt.imshow(train_data.data[0], cmap='gray')
plt.title(str(int(train_data.targets[0])))
plt.show()

In [None]:
## Plot a grid of randomly chosen training images, each titled with its label

figure = plt.figure(figsize=(10, 8))
cols, rows = 5, 5
for i in range(1, cols * rows + 1):
    # Draw one random index into the training set.
    sample_idx = torch.randint(len(train_data), size=(1,)).item()
    img, label = train_data[sample_idx]
    # Work on the Axes object directly instead of the implicit "current axes".
    ax = figure.add_subplot(rows, cols, i)
    ax.set_title(label)
    ax.axis("off")
    # squeeze() drops singleton dimensions so imshow receives a 2-D image.
    ax.imshow(img.squeeze(), cmap="gray")
plt.show()

## Preparing data for training with DataLoaders

A `DataLoader` prepares the data so that it is passed to the model in mini-batches during training and is reshuffled at each iteration over the dataset (epoch), which helps to reduce model overfitting.

In [None]:
from torch.utils.data import DataLoader

# One loader per split: mini-batches of 100 samples, reshuffled on every pass,
# with num_workers=1.
loaders = {
    'train': DataLoader(train_data, batch_size=100, shuffle=True, num_workers=1),
    'test': DataLoader(test_data, batch_size=100, shuffle=True, num_workers=1),
}
loaders

# Definition of the convolutional neural network model 

created using the torch.nn module

Architecture/topology of the network: 2 convolutional layers, each followed by a ReLU activation function and max pooling, and then a small fully connected classifier.

List of parameters of a convolutional layer (`nn.Conv2d`):
- number of channels in input image
- number of channels produced by convolution
- size of convolution kernel
- stride of the convolution --> default is usually of size 1
- padding to be added to both sides of input & padding mode
- dilation (= spacing between kernel elements)
- groups = number of blocked connections from input channels to output channels
- bias (learnable bias added to output)

In [None]:
## define neural network as its own class and let it build

import torch.nn as nn

class CNN(nn.Module):
    """Two-stage convolutional classifier with a 10-way output.

    Each convolutional stage is Conv2d (3x3 kernel, stride 1, padding 1)
    -> ReLU -> 2x2 max pooling, so every stage keeps the spatial size in the
    convolution and halves it in the pooling. The classifier head expects
    64 * 7 * 7 flattened features, which corresponds to 1 x 28 x 28 inputs.
    """

    def __init__(self):
        super().__init__()

        # Stage 1: 1 input channel -> 16 feature maps.
        self.conv1 = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )

        # Stage 2: 16 -> 64 feature maps.
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channels=16, out_channels=64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )

        # Classifier head: flattened features -> 54 hidden units -> 10 class scores.
        self.out = nn.Sequential(
            nn.Linear(64 * 7 * 7, 54),
            nn.ReLU(),
            nn.Dropout(p=0.25),
            nn.Linear(54, 10),
        )

    def forward(self, x):
        """Return ``(class_scores, flattened_features)`` for a batch ``x``.

        The flattened conv2 output is returned alongside the scores so it can
        be used for visualization.
        """
        feature_maps = self.conv2(self.conv1(x))

        # Flatten the output of conv2 to (batch_size, 64 * 7 * 7).
        flat = feature_maps.view(feature_maps.size(0), -1)
        return self.out(flat), flat

In [None]:
# Instantiate the network and print its layer structure.
cnn = CNN()
print(cnn)

In [None]:
## Loss function: cross-entropy, applied to the 10 class scores returned by the network
loss_func = nn.CrossEntropyLoss()
# Bare expression so the notebook displays the loss object as the cell output.
loss_func

In [None]:
## Optimizer and learning rate
# The learning rate controls how strongly the weights are changed at each optimizer step.

from torch import optim

# Adam over all parameters of the network, with a learning rate of 0.01.
optimizer = optim.Adam(cnn.parameters(), lr=0.01)
# Bare expression so the notebook displays the optimizer settings as the cell output.
optimizer

# Training of the model

In [None]:
from torch.autograd import Variable
# Number of full passes over the training loader.
num_epochs = 10

def train(num_epochs, cnn, loaders):
    """Train ``cnn`` on ``loaders['train']`` for ``num_epochs`` epochs.

    Relies on the notebook-level ``loss_func`` and ``optimizer`` objects.
    The loss of the current mini-batch is printed every 100 steps.

    Args:
        num_epochs: number of passes over the training loader.
        cnn: model whose forward pass returns a tuple with the class scores
            as its first element.
        loaders: mapping that provides the training loader under ``'train'``.
    """
    cnn.train()  # training mode (enables dropout)

    # Batches are moved to wherever the model's parameters live, so the loop
    # works unchanged whether the model sits on the CPU or on a GPU.
    first_param = next(cnn.parameters(), None)
    model_device = first_param.device if first_param is not None else None

    total_step = len(loaders['train'])

    for epoch in range(num_epochs):
        for i, (images, labels) in enumerate(loaders['train']):
            if model_device is not None:
                images = images.to(model_device)
                labels = labels.to(model_device)

            # Tensors carry autograd information themselves; no Variable
            # wrapper is needed.
            output = cnn(images)[0]
            loss = loss_func(output, labels)

            # Clear stale gradients, back-propagate, then update the weights.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if (i + 1) % 100 == 0:
                print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                      .format(epoch + 1, num_epochs, i + 1, total_step, loss.item()))
# Run the training loop on the model and loaders defined above.
train(num_epochs, cnn, loaders)

# Evaluation of the model on test data

In [None]:
def test():
    """Evaluate the notebook-level ``cnn`` on ``loaders['test']`` and print its accuracy.

    Correct predictions and sample counts are accumulated over every batch,
    so the printed value is the accuracy over the whole test loader rather
    than over the last batch only.
    """
    cnn.eval()  # evaluation mode (disables dropout)

    # Move batches to the device the model's parameters live on.
    first_param = next(cnn.parameters(), None)
    model_device = first_param.device if first_param is not None else None

    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in loaders['test']:
            if model_device is not None:
                images = images.to(model_device)
                labels = labels.to(model_device)

            test_output, _ = cnn(images)
            # Predicted class = index of the highest score for each sample.
            pred_y = test_output.argmax(dim=1)
            correct += (pred_y == labels).sum().item()
            total += labels.size(0)

    # An empty loader has no defined accuracy; report NaN instead of failing.
    accuracy = correct / total if total else float('nan')
    print('Test Accuracy of the model on the 10000 test images: %.5f' % accuracy)
# Evaluate the trained model on the test loader and print the accuracy.
test()