In [2]:
# Use CNN to classify MNIST dataset

In [3]:
import os
import torch
import torchvision
import torch.nn as nn
from torchvision import transforms
from torchvision.utils import save_image
from torchsummary import summary as PyTorchSummary

import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline

In [4]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


In [16]:
num_epochs = 10
batch_size = 100

## Data Pipeline

In [7]:
# MNIST dataset
train_dataset = torchvision.datasets.MNIST(root='./../data/',
                                           train=True, 
                                           transform=transforms.ToTensor(),
                                           download=True)

test_dataset = torchvision.datasets.MNIST(root='./../data/',
                                          train=False, 
                                          transform=transforms.ToTensor())

# Data loader
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size, 
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size, 
                                          shuffle=False)

## Build a classification model

In [8]:
class Classifier(nn.Module):
    def __init__(self, num_Classes = 10):
        super(Classifier, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 8, kernel_size=5, stride = 1), #1x28x28->8x24x24
            nn.BatchNorm2d(8),
            nn.ReLU(),
            nn.MaxPool2d(2, stride = 2))#8x12x12
        self.layer2 = nn.Sequential(
            nn.Conv2d(8, 16, kernel_size=5, stride = 1), #16x8x8
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d(2, stride = 2)) #16x4x4
        self.layer3 = nn.Sequential(
            nn.Conv2d(16, 64, kernel_size=3, stride = 1), #64x2x2
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2, stride = 2)) #64x1x1
        self.fc = nn.Linear(64, num_Classes)
    def forward(self,x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = out.reshape(out.size(0), -1)
        out = self.fc(out)
        return out

In [9]:
Clf = Classifier(10).to(device)

In [10]:
PyTorchSummary(Clf, input_size = (1,28,28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 8, 24, 24]             208
       BatchNorm2d-2            [-1, 8, 24, 24]              16
              ReLU-3            [-1, 8, 24, 24]               0
         MaxPool2d-4            [-1, 8, 12, 12]               0
            Conv2d-5             [-1, 16, 8, 8]           3,216
       BatchNorm2d-6             [-1, 16, 8, 8]              32
              ReLU-7             [-1, 16, 8, 8]               0
         MaxPool2d-8             [-1, 16, 4, 4]               0
            Conv2d-9             [-1, 64, 2, 2]           9,280
      BatchNorm2d-10             [-1, 64, 2, 2]             128
             ReLU-11             [-1, 64, 2, 2]               0
        MaxPool2d-12             [-1, 64, 1, 1]               0
           Linear-13                   [-1, 10]             650
Total params: 13,530
Trainable params: 

In [12]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(Clf.parameters(), lr = 0.001)

## Training

In [15]:
total_step = len(train_loader)

In [19]:
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)
        
        outputs = Clf(images)
        loss = criterion(outputs, labels)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if (i+1) % 100 == 0:
            print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 
                  .format(epoch, num_epochs, i+1, total_step, loss.item()))

Epoch [0/10], Step [100/600], Loss: 0.4459
Epoch [0/10], Step [200/600], Loss: 0.1779
Epoch [0/10], Step [300/600], Loss: 0.1062
Epoch [0/10], Step [400/600], Loss: 0.1840
Epoch [0/10], Step [500/600], Loss: 0.1312
Epoch [0/10], Step [600/600], Loss: 0.0964
Epoch [1/10], Step [100/600], Loss: 0.0394
Epoch [1/10], Step [200/600], Loss: 0.0307
Epoch [1/10], Step [300/600], Loss: 0.0725
Epoch [1/10], Step [400/600], Loss: 0.0334
Epoch [1/10], Step [500/600], Loss: 0.0214
Epoch [1/10], Step [600/600], Loss: 0.0481
Epoch [2/10], Step [100/600], Loss: 0.0467
Epoch [2/10], Step [200/600], Loss: 0.0532
Epoch [2/10], Step [300/600], Loss: 0.0358
Epoch [2/10], Step [400/600], Loss: 0.0314
Epoch [2/10], Step [500/600], Loss: 0.0380
Epoch [2/10], Step [600/600], Loss: 0.0374
Epoch [3/10], Step [100/600], Loss: 0.0275
Epoch [3/10], Step [200/600], Loss: 0.0082
Epoch [3/10], Step [300/600], Loss: 0.0840
Epoch [3/10], Step [400/600], Loss: 0.0140
Epoch [3/10], Step [500/600], Loss: 0.0290
Epoch [3/10

In [21]:
# Test the model
Clf.eval()  # eval mode (batchnorm uses moving mean/variance instead of mini-batch mean/variance)
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = Clf(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print('Test Accuracy of the model on the 10000 test images: {} %'.format(100 * correct / total))


Test Accuracy of the model on the 10000 test images: 99.12 %


In [22]:

# Save the model checkpoints 
torch.save(Clf.state_dict(), 'Classifier.ckpt')