In [1]:
import time

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torchvision.utils import make_grid

In [2]:
# Per-sample transform handed to both MNIST datasets below: turns each image into a tensor.
transform = transforms.ToTensor()

In [3]:
# Training split of MNIST, stored under ../Data/ (downloaded on first use).
train_data = datasets.MNIST(
    root='../Data/',
    train=True,
    transform=transform,
    download=True,
)

In [4]:
# Held-out test split of MNIST, same root folder and transform as the training split.
test_data = datasets.MNIST(
    root='../Data/',
    train=False,
    transform=transform,
    download=True,
)

In [5]:
# Display the dataset summary (size, root, split, transform) as the cell output.
train_data

Dataset MNIST
    Number of datapoints: 60000
    Root location: ../Data/
    Split: Train
    StandardTransform
Transform: ToTensor()

In [6]:
# Display the test-split summary; it should differ from the training one only in size and split.
test_data

Dataset MNIST
    Number of datapoints: 10000
    Root location: ../Data/
    Split: Test
    StandardTransform
Transform: ToTensor()

In [7]:
# Mini-batches of 10 images. Only the training stream is shuffled; the test order stays fixed.
train_loader = DataLoader(dataset=train_data, batch_size=10, shuffle=True)
test_loader = DataLoader(dataset=test_data, batch_size=10, shuffle=False)

In [8]:
# Stand-alone layers used to trace tensor shapes by hand before the model class is written.
# 1 input channel (grayscale) -> 6 feature maps (arbitrary), 3x3 kernel, stride 1, no padding.
conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=3, stride=1)

# 6 feature maps coming from conv1 (after pooling) -> 16 feature maps (arbitrary), same kernel and stride.
conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=3, stride=1)

In [9]:
# Grab the first (image, label) pair for the shape walkthrough in the next cells; `i` is 0.
# next() raises StopIteration on an empty dataset instead of silently leaving the names unbound,
# which is what the previous loop-and-break form did.
i, (X_train, y_train) = next(enumerate(train_data))

In [10]:
# Conv2d is fed a 4D batch here, so reshape the single image to (batch=1, channels=1, 28, 28).
x = X_train.view(1, 1, 28, 28) # convert to a 4D batch that holds exactly one image

In [11]:
# First convolution followed by ReLU.
# NOTE: this overwrites x, so the cell cannot be re-run on its own: conv1 takes 1 input channel,
# but after one run x already has 6. Re-run the cell that builds x from X_train first.
x = F.relu(conv1(x))

In [12]:
x.shape # (1 image, 6 feature maps, 26, 26): a 3x3 kernel without padding trims one border pixel per side (28 -> 26)

torch.Size([1, 6, 26, 26])

In [13]:
x = F.max_pool2d(x,2,2) # (input, 2x2 kernel, stride=2); overwrites x, so re-running this cell pools again

In [14]:
x.shape # the 2x2 window with stride 2 halves height and width (26 -> 13); the channel count is unchanged

torch.Size([1, 6, 13, 13])

In [15]:
# Second convolution + ReLU on the pooled maps (6 -> 16 channels); overwrites x again.
x = F.relu(conv2(x))

In [16]:
x.shape # the 3x3 kernel without padding again trims 2 pixels per spatial dimension (13 -> 11)

torch.Size([1, 16, 11, 11])

In [17]:
x = F.max_pool2d(x,2,2) # second 2x2 / stride-2 pooling; overwrites x

In [18]:
x.shape # 11 is odd, so the pooling drops the last row/column: 11 -> 5 rather than 5.5

torch.Size([1, 16, 5, 5])

In [19]:
((28-2)//2-2)//2 # how to get 5: each conv trims 2, each pool halves and rounds down (28 -> 26 -> 13 -> 11 -> 5)

5

In [20]:
x.view(-1,16*5*5).shape # flatten the 16 maps of 5x5 into one row of 400 values per image; -1 infers the batch dimension

torch.Size([1, 400])

In [21]:
class ConvolutionalNetwork(nn.Module):
    """Small CNN for single-channel 28x28 images with 10 output classes.

    Layout: conv(1->6, 3x3) -> ReLU -> 2x2 max-pool -> conv(6->16, 3x3) -> ReLU
    -> 2x2 max-pool -> fc(400->120) -> ReLU -> fc(120->84) -> ReLU -> fc(84->10)
    -> log-softmax.
    """

    def __init__(self):
        super().__init__()
        # Layers are created in the same order as before so a fixed seed
        # still yields the same initial weights.
        self.conv1 = nn.Conv2d(1,6,3,1)
        self.conv2 = nn.Conv2d(6,16,3,1)
        # 16 feature maps of 5x5 remain after the two conv/pool stages.
        self.fc1 = nn.Linear(5*5*16, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84,10)

    def forward(self, X):
        """Return per-class log-probabilities for a batch of images.

        Args:
            X: image tensor, intended shape (batch, 1, 28, 28). A single
                3-D image (1, 28, 28) is treated as a batch of one.

        Returns:
            Tensor of shape (batch, 10); each row is a log-softmax over the
            10 classes, so pair it with a loss that expects log-probabilities.

        Raises:
            RuntimeError: if the pooled feature maps do not flatten to 400
                values per image (raised by ``fc1``).
        """
        if X.dim() == 3:
            # Unbatched image: add the batch axis so the per-sample flatten below is valid.
            X = X.unsqueeze(0)

        X = F.max_pool2d(F.relu(self.conv1(X)), 2, 2)  # 28 -> 26 -> 13
        X = F.max_pool2d(F.relu(self.conv2(X)), 2, 2)  # 13 -> 11 -> 5

        # Flatten per sample and keep the batch axis fixed. The previous
        # view(-1, 400) let a wrong input size silently change the number of
        # rows; now the mismatch surfaces as a shape error in fc1.
        X = torch.flatten(X, start_dim=1)

        X = F.relu(self.fc1(X))
        X = F.relu(self.fc2(X))
        X = self.fc3(X)

        return F.log_softmax(X, dim=1)

In [22]:
torch.manual_seed(42) # seed before construction so the randomly initialised weights are reproducible
model = ConvolutionalNetwork()
model # show the layer summary as the cell output

ConvolutionalNetwork(
  (conv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)

In [24]:
# Element count of every parameter tensor, in registration order
# (weight, then bias, for conv1, conv2, fc1, fc2, fc3).
for tensor in model.parameters():
    print(tensor.numel())

# total = 60074

54
6
864
16
48000
120
10080
84
840
10


In [26]:
# ConvolutionalNetwork.forward() already ends in log_softmax, so the matching loss is NLLLoss.
# CrossEntropyLoss expects raw logits and would apply log_softmax a second time.
criterion = nn.NLLLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [30]:
# Train for a fixed number of epochs and evaluate on the test set after each one.
# `time` is imported in the notebook's import cell.
start_time = time.time()

# variables (trackers), one entry per epoch
epochs = 5
train_losses = []   # mean training loss over the epoch (float)
test_losses = []    # mean test loss over the whole test set (float)
train_correct = []  # number of correct training predictions (0-dim tensor)
test_correct = []   # number of correct test predictions (0-dim tensor)

# for loop epochs
for i in range(epochs):
    trn_corr = 0
    tst_corr = 0
    trn_loss_sum = 0.0
    trn_seen = 0
    tst_loss_sum = 0.0
    tst_seen = 0

    # train
    model.train()  # no-op for this architecture, but keeps the loop correct if dropout/batch-norm is added
    for b, (X_train, y_train) in enumerate(train_loader, start=1):

        # Images go in unflattened: the conv layers need the 2D layout.
        y_pred = model(X_train)
        loss = criterion(y_pred, y_train)

        # Index of the largest score per row is the predicted class.
        predicted = y_pred.argmax(dim=1)
        trn_corr += (predicted == y_train).sum()  # True counts as 1, False as 0

        # The criterion returns the batch mean; weight it by the batch size so
        # the epoch average stays exact even if the last batch is smaller.
        trn_loss_sum += loss.item() * y_train.size(0)
        trn_seen += y_train.size(0)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if b%600 == 0:
            print(f"epoch: {i} batch: {b} loss: {loss.item()}")

    # Record the epoch average instead of whatever the final batch happened to score.
    train_losses.append(trn_loss_sum / max(trn_seen, 1))
    train_correct.append(trn_corr)

    # test
    model.eval()
    with torch.no_grad():
        for X_test, y_test in test_loader:

            y_val = model(X_test)

            # Accumulate the loss for every test batch; previously only the
            # last 10-image batch contributed to the recorded test loss.
            loss = criterion(y_val, y_test)
            tst_loss_sum += loss.item() * y_test.size(0)
            tst_seen += y_test.size(0)

            predicted = y_val.argmax(dim=1)
            tst_corr += (predicted == y_test).sum()

    test_losses.append(tst_loss_sum / max(tst_seen, 1))
    test_correct.append(tst_corr)


current_time = time.time()
total = current_time - start_time
print(f"training took {total/60} minutes")

epoch: 0 batch: 600 loss: 0.10585442930459976
epoch: 0 batch: 1200 loss: 0.1937195211648941
epoch: 0 batch: 1800 loss: 0.28246447443962097
epoch: 0 batch: 2400 loss: 0.2788349390029907
epoch: 0 batch: 3000 loss: 0.1059822216629982
epoch: 0 batch: 3600 loss: 0.07784382998943329
epoch: 0 batch: 4200 loss: 0.26412978768348694
epoch: 0 batch: 4800 loss: 0.0032613431103527546
epoch: 0 batch: 5400 loss: 0.04633064940571785
epoch: 0 batch: 6000 loss: 0.007355888839811087
epoch: 1 batch: 600 loss: 0.20316310226917267
epoch: 1 batch: 1200 loss: 0.006854281760752201
epoch: 1 batch: 1800 loss: 0.0408165343105793
epoch: 1 batch: 2400 loss: 0.000346938002621755
epoch: 1 batch: 3000 loss: 0.004845469258725643
epoch: 1 batch: 3600 loss: 0.020094765350222588
epoch: 1 batch: 4200 loss: 0.00847818423062563
epoch: 1 batch: 4800 loss: 0.0010747266933321953
epoch: 1 batch: 5400 loss: 0.011174002662301064
epoch: 1 batch: 6000 loss: 0.00014427633141167462
epoch: 2 batch: 600 loss: 0.0009847130859270692
epoch