# Convolutional Neural Network (CNN) with PyTorch

## Exercise 1 of chapter 8 of the book `Deep learning with PyTorch`

By [Zahra Taheri](https://github.com/zata213), December 5, 2020

In [None]:
%matplotlib inline
from matplotlib import pyplot as plt
import numpy as np
import collections

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# Print only the first and last 2 items of each tensor dimension.
torch.set_printoptions(edgeitems=2)
# Seed PyTorch's default random generator so that runs are repeatable.
torch.manual_seed(123)

<torch._C.Generator at 0x7eff2c06eb40>

In [None]:
# Names of the ten CIFAR-10 classes, indexed by integer label.
# NOTE: this name is rebound to the two-class list ['airplane', 'bird'] when
# the airplane/bird subset is built further down.
class_names = ['airplane','automobile','bird','cat','deer',
               'dog','frog','horse','ship','truck']

In [None]:
from torchvision import datasets, transforms
# Folder passed to torchvision as the dataset root for the CIFAR-10 files.
data_path = '../data-unversioned/p1ch6/'
# Training split (train=True).  Each image is converted to a tensor and then
# normalized channel-wise with the given per-channel means and standard
# deviations.
cifar10 = datasets.CIFAR10(
    data_path, train=True, download=True,
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4915, 0.4823, 0.4468),
                             (0.2470, 0.2435, 0.2616))
    ]))

Files already downloaded and verified


In [None]:
# Held-out split (train=False), used in this notebook as the validation set.
# It is preprocessed with the same ToTensor + Normalize pipeline and the same
# per-channel mean/std constants as the training split.
cifar10_val = datasets.CIFAR10(
    data_path, train=False, download=True,
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4915, 0.4823, 0.4468),
                             (0.2470, 0.2435, 0.2616))
    ]))

Files already downloaded and verified


In [None]:
# Original CIFAR-10 label -> label in the two-class problem
# (0 = airplane stays 0, 2 = bird becomes 1).
label_map = {0: 0, 2: 1}
class_names = ['airplane', 'bird']

# Keep only the samples whose label has an entry in label_map, preserving
# dataset order, and store them with the remapped label.
cifar2 = [
    (image, label_map[target])
    for image, target in cifar10
    if target in label_map
]
cifar2_val = [
    (image, label_map[target])
    for image, target in cifar10_val
    if target in label_map
]

### With kernel_size=3

In [None]:
class Net_3(nn.Module):
    """Small CNN with 3x3 kernels for two-class 3x32x32 image classification.

    Two ``conv -> tanh -> 2x2 max-pool`` stages (3 -> 16 -> 8 channels;
    ``padding=1`` keeps the spatial size through each 3x3 convolution) are
    followed by two linear layers (512 -> 32 -> 2).
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        self.act1 = nn.Tanh()
        self.pool1 = nn.MaxPool2d(2)
        self.conv2 = nn.Conv2d(16, 8, kernel_size=3, padding=1)
        self.act2 = nn.Tanh()
        self.pool2 = nn.MaxPool2d(2)
        # 8 channels x 8 x 8 pixels remain after two poolings of a 32x32 image.
        self.fc1 = nn.Linear(8 * 8 * 8, 32)
        self.act3 = nn.Tanh()
        self.fc2 = nn.Linear(32, 2)

    def forward(self, x):
        """Return the raw two-class scores for a batch of images.

        Raises:
            ValueError: if the feature map of one sample does not contain
                exactly ``fc1.in_features`` values (i.e. the input images do
                not have the spatial size the network was built for).
        """
        out = self.pool1(self.act1(self.conv1(x)))
        out = self.pool2(self.act2(self.conv2(out)))

        # Guard the reshape below: ``view(-1, n)`` would otherwise silently
        # spread the features of one oversized sample over several rows and
        # return more predictions than there are input images.
        expected = self.fc1.in_features
        per_sample = out.shape[-3] * out.shape[-2] * out.shape[-1]
        if per_sample != expected:
            raise ValueError(
                "Net_3 expects {} features per sample after pooling "
                "(3x32x32 inputs), got feature map of shape {}".format(
                    expected, tuple(out.shape[-3:])))

        out = out.view(-1, expected)
        out = self.act3(self.fc1(out))
        out = self.fc2(out)
        return out

### With kernel_size=5

In [None]:
class Net_5(nn.Module):
    """Variant of the small CNN that uses 5x5 convolution kernels.

    ``padding=1`` is kept, so each 5x5 convolution trims two pixels from both
    spatial dimensions.  A 3x32x32 image therefore reaches the classifier as
    8 x 6 x 6 = 288 features.
    """

    def __init__(self):
        super().__init__()
        # Feature extractor, stage 1: 3 -> 16 channels.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=16,
                               kernel_size=5, padding=1)
        self.act1 = nn.Tanh()
        self.pool1 = nn.MaxPool2d(kernel_size=2)
        # Feature extractor, stage 2: 16 -> 8 channels.
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=8,
                               kernel_size=5, padding=1)
        self.act2 = nn.Tanh()
        self.pool2 = nn.MaxPool2d(kernel_size=2)
        # Classifier head: 288 -> 32 -> 2.
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(in_features=288, out_features=32)
        self.act3 = nn.Tanh()
        self.fc2 = nn.Linear(in_features=32, out_features=2)

    def forward(self, x):
        """Return the raw two-class scores for a batch of images."""
        features = self.conv1(x)
        features = self.act1(features)
        features = self.pool1(features)

        features = self.conv2(features)
        features = self.act2(features)
        features = self.pool2(features)

        hidden = self.flatten(features)
        hidden = self.fc1(hidden)
        hidden = self.act3(hidden)

        logits = self.fc2(hidden)
        return logits

In [None]:
model = Net_3()

# Element count of every parameter tensor, in the order the layers were
# registered (weight then bias for conv1, conv2, fc1, fc2).
numel_list = [p.numel() for p in model.parameters()]
# Last expression of the cell: displays (total, per-tensor counts).
sum(numel_list), numel_list

(18090, [432, 16, 1152, 8, 16384, 32, 64, 2])

In [None]:
from torchsummary import summary
# Print per-layer output shapes and parameter counts for a 3x32x32 input.
summary(model, (3,32,32))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 16, 32, 32]             448
              Tanh-2           [-1, 16, 32, 32]               0
         MaxPool2d-3           [-1, 16, 16, 16]               0
            Conv2d-4            [-1, 8, 16, 16]           1,160
              Tanh-5            [-1, 8, 16, 16]               0
         MaxPool2d-6              [-1, 8, 8, 8]               0
            Linear-7                   [-1, 32]          16,416
              Tanh-8                   [-1, 32]               0
            Linear-9                    [-1, 2]              66
Total params: 18,090
Trainable params: 18,090
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.01
Forward/backward pass size (MB): 0.32
Params size (MB): 0.07
Estimated Total Size (MB): 0.40
---------------------------------------------

In [None]:
model = Net_5()

# Element count of every parameter tensor, in the order the layers were
# registered (weight then bias for conv1, conv2, fc1, fc2).
numel_list = [p.numel() for p in model.parameters()]
# Last expression of the cell: displays (total, per-tensor counts).
sum(numel_list), numel_list

(13738, [1200, 16, 3200, 8, 9216, 32, 64, 2])

In [None]:
# Print per-layer output shapes and parameter counts for a 3x32x32 input.
summary(model, (3,32,32))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 16, 30, 30]           1,216
              Tanh-2           [-1, 16, 30, 30]               0
         MaxPool2d-3           [-1, 16, 15, 15]               0
            Conv2d-4            [-1, 8, 13, 13]           3,208
              Tanh-5            [-1, 8, 13, 13]               0
         MaxPool2d-6              [-1, 8, 6, 6]               0
           Flatten-7                  [-1, 288]               0
            Linear-8                   [-1, 32]           9,248
              Tanh-9                   [-1, 32]               0
           Linear-10                    [-1, 2]              66
Total params: 13,738
Trainable params: 13,738
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.01
Forward/backward pass size (MB): 0.27
Params size (MB): 0.05
Estimated Tot

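#### **a)** If we change kernel_size from 3x3 to 5x5, the number of parameters decreases from 18090 to 13738. The convolution weights themselves grow (432 → 1200 and 1152 → 3200), but because `padding=1` is kept, the feature maps shrink to 6x6, so the first linear layer needs only 288 inputs instead of 512 (16384 → 9216 weights), which outweighs that increase.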

In [None]:
import datetime  # <1>

def training_loop(n_epochs, optimizer, model, loss_fn, train_loader):
    """Train ``model`` batch by batch and print the loss periodically.

    For each of ``n_epochs`` epochs (numbered from 1), every
    ``(imgs, labels)`` batch yielded by ``train_loader`` is run through
    ``model``, scored with ``loss_fn`` and used for one ``optimizer`` step.
    After the first epoch and after every tenth epoch, the mean per-batch
    loss of that epoch is printed together with the current timestamp.

    Returns ``None``; ``model`` and ``optimizer`` are updated in place.
    """
    for epoch in range(1, n_epochs + 1):
        running_loss = 0.0

        for batch_imgs, batch_labels in train_loader:
            batch_loss = loss_fn(model(batch_imgs), batch_labels)

            # Clear the gradients of the previous step before backpropagating.
            optimizer.zero_grad()
            batch_loss.backward()
            optimizer.step()

            # .item() yields a plain Python number for the running total.
            running_loss += batch_loss.item()

        report_now = epoch == 1 or epoch % 10 == 0
        if not report_now:
            continue

        mean_loss = running_loss / len(train_loader)
        print('{} Epoch {}, Training loss {}'.format(
            datetime.datetime.now(), epoch, mean_loss))

In [None]:
# Batches of 64 samples from the airplane/bird training subset, reshuffled
# at every epoch.
train_loader = torch.utils.data.DataLoader(cifar2, batch_size=64,
                                           shuffle=True)

model = Net_3()  # fresh, untrained 3x3-kernel network
optimizer = optim.SGD(model.parameters(), lr=1e-2)  # plain SGD, learning rate 0.01
loss_fn = nn.CrossEntropyLoss()  # applied to the raw scores returned by the model

# Train for 100 epochs; the loss is printed after epoch 1 and every 10th epoch.
training_loop(
    n_epochs = 100,
    optimizer = optimizer,
    model = model,
    loss_fn = loss_fn,
    train_loader = train_loader,
)

2020-12-08 13:07:46.365656 Epoch 1, Training loss 0.6041859838233632
2020-12-08 13:08:22.616780 Epoch 10, Training loss 0.35150224815128717
2020-12-08 13:09:02.910174 Epoch 20, Training loss 0.29384877319168895
2020-12-08 13:09:43.783350 Epoch 30, Training loss 0.2623802735262616
2020-12-08 13:10:24.716709 Epoch 40, Training loss 0.23805504752572176
2020-12-08 13:11:04.552352 Epoch 50, Training loss 0.2195895376858438
2020-12-08 13:11:45.306755 Epoch 60, Training loss 0.2026940283786719
2020-12-08 13:12:25.146268 Epoch 70, Training loss 0.18730561691484635
2020-12-08 13:13:05.679425 Epoch 80, Training loss 0.17256072951350243
2020-12-08 13:13:45.913962 Epoch 90, Training loss 0.15853702300673078
2020-12-08 13:14:26.281938 Epoch 100, Training loss 0.1475972858773675


In [None]:
# Rebuild the loaders without shuffling for evaluation; sample order does not
# matter when only counting correct predictions.
train_loader = torch.utils.data.DataLoader(cifar2, batch_size=64,
                                           shuffle=False)
val_loader = torch.utils.data.DataLoader(cifar2_val, batch_size=64,
                                         shuffle=False)

def validate(model, train_loader, val_loader):
    """Print the classification accuracy of ``model`` on both data splits.

    For the "train" loader and then the "val" loader, every batch is run
    through ``model``; the predicted class is the index of the largest score
    in each row.  One line ``Accuracy <split>: <ratio>`` with two decimals is
    printed per split.  Returns ``None``.
    """
    splits = (("train", train_loader), ("val", val_loader))
    for split_name, batches in splits:
        n_correct = 0
        n_seen = 0

        # Pure evaluation: no gradients are needed.
        with torch.no_grad():
            for imgs, labels in batches:
                scores = model(imgs)
                predicted = torch.max(scores, dim=1).indices
                n_seen += labels.shape[0]
                n_correct += int((predicted == labels).sum())

        accuracy = n_correct / n_seen
        print("Accuracy {}: {:.2f}".format(split_name, accuracy))

# Report the accuracy of the trained Net_3 on the training and validation subsets.
validate(model, train_loader, val_loader)

Accuracy train: 0.92
Accuracy val: 0.88


In [None]:
# Batches of 64 samples from the airplane/bird training subset, reshuffled
# at every epoch.
train_loader = torch.utils.data.DataLoader(cifar2, batch_size=64,
                                           shuffle=True)

model = Net_5()  # fresh, untrained 5x5-kernel network
optimizer = optim.SGD(model.parameters(), lr=1e-2)  # plain SGD, learning rate 0.01
loss_fn = nn.CrossEntropyLoss()  # applied to the raw scores returned by the model

# Train for 100 epochs; the loss is printed after epoch 1 and every 10th epoch.
training_loop(
    n_epochs = 100,
    optimizer = optimizer,
    model = model,
    loss_fn = loss_fn,
    train_loader = train_loader,
)

2020-12-08 13:14:33.871091 Epoch 1, Training loss 0.562738618843115
2020-12-08 13:15:13.116454 Epoch 10, Training loss 0.33893521433802926
2020-12-08 13:15:56.595942 Epoch 20, Training loss 0.29237931283416263
2020-12-08 13:16:39.950547 Epoch 30, Training loss 0.2623655254104335
2020-12-08 13:17:23.196080 Epoch 40, Training loss 0.23606456332145984
2020-12-08 13:18:06.726849 Epoch 50, Training loss 0.2113819701276767
2020-12-08 13:18:50.745517 Epoch 60, Training loss 0.18575189234154998
2020-12-08 13:19:34.780410 Epoch 70, Training loss 0.16641135948004238
2020-12-08 13:20:19.031219 Epoch 80, Training loss 0.14494506027668144
2020-12-08 13:21:03.396640 Epoch 90, Training loss 0.128674705305202
2020-12-08 13:21:47.496057 Epoch 100, Training loss 0.11188445038571479


In [None]:
# Rebuild the loaders without shuffling for evaluation; sample order does not
# matter when only counting correct predictions.
train_loader = torch.utils.data.DataLoader(cifar2, batch_size=64,
                                           shuffle=False)
val_loader = torch.utils.data.DataLoader(cifar2_val, batch_size=64,
                                         shuffle=False)

def validate(model, train_loader, val_loader):
    """Print the classification accuracy of ``model`` on both data splits.

    For the "train" loader and then the "val" loader, every batch is run
    through ``model``; the predicted class is the index of the largest score
    in each row.  One line ``Accuracy <split>: <ratio>`` with two decimals is
    printed per split.  Returns ``None``.
    """
    splits = (("train", train_loader), ("val", val_loader))
    for split_name, batches in splits:
        n_correct = 0
        n_seen = 0

        # Pure evaluation: no gradients are needed.
        with torch.no_grad():
            for imgs, labels in batches:
                scores = model(imgs)
                predicted = torch.max(scores, dim=1).indices
                n_seen += labels.shape[0]
                n_correct += int((predicted == labels).sum())

        accuracy = n_correct / n_seen
        print("Accuracy {}: {:.2f}".format(split_name, accuracy))

# Report the accuracy of the trained Net_5 on the training and validation subsets.
validate(model, train_loader, val_loader)

Accuracy train: 0.93
Accuracy val: 0.87


#### **b)** If we change kernel_size from 3x3 to 5x5, the training accuracy increases from 0.92 to 0.93, while the validation accuracy decreases from 0.88 to 0.87. So the change slightly worsens overfitting.

### With kernel_size=(1,3)

In [None]:
class Net_13(nn.Module):
    """Variant of the small CNN that uses 1x3 (height x width) kernels.

    ``padding=1`` pads both spatial dimensions, while a 1x3 kernel only
    consumes padding along the width.  Each convolution therefore keeps the
    width and adds two rows to the height (32x32 -> 34x32, 17x16 -> 19x16),
    and a 3x32x32 image reaches the classifier as 8 x 9 x 8 = 576 features.
    """

    def __init__(self):
        super().__init__()
        # Feature extractor, stage 1: 3 -> 16 channels.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=16,
                               kernel_size=(1, 3), padding=1)
        self.act1 = nn.Tanh()
        self.pool1 = nn.MaxPool2d(kernel_size=2)
        # Feature extractor, stage 2: 16 -> 8 channels.
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=8,
                               kernel_size=(1, 3), padding=1)
        self.act2 = nn.Tanh()
        self.pool2 = nn.MaxPool2d(kernel_size=2)
        # Classifier head: 576 -> 32 -> 2.
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(in_features=576, out_features=32)
        self.act3 = nn.Tanh()
        self.fc2 = nn.Linear(in_features=32, out_features=2)

    def forward(self, x):
        """Return the raw two-class scores for a batch of images."""
        features = self.conv1(x)
        features = self.act1(features)
        features = self.pool1(features)

        features = self.conv2(features)
        features = self.act2(features)
        features = self.pool2(features)

        hidden = self.flatten(features)
        hidden = self.fc1(hidden)
        hidden = self.act3(hidden)

        logits = self.fc2(hidden)
        return logits

In [None]:
model = Net_13()

# Element count of every parameter tensor, in the order the layers were
# registered (weight then bias for conv1, conv2, fc1, fc2).
numel_list = [p.numel() for p in model.parameters()]
# Last expression of the cell: displays (total, per-tensor counts).
sum(numel_list), numel_list

(19082, [144, 16, 384, 8, 18432, 32, 64, 2])

In [None]:
from torchsummary import summary
# Print per-layer output shapes and parameter counts for a 3x32x32 input.
summary(model, (3,32,32))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 16, 34, 32]             160
              Tanh-2           [-1, 16, 34, 32]               0
         MaxPool2d-3           [-1, 16, 17, 16]               0
            Conv2d-4            [-1, 8, 19, 16]             392
              Tanh-5            [-1, 8, 19, 16]               0
         MaxPool2d-6              [-1, 8, 9, 8]               0
           Flatten-7                  [-1, 576]               0
            Linear-8                   [-1, 32]          18,464
              Tanh-9                   [-1, 32]               0
           Linear-10                    [-1, 2]              66
Total params: 19,082
Trainable params: 19,082
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.01
Forward/backward pass size (MB): 0.35
Params size (MB): 0.07
Estimated Tot

In [None]:
# Batches of 64 samples from the airplane/bird training subset, reshuffled
# at every epoch.
train_loader = torch.utils.data.DataLoader(cifar2, batch_size=64,
                                           shuffle=True)

model = Net_13()  # fresh, untrained 1x3-kernel network
optimizer = optim.SGD(model.parameters(), lr=1e-2)  # plain SGD, learning rate 0.01
loss_fn = nn.CrossEntropyLoss()  # applied to the raw scores returned by the model

# Train for 100 epochs; the loss is printed after epoch 1 and every 10th epoch.
training_loop(
    n_epochs = 100,
    optimizer = optimizer,
    model = model,
    loss_fn = loss_fn,
    train_loader = train_loader,
)

2020-12-08 13:33:19.786240 Epoch 1, Training loss 0.6084256811886076
2020-12-08 13:34:03.993435 Epoch 10, Training loss 0.3547499438000333
2020-12-08 13:34:53.428238 Epoch 20, Training loss 0.2999774587761824
2020-12-08 13:35:42.983857 Epoch 30, Training loss 0.2708665910799792
2020-12-08 13:36:32.526072 Epoch 40, Training loss 0.25456421808072716
2020-12-08 13:37:21.956149 Epoch 50, Training loss 0.24369606570263577
2020-12-08 13:38:11.135708 Epoch 60, Training loss 0.23021968278535612
2020-12-08 13:39:00.157423 Epoch 70, Training loss 0.21708584026356412
2020-12-08 13:39:49.545586 Epoch 80, Training loss 0.20586795782207684
2020-12-08 13:40:38.146433 Epoch 90, Training loss 0.19146147199497102
2020-12-08 13:41:26.572336 Epoch 100, Training loss 0.17867573165589837


In [None]:
# Rebuild the loaders without shuffling for evaluation; sample order does not
# matter when only counting correct predictions.
train_loader = torch.utils.data.DataLoader(cifar2, batch_size=64,
                                           shuffle=False)
val_loader = torch.utils.data.DataLoader(cifar2_val, batch_size=64,
                                         shuffle=False)

def validate(model, train_loader, val_loader):
    """Print the classification accuracy of ``model`` on both data splits.

    For the "train" loader and then the "val" loader, every batch is run
    through ``model``; the predicted class is the index of the largest score
    in each row.  One line ``Accuracy <split>: <ratio>`` with two decimals is
    printed per split.  Returns ``None``.
    """
    splits = (("train", train_loader), ("val", val_loader))
    for split_name, batches in splits:
        n_correct = 0
        n_seen = 0

        # Pure evaluation: no gradients are needed.
        with torch.no_grad():
            for imgs, labels in batches:
                scores = model(imgs)
                predicted = torch.max(scores, dim=1).indices
                n_seen += labels.shape[0]
                n_correct += int((predicted == labels).sum())

        accuracy = n_correct / n_seen
        print("Accuracy {}: {:.2f}".format(split_name, accuracy))

# Report the accuracy of the trained Net_13 on the training and validation subsets.
validate(model, train_loader, val_loader)

Accuracy train: 0.93
Accuracy val: 0.89


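#### **d and e)** If we change kernel_size from 3x3 to 1x3, the number of parameters increases from 18090 to 19082: the convolutions have fewer weights, but `padding=1` now adds rows that the 1-pixel-high kernel does not consume, so the flattened feature map grows from 512 to 576 values and the first linear layer gets larger. Also, the validation accuracy increases from 0.88 to 0.89. So the change slightly improves the model's validation performance.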