## Build Convolutional Neural Network from Scratch
* 2 Convolution Layers (Followed by ReLU)
* 2 Max Pooling Layers
* 3 Fully Connected Layers

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F # provides activation functions and functions for tasks like pooling, normalization, and convolution layers, etc.

In [4]:
# Define CNN architecture
class CNN(nn.Module): # nn.Module is the base class for all neural network modules in PyTorch
    """Small convolutional classifier for 3-channel 32x32 images with 10 output classes.

    Data flow (see ``forward``):
        conv1 (3 -> 6, 5x5) -> ReLU -> 2x2 max-pool
        conv2 (6 -> 16, 5x5) -> ReLU -> 2x2 max-pool
        flatten -> fc1 (400 -> 120) -> ReLU -> fc2 (120 -> 84) -> ReLU -> fc3 (84 -> 10)

    ``fc1`` is sized for 16 * 5 * 5 features per sample, which is what the two
    conv/pool stages produce for a 32x32 input (32 -> 28 -> 14 -> 10 -> 5).
    """

    def __init__(self):
        # initialize the nn.Module machinery (parameter/submodule registration)
        super().__init__()

        # first convolutional layer: 3 input channels, 6 output channels, 5x5 kernel
        self.conv1 = nn.Conv2d(3, 6, 5)

        # first max pooling layer: 2x2 window, stride 2 (halves height and width)
        self.pool1 = nn.MaxPool2d(2, 2)

        # second convolutional layer: 6 input channels, 16 output channels, 5x5 kernel
        self.conv2 = nn.Conv2d(6, 16, 5)

        # second max pooling layer: 2x2 window, stride 2
        self.pool2 = nn.MaxPool2d(2, 2)

        # 3 fully connected layers
        # 16 channels * 5 * 5 spatial positions -> 120 features
        self.fc1 = nn.Linear(16 * 5 * 5, 120)

        # 120 -> 84 features
        self.fc2 = nn.Linear(120, 84)

        # 84 features -> 10 raw class scores
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x): # defines how the data flows through the network
        """Compute raw class scores (logits) for a batch of images.

        Args:
            x: image tensor of shape (N, 3, 32, 32).

        Returns:
            Tensor of shape (N, 10) holding one unnormalized score per class.

        Raises:
            RuntimeError: if the feature maps reaching the flatten step do not hold
                exactly ``fc1.in_features`` values per sample (i.e. the input images
                do not have the spatial size this network was built for).
        """
        # operation 1: convolution -> ReLU -> max pooling
        # ReLU zeroes negative activations and keeps positive ones unchanged; this
        # non-linearity is what lets stacked layers model non-linear patterns.
        x = self.pool1(F.relu(self.conv1(x)))

        # operation 2: convolution -> ReLU -> max pooling
        x = self.pool2(F.relu(self.conv2(x)))

        # operation 3: flatten each sample's (channels, height, width) maps into one
        # feature vector, giving a 2-D (batch, features) tensor.
        # A bare view(-1, 400) would silently move values between samples (and change
        # the batch size) whenever the spatial size is wrong but the total element
        # count happens to be divisible by 400, so check the per-sample size first.
        expected = self.fc1.in_features
        per_sample = x.size(-3) * x.size(-2) * x.size(-1)
        if per_sample != expected:
            raise RuntimeError(
                f"CNN expects {expected} features per sample after the conv/pool "
                f"stages but got {per_sample} (feature map shape {tuple(x.shape)}); "
                "inputs must be 3x32x32 images"
            )
        x = x.view(-1, expected)

        # operation 4: first fully connected layer with ReLU activation
        x = F.relu(self.fc1(x))

        # operation 5: second fully connected layer with ReLU activation
        x = F.relu(self.fc2(x))

        # operation 6: output layer, raw scores for each class (no softmax applied here)
        return self.fc3(x)

In [5]:
# create an instance of the CNN class defined in the previous cell
model = CNN()
print(model) # print the model architecture: one line per registered layer, in definition order

CNN(
  (conv1): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [6]:
from torchsummary import summary

# Per-layer output shapes and parameter counts for a single 3-channel 32x32 input.
# summary() prints the table itself and also returns an object whose repr is the
# same table; the trailing semicolon stops the notebook from echoing that return
# value, which otherwise shows the whole table a second time.
summary(model, (3, 32, 32));

Layer (type:depth-idx)                   Output Shape              Param #
├─Conv2d: 1-1                            [-1, 6, 28, 28]           456
├─MaxPool2d: 1-2                         [-1, 6, 14, 14]           --
├─Conv2d: 1-3                            [-1, 16, 10, 10]          2,416
├─MaxPool2d: 1-4                         [-1, 16, 5, 5]            --
├─Linear: 1-5                            [-1, 120]                 48,120
├─Linear: 1-6                            [-1, 84]                  10,164
├─Linear: 1-7                            [-1, 10]                  850
Total params: 62,006
Trainable params: 62,006
Non-trainable params: 0
Total mult-adds (M): 0.65
Input size (MB): 0.01
Forward/backward pass size (MB): 0.05
Params size (MB): 0.24
Estimated Total Size (MB): 0.30


Layer (type:depth-idx)                   Output Shape              Param #
├─Conv2d: 1-1                            [-1, 6, 28, 28]           456
├─MaxPool2d: 1-2                         [-1, 6, 14, 14]           --
├─Conv2d: 1-3                            [-1, 16, 10, 10]          2,416
├─MaxPool2d: 1-4                         [-1, 16, 5, 5]            --
├─Linear: 1-5                            [-1, 120]                 48,120
├─Linear: 1-6                            [-1, 84]                  10,164
├─Linear: 1-7                            [-1, 10]                  850
Total params: 62,006
Trainable params: 62,006
Non-trainable params: 0
Total mult-adds (M): 0.65
Input size (MB): 0.01
Forward/backward pass size (MB): 0.05
Params size (MB): 0.24
Estimated Total Size (MB): 0.30

## CIFAR10 Image Dataset and its Augmentation

CIFAR-10 dataset homepage (description and download links): https://www.cs.toronto.edu/~kriz/cifar.html

In [7]:
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

# Augmentation pipeline applied to every image each time it is loaded.
# Random perturbations mean the network rarely sees the exact same pixels twice,
# which mitigates overfitting and helps generalization.
transform = transforms.Compose(
    [
        # randomly mirror the image left-right
        transforms.RandomHorizontalFlip(),
        # rotate by a random angle drawn from [-10, +10] degrees
        transforms.RandomRotation(10),
        # random square crop covering 80-100% of the image area, resized to 32x32
        transforms.RandomResizedCrop(32, scale=(0.8, 1.0), ratio=(1.0, 1.0)),
        # convert the PIL image to a float tensor
        transforms.ToTensor(),
        # per-channel (value - 0.5) / 0.5
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ]
)

In [8]:
# load the data
# train data: uses the augmenting `transform`; batches of 4, reshuffled every epoch,
# loaded by 2 worker subprocesses
trainset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
trainloader = DataLoader(trainset, batch_size=4, shuffle=True, num_workers=2)

# test data: evaluation has to be deterministic, so no random augmentation here;
# only the tensor conversion and the same normalization used for training
test_transform = transforms.Compose([transforms.ToTensor(),
                                     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
testdata = datasets.CIFAR10(root='./data', train=False, download=True, transform=test_transform)
# kept under its own name so it does not overwrite `trainloader`
testloader = DataLoader(testdata, batch_size=4, shuffle=False, num_workers=2)

# CIFAR-10 classes, indexed by integer label
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data\cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:35<00:00, 4739656.05it/s]


Extracting ./data\cifar-10-python.tar.gz to ./data
Files already downloaded and verified


In [9]:
# report how many images each split contains
print(f"trainset length: {len(trainset)}")
print(f"testset length: {len(testdata)}")

trainset length: 50000
testset length: 10000
