In [None]:
# This practice is inspired by:
# https://medium.com/@golnaz.hosseini/beginner-tutorial-image-classification-using-pytorch-63f30dcc071c
# https://www.digitalocean.com/community/tutorials/alexnet-pytorch
# https://medium.com/thecyphy/train-cnn-model-with-pytorch-21dafb918f48

In [None]:
# imports necessary packages
import numpy as np
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
import torchvision
from torchvision import datasets
from torchvision import transforms
from torch.utils.data.sampler import SubsetRandomSampler
# Select the compute device once, up front: the CUDA GPU when available, otherwise the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

<h1>CIFAR-10 Dataset</h1>

<h4>The CIFAR-10 dataset consists of 60,000 color images in 10 distinct classes. It has been widely used to benchmark AI and machine learning models for image classification.
<a href="https://www.cs.toronto.edu/~kriz/cifar.html">https://www.cs.toronto.edu/~kriz/cifar.html</a></h4>
<h4>PyTorch provides convenient download and dataloading functions.</h4>

In [None]:
# Number of images per training/testing batch.
# A small value is convenient for visualizing the dataset;
# in actual experiments this number is something to tune.
batch_size = 32

# Preprocessing pipeline: only convert images to tensors for now.
# Normalization is deliberately left out so the images can be displayed as they are.
transform = transforms.Compose([transforms.ToTensor()])

# Download the training split of CIFAR-10
trainset = datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)
# Download the test split of CIFAR-10
testset = datasets.CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=transform,
)

In [None]:
# Settings shared by the train and test loaders
loader_args = dict(batch_size=batch_size, num_workers=2)
# Train loader: reshuffle the samples every epoch
trainloader = torch.utils.data.DataLoader(trainset, shuffle=True, **loader_args)
# Test loader: keep a fixed order
testloader = torch.utils.data.DataLoader(testset, shuffle=False, **loader_args)

<h4>Let's print the 10 class names of CIFAR-10.</h4>

In [None]:
from collections import Counter

# Inspect the distinct classes of CIFAR-10
classes = trainset.classes          # list of all class names
labels = trainset.targets           # class id of every training image
label_counts = Counter(labels)      # number of images per class id

# One table row per class: id, image count, bold class name
print("ID\t#image\tlabel name")
for idx, class_name in enumerate(classes):
    print(f"{idx}\t{label_counts[idx]}\t\033[1m{class_name}\033[0m")

<h4>
    Visualize the first image for each class in the trainset
</h4>

In [None]:
# Map each class id to the first training image found for that class.
# (A dict keyed by label is required here: testing `label in list_of_pairs`
# never matches, which would just collect the first 10 images of the dataset.)
first_images = {}
# Scan the dataset until every class has been seen once
for img, label in trainset:
    # setdefault keeps the earliest image and ignores later ones of the same class
    first_images.setdefault(label, img)
    if len(first_images) == len(classes):
        break
# Create a figure with one panel per class
fig, ax = plt.subplots(1, len(classes), figsize=(18,8))
# Draw the images in class-id order
for i, lbl in enumerate(sorted(first_images)):
    im = first_images[lbl]
    # Tensors are (channels, height, width); imshow expects (height, width, channels)
    ax[i].imshow(np.transpose(im.numpy(), (1,2,0)), interpolation='bilinear')
    ax[i].set_title(f'{classes[lbl]}')


<h4>
    Why are the images blurry?
    <h5>For historical reasons: CIFAR-10 images are very small — only 32x32 pixels — which is low resolution by modern standards. <br>
        This small size suited early convolutional neural network research because it allowed faster experiments.
    </h5>
</h4>

<h1>Data preparation</h1>

<h4>The CIFAR-10 dataset is already processed and perfectly balanced.</h4>
<h4>We need normalization for equal importance across channels and learned features.</h4>

In [None]:
# Preprocessing for training: convert images to tensors, then normalize each
# RGB channel with the per-channel mean and standard deviation given below.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465),
                         (0.2023, 0.1994, 0.2010)),
])
# A dataset keeps the transform it was constructed with, so rebinding the
# `transform` name alone changes nothing. Rebuild both splits so the
# normalization is actually applied (the files are already on disk, so
# nothing is downloaded again).
trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)
# Building train data loader for the normalized dataset
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=2)
# Building test data loader for the normalized dataset
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=2)

<h4>Let's see the difference after normalization.</h4>

In [None]:
# Map each class id to the first training image found for that class.
# (A dict keyed by label is required here: testing `label in list_of_pairs`
# never matches, which would just collect the first 10 images of the dataset.)
first_images = {}
# Scan the dataset until every class has been seen once
for img, label in trainset:
    # setdefault keeps the earliest image and ignores later ones of the same class
    first_images.setdefault(label, img)
    if len(first_images) == len(classes):
        break
# Create a figure with one panel per class
fig, ax = plt.subplots(1, len(classes), figsize=(18,8))
# Draw the images in class-id order
for i, lbl in enumerate(sorted(first_images)):
    im = first_images[lbl]
    # Tensors are (channels, height, width); imshow expects (height, width, channels)
    ax[i].imshow(np.transpose(im.numpy(), (1,2,0)), interpolation='bilinear')
    ax[i].set_title(f'{classes[lbl]}')


<h4>
    Compared with original images:
<h5>
    - The normalized images contain negative values, so matplotlib warns that the image data is outside the valid range. <br>
    - Normalized data is meant for the computer to consume, not for humans to look at.
</h5>
</h4>

<h1>Build a convolutional neural network model.</h1>
<h4>
    Let's build a custom network consisting of 4 convolutional blocks followed by linear layers that generate the prediction.
</h4>
<h5>
    Each block contains:
    <ul>
        <li>Conv2d (convolutional layer)</li>
        <li>BatchNorm2d (reduces internal covariate shift)</li>
        <li>ReLU (add non-linearity)</li>
        <li>MaxPool2d (preserve only important feature)</li>
    </ul>
    After 4 blocks, we use a classifier with:
    <ul>
    <li>2 fully connected (Linear) layers with dropout layer to control overfitting</li>
    <li>Final output layer for 10-class classification (CIFAR-10)</li>
    </ul>
</h5>

In [None]:
# Custom CNN from scratch
# PyTorch's base class for a machine learning model: torch.nn.Module
class CustomNet(nn.Module):
    """Small CNN: four conv blocks followed by a three-stage linear classifier.

    Each conv block is Conv2d(3x3, padding 1) -> BatchNorm2d -> ReLU -> MaxPool2d(2, 2),
    with channel widths 3 -> 32 -> 64 -> 128 -> 256. The flattened features go
    through two Dropout(0.5) + Linear + ReLU stages and a final Linear layer
    that outputs `num_classes` scores.

    The first linear layer expects 1024 input features, which is what four 2x2
    poolings of a 3x32x32 input leave (256 channels x 2 x 2).
    """

    def __init__(self, num_classes=10):
        super().__init__()
        # Convolutional feature extractor
        self.block1 = self._conv_block(3, 32)
        self.block2 = self._conv_block(32, 64)
        self.block3 = self._conv_block(64, 128)
        self.block4 = self._conv_block(128, 256)
        # Classifier head
        self.fc = self._dense_block(1024, 2560)
        self.fc1 = self._dense_block(2560, 2560)
        self.fc2 = nn.Sequential(nn.Linear(2560, num_classes))

    @staticmethod
    def _conv_block(in_channels, out_channels):
        """Build one Conv2d -> BatchNorm2d -> ReLU -> MaxPool2d block."""
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
        )

    @staticmethod
    def _dense_block(in_features, out_features):
        """Build one Dropout(0.5) -> Linear -> ReLU block."""
        return nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(in_features, out_features),
            nn.ReLU(),
        )

    def forward(self, x):
        """Return the raw class scores for a batch of images `x`."""
        features = x
        for conv_block in (self.block1, self.block2, self.block3, self.block4):
            features = conv_block(features)
        # Flatten everything except the batch dimension
        flat = features.reshape(features.size(0), -1)
        hidden = self.fc1(self.fc(flat))
        return self.fc2(hidden)

<h4>
    Let's print the model architecture.
</h4>

In [None]:
# Instantiate the network with its default of 10 output classes and print its layer structure
model = CustomNet()
print(model)

<h4>
    Not a fan of this format. Let's use TensorBoard to print a pretty format.
</h4>

In [None]:
from torch.utils.tensorboard import SummaryWriter
# One random tensor shaped like a single CIFAR-10 image: (batch, channels, height, width)
dummy_input = torch.randn(1, 3, 32, 32)  # CIFAR-10 shape
# Write the model graph under log/img_cls/custom_cnn for viewing in TensorBoard
with SummaryWriter("log/img_cls/custom_cnn") as writer:
    # dummy_input is the example input passed through the model to record its graph
    writer.add_graph(model, dummy_input)

<h4>
    Hyperparameters are a core part of model selection in machine learning.
</h4>

In [None]:
# Hyperparameters for this run
batch_size = 320        # images per batch; often limited by available memory
num_epochs = 20         # number of training epochs -- one epoch is a full pass of the model over the train set
learning_rate = 0.005   # how much the model learns from every training batch

# Fresh, randomly initialized model on the selected device
model = CustomNet().to(device)

# Cross-entropy loss is the usual choice for classification tasks
criterion = nn.CrossEntropyLoss()

# Gradient descent (SGD with momentum and weight decay) updates the model weights -- the "learning"
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=learning_rate,
    momentum=0.9,
    weight_decay=0.005,
)

<h4>
    Apply updated hyperparameters to the data loader.
</h4>

In [None]:
# Settings shared by the train and test loaders (picks up the updated batch size)
loader_args = dict(batch_size=batch_size, num_workers=2)
# Train loader: reshuffle the samples every epoch
trainloader = torch.utils.data.DataLoader(trainset, shuffle=True, **loader_args)
# Test loader: keep a fixed order
testloader = torch.utils.data.DataLoader(testset, shuffle=False, **loader_args)

<h1>Train the model.</h1>
<h4>
    Let's first check whether a GPU is available to us.
</h4>
<h4>In PyTorch, the string 'cuda' refers to using NVIDIA GPUs via the CUDA backend, otherwise the device is CPU</h4>

In [None]:
# Get GPU device, or CPU if GPU is not detected
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Show which device was selected
print(device)

In [None]:
# Train the model and log per-epoch accuracy and loss to TensorBoard.
# The context manager closes the writer even if training is interrupted.
total_step = len(trainloader)
with SummaryWriter("log/img_cls/cifar10_experiment") as writer:
    for epoch in range(num_epochs):
        # Dropout and BatchNorm must be in training mode while optimizing
        model.train()
        correct = 0
        total = 0
        running_loss = 0.0
        for i, (images, labels) in enumerate(trainloader):
            # Move tensors to the configured device
            images = images.to(device)
            labels = labels.to(device)

            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Accumulate per-sample statistics so the epoch metrics cover every
            # batch, not just the last (possibly smaller) one
            batch_count = labels.size(0)
            running_loss += loss.item() * batch_count
            total += batch_count
            correct += outputs.argmax(dim=1).eq(labels).sum().item()

        # Mean loss over all training samples seen in this epoch
        epoch_loss = running_loss / total
        writer.add_scalar("Accuracy/train", correct/total, epoch)
        writer.add_scalar("Loss/train", epoch_loss, epoch)

        print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                       .format(epoch+1, num_epochs, i+1, total_step, epoch_loss))


<h1>Evaluate the trained model.</h1>
<h4>
    Noticed the test set was never used or even mentioned in any place?
</h4>
<h4>It's <b><u>crucial</u></b> that the testing set remains completely unseen throughout the training and validation phases; this avoids data contamination and keeps the evaluation accurate.</h4>
<h4>There are various metrics available to assess the effectiveness of a trained model. Selection often depends on the study case. Here, we evaluate only overall accuracy for simplicity and demonstration purposes.</h4>

In [None]:
# Evaluate the trained model on the test set and log the accuracy to TensorBoard.
# Switch to evaluation mode first: otherwise Dropout keeps dropping activations
# and BatchNorm keeps using (and updating) per-batch statistics during testing.
model.eval()

correct = 0
total = 0
# No gradients are needed for evaluation
with torch.no_grad():
    for images, labels in testloader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        # Predicted class = index of the highest score
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# The context manager closes the writer once the scalar is written
with SummaryWriter("log/img_cls/cifar10_experiment") as writer:
    writer.add_scalar("Accuracy/test", correct/total, num_epochs)
print('Accuracy of the network on the {} test images: {} %'.format(total, 100 * correct / total))

<h1>Wonder how hyperparameters affect the training?</h1>
<h4>
    Larger batch size:
</h4>

In [None]:
# Hyperparameters for this run (larger batch size)
batch_size = 10000      # images per batch; often limited by available memory
num_epochs = 20         # number of training epochs -- one epoch is a full pass of the model over the train set
learning_rate = 0.005   # how much the model learns from every training batch

# Fresh, randomly initialized model on the selected device
model = CustomNet().to(device)

# Cross-entropy loss is the usual choice for classification tasks
criterion = nn.CrossEntropyLoss()

# Gradient descent (SGD with momentum and weight decay) updates the model weights -- the "learning"
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=learning_rate,
    momentum=0.9,
    weight_decay=0.005,
)

<h4>
    Apply updated hyperparameters to the data loader.
</h4>

In [None]:
# Settings shared by the train and test loaders (picks up the updated batch size)
loader_args = dict(batch_size=batch_size, num_workers=2)
# Train loader: reshuffle the samples every epoch
trainloader = torch.utils.data.DataLoader(trainset, shuffle=True, **loader_args)
# Test loader: keep a fixed order
testloader = torch.utils.data.DataLoader(testset, shuffle=False, **loader_args)

<h4>
    Train the model.
</h4>

In [None]:
# Train the model and log per-epoch accuracy and loss to TensorBoard.
# The context manager closes the writer even if training is interrupted.
total_step = len(trainloader)
with SummaryWriter("log/img_cls/cifar10_experiment_larger_batch_size") as writer:
    for epoch in range(num_epochs):
        # Dropout and BatchNorm must be in training mode while optimizing
        model.train()
        correct = 0
        total = 0
        running_loss = 0.0
        for i, (images, labels) in enumerate(trainloader):
            # Move tensors to the configured device
            images = images.to(device)
            labels = labels.to(device)

            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Accumulate per-sample statistics so the epoch metrics cover every
            # batch, not just the last one
            batch_count = labels.size(0)
            running_loss += loss.item() * batch_count
            total += batch_count
            correct += outputs.argmax(dim=1).eq(labels).sum().item()

        # Mean loss over all training samples seen in this epoch
        epoch_loss = running_loss / total
        writer.add_scalar("Accuracy/train", correct/total, epoch)
        writer.add_scalar("Loss/train", epoch_loss, epoch)

        print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                       .format(epoch+1, num_epochs, i+1, total_step, epoch_loss))


<h4>
    Evaluate the model.
</h4>

In [None]:
# Evaluate the trained model on the test set and log the accuracy to TensorBoard.
# Switch to evaluation mode first: otherwise Dropout keeps dropping activations
# and BatchNorm keeps using (and updating) per-batch statistics during testing.
model.eval()

correct = 0
total = 0
# No gradients are needed for evaluation
with torch.no_grad():
    for images, labels in testloader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        # Predicted class = index of the highest score
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# The context manager closes the writer once the scalar is written
with SummaryWriter("log/img_cls/cifar10_experiment_larger_batch_size") as writer:
    writer.add_scalar("Accuracy/test", correct/total, num_epochs)
print('Accuracy of the network on the {} test images: {} %'.format(total, 100 * correct / total))

<h4>
    Larger learning rate:
</h4>

In [None]:
# Hyperparameters for this run (larger learning rate)
batch_size = 320        # images per batch; often limited by available memory
num_epochs = 20         # number of training epochs -- one epoch is a full pass of the model over the train set
learning_rate = 0.05    # how much the model learns from every training batch

# Fresh, randomly initialized model on the selected device
model = CustomNet().to(device)

# Cross-entropy loss is the usual choice for classification tasks
criterion = nn.CrossEntropyLoss()

# Gradient descent (SGD with momentum and weight decay) updates the model weights -- the "learning"
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=learning_rate,
    momentum=0.9,
    weight_decay=0.005,
)

<h4>
    Apply updated hyperparameters to the data loader.
</h4>

In [None]:
# Settings shared by the train and test loaders (picks up the updated batch size)
loader_args = dict(batch_size=batch_size, num_workers=2)
# Train loader: reshuffle the samples every epoch
trainloader = torch.utils.data.DataLoader(trainset, shuffle=True, **loader_args)
# Test loader: keep a fixed order
testloader = torch.utils.data.DataLoader(testset, shuffle=False, **loader_args)

<h4>
    Train the model.
</h4>

In [None]:
# Train the model and log per-epoch accuracy and loss to TensorBoard.
# The context manager closes the writer even if training is interrupted.
total_step = len(trainloader)
with SummaryWriter("log/img_cls/cifar10_experiment_larger_learning_rate") as writer:
    for epoch in range(num_epochs):
        # Dropout and BatchNorm must be in training mode while optimizing
        model.train()
        correct = 0
        total = 0
        running_loss = 0.0
        for i, (images, labels) in enumerate(trainloader):
            # Move tensors to the configured device
            images = images.to(device)
            labels = labels.to(device)

            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Accumulate per-sample statistics so the epoch metrics cover every
            # batch, not just the last (possibly smaller) one
            batch_count = labels.size(0)
            running_loss += loss.item() * batch_count
            total += batch_count
            correct += outputs.argmax(dim=1).eq(labels).sum().item()

        # Mean loss over all training samples seen in this epoch
        epoch_loss = running_loss / total
        writer.add_scalar("Accuracy/train", correct/total, epoch)
        writer.add_scalar("Loss/train", epoch_loss, epoch)

        print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                       .format(epoch+1, num_epochs, i+1, total_step, epoch_loss))


<h4>
    Evaluate the model.
</h4>

In [None]:
# Evaluate the trained model on the test set and log the accuracy to TensorBoard.
# Switch to evaluation mode first: otherwise Dropout keeps dropping activations
# and BatchNorm keeps using (and updating) per-batch statistics during testing.
model.eval()

correct = 0
total = 0
# No gradients are needed for evaluation
with torch.no_grad():
    for images, labels in testloader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        # Predicted class = index of the highest score
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# The context manager closes the writer once the scalar is written
with SummaryWriter("log/img_cls/cifar10_experiment_larger_learning_rate") as writer:
    writer.add_scalar("Accuracy/test", correct/total, num_epochs)
print('Accuracy of the network on the {} test images: {} %'.format(total, 100 * correct / total))

<h4>
    More training epochs:
</h4>

In [None]:
# Hyperparameters for this run (more training epochs)
batch_size = 320        # images per batch; often limited by available memory
num_epochs = 40         # number of training epochs -- one epoch is a full pass of the model over the train set
learning_rate = 0.005   # how much the model learns from every training batch

# Fresh, randomly initialized model on the selected device
model = CustomNet().to(device)

# Cross-entropy loss is the usual choice for classification tasks
criterion = nn.CrossEntropyLoss()

# Gradient descent (SGD with momentum and weight decay) updates the model weights -- the "learning"
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=learning_rate,
    momentum=0.9,
    weight_decay=0.005,
)

<h4>
    Apply updated hyperparameters to the data loader.
</h4>

In [None]:
# Settings shared by the train and test loaders (picks up the updated batch size)
loader_args = dict(batch_size=batch_size, num_workers=2)
# Train loader: reshuffle the samples every epoch
trainloader = torch.utils.data.DataLoader(trainset, shuffle=True, **loader_args)
# Test loader: keep a fixed order
testloader = torch.utils.data.DataLoader(testset, shuffle=False, **loader_args)

<h4>
    Train the model.
</h4>

In [None]:
# Train the model and log per-epoch accuracy and loss to TensorBoard.
# The run directory matches the one used by this experiment's evaluation cell
# (previously the training curves went to a sub-directory of the baseline run).
# The context manager closes the writer even if training is interrupted.
total_step = len(trainloader)
with SummaryWriter("log/img_cls/cifar10_experiment_more_training_epoch") as writer:
    for epoch in range(num_epochs):
        # Dropout and BatchNorm must be in training mode while optimizing
        model.train()
        correct = 0
        total = 0
        running_loss = 0.0
        for i, (images, labels) in enumerate(trainloader):
            # Move tensors to the configured device
            images = images.to(device)
            labels = labels.to(device)

            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Accumulate per-sample statistics so the epoch metrics cover every
            # batch, not just the last (possibly smaller) one
            batch_count = labels.size(0)
            running_loss += loss.item() * batch_count
            total += batch_count
            correct += outputs.argmax(dim=1).eq(labels).sum().item()

        # Mean loss over all training samples seen in this epoch
        epoch_loss = running_loss / total
        writer.add_scalar("Accuracy/train", correct/total, epoch)
        writer.add_scalar("Loss/train", epoch_loss, epoch)

        print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                       .format(epoch+1, num_epochs, i+1, total_step, epoch_loss))


<h4>
    Evaluate the model.
</h4>

In [None]:
# Evaluate the trained model on the test set and log the accuracy to TensorBoard.
# Switch to evaluation mode first: otherwise Dropout keeps dropping activations
# and BatchNorm keeps using (and updating) per-batch statistics during testing.
model.eval()

correct = 0
total = 0
# No gradients are needed for evaluation
with torch.no_grad():
    for images, labels in testloader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        # Predicted class = index of the highest score
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# Log at step num_epochs (the number of epochs this model was trained for),
# not a hard-coded 20. The context manager closes the writer afterwards.
with SummaryWriter("log/img_cls/cifar10_experiment_more_training_epoch") as writer:
    writer.add_scalar("Accuracy/test", correct/total, num_epochs)
print('Accuracy of the network on the {} test images: {} %'.format(total, 100 * correct / total))