# Objective: Convolutional Neural Network Classification Using Augmented CIFAR-10
Here, I'm going to build a convolutional neural network classification model in PyTorch, trained on the augmented CIFAR-10 training data set.

## Import Libraries

In [9]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils
import torchvision
import torchvision.transforms as transforms
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime

## Load Data
This covers how to download the data and load the data into the code.

In [2]:
# ref: https://pytorch.org/vision/0.9/transforms.html
# Augmentation pipeline applied on the fly each time a training image is fetched.
# ColorJitter, RandomRotation and RandomPerspective are intentionally left out.
transformer_train = transforms.Compose([
    transforms.RandomCrop(size=32, padding=4),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),
    transforms.ToTensor(),
])

# training split with augmentation (used to fit the model)
train_dataset = torchvision.datasets.CIFAR10(
    root='.', train=True, download=True, transform=transformer_train
)

# the same training split without augmentation (only converted to tensors)
train_dataset_original = torchvision.datasets.CIFAR10(
    root='.', train=True, download=True, transform=transforms.ToTensor()
)

Files already downloaded and verified
Files already downloaded and verified


In [3]:
# analyze train dataset: shape of the raw image array stored on the dataset object
# (the recorded output shows 50000 images of 32x32 pixels with 3 channels, channels last)
train_dataset.data.shape
# to list the distinct label values instead, run: set(train_dataset.targets)

(50000, 32, 32, 3)

In [4]:
# held-out CIFAR-10 split; images are only converted to tensors, never augmented
test_dataset = torchvision.datasets.CIFAR10(
    root='.', train=False, download=True, transform=transforms.ToTensor()
)

Files already downloaded and verified


In [5]:
# analyze test dataset: shape of the raw image array stored on the dataset object
# (the recorded output shows 10000 images of 32x32 pixels with 3 channels, channels last)
test_dataset.data.shape

(10000, 32, 32, 3)

## Preprocess Data
This covers how to create batches using DataLoader.

In [6]:
# count the distinct labels that occur in the training targets
K = len({label for label in train_dataset.targets})
print(f"The number of classes: {K}")

The number of classes: 10


In [7]:
# Batch generators for the training loop and for evaluation.
# Only the augmented training loader reshuffles; the other two keep dataset order.
batch_size = 128

train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True
)
train_loader_original = torch.utils.data.DataLoader(
    train_dataset_original, batch_size=batch_size, shuffle=False
)
test_loader = torch.utils.data.DataLoader(
    test_dataset, batch_size=batch_size, shuffle=False
)

In [8]:
# sanity check: pull one single-image batch from the augmented dataset and inspect it
tmp_loader = torch.utils.data.DataLoader(train_dataset, batch_size=1, shuffle=True)

x, y = next(iter(tmp_loader))
print(x)
# layout: [batch_size, num_channels, image_height, image_width]
print(x.shape)

tensor([[[[0.0000, 0.0000, 0.0000,  ..., 0.8314, 0.8196, 0.8275],
          [0.0000, 0.0000, 0.0000,  ..., 0.8392, 0.6980, 0.6824],
          [0.0000, 0.0000, 0.0000,  ..., 0.8353, 0.6941, 0.6392],
          ...,
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000]],

         [[0.0000, 0.0000, 0.0000,  ..., 0.8510, 0.8314, 0.7529],
          [0.0000, 0.0000, 0.0000,  ..., 0.7333, 0.5490, 0.4667],
          [0.0000, 0.0000, 0.0000,  ..., 0.6000, 0.4314, 0.3725],
          ...,
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000]],

         [[0.0000, 0.0000, 0.0000,  ..., 0.8902, 0.8431, 0.7451],
          [0.0000, 0.0000, 0.0000,  ..., 0.7098, 0.5098, 0.4000],
          [0.0000, 0.0000, 0.0000,  ..., 0

## Build Model
This covers how to build a convolutional neural network classification model to train on the CIFAR-10 dataset. Since the dataset is big enough, we can use a GPU for faster processing if available.

In [16]:
# define the model
class CNN(nn.Module):
    """Strided convolutional classifier for 3-channel 32x32 images.

    Three 3x3 convolutions with stride 2 shrink the image 32 -> 15 -> 7 -> 3,
    after which two fully connected layers map the 128*3*3 features to `K`
    class logits (no softmax is applied).

    Args:
        K: number of output classes.
    """

    def __init__(self, K):
        super(CNN, self).__init__()

        # define convolutional layers
        # H_out = floor[(H_in + 2*p - d*(k-1) - 1)/s + 1], here with p=0, d=1, k=3, s=2
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=2)
        # image size: 32X32 -> 15X15
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=2)
        # image size: 15X15 -> 7X7
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=2)
        # image size: 7X7 -> 3X3

        # define linear (fc) layers
        self.fc1 = nn.Linear(128 * 3 * 3, 1024)
        self.fc2 = nn.Linear(1024, K)

    def forward(self, X):
        """Return the class logits for a batch of images.

        Args:
            X: tensor of shape [batch_size, 3, 32, 32].

        Returns:
            Tensor of shape [batch_size, K] holding unnormalized class scores.
        """
        X = F.relu(self.conv1(X))
        X = F.relu(self.conv2(X))
        X = F.relu(self.conv3(X))
        X = X.view(-1, 128 * 3 * 3)
        # F.dropout defaults to training=True, so the module's mode must be passed
        # explicitly; otherwise dropout would stay active after model.eval().
        X = F.dropout(X, p=0.5, training=self.training)
        X = F.relu(self.fc1(X))
        X = F.dropout(X, p=0.2, training=self.training)
        return self.fc2(X)

In [17]:
# build the network with one output logit per class
model = CNN(K=K)

In [None]:
# use the first CUDA device when one is present; otherwise stay on the CPU
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

# move the model onto the chosen device
model.to(device)


In [19]:
# Adam with its default hyper-parameters updates every model parameter;
# cross-entropy is computed from the raw logits returned by the model
optimizer = torch.optim.Adam(params=model.parameters())
criterion = nn.CrossEntropyLoss()

## Training
This covers how to train the convolutional neural network classification model with the input data that we processed.

In [20]:
# build a function for training using batch gradient descent
def batch_gd(model, criterion, optimizer, train_loader, test_loader, epochs, device=None):
    """Train `model` with mini-batch gradient descent and record per-epoch losses.

    Args:
        model: the network to train; it is switched between train and eval mode.
        criterion: callable `(outputs, targets) -> scalar loss tensor`.
        optimizer: optimizer that updates the parameters of `model`.
        train_loader: iterable of `(inputs, targets)` batches used for fitting.
        test_loader: iterable of `(inputs, targets)` batches used for evaluation.
        epochs: number of passes over `train_loader`.
        device: device the batches are moved to. When omitted, the device of the
            model's first parameter is used (CPU if the model has no parameters).

    Returns:
        A tuple `(train_losses, test_losses)` of numpy arrays of length `epochs`,
        each entry being the mean batch loss of that epoch.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    # storage for losses per each epoch, kept so the results can be plotted
    train_losses = np.zeros(epochs)
    test_losses = np.zeros(epochs)

    for epoch in range(epochs):
        # train mode
        model.train()
        t0 = datetime.now()
        train_loss = []

        for inputs, targets in train_loader:
            # send data to the device
            inputs, targets = inputs.to(device), targets.to(device)

            # zero the parameter gradients so they do not accumulate across batches
            optimizer.zero_grad()

            # forward pass
            outputs = model(inputs)
            loss = criterion(outputs, targets)

            # backward and optimize
            loss.backward()
            optimizer.step()

            # save the batch loss
            train_loss.append(loss.item())

        # train loss per epoch is the average loss of all the batches in that epoch
        train_loss = np.mean(train_loss)
        train_losses[epoch] = train_loss

        # test mode; no gradients are needed, so skip building the autograd graph
        model.eval()
        test_loss = []

        with torch.no_grad():
            for inputs, targets in test_loader:
                inputs, targets = inputs.to(device), targets.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, targets)
                test_loss.append(loss.item())
        test_loss = np.mean(test_loss)
        test_losses[epoch] = test_loss

        # calculate time spent on this epoch (training plus evaluation)
        dt = datetime.now() - t0

        # print out the result
        print(f"Epoch: {epoch+1}/{epochs}, Train loss: {train_loss:.4f}, Test loss: {test_loss:.4f}, Duration: {dt}")

    return train_losses, test_losses

In [None]:
# run 15 epochs of training and keep the per-epoch loss curves
train_losses, test_losses = batch_gd(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    train_loader=train_loader,
    test_loader=test_loader,
    epochs=15,
)

## Result
This shows the result of the convolutional neural network classification model by showing the loss evolution and calculating the accuracy of the model.

In [None]:
# plot the train loss and test loss per epoch
for curve, name in ((train_losses, "Train loss"), (test_losses, "Test loss")):
    plt.plot(curve, label=name)
plt.legend()
plt.show()

In [None]:
# calculate accuracy
def compute_accuracy(model, data_loader, device):
    """Return the fraction of samples in `data_loader` that `model` labels correctly.

    Args:
        model: classifier returning one row of class scores per input sample.
        data_loader: iterable of `(inputs, targets)` batches.
        device: device the batches are moved to before the forward pass.

    Returns:
        Number of correct predictions divided by the number of samples seen.
    """
    n_correct = 0
    n_total = 0

    model.eval()
    # inference only: no gradients are needed
    with torch.no_grad():
        for inputs, targets in data_loader:
            # send data to the device
            inputs, targets = inputs.to(device), targets.to(device)

            # forward pass
            outputs = model(inputs)

            # predictions by returning max value label
            _, predictions = torch.max(outputs, 1)

            # update numbers
            n_correct += (predictions == targets).sum().item()
            n_total += targets.shape[0]

    return n_correct / n_total


# train accuracy is measured on the un-augmented training images; the augmented
# loader would apply fresh random crops/flips and give a different number each run
train_acc = compute_accuracy(model, train_loader_original, device)

# test accuracy
test_acc = compute_accuracy(model, test_loader, device)

# print the result
print(f"Train accuracy: {train_acc:.4f}, Test accuracy: {test_acc:.4f}")

## Appendix: Confusion Matrix
This covers how to plot confusion matrix using the results that we got above.

In [25]:
# import libraries to plot confusion matrix
from sklearn.metrics import confusion_matrix
import itertools

# function to plot confusion matrix
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    Print the confusion matrix and draw it as an annotated heat map.

    cm        -- 2-D array of counts, indexed as cm[true label, predicted label]
    classes   -- tick labels, one per class
    normalize -- when True, each row is divided by its row sum before display
    title     -- title placed above the plot
    cmap      -- matplotlib colormap used for the heat map
    """
    if not normalize:
        print('Confusion matrix, without normalization')
    else:
        row_totals = cm.sum(axis=1).reshape(-1, 1)
        cm = cm.astype('float') / row_totals
        print("Normalized confusion matrix")

    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    positions = np.arange(len(classes))
    plt.xticks(positions, classes, rotation=45)
    plt.yticks(positions, classes)

    # fractions when normalized, integer counts otherwise
    cell_format = '.2f' if normalize else 'd'
    # cells above half of the maximum get white text so they stay readable
    half_max = cm.max() / 2.
    for row in range(cm.shape[0]):
        for col in range(cm.shape[1]):
            text_color = "white" if cm[row, col] > half_max else "black"
            plt.text(col, row, format(cm[row, col], cell_format),
                     horizontalalignment="center",
                     color=text_color)

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.show()

In [None]:
# get predictions in an array and plot the confusion matrix
x_test = test_dataset.data
y_test = test_dataset.targets

# make sure dropout and similar layers are in inference mode
model.eval()

# collect per-batch predictions and join them once at the end; test_loader does
# not shuffle, so the joined predictions line up with y_test
batch_predictions = []
with torch.no_grad():
    for inputs, targets in test_loader:
        # move data to device
        inputs = inputs.to(device)

        # forward pass
        outputs = model(inputs)

        # get predictions
        _, predictions = torch.max(outputs, 1)

        # bring the result back to cpu so numpy/matplotlib can process it
        batch_predictions.append(predictions.cpu().numpy())

# integer class indices, one per test image
p_test = (
    np.concatenate(batch_predictions)
    if batch_predictions
    else np.array([], dtype=np.int64)
)

cm = confusion_matrix(y_test, p_test)
plot_confusion_matrix(cm, list(range(10)))

## Appendix: Misclassified Cases
This covers examples of misclassification.

In [27]:
# label mapping: position in the list is the class index
labels = [
    'airplane',
    'automobile',
    'bird',
    'cat',
    'deer',
    'dog',
    'frog',
    'horse',
    'ship',
    'truck',
]

In [None]:
# Display one randomly chosen test image that the model got wrong
p_test = p_test.astype(np.uint8)
(misclassified_idx,) = np.where(p_test != y_test)
i = np.random.choice(misclassified_idx)

true_name = labels[y_test[i]]
predicted_name = labels[p_test[i]]
plt.imshow(x_test[i].reshape(32, 32, 3))
plt.title("True label: %s Predicted: %s" % (true_name, predicted_name))