# CIFAR-10 Image Classification Based on CNN

## Install Dependencies

In [None]:
!pip install ipywidgets seaborn scikit-learn pandas torchinfo
!jupyter nbextension enable --py widgetsnbextension

## Load Dataset

In [None]:
from typing import Tuple
import torch
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
import torch.nn as nn

import matplotlib.pyplot as plt
import numpy as np

# Directory passed to torchvision as the dataset root, and the file the trained
# weights are written to at the end of training.
DATASET_PATH = "./data"
OUTPUT_PATH = "./cifar_net"
# Fix torch's global RNG seed so runs are repeatable.
torch.manual_seed(2021)

# Training-time pipeline: random geometric and colour augmentation on the PIL
# image, then conversion to a tensor and per-channel normalisation with
# mean 0.5 / std 0.5.
transformTrain = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(5),
    transforms.RandomAffine(0, shear=10, scale=(0.8,1.2)),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Evaluation-time pipeline: no augmentation, only the same tensor conversion
# and normalisation that the training pipeline ends with.
transformTest = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Mini-batch size shared by the training and test loaders.
batchSize = 128

# Training split; downloaded into DATASET_PATH if it is not already there.
trainset = torchvision.datasets.CIFAR10(root=DATASET_PATH,
                                        train=True,
                                        download=True,
                                        transform=transformTrain)
# Shuffled each epoch; num_workers=0 loads batches in the main process.
trainloader = torch.utils.data.DataLoader(trainset,
                                          batch_size=batchSize,
                                          shuffle=True,
                                          num_workers=0)

# Held-out test split, read in a fixed order.
testset = torchvision.datasets.CIFAR10(root=DATASET_PATH,
                                       train=False,
                                       download=True,
                                       transform=transformTest)
testloader = torch.utils.data.DataLoader(testset,
                                         batch_size=batchSize,
                                         shuffle=False,
                                         num_workers=0)

# Human-readable class names, indexed by the integer label.
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse',
           'ship', 'truck')

## Create Model

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class Layer(nn.Module):
    """Convolution -> batch normalisation -> ReLU building block.

    The convolution uses stride 1 and a padding of ``kernelSize // 2`` so that,
    for odd kernel sizes, the output keeps the spatial size of the input.
    With the default ``kernelSize=3`` this is a padding of 1.

    Args:
        inChannel: Number of input channels.
        outChannel: Number of output channels (also the BatchNorm width).
        kernelSize: Kernel size as an int, or an (height, width) pair.
    """

    def __init__(self, inChannel, outChannel, kernelSize=3):
        super().__init__()
        # Derive the padding from the kernel instead of hard-coding 1, which
        # only preserved the feature-map size for 3x3 kernels.
        if isinstance(kernelSize, (tuple, list)):
            padding = tuple(k // 2 for k in kernelSize)
        else:
            padding = kernelSize // 2
        self.conv = nn.Conv2d(inChannel,
                              outChannel,
                              kernelSize,
                              stride=1,
                              padding=padding)
        self.bn = nn.BatchNorm2d(outChannel)

    def forward(self, x):
        """Apply conv, batch norm and ReLU to ``x`` and return the result."""
        return F.relu(self.bn(self.conv(x)))

class Net(nn.Module):
    """Small CNN classifier: a convolutional feature stack plus a 3-layer MLP head.

    ``cfg`` describes the feature stack in order. A tuple
    ``(inChannel, outChannel, kernelSize)`` becomes a ``Layer``; the string
    ``'M'`` becomes a 2x2 max-pool with stride 2. The head expects 512
    flattened features and emits 10 class scores (raw logits, no softmax).
    """

    cfg = [
        (3, 32, 3),
        (32, 32, 3),
        'M',
        (32, 64, 3),
        'M',
        (64, 128, 3),
        'M',
        (128, 128, 3),
        'M',
    ]

    def __init__(self):
        super().__init__()
        # Convolutional feature extractor assembled from ``cfg``.
        self.layers = self.buildLayers()
        # Classifier head; dropout is applied after the first hidden layer.
        self.dropout1 = nn.Dropout(0.5)
        self.fc1 = nn.Linear(512, 256)
        self.fc2 = nn.Linear(256, 64)
        self.fc3 = nn.Linear(64, 10)

    def buildLayers(self):
        """Translate ``cfg`` into an ``nn.Sequential`` of conv blocks and pools."""

        def makeStage(spec):
            # 'M' marks a down-sampling step; anything else is a conv block spec.
            if spec == 'M':
                return nn.MaxPool2d(kernel_size=2, stride=2)
            return Layer(spec[0], spec[1], spec[2])

        return nn.Sequential(*[makeStage(spec) for spec in self.cfg])

    def forward(self, x):
        """Return the class logits for a batch of images ``x``."""
        # Flatten everything except the batch dimension before the MLP head.
        features = torch.flatten(self.layers(x), 1)
        hidden = self.dropout1(F.relu(self.fc1(features)))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)

## Detect GPU

In [None]:
# Use the first CUDA GPU when one is available; otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print("Using device:", device)

## Train the Model

In [None]:
def train(dataloader, model, lossFunction, optimizer, epoch: int) -> Tuple[float, float]:
    """Train ``model`` for one pass over ``dataloader``.

    Batches are moved to the module-level ``device`` before the forward pass.
    Progress is printed every 100 mini-batches and once at the end of the epoch.

    Args:
        dataloader: Iterable yielding ``(inputs, labels)`` batches.
        model: Network to optimise; switched to training mode.
        lossFunction: Callable mapping ``(outputs, labels)`` to a scalar loss.
        optimizer: Optimiser that updates ``model``'s parameters.
        epoch: 1-based epoch number, used only in the printed messages.

    Returns:
        ``(trainingLoss, accuracy)``: the mean of the per-batch losses and the
        percentage of processed samples classified correctly. Both are ``nan``
        when the loader yields no batches.
    """
    lossSum = 0.0
    batches = 0
    correctCount = 0
    # Count the samples actually seen rather than trusting len(dataset): the
    # two differ when the loader uses drop_last or a sub-sampling sampler.
    seen = 0
    model.train()

    for batch, (inputs, labels) in enumerate(dataloader, 1):
        inputs, labels = inputs.to(device), labels.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(inputs)
        loss = lossFunction(outputs, labels)
        loss.backward()
        optimizer.step()

        batchLoss = loss.item()
        lossSum += batchLoss
        # detach() keeps the bookkeeping out of the autograd graph.
        predicted = outputs.detach().argmax(dim=1)
        correctCount += (predicted == labels).sum().item()
        seen += labels.size(0)
        batches = batch

        if batch % 100 == 0:  # print every 100 mini-batches
            print(f"[Epoch {epoch:3d} Batch {batch:3d}] Loss: {batchLoss:.3f}")

    trainingLoss = lossSum / batches if batches else float("nan")
    accuracy = 100.0 * correctCount / seen if seen else float("nan")
    print(f"[Epoch {epoch:3d}] Training Loss: {trainingLoss:.3f}, Accuracy: {accuracy:.2f} %")
    return (trainingLoss, accuracy)

In [None]:
def test(dataloader, model, lossFunction) -> Tuple[float, float]:
    """Evaluate ``model`` on ``dataloader`` without updating its weights.

    Batches are moved to the module-level ``device``. The summary line is
    printed once after the whole loader has been consumed.

    Args:
        dataloader: Iterable yielding ``(images, labels)`` batches.
        model: Network to evaluate; switched to evaluation mode.
        lossFunction: Callable mapping ``(outputs, labels)`` to a scalar loss.

    Returns:
        ``(testLoss, accuracy)``: the mean of the per-batch losses and the
        percentage of processed samples classified correctly. Both are ``nan``
        when the loader yields no batches.
    """
    lossSum = 0.0
    batches = 0
    correctCount = 0
    # Count the samples actually seen rather than trusting len(dataset): the
    # two differ when the loader uses drop_last or a sub-sampling sampler.
    seen = 0
    model.eval()

    # since we're not training, we don't need to calculate the gradients for our outputs
    with torch.no_grad():
        for images, labels in dataloader:
            images, labels = images.to(device), labels.to(device)
            # calculate outputs by running images through the network
            outputs = model(images)
            lossSum += lossFunction(outputs, labels).item()
            # the class with the highest score is what we choose as prediction
            predicted = outputs.argmax(dim=1)
            correctCount += (predicted == labels).sum().item()
            seen += labels.size(0)
            batches += 1

    testLoss = lossSum / batches if batches else float("nan")
    accuracy = 100.0 * correctCount / seen if seen else float("nan")
    print(f"Test Loss: {testLoss:.3f}, Accuracy: {accuracy:.2f} %")
    return (testLoss, accuracy)

In [None]:
import seaborn as sns
from sklearn.metrics import confusion_matrix
import pandas as pd
import matplotlib.pyplot as plt

def evaluate(dataloader, model, classes):
    """Print per-class accuracy and plot a row-normalised confusion matrix.

    Predictions are collected over the whole ``dataloader`` (inputs are moved
    to the module-level ``device``). The raw confusion matrix is printed, then
    shown as a heatmap in which every row (true class) is divided by that
    class's sample count, so a row with samples sums to 1.

    Args:
        dataloader: Iterable yielding ``(images, labels)`` batches.
        model: Network to evaluate; switched to evaluation mode.
        classes: Sequence of class names indexed by integer label.
    """
    yPred = []
    yTrue = []
    model.eval()

    # again no gradients needed
    with torch.no_grad():
        for images, labels in dataloader:
            outputs = model(images.to(device))
            yPred.extend(outputs.argmax(dim=1).cpu().tolist())
            yTrue.extend(labels.view(-1).cpu().tolist())

    # Pin the label set so the matrix is always len(classes) square, even when
    # some class never appears among the labels or predictions.
    confusionMatrix = confusion_matrix(yTrue, yPred, labels=list(range(len(classes))))
    # Row i holds the samples whose true class is i; the diagonal holds the hits.
    rowTotals = confusionMatrix.sum(axis=1)

    # print accuracy for each class
    for classname, correctCount, total in zip(classes, confusionMatrix.diagonal(), rowTotals):
        # A class without samples has no defined accuracy.
        accuracy = 100 * float(correctCount) / total if total else float("nan")
        print("Accuracy for class {:5s} is: {:.1f} %".format(
            classname, accuracy))

    print(confusionMatrix)
    # Divide each ROW by its own total. The totals are reshaped into a column
    # because a flat vector would broadcast across columns instead and divide
    # by the wrong class. clip(min=1) leaves empty rows as zeros rather than
    # dividing by zero.
    normalised = confusionMatrix / rowTotals.clip(min=1).reshape(-1, 1)
    matrix = pd.DataFrame(normalised, classes, classes)
    plt.figure(figsize = (9,6))
    sns.heatmap(matrix, annot=True, cmap="Greens")
    plt.title("Confusion Matrix", fontsize=14)
    plt.xlabel("prediction", fontsize=12)
    plt.ylabel("label (ground truth)", fontsize=12)
    plt.show()


In [None]:
import matplotlib.pyplot as plt

def plotGraph(training, test, metrics):
    """Plot one training curve and one test curve against the epoch number.

    Args:
        training: Per-epoch values measured on the training set.
        test: Per-epoch values measured on the test set.
        metrics: Name of the plotted quantity; used for the y-axis label and
            appended to the figure title.
    """
    # (values, legend label, line colour) for each curve, in drawing order.
    curves = (
        (training, "train", 'r'),
        (test, "test", 'b'),
    )
    for values, legendLabel, colour in curves:
        # Epochs are numbered from 1 on the x-axis.
        epochNumbers = range(1, len(values) + 1)
        plt.plot(epochNumbers, values, label=legendLabel, c=colour, marker='.')

    plt.xlabel("Epochs")
    plt.ylabel(metrics)
    plt.title("Training & Test " + metrics)
    plt.legend()
    plt.show()

In [None]:
# Build the network on the selected device and set up the loss, the optimiser
# and a cosine learning-rate schedule that spans the whole run.
net = Net()
net.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)
# Alternative optimiser, kept for experimentation:
# optimizer = optim.SGD(net.parameters(), lr=0.008, momentum=0.9)
epochs = 100
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs)

# Per-epoch history, plotted once training has finished.
trainingLosses = []
trainingAccuracies = []
testLosses = []
testAccuracies = []

for epoch in range(epochs):  # loop over the dataset multiple times
    # train() and test() report 1-based epoch numbers, hence epoch + 1.
    loss, accuracy = train(trainloader, net, criterion, optimizer, epoch + 1)
    trainingLosses.append(loss)
    trainingAccuracies.append(accuracy)

    # Measure on the test set after every epoch to track the curves over time.
    loss, accuracy = test(testloader, net, criterion)
    testLosses.append(loss)
    testAccuracies.append(accuracy)

    # Advance the learning-rate schedule once per epoch.
    scheduler.step()

print("Finished Training")
plotGraph(trainingLosses, testLosses, "Loss")
plotGraph(trainingAccuracies, testAccuracies, "Accuracy")

# Persist only the weights (state dict), not the whole module object.
torch.save(net.state_dict(), OUTPUT_PATH)

print("Evaluation Result on Test:")
evaluate(testloader, net, classes)

In [None]:
# Optional: uncomment the next line to restore previously saved weights (the
# same path as OUTPUT_PATH) instead of using the in-memory model.
# net.load_state_dict(torch.load("./cifar_net"))
# Report per-class accuracy and the confusion matrix on the test set.
evaluate(testloader, net, classes)

In [None]:
from torchinfo import summary

# Show the parameter count of every layer; (1, 3, 32, 32) is the input size
# handed to torchinfo (presumably one 3-channel 32x32 image — confirm against
# the torchinfo documentation).
summary(net, (1, 3, 32, 32), col_names=("num_params",))

Layer (type:depth-idx)                   Param #
Net                                      --
├─Sequential: 1-1                        --
│    └─Layer: 2-1                        --
│    │    └─Conv2d: 3-1                  896
│    │    └─BatchNorm2d: 3-2             64
│    └─Layer: 2-2                        --
│    │    └─Conv2d: 3-3                  9,248
│    │    └─BatchNorm2d: 3-4             64
│    └─MaxPool2d: 2-3                    --
│    └─Layer: 2-4                        --
│    │    └─Conv2d: 3-5                  18,496
│    │    └─BatchNorm2d: 3-6             128
│    └─MaxPool2d: 2-5                    --
│    └─Layer: 2-6                        --
│    │    └─Conv2d: 3-7                  73,856
│    │    └─BatchNorm2d: 3-8             256
│    └─MaxPool2d: 2-7                    --
│    └─Layer: 2-8                        --
│    │    └─Conv2d: 3-9                  147,584
│    │    └─BatchNorm2d: 3-10            256
│    └─MaxPool2d: 2-9                    --
├─Linea

In [None]:
from PIL import Image
from torch.autograd import Variable
import numpy as np

# Preprocessing for arbitrary user-supplied images at inference time.
# Resize takes an explicit (height, width) pair: a bare int only matches the
# shorter edge, so a non-square photo would keep its aspect ratio, produce a
# non-32x32 tensor and break the network's fixed-size fully connected head.
# The normalisation matches the one used for the training and test data.
transform = transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

def softmax(x, axis=None):
    """Return the numerically stable softmax of ``x``.

    The maximum is subtracted before exponentiating so large scores do not
    overflow, and the same shifted exponentials are used for both numerator and
    denominator so the result sums to 1.

    Args:
        x: Array-like of scores.
        axis: Axis to normalise over. ``None`` (the default) normalises over
            all elements, matching how the function is used on a single
            prediction.

    Returns:
        Array of the same shape as ``x`` with non-negative entries that sum to
        1 along ``axis``.
    """
    x = np.asarray(x)
    shifted = np.exp(x - np.max(x, axis=axis, keepdims=True))
    return shifted / np.sum(shifted, axis=axis, keepdims=True)

def predictImage(image, model):
    """Classify a single image and return the index of the predicted class.

    The image is preprocessed with the module-level ``transform``, run through
    ``model`` on the module-level ``device``, and the softmax of the resulting
    scores is printed.

    Args:
        image: Image to classify (a PIL image is expected); non-RGB images are
            converted to RGB first because the normalisation assumes three
            channels.
        model: Trained network; switched to evaluation mode.

    Returns:
        The integer index of the highest-scoring class.
    """
    model.eval()
    # Greyscale, palette or RGBA files would otherwise have the wrong channel count.
    if getattr(image, "mode", "RGB") != "RGB":
        image = image.convert("RGB")
    # Add the batch dimension the network expects.
    batch = transform(image).float().unsqueeze(0).to(device)
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        logits = model(batch).cpu().numpy()
    print(softmax(logits))
    return int(logits.argmax())


# Classify a sample image from the working directory and print its class name.
# The context manager closes the underlying file once the prediction is done.
with Image.open('cat.jpg') as image:
    index = predictImage(image, net)
print(classes[index])