# Deep Neural Learning: CIFAR10 Classification with CNN 

## Mohammad Ali Zamani
### Senior Machine Learning Scientist
 [zamani.ai](https://zamani.ai)

 
Some parts are adapted from the PyTorch tutorials: https://pytorch.org/tutorials/

In [None]:
# CIFAR-10 label names; the entry at position i is the name printed for
# class index i in the per-class accuracy report.
classes = (
    'plane',
    'car',
    'bird',
    'cat',
    'deer',
    'dog',
    'frog',
    'horse',
    'ship',
    'truck',
)

In [None]:
import numpy as np
import os
import gzip
from torch.utils.data import Dataset 
from torchvision import transforms
import torch
import pickle
from tqdm import tqdm
from skimage import io
from sklearn.model_selection import train_test_split

In [None]:
import torch.nn as nn
import torch.nn.functional as F

class CNN(nn.Module):
    """Four-stage convolutional classifier with a three-layer dense head.

    Every convolution uses a 3x3 kernel with stride 1 and padding 1, so it
    keeps the spatial size; each stage is followed by a 2x2 max-pool that
    halves it.  The first dense layer takes 256 features, i.e. 64 channels
    times a 2x2 map, which corresponds to 3-channel 32x32 inputs.

    Args:
        args: accepted for interface compatibility; not read by this class.
    """

    def __init__(self, args):
        super().__init__()
        # Layers are created in the same order as they are applied so that
        # seeded initialisation stays reproducible.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=8,
                               kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=8, out_channels=16,
                               kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(in_channels=16, out_channels=32,
                               kernel_size=3, stride=1, padding=1)
        self.conv4 = nn.Conv2d(in_channels=32, out_channels=64,
                               kernel_size=3, stride=1, padding=1)
        self.fc1 = nn.Linear(in_features=256, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=64)
        self.fc3 = nn.Linear(in_features=64, out_features=10)

    def forward(self, x):
        """Return the 10 raw class scores (no softmax) for a batch ``x``."""
        features = x
        # conv -> ReLU -> max-pool, repeated for each of the four stages.
        for conv in (self.conv1, self.conv2, self.conv3, self.conv4):
            features = self.pool(F.relu(conv(features)))

        # Collapse everything but the batch dimension for the dense head.
        flat = features.view(features.shape[0], -1)
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)

Exception ignored in: <bound method _ConnectionBase.__del__ of <multiprocessing.connection.Connection object at 0x7f7251fa65c0>>
Traceback (most recent call last):
  File "/usr/lib/python3.6/multiprocessing/connection.py", line 132, in __del__
    self._close()
  File "/usr/lib/python3.6/multiprocessing/connection.py", line 361, in _close
    _close(self._handle)
OSError: [Errno 9] Bad file descriptor


In [None]:
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR

def count_parameters(model):
    """Return the total number of scalar entries in ``model``'s trainable
    parameters (those with ``requires_grad`` set)."""
    trainable = 0
    for param in model.parameters():
        if not param.requires_grad:
            continue  # frozen parameters are not counted
        trainable += param.numel()
    return trainable


class Optimization():
    """Bundle a CNN model, its optimizer/scheduler and the train/val/test loops.

    Args:
        args: configuration object; this class reads ``device``,
            ``optimizer``, ``rate``, ``sgd_momentum``, ``lr_decay_step`` and
            ``sample_num`` from it.
        loss: callable ``loss(outputs, labels)`` returning a scalar tensor.
        train_loader: iterable of ``(inputs, labels)`` training batches.
        val_loader: iterable of ``(inputs, labels)`` batches; only its leading
            ``VAL_SAMPLES`` samples are used for validation.
        test_loader: iterable of ``(inputs, labels)`` batches; its leading
            ``VAL_SAMPLES`` samples are skipped when testing.

    Raises:
        ValueError: if ``args.optimizer`` is neither ``'Adam'`` nor ``'SGD'``.
    """

    # Sample-count boundary between the part of the evaluation data used by
    # val_eval (before it) and by test_eval (from it onwards).
    VAL_SAMPLES = 500

    def __init__(self, args, loss,  train_loader, val_loader, test_loader):
        self.train_loader = train_loader
        self.val_loader = val_loader
        self.test_loader = test_loader
        device = args.device
        self.args = args

        self.model = CNN(args).to(device)

        print("number of trainable parameter = ", count_parameters(self.model))

        if args.optimizer == 'Adam':
            self.optimizer = optim.Adam(self.model.parameters(), lr=args.rate)
        elif args.optimizer == 'SGD':
            self.optimizer = optim.SGD(self.model.parameters(), lr=args.rate,
                                       momentum=args.sgd_momentum)
        else:
            # Fail here rather than with an AttributeError on first use.
            raise ValueError(
                "unsupported optimizer %r (expected 'Adam' or 'SGD')"
                % (args.optimizer,))

        self.scheduler = StepLR(self.optimizer, step_size=args.lr_decay_step)

        self.loss = loss
        self.device = device

    def train(self):
        """Run one training pass and return the mean loss per processed batch.

        The pass stops early once ``batch_index * batch_size`` exceeds
        ``args.sample_num``.
        """
        batch_counter = 0.0
        total_loss = 0.0
        self.model.train()
        for batch_idx, (inputs, labels) in enumerate(self.train_loader):

            # To limit the number of training samples used per epoch
            if batch_idx * inputs.shape[0] > self.args.sample_num:
                break

            inputs = inputs.to(self.device)
            labels = labels.to(self.device)

            self.optimizer.zero_grad()
            outputs = self.model(inputs)

            loss = self.loss(outputs, labels)
            loss.backward()

            self.optimizer.step()

            batch_counter += 1
            total_loss += loss.item()

        return total_loss / batch_counter

    def val_eval(self):
        """Return the mean loss per batch over the leading validation samples.

        Batches are consumed from ``val_loader`` until
        ``batch_index * batch_size`` reaches ``VAL_SAMPLES``.
        """
        batch_counter = 0.0
        total_loss = 0.0
        self.model.eval()
        for batch_idx, (inputs, labels) in enumerate(self.val_loader):

            # only the leading part of the loader is used for validation
            if batch_idx * inputs.shape[0] >= self.VAL_SAMPLES:
                break

            inputs = inputs.to(self.device)
            labels = labels.to(self.device)

            # gradients are not needed for evaluation
            with torch.no_grad():
                outputs = self.model(inputs)
                loss = self.loss(outputs, labels)

            batch_counter += 1
            total_loss += loss.item()

        return total_loss / batch_counter

    def test_eval(self, graph=False):
        """Print per-class accuracy and return the overall test accuracy.

        Batches whose ``batch_index * batch_size`` is below ``VAL_SAMPLES``
        are skipped (they belong to the validation part).  Returns ``0.0``
        when no sample is evaluated; a class with no samples is printed as 0%.

        Args:
            graph: unused; kept for interface compatibility.
        """
        num_classes = 10
        total = 0.0
        correct = 0
        class_correct = [0.0] * num_classes
        class_total = [0.0] * num_classes
        self.model.eval()

        for batch_idx, (inputs, labels) in enumerate(self.test_loader):

            if batch_idx * inputs.shape[0] < self.VAL_SAMPLES:
                continue

            inputs = inputs.to(self.device)
            labels = labels.to(self.device)

            # gradients are not needed for evaluation
            with torch.no_grad():
                outputs = self.model(inputs)
                _, predicted = torch.max(outputs, 1)
                result = (predicted == labels)

            total += labels.size(0)
            correct += result.sum().item()

            # Iterate the per-sample results directly; squeezing first would
            # turn a single-sample batch into a 0-d tensor that cannot be
            # indexed.
            for label, hit in zip(labels.tolist(), result.tolist()):
                class_correct[label] += hit
                class_total[label] += 1

        test_acc = correct / total if total else 0.0

        print()
        for i in range(num_classes):
            percent = 100 * class_correct[i] / class_total[i] if class_total[i] else 0
            print('%s: %2d%%,' % (classes[i], percent), end =" ")
        print()

        return test_acc

In [None]:
import copy
def main(args, train_loader, val_loader, test_loader):
    """Train the model epoch by epoch with validation-based early stopping.

    Each epoch runs one training pass, steps the LR scheduler, then measures
    the validation loss and the test accuracy.  Training stops after
    ``args.epochs`` epochs or once the validation loss has increased
    ``args.tol`` times relative to the preceding epoch, whichever is first.

    Args:
        args: configuration; this function reads ``loss``, ``epochs`` and
            ``tol`` and forwards the object to ``Optimization``.
        train_loader, val_loader, test_loader: batch iterables forwarded to
            ``Optimization``.

    Returns:
        ``(test_acc, val_loss)`` of the last epoch that ran, or
        ``(None, None)`` when ``args.epochs`` is 0.

    Raises:
        ValueError: if ``args.loss`` is neither ``'NLL'`` nor ``'CE'``.
    """
    import copy  # local import keeps this function self-contained

    best_val_error = np.inf

    if args.loss == 'NLL':
        # NOTE(review): NLLLoss expects log-probabilities; confirm the model
        # ends in log_softmax before selecting this option.
        loss_function = nn.NLLLoss(reduction='mean')
    elif args.loss == 'CE':
        loss_function = nn.CrossEntropyLoss(reduction='mean')
    else:
        raise ValueError(
            "unsupported loss %r (expected 'NLL' or 'CE')" % (args.loss,))

    optimization = Optimization(args, loss_function, train_loader, val_loader, test_loader)

    train_loss_records = []
    val_loss_records = []
    test_acc_records = []

    print("loading training, val and test set completed!")
    mistake_counter = 0  # number of epochs whose validation loss went up

    test_acc = val_loss = None
    for epoch in range(args.epochs):
        train_loss = optimization.train()
        train_loss_records.append(train_loss)
        optimization.scheduler.step()

        val_loss = optimization.val_eval()
        val_loss_records.append(val_loss)

        # test_eval returns an accuracy, not a loss.
        test_acc = optimization.test_eval()
        test_acc_records.append(test_acc)

        # Compare with the previous epoch as soon as there is one.
        if len(val_loss_records) >= 2 and val_loss_records[-1] > val_loss_records[-2]:
            mistake_counter += 1

        if val_loss < best_val_error:
            # Snapshot of the best epoch so far (kept locally, not returned).
            # 'model' is a live reference; the frozen weights are in
            # 'state_dict'.
            best_results = {
                'epoch': epoch + 1,
                'state_dict': copy.deepcopy(optimization.model.state_dict()),
                'model': optimization.model,
                'best_val_error': val_loss,
                'best_test_error': test_acc,
                'optimizer': copy.deepcopy(optimization.optimizer),
                'args': args
            }
            best_val_error = val_loss

        # Read the LR from the optimizer: it is always the value that will be
        # used next, whereas scheduler.get_lr() is not meant to be called
        # outside of step().
        current_lr = optimization.optimizer.param_groups[0]['lr']
        print(
            '[Epoch: %3d/%3d] LR: %0.8f  Train loss: %.4f,    Val loss: %.4f,   Test Acc: %.4f'
            % (epoch + 1, args.epochs, current_lr, train_loss, val_loss, test_acc))

        if mistake_counter >= args.tol or epoch == args.epochs - 1:
            print('Training is terminated')
            break
    return test_acc, val_loss

In [None]:
from typing import NamedTuple
class Args(NamedTuple):
    """Immutable bundle of the hyper-parameters and run settings for training."""
    rate: float  # learning rate passed to the optimizer
    lr_decay_step: int  # StepLR step_size: epochs between learning-rate decays
    batch_size: int  # minibatch size used for the data loaders
    epochs: int  # maximum number of training epochs
    sample_num: int  # approximate cap on training samples consumed per epoch
    tol: int  # number of validation-loss increases tolerated before stopping
    device: str  # 'cuda' or 'cpu' (the script below passes a torch.device)
    loss: str  # loss function selector: 'NLL' or 'CE'
    optimizer: str # optimizer selector: 'Adam' or 'SGD'
    sgd_momentum: float # momentum, used only when optimizer is 'SGD'

    # NOTE(review): the two fields below are not read by the code visible in
    # this file -- presumably placeholders for the TODOs; confirm.
    dropout: float  # the probability for dropout
    fc1: int # number of units in the 1st hidden layer
    # TODO: add more layers if necessary

In [None]:
from torchvision.datasets import CIFAR10
from torch.utils.data import DataLoader

# Preprocessing pipelines keyed by split name ('train' / 'val').
# NOTE(review): the normalisation constants look like the ImageNet channel
# statistics rather than CIFAR-10's -- confirm this is intended.
data_transforms = dict(
    # Training: light random augmentation before tensor conversion.
    train=transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(5),
        transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.1),
        transforms.RandomCrop((32, 32)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]),
    # Evaluation: deterministic crop only, same normalisation as training.
    val=transforms.Compose([
        transforms.CenterCrop((32, 32)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]),
)

# Run configuration; keywords are grouped by purpose (order is irrelevant
# because every field is passed by name).
args = Args(
    # --- hardware ---
    device=torch.device("cuda" if torch.cuda.is_available() else "cpu"),

    # --- optimisation ---
    optimizer='Adam',
    rate=0.001,
    sgd_momentum=0.1,
    lr_decay_step=100,
    loss='CE',

    # --- training schedule ---
    epochs=200,
    tol=5,
    batch_size=32,
    sample_num=5000,

    # --- architecture knobs ---
    dropout=0.5,
    fc1=10,  # TODO : change the first layer units
    # TODO adjust number of next hidden unit after addition
)

# Datasets: augmented training split, and the held-out split with the
# deterministic 'val' pipeline.
training_set = CIFAR10(
    root='.',
    train=True,
    download=True,
    transform=data_transforms['train'],
)
test_set = CIFAR10(
    root='.',
    train=False,
    download=True,
    transform=data_transforms['val'],
)

# Validation and test loaders both read from test_set; only the training
# loader shuffles.
train_loader = DataLoader(
    training_set,
    batch_size=args.batch_size,
    shuffle=True,
    drop_last=False,
    num_workers=8,
)
val_loader = DataLoader(
    test_set,
    batch_size=args.batch_size,
    shuffle=False,
    drop_last=True,
    num_workers=8,
)
test_loader = DataLoader(
    test_set,
    batch_size=args.batch_size,
    shuffle=False,
    drop_last=False,
    num_workers=8,
)

print("######################################")
print("######   sample numbers: ", args.sample_num, "#######")
print("######################################")

# Start training with the configuration and loaders built above.
main(args, train_loader, val_loader, test_loader)

Files already downloaded and verified
Files already downloaded and verified
######################################
######   sample numbers:  5000 #######
######################################
number of trainable parameter =  66330
loading training, val and test set completed!

plane: 44%, car:  7%, bird:  3%, cat:  9%, deer: 39%, dog: 14%, frog:  8%, horse: 37%, ship: 58%, truck: 49%, 
[Epoch:   1/200] LR: 0.00100000  Train loss: 2.1355,    Val loss: 1.9236,   Test Acc: 0.2727

plane: 42%, car: 50%, bird:  2%, cat:  5%, deer:  6%, dog: 19%, frog: 55%, horse: 32%, ship: 69%, truck: 12%, 
[Epoch:   2/200] LR: 0.00100000  Train loss: 1.8893,    Val loss: 1.8685,   Test Acc: 0.2964

plane: 62%, car: 48%, bird:  8%, cat: 12%, deer: 44%, dog: 42%, frog: 37%, horse: 37%, ship: 44%, truck: 42%, 
[Epoch:   3/200] LR: 0.00100000  Train loss: 1.7656,    Val loss: 1.6243,   Test Acc: 0.3796

plane: 54%, car: 58%, bird:  6%, cat:  6%, deer: 19%, dog: 64%, frog: 44%, horse: 54%, ship: 43%, truck: 5

(0.530881112984823, 1.250752743333578)