# About 
In this notebook, we build a CNN and train it on the CIFAR10 dataset. Two CNN models are used in this notebook:
1. the example model shown in the official PyTorch tutorial, see [here](https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html) for more information.
2. the example model shown in the official TensorFlow tutorial, which might be the one used for the CIFAR10 experiments in [the vanilla FL paper](https://arxiv.org/abs/1602.05629), see [here](https://www.tensorflow.org/tutorials/images/cnn) for more information about this model.

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as data

import torchvision
from torchvision import transforms
from torchvision import datasets

# load the pre-defined models and dataloaders of CIFAR10
from my_nn_models import CNNCifar 
from data_preparation import data_setup

##### Configuring the training
Set up the hyper-parameters to be used in the training.

In [18]:
class HyperParam():
    """Container for the training configuration used in this notebook.

    Args:
        path: location of the CIFAR10 data (handed to ``data_setup``).
        learning_rate: learning rate handed to the optimizer.
        batch_size: mini-batch size handed to ``data_setup``.
        epoch: epoch count for the training loop.
        momentum: momentum setting for SGD; stored exactly as given.
        nesterov: whether Nesterov momentum is requested.

    Attributes:
        device: ``'cuda'`` when ``torch.cuda.is_available()`` is true,
            otherwise ``'cpu'``.
        datapath, lr, bs, epoch, momentum, nesterov: the constructor
            arguments, stored unchanged.
    """

    # The default path is written as a raw string: the plain literal
    # '..\data\cifar' only worked because '\d' and '\c' are not recognised
    # escape sequences, which newer Python versions warn about. The resulting
    # value is unchanged.
    # NOTE: the default uses backslash separators; pass ``path=`` explicitly
    # on platforms where the backslash is not a path separator.
    def __init__(self, path=r'..\data\cifar', learning_rate=0.1, batch_size=64, epoch=100, momentum=True, nesterov=False):
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.datapath = path
        self.lr = learning_rate
        self.bs = batch_size
        self.epoch = epoch
        self.momentum = momentum
        self.nesterov = nesterov

# an instance of HyperParam() class to set the parameters
settings = HyperParam(batch_size=4)

# an instance of CNNCifar() model class
model = CNNCifar().to(settings.device)

# setup the loss function and optimizer
loss_fn = nn.CrossEntropyLoss().to(settings.device)

# `settings.momentum` may be a boolean flag (its default is True). Passing a
# bool straight to SGD would use it as the momentum coefficient (True acts as
# 1.0), and the previous code dropped the setting entirely unless Nesterov was
# requested. Translate the flag into a numeric coefficient and always pass it.
# NOTE(review): 0.9 is a conventional choice for "momentum enabled" - adjust
# if a different coefficient is intended.
DEFAULT_MOMENTUM = 0.9
if isinstance(settings.momentum, bool):
    momentum_value = DEFAULT_MOMENTUM if settings.momentum else 0.0
else:
    momentum_value = float(settings.momentum)

# torch.optim.SGD raises a ValueError if Nesterov is requested without a
# positive momentum, so an inconsistent configuration fails loudly here.
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=settings.lr,
    momentum=momentum_value,
    nesterov=settings.nesterov,
)

##### Verify the configurations

In [21]:
# Echo the hyper-parameters, the network layout and the loss function so the
# configuration can be checked by eye before any training is started.
config_summary = (
    settings.lr,
    settings.bs,
    settings.epoch,
    settings.momentum,
    settings.device,
    settings.datapath,
)
print(*config_summary)
for configured_obj in (model, loss_fn):
    print(configured_obj)
print(settings.epoch, settings.bs)

0.1 4 100 True cpu ..\data\cifar
CNNCifar(
  (conv_layer): Sequential(
    (0): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc_layer): Sequential(
    (0): Linear(in_features=400, out_features=120, bias=True)
    (1): ReLU()
    (2): Linear(in_features=120, out_features=84, bias=True)
    (3): ReLU()
    (4): Linear(in_features=84, out_features=10, bias=True)
  )
)
CrossEntropyLoss()
100 4


##### Verify the dataloader

In [20]:
# Build the train/test loaders from the configured data path and batch size.
loader_train, loader_test = data_setup(path=settings.datapath, batch_size=settings.bs)

# Pull a single mini-batch and show its labels as a quick sanity check.
# Use the built-in next(): the Python-2-style `.next()` method is not part of
# the iterator protocol and is not available on loader iterators in recent
# PyTorch releases.
train_iter = iter(loader_train)
images, labels = next(train_iter)
print(labels)
# len(loader_train.dataset)

tensor([7, 2, 2, 5])


##### Start the training
We do not actually run the training here because it takes a long time, so the training code below is commented out.

In [23]:
# # start training (kept commented out because a full run is slow)
# # The loop reads the epoch count and device from `settings`, and accumulates
# # the per-batch `loss` weighted by the batch size so that `epoch_loss` is the
# # mean loss per training sample.
# # NOTE(review): `test_acc` is initialised but never updated in this loop.
# for epoch in range(1, settings.epoch+1):
#     train_loss = 0.0
#     test_acc = 0.0
#     model.train()
#     for batch, (images, labels) in enumerate(loader_train):
#         images, labels = images.to(settings.device), labels.to(settings.device)
#         optimizer.zero_grad()
#         outputs = model(images)
#         loss = loss_fn(outputs, labels)
#         loss.backward()
#         optimizer.step()
#         train_loss+=loss.item() * images.size(0)
#     epoch_loss = train_loss/len(loader_train.dataset)

##### TensorFlow CNN example model
We can also set up this model and do a simple check of the correctness of the model structure.

In [26]:
from my_nn_models import CNNCifarTf
model_tf = CNNCifarTf()
print(model_tf)

# NOTE(review): the printed structure ends with a ReLU after the final Linear
# layer, so the class scores are clamped at zero before argmax - confirm this
# is intended in my_nn_models.

test_sample = images # this is the images batch loaded previously
# Only the predicted classes are needed for this check, so run the forward
# pass without recording operations for autograd.
with torch.no_grad():
    test_output = model_tf(test_sample).argmax(1,keepdim=True)

# Structural check: exactly one predicted class index per input image.
assert test_output.shape == (test_sample.size(0), 1)
print('\n', test_output)

CNNCifarTf(
  (conv_layer): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1))
    (7): ReLU()
  )
  (fc_layer): Sequential(
    (0): Linear(in_features=1024, out_features=64, bias=True)
    (1): ReLU()
    (2): Linear(in_features=64, out_features=10, bias=True)
    (3): ReLU()
  )
)

 tensor([[2],
        [7],
        [7],
        [7]])
