## Using the nn.Sequential() module for easier CNN creation

In [25]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

import matplotlib.pyplot as plt
import math

from collections import OrderedDict

from run_classes import RunBuilder, RunManager

In [26]:
train_set = torchvision.datasets.FashionMNIST(
    root = "./data/FashionMNIST"
    , train = True
    , download = True
    , transform = transforms.Compose([
        transforms.ToTensor()
    ])
)

In [27]:
train_set.classes

['T-shirt/top',
 'Trouser',
 'Pullover',
 'Dress',
 'Coat',
 'Sandal',
 'Shirt',
 'Sneaker',
 'Bag',
 'Ankle boot']

In [5]:
in_features = img.numel()
in_features

784

In [6]:
#This can be playes with
out_features = math.floor(in_features / 2)
out_features

392

In [7]:
out_classes = len(train_set.classes)
out_classes

10

In [8]:
net1 = nn.Sequential(
    nn.Flatten(start_dim = 1)
    , nn.Linear(in_features, out_features)
    , nn.Linear(out_features, out_classes)
)
net1

Sequential(
  (0): Flatten()
  (1): Linear(in_features=784, out_features=392, bias=True)
  (2): Linear(in_features=392, out_features=10, bias=True)
)

In [9]:
net1[1]

Linear(in_features=784, out_features=392, bias=True)

In [10]:
image = img.unsqueeze(0) #unsqueeze
image.shape

torch.Size([1, 1, 28, 28])

In [11]:
net1(image)

tensor([[ 0.1510,  0.0812,  0.0858,  0.0926,  0.0241, -0.1825,  0.0839,  0.0868,
          0.2423, -0.0405]], grad_fn=<AddmmBackward>)

In [12]:
layers = OrderedDict([
    ('flat', nn.Flatten(start_dim = 1)),
    ('linear', nn.Linear(in_features, out_features)),
    ('out', nn.Linear(out_features, out_classes))
])
layers

OrderedDict([('flat', Flatten()),
             ('linear', Linear(in_features=784, out_features=392, bias=True)),
             ('out', Linear(in_features=392, out_features=10, bias=True))])

In [13]:
net2 = nn.Sequential(layers)
net2

Sequential(
  (flat): Flatten()
  (linear): Linear(in_features=784, out_features=392, bias=True)
  (out): Linear(in_features=392, out_features=10, bias=True)
)

In [14]:
net1(image), net2(image)

(tensor([[ 0.1510,  0.0812,  0.0858,  0.0926,  0.0241, -0.1825,  0.0839,  0.0868,
           0.2423, -0.0405]], grad_fn=<AddmmBackward>),
 tensor([[ 0.1804, -0.1383, -0.0181, -0.0633,  0.3174, -0.0770,  0.2227,  0.0905,
           0.2683,  0.0203]], grad_fn=<AddmmBackward>))

In [15]:
#You can access those layers by their given name
net2.flat

Flatten()

In [16]:
net2.out

Linear(in_features=392, out_features=10, bias=True)

In [17]:
#Add modules

net2.add_module('output', nn.Linear(out_classes, out_classes))

In [18]:
net2

Sequential(
  (flat): Flatten()
  (linear): Linear(in_features=784, out_features=392, bias=True)
  (out): Linear(in_features=392, out_features=10, bias=True)
  (output): Linear(in_features=10, out_features=10, bias=True)
)

### Build the same NN as before with nn.Sequential

In [19]:
net = nn.Sequential(
    nn.Conv2d(in_channels = 1, out_channels = 6, kernel_size = 5)
    , nn.ReLU()
    , nn.MaxPool2d(kernel_size = 2, stride = 2)
    , nn.Conv2d(in_channels = 6, out_channels = 12, kernel_size = 5)
    , nn.ReLU()
    , nn.MaxPool2d(kernel_size =2, stride = 2)
    
    , nn.Flatten(start_dim = 1)
    , nn.Linear(in_features = 12*4*4, out_features = 120)
    , nn.ReLU()
    , nn.Linear(in_features = 120, out_features = 60)
    , nn.ReLU()
    , nn.Linear(in_features = 60, out_features = 10)
)
net

Sequential(
  (0): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (1): ReLU()
  (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (3): Conv2d(6, 12, kernel_size=(5, 5), stride=(1, 1))
  (4): ReLU()
  (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (6): Flatten()
  (7): Linear(in_features=192, out_features=120, bias=True)
  (8): ReLU()
  (9): Linear(in_features=120, out_features=60, bias=True)
  (10): ReLU()
  (11): Linear(in_features=60, out_features=10, bias=True)
)

... works also in the other 2 ways as demonstrated before.

### Implementing BatchNorm

normalizing activations after a particular layer - in that way data is normalized before it hits the next layer  
Inside a batch-norm layer, learnable parameters exist, such as scale and shift

In [20]:
net_bn = nn.Sequential(
    #2D because we deal with pictures
    nn.Conv2d(in_channels = 1, out_channels = 6, kernel_size = 5)
    , nn.ReLU()
    , nn.MaxPool2d(kernel_size = 2, stride = 2)
    ## set right before next layer, 6 for number of in_channels = same as out_channels
    , nn.BatchNorm2d(6)
    , nn.Conv2d(in_channels = 6, out_channels = 12, kernel_size = 5)
    , nn.ReLU()
    , nn.MaxPool2d(kernel_size =2, stride = 2)
    
    , nn.Flatten(start_dim = 1)
    , nn.Linear(in_features = 12*4*4, out_features = 120)
    , nn.ReLU()
    ##
    , nn.BatchNorm1d(120)
    , nn.Linear(in_features = 120, out_features = 60)
    , nn.ReLU()
    ## 1D because we Flattened our data
    , nn.BatchNorm1d(60)
    , nn.Linear(in_features = 60, out_features = 10)
)
net_bn

Sequential(
  (0): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (1): ReLU()
  (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (3): BatchNorm2d(6, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (4): Conv2d(6, 12, kernel_size=(5, 5), stride=(1, 1))
  (5): ReLU()
  (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (7): Flatten()
  (8): Linear(in_features=192, out_features=120, bias=True)
  (9): ReLU()
  (10): BatchNorm1d(120, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (11): Linear(in_features=120, out_features=60, bias=True)
  (12): ReLU()
  (13): BatchNorm1d(60, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (14): Linear(in_features=60, out_features=10, bias=True)
)

#### Let's test the NNs!

In [21]:
train_set_normal = train_set = torchvision.datasets.FashionMNIST(
    root = "./data/FashionMNIST"
    , train = True
    , download = True
    , transform = transforms.Compose([
        transforms.ToTensor()
        , transforms.Normalize(mean = [0.2859], std = [0.3530])
    ])
)

In [22]:
#Defining testing dics
trainsets = {
    'normal': train_set_normal,
    'not_normal': train_set
}

networks = {
    'net': net,
    'net_bn': net_bn
}

params = OrderedDict(
    lr = [.01]
    , batch_size = [1000]
    , num_workers = [1]
    , epochs = [1]
    , trainset = list(trainsets.keys())
    , network = list(networks.keys())
)

In [24]:
###Training Loop

m = RunManager()
for run in RunBuilder.get_runs(params):
    network = networks[run.network]
    loader = DataLoader(trainsets[run.trainset], batch_size = run.batch_size, num_workers = run.num_workers)
    optimizer = torch.optim.Adam(network.parameters(), lr = run.lr)
    
    m.begin_run(run, network, loader)
    for epoch in range(run.epochs):
        m.begin_epoch()
        
        for batch in loader:
            images, labels = batch
            preds = network(images)
            loss = F.cross_entropy(preds, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            m.track_loss(loss)
            m.track_num_correct(preds, labels)
        m.end_epoch()
    m.end_run()

Unnamed: 0,run,epoch,loss,accuracy,epoch duration,run duration,lr,batch_size,num_workers,epochs,trainset,network
0,1,1,0.836251,0.6802,11.528177,12.404298,0.01,1000,1,1,normal,net
1,2,1,0.548108,0.80155,13.414044,14.417874,0.01,1000,1,1,normal,net_bn
2,3,1,0.711222,0.76145,12.491821,13.487617,0.01,1000,1,1,not_normal,net
3,4,1,0.373756,0.861317,14.123362,15.149796,0.01,1000,1,1,not_normal,net_bn
