# ResNet50

This notebook was motivated by

https://arxiv.org/pdf/1512.03385.pdf

which is cited below as ResNetPaper.
Implementation: Oleh Bakumenko, University of Duisburg-Essen

# Imports

In [1]:
import os
import sys
import time
sys.path.append("/datashare/MLCourse/Course_Materials") # Preferentially import from the datashare.
sys.path.append("../") # Otherwise, import from the local folder's parent folder, where your stuff lives.

import numpy as np
import matplotlib.pyplot as plt
import torch, torch.nn as nn
import torchvision, torchvision.transforms as tt
from torch.multiprocessing import Manager
torch.multiprocessing.set_sharing_strategy("file_system")
from torchsummary import summary


# Data augmentations

Data augmentation is a technique used to artificially increase the size of a dataset by transforming existing data points to create new, similar instances. This can help prevent overfitting in machine learning models, as well as improve their ability to generalize to unseen data. Common types of data augmentation include flipping, rotation, scaling, and adding noise to images.
We can build the list of augmentations with the torchvision.transforms module.


In [2]:
# Augmentation pipeline handed to the dataset: a horizontal flip applied with
# probability 0.5, followed by a random 224x224 crop.
data_augments = tt.Compose([
    tt.RandomHorizontalFlip(p = 0.5),
    tt.RandomCrop(size = (224, 224)),
])


Load the dataset from utils

In [3]:
# Optional dataset caching. When enabled, a multiprocessing Manager holds one
# proxy dict per split for the data plus a per-split "already cached" flag.
cache_me = False
if cache_me is True:
    cache_mgr = Manager()
    cache_mgr.data, cache_mgr.cached = cache_mgr.dict(), cache_mgr.dict()
    for split in ("train", "val", "test"):
        cache_mgr.data[split] = cache_mgr.dict()
        cache_mgr.cached[split] = False

# The dataset receives the augmentations defined above; the manager is only
# passed along when caching is switched on.
ds = uu.LiTS_Classification_Dataset(
    data_dir = "/home/coder/Course_Materials/data/Clean_LiTS/",
    transforms = data_augments,
    verbose = True,
    cache_data = cache_me,
    cache_mgr = cache_mgr if cache_me is True else None,
    debug = True,
)

NameError: name 'uu' is not defined

# Hyperparameters

In [4]:
# Default settings

# Run identity and hardware
run_name = "ResNet50"        # prefix of the CSV log files written during training
device = "cuda" if torch.cuda.is_available() else "cpu"

# Optimisation
epochs = 15
batch_size = 32
learning_rate = 1e-4
weight_decay = 5e-5

# Timing / console output
time_me = True               # measure and log the wall-clock time per epoch
wantToPrint = False          # additionally print the per-step CPU/GPU timings
mod_step = 50                # print the training loss every `mod_step` steps

In [5]:
# Dataloader: shuffled mini-batches drawn from `ds` by four worker processes.
# Workers are kept alive between epochs only when caching is switched off.
dl = torch.utils.data.DataLoader(
    ds,
    batch_size = batch_size,
    shuffle = True,
    num_workers = 4,
    pin_memory = True,
    drop_last = False,
    prefetch_factor = 1,
    persistent_workers = not cache_me,
)

NameError: name 'ds' is not defined

ResNet (Residual Network) is a deep neural network architecture introduced in 2015, designed to address the issue of vanishing gradients in very deep networks. It's named ResNet because it uses residual connections (skip connections), which allow for the flow of gradients from earlier layers to later layers even in very deep networks.

The residual connections in ResNet consist of adding the input of a layer to the output of a layer several layers deeper, allowing the network to more easily learn identity functions. This design helps prevent the issue of vanishing gradients and allows ResNet to train much deeper networks than previously possible.
In the seminar, this architecture achieved considerably better benchmark results than the earlier AlexNet model.

The original ResNet was used in the ImageNet Challenge to classify 1000 classes; in this exercise we use only 3:
		0: Image does not include liver
		1: Liver is visible
		2: Liver is visible and lesion is visible


In [6]:
# Parameter counts of the four networks defined below, one slot per model.
# Stored as int64: a float32 tensor cannot represent integers above 2**24
# exactly, so counts such as 21279939 would be silently rounded.
num_params = torch.zeros(4, dtype = torch.int64)

In [7]:
# Display the freshly created (all-zero) table of parameter counts.
num_params

tensor([0., 0., 0., 0.])

# ResNet18

In [8]:
class ResBlock(nn.Module):
    """Residual block that keeps both the channel count and the spatial size.

    The residual branch is conv3x3 -> batch norm -> ReLU -> conv3x3 -> batch
    norm (convolutions with bias and padding 1). The block input is added to
    the branch output; no activation follows the addition.

    Args:
        n_chans: number of input (and output) channels.
    """

    def __init__(self, n_chans):
        super().__init__()
        # Layers are created in the original order so that state_dict keys and
        # seeded random initialisation are unchanged.
        self.conv1 = nn.Conv2d(n_chans, n_chans, kernel_size=3, padding=1)
        self.batch_norm1 = nn.BatchNorm2d(n_chans)
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv2d(n_chans, n_chans, kernel_size=3, padding=1)
        self.batch_norm2 = nn.BatchNorm2d(n_chans)

        # Kaiming-normal kernels for both convolutions (biases keep their default init).
        for conv in (self.conv1, self.conv2):
            nn.init.kaiming_normal_(conv.weight, nonlinearity='relu')

        # Batch-norm layers start with scale 0.5 and shift 0.
        for norm in (self.batch_norm1, self.batch_norm2):
            nn.init.constant_(norm.weight, 0.5)
            nn.init.zeros_(norm.bias)

    def forward(self, x):
        """Return the residual branch output plus the unchanged input ``x``."""
        branch = self.relu(self.batch_norm1(self.conv1(x)))
        branch = self.batch_norm2(self.conv2(branch))
        return branch + x


class ResBlockDimsReduction(nn.Module):
    """Residual block that halves the spatial size and may change the channel count.

    The residual branch is conv3x3 (stride 2) -> batch norm -> ReLU -> conv3x3
    -> batch norm. The shortcut is projected to the branch's shape with a
    1x1 convolution of stride 2 followed by batch norm, and the two are added.
    No activation follows the addition.

    Args:
        n_chans_in: number of channels of the block input.
        n_chans_out: number of channels of the block output.
    """

    def __init__(self, n_chans_in, n_chans_out):
        super(ResBlockDimsReduction, self).__init__()
        self.conv1 = nn.Conv2d(n_chans_in, n_chans_out, kernel_size=3, stride=2,padding=1)
        self.batch_norm1 = nn.BatchNorm2d(num_features=n_chans_out)
        self.relu = torch.nn.ReLU()
        self.conv2 = nn.Conv2d(n_chans_out, n_chans_out, kernel_size=3,padding=1)
        self.batch_norm2 = nn.BatchNorm2d(num_features=n_chans_out)

        # Kaiming-normal kernels for the two branch convolutions.
        torch.nn.init.kaiming_normal_(self.conv1.weight,
                                      nonlinearity='relu')
        torch.nn.init.kaiming_normal_(self.conv2.weight,
                                      nonlinearity='relu')

        # Batch-norm layers of the branch start with scale 0.5 and shift 0.
        torch.nn.init.constant_(self.batch_norm1.weight, 0.5)
        torch.nn.init.zeros_(self.batch_norm1.bias)

        torch.nn.init.constant_(self.batch_norm2.weight, 0.5)
        torch.nn.init.zeros_(self.batch_norm2.bias)

        # Projection of the shortcut (keeps PyTorch's default initialisation).
        self.downsample = nn.Sequential(
            nn.Conv2d(n_chans_in, n_chans_out, kernel_size=1, stride=2),
            nn.BatchNorm2d(num_features=n_chans_out)
        )

    def forward(self, x):
        """Return the strided residual branch output plus the projected input."""
        out = self.conv1(x)
        out = self.batch_norm1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.batch_norm2(out)
        # conv1 always uses stride 2, so the shortcut has to be projected even
        # when n_chans_in == n_chans_out; checking only the channel count
        # would leave a spatial mismatch and make the addition fail.
        return out + self.downsample(x)


class ResNetMLMed18(torch.nn.Module):
    """18-layer residual network for single-channel images and 3 output classes.

    Stem (7x7 convolution with stride 2, batch norm, ReLU, 3x3 max pooling with
    stride 2), four stages of two residual blocks each (64, 128, 256 and 512
    channels), global average pooling and a linear classifier.

    Input:  tensor of shape [Batch, 1, Height, Width]
    Output: tensor of shape [Batch, 3] (unnormalised class scores)
    """

    def __init__(self):
        super().__init__()
        self.conv1 = torch.nn.Conv2d(in_channels = 1, out_channels = 64, kernel_size =7, stride =2, padding=1)
        self.batch_norm1 = nn.BatchNorm2d(num_features=64)
        self.pool2 = torch.nn.MaxPool2d(kernel_size = 3, stride = 2)
        self.relu = torch.nn.ReLU()

        # Every block is created by its own constructor call. `n * [ResBlock(...)]`
        # repeats ONE module instance n times, which silently ties the weights
        # of all repeated blocks together.
        self.resblocks2 = nn.Sequential(
            *[ResBlock(n_chans=64) for _ in range(2)])
        self.resblocks3 = nn.Sequential(ResBlockDimsReduction(n_chans_in=64,n_chans_out=128),
            *[ResBlock(n_chans=128) for _ in range(1)])
        self.resblocks4 = nn.Sequential(ResBlockDimsReduction(n_chans_in=128,n_chans_out=256),
            *[ResBlock(n_chans=256) for _ in range(1)])
        self.resblocks5 = nn.Sequential(ResBlockDimsReduction(n_chans_in=256,n_chans_out=512),
            *[ResBlock(n_chans=512) for _ in range(1)])
        self.avgpool6 = nn.AdaptiveAvgPool2d(output_size=(1, 1))
        self.fc = nn.Linear(in_features=512, out_features=3, bias=True)

    def forward(self, x):
        """Map a batch of images to one score per class."""
        # Stem
        out_1 = self.conv1(x)
        out_1 = self.batch_norm1(out_1)
        out_1 = self.relu(out_1)
        out_1 = self.pool2(out_1)

        # Residual stages
        out_2 = self.resblocks2(out_1)
        out_3 = self.resblocks3(out_2)
        out_4 = self.resblocks4(out_3)
        out_5 = self.resblocks5(out_4)

        # Head: pool to 1x1, flatten to [Batch, 512], classify
        out_6 = self.avgpool6(out_5)
        out_6 = self.fc(torch.flatten(out_6, start_dim=1))

        return out_6



In [9]:
# Build the 18-layer model and print its per-layer summary for a 1x224x224 input.
# torchsummary creates its dummy input on CUDA by default, so the model is moved
# to `device` and the same device is passed on explicitly.
model = ResNetMLMed18().to(device)
summary(model, (1, 224, 224), device = device)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 110, 110]           3,200
       BatchNorm2d-2         [-1, 64, 110, 110]             128
              ReLU-3         [-1, 64, 110, 110]               0
         MaxPool2d-4           [-1, 64, 54, 54]               0
            Conv2d-5           [-1, 64, 54, 54]          36,928
       BatchNorm2d-6           [-1, 64, 54, 54]             128
              ReLU-7           [-1, 64, 54, 54]               0
            Conv2d-8           [-1, 64, 54, 54]          36,928
       BatchNorm2d-9           [-1, 64, 54, 54]             128
         ResBlock-10           [-1, 64, 54, 54]               0
           Conv2d-11           [-1, 64, 54, 54]          36,928
      BatchNorm2d-12           [-1, 64, 54, 54]             128
             ReLU-13           [-1, 64, 54, 54]               0
           Conv2d-14           [-1, 64,

In [10]:
# ResNet18: parameter count noted by hand from the summary above.
num_params[0] = 11_176_579

# ResNet 34

It is strongly recommended to follow along with Table 1 (page 5) and Figure 5 (page 6) of ResNetPaper.

Implementing the normal ResNet Block = [conv -> batch_norm -> activation] *2

At the beginning of each new layer (Table 1, left column) the image size is reduced using a convolution with kernel size 1 and a stride of 2 (the so-called projection); this feature is generalised in the implementation of ResNet 50 below. As an example, both variations are included here.

First we start by building the blocks. Note the downsampling operation in ResBlockDimsReduction: it is needed because the input $x$ has different dimensions than the output. If this is not clear, try print(out.shape).

Class ResNetMLMed34 inherits from torch.nn.Module, so we need to write the `__init__()` and `forward()` functions. Using Table 1 and Figure 5 from ResNetPaper we define each of the resblocks2-5 parts; the indexing is the same as in Table 1, so one can compare the number of blocks, the kernel sizes and the number of channels.
Do not forget to put the downsampling block first in each of resblocks3-5 (resblocks2 keeps the resolution of its input).


A few words about the torch.nn.init part:
By default, PyTorch initialises the convolution weights randomly. Initialising the weights from a normal distribution (and setting the batch-norm scales and biases explicitly) helps gradients propagate back through the model in the early epochs.
For smaller models such as the 34- and 50-layer ones, tests showed that this initialisation has almost no impact on the performance or convergence of the model.

For ResNet 152, on the other hand, the default-initialised model did not converge after 15 epochs and showed very poor error and accuracy rates. With the explicit initialisation it was still not great, but it could probably be improved by tuning the hyperparameters and using a better optimizer.

In [11]:
# ResBlock Class
#       - residual block [conv -> batch_norm -> activation] * 2 that is stacked in the network
# Input:    int: n_chans - number of channels (identical for input and output)
# Output:   nn.Module whose forward() returns a tensor with the shape of its input

class ResBlock(nn.Module):
    """Residual block that keeps both the channel count and the spatial size.

    Both 3x3 convolutions are bias-free and use padding 1. In this variant the
    second ReLU is applied to the residual branch *before* the skip connection
    is added, so the returned sum itself is not passed through an activation.
    """

    def __init__(self, n_chans):
        super().__init__()
        # Layers are created in the original order so that state_dict keys and
        # seeded random initialisation are unchanged.
        self.conv1 = nn.Conv2d(n_chans, n_chans, kernel_size=3, padding=1, bias=False)
        self.batch_norm1 = nn.BatchNorm2d(n_chans)
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv2d(n_chans, n_chans, kernel_size=3, padding=1, bias=False)
        self.batch_norm2 = nn.BatchNorm2d(n_chans)

        # Kaiming-normal kernels for both convolutions.
        for conv in (self.conv1, self.conv2):
            nn.init.kaiming_normal_(conv.weight, nonlinearity='relu')

        # Batch-norm layers start with scale 0.5 and shift 0.
        for norm in (self.batch_norm1, self.batch_norm2):
            nn.init.constant_(norm.weight, 0.5)
            nn.init.zeros_(norm.bias)

    def forward(self, x):
        """Return relu(bn2(conv2(relu(bn1(conv1(x)))))) + x."""
        branch = self.relu(self.batch_norm1(self.conv1(x)))
        branch = self.relu(self.batch_norm2(self.conv2(branch)))
        # The sum realises the skip connection.
        return branch + x


# ResBlockDimsReduction Class
#       - first block of a layer
#       - [conv -> batch_norm -> activation] * 2
#       - downsampling is performed with stride 2
# Input:    int: n_chans_in; int: n_chans_out
# Output:   nn.Module whose forward() returns a tensor with n_chans_out channels

class ResBlockDimsReduction(nn.Module):
    """Residual block that halves the spatial size and changes the channel count.

    The residual branch is conv3x3 (stride 2) -> batch norm -> ReLU -> conv3x3
    -> batch norm -> ReLU, all convolutions bias-free. The shortcut is always
    projected by a bias-free 1x1 convolution with stride 2, followed by batch
    norm and ReLU, before the two paths are added.
    """

    def __init__(self, n_chans_in, n_chans_out):
        super().__init__()
        # Layers are created in the original order so that state_dict keys and
        # seeded random initialisation are unchanged.
        self.conv1 = nn.Conv2d(n_chans_in, n_chans_out, kernel_size=3, stride=2, padding=1, bias=False)
        self.batch_norm1 = nn.BatchNorm2d(n_chans_out)
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv2d(n_chans_out, n_chans_out, kernel_size=3, padding=1, bias=False)
        self.batch_norm2 = nn.BatchNorm2d(n_chans_out)

        # Kaiming-normal kernels for the two branch convolutions.
        for conv in (self.conv1, self.conv2):
            nn.init.kaiming_normal_(conv.weight, nonlinearity='relu')

        # Batch-norm layers of the branch start with scale 0.5 and shift 0.
        for norm in (self.batch_norm1, self.batch_norm2):
            nn.init.constant_(norm.weight, 0.5)
            nn.init.zeros_(norm.bias)

        # Shortcut projection (keeps PyTorch's default initialisation).
        self.downsample = nn.Sequential(
            nn.Conv2d(n_chans_in, n_chans_out, kernel_size=1, stride=2, bias=False),
            nn.BatchNorm2d(n_chans_out),
            nn.ReLU(),
        )

    def forward(self, x):
        """Return the strided branch output plus the projected input."""
        branch = self.relu(self.batch_norm1(self.conv1(x)))
        branch = self.relu(self.batch_norm2(self.conv2(branch)))
        # Input and output dimensions do not match, so x is projected into the
        # dimensions of the branch output before the addition.
        shortcut = self.downsample(x)
        return branch + shortcut

# ResNetMLMed34 Class
#       - constructs a ResNet34 as described above.
# Input:    Tensor: [Batch,1,Height,Width]
# Output:   Tensor: [Batch,3]
class ResNetMLMed34(torch.nn.Module):
    """34-layer residual network for single-channel images and 3 output classes.

    Stem (7x7 convolution with stride 2, batch norm, ReLU, 3x3 max pooling with
    stride 2), four stages with 3, 4, 6 and 3 residual blocks (64, 128, 256 and
    512 channels), global average pooling and a linear classifier.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = torch.nn.Conv2d(in_channels = 1, out_channels = 64, kernel_size =7, stride =2, padding=1, bias= False)
        self.batch_norm1 = nn.BatchNorm2d(num_features=64)
        self.pool2 = torch.nn.MaxPool2d(kernel_size = 3, stride = 2)
        self.relu = torch.nn.ReLU()

        # Every block is created by its own constructor call. `n * [ResBlock(...)]`
        # repeats ONE module instance n times, which silently ties the weights
        # of all repeated blocks together.
        self.resblocks2 = nn.Sequential(
            *[ResBlock(n_chans=64) for _ in range(3)])
        self.resblocks3 = nn.Sequential(ResBlockDimsReduction(n_chans_in=64,n_chans_out=128),
            *[ResBlock(n_chans=128) for _ in range(3)])
        self.resblocks4 = nn.Sequential(ResBlockDimsReduction(n_chans_in=128,n_chans_out=256),
            *[ResBlock(n_chans=256) for _ in range(5)])
        self.resblocks5 = nn.Sequential(ResBlockDimsReduction(n_chans_in=256,n_chans_out=512),
            *[ResBlock(n_chans=512) for _ in range(2)])
        self.avgpool6 = nn.AdaptiveAvgPool2d(output_size=(1, 1))
        self.fc = nn.Linear(in_features=512, out_features=3, bias=True)


    def forward(self, x):
        """Map a batch of images to one score per class."""
        # Stem
        out_1 = self.conv1(x)
        out_1 = self.batch_norm1(out_1)
        out_1 = self.relu(out_1)
        out_1 = self.pool2(out_1)

        # Residual stages
        out_2 = self.resblocks2(out_1)
        out_3 = self.resblocks3(out_2)
        out_4 = self.resblocks4(out_3)
        out_5 = self.resblocks5(out_4)

        # Head: pool to 1x1, flatten to [Batch, 512], classify
        out_6 = self.avgpool6(out_5)
        out_6 = self.fc(torch.flatten(out_6, start_dim=1))

        return out_6

In [12]:
# Build the 34-layer model and print its per-layer summary for a 1x224x224 input.
# torchsummary creates its dummy input on CUDA by default, so the model is moved
# to `device` and the same device is passed on explicitly.
model = ResNetMLMed34().to(device)
summary(model, (1, 224, 224), device = device)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 110, 110]           3,136
       BatchNorm2d-2         [-1, 64, 110, 110]             128
              ReLU-3         [-1, 64, 110, 110]               0
         MaxPool2d-4           [-1, 64, 54, 54]               0
            Conv2d-5           [-1, 64, 54, 54]          36,864
       BatchNorm2d-6           [-1, 64, 54, 54]             128
              ReLU-7           [-1, 64, 54, 54]               0
            Conv2d-8           [-1, 64, 54, 54]          36,864
       BatchNorm2d-9           [-1, 64, 54, 54]             128
             ReLU-10           [-1, 64, 54, 54]               0
         ResBlock-11           [-1, 64, 54, 54]               0
           Conv2d-12           [-1, 64, 54, 54]          36,864
      BatchNorm2d-13           [-1, 64, 54, 54]             128
             ReLU-14           [-1, 64,

In [13]:
# ResNet34: parameter count noted by hand from the summary above.
num_params[1] = 21_279_939

# ResNet 50

ResNet50 introduces a new structure, the bottleneck block = [conv -> batch_norm -> activation]*3, where besides the 3 * 3 convolution we also vary the number of channels within each block using 1 * 1 convolutions.
For more tuning opportunities, the numbers of input, intermediate and output channels are passed to the block; in most blocks the number of input channels equals the number of output channels.

A generalized solution for the downsampling issue is used: instead of the 2 different blocks of ResNet34 we define a boolean flag and a stride for this purpose. If we want to downsample at the beginning of resblocks3-5, we set downsample=True, stride=2. Note that there is no need to change the stride in resblocks2.
If downsampling is not needed, this operation is set to nn.Identity().

In [14]:
# ResBlockBottleneck Class
#       - bottleneck block [conv -> batch_norm -> activation] * 3 that is stacked in the network
# Input:    int: n_chans_in, int: n_chans_between, int: n_chans_out
#           boolean: downsample = False, set True to project the shortcut (first block of a stage)
#           int: stride = 1, set 2 to halve the spatial size
# Output:   nn.Module whose forward() returns a tensor with n_chans_out channels
class ResBlockBottleneck(nn.Module):
    """Bottleneck residual block: 1x1 -> 3x3 -> 1x1 convolutions, all bias-free.

    Each convolution is followed by batch norm and ReLU; the stride is applied
    in the 3x3 convolution. The shortcut is either the identity or, with
    ``downsample=True``, a 1x1 convolution (same stride) -> batch norm -> ReLU.
    The branch output and the shortcut are added without a further activation.
    """

    def __init__(self, n_chans_in,n_chans_between,n_chans_out, downsample = False, stride = 1):
        super().__init__()
        # A single ReLU module is shared by all three activations; it is
        # registered right after batch_norm1, as before.
        self.conv1 = nn.Conv2d(n_chans_in, n_chans_between, kernel_size=1, padding=0, bias=False)
        self.batch_norm1 = nn.BatchNorm2d(n_chans_between)
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv2d(n_chans_between, n_chans_between, kernel_size=3, stride=stride, padding=1, bias=False)
        self.batch_norm2 = nn.BatchNorm2d(n_chans_between)
        self.conv3 = nn.Conv2d(n_chans_between, n_chans_out, kernel_size=1, padding=0, bias=False)
        self.batch_norm3 = nn.BatchNorm2d(n_chans_out)

        # Kaiming-normal kernels for the three branch convolutions.
        for conv in (self.conv1, self.conv2, self.conv3):
            nn.init.kaiming_normal_(conv.weight, nonlinearity='relu')

        # Batch-norm layers of the branch start with scale 0.5 and shift 0.
        for norm in (self.batch_norm1, self.batch_norm2, self.batch_norm3):
            nn.init.constant_(norm.weight, 0.5)
            nn.init.zeros_(norm.bias)

        # Shortcut: learned projection when requested, plain identity otherwise.
        self.downsample = nn.Sequential(
            nn.Conv2d(n_chans_in, n_chans_out, kernel_size=1, padding=0, stride=stride, bias=False),
            nn.BatchNorm2d(n_chans_out),
            nn.ReLU(),
        ) if downsample else nn.Identity()

    def forward(self, x):
        """Return the bottleneck branch output plus the (possibly projected) input."""
        branch = self.relu(self.batch_norm1(self.conv1(x)))
        branch = self.relu(self.batch_norm2(self.conv2(branch)))
        branch = self.relu(self.batch_norm3(self.conv3(branch)))
        return branch + self.downsample(x)

# ResNetMLMed50 Class
#       - constructs a ResNet50 as described above.
# Input:    Tensor: [Batch,1,Height,Width]
# Output:   Tensor: [Batch,3]
class ResNetMLMed50(torch.nn.Module):
    """50-layer residual network for single-channel images and 3 output classes.

    Stem (7x7 convolution with stride 2, batch norm, ReLU, 3x3 max pooling with
    stride 2), four stages with 3, 4, 6 and 3 bottleneck blocks (256, 512, 1024
    and 2048 output channels), global average pooling and a linear classifier.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = torch.nn.Conv2d(in_channels = 1, out_channels = 64, kernel_size =7, stride =2, padding=1)
        self.batch_norm1 = nn.BatchNorm2d(num_features=64)
        self.relu = torch.nn.ReLU()
        self.pool2 = torch.nn.MaxPool2d(kernel_size = 3, stride = 2,padding=1)

        # Every block is created by its own constructor call. `n * [Block(...)]`
        # repeats ONE module instance n times, which silently ties the weights
        # of all repeated blocks together.
        self.resblocks2 = nn.Sequential(
            ResBlockBottleneck(n_chans_in = 64,n_chans_between =64 ,n_chans_out=256,downsample= True),
            *[ResBlockBottleneck(n_chans_in = 256,n_chans_between=64,n_chans_out= 256) for _ in range(2)])
        self.resblocks3 = nn.Sequential(
            ResBlockBottleneck(n_chans_in = 256, n_chans_between=128, n_chans_out= 512, downsample=True, stride=2),
            *[ResBlockBottleneck(n_chans_in = 512,n_chans_between=128,n_chans_out= 512) for _ in range(3)])
        self.resblocks4 = nn.Sequential(
            ResBlockBottleneck(n_chans_in = 512, n_chans_between=256, n_chans_out= 1024,downsample=True, stride=2),
            *[ResBlockBottleneck(n_chans_in = 1024,n_chans_between=256,n_chans_out= 1024) for _ in range(5)])
        self.resblocks5 = nn.Sequential(
            ResBlockBottleneck(n_chans_in = 1024, n_chans_between=512, n_chans_out= 2048,downsample=True, stride=2),
            *[ResBlockBottleneck(n_chans_in = 2048,n_chans_between=512,n_chans_out= 2048) for _ in range(2)])
        self.avgpool6 = nn.AdaptiveAvgPool2d(output_size=(1, 1))
        self.fc = nn.Linear(in_features=2048, out_features=3, bias=True)

    def forward(self, x):
        """Map a batch of images to one score per class."""
        # Stem
        out_1 = self.conv1(x)
        out_1 = self.batch_norm1(out_1)
        out_1 = self.relu(out_1)
        out_1 = self.pool2(out_1)

        # Bottleneck stages
        out_2 = self.resblocks2(out_1)
        out_3 = self.resblocks3(out_2)
        out_4 = self.resblocks4(out_3)
        out_5 = self.resblocks5(out_4)

        # Head: pool to 1x1, flatten to [Batch, 2048], classify
        out_6 = self.avgpool6(out_5)
        out_6 = self.fc(torch.flatten(out_6, start_dim=1))
        return out_6

In [15]:
# Build the 50-layer model and print its per-layer summary for a 1x224x224 input.
# torchsummary creates its dummy input on CUDA by default, so the model is moved
# to `device` and the same device is passed on explicitly.
model = ResNetMLMed50().to(device)
summary(model, (1, 224, 224), device = device)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 110, 110]           3,200
       BatchNorm2d-2         [-1, 64, 110, 110]             128
              ReLU-3         [-1, 64, 110, 110]               0
         MaxPool2d-4           [-1, 64, 55, 55]               0
            Conv2d-5           [-1, 64, 55, 55]           4,096
       BatchNorm2d-6           [-1, 64, 55, 55]             128
              ReLU-7           [-1, 64, 55, 55]               0
            Conv2d-8           [-1, 64, 55, 55]          36,864
       BatchNorm2d-9           [-1, 64, 55, 55]             128
             ReLU-10           [-1, 64, 55, 55]               0
           Conv2d-11          [-1, 256, 55, 55]          16,384
      BatchNorm2d-12          [-1, 256, 55, 55]             512
             ReLU-13          [-1, 256, 55, 55]               0
           Conv2d-14          [-1, 256,

In [16]:
# ResNet50: parameter count noted by hand from the summary above.
num_params[2] = 23_507_971

# ResNet 152

The same as ResNet 50 but with more blocks

In [17]:
class ResNetMLMed152(torch.nn.Module):
    """152-layer residual network for single-channel images and 3 output classes.

    Same layout as the 50-layer model, but with 3, 8, 36 and 3 bottleneck
    blocks in the four stages.

    Input:  tensor of shape [Batch, 1, Height, Width]
    Output: tensor of shape [Batch, 3] (unnormalised class scores)
    """

    def __init__(self):
        super(ResNetMLMed152, self).__init__()
        self.conv1 = torch.nn.Conv2d(in_channels = 1, out_channels = 64, kernel_size =7, stride =2, padding=1)
        self.batch_norm1 = nn.BatchNorm2d(num_features=64)
        self.relu = torch.nn.ReLU()
        self.pool2 = torch.nn.MaxPool2d(kernel_size = 3, stride = 2,padding=1)

        # Every block is created by its own constructor call. `n * [Block(...)]`
        # repeats ONE module instance n times, which silently ties the weights
        # of all repeated blocks together.
        self.resblocks2 = nn.Sequential(
            ResBlockBottleneck(n_chans_in = 64,n_chans_between =64 ,n_chans_out=256,downsample= True),
            *[ResBlockBottleneck(n_chans_in = 256,n_chans_between=64,n_chans_out= 256) for _ in range(2)])
        self.resblocks3 = nn.Sequential(
            ResBlockBottleneck(n_chans_in = 256, n_chans_between=128, n_chans_out= 512, downsample=True, stride=2),
            *[ResBlockBottleneck(n_chans_in = 512,n_chans_between=128,n_chans_out= 512) for _ in range(7)])
        self.resblocks4 = nn.Sequential(
            ResBlockBottleneck(n_chans_in = 512, n_chans_between=256, n_chans_out= 1024,downsample=True, stride=2),
            *[ResBlockBottleneck(n_chans_in = 1024,n_chans_between=256,n_chans_out= 1024) for _ in range(35)])
        self.resblocks5 = nn.Sequential(
            ResBlockBottleneck(n_chans_in = 1024, n_chans_between=512, n_chans_out= 2048,downsample=True, stride=2),
            *[ResBlockBottleneck(n_chans_in = 2048,n_chans_between=512,n_chans_out= 2048) for _ in range(2)])
        self.avgpool6 = nn.AdaptiveAvgPool2d(output_size=(1, 1))
        self.fc = nn.Linear(in_features=2048, out_features=3, bias=True)

    def forward(self, x):
        """Map a batch of images to one score per class."""
        # Stem
        out_1 = self.conv1(x)
        out_1 = self.batch_norm1(out_1)
        out_1 = self.relu(out_1)
        out_1 = self.pool2(out_1)

        # Bottleneck stages
        out_2 = self.resblocks2(out_1)
        out_3 = self.resblocks3(out_2)
        out_4 = self.resblocks4(out_3)
        out_5 = self.resblocks5(out_4)

        # Head: pool to 1x1, flatten to [Batch, 2048], classify
        out_6 = self.avgpool6(out_5)
        out_6 = self.fc(torch.flatten(out_6, start_dim=1))

        return out_6

In [18]:
# Build the 152-layer model and print its per-layer summary for a 1x224x224 input.
# torchsummary creates its dummy input on CUDA by default, so the model is moved
# to `device` and the same device is passed on explicitly.
model = ResNetMLMed152().to(device)
summary(model, (1, 224, 224), device = device)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 110, 110]           3,200
       BatchNorm2d-2         [-1, 64, 110, 110]             128
              ReLU-3         [-1, 64, 110, 110]               0
         MaxPool2d-4           [-1, 64, 55, 55]               0
            Conv2d-5           [-1, 64, 55, 55]           4,096
       BatchNorm2d-6           [-1, 64, 55, 55]             128
              ReLU-7           [-1, 64, 55, 55]               0
            Conv2d-8           [-1, 64, 55, 55]          36,864
       BatchNorm2d-9           [-1, 64, 55, 55]             128
             ReLU-10           [-1, 64, 55, 55]               0
           Conv2d-11          [-1, 256, 55, 55]          16,384
      BatchNorm2d-12          [-1, 256, 55, 55]             512
             ReLU-13          [-1, 256, 55, 55]               0
           Conv2d-14          [-1, 256,

In [20]:
# ResNet152: parameter count noted by hand from the summary above.
num_params[3] = 58_143_747

In [38]:
# Parameter counts of the four models, expressed in millions.
num_params / 1e6

tensor([11.1766, 21.2799, 23.5080, 58.1437])

In [None]:
# Fetch a single batch for the shape check below. Asking the iterator for its
# first element avoids loading a second batch only to break out of a loop.
data, targets = next(iter(dl))
data, targets = data.to(device), targets.to(device)

In [None]:
# Smoke test: one forward pass, expected output shape is [batch size, 3].
# The batch already lives on `device`, so the model has to be moved there as
# well, otherwise the forward pass fails on a CUDA machine.
# NOTE(review): `model` is whatever was instantiated last above (the
# ResNet152 cell), while run_name says "ResNet50"; confirm which one to train.
model = model.to(device)
model(data).shape

In [None]:
# Training objective and optimiser: cross-entropy loss on the class scores,
# AdamW with the learning rate and weight decay from the hyperparameter cell.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(
    params = model.parameters(),
    lr = learning_rate,
    weight_decay = weight_decay,
)

In [None]:
# Full training run: `epochs` passes over the training split, each followed by
# a validation pass; a single test pass is done after the last epoch.
# Train/val/test results (and epoch run times) are written to
# ../logs/{run_name}_*.csv through uu.csv_logger.
# NOTE(review): `uu` and `evaluate_classifier_model` are not imported or
# defined in the visible part of this notebook - confirm where they come from.
if time_me is True:
    c_start = time.time()

# Number of complete batches in the training split (integer division).
num_steps = len(ds.file_names['train'])//batch_size

for epoch in range(epochs):

    # If we are caching, we now have all data and let the (potentially non-persistent) workers know
    if cache_me is True and epoch > 0:
        dl.dataset.set_cached("train")
        dl.dataset.set_cached("val")

    # Time me
    if time_me is True:
        e_start = time.time()

    # Go to train mode
    ds.set_mode("train")
    model.train()

    # Train loop
    for step, (data, targets) in enumerate(dl):

        # Manually drop last batch (this is for example relevant with BatchNorm)
        # NOTE(review): the skipped batch has index num_steps - 1. If the loader yields
        # ceil(N / batch_size) batches, an incomplete final batch would have index
        # num_steps instead - confirm that the intended batch is dropped.
        if step == num_steps - 1 and (epoch > 0 or ds.cache_data is False):
            continue

        # Train loop: Zero gradients, forward step, evaluate, log, backward step
        optimizer.zero_grad()
        data, targets = data.to(device), targets.to(device)
        if time_me is True:
            # "CPU time": wall-clock time from the end of the previous forward pass
            # (or from the start of the run) until just before this forward pass.
            c_end = time.time()
            if step % mod_step == 0 and wantToPrint:
                print(f"CPU time: {c_end-c_start:.4f}s")
            g_start = time.time()
        predictions = model(data)
        if time_me is True:
            # "GPU time": wall-clock time spent in the model(data) call.
            g_end = time.time()
            c_start = time.time()
        if step % mod_step == 0 and time_me is True and wantToPrint:
            print(f"GPU time: {g_end-g_start:.4f}s")
        loss = criterion(predictions, targets)
        if step % mod_step == 0:
            print(f"Epoch [{epoch+1}/{epochs}]\t Step [{step+1}/{num_steps}]\t Train Loss: {loss.item():.4f}")
        # Log the loss of every step; `first`/`overwrite` are only True for the very first step of the run.
        uu.csv_logger(
            logfile = f"../logs/{run_name}_train.csv",
            content = {"epoch": epoch, "step": step, "loss": loss.item()},
            first = (epoch == 0 and step == 0),
            overwrite = (epoch == 0 and step == 0)
                )
        loss.backward()
        optimizer.step()

    # Go to eval mode
    ds.set_mode("val")
    model.eval()

    # Validation loop
    val_accuracy, avg_val_loss = evaluate_classifier_model(model = model, dataloader = dl, device = device)
    print(f"Epoch [{epoch+1}/{epochs}]\t Val Loss: {avg_val_loss:.4f}\t Val Accuracy: {val_accuracy:.4f}")
    uu.csv_logger(
        logfile = f"../logs/{run_name}_val.csv",
        content = {"epoch": epoch, "val_loss": avg_val_loss, "val_accuracy": val_accuracy},
        first = (epoch == 0),
        overwrite = (epoch == 0)
            )

    if time_me is True:
        # Epoch run time, covering the training and the validation pass.
        cur_time = time.time()-e_start
        uu.csv_logger(
            logfile = f"../logs/{run_name}_runtime.csv",
            content = {"epoch": epoch, "time": cur_time},
            first = (epoch == 0),
            overwrite = (epoch == 0)
                )
        print(f"Epoch nr {epoch+1} time: {time.time()-e_start:.4f}s")

# Finally, test time
ds.set_mode("test")
model.eval()

# `epoch` still holds the index of the last training epoch here.
test_accuracy, avg_test_loss = evaluate_classifier_model(model = model, dataloader = dl, device = device)
print(f"Epoch [{epoch+1}/{epochs}]\t Test Loss: {avg_test_loss:.4f}\t Test Accuracy: {test_accuracy:.4f}")
uu.csv_logger(
    logfile = f"../logs/{run_name}_test.csv",
    content = {"epoch": epoch, "test_loss": avg_test_loss, "test_accuracy": test_accuracy},
    first = True,
    overwrite = True
        )

In [None]:
# We can save our model to the disk like so ('../models' means 'go back 1 directory, then into 'models'):
def save_model(name, model, models_dir = "../models"):
    """Save the model's state_dict to ``<models_dir>/<name>``.

    Args:
        name: file name of the checkpoint, relative to ``models_dir``.
        model: network to save. A ``torch.nn.DataParallel`` wrapper is
            unwrapped first, so the stored keys carry no ``module.`` prefix.
        models_dir: directory the checkpoint is written to. Defaults to
            ``"../models"``; missing directories are created.
    """
    loc = os.path.join(models_dir, name)
    # Create the directory the file actually ends up in (also covers a `name`
    # that contains a sub-directory); fall back to "." for a bare file name.
    os.makedirs(os.path.dirname(loc) or ".", exist_ok = True)

    # To save a DataParallel model generically, save the model.module.state_dict().
    # This way, you have the flexibility to load the model any way you want to any device you want.
    if isinstance(model, torch.nn.DataParallel):
        model = model.module
    torch.save(model.state_dict(), loc)
#save_model("ResNet50_abgabe.tar", model)

--------

### Some useful functions (maybe)

In [None]:
# Sometimes it is convenient to generalize the training loop.
# In my case I often used it to change one parameter and see how the train/validation errors change,
# so I include this code as well.

# The training_loop function does the same as the training-loop cell above; it could be extracted into a separate .py file and imported at the beginning.
def training_loop(epochs, optimizer, model, criterion, ds,
                  dl, batch_size, run_name, device, cache_me = False, wantToPrint =1,
                  mod_step = 500, time_me = True, time = time):
    """Train `model`, validate after every epoch, then evaluate once in "test" mode.

    Metrics are written through `uu.csv_logger` to four files under ../logs:
    `{run_name}_train.csv` (loss per step), `{run_name}_val.csv` (per epoch),
    `{run_name}_runtime.csv` (per epoch, only if `time_me`) and
    `{run_name}_test.csv` (once, at the end).

    Args:
        epochs: Number of training epochs. Must be >= 1, because the final test
            log reuses the last epoch index.
        optimizer: Object providing `zero_grad()` and `step()`.
        model: Callable model providing `train()` and `eval()`.
        criterion: Callable `criterion(predictions, targets)`; its result must
            provide `item()` and `backward()`.
        ds: Dataset object; `file_names['train']`, `set_mode(...)` and
            `cache_data` are read from it.
        dl: Iterable yielding `(data, targets)` batches. When `cache_me` is
            True, `dl.dataset.set_cached(...)` is called from the second epoch on.
        batch_size: Used only to compute the number of steps per epoch.
        run_name: Prefix of the CSV log files.
        device: Target passed to `.to(...)` for data and targets.
        cache_me: Whether the dataset caching workflow is active.
        wantToPrint: Truthy to print the CPU/GPU timings (needs `time_me`).
        mod_step: The train loss (and timings) are printed every `mod_step` steps.
        time_me: Whether to measure and log timings.
        time: The module used for timing; defaults to the `time` module.

    Returns:
        None.
    """
    if time_me is True:
        c_start = time.time()

    num_steps = len(ds.file_names['train'])//batch_size

    for epoch in range(epochs):

        # If we are caching, we now have all data and let the (potentially non-persistent) workers know
        if cache_me is True and epoch > 0:
            dl.dataset.set_cached("train")
            dl.dataset.set_cached("val")

        # Time me
        if time_me is True:
            e_start = time.time()

        # Go to train mode
        ds.set_mode("train")
        model.train()

        # Train loop
        for step, (data, targets) in enumerate(dl):

            # Manually drop last batch (this is for example relevant with BatchNorm)
            # NOTE(review): this skips the step with index num_steps - 1; a trailing
            # partial batch at index num_steps (if the dataloader yields one) would
            # still be trained on — confirm this is the intended batch.
            # The skip is disabled in epoch 0 while ds.cache_data is enabled,
            # presumably so that every sample is visited (and cached) once — verify.
            if step == num_steps - 1 and (epoch > 0 or ds.cache_data is False):
                continue

            # Train loop: Zero gradients, forward step, evaluate, log, backward step
            optimizer.zero_grad()
            data, targets = data.to(device), targets.to(device)
            if time_me is True:
                # "CPU time" spans from the end of the previous forward pass to
                # this point, i.e. everything except the forward pass itself.
                c_end = time.time()
                if step % mod_step == 0 and wantToPrint:
                    print(f"CPU time: {c_end-c_start:.4f}s")
                g_start = time.time()
            predictions = model(data)
            if time_me is True:
                # "GPU time" is the wall-clock duration of the forward pass.
                g_end = time.time()
                c_start = time.time()
            if step % mod_step == 0 and time_me is True and wantToPrint:
                print(f"GPU time: {g_end-g_start:.4f}s")
            loss = criterion(predictions, targets)
            if step % mod_step == 0:
                print(f"Epoch [{epoch+1}/{epochs}]\t Step [{step+1}/{num_steps}]\t Train Loss: {loss.item():.4f}")
            uu.csv_logger(
                logfile = f"../logs/{run_name}_train.csv",
                content = {"epoch": epoch, "step": step, "loss": loss.item()},
                first = (epoch == 0 and step == 0),
                overwrite = (epoch == 0 and step == 0)
                    )
            loss.backward()
            optimizer.step()

        # Go to eval mode
        ds.set_mode("val")
        model.eval()

        # Validation loop
        val_accuracy, avg_val_loss = evaluate_classifier_model(model = model, dataloader = dl, device = device)
        print(f"Epoch [{epoch+1}/{epochs}]\t Val Loss: {avg_val_loss:.4f}\t Val Accuracy: {val_accuracy:.4f}")
        uu.csv_logger(
            logfile = f"../logs/{run_name}_val.csv",
            content = {"epoch": epoch, "val_loss": avg_val_loss, "val_accuracy": val_accuracy},
            first = (epoch == 0),
            overwrite = (epoch == 0)
                )

        if time_me is True:
            # The runtime log has one row per epoch, so "first row" depends on the
            # epoch only. `step` must not be consulted here: after the batch loop
            # it holds the last batch index, which would leave the flags False
            # even in the first epoch.
            uu.csv_logger(
                logfile = f"../logs/{run_name}_runtime.csv",
                content = {"epoch": epoch, "time": time.time()-e_start},
                first = (epoch == 0),
                overwrite = (epoch == 0)
                    )
            print(f"Epoch nr {epoch+1} time: {time.time()-e_start:.4f}s")


    # Finally, test time
    ds.set_mode("test")
    model.eval()

    test_accuracy, avg_test_loss = evaluate_classifier_model(model = model, dataloader = dl, device = device)
    print(f"Epoch [{epoch+1}/{epochs}]\t Test Loss: {avg_test_loss:.4f}\t Test Accuracy: {test_accuracy:.4f}")
    uu.csv_logger(
        logfile = f"../logs/{run_name}_test.csv",
        content = {"epoch": epoch, "test_loss": avg_test_loss, "test_accuracy": test_accuracy},
        first = True,
        overwrite = True
            )


# Run the full train / validate / test cycle with the settings defined earlier in the notebook.
training_loop(
    # what is trained, and how
    model = model,
    optimizer = optimizer,
    criterion = criterion,
    epochs = epochs,
    batch_size = batch_size,
    device = device,
    # data source
    ds = ds,  # dataset
    dl = dl,  # dataloader
    cache_me = cache_me,
    # logging and console output
    run_name = run_name,
    time_me = True,
    wantToPrint = 1,  # truthy: also print the GPU and CPU timings
    mod_step = 500,   # the train loss is printed every mod_step steps
)

In [None]:
# If you want to analyse the logs, it is convenient to use pandas DataFrames for it

from os.path import exists
import pandas as pd
# creates a pandas dataframe if file exist
def create_df_if_exist(path):
    """Read the CSV file at `path` into a pandas DataFrame.

    Returns the DataFrame, or None (after printing a short notice) when
    nothing exists at `path`.
    """
    # Guard clause: report the missing file and bail out early.
    if not exists(path):
        print('No file exist', path)
        return None

    return pd.read_csv(path, sep=',')

# creates 4 dataframes, with validate, train, test and runtime logs.
def create_dfs_filename(path):
    """Load the four log files that share the prefix `path`.

    `path` is the log location without suffix, e.g. "<folder>/<run_name>".
    Returns a 4-tuple in the order (validation, train, test, runtime); each
    entry is whatever create_df_if_exist returns for that file.
    """
    log_suffixes = ('_val.csv', '_train.csv', '_test.csv', '_runtime.csv')
    # Files are read in the listed order, one create_df_if_exist call each.
    return tuple(create_df_if_exist(path + suffix) for suffix in log_suffixes)


# Folder holding the CSV logs and the run-name prefix of the files to load.
logs_folder = '/home/coder/Course_Materials/logs/'
filename = 'AlexNet'
# One dataframe per log type (None for any file that is missing).
(
    df_AlexNet_val,
    df_AlexNet_train,
    df_AlexNet_test,
    df_AlexNet_runtime,
) = create_dfs_filename(f"{logs_folder}{filename}")

In [None]:
# a simple function to plot an image from torch.tensor
# we need a utility function to switch dimensions from [channel*Height*Width] to [Height*Width*Channel] and convert tensor to numpy array
def plotImg(tensor_image_in, cmap = 'bone'):
    """Display a single image given as a torch tensor.

    Args:
        tensor_image_in: Image tensor; it is moved to the CPU and converted
            with uu.convert_tensor_to_opencv_array before plotting.
        cmap: Matplotlib colormap name passed to imshow.

    Returns:
        None. The figure is shown via plt.show().
    """
    tensor_image = tensor_image_in.to('cpu')
    array_image = uu.convert_tensor_to_opencv_array(tensor_image)
    plt.figure()
    plt.imshow(array_image, cmap=cmap)
    # NOTE(review): the axes are fixed to 0..256 regardless of the image size, and
    # an ascending ylim undoes imshow's default top-left origin (the picture is
    # drawn vertically flipped) — confirm both are intended.
    plt.xlim((0, 256))
    plt.ylim((0, 256))
    plt.title("Image")
    # The original wrote `plt.show` without parentheses, which only references
    # the function and never calls it.
    plt.show()