In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import style

import torch 
from torch import nn

from torchsummary import summary

In [2]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
device

'cpu'

In [59]:
class res_block(nn.Module):
    """Residual block: 3x3 conv -> BN -> ReLU -> 3x3 conv -> BN, added to a skip path, then ReLU.

    Args:
        in_channels: number of channels of the input tensor.
        out_channels: number of channels produced by both 3x3 convolutions.
        stride: stride of the first 3x3 convolution (and of the 1x1 skip
            convolution when one is present).
        use_1x1_conv: request a 1x1 convolution on the skip path.  A 1x1
            convolution is also created automatically whenever the main path
            changes the tensor shape (``stride != 1`` or
            ``in_channels != out_channels``); without it the residual addition
            would fail, or silently broadcast when one side has one channel.

    Attributes:
        use_1x1_conv: True when the skip path goes through ``conv3``.
        conv3: the 1x1 skip projection, or None for an identity skip.
    """

    def __init__(self, in_channels, out_channels, stride=1, use_1x1_conv=False):
        super(res_block, self).__init__()

        # A tuple/list stride such as (1, 1) must not be mistaken for a
        # down-sampling stride.
        if isinstance(stride, (tuple, list)):
            is_strided = any(s != 1 for s in stride)
        else:
            is_strided = stride != 1
        shape_changes = is_strided or in_channels != out_channels

        # Project the skip path whenever the caller asks for it OR the main
        # path output would not match the input shape.
        self.use_1x1_conv = bool(use_1x1_conv or shape_changes)

        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3,
                            stride=stride, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU())
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channels=out_channels, out_channels=out_channels, kernel_size=3,
                              stride=1, padding=1),
            nn.BatchNorm2d(out_channels))

        self.conv3 = None
        if self.use_1x1_conv:
            # Kept inside nn.Sequential so parameter names stay "conv3.0.*".
            self.conv3 = nn.Sequential(
                        nn.Conv2d(in_channels=in_channels, out_channels=out_channels,
                                  kernel_size=1, stride=stride))

    def forward(self, x):
        """Return ReLU(main_path(x) + skip(x))."""
        op1 = self.conv1(x)
        op1 = self.conv2(op1)
        op2 = x
        if self.use_1x1_conv:
            op2 = self.conv3(op2)
        # Functional ReLU: avoids building a new nn.ReLU module on every call.
        return torch.relu(op1 + op2)

In [60]:
# Smoke test: a stride-2 block with a 1x1 skip convolution on a (3, 64, 64) input.
temp = res_block(in_channels=3, out_channels=64, stride=2, use_1x1_conv=True)
temp = temp.to(device)
summary(temp, (3, 64, 64))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 32, 32]           1,792
       BatchNorm2d-2           [-1, 64, 32, 32]             128
              ReLU-3           [-1, 64, 32, 32]               0
            Conv2d-4           [-1, 64, 32, 32]          36,928
       BatchNorm2d-5           [-1, 64, 32, 32]             128
            Conv2d-6           [-1, 64, 32, 32]             256
Total params: 39,232
Trainable params: 39,232
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.05
Forward/backward pass size (MB): 3.00
Params size (MB): 0.15
Estimated Total Size (MB): 3.20
----------------------------------------------------------------


In [64]:
class complete_block(nn.Module):
    """A stage of ``num_blocks`` residual blocks applied one after another.

    Args:
        in_channels: channels of the tensor entering the stage.
        out_channels: channels produced by every block of the stage.
        num_blocks: how many ``res_block`` modules to stack.
        first_block: when False (default) the first block of the stage uses
            stride 2 and a 1x1 skip convolution; when True no block in the
            stage uses a stride.
    """

    def __init__(self, in_channels ,out_channels, num_blocks, first_block=False):
        super(complete_block, self).__init__()
        blk = []
        for i in range(num_blocks):
            if i == 0 and not first_block:
                blk.append(res_block(in_channels, out_channels, use_1x1_conv=True, stride=2))
            else:
                # Only the first block of a ``first_block`` stage can see
                # in_channels != out_channels; it then needs a 1x1 skip
                # convolution, otherwise its residual addition cannot match
                # channel counts.  For every later block this is False.
                blk.append(res_block(in_channels, out_channels,
                                     use_1x1_conv=(in_channels != out_channels)))
            # Every block after the first consumes the stage's output width.
            in_channels = out_channels
        self.conv = nn.Sequential(*blk)

    def forward(self, x):
        """Run ``x`` through all residual blocks of the stage in order."""
        return self.conv(x)
    

In [65]:
# Smoke test: a two-block stage built with first_block=True on a (64, 56, 56) input.
temp = complete_block(in_channels=64, out_channels=64, num_blocks=2, first_block=True)
temp = temp.to(device)
summary(temp, (64, 56, 56))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 56, 56]          36,928
       BatchNorm2d-2           [-1, 64, 56, 56]             128
              ReLU-3           [-1, 64, 56, 56]               0
            Conv2d-4           [-1, 64, 56, 56]          36,928
       BatchNorm2d-5           [-1, 64, 56, 56]             128
         res_block-6           [-1, 64, 56, 56]               0
            Conv2d-7           [-1, 64, 56, 56]          36,928
       BatchNorm2d-8           [-1, 64, 56, 56]             128
              ReLU-9           [-1, 64, 56, 56]               0
           Conv2d-10           [-1, 64, 56, 56]          36,928
      BatchNorm2d-11           [-1, 64, 56, 56]             128
        res_block-12           [-1, 64, 56, 56]               0
Total params: 148,224
Trainable params: 148,224
Non-trainable params: 0
-------------------------------

In [66]:
# Smoke test: 64 -> 128 channel stage (default first_block=False) on a (64, 56, 56) input.
temp = complete_block(
    in_channels=64,
    out_channels=128,
    num_blocks=2,
)
temp = temp.to(device)
summary(temp, (64, 56, 56))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1          [-1, 128, 28, 28]          73,856
       BatchNorm2d-2          [-1, 128, 28, 28]             256
              ReLU-3          [-1, 128, 28, 28]               0
            Conv2d-4          [-1, 128, 28, 28]         147,584
       BatchNorm2d-5          [-1, 128, 28, 28]             256
            Conv2d-6          [-1, 128, 28, 28]           8,320
         res_block-7          [-1, 128, 28, 28]               0
            Conv2d-8          [-1, 128, 28, 28]         147,584
       BatchNorm2d-9          [-1, 128, 28, 28]             256
             ReLU-10          [-1, 128, 28, 28]               0
           Conv2d-11          [-1, 128, 28, 28]         147,584
      BatchNorm2d-12          [-1, 128, 28, 28]             256
        res_block-13          [-1, 128, 28, 28]               0
Total params: 525,952
Trainable params:

In [67]:
# Smoke test: 128 -> 256 channel stage (default first_block=False) on a (128, 28, 28) input.
temp = complete_block(
    in_channels=128,
    out_channels=256,
    num_blocks=2,
)
temp = temp.to(device)
summary(temp, (128, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1          [-1, 256, 14, 14]         295,168
       BatchNorm2d-2          [-1, 256, 14, 14]             512
              ReLU-3          [-1, 256, 14, 14]               0
            Conv2d-4          [-1, 256, 14, 14]         590,080
       BatchNorm2d-5          [-1, 256, 14, 14]             512
            Conv2d-6          [-1, 256, 14, 14]          33,024
         res_block-7          [-1, 256, 14, 14]               0
            Conv2d-8          [-1, 256, 14, 14]         590,080
       BatchNorm2d-9          [-1, 256, 14, 14]             512
             ReLU-10          [-1, 256, 14, 14]               0
           Conv2d-11          [-1, 256, 14, 14]         590,080
      BatchNorm2d-12          [-1, 256, 14, 14]             512
        res_block-13          [-1, 256, 14, 14]               0
Total params: 2,100,480
Trainable param

In [68]:
# Smoke test: 256 -> 512 channel stage (default first_block=False) on a (256, 14, 14) input.
temp = complete_block(
    in_channels=256,
    out_channels=512,
    num_blocks=2,
)
temp = temp.to(device)
summary(temp, (256, 14, 14))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 512, 7, 7]       1,180,160
       BatchNorm2d-2            [-1, 512, 7, 7]           1,024
              ReLU-3            [-1, 512, 7, 7]               0
            Conv2d-4            [-1, 512, 7, 7]       2,359,808
       BatchNorm2d-5            [-1, 512, 7, 7]           1,024
            Conv2d-6            [-1, 512, 7, 7]         131,584
         res_block-7            [-1, 512, 7, 7]               0
            Conv2d-8            [-1, 512, 7, 7]       2,359,808
       BatchNorm2d-9            [-1, 512, 7, 7]           1,024
             ReLU-10            [-1, 512, 7, 7]               0
           Conv2d-11            [-1, 512, 7, 7]       2,359,808
      BatchNorm2d-12            [-1, 512, 7, 7]           1,024
        res_block-13            [-1, 512, 7, 7]               0
Total params: 8,395,264
Trainable param

In [72]:
class resnet(nn.Module):
    """Convolutional stem, four two-block residual stages, and a linear classifier.

    Args:
        input_shape: ``(channels, height, width)`` of one input sample; used to
            size the stem convolution and the final linear layer.
        num_classes: number of outputs of the final linear layer.
    """

    def __init__(self, input_shape, num_classes):
        super(resnet, self).__init__()
        in_channels, height, width = input_shape
        self.conv = nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=64, kernel_size=(7,7),
                              stride=(2,2), padding=(3,3)),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(3,3), stride=(2,2), padding=(1,1)))
        self.block1 = complete_block(in_channels=64, out_channels=64, num_blocks=2, first_block=True)
        self.block2 = complete_block(in_channels=64, out_channels=128, num_blocks=2)
        self.block3 = complete_block(in_channels=128, out_channels=256, num_blocks=2)
        self.block4 = complete_block(in_channels=256, out_channels=512, num_blocks=2)

        # Five stride-2 layers shrink the feature map: the stem conv, the stem
        # max-pool, and the first block of block2, block3 and block4.  With
        # their kernel/padding settings each maps a size n to (n - 1) // 2 + 1.
        # This equals n // 32 overall when n is a multiple of 32 and stays
        # correct for other sizes, where n // 32 would under-count.
        for _ in range(5):
            height = (height - 1) // 2 + 1
            width = (width - 1) // 2 + 1
        # The classifier width follows num_classes instead of a hard-coded 10.
        self.fc = nn.Linear(in_features=512*height*width, out_features=num_classes)


    def forward(self, x):
        """Return the ``num_classes`` outputs of the linear layer for a batch ``x``."""
        x = self.conv(x)
        x = self.block1(x)
        x = self.block2(x)
        x = self.block3(x)
        x = self.block4(x)
        # Flatten everything except the batch dimension.
        x = x.flatten(start_dim=1)
        x = self.fc(x)
        return x

In [73]:
# Build the full network for a 3-channel 224x224 input with 10 outputs
# and print its layer-by-layer summary.
input_shape = (3, 224, 224)
num_classes = 10
model = resnet(input_shape=input_shape, num_classes=num_classes)
model = model.to(device)
summary(model, input_shape)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 112, 112]           9,472
       BatchNorm2d-2         [-1, 64, 112, 112]             128
              ReLU-3         [-1, 64, 112, 112]               0
         MaxPool2d-4           [-1, 64, 56, 56]               0
            Conv2d-5           [-1, 64, 56, 56]          36,928
       BatchNorm2d-6           [-1, 64, 56, 56]             128
              ReLU-7           [-1, 64, 56, 56]               0
            Conv2d-8           [-1, 64, 56, 56]          36,928
       BatchNorm2d-9           [-1, 64, 56, 56]             128
        res_block-10           [-1, 64, 56, 56]               0
           Conv2d-11           [-1, 64, 56, 56]          36,928
      BatchNorm2d-12           [-1, 64, 56, 56]             128
             ReLU-13           [-1, 64, 56, 56]               0
           Conv2d-14           [-1, 64,