In [None]:
# !pip install torchsummary

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

import os
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

# torch에서 몇몇 모델들을 기본적으로 제공해주긴 합니다.

In [None]:
from torchvision.models import alexnet
from torchvision.models import vgg16
from torchvision.models import googlenet
from torchvision.models import resnet18
from torchvision.models import densenet121

In [None]:
# Run on the first CUDA GPU when one is visible, otherwise fall back to the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(device)

## 그래도 한번씩 모델을 직접 짜보도록 하죠.

## AlexNet

<img src="img/AlexNet.png" width="600px" height="400px"></img><br/>

<img src="img/alexnet(2).png" width="600px" height="800px"></img><br/>

<span style = 'font-size:1.4em;line-height:1.5em'>Input Size가 224\*224\*3이 아닌 227\*227\*3으로 변경되었습니다. 관련 자료는 아래 link를 참조하세요</span>

https://datascience.stackexchange.com/questions/29245/what-is-the-input-size-of-alex-net

In [None]:
class MyAlexNet(nn.Module):
    """AlexNet-style image classifier.

    The classifier head is sized for 3x227x227 inputs: the spatial size goes
    227 -> 55 (conv1) -> 27 (pool) -> 27 (conv2) -> 13 (pool) -> 13 (conv3-5)
    -> 6 (pool), which gives the 6*6*256 features consumed by ``fc``.

    Args:
        num_classes: Number of output classes.
        dropout: Drop probability of the two dropout layers in ``fc``.

    Note:
        ``forward`` returns softmax probabilities, not logits. Losses that
        apply their own (log-)softmax, such as ``nn.CrossEntropyLoss``, should
        not be fed this output directly.
    """

    def __init__(self, num_classes=1000, dropout=0.5):
        super().__init__()

        # Feature extractor: five convolutional stages, three of them pooled.
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 96, kernel_size=11, stride=4),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(96, 256, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        self.conv3 = nn.Sequential(
            nn.Conv2d(256, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
        )
        self.conv4 = nn.Sequential(
            nn.Conv2d(384, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
        )
        self.conv5 = nn.Sequential(
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )

        # Classifier head; the last ReLU is intentionally not followed by dropout.
        classifier = [
            nn.Dropout(p=dropout),
            nn.Linear(6 * 6 * 256, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(p=dropout),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        ]
        self.fc = nn.Sequential(*classifier)

        # nn.Module.apply visits every submodule (and self) once.
        self.apply(self._init_weights)

    def _init_weights(self, submodule):
        """Initialise one submodule; meant to be passed to ``self.apply``.

        Conv2d weights get Xavier-normal init, Linear weights get He
        (Kaiming-normal) init, and any existing bias is zeroed. Every other
        module type is left untouched.
        """
        if isinstance(submodule, nn.Conv2d):
            nn.init.xavier_normal_(submodule.weight)
        elif isinstance(submodule, nn.Linear):
            nn.init.kaiming_normal_(submodule.weight)
        else:
            return
        if submodule.bias is not None:
            submodule.bias.data.fill_(0)

    def forward(self, x):
        """Return per-class probabilities (softmax over dim 1) for a batch of images."""
        for stage in (self.conv1, self.conv2, self.conv3, self.conv4, self.conv5):
            x = stage(x)
        scores = self.fc(torch.flatten(x, 1))
        return F.softmax(scores, dim=1)


In [None]:
# Instantiate with the default 1000 classes, move the parameters to `device`,
# and print the module tree.
model = MyAlexNet().to(device)
print(model)

In [None]:
# torchsummary is a third-party package (see the commented-out pip install in the first cell).
from torchsummary import summary
# Summarise the model for a single 3x227x227 input, on the same device type as the model.
summary(model, input_size=(3, 227, 227), device=device.type)

## VGG16

<img src="img/vgg16.png" width="600px" height="800px"></img><br/>

<img src="img/VGG16(2).png" width="600px" height="400px"></img><br/>

In [None]:
def make_layer(config):
    """Build a VGG-style convolutional stack from a layer specification.

    Args:
        config: Iterable whose items are either the string "M" (append a 2x2,
            stride-2 max-pool) or an output channel count (append a 3x3,
            padding-1 convolution followed by ReLU).

    Returns:
        nn.Sequential holding the layers in specification order. The first
        convolution always takes 3 input channels.
    """
    stack = []
    channels = 3  # the stack starts from a 3-channel input
    for spec in config:
        if spec != "M":
            stack += [nn.Conv2d(channels, spec, kernel_size=3, padding=1), nn.ReLU()]
            channels = spec  # the next convolution consumes what this one produced
            continue
        stack.append(nn.MaxPool2d(kernel_size=2, stride=2))
    return nn.Sequential(*stack)

In [None]:
# VGG16 layer specification consumed by make_layer: integers are conv output
# channels, 'M' is a max-pool. One row per stage.
vgg16_configs = [
    64, 64, 'M',
    128, 128, 'M',
    256, 256, 256, 'M',
    512, 512, 512, 'M',
    512, 512, 512, 'M',
]

In [None]:
# Build the VGG16 convolutional stack on its own so it can be inspected below.
result = make_layer(vgg16_configs)

In [None]:
# No call parentheses: this evaluates to the bound `modules` method, and the
# cell displays that method's repr.
result.modules

In [None]:
class MyVGG16(nn.Module):
    """VGG16 classifier built from ``make_layer(vgg16_configs)``.

    The classifier head expects a 512x7x7 feature map, i.e. a 224x224 input
    halved by each of the five max-pools in ``vgg16_configs``.

    Args:
        num_classes: Number of output classes.
        dropout: Drop probability of the two dropout layers in ``fc``.
        initialize_weight: When True, run ``_init_weights`` after construction;
            otherwise PyTorch's default initialisation is kept.

    Note:
        ``forward`` returns softmax probabilities, not logits. Losses that
        apply their own (log-)softmax, such as ``nn.CrossEntropyLoss``, should
        not be fed this output directly.
    """

    def __init__(self, num_classes=1000, dropout=0.5, initialize_weight = False):
        super(MyVGG16, self).__init__()
        self.convs = make_layer(vgg16_configs)
        # Both dropout layers honour the `dropout` argument.
        self.fc = nn.Sequential(nn.Linear(512 * 7 * 7, 4096),
                                nn.ReLU(),
                                nn.Dropout(dropout),
                                nn.Linear(4096, 4096),
                                nn.ReLU(),
                                nn.Dropout(dropout),
                                nn.Linear(4096, num_classes))
        if initialize_weight:
            self._init_weights()

    def _init_weights(self):
        """He-initialise conv weights, draw Linear weights from N(0, 0.01), zero all biases."""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                # make_layer adds no BatchNorm today; this branch only matters
                # if the feature stack is extended with it.
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)

    def forward(self, x):
        """Return per-class probabilities (softmax over dim 1) for a batch of images."""
        x = self.convs(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)
        out = F.softmax(x, dim=1)
        return out


In [None]:
# Default construction: 1000 classes, PyTorch default weight init
# (initialize_weight defaults to False).
model = MyVGG16().to(device)
print(model)

In [None]:
# VGG16's classifier is sized for 224x224 inputs: five 2x2 max-pools give 224 / 32 = 7,
# matching the 512*7*7 features expected by MyVGG16.fc.
summary(model, input_size=(3, 224, 224), device=device.type)

## GoogLeNet

<img src="img/GoogleNet.png"></img><br/>

<img src="img/GoogleNet(2).png"></img><br/>

(참고: https://devlee247.com/papers/2022-06-20-googlenet/)

### Step1. Inception Module 구현하기

In [None]:
class BaseConv2d(nn.Module):
    """A single ``nn.Conv2d`` followed by an in-place ReLU.

    Args:
        in_channels: Input channel count of the convolution.
        out_channels: Output channel count of the convolution.
        **kwargs: Passed through unchanged to ``nn.Conv2d``
            (kernel_size, stride, padding, ...).
    """

    def __init__(self, in_channels, out_channels, **kwargs):
        super().__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, **kwargs)
        self.ReLU = nn.ReLU(inplace=True)

    def forward(self, x):
        """Convolve ``x`` and rectify the result."""
        activated = self.conv(x)
        activated = self.ReLU(activated)
        return activated

class InceptionModule(nn.Module):
    """Inception block: four parallel branches concatenated along the channel axis.

    Args:
        in_channels: Channels of the incoming feature map.
        ch1x1: Output channels of the 1x1 branch.
        ch3x3_red: Channels of the 1x1 reduction in front of the 3x3 convolution.
        ch3x3: Output channels of the 3x3 branch.
        ch5x5_red: Channels of the 1x1 reduction in front of the 5x5 convolution.
        ch5x5: Output channels of the 5x5 branch.
        pool: Output channels of the 1x1 projection after the max-pool branch.

    The output has ``ch1x1 + ch3x3 + ch5x5 + pool`` channels and the same
    spatial size as the input (every branch uses stride 1 with matching padding).
    """

    def __init__(self, in_channels, ch1x1, ch3x3_red, ch3x3, ch5x5_red, ch5x5, pool):
        super(InceptionModule, self).__init__()

        # Kept as a plain Conv2d so its parameters stay named `conv1x1.weight` /
        # `conv1x1.bias`; the ReLU for this branch is applied in forward().
        self.conv1x1 = nn.Conv2d(in_channels, ch1x1, kernel_size=1)

        self.conv3x3 = nn.Sequential(BaseConv2d(in_channels, ch3x3_red, kernel_size=1),
                                     BaseConv2d(ch3x3_red, ch3x3, kernel_size=3, padding=1))

        self.conv5x5 = nn.Sequential(BaseConv2d(in_channels, ch5x5_red, kernel_size=1),
                                     BaseConv2d(ch5x5_red, ch5x5, kernel_size=5, padding=2))

        self.pool = nn.Sequential(nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
                                  BaseConv2d(in_channels, pool, kernel_size=1))

    def forward(self, x):
        """Run all four branches on ``x`` and concatenate their outputs on dim 1."""
        # The 1x1 branch is rectified like the other three, so every
        # convolution in the block is followed by a ReLU.
        x1 = F.relu(self.conv1x1(x))
        x2 = self.conv3x3(x)
        x3 = self.conv5x5(x)
        x4 = self.pool(x)

        # x1..x4 are 4-D tensors (batch_size, n_channel, height, width);
        # concatenate along the channel dimension (dim 1).
        return torch.cat([x1, x2, x3, x4], 1)

### Step2. Auxiliary Module 구현하기

In [None]:
class AuxModule(nn.Module):
    """Auxiliary classifier head attached to an intermediate GoogLeNet feature map.

    Args:
        in_channels: Channels of the feature map the head is attached to.
        num_classes: Number of output classes.

    ``forward`` returns raw logits (no softmax).
    """

    def __init__(self, in_channels, num_classes):
        super(AuxModule, self).__init__()

        # Pool to a fixed 4x4 grid so the Linear input size does not depend on
        # the spatial size of the incoming feature map.
        self.avgpool = nn.AdaptiveAvgPool2d((4,4))
        self.conv1 = BaseConv2d(in_channels, 128, kernel_size=1)
        self.fc = nn.Sequential(
            nn.Linear(4*4*128, 1024),
            nn.ReLU(inplace=True),
            # Element-wise dropout: the tensor here is 2-D (batch, features).
            # nn.Dropout2d is channel-wise dropout meant for feature maps and
            # warns when given a 2-D input.
            nn.Dropout(p=0.7),
            nn.Linear(1024, num_classes)
        )

    def forward(self, x):
        """Pool, project to 128 channels, flatten, and classify."""
        x = self.avgpool(x)
        x = self.conv1(x)
        x = torch.flatten(x, 1)
        return self.fc(x)

### Step3. GoogLeNet 구현하기

In [None]:
class MyGoogleNet(nn.Module):
    """GoogLeNet (Inception v1) classifier with two auxiliary heads.

    Args:
        num_classes: Number of classes predicted by the main head and by both
            auxiliary heads.

    Attributes:
        is_training: Manual switch for the auxiliary heads (True by default).
            The heads are evaluated only when this flag is True and the module
            is in train mode, so ``model.eval()`` yields a single tensor.

    ``forward`` returns raw logits (no softmax):
        * train mode with ``is_training`` True: ``[main, aux1, aux2]``
        * otherwise: the main logits tensor only
    """

    def __init__(self, num_classes=1000):
        super(MyGoogleNet, self).__init__()
        self.is_training=True

        # Stem. The ReLU after the 7x7 convolution keeps the stem consistent
        # with every other convolution in the network; it has no parameters,
        # so parameter names (`conv1.0.weight`, ...) are unchanged.
        self.conv1 = nn.Sequential(nn.Conv2d(3,64,kernel_size=7, stride=2, padding=3),
                                   nn.ReLU(inplace=True),
                                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
                                   nn.LocalResponseNorm(2))

        self.conv2 = nn.Sequential(BaseConv2d(64, 64, kernel_size=1),
                                   BaseConv2d(64, 192, kernel_size=3, padding=1),
                                   nn.LocalResponseNorm(2),
                                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1))

        # Each InceptionModule outputs ch1x1 + ch3x3 + ch5x5 + pool channels,
        # which is the in_channels of the module that follows it.
        self.inception_3a = InceptionModule(in_channels=192, ch1x1=64, ch3x3_red=96, ch3x3=128,
                                            ch5x5_red=16, ch5x5=32, pool=32)
        self.inception_3b = InceptionModule(in_channels=256, ch1x1=128, ch3x3_red=128, ch3x3=192,
                                            ch5x5_red=32, ch5x5=96, pool=64)
        self.maxpool_3 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.inception_4a = InceptionModule(in_channels=480, ch1x1=192, ch3x3_red=96, ch3x3=208,
                                            ch5x5_red=16, ch5x5=48, pool=64)
        self.aux1 = AuxModule(512, num_classes)  # attached to the output of inception_4a

        self.inception_4b = InceptionModule(in_channels=512, ch1x1=160, ch3x3_red=112, ch3x3=224,
                                            ch5x5_red=24, ch5x5=64, pool=64)
        self.inception_4c = InceptionModule(in_channels=512, ch1x1=128, ch3x3_red=128, ch3x3=256,
                                            ch5x5_red=24, ch5x5=64, pool=64)
        self.inception_4d = InceptionModule(in_channels=512, ch1x1=112, ch3x3_red=144, ch3x3=288,
                                            ch5x5_red=32, ch5x5=64, pool=64)
        self.aux2 = AuxModule(528, num_classes)  # attached to the output of inception_4d

        self.inception_4e = InceptionModule(in_channels=528, ch1x1=256, ch3x3_red=160, ch3x3=320,
                                            ch5x5_red=32, ch5x5=128, pool=128)
        self.maxpool_4 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.inception_5a = InceptionModule(in_channels=832, ch1x1=256, ch3x3_red=160, ch3x3=320,
                                            ch5x5_red=32, ch5x5=128, pool=128)
        self.inception_5b = InceptionModule(in_channels=832, ch1x1=384, ch3x3_red=192, ch3x3=384,
                                            ch5x5_red=48, ch5x5=128, pool=128)

        # AdaptiveAvgPool explanation
        # https://stackoverflow.com/questions/58692476/what-is-adaptive-average-pooling-and-how-does-it-work
        self.avgpool = nn.AdaptiveAvgPool2d((1,1))
        # Element-wise dropout: it is applied after flatten, on a 2-D
        # (batch, 1024) tensor, where channel-wise nn.Dropout2d does not apply.
        self.dropout = nn.Dropout(p=0.4)
        self.fc = nn.Linear(1024, num_classes)

        self._init_weights()

    def _init_weights(self):
        """He-initialise conv weights, draw Linear weights from N(0, 0.01), zero all biases."""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)

    def forward(self, x):
        """Compute class logits; see the class docstring for the return shape."""
        # The auxiliary heads exist only to add training signal, so they are
        # skipped when the module is in eval mode or when the caller switched
        # them off through `is_training`.
        use_aux = self.training and self.is_training

        x = self.conv1(x)
        x = self.conv2(x)

        x = self.inception_3a(x)
        x = self.inception_3b(x)
        x = self.maxpool_3(x)

        x = self.inception_4a(x)
        if use_aux:
            out1 = self.aux1(x)

        x = self.inception_4b(x)
        x = self.inception_4c(x)
        x = self.inception_4d(x)
        if use_aux:
            out2 = self.aux2(x)

        x = self.inception_4e(x)
        x = self.maxpool_4(x)

        x = self.inception_5a(x)
        x = self.inception_5b(x)

        x = self.avgpool(x)

        x = torch.flatten(x, 1)
        x = self.dropout(x)

        x = self.fc(x)
        if use_aux:
            return [x, out1, out2]
        return x


In [None]:
# Default construction: 1000 classes for the main head and both auxiliary heads.
model = MyGoogleNet().to(device)
print(model)

In [None]:
# The adaptive average pools in MyGoogleNet / AuxModule fix the sizes fed to the
# Linear layers, so the 227x227 input used here is accepted.
summary(model, input_size=(3, 227, 227), device=device.type)

## ResNet (18-layers)

<img src="img/resnet.png"></img><br/>

<img src="img/resnet(2).png"></img><br/>

### 18-layer ResNet 구현해보기

In [None]:
def conv3x3(in_ch, out_ch, stride=1, groups=1, dilation=1):
    """Return a bias-free 3x3 ``nn.Conv2d`` whose padding equals its dilation.

    Args:
        in_ch: Number of input channels.
        out_ch: Number of output channels.
        stride: Convolution stride.
        groups: Number of channel groups.
        dilation: Kernel dilation; also used as the padding so that a stride-1
            convolution keeps the spatial size.

    The convolution has ``bias=False`` because it is meant to be followed by a
    normalisation layer that carries its own bias.
    """
    conv_kwargs = dict(kernel_size=3, stride=stride, padding=dilation,
                       dilation=dilation, groups=groups, bias=False)
    return nn.Conv2d(in_ch, out_ch, **conv_kwargs)

In [None]:
class BasicBlock(nn.Module):
    """Two-convolution residual block (3x3 -> norm -> ReLU -> 3x3 -> norm, plus shortcut).

    Args:
        inplanes: Channels of the block input.
        planes: Channels produced by both convolutions.
        stride: Stride of the first convolution.
        downsample: Optional module applied to the input to produce the
            shortcut; when None the input itself is the shortcut.
        groups: Must be 1.
        base_width: Must be 64.
        dilation: Must not exceed 1.
        norm_layer: Callable building a normalisation layer from a channel
            count; defaults to ``nn.BatchNorm2d``.

    Raises:
        ValueError: If ``groups != 1`` or ``base_width != 64``.
        NotImplementedError: If ``dilation > 1``.
    """

    # Ratio between the block's output channels and `planes`.
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
                 base_width=64, dilation=1, norm_layer=None):
        super().__init__()
        make_norm = nn.BatchNorm2d if norm_layer is None else norm_layer
        if not (groups == 1 and base_width == 64):
            raise ValueError('BasicBlock only supports groups=1 and base_width=64')
        if dilation > 1:
            raise NotImplementedError("Dilation > 1 not supported in BasicBlock")

        # When stride != 1, both conv1 and the downsample module shrink the input.
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = make_norm(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = make_norm(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Return ``relu(residual(x) + shortcut(x))``."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        shortcut = x if self.downsample is None else self.downsample(x)

        out += shortcut
        return self.relu(out)

In [None]:
class MyResNet18(nn.Module):
    """ResNet classifier assembled from four stages of ``block`` units.

    Args:
        block: Residual block class. It is called as
            ``block(inplanes, planes, stride, downsample, groups, base_width,
            dilation, norm_layer)`` and must expose a class attribute
            ``expansion`` (output channels / ``planes``).
        layers: Four integers, the number of blocks in ``block2`` .. ``block5``
            (``[2, 2, 2, 2]`` for an 18-layer network with ``BasicBlock``).
        num_classes: Number of output classes.
        zero_init_residual: When True, the weight of the last normalisation
            layer (``bn2``) of every ``BasicBlock`` is set to 0, so each
            residual branch initially outputs zero.
        groups: Forwarded to every block.
        width_per_group: Forwarded to every block as ``base_width``.
        replace_stride_with_dilation: None or three booleans, one per strided
            stage (``block3`` .. ``block5``); True replaces that stage's stride
            with dilation.
        norm_layer: Callable building a normalisation layer from a channel
            count; defaults to ``nn.BatchNorm2d``.

    Raises:
        ValueError: If ``replace_stride_with_dilation`` does not have three elements.

    ``forward`` returns raw logits (no softmax).
    """

    def __init__(self, block, layers, num_classes=1000, zero_init_residual=False,
                 groups=1, width_per_group=64, replace_stride_with_dilation=None,
                 norm_layer=None):
        super(MyResNet18, self).__init__()
        self.inplanes = 64
        self.dilation = 1
        self.groups = groups
        self.base_width = width_per_group
        self.zero_init_residual = zero_init_residual

        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        self._norm_layer = norm_layer

        # each element in the tuple indicates if we should replace
        # the 2x2 stride with a dilated convolution instead
        if replace_stride_with_dilation is None:
            replace_stride_with_dilation = [False, False, False]
        if len(replace_stride_with_dilation) != 3:
            raise ValueError("replace_stride_with_dilation should be None "
                             "or a 3-element tuple, got {}".format(replace_stride_with_dilation))

        # Stem
        self.conv1   = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1     = norm_layer(self.inplanes)
        self.relu    = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Residual stages; _make_layer advances self.inplanes as it goes.
        self.block2 = self._make_layer(block, 64, layers[0])
        self.block3 = self._make_layer(block, 128, layers[1], stride=2,
                                       dilate=replace_stride_with_dilation[0])
        self.block4 = self._make_layer(block, 256, layers[2], stride=2,
                                       dilate=replace_stride_with_dilation[1])
        self.block5 = self._make_layer(block, 512, layers[3], stride=2,
                                       dilate=replace_stride_with_dilation[2])
        self.avgpool = nn.AdaptiveAvgPool2d((1,1))
        self.fc = nn.Linear(512*block.expansion, num_classes)

        # Apply the custom initialisation; without this call
        # `zero_init_residual` would have no effect.
        self._init_weights()

    def _init_weights(self):
        """He-initialise (fan_out) every conv, reset every norm layer to weight 1 / bias 0.

        With ``zero_init_residual`` set, ``bn2.weight`` of each ``BasicBlock``
        is zeroed afterwards. Linear layers keep PyTorch's default init.
        """
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

        if self.zero_init_residual:
            for m in self.modules():
                if isinstance(m, BasicBlock):
                    nn.init.constant_(m.bn2.weight, 0)

    def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
        """Build one stage of ``blocks`` residual blocks producing ``planes * block.expansion`` channels.

        Only the first block of the stage receives ``stride`` and the
        ``downsample`` shortcut. ``self.inplanes`` (and ``self.dilation`` when
        ``dilate`` is True) are updated as a side effect.
        """
        norm_layer = self._norm_layer
        downsample = None
        previous_dilation = self.dilation
        if dilate:
            self.dilation *= stride
            stride = 1
        if stride != 1 or self.inplanes != planes * block.expansion:
            # Projection shortcut: a bias-free 1x1 convolution matches the
            # channel count and stride of the residual branch.
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                norm_layer(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample, self.groups,
                            self.base_width, previous_dilation, norm_layer))
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes, groups=self.groups,
                                base_width=self.base_width, dilation=self.dilation,
                                norm_layer=norm_layer))

        return nn.Sequential(*layers)

    def forward(self, x):
        """Return class logits for a batch of images."""
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.block2(x)
        x = self.block3(x)
        x = self.block4(x)
        x = self.block5(x)

        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        result = self.fc(x)
        return result

In [None]:
# layers=[2,2,2,2]: two BasicBlocks in each of the four residual stages.
model = MyResNet18(block=BasicBlock, layers=[2,2,2,2]).to(device)
print(model)

In [None]:
# Summarise the ResNet for a single 3x224x224 input.
summary(model, input_size=(3, 224, 224), device=device.type)

## DenseNet

<img src="img/DenseNet.png"></img><br/>

<img src="img/DenseNet(2).png"></img><br/>

### (1) BottleNeck 구현

In [None]:
class BottleNeck(nn.Module):
    """DenseNet bottleneck layer: BN-ReLU-Conv1x1 then BN-ReLU-Conv3x3.

    Args:
        in_channels: Channels of the incoming feature map.
        growth_rate: Number of new channels produced by the layer; the 1x1
            convolution widens to ``4 * growth_rate`` channels first.

    The output is the input concatenated with the new features along dim 1,
    so it has ``in_channels + growth_rate`` channels.
    """

    def __init__(self, in_channels, growth_rate):
        super().__init__()
        bottleneck_width = 4 * growth_rate

        stages = [
            nn.BatchNorm2d(in_channels),
            nn.ReLU(),
            nn.Conv2d(in_channels, bottleneck_width, 1, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(bottleneck_width),
            nn.ReLU(),
            nn.Conv2d(bottleneck_width, growth_rate, 3, stride=1, padding=1, bias=False),
        ]
        self.residual = nn.Sequential(*stages)

        # An empty Sequential returns its input unchanged.
        self.shortcut = nn.Sequential()

    def forward(self, x):
        """Return ``x`` with the newly computed feature maps appended on the channel axis."""
        passthrough = self.shortcut(x)
        new_features = self.residual(x)
        return torch.cat((passthrough, new_features), 1)

### (2) Transition layer 구현

In [None]:
class Transition(nn.Module):
    """DenseNet transition layer: BN-ReLU-Conv1x1 followed by 2x2 average pooling.

    Args:
        in_channels: Channels of the incoming feature map.
        out_channels: Channels after the 1x1 convolution.

    The average pool (kernel 2, stride 2) halves the height and width.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        squeeze = nn.Conv2d(in_channels, out_channels, 1, stride=1, padding=0, bias=False)
        self.down_sample = nn.Sequential(
            nn.BatchNorm2d(in_channels),
            nn.ReLU(),
            squeeze,
            nn.AvgPool2d(2, stride=2),
        )

    def forward(self, x):
        """Compress the channels of ``x`` and halve its spatial size."""
        reduced = self.down_sample(x)
        return reduced

### (3) DenseNet 구현

In [None]:
class MyDenseNet121(nn.Module):
    """DenseNet classifier built from ``BottleNeck`` dense blocks and ``Transition`` layers.

    Args:
        nblocks: Number of bottleneck layers in each dense block. Every block
            except the last is followed by a transition layer.
        growth_rate: Channels added by each bottleneck layer.
        reduction: Fraction of channels kept by each transition layer
            (the result is truncated to an int).
        num_classes: Number of output classes.
        init_weights: When True, run ``_initialize_weights`` after construction.

    ``forward`` returns raw logits (no softmax).
    """

    def __init__(self, nblocks, growth_rate=12, reduction=0.5, num_classes=10, init_weights=True):
        super().__init__()

        self.growth_rate = growth_rate
        channels = 2 * growth_rate  # width of the stem output feeding the first dense block

        self.conv1 = nn.Sequential(nn.Conv2d(3, channels, 7, stride=2, padding=3),
                                   nn.MaxPool2d(3, 2, padding=1))

        self.features = nn.Sequential()

        # Every dense block but the last is paired with a transition layer.
        for idx, depth in enumerate(nblocks[:-1]):
            self.features.add_module('dense_block_{}'.format(idx),
                                     self._make_dense_block(depth, channels))
            channels += growth_rate * depth
            compressed = int(reduction * channels)  # channels kept by the transition
            self.features.add_module('transition_layer_{}'.format(idx),
                                     Transition(channels, compressed))
            channels = compressed

        # The final dense block is followed by BN + ReLU instead of a transition.
        last = len(nblocks) - 1
        self.features.add_module('dense_block_{}'.format(last),
                                 self._make_dense_block(nblocks[last], channels))
        channels += growth_rate * nblocks[last]
        self.features.add_module('bn', nn.BatchNorm2d(channels))
        self.features.add_module('relu', nn.ReLU())

        # Global average pooling through AdaptiveAvgPool2d.
        self.avg_pool = nn.AdaptiveAvgPool2d((1,1))
        self.linear = nn.Linear(channels, num_classes)

        if init_weights:
            self._initialize_weights()

    def forward(self, x):
        """Return class logits for a batch of images."""
        pooled = self.avg_pool(self.features(self.conv1(x)))
        flat = pooled.view(pooled.size(0), -1)
        return self.linear(flat)

    def _make_dense_block(self, nblock, inner_channels):
        """Stack ``nblock`` bottleneck layers; layer i sees ``inner_channels + i * growth_rate`` channels."""
        dense_block = nn.Sequential()
        for offset in range(nblock):
            layer_in = inner_channels + offset * self.growth_rate
            dense_block.add_module('bottle_neck_layer_{}'.format(offset),
                                   BottleNeck(layer_in, self.growth_rate))
        return dense_block

    def _initialize_weights(self):
        """He-initialise (fan_out) convs, reset BatchNorm to 1/0, draw Linear weights from N(0, 0.01)."""
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight, mode='fan_out', nonlinearity='relu')
                if module.bias is not None:
                    nn.init.constant_(module.bias, 0)
                continue
            if isinstance(module, nn.BatchNorm2d):
                nn.init.constant_(module.weight, 1)
                nn.init.constant_(module.bias, 0)
                continue
            if isinstance(module, nn.Linear):
                nn.init.normal_(module.weight, 0, 0.01)
                nn.init.constant_(module.bias, 0)


In [None]:
# DenseNet-121 stacks (6, 12, 24, 16) bottleneck layers in its four dense blocks:
# 2 * (6 + 12 + 24 + 16) convs + stem conv + 3 transition convs + classifier = 121 layers.
# NOTE(review): growth_rate and num_classes stay at the class defaults (12 and 10);
# pass growth_rate=32, num_classes=1000 for the ImageNet configuration — confirm intent.
model = MyDenseNet121([6, 12, 24, 16]).to(device)
print(model)

In [None]:
# Summarise the DenseNet for a single 3x224x224 input.
summary(model, input_size=(3, 224, 224), device=device.type)