## DenseNet

- [Paper: Densely Connected Convolutional Networks (Huang et al.)](https://arxiv.org/pdf/1608.06993.pdf)

![](img/densenet-02.png)

In [23]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchsummary import summary

In [33]:
class BnReluConv(nn.Sequential):
    """Pre-activation convolution unit: BatchNorm2d -> ReLU -> Conv2d.

    Args:
        nin: Number of input channels (also the BatchNorm2d feature count).
        nout: Number of output channels produced by the convolution.
        kernel_size: Kernel size of the convolution.
        stride: Stride of the convolution.
        padding: Padding of the convolution.
        bias: Whether the convolution carries a bias term (default ``False``).
    """

    def __init__(self, nin, nout, kernel_size, stride, padding, bias=False):
        super().__init__()
        # Assigning a module to an attribute registers it under that name,
        # in assignment order, which is the order nn.Sequential runs them in.
        self.batch_norm = nn.BatchNorm2d(nin)
        self.relu = nn.ReLU(inplace=True)
        self.conv = nn.Conv2d(
            nin,
            nout,
            kernel_size,
            stride=stride,
            padding=padding,
            bias=bias,
        )

    def forward(self, x):
        """Apply batch norm, ReLU and the convolution to ``x``, in that order."""
        return super().forward(x)

In [64]:
class BottleneckLayer(nn.Sequential):
    """DenseNet bottleneck layer: 1x1 BN-ReLU-Conv followed by 3x3 BN-ReLU-Conv.

    The 1x1 convolution maps ``nin`` channels to ``4 * growth_rate`` channels
    and the 3x3 convolution reduces those to ``growth_rate`` new feature maps.
    The new feature maps are concatenated to the input along the channel
    dimension, so the output has ``nin + growth_rate`` channels.

    Args:
        nin: Number of input channels.
        growth_rate: Number of new feature maps this layer contributes
            (``k`` in the DenseNet paper).
        drop_rate: Dropout probability applied to the new feature maps
            before concatenation; ``0`` disables dropout.
    """

    def __init__(self, nin, growth_rate, drop_rate=0.2):
        super().__init__()
        self.add_module(
            'conv_1x1',
            BnReluConv(
                nin=nin,
                nout=growth_rate*4,
                kernel_size=1,
                stride=1,
                padding=0,
                bias=False
            )
        )
        self.add_module(
            'conv_3x3',
            BnReluConv(
                nin=growth_rate*4,
                nout=growth_rate,
                kernel_size=3,
                stride=1,
                padding=1,
                bias=False
            )
        )
        self.drop_rate = drop_rate

    def forward(self, x):
        """Return ``x`` concatenated with the newly computed feature maps.

        Args:
            x: Input tensor with channels on dimension 1.

        Returns:
            Tensor with the input channels followed by ``growth_rate`` new
            channels on dimension 1.
        """
        new_features = super().forward(x)
        if self.drop_rate > 0:
            # The dropped-out tensor must be the one that gets concatenated;
            # previously the result was stored in an unused variable, so
            # dropout was silently skipped. F.dropout is the identity when
            # self.training is False, so eval-mode output is unchanged.
            new_features = F.dropout(
                input=new_features,
                p=self.drop_rate,
                training=self.training
            )
        return torch.cat((x, new_features), 1)

In [65]:
class TransitionLayer(nn.Sequential):
    """Transition between dense blocks: 1x1 BN-ReLU-Conv, then 2x2 average pooling.

    Args:
        nin: Number of input channels.
        theta: Compression factor; the 1x1 convolution outputs
            ``int(nin * theta)`` channels.
    """

    def __init__(self, nin, theta=0.5):
        super().__init__()
        # int() truncates, so a fractional channel count is rounded down.
        compressed_channels = int(nin * theta)
        self.conv_1x1 = BnReluConv(
            nin=nin,
            nout=compressed_channels,
            kernel_size=1,
            stride=1,
            padding=0,
            bias=False,
        )
        # Kernel 2 with stride 2 halves each spatial dimension.
        self.avg_pool_2x2 = nn.AvgPool2d(kernel_size=2, stride=2, padding=0)

![](img/densenet-01.png)

In [66]:
class DenseBlock(nn.Sequential):
    """A stack of bottleneck layers with a growing channel count.

    Layer ``i`` (0-based) is registered as ``bottleneck_layer_{i}`` and is
    built with ``nin + growth_rate * i`` input channels.

    Args:
        nin: Number of channels entering the block.
        num_bottleneck_layers: How many bottleneck layers to stack.
        growth_rate: Growth rate passed to every bottleneck layer.
        drop_rate: Dropout rate passed to every bottleneck layer.
    """

    def __init__(self, nin, num_bottleneck_layers, growth_rate, drop_rate=0.2):
        super().__init__()
        channels = nin
        for idx in range(num_bottleneck_layers):
            layer = BottleneckLayer(
                nin=channels,
                growth_rate=growth_rate,
                drop_rate=drop_rate,
            )
            self.add_module(f'bottleneck_layer_{idx:d}', layer)
            # The next layer is built for growth_rate more input channels.
            channels += growth_rate

In [72]:
class DenseNet(nn.Module):
    """DenseNet classifier with three dense blocks and two transition layers.

    Pipeline: 3x3 stem convolution -> dense block 1 -> transition 1 ->
    dense block 2 -> transition 2 -> dense block 3 -> global average pooling
    -> fully connected layer.

    Args:
        growth_rate: Growth rate used by every dense block; the stem
            convolution outputs ``2 * growth_rate`` channels.
        num_layers: Total depth; ``(num_layers - 4)`` must be divisible by 6.
            Each dense block gets ``(num_layers - 4) // 6`` bottleneck layers.
        theta: Compression factor used by both transition layers.
        drop_rate: Dropout rate forwarded to the dense blocks.
        num_classes: Size of the output of the final linear layer.
    """

    def __init__(self, growth_rate=12, num_layers=100, theta=0.5, drop_rate=0.2, num_classes=10):
        super().__init__()
        assert (num_layers - 4) % 6 == 0

        # Bottleneck layers per dense block, and the channels one block adds.
        layers_per_block = (num_layers - 4) // 6
        channels_added_per_block = growth_rate * layers_per_block
        stem_channels = growth_rate * 2

        # Stem: 3 input channels -> stem_channels, spatial size preserved
        # (3x3 kernel, stride 1, padding 1).
        self.init_conv = nn.Conv2d(
            3,
            stem_channels,
            kernel_size=3,
            stride=1,
            padding=1,
            bias=True,
        )

        # Block 1 keeps the spatial size and grows the channel count.
        self.dense_block_1 = DenseBlock(
            nin=stem_channels,
            num_bottleneck_layers=layers_per_block,
            growth_rate=growth_rate,
            drop_rate=drop_rate,
        )
        block_1_channels = stem_channels + channels_added_per_block
        # Transition 1 compresses channels by theta and halves the spatial size.
        self.transition_layer_1 = TransitionLayer(nin=block_1_channels, theta=theta)

        block_2_in_channels = int(block_1_channels * theta)
        self.dense_block_2 = DenseBlock(
            nin=block_2_in_channels,
            num_bottleneck_layers=layers_per_block,
            growth_rate=growth_rate,
            drop_rate=drop_rate,
        )
        block_2_channels = block_2_in_channels + channels_added_per_block
        # Transition 2 compresses channels by theta and halves the spatial size.
        self.transition_layer_2 = TransitionLayer(nin=block_2_channels, theta=theta)

        block_3_in_channels = int(block_2_channels * theta)
        self.dense_block_3 = DenseBlock(
            nin=block_3_in_channels,
            num_bottleneck_layers=layers_per_block,
            growth_rate=growth_rate,
            drop_rate=drop_rate,
        )

        # Classifier over the channels leaving dense block 3.
        self.fc_layer = nn.Linear(
            block_3_in_channels + channels_added_per_block,
            num_classes,
        )

    def forward(self, x):
        """Compute class scores for a batch of 3-channel images.

        Args:
            x: Input batch with 3 channels on dimension 1.

        Returns:
            Tensor of shape ``(batch, num_classes)`` from the final linear layer.
        """
        features = self.init_conv(x)
        features = self.transition_layer_1(self.dense_block_1(features))
        features = self.transition_layer_2(self.dense_block_2(features))
        features = self.dense_block_3(features)

        # Global average pooling to 1x1, then flatten to (batch, channels).
        pooled = F.adaptive_avg_pool2d(features, (1, 1))
        flattened = pooled.view(pooled.size(0), -1)
        return self.fc_layer(flattened)

In [73]:
def densenet_121():
    """Build the DenseNet configuration used in this notebook.

    NOTE(review): despite the name, the arguments below request
    ``num_layers=40`` with ``growth_rate=12`` and 10 output classes, not a
    121-layer network — confirm whether the name or the configuration is
    the intended one.
    """
    config = {
        'growth_rate': 12,
        'num_layers': 40,
        'theta': 0.5,
        'drop_rate': 0.2,
        'num_classes': 10,
    }
    return DenseNet(**config)

In [74]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Build the network and move its parameters and buffers to the chosen device.
net = densenet_121()
net = net.to(device)

![](img/densenet-03.png)

In [79]:
# Produce a per-layer summary of the network for a 3 x 32 x 32 input.
# NOTE(review): verbose=0 presumably suppresses printing so the notebook
# displays the returned summary object — confirm against the installed
# torchsummary version.
summary(net, input_data=(3, 32, 32), verbose=0)

------------------------------------------------------------------------------------------
Layer (type:depth-idx)                   Output Shape              Param #
├─Conv2d: 1-1                            [-1, 24, 32, 32]          672
├─DenseBlock: 1-2                        [-1, 96, 32, 32]          --
|    └─BottleneckLayer: 2-1              [-1, 36, 32, 32]          --
|    |    └─BnReluConv: 3-1              [-1, 48, 32, 32]          1,200
|    |    └─BnReluConv: 3-2              [-1, 12, 32, 32]          5,280
|    └─BottleneckLayer: 2-2              [-1, 48, 32, 32]          --
|    |    └─BnReluConv: 3-3              [-1, 48, 32, 32]          1,800
|    |    └─BnReluConv: 3-4              [-1, 12, 32, 32]          5,280
|    └─BottleneckLayer: 2-3              [-1, 60, 32, 32]          --
|    |    └─BnReluConv: 3-5              [-1, 48, 32, 32]          2,400
|    |    └─BnReluConv: 3-6              [-1, 12, 32, 32]          5,280
|    └─BottleneckLayer: 2-4              [-1,