In [67]:
class IoULoss(nn.Module) :
    def __init__(self, method = 'IoU') :
        super().__init__()
        self.method = method

    def forward(self, inp, target) :
        '''
        input : (B, # of bboxes, 6)
        '''
        inp_w = inp[..., 2:3]
        inp_h = inp[..., 3:4]
        target_w = target[..., 2:3]
        target_h = target[..., 3:4]

        inp_area = inp_w * inp_h
        target_area = target_w * target_h

        inp_xmin = inp[..., 0:1] - inp_w / 2
        inp_ymin = inp[..., 1:2] - inp_h / 2
        inp_xmax = inp[..., 0:1] + inp_w / 2
        inp_ymax = inp[..., 1:2] + inp_w / 2

        target_xmin = target[..., 0:1] - target_w / 2
        target_ymin = target[..., 1:2] - target_h / 2
        target_xmax = target[..., 0:1] + target_w / 2
        target_ymax = target[..., 1:2] + target_w / 2
        
        inp_topleft = torch.cat([inp_xmin, inp_ymin], axis = -1)
        target_topleft = torch.cat([target_xmin, target_ymin], axis = -1)

        inp_bottomright = torch.cat([inp_xmax, inp_ymax], axis = -1)
        target_bottomright = torch.cat([target_xmax, target_ymax], axis = -1)

        intersection_top_left = torch.max(inp_topleft, target_topleft)
        intersection_bottom_right = torch.min(inp_bottomright, target_bottomright)



        area_inter = torch.prod(
            torch.clip(intersection_bottom_right - intersection_top_left, min = 0 , max = None), -1).unsqueeze(-1)

        iou = area_inter / (inp_area + target_area - area_inter + 1e-9)

        # GIoU : IoU - |C \ (A U B)| over C. C는 bbox와 GT를 모두 포함하는 최소 크기의 박스.
        C_top_left = torch.min(inp_topleft, target_topleft)
        C_bottom_right = torch.max(inp_bottomright, target_bottomright)
        C_area = torch.prod(C_bottom_right - C_top_left, -1).unsqueeze(-1)

        # DIoU : 중심좌표 반영. 1 - IoU + euclidean(pred_center, gt_center) / (diagonal length of C)**2 . C는 bbox와 GT를 모두 포함하는 최소 크기의 박스.
        euclidean = torch.sqrt(torch.sum((inp[..., 0:2] - target[..., 0:2]) ** 2, dim = -1)).unsqueeze(-1)
        diagonal_length_C = torch.sum((C_bottom_right - C_top_left) ** 2, dim = -1).unsqueeze(-1)

        # CIoU : overlap area, central point distance, aspect ratio 고려. 
        # 1 - IoU + 1 - IoU + euclidean(pred_center, gt_center) / (diagonal length of C)**2 + aspect_ratio_resemblance * alpha
        # aspect_ratio_resemblance = 4 / pi**2 (arctan(w_gt/h_gt) - arctan(w_pred/h_pred)) ** 2. 
        # (4/pi**2) * (arctan(w/h)) range from -0.5 to 0.5
        # alpha = positive trade-off parameter. aspect_ratio_resemblance / (1-IoU) + aspect_ratio_resemblance. IoU가 클수록 aspect_ratio_resemblance의 영향력을 키운다.
        aspect_ratio_resemblance = (4 / torch.pi ** 2) * (torch.atan(target_w / target_h) - torch.atan(inp_w / inp_h)) ** 2
        alpha = aspect_ratio_resemblance / ( (1 - iou) + aspect_ratio_resemblance)

        if self.method == 'IoU' : 
            return 1 - iou
        elif self.method == 'GIoU' :
            return 1 - (iou - (C_area - (inp_area + target_area - area_inter)) / C_area)
        elif self.method == 'DIoU' :
            return 1 - iou + (euclidean / diagonal_length_C)
        elif self.method == 'CIoU' :
            return 1 - iou + (euclidean / diagonal_length_C) + alpha * aspect_ratio_resemblance

In [70]:
# test case 1. IoU 0.36
# test case 2. IoU 1.
# test case 3. IoU 0
# test case 4. IoU 0
pred = torch.tensor([[0.5, 0.5, 0.5, 0.5],
                     [0.5, 0.5, 0.3, 0.3],
                     [0.1, 0.3, 0.2, 0.2],
                     [0.1, 0.3, 0.2, 0.2]
                ]).unsqueeze(0) # for batch

gt = torch.tensor([[0.7, 0.7, 0.4, 0.4],
                   [0.5, 0.5, 0.3, 0.3],
                   [0.6, 0.3, 0.2, 0.2],
                   [0.3, 0.3, 0.2, 0.2]
                ]).unsqueeze(0) # for batch

pred = torch.randn(16, 3, 13, 13, 4)
gt = torch.randn(16, 3, 13, 13, 4)


In [71]:
iouloss = IoULoss(method = 'CIoU')

In [73]:
iouloss(pred, gt).shape

torch.Size([16, 3, 13, 13, 1])

In [47]:
0.0625 / (0.25 + 0.16 - 0.0625)

0.17985611510791366

In [54]:
torch.pi

3.141592653589793

In [62]:
torch.atan(torch.tensor([1/6 * torch.pi]))

tensor([0.4823])

In [59]:
2 **(1/2) / 2

0.7071067811865476

In [79]:
# k = {1, 5, 9, 13}
# dilated convolution
# kernel size 3: dilated ratio equals to k, max-pooling of stride 1, 
k = 5
dilated_conv = torch.nn.Conv2d(3, 64, kernel_size = 3, stride = 1, padding = 1, dilation= k)

batch_size = 4

inp = torch.randn(batch_size, 3, 256, 256)

dilated_conv(inp).shape

torch.Size([4, 64, 248, 248])

In [83]:
bn = nn.BatchNorm2d(32)
samp = torch.randn((1, 32, 64, 64))

bn(samp).shape

torch.Size([1, 32, 64, 64])

In [84]:
bn.eps

1e-05

In [85]:
class ChannelPool(nn.Module):
    def forward(self, x):
        return torch.cat( (torch.max(x,1)[0].unsqueeze(1), torch.mean(x,1).unsqueeze(1)), dim=1 )


In [3]:
import torch
import torch.nn as nn
import numpy as np

In [24]:
baseblock = BaseBlock(3, 64, 3, 1, 1)
baseblock(torch.randn(1,3,24,24)).shape

torch.Size([1, 64, 24, 24])

In [30]:
class BaseBlock(nn.Module) :
    def __init__(self, in_channels, out_channels, kernel_size, stride = 1, padding = 0, act_fn = 'mish') :
        super().__init__()
        if act_fn.lower() == 'mish' :
            activation = nn.Mish()
        elif act_fn.lower() == 'leakyrelu' :
            activation = nn.LeakyReLU()
        elif act_fn.lower() == 'relu' :
            activation = nn.ReLU()
        else :
            raise ValueError(f'{act_fn} activation function is not covered. add on DarkNetBottleneck module.')

        self.activation = activation
        self.conv = nn.Sequential(
                        nn.Conv2d(in_channels, out_channels, kernel_size = kernel_size, stride = stride, padding = padding),
                        nn.BatchNorm2d(out_channels),
                        self.activation)
                        
    def forward(self, x) :
        return self.conv(x)
        

class DarkNetBottleneck(nn.Module) :
    def __init__(self, in_channels, out_channels, expansion = 2, act_fn = 'mish') :
        super().__init__()

        mid_channels = int(out_channels / expansion)
        self.conv1 = BaseBlock(in_channels, mid_channels, act_fn = act_fn, kernel_size = 1, stride = 1, padding = 0)
        self.conv2 = BaseBlock(mid_channels, out_channels, act_fn = act_fn, kernel_size = 3, stride = 1, padding = 1)

    def forward(self, x) :
        residual = x
        output = self.conv1(x)
        output = self.conv2(output)
        output += residual

        return output

In [31]:
class CSPStage(nn.Module) : 
    def __init__(self, in_channels, mid_channels, out_channels, block_fn, expansion, act_fn, num_blocks) :
        '''
        input x will be channel-wise splited into part1 and part2.
        During CSP, only part2 will go through block_fn.
        downsampling layer before CSP, and transition after concatenation.
        input x : (B, C, H, W)
        part1&part2 : (B, C // 2, H, W)
        C should be divisible by 2.

        in_channels : input channel of downsample layer. downsample layer reduce the feature size by 2
        mid_channels : output channel of downsample layer and input channel of cspblock
        block_fn : block function that will be applied on part2. For Darknet53, we are going to use DarkNetBottleneck.
        expansion : expansion of block_fn. e.g) expansion=2, C_in -> C_out//2 -> C_out. C means channel.
        act_fn : activation function. For DarkNet53, we are going to use mish.
        num_blocks : number of iterations of block_fn
        '''
        super().__init__()
        self.downsample = BaseBlock(in_channels, mid_channels, kernel_size = 3, stride = 2, padding = 1)
        self.cspblock = nn.Sequential()

        block_channels = mid_channels // 2 # input_channel for part2

        for i in range(num_blocks) :

            block = block_fn(in_channels = block_channels, 
                                             out_channels = block_channels,
                                             expansion = expansion,
                                             act_fn = act_fn
                                             ) # this only covers DarkNetBottleneck module.

            self.cspblock.add_module(f'partial_block_{i+1}', block )
            
        self.after_cspblock = BaseBlock(in_channels = block_channels, 
                                        out_channels = block_channels,
                                        kernel_size = 1,
                                        stride = 1,
                                        padding = 0,
                                        )
        
        self.transition = BaseBlock(in_channels = 2 * block_channels, 
                                        out_channels = out_channels,
                                        kernel_size = 1,
                                        stride = 1,
                                        padding = 0,
                                        )


    def forward(self, x) :
        x = self.downsample(x)
        split = x.shape[1] // 2
        part1, part2 = x[:, :split], x[:, split:]

        part2 = self.cspblock(part2)
        part2 = self.after_cspblock(part2).contiguous()

        output = torch.cat([part1, part2], dim = 1)
        output = self.transition(output)

        return output


In [35]:
modules = nn.ModuleList()

modules.add_module('abc', nn.Conv2d(3,6,3,1,1,1))

modules

Conv2d(3, 6, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))

In [36]:
class DarkNet53(nn.Module) :
    '''
    initial layer : conv(3,3,32)/1, mish

    in_channels  : [3, 32,  64,  64, 128,  256]
    mid_channels : [64, 128, 256, 512, 1024]
    out_channels : [64,  64, 128, 256,  512]

    num_blocks of cspstage : [1,2,8,8,4]
    '''
    def __init__(self, act_fn, block_fn, expansion, in_channels_list = [], mid_channels_list = [], out_channels_list = [], num_blocks_list = []) :
        super().__init__()
        
        self.input_layer = BaseBlock(in_channels_list[0], in_channels_list[1], kernel_size = 3, stride = 1, padding = 0)
        
        self.modules = nn.Modulelist()
        for i, num_blocks in enumerate(num_blocks_list) :
            
            cspstage = CSPStage(in_channels = in_channels_list[i], 
                            mid_channels = mid_channels_list[i], 
                            out_channels = out_channels_list[i], 
                            block_fn = block_fn, 
                            expansion = expansion, 
                            act_fn = act_fn, num_blocks = num_blocks)
            self.modules.add_module(f'CSPStage_{i+1}', cspstage)

    def forward(self, x) :
        output = self.input_layer(x)
        for stage in self.modules :
            output = stage(output)
        return output


            

                                            

In [None]:
in_channels_list  = [3, 32,  64,  64, 128,  256]
mid_channels_list = [64, 128, 256, 512, 1024]
out_channels_list = [64,  64, 128, 256,  512]


model = DarkNet53(act_fn = 'mish', block_fn = DarkNetBottleneck, expansion = 2, 
                    in_channels_list = in_channels_list,
                    mid_channels_list = mid_channels_list,
                    out_channels_list = out_channels_list
                    )

model(torch.randn((1,3,)))

In [1]:
# DenseNet BottleNeck
class BottleNeck(nn.Module):
    def __init__(self, in_channels, growth_rate):
        super().__init__()
        inner_channels = 4 * growth_rate

        self.residual = nn.Sequential(
            nn.BatchNorm2d(in_channels),
            nn.ReLU(),
            nn.Conv2d(in_channels, inner_channels, 1, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(inner_channels),
            nn.ReLU(),
            nn.Conv2d(inner_channels, growth_rate, 3, stride=1, padding=1, bias=False)
        )

        self.shortcut = nn.Sequential()

    def forward(self, x):
        return torch.cat([self.shortcut(x), self.residual(x)], 1)


bottleneck = BottleNeck(in_channels = 3 , growth_rate= 12)

bottleneck(torch.randn(1, 3, 224, 224)).shape

NameError: name 'nn' is not defined

In [96]:
# Transition Block: reduce feature map size and number of channels
class Transition(nn.Module):
    def __init__(self, in_channels, out_channels):
        super().__init__()

        self.down_sample = nn.Sequential(
            nn.BatchNorm2d(in_channels),
            nn.ReLU(),
            nn.Conv2d(in_channels, out_channels, 1, stride=1, padding=0, bias=False),
            nn.AvgPool2d(2, stride=2)
        )

    def forward(self, x):
        return self.down_sample(x)

transition = Transition(3, 15)

transition(torch.randn(1, 3, 224, 224)).shape

torch.Size([1, 15, 112, 112])

In [97]:
# DenseNet BottleNeck
class BottleNeck(nn.Module):
    def __init__(self, in_channels, growth_rate):
        super().__init__()
        inner_channels = 4 * growth_rate

        self.residual = nn.Sequential(
            nn.BatchNorm2d(in_channels),
            nn.ReLU(),
            nn.Conv2d(in_channels, inner_channels, 1, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(inner_channels),
            nn.ReLU(),
            nn.Conv2d(inner_channels, growth_rate, 3, stride=1, padding=1, bias=False)
        )

        self.shortcut = nn.Sequential()

    def forward(self, x):
        return torch.cat([self.shortcut(x), self.residual(x)], 1)

# Transition Block: reduce feature map size and number of channels
class Transition(nn.Module):
    def __init__(self, in_channels, out_channels):
        super().__init__()

        self.down_sample = nn.Sequential(
            nn.BatchNorm2d(in_channels),
            nn.ReLU(),
            nn.Conv2d(in_channels, out_channels, 1, stride=1, padding=0, bias=False),
            nn.AvgPool2d(2, stride=2)
        )

    def forward(self, x):
        return self.down_sample(x)

# DenseNet
class DenseNet(nn.Module):
    def __init__(self, nblocks, growth_rate=12, reduction=0.5, num_classes=10, init_weights=True):
        super().__init__()

        self.growth_rate = growth_rate
        inner_channels = 2 * growth_rate # output channels of conv1 before entering Dense Block

        self.conv1 = nn.Sequential(
            nn.Conv2d(3, inner_channels, 7, stride=2, padding=3),
            nn.MaxPool2d(3, 2, padding=1)
        )

        self.features = nn.Sequential()

        for i in range(len(nblocks)-1):
            self.features.add_module('dense_block_{}'.format(i), self._make_dense_block(nblocks[i], inner_channels))
            inner_channels += growth_rate * nblocks[i]
            out_channels = int(reduction * inner_channels)
            self.features.add_module('transition_layer_{}'.format(i), Transition(inner_channels, out_channels))
            inner_channels = out_channels 
        
        self.features.add_module('dense_block_{}'.format(len(nblocks)-1), self._make_dense_block(nblocks[len(nblocks)-1], inner_channels))
        inner_channels += growth_rate * nblocks[len(nblocks)-1]
        self.features.add_module('bn', nn.BatchNorm2d(inner_channels))
        self.features.add_module('relu', nn.ReLU())

        self.avg_pool = nn.AdaptiveAvgPool2d((1,1))
        self.linear = nn.Linear(inner_channels, num_classes)

        # weight initialization
        if init_weights:
            self._initialize_weights()
    
    def forward(self, x):
        x = self.conv1(x)
        print(x.shape)
        x = self.features(x)
        print(x.shape)
        x = self.avg_pool(x)
        print(x.shape)
        x = x.view(x.size(0), -1)
        x = self.linear(x)
        return x

    def _make_dense_block(self, nblock, inner_channels):
        dense_block = nn.Sequential()
        for i in range(nblock):
            dense_block.add_module('bottle_neck_layer_{}'.format(i), BottleNeck(inner_channels, self.growth_rate))
            inner_channels += self.growth_rate
        return dense_block

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)

def DenseNet_121():
    return DenseNet([6, 12, 24, 6])

In [98]:
model = DenseNet_121()

In [99]:
model(torch.randn(1,3, 224,224))

torch.Size([1, 24, 56, 56])
torch.Size([1, 264, 7, 7])
torch.Size([1, 264, 1, 1])


tensor([[ 0.0327, -0.0391,  0.0829, -0.0473, -0.0329, -0.1048,  0.0238, -0.0491,
          0.0917,  0.0419]], grad_fn=<AddmmBackward0>)