<a href="https://colab.research.google.com/github/yoonwanggyu/Self_Study/blob/main/%EC%98%A4%ED%94%84%EB%9D%BC%EC%9D%B8/Yolo_v2_Pytorch_tensorflow.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Yolo_v2_Pytorch

In [None]:
import torch
import torch.nn as nn

**darknet-19**

In [None]:
class ConvBlock(nn.Module):
    """Conv2d -> BatchNorm2d -> LeakyReLU(0.1) building block.

    The convolution is padded with ``kernel_size // 2``, so for the odd
    kernel sizes used in this notebook (1 and 3) the spatial size of the
    input is preserved.
    """

    def __init__(self, in_channels, out_channels, kernel_size):
        super().__init__()

        same_padding = kernel_size // 2
        self.conv = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size,
            padding=same_padding,
        )
        self.bn = nn.BatchNorm2d(out_channels)
        self.relu = nn.LeakyReLU(0.1)

    def forward(self, x):
        """Apply convolution, batch normalisation and leaky ReLU, in that order."""
        return self.relu(self.bn(self.conv(x)))

class Darknet19(nn.Module):
    """Darknet-19 style image classifier.

    ``features`` is a stack of ConvBlocks interleaved with five 2x2 max-pools
    and ends in a 1x1 ConvBlock with 1000 output channels. The result is
    globally average-pooled to 1x1, flattened, and passed through a linear
    layer that maps the 1000 features to ``num_classes`` scores.
    """

    def __init__(self, num_classes=1000):
        super().__init__()

        # One entry per layer, in order: (out_channels, kernel_size) describes
        # a ConvBlock, the string "pool" a 2x2 max-pool.
        layout = [
            (32, 3), "pool",
            (64, 3), "pool",
            (128, 3), (64, 1), (128, 3), "pool",
            (256, 3), (128, 1), (256, 3), "pool",
            (512, 3), (256, 1), (512, 3), (256, 1), (512, 3), "pool",
            (1024, 3), (512, 1), (1024, 3), (512, 1), (1024, 3),
            (1000, 1),
        ]

        stages = []
        channels = 3  # the network consumes 3-channel images
        for entry in layout:
            if entry == "pool":
                stages.append(nn.MaxPool2d(2))
                continue
            width, kernel = entry
            stages.append(ConvBlock(channels, width, kernel))
            channels = width

        self.features = nn.Sequential(*stages)
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(1000, num_classes)

    def forward(self, x):
        """Return a ``(batch, num_classes)`` tensor of class scores."""
        pooled = self.avgpool(self.features(x))
        flat = pooled.view(pooled.size(0), -1)  # (batch, 1000, 1, 1) -> (batch, 1000)
        return self.fc(flat)

In [None]:
from torchsummary import summary

In [None]:
# Instantiate the 1000-class Darknet-19 classifier and print torchsummary's
# per-layer table for a single 3x224x224 input.
model = Darknet19(num_classes=1000)
summary(model,input_size = (3,224,224))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 32, 224, 224]             896
       BatchNorm2d-2         [-1, 32, 224, 224]              64
         LeakyReLU-3         [-1, 32, 224, 224]               0
         ConvBlock-4         [-1, 32, 224, 224]               0
         MaxPool2d-5         [-1, 32, 112, 112]               0
            Conv2d-6         [-1, 64, 112, 112]          18,496
       BatchNorm2d-7         [-1, 64, 112, 112]             128
         LeakyReLU-8         [-1, 64, 112, 112]               0
         ConvBlock-9         [-1, 64, 112, 112]               0
        MaxPool2d-10           [-1, 64, 56, 56]               0
           Conv2d-11          [-1, 128, 56, 56]          73,856
      BatchNorm2d-12          [-1, 128, 56, 56]             256
        LeakyReLU-13          [-1, 128, 56, 56]               0
        ConvBlock-14          [-1, 128, 56, 56]               0

**YOLO v2**

In [None]:
class ConvBlock(nn.Module):
    """Convolution followed by batch normalisation and a leaky ReLU (slope 0.1).

    Padding is ``kernel_size // 2``, which keeps the spatial size unchanged
    for the 1x1 and 3x3 kernels used by the networks in this notebook.
    """

    def __init__(self, in_channels, out_channels, kernel_size):
        super().__init__()
        self.conv = nn.Conv2d(
            in_channels, out_channels, kernel_size, padding=kernel_size // 2
        )
        self.bn = nn.BatchNorm2d(out_channels)
        self.relu = nn.LeakyReLU(0.1)

    def forward(self, x):
        """Run ``x`` through conv, batch norm and activation in sequence."""
        for stage in (self.conv, self.bn, self.relu):
            x = stage(x)
        return x

class Yolo_v2(nn.Module):
    """YOLO v2 detection network on a Darknet-19 style backbone.

    Layout (mirrors the TensorFlow ``yolo_v2`` builder in this notebook):

    1. ``features``: backbone blocks 1-5, producing a stride-16 map with
       512 channels (26x26 for a 416x416 input).
    2. Passthrough: the stride-16 map is rearranged space-to-depth (every
       2x2 spatial neighbourhood is stacked on the channel axis), giving
       4 * 512 = 2048 channels at stride 32.
    3. ``pool`` + ``block6``: the stride-16 map is max-pooled and run through
       seven ConvBlocks, giving 1024 channels at stride 32.
    4. Both stride-32 maps are concatenated on the channel axis (3072
       channels), fused by a 3x3 ConvBlock and projected by a 1x1 ConvBlock
       to ``bbox_per_cell * (num_classes + 5)`` channels.

    Args:
        num_classes: number of object classes predicted per box.
        bbox_per_cell: number of boxes predicted per grid cell.

    All layers are created once in ``__init__`` so their parameters are
    registered with the module (and therefore trained, saved and moved
    between devices together).
    """

    def __init__(self, num_classes=20, bbox_per_cell=5):
        super(Yolo_v2, self).__init__()

        # Backbone blocks 1-5. It stops *before* the fifth max-pool because
        # the passthrough branch needs the stride-16 feature map.
        self.features = nn.Sequential(
            ConvBlock(3, 32, 3),
            nn.MaxPool2d(2),

            ConvBlock(32, 64, 3),
            nn.MaxPool2d(2),

            ConvBlock(64, 128, 3),
            ConvBlock(128, 64, 1),
            ConvBlock(64, 128, 3),
            nn.MaxPool2d(2),

            ConvBlock(128, 256, 3),
            ConvBlock(256, 128, 1),
            ConvBlock(128, 256, 3),
            nn.MaxPool2d(2),

            ConvBlock(256, 512, 3),
            ConvBlock(512, 256, 1),
            ConvBlock(256, 512, 3),
            ConvBlock(512, 256, 1),
            ConvBlock(256, 512, 3),
        )

        self.pool = nn.MaxPool2d(2)

        # Block 6 plus the two extra 3x3 detection convolutions.
        self.block6 = nn.Sequential(
            ConvBlock(512, 1024, 3),
            ConvBlock(1024, 512, 1),
            ConvBlock(512, 1024, 3),
            ConvBlock(1024, 512, 1),
            ConvBlock(512, 1024, 3),
            ConvBlock(1024, 1024, 3),
            ConvBlock(1024, 1024, 3),
        )

        # 1024 (block 6) + 4 * 512 (passthrough) = 3072 input channels.
        self.fuse_conv = ConvBlock(1024 + 4 * 512, 1024, 3)

        # NOTE(review): kept as a ConvBlock (BatchNorm + LeakyReLU) to match
        # the TensorFlow version in this notebook; a plain linear 1x1 conv is
        # the more usual choice for a detection output - confirm intent.
        self.final_conv = ConvBlock(1024, bbox_per_cell * (num_classes + 5), 1)

    def forward(self, x):
        """Return the raw prediction map for a batch of NCHW images.

        Args:
            x: tensor of shape ``(batch, 3, H, W)``.

        Returns:
            Tensor of shape
            ``(batch, bbox_per_cell * (num_classes + 5), H // 32, W // 32)``.

        Raises:
            ValueError: if the stride-16 feature map has an odd height or
                width, in which case the passthrough and pooled branches
                would not line up. Input sizes that are multiples of 32
                always satisfy this.
        """
        x = self.features(x)

        height, width = x.shape[-2:]
        if height % 2 or width % 2:
            raise ValueError(
                "Yolo_v2 needs an even-sized stride-16 feature map, got "
                f"{height}x{width}; use an input size that is a multiple of 32."
            )

        # Space-to-depth: the four phase-shifted stride-2 sub-grids are
        # stacked on the channel axis (dim=1 for NCHW tensors).
        passthrough = torch.cat(
            [
                x[:, :, ::2, ::2],
                x[:, :, ::2, 1::2],
                x[:, :, 1::2, ::2],
                x[:, :, 1::2, 1::2],
            ],
            dim=1,
        )

        x = self.block6(self.pool(x))
        x = torch.cat([x, passthrough], dim=1)
        x = self.fuse_conv(x)
        return self.final_conv(x)

# Smoke test: push one random image through the model and print the output shape.
input_shape = (3, 416, 416)  # (channels, height, width); the batch dimension is prepended below
model = Yolo_v2()
x = torch.randn(1, *input_shape)  # a batch containing a single random image
output = model(x)
print(output.shape)  # Check the shape of the output



RuntimeError: Sizes of tensors must match except in dimension 3. Expected size 7 but got size 6 for tensor number 2 in the list.

# Yolo_v2_tensorflow

## darknet-19

In [None]:
import tensorflow as tf
from tensorflow.keras import layers

In [None]:
def conv_block(x, filters, kernel_size):
    """Apply Conv2D ('same' padding, no activation), BatchNormalization and LeakyReLU(0.1).

    Args:
        x: input Keras tensor.
        filters: number of output channels of the convolution.
        kernel_size: size of the convolution kernel.

    Returns:
        The tensor produced by the LeakyReLU layer.
    """
    convolution = layers.Conv2D(filters, kernel_size, padding='same', activation='linear')
    normalisation = layers.BatchNormalization()
    activation = layers.LeakyReLU(alpha=0.1)
    return activation(normalisation(convolution(x)))

def darknet19(input_shape=(224, 224, 3), num_classes=1000):
    """Build a Darknet-19 style classifier as a Keras functional model.

    Six stages of conv_blocks are separated by 2x2 max-pools. They are
    followed by a 1x1 conv_block with 1000 filters, global average pooling
    and a softmax Dense layer with ``num_classes`` units.

    Args:
        input_shape: shape of one input image, channels last.
        num_classes: number of units in the final softmax layer.

    Returns:
        A ``tf.keras.Model`` mapping images to class probabilities.
    """
    inputs = tf.keras.Input(shape=input_shape)

    # Each stage lists its conv_blocks as (filters, kernel_size).
    stages = [
        [(32, 3)],
        [(64, 3)],
        [(128, 3), (64, 1), (128, 3)],
        [(256, 3), (128, 1), (256, 3)],
        [(512, 3), (256, 1), (512, 3), (256, 1), (512, 3)],
        [(1024, 3), (512, 1), (1024, 3), (512, 1), (1024, 3)],
    ]

    x = inputs
    for position, stage in enumerate(stages):
        # A max-pool sits between consecutive stages (none before the first,
        # none after the last).
        if position:
            x = layers.MaxPooling2D(pool_size=2)(x)
        for filters, kernel_size in stage:
            x = conv_block(x, filters, kernel_size)

    # Extra 1x1 convolution ahead of the classifier head.
    x = conv_block(x, 1000, 1)

    # Classifier head.
    x = layers.GlobalAveragePooling2D()(x)
    outputs = layers.Dense(num_classes, activation='softmax')(x)

    return tf.keras.Model(inputs=inputs, outputs=outputs)

In [None]:
# Build Darknet-19 with its default 224x224x3 input and print the Keras layer summary.
darknet_19 = darknet19()
darknet_19.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 conv2d (Conv2D)             (None, 224, 224, 32)      896       
                                                                 
 batch_normalization (BatchN  (None, 224, 224, 32)     128       
 ormalization)                                                   
                                                                 
 leaky_re_lu (LeakyReLU)     (None, 224, 224, 32)      0         
                                                                 
 max_pooling2d (MaxPooling2D  (None, 112, 112, 32)     0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 112, 112, 64)      18496 

In [None]:
# Rebuild Darknet-19 for a 448x448x3 input and print the Keras layer summary.
darknet_19 = darknet19((448, 448, 3))
darknet_19.summary()

Model: "model_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, 448, 448, 3)]     0         
                                                                 
 conv2d_38 (Conv2D)          (None, 448, 448, 32)      896       
                                                                 
 batch_normalization_38 (Bat  (None, 448, 448, 32)     128       
 chNormalization)                                                
                                                                 
 leaky_re_lu_38 (LeakyReLU)  (None, 448, 448, 32)      0         
                                                                 
 max_pooling2d_10 (MaxPoolin  (None, 224, 224, 32)     0         
 g2D)                                                            
                                                                 
 conv2d_39 (Conv2D)          (None, 224, 224, 64)      18496

## YOLO V2

In [None]:
def conv_block(x, filters, kernel_size):
    """Chain a linear 'same'-padded Conv2D, BatchNormalization and LeakyReLU(0.1) onto ``x``.

    Args:
        x: input Keras tensor.
        filters: number of convolution filters (output channels).
        kernel_size: size of the convolution kernel.

    Returns:
        The output tensor of the final LeakyReLU layer.
    """
    features = layers.Conv2D(
        filters,
        kernel_size,
        padding='same',
        activation='linear',
    )(x)
    normalised = layers.BatchNormalization()(features)
    return layers.LeakyReLU(alpha=0.1)(normalised)

def yolo_v2(input_shape=(416, 416, 3), num_class=20, bbox_per_cell=5):
    """Build the YOLO v2 detection network as a Keras functional model.

    The backbone (blocks 1-5) yields a stride-16 feature map with 512
    channels. That map feeds two branches: a space-to-depth passthrough
    (4 * 512 = 2048 channels) and a max-pooled block 6 (1024 channels).
    The branches are concatenated on the channel axis, fused by a 3x3
    conv_block and projected by a 1x1 conv_block to
    ``bbox_per_cell * (num_class + 5)`` channels.

    Args:
        input_shape: shape of one input image, channels last.
        num_class: number of object classes predicted per box.
        bbox_per_cell: number of boxes predicted per grid cell.

    Returns:
        A ``tf.keras.Model`` mapping images to the prediction map.
    """
    inputs = tf.keras.Input(shape=input_shape)

    # Blocks 1-5, each listed as (filters, kernel_size) conv_blocks. A 2x2
    # max-pool separates consecutive blocks; block 5 is deliberately left
    # unpooled so the passthrough branch can read it.
    backbone = [
        [(32, 3)],
        [(64, 3)],
        [(128, 3), (64, 1), (128, 3)],
        [(256, 3), (128, 1), (256, 3)],
        [(512, 3), (256, 1), (512, 3), (256, 1), (512, 3)],
    ]
    x = inputs
    for position, block in enumerate(backbone):
        if position:
            x = layers.MaxPooling2D(pool_size=2)(x)
        for filters, kernel_size in block:
            x = conv_block(x, filters, kernel_size)
    # x is 26 x 26 x 512 for the default 416 x 416 input.

    # Fine-grained (passthrough) features: take the four phase-shifted
    # stride-2 sub-grids and stack them on the channel axis.
    even, odd = slice(None, None, 2), slice(1, None, 2)
    sub_grids = [x[:, rows, cols, :] for rows in (even, odd) for cols in (even, odd)]
    fine_grained = tf.concat(sub_grids, axis = -1) # batch x 13 x 13 x 2048

    x = layers.MaxPooling2D(pool_size=2)(x)

    # Block 6, including the two additional 3x3 convolutions at the end.
    block6 = [(1024, 3), (512, 1), (1024, 3), (512, 1), (1024, 3), (1024, 3), (1024, 3)]
    for filters, kernel_size in block6:
        x = conv_block(x, filters, kernel_size)

    x = tf.concat([x, fine_grained], axis = -1) # batch x 13 x 13 x 3072

    # Detection head.
    x = conv_block(x, 1024, 3)
    outputs = conv_block(x, bbox_per_cell*(num_class+5), 1)

    return tf.keras.Model(inputs=inputs, outputs=outputs)

In [None]:
# Build the default YOLO v2 model and print its Keras layer summary.
# NOTE(review): this rebinds the name `yolo_v2` from the builder function to
# the model instance, so running this cell a second time would call the model
# rather than the builder - consider a distinct variable name.
yolo_v2 = yolo_v2()
yolo_v2.summary()

Model: "model_4"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_5 (InputLayer)           [(None, 416, 416, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv2d_79 (Conv2D)             (None, 416, 416, 32  896         ['input_5[0][0]']                
                                )                                                                 
                                                                                                  
 batch_normalization_79 (BatchN  (None, 416, 416, 32  128        ['conv2d_79[0][0]']              
 ormalization)                  )                                                           