# Creating YOLOv3 Model

In [11]:
#Feature Extractor
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Conv2D, ZeroPadding2D, LeakyReLU, BatchNormalization

IMG_SHAPE = 416   # side length of the square network input, in pixels
CHANNELS = 3      # channels of the network input
NUM_CLASS = 2     # number of class scores predicted per anchor

# Downsampling factor of each detection scale, ordered like the
# [small, medium, large] outputs returned by create_yolo. darknet53 applies
# 3, 4 and 5 stride-2 convolutions before the respective feature maps, so a
# 416-pixel input yields 52-, 26- and 13-cell grids (416 / 8, / 16, / 32).
STRIDES = np.array([8, 16, 32])

# Anchor (width, height) pairs in input pixels: one row of three per scale.
YOLO_ANCHORS = [[[10,  13], [16,   30], [33,   23]],
                [[30,  61], [62,   45], [59,  119]],
                [[116, 90], [156, 198], [373, 326]]]

# Anchors expressed in grid cells: every scale is divided by its own stride.
ANCHORS = np.array(YOLO_ANCHORS) / STRIDES[:, np.newaxis, np.newaxis]
    
def convolution(input_data, filters, size, downsample, bn, activation):
    """Apply a Conv2D layer, optionally followed by batch norm and LeakyReLU.

    Args:
        input_data: Tensor the layers are applied to.
        filters: Number of output filters of the convolution.
        size: Kernel size forwarded to ``Conv2D``.
        downsample: If truthy, pad one row at the top and one column on the
            left, then convolve with stride 2 and 'valid' padding. Otherwise
            convolve with stride 1 and 'same' padding.
        bn: If truthy, apply ``BatchNormalization`` after the convolution.
        activation: If truthy, finish with ``LeakyReLU(alpha=0.1)``.

    Returns:
        The output tensor of the last layer that was applied.
    """
    if downsample:
        # Asymmetric padding (top/left only) ahead of the strided 'valid' conv.
        input_data = ZeroPadding2D(((1, 0), (1, 0)))(input_data)
        padding = 'valid'
        strides = 2
    else:
        padding = 'same'
        strides = 1

    conv = Conv2D(filters, size, padding=padding, strides=strides)(input_data)

    if bn:
        conv = BatchNormalization()(conv)
    # Plain truthiness, consistent with the `downsample` and `bn` flags above.
    if activation:
        conv = LeakyReLU(alpha=0.1)(conv)

    return conv

def residual_block(input_data, filter1, size1, filter2, size2):
    """Run two conv + batch-norm + LeakyReLU layers and add the input back in.

    Args:
        input_data: Tensor entering the block; also used as the skip path.
        filter1: Filter count of the first convolution.
        size1: Kernel size of the first convolution.
        filter2: Filter count of the second convolution.
        size2: Kernel size of the second convolution.

    Returns:
        ``input_data`` plus the output of the second convolution.
    """
    branch = input_data
    for n_filters, kernel in ((filter1, size1), (filter2, size2)):
        # Stride-1 convolution with batch norm and activation enabled.
        branch = convolution(branch, n_filters, kernel, False, True, True)

    # Skip connection: element-wise sum of the block input and the branch.
    return input_data + branch

def upsample(input_layer):
    """Resize a tensor to twice its size along axes 1 and 2 (nearest neighbour)."""
    dim1, dim2 = input_layer.shape[1], input_layer.shape[2]
    target_size = (dim1 * 2, dim2 * 2)
    return tf.image.resize(input_layer, target_size, method='nearest')

def darknet53(input_data):
    """Build the backbone and return the feature maps of its last three stages.

    The network starts with a 32-filter 3x3 convolution. Each following stage
    is one stride-2 3x3 convolution and then a run of residual blocks whose
    first (1x1) convolution uses half the stage's filter count.

    Args:
        input_data: Tensor fed to the first convolution.

    Returns:
        Tuple ``(output1, output2, output3)``: the outputs of the 256-, 512-
        and 1024-filter stages, in that order.
    """
    # (filters of the stride-2 convolution, residual blocks that follow it)
    stages = ((64, 1), (128, 2), (256, 8), (512, 8), (1024, 4))

    features = convolution(input_data, 32, (3, 3), False, True, True)

    stage_outputs = []
    for n_filters, n_blocks in stages:
        features = convolution(features, n_filters, (3, 3), True, True, True)
        for _ in range(n_blocks):
            features = residual_block(
                features, n_filters // 2, (1, 1), n_filters, (3, 3))
        stage_outputs.append(features)

    # Only the last three stages feed the detection heads.
    output1, output2, output3 = stage_outputs[-3:]
    return output1, output2, output3

def _detection_convs(tensor, filters):
    # Five alternating 1x1 (filters) / 3x3 (2 * filters) conv+BN+LeakyReLU layers.
    tensor = convolution(tensor, filters, (1, 1), False, True, True)
    tensor = convolution(tensor, 2 * filters, (3, 3), False, True, True)
    tensor = convolution(tensor, filters, (1, 1), False, True, True)
    tensor = convolution(tensor, 2 * filters, (3, 3), False, True, True)
    tensor = convolution(tensor, filters, (1, 1), False, True, True)
    return tensor


def _prediction_head(tensor, filters):
    # 3x3 conv, then a linear 1x1 conv (no batch norm, no activation).
    branch = convolution(tensor, filters, (3, 3), False, True, True)
    return convolution(branch, 3 * (NUM_CLASS + 5), (1, 1), False, False, False)


def _upsample_and_merge(tensor, filters, skip):
    # 1x1 conv, 2x upsampling, then channel-wise concat with a backbone map.
    tensor = convolution(tensor, filters, (1, 1), False, True, True)
    tensor = upsample(tensor)
    return tf.concat([tensor, skip], axis=-1)


def create_yolo(input_layer):
    """Build the three raw (un-decoded) YOLOv3 prediction maps.

    The deepest backbone map is processed first. Each later scale reuses the
    previous scale's features, upsampled and concatenated with the matching
    backbone map. Every prediction branch is a 3x3 convolution followed by a
    linear 1x1 convolution, so the three scales share one head layout.

    Args:
        input_layer: Image tensor passed to ``darknet53``.

    Returns:
        ``[conv_sbox, conv_mbox, conv_lbox]``: prediction maps ordered from
        the highest-resolution scale to the lowest-resolution one, each with
        ``3 * (NUM_CLASS + 5)`` channels.
    """
    output1, output2, output3 = darknet53(input_layer)

    # Lowest-resolution scale, built on the deepest backbone output.
    conv = _detection_convs(output3, 512)
    conv_lbox = _prediction_head(conv, 1024)

    # Middle scale.
    conv = _upsample_and_merge(conv, 256, output2)
    conv = _detection_convs(conv, 256)
    conv_mbox = _prediction_head(conv, 512)

    # Highest-resolution scale.
    conv = _upsample_and_merge(conv, 128, output1)
    conv = _detection_convs(conv, 128)
    conv_sbox = _prediction_head(conv, 256)

    return [conv_sbox, conv_mbox, conv_lbox]

def decode(conv_output, NUM_CLASS, i=0):
    """Convert one raw prediction map into boxes, confidences and class scores.

    Args:
        conv_output: Raw prediction map. It is reshaped to
            ``(batch, size, size, 3, 5 + NUM_CLASS)``, where ``batch`` and
            ``size`` are its dimensions 0 and 1.
        NUM_CLASS: Number of class scores per anchor.
        i: Scale index (0, 1 or 2) used to look up ``STRIDES`` and ``ANCHORS``.

    Returns:
        Tensor of shape ``(batch, size, size, 3, 5 + NUM_CLASS)`` whose last
        axis holds ``[x, y, w, h, confidence, class scores...]``.
    """
    dims = tf.shape(conv_output)
    n_batch, grid_size = dims[0], dims[1]

    # One (5 + NUM_CLASS)-vector for each of the 3 anchors of every cell.
    per_anchor = tf.reshape(
        conv_output, (n_batch, grid_size, grid_size, 3, 5 + NUM_CLASS))
    raw_xy, raw_wh, raw_conf, raw_prob = tf.split(
        per_anchor, (2, 2, 1, NUM_CLASS), axis=-1)

    # Cell offsets: the entry at [row, col] holds (col, row), i.e. (x, y).
    cell_xy = tf.stack(
        tf.meshgrid(tf.range(grid_size), tf.range(grid_size)), axis=-1)
    cell_xy = tf.expand_dims(cell_xy, axis=2)   # add the anchor axis
    cell_xy = tf.expand_dims(cell_xy, axis=0)   # add the batch axis
    cell_xy = tf.tile(cell_xy, [n_batch, 1, 1, 3, 1])
    cell_xy = tf.cast(cell_xy, tf.float32)

    stride = STRIDES[i]
    # Box centre: sigmoid offset inside the cell plus the cell index, scaled
    # by the stride.
    box_xy = (tf.sigmoid(raw_xy) + cell_xy) * stride
    # Box size: exponential of the raw value times the anchor, scaled by the
    # stride.
    box_wh = (tf.exp(raw_wh) * ANCHORS[i]) * stride

    confidence = tf.sigmoid(raw_conf)
    class_prob = tf.sigmoid(raw_prob)

    return tf.concat([box_xy, box_wh, confidence, class_prob], axis=-1)


# main: wire the backbone, the detection heads and the decoders into one model
input_layer = Input([IMG_SHAPE, IMG_SHAPE, CHANNELS])
conv_tensors = create_yolo(input_layer)

# Decode every raw prediction map with the scale index it was produced at.
output_tensors = [
    decode(raw_map, NUM_CLASS, scale)
    for scale, raw_map in enumerate(conv_tensors)
]

Yolo_model = tf.keras.Model(inputs=input_layer, outputs=output_tensors)

In [12]:
# Print the layer-by-layer summary of the assembled model.
Yolo_model.summary()

Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_7 (InputLayer)            [(None, 416, 416, 3) 0                                            
__________________________________________________________________________________________________
conv2d_410 (Conv2D)             (None, 416, 416, 32) 896         input_7[0][0]                    
__________________________________________________________________________________________________
batch_normalization_398 (BatchN (None, 416, 416, 32) 128         conv2d_410[0][0]                 
__________________________________________________________________________________________________
leaky_re_lu_398 (LeakyReLU)     (None, 416, 416, 32) 0           batch_normalization_398[0][0]    
_______________________________________________________________________________________

tf_op_layer_AddV2_169 (TensorFl [(None, None, None,  0           tf_op_layer_Sigmoid_20[0][0]     
                                                                 tf_op_layer_Cast_8[0][0]         
__________________________________________________________________________________________________
tf_op_layer_Mul_38 (TensorFlowO [(None, None, None,  0           tf_op_layer_Exp_7[0][0]          
__________________________________________________________________________________________________
tf_op_layer_Mul_27 (TensorFlowO [(None, None, None,  0           tf_op_layer_AddV2_167[0][0]      
__________________________________________________________________________________________________
tf_op_layer_Mul_29 (TensorFlowO [(None, None, None,  0           tf_op_layer_Mul_28[0][0]         
__________________________________________________________________________________________________
tf_op_layer_Mul_32 (TensorFlowO [(None, None, None,  0           tf_op_layer_AddV2_168[0][0]      
__________