# <font color = 'blue'> One Shot Video Object Segmentation</font>
The implementation is based on the paper [One-Shot Video Object Segmentation](https://arxiv.org/abs/1611.05198). VGG16 is used as the base network, together with skip connections. The whole implementation is written in TensorFlow Keras. The dataset used for training is DAVIS. The pipeline followed in this notebook is:
- Import of module and packages.
- Data Processing and analysis.
- DataGenerator along with Augmentation function.
- Model Creation.
- Training Script.
- Fine-tuning and testing on videos.

 ## <font color='blue'>Import of different modules and packages.</font>

In [2]:
import numpy as np
import tensorflow as tf
import cv2
import os
import glob
import random
import tensorflow as tf
from tensorflow.keras import layers, Model
from tensorflow import keras

# Machine-specific absolute paths; adjust them for the local environment.
# Root folder whose sub-folders hold the training frames (walked by Data_analysis).
img_folder_path = "/Users/tangerine/PycharmProjects/dataset/DAVIS2017/Train"
# Root folder of the annotation masks; sub-folders are looked up by the same folder name.
img_annotation_path = "/Users/tangerine/PycharmProjects/dataset/DAVIS2017/Train_Annotated/"
# Dataset root. NOTE(review): the name `data` is rebound to the data-analysis
# object further down, so this string is not reachable after that point.
data = "/Users/tangerine/PycharmProjects/dataset/DAVIS2017/"
# Weight file handed to model.load_weights(...) inside VGG16 when weight == 'imagenet'.
weight_path = "/Users/tangerine/stryker/models/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5"

## <font color='blue'> Data Processing and analysis.</font>

In [3]:
class Data_analysis:
    """Collect image/annotation path pairs and split them into train and validation lists.

    Every directory found while walking ``img_folder_path`` is treated as one
    sequence folder. Its entries are shuffled; the first 85 % are appended to
    the training lists and the rest to the validation lists. The annotation
    for an image is expected at
    ``<img_annotation_path>/<folder name>/<image name without extension>.png``.

    The four ``*_path_*`` lists are parallel: index ``i`` of an image list
    corresponds to index ``i`` of the matching target list.
    """

    # Fraction of each folder's files that goes to the training split.
    TRAIN_FRACTION = 0.85

    def __init__(self,img_folder_path,img_annotation_path):
        """Store the two root folders and create the (initially empty) path lists.

        Args:
            img_folder_path: Root folder containing one sub-folder per sequence.
            img_annotation_path: Root folder containing the annotation sub-folders.
        """
        self.img_folder_path = img_folder_path
        self.img_annotation_path = img_annotation_path

        self.img_path_train = []
        self.target_path_train = []
        self.img_path_val = []
        self.target_path_val = []

    def __call__(self,visualize=False):
        """Walk the image root and append the train/validation path pairs.

        Paths are appended to the existing lists, so calling the object more
        than once accumulates entries.

        Args:
            visualize: When true, ``self.visualization()`` is invoked after
                the lists have been filled.
        """
        for root, dir_names, _ in os.walk(self.img_folder_path):
            for seq_name in dir_names:
                seq_image_dir = os.path.join(root, seq_name)
                seq_anno_dir = os.path.join(self.img_annotation_path, seq_name)
                image_files = os.listdir(seq_image_dir)
                # Shuffle so the split is random within a folder rather than
                # following the directory listing order.
                random.shuffle(image_files)
                n_train = int(len(image_files) * self.TRAIN_FRACTION)

                for position, file_name in enumerate(image_files):
                    img_path = os.path.join(seq_image_dir, file_name)
                    # splitext handles any extension length (".jpg", ".jpeg", ...).
                    stem = os.path.splitext(file_name)[0]
                    annotation_path = os.path.join(seq_anno_dir, stem + '.png')
                    if position < n_train:
                        self.img_path_train.append(img_path)
                        self.target_path_train.append(annotation_path)
                    else:
                        self.img_path_val.append(img_path)
                        self.target_path_val.append(annotation_path)

        if visualize:
            self.visualization()

    def visualization(self):
        """Placeholder for dataset visualization; currently does nothing."""
        pass
# Build the train/validation path lists. The class is named `Data_analysis`
# (capital D); the lowercase spelling is undefined and raised a NameError.
data = Data_analysis(img_folder_path,img_annotation_path)
data()

## <font color='blue'>DataGenerator and Augmentation</font>

In [4]:
class DataGenerator(keras.utils.Sequence):
    """Keras ``Sequence`` that yields batches of images and binary masks.

    Each batch is a tuple ``(x, y)`` where ``x`` is a float32 array of shape
    ``(batch_size, height, width, 3)`` and ``y`` is a uint8 array of shape
    ``(batch_size, height, width, 1)`` holding 0/1 values. ``img_size`` is
    given as ``(width, height)``, the order ``cv2.resize`` expects.

    Only full batches are produced; trailing samples that do not fill a
    batch are dropped (see ``__len__``).
    """

    # Rotation augmentation samples an angle uniformly from
    # [-MAX_ROTATION_DEG, MAX_ROTATION_DEG].
    MAX_ROTATION_DEG = 15

    def __init__(self,batch_size, input_img_paths, target_img_paths,img_size=(300,300)):
        """Store the batch configuration and the parallel path lists.

        Args:
            batch_size: Number of samples per batch.
            input_img_paths: Paths of the input images.
            target_img_paths: Paths of the masks, parallel to ``input_img_paths``.
            img_size: Target ``(width, height)`` every image and mask is resized to.
        """
        super().__init__()
        self.batch_size = batch_size
        self.img_size = img_size
        self.input_img_paths = input_img_paths
        self.target_img_paths = target_img_paths

    def __len__(self):
        """Return the number of full batches available."""
        return len(self.target_img_paths)//self.batch_size

    def __getitem__(self, idx):
        """Load, resize and augment batch number ``idx``.

        Raises:
            IndexError: If ``idx`` is outside ``[0, len(self))``.
            FileNotFoundError: If an image or mask cannot be read.
        """
        if not 0 <= idx < len(self):
            raise IndexError(f"batch index {idx} out of range")

        start = idx * self.batch_size
        batch_inp = self.input_img_paths[start:start + self.batch_size]
        batch_target = self.target_img_paths[start:start + self.batch_size]

        width, height = self.img_size
        x = np.zeros((self.batch_size, height, width, 3), dtype="float32")
        y = np.zeros((self.batch_size, height, width, 1), dtype="uint8")

        for j, (img_path, mask_path) in enumerate(zip(batch_inp, batch_target)):
            # cv2.imread signals failure by returning None instead of raising.
            img = cv2.imread(img_path)
            if img is None:
                raise FileNotFoundError(f"could not read image: {img_path}")
            mask = cv2.imread(mask_path, 0)
            if mask is None:
                raise FileNotFoundError(f"could not read mask: {mask_path}")

            img = cv2.resize(img, self.img_size)
            # Nearest-neighbour keeps label values intact; the default
            # bilinear filter blends labels with background at the borders.
            mask = cv2.resize(mask, self.img_size, interpolation=cv2.INTER_NEAREST)
            # Any non-zero label counts as foreground.
            mask = (mask > 0).astype("uint8")

            img, mask = self.augment(img, mask)
            x[j] = img
            y[j] = mask[..., np.newaxis]

        return x, y

    def augment(self,img,mask):
        """Randomly flip or rotate an image/mask pair with the same transform.

        One of five equally likely outcomes is drawn: horizontal flip,
        rotation, or (three out of five) no change.

        Args:
            img: Image array.
            mask: Mask array with the same height and width as ``img``.

        Returns:
            The ``(img, mask)`` pair after the selected transform.
        """
        flag = np.random.randint(0,5)
        if flag == 0:  # horizontal flip
            img = cv2.flip(img,1)
            mask = cv2.flip(mask,1)
        elif flag == 1:  # rotation in the range -15 to 15 degrees
            height, width = img.shape[:2]
            angle = float(np.random.uniform(-self.MAX_ROTATION_DEG, self.MAX_ROTATION_DEG))
            rotate_mat = cv2.getRotationMatrix2D(center=(width/2, height/2), angle=angle, scale=1)
            img = cv2.warpAffine(src=img, M=rotate_mat, dsize=(width, height))
            # Nearest-neighbour so the rotated mask stays strictly binary.
            mask = cv2.warpAffine(src=mask, M=rotate_mat, dsize=(width, height),
                                  flags=cv2.INTER_NEAREST)

        return img,mask

# Training and validation generators, both using a batch size of 16 and the
# generator's default image size.
data_gen_train = DataGenerator(
    batch_size=16,
    input_img_paths=data.img_path_train,
    target_img_paths=data.target_path_train,
)
dat_gen_test = DataGenerator(
    batch_size=16,
    input_img_paths=data.img_path_val,
    target_img_paths=data.target_path_val,
)

# for x,y in data_gen_train:
    # print(x.shape,y.shape)

## <font color='blue'>Model creation and Training.</font>

In [5]:
def weighted_pixelwise_cross_entropy(label, output):
    """Class-balanced binary cross-entropy computed from raw logits.

    Foreground pixels (``label > 0``) are weighted by the fraction of
    background pixels and background pixels (``label < 1``) by the fraction
    of foreground pixels, so the rarer class contributes more per pixel.
    The result is a sum over all pixels, not a mean.

    Args:
        label: Ground-truth mask tensor; values > 0 are foreground.
        output: Logits (pre-sigmoid) with the same shape as ``label``.

    Returns:
        Scalar loss tensor.
    """
    labels_pos = tf.cast(tf.greater(label, 0), tf.float32)
    labels_neg = tf.cast(tf.less(label, 1), tf.float32)

    num_labels_pos = tf.reduce_sum(labels_pos)
    num_labels_neg = tf.reduce_sum(labels_neg)
    num_total = num_labels_pos + num_labels_neg

    # -log(sigmoid(x)) == softplus(-x) and -log(1 - sigmoid(x)) == softplus(x).
    # Working on the logits directly avoids log(sigmoid(x) + eps), which
    # saturates and loses its gradient once a pixel is confidently wrong.
    loss_pos = tf.reduce_sum(tf.multiply(labels_pos, tf.math.softplus(-output)))
    loss_neg = tf.reduce_sum(tf.multiply(labels_neg, tf.math.softplus(output)))

    final_loss = num_labels_neg / num_total * loss_pos + num_labels_pos / num_total * loss_neg

    return final_loss

def VGG16(input_shape,weight='imagenet'):
    """Build the VGG16-style segmentation network with multi-scale side outputs.

    Five conv blocks (every conv followed by BatchNormalization) form the
    backbone. Right before each pooling layer a 16-channel 3x3 "aux" conv is
    taken from the block output. The aux outputs of blocks 2-5 are upsampled
    with transposed convolutions, cropped to the input height/width,
    concatenated along the channel axis and fused by a 1x1 conv into a
    single-channel map.

    Args:
        input_shape: Shape of the input image, passed to ``layers.Input``.
        weight: When equal to ``'imagenet'``, weights are loaded by layer name
            from the module-level ``weight_path``; any other value skips
            weight loading.
    Returns:
        A ``Model`` mapping the image input to the fused single-channel map.
        No sigmoid is applied to the output here.
    """

    # Each inner list is one block; ['conv', n] is a 3x3 conv with n filters,
    # ['pool'] is the 2x2 max-pool that closes the block.
    vgg_arch =[
        # block1
        [['conv', 64 ],['conv', 64 ],['pool']],
        # block2
        [['conv', 128],['conv', 128],['pool']],
        # block3
        [['conv', 256],['conv', 256],['conv', 256  ],['pool']],
        # block4
        [['conv', 512],['conv', 512],['conv', 512  ],['pool']],
        # block5
        [['conv', 512],['conv', 512],['conv', 512  ],['pool']],
    ]

    img_input = layers.Input(shape=input_shape)
    # Height/width of the input, used further down to crop the upsampled maps.
    _,h,w,_ = tf.shape(img_input)
    block_cnt = 0
    # One aux conv output per block, in block order (index 0 == block 1).
    aux_tensor = []
    for block in vgg_arch:
        block_cnt +=1
        lyr_cnt = 0
        for i,lyr in enumerate(block):
            lyr_cnt+=1
            if lyr[0] == 'conv':
                out_ch = lyr[1]
                # Layer names of the form block<N>_conv<M>; weights are loaded
                # by these names when weight == 'imagenet'.
                name = f'block{block_cnt}_conv{lyr_cnt}'
                if lyr_cnt == 1 and block_cnt == 1:
                    # Very first conv is the only one fed from the input tensor.
                    x = layers.Conv2D(out_ch,(3,3),padding='same',activation='relu',name=name)(img_input)
                    x = layers.BatchNormalization()(x)
                else:
                    x = layers.Conv2D(out_ch,(3,3),padding='same',activation= 'relu',name=name)(x)
                    x = layers.BatchNormalization()(x)


            elif lyr[0] == 'pool':
                # Tap the block output before it is pooled.
                aux_lyr = f'aux_lyr1_{block_cnt}'
                aux_lyr = layers.Conv2D(16,(3,3),padding='same',name=aux_lyr)(x)
                aux_tensor.append(aux_lyr)

                name = f'block{block_cnt}_pool'
                x = layers.MaxPooling2D((2, 2), strides=(2, 2), name=name)(x)

    ### Main Output ####
    stage = 'transposed_lyr_'

    # Only aux_tensor[1..4] (blocks 2-5) are used; aux_tensor[0] and the final
    # pooled `x` are not connected to the output.
    # Each side output is upsampled by the stride matching the number of
    # poolings before it (2, 4, 8, 16), then sliced to the first h rows and
    # w columns so all maps share the input's spatial size.
    tr_lyr2 = layers.Conv2DTranspose(16,(4,4),strides=2,name = stage+'1')(aux_tensor[1])
    tr_lyr2 = tr_lyr2[:,:h,:w,:]

    tr_lyr3 = layers.Conv2DTranspose(16,(8,8),strides=4,name = stage+'2')(aux_tensor[2])
    tr_lyr3 = tr_lyr3[:,:h,:w,:]

    tr_lyr4 = layers.Conv2DTranspose(16,(16,16),strides=8,name = stage+'3')(aux_tensor[3])
    tr_lyr4 = tr_lyr4[:,:h,:w,:]

    tr_lyr5 = layers.Conv2DTranspose(16,(32,32),strides=16,name = stage+'4')(aux_tensor[4])
    tr_lyr5 = tr_lyr5[:,:h,:w,:]

    # Stack the four 16-channel maps along the channel axis.
    concat = tf.concat([tr_lyr2,tr_lyr3,tr_lyr4,tr_lyr5],axis=3)

    # 1x1 conv fuses the stacked maps into one channel.
    output = layers.Conv2D(1,(1,1))(concat)
    # output = tf.nn.sigmoid(output)

    model = Model(inputs = img_input,outputs=output)

    if weight=='imagenet':
        # by_name=True: only layers whose names match entries in the weight
        # file receive weights.
        model.load_weights(weight_path,by_name=True)

    return model

# Build the network for arbitrary spatial sizes and load the pretrained weights.
model = VGG16(input_shape=(None, None, 3), weight='imagenet')

# Class-balanced cross-entropy defined earlier in this notebook.
loss = weighted_pixelwise_cross_entropy

# NOTE(review): the network output has no sigmoid applied; confirm that the
# 'accuracy' metric thresholds those values the way you intend.
model.compile(
    loss=loss,
    optimizer='adam',
    metrics=['accuracy'],
)
# model.fit(data_gen_train,epochs=100,verbose=1,validation_data=dat_gen_test
         # )



2021-10-16 14:26:45.262644: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


## <font color='blue'> Fine-tuning and Testing. </font>

In [6]:
def finetunning():
    """Placeholder for the fine-tuning step; not implemented yet."""
    # TODO: implement fine-tuning.
    return None

def testing():
    """Placeholder for the testing step; not implemented yet."""
    # TODO: implement testing.
    return None