# <font color = 'blue'> One Shot Video Object Segmentation</font>
The implementation is based on the paper [https://arxiv.org/abs/1611.05198](https://arxiv.org/abs/1611.05198). VGG16 with skip connections is used as the base network. The whole implementation is written in TensorFlow Keras. The dataset used for training is DAVIS. The pipeline followed in the notebook is:
- Import of module and packages.
- Data Processing and analysis.
- DataGenerator along with Augmentation function.
- Model Creation.
- Training Script.
- Testing and fine-tuning on videos.

 ## <font color='blue'>Import of different modules and packages.</font>

In [1]:
import numpy as np
import tensorflow as tf
import cv2
import os
import glob
import tensorflow as tf
from tensorflow.keras import layers, Model
from tensorflow import keras

# NOTE: machine-specific absolute paths; adjust them to the local DAVIS 2017 copy.
# Folder that is searched recursively for the .jpg training frames.
img_folder_path = "/Users/tangerine/PycharmProjects/dataset/DAVIS2017/Train"
# Folder holding the annotation masks, looked up as <sub-folder>/<frame name>.png.
img_annotation_path = "/Users/tangerine/PycharmProjects/dataset/DAVIS2017/Train_Annotated/"
# Parent directory of the two folders above.
data = "/Users/tangerine/PycharmProjects/dataset/DAVIS2017/"

## <font color='blue'> Data Processing and analysis.</font>

In [2]:
# to do Data Analysis

def data_analysis(img_folder_path, img_annotation_path, visualize=False, max_files=100):
    """Pair each ``.jpg`` frame under ``img_folder_path`` with its mask path.

    The frame folder is searched recursively.  For a frame
    ``<img_folder_path>/.../<sequence>/<name>.jpg`` the annotation path is
    built as ``<img_annotation_path>/<sequence>/<name>.png``; the annotation
    file is not required to exist when ``visualize`` is false.

    Args:
        img_folder_path: Root folder containing the ``.jpg`` frames.
        img_annotation_path: Root folder containing the ``.png`` masks.
        visualize: When true, briefly display every readable mask
            (binarised to 0/255) in an OpenCV window.
        max_files: Maximum number of frames to return; ``None`` disables the
            cap.  Defaults to 100, the limit that used to be hard-coded.

    Returns:
        A list of ``[frame_path, annotation_path]`` pairs, ordered by frame
        path so the selection is reproducible across runs and machines.
    """
    pattern = os.path.join(img_folder_path, '**', '*.jpg')
    # Sorting makes the (possibly truncated) selection deterministic;
    # glob itself returns entries in arbitrary filesystem order.
    frame_paths = sorted(glob.iglob(pattern, recursive=True))
    if max_files is not None:
        frame_paths = frame_paths[:max_files]

    image_files = []
    for filename in frame_paths:
        sequence = os.path.basename(os.path.dirname(filename))
        stem = os.path.splitext(os.path.basename(filename))[0]
        annotation_file = os.path.join(img_annotation_path, sequence, stem + '.png')
        image_files.append([filename, annotation_file])

    if visualize:
        for _, annotation_file in image_files:
            image_mask = cv2.imread(annotation_file, 0)
            if image_mask is None:
                # Missing or unreadable annotation: nothing to show.
                continue
            # uint8 0/255 is the range cv2.imshow displays without rescaling.
            mask = np.where(image_mask > 0, 255, 0).astype('uint8')
            cv2.imshow('image', mask)
            cv2.waitKey(100)
        # Close the preview window once, after the last mask was shown.
        cv2.destroyAllWindows()
    return image_files
# Build the list of [frame_path, annotation_path] pairs for the configured dataset folders.
image_data_list = data_analysis(img_folder_path,img_annotation_path)


## <font color='blue'>DataGenerator and Augmentation</font>

In [3]:
class DataGenerator(keras.utils.Sequence):
    """Keras ``Sequence`` that yields batches of frames and binary masks.

    Args:
        batch_size: Number of samples per batch.
        input_img_paths: Paths of the input frames.
        target_img_paths: Paths of the annotation masks, index-aligned with
            ``input_img_paths``.
        img_size: ``(height, width)`` every frame and mask is resized to.
    """

    def __init__(self,batch_size, input_img_paths, target_img_paths,img_size=(224,224)):
        super().__init__()
        self.batch_size = batch_size
        self.img_size = img_size
        self.input_img_paths = input_img_paths
        self.target_img_paths = target_img_paths

    def __len__(self):
        """Return the number of full batches; a trailing partial batch is dropped."""
        return len(self.target_img_paths)//self.batch_size

    @staticmethod
    def _read(path, flags):
        """Load ``path`` with OpenCV, raising instead of returning ``None``."""
        img = cv2.imread(path, flags)
        if img is None:
            raise FileNotFoundError(f"could not read image: {path}")
        return img

    def __getitem__(self, idx):
        """Return batch ``idx`` as ``(x, y)``.

        ``x`` is float32 with shape ``(batch, height, width, 3)`` holding the
        resized frames exactly as decoded by ``cv2.imread`` (no scaling).
        ``y`` is uint8 with shape ``(batch, height, width, 1)``; a pixel is 1
        where the annotation is non-zero and 0 elsewhere.

        Raises:
            FileNotFoundError: If a frame or mask cannot be read.
        """
        start = idx * self.batch_size
        stop = start + self.batch_size
        batch_inp = self.input_img_paths[start:stop]
        batch_target = self.target_img_paths[start:stop]

        # cv2.resize expects (width, height) while img_size is (height, width).
        dsize = (self.img_size[1], self.img_size[0])

        x = np.zeros((self.batch_size,) + self.img_size + (3,), dtype="float32")
        for j, path in enumerate(batch_inp):
            x[j] = cv2.resize(self._read(path, cv2.IMREAD_COLOR), dsize)

        y = np.zeros((self.batch_size,) + self.img_size + (1,), dtype="uint8")
        # Labels come from the annotation paths, not from the input frames.
        for j, path in enumerate(batch_target):
            mask = self._read(path, cv2.IMREAD_GRAYSCALE)
            # Nearest-neighbour keeps label values intact; interpolating
            # before thresholding would thicken the object boundary.
            mask = cv2.resize(mask, dsize, interpolation=cv2.INTER_NEAREST)
            y[j] = (mask > 0)[..., np.newaxis]

        return x, y
    
# Split the [frame, annotation] pairs into the two parallel path lists
# that the data generator takes as separate arguments.
input_img_paths = [pair[0] for pair in image_data_list]
target_img_paths = [pair[1] for pair in image_data_list]

# Training generator producing batches of five samples.
data_gen_train = DataGenerator(
    batch_size=5,
    input_img_paths=input_img_paths,
    target_img_paths=target_img_paths,
)

# Testing the data generator
# for item in data_gen_train:
#     print(item.shape)
    

## <font color='blue'>Model creation.</font>

In [4]:
# loss either pixelwise Binary cross entropy or class balancing pixelwise cross entropy

def vgg16Vos(weights='Imagenet',input_shape = None,pooling = None):
    """Build the VGG16-based segmentation network with upsampled side outputs.

    The backbone consists of the five VGG16 convolution blocks (3x3
    convolutions with ReLU, each block followed by 2x2 max pooling).  After
    the last convolution of blocks 2-5 a 16-channel 3x3 convolution is
    branched off, upsampled with a transposed convolution (stride 2, 4, 8 and
    16 respectively), cropped to the input size, and the four maps are
    concatenated and fused by a final 1x1 convolution.

    Args:
        weights: Accepted for interface compatibility but currently NOT used:
            all layers are created with the Keras default initializers and no
            pretrained weights are loaded.
        input_shape: ``(height, width, channels)`` of the input image.
            Required.  If height or width is ``None`` the crop falls back to
            the previously hard-coded 224 pixels.
        pooling: Currently unused.

    Returns:
        A ``keras.Model`` mapping an image batch to a single-channel map with
        the spatial size of the input.  The final layer has no activation,
        so the outputs are raw scores (logits), not probabilities.

    Raises:
        ValueError: If ``input_shape`` is ``None``.
    """
    if input_shape is None:
        raise ValueError("input_shape must be given as (height, width, channels)")

    # (filters, number of 3x3 convolutions) for each VGG16 block.
    vgg_blocks = [(64, 2), (128, 2), (256, 3), (512, 3), (512, 3)]

    # Size the upsampled side outputs are cropped to.
    crop_h = input_shape[0] if input_shape[0] is not None else 224
    crop_w = input_shape[1] if input_shape[1] is not None else 224

    ####. Model Defn ########
    img_input = layers.Input(shape=input_shape)
    x = img_input
    side_outputs = []  # (block number, side tensor) for blocks 2-5
    for block_no, (filters, n_convs) in enumerate(vgg_blocks, start=1):
        for conv_no in range(1, n_convs + 1):
            # VGG16 applies a ReLU after every convolution.
            x = layers.Conv2D(filters, (3, 3), padding='same', activation='relu',
                              name=f'block{block_no}_conv{conv_no}')(x)

        # Block 1 is already at input resolution and is not part of the
        # fused output, so no side branch is created for it.
        if block_no > 1:
            side = layers.Conv2D(16, (3, 3), padding='same',
                                 name=f'aux_lyr1_{block_no}')(x)
            side_outputs.append((block_no, side))

        x = layers.MaxPooling2D((2, 2), strides=(2, 2),
                                name=f'block{block_no}_pool')(x)

    ### Main Output ####
    stage = 'transposed_lyr_'
    upsampled = []
    for idx, (block_no, side) in enumerate(side_outputs, start=1):
        # The side tensor of block b sits behind b-1 poolings, i.e. at
        # 1/2**(b-1) of the input resolution.
        stride = 2 ** (block_no - 1)
        up = layers.Conv2DTranspose(16, (2 * stride, 2 * stride), strides=stride,
                                    name=f'{stage}{idx}')(side)
        # With the default 'valid' padding the result overshoots the input
        # size; keep the top-left input-sized window.
        upsampled.append(up[:, :crop_h, :crop_w, :])

    concat = tf.concat(upsampled, axis=3, name='concat_layer')

    # Linear 1x1 fusion: emits logits, pair it with a from_logits=True loss
    # or apply a sigmoid downstream.
    output = layers.Conv2D(1, (1, 1), name='output')(concat)

    model = Model(inputs=img_input, outputs=output)
    return model

model = vgg16Vos(None,(224,224,3))
model.summary()
# The network ends in a 1x1 Conv2D without activation, so it emits raw logits.
# The loss therefore has to apply the sigmoid itself (from_logits=True), and
# the accuracy threshold is 0.0 in logit space (equivalent to probability 0.5).
model.compile(optimizer='adam',
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              metrics=[tf.keras.metrics.BinaryAccuracy(name='accuracy', threshold=0.0)])

# model.fit(data_gen_train,epochs=100,verbose=1
         # )



2021-10-14 08:06:28.281293: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 224, 224, 3) 0                                            
__________________________________________________________________________________________________
block1_conv1 (Conv2D)           (None, 224, 224, 64) 1792        input_1[0][0]                    
__________________________________________________________________________________________________
block1_conv2 (Conv2D)           (None, 224, 224, 64) 36928       block1_conv1[0][0]               
__________________________________________________________________________________________________
block1_pool (MaxPooling2D)      (None, 112, 112, 64) 0           block1_conv2[0][0]               
______________________________________________________________________________________________