### Semantic Video (and Image) segmentation using DeepLab-v3

In [86]:
# import requisite Packages
import numpy as np
import tensorflow as tf
import cv2 as cv
import matplotlib.pyplot as plt

In [87]:
# Object classes the model is able to segment. The position of a label in this
# list is the integer the rest of the notebook matches against the model's
# segmentation map.
LABELS = [
    'background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus',
    'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike',
    'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tv',
]

# Lookup table: label name -> integer id (its index in LABELS).
encodings = dict(zip(LABELS, range(len(LABELS))))

class DeepLabModel(object):
    """Loads a frozen DeepLab graph and runs segmentation inference with it.

    Attributes:
      graph: the `tf.Graph` the frozen graph definition is imported into.
      sess: the `tf.compat.v1.Session` bound to `graph`, used by `run`.
    """

    # Names of the tensors fed to / fetched from the imported graph.
    INPUT_TENSOR_NAME = 'ImageTensor:0'
    OUTPUT_TENSOR_NAME = 'SemanticPredictions:0'
    # Length (in pixels) the longer image side is scaled to before inference.
    INPUT_SIZE = 513

    def __init__(self, frozen_graph):
        """Creates and loads pretrained deeplab model.

        Args:
          frozen_graph: path to a serialized (frozen) GraphDef `.pb` file.

        Raises:
          RuntimeError: if the file parses to a graph definition with no nodes
            (for example, an empty file).
        """
        self.graph = tf.Graph()

        graph_def = tf.compat.v1.GraphDef()
        with tf.io.gfile.GFile(frozen_graph, 'rb') as f:
            graph_def.ParseFromString(f.read())

        # A GraphDef object always exists at this point, so test for actual
        # content instead of comparing against None.
        if not graph_def.node:
            raise RuntimeError(
                'No inference graph found in {!r}.'.format(frozen_graph))

        with self.graph.as_default():
            tf.import_graph_def(graph_def, name='')

        self.sess = tf.compat.v1.Session(graph=self.graph)

    def run(self, image):
        """Runs inference on a single image.

        Args:
          image: A numpy array, raw input image; `shape[0]` is read as the
            height and `shape[1]` as the width.

        Returns:
          resized_image: input image resized so that its longer side is
            `INPUT_SIZE` pixels (aspect ratio preserved, up to truncation).
          seg_map: Segmentation map of `resized_image`.

        Raises:
          ValueError: if the image has a zero-sized width or height.
        """
        width, height = image.shape[1::-1]
        if width == 0 or height == 0:
            raise ValueError(
                'Cannot run inference on an empty image '
                '(width={}, height={}).'.format(width, height))

        resize_ratio = 1.0 * self.INPUT_SIZE / max(width, height)
        # Clamp to at least one pixel: for very elongated images the shorter
        # side would otherwise truncate to 0, which cv.resize rejects.
        target_size = (max(1, int(resize_ratio * width)),
                       max(1, int(resize_ratio * height)))
        resized_image = cv.resize(image, target_size, interpolation=cv.INTER_AREA)

        # The graph is fed a batch; wrap the single image in a list and unwrap
        # the single result afterwards.
        batch_seg_map = self.sess.run(
            self.OUTPUT_TENSOR_NAME,
            feed_dict={self.INPUT_TENSOR_NAME: [np.asarray(resized_image)]})
        seg_map = batch_seg_map[0]
        return resized_image, seg_map

In [88]:
def get_segmented(image, interest_labels_encoded):
    """Keep only the pixels that belong to the requested classes.

    Uses the module-level `model` (a loaded DeepLabModel) for inference, so
    that global must be defined before this function is called.

    Args:
        image: numpy array, RGB format
        interest_labels_encoded: integer class ids to keep; pixels whose
            predicted id is not in this collection are zeroed out.

    Returns:
        resized-segmented-image (nd array): the resized image returned by the
        model with every pixel outside the requested classes set to zero.
    """
    resized, class_map = model.run(image)
    # Non-zero wherever the predicted class is one of the requested ids.
    keep = np.isin(class_map, interest_labels_encoded).astype(np.uint8)
    return cv.bitwise_and(resized, resized, mask=keep)

def on_image(path, interest_labels, output_path='segmented.jpg'):
    '''
    Segment a single image file and save the result.

    Args:
        path: image path with extension(jpg/png)
        interest_labels: list of labels which you want the model to identify;
            each must be a key of `encodings`
        output_path: where the segmented image is written; defaults to
            'segmented.jpg' in the current directory

    Returns:
        None. Saves the segmented-image to `output_path`.

    Raises:
        KeyError: if a label is not present in `encodings`.
        OSError: if the input image cannot be read or the output cannot be
            written.
    '''
    interest_labels_encoded = [encodings[n] for n in interest_labels]

    image = cv.imread(path)
    # cv.imread signals failure by returning None rather than raising.
    if image is None:
        raise OSError('Could not read image: {!r}'.format(path))

    image = cv.cvtColor(image, cv.COLOR_BGR2RGB)  # get_segmented() expects RGB
    segmented = get_segmented(image, interest_labels_encoded)
    segmented = cv.cvtColor(segmented, cv.COLOR_RGB2BGR)  # imwrite() expects BGR

    # cv.imwrite reports failure through its boolean return value.
    if not cv.imwrite(output_path, segmented):
        raise OSError('Could not write image: {!r}'.format(output_path))
    print('Done!')
    
def on_video(path, interest_labels, fps=30, codec='XVID',
             output_path='segmented.mp4'):
    '''
    Segment every frame of a video file and save the result as a new video.

    Args:
        path: video path with extension(mp4, etc)
        interest_labels: list of labels which you want the model to identify;
            each must be a key of `encodings`
        fps: desired FPS(Frames Per Second) of the output video
        codec: four-character codec code passed to cv.VideoWriter_fourcc
        output_path: where the segmented video is written; defaults to
            'segmented.mp4' in the current directory

    Returns:
        None. Saves the segmented-video to `output_path`.

    Raises:
        KeyError: if a label is not present in `encodings`.
        ValueError: if `codec` is not exactly four characters long.
        OSError: if the video cannot be opened or yields no frames.
    '''
    if len(codec) != 4:
        raise ValueError(
            'codec must be a four-character code, got {!r}'.format(codec))
    interest_labels_encoded = [encodings[n] for n in interest_labels]

    cap = cv.VideoCapture(path)
    if not cap.isOpened():
        cap.release()
        raise OSError('Could not open video: {!r}'.format(path))

    fourcc = cv.VideoWriter_fourcc(*codec)
    out = None  # created lazily, once the segmented frame size is known
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            frame = cv.cvtColor(frame, cv.COLOR_BGR2RGB)  # get_segmented() expects RGB
            segmented = get_segmented(frame, interest_labels_encoded)
            segmented = cv.cvtColor(segmented, cv.COLOR_RGB2BGR)  # write() expects BGR
            if out is None:
                # The writer takes (width, height); shape is (height, width, ...).
                height, width = segmented.shape[:2]
                out = cv.VideoWriter(output_path, fourcc, fps, (width, height))
            out.write(segmented)
    finally:
        # Release the handles even if inference or writing fails mid-way.
        cap.release()
        if out is not None:
            out.release()

    if out is None:
        raise OSError('No frames could be read from video: {!r}'.format(path))
    print("Done!")

### Load model into the memory

In [89]:
# Choose exactly one frozen graph (download links are in the README file).

# Option 1: MobileNetV2 backbone -- small size, relatively low accuracy.
# frozen_graph = 'deeplabv3_mnv2_pascal_trainval/frozen_inference_graph.pb'

# Option 2: Xception backbone -- large size, relatively better accuracy.
frozen_graph = 'deeplabv3_pascal_trainval/frozen_inference_graph.pb'

In [90]:
# Load the selected frozen graph. `model` is the global that get_segmented()
# calls, so this cell must run before any inference cell.
model = DeepLabModel(frozen_graph)
print('model loaded successfully!')

model loaded successfully!


#### Choose interest-labels from the list below

In [91]:
# Show every label that can be chosen (the bare expression is the cell output).
np.array(LABELS)

array(['background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
       'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse',
       'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train',
       'tv'], dtype='<U11')

In [92]:
# Objects to keep in the segmented output; each entry must be one of LABELS,
# because on_image()/on_video() look the names up in `encodings`.
interest_labels = ['person', 'cow']

## Inference

##### On Image

In [93]:
# Path of the image to process
img = 'test/img.JPG'

on_image(img, interest_labels) # saves the result as 'segmented.jpg' in the current directory

Done!


##### On Video

In [94]:
# Path of the video to process
vid = 'test/test.mp4'

on_video(vid, interest_labels, fps=25) # saves the result as 'segmented.mp4' in the current directory

Done!
