## Video Inference

Reference implementation for video prediction (the helper functions below are adapted from it): https://github.com/divamgupta/image-segmentation-keras/blob/master/keras_segmentation/predict.py

In [2]:
# Attach Google Drive to the Colab runtime so the model and videos stored
# there can be read through the local filesystem.
from google.colab import drive

drive_mount_point = '/content/gDrive'
drive.mount(drive_mount_point)

Mounted at /content/gDrive


In [4]:
!pip install opencv-python
!pip install keras-segmentation

Collecting keras-segmentation
  Downloading keras_segmentation-0.3.0.tar.gz (23 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting imageio==2.5.0 (from keras-segmentation)
  Downloading imageio-2.5.0-py3-none-any.whl (3.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.3/3.3 MB[0m [31m26.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting imgaug==0.2.9 (from keras-segmentation)
  Downloading imgaug-0.2.9-py2.py3-none-any.whl (753 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m753.3/753.3 kB[0m [31m39.5 MB/s[0m eta [36m0:00:00[0m
Building wheels for collected packages: keras-segmentation
  Building wheel for keras-segmentation (setup.py) ... [?25l[?25hdone
  Created wheel for keras-segmentation: filename=keras_segmentation-0.3.0-py3-none-any.whl size=29057 sha256=fd4187eeca6bc37613f01d4d6e30e317edabc292b421bca6b4c8ef7a1b4d4515
  Stored in directory: /root/.cache/pip/wheels/2a/87/5b/ddc104c262fe1280e0565a2cc1b9b6b04963af12

In [5]:

import cv2
from time import time
import numpy as np
from keras.models import load_model
from keras_segmentation.predict import predict


In [50]:
# Restore the trained U-Net from the project folder on Drive.
unet_model_path = '/content/gDrive/MyDrive/AI_bootcamp/project03_triwahyu/cityscapes_unet.h5'
model_unet = load_model(unet_model_path)

# The serialized architecture lists every layer together with its settings.
config = model_unet.get_config()
layer_configs = config['layers']

# Assumption: the last listed layer is the output layer, so its trailing
# output dimension is the number of classes.
output_layer_config = layer_configs[-1]['config']
n_classes = model_unet.layers[-1].output_shape[-1]

# Assumption: the first listed layer is the input layer and its
# batch_input_shape is laid out as (batch, height, width, ...).
input_layer_config = layer_configs[0]['config']
input_height, input_width = input_layer_config['batch_input_shape'][1:3]

print("n_classes:", n_classes)
print("input_height:", input_height)
print("input_width:", input_width)

n_classes: 34
input_height: 256
input_width: 256


In [51]:
# Print the layer-by-layer table (layer type, output shape, parameter count,
# inbound connections) to sanity-check the architecture that was loaded.
model_unet.summary()


Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 256, 256, 3)]        0         []                            
                                                                                                  
 conv2d (Conv2D)             (None, 256, 256, 64)         1792      ['input_1[0][0]']             
                                                                                                  
 batch_normalization (Batch  (None, 256, 256, 64)         256       ['conv2d[0][0]']              
 Normalization)                                                                                   
                                                                                                  
 conv2d_1 (Conv2D)           (None, 256, 256, 64)         36928     ['batch_normalization[0][0

In [52]:
def set_video(inp, video_name):
    """Open a video source and, optionally, a writer with the same geometry.

    Args:
        inp: Source handed to ``cv2.VideoCapture`` (e.g. a file path).
        video_name: Destination file for the writer, or ``None`` when no
            output file is wanted.

    Returns:
        A ``(cap, video, fps)`` tuple: the opened capture, the
        ``cv2.VideoWriter`` (``None`` when ``video_name`` is ``None``) and the
        source frame rate truncated to an ``int``.

    Raises:
        IOError: If the source cannot be opened.
    """
    cap = cv2.VideoCapture(inp)
    if not cap.isOpened():
        # Fail loudly instead of building a 0x0 writer for an unreadable source.
        cap.release()
        raise IOError("Could not open video source: {}".format(inp))

    source_fps = cap.get(cv2.CAP_PROP_FPS)
    fps = int(source_fps)
    video_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    video_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    size = (video_width, video_height)

    video = None
    if video_name is not None:
        fourcc = cv2.VideoWriter_fourcc(*"XVID")
        # Give the writer the untruncated rate so that fractional rates
        # (e.g. 29.97) are not rounded down and the output keeps its duration.
        video = cv2.VideoWriter(video_name, fourcc, source_fps, size)
    return cap, video, fps

In [53]:
import random

# Palette used to paint class ids: 5000 triples of random ints in 0..255,
# indexed by class id. The values change on every run because no seed is set.
class_colors = [
    tuple(random.randint(0, 255) for _channel in range(3))
    for _class_id in range(5000)
]

In [54]:
def get_colored_segmentation_image(seg_arr, n_classes, colors=class_colors):
    """Paint a class-id map with one colour per class.

    Args:
        seg_arr: Array of class ids indexed as ``[row, col]``.
        n_classes: Only ids in ``range(n_classes)`` are painted; pixels with
            any other id stay zero.
        colors: Sequence indexable as ``colors[class_id][channel]`` for
            channels 0, 1 and 2.

    Returns:
        Array of shape ``(rows, cols, 3)`` allocated with ``np.zeros``
        (default float dtype) holding the painted image.
    """
    rows, cols = seg_arr.shape[0], seg_arr.shape[1]
    canvas = np.zeros((rows, cols, 3))

    for class_id in range(n_classes):
        class_mask = seg_arr[:, :] == class_id
        class_color = colors[class_id]
        # The boolean mask times a channel value gives that value where the
        # class is present and 0 elsewhere; accumulate it per channel.
        for channel in range(3):
            canvas[:, :, channel] += (class_mask * class_color[channel]).astype('uint8')

    return canvas

In [55]:
def overlay_seg_image(inp_img, seg_img):
    """Blend a segmentation image over an input image with equal weights.

    Args:
        inp_img: Image whose height and width define the output size.
        seg_img: Segmentation image; resized to ``inp_img``'s size with
            nearest-neighbour interpolation before blending.

    Returns:
        ``uint8`` array equal to half of each image added together.
    """
    target_h, target_w = inp_img.shape[0], inp_img.shape[1]
    resized_seg = cv2.resize(seg_img, (target_w, target_h), interpolation=cv2.INTER_NEAREST)

    blended = inp_img / 2 + resized_seg / 2
    return blended.astype('uint8')

In [56]:
def get_legends(class_names, colors=class_colors):
    """Draw a legend strip: one 25-pixel row per class with name and swatch.

    Args:
        class_names: Sequence of label strings, one per class.
        colors: Sequence of colour triples; entry ``i`` is the swatch colour
            for ``class_names[i]``.

    Returns:
        ``uint8`` image of shape ``(len(class_names) * 25 + 25, 125, 3)`` on a
        white (255) background.
    """
    n_classes = len(class_names)
    legend = np.full((n_classes * 25 + 25, 125, 3), 255, dtype="uint8")

    for row, (label, color) in enumerate(zip(class_names, colors)):
        top = row * 25
        swatch = tuple(int(component) for component in color)
        # Label text in black on the left, filled colour swatch on the right.
        cv2.putText(legend, label, (5, top + 17),
                    cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 0, 0), 1)
        cv2.rectangle(legend, (100, top), (125, top + 25), swatch, -1)

    return legend

In [57]:
def concat_lenends(seg_img, legend_img):
    """Put the legend and the segmentation image side by side on one canvas.

    The legend occupies the left columns and the segmentation image the
    columns to its right, both aligned to the top edge. The canvas is as tall
    as the taller of the two, and any area not covered by either image is
    filled with the value of the legend's top-left pixel (first channel).

    Args:
        seg_img: Image array of shape ``(h, w, 3)``.
        legend_img: Legend array of shape ``(h, w, 3)``.

    Returns:
        The combined image.
    """
    legend_h, legend_w = legend_img.shape[0], legend_img.shape[1]
    seg_h, seg_w = seg_img.shape[0], seg_img.shape[1]

    canvas_shape = (max(seg_h, legend_h), seg_w + legend_w, 3)
    background = legend_img[0, 0, 0]
    canvas = np.zeros(canvas_shape, dtype='uint8') + background

    canvas[:legend_h, :legend_w] = np.copy(legend_img)
    canvas[:seg_h, legend_w:] = np.copy(seg_img)

    return canvas

In [58]:
def visualize_segmentation(seg_arr, inp_img=None, n_classes=None,
                           colors=class_colors, class_names=None,
                           overlay_img=False, show_legends=False,
                           prediction_width=None, prediction_height=None):
    """Turn a class-id map into a coloured image, optionally overlaid/annotated.

    Args:
        seg_arr: Array of class ids indexed as ``[row, col]``.
        inp_img: Optional source image. When given, the coloured map is
            resized to its height and width.
        n_classes: Number of classes to paint. When ``None`` it is derived
            from the largest id found in ``seg_arr``.
        colors: Palette indexed by class id.
        class_names: Labels for the legend; required when ``show_legends``.
        overlay_img: Blend the coloured map over ``inp_img`` (requires
            ``inp_img``).
        show_legends: Attach a legend strip to the left of the result.
        prediction_width: Optional output width; applied only together with
            ``prediction_height``.
        prediction_height: Optional output height; applied only together with
            ``prediction_width``.

    Returns:
        The rendered image array.

    Raises:
        AssertionError: If ``overlay_img`` is set without ``inp_img`` or
            ``show_legends`` is set without ``class_names``.
    """
    if n_classes is None:
        # Class ids are zero-based, so the class count is the largest id plus
        # one; the bare maximum would leave the highest class unpainted.
        n_classes = int(np.max(seg_arr)) + 1

    seg_img = get_colored_segmentation_image(seg_arr, n_classes, colors=colors)

    if inp_img is not None:
        original_h = inp_img.shape[0]
        original_w = inp_img.shape[1]
        seg_img = cv2.resize(seg_img, (original_w, original_h), interpolation=cv2.INTER_NEAREST)

    if (prediction_height is not None) and (prediction_width is not None):
        # Nearest-neighbour keeps the class colours crisp (no blended hues).
        seg_img = cv2.resize(seg_img, (prediction_width, prediction_height), interpolation=cv2.INTER_NEAREST)
        if inp_img is not None:
            inp_img = cv2.resize(inp_img,
                                 (prediction_width, prediction_height))

    if overlay_img:
        assert inp_img is not None
        seg_img = overlay_seg_image(inp_img, seg_img)

    if show_legends:
        assert class_names is not None
        legend_img = get_legends(class_names, colors=colors)

        seg_img = concat_lenends(seg_img, legend_img)

    return seg_img

In [78]:
def predict_video(model=None, inp=None, output=None,
                  checkpoints_path=None, display=False, overlay_img=True,
                  class_names=None, show_legends=False, colors=class_colors,
                  prediction_width=None, prediction_height=None):
    """Segment a video frame by frame and optionally save/display the result.

    Each frame is resized to the model's input size, passed through
    ``model.predict``, reduced to a class-id map with ``argmax`` and rendered
    with ``visualize_segmentation`` on top of the original frame.

    Args:
        model: Loaded Keras model. Assumed to take a single channels-last
            image input and to return per-pixel class scores shaped
            ``(1, height, width, n_classes)`` -- TODO confirm for other models.
        inp: Video source handed to ``cv2.VideoCapture``.
        output: Path of the video file to write, or ``None`` to write nothing.
        checkpoints_path: Accepted for signature compatibility; not used.
        display: Show every rendered frame with ``cv2.imshow``; press ``q``
            to stop early.
        overlay_img: Blend the coloured map over the source frame.
        class_names: Labels for the legend; required when ``show_legends``.
        show_legends: Attach a legend strip to every rendered frame.
        colors: Palette indexed by class id.
        prediction_width: Optional width forwarded to
            ``visualize_segmentation``.
        prediction_height: Optional height forwarded to
            ``visualize_segmentation``.

    Raises:
        ValueError: If ``model`` is ``None``.
    """
    if model is None:
        raise ValueError("predict_video requires a loaded model")

    # (batch, height, width, channels) for a single channels-last input.
    input_height, input_width = model.input_shape[1:3]
    # Dynamic spatial dimensions are reported as None; fall back to the
    # 256x256 size this function has always fed to the model.
    input_height = input_height or 256
    input_width = input_width or 256

    if output is not None:
        cap, video, fps = set_video(inp, output)
    else:
        # set_video builds a writer for its second argument, so open the
        # capture directly when nothing is going to be saved.
        cap = cv2.VideoCapture(inp)
        video = None
        fps = int(cap.get(cv2.CAP_PROP_FPS))

    # set_video sizes the writer from these same capture properties, and the
    # writer expects every frame it receives to have exactly that size.
    writer_size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
                   int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))

    try:
        while cap.isOpened():
            prev_time = time()
            ret, frame = cap.read()
            if not ret or frame is None:
                break

            # Only the copy fed to the network is resized; the original frame
            # is kept so the rendering comes out at source resolution.
            model_input = cv2.resize(frame, (input_width, input_height))
            scores = model.predict(np.expand_dims(model_input, axis=0))
            # The trailing axis holds one score per class, so its length is
            # the class count and every class gets its palette colour.
            n_classes = scores.shape[-1]
            pr = np.argmax(scores[0], axis=-1)

            fused_img = visualize_segmentation(
                pr, frame, n_classes=n_classes,
                colors=colors,
                overlay_img=overlay_img,
                show_legends=show_legends,
                class_names=class_names,
                prediction_width=prediction_width,
                prediction_height=prediction_height
            )
            # Without overlay or legend the rendering is a float array;
            # writing and displaying both need 8-bit pixels.
            if fused_img.dtype != np.uint8:
                fused_img = fused_img.astype('uint8')

            print("FPS: {}".format(1/(time() - prev_time)))
            if video is not None:
                out_frame = fused_img
                # A legend or an explicit prediction size changes the frame
                # geometry; bring it back to what the writer was opened with.
                if (out_frame.shape[1], out_frame.shape[0]) != writer_size:
                    out_frame = cv2.resize(out_frame, writer_size)
                video.write(out_frame)
            if display:
                cv2.imshow('Frame masked', fused_img)
                # waitKey(0) would block until a key press, so wait >= 1 ms.
                if cv2.waitKey(max(fps, 1)) & 0xFF == ord('q'):
                    break
    finally:
        # Release the handles even when prediction or rendering fails, so the
        # output file is finalized and the source is not left locked.
        cap.release()
        if video is not None:
            video.release()
        cv2.destroyAllWindows()


In [79]:
# Segment the raw clip with the loaded U-Net and save the rendered video
# next to the other training results on Drive.
raw_video_path = '/content/gDrive/MyDrive/AI_bootcamp/project03_triwahyu/raw_video.mp4'
segmented_video_path = '/content/gDrive/MyDrive/AI_bootcamp/project03_triwahyu/train_results/cityscapes_video.mp4'

predict_video(
    model=model_unet,
    inp=raw_video_path,
    output=segmented_video_path,
)


FPS: 9.254985723553268
FPS: 10.330009285005332
FPS: 10.859517441544353
FPS: 11.008787493831957
FPS: 11.08147224416574
FPS: 10.309721505297054
FPS: 9.314941691447709
FPS: 9.972595183352633
FPS: 10.086898804041201
FPS: 10.886265715679862
FPS: 11.023804329828085
FPS: 10.670492908478026
FPS: 10.881803228501305
FPS: 10.998827820769337
FPS: 11.246350697684395
FPS: 10.260741929486365
FPS: 11.19481136481924
FPS: 10.803741110226749
FPS: 10.977784291965913
FPS: 10.708223339886134
FPS: 11.201508381827844
FPS: 10.89772110195672
FPS: 10.838610980469173
FPS: 10.474290651736348
FPS: 10.410595500486487
FPS: 10.79617708153689
FPS: 10.819933650805115
FPS: 10.215334555967159
FPS: 10.244827274570293
FPS: 10.423350190732986
FPS: 10.691899828697284
FPS: 9.899464230923552
FPS: 11.018186307861644
FPS: 9.816449904393679
FPS: 10.849461058327794
FPS: 10.642416368950958
FPS: 11.054806146385177
FPS: 10.483191618053576
FPS: 10.186580141251445
FPS: 9.703691022075802
FPS: 10.0013687099551
FPS: 10.799290398494286
FPS: