## Mask R-CNN & GrabCut Example 
- Ref : https://www.pyimagesearch.com/2020/09/28/image-segmentation-with-mask-r-cnn-grabcut-and-opencv/

### Flow
- 1. Compute a pixel-wise segmentation mask for each object in the input image with Mask R-CNN (TensorFlow model)
- 2. Apply GrabCut to each object, initialized from that mask, to refine the segmentation

In [1]:
import cv2
import imutils
import numpy as np
import os

from matplotlib import pyplot as plt

### Parameters

In [3]:
# Parameter -- Input Files
IMG_PATH = 'example.jpg'  # Image file read with cv2.imread
IMG_RESIZE_WIDTH = 600    # Target width passed to imutils.resize after loading

In [4]:
# Parameter -- Mask R-CNN
MODEL_PATH = 'mask-rcnn-coco'  # Directory that holds the three model files below
MODEL_LABEL_PATH = os.path.join(MODEL_PATH, 'object_detection_classes_coco.txt')
MODEL_PB_PATH = os.path.join(MODEL_PATH, 'frozen_inference_graph.pb')
MODEL_PBTXT_PATH = os.path.join(MODEL_PATH, 'mask_rcnn_inception_v2_coco_2018_01_28.pbtxt')

USE_GPU = True   # When True, the CUDA backend/target is requested from cv2.dnn
CONF_TH = 0.5    # Object Detection Threshold
MIN_SEG_TH = 0   # Minimum Threshold Pixel-Wise Mask Segmentation
OUT_NODES = ['detection_out_final', 'detection_masks']  # Output layers requested from net.forward

In [5]:
# Parameter -- GrabCut (both values are read by grabcut_seg_mask)
ITER = 10                    # Iteration count passed to cv2.grabCut
MODE = cv2.GC_INIT_WITH_MASK # Grabcut mode (GC_INIT_WITH_RECT / GC_INIT_WITH_MASK)
# NOTE(review): grabcut_seg_mask passes None as the rectangle argument, so
# GC_INIT_WITH_RECT presumably cannot be used without changing that helper -- confirm.

### Grabcut Segmentation

In [6]:
def grabcut_seg_mask(img, mask_init):
    """Refine an initial segmentation mask with GrabCut.

    Uses the module-level ``ITER`` (iteration count) and ``MODE`` (GrabCut
    initialization mode) settings.

    Args:
        img: Image handed to ``cv2.grabCut``; only its first two dimensions
            are used here to size the mask (assumed to be the BGR image
            loaded by ``cv2.imread`` -- TODO confirm for other callers).
        mask_init: Array with the same height/width as ``img``. Pixels > 0
            are seeded as probable foreground, pixels == 0 as definite
            background.

    Returns:
        uint8 array of shape ``img.shape[:2]`` holding ``cv2.GC_*`` labels.
        When the seed has no foreground or no background pixels, the seed
        labelling itself is returned unrefined.
    """
    # Allocate memory for Grabcut
    mask = np.zeros(img.shape[:2], dtype='uint8')  # Output mask
    bg_model = np.zeros((1, 65), dtype='float')    # Internal use
    fg_model = np.zeros((1, 65), dtype='float')    # Internal use

    # Convert to Grabcut mask value
    fg_seed = mask_init > 0
    mask[fg_seed] = cv2.GC_PR_FGD
    mask[mask_init == 0] = cv2.GC_BGD

    # GrabCut fits one colour model to the foreground seeds and one to the
    # background seeds; OpenCV raises an assertion error when either set is
    # empty, so there is nothing to refine in that case.
    if not fg_seed.any() or fg_seed.all():
        return mask

    # Grabcut Segmentation (the updated colour models are not needed afterwards)
    mask, _, _ = cv2.grabCut(img, mask, None, bg_model, fg_model, ITER, MODE)

    return mask

### Load Image

In [7]:
# Read the input image and scale it to the configured width
img = cv2.imread(IMG_PATH)
if img is None:
    # cv2.imread reports a missing or unreadable file by returning None
    # instead of raising, which would otherwise fail later with an opaque error.
    raise FileNotFoundError('Could not read image: {!r}'.format(IMG_PATH))
img = imutils.resize(img, width=IMG_RESIZE_WIDTH)

### Load Model

In [8]:
# Load COCO label (90 class names), one name per line.
# The context manager closes the file handle as soon as it has been read.
with open(MODEL_LABEL_PATH) as label_file:
    labels = label_file.read().strip().split('\n')

In [10]:
# Build the Mask R-CNN network from the frozen graph (.pb) and its text
# graph description (.pbtxt)
dnn = cv2.dnn
net = dnn.readNetFromTensorflow(MODEL_PB_PATH, MODEL_PBTXT_PATH)

# Request the CUDA backend and target only when GPU use is enabled
if USE_GPU:
    net.setPreferableBackend(dnn.DNN_BACKEND_CUDA)
    net.setPreferableTarget(dnn.DNN_TARGET_CUDA)

### Run Mask R-CNN

In [11]:
# Inference
# Runs one forward pass and fetches the two outputs named in OUT_NODES, in that order.
# boxes -- (1, 1, #Objects, 7). For each object => (X, ClassID, Confidence, x0_n, y0_n, x1_n, y1_n)
#          x0_n..y1_n are scaled by the image width/height in the post-processing cell.
# masks -- (#Objects, #Class, 15, 15)
#          indexed as masks[object, class_id] in the post-processing cell.
# swapRB=True swaps the first and last channels (cv2.imread yields BGR order);
# crop=False leaves the image uncropped.
blob = cv2.dnn.blobFromImage(img, swapRB=True, crop=False)
net.setInput(blob)
boxes, masks = net.forward(OUT_NODES)

### Post Processing & Visualization

In [None]:
# Iterate over each object
# For every detection above CONF_TH, show three panels side by side:
# the input image, the Mask R-CNN segmentation, and the GrabCut-refined one.
# Any key advances to the next detection; ESC stops early.
cv2.namedWindow('Result', cv2.WINDOW_NORMAL)

h, w = img.shape[:2]
try:
    for i in range(boxes.shape[2]):
        # Extract Class ID & its Confidence
        class_id = int(boxes[0, 0, i, 1])
        conf = boxes[0, 0, i, 2]

        # Confidence Threshold
        if not conf > CONF_TH:
            continue

        # Extract bounding box (normalized corners scaled to pixel coordinates).
        # The builtin int replaces np.int, which was removed in NumPy 1.24.
        box = boxes[0, 0, i, 3:7] * np.array([w, h, w, h])
        x0, y0, x1, y1 = box.astype(int).tolist()
        bh, bw = y1 - y0, x1 - x0
        if bw <= 0 or bh <= 0:
            # Degenerate box: cv2.resize rejects an empty target size
            continue

        # Part of the box that actually lies inside the image
        cx0, cy0 = max(x0, 0), max(y0, 0)
        cx1, cy1 = min(x1, w), min(y1, h)
        if cx0 >= cx1 or cy0 >= cy1:
            # Box lies entirely outside the image
            continue

        # Extract & Resize Pixel-Wise Mask
        mask = masks[i, class_id]
        mask = cv2.resize(mask, (bw, bh), interpolation=cv2.INTER_CUBIC)
        mask = (mask > MIN_SEG_TH).astype(np.uint8) * 255

        # Mask R-CNN Result
        # Paste only the in-image part of the mask so that a box reaching past
        # the border cannot cause a shape mismatch or negative-index slicing.
        rcnn_mask = np.zeros(img.shape[:2], dtype=np.uint8)
        rcnn_mask[cy0:cy1, cx0:cx1] = mask[cy0 - y0:cy1 - y0, cx0 - x0:cx1 - x0]
        rcnn_out = cv2.bitwise_and(img, img, mask=rcnn_mask)

        # Apply Grabcut
        # Set Definite / Probable BG to 0 and Definite / Probable FG to 1
        grab_mask = grabcut_seg_mask(img, rcnn_mask.copy())
        grab_mask = np.where((grab_mask == cv2.GC_BGD) | (grab_mask == cv2.GC_PR_BGD), 0, 1)

        # Grabcut Result
        grab_out = cv2.bitwise_and(img, img, mask=(grab_mask * 255).astype('uint8'))

        # Show Result
        cv2.setWindowTitle('Result', labels[class_id])
        cv2.imshow('Result', np.hstack([img, rcnn_out, grab_out]))
        if cv2.waitKey(0) == 27:  # 27 == ESC
            break
finally:
    # Close the window even when a detection raises part-way through
    cv2.destroyAllWindows()