### Obtain bounding boxes from instance and semantic segmentation maps

In [1]:
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.patches as patches
% matplotlib inline  
from PIL import Image
import numpy as np

import csv
import os
from scipy import ndimage

In [3]:
def _instance_boxes(ins_map, cls_map):
    """Return one ``[x_min, y_min, x_max, y_max, cls]`` row per instance label.

    ``x_max`` / ``y_max`` are the exclusive slice stops reported by
    ``ndimage.find_objects``. ``cls`` is the value of ``cls_map`` at the first
    pixel (row-major scan order) at which the instance label occurs.
    Label 0 is skipped: ``find_objects`` treats it as background and reports
    no slice for it.
    """
    # First occurrence of every label in the flattened (row-major) map.
    labels, first_flat = np.unique(ins_map, return_index=True)
    first_cls = cls_map[np.unravel_index(first_flat, ins_map.shape)]
    # slices[k - 1] is the bounding slice tuple of label k.
    slices = ndimage.find_objects(ins_map)

    boxes = []
    for label, cls in zip(labels, first_cls):
        label = int(label)  # plain int: avoids unsigned wrap-around in label - 1
        if label <= 0:
            continue
        obj = slices[label - 1]
        if obj is None:
            continue
        rows, cols = obj[0], obj[1]
        boxes.append([int(cols.start), int(rows.start),
                      int(cols.stop), int(rows.stop), int(cls)])
    return boxes


def _filter_boxes(boxes, min_size, keep_classes):
    """Keep boxes at least ``min_size`` wide and high whose class is in ``keep_classes``."""
    kept = []
    for x_min, y_min, x_max, y_max, cls in boxes:
        # remove small objects
        if x_max - x_min < min_size or y_max - y_min < min_size:
            continue
        # remove non-object class
        if cls not in keep_classes:
            continue
        kept.append([x_min, y_min, x_max, y_max, cls])
    return kept


def obtain_box(instance_dir, semantic_dir, box_dir, max_dirs=2000, min_size=128,
               keep_classes=(3, 4, 5, 6, 13, 24)):
    """Write bounding-box annotation files derived from instance/semantic maps.

    For each sub-directory of ``instance_dir`` (visited in sorted order, at
    most ``max_dirs`` of them) a same-named sub-directory is created under
    ``box_dir``. Every file ``<file_id>_<suffix>`` found in the instance
    sub-directory is loaded together with ``<file_id>_category40.png`` from
    the matching sub-directory of ``semantic_dir``. One box is computed per
    non-zero instance label; boxes that survive filtering are written to
    ``<box_dir>/<sub_dir>/<file_id>.txt``, one ``x_min y_min x_max y_max cls``
    line per box. No file is written when no box survives.

    Args:
        instance_dir: directory containing one sub-directory of instance maps
            per scene.
        semantic_dir: directory with the same sub-directory layout holding the
            semantic maps.
        box_dir: output root; created if missing.
        max_dirs: maximum number of sub-directories to process.
        min_size: minimum box width and height (in pixels) to keep.
        keep_classes: semantic class ids to keep. The defaults are presumably
            the 1-based category ids 4, 5, 6, 7, 14 and 25 shifted to 0-based
            -- TODO confirm against the label definition.

    Raises:
        ValueError: if an instance file name does not contain exactly one
            underscore.
    """
    keep_classes = frozenset(keep_classes)

    # Sorted so that the subset selected by max_dirs is reproducible.
    for cnt, sub_dir in enumerate(sorted(os.listdir(instance_dir)), 1):
        if cnt > max_dirs:
            break
        instance_files = os.listdir(os.path.join(instance_dir, sub_dir))

        out_dir = os.path.join(box_dir, sub_dir)
        if not os.path.isdir(out_dir):
            # makedirs (not mkdir) so a missing box_dir is created as well.
            os.makedirs(out_dir)
        print(cnt, out_dir)

        for instance_file in instance_files:
            instance_file_name = os.path.join(instance_dir, sub_dir, instance_file)
            file_id, _ = instance_file.split('_')
            semantic_file_name = os.path.join(semantic_dir, sub_dir,
                                              file_id + '_category40.png')

            ins_map = np.array(Image.open(instance_file_name))
            cls_map = np.array(Image.open(semantic_file_name))

            anno = _filter_boxes(_instance_boxes(ins_map, cls_map),
                                 min_size, keep_classes)
            if not anno:
                continue

            box_file = os.path.join(out_dir, file_id + '.txt')
            with open(box_file, 'w') as handle:
                for row in anno:
                    handle.write('%d %d %d %d %d\n' % tuple(row))

In [4]:
# Input directories (instance maps, semantic maps) and the output directory
# that receives the generated box annotation files.
instance_dir, semantic_dir, box_dir = (
    '/media/yi/DATA/data-orig/MLT/node',
    '/media/yi/DATA/data-orig/MLT/category',
    '/home/yi/code/few_shot/mlt/box',
)

obtain_box(instance_dir=instance_dir, semantic_dir=semantic_dir, box_dir=box_dir)

(1, '/home/yi/code/few_shot/mlt/box/0004d52d1aeeb8ae6de39d6bd993e992')
(2, '/home/yi/code/few_shot/mlt/box/0004dd3cb11e50530676f77b55262d38')
(3, '/home/yi/code/few_shot/mlt/box/00052c0562bde7790f8354e6123ae7ff')
(4, '/home/yi/code/few_shot/mlt/box/000539881d82c92e43ff2e471a97fcf9')
(5, '/home/yi/code/few_shot/mlt/box/0005b92a9ed6349df155a462947bfdfe')
(6, '/home/yi/code/few_shot/mlt/box/00065ecbdd7300d35ef4328ffe871505')
(7, '/home/yi/code/few_shot/mlt/box/000cf80f9ff74db95a46cd3a269a6e7c')
(8, '/home/yi/code/few_shot/mlt/box/000d0395709d2a16e195c6f0189155c4')
(9, '/home/yi/code/few_shot/mlt/box/000d939dc2257995adcb27483b04ad04')
(10, '/home/yi/code/few_shot/mlt/box/000e051cb512c617d32441a8a382b317')
(11, '/home/yi/code/few_shot/mlt/box/000e51f173e711e0160784036c92e74f')
(12, '/home/yi/code/few_shot/mlt/box/0011725c3f4c57108aa17f90ed8bea54')
(13, '/home/yi/code/few_shot/mlt/box/0017aeff679f53cd65edf72ef2349ff1')
(14, '/home/yi/code/few_shot/mlt/box/0017b414c92137d87625f0b35967e19b')
(