## CONVERT MHP DATA TO COCO FORMAT

In [None]:
import os
import numpy as np
import json
import cv2
import matplotlib.pyplot as plt
from concurrent.futures import ProcessPoolExecutor

In [130]:
DATASET_PATH ="LV-MHP-v2/"

In [131]:
data_type = "val"

IMAGES_PATH = DATASET_PATH + "{}/images/".format(data_type)
ANNOTAION_MASK = DATASET_PATH + "{}/parsing_annos/".format(data_type)
CATEGORY = DATASET_PATH + "mhp.names"


## Parse MHP Images

read all the images in mhp data set and convert to coco format

In [19]:
images =[os.path.join(IMAGES_PATH,x) for x in  os.listdir(IMAGES_PATH)]

In [20]:
len(images)

5000

In [16]:
def read_images(im_path):
    im = cv2.imread(im_path)
    h,w = im.shape[:2]
    name = os.path.basename(im_path)
    id = int(name.split(".")[0])

    im_dict = dict(),
    im_dict["file_name"] = name
    im_dict["height"] = h
    im_dict["width"] = w
    im_dict["id"] = id
    return im_dict

In [37]:
coco_images = []
with ProcessPoolExecutor(6) as executor:
    for im_dict in executor.map(read_images,images):
        coco_images.append(im_dict)
        if(len(coco_images)%1000)==0:
            print(len(coco_images))
    

1000
2000
3000
4000
5000


## Parse categories



In [38]:
with open(CATEGORY,"r") as f:
    data = f.read().strip().split("\n")

In [32]:
#data = ["human","face"] 

In [33]:
coco_categories = []
for i,cat in enumerate(data):
    cat_dict = {}
    cat_dict['name'] = cat
    cat_dict['id'] = i+1
    cat_dict['supercategory'] = None
    
    coco_categories.append(cat_dict)

## ANNOTATION MASK

In [93]:
ann_mask = [os.path.join(ANNOTAION_MASK,x) for x in os.listdir(ANNOTAION_MASK)]
ann_mask = sorted(ann_mask,key=lambda x : int(os.path.basename(x).split(".")[0].split("_")[0]))

In [94]:
len(ann_mask)

13260

In [132]:
## IF you want to convert mhp to just detect human face and body call this function in create_mask_to_ann

def mask_human_nd_face(mask):
    mask_new = np.zeros(mask.shape)
    
    mask_new[mask!=0] = 1
    mask_new[(mask==3) | (mask==4)] = 2
    return mask_new


In [96]:
def create_mask_to_ann(mask_path):
    mask  = cv2.imread(mask_path)
    
    # Uncomment below to convert in face and body
    #mask = mask_human_nd_face(mask)
    
    mask_ = mask.copy()
    cat = np.unique(mask)
    list_anns = []
    
    image_id = int(os.path.basename(mask_path).split(".")[0].split("_")[0])
    id = int(image_id)*1000
    
    for c in cat:
        # to include face in body
#         if c==1:
#             mask_ = (mask!=0).astype("uint8")
#         else:
#             mask_ = mask
        
        if c!=0:
            id+=1
            mask_face = (mask_==c).astype("uint8")
            mask_face = mask_face.any(axis=-1).astype("uint8")
            kernel = np.ones((5,5),np.uint8)
            erosion = cv2.erode(mask_face,kernel,iterations = 1)
            
            x1,y1,w,h = cv2.boundingRect(erosion)
            #im2 = cv2.rectangle(im2,(x1, y1),(x1+w, y1+h),(0,0,255),5)
            x1,y1,w,h = x1-5,y1-5,w+10,h+10
            
            
            category_id = int(c)
            
            ann_dict = dict()
            ann_dict["id"] = id
            ann_dict["bbox"] = [x1,y1,w,h]
            ann_dict["image_id"] = image_id
            ann_dict["segmentation"] = []
            ann_dict["ignore"]= 0
            ann_dict["area"] = w*h
            ann_dict["iscrowd"] = 0
            ann_dict["category_id"] = category_id
            list_anns.append(ann_dict)
            
    return list_anns
        

In [100]:
coco_annotations = []
c= 0
with ProcessPoolExecutor(6) as executors:
    for ann_list in executors.map(create_mask_to_ann,ann_mask):
        coco_annotations.extend(ann_list)
        c+=1
        if c%1000==0:
            print(c)

1000
2000
3000
4000
5000
6000
7000
8000
9000
10000
11000
12000
13000


In [101]:
len(coco_annotations)

26472

### COCO 

In [50]:
mhp_coco = {"type": "instances"}


In [51]:
mhp_coco["images"] = coco_images
mhp_coco["categories"] = coco_categories
mhp_coco["annotations"] = coco_annotations


In [126]:
with open("instances_{}2017.json".format(data_type),"w") as f:
    f.write(json.dumps(val))

##  data sanity

In [None]:
# random image id in the set it might be in val or train try other if not found in perticular set

image_id = 6964

In [123]:
t_ann = []
anns = mhp_coco["annotations"]
for ann in anns:
    #print(ann)
    if ann["image_id"] == image_id:
        t_ann.append(ann)

for im in mhp_coco["images"]:
    if im["id"] ==image_id:
        print(im)

{'file_name': '6964.jpg', 'height': 414, 'width': 414, 'id': 6964}


In [124]:
for ann in t_ann:
    print(ann)
    im = cv2.imread(IMAGES_PATH+str(ann["image_id"])+".jpg")
    x1,y1,x2,y2 = ann["bbox"]
    im = cv2.rectangle(im,(x1, y1),(x2+x1, y1+y2),(0,0,255),2)
    cv2.imshow("",im)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

{'id': 6964001, 'bbox': [45, 39, 111, 365], 'image_id': 6964, 'segmentation': [], 'ignore': 0, 'area': 40515, 'iscrowd': 0, 'category_id': 1}
{'id': 6964002, 'bbox': [106, 39, 50, 63], 'image_id': 6964, 'segmentation': [], 'ignore': 0, 'area': 3150, 'iscrowd': 0, 'category_id': 2}
{'id': 6964001, 'bbox': [81, 20, 148, 382], 'image_id': 6964, 'segmentation': [], 'ignore': 0, 'area': 56536, 'iscrowd': 0, 'category_id': 1}
{'id': 6964002, 'bbox': [142, 20, 45, 62], 'image_id': 6964, 'segmentation': [], 'ignore': 0, 'area': 2790, 'iscrowd': 0, 'category_id': 2}
