In [60]:
import cv2 as cv
import glob
import numpy as np
import os
import pylab
import skimage.io as io

from pycocotools.coco import COCO
from shutil import copyfile

In [61]:
# Directory that holds the COCO 'annotations' folder, and the split to process.
dataDir = '/Volumes/UNTITLED'
dataType = 'train2017'

# Instance-annotation JSON for the chosen split.
annFile = '{}/annotations/instances_{}.json'.format(dataDir, dataType)

# Directory the filtered source images are read from, and the directory the
# resized results are written to.
filtered_data = '/Users/matt/masters_thesis/filtered_data'
save_data_path = '/Users/matt/masters_thesis/resized_data'

In [64]:
# Initialize the COCO API for the instance annotations.
coco = COCO(annotation_file=annFile)

loading annotations into memory...
Done (t=93.00s)
creating index...
index created!


In [65]:
# Look up the COCO category id(s) for the 'person' category.
catIds = coco.getCatIds(catNms=['person'])

In [66]:
# Ids of the images annotated with the selected categories ...
imgIds = coco.getImgIds(catIds=catIds)
# ... and the image records (dicts with 'id', 'file_name', ...) for those ids.
imgs = coco.loadImgs(ids=imgIds)

In [68]:
# Collect the source JPEGs, skipping the '*_mask.jpg' files that the processing
# loop writes into the same directory. The '_mask' test is applied to the base
# name only, so a '_mask' substring elsewhere in the directory path cannot
# silently empty the list.
filtered_imgs = [
    f for f in glob.glob('{}/*.jpg'.format(filtered_data))
    if '_mask' not in os.path.basename(f)
]

In [70]:
# Reduce every path to its bare file name so it can be compared with the
# 'file_name' field of the COCO image records. os.path.basename honours the
# platform's path separator instead of assuming '/'.
filtered_imgs = [os.path.basename(path) for path in filtered_imgs]

In [71]:
# Keep only the COCO image records whose file is present among the filtered
# images. Membership is tested against a set so each lookup is O(1) instead of
# a scan of the whole file-name list for every record.
filtered_names = set(filtered_imgs)
imgs = [img_info for img_info in imgs if img_info['file_name'] in filtered_names]

In [72]:
def map_paths(file_name, src_dir=None, dst_dir=None):
    """Build the input and output paths used when processing one image.

    Args:
        file_name: Bare image file name, e.g. ``'000000000009.jpg'``.
        src_dir: Directory holding the filtered images. Defaults to the
            notebook-level ``filtered_data``.
        dst_dir: Directory receiving the resized results. Defaults to the
            notebook-level ``save_data_path``.

    Returns:
        A 4-tuple ``(filtered_path, filtered_mask, save_path, mask_name)``:
        the image and its ``<stem>_mask.jpg`` companion inside ``src_dir``,
        followed by the same two file names inside ``dst_dir``.
    """
    if src_dir is None:
        src_dir = filtered_data
    if dst_dir is None:
        dst_dir = save_data_path

    # splitext drops only the final extension; split('.')[0] would cut
    # 'a.b.jpg' down to 'a' and make distinct images share one mask file.
    stem = os.path.splitext(file_name)[0]
    mask_file = '{}_mask.jpg'.format(stem)

    filtered_path = os.path.join(src_dir, file_name)
    filtered_mask = os.path.join(src_dir, mask_file)
    save_path = os.path.join(dst_dir, file_name)
    mask_name = os.path.join(dst_dir, mask_file)
    return filtered_path, filtered_mask, save_path, mask_name

In [73]:
def rescale(image):
    """Linearly stretch the values of ``image`` to span the range [0, 255].

    The minimum of the input maps to 0 and the maximum to 255 (min-max
    normalisation over the whole array, not per channel).

    Args:
        image: Array-like of numeric values.

    Returns:
        A new ``float32`` array with the same shape as ``image``. An empty
        input is returned as an empty ``float32`` array, and a constant input
        (max == min) is returned as all zeros instead of NaN.
    """
    image = np.asarray(image).astype('float32')
    if image.size == 0:
        # np.min / np.max raise on empty arrays; there is nothing to rescale.
        return image

    current_min = np.min(image)
    current_max = np.max(image)
    value_range = current_max - current_min
    if value_range == 0:
        # Every pixel equals the minimum; dividing by zero would produce NaN,
        # which turns into arbitrary values after a later uint8 cast.
        return np.zeros_like(image)

    return (image - current_min) / value_range * 255

In [74]:
def resize_filtered(file_name, size=(224, 224)):
    """Load an image, resize it and stretch its values to the 8-bit range.

    Args:
        file_name: Path of the image file to read.
        size: Target size handed to ``cv.resize`` as ``dsize``, i.e.
            ``(width, height)``. Defaults to ``(224, 224)``.

    Returns:
        The resized image as a ``uint8`` array, min-max rescaled by
        ``rescale``.

    Raises:
        OSError: If ``cv.imread`` cannot read ``file_name``.
    """
    img = cv.imread(filename=file_name)
    if img is None:
        # cv.imread signals a missing or undecodable file by returning None;
        # fail here with the offending path rather than with an
        # AttributeError on the next line.
        raise OSError('cv.imread could not read image: {}'.format(file_name))

    # Resize in float32 as before; rescale() then maps the result back onto
    # 0..255 before the uint8 cast.
    img = cv.resize(img.astype(np.float32), size, interpolation=cv.INTER_CUBIC)
    return rescale(img).astype(np.uint8)

In [112]:
# cv.imwrite does not create missing directories, so make sure the output
# directory exists before any result is written.
os.makedirs(save_data_path, exist_ok=True)

for img in imgs:
    file_name = img['file_name']

    # Take the largest annotation of the selected category as the object.
    annIds = coco.getAnnIds(imgIds=img['id'], catIds=catIds, iscrowd=None)
    anns = coco.loadAnns(annIds)
    if not anns:
        # Nothing to mask; max() below would raise on an empty list.
        continue
    max_ann = max(anns, key=lambda x: x['area'])
    mask = coco.annToMask(max_ann)

    # Fraction of the image's pixels covered by the object mask.
    object_perc = (mask == 1).sum() / mask.size

    # Keep images whose object covers 30-70% of the frame; when the image has
    # more than two annotations, require at least 40% coverage.
    if object_perc < 0.3 or object_perc > 0.7 or (len(anns) > 2 and object_perc < 0.4):
        continue

    filtered_file, filtered_mask, resized_file_path, mask_path = map_paths(file_name)

    img_masked = cv.imread(filename=filtered_file)
    if img_masked is None:
        # cv.imread returns None for a missing or undecodable file.
        raise OSError('cv.imread could not read image: {}'.format(filtered_file))
    img_masked = img_masked.astype(np.uint8)

    # Black out every pixel outside the object mask.
    # NOTE(review): assumes the filtered image has the same height/width as
    # the COCO annotation mask - confirm.
    idx = (mask == 0)
    img_masked[idx] = 0

    # Full-resolution masked image, stored next to the filtered source image.
    cv.imwrite(filtered_mask, img_masked)

    # 224x224 versions of the source image and of the masked image.
    resized_img = resize_filtered(filtered_file)
    resized_mask = rescale(
        cv.resize(img_masked, (224, 224), interpolation=cv.INTER_CUBIC)
    ).astype(np.uint8)

    cv.imwrite(resized_file_path, resized_img)
    cv.imwrite(mask_path, resized_mask)