# In [None]:
# Import necessary libraries
import os, sys, shutil, glob, argparse
import numpy as np
from PIL import Image
from lxml import etree
from scipy.io import loadmat
from datetime import datetime

# Major version number of the running interpreter (e.g. 2 or 3).
python_version = sys.version_info[0]

###########################################################
##########        INRIA PERSON to JSON Conversion  ##########
###########################################################
# Root folder that holds both the INRIA input data and the JSON output.
abs_path = 'datasets'
# Both paths keep their trailing slash, so sub-paths can be built either
# with os.path.join or by plain string concatenation.
inria_path, json_path = (
    os.path.join(abs_path, folder) for folder in ('INRIA/', 'JSON/')
)

def make_json_directories():
    """
    Definition: Create a fresh JSON output tree: ``json_path`` with the
        sub-directories ``images`` and ``annotations``.

        If ``json_path`` already exists the user is asked whether it may
        be overwritten. The existing tree is deleted only after an
        explicit "yes" or "y" (case-insensitive, surrounding whitespace
        ignored). Any other reply terminates the program with exit
        status 0 and leaves the existing directory untouched.
    Returns: None
    """
    if os.path.exists(json_path):
        question = 'Directory already exists. Overwrite? (yes, no): '
        # raw_input() only exists on Python 2; on Python 3 it is input().
        if python_version >= 3:
            answer = input(question)
        else:
            answer = raw_input(question)
        # rmtree cannot be undone, so an ambiguous reply ("No", "n", a
        # typo, an empty line) must not be treated as consent.
        if answer.strip().lower() not in ('yes', 'y'):
            # sys.exit instead of the interactive-only exit() helper.
            sys.exit(0)
        shutil.rmtree(json_path)
    os.makedirs(json_path)
    for sub_dir in ("images", "annotations"):
        os.makedirs(os.path.join(json_path, sub_dir))
    
def copy_images_json():
    """
    Definition: Copy every file from the INRIA ``images`` directory into
        the ``JSON/images`` directory, then re-encode each copy as a JPEG
        named ``<stem>.jpg`` and delete the copy it was made from.

        Copies that already carry a ``.jpg`` extension are left as they
        are (re-saving and then removing them would destroy the file).
    Returns: None
    """
    source_dir = os.path.join(inria_path, "images")
    # The trailing separator makes shutil.copy fail loudly if the target
    # directory is missing instead of creating a file named "images".
    target_dir = os.path.join(json_path, "images", "")

    for filename in glob.glob(os.path.join(source_dir, "*.*")):
        shutil.copy(filename, target_dir)

    # Convert from .png (or any other non-.jpg format) to .jpg
    for filename in glob.glob(os.path.join(target_dir, "*.*")):
        stem, extension = os.path.splitext(filename)
        if extension.lower() == ".jpg":
            continue
        # Close the image before removing the file it was read from.
        with Image.open(filename) as im:
            # The JPEG writer rejects alpha/palette modes such as RGBA or P,
            # so anything that is not plain RGB or grayscale is converted.
            writable = im if im.mode in ("RGB", "L") else im.convert("RGB")
            writable.save(stem + ".jpg", "jpeg")
        os.remove(filename)
   
def parse():
    """
    Definition: Parses every INRIA annotation file to extract the bounding
        box coordinates and writes one JSON annotation file per annotated
        image into ``JSON/annotations``.

        Each output file ``<stem>.json`` contains
            {"filename": "<stem>.jpg",
             "objects": [{"class_name": "Person",
                          "bounding_box": [x1, y1, x2, y2]}, ...]}
        where the four integers are taken from the annotation text in the
        order they appear; no offset is applied to them.

        Annotation files in which no bounding box is found produce no
        output file. Directory entries that are not regular files are
        skipped.
    Returns: None
    """
    # The module-level name ``json`` is this file's entry-point function,
    # so the standard-library module is imported locally under an alias.
    import json as json_lib
    import re

    # Matches the value part of a bounding box line: "(x1, y1) - (x2, y2)".
    box_pattern = re.compile(r'\(\d+, \d+\)[\s\-]+\(\d+, \d+\)')
    number_pattern = re.compile(r'\d+')

    annotation_dir = os.path.join(inria_path, "annotations")
    output_dir = os.path.join(json_path, "annotations")

    for annotation_name in os.listdir(annotation_dir):
        annotation_file = os.path.join(annotation_dir, annotation_name)
        if not os.path.isfile(annotation_file):
            continue

        with open(annotation_file) as handle:
            data = handle.read()

        people = []
        for box_text in box_pattern.findall(data):
            x1, y1, x2, y2 = (int(n) for n in number_pattern.findall(box_text))
            people.append({'class_name': 'Person',
                           'bounding_box': [x1, y1, x2, y2]})

        if not people:
            continue

        stem = os.path.splitext(annotation_name)[0]
        # A fresh dict per image, written exactly once and as real JSON
        # (str(dict) would emit a Python repr with single quotes).
        object_info = {'filename': stem + ".jpg", 'objects': people}
        with open(os.path.join(output_dir, stem + '.json'), "w") as out:
            json_lib.dump(object_info, out)

def json():
    """
    Definition: Run the complete conversion: create the JSON output
        directories, copy and convert the images, then write the
        annotation files.
    Returns: None
    """
    pipeline = (make_json_directories, copy_images_json, parse)
    for stage in pipeline:
        stage()
    
# Run the whole conversion as soon as this code is executed.
json()
