In [None]:
# Import necessary libraries
import os, sys, shutil, glob, argparse
import numpy as np
import json
from PIL import Image
from lxml import etree

python_version = sys.version_info.major



###########################################################
##########        JSON to VOC Conversion        ##########
###########################################################
# Root of the dataset tree with its JSON (input) and VOC (output) folders.
# The trailing separators are part of these values; keep them if the paths
# are edited, since code using them may append with plain "+".
abs_path = 'datasets'
json_path = os.path.join(abs_path, 'JSON/')
voc_path = os.path.join(abs_path, 'VOC/')

# Number of entries in the JSON images folder whose name matches "*.*".
image_count = len(glob.glob(os.path.join(json_path, "images", "*.*")))

def to_pasvoc_xml(fname, labels, coords, img_width, img_height):
    """
    Definition: Build a Pascal VOC style <annotation> element for one image.

    Arguments: fname      - image path; split on "/" into <folder> (everything
                            before the last component) and <filename>
               labels     - per-object class names, indexed like coords
               coords     - per-object boxes read as
                            [xmin, ymin, xmax, ymax] (indices 0..3)
               img_width  - written to <size>/<width>
               img_height - written to <size>/<height>

    Returns: the root <annotation> element. One <object> is emitted per entry
             of coords, with fixed difficult=0, occluded=0, pose=Unspecified
             and truncated=1; <size>/<depth> is always 3.
    """

    def add_child(parent, tag, text=None):
        # Append a new <tag> element to parent, optionally with text content.
        node = etree.SubElement(parent, tag)
        if text is not None:
            node.text = text
        return node

    path_parts = fname.split("/")
    root = etree.Element('annotation')
    add_child(root, 'filename', path_parts[-1])
    add_child(root, 'folder', "/".join(path_parts[:-1]))

    # (tag, index into the box) in the order the elements are written.
    box_layout = (('xmax', 2), ('xmin', 0), ('ymax', 3), ('ymin', 1))
    # Constant per-object flags, in output order.
    fixed_flags = (
        ('difficult', '0'),
        ('occluded', '0'),
        ('pose', 'Unspecified'),
        ('truncated', '1'),
    )

    for idx, box in enumerate(coords):
        obj_node = add_child(root, 'object')
        add_child(obj_node, 'name', labels[idx])
        box_node = add_child(obj_node, 'bndbox')
        for tag, pos in box_layout:
            add_child(box_node, tag, str(box[pos]))
        for tag, value in fixed_flags:
            add_child(obj_node, tag, value)

    size_node = add_child(root, 'size')
    add_child(size_node, 'depth', '3')
    add_child(size_node, 'height', str(img_height))
    add_child(size_node, 'width', str(img_width))

    return root

def parse_json_ann(filename):
    """
    Definition: Parses a json annotation file to extract class names and
        bounding box coordinates.

        The file is read line by line. Every non-blank line must hold one
        JSON document with an "objects" list whose items provide the keys
        "class_name" and "bounding_box". Blank lines are skipped.

    Arguments: filename - path of the annotation file to read

    Returns: classes - list with the "class_name" of every object
             bdn_bxs - list with the "bounding_box" of every object,
                       in the same order as classes

    Raises: ValueError (json decoding error) for a malformed line and
            KeyError when one of the expected keys is missing.
    """
    classes = []
    bdn_bxs = []
    # The context manager closes the handle even if parsing raises.
    with open(filename) as ann_file:
        for line in ann_file:
            # A blank (e.g. trailing) line is not a JSON document.
            if not line.strip():
                continue
            # The annotations may use single quotes, which JSON rejects.
            # NOTE(review): this blanket replace also rewrites apostrophes
            # inside values - confirm the data never contains any.
            record = json.loads(line.replace("'", '"'))
            for obj in record["objects"]:
                classes.append(obj["class_name"])
                bdn_bxs.append(obj["bounding_box"])

    return classes, bdn_bxs

def make_voc_directories():
    """
    Definition: Make directories for voc images and labels.
        Creates <voc_path>/{train,val,test}/{images,annotations}.
        If voc_path already exists the user is asked whether to overwrite
        it. The existing tree is removed only on an explicit "yes" / "y"
        answer (case-insensitive, surrounding blanks ignored); any other
        answer exits the program with status 0 and leaves it untouched.
    Returns: None
    """
    if os.path.exists(voc_path):
        # raw_input only exists on Python 2, so it is looked up only there.
        ask = input if python_version == 3 else raw_input
        answer = ask('Directory already exists. Overwrite? (yes, no): ')
        # Removing the tree is destructive: require explicit confirmation
        # instead of deleting on anything that is not literally 'no'.
        if answer.strip().lower() not in ('yes', 'y'):
            sys.exit(0)
        shutil.rmtree(voc_path)

    # os.makedirs also creates voc_path and the split folders on the way.
    for split in ('train', 'val', 'test'):
        for sub in ('images', 'annotations'):
            os.makedirs(os.path.join(voc_path, split, sub))
    
def voc(label=None):
    """
    Definition: Convert the json dataset under json_path into a Pascal VOC
        style dataset under voc_path.
        For every "<stem>.json" file in the json annotations folder that has
        a matching "<stem>.jpg" in the json images folder, an xml annotation
        is written and the image is copied into the train, val or test
        folder. The split is chosen from the 1-based position of the
        annotation file in the sorted directory listing: positions up to
        image_count/2 go to train, up to image_count*3/4 to val, the rest
        to test.

    Arguments: label - not used by this function

    Returns: None
    """
    print ("Convert json to voc")
    # Make all directories for voc dataset
    make_voc_directories()

    ann_dir = json_path + "annotations/"
    train_end = image_count / 2
    val_end = image_count * 3 / 4

    # Iterate through json annotations data.
    # Sorted so the split does not depend on the order os.listdir returns.
    for indx, f in enumerate(sorted(os.listdir(ann_dir)), start=1):
        # Only strip the real extension; a ".json" elsewhere in the name or
        # path must not cut the file name short.
        stem, ext = os.path.splitext(f)
        if ext != ".json":
            continue

        # Built with "/" on purpose: to_pasvoc_xml splits this path on "/".
        fname = json_path + "images/" + stem + ".jpg"
        if not os.path.isfile(fname):
            continue

        # The context manager releases the file even if reading size fails.
        with Image.open(fname) as img:
            w, h = img.size

        labels, coords = parse_json_ann(ann_dir + f)
        annotation = to_pasvoc_xml(fname, labels, coords, w, h)

        if indx <= train_end:
            split = "train"
        elif indx <= val_end:
            split = "val"
        else:
            split = "test"

        # Write the annotation and copy the image into the chosen split.
        split_dir = voc_path + split + "/"
        etree.ElementTree(annotation).write(
            split_dir + "annotations/" + stem + ".xml", pretty_print=True)
        shutil.copy(fname, split_dir + "images/")
# Run the json -> voc conversion as soon as this code is executed.
voc()