In [16]:
%matplotlib inline
from pycocotools.coco import COCO
import numpy as np
import skimage.io as io
import matplotlib.pyplot as plt
import pylab
pylab.rcParams['figure.figsize'] = (8.0, 10.0)

#looking at images (and their annotations) in the val2017 folder
dataDir='..'
dataType='val2017'
annFile='{}/annotations/instances_{}.json'.format(dataDir,dataType)

# initialize COCO api for instance annotations
coco=COCO(annFile);


dataTypes = ["val2017","train2014","val2014","test2014","test2015","train2017","test2017"]
valid_categories=[cat['name'] for cat in coco.loadCats(coco.getCatIds())];

loading annotations into memory...
Done (t=0.95s)
creating index...
index created!


In [17]:
def boundaries(bbox):#function for extracting bounding box from bbox class found in each image
    return {'ymax':bbox[1],'ymin':(bbox[3]+bbox[1]),'xmin':bbox[0],'xmax':(bbox[0]+bbox[2])}

In [22]:
#create xml for an image for 1st plane
from lxml import etree as e

def build_folder_xmls(imgFolder,category):
     #confirm category and image folder exist
    if(not(imgFolder in dataTypes)):
        print("Folder does not exist in Images")
        return
    if(not(category in valid_categories)):
        print("Catergory does not exist in COCO database")
        return
    
    #reload coco API 
    dataDir='..'
    dataType=imgFolder
    annFile='{}/annotations/instances_{}.json'.format(dataDir,dataType)
    
    # initialize COCO api for instance annotations
    coco=COCO(annFile);
    
    # get all the catergory's image file_names
    catIds = coco.getCatIds(catNms=[category]);
    imgIds = coco.getImgIds(catIds=catIds );
    
    #write subfunction that will use these parameters
    def build_xml(i):



        #load image data
        img = coco.loadImgs(imgIds[i])[0]

        #build xml
        root = e.Element('annotation')

        folder = e.Element('folder')
        folder.text="images"
        root.append(folder)

        file = e.Element('filename')
        file.text=img['file_name']
        root.append(file)

        size = e.Element('size')
        width = e.Element('width')
        width.text = str(img['width'])
        height = e.Element('height')
        height.text = str(img['height'])
        depth = e.Element('depth')
        depth.text="3"
        size.append(width)
        size.append(height)
        size.append(depth)
        root.append(size)

        seg = e.Element('segmented')
        seg.text = "0"
        root.append(seg)

        annIds = coco.getAnnIds(imgIds=img['id'], catIds=catIds, iscrowd=None)
        anns = coco.loadAnns(annIds)
        for i in range(0,len(anns)):
            #bounding values for one plane
            bbox = anns[i]['bbox']
            bounds = boundaries(bbox)

            #Header info for each plane
            obj = e.Element('object')
            name = e.Element('name')
            name.text=category
            pose = e.Element('pose')
            pose.text = 'Unspecified'
            truncated = e.Element('truncated')
            truncated.text='0'
            difficult = e.Element('difficult')
            difficult.text='0'
            obj.append(name)
            obj.append(pose)
            obj.append(truncated)
            obj.append(difficult)

            #bounding box
            bndbox = e.Element('bndbox')
            xmin = e.Element('xmin')
            xmin.text = str(bounds['xmin'])

            ymin = e.Element('ymin')
            ymin.text = str(bounds['ymin'])

            xmax = e.Element('xmax')
            xmax.text = str(bounds['xmax'])

            ymax = e.Element('ymax')
            ymax.text = str(bounds['ymax'])

            bndbox.append(xmin)
            bndbox.append(ymin)
            bndbox.append(xmax)
            bndbox.append(ymax)

            obj.append(bndbox)

            root.append(obj)






        #root.append(e.Element('folder'))

        s= e.tostring(root,pretty_print=True)
        #s=str(e)
        print(s)

        f=open(("{}/xml_converted/"+category+"/" + img['file_name'][0:(len(img['file_name'])-4)]+'.xml').format(dataDir,dataType),'wb')
        f.write(s)
        f.close()
    
    #call sub-funtion on every element of the set of images
    for i in range(0,len(imgIds)):
        build_xml(i);


In [26]:
#copy all images in a category out
from shutil import copyfile


def copy_category_images(imgFolder,category):
    #confirm category and image folder exist
    if(not(imgFolder in dataTypes)):
        print("Folder does not exist in Images")
        return
    if(not(category in valid_categories)):
        print("Catergory does not exist in COCO database")
        return
    
    #re-build coco-API
    dataDir='..'
    dataType=imgFolder
    annFile='{}/annotations/instances_{}.json'.format(dataDir,dataType)
    
    # initialize COCO api for instance annotations
    coco=COCO(annFile);
    
    # get all the catergory's image file_names
    catIds = coco.getCatIds(catNms=[category]);
    imgIds = coco.getImgIds(catIds=catIds );
    
    #copy images
    for i in range (0,len(imgIds)):
        img = coco.loadImgs(imgIds[i])[0]
        png =img['file_name']
        src =('{}/images/'+imgFolder+'/'+ png).format(dataDir,dataType)
        dst =('{}/categorized_images/'+category+"/"+png).format(dataDir,dataType)
        copyfile(src,dst)

In [24]:
build_folder_xmls("val2017","airplane");

loading annotations into memory...
Done (t=0.60s)
creating index...
index created!
b'<annotation>\n  <folder>images</folder>\n  <filename>000000208901.jpg</filename>\n  <size>\n    <width>640</width>\n    <height>427</height>\n    <depth>3</depth>\n  </size>\n  <segmented>0</segmented>\n  <object>\n    <name>airplane</name>\n    <pose>Unspecified</pose>\n    <truncated>0</truncated>\n    <difficult>0</difficult>\n    <bndbox>\n      <xmin>232.67</xmin>\n      <ymin>284.59000000000003</ymin>\n      <xmax>425.99</xmax>\n      <ymax>148.37</ymax>\n    </bndbox>\n  </object>\n</annotation>\n'
b'<annotation>\n  <folder>images</folder>\n  <filename>000000090631.jpg</filename>\n  <size>\n    <width>640</width>\n    <height>389</height>\n    <depth>3</depth>\n  </size>\n  <segmented>0</segmented>\n  <object>\n    <name>airplane</name>\n    <pose>Unspecified</pose>\n    <truncated>0</truncated>\n    <difficult>0</difficult>\n    <bndbox>\n      <xmin>176.77</xmin>\n      <ymin>228.1700000000000

In [27]:
copy_category_images("val2017","airplane");

loading annotations into memory...
Done (t=0.76s)
creating index...
index created!


In [28]:
copy_category_images("val2017","bird");

loading annotations into memory...
Done (t=0.82s)
creating index...
index created!


In [29]:
build_folder_xmls("val2017","bird");

loading annotations into memory...
Done (t=0.70s)
creating index...
index created!
b'<annotation>\n  <folder>images</folder>\n  <filename>000000055299.jpg</filename>\n  <size>\n    <width>640</width>\n    <height>429</height>\n    <depth>3</depth>\n  </size>\n  <segmented>0</segmented>\n  <object>\n    <name>bird</name>\n    <pose>Unspecified</pose>\n    <truncated>0</truncated>\n    <difficult>0</difficult>\n    <bndbox>\n      <xmin>433.61</xmin>\n      <ymin>326.44</ymin>\n      <xmax>473.28000000000003</xmax>\n      <ymax>213.88</ymax>\n    </bndbox>\n  </object>\n</annotation>\n'
b'<annotation>\n  <folder>images</folder>\n  <filename>000000523782.jpg</filename>\n  <size>\n    <width>640</width>\n    <height>480</height>\n    <depth>3</depth>\n  </size>\n  <segmented>0</segmented>\n  <object>\n    <name>bird</name>\n    <pose>Unspecified</pose>\n    <truncated>0</truncated>\n    <difficult>0</difficult>\n    <bndbox>\n      <xmin>260.27</xmin>\n      <ymin>244.51</ymin>\n      <xma

In [30]:

def xml_category(category):
    for folder in dataTypes:
        build_folder_xmls(folder,category)
def image_category(category):
    for folder in dataTypes:
        copy_category_images(folder,category)
def convert_category(category):
    xml_category(category)
    image_category(category)
    

In [31]:
convert_category("airplane") #not possible until all folders downloaded

loading annotations into memory...
Done (t=0.62s)
creating index...
index created!
b'<annotation>\n  <folder>images</folder>\n  <filename>000000208901.jpg</filename>\n  <size>\n    <width>640</width>\n    <height>427</height>\n    <depth>3</depth>\n  </size>\n  <segmented>0</segmented>\n  <object>\n    <name>airplane</name>\n    <pose>Unspecified</pose>\n    <truncated>0</truncated>\n    <difficult>0</difficult>\n    <bndbox>\n      <xmin>232.67</xmin>\n      <ymin>284.59000000000003</ymin>\n      <xmax>425.99</xmax>\n      <ymax>148.37</ymax>\n    </bndbox>\n  </object>\n</annotation>\n'
b'<annotation>\n  <folder>images</folder>\n  <filename>000000090631.jpg</filename>\n  <size>\n    <width>640</width>\n    <height>389</height>\n    <depth>3</depth>\n  </size>\n  <segmented>0</segmented>\n  <object>\n    <name>airplane</name>\n    <pose>Unspecified</pose>\n    <truncated>0</truncated>\n    <difficult>0</difficult>\n    <bndbox>\n      <xmin>176.77</xmin>\n      <ymin>228.1700000000000

FileNotFoundError: [Errno 2] No such file or directory: '../annotations/instances_train2014.json'