## Convert WebCamT labels to yolo

The purpose of this notebook is to translate the original WebCamT dataset labels into YOLO format.  There is a significant difference between the two: the originals appear to be Pascal VOC-style XML, with many nested containers and tags, whereas YOLO format puts each annotation on a single line consisting of the object type followed by the X & Y coordinates of the box centre and the box width and height.  The coordinates themselves are in a different format (corner pixel positions rather than a centre point and size expressed as fractions of the image dimensions) and will also be converted.

In [None]:
# TODO: Make this a script that will accept a directory and execute on all subdirectories with an argument

In [8]:
import numpy as np
import pandas as pd
import xml.etree.ElementTree as ET
import os
import shutil

In [51]:
# Directory holding the WebCamT .xml labels to convert. The leading '~' is
# expanded by cd() (via os.path.expanduser) before the directory change.
working_directory = '~/DSI/projects/tvd/data/wcbatch4/labels/train/398-20160704-15'

### Breaking down XML into lists of vehicle tags

In [52]:
def extract_from_webcamt(file, img_width=352, img_height=240, class_id=0):
    """Convert the vehicle boxes of one WebCamT XML annotation file to YOLO rows.

    Parameters
    ----------
    file : str or file object
        Path (or open file) of the XML annotation; handed straight to
        ``ET.parse``.
    img_width, img_height : int, optional
        Frame size in pixels used to normalise the coordinates. The defaults
        (352 x 240) reproduce the constants this function used to hard-code.
    class_id : int, optional
        Value placed in the first column of every row. Defaults to 0.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(n_vehicles, 5)`` whose columns are
        ``class_id, x_center, y_center, width, height``; the last four are
        divided by the image width/height. The shape is ``(0, 5)`` when the
        file contains no ``<vehicle>`` element under the root.

    Raises
    ------
    xml.etree.ElementTree.ParseError
        If the file is not well-formed XML.
    IndexError, TypeError, ValueError
        If a ``<vehicle>`` element does not have the expected layout or a
        coordinate is not an integer.
    """
    root = ET.parse(file).getroot()

    # The coordinates are read positionally: the second child of each
    # <vehicle> is expected to hold xmax, xmin, ymax, ymin in that order.
    # NOTE(review): presumably that child is the bounding-box element -- confirm
    # against the dataset before switching to tag-based lookups.
    corners = np.array(
        [[int(veh[1][k].text) for k in range(4)] for veh in root.findall('vehicle')],
        dtype=float,
    ).reshape(-1, 4)  # reshape keeps a well-defined (0, 4) shape for empty files
    xmax, xmin, ymax, ymin = corners.T

    # convert corner coordinates to midpoint & size, as fractions of the frame;
    # built in one shot instead of concatenating row by row
    return np.column_stack((
        np.full(len(corners), class_id, dtype=float),
        (xmax + xmin) / (2 * img_width),   # midpoint = (max + min) / 2
        (ymax + ymin) / (2 * img_height),
        (xmax - xmin) / img_width,
        (ymax - ymin) / img_height,
    ))

In [53]:
# To reference home dir / from deitrich epp
def cd(path):
    """Change the current working directory to `path`, expanding a leading `~`."""
    expanded = os.path.expanduser(path)
    os.chdir(expanded)

### Navigating a directory

In [54]:
# Make the label directory the current working directory; the conversion cell
# below relies on relative paths (os.listdir() with no argument, './xml_backup/').
cd(working_directory)

In [55]:
# Convert every WebCamT .xml label in the current directory: write a YOLO .txt
# label with the same base name, then move the original XML into ./xml_backup.
if 'xml_backup' not in os.listdir():
    os.mkdir('xml_backup')
    # only announce the directory when it is actually created
    print('making directory ./xml_backup')

for item in os.listdir():

    # only act on xml files
    if not item.endswith('.xml'):
        continue

    # use function created above to extract annotations
    try:
        annotations = extract_from_webcamt(item)
    except Exception as err:
        # Skip the file entirely: without this, the XML would be moved to the
        # backup and a .txt would be written from the previous file's
        # annotations (or a NameError raised if this is the first file).
        print(f'error extracting {item}: {err!r}')
        continue

    # write annotations to new file given same name as the original xml;
    # explicit formatting keeps the class id an integer and does not depend
    # on numpy's print options
    filename = item[:-4]+'.txt'
    with open(filename, 'w') as annotfile:
        for class_id, xmid, ymid, xsize, ysize in annotations:
            annotfile.write(f'{int(class_id)} {xmid:.6f} {ymid:.6f} {xsize:.6f} {ysize:.6f}\n')

    # move the old annotation to the backup dir only once the new label exists
    shutil.move(item, './xml_backup/'+item)
print('done')

making directory ./xml_backup
done
