In [37]:
import json
import os
import glob
import pandas as pd
import argparse
import xml.etree.ElementTree as ET
import ntpath
import shutil
import cv2

In [32]:
# Root folder whose immediate subdirectories are the individual datasets; the
# functions below look for 'Annotations' and 'JPEGImages' inside each of them.
input_datadir = '../data'
# Output folder that receives the merged 'JPEGImages' directory and the merged
# annotation CSV.
merge_dir = '../mergedData'


def xml_to_csv(xml_dir):
    """Read all labelImg XML files in ``xml_dir`` and return them as a DataFrame.

    NOTE: a later cell defines ``xml_to_csv(jpg_dir, xml_dir)``; once that cell
    has run, this one-argument version is shadowed.

    One row is produced per ``<object>`` element, with the columns
    ``filename, width, height, class, xmin, ymin, xmax, ymax``. ``filename``
    is the XML file name with its extension replaced by ``.jpg``.

    Parsing is best effort: a file that cannot be read is skipped as a whole
    (no partial rows are kept) and the reason is printed, so that one bad
    annotation does not abort the run.

    Args:
        xml_dir: Directory that is searched (non-recursively) for ``*.xml``.

    Returns:
        pandas.DataFrame with one row per annotated object.
    """
    annotations = []
    for xml_file in glob.glob(xml_dir + '/*.xml'):
        try:
            root = ET.parse(xml_file).getroot()
            filename = path_leaf(xml_file)[:-4] + '.jpg'

            # Look children up by tag name instead of by position so that the
            # result does not depend on the order of elements in the file.
            size = root.find('size')
            width = int(size.find('width').text)
            height = int(size.find('height').text)

            rows = []
            for member in root.findall('object'):
                box = member.find('bndbox')
                rows.append((filename, width, height,
                             member.find('name').text,
                             int(box.find('xmin').text),
                             int(box.find('ymin').text),
                             int(box.find('xmax').text),
                             int(box.find('ymax').text)))
        except Exception as e:
            # Keep going, but say why the file was rejected.
            print('xml_to_csv for {} failed: {}: {}'.format(
                xml_file, type(e).__name__, e))
        else:
            # Only commit rows once the whole file parsed cleanly.
            annotations.extend(rows)

    column_name = [
        'filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax'
    ]

    xml_df = pd.DataFrame(annotations, columns=column_name)
    print("Following classes existe in the annotation:", xml_df['class'].unique())
    return xml_df

def path_leaf(path):
    """Return the last component of ``path``.

    The path is split with ``ntpath``, so both ``/`` and ``\\`` act as
    separators. When the path ends with a separator the final split part is
    empty, and the base name of the remaining head is returned instead.
    """
    parent, leaf = ntpath.split(path)
    if leaf:
        return leaf
    return ntpath.basename(parent)


In [41]:
# Same values as assigned in the earlier cell, repeated so this cell can be
# run on its own.
# Root folder whose immediate subdirectories are the individual datasets.
input_datadir = '../data'
# Output folder for the merged images and the merged annotation CSV.
merge_dir = '../mergedData'


def xml_to_csv(jpg_dir,xml_dir):
    """Read all labelImg XML files in ``xml_dir`` and return them as a DataFrame.

    One row is produced per ``<object>`` element, with the columns
    ``filename, width, height, class, xmin, ymin, xmax, ymax``. ``filename``
    is the XML file name with its extension replaced by ``.jpg``.

    The image size is taken from ``<size><width>``/``<height>`` when both are
    present; otherwise the image ``jpg_dir/<filename>`` is loaded with OpenCV
    and its shape is used.

    Args:
        jpg_dir: Directory holding the images; only read when an XML file
            lacks usable size information.
        xml_dir: Directory that is searched (non-recursively) for ``*.xml``.

    Returns:
        pandas.DataFrame with one row per annotated object.

    Raises:
        OSError: If the size has to be read from the image and OpenCV cannot
            load it.
    """
    annotations = []
    for xml_file in glob.glob(xml_dir + '/*.xml'):
        root = ET.parse(xml_file).getroot()
        fn = path_leaf(xml_file)[:-4] + '.jpg'

        # Compare against None explicitly: the truth value of an Element
        # reflects whether it has children, not whether it was found, and
        # testing it is deprecated.
        size_element = root.find('size')
        width_element = height_element = None
        if size_element is not None:
            width_element = size_element.find('width')
            height_element = size_element.find('height')

        if width_element is not None and height_element is not None:
            width = int(width_element.text)
            height = int(height_element.text)
        else:
            img_path = os.path.join(jpg_dir, fn)
            img = cv2.imread(img_path)
            # cv2.imread signals failure by returning None instead of raising.
            if img is None:
                raise OSError(
                    'could not read image {} needed for the size of {}'.format(
                        img_path, xml_file))
            height, width = img.shape[:2]

        for member in root.findall('object'):
            cl_name = member.find('name').text
            bndbox_element = member.find('bndbox')
            xmin = int(bndbox_element.find('xmin').text)
            ymin = int(bndbox_element.find('ymin').text)
            xmax = int(bndbox_element.find('xmax').text)
            ymax = int(bndbox_element.find('ymax').text)
            annotations.append((fn, width, height,
                                cl_name, xmin, ymin, xmax, ymax))

    column_name = [
        'filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax'
    ]

    xml_df = pd.DataFrame(annotations, columns=column_name)
    print("Following classes existe in the annotation:", xml_df['class'].unique())
    return xml_df

def path_leaf(path):
    """Return the last path component, accepting both ``/`` and ``\\`` separators.

    For a path that ends with a separator, the base name of the part before
    that separator is returned.
    """
    head, tail = ntpath.split(path)
    return tail or ntpath.basename(head)

In [42]:
def create_lable_mapping_template(input_datadir, lb_mapping_fn='merge_label_mapping.json'):
    """Write per-dataset annotation CSVs and a class-name mapping template.

    For every immediate subdirectory ``<d>`` of ``input_datadir`` the
    annotations in ``<d>/Annotations`` are converted with ``xml_to_csv`` and
    saved as ``<d>/<d>.csv``. A JSON file ``input_datadir/lb_mapping_fn`` is
    then written that maps, per subdirectory, every class name found there to
    itself::

        {"<d>": {"<class>": "<class>", ...}, ...}

    The file is produced with the ``json`` module, so it is valid JSON even
    when there are no subdirectories, when a subdirectory has no annotated
    classes, or when a class name contains characters that need escaping.

    Args:
        input_datadir: Root folder containing one subdirectory per dataset.
        lb_mapping_fn: File name of the mapping file created in
            ``input_datadir``.
    """
    config_file = os.path.join(input_datadir, lb_mapping_fn)

    # Only the first os.walk() entry is needed (the top-level directory
    # names); taking it with next() avoids traversing the whole tree.
    data_dirs = next(os.walk(input_datadir), (input_datadir, [], []))[1]

    mapping_template = {}
    for data_dir in data_dirs:
        print(data_dir)

        xml_dir = os.path.join(input_datadir, data_dir, 'Annotations')
        jpg_dir = os.path.join(input_datadir, data_dir, 'JPEGImages')
        xml_df = xml_to_csv(jpg_dir, xml_dir)
        csv_outfile = os.path.join(input_datadir, data_dir, data_dir + '.csv')
        xml_df.to_csv(csv_outfile, index=False)

        # Identity mapping: each class initially maps to its own name.
        mapping_template[data_dir] = {cl: cl for cl in xml_df['class'].unique()}

    with open(config_file, 'w') as f:
        json.dump(mapping_template, f, indent='\t')

In [44]:
# Writes one <dataset>/<dataset>.csv per dataset folder plus
# merge_label_mapping.json under input_datadir.
# NOTE(review): the mapping file is presumably meant to be edited by hand (to
# unify class names across datasets) before merge_csv is run — confirm.
create_lable_mapping_template(input_datadir)

data1
Following classes existe in the annotation: ['face' 'face_mask']
data1_test
Following classes existe in the annotation: ['face_mask' 'face' 'face_nask']
data2
Following classes existe in the annotation: ['unmask' 'mask']
datavideo
Following classes existe in the annotation: ['mask' 'unmask']


In [45]:
def merge_image_files(input_datadir, merge_dir):
    """Copy all dataset images into one folder, prefixed with the dataset name.

    Every ``.jpg``/``.png`` file (extension matched case-insensitively) found
    in ``input_datadir/<d>/JPEGImages`` is copied to
    ``merge_dir/JPEGImages/<d>_<stem>.jpg``. Files are copied byte for byte:
    a PNG keeps its PNG content and only receives a ``.jpg`` name.

    Subdirectories of ``input_datadir`` without a ``JPEGImages`` folder are
    skipped with a printed notice instead of aborting the merge.

    Args:
        input_datadir: Root folder containing one subdirectory per dataset.
        merge_dir: Output folder; ``JPEGImages`` is created inside it if
            missing.
    """
    merge_image_dir = os.path.join(merge_dir, 'JPEGImages')
    os.makedirs(merge_image_dir, exist_ok=True)

    # Only the first os.walk() entry is needed (the top-level directory
    # names); taking it with next() avoids traversing every image folder.
    data_subdir_ls = next(os.walk(input_datadir), (input_datadir, [], []))[1]
    for datasubdir in data_subdir_ls:
        datasubdir_path = os.path.join(input_datadir, datasubdir, 'JPEGImages')
        if not os.path.isdir(datasubdir_path):
            print('merge_image_files: skipping {} (no JPEGImages folder)'.format(datasubdir))
            continue

        for fn in os.listdir(datasubdir_path):
            # splitext keeps the stem intact whatever the extension length is
            # and requires a real ".ext" suffix.
            stem, ext = os.path.splitext(fn)
            if ext.lower() not in ('.jpg', '.png'):
                continue
            shutil.copy(os.path.join(datasubdir_path, fn),
                        os.path.join(merge_image_dir, datasubdir + '_' + stem + '.jpg'))


In [46]:
def merge_csv(input_datadir, merge_dir,lb_mapping_fn = 'merge_label_mapping.json',merged_fn = "Annotation.csv"):
    """Combine the per-dataset annotation CSVs into a single CSV file.

    For each immediate subdirectory ``<d>`` of ``input_datadir`` the file
    ``<d>/<d>.csv`` is loaded, its ``class`` values are renamed according to
    the entry for ``<d>`` in the JSON mapping file (entries that map a name
    to itself are ignored), and every ``filename`` is prefixed with ``<d>_``.
    The concatenated result is written to ``merge_dir/merged_fn`` without the
    index; ``merge_dir`` is created when it does not exist.

    Args:
        input_datadir: Root folder containing the dataset subdirectories and
            the mapping file.
        merge_dir: Output folder for the merged CSV.
        lb_mapping_fn: Name of the JSON mapping file inside ``input_datadir``.
        merged_fn: Name of the merged CSV file.
    """
    with open(os.path.join(input_datadir, lb_mapping_fn)) as mapping_file:
        label_maps = json.load(mapping_file)

    # First os.walk() entry holds the names of the top-level subdirectories.
    top_level_dirs = [dirnames for _, dirnames, _ in os.walk(input_datadir)][0]

    frames = []
    for data_dir in top_level_dirs:
        frame = pd.read_csv(os.path.join(input_datadir, data_dir, data_dir + '.csv'))

        # Keep only the entries that actually rename a class.
        renames = {}
        for old_name, new_name in label_maps[data_dir].items():
            if old_name != new_name:
                renames[old_name] = new_name
        if renames:
            frame['class'] = frame['class'].replace(renames)

        # Prefix file names with the dataset name.
        def add_prefix(name, prefix=data_dir):
            return prefix + '_' + name

        frame['filename'] = frame['filename'].apply(add_prefix)
        frames.append(frame)

    merged = pd.concat(frames)

    if not os.path.exists(merge_dir):
        os.makedirs(merge_dir)

    merged.to_csv(os.path.join(merge_dir, merged_fn), index=False)
    
    

In [47]:
# merge_csv reads the label-mapping JSON and the per-dataset CSVs under
# input_datadir, so create_lable_mapping_template must have been run first.
merge_csv(input_datadir, merge_dir)
# Copy the images under the same "<dataset>_" prefix that merge_csv applies to
# the file names in the merged CSV.
merge_image_files(input_datadir, merge_dir)