In [10]:
import os, time
import cv2
import xml.etree.ElementTree as ET
from PIL import Image
import numpy as np
import random
from pathlib import Path
SEED_XML_DIR = '/home/yyp/data/VOCdevkit_Person_7cls_20181202/VOC2007/Annotations/'
SEED_IMG_DIR = '/home/yyp/data/VOCdevkit_Person_7cls_20181202/VOC2007/JPEGImages/'
GENE_XML_DIR = '/home/yyp/data/VOCdevkit_Person_7cls_20181202/FillteredObjXml/'
# RT:RightTop 
# LB:LeftBottom 
# bbox: [xmin, xmax, ymin, ymax]
def IOU(bbox_a, bbox_b):
    """Return the intersection-over-union of two axis-aligned boxes.

    Both boxes are indexed as ``[xmin, xmax, ymin, ymax]``.

    Returns:
        ``0`` when the boxes do not overlap with positive width and
        height; otherwise the intersection area divided by the union
        area (``area_a + area_b - intersection``).
    """
    a_left, a_right, a_low, a_high = bbox_a[0], bbox_a[1], bbox_a[2], bbox_a[3]
    b_left, b_right, b_low, b_high = bbox_b[0], bbox_b[1], bbox_b[2], bbox_b[3]

    # Extent of the overlap along each axis; non-positive means disjoint
    # (or merely touching) along that axis.
    overlap_w = min(a_right, b_right) - max(a_left, b_left)
    overlap_h = min(a_high, b_high) - max(a_low, b_low)
    if not (overlap_w > 0 and overlap_h > 0):
        return 0

    intersection = overlap_w * overlap_h
    area_a = (a_right - a_left) * (a_high - a_low)
    area_b = (b_right - b_left) * (b_high - b_low)
    return intersection / (area_a + area_b - intersection)

def get_obj_from_xml(xml):
    """Return every ``<object>`` element found in an annotation XML file.

    Args:
        xml: Path to the XML file (a file object is also accepted, since
            the value is handed straight to ``ET.parse``).

    Returns:
        A list of ``<object>`` elements found anywhere below the document
        root, in document order.
    """
    # Parse the argument itself (the previous version read the unrelated
    # global ``xml_name``). Given a path, ET.parse opens and closes the
    # file on its own, so no handle is leaked.
    tree = ET.parse(xml)
    return list(tree.getroot().iter('object'))

def get_obj_from_image_file(file, bbox):
    """Crop the region described by ``bbox`` out of an image file.

    Args:
        file: Path of the image, passed to ``cv2.imread``.
        bbox: Box indexed as ``[xmin, xmax, ymin, ymax]``; each coordinate
            is truncated with ``int()`` before slicing.

    Returns:
        The sub-array ``img[ymin:ymax, xmin:xmax]`` of the loaded image.

    Raises:
        IOError: If ``cv2.imread`` could not load the image.
    """
    img = cv2.imread(file)
    # cv2.imread signals failure by returning None instead of raising;
    # fail loudly here rather than with an opaque TypeError on the slice.
    if img is None:
        raise IOError('cannot read image file: {0}'.format(file))
    x_min, x_max = int(bbox[0]), int(bbox[1])
    y_min, y_max = int(bbox[2]), int(bbox[3])
    # Rows are indexed by y, columns by x.
    return img[y_min:y_max, x_min:x_max]

def get_bboxes_from_etree(etree):
    """Collect the bounding boxes of the top-level ``<object>`` elements.

    Args:
        etree: A parsed ``ElementTree`` whose root holds ``<object>``
            children, each with a ``<bndbox>`` child.

    Returns:
        One ``[xmin, xmax, ymin, ymax]`` list of floats per object, in
        document order.
    """
    # Order matters: it defines the bbox layout used throughout this file.
    coord_tags = ('xmin', 'xmax', 'ymin', 'ymax')
    return [
        [float(node.find('bndbox').find(tag).text) for tag in coord_tags]
        for node in etree.getroot().findall('object')
    ]

# Process the seed annotation files in a deterministic (sorted) order.
seed_xml_names = sorted(os.listdir(SEED_XML_DIR))

# Only non-difficult objects of these classes are checked for removal.
classes_ = ['bus', 'car', 'truck', 'motorbike', 'bicycle', 'person']
# Numeric values of the 8-character file-name prefix that select the
# files to process.
temp_index_array = [205, 206]
# Boxes ([xmin, xmax, ymin, ymax]) of known wrong detections; any object
# overlapping one of them with IOU > 0.4 is dropped from the annotation.
# Loop-invariant, so it is defined once instead of once per file.
wrong_detection_objs = [[393, 408, 52, 83]]

# tree.write() does not create missing directories.
os.makedirs(GENE_XML_DIR, exist_ok=True)

for xml_name in seed_xml_names:
    head = xml_name[:8]
    if not (head.isdigit() and int(head) in temp_index_array
            and xml_name.endswith('.xml')):
        continue

    xml_path = os.path.join(SEED_XML_DIR, xml_name)
    print(xml_path)
    # Passing the path lets ElementTree open *and close* the file itself;
    # the previous open() handle was never closed.
    tree = ET.parse(xml_path)
    root = tree.getroot()

    # findall() returns a list, so removing children from root while
    # walking that list is safe.
    for obj in root.findall('object'):
        difficult = obj.find('difficult').text
        cls_ = obj.find('name').text
        if cls_ not in classes_ or int(difficult) == 1:
            continue

        xmlbox = obj.find('bndbox')
        b = [float(xmlbox.find(tag).text)
             for tag in ('xmin', 'xmax', 'ymin', 'ymax')]

        for erro_bbox in wrong_detection_objs:
            overlap = IOU(erro_bbox, b)  # computed once, reused for the log
            if overlap > 0.4:
                print(overlap)
                print('removed')
                root.remove(obj)
                break

        # NOTE: a size filter (drop boxes whose width and height are both
        # <= 16 px, intended for SSD300) used to be sketched here inside a
        # string literal; it was never executed and remains disabled.

    tree.write(os.path.join(GENE_XML_DIR, xml_name))

/home/yyp/data/VOCdevkit_Person_7cls_20181202/VOC2007/Annotations/00000205000.xml
/home/yyp/data/VOCdevkit_Person_7cls_20181202/VOC2007/Annotations/00000205001.xml
[397.0, 407.0, 50.0, 82.0]
removed
/home/yyp/data/VOCdevkit_Person_7cls_20181202/VOC2007/Annotations/00000205002.xml
[398.0, 406.0, 50.0, 81.0]
removed
/home/yyp/data/VOCdevkit_Person_7cls_20181202/VOC2007/Annotations/00000205003.xml
/home/yyp/data/VOCdevkit_Person_7cls_20181202/VOC2007/Annotations/00000205004.xml
/home/yyp/data/VOCdevkit_Person_7cls_20181202/VOC2007/Annotations/00000205005.xml
/home/yyp/data/VOCdevkit_Person_7cls_20181202/VOC2007/Annotations/00000205006.xml
/home/yyp/data/VOCdevkit_Person_7cls_20181202/VOC2007/Annotations/00000205007.xml
/home/yyp/data/VOCdevkit_Person_7cls_20181202/VOC2007/Annotations/00000205008.xml
/home/yyp/data/VOCdevkit_Person_7cls_20181202/VOC2007/Annotations/00000205009.xml
[394.0, 406.0, 49.0, 83.0]
removed
/home/yyp/data/VOCdevkit_Person_7cls_20181202/VOC2007/Annotations/000002050

/home/yyp/data/VOCdevkit_Person_7cls_20181202/VOC2007/Annotations/00000205221.xml
/home/yyp/data/VOCdevkit_Person_7cls_20181202/VOC2007/Annotations/00000205222.xml
/home/yyp/data/VOCdevkit_Person_7cls_20181202/VOC2007/Annotations/00000205223.xml
[394.0, 407.0, 52.0, 82.0]
removed
/home/yyp/data/VOCdevkit_Person_7cls_20181202/VOC2007/Annotations/00000205224.xml
/home/yyp/data/VOCdevkit_Person_7cls_20181202/VOC2007/Annotations/00000205225.xml
/home/yyp/data/VOCdevkit_Person_7cls_20181202/VOC2007/Annotations/00000205226.xml
/home/yyp/data/VOCdevkit_Person_7cls_20181202/VOC2007/Annotations/00000205227.xml
/home/yyp/data/VOCdevkit_Person_7cls_20181202/VOC2007/Annotations/00000205228.xml
/home/yyp/data/VOCdevkit_Person_7cls_20181202/VOC2007/Annotations/00000205229.xml
/home/yyp/data/VOCdevkit_Person_7cls_20181202/VOC2007/Annotations/00000205230.xml
/home/yyp/data/VOCdevkit_Person_7cls_20181202/VOC2007/Annotations/00000205231.xml
/home/yyp/data/VOCdevkit_Person_7cls_20181202/VOC2007/Annotatio

/home/yyp/data/VOCdevkit_Person_7cls_20181202/VOC2007/Annotations/00000205496.xml
/home/yyp/data/VOCdevkit_Person_7cls_20181202/VOC2007/Annotations/00000205497.xml
/home/yyp/data/VOCdevkit_Person_7cls_20181202/VOC2007/Annotations/00000205498.xml
/home/yyp/data/VOCdevkit_Person_7cls_20181202/VOC2007/Annotations/00000205499.xml
/home/yyp/data/VOCdevkit_Person_7cls_20181202/VOC2007/Annotations/00000205500.xml
/home/yyp/data/VOCdevkit_Person_7cls_20181202/VOC2007/Annotations/00000205501.xml
/home/yyp/data/VOCdevkit_Person_7cls_20181202/VOC2007/Annotations/00000205502.xml
/home/yyp/data/VOCdevkit_Person_7cls_20181202/VOC2007/Annotations/00000205503.xml
/home/yyp/data/VOCdevkit_Person_7cls_20181202/VOC2007/Annotations/00000205504.xml
/home/yyp/data/VOCdevkit_Person_7cls_20181202/VOC2007/Annotations/00000205505.xml
/home/yyp/data/VOCdevkit_Person_7cls_20181202/VOC2007/Annotations/00000205506.xml
/home/yyp/data/VOCdevkit_Person_7cls_20181202/VOC2007/Annotations/00000205507.xml
/home/yyp/data/V

/home/yyp/data/VOCdevkit_Person_7cls_20181202/VOC2007/Annotations/00000205868.xml
/home/yyp/data/VOCdevkit_Person_7cls_20181202/VOC2007/Annotations/00000205869.xml
/home/yyp/data/VOCdevkit_Person_7cls_20181202/VOC2007/Annotations/00000205870.xml
/home/yyp/data/VOCdevkit_Person_7cls_20181202/VOC2007/Annotations/00000205871.xml
/home/yyp/data/VOCdevkit_Person_7cls_20181202/VOC2007/Annotations/00000205872.xml
/home/yyp/data/VOCdevkit_Person_7cls_20181202/VOC2007/Annotations/00000205873.xml
/home/yyp/data/VOCdevkit_Person_7cls_20181202/VOC2007/Annotations/00000205874.xml
/home/yyp/data/VOCdevkit_Person_7cls_20181202/VOC2007/Annotations/00000205875.xml
/home/yyp/data/VOCdevkit_Person_7cls_20181202/VOC2007/Annotations/00000205876.xml
[394.0, 407.0, 48.0, 82.0]
removed
/home/yyp/data/VOCdevkit_Person_7cls_20181202/VOC2007/Annotations/00000205877.xml
/home/yyp/data/VOCdevkit_Person_7cls_20181202/VOC2007/Annotations/00000205878.xml
[390.0, 406.0, 52.0, 83.0]
removed
/home/yyp/data/VOCdevkit_Per

/home/yyp/data/VOCdevkit_Person_7cls_20181202/VOC2007/Annotations/00000206296.xml
/home/yyp/data/VOCdevkit_Person_7cls_20181202/VOC2007/Annotations/00000206297.xml
/home/yyp/data/VOCdevkit_Person_7cls_20181202/VOC2007/Annotations/00000206298.xml
/home/yyp/data/VOCdevkit_Person_7cls_20181202/VOC2007/Annotations/00000206299.xml
/home/yyp/data/VOCdevkit_Person_7cls_20181202/VOC2007/Annotations/00000206300.xml
/home/yyp/data/VOCdevkit_Person_7cls_20181202/VOC2007/Annotations/00000206301.xml
/home/yyp/data/VOCdevkit_Person_7cls_20181202/VOC2007/Annotations/00000206302.xml
/home/yyp/data/VOCdevkit_Person_7cls_20181202/VOC2007/Annotations/00000206303.xml
/home/yyp/data/VOCdevkit_Person_7cls_20181202/VOC2007/Annotations/00000206304.xml
/home/yyp/data/VOCdevkit_Person_7cls_20181202/VOC2007/Annotations/00000206305.xml
/home/yyp/data/VOCdevkit_Person_7cls_20181202/VOC2007/Annotations/00000206306.xml
/home/yyp/data/VOCdevkit_Person_7cls_20181202/VOC2007/Annotations/00000206307.xml
/home/yyp/data/V