In [23]:
import os
from glob import glob
import pandas as pd
from functools import reduce
from xml.etree import ElementTree as et

In [24]:
xml_list = glob("./images/data_images/*.xml")
xml_list = list(map(lambda x :x.replace("\\","/"),xml_list))
# xml_list

In [25]:
# extract filename, size(width, height), object(name, xmin, xmax, ymin, ymax)
def extract_text(filename):
    tree = et.parse(filename)
    root = tree.getroot()

    # extract filename
    image_name = root.find('filename').text

    # width and height
    width = root.find('size').find('width').text
    height = root.find('size').find('height').text

    # objects
    objects = root.findall('object')
    
    parser = []
    for obj in objects:
        name = obj.find("name").text
        xmin = obj.find("bndbox").find("xmin").text
        xmax = obj.find("bndbox").find("xmax").text
        ymin = obj.find("bndbox").find("ymin").text
        ymax = obj.find("bndbox").find("ymax").text
        parser.append([image_name, width, height, name, xmin, xmax, ymin, ymax])
    
    return parser

In [26]:
parser_all = list(map(extract_text, xml_list))

In [27]:
len(parser_all)

5012

In [28]:
data = reduce(lambda x,y : x+y, parser_all)

In [30]:
len(data)

15663

In [31]:
df = pd.DataFrame(data, columns=['filename', 'width', 'height', 'name', 'xmin', 'xmax','ymin', 'ymax'])

In [32]:
df.head()

Unnamed: 0,filename,width,height,name,xmin,xmax,ymin,ymax
0,000001.jpg,1024,657,car,14,301,335,522
1,000001.jpg,1024,657,car,269,571,345,489
2,000001.jpg,1024,657,car,502,798,342,450
3,000001.jpg,1024,657,car,709,1009,333,438
4,000002.jpg,800,600,car,41,768,240,497


In [33]:
df.shape

(15663, 8)

In [34]:
df['name'].value_counts()

name
person         5447
car            1650
chair          1427
bottle          634
pottedplant     625
bird            599
dog             538
sofa            425
bicycle         418
horse           406
boat            398
motorbike       390
cat             389
tvmonitor       367
cow             356
sheep           353
aeroplane       331
train           328
diningtable     310
bus             272
Name: count, dtype: int64

In [36]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15663 entries, 0 to 15662
Data columns (total 8 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   filename  15663 non-null  object
 1   width     15663 non-null  object
 2   height    15663 non-null  object
 3   name      15663 non-null  object
 4   xmin      15663 non-null  object
 5   xmax      15663 non-null  object
 6   ymin      15663 non-null  object
 7   ymax      15663 non-null  object
dtypes: object(8)
memory usage: 979.1+ KB


In [37]:
#type conversion
cols = ['width','height','xmin','xmax','ymin','ymax']
df[cols] = df[cols].astype(int)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15663 entries, 0 to 15662
Data columns (total 8 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   filename  15663 non-null  object
 1   width     15663 non-null  int32 
 2   height    15663 non-null  int32 
 3   name      15663 non-null  object
 4   xmin      15663 non-null  int32 
 5   xmax      15663 non-null  int32 
 6   ymin      15663 non-null  int32 
 7   ymax      15663 non-null  int32 
dtypes: int32(6), object(2)
memory usage: 612.0+ KB


In [38]:
# center x, center y
df["center_x"] = ((df["xmax"] + df["xmin"]) / 2) / df["width"]
df["center_y"] = ((df["ymax"] + df["ymin"]) / 2) / df["height"]

# w , h
df["w"] = (df["xmax"] - df["xmin"]) / df["width"]
df["h"] = (df["ymax"] - df["ymin"]) / df["height"]

In [39]:
df.head()

Unnamed: 0,filename,width,height,name,xmin,xmax,ymin,ymax,center_x,center_y,w,h
0,000001.jpg,1024,657,car,14,301,335,522,0.153809,0.652207,0.280273,0.284627
1,000001.jpg,1024,657,car,269,571,345,489,0.410156,0.634703,0.294922,0.219178
2,000001.jpg,1024,657,car,502,798,342,450,0.634766,0.60274,0.289062,0.164384
3,000001.jpg,1024,657,car,709,1009,333,438,0.838867,0.586758,0.292969,0.159817
4,000002.jpg,800,600,car,41,768,240,497,0.505625,0.614167,0.90875,0.428333


In [44]:
images = df['filename'].unique()

In [48]:
img_df = pd.DataFrame(images, columns=['filename'])
img_train = tuple(img_df.sample(frac=0.8)['filename'])
img_test = tuple(img_df.query(f"filename not in {img_train}")["filename"])

In [49]:
len(img_train), len(img_test)

(4010, 1002)

In [50]:
train_df = df.query(f'filename in {img_train}')
test_df = df.query(f'filename in {img_test}')

In [51]:
train_df.head()

Unnamed: 0,filename,width,height,name,xmin,xmax,ymin,ymax,center_x,center_y,w,h
6,000007.jpg,500,333,car,141,500,50,330,0.641,0.570571,0.718,0.840841
12,000016.jpg,334,500,bicycle,92,305,72,473,0.594311,0.545,0.637725,0.802
13,000017.jpg,480,364,person,185,279,62,199,0.483333,0.358516,0.195833,0.376374
14,000017.jpg,480,364,horse,90,403,78,336,0.513542,0.568681,0.652083,0.708791
15,000019.jpg,500,375,cat,231,483,88,256,0.714,0.458667,0.504,0.448


In [52]:
test_df.head()

Unnamed: 0,filename,width,height,name,xmin,xmax,ymin,ymax,center_x,center_y,w,h
0,000001.jpg,1024,657,car,14,301,335,522,0.153809,0.652207,0.280273,0.284627
1,000001.jpg,1024,657,car,269,571,345,489,0.410156,0.634703,0.294922,0.219178
2,000001.jpg,1024,657,car,502,798,342,450,0.634766,0.60274,0.289062,0.164384
3,000001.jpg,1024,657,car,709,1009,333,438,0.838867,0.586758,0.292969,0.159817
4,000002.jpg,800,600,car,41,768,240,497,0.505625,0.614167,0.90875,0.428333


In [61]:
labels = {}
for i , label in enumerate(df['name'].unique()):
    labels.update({label:i})

def label_encoding(x):
    return labels[x]

In [63]:
train_df.loc[:, "id"] = train_df["name"].apply(label_encoding)
test_df.loc[:, "id"] = test_df["name"].apply(label_encoding)

In [64]:
train_df.head(10)

Unnamed: 0,filename,width,height,name,xmin,xmax,ymin,ymax,center_x,center_y,w,h,id
6,000007.jpg,500,333,car,141,500,50,330,0.641,0.570571,0.718,0.840841,0
12,000016.jpg,334,500,bicycle,92,305,72,473,0.594311,0.545,0.637725,0.802,3
13,000017.jpg,480,364,person,185,279,62,199,0.483333,0.358516,0.195833,0.376374,2
14,000017.jpg,480,364,horse,90,403,78,336,0.513542,0.568681,0.652083,0.708791,1
15,000019.jpg,500,375,cat,231,483,88,256,0.714,0.458667,0.504,0.448,4
16,000019.jpg,500,375,cat,11,266,113,259,0.277,0.496,0.51,0.389333,4
17,000020.jpg,375,500,car,33,371,148,416,0.538667,0.564,0.901333,0.536,0
18,000021.jpg,336,500,dog,1,182,235,388,0.272321,0.623,0.53869,0.306,5
19,000021.jpg,336,500,person,210,336,36,482,0.8125,0.518,0.375,0.892,2
20,000021.jpg,336,500,person,46,170,82,365,0.321429,0.447,0.369048,0.566,2


Save Image and label as text

In [65]:
import os
from shutil import move

In [73]:
train_folder = "./images/data_images/train"
test_folder = "./images/data_images/test"

os.makedirs(train_folder, exist_ok=True)
os.makedirs(test_folder, exist_ok= True)

In [74]:
cols = ['filename','id','center_x','center_y','w','h']
groupby_obj_train = train_df[cols].groupby('filename')
groupby_obj_test = test_df[cols].groupby("filename")

In [89]:
def save_data(filename, folder_path, groupby_obj):
    src = os.path.join("images/data_images", filename)
    dst = os.path.join(folder_path, filename)
    move(src, dst)

    # save the labels
    text_filename = os.path.join(folder_path, os.path.splitext(filename)[0] + ".txt")
    groupby_obj.get_group(filename).set_index("filename").to_csv(
        text_filename, sep=" ", index=False, header=False
    )

In [90]:
filename_series = pd.Series(groupby_obj_train.groups.keys())

In [93]:
filename_series.apply(save_data, args=(train_folder, groupby_obj_train))

0       None
1       None
2       None
3       None
4       None
        ... 
4005    None
4006    None
4007    None
4008    None
4009    None
Length: 4010, dtype: object

In [95]:
filename_series_test = pd.Series(groupby_obj_test.groups.keys())
filename_series_test.apply(save_data, args=(test_folder, groupby_obj_test))

0       None
1       None
2       None
3       None
4       None
        ... 
997     None
998     None
999     None
1000    None
1001    None
Length: 1002, dtype: object