In [1]:
import os 
import yaml
import torch
import shutil
import numpy as np 
import pandas as pd 
from tqdm import tqdm
import matplotlib.pyplot as plt 
import xml.etree.ElementTree as ET
from sklearn.model_selection import train_test_split

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Everything is resolved relative to the directory the kernel is running in.
path = os.getcwd()
# Folder holding the XML annotation files: <cwd>/data/annotations
ANNOTATIONS_PATH = os.path.join(path, *('data', 'annotations'))

In [4]:
# Column-wise accumulators filled while parsing the annotation files.
# Each dict maps an XML tag name to the list of values read for that tag;
# a fresh list is created per key so the columns never share storage.
filenames = list()

# Image dimensions, keyed by the tag names looked up under <size>
size_props = {tag: [] for tag in ('height', 'width')}

# Box corners, keyed by the tag names looked up under <bndbox>
bounding_box_props = dict(xmin=[], ymin=[], xmax=[], ymax=[])

In [5]:

# Parse every XML annotation in ANNOTATIONS_PATH and collect, per file:
# the annotation path, the image size (<size>/<height>, <size>/<width>) and
# the corners of one bounding box (<object>/<bndbox>/<xmin|ymin|xmax|ymax>).

# Re-running this cell must not append a second copy of every annotation,
# so start from empty accumulators each time.
filenames.clear()
for prop_list in (*size_props.values(), *bounding_box_props.values()):
    prop_list.clear()

# os.listdir returns entries in arbitrary order and may include things that
# are not annotations (e.g. a checkpoint folder); keep only the XML files and
# sort them so the resulting row order is the same on every run.
annotation_files = sorted(
    entry for entry in os.listdir(ANNOTATIONS_PATH)
    if entry.lower().endswith('.xml')
)

for file in tqdm(annotation_files):
    annotation_path = os.path.join(ANNOTATIONS_PATH, file)
    annotation = ET.parse(annotation_path)
    size = annotation.find('size')
    # NOTE(review): only the first <object> element is read, so any further
    # objects in the same file are ignored - confirm this is intended.
    bounding_box = annotation.find('object').find('bndbox')

    # Read every value before appending anything, so a malformed file raises
    # without leaving the accumulator lists at different lengths.
    size_values = {name: int(size.find(name).text) for name in size_props}
    box_values = {
        name: int(bounding_box.find(name).text) for name in bounding_box_props
    }

    filenames.append(annotation_path)
    for name, value in size_values.items():
        size_props[name].append(value)
    for name, value in box_values.items():
        bounding_box_props[name].append(value)

100%|███████████████████████████████████████████████████████████████████████████████| 433/433 [00:01<00:00, 273.71it/s]


In [6]:
# Assemble one row per parsed annotation: the annotation path, the image
# size, then the four box corners (column order is fixed explicitly).
_size_columns = ('width', 'height')
_box_columns = ('xmin', 'ymin', 'xmax', 'ymax')

df = pd.DataFrame({
    'file': filenames,
    **{column: size_props[column] for column in _size_columns},
    **{column: bounding_box_props[column] for column in _box_columns},
})

# Making annotations compatible with YOLOv5

In [7]:
# Box centre as a fraction of the image size:
# midpoint of the two corners divided by the image width / height.
df['center_x'] = df['xmax'].add(df['xmin']).div(2 * df['width'])
df['center_y'] = df['ymax'].add(df['ymin']).div(2 * df['height'])

# Box extent as a fraction of the image size.
df['bb_width'] = df['xmax'].sub(df['xmin']).div(df['width'])
df['bb_height'] = df['ymax'].sub(df['ymin']).div(df['height'])

In [8]:
# Keeping important columns only 
yolo_df = df[['file', 'center_x', 'center_y', 'bb_width', 'bb_height']]

# Performing 70-15-15 split
# test_size is an absolute row count (15% of the data), so passing the same
# number to both calls gives validation and test sets of equal size.
test_size = int(0.15 * len(df))

# Fixed seed so that the same files land in the same split on every run;
# without it, each re-run reshuffles and results are not comparable.
SPLIT_SEED = 42

df_train, df_test = train_test_split(
    yolo_df, test_size=test_size, random_state=SPLIT_SEED
)
df_train, df_val = train_test_split(
    df_train, test_size=test_size, random_state=SPLIT_SEED
)

# Every row must end up in exactly one of the three sets.
assert len(df_train) + len(df_val) + len(df_test) == len(yolo_df)

In [9]:
# Destination folder for each split, all under <cwd>/data
TRAIN_PATH = os.path.join(path, 'data', 'train')
VAL_PATH = os.path.join(path, 'data', 'val')
TEST_PATH = os.path.join(path, 'data', 'test')

# Folder the source images are read from
IMAGES_PATH = os.path.join(path, 'data', 'images')

# Create whichever split folders do not exist yet, announcing each creation.
for split_dir, created_message in (
    (TRAIN_PATH, 'Made folder for train set'),
    (VAL_PATH, 'Made folder for val set'),
    (TEST_PATH, 'Made folder for test set'),
):
    if os.path.exists(split_dir):
        continue
    os.makedirs(split_dir)
    print(created_message)

In [10]:
def export_split(split_df, split_dir, split_name):
    """Copy the images of one split into its folder and write their labels.

    For every row, the image ``<name>.png`` is copied from IMAGES_PATH into
    ``split_dir`` (the source image is left in place), and a ``<name>.txt``
    file is written next to it containing a single line:
    ``0 center_x center_y bb_width bb_height``.

    Parameters
    ----------
    split_df : DataFrame with columns 'file', 'center_x', 'center_y',
        'bb_width' and 'bb_height'; 'file' is the annotation path whose
        base name (without extension) identifies the image.
    split_dir : destination folder for the copied images and label files.
    split_name : name used in the progress messages ('train', 'val', ...).
    """
    print(f'Moving images for {split_name} set')
    # iterrows() has no length, so pass total explicitly to get a real
    # progress bar instead of a bare iteration counter.
    for _, row in tqdm(split_df.iterrows(), total=len(split_df)):
        # splitext removes only the trailing extension, whereas
        # str.replace('.xml', '') would also strip it from inside the name.
        image_name = os.path.splitext(os.path.basename(row['file']))[0]
        image_file = f'{image_name}.png'
        shutil.copy2(
            os.path.join(IMAGES_PATH, image_file),
            os.path.join(split_dir, image_file),
        )
        # Class index 0 is the only class in this dataset.
        label_text = f"0 {row['center_x']} {row['center_y']} {row['bb_width']} {row['bb_height']}"
        with open(os.path.join(split_dir, f'{image_name}.txt'), 'w') as f:
            f.write(label_text)
    print(f'Done moving images for {split_name} set')


# Same export for all three splits, in the original train / val / test order.
for split_df, split_dir, split_name in (
    (df_train, TRAIN_PATH, 'train'),
    (df_val, VAL_PATH, 'val'),
    (df_test, TEST_PATH, 'test'),
):
    export_split(split_df, split_dir, split_name)

Moving images for train set


305it [00:01, 156.87it/s]


Done moving images for train set
Moving images for val set


64it [00:00, 158.49it/s]


Done moving images for val set
Moving images for test set


64it [00:00, 150.86it/s]

Done moving images for test set





In [11]:
# Cloning the ultralytics yolo repository
! git clone https://github.com/ultralytics/yolov5.git

fatal: destination path 'yolov5' already exists and is not an empty directory.


In [12]:
! pip install -r yolov5/requirements.txt



In [13]:
# Dataset description consumed by the training script:
# class names, number of classes, and absolute paths of the train / val folders.
data = dict(
    names=['License Plate'],
    nc=1,
    train=os.path.abspath(TRAIN_PATH),
    val=os.path.abspath(VAL_PATH),
)

# Serialise the description to data.yaml in the current working directory.
with open('data.yaml', 'w') as yaml_file:
    yaml.dump(data, yaml_file)

In [2]:
!python ./yolov5/train.py --data ./LicensePlate.yaml  --batch-size 8  --epochs 80 --weights yolov5/yolov5s.pt