## Import Libraries

In [None]:
!pip install albumentations -U #https://albumentations.ai/docs/getting_started/installation/

In [3]:
import albumentations as A
import cv2
import pandas as pd
from PIL import Image, ImageDraw, ImageFont
import re, glob
from six import BytesIO
import numpy as np
import os
from matplotlib.image import imread
import seaborn as sns
import matplotlib.pyplot as plt
import xml.etree.ElementTree as ET

import warnings
warnings.filterwarnings("ignore")

## Load the Training Set (from PascalVOC to DataFrame)

In [4]:
# Training set: one row per annotated bounding box, read from the Pascal VOC
# XML files, with coordinates normalised by the image size.

# path to the image files
path_to_images = "F:/Downloads/projects/hand-gestures/data_for_object_detection/data/"

# path to the annotation files (in xml format)
directory_in_str = "F:/Downloads/projects/hand-gestures/data_for_object_detection/xml/"
directory = os.fsencode(directory_in_str)

# Rows are collected in a plain list and turned into a DataFrame once at the
# end: DataFrame.append was removed in pandas 2.0 and copied the whole frame
# on every call.
records = []

# sorted() makes the row order independent of the file system's listing order.
for file in sorted(os.listdir(directory)):
    filename = os.fsdecode(file)
    if not filename.endswith(".xml"):
        continue

    path = directory_in_str + filename
    tree = ET.parse(path)
    root = tree.getroot()
    image_name = path_to_images + root.find('filename').text

    # Read the size of *this* image; silently reusing the size left over from
    # the previous file would produce wrong normalised coordinates.
    size = root.find('size')
    if size is None:
        raise ValueError("Annotation without a <size> element: " + path)
    width = int(size.find('width').text)
    height = int(size.find('height').text)
    if width <= 0 or height <= 0:
        raise ValueError("Annotation with a non-positive image size: " + path)

    # X divided by the width
    # Y divided by the height
    # how to calculate normalized coordinates: https://stackoverflow.com/questions/48915003/get-the-bounding-box-coordinates-in-the-tensorflow-object-detection-api-tutorial
    for annotated_object in root.findall('object'):
        label_type = annotated_object.find('name').text
        for bndbox in annotated_object.findall('bndbox'):
            records.append({
                'type': "TRAIN",
                'path': image_name,
                'label': label_type,
                'XMin': int(bndbox.find('xmin').text) / width,
                'YMin': int(bndbox.find('ymin').text) / height,
                'XMax': int(bndbox.find('xmax').text) / width,
                'YMax': int(bndbox.find('ymax').text) / height,
            })

df_train = pd.DataFrame(records, columns=['type','path','label','XMin','YMin','XMax','YMax'])
df_train.head(2)

Unnamed: 0,type,path,label,XMin,YMin,XMax,YMax
0,TRAIN,F:/Downloads/projects/hand-gestures/data_for_o...,option3,0.7365,0.274536,0.821,0.526525
1,TRAIN,F:/Downloads/projects/hand-gestures/data_for_o...,option3,0.247,0.365341,0.4655,0.88072


## Define Transformations

In [5]:
def _make_pipeline(geometric_steps):
    """Build one augmentation pipeline.

    The given geometric steps run first, followed by the brightness/contrast
    and pixel-dropout steps that every pipeline shares. Fresh step and
    bounding-box parameter objects are created on every call so the pipelines
    do not share any instances.
    """
    shared_steps = [
        A.RandomBrightnessContrast(brightness_limit=0.3, contrast_limit=0.3, p=0.5),
        A.PixelDropout(p=0.5),
    ]
    box_settings = A.BboxParams(format='albumentations', min_area=1024, min_visibility=0.6)
    return A.Compose(list(geometric_steps) + shared_steps, bbox_params=box_settings)


# Rotation range collapsed to the single angle -90, always applied, then an optional flip.
transform_0 = _make_pipeline([
    A.SafeRotate(p=1, limit=[-90, -90]),
    A.HorizontalFlip(p=0.5),
])

# Same as above with the rotation range collapsed to +90.
transform_1 = _make_pipeline([
    A.SafeRotate(p=1, limit=[90, 90]),
    A.HorizontalFlip(p=0.5),
])

# No rotation; the horizontal flip is always applied.
transform_2 = _make_pipeline([
    A.HorizontalFlip(p=1),
])

transforms = [transform_0, transform_1, transform_2]

## Run Image Augmentation for Each Transformation

In [18]:
# Augment every training image with each pipeline in `transforms` and write
# the augmented image together with a matching Pascal VOC annotation file.
SOURCE_XML_DIR = 'F:/Downloads/projects/hand-gestures/data_for_object_detection/xml/'
AUGMENTED_IMAGE_DIR = 'F:/Downloads/projects/hand-gestures/data_for_object_detection/augmented/data/'
AUGMENTED_XML_DIR = 'F:/Downloads/projects/hand-gestures/data_for_object_detection/augmented/xml/'

# cv2.imwrite only returns False when the target folder is missing, so make
# sure both output folders exist up front.
os.makedirs(AUGMENTED_IMAGE_DIR, exist_ok=True)
os.makedirs(AUGMENTED_XML_DIR, exist_ok=True)

# df_train holds one row per bounding box. Grouping by image means an image
# with several objects is augmented once, with all of its boxes, instead of
# being re-augmented and overwritten once per box.
for image_path, image_rows in df_train.groupby('path', sort=False):
    # matplotlib's imread (imported in the first cell) returns RGB channel
    # order while cv2.imwrite expects BGR, hence the channel swap.
    image = cv2.cvtColor(imread(image_path), cv2.COLOR_RGB2BGR)
    file_path, file_extension = os.path.splitext(image_path)
    file_name = os.path.basename(file_path)

    # The fifth element is the position of the box within the image's
    # annotation; it travels through the transform so surviving boxes can be
    # matched back to their <object> elements.
    bboxes = [
        [float(row.XMin), float(row.YMin), float(row.XMax), float(row.YMax), box_index]
        for box_index, row in enumerate(image_rows.itertuples(index=False))
    ]

    for i, transform in enumerate(transforms):
        transformed = transform(image=image, bboxes=bboxes)
        transformed_image = transformed['image']

        # Boxes can be dropped by the min_area / min_visibility filters.
        surviving = {
            int(box[4]): [float(value) for value in box[:4]]
            for box in transformed['bboxes']
        }
        if not surviving:
            print('Skipping ' + file_name + '_a' + str(i) + ': no bounding box survived the transform')
            continue

        augmented_name = file_name + '_a' + str(i) + file_extension
        augmented_img_path = AUGMENTED_IMAGE_DIR + augmented_name
        if not cv2.imwrite(augmented_img_path, transformed_image):
            raise IOError('Could not write augmented image: ' + augmented_img_path)

        # Re-parse for every transform because the tree is modified below.
        tree = ET.parse(SOURCE_XML_DIR + file_name + '.xml')
        root = tree.getroot()

        # Point the annotation at the augmented image; otherwise a loader that
        # builds the image path from <filename> pairs the augmented boxes with
        # the original image name.
        filename_node = root.find('filename')
        if filename_node is not None:
            filename_node.text = augmented_name
        path_node = root.find('path')
        if path_node is not None:
            path_node.text = augmented_img_path

        height, width = transformed_image.shape[:2]
        for size in root.findall('size'):
            size.find('width').text = str(width)
            size.find('height').text = str(height)

        # Same traversal order as the loader that built df_train, so the n-th
        # box here corresponds to the n-th row of this image.
        annotated_boxes = [
            (annotated_object, bndbox)
            for annotated_object in root.findall('object')
            for bndbox in annotated_object.findall('bndbox')
        ]
        for box_index, (annotated_object, bndbox) in enumerate(annotated_boxes):
            if box_index not in surviving:
                # The box was filtered out: drop it, and drop the object too
                # once it has no box left.
                annotated_object.remove(bndbox)
                if annotated_object.find('bndbox') is None:
                    root.remove(annotated_object)
                continue

            xmin, ymin, xmax, ymax = surviving[box_index]
            # Pascal VOC stores integer pixel coordinates.
            bndbox.find('xmin').text = str(int(round(xmin * width)))
            bndbox.find('xmax').text = str(int(round(xmax * width)))
            bndbox.find('ymin').text = str(int(round(ymin * height)))
            bndbox.find('ymax').text = str(int(round(ymax * height)))

        tree.write(AUGMENTED_XML_DIR + file_name + '_a' + str(i) + '.xml')

## Load the Augmented Training Set into a DataFrame

In [21]:
# Augmented training set: one row per annotated bounding box, read from the
# merged Pascal VOC XML files, with coordinates normalised by the image size.
# NOTE: this cell rebinds df_train, path_to_images and directory_in_str, so
# from here on they refer to the merged (augmented) data set.

# path to the image files
path_to_images = "F:/Downloads/projects/hand-gestures/data_for_object_detection/augmented/merged/data/"

# path to the annotation files (in xml format)
directory_in_str = "F:/Downloads/projects/hand-gestures/data_for_object_detection/augmented/merged/xml/"
directory = os.fsencode(directory_in_str)

# Rows are collected in a plain list and turned into a DataFrame once at the
# end: DataFrame.append was removed in pandas 2.0 and copied the whole frame
# on every call.
records = []

# sorted() makes the row order independent of the file system's listing order.
for file in sorted(os.listdir(directory)):
    filename = os.fsdecode(file)
    if not filename.endswith(".xml"):
        continue

    path = directory_in_str + filename
    tree = ET.parse(path)
    root = tree.getroot()
    image_name = path_to_images + root.find('filename').text

    # Read the size of *this* image; silently reusing the size left over from
    # the previous file would produce wrong normalised coordinates.
    size = root.find('size')
    if size is None:
        raise ValueError("Annotation without a <size> element: " + path)
    width = int(size.find('width').text)
    height = int(size.find('height').text)
    if width <= 0 or height <= 0:
        raise ValueError("Annotation with a non-positive image size: " + path)

    # X divided by the width
    # Y divided by the height
    # how to calculate normalized coordinates: https://stackoverflow.com/questions/48915003/get-the-bounding-box-coordinates-in-the-tensorflow-object-detection-api-tutorial
    for annotated_object in root.findall('object'):
        label_type = annotated_object.find('name').text
        for bndbox in annotated_object.findall('bndbox'):
            # The augmented annotations may store coordinates as floats, so
            # parse via float() before truncating to whole pixels.
            records.append({
                'type': "TRAIN",
                'path': image_name,
                'label': label_type,
                'XMin': int(float(bndbox.find('xmin').text)) / width,
                'YMin': int(float(bndbox.find('ymin').text)) / height,
                'XMax': int(float(bndbox.find('xmax').text)) / width,
                'YMax': int(float(bndbox.find('ymax').text)) / height,
            })

df_train = pd.DataFrame(records, columns=['type','path','label','XMin','YMin','XMax','YMax'])
df_train.head(2)

Unnamed: 0,type,path,label,XMin,YMin,XMax,YMax
0,TRAIN,F:/Downloads/projects/hand-gestures/data_for_o...,option3,0.7365,0.274536,0.821,0.526525
1,TRAIN,F:/Downloads/projects/hand-gestures/data_for_o...,option3,0.2745,0.736074,0.5265,0.820955


## Count of Each Class after Augmentation

In [23]:
# Number of bounding boxes per class label, shown as a one-column frame.
df_train['label'].value_counts().to_frame()

Unnamed: 0,label
option3,404
back,400
ok,400
option1,400
option2,400
option4,400
thumb,400
punch,400


In [25]:
# Total number of labelled bounding boxes. Summing the counts directly avoids
# the positional `[0]` lookup on a label-indexed Series, which is deprecated
# in pandas 2.x.
int(df_train['label'].value_counts().sum())

3204

## Count of File Types

In [31]:
import collections
import os

MERGED_DATA_DIR = "F:/Downloads/projects/hand-gestures/data_for_object_detection/augmented/merged/data/"


def get_file_format_count(root_dir=MERGED_DATA_DIR):
    """Count the files below ``root_dir`` by file extension.

    The directory tree is walked recursively. Extensions are counted exactly
    as they appear in the file name, so ``.jpg`` and ``.JPG`` are separate
    keys, and files without an extension are counted under ``''``.

    A new Counter is built on every call, so calling the function again (or
    re-running the cell) does not inflate the counts.

    Parameters
    ----------
    root_dir : str, optional
        Directory to scan; defaults to the merged augmented image folder.

    Returns
    -------
    collections.Counter
        Mapping of extension (including the leading dot) to number of files.
    """
    extension_counts = collections.Counter()
    for _current_dir, _sub_dirs, files in os.walk(root_dir):
        for filename in files:
            _stem, ext = os.path.splitext(filename)
            extension_counts[ext] += 1
    return extension_counts


# `cnt` is kept as a module-level name holding the latest result.
cnt = get_file_format_count()
cnt

Counter({'.jpg': 121, '.xml': 3204, '.JPG': 3288, '.jpeg': 8, '.JPEG': 428})