In [None]:
import albumentations as A
import cv2
import pandas as pd
from PIL import Image, ImageDraw, ImageFont
import re, glob
from six import BytesIO
import numpy as np
import os
from matplotlib.image import imread
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# location of the CSV file holding the original dataset
path_to_csv = "sidehelper.csv"

# the file has no header row, so pandas numbers the columns 0..10
df = pd.read_csv(path_to_csv, header=None)

# name the columns by position; several names repeat (e.g. "XMin" labels both
# column 3 and column 9), so selecting one of them from a row yields a Series
column_names = ["type", "path", "label", "XMin",
                "YMin", "XMax", "YMin", "XMax",
                "YMax", "XMin", "YMax"]
df = df.rename(columns=dict(enumerate(column_names)))

# image augmentation is applied to the training rows only (independent copy)
df_train = df[df['type'] == "TRAIN"].copy()
df_train

In [None]:
# empty data frame reserved for the rows describing the first round of
# augmented images (one row per bounding box)
df_augmented_set0 = pd.DataFrame()

In [None]:
# first-round augmentation pipeline: each step carries its own probability p
augmentation_steps = [
    A.HorizontalFlip(p=0.5),
    A.RandomBrightnessContrast(brightness_limit=0.3, contrast_limit=0.3, p=0.5),
    A.PixelDropout(p=0.5),
    A.ToGray(p=0.3),
]

# bounding boxes are passed in the 'albumentations' format and are subject to
# the min_area / min_visibility thresholds configured here
bbox_settings = A.BboxParams(
    format='albumentations',
    min_area=1024,
    min_visibility=0.6,
)

transform = A.Compose(augmentation_steps, bbox_params=bbox_settings)

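# p = probability that the given transformation is applied to an image
# format = 'albumentations' (normalized [x_min, y_min, x_max, y_max]); read more here: https://albumentations.ai/docs/getting_started/bounding_boxes_augmentation/
# min_area = a bounding box is dropped when its area after augmentation falls below this many pixels
# min_visibility = a bounding box is dropped when less than this fraction (0.6 = 60%) of its original area remains after augmentation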

In [None]:
# directory that receives the first-round augmented images; it differs from the
# original image directory so the two sets are not mixed up
augmented_dir_set0 = 'some_path/'


def _read_image_bgr(path):
    """Load the image at ``path`` as a 3-channel BGR array.

    cv2 is tried first. Some images can't be read by cv2 (``cv2.imread``
    returns None); matplotlib's reader is used for those and its result is
    converted to what the rest of the pipeline and ``cv2.imwrite`` expect:
    0-255 values and BGR channel order.

    Raises whatever ``matplotlib.image.imread`` raises when the fallback
    cannot read the file either.
    """
    image = cv2.imread(path)
    if image is not None:
        return image

    image = imread(path)
    if image.dtype.kind == 'f':
        # matplotlib returns PNG data as floats in [0, 1]
        image = np.clip(np.rint(image * 255.0), 0, 255).astype(np.uint8)
    if image.ndim == 2:
        # grayscale: replicate into three channels
        return cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
    if image.shape[2] == 4:
        # RGBA: drop the alpha channel and swap to BGR
        return cv2.cvtColor(image, cv2.COLOR_RGBA2BGR)
    return cv2.cvtColor(image, cv2.COLOR_RGB2BGR)


# group dataframe by image path;
# since there may be 2 or more objects in one image, we don't want to augment same image more than once
grouped = df_train.groupby('path')

# one dict per augmented bounding box; turned into a data frame once at the end
# (DataFrame.append no longer exists in pandas 2.x and was quadratic anyway)
augmented_rows_set0 = []

# iterate through each image path
for path, group in grouped:
    image = _read_image_bgr(path)

    # build the output name: <original file name>_set0<original extension>
    file_path, file_extension = os.path.splitext(path)
    file_name = os.path.basename(file_path)
    augmented_img_path = augmented_dir_set0 + file_name + "_set0" + file_extension

    # collect [xmin, ymin, xmax, ymax, label] for every object in the image;
    # the coordinate column names are duplicated in df, so row['XMin'] is a
    # Series and its first entry is the value we want
    bboxes = [
        [row['XMin'].iloc[0], row['YMin'].iloc[0],
         row['XMax'].iloc[0], row['YMax'].iloc[0], row['label']]
        for _, row in group.iterrows()
    ]

    # do the actual augmentation
    transformed = transform(image=image, bboxes=bboxes)
    transformed_image = transformed['image']
    transformed_bboxes = transformed['bboxes']

    # if no bounding box survived the augmentation there is nothing to store
    if len(transformed_bboxes) == 0:
        continue

    # write the image first: cv2.imwrite returns False instead of raising when
    # it cannot write, and rows must not reference a file that does not exist
    if not cv2.imwrite(augmented_img_path, transformed_image):
        print("Could not write {}; skipping its rows".format(augmented_img_path))
        continue

    # one row per object that is still present in the augmented image
    for xmin, ymin, xmax, ymax, label in transformed_bboxes:
        augmented_rows_set0.append({
            'type': "TRAIN",
            'path': augmented_img_path,
            'label': label,
            'XMin': xmin,
            'YMin': ymin,
            'XMax': xmax,
            'YMax': ymax,
        })

# explicit columns keep the frame well-formed even when no row was produced
df_augmented_set0 = pd.DataFrame(
    augmented_rows_set0,
    columns=['type', 'path', 'label', 'XMin', 'YMin', 'XMax', 'YMax'],
)

In [None]:
# empty data frame reserved for the rows describing the augmented images from
# round 2 (one row per bounding box)
df_augmented_set1 = pd.DataFrame()

In [None]:
# the only difference from the first round is that the image is cropped first.
# Four crop sizes are defined because some images are smaller than 640x640, so
# those are cropped to a lower resolution instead.
# RandomCrop's width and height accept int only, so a percentage cannot be supplied.


def _build_crop_transform(crop_size):
    """Return the augmentation pipeline preceded by a square random crop.

    crop_size: edge length in pixels, used for both the crop width and height.
    All other steps and the bounding-box settings are the same for every size.
    """
    return A.Compose([
        A.RandomCrop(width=crop_size, height=crop_size),
        A.HorizontalFlip(p=0.5),
        A.RandomBrightnessContrast(brightness_limit=0.3, contrast_limit=0.3, p=0.5),
        A.PixelDropout(p=0.5),
        A.ToGray(p=0.3),
    ], bbox_params=A.BboxParams(format='albumentations', min_area=1024, min_visibility=0.6))


transform1 = _build_crop_transform(640)
transform2 = _build_crop_transform(600)
transform3 = _build_crop_transform(480)
transform4 = _build_crop_transform(320)

In [None]:
# directory that receives the second-round (cropped) augmented images
augmented_dir_set1 = 'E:/Images/SideHelper-Augmented/'

# crop pipelines paired with their crop edge length, largest first
crop_transforms = [(640, transform1), (600, transform2), (480, transform3), (320, transform4)]


def _read_image_bgr(path):
    """Load the image at ``path`` as a 3-channel BGR array.

    cv2 is tried first. Some images can't be read by cv2 (``cv2.imread``
    returns None); matplotlib's reader is used for those and its result is
    converted to what the rest of the pipeline and ``cv2.imwrite`` expect:
    0-255 values and BGR channel order.

    Raises whatever ``matplotlib.image.imread`` raises when the fallback
    cannot read the file either.
    """
    image = cv2.imread(path)
    if image is not None:
        return image

    image = imread(path)
    if image.dtype.kind == 'f':
        # matplotlib returns PNG data as floats in [0, 1]
        image = np.clip(np.rint(image * 255.0), 0, 255).astype(np.uint8)
    if image.ndim == 2:
        # grayscale: replicate into three channels
        return cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
    if image.shape[2] == 4:
        # RGBA: drop the alpha channel and swap to BGR
        return cv2.cvtColor(image, cv2.COLOR_RGBA2BGR)
    return cv2.cvtColor(image, cv2.COLOR_RGB2BGR)


def _select_crop_transform(image_h, image_w):
    """Return the pipeline with the largest crop that fits inside the image.

    Returns None when the image is smaller than the smallest crop (320 px) in
    either dimension; such images are not augmented in this round.
    """
    shortest_side = min(image_h, image_w)
    for crop_size, crop_transform in crop_transforms:
        if shortest_side >= crop_size:
            return crop_transform
    return None


grouped = df_train.groupby('path')

# one dict per augmented bounding box; turned into a data frame once at the end
# (DataFrame.append no longer exists in pandas 2.x and was quadratic anyway)
augmented_rows_set1 = []

for path, group in grouped:
    image = _read_image_bgr(path)
    image_h, image_w = image.shape[:2]

    crop_transform = _select_crop_transform(image_h, image_w)
    if crop_transform is None:
        # image is smaller than 320 px on one side: skip it
        continue

    # build the output name: <original file name>_set1<original extension>
    file_path, file_extension = os.path.splitext(path)
    file_name = os.path.basename(file_path)
    filepath = augmented_dir_set1 + file_name + "_set1" + file_extension

    # collect [xmin, ymin, xmax, ymax, label] for every object in the image;
    # the coordinate column names are duplicated in df, so row['XMin'] is a
    # Series and its first entry is the value we want
    bboxes = [
        [row['XMin'].iloc[0], row['YMin'].iloc[0],
         row['XMax'].iloc[0], row['YMax'].iloc[0], row['label']]
        for _, row in group.iterrows()
    ]

    transformed = crop_transform(image=image, bboxes=bboxes)
    transformed_image = transformed['image']
    transformed_bboxes = transformed['bboxes']

    # if no bounding box survived the crop there is nothing to store
    if len(transformed_bboxes) == 0:
        continue

    # write the image first: cv2.imwrite returns False instead of raising when
    # it cannot write, and rows must not reference a file that does not exist
    if not cv2.imwrite(filepath, transformed_image):
        print("Could not write {}; skipping its rows".format(filepath))
        continue

    # one row per object that is still present in the augmented image
    for xmin, ymin, xmax, ymax, label in transformed_bboxes:
        augmented_rows_set1.append({
            'type': "TRAIN",
            'path': filepath,
            'label': label,
            'XMin': xmin,
            'YMin': ymin,
            'XMax': xmax,
            'YMax': ymax,
        })

# explicit columns keep the frame well-formed even when no row was produced
df_augmented_set1 = pd.DataFrame(
    augmented_rows_set1,
    columns=['type', 'path', 'label', 'XMin', 'YMin', 'XMax', 'YMax'],
)

In [None]:
# rearrange (and repeat) the columns of both augmented frames into the row
# layout of the csv format described at
# https://cloud.google.com/vision/automl/object-detection/docs/csv-format
automl_columns = [
    "type", "path", "label",
    "XMin", "YMin", "XMax", "YMin",
    "XMax", "YMax", "XMin", "YMax",
]
df_augmented_set0 = df_augmented_set0[automl_columns]
df_augmented_set1 = df_augmented_set1[automl_columns]

In [None]:
# stack the original dataset and both augmented sets into one frame, then
# export it without an index column or a header row
frames_to_merge = [df, df_augmented_set0, df_augmented_set1]
df_sidehelper = pd.concat(frames_to_merge, ignore_index=True, sort=False)
df_sidehelper.to_csv("sidehelper_final.csv", header=False, index=False)

In [None]:
# number of labelled TRAIN rows before augmentation vs. in the merged frame
original_count = df_train["label"].count()
merged_train_rows = df_sidehelper[df_sidehelper["type"] == 'TRAIN']
merged_count = merged_train_rows["label"].count()
print("Original: {}\nAugmented: {}".format(original_count, merged_count))