In [1]:
import numpy as np
import time
import sys
import os
import random
from skimage import io
import pandas as pd
from matplotlib import pyplot as plt
from shutil import copyfile

import cv2
import tensorflow as tf

In [2]:
base_path = '~/Development/Learning/cek/LicensePlate_CLAHE'
image_links = 'train-images-boxable.csv'
annot_box = 'train-annotations-bbox.csv'
class_labels = 'class-descriptions-boxable.csv'

In [3]:
df_image_links = pd.read_csv(base_path+image_links)

FileNotFoundError: [Errno 2] No such file or directory: '/Users/asdarmld/Development/Learning/cek/LicensePlate_CLAHEtrain-images-boxable.csv'

In [None]:
df_annot_box = pd.read_csv(base_path+annot_box)

In [None]:
df_class_labels = pd.read_csv(base_path+class_labels,header=None)
df_class_labels.columns=['id','name']


In [None]:
def plot_org_img_with_boxes(image_name):
    """
    Input: 
    image_name(string) = the actual file name '6b5bfa4e9b0e767c.jpg'
    
    Return:
    One plot of the original image no bounding boxes, the other with bounding boxes.
    """
    temp = df_image_links[df_image_links['image_name']==image_name]
    img_url = temp['image_url'].values[0]
    img_id = image_name[:16]
    
    img = io.imread(img_url)
    
    height, width, _ = img.shape
    plt.figure(figsize=(10,10))
    plt.subplot(1,2,1)
    plt.title('Original Image')
    plt.imshow(img)
    boxes = df_annot_box[df_annot_box['ImageID']==img_id]
    img_bbox = img.copy()
    for index, row in boxes.iterrows():
        xmin,xmax,ymin,ymax = row['XMin'],row['XMax'],row['YMin'],row['YMax']
        xmin,xmax,ymin,ymax = int(xmin*width),int(xmax*width),int(ymin*height),int(ymax*height)
        label_name = row['LabelName']
        
        temp_df = df_class_labels[df_class_labels['id']==label_name]
        class_of_box = temp_df['name'].values[0]
        
        cv2.rectangle(img_bbox,(xmin,ymin),(xmax,ymax),(255,255,0),2)
        font = cv2.FONT_HERSHEY_SIMPLEX
        cv2.putText(img_bbox,class_of_box,(xmin,ymin-10), font, 1,(255,255,0),2)
    plt.subplot(1,2,2)
    plt.title('Image with Bounding Box')
    plt.imshow(img_bbox)
    plt.show()
    

In [None]:
def create_csv(which_class, num_rows,save=False,save_path ='~/Object-detection-using-Faster-RCNN/createddata/'):
    """Input: 
    which_class(str): one of 600 classes available in the dataset
    num_rows(int): how many rows you want your csv to be could potentially be less than inputted if not that many in data
    
    Returns: 
    Df with num_rows randomly chosen number of rows
    if save==True: saves it as a csv in save_path + (which_class)_1000.csv
    
    """
    class_id = df_class_labels[df_class_labels['name']==which_class].values[0][0] #collect the class_id value 
    num_total_pics = df_annot_box[df_annot_box['LabelName']==class_id] #select all the annotation boxes with that id value
    print('Total amount of {} in data'.format(which_class))
    print(len(num_total_pics))
    
    print('Number of unique pictures featuring atleast one of {}'.format(which_class))
    num_unique_pics_of_class = np.unique(num_total_pics['ImageID']) #remove duplicate images from the df, 
    #such as smooshing down a picture that has 2 birds to one value
    print(len(num_unique_pics_of_class))
    random_rows = np.random.choice(num_unique_pics_of_class,num_rows,replace=False)#randomly choose Num_rows
    array_append_jpg = [df_image_links[df_image_links['image_name']==name+'.jpg'] for name in random_rows]
    df = pd.DataFrame()
    for i in range(len(array_append_jpg)):
        df = df.append(array_append_jpg[i], ignore_index = True)
    if save:
        df.to_csv(save_path + '{}_{}.csv'.format(which_class,num_rows))
    
    return df



In [None]:
def download_images(csv_file_path, save_file_path, type_of_class):
    """
    INPUT:
    csv_file_path(string) = path to where you saved the csv generated from create_csv
    save_file_path(string) = path to where you want to save all of the images
    type_of_class(string) = whichever class('Person', 'Bird', etc...)
    
    Returns: None
    
    Generates: A new folder with the name of type_of_class with all the images downloaded inside of it
    """
    df = pd.read_csv(csv_file_path)
    if len(df.columns) >=3:
        df.drop('Unnamed: 0',axis=1,inplace=True)
    urls = df['image_url'].values
    directory = save_file_path + type_of_class
    os.mkdir(directory)
    for url in urls:
        img = io.imread(url)
        file_name=url[-20:]
        io.imsave(directory+'/'+file_name, img)
    
    

In [None]:
train_path = '../createddata/train/'
test_path = '../createddata/test/'
# os.mkdir(train_path)
# os.mkdir(test_path)
def split_train_test(file_path_to_imgs, percentage_split=.8, save_path_train = '../createddata/train/',save_path_test = '../createddata/test/'):
    """
    Input: 
    file_path_to_imgs = file path to the image directory where all the downloaded images from download_images() are saved
    percentage_split(int between 0-1) = Default at .8 The percentage you want to be train images, remaining percent is test
    save_path_train = file path where you want to save the train images
    save_path_test = file path where you want to save the test images
    
    RETURNS: None
    
    Generates:
    Copied images into specificed train and test directorys
    """
    imgs = os.listdir(file_path_to_imgs)
    imgs = [f for f in imgs if not f.startswith('.')]
    random.seed(1)
    random.shuffle(imgs)
    num_of_train_imgs = int(len(imgs)*percentage_split)
    num_of_test_imgs =len(imgs)-int(len(imgs)*(1-percentage_split))
    train_imgs = imgs[:num_of_train_imgs]
    test_imgs = imgs[num_of_test_imgs:]
    for val in train_imgs:
        img_loc = file_path_to_imgs + val
        save_loc = save_path_train+val
        copyfile(img_loc, save_loc)
    for val in test_imgs:
        img_loc = file_path_to_imgs + val
        save_loc = save_path_test+val
        copyfile(img_loc, save_loc)

In [None]:
def create_df_out_of_image_folders(path,names_of_classes=[],save=False, type_of_data='train'):
    """
    INPUTS:
    path = path to where you just stored your train or test pictures
    names_of_classes = A list with the classes you've chosen for your data ('Bird','Person,'Traffic light'...etc)
    save = True save a csv file you can load in with this information
    type_of_data = actually just used as a variable to name your saved csv 
    ex:
    if type_of_data = 'train' file will save as 'train_df.csv'
    
    Returns:
    dataframe with format [FileName, XMIN, XMax, YMin, Ymax, ClassName] for each bounding box
    
    """
    class_id = []
    for val in names_of_classes:
        class_id.append(df_class_labels[df_class_labels['name']==val].values[0][0])
    df = pd.DataFrame(columns=['FileName', 'XMin', 'XMax', 'YMin', 'YMax', 'ClassName'])
    train_imgs = os.listdir(path)
    train_imgs = [name for name in train_imgs if not name.startswith('.')]
    for i in range(len(train_imgs)):
        sys.stdout.write('Parse train_imgs ' + str(i) + '; Number of boxes: ' + str(len(df)) + '\r')
        sys.stdout.flush()
        img_name = train_imgs[i]
        img_id = img_name[0:16]
        tmp_df = df_annot_box[df_annot_box['ImageID']==img_id]
        for index,row in tmp_df.iterrows():
            labelname=row['LabelName']
            for val in range(len(names_of_classes)):
                if labelname == class_id[val]:
                    df = df.append({'FileName': img_name, 
                                            'XMin': row['XMin'], 
                                            'XMax': row['XMax'], 
                                            'YMin': row['YMin'], 
                                            'YMax': row['YMax'], 
                                            'ClassName': names_of_classes[val]}, 
                                           ignore_index=True)
    if save:
        df.to_csv('../createddata/{}_df.csv'.format(type_of_data))
    return df

In [None]:
#Use case for preparing data from the google AI dataset
base_path = '~/Object-detection-using-Faster-RCNN/createddata/'
create_csv('Person',num_rows=2500, save=True, save_path = base_path)
download_images(base_path + 'Person_1000.csv',base_path +'/images/', 'Person')
split_train_test(base_path+'/images/', percentage_splot=.8,save_path_train='../createddata/train/',save_path_test= '../createddata/test')
train_df = create_df_out_of_image_folders('../createddata/train/',names_of_classes=['Person'], save =True, type_of_data='train')
test_df = create_df_out_of_image_folders('../createddata/test/',names_of_classes=['Person'], save =True, type_of_data='test')

#Example of where to call in the csvs generated from 'create_df_out_of_image_folders' if working with them later
# train_df = pd.read_csv('../createddata/train_df.csv')
# train_df.drop('Unnamed: 0',inplace=True,axis=1)

# test_df = pd.read_csv('../createddata/test_df.csv')
# test_df.drop('Unnamed: 0',inplace=True,axis=1)


In [None]:
# For training

f= open('../createddata' + "/train_annotation.txt","w+")
for idx, row in train_df.iterrows():
#     sys.stdout.write(str(idx) + '\r')
#     sys.stdout.flush()
    img = cv2.imread(('../createddata' + '/train/' + row['FileName']))
    plt.imshow(img)
    height, width = img.shape[:2]
    x1 = int(row['XMin'] * width)
    x2 = int(row['XMax'] * width)
    y1 = int(row['YMin'] * height)
    y2 = int(row['YMax'] * height)
    
    fileName = '/home/ubuntu/Object-detection-using-Faster-RCNN/createddata/train/' +row['FileName']
    className = row['ClassName']
    other_name = '/home/ubuntu/Object-detection-using-Faster-RCNN/createddata/train/' +row['FileName']
    f.write(other_name + ',' + str(x1) + ',' + str(y1) + ',' + str(x2) + ',' + str(y2) + ',' + className + '\n')
f.close()

In [None]:

f= open('../createddata' + "/test_annotation.txt","w+")
for idx, row in test_df.iterrows():
#     sys.stdout.write(str(idx) + '\r')
#     sys.stdout.flush()
    img = cv2.imread(('../createddata' + '/test/' + row['FileName']))
    height, width = img.shape[:2]
    x1 = int(row['XMin'] * width)
    x2 = int(row['XMax'] * width)
    y1 = int(row['YMin'] * height)
    y2 = int(row['YMax'] * height)
    
    fileName = '/Users/davidheller/Object-detection-using-Faster-RCNN/createddata/test/' +row['FileName']
    other_name = '/home/ubuntu/Object-detection-using-Faster-RCNN/createddata/test/' +row['FileName']
    className = row['ClassName']
    f.write(other_name + ',' + str(x1) + ',' + str(y1) + ',' + str(x2) + ',' + str(y2) + ',' + className + '\n')
f.close()





