In [150]:
import pandas as pd
import numpy as np
import h5py
import os
import glob
import cv2
import dlib

from matplotlib import pyplot as plt
%matplotlib inline
from scipy import ndimage

In [151]:
from keras.utils import np_utils
from keras.preprocessing.image import ImageDataGenerator

In [None]:
# Plain data loading: read each image, convert its color space, and resize it

In [317]:
# Load every training image listed in sample_train.csv as a 96x96 RGB array.
x_train = []

df = pd.read_csv('sample_train.csv')
base_dir = "train"
# Each file is expected at <base_dir>/<label>/<sequence>/<path>
# (assumes these three CSV columns hold strings -- TODO confirm).
paths = base_dir + os.sep + df['label'] + os.sep + df['sequence'] + os.sep + df['path']
paths = paths.values.tolist()

for path in paths:
    img = cv2.imread(path)
    # cv2.imread reports a missing or undecodable file by returning None
    # instead of raising; stop here and name the offending path rather than
    # failing later inside cvtColor with an unrelated-looking error.
    if img is None:
        raise IOError("Could not read image: {}".format(path))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (96, 96), interpolation=cv2.INTER_CUBIC)

    x_train.append(img)

# Labels stay as strings here; they are encoded in a later cell.
y_train = df['label']

In [318]:
# Load every test image listed in sample_test.csv as a 96x96 RGB array.
x_test = []

df = pd.read_csv('sample_test.csv')
base_dir = "test"
# Each file is expected at <base_dir>/<label>/<sequence>/<path>
# (assumes these three CSV columns hold strings -- TODO confirm).
paths = base_dir + os.sep + df['label'] + os.sep + df['sequence'] + os.sep + df['path']
paths = paths.values.tolist()

for path in paths:
    img = cv2.imread(path)
    # cv2.imread reports a missing or undecodable file by returning None
    # instead of raising; stop here and name the offending path rather than
    # failing later inside cvtColor with an unrelated-looking error.
    if img is None:
        raise IOError("Could not read image: {}".format(path))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (96, 96), interpolation=cv2.INTER_CUBIC)

    x_test.append(img)

# Labels stay as strings here; they are encoded in a later cell.
y_test = df['label']

In [319]:
# Turn both image collections into uint8 NumPy arrays.
x_train, x_test = (
    np.array(images, dtype=np.uint8) for images in (x_train, x_test)
)

In [320]:
# Lookup tables between label strings and integer class ids.
label_uniq = np.unique(y_train)
# label string -> class id (position of the label in label_uniq)
map_label = {name: class_id for class_id, name in enumerate(label_uniq)}
# str(class id) -> label string; note the keys are strings, not ints
invert_map_label = {str(class_id): name for class_id, name in enumerate(label_uniq)}

In [321]:
# Encode the string labels as integer class ids, then as one-hot rows.
y_train_ids = y_train.map(map_label)
y_test_ids = y_test.map(map_label)

# Series.map produces NaN for any value that is not a key of map_label
# (for example a test label that never occurs in the training set). Stop
# with an explicit message instead of passing NaN on to to_categorical.
for split_name, raw_labels, class_ids in (
    ("train", y_train, y_train_ids),
    ("test", y_test, y_test_ids),
):
    unmapped = class_ids.isnull()
    if unmapped.any():
        raise ValueError(
            "{} labels missing from map_label: {}".format(
                split_name, raw_labels[unmapped].unique().tolist()
            )
        )

# One-hot encoding with one column per entry of label_uniq.
y_train = np_utils.to_categorical(y_train_ids, len(label_uniq))
y_test = np_utils.to_categorical(y_test_ids, len(label_uniq))

In [322]:
# Accumulators for the augmented images and their labels; the augmentation
# cells below append to these two lists.
x_train_augm, y_train_augm = [], []

In [323]:
# Augmentation pass: random brightness. Draws len(x_train) single-image
# batches and appends each augmented image with its label.
# NOTE(review): no seed is passed to flow(), so unless the RNG is seeded
# elsewhere the augmented output differs between runs.
datagen = ImageDataGenerator(brightness_range=[0.2, 1.0])
it = datagen.flow(x_train, y_train, batch_size=1)

for _ in range(len(x_train)):
    # Advance the iterator with the built-in next(): it goes through the
    # iterator protocol, whereas the .next() method is not present on the
    # iterator in every Keras version.
    images, targets = next(it)
    # batch_size=1, so index 0 is the only sample in the batch.
    x_train_augm.append(images[0].astype('uint8'))  # augmented image
    y_train_augm.append(targets[0])                 # its label row

In [324]:
# Augmentation pass: horizontal flip. Draws len(x_train) single-image
# batches and appends each resulting image with its label.
# NOTE(review): no seed is passed to flow(), so unless the RNG is seeded
# elsewhere the augmented output differs between runs.
datagen = ImageDataGenerator(horizontal_flip=True)
it = datagen.flow(x_train, y_train, batch_size=1)

for _ in range(len(x_train)):
    # Advance the iterator with the built-in next(): it goes through the
    # iterator protocol, whereas the .next() method is not present on the
    # iterator in every Keras version.
    images, targets = next(it)
    # batch_size=1, so index 0 is the only sample in the batch.
    x_train_augm.append(images[0].astype('uint8'))  # augmented image
    y_train_augm.append(targets[0])                 # its label row

In [325]:
# Collapse the collected label rows back to integer class ids
# (index of the largest entry in each row).
y_train_augm = np.argmax(np.array(y_train_augm), axis=1)

In [326]:
# Save only the augmented training images, one folder per class under
# dataset/train/, and remember each file's label and path.
labels = []
paths = []

for cl in range(len(label_uniq)):
    # The label name is the same for every image of this class.
    label = invert_map_label[str(cl)]
    # Positions in x_train_augm whose class id equals cl.
    class_positions = np.where(y_train_augm == cl)[0]
    folder_path = "dataset" + os.sep + "train" + os.sep + label

    if not os.path.exists(folder_path):
        os.makedirs(folder_path)

    for i, position in enumerate(class_positions):
        path = folder_path + os.sep + "img_{}_{}.png".format(label, i)

        # Images are held as RGB; convert to BGR before handing to imwrite.
        img = cv2.cvtColor(x_train_augm[position], cv2.COLOR_RGB2BGR)
        # cv2.imwrite returns False on failure instead of raising; check it
        # so a failed write is never listed as if the file existed.
        if not cv2.imwrite(path, img):
            raise IOError("Could not write image: {}".format(path))

        labels.append(label)
        paths.append(path)

In [327]:
# Write the index of saved training images: one row per file, with its
# label and path.
train_df = pd.DataFrame(
    {"label": labels, "path": paths},
    columns=["label", "path"],
)
train_df.to_csv("augmented_train.csv", index=False)

In [328]:
# Collapse the test label rows back to integer class ids
# (index of the largest entry in each row).
y_test = np.argmax(y_test, axis=1)

In [329]:
# Save the test images, one folder per class under dataset/test/, and
# remember each file's label and path.
labels = []
paths = []

for cl in range(len(label_uniq)):
    # The label name is the same for every image of this class.
    label = invert_map_label[str(cl)]
    # Positions in x_test whose class id equals cl.
    class_positions = np.where(y_test == cl)[0]
    folder_path = "dataset" + os.sep + "test" + os.sep + label

    if not os.path.exists(folder_path):
        os.makedirs(folder_path)

    for i, position in enumerate(class_positions):
        path = folder_path + os.sep + "img_{}_{}.png".format(label, i)

        # Images are held as RGB; convert to BGR before handing to imwrite.
        img = cv2.cvtColor(x_test[position], cv2.COLOR_RGB2BGR)
        # cv2.imwrite returns False on failure instead of raising; check it
        # so a failed write is never listed as if the file existed.
        if not cv2.imwrite(path, img):
            raise IOError("Could not write image: {}".format(path))

        labels.append(label)
        paths.append(path)

In [330]:
# Write the index of saved test images: one row per file, with its label
# and path.
test_df = pd.DataFrame(
    {"label": labels, "path": paths},
    columns=["label", "path"],
)
test_df.to_csv("augmented_test.csv", index=False)