# Test out data augmentation

In [1]:
import matplotlib.pyplot as plt
import os
import cv2
import numpy as np
import pandas as pd

# We import the data set from tensorflow and build the model there
import tensorflow as tf
from tensorflow.keras import layers, models
from keras.utils import np_utils

In [2]:
CLASS = [["c0", "Safe Driving"], ["c1", "Text"], ["c2", "Phone"],
         ["c3", "Adjusting Radio"], ["c4", "Drinking"],
         ["c5", "Reaching Behind"], ["c6", "Hair or Makeup"],
         ["c7", "Talking to Passenger"]]
TEST_CLS = [os.path.join(os.getcwd(), "../Data/Distracted Driver Dataset", "Combined", "test", cls[0]) for cls in CLASS]
TRAIN_CLS = [os.path.join(os.getcwd(), "../Data/Distracted Driver Dataset", "Combined", "train", cls[0]) for cls in CLASS]

train_paths = []
test_paths = []
for cls in range(8):
  for train_instance in os.listdir(TRAIN_CLS[cls]):
    train_paths.append(os.path.join(TRAIN_CLS[cls], train_instance))
  for test_instance in os.listdir(TEST_CLS[cls]):
    test_paths.append(os.path.join(TEST_CLS[cls], test_instance))
classes = []
test_classes = []
for cls in range(8):
  for train_instance in os.listdir(TRAIN_CLS[cls]):
    classes.append(cls)
  for test_instance in os.listdir(TEST_CLS[cls]):
    test_classes.append(cls)
df = pd.DataFrame({
  'filename': train_paths,
  'class': classes
})
df_test = pd.DataFrame({
  'filename': test_paths,
  'class': test_classes
})

In [14]:
X_train = []
Y_train = []
for index, path in df.iterrows():
    img_orig = cv2.imread(path[0], cv2.IMREAD_COLOR)
    k = min(1.0, 1024/max(img_orig.shape[0], img_orig.shape[1]))
    img = cv2.resize(img_orig, (100, 100), fx=k, fy=k, interpolation=cv2.INTER_LANCZOS4)
    X_train.append(np.asarray(img/255))
    Y_train.append(path[1])

# Train Validation split

In [15]:
from sklearn.model_selection import train_test_split
X_val, X_train_final, Y_val, Y_train_final = train_test_split(X_train, Y_train, test_size=0.8, stratify=Y_train, random_state=42)
X_val = np.asarray(X_val)
Y_val = np.asarray(Y_val)
Y_val = np_utils.to_categorical(Y_val, 8)
X_train_final = np.asarray(X_train_final)
Y_train_final = np.asarray(Y_train_final)
Y_train_final = np_utils.to_categorical(Y_train_final, 8)
Y_train_final.shape

(10044, 8)

In [16]:
X_test = []
Y_test = []
for index, path in df_test.iterrows():
    img_orig = cv2.imread(path[0], cv2.IMREAD_COLOR)
    k = min(1.0, 1024/max(img_orig.shape[0], img_orig.shape[1]))
    img = cv2.resize(img_orig, (100, 100), fx=k, fy=k, interpolation=cv2.INTER_LANCZOS4)
    X_test.append(np.asarray(img/255))
    Y_test.append(path[1])

In [17]:
X_test = np.asarray(X_test)
Y_test = np.asarray(Y_test)
Y_test = np_utils.to_categorical(Y_test, 8)
X_test.shape

(1923, 100, 100, 3)

# Layers of random image transformation

In [18]:
transform = tf.keras.Sequential([
    layers.RandomFlip("horizontal_and_vertical", seed=1, input_shape=(100, 100, 3)),
    layers.RandomRotation(0.3, seed=1),
    layers.RandomBrightness(0.1, value_range=(0.0, 1.0), seed=1),
    layers.RandomContrast(0.2, seed=1)
])

In [34]:
model = tf.keras.Sequential([
    transform,
    layers.Conv2D(16, 3, padding='same', activation='relu'),
    layers.BatchNormalization(),
    layers.MaxPool2D(3, padding='same'),
    layers.Conv2D(32, 3, padding='same', activation='relu'),
    layers.BatchNormalization(),
    layers.MaxPool2D(3, padding='same'),
    layers.Conv2D(48, 3, padding='same', activation='relu'),
    layers.BatchNormalization(),
    layers.MaxPool2D(3, padding='same'),
    layers.Flatten(),
    layers.Dropout(0.3),
    layers.Dense(8, activation='softmax')
])
model.summary()

Model: "sequential_9"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 sequential_3 (Sequential)   (None, 100, 100, 3)       0         
                                                                 
 conv2d_15 (Conv2D)          (None, 100, 100, 16)      448       
                                                                 
 batch_normalization_15 (Bat  (None, 100, 100, 16)     64        
 chNormalization)                                                
                                                                 
 max_pooling2d_15 (MaxPoolin  (None, 34, 34, 16)       0         
 g2D)                                                            
                                                                 
 conv2d_16 (Conv2D)          (None, 34, 34, 32)        4640      
                                                                 
 batch_normalization_16 (Bat  (None, 34, 34, 32)      

In [35]:
import keras

callbacks_list = [
    keras.callbacks.EarlyStopping(monitor='accuracy', patience=2)
]

opt = keras.optimizers.Adam(learning_rate=0.01)
model.compile(loss='categorical_crossentropy',
                optimizer=opt, metrics=['accuracy'])

BATCH_SIZE = 200
EPOCHS = 10

history = model.fit(X_train_final, Y_train_final, epochs=EPOCHS, batch_size=BATCH_SIZE,callbacks=callbacks_list, validation_data=(X_val, Y_val), shuffle = True)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [38]:
score = model.evaluate(X_test, Y_test, verbose=1)
print("\nAccuracy on test data: %0.2f" % score[1])
print("\nLoss on test data: %0.2f" % score[0])


Accuracy on test data: 0.20

Loss on test data: 2.05


In [39]:
keras.backend.clear_session()

# Without data augmentation

In [42]:
model = tf.keras.Sequential([
    layers.Conv2D(16, 3, 2, padding='same', activation='relu', input_shape=(100, 100, 3)),
    layers.BatchNormalization(),
    layers.MaxPool2D(padding='same'),
    layers.Conv2D(32, 3, 2, padding='same', activation='relu'),
    layers.BatchNormalization(),
    layers.MaxPool2D(padding='same'),
    layers.Conv2D(48, 3, 2, padding='same', activation='relu'),
    layers.BatchNormalization(),
    layers.MaxPool2D(padding='same'),
    layers.Flatten(),
    layers.Dropout(0.3),
    layers.Dense(8, activation='softmax')
])
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_3 (Conv2D)           (None, 50, 50, 16)        448       
                                                                 
 batch_normalization_3 (Batc  (None, 50, 50, 16)       64        
 hNormalization)                                                 
                                                                 
 max_pooling2d_3 (MaxPooling  (None, 25, 25, 16)       0         
 2D)                                                             
                                                                 
 conv2d_4 (Conv2D)           (None, 13, 13, 32)        4640      
                                                                 
 batch_normalization_4 (Batc  (None, 13, 13, 32)       128       
 hNormalization)                                                 
                                                      

In [43]:
import keras

callbacks_list = [
    keras.callbacks.EarlyStopping(monitor='accuracy', patience=2)
]

opt = keras.optimizers.Adam(learning_rate=0.001)
model.compile(loss='categorical_crossentropy',
                optimizer=opt, metrics=['accuracy'])

BATCH_SIZE = 200
EPOCHS = 10

history = model.fit(X_train_final, Y_train_final, epochs=EPOCHS, batch_size=BATCH_SIZE,callbacks=callbacks_list, validation_data=(X_val, Y_val), shuffle = True)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [44]:
score = model.evaluate(X_test, Y_test, verbose=1)
print("\nAccuracy on test data: %0.2f" % score[1])
print("\nLoss on test data: %0.2f" % score[0])


Accuracy on test data: 0.21

Loss on test data: 2.38


Data augmentation reduces over-fitting but does not improve test accuracy.