# Detect distracted drivers
*Cyril Pecoraro - June 1st 2018*

The goal is to predict, from a picture, what the driver is doing.

This project is from the [Kaggle competition: State Farm Distracted Driver Detection](https://www.kaggle.com/c/state-farm-distracted-driver-detection).

There are 10 classes to predict:
* c0: safe driving
* c1: texting - right
* c2: talking on the phone - right
* c3: texting - left
* c4: talking on the phone - left
* c5: operating the radio
* c6: drinking
* c7: reaching behind
* c8: hair and makeup
* c9: talking to passenger

### Explanation of my work


### Content:


In [1]:
import pandas as pd
import cv2
import os 
import glob
from joblib import Parallel, delayed
import numpy as np
from scipy.stats import randint as sp_randint
import math
import matplotlib.pyplot as plt
% matplotlib inline
import warnings
warnings.filterwarnings("ignore", module="matplotlib")
pd.options.mode.chained_assignment = None

from sklearn.model_selection import train_test_split
from sklearn.metrics import log_loss

from keras.applications import vgg16
from keras.preprocessing import image
from keras import optimizers
from keras.models import Sequential
from keras.layers import Dropout, Flatten, Dense
import keras

Using TensorFlow backend.


## 1. File Loading

In [2]:
# Read the image index CSV into a DataFrame.
img_list_csv = 'data/driver_imgs_list.csv'
img_list = pd.read_csv(img_list_csv)

In [None]:
# Preview the first five rows of the image index.
img_list.head(n=5)

In [3]:
def load_image(img_file, img_reshape_size):
    """Load an image from disk and resize it.

    Args:
    - img_file: path to the image file
    - img_reshape_size: shape(w,h) to resize the image
    Return:
    - img: openCV image, i.e. the array produced by cv2.imread after resizing
      (OpenCV stores colour images in BGR channel order)
    Raises:
    - IOError: if the file cannot be read or decoded
    """
    img = cv2.imread(img_file)
    # cv2.imread reports failure by returning None instead of raising; stop
    # here with the offending path rather than failing later inside cv2.resize.
    if img is None:
        raise IOError('Could not read image file: {}'.format(img_file))
    return cv2.resize(img, img_reshape_size)

def _load_image_files(files, img_reshape_size, nprocs):
    """Load and resize every file in `files` in parallel, preserving their order."""
    return Parallel(n_jobs=nprocs)(
        delayed(load_image)(im_file, img_reshape_size) for im_file in files)


def load_dataset(dataset_dir, X, y, img_reshape_size, train_dir=True, nprocs=10, n_classes=10):
    """Load the .jpg images located in the main folder dataset_dir.

    For a train directory each class is in a separate subfolder named
    'c0', 'c1', ...; for a test directory the images sit directly in
    dataset_dir and no label is produced.

    Args:
    - dataset_dir: path to the directory containing the images (test) or the
      per-class subdirectories of images (train)
    - X: list to append data (extended in place)
    - y: list to append class (extended in place, train only)
    - img_reshape_size: shape(w,h) to resize the image
    - train_dir: True to read the labelled per-class subfolders, False to read
      an unlabelled flat folder
    - nprocs: Number of processors to use
    - n_classes: number of class subfolders to read when train_dir is True
    Return:
    - X: array with each image data as a row
    - y: array with each class as an integer for each image
    """
    # Train dataset
    if train_dir:
        for i in range(n_classes):
            # glob returns files in arbitrary order; sorting makes the sample
            # order (and any seeded split made from it) reproducible.
            files = sorted(glob.glob(os.path.join(dataset_dir, 'c'+str(i), '*.jpg')))

            X.extend(_load_image_files(files, img_reshape_size, nprocs))
            y.extend([i]*len(files))
            print('folder train/c'+str(i), 'loaded')

    # Test dataset
    else:
        files = sorted(glob.glob(os.path.join(dataset_dir, '*.jpg')))

        X.extend(_load_image_files(files, img_reshape_size, nprocs))
        print('folder test loaded')

    X = np.array(X)
    y = np.array(y)
    return X, y


def convert_to_one_hot(Y, C=10):
    """Convert integer class labels to a one-hot encoded matrix.

    Args:
    - Y: array-like of integer class labels in [0, C); it is flattened first
    - C: number of classes
    Return:
    - Y: float array of shape (number of labels, C) with a single 1 per row
    Raises:
    - ValueError: if a label is not an integer value or lies outside [0, C)
    """
    labels = np.asarray(Y).reshape(-1)

    # Accept integer-valued labels stored in another dtype (e.g. float),
    # but refuse values that would be silently truncated.
    if not np.issubdtype(labels.dtype, np.integer):
        as_int = labels.astype(np.int64)
        if not np.array_equal(as_int, labels):
            raise ValueError('Class labels must be integers')
        labels = as_int

    # Negative indices would otherwise wrap around and select the wrong class.
    if labels.size and (labels.min() < 0 or labels.max() >= C):
        raise ValueError('Class labels must be in [0, {}), got range [{}, {}]'.format(
            C, labels.min(), labels.max()))

    return np.eye(C)[labels]

In [4]:
# Working directories: the train and test folders both live under dataset_dir
dataset_dir = 'data'
dataset_dir_train, dataset_dir_test = (
    os.path.join(dataset_dir, subfolder) for subfolder in ('train', 'test')
)

# Image sizes: (w, h) every image is resized to when loaded
img_reshape_size = (224,224)

# Load train dataset, starting from empty image and label lists
print('Load dataset train')
X_train_, y_train_ = load_dataset(dataset_dir_train, [], [], img_reshape_size)

Load dataset train
folder train/c 0 loaded
folder train/c 1 loaded
folder train/c 2 loaded
folder train/c 3 loaded
folder train/c 4 loaded
folder train/c 5 loaded
folder train/c 6 loaded
folder train/c 7 loaded
folder train/c 8 loaded
folder train/c 9 loaded


In [5]:
# Creation of a validation set
X_train, X_val, y_train, y_val = train_test_split(X_train_, y_train_,
                                                  test_size=0.2,
                                                  random_state=42)

# Preprocess input according to VGG16.
# - The images were read with cv2.imread, which yields BGR channels, whereas
#   vgg16.preprocess_input expects RGB input: reverse the channel axis first.
# - preprocess_input needs floating point data and its RETURN VALUE is the
#   preprocessed array, so it must be assigned back (calling it without using
#   the result leaves the data unprocessed).
X_train = vgg16.preprocess_input(X_train[..., ::-1].astype('float32'))
X_val = vgg16.preprocess_input(X_val[..., ::-1].astype('float32'))

# One-hot encoding of the target vector
y_train = convert_to_one_hot(y_train)
y_val = convert_to_one_hot(y_val)


# Shapes
print('X_train shape:', X_train.shape)
print('y_train shape:', y_train.shape)
print('X_val shape:', X_val.shape)
print('y_val shape:', y_val.shape)

X_train shape: (15696, 224, 224, 3)
y_train shape: (15696, 10)
X_val shape: (6728, 224, 224, 3)
y_val shape: (6728, 10)


## 2. Building the model

In [6]:
def create_VGG16_model(n_classes=10, n_layers_freeze=1, input_shape=(224, 224, 3)):
    """Build and compile a classifier: ImageNet pre-trained VGG16 base + softmax layer.

    Args:
    - n_classes: number of classes to predict for the classifier
    - n_layers_freeze: number of layers at the END of the VGG16 base that stay
      trainable; every layer before them is frozen so that it is not trained
      again (0 freezes the whole base). The parameter name is kept for
      backward compatibility.
    - input_shape: shape of one input image passed to VGG16
    Return:
    - model: compiled Keras model
    """
    # Load the VGG model without its fully connected top
    vgg16_base = vgg16.VGG16(weights='imagenet', include_top=False, input_shape=input_shape)

    # Freeze the layers except the last n_layers_freeze layers.
    # An explicit count is used because the slice [:-0] is empty, which would
    # freeze nothing when n_layers_freeze is 0.
    n_frozen = max(len(vgg16_base.layers) - n_layers_freeze, 0)
    for layer in vgg16_base.layers[:n_frozen]:
        layer.trainable = False

    model = Sequential(name='VGG16-classifier')
    model.add(vgg16_base)
    model.add(Flatten())
    model.add(Dense(n_classes, activation='softmax'))

    # Single-label multi-class problem with one-hot targets and a softmax
    # output: categorical cross-entropy is the matching loss. With
    # binary_crossentropy the reported 'accuracy' is a per-output binary
    # accuracy, which looks far better than the real classification accuracy.
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    model.summary()
    return model

In [7]:
# Training hyper-parameters: samples per gradient step and number of passes
batch_size, n_epoch = 100, 2

# Build the compiled network with the default arguments (prints its summary)
model = create_VGG16_model()


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
vgg16 (Model)                (None, 7, 7, 512)         14714688  
_________________________________________________________________
flatten_1 (Flatten)          (None, 25088)             0         
_________________________________________________________________
dense_1 (Dense)              (None, 10)                250890    
Total params: 14,965,578
Trainable params: 4,970,506
Non-trainable params: 9,995,072
_________________________________________________________________


In [2]:
# Show the compute devices TensorFlow can see on this machine
from tensorflow.python.client import device_lib

local_devices = device_lib.list_local_devices()
print(local_devices)

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 9086172009200653865
]


In [8]:
# Train the classifier and keep the per-epoch metrics for the plots below.
# - `epochs` is the Keras 2 keyword; `nb_epoch` is the deprecated Keras 1 name.
# - verbose accepts 0 (silent), 1 (progress bar) or 2 (one line per epoch).
model_history = model.fit(x=X_train, y=y_train,
          batch_size=batch_size,
          epochs=n_epoch,
          shuffle=True,
          verbose=2,
          validation_data=(X_val, y_val),
          )

  


Train on 15696 samples, validate on 6728 samples
Epoch 1/2


ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/home/peco/miniconda3/envs/myenv/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2963, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-8-e028a49d906f>", line 6, in <module>
    validation_data=(X_val, y_val),
  File "/home/peco/miniconda3/envs/myenv/lib/python3.6/site-packages/keras/engine/training.py", line 1042, in fit
    validation_steps=validation_steps)
  File "/home/peco/miniconda3/envs/myenv/lib/python3.6/site-packages/keras/engine/training_arrays.py", line 199, in fit_loop
    outs = f(ins_batch)
  File "/home/peco/miniconda3/envs/myenv/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py", line 2661, in __call__
    return self._call(inputs)
  File "/home/peco/miniconda3/envs/myenv/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py", line 2631, in _call
    fetched = self._callable_fn(*array_vals)
  File "/home/peco/miniconda3/envs/myenv/lib/python3.

KeyboardInterrupt: 

In [9]:
# Class probabilities predicted for the validation images
predictions_valid = model.predict(X_val, batch_size=batch_size, verbose=1)

# Multi-class log loss on the validation set
# (the targets are stored in `y_val`; `Y_val` was never defined)
score = log_loss(y_val, predictions_valid)
print('Validation log loss:', score)

 400/6728 [>.............................] - ETA: 24:24

KeyboardInterrupt: 

In [10]:
# Per-epoch metrics recorded by model.fit
history = model_history.history

# Depending on the Keras version, the accuracy metric is stored under
# 'acc' or 'accuracy'; use whichever key is present.
acc_key = 'acc' if 'acc' in history else 'accuracy'
acc = history[acc_key]
val_acc = history['val_' + acc_key]
loss = history['loss']
val_loss = history['val_loss']

epochs = range(len(acc))

# Accuracy curves
plt.plot(epochs, acc, 'b', label='Training acc')
plt.plot(epochs, val_acc, 'r', label='Validation acc')
plt.title('Training and validation accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()

# Loss curves, drawn on a separate figure
plt.figure()

plt.plot(epochs, loss, 'b', label='Training loss')
plt.plot(epochs, val_loss, 'r', label='Validation loss')
plt.title('Training and validation loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()

plt.show()


NameError: name 'model_history' is not defined