<a href="https://colab.research.google.com/github/yeejingye/GrabAIforSEA_Safety/blob/master/GrabAIforSEA_Safety.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **GRAB AI FOR S.E.A. - Safety**

## **Importing file.csv containing features and labels.**

### First Method - file is uploaded in Google Drive.

Installing and linking the path to the Google Drive folder containing the features and label file.

In [0]:
# Code to read csv file into Colaboratory:
# Shell command (IPython `!` syntax): quietly install/upgrade PyDrive in the runtime.
!pip install -U -q PyDrive
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials
# Authenticate and create the PyDrive client.
# NOTE(review): authenticate_user() is presumably an interactive Colab prompt - confirm.
auth.authenticate_user()
gauth = GoogleAuth()
# Hand the application-default credentials to PyDrive.
gauth.credentials = GoogleCredentials.get_application_default()
# `drive` is the client the download cells use to fetch files by id.
drive = GoogleDrive(gauth)

Insert the link to the Google Drive file that contains all of the feature data.

In [0]:
# Declare the Google Drive file link.
## The sample of the link: 'https://drive.google.com/open?id=1NONER4dapWGYKK6D-CGb8SAhavKzETQ1'
link = 'https://drive.google.com/open?id=1NONER4dapWGYKK6D-CGb8SAhavKzETQ1'

# Extract the file id from the shareable link.
# A plain `link.split('=')` breaks as soon as the link carries a second
# query parameter (e.g. '...?id=<id>&usp=sharing'), so parse the URL properly
# and fall back to the '/d/<id>/' path form that Drive also hands out.
import re
from urllib.parse import parse_qs, urlparse

query_ids = parse_qs(urlparse(link).query).get('id')
path_match = re.search(r'/d/([\w-]+)', link)
if query_ids:
    file_id = query_ids[0]
elif path_match:
    file_id = path_match.group(1)
else:
    raise ValueError('Could not find a Google Drive file id in link: %s' % link)
print(file_id)

# Read and store data in pandas dataframe
import pandas as pd
downloaded = drive.CreateFile({'id': file_id})
downloaded.GetContentFile('full.csv')  # saves the Drive file locally as full.csv
data_df = pd.read_csv('full.csv')

# Sorting the dataframe by bookingID
data_df = data_df.sort_values('bookingID')

1NONER4dapWGYKK6D-CGb8SAhavKzETQ1


Insert the link of Google Drive file containing the label file.

In [0]:
# Declare the Google Drive file link.
## The sample of the link: 'https://drive.google.com/open?id=1NONER4dapWGYKK6D-CGb8SAhavKzETQ1'
# NOTE: this placeholder is the same link as the features file; replace it
# with the link of the label file.
link = 'https://drive.google.com/open?id=1NONER4dapWGYKK6D-CGb8SAhavKzETQ1'

# Extract the file id from the shareable link.
# A plain `link.split('=')` breaks as soon as the link carries a second
# query parameter (e.g. '...?id=<id>&usp=sharing'), so parse the URL properly
# and fall back to the '/d/<id>/' path form that Drive also hands out.
import re
from urllib.parse import parse_qs, urlparse

query_ids = parse_qs(urlparse(link).query).get('id')
path_match = re.search(r'/d/([\w-]+)', link)
if query_ids:
    file_id = query_ids[0]
elif path_match:
    file_id = path_match.group(1)
else:
    raise ValueError('Could not find a Google Drive file id in link: %s' % link)
print(file_id)

# Read and store data in pandas dataframe
import pandas as pd
downloaded = drive.CreateFile({'id': file_id})
downloaded.GetContentFile('label.csv')  # saves the Drive file locally as label.csv
label_df = pd.read_csv('label.csv')

# Index and order the labels by bookingID, exactly as the local-disk loading
# path does, so that the label-mapping step looks labels up by bookingID
# rather than by row number.
label_df = label_df.set_index('bookingID').sort_index(ascending=True)

Unnamed: 0,bookingID,label
count,16135560.0,16135560.0
mean,818481900000.0,0.3055349
std,495247600000.0,0.4606336
min,0.0,0.0
25%,377957100000.0,0.0
50%,807453900000.0,0.0
75%,1254130000000.0,1.0
max,1709397000000.0,1.0


### Second method - running from local disk.

In [0]:
%matplotlib notebook
import pandas as pd
import matplotlib.pyplot as plt
import scipy.signal as sig
import numpy as np
from numpy import array, vstack, newaxis
import glob
import os

# Change the path to feature directory
featdir = 'C:\\Program Files\\WPy-3662\\notebooks\\grab_safety\\features'

# Retrieve all the csv files in feature directory (sorted for a stable order)
featfiles = sorted(glob.glob(os.path.join(featdir, '*.csv')))
if not featfiles:
    # Fail with an actionable message instead of pandas' generic
    # "No objects to concatenate" error.
    raise FileNotFoundError('No feature CSV files found in: %s' % featdir)

# Read every feature file into its own dataframe
dataframes = [pd.read_csv(featfile) for featfile in featfiles]

# Concatenate the CSV files together.
# ignore_index=True gives every row a unique index label; keeping each file's
# own 0..n-1 index would leave duplicate labels, which makes label-based row
# access ambiguous later on.
data_df = pd.concat(dataframes, ignore_index=True)

# Rearranging data based on bookingID
data_df = data_df.sort_values('bookingID')

# # Optional: Print out to a new CSV file
# data_df.to_csv('features.csv')

In [0]:
# Directory that holds the label CSV file(s); adjust to your machine
labeldir = 'C:\\Program Files\\WPy-3662\\notebooks\\grab_safety\\labels'

# Collect every csv file found in that directory
labelfiles = glob.glob(os.path.join(labeldir, '*.csv'))

# Read each label file into its own dataframe
dataframes = [pd.read_csv(labelfile) for labelfile in labelfiles]

# Stack the files, index the result by bookingID and order it by that index
label_df = (
    pd.concat(dataframes, ignore_index=False)
    .set_index('bookingID')
    .sort_index(ascending=True)
)

# Optional: Print out to a new CSV file
# label_df.to_csv('label.csv')

1NONER4dapWGYKK6D-CGb8SAhavKzETQ1


## Mapping the Label to the Main Dataframe based on bookingID

In [0]:
# Inserting label into main dataframe

# Build a bookingID -> label lookup.
# The local-disk loading path indexes label_df by bookingID, whereas a frame
# read straight from CSV still carries bookingID as an ordinary column. Handle
# both, otherwise the lookup would be keyed by row number and every bookingID
# would be matched against the wrong key.
if 'bookingID' in label_df.columns:
    label_by_booking = label_df.set_index('bookingID')['label']
else:
    label_by_booking = label_df['label']

# As a dict, a bookingID that appears more than once keeps its last label.
label = label_by_booking.to_dict()

data_df['label'] = data_df['bookingID'].map(label)

## **Transforming smartphone coordinate frame to vehicle coordinate frame**
All the data in the feature files were recorded by the phone's accelerometer and gyroscope, so they are expressed in the phone's coordinate frame. They have to be transformed to the vehicle coordinate frame using the transformation matrix taken from this paper:

**Title: Dangerous driving behavior detection using smartphone sensors**

**Author: Fu Li ; Hai Zhang ; Huan Che ; Xiaochen Qiu**

**Link: https://ieeexplore.ieee.org/document/7795864**

To obtain the transformation matrix, we first have to compute the roll, pitch, and yaw angles. The formulae are given in the same paper.

In [0]:
# Derive the pitch, roll and yaw angles (in radians) for every sample
import numpy as np

g = 9.81  # gravity value in m/s2

accel_x = data_df['acceleration_x']
accel_y = data_df['acceleration_y']
accel_z = data_df['acceleration_z']

# arcsin yields NaN wherever |acceleration_y| exceeds g
data_df['pitchangle'] = np.arcsin(accel_y / g)
data_df['rollangle'] = np.arctan(-accel_x / accel_z)
# Bearing is converted from degrees to radians
data_df['yawangle'] = data_df['Bearing'] / 180 * np.pi

  This is separate from the ipykernel package so we can avoid doing imports until


Some angles could not be calculated (the result is NaN), probably because of errors in the recorded data. All rows containing NaN values are therefore removed.

In [0]:
# Discard every row that has at least one missing (NaN) value
data_df = data_df.dropna(axis=0, how='any')

data_df

Unnamed: 0,bookingID,Accuracy,Bearing,acceleration_x,acceleration_y,acceleration_z,gyro_x,gyro_y,gyro_z,second,Speed,pitchangle,rollangle,yawangle
0,0,16.0,142.124496,-0.660800,-9.543246,-1.814920,-0.002753,-0.035185,0.014585,183.0,0.227891,-1.337061,-0.349174,2.480540
1,0,8.0,315.071838,-0.674707,-9.464740,-1.566092,-0.001937,-0.019093,0.009868,942.0,2.682516,-1.304702,-0.406792,5.499041
2,0,16.0,144.125198,-1.183429,-8.491260,-3.712085,0.022289,-0.002134,0.052846,123.0,11.216825,-1.046291,-0.308618,2.515459
4,0,8.0,38.000866,-0.889590,-8.826669,-1.920493,0.041287,-0.017650,0.033933,1037.0,12.354941,-1.119225,-0.433785,0.663240
5,0,8.0,350.114624,-0.931610,-8.326770,-0.422290,-0.002761,0.076504,-0.077719,1555.0,18.729626,-1.013720,-1.145209,6.110653
7,0,12.0,231.777420,-2.857935,-8.667563,-3.708795,0.027518,0.082160,-0.144197,932.0,5.179691,-1.083375,-0.656545,4.045279
9,0,12.0,312.296448,-0.054431,-9.750354,0.372195,0.080203,0.326508,0.009699,1152.0,4.589510,-1.460467,0.145214,5.450601
10,0,16.0,327.476074,-1.083987,-8.093045,1.784265,-0.114234,0.005863,-0.125186,229.0,1.999781,-0.970165,0.545935,5.715536
11,0,8.0,20.008207,-1.134830,-9.485226,-0.893030,0.030607,-0.071624,-0.046537,1477.0,17.879084,-1.312763,-0.904077,0.349209
12,0,8.0,293.012756,0.334064,-9.324924,-2.542563,-0.068618,-0.115205,0.019865,1221.0,7.469669,-1.255011,0.130640,5.114037


After roll, yaw, and pitch angle are obtained, the transformation matrix can be calculated. Then, the acceleration in the 3 axes of the phone coordinate frame can be converted to vehicle coordinate frame.

In [0]:
def phone_to_vehicle_acceleration(acc_x, acc_y, acc_z, roll, pitch, yaw):
    """Rotate phone-frame accelerations into the vehicle frame.

    Vectorised closed form of ``(A . B . C)^T . E`` where ``A``, ``B`` and ``C``
    are the roll, pitch and yaw rotation matrices and ``E`` is the column
    vector ``[acc_x, acc_y, acc_z]``. Because ``(A.B.C)^T = C^T . B^T . A^T``,
    the three transposed rotations are applied one after another.

    Parameters
    ----------
    acc_x, acc_y, acc_z : array-like of float
        Acceleration components in the phone coordinate frame.
    roll, pitch, yaw : array-like of float
        Rotation angles in radians, element-wise aligned with the accelerations.

    Returns
    -------
    tuple of numpy.ndarray
        ``(acc_xv, acc_yv, acc_zv)``: the acceleration components in the
        vehicle coordinate frame.
    """
    acc_x = np.asarray(acc_x, dtype=float)
    acc_y = np.asarray(acc_y, dtype=float)
    acc_z = np.asarray(acc_z, dtype=float)
    cos_r, sin_r = np.cos(roll), np.sin(roll)
    cos_p, sin_p = np.cos(pitch), np.sin(pitch)
    cos_y, sin_y = np.cos(yaw), np.sin(yaw)

    # A^T . E  (A = [[cos r, 0, -sin r], [0, 1, 0], [sin r, 0, cos r]])
    u_x = cos_r * acc_x + sin_r * acc_z
    u_y = acc_y
    u_z = -sin_r * acc_x + cos_r * acc_z

    # B^T . u  (B = [[1, 0, 0], [0, cos p, sin p], [0, -sin p, cos p]])
    v_x = u_x
    v_y = cos_p * u_y - sin_p * u_z
    v_z = sin_p * u_y + cos_p * u_z

    # C^T . v  (C = [[cos y, sin y, 0], [-sin y, cos y, 0], [0, 0, 1]])
    return cos_y * v_x - sin_y * v_y, sin_y * v_x + cos_y * v_y, v_z


# Compute all rows at once. A row-by-row loop is impractically slow on this
# dataset, and `data_df.loc['acceleration_xv'] = ""` would append a ROW of
# empty strings (turning every column into object dtype) instead of creating
# a column.
acc_xv, acc_yv, acc_zv = phone_to_vehicle_acceleration(
    data_df['acceleration_x'],
    data_df['acceleration_y'],
    data_df['acceleration_z'],
    np.asarray(data_df['rollangle'], dtype=float),
    np.asarray(data_df['pitchangle'], dtype=float),
    np.asarray(data_df['yawangle'], dtype=float),
)

# Recording the transformed acceleration data into new columns.
data_df['acceleration_xv'] = acc_xv
data_df['acceleration_yv'] = acc_yv
data_df['acceleration_zv'] = acc_zv

According to the same paper ([Li et al.](https://ieeexplore.ieee.org/document/7795864)), the 4 main dangerous driving behaviours of drivers are:



1.   Speeding (up and down) abnormally.
2.   Steering at a high speed.
3.   Weaving (change lane too frequently).
4.   Operating smart phone during the drive.

All of these behaviours can be derived from the acceleration of the vehicle together with the gyroscope data, so the transformed accelerations and the gyroscope readings are used as the main features.



# Applying Median Filter

Some of the data do not make physical sense and are probably noise.
E.g. one of the recorded speed values is 1.480186e+02 m/s, which is about 532 km/h.

In [0]:
# Summary statistics of every numeric column, shown before the median filter is applied
data_df.describe()

Thus, median filter is applied to remove some outliers.

In [0]:
import scipy.signal as sig
import numpy as np

# The median filter is a sliding window over consecutive samples, so the rows
# must be in time order within each trip before it is applied.
data_df = data_df.sort_values(['bookingID', 'second'])

# Filter the measured/derived numeric signals only:
#   * bookingID is the trip identifier,
#   * second is the time axis,
#   * label is the target; smoothing it would corrupt labels near trip borders.
not_filtered = {'bookingID', 'second', 'label'}
columns = [column for column in data_df.select_dtypes(include='number').columns
           if column not in not_filtered]

for column in columns:
    # Apply Median Filter with size=5, one trip at a time so that the window
    # never mixes samples that belong to different bookings.
    filtered = data_df.groupby('bookingID', sort=False)[column].transform(
        lambda signal: sig.medfilt(signal.values, 5))
    # transform() keeps the original row order; assign by position so that
    # repeated index labels cannot misalign the result.
    data_df[column] = filtered.values

data_df.describe()

## Declaring New Pandas Dataframe for Important Features

In [0]:
# Columns carried into the model dataframe: the trip id first, the label last,
# and the nine input features in between.
selected_columns = [
    'bookingID',
    'Accuracy',
    'gyro_x', 'gyro_y', 'gyro_z',
    'second',
    'Speed',
    'acceleration_xv', 'acceleration_yv', 'acceleration_zv',
    'label',
]
feature_df = data_df[selected_columns]

feature_df

Shuffle the rows of the dataframe so that the training and test splits taken below are not ordered by bookingID.

In [0]:
# Shuffle all rows. A fixed random_state makes the shuffle (and therefore the
# positional train/test split taken from it) reproducible between runs.
feature_df = feature_df.sample(frac=1, random_state=42)

## Building Multi-Layer Perceptron Model

In [0]:
# Installing & importing tensorflow

from __future__ import absolute_import, division, print_function, unicode_literals

# Shell command (IPython `!` syntax): quietly install the pinned TensorFlow 2.0 beta GPU build.
!pip install -q tensorflow-gpu==2.0.0-beta0
import tensorflow as tf

# Reset Keras' global state before the model is built.
tf.keras.backend.clear_session()

In [0]:
# Define the multi-layer perceptron with the Keras functional API.

from tensorflow import keras
from tensorflow.keras import layers

# Width of each hidden (ReLU) layer, in order
hidden_units = (108, 54, 81)

# Nine input features per sample
inputs = keras.Input(shape=(9,))

x = inputs
for units in hidden_units:
    x = layers.Dense(units, activation='relu')(x)

# Single sigmoid unit for the binary label
outputs = layers.Dense(1, activation='sigmoid')(x)

model = keras.Model(inputs=inputs, outputs=outputs, name='grabsafety_model')

In [0]:
# Visualising the built-model: print the layers, their output shapes and parameter counts

model.summary()

In [0]:
# Recalling dataframe as x and y
# Column 0 is bookingID and the last column is the label, so `1:-1` selects the
# nine feature columns and `-1` selects the label column.
# Everything except the last 4000 rows is used for training; rows
# [-4000:-2000] form the test set.
# NOTE(review): the final 2000 rows are not used in this cell - presumably held
# back for later validation; confirm.
train_x = feature_df.iloc[:-4000, 1:-1]
train_y = feature_df.iloc[:-4000, -1]
test_x = feature_df.iloc[-4000:-2000, 1:-1]
test_y = feature_df.iloc[-4000:-2000, -1]

# Compile model
model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001),  # Adam optimiser is used with lr=0.001
              loss='binary_crossentropy', # Loss function chosen
              metrics=['accuracy']) # Metrics to be monitored

# Fit/Train the Built-MLP
model.fit(train_x, train_y, epochs=50, batch_size=512)

# Evaluate the Built-model
print('\n# Evaluate using Test Data')
scores = model.evaluate(test_x, test_y)
# scores[0] is the loss; scores[1] is the first compiled metric (accuracy)
print("\n%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))