# Training a driving model with behavioral cloning

## In this notebook, we train a neural network on samples recorded with the Udacity simulator

### First, we create a dataset from different runs (folders) to train the network

In [1]:
import os
from os import listdir
from os.path import isfile, join
from shutil import copyfile
import pandas
import fileinput
# Merge every run found under `datafolder` into a single dataset in
# `outputfolder`: all camera frames are copied into one IMG/ directory and all
# driving logs are concatenated into one driving_log.csv.
datafolder = '/media/josealb/HDD_1/Datasets/Self_driving/Simulator_Data/Merge'
outputfolder = '/media/josealb/HDD_1/Datasets/Self_driving/Simulator_Data/All_tracks'

# os.walk yields the root folder first; drop it so only its sub-folders remain.
subdirs = [entry[0] for entry in os.walk(datafolder)][1:]
fileList = []

# exist_ok=True keeps this cell re-runnable (os.mkdir raised FileExistsError
# the second time the cell was executed). Creating IMG/ also creates its parent.
os.makedirs(join(outputfolder, 'IMG'), exist_ok=True)

for subdir in subdirs:
    print(subdir)
    if "IMG" in subdir:
        # Image folder: copy every regular file into the merged IMG/ directory.
        for name in listdir(subdir):
            if isfile(join(subdir, name)):
                copyfile(join(subdir, name), join(outputfolder, 'IMG', name))
    else:
        # Run folder: remember its driving log for concatenation below.
        fileList.append(join(subdir, 'driving_log.csv'))

# Concatenate all driving logs (they are read and written without a header row).
dfList = []
for filename in fileList:
    print(filename)
    dfList.append(pandas.read_csv(filename, header=None))
concatDf = pandas.concat(dfList, axis=0)
merged_log = join(outputfolder, 'driving_log.csv')
concatDf.to_csv(merged_log, index=False, header=False)

# Replace every backslash in the merged log with a forward slash (presumably to
# normalise paths recorded on Windows). With inplace=True, print() writes back
# into the file; the untouched original is kept as driving_log.csv.bak.
with fileinput.FileInput(merged_log, inplace=True, backup='.bak') as log:
    for line in log:
        print(line.replace('\\', '/'), end='')

/media/josealb/HDD_1/Datasets/Self_driving/Simulator_Data/Merge/ND_Sample
/media/josealb/HDD_1/Datasets/Self_driving/Simulator_Data/Merge/ND_Sample/IMG
/media/josealb/HDD_1/Datasets/Self_driving/Simulator_Data/Merge/right_lane_2
/media/josealb/HDD_1/Datasets/Self_driving/Simulator_Data/Merge/right_lane_2/IMG
/media/josealb/HDD_1/Datasets/Self_driving/Simulator_Data/Merge/Track1_2
/media/josealb/HDD_1/Datasets/Self_driving/Simulator_Data/Merge/Track1_2/IMG
/media/josealb/HDD_1/Datasets/Self_driving/Simulator_Data/Merge/Track2_2
/media/josealb/HDD_1/Datasets/Self_driving/Simulator_Data/Merge/Track2_2/IMG
/media/josealb/HDD_1/Datasets/Self_driving/Simulator_Data/Merge/Track_1
/media/josealb/HDD_1/Datasets/Self_driving/Simulator_Data/Merge/Track_1/IMG
/media/josealb/HDD_1/Datasets/Self_driving/Simulator_Data/Merge/Track_2
/media/josealb/HDD_1/Datasets/Self_driving/Simulator_Data/Merge/Track_2/IMG
/media/josealb/HDD_1/Datasets/Self_driving/Simulator_Data/Merge/track_2_right_lane
/media/jose

### Now, we train the network on the created dataset

In [1]:
import os
import csv
import pdb

from keras.models import Sequential
from keras.layers.core import Dense, Activation, Flatten, Lambda
from keras.layers import Cropping2D
from keras.layers.convolutional import Convolution2D, Conv2D
from keras.layers import Dropout
from keras.layers.normalization import BatchNormalization
from keras.callbacks import ModelCheckpoint
from keras import optimizers

import cv2
import numpy as np
import sklearn

# Root folder of the dataset used for training; it is expected to hold
# driving_log.csv and an IMG/ directory.
datafolder = '/media/josealb/HDD_1/Datasets/Self_driving/Simulator_Data/All_tracks_20_09_2017/'
#datafolder = '/media/josealb/HDD_1/Datasets/Self_driving/Simulator_Data/track_2_right_lane/'

# Read every row of the driving log; each row is kept as a list of strings.
with open(datafolder+'driving_log.csv') as csvfile:
    samples = [row for row in csv.reader(csvfile)]


from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle

# Hold out 20% of the log rows for validation.
train_samples, validation_samples = train_test_split(samples, test_size=0.2)


def random_augmentation(image, steering_angle, flip_prob=0.5):
    """Randomly mirror an image left-to-right and negate its steering angle.

    Args:
        image: Image array; it is mirrored with np.fliplr, so it needs at
            least two dimensions.
        steering_angle: Steering value associated with the image.
        flip_prob: Probability (0..1) of applying the flip. Defaults to 0.5.

    Returns:
        Tuple (image, steering_angle): either the inputs unchanged, or the
        mirrored image together with the negated steering angle.
    """
    # The previous version called an undefined `rand` and negated an undefined
    # `angle`, so it raised on every call and never returned the flipped angle.
    if np.random.rand() < flip_prob:
        image = np.fliplr(image)
        steering_angle = -steering_angle
    return image, steering_angle
        

def _load_rgb(path):
    """Read the image at `path` with OpenCV and return it converted BGR -> RGB.

    Raises:
        IOError: if cv2.imread returned None, i.e. the file could not be read.
    """
    bgr = cv2.imread(path)
    if bgr is None:
        # Fail with the offending path instead of an opaque error from
        # cvtColor (the old `is None` check ran after cvtColor, so it was
        # unreachable and relied on pdb).
        raise IOError('Could not read image: ' + path)
    return cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)


def generator(samples, batch_size=32, correction=0.2):
    """Yield shuffled (images, angles) batches forever.

    Each log row contributes three images: the center, left and right camera
    frames (columns 0, 1, 2). The steering value in column 3 is used as-is for
    the center image, increased by `correction` for the left image and
    decreased by `correction` for the right image.

    Args:
        samples: Sequence of driving-log rows (lists of strings).
        batch_size: Number of log rows per batch; a full batch therefore
            holds 3 * batch_size images.
        correction: Steering offset applied to the side-camera images.

    Yields:
        The result of sklearn.utils.shuffle(X, y), where X is the array of
        images and y the array of steering angles for the batch.

    Raises:
        IOError: if an image referenced by the log cannot be read.
    """
    num_samples = len(samples)
    while 1: # Loop forever so the generator never terminates
        # sklearn's shuffle returns a shuffled copy rather than shuffling in
        # place, so the result must be kept; previously it was discarded and
        # the sample order never changed between epochs.
        samples = shuffle(samples)
        for offset in range(0, num_samples, batch_size):
            batch_samples = samples[offset:offset+batch_size]

            images = []
            angles = []
            for batch_sample in batch_samples:
                center_angle = float(batch_sample[3])
                per_camera = (
                    (0, center_angle),               # center camera
                    (1, center_angle + correction),  # left camera
                    (2, center_angle - correction),  # right camera
                )
                for column, angle in per_camera:
                    # Only the file name is kept from the logged path; images
                    # are always looked up in <datafolder>/IMG/.
                    filename = batch_sample[column].split('/')[-1]
                    images.append(_load_rgb(datafolder+'IMG/'+filename))
                    angles.append(angle)

            X_train = np.array(images)
            y_train = np.array(angles)
            yield sklearn.utils.shuffle(X_train, y_train)

# Build the batch generators, define the network, then compile and train it.
BATCH_SIZE = 128  # driving-log rows per batch; the generator emits 3 images per row
train_generator = generator(train_samples, batch_size=BATCH_SIZE)
validation_generator = generator(validation_samples, batch_size=BATCH_SIZE)

ch, row, col = 3, 160, 320  # UnTrimmed image format

model = Sequential()

# Crop 50 pixel rows from the top and 20 from the bottom of every frame.
model.add(Cropping2D(cropping=((50, 20), (0, 0)), input_shape=(row, col, ch)))
# Rescale pixel values to about [-1, 1] (assumes 0-255 inputs).
model.add(Lambda(lambda x: x/127.5 - 1.))

# Convolutional stack, written with the Keras 2 signature
# (kernel_size tuple + strides) instead of the Keras 1 `subsample` form.
conv_specs = [
    # (filters, kernel_size, strides)
    (34, (5, 5), (2, 2)),
    (46, (5, 5), (2, 2)),
    (58, (5, 5), (2, 2)),
    (74, (3, 3), (1, 1)),
    (74, (3, 3), (1, 1)),
]
for filters, kernel_size, strides in conv_specs:
    model.add(Conv2D(filters, kernel_size, strides=strides, activation="elu"))
    model.add(BatchNormalization())
model.add(Dropout(0.5))
model.add(Flatten())

# Fully connected head. The accidental second BatchNormalization after
# Dense(20) has been removed.
# NOTE(review): these Dense layers have no activation, so apart from batch
# normalization the head is linear - confirm this is intended.
for units in (1164, 200, 100, 20):
    model.add(Dense(units))
    model.add(BatchNormalization())
model.add(Dense(1))


model.compile(loss='mse', optimizer='adam')

# Save a checkpoint whenever the validation loss improves.
checkpoint = ModelCheckpoint('model-{epoch:03d}-{val_loss:03f}.h5',
                             monitor='val_loss',
                             verbose=0,
                             save_best_only=True,
                             mode='auto')

# Keras 2 counts generator *batches* per epoch, not samples. The legacy
# `samples_per_epoch=len(train_samples)*3` was reinterpreted as a step count,
# making every epoch far longer than one pass over the data. One epoch is now
# exactly one pass: ceil(rows / BATCH_SIZE) batches.
train_steps = (len(train_samples) + BATCH_SIZE - 1) // BATCH_SIZE
validation_steps = (len(validation_samples) + BATCH_SIZE - 1) // BATCH_SIZE

model.fit_generator(train_generator,
                    steps_per_epoch=train_steps,
                    validation_data=validation_generator,
                    validation_steps=validation_steps,
                    epochs=25,
                    callbacks=[checkpoint],
                    verbose=1)

model.save('model.h5')

Using TensorFlow backend.


Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


### If necessary, we can reduce the learning rate and continue training

In [None]:
# Lower the learning rate of the optimizer attached to the compiled model.
# `adam` was never defined (the model was compiled with the string 'adam'),
# so the optimizer is reached through `model.optimizer` and its learning-rate
# variable is updated through the Keras backend.
from keras import backend as K
K.set_value(model.optimizer.lr, 0.0001)

In [None]:
# Continue training the existing model for 50 more epochs, then save it.
# Keras 2 counts generator batches per epoch (steps), not samples, so the
# legacy `samples_per_epoch` / `nb_val_samples` / `nb_epoch` keywords are
# replaced by their Keras 2 equivalents.
batch_size = 128  # must match the batch_size the generators were created with
model.fit_generator(train_generator,
                    steps_per_epoch=(len(train_samples) + batch_size - 1) // batch_size,
                    validation_data=validation_generator,
                    validation_steps=(len(validation_samples) + batch_size - 1) // batch_size,
                    epochs=50,
                    callbacks=[checkpoint],
                    verbose=1)
model.save('model.h5')


## Data exploration

Here is some data exploration. The main goal was to visualize how the frames are distributed between straight driving and turning.
However, I did not remove the straight-driving data, since I wanted the neural network to learn from all of the data.

In [None]:
# Inspect the container type that train_test_split returned for the training rows.
type(train_samples)

In [None]:
# Steering values (column 3 of every training row) as a float array.
# The builtin `float` replaces `np.float`, a deprecated alias of the same type
# that was removed in NumPy 1.24.
angles = np.asarray([item[3] for item in train_samples]).astype(float)

In [None]:
# Check the type of `angles` after the conversion in the previous cell.
type(angles)

In [None]:
# Show the largest steering value together with the histogram (counts, bin edges).
# A bare `max(angles)` on a non-final line was evaluated but never displayed.
max(angles), np.histogram(angles)

In [None]:
import matplotlib.pyplot as plt

# Histogram of the training steering values, labelled so the figure stands on
# its own; the trailing semicolon suppresses the repr of the last call.
fig, ax = plt.subplots()
ax.hist(angles)
ax.set_xlabel('steering angle')
ax.set_ylabel('number of log rows')
ax.set_title('Steering angle distribution (training split)');


In [None]:
# Grab a handle to matplotlib's current figure.
# NOTE(review): `fig` is not used anywhere else in the visible notebook, and
# the histogram figure from the previous cell may already be closed by the
# time this cell runs - confirm this cell is still needed.
fig = plt.gcf()
