File structure
Folders and files in the main directory:
- model.py
- Behavior-Cloning.ipynb
- model.json
- model.h5
- drive.py
- driving_log.csv
- 'IMG' folder
    -- 'All .jpg images'

In [1]:
# Get training dataset from Udacity website

# Imports
from urllib.request import urlretrieve
from os.path import isfile
from tqdm import tqdm
import zipfile
import shutil

class DLProgress(tqdm):
    """tqdm progress bar exposing `hook`, a callback with urlretrieve's reporthook signature."""

    # Block count seen on the most recent `hook` call.
    last_block = 0

    def hook(self, block_num=1, block_size=1, total_size=None):
        """Advance the bar by the amount transferred since the previous call.

        block_num:  running count of blocks transferred so far.
        block_size: size of a single block.
        total_size: total size of the download; stored as the bar's total.
        """
        self.total = total_size
        newly_received = (block_num - self.last_block) * block_size
        self.update(newly_received)
        self.last_block = block_num

# Download and unpack the dataset only when the driving log is not already in place.
# The guard tests 'driving_log.csv' -- the exact name the log is moved to below -- so
# re-running this cell neither downloads the archive again nor moves 'data/IMG'
# into an already existing 'IMG' folder.
if not isfile('driving_log.csv'):
    with DLProgress(unit='B', unit_scale=True, miniters=1, desc='Train Dataset') as pbar:
        urlretrieve(
            'https://d17h27t6h515a5.cloudfront.net/topher/2016/December/584f6edd_data/data.zip',
            'data.zip',
            pbar.hook)

    # Unzip data set; the context manager closes the archive even if extraction fails
    with zipfile.ZipFile('data.zip', 'r') as zip_ref:
        zip_ref.extractall('')

    # Move dataset to folder location
    shutil.move('data/IMG/','IMG')
    shutil.move('data/driving_log.csv','driving_log.csv')
    
    print('Training data downloaded.')

Train Dataset: 333MB [01:07, 4.95MB/s]                           

Training data downloaded.





In [None]:
# Imports
import pickle
import numpy as np
import math

# Fix error with TF and Keras
# The assignment below sets the attribute `control_flow_ops` on `tf.python` so that
# it refers to the top-level `tf` module itself.
# NOTE(review): presumably the installed Keras looks up `tf.python.control_flow_ops`
# and the installed TensorFlow does not provide it -- confirm against the pinned
# versions before removing this line.
import tensorflow as tf
tf.python.control_flow_ops = tf

print('Modules loaded.')

Modules loaded.


In [None]:
# Load the images

# Imports
import os
import os.path
from PIL import Image
import numpy as np

# Read every .jpg in ./IMG into `imgs`, a uint8 array with one image per entry
# along axis 0.
img_path = os.getcwd()+'/IMG'
valid_ext = [".jpg"]

# Decode into a Python list and build the array once at the end: calling
# np.append inside the loop copies the whole accumulated array for every image.
img_list = []

# os.listdir returns names in arbitrary order; sorting makes the load order
# reproducible between runs.
# NOTE(review): nothing in this cell ties an image to a row of driving_log.csv --
# verify that images and labels are aligned before they are paired by position.
for f in sorted(os.listdir(img_path)):
    f_ext = os.path.splitext(f)[1]
    if f_ext.lower() not in valid_ext:
        continue
    # The context manager releases the file handle held by the opened image.
    with Image.open(os.path.join(img_path,f)) as pil_img:
        img = np.squeeze(np.asarray(pil_img))
    img_list.append(img)

if img_list:
    imgs = np.stack(img_list).astype(np.uint8, copy=False)
else:
    # No images found: keep the empty (0, 160, 320, 3) layout the rest of the notebook expects.
    imgs = np.empty((0,160,320,3), dtype=np.uint8)

print('Images loaded.')

In [None]:
# Load the driving log data

# Imports
import csv, sys

filename = 'driving_log.csv'
angles = []
imgs_fname = []

# newline='' is the mode the csv module documents for files handed to csv.reader.
with open(filename, 'rt', newline='') as csvfile:
    
    # Load driving angle data
    anglereader = csv.reader(csvfile, delimiter=',')
    
    try:
        for row in anglereader:
            # Blank lines come back as empty rows; indexing them would raise IndexError.
            if not row:
                continue

            # NOTE(review): row[-1] is the LAST column of the row and is stored
            # as a string; every row is kept, including a header row if the file
            # has one. Confirm the last column really is the steering angle.
            angles.append(row[-1])
          
            # Keep only the file name (text after the final '/') of the first column.
            fln = row[0].split('/')
            imgs_fname.append(fln[-1])
            
    except csv.Error as e:
        # Use the reader that is actually defined here; the previous code
        # referenced an undefined name and raised NameError instead of reporting.
        sys.exit('file %s, line %d: %s' % (filename, anglereader.line_num, e))
    
    print('Driving driving log data loaded.')

In [None]:
# Verify that the images and driving data are loaded correctly
# Display random sample image and matching driving data

# Imports
import random
import matplotlib.image as mpimg
import matplotlib.pyplot as plt

%matplotlib inline

# Pick one random position in `imgs` and show that image together with the
# file name and label stored at the same position.
# NOTE(review): `index` is drawn from len(imgs) only; if imgs_fname or angles
# are shorter than imgs, the lookups below raise IndexError -- confirm that the
# three collections have the same length and order.
index = random.randint(0, len(imgs)-1)
image = imgs[index]
plt.figure(figsize=(9,9))
plt.imshow(image)

print('The image dimensions are: ',image.shape)
print('The image filename is: ',imgs_fname[index])
print('The driving angle is: ',angles[index])

In [None]:
# Set up train data
# Both names are bound to the existing objects (no copy is made): X_train is the
# image array `imgs`, y_train is the list `angles`. From here on, samples and
# labels are paired purely by position.
X_train, y_train = imgs, angles

In [None]:
# Summary of Dataset information

# Number of training examples (size of the first axis of X_train)
n_train = X_train.shape[0]

# Shape of a single image, taken from the first sample
image_shape = X_train[0].shape

# Number of distinct label values present in y_train
n_classes = len(np.unique(y_train))

# Display information
print("Number of training examples =", n_train)
print("Image data shape =", image_shape)
print("Number of classes =", n_classes)

In [None]:
# Preprocessing functions for image set

# Imports
import cv2

def preprocess_set(img):
    """Grayscale and normalize an image set, then add an axis at position 3.

    img: collection of images accepted by grayscale_set.
    Returns the result of np.expand_dims(..., axis=3) applied to the
    normalized grayscale images.
    """
    normalized = normalize_set(grayscale_set(img))
    return np.expand_dims(normalized, axis=3)
    
# Grayscale image set
def grayscale_set(img):
    """Return a list holding the grayscale conversion of every image in `img`.

    Each element img[i] is converted with cv2.COLOR_BGR2GRAY.
    NOTE(review): that conversion code treats the input channels as BGR --
    confirm the callers' channel order matches.
    """
    return [cv2.cvtColor(img[i], cv2.COLOR_BGR2GRAY) for i in range(len(img))]

# Normalize image set (values between 0-1)
def normalize_set(img):
    """Return a list in which every element of `img` has been divided by 255."""
    grayscale_max = 255
    return [img[i] / grayscale_max for i in range(len(img))]

In [None]:
# Shuffle training image set

# Imports
from sklearn.utils import shuffle

# Both collections are passed to the same shuffle call and rebound to its results.
# NOTE(review): no random_state is given, so the resulting order is expected to
# differ between runs -- pass one if reproducibility matters.
X_train, y_train = shuffle(X_train, y_train)

In [None]:
# Preprocess the image set (the validation split is carved out of it in a later cell)
# NOTE: X_train is overwritten with its preprocessed version, so running this
# cell a second time feeds already-preprocessed data back into preprocess_set.
X_train = preprocess_set(X_train)

In [None]:
# One Hot encode the labels into the variable y_train_one_hot

# Imports
from sklearn.preprocessing import LabelBinarizer

# Fit the binarizer on y_train and encode y_train in a single step.
label_binarizer = LabelBinarizer()
y_train_one_hot = label_binarizer.fit_transform(y_train)

In [None]:
# Set up train and validation data

# Imports
# train_test_split lives in sklearn.model_selection from scikit-learn 0.18 on; the
# older sklearn.cross_validation module was removed in 0.20, so it is used only
# as a fallback for installations that predate the move.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split

# Split data into train and validation data
# NOTE: X_train and y_train_one_hot are overwritten with their 80% training
# share, so re-running this cell splits the already reduced set again.
X_train, X_validation, y_train_one_hot, y_validation_one_hot = train_test_split(X_train, y_train_one_hot, train_size = 0.8)

In [None]:
# CHOOSE TO IMPORT MODEL AND WEIGHTS
# OPTIONAL: import model.json and model.h5

# Imports
import json
from keras.models import model_from_json

# Load the model from model.json
# The file is read inside a context manager so its handle is closed right away
# (a bare open(...).read() leaves closing to the garbage collector).
with open('model.json') as json_file:
    json_data = json_file.read()

# The file content is decoded with json.loads first and the decoded value is
# what gets handed to model_from_json.
json_string = json.loads(json_data)
model = model_from_json(json_string)

#Load the weights from model.h5
model.load_weights('model.h5', by_name=True)

In [None]:
# CHOOSE TO SET UP NEW MODEL AND WEIGHTS

# Network model

# Imports
from keras.models import Sequential
from keras.models import Sequential
from keras.layers.core import Dense, Activation, Flatten, Dropout
from keras.layers.convolutional import Convolution2D
from keras.layers.pooling import MaxPooling2D

# Parameters
# Width of the output layer: the number of distinct label values in y_train.
# NOTE(review): the targets fed to the network are y_train_one_hot; confirm this
# count equals y_train_one_hot.shape[1].
n_classes = len(np.unique(y_train))

# Layers are added in this order: convolution -> max pooling -> dropout -> ReLU
# -> flatten -> dense(128) -> ReLU -> dense(n_classes) -> softmax.
# NOTE(review): Convolution2D(32, 3, 3) is presumably the Keras 1.x form for
# 32 filters of size 3x3 -- confirm against the installed Keras version.
model = Sequential()
model.add(Convolution2D(32, 3, 3, input_shape=(160, 320, 1)))
model.add(MaxPooling2D((2, 2)))
model.add(Dropout(0.5))
model.add(Activation('relu'))
model.add(Flatten())
model.add(Dense(128))
model.add(Activation('relu'))
model.add(Dense(n_classes))
model.add(Activation('softmax'))

In [None]:
# Define train batch generator

# Imports
import math

# Parameters
batch_size = 100

def generate_batch(X_train, y_train_one_hot, batch_size):
    """Yield (X_batch, y_batch) pairs forever, cycling over the data in order.

    X_train:         sliceable collection of samples.
    y_train_one_hot: sliceable collection of targets, paired with X_train by position.
    batch_size:      maximum number of samples per yielded batch (positive integer).

    Each pass covers every sample exactly once; when len(X_train) is not a
    multiple of batch_size the final batch of the pass is shorter instead of
    being dropped. After the last batch the generator starts over.

    Raises ValueError (on the first next() call) when batch_size is not positive
    or X_train is empty; without the check an empty pass would make the endless
    loop spin without ever yielding.
    """
    n_samples = len(X_train)

    if batch_size < 1:
        raise ValueError('batch_size must be a positive integer')
    if n_samples == 0:
        raise ValueError('X_train is empty; there is nothing to yield')

    while True:
        # Stepping by batch_size visits every batch start, including the one of
        # a trailing partial batch.
        for start in range(0, n_samples, batch_size):
            stop = start + batch_size
            yield X_train[start:stop], y_train_one_hot[start:stop]

# Compile and train the model
# The three positional arguments are passed in the order 'adam',
# 'categorical_crossentropy', ['accuracy'].
# NOTE(review): presumably optimizer, loss and metrics respectively -- confirm
# against the installed Keras signature.
model.compile('adam', 'categorical_crossentropy', ['accuracy'])

# Training batches come from generate_batch; an epoch is declared as
# len(X_train) samples and the held-out arrays are passed as validation data.
history = model.fit_generator(generate_batch(X_train, y_train_one_hot, batch_size),
                              samples_per_epoch = len(X_train), nb_epoch = 10, 
                              validation_data = (X_validation, y_validation_one_hot), verbose = 1)

In [None]:
# Save the model and weights

# Imports
from keras.models import model_from_json
import json

# Save the model to model.json
# The value returned by model.to_json() is passed through json.dump, so the file
# holds that value JSON-encoded once more.
# NOTE(review): any loader has to decode the file with json.load / json.loads
# before handing the result to model_from_json -- confirm consumers do so.
json_string = model.to_json()
with open('model.json', 'w') as f:
     json.dump(json_string, f)

# Save the weights to model.h5
model.save_weights('model.h5')