In [None]:
print("Loading modules...")

import numpy as np
import cv2
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import os
from moviepy.editor import VideoFileClip
from IPython.display import HTML
import glob
import time
import pickle
import zipfile
from sklearn.svm import LinearSVC
from sklearn.preprocessing import StandardScaler
from skimage.feature import hog
from sklearn.model_selection import train_test_split
from scipy.ndimage.measurements import label

%matplotlib inline

print("Done.")

In [None]:
print("Defining parameters...")

class Params():
    '''
    Holds the parameters that are passed to the different stages of the
    vehicle detection pipeline.

    The first group of attributes was added for this project (P5): dataset
    locations, pickle files, the random seed, the test ratio and the heatmap
    settings. The second group is carried over from the P4 lane detection and
    representation pipeline.
    '''
    def __init__(self):
        # New params in P5

        # Directory that receives the extracted archives and the pickle files
        self.output_dir = 'output'
        # Location of the zipped training images
        self.training_images = 'data/cars'
        self.training_images_vehicles_zip = os.path.join(self.training_images, 'vehicles.zip')
        self.training_images_non_vehicles_zip = os.path.join(self.training_images, 'non-vehicles.zip')
        # Extraction targets for the two archives (inside output_dir)
        self.training_images_vehicles = os.path.join(self.output_dir, 'vehicles')
        self.training_images_non_vehicles = os.path.join(self.output_dir, 'non-vehicles')
        # Pickled copies of the loaded images
        self.pickle_file = os.path.join(self.output_dir, 'training.pickle')
        self.hard_negative_mining_pickle_file = os.path.join(self.output_dir, 'hard_negative_mining.pickle')
        self.test_images = 'data/test_images'
        # Seed passed to np.random.seed() before random samples are picked
        self.random_seed = 42
        # presumably the fraction of samples held out for testing - usage not shown here
        self.test_ratio = 0.15
        # Minimum heatmap count a pixel needs to be kept as a detection (see Frame.process)
        self.heatmap_threshold = 3
        # presumably the number of previous frames whose detections are remembered - TODO confirm
        self.prev_heatmaps = 10

        # DOWN HERE ARE PARAMS from P4

        # Distortion correction parameters (chessboard pattern size)
        self.calibration_nx = 9
        self.calibration_ny = 6

        # Input values for the perspective transformation
        self.perspective_src = np.float32([[600, 450], [680, 450], [1130, 720], [270, 720]])
        x_min = 430
        x_max = 870
        y_min = 0
        y_max = 720
        self.perspective_dst = np.float32([[x_min, y_min], [x_max, y_min], [x_max, y_max], [x_min, y_max]])

        self.sobel_kernel = 3
        self.sobelx_threshold = (15, 175)
        self.angle_thresh = (0.1 * np.pi/2, 0.5 * np.pi/2)

        self.h_threshold = (0, 160)
        self.l_threshold = (250, 255)
        self.s_threshold = (250, 255)

        self.window_size_ratio = 0.1 # Ratio of the search window height to the image height
        self.stride_ratio = 1. # Ratio of the stride to the search window height
        self.search_width = 50 # Width in pixels of the search width given a previous detection guide exists

        self.xm_per_pix = 3.7/700 # Meters per pixel in x dimension
        self.ym_per_pix = 30/720 # Meters per pixel in y dimension
        self.num_y_vals = 101 # Num of displayed y fit values

        self.low_pass_a = 0.5 # Low pass filter value when smoothing previous fits
        self.max_prev_frames = 3 # Number of previous frames to be considered in a video stream

# Single shared parameter object; the cells below read their settings from it.
params = Params()

# Echo the P5 settings so each run records the configuration it used.
# Note: the backslash at the end of the first line continues the string literal
# itself, so the leading spaces of the following line are part of the printed text.
print("Using:\n\tparams.output_dir={}\n\tparams.training_images={}\n\tparams.training_images_vehicles_zip={}\n\tparams.training_images_non_vehicles_zip={}\n\tparams.training_images_vehicles={}\n\tparams.training_images_non_vehicles={} \
      \n\tparams.pickle_file={}\n\tparams.hard_negative_mining_pickle_file={}\n\tparams.test_images={}\n\tparams.random_seed={}\n\tparams.test_ratio={}\n\tparams.heatmap_threshold={}\n\tparams.prev_heatmaps={}"
      .format(params.output_dir, params.training_images, params.training_images_vehicles_zip, params.training_images_non_vehicles_zip, params.training_images_vehicles, params.training_images_non_vehicles, params.pickle_file, params.hard_negative_mining_pickle_file, params.test_images,
              params.random_seed, params.test_ratio, params.heatmap_threshold, params.prev_heatmaps))

# Echo the settings inherited from P4. Here the two adjacent string literals are
# joined by the parser, so no extra whitespace ends up in the output.
print("Using:\n\tcalibration_nx={}\n\tcalibration_ny={}\n\tperspective_src={}\n\tperspective_dst={}\n\twindow_size_ratio={}\n\tstride_ratio={}\n\tsearch_width={}\n\tsobel_kernel={}\n\tsobelx_threshold={}\n\tangle_thresh={}\n\th_threshold={}\n\tl_threshold={}\n\ts_threshold={}" \
      "\n\txm_per_pix={}\n\tym_per_pix={}\n\tnum_y_vals={}\n\tmax_prev_frames={}\n\tlow_pass_a={}"
      .format(params.calibration_nx, params.calibration_ny, params.perspective_src, params.perspective_dst, params.window_size_ratio, params.stride_ratio, params.search_width, params.sobel_kernel, params.sobelx_threshold, params.angle_thresh,
              params.h_threshold, params.l_threshold, params.s_threshold, params.xm_per_pix, params.ym_per_pix, params.num_y_vals, params.max_prev_frames, params.low_pass_a))

print("Done.")

In [None]:
# Prepare the training dataset: unzip both archives, read every PNG into
# memory and cache the result in a pickle file.

os.makedirs(params.output_dir, exist_ok=True)

# Load the dataset and create a Pickle file
print("Loading the training dataset...")

print("\tUnzipping the data...")

with zipfile.ZipFile(params.training_images_vehicles_zip, "r") as zip_ref:
    zip_ref.extractall(params.training_images_vehicles)
with zipfile.ZipFile(params.training_images_non_vehicles_zip, "r") as zip_ref:
    zip_ref.extractall(params.training_images_non_vehicles)

print("\tDone.")

data = {'vehicles': [], 'non-vehicles': []}

# Both names alias the lists stored in `data`, so appending fills the dict
# that gets pickled below.
vehicles = data['vehicles']
non_vehicles = data['non-vehicles']

# glob returns paths in arbitrary (filesystem dependent) order. Sorting them
# makes the list order - and therefore every seeded random pick made from
# these lists - reproducible from one machine/run to the next.
for filename in sorted(glob.glob(params.training_images_vehicles + '/**/*.png', recursive=True)):
    image = mpimg.imread(filename)
    vehicles.append(image)
for filename in sorted(glob.glob(params.training_images_non_vehicles + '/**/*.png', recursive=True)):
    image = mpimg.imread(filename)
    non_vehicles.append(image)

print("Loaded:\n\t{} Vehicles from '{}'\n\t{} Non-Vehicles from '{}'".format(len(vehicles), params.training_images_vehicles, len(non_vehicles), params.training_images_non_vehicles))

print("Writing the Pickle file to '{}'...".format(params.pickle_file))

with open(params.pickle_file, "wb") as file:
    pickle.dump(data, file)

print("Done.")

In [None]:
print("Running data sanity checks...")

# The first vehicle image serves as the reference every other sample,
# vehicle or not, has to match in shape and dtype.
reference_image = vehicles[0]
data_shape = reference_image.shape
data_type = reference_image.dtype

for image_set in (vehicles, non_vehicles):
    for image in image_set:
        assert image.shape == data_shape
        assert image.dtype == data_type

print("All data has a shape of {} and a type of {}".format(data_shape, data_type))

print("Done.")

In [None]:
# Load the data from the pickle file

# Hard negative mining samples are switched off for now. Set this to True to
# merge them into the training data whenever their pickle file exists.
use_hard_negative_mining = False

print("Loading the Pickle file from '{}'...".format(params.pickle_file))

# NOTE: pickle.load() can execute arbitrary code while unpickling; only load
# pickle files that were produced by this notebook.
with open(params.pickle_file, "rb") as file:
    data = pickle.load(file)

vehicles = data['vehicles']
non_vehicles = data['non-vehicles']

print("Loaded:\n\t{} Vehicles\n\t{} Non-Vehicles".format(len(vehicles), len(non_vehicles)))

if use_hard_negative_mining and os.path.isfile(params.hard_negative_mining_pickle_file):
    print("Loading hard negative mining data Pickle file from '{}'...".format(params.hard_negative_mining_pickle_file))

    with open(params.hard_negative_mining_pickle_file, "rb") as file:
        data_hard = pickle.load(file)

    vehicles_hard = data_hard['vehicles']
    non_vehicles_hard = data_hard['non-vehicles']

    print("Loaded:\n\t{} Vehicles\n\t{} Non-Vehicles".format(len(vehicles_hard), len(non_vehicles_hard)))

    # Extends the lists in place, so `data` grows along with them.
    vehicles.extend(vehicles_hard)
    non_vehicles.extend(non_vehicles_hard)

    print("Total is now:\n\t{} Vehicles\n\t{} Non-Vehicles".format(len(vehicles), len(non_vehicles)))

print("Done.")

In [None]:
print("Data exploration...")

# Seed before drawing so the same two samples are picked on every run.
np.random.seed(params.random_seed)

vehicle_index = np.random.randint(0, len(vehicles))
non_vehicle_index = np.random.randint(0, len(non_vehicles))

print("Plotting vehicle of index {} and non-vehicle of index {}".format(vehicle_index, non_vehicle_index))

vehicle_image = vehicles[vehicle_index]
non_vehicle_image = non_vehicles[non_vehicle_index]

# One row, two panels: the vehicle sample on the left, the non-vehicle on the right.
fig = plt.figure()
panels = (
    (121, vehicle_image, "Vehicle index {}".format(vehicle_index)),
    (122, non_vehicle_image, "Non-vehicle index {}".format(non_vehicle_index)),
)
for position, sample_image, caption in panels:
    plt.subplot(position)
    plt.imshow(sample_image)
    plt.title(caption)

print("Done.")

In [None]:
# FULL PIPELINE ON SINGLE IMAGE
# Most of this code is from the lectures

def convertImage(image, color_space='RGB'):
    '''
    Convert an RGB image to the requested color space.

    Assumes the image has been read with mpimg.imread() and is therefore in
    RGB format.

    Parameters:
        image: the RGB image to convert.
        color_space: one of 'RGB', 'HSV', 'LUV', 'HLS', 'YUV' or 'YCrCb'.

    Returns:
        The image itself (not a copy) for 'RGB', otherwise the result of
        cv2.cvtColor() with the matching COLOR_RGB2* code.

    Raises:
        AssertionError: if color_space is not one of the supported names.
    '''
    if color_space == 'RGB':
        return image

    supported = ('HSV', 'LUV', 'HLS', 'YUV', 'YCrCb')
    if color_space not in supported:
        # Raised explicitly instead of `assert False`: assert statements are
        # removed when Python runs with -O, which would let an unsupported
        # name silently return the unconverted image.
        raise AssertionError("Can't handle color_space={}".format(color_space))

    conversion_codes = {
        'HSV': cv2.COLOR_RGB2HSV,
        'LUV': cv2.COLOR_RGB2LUV,
        'HLS': cv2.COLOR_RGB2HLS,
        'YUV': cv2.COLOR_RGB2YUV,
        'YCrCb': cv2.COLOR_RGB2YCrCb,
    }
    return cv2.cvtColor(image, conversion_codes[color_space])

# Compute binned color (spatial) features
def bin_spatial(img, size=(32, 32)):
    '''
    Resize img to `size` with cv2.resize() and return the result flattened
    into a one-dimensional feature vector.
    '''
    resized = cv2.resize(img, size)
    return resized.ravel()

# Compute color histogram features
# NEED TO CHANGE bins_range if reading .png files with mpimg!
def color_hist(img, nbins=32, bins_range=(0, 256)):
    '''
    Histogram the first three channels of img separately and return the bin
    counts concatenated into a single feature vector of length 3 * nbins.

    Parameters:
        img: image indexed as img[:, :, channel].
        nbins: number of histogram bins per channel.
        bins_range: (lower, upper) value range covered by the bins; values
            outside of it are not counted by np.histogram().
    '''
    per_channel_counts = [
        np.histogram(img[:, :, channel], bins=nbins, range=bins_range)[0]
        for channel in range(3)
    ]
    return np.concatenate(per_channel_counts)

# Return HOG features and, optionally, the HOG visualization
def get_hog_features(img, orient, pix_per_cell, cell_per_block, vis=False, feature_vec=True):
    '''
    Compute HOG features for a single-channel image with skimage's hog().

    Parameters:
        img: the image channel passed to hog().
        orient: number of orientation bins.
        pix_per_cell: cell size in pixels (used for both dimensions).
        cell_per_block: block size in cells (used for both dimensions).
        vis: when True hog() also returns a visualization image.
        feature_vec: forwarded as hog()'s feature_vector argument.

    Returns:
        Whatever hog() returns: the features alone, or a
        (features, hog_image) pair when vis is True.
    '''
    hog_kwargs = dict(orientations=orient,
                      pixels_per_cell=(pix_per_cell, pix_per_cell),
                      cells_per_block=(cell_per_block, cell_per_block),
                      transform_sqrt=True,
                      feature_vector=feature_vec)
    try:
        # Newer scikit-image releases only accept the `visualize` spelling.
        return hog(img, visualize=vis, **hog_kwargs)
    except TypeError as error:
        # Only fall back when the keyword itself was rejected; any other
        # TypeError is a genuine problem and must not be hidden.
        if 'visualize' not in str(error):
            raise
    # Older scikit-image releases only know the `visualise` spelling.
    return hog(img, visualise=vis, **hog_kwargs)

# Extract the feature vector of a single image (or image window).
# extract_features() applies this function to every image of a list.
def single_img_features(img, color_space='RGB', spatial_size=(32, 32), hist_bins=32, orient=9, pix_per_cell=8, cell_per_block=2, hog_channel=0, spatial_feat=True, hist_feat=True, hog_feat=True):
    '''
    Build one feature vector for img by concatenating, in this order, the
    enabled feature groups: binned color (bin_spatial), color histogram
    (color_hist) and HOG (get_hog_features).

    The image is first converted with convertImage(img, color_space).
    hog_channel is either a channel index or 'ALL' to use every channel.
    '''
    converted = convertImage(img, color_space)
    feature_groups = []

    # Binned color features
    if spatial_feat == True:
        feature_groups.append(bin_spatial(converted, size=spatial_size))

    # Color histogram features
    if hist_feat == True:
        feature_groups.append(color_hist(converted, nbins=hist_bins))

    # HOG features, for one channel or for all of them back to back
    if hog_feat == True:
        if hog_channel == 'ALL':
            hog_features = []
            for channel in range(converted.shape[2]):
                channel_features = get_hog_features(converted[:,:,channel], orient, pix_per_cell, cell_per_block, vis=False, feature_vec=True)
                hog_features.extend(channel_features)
        else:
            hog_features = get_hog_features(converted[:,:,hog_channel], orient, pix_per_cell, cell_per_block, vis=False, feature_vec=True)
        feature_groups.append(hog_features)

    return np.concatenate(feature_groups)

def extract_features(imgs, color_space='RGB', spatial_size=(32, 32), hist_bins=32, orient=9, pix_per_cell=8, cell_per_block=2, hog_channel=0, spatial_feat=True, hist_feat=True, hog_feat=True):
    '''
    Return a list holding the single_img_features() vector of every image
    in imgs, in the same order. All other arguments are forwarded unchanged.
    '''
    return [
        single_img_features(image, color_space=color_space, spatial_size=spatial_size, hist_bins=hist_bins, orient=orient, pix_per_cell=pix_per_cell, cell_per_block=cell_per_block, hog_channel=hog_channel, spatial_feat=spatial_feat, hist_feat=hist_feat, hog_feat=hog_feat)
        for image in imgs
    ]
    
# Takes an image, start and stop positions in both x and y,
# window size (x and y dimensions),
# and overlap fraction (for both x and y)
def slide_window(img, x_start_stop=[None, None], y_start_stop=[None, None], xy_window=(64, 64), xy_overlap=(0.5, 0.5)):
    '''
    List the search windows that tile a region of img.

    Parameters:
        img: image whose shape provides the default region limits.
        x_start_stop: (start, stop) of the region in x; a None entry means
            the image border (0 or img.shape[1]).
        y_start_stop: (start, stop) of the region in y; a None entry means
            the image border (0 or img.shape[0]).
        xy_window: (width, height) of each window in pixels.
        xy_overlap: (x, y) fraction by which consecutive windows overlap.

    Returns:
        A list of ((startx, starty), (endx, endy)) tuples, row by row. Every
        window lies completely inside the region; the list is empty when the
        region is smaller than one window.
    '''
    # Resolve the limits into locals. The arguments are never written to:
    # assigning into them would alter the shared default lists (and the
    # caller's own lists) and leak one image's size into later calls.
    x_start = 0 if x_start_stop[0] is None else x_start_stop[0]
    x_stop = img.shape[1] if x_start_stop[1] is None else x_start_stop[1]
    y_start = 0 if y_start_stop[0] is None else y_start_stop[0]
    y_stop = img.shape[0] if y_start_stop[1] is None else y_start_stop[1]

    # Number of pixels per step in x/y (at least one so the loops terminate)
    nx_pix_per_step = max(1, int(xy_window[0]*(1 - xy_overlap[0])))
    ny_pix_per_step = max(1, int(xy_window[1]*(1 - xy_overlap[1])))

    # Number of windows that fit: the first one needs a full window size,
    # every further one an additional step.
    nx_windows = max(0, int((x_stop - x_start - xy_window[0]) // nx_pix_per_step) + 1)
    ny_windows = max(0, int((y_stop - y_start - xy_window[1]) // ny_pix_per_step) + 1)

    window_list = []
    # Note: this could be vectorized, but the classifier looks at the
    # windows one by one anyway, so looping makes sense
    for ys in range(ny_windows):
        starty = ys*ny_pix_per_step + y_start
        endy = starty + xy_window[1]
        for xs in range(nx_windows):
            startx = xs*nx_pix_per_step + x_start
            endx = startx + xy_window[0]
            window_list.append(((startx, starty), (endx, endy)))
    return window_list

# Takes an image and the list of windows to be searched
# (output of slide_window())
def search_windows(img, windows, clf, scaler, color_space='RGB', spatial_size=(32, 32), hist_bins=32, hist_range=(0, 256), orient=9, pix_per_cell=8, cell_per_block=2, hog_channel=0, spatial_feat=True, hist_feat=True, hog_feat=True):
    '''
    Run the classifier on every window and return the windows it accepts.

    Each window is cut out of img, resized to 64x64, turned into a feature
    vector with single_img_features(), scaled with scaler.transform() and
    classified with clf.predict(); windows predicted as 1 are kept.

    Note: hist_range is accepted but not forwarded to single_img_features().
    '''
    feature_options = dict(color_space=color_space, spatial_size=spatial_size,
                           hist_bins=hist_bins, orient=orient,
                           pix_per_cell=pix_per_cell, cell_per_block=cell_per_block,
                           hog_channel=hog_channel, spatial_feat=spatial_feat,
                           hist_feat=hist_feat, hog_feat=hog_feat)
    detections = []
    for window in windows:
        upper_left, lower_right = window[0], window[1]
        # Windows are ((x0, y0), (x1, y1)); the image is indexed [y, x]
        patch = img[upper_left[1]:lower_right[1], upper_left[0]:lower_right[0]]
        patch = cv2.resize(patch, (64, 64))
        raw_features = single_img_features(patch, **feature_options)
        # The scaler expects a 2D array: a single row with all the features
        scaled_features = scaler.transform(np.array(raw_features).reshape(1, -1))
        prediction = clf.predict(scaled_features)
        if prediction == 1:
            detections.append(window)
    return detections

In [None]:
# Plot HOG results for different values of the color_space input parameter:
# one row per color space, one column per channel.

fontsize = 8

orientations = [9]
pix_per_cells = [8]
cell_per_block = 2
color_spaces = ['RGB', 'YUV', 'YCrCb', 'HSV', 'LUV', 'HLS']
num_color_spaces = len(color_spaces)

# Title of each column; the color space name is filled in per row
channel_titles = ('HOG for {} channel 0', 'HOG for {} channel 1', 'HOG for {} channel 2')

for orient in orientations:
    for pix_per_cell in pix_per_cells:

        fig = plt.figure(figsize=(10,10))
        num_rows = num_color_spaces
        num_cols = 3

        for i, color_space in enumerate(color_spaces):
            converted_image = convertImage(vehicle_image, color_space)

            for channel, title_template in enumerate(channel_titles):
                features, hog_image = get_hog_features(converted_image[:,:,channel], orient, pix_per_cell, cell_per_block, vis=True, feature_vec=False)

                # Subplot indices are 1-based and run row by row
                subplot = plt.subplot(num_rows, num_cols, i * num_cols + channel + 1)
                plt.imshow(hog_image, cmap='gray')
                plt.title(title_template.format(color_space), fontsize=fontsize)
                plt.axis('off')

        fig.tight_layout(w_pad=-2, h_pad=0, rect=[0, 0, 1, 1])

In [None]:
# Plot HOG results for different values of the orientation input parameter
# (channel 0 only, on a 2 x 3 grid). Uses the `fontsize` set by an earlier cell.

orientations = [3, 5, 7, 9, 11, 13]
pix_per_cells = [8]
cell_per_block = 2
color_spaces = ['YCrCb']
num_orientations = len(orientations)

for color_space in color_spaces:
    for pix_per_cell in pix_per_cells:

        fig = plt.figure(figsize=(10,10))
        num_rows = 2
        num_cols = 3

        for i, orient in enumerate(orientations):
            converted_image = convertImage(vehicle_image, color_space)
            features, hog_image_0 = get_hog_features(converted_image[:,:,0], orient, pix_per_cell, cell_per_block, vis=True, feature_vec=False)

            # Row-major, 1-based position: row * num_cols + column + 1 == i + 1
            subplot = plt.subplot(num_rows, num_cols, i + 1)
            plt.imshow(hog_image_0, cmap='gray')
            plt.title('HOG for {} channel 0 orientation={}'.format(color_space, orient), fontsize=fontsize)
            plt.axis('off')

        fig.tight_layout(w_pad=0, h_pad=0, rect=[0, 0, 1, 1])

In [None]:
# Plot HOG results for different values of the pix_per_cell input parameter
# (channel 0 only, on a 2 x 3 grid). Uses the `fontsize` set by an earlier cell.

orientations = [9]
pix_per_cells = [4, 6, 8, 10, 12, 14]
cell_per_block = 2
color_spaces = ['YCrCb']
num_pix_per_cells = len(pix_per_cells)

for color_space in color_spaces:
    for orient in orientations:

        fig = plt.figure(figsize=(10,10))
        num_rows = 2
        num_cols = 3

        for i, pix_per_cell in enumerate(pix_per_cells):
            converted_image = convertImage(vehicle_image, color_space)
            features, hog_image_0 = get_hog_features(converted_image[:,:,0], orient, pix_per_cell, cell_per_block, vis=True, feature_vec=False)

            # Row-major, 1-based position: row * num_cols + column + 1 == i + 1
            subplot = plt.subplot(num_rows, num_cols, i + 1)
            plt.imshow(hog_image_0, cmap='gray')
            plt.title('HOG for {} channel 0 pix_per_cell={}'.format(color_space, pix_per_cell), fontsize=fontsize)
            plt.axis('off')

        fig.tight_layout(w_pad=0, h_pad=0, rect=[0, 0, 1, 1])

In [None]:
print("Define a utility class to process a single frame...")

class Frame:
    '''
    Utility class that takes a single frame of input and processes it
    to detect cars.

    Accepts an optional prev_hot_windows parameter that contains the windows
    in which cars were detected in previous frames; in the context of a video
    it helps the detection in the current frame.

    process() fills in the intermediate results (search windows, hot windows,
    heatmaps, labels, car boxes) and draws the car boxes on self.result.
    '''
    def __init__(self, image, title="", prev_hot_windows=None):
        self.image = image
        self.title = title
        self.x_size = image.shape[1]
        self.y_size = image.shape[0]

        self.search_windows = []
        self.hot_windows = []
        # A new list per instance: a mutable default argument would be shared
        # by every Frame created without an explicit prev_hot_windows.
        self.prev_hot_windows = [] if prev_hot_windows is None else prev_hot_windows
        self.car_boxes = []

        self.image_windows = None
        self.current_heatmap = None
        self.full_heatmap = None
        self.thresholded_heatmap = None
        self.labels = None

        self.result = None

    def create_empty_3_chan_for_1_chan(self, image):
        ''' Utility function to create an all-zero, 3 channel uint8 image with the size of a single channel image '''
        empty_one_channel = np.zeros_like(image).astype(np.uint8)
        empty_three_channels = np.dstack((empty_one_channel, empty_one_channel, empty_one_channel))

        return empty_three_channels

    # NOTE(review): the four draw_*_info / draw_fit_area helpers below read
    # self.l_line and self.r_line, which this class never assigns. They look
    # like leftovers from the lane detection project and raise AttributeError
    # unless a caller sets those attributes first.

    def draw_fit_area(self, image, color):
        ''' Utility function to draw the zone between the detected lines as single lane '''
        # Recast the x and y points into usable format for cv2.fillPoly()
        pts_left = np.array([np.transpose(np.vstack([self.l_line.current_fitx, self.l_line.yvals]))])
        pts_right = np.array([np.flipud(np.transpose(np.vstack([self.r_line.current_fitx, self.l_line.yvals])))])
        pts = np.hstack((pts_left, pts_right))

        cv2.fillPoly(image, np.int_([pts]), color)

    def draw_width_info(self, image, font_face, font_scale, color, thickness, thickness_arrow):
        ''' Utility function to draw the lane width information (text and arrow between the two detected lines) '''
        lane_width_middle_m = self.r_line.value_at_middle_m - self.l_line.value_at_middle_m

        middle_text = "{0:5.2f}m".format(lane_width_middle_m)
        text_size, base_line = cv2.getTextSize(middle_text, font_face, font_scale, thickness)

        y_ref = int(self.y_size / 2)
        x_middle = int((self.l_line.value_at_middle + self.r_line.value_at_middle) / 2 - text_size[0] / 2)
        middle_pos = (x_middle, y_ref + text_size[1] + base_line)
        cv2.putText(image, middle_text, middle_pos, font_face, font_scale, color, thickness, cv2.LINE_AA)

        # Draw two arrows in order to have two arrowheads
        l_x = int(self.l_line.value_at_middle)
        r_x = int(self.r_line.value_at_middle)
        cv2.arrowedLine(image, (l_x, y_ref), (r_x, y_ref), color, thickness_arrow)
        cv2.arrowedLine(image, (r_x, y_ref), (l_x, y_ref), color, thickness_arrow)

    def draw_offset_info(self, image, font_face, font_scale, color, thickness, thickness_line, tick_height):
        ''' Utility function to draw the lane center information and the current offset with regard to it '''
        lane_center_bottom = int((self.r_line.value_at_bottom + self.l_line.value_at_bottom) / 2)
        half_x_size = int(self.x_size / 2)
        car_offset_from_center_m = (self.l_line.bottom_offset_m + self.r_line.bottom_offset_m) / 2

        half_tick_height = int(tick_height / 2)
        quarter_tick_height = int(tick_height / 4)

        thickness_half_line = int(thickness_line / 2)
        thickness_arrow = thickness_half_line

        # The big tick represents the center of the lane
        pos_big_tick_start = (lane_center_bottom, self.y_size)
        pos_big_tick_end = tuple(np.subtract(pos_big_tick_start, (0, tick_height)))

        # The small tick represents the car (center of the image)
        pos_small_tick_start = (half_x_size, self.y_size)
        pos_small_tick_end = tuple(np.subtract(pos_small_tick_start, (0, half_tick_height)))

        # Draw an arrow from the big tick to the small tick to represent how much the car is offset from the center
        # Stop arrow just before the small tick starts
        cut_arrow = thickness_half_line if (car_offset_from_center_m > 0) else -thickness_half_line
        pos_arrow_start = tuple(np.subtract(pos_big_tick_start, (0, quarter_tick_height)))
        pos_arrow_end = tuple(np.subtract(pos_small_tick_start, (cut_arrow, quarter_tick_height)))

        offset_text = "{0:5.2f}m".format(car_offset_from_center_m)
        text_size, base_line = cv2.getTextSize(offset_text, font_face, font_scale, thickness)

        pos_text = tuple(np.subtract(pos_small_tick_end, (int(text_size[0] / 2), half_tick_height + base_line)))

        cv2.line(image, pos_big_tick_start, pos_big_tick_end, color, thickness_line)
        cv2.line(image, pos_small_tick_start, pos_small_tick_end, color, thickness_half_line)
        cv2.arrowedLine(image, pos_arrow_start, pos_arrow_end, color, thickness_arrow)
        cv2.putText(image, offset_text, pos_text, font_face, font_scale, color, thickness, cv2.LINE_AA)

    def draw_radii_info(self, image, font_face, font_scale, color, thickness):
        '''
        Utility function to draw the lane radius information.

        NOTE(review): the text is drawn on self.result, not on the `image`
        argument - confirm whether that is intended.
        '''
        radii_text = "RadiusL={0:7.2f}m RadiusR={1:7.2f}m".format(self.l_line.radius_of_curvature, self.r_line.radius_of_curvature)
        text_size, base_line = cv2.getTextSize(radii_text, font_face, font_scale, thickness)

        pos_text = (self.x_size - text_size[0], text_size[1] + base_line)

        cv2.putText(self.result, radii_text, pos_text, font_face, font_scale, color, thickness, cv2.LINE_AA)

    def draw_windows(self, image, windows, color=(0, 0, 255), thickness=5):
        ''' Utility function to draw a rectangle on image for every ((x0, y0), (x1, y1)) window '''
        for window in windows:
            cv2.rectangle(image, window[0], window[1], color, thickness)

    def stack_thumbnails(self):
        '''
        Utility function to draw debug information on the final result image:
        thumbnails of the current heatmap, the full heatmap, the thresholded
        heatmap and the labels are pasted in a column starting at the
        top-left corner of self.result. Requires process() to have run.
        '''
        num_cols = 1
        thumbnail_scale = 0.15
        thumbnail_size = (int(self.x_size * thumbnail_scale), int(self.y_size * thumbnail_scale))

        def to_three_channels(plane, scale):
            # Replicate a single channel image into three channels and rescale it
            return scale * np.dstack((plane, plane, plane))

        def peak_or_one(plane):
            # An all-zero plane (nothing detected) would otherwise lead to a
            # division by zero when normalising it to the 0..255 range
            peak = np.max(plane)
            return peak if peak > 0 else 1

        current_heatmap = to_three_channels(self.current_heatmap, 255/peak_or_one(self.current_heatmap))
        full_heatmap = to_three_channels(self.full_heatmap, 255/peak_or_one(self.full_heatmap))
        thresholded_heatmap = to_three_channels(self.thresholded_heatmap, 255)
        labels = to_three_channels(self.labels[0], 255/peak_or_one(self.labels[0]))

        thumbnails = []

        thumbnails.append(cv2.resize(current_heatmap, thumbnail_size, interpolation=cv2.INTER_AREA))
        thumbnails.append(cv2.resize(full_heatmap, thumbnail_size, interpolation=cv2.INTER_AREA))
        thumbnails.append(cv2.resize(thresholded_heatmap, thumbnail_size, interpolation=cv2.INTER_AREA))
        thumbnails.append(cv2.resize(labels, thumbnail_size, interpolation=cv2.INTER_AREA))

        for i, thumbnail in enumerate(thumbnails):
            # A None entry leaves its slot empty. `is None` is required here:
            # `== None` on an array compares element by element.
            if thumbnail is None:
                continue

            x_offset = thumbnail_size[0] * (i % num_cols)
            y_offset = thumbnail_size[1] * (i // num_cols)

            self.result[y_offset:y_offset+thumbnail_size[1], x_offset:x_offset+thumbnail_size[0]] = thumbnail

    def process(self, params):
        '''
        This is the main function that processes the current frame to detect cars.
        It is controlled by a list of parameters provided as input.

        Besides `params` it reads the module level names svc, X_scaler,
        color_space, spatial_size, hist_bins, orient, pix_per_cell,
        cell_per_block, hog_channel, spatial_feat, hist_feat and hog_feat,
        which therefore have to be defined before this method is called.
        '''
        self.result = np.copy(self.image)
        # This code comes mostly from the lessons

        def create_heatmaps():
            # Frames with a title use a threshold of 1 instead of the configured
            # one - presumably single test images without history; TODO confirm
            threshold = params.heatmap_threshold if self.title == "" else 1
            # The current frame marks every pixel covered by a hot window with 1
            self.current_heatmap = np.zeros_like(self.image[:,:,0]).astype(np.uint8)
            for box in self.hot_windows:
                self.current_heatmap[box[0][1]:box[1][1], box[0][0]:box[1][0]] = 1
            # Every previous hot window adds one more vote on top of it
            self.full_heatmap = np.copy(self.current_heatmap)
            for box in self.prev_hot_windows:
                self.full_heatmap[box[0][1]:box[1][1], box[0][0]:box[1][0]] += 1
            # Binarize: 1 where the vote count reaches the threshold, 0 elsewhere
            self.thresholded_heatmap = np.copy(self.full_heatmap)
            self.thresholded_heatmap[self.full_heatmap < threshold] = 0
            self.thresholded_heatmap[self.full_heatmap >= threshold] = 1
            self.labels = label(self.thresholded_heatmap)

            self.car_boxes = []
            for car_number in range(1, self.labels[1] + 1):
                # Find pixels with each car_number label value
                nonzero = (self.labels[0] == car_number).nonzero()
                # Identify x and y values of those pixels
                nonzeroy = np.array(nonzero[0])
                nonzerox = np.array(nonzero[1])
                # Define a bounding box based on min/max x and y
                bbox = ((np.min(nonzerox), np.min(nonzeroy)), (np.max(nonzerox), np.max(nonzeroy)))
                # Only keep boxes that are more than 32 pixels wide and high
                if (bbox[1][0] - bbox[0][0]) > 32 and (bbox[1][1] - bbox[0][1]) > 32:
                    self.car_boxes.append(bbox)

        image = self.image.astype(np.float32)/255 # TODO This should be done when reading the images!

        self.image_windows = np.copy(self.image)

        # Search band: its lower edge sits 64 pixels above the bottom of the image
        y_max = image.shape[0] - 64
        y_tiny = 64
        y_small = 96
        y_big = 128

        y_start_stop = (y_max - int(y_big * 1.5) - 64, y_max)
        windows_tiny = slide_window(image, x_start_stop=[None, None], y_start_stop=y_start_stop, xy_window=(y_tiny, y_tiny), xy_overlap=(0.5, 0.5))
        y_start_stop = (y_max - int(y_big * 1.5) - 64, y_max)
        windows_small = slide_window(image, x_start_stop=[None, None], y_start_stop=y_start_stop, xy_window=(y_small, y_small), xy_overlap=(0.6, 0.6))
        y_start_stop = (y_max - int(y_big * 1.5), y_max)
        windows_big = slide_window(image, x_start_stop=[None, None], y_start_stop=y_start_stop, xy_window=(y_big, y_big), xy_overlap=(0.6, 0.6))

        self.search_windows = []
        self.search_windows.extend(windows_tiny)
        self.search_windows.extend(windows_small)
        # The big windows are computed but currently not searched
        #self.search_windows.extend(windows_big)

        self.draw_windows(self.image_windows, self.search_windows)

        self.hot_windows = search_windows(image, self.search_windows, svc, X_scaler, color_space=color_space,
                                spatial_size=spatial_size, hist_bins=hist_bins,
                                orient=orient, pix_per_cell=pix_per_cell,
                                cell_per_block=cell_per_block,
                                hog_channel=hog_channel, spatial_feat=spatial_feat,
                                hist_feat=hist_feat, hog_feat=hog_feat)

        create_heatmaps()

        self.draw_windows(self.result, self.car_boxes)

print("Done.")

In [None]:
# SVM CLASSIFIER
# Mostly from the lessons
#
# Trains the linear SVM (`svc`) and the feature scaler (`X_scaler`) that the
# detection pipeline reads as module-level names, together with the feature
# extraction settings defined below.

print("Training a classifier on the data...")

cars = vehicles
notcars = non_vehicles

color_space = 'YUV' # Can be RGB, HSV, LUV, HLS, YUV, YCrCb
orient = 9  # HOG orientations
pix_per_cell = 8 # HOG pixels per cell
cell_per_block = 2 # HOG cells per block
hog_channel = "ALL" # Can be 0, 1, 2, or "ALL"
spatial_size = (16, 16) # Spatial binning dimensions
hist_bins = 16    # Number of histogram bins
spatial_feat = True # Spatial features on or off
hist_feat = True # Histogram features on or off
hog_feat = True # HOG features on or off
y_start_stop = [None, None] # Min and max in y to search in slide_window()

print("\tCreating features...")

# Both classes must be described with exactly the same settings, so the
# keyword arguments are built once and shared.
feature_options = dict(color_space=color_space,
                       spatial_size=spatial_size, hist_bins=hist_bins,
                       orient=orient, pix_per_cell=pix_per_cell,
                       cell_per_block=cell_per_block,
                       hog_channel=hog_channel, spatial_feat=spatial_feat,
                       hist_feat=hist_feat, hog_feat=hog_feat)
car_features = extract_features(cars, **feature_options)
notcar_features = extract_features(notcars, **feature_options)
print("\tDone.")

X = np.vstack((car_features, notcar_features)).astype(np.float64)

# Define the labels vector (1 = car, 0 = not a car)
y = np.hstack((np.ones(len(car_features)), np.zeros(len(notcar_features))))

print("\tFeature vector length: {}".format(len(X[0])))

print ("\tSplitting the data with {}% test data".format(params.test_ratio * 100))

# Split row indices into randomized training and test sets. Splitting before
# scaling keeps the test rows out of the scaler statistics (no data leakage);
# the partition only depends on the number of rows and the seed.
train_idx, test_idx = train_test_split(np.arange(len(X)), test_size=params.test_ratio, random_state=params.random_seed)

print("\tScaling the data...")
# Fit a per-column scaler on the training rows only
X_scaler = StandardScaler().fit(X[train_idx])
# Apply the scaler to X
scaled_X = X_scaler.transform(X)
X_train, X_test = scaled_X[train_idx], scaled_X[test_idx]
y_train, y_test = y[train_idx], y[test_idx]

print("\tDone.")

print("\tTraining a linear SVC...")

# Use a linear SVC, seeded so that repeated runs give the same model
svc = LinearSVC(random_state=params.random_seed)

# Check the training time for the SVC
t=time.time()
svc.fit(X_train, y_train)
t2 = time.time()
print("\t{} seconds to train SVC".format(round(t2-t, 2)))

# Check the score of the SVC
print('\tTest Accuracy of SVC = {}'.format(round(svc.score(X_test, y_test), 4)))

print("Done.")

In [None]:
# Runs the detection pipeline on every test image, saves the intermediate
# pictures next to the results and keeps the processed frames in `frames`
# for the plotting cells.
images_dir = params.test_images
# Sorted so that the processing order does not depend on the file system.
list_images = sorted(os.listdir(images_dir))

images_out_dir = params.output_dir
# Make sure the target folder exists before anything is written into it.
os.makedirs(images_out_dir, exist_ok=True)

print("Processing images in directory '{0}':".format(images_dir))

total_time = 0.

frames = []
for image_name in list_images:
    image_path = os.path.join(images_dir, image_name)
    if os.path.isdir(image_path):
        continue
    
    image = mpimg.imread(image_path)
    print("  {0:40s} size={1} type={2}".format(image_path, image.shape, image.dtype))
    
    # Only the detection itself is timed, not the file I/O.
    t1 = time.time()
    frame = Frame(image, image_name)
    frame.process(params)
    t2 = time.time()
    total_time = total_time + t2 - t1
    
    print("    Search windows={}".format(len(frame.search_windows)))
    
    # Expand the single-channel maps to 3-channel pictures in the 0..255 range.
    current_heatmap = 255 * np.dstack((frame.current_heatmap, frame.current_heatmap, frame.current_heatmap))
    thresholded_heatmap = 255 * np.dstack((frame.thresholded_heatmap, frame.thresholded_heatmap, frame.thresholded_heatmap))
    labels_max = np.max(frame.labels[0]) 
    if (labels_max == 0):
        labels_max = 1
    # The scaling produces floats in 0..255; cast to uint8 because RGB float
    # images are expected to lie in 0..1 when they are saved.
    labels = (255/labels_max * np.dstack((frame.labels[0], frame.labels[0], frame.labels[0]))).astype(np.uint8)
    
    # One output file per stage, named "<prefix><original file name>".
    stage_pictures = (
        ("windows_", frame.image_windows),
        ("heatmap_current_", current_heatmap),
        ("heatmap_thresholded_", thresholded_heatmap),
        ("labels_", labels),
        ("result_", frame.result),
    )
    for prefix, picture in stage_pictures:
        mpimg.imsave(os.path.join(images_out_dir, prefix + image_name), picture)
    
    frames.append(frame)
    
num_images = len(frames)

# Avoid a ZeroDivisionError when the directory contains no images.
time_per_frame = total_time / num_images if num_images else 0.

print("Processed {0} images in {1:4.2f} seconds ({2:4.2f}s per frame)".format(num_images, total_time, time_per_frame))

In [None]:
print('Showing', num_images, 'images:')

# Title size shared by the plotting cells.
fontsize = 8

# One figure per processed frame: input, search grid and final detections
# side by side.
for frame in frames:
    fig = plt.figure(figsize=(10,10))
    
    panels = (
        (frame.image, frame.title),
        (frame.image_windows, "Search Windows"),
        (frame.result, "Result"),
    )
    for position, (picture, caption) in enumerate(panels, start=1):
        plt.subplot(1, 3, position)
        plt.imshow(picture)
        plt.title(caption, fontsize=fontsize)
        plt.axis('off')
    
    fig.tight_layout(w_pad=-2, h_pad=-10, rect=[0, 0, 1, 1])

print("Done.")

In [None]:
print('Showing', num_images, 'images:')

# One figure per processed frame: the three heatmap stages in grayscale.
for frame in frames:
    fig = plt.figure(figsize=(10,10))
    
    stages = (
        (frame.current_heatmap, "Current Heatmap"),
        (frame.thresholded_heatmap, "Thresholded Heatmap"),
        (frame.labels[0], "Labeled Heatmap"),
    )
    for position, (heatmap, caption) in enumerate(stages, start=1):
        plt.subplot(1, 3, position)
        plt.imshow(heatmap, cmap="gray")
        plt.title(caption, fontsize=fontsize)
        plt.axis('off')
    
    fig.tight_layout(w_pad=-2, h_pad=-10, rect=[0, 0, 1, 1])

print("Done.")

In [None]:
print("Define video pipeline...")

# State carried from one video frame to the next by process_image():
# per-frame counts of the boxes stored in `hot_windows`, oldest frame first
hot_windows_sizes = []
# flat list of the detection boxes of the most recent frames
hot_windows = []
# number of frames processed so far
num_f = 0

def reset_globals():
    """Reset the frame counter and forget all remembered detection boxes."""
    global num_f, hot_windows, hot_windows_sizes

    # Fresh list objects are bound so that nothing from an earlier run is shared.
    num_f, hot_windows, hot_windows_sizes = 0, [], []
    
def process_image(image):
    """Process one video frame and return the annotated picture.

    The frame is analysed together with the detection boxes remembered from
    earlier frames; afterwards its own boxes are added to that history, which
    holds at most ``params.prev_heatmaps`` frames.
    """
    global num_f, hot_windows, hot_windows_sizes

    frame = Frame(image, prev_hot_windows=hot_windows)
    frame.process(params)

    # History full: drop the boxes contributed by the oldest frame. Slicing
    # binds a new list, so the list handed to `frame` above is left untouched.
    if len(hot_windows_sizes) >= params.prev_heatmaps:
        oldest_count = hot_windows_sizes[0]
        hot_windows_sizes = hot_windows_sizes[1:]
        hot_windows = hot_windows[oldest_count:]

    # Remember the raw positive windows of this frame (not the merged car boxes).
    fresh_boxes = list(frame.hot_windows)
    hot_windows.extend(fresh_boxes)
    hot_windows_sizes.append(len(fresh_boxes))

    frame.stack_thumbnails()

    num_f += 1

    return frame.result

def process_video(video_filename, video_out_filename):
    print("Processing '{0}' and saving the result into '{1}'...".format(video_filename, video_out_filename))
    
    reset_globals()
    
    video_clip = VideoFileClip(video_filename)#.subclip(38,39)

    print("  Size={0} Duration={1}s {2}FPS".format(video_clip.size, video_clip.duration, video_clip.fps))

    video_clip_out = video_clip.fl_image(process_image)
    %time video_clip_out.write_videofile(video_out_filename, audio=False)
    
    print("Done.")

print("Done.")

In [None]:
# Annotate the project video, then embed the written file in the notebook.
video_out_filename = "output/project_video_out.mp4"
process_video("data/project_video.mp4", video_out_filename)

video_markup = """
    <video width="960" height="540" controls>
      <source src="{0}">
    </video>
    """.format(video_out_filename)
# Last expression of the cell, so the player is rendered as its output.
HTML(video_markup)

In [None]:
# Collecting hard negative mining data

# (width, height) in pixels of the windows cut out of the test images; also the
# reference size for the overlap ratios computed further down.
xy_window = (64, 64)

''' Utility function to find if two bounding boxes overlap '''
def find_overlap(bb1, bb2):
    """Return the intersection of two boxes, or None when they are disjoint.

    Boxes are given as ((left, top), (right, bottom)); the result uses the
    same layout. Boxes that only touch along an edge or corner still count as
    overlapping and yield a zero-width or zero-height box.
    """
    top_left_1, bottom_right_1 = bb1[0], bb1[1]
    top_left_2, bottom_right_2 = bb2[0], bb2[1]

    # The second box lies entirely to the left or to the right of the first one.
    apart_in_x = bottom_right_2[0] < top_left_1[0] or top_left_2[0] > bottom_right_1[0]
    # The second box lies entirely above or below the first one.
    apart_in_y = bottom_right_2[1] < top_left_1[1] or top_left_2[1] > bottom_right_1[1]
    if apart_in_x or apart_in_y:
        return None

    # Innermost edges of both boxes delimit the shared area.
    shared_top_left = (max(top_left_1[0], top_left_2[0]), max(top_left_1[1], top_left_2[1]))
    shared_bottom_right = (min(bottom_right_1[0], bottom_right_2[0]), min(bottom_right_1[1], bottom_right_2[1]))
    return (shared_top_left, shared_bottom_right)

# Cuts every sliding window out of the annotated test images and files the crop
# as "vehicle" or "non-vehicle" depending on how much of it lies inside one of
# the known car boxes.
images_dir = params.test_images
# Sorted so that the processing order does not depend on the file system.
list_images = sorted(os.listdir(images_dir))

print("Processing images in directory '{0}':".format(images_dir))

# The output directory is an attribute of `params`; there is no `self` at cell level.
path_vehicles = os.path.join(params.output_dir, 'hard_negative_mining/vehicles')
path_non_vehicles = os.path.join(params.output_dir, 'hard_negative_mining/non_vehicles')
# Create the target folders up front so that saving a crop cannot fail on a
# missing directory.
for crop_dir in (path_vehicles, path_non_vehicles):
    os.makedirs(crop_dir, exist_ok=True)

# Hard-coded car boxes, ((left, top), (right, bottom)), for each test image.
images_to_car_bboxes = {'test1.jpg': [((810, 400), (950, 500)), ((1040, 400), (1275, 510))],
                        'test2.jpg': [],
                        'test3.jpg': [((860, 405), (970, 475))],
                        'test4.jpg': [((810, 400), (950, 500)), ((1030, 400), (1260, 500))],
                        'test5.jpg': [((800, 400), (950, 500)), ((1080, 390), (1280, 520))],
                        'test6.jpg': [((800, 400), (950, 500)), ((1000, 400), (1210, 505))],
                       }

for image_name in list_images:
    image_path = os.path.join(images_dir, image_name)
    if os.path.isdir(image_path):
        continue
    
    image = mpimg.imread(image_path)
    print("  {0:40s} size={1} type={2}".format(image_path, image.shape, image.dtype))
    
    # Only images with known car boxes can be labelled.
    if image_name not in images_to_car_bboxes:
        continue
    
    car_bboxes = images_to_car_bboxes[image_name]
    # Crops are written as PNG, whatever the format of the source image.
    new_name = image_name.replace("jpg", "png")
    # All drawing happens on a copy: the crops must come from untouched pixels,
    # and the array returned by imread may not be writable.
    annotated = np.copy(image)
    
    slide_windows = slide_window(image, y_start_stop=[350, 650], xy_window=xy_window, xy_overlap=(0.5, 0.5))
    print("{} bounding boxes for {}".format(len(slide_windows), image_name))
    
    fig = plt.figure()
    vehicles_counter = 0
    non_vehicles_counter = 0
    for car_bb in car_bboxes:
        cv2.rectangle(annotated, car_bb[0], car_bb[1], (255, 255, 255), 5)
    for window in slide_windows:
        is_vehicle = False
        cv2.rectangle(annotated, window[0], window[1], (255, 0, 0), 4)
        for car_bb in car_bboxes:
            overlap = find_overlap(car_bb, window)
            if overlap is not None:
                overlap_x = (overlap[1][0] - overlap[0][0])
                overlap_y = (overlap[1][1] - overlap[0][1])
                # A window counts as a vehicle when at least half of its width
                # and more than half of its height lie inside a car box.
                if (overlap_x / xy_window[0] >= 0.5 and overlap_y / xy_window[1] > 0.5):
                    cv2.rectangle(annotated, window[0], window[1], (0, 0, 255), 3)
                    is_vehicle = True
                else:
                    # Touches a car box, but not enough to count as a vehicle.
                    cv2.rectangle(annotated, window[0], window[1], (255, 192, 0), 3)
                    
        # Counters restart for every image; the image name keeps file names unique.
        if is_vehicle:
            path = os.path.join(path_vehicles, "{0:05d}_{1}".format(vehicles_counter, new_name))
            vehicles_counter += 1
        else:
            path = os.path.join(path_non_vehicles, "{0:05d}_{1}".format(non_vehicles_counter, new_name))
            non_vehicles_counter += 1
            
        sub = image[window[0][1]:window[1][1], window[0][0]:window[1][0]]
        mpimg.imsave(path, sub)
    
    plt.imshow(annotated)
    plt.title(image_name, fontsize=fontsize)
    
# Load the dataset and create a Pickle file
print ("Loading the training dataset...")

data = {'vehicles': [], 'non-vehicles': []}

# NOTE(review): this rebinds the module-level `vehicles` / `non_vehicles`
# names that the classifier training cell also reads -- confirm it is intended.
vehicles = data['vehicles']
non_vehicles = data['non-vehicles']

# Read every PNG found below each folder (sub-folders included) into the
# matching list of the dictionary.
for root_dir, bucket in ((path_vehicles, vehicles), (path_non_vehicles, non_vehicles)):
    for filename in glob.iglob(root_dir + '/**/*.png', recursive=True):
        image = mpimg.imread(filename)
        bucket.append(image)

summary = "Loaded:\n\t{} Vehicles from '{}'\n\t{} Non-Vehicles from '{}'".format(len(vehicles), path_vehicles, len(non_vehicles), path_non_vehicles)
print(summary)

print("Writing the Pickle file to '{}'...".format(params.hard_negative_mining_pickle_file))

with open(params.hard_negative_mining_pickle_file, "wb") as file:
    pickle.dump(data, file)

print("Done.")