In [1]:
import cv2
import glob
import numpy as np
import pickle
import scipy.stats
import skimage.feature
import sklearn.model_selection
import sklearn.preprocessing
import sklearn.svm
import time

# Extract hog features only

In [2]:
def hog_helper(img_single_ch):
    pixels_per_cell = 8
    cells_per_block = 2
    orientations = 9
    
    return skimage.feature.hog(
        img_single_ch,
        orientations=orientations,
        pixels_per_cell=(pixels_per_cell, pixels_per_cell),
        cells_per_block=(cells_per_block, cells_per_block),
        transform_sqrt=True,
        visualise=False,
        feature_vector=False,
        block_norm='L2-Hys')

### Pipeline functions

In [3]:
def get_hog_features_from_test_image(path):
    """Depends on global function `GET_HOG_FEATURES_FROM_IMAGE`."""
    
    img_bgr = cv2.imread(path)
    assert img_bgr.shape[:2] == (64, 64)
    
    hog = GET_HOG_FEATURES_FROM_IMAGE(img_bgr)
    assert hog.shape[0] == hog.shape[1]
    
    return hog.flatten()

def get_classifier_features(car_hog_features, noncar_hog_features):
    X = np.vstack((car_hog_features, noncar_hog_features)).astype(np.float64)
    print('classifier features shape is', X.shape)

    X_scaler = sklearn.preprocessing.StandardScaler().fit(X)

    scaled_X = X_scaler.transform(X)

    y = np.hstack((np.ones(len(car_hog_features)), np.zeros(len(noncar_hog_features))))

    X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
        scaled_X, y, test_size=0.2)
    
    return (X_train, X_test, y_train, y_test), X_scaler

def get_classifier_features_from_paths(car_paths, noncar_paths):
    car_hog_features, noncar_hog_features = [
        [get_hog_features_from_test_image(path) for path in paths]
        for paths in [car_paths, noncar_paths]
    ]
    print('training set size is', [len(features) for features in [car_hog_features, noncar_hog_features]])
    
    return get_classifier_features(car_hog_features, noncar_hog_features)

def get_classifier_features_small():
    car_paths = glob.glob('working_dir/vehicles_smallset/**/*')
    noncar_paths = glob.glob('working_dir/non-vehicles_smallset/**/*')
    return get_classifier_features_from_paths(car_paths, noncar_paths)

def get_classifier_features_large():
    car_paths = glob.glob('working_dir/vehicles/**/*')
    noncar_paths = glob.glob('working_dir/non-vehicles/**/*')
    return get_classifier_features_from_paths(car_paths, noncar_paths)

In [9]:
def train_and_validate():
    """Before running, set global function `GET_HOG_FEATURES_FROM_IMAGE`."""
    
    def train_and_validate_small():
        (X_train, X_test, y_train, y_test), X_scaler = get_classifier_features_small()

        t0 = time.time()

        grid_search_params = {
            'C': scipy.stats.expon(scale=100),
            'gamma': scipy.stats.expon(scale=.1),
            'kernel': ['linear', 'rbf'],
            'class_weight':['balanced', None]}
        clf = sklearn.model_selection.RandomizedSearchCV(sklearn.svm.SVC(), grid_search_params)
        clf.fit(X_train, y_train)

        t1 = time.time()
        
        print(round(t1 - t0, 2), 'Seconds to optimize SVC')
        print('score of optimized classifier on small dataset', clf.score(X_test, y_test))
        print('optimized classifier params', clf.best_params_)

        return X_scaler, clf
    
    def train_and_validate_large(optimized_clf):
        (X_train, X_test, y_train, y_test), X_scaler = get_classifier_features_large()
    
        print('score of optimized classifier on large dataset', optimized_clf.score(X_test, y_test))

        t0 = time.time()

        simple_clf = sklearn.svm.LinearSVC()
        simple_clf.fit(X_train, y_train)

        t1 = time.time()
        
        print(round(t1 - t0, 2), 'Seconds to train default LinearSVC')
        print('score of default LinearSVC on large dataset', simple_clf.score(X_test, y_test))

        return X_scaler, simple_clf
    
    
    X_scaler_small, optimized_clf = train_and_validate_small()
    X_scaler_large, simple_clf = train_and_validate_large(optimized_clf)
    return X_scaler_small, X_scaler_large, optimized_clf, simple_clf

### yuv

In [11]:
def get_hog_features_yuv(img_bgr):
    img_yuv = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2YUV)
    img_ych = img_yuv[:,:,0]
    return hog_helper(img_ych)

GET_HOG_FEATURES_FROM_IMAGE = get_hog_features_yuv

YUV_RESULTS = train_and_validate()

training set size is [1196, 1125]
classifier features shape is (2321, 1764)
138.6 Seconds to optimize SVC
score of optimized classifier on small dataset 0.94623655914
optimized classifier params {'class_weight': None, 'C': 15.51560389085947, 'gamma': 0.060393851905433817, 'kernel': 'linear'}
training set size is [8792, 8968]
classifier features shape is (17760, 1764)
score of optimized classifier on large dataset 0.84009009009
22.71 Seconds to train default LinearSVC
score of default LinearSVC on large dataset 0.918074324324


### hsv

In [14]:
def get_hog_features_hsv(img_bgr):
    img_hsv = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2HSV)
    img_sch = img_hsv[:,:,1]
    img_vch = img_hsv[:,:,2]
    hog_sch = hog_helper(img_sch)
    hog_vch = hog_helper(img_vch)
    return np.stack((hog_sch, hog_vch), axis=-1)

GET_HOG_FEATURES_FROM_IMAGE = get_hog_features_hsv

HSV_RESULTS = train_and_validate()

training set size is [1196, 1125]
classifier features shape is (2321, 3528)
341.43 Seconds to optimize SVC
score of optimized classifier on small dataset 0.967741935484
optimized classifier params {'class_weight': None, 'C': 203.54919322663201, 'gamma': 0.002980498106739218, 'kernel': 'rbf'}
training set size is [8792, 8968]
classifier features shape is (17760, 3528)
score of optimized classifier on large dataset 0.907376126126
23.18 Seconds to train default LinearSVC
score of default LinearSVC on large dataset 0.957488738739


### hls

In [15]:
def get_hog_features_hls(img_bgr):
    img_hls = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2HLS)
    img_lch = img_hls[:,:,1]
    img_sch = img_hls[:,:,2]
    hog_lch = hog_helper(img_lch)
    hog_sch = hog_helper(img_sch)
    return np.stack((hog_lch, hog_sch), axis=-1)

GET_HOG_FEATURES_FROM_IMAGE = get_hog_features_hls

HLS_RESULTS = train_and_validate()

training set size is [1196, 1125]
classifier features shape is (2321, 3528)
243.39 Seconds to optimize SVC
score of optimized classifier on small dataset 0.967741935484
optimized classifier params {'class_weight': 'balanced', 'C': 77.099607798986028, 'gamma': 0.023107860854446646, 'kernel': 'linear'}
training set size is [8792, 8968]
classifier features shape is (17760, 3528)
score of optimized classifier on large dataset 0.862331081081
23.59 Seconds to train default LinearSVC
score of default LinearSVC on large dataset 0.952702702703


### luv

In [16]:
def get_hog_features_luv(img_bgr):
    img_luv = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2LUV)
    img_lch = img_luv[:,:,0]
    return hog_helper(img_lch)

GET_HOG_FEATURES_FROM_IMAGE = get_hog_features_luv

LUV_RESULTS = train_and_validate()

training set size is [1196, 1125]
classifier features shape is (2321, 1764)
124.67 Seconds to optimize SVC
score of optimized classifier on small dataset 0.954838709677
optimized classifier params {'class_weight': 'balanced', 'C': 111.27604465991836, 'gamma': 0.0099291947018337184, 'kernel': 'rbf'}
training set size is [8792, 8968]
classifier features shape is (17760, 1764)
score of optimized classifier on large dataset 0.889076576577
22.32 Seconds to train default LinearSVC
score of default LinearSVC on large dataset 0.928490990991


### ycrcb

In [17]:
def get_hog_features_ycrcb(img_bgr):
    img_ycrcb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2YCrCb)
    img_ych = img_ycrcb[:,:,0]
    return hog_helper(img_ych)

GET_HOG_FEATURES_FROM_IMAGE = get_hog_features_ycrcb

YCRCB_RESULTS = train_and_validate()

training set size is [1196, 1125]
classifier features shape is (2321, 1764)
138.84 Seconds to optimize SVC
score of optimized classifier on small dataset 0.969892473118
optimized classifier params {'class_weight': 'balanced', 'C': 13.143214309351748, 'gamma': 0.0056035539553426451, 'kernel': 'rbf'}
training set size is [8792, 8968]
classifier features shape is (17760, 1764)
score of optimized classifier on large dataset 0.887668918919
23.2 Seconds to train default LinearSVC
score of default LinearSVC on large dataset 0.920045045045


### Pickle

In [18]:
with open('working_dir/hog_training_result.p', 'wb') as f:
    training_result = {
        'yuv_results': YUV_RESULTS,
        'hsv_results': HSV_RESULTS,
        'hls_results': HLS_RESULTS,
        'luv_results': LUV_RESULTS,
        'ycrcb_results': YCRCB_RESULTS
    }
    pickle.dump(training_result, f)

# Hog and color features

In [31]:
### Warning, hiding the function above!
### Also, the name is now a misnomer, since we're adding histogram and spatial binning.
def get_hog_features_from_test_image(path):
    """Depends on global function `GET_COLOR_FEATURES_FROM_IMAGE`."""
    
    img_bgr = cv2.imread(path)
    
    # for hog, hsv is the winner.
    hog = get_hog_features_hsv(img_bgr).flatten()
    assert hog.shape == (7 * 7 * 2 * 2 * 9 * 2,)
    
    clr = GET_COLOR_FEATURES_FROM_IMAGE(img_bgr)
    assert clr.shape == (24 * 24 + 32,)
    
    return np.concatenate((hog, clr))

### hsv

In [32]:
def color_features_hsv(img_bgr_64):
    img_bgr_24 = cv2.resize(img_bgr_64, (24, 24))
    img_hsv_24 = cv2.cvtColor(img_bgr_24, cv2.COLOR_BGR2HSV)
    sch = img_hsv_24[:,:,1]
    
    spatial = sch.flatten()
    
    hist = np.histogram(sch, bins=32, range=(0, 256))
    
    return np.concatenate((spatial, hist[0]))

GET_COLOR_FEATURES_FROM_IMAGE = color_features_hsv

CLR_HSV_RESULTS = train_and_validate()

training set size is [1196, 1125]
classifier features shape is (2321, 4136)
312.38 Seconds to optimize SVC
score of optimized classifier on small dataset 0.989247311828
optimized classifier params {'class_weight': None, 'C': 218.15986146051839, 'gamma': 0.00038997838423473563, 'kernel': 'rbf'}
training set size is [8792, 8968]
classifier features shape is (17760, 4136)
score of optimized classifier on large dataset 0.910472972973
23.83 Seconds to train default LinearSVC
score of default LinearSVC on large dataset 0.969031531532


### yuv

In [33]:
def color_features_yuv(img_bgr_64):
    img_bgr_24 = cv2.resize(img_bgr_64, (24, 24))
    img_yuv_24 = cv2.cvtColor(img_bgr_24, cv2.COLOR_BGR2YUV)
    ych = img_yuv_24[:,:,0]
    
    spatial = ych.flatten()
    
    hist = np.histogram(ych, bins=32, range=(0, 256))
    
    return np.concatenate((spatial, hist[0]))

GET_COLOR_FEATURES_FROM_IMAGE = color_features_yuv

CLR_YUV_RESULTS = train_and_validate()

training set size is [1196, 1125]
classifier features shape is (2321, 4136)
307.19 Seconds to optimize SVC
score of optimized classifier on small dataset 0.963440860215
optimized classifier params {'class_weight': 'balanced', 'C': 246.71716974322243, 'gamma': 0.088727278500685292, 'kernel': 'linear'}
training set size is [8792, 8968]
classifier features shape is (17760, 4136)
score of optimized classifier on large dataset 0.910191441441
20.45 Seconds to train default LinearSVC
score of default LinearSVC on large dataset 0.965934684685


### Pickle

In [36]:
with open('working_dir/overall_training_result.p', 'wb') as f:
    overall_training_result = {
        'clr_hsv_results': CLR_HSV_RESULTS,
        'clr_yuv_results': CLR_YUV_RESULTS
    }
    pickle.dump(overall_training_result, f)