In [24]:
import cv2
import glob
import numpy as np
import pickle
import scipy.stats
import skimage.feature
import sklearn.model_selection
import sklearn.preprocessing
import sklearn.svm

In [12]:
def image_to_hog_features(img_bgr):
    img_hls = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2HLS)
    img_lch = img_hls[:,:,1]
    img_sch = img_hls[:,:,2]
    
    hog_lch, hog_sch = [
        skimage.feature.hog(
            ch,
            orientations=9,
            pixels_per_cell=(8, 8),
            cells_per_block=(2, 2),
            transform_sqrt=True,
            visualise=False,
            feature_vector=True, # Do flatten features.
            block_norm='L2-Hys')
        for ch in [img_lch, img_sch]]
    
    return np.concatenate((hog_lch, hog_sch))

In [14]:
car_paths = glob.glob('working_dir/vehicles_smallset/**/*')
noncar_paths = glob.glob('working_dir/non-vehicles_smallset/**/*')

car_features = [image_to_hog_features(cv2.imread(path)) for path in car_paths]
noncar_features = [image_to_hog_features(cv2.imread(path)) for path in noncar_paths]

In [17]:
X = np.vstack((car_features, noncar_features)).astype(np.float64)
assert X.shape[1:] == (14 * 7 * 2 * 2 * 9,)

X_scaler = sklearn.preprocessing.StandardScaler().fit(X)
scaled_X = X_scaler.transform(X)

y = np.hstack((np.ones(len(car_features)), np.zeros(len(noncar_features))))

X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
    scaled_X, y, test_size=0.2)

In [20]:
# svc = sklearn.svm.LinearSVC()
# svc.fit(X_train, y_train)
# svc.score(X_test, y_test)

0.91397849462365588

In [25]:
grid_search_params = {'C': scipy.stats.expon(scale=100), 'gamma': scipy.stats.expon(scale=.1),
  'kernel': ['rbf'], 'class_weight':['balanced', None]}
clf = sklearn.model_selection.RandomizedSearchCV(sklearn.svm.SVC(), grid_search_params)
clf.fit(X_train, y_train)

RandomizedSearchCV(cv=None, error_score='raise',
          estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False),
          fit_params=None, iid=True, n_iter=10, n_jobs=1,
          param_distributions={'C': <scipy.stats._distn_infrastructure.rv_frozen object at 0x12407c7f0>, 'gamma': <scipy.stats._distn_infrastructure.rv_frozen object at 0x12407cc18>, 'kernel': ['rbf'], 'class_weight': ['balanced', None]},
          pre_dispatch='2*n_jobs', random_state=None, refit=True,
          return_train_score='warn', scoring=None, verbose=0)

In [26]:
clf.best_params_

{'C': 45.379942882185325,
 'class_weight': 'balanced',
 'gamma': 0.0041731726747204975,
 'kernel': 'rbf'}

In [27]:
clf.score(X_test, y_test)

0.967741935483871

In [28]:
with open('working_dir/training_result.p', 'wb') as f:
    training_result = {
        clf: clf # TODO fix the pickle
    }
    pickle.dump(training_result, f)