In [1]:
%matplotlib inline

import matplotlib
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimage
import cv2
import numpy as np


In [2]:
import glob
vehicle_images=glob.glob("dataset/vehicles/*/*.png")
non_vehicle_images=glob.glob("dataset/non-vehicles/*/*.png")
print("Dataset Contains: {} Vehicle and {} Non-Vehicle images".format(len(vehicle_images),len(non_vehicle_images)))

Dataset Contains: 8792 Vehicle and 8968 Non-Vehicle images


In [3]:
from skimage.feature import hog
def get_features(img,cell_per_block=2,pix_per_cell=8,orient=9, colorspace='HSV', color_hist_bins=16, img_resize=(32,32)):
#     returns the feature vector for the image
    if(img_resize != (img.shape[0]),img.shape[1]):
        img=cv2.resize(img,img_resize)
#     get hog features before colorspace conversion
    feat=hog(cv2.cvtColor(img,cv2.COLOR_BGR2GRAY), orientations=orient, pixels_per_cell=(pix_per_cell, pix_per_cell), cells_per_block=(cell_per_block, cell_per_block), visualise=False, feature_vector=True)
#     Convert to required colorspace
    if(colorspace!='BGR'):
        if(colorspace=='HSV'):
            img=cv2.cvtColor(img,cv2.COLOR_BGR2HSV)
        elif(colorspace=='HLS'):
            img=cv2.cvtColor(img,cv2.COLOR_BGR2HLS)
        else:
            pass    
    for i in range(3):
#         loop over three channels
        ch_hist = np.histogram(img[:,:,i],bins=color_hist_bins,range=(0,256))[0]
        feat = np.concatenate((feat,ch_hist))
    return feat

Read the images and create X_raw, y_raw


In [4]:
vehicle_features=[]
for vehicle_image in vehicle_images:
    vehicle_features.append(get_features(cv2.imread(vehicle_image)))
non_vehicle_features=[]
for non_vehicle_image in non_vehicle_images:
    non_vehicle_features.append(get_features(cv2.imread(non_vehicle_image)))


Scale the X features

In [6]:
from sklearn.preprocessing import StandardScaler
from sklearn.cross_validation import train_test_split

X_raw = np.vstack((vehicle_features,non_vehicle_features)).astype(np.float64)
y_raw = np.hstack((np.ones(len(vehicle_features)),np.zeros(len(non_vehicle_features))))

stdscaler = StandardScaler().fit(X_raw)
X_raw_scaled = stdscaler.transform(X_raw)

# Test train split
X_train, X_test, y_train, y_test = train_test_split(
    X_raw_scaled, y_raw, test_size=0.2, random_state=12)

SVM GridSearch CV

In [68]:
from sklearn.model_selection import GridSearchCV
from sklearn import svm

parameters = {'kernel':('linear', 'rbf'), 'C':[1, 10, 100, 1000], 'gamma': [0.1, 0.001,0.0001]}
svr = svm.SVC()
grid_svc_clf = GridSearchCV(svr, parameters,n_jobs=6)
grid_svc_clf.fit(X_train, y_train)

GridSearchCV(cv=None, error_score='raise',
       estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False),
       fit_params={}, iid=True, n_jobs=6,
       param_grid={'kernel': ('linear', 'rbf'), 'gamma': [0.1, 0.001, 0.0001], 'C': [1, 10, 100, 1000]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
       scoring=None, verbose=0)

Random Forest Classifier

In [69]:
from sklearn.ensemble import RandomForestClassifier
# use a full grid over all parameters
clf = RandomForestClassifier(n_estimators=20)
param_grid = {"max_depth": [3, None],
              "max_features": [1, 3, 10],
              "min_samples_split": [1, 3, 10],
              "min_samples_leaf": [1, 3, 10],
              "bootstrap": [True, False],
              "criterion": ["gini", "entropy"]}
grid_rf_clf = GridSearchCV(clf, param_grid=param_grid,n_jobs=6)
grid_rf_clf.fit(X_train, y_train)

GridSearchCV(cv=None, error_score='raise',
       estimator=RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_split=1e-07, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            n_estimators=20, n_jobs=1, oob_score=False, random_state=None,
            verbose=0, warm_start=False),
       fit_params={}, iid=True, n_jobs=6,
       param_grid={'bootstrap': [True, False], 'min_samples_leaf': [1, 3, 10], 'criterion': ['gini', 'entropy'], 'max_depth': [3, None], 'min_samples_split': [1, 3, 10], 'max_features': [1, 3, 10]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
       scoring=None, verbose=0)

In [70]:
# Utility function to report best scores
def report(results, n_top=3):
    for i in range(1, n_top + 1):
        candidates = np.flatnonzero(results['rank_test_score'] == i)
        for candidate in candidates:
            print("Model with rank: {0}".format(i))
            print("Mean validation score: {0:.3f} (std: {1:.3f})".format(
                  results['mean_test_score'][candidate],
                  results['std_test_score'][candidate]))
            print("Parameters: {0}".format(results['params'][candidate]))
            print("")


SVM Parameters Report


In [73]:
print("GridSearchCV results for SVC")
report(grid_svc_clf.cv_results_)

GridSearchCV results for SVC
Model with rank: 1
Mean validation score: 0.993 (std: 0.001)
Parameters: {'kernel': 'rbf', 'gamma': 0.001, 'C': 10}

Model with rank: 2
Mean validation score: 0.993 (std: 0.001)
Parameters: {'kernel': 'rbf', 'gamma': 0.001, 'C': 100}

Model with rank: 2
Mean validation score: 0.993 (std: 0.001)
Parameters: {'kernel': 'rbf', 'gamma': 0.001, 'C': 1000}



RF Parameters Report

In [74]:
print("GridSearchCV results for Random Forest Classifier")
report(grid_rf_clf.cv_results_)


GridSearchCV results for Random Forest Classifier
Model with rank: 1
Mean validation score: 0.990 (std: 0.001)
Parameters: {'bootstrap': False, 'min_samples_leaf': 3, 'criterion': 'entropy', 'max_depth': None, 'min_samples_split': 3, 'max_features': 10}

Model with rank: 2
Mean validation score: 0.990 (std: 0.001)
Parameters: {'bootstrap': False, 'min_samples_leaf': 1, 'criterion': 'entropy', 'max_depth': None, 'min_samples_split': 1, 'max_features': 10}

Model with rank: 3
Mean validation score: 0.990 (std: 0.000)
Parameters: {'bootstrap': False, 'min_samples_leaf': 1, 'criterion': 'entropy', 'max_depth': None, 'min_samples_split': 3, 'max_features': 10}



Set the best classifiers

In [18]:
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier

svm_clf=svm.SVC(kernel='rbf',C=10,gamma=0.001)
rf_clf=RandomForestClassifier(n_estimators=20,bootstrap=False, min_samples_leaf=3,criterion='entropy',max_depth=None, min_samples_split=3, max_features=10)

Try Voting Classifier

In [19]:
from sklearn.ensemble import VotingClassifier
from sklearn.model_selection import cross_val_score

vote_clf = VotingClassifier(estimators=[('svm', svm_clf), ('rf', rf_clf)], voting='hard')
for clf, label in zip([svm_clf, rf_clf, vote_clf], ['SVM', 'Random Forest', 'Ensemble']):
    scores = cross_val_score(clf, X_train, y_train, cv=5, scoring='accuracy',n_jobs=6,verbose=1)
    print("Accuracy: %0.6f (+/- %0.6f) [%s]" % (scores.mean(), scores.std(), label))

    

Accuracy: 0.993877 (+/- 0.001231) [SVM]


[Parallel(n_jobs=6)]: Done   5 out of   5 | elapsed:    8.5s finished


Accuracy: 0.989724 (+/- 0.002056) [Random Forest]


[Parallel(n_jobs=6)]: Done   5 out of   5 | elapsed:    4.1s finished
  **self._backend_args)
  **self._backend_args)
  **self._backend_args)
  **self._backend_args)
  **self._backend_args)


Accuracy: 0.990709 (+/- 0.001519) [Ensemble]


[Parallel(n_jobs=6)]: Done   5 out of   5 | elapsed:   12.4s finished


Save the models to re-use (model persistance)

In [22]:
from sklearn.externals import joblib

# We need to fit the model again, as cross_val procedure doesn't do it!
vote_clf.fit(X_train,y_train)
svm_clf.fit(X_train,y_train)
rf_clf.fit(X_train, y_train)

joblib.dump(vote_clf, 'model_vote.pkl') 
joblib.dump(svm_clf,'model_svm.pkl')
joblib.dump(rf_clf,'model_rf.pkl')

# Save the data
joblib.dump(np.array(X_train, X_test, y_train, y_test),'dataset_bkp.pkl')

ValueError: only 2 non-keyword arguments accepted