In [1]:
import pandas as pd
import numpy as np

In [3]:
import matplotlib.pyplot as plt
import seaborn

In [4]:
%matplotlib inline

In [5]:
data = pd.read_csv('workvoltwoaa.csv')

In [6]:
data.keys()

Index(['StateFormated', 'DateFormated', 'TimeFormated', 'Crash_TypeFormated',
       'Bus_InvolvementFormated', 'Heavy_Rigid_Truck_InvolvementFormated',
       'Speed_LimitFormated', 'Road_UserFormated', 'GenderFormated',
       'AgeFormated'],
      dtype='object')

In [7]:
from sklearn.preprocessing import StandardScaler

In [8]:
scaler = StandardScaler()

In [9]:
scaler.fit(data.drop('Crash_TypeFormated', axis=1))

StandardScaler(copy=True, with_mean=True, with_std=True)

In [10]:
scaled_features = scaler.transform(data.drop('Crash_TypeFormated',axis=1))

In [11]:
df_feat = pd.DataFrame(scaled_features,columns=data.columns[:-1])

In [12]:
df_feat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 48365 entries, 0 to 48364
Data columns (total 9 columns):
StateFormated                            48365 non-null float64
DateFormated                             48365 non-null float64
TimeFormated                             48365 non-null float64
Crash_TypeFormated                       48365 non-null float64
Bus_InvolvementFormated                  48365 non-null float64
Heavy_Rigid_Truck_InvolvementFormated    48365 non-null float64
Speed_LimitFormated                      48365 non-null float64
Road_UserFormated                        48365 non-null float64
GenderFormated                           48365 non-null float64
dtypes: float64(9)
memory usage: 3.3 MB


In [13]:
from sklearn.model_selection import train_test_split

In [14]:
X = df_feat
y = data['Crash_TypeFormated']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=101)

In [15]:
from sklearn.svm import SVC

In [16]:
model = SVC()

In [17]:
model.fit(X_train,y_train)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [18]:
predictions = model.predict(X_test)

In [19]:
from sklearn.metrics import classification_report,confusion_matrix

In [20]:
print(confusion_matrix(y_test,predictions))
print('\n')
print(classification_report(y_test,predictions))

[[4255    1 1872]
 [   6 2258   22]
 [2364    3 3729]]


             precision    recall  f1-score   support

          1       0.64      0.69      0.67      6128
          2       1.00      0.99      0.99      2286
          3       0.66      0.61      0.64      6096

avg / total       0.71      0.71      0.71     14510



In [21]:
from sklearn.model_selection import GridSearchCV

In [22]:
param_grid = {'C':[0.1,1,10,100,1000],'gamma': [1,0.1,0.01,0.001,0.0001]}

In [None]:
grid = GridSearchCV(SVC(),param_grid, verbose=3)

In [None]:
grid.fit(X_train,y_train)

Fitting 3 folds for each of 25 candidates, totalling 75 fits
[CV] C=0.1, gamma=1 ..................................................
[CV] .......... C=0.1, gamma=1, score=0.677536552946389, total=  47.2s
[CV] C=0.1, gamma=1 ..................................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:  1.1min remaining:    0.0s


[CV] ......... C=0.1, gamma=1, score=0.6799291094373061, total=  47.2s
[CV] C=0.1, gamma=1 ..................................................


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:  2.3min remaining:    0.0s


[CV] ......... C=0.1, gamma=1, score=0.6807266282676119, total=  48.9s
[CV] C=0.1, gamma=0.1 ................................................
[CV] ....... C=0.1, gamma=0.1, score=0.6982720425343376, total=  41.8s
[CV] C=0.1, gamma=0.1 ................................................
[CV] ....... C=0.1, gamma=0.1, score=0.6933097031457687, total=  42.1s
[CV] C=0.1, gamma=0.1 ................................................
[CV] ....... C=0.1, gamma=0.1, score=0.6936641559592379, total=  40.5s
[CV] C=0.1, gamma=0.01 ...............................................
[CV] ...... C=0.1, gamma=0.01, score=0.6493575542755871, total=  44.5s
[CV] C=0.1, gamma=0.01 ...............................................
[CV] ...... C=0.1, gamma=0.01, score=0.6483828090385467, total=  44.0s
[CV] C=0.1, gamma=0.01 ...............................................
[CV] ...... C=0.1, gamma=0.01, score=0.6509525919361985, total=  46.0s
[CV] C=0.1, gamma=0.001 ..............................................
[CV] .

In [None]:
grid.best_params_

In [None]:
grid.best_estimator_

In [None]:
grid_predictions = grid.predict(X_test)

In [None]:
print(confusion_matrix(y_test,grid_predictions))
print('\n')
print(classifcation_report(y_test,grid_prediction))