In [1]:
import time
from pathlib import Path
from time import perf_counter as timer

import numpy as np
import pandas as pd
from joblib import dump, load
from sklearn import metrics
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier

from utility import loadData, signsLabels


In [2]:
# Load the train/test split through the project's `utility.loadData` helper.
# NOTE(review): presumably X_* hold the features and y_* the matching class
# labels (they are used that way by fit/predict below) — confirm the exact
# shapes and dtypes against utility.loadData.
X_train, y_train, X_test, y_test = loadData()

In [3]:
# Hyper-parameter candidates explored by the grid search: every combination
# of neighbour count and leaf size, always with the 'auto' algorithm choice.
param_grid = {
    'n_neighbors': [3, 5, 7, 10, 15],
    'leaf_size': [20, 30, 40],
    'algorithm': ['auto'],
}

# Base k-NN estimator with default settings; GridSearchCV overrides the
# parameters listed in `param_grid` for each candidate.
neigh = KNeighborsClassifier()

# `fit` returns the fitted search object itself, so `clf` is the fitted
# GridSearchCV. n_jobs=-1 runs the candidate fits on all available cores.
clf = GridSearchCV(estimator=neigh, param_grid=param_grid, n_jobs=-1).fit(X_train, y_train)

print("Optimized Parameters: \n{}".format(clf.best_params_))

Optimized Parameters: 
{'algorithm': 'auto', 'leaf_size': 20, 'n_neighbors': 3}


In [4]:
# Predict test-set labels with the fitted grid search object.
y_pred = clf.predict(X_test)

# Per-class precision / recall / F1 plus the averaged summary rows.
print("Classification report :\n{}\n".format(metrics.classification_report(y_test, y_pred)))

# Accuracy: fraction of test samples whose predicted label equals the true label.
print("Accuracy:", metrics.accuracy_score(y_test, y_pred))

# In a single-label multiclass problem, micro-averaged precision and recall
# are both identical to accuracy, so printing them adds no information.
# Macro averaging (unweighted mean of the per-class scores) is reported
# instead; it exposes classes the model handles poorly regardless of how
# many samples they have.

# Precision: of the samples predicted as a class, the fraction that truly
# belong to that class.
print("Precision (macro):", metrics.precision_score(y_test, y_pred, average='macro'))

# Recall: of the samples that truly belong to a class, the fraction that
# were predicted as that class.
print("Recall (macro):", metrics.recall_score(y_test, y_pred, average='macro'))

Classification report :
              precision    recall  f1-score   support

           0       0.80      1.00      0.89       331
           1       0.95      0.94      0.94       432
           2       0.95      1.00      0.97       310
           3       0.76      0.95      0.85       245
           4       0.78      0.97      0.86       498
           5       0.87      0.91      0.89       247
           6       0.91      0.94      0.92       348
           7       0.95      0.94      0.94       436
           8       0.87      0.65      0.75       288
          10       0.85      0.59      0.70       331
          11       0.93      0.89      0.91       209
          12       0.81      0.49      0.61       394
          13       0.78      0.59      0.67       291
          14       1.00      0.88      0.94       246
          15       1.00      1.00      1.00       347
          16       0.95      1.00      0.97       164
          17       0.32      0.60      0.41       144
   

In [5]:
# Save the fitted grid-search object (not just the best estimator) to disk.
filename = 'models/KNN_model.joblib'

# Create the target directory first: on a fresh checkout it may not exist,
# and a failed dump would throw away the result of the grid search above.
Path(filename).parent.mkdir(parents=True, exist_ok=True)

# `dump` returns the list of written file names, which is shown as the cell output.
dump(clf, filename)

['models/KNN_model.joblib']

In [6]:
# Classification report as a nested dict, turned into a DataFrame with one
# column per class plus the summary entries, and one row per metric.
report = metrics.classification_report(y_test, y_pred, output_dict=True)
df = pd.DataFrame(report)

# Relabel the columns with the sign names followed by the three summary
# entries produced by classification_report.
# NOTE(review): this is a positional assignment — it assumes signsLabels()
# yields exactly one name per class present in the report, in the same order
# as the report's class columns. A count mismatch raises ValueError; an
# ordering mismatch would silently mislabel columns — confirm against
# utility.signsLabels.
columns = [*signsLabels().values(), 'accuracy', 'macro avg', 'weighted avg']
df.columns = columns

# Make sure the output directory exists before writing the CSV.
Path('output').mkdir(parents=True, exist_ok=True)
df.to_csv('output/KNN_classification_report.csv')

# Display the relabelled report as the cell output.
df

Unnamed: 0,A,B,C,D,E,F,G,H,I,K,...,S,T,U,V,W,X,Y,accuracy,macro avg,weighted avg
precision,0.79759,0.946136,0.95092,0.763158,0.775806,0.872093,0.908333,0.953271,0.87037,0.854626,...,0.671875,0.703252,0.423529,0.67658,0.717949,0.796537,0.965812,0.80396,0.809835,0.826044
recall,1.0,0.935185,1.0,0.946939,0.965863,0.910931,0.939655,0.93578,0.652778,0.586103,...,0.873984,0.697581,0.676692,0.526012,0.679612,0.689139,0.680723,0.80396,0.797957,0.80396
f1-score,0.887399,0.940629,0.974843,0.845173,0.860465,0.891089,0.923729,0.944444,0.746032,0.695341,...,0.759717,0.700405,0.520984,0.59187,0.698254,0.738956,0.798587,0.80396,0.793191,0.804562
support,331.0,432.0,310.0,245.0,498.0,247.0,348.0,436.0,288.0,331.0,...,246.0,248.0,266.0,346.0,206.0,267.0,332.0,0.80396,7172.0,7172.0
