In [1]:
import os

import pandas as pd
import numpy as np

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn import metrics

from joblib import dump, load
from utility import loadData, signsLabels

In [2]:
# Load the train/test features and labels through the project helper.
# NOTE(review): assumes loadData() returns exactly (X_train, y_train, X_test, y_test)
# in this order, already split -- confirm against utility.py.
X_train, y_train, X_test, y_test = loadData()

In [3]:
# Search space: only the forest size is tuned; the split criterion is pinned to Gini.
param_grid = {'n_estimators': [20, 50, 100],  'criterion': ['gini'] }

# Seed the forest so its bootstrap sampling and per-split feature sub-sampling
# are repeatable; without a fixed random_state the selected parameters and the
# metrics computed from this model can differ on every Restart & Run All.
rfc = RandomForestClassifier(random_state=42)
# n_jobs=-1 lets the cross-validated search use all available cores.
clf = GridSearchCV(rfc, param_grid , n_jobs=-1)
clf.fit(X_train, y_train)

print("Optimized Parameters: \n{}".format(clf.best_params_))

Optimized Parameters: 
{'criterion': 'gini', 'n_estimators': 100}


In [4]:
# Predict on the held-out test set with the refitted best estimator of the search.
y_pred = clf.predict(X_test)

print("Classification report :\n{}\n".format(metrics.classification_report(y_test, y_pred)))
# Model Accuracy: how often is the classifier correct?
print("Accuracy:",metrics.accuracy_score(y_test, y_pred))

# For single-label multiclass targets, micro-averaged precision and recall are
# both equal to the accuracy printed above, so they add no information.
# Macro averaging (unweighted mean of the per-class scores) is reported instead;
# it corresponds to the "macro avg" row of the classification report.

# Model Precision: of the samples predicted as a class, what fraction truly belong to it?
print("Precision (macro):",metrics.precision_score(y_test, y_pred , average='macro'))

# Model Recall: of the samples that truly belong to a class, what fraction are predicted as such?
print("Recall (macro):",metrics.recall_score(y_test, y_pred, average='macro'))

Classification report :
              precision    recall  f1-score   support

           0       0.92      1.00      0.96       331
           1       0.96      0.93      0.94       432
           2       0.94      0.99      0.96       310
           3       0.89      0.95      0.92       245
           4       0.89      0.95      0.92       498
           5       0.93      0.92      0.92       247
           6       0.91      0.83      0.87       348
           7       1.00      0.94      0.97       436
           8       0.80      0.72      0.76       288
          10       0.72      0.69      0.70       331
          11       0.81      0.99      0.89       209
          12       0.82      0.68      0.75       394
          13       0.73      0.52      0.61       291
          14       0.96      0.87      0.91       246
          15       0.90      1.00      0.95       347
          16       0.90      0.98      0.94       164
          17       0.32      0.62      0.43       144
   

In [5]:
# save the model to disk
filename = 'models/RF_model.joblib'
# Create the target directory first: joblib does not create missing parent
# directories, so a fresh checkout without 'models/' would otherwise fail here.
os.makedirs(os.path.dirname(filename), exist_ok=True)
# Note: this persists the whole fitted GridSearchCV object (search results
# included), not only its best estimator.
dump(clf, filename)

['models/RF_model.joblib']

In [6]:
# Build the per-class report as a dict so it can be turned into a table.
report = metrics.classification_report(y_test, y_pred, output_dict=True)
# Column names are assigned purely by position: one sign label per class,
# followed by the three summary entries that close the report.
# NOTE(review): assumes signsLabels() yields its values in the same order as
# the class labels appear in the report -- confirm against utility.py.
columns = list(signsLabels().values())
df = pd.DataFrame(report)
columns.extend(['accuracy','macro avg','weighted avg'])
# Raises ValueError if the number of names does not match the number of columns.
df.columns = columns
# 'accuracy' is a single scalar in the report, so the same value is repeated in
# every row of that column (including the 'support' row).
# Make sure the destination directory exists before writing the CSV.
os.makedirs('output', exist_ok=True)
df.to_csv('output/RF_classification_report.csv')
df

Unnamed: 0,A,B,C,D,E,F,G,H,I,K,...,S,T,U,V,W,X,Y,accuracy,macro avg,weighted avg
precision,0.924581,0.961631,0.935976,0.885496,0.890566,0.926531,0.91195,0.997555,0.796154,0.721519,...,0.554286,0.541209,0.734783,0.805085,0.535581,0.819005,0.910377,0.809677,0.804279,0.82914
recall,1.0,0.928241,0.990323,0.946939,0.947791,0.919028,0.833333,0.93578,0.71875,0.688822,...,0.788618,0.794355,0.635338,0.549133,0.694175,0.677903,0.581325,0.809677,0.803563,0.809677
f1-score,0.960813,0.944641,0.962382,0.915187,0.918288,0.922764,0.870871,0.96568,0.755474,0.704791,...,0.651007,0.643791,0.681452,0.652921,0.604651,0.741803,0.709559,0.809677,0.794752,0.811415
support,331.0,432.0,310.0,245.0,498.0,247.0,348.0,436.0,288.0,331.0,...,246.0,248.0,266.0,346.0,206.0,267.0,332.0,0.809677,7172.0,7172.0
