In [1]:
# Importing required libraries
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Load the data
df = pd.read_csv('../datasets/classificationData.csv')
print("The dataset has {} rows and {} columns.".format(df.shape[0], df.shape[1]))
df = df.drop(df.columns[0], axis=1) # Drop first column

del df['Crime Occurences']
df.head(3)

The dataset has 20160 rows and 7 columns.


Unnamed: 0,district,day,month,hour,Class
0,SOUTHERN,Wednesday,1,18,Extreme
1,SOUTHERN,Wednesday,3,18,Extreme
2,SOUTHERN,Friday,8,18,Extreme


In [3]:
from sklearn.preprocessing import LabelEncoder
y = df.iloc[:, -1].values
le = LabelEncoder()
y = le.fit_transform(y)
y = y.astype('float')

In [4]:
print(y)

[0. 0. 0. ... 2. 2. 2.]


In [5]:
%%capture
df = pd.get_dummies(df, columns=['district', 'day'], drop_first=True)
print(df.head())

X = df.loc[:, df.columns != 'Class'].values;
X = X.astype('float')

In [6]:
print(X)

[[ 1. 18.  0. ...  0.  0.  1.]
 [ 3. 18.  0. ...  0.  0.  1.]
 [ 8. 18.  0. ...  0.  0.  0.]
 ...
 [12.  4.  0. ...  0.  1.  0.]
 [ 9.  4.  0. ...  1.  0.  0.]
 [12.  5.  0. ...  0.  0.  1.]]


In [7]:
# Train-test split
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.1, random_state = 1)

In [11]:
# Importing classification packages.
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier as KNN
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from xgboost import XGBClassifier

SEED = 1 # Random Seed

# Classification
lr = LogisticRegression(random_state=SEED)
knn = KNN()
xg = XGBClassifier()
dt = DecisionTreeClassifier(random_state=SEED)
rf = RandomForestClassifier(random_state=SEED)

# Define a list called classifier that contains the tuples (classifier_name, classifier)
classifiers = [('Logistic Regression', lr),
               ('K Nearest Neighbours', knn),
               ('Classification Tree', dt),
               ('Forest', rf),
               ('XGB', xg)]

In [12]:
# Iterating over the defined list of tuples containing the classifiers
for clf_name, clf in classifiers:
    # Fitting
    clf.fit(X_train, y_train)
    # Predicting
    y_pred = clf.predict(X_test)
    
    print('{:s} : {:s}'.format(clf_name, classification_report(y_test, y_pred)))

Logistic Regression :               precision    recall  f1-score   support

         0.0       0.68      0.70      0.69       517
         1.0       0.54      0.62      0.58       478
         2.0       0.65      0.77      0.70       512
         3.0       0.72      0.47      0.57       509

    accuracy                           0.64      2016
   macro avg       0.65      0.64      0.64      2016
weighted avg       0.65      0.64      0.64      2016

K Nearest Neighbours :               precision    recall  f1-score   support

         0.0       0.79      0.84      0.82       517
         1.0       0.62      0.64      0.63       478
         2.0       0.83      0.88      0.86       512
         3.0       0.71      0.60      0.65       509

    accuracy                           0.74      2016
   macro avg       0.74      0.74      0.74      2016
weighted avg       0.74      0.74      0.74      2016

Classification Tree :               precision    recall  f1-score   support

        

In [13]:
vc = VotingClassifier(estimators=classifiers)
# Fit 'vc' to the traing set and predict test set labels
vc.fit(X_train, y_train)
y_pred = vc.predict(X_test)
# Evaluate the test-set accuracy of 'vc'
print('Voting Classifier: {:s}'.format(classification_report(y_test, y_pred)))

Voting Classifier:               precision    recall  f1-score   support

         0.0       0.86      0.85      0.85       517
         1.0       0.67      0.73      0.70       478
         2.0       0.86      0.89      0.88       512
         3.0       0.75      0.66      0.70       509

    accuracy                           0.78      2016
   macro avg       0.78      0.78      0.78      2016
weighted avg       0.79      0.78      0.78      2016



In [14]:
from sklearn.model_selection import GridSearchCV

params = {
    'n_estimators': np.arange(10,100,10),
    'criterion': ('gini', 'entropy'),
    'max_depth': (3,5,7,9,10),
    'min_samples_split': (2,4,6)
}

RFm = GridSearchCV(RandomForestClassifier(n_jobs = -1, random_state = SEED), param_grid = params, cv = 3, verbose = 3)

RF_grid_model = RFm.fit(X_train, y_train)

Fitting 3 folds for each of 270 candidates, totalling 810 fits
[CV] criterion=gini, max_depth=3, min_samples_split=2, n_estimators=10 


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV]  criterion=gini, max_depth=3, min_samples_split=2, n_estimators=10, score=0.638, total=   3.1s
[CV] criterion=gini, max_depth=3, min_samples_split=2, n_estimators=10 
[CV]  criterion=gini, max_depth=3, min_samples_split=2, n_estimators=10, score=0.644, total=   0.1s
[CV] criterion=gini, max_depth=3, min_samples_split=2, n_estimators=10 
[CV]  criterion=gini, max_depth=3, min_samples_split=2, n_estimators=10, score=0.633, total=   0.1s
[CV] criterion=gini, max_depth=3, min_samples_split=2, n_estimators=20 


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    3.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    3.2s remaining:    0.0s


[CV]  criterion=gini, max_depth=3, min_samples_split=2, n_estimators=20, score=0.644, total=   0.1s
[CV] criterion=gini, max_depth=3, min_samples_split=2, n_estimators=20 
[CV]  criterion=gini, max_depth=3, min_samples_split=2, n_estimators=20, score=0.656, total=   0.1s
[CV] criterion=gini, max_depth=3, min_samples_split=2, n_estimators=20 
[CV]  criterion=gini, max_depth=3, min_samples_split=2, n_estimators=20, score=0.653, total=   0.2s
[CV] criterion=gini, max_depth=3, min_samples_split=2, n_estimators=30 
[CV]  criterion=gini, max_depth=3, min_samples_split=2, n_estimators=30, score=0.653, total=   0.2s
[CV] criterion=gini, max_depth=3, min_samples_split=2, n_estimators=30 
[CV]  criterion=gini, max_depth=3, min_samples_split=2, n_estimators=30, score=0.662, total=   0.1s
[CV] criterion=gini, max_depth=3, min_samples_split=2, n_estimators=30 
[CV]  criterion=gini, max_depth=3, min_samples_split=2, n_estimators=30, score=0.657, total=   0.2s
[CV] criterion=gini, max_depth=3, min_sa

[CV]  criterion=gini, max_depth=3, min_samples_split=4, n_estimators=90, score=0.659, total=   0.4s
[CV] criterion=gini, max_depth=3, min_samples_split=4, n_estimators=90 
[CV]  criterion=gini, max_depth=3, min_samples_split=4, n_estimators=90, score=0.669, total=   0.5s
[CV] criterion=gini, max_depth=3, min_samples_split=4, n_estimators=90 
[CV]  criterion=gini, max_depth=3, min_samples_split=4, n_estimators=90, score=0.657, total=   0.3s
[CV] criterion=gini, max_depth=3, min_samples_split=6, n_estimators=10 
[CV]  criterion=gini, max_depth=3, min_samples_split=6, n_estimators=10, score=0.638, total=   0.1s
[CV] criterion=gini, max_depth=3, min_samples_split=6, n_estimators=10 
[CV]  criterion=gini, max_depth=3, min_samples_split=6, n_estimators=10, score=0.644, total=   0.1s
[CV] criterion=gini, max_depth=3, min_samples_split=6, n_estimators=10 
[CV]  criterion=gini, max_depth=3, min_samples_split=6, n_estimators=10, score=0.633, total=   0.1s
[CV] criterion=gini, max_depth=3, min_sa

[CV]  criterion=gini, max_depth=5, min_samples_split=2, n_estimators=70, score=0.671, total=   0.4s
[CV] criterion=gini, max_depth=5, min_samples_split=2, n_estimators=70 
[CV]  criterion=gini, max_depth=5, min_samples_split=2, n_estimators=70, score=0.683, total=   0.6s
[CV] criterion=gini, max_depth=5, min_samples_split=2, n_estimators=70 
[CV]  criterion=gini, max_depth=5, min_samples_split=2, n_estimators=70, score=0.684, total=   0.4s
[CV] criterion=gini, max_depth=5, min_samples_split=2, n_estimators=80 
[CV]  criterion=gini, max_depth=5, min_samples_split=2, n_estimators=80, score=0.671, total=   0.4s
[CV] criterion=gini, max_depth=5, min_samples_split=2, n_estimators=80 
[CV]  criterion=gini, max_depth=5, min_samples_split=2, n_estimators=80, score=0.683, total=   0.3s
[CV] criterion=gini, max_depth=5, min_samples_split=2, n_estimators=80 
[CV]  criterion=gini, max_depth=5, min_samples_split=2, n_estimators=80, score=0.684, total=   0.4s
[CV] criterion=gini, max_depth=5, min_sa

[CV]  criterion=gini, max_depth=5, min_samples_split=6, n_estimators=50, score=0.670, total=   0.5s
[CV] criterion=gini, max_depth=5, min_samples_split=6, n_estimators=50 
[CV]  criterion=gini, max_depth=5, min_samples_split=6, n_estimators=50, score=0.684, total=   0.3s
[CV] criterion=gini, max_depth=5, min_samples_split=6, n_estimators=50 
[CV]  criterion=gini, max_depth=5, min_samples_split=6, n_estimators=50, score=0.683, total=   0.4s
[CV] criterion=gini, max_depth=5, min_samples_split=6, n_estimators=60 
[CV]  criterion=gini, max_depth=5, min_samples_split=6, n_estimators=60, score=0.670, total=   0.3s
[CV] criterion=gini, max_depth=5, min_samples_split=6, n_estimators=60 
[CV]  criterion=gini, max_depth=5, min_samples_split=6, n_estimators=60, score=0.683, total=   0.3s
[CV] criterion=gini, max_depth=5, min_samples_split=6, n_estimators=60 
[CV]  criterion=gini, max_depth=5, min_samples_split=6, n_estimators=60, score=0.684, total=   0.3s
[CV] criterion=gini, max_depth=5, min_sa

[CV]  criterion=gini, max_depth=7, min_samples_split=4, n_estimators=30, score=0.701, total=   0.3s
[CV] criterion=gini, max_depth=7, min_samples_split=4, n_estimators=30 
[CV]  criterion=gini, max_depth=7, min_samples_split=4, n_estimators=30, score=0.718, total=   0.3s
[CV] criterion=gini, max_depth=7, min_samples_split=4, n_estimators=30 
[CV]  criterion=gini, max_depth=7, min_samples_split=4, n_estimators=30, score=0.715, total=   0.2s
[CV] criterion=gini, max_depth=7, min_samples_split=4, n_estimators=40 
[CV]  criterion=gini, max_depth=7, min_samples_split=4, n_estimators=40, score=0.702, total=   0.3s
[CV] criterion=gini, max_depth=7, min_samples_split=4, n_estimators=40 
[CV]  criterion=gini, max_depth=7, min_samples_split=4, n_estimators=40, score=0.716, total=   0.2s
[CV] criterion=gini, max_depth=7, min_samples_split=4, n_estimators=40 
[CV]  criterion=gini, max_depth=7, min_samples_split=4, n_estimators=40, score=0.717, total=   0.2s
[CV] criterion=gini, max_depth=7, min_sa

[CV]  criterion=gini, max_depth=9, min_samples_split=2, n_estimators=10, score=0.741, total=   0.2s
[CV] criterion=gini, max_depth=9, min_samples_split=2, n_estimators=10 
[CV]  criterion=gini, max_depth=9, min_samples_split=2, n_estimators=10, score=0.755, total=   0.2s
[CV] criterion=gini, max_depth=9, min_samples_split=2, n_estimators=20 
[CV]  criterion=gini, max_depth=9, min_samples_split=2, n_estimators=20, score=0.741, total=   0.1s
[CV] criterion=gini, max_depth=9, min_samples_split=2, n_estimators=20 
[CV]  criterion=gini, max_depth=9, min_samples_split=2, n_estimators=20, score=0.758, total=   0.2s
[CV] criterion=gini, max_depth=9, min_samples_split=2, n_estimators=20 
[CV]  criterion=gini, max_depth=9, min_samples_split=2, n_estimators=20, score=0.755, total=   0.2s
[CV] criterion=gini, max_depth=9, min_samples_split=2, n_estimators=30 
[CV]  criterion=gini, max_depth=9, min_samples_split=2, n_estimators=30, score=0.738, total=   0.2s
[CV] criterion=gini, max_depth=9, min_sa

[CV]  criterion=gini, max_depth=9, min_samples_split=4, n_estimators=80, score=0.757, total=   0.5s
[CV] criterion=gini, max_depth=9, min_samples_split=4, n_estimators=80 
[CV]  criterion=gini, max_depth=9, min_samples_split=4, n_estimators=80, score=0.756, total=   0.5s
[CV] criterion=gini, max_depth=9, min_samples_split=4, n_estimators=90 
[CV]  criterion=gini, max_depth=9, min_samples_split=4, n_estimators=90, score=0.741, total=   0.6s
[CV] criterion=gini, max_depth=9, min_samples_split=4, n_estimators=90 
[CV]  criterion=gini, max_depth=9, min_samples_split=4, n_estimators=90, score=0.756, total=   0.6s
[CV] criterion=gini, max_depth=9, min_samples_split=4, n_estimators=90 
[CV]  criterion=gini, max_depth=9, min_samples_split=4, n_estimators=90, score=0.754, total=   0.6s
[CV] criterion=gini, max_depth=9, min_samples_split=6, n_estimators=10 
[CV]  criterion=gini, max_depth=9, min_samples_split=6, n_estimators=10, score=0.747, total=   0.1s
[CV] criterion=gini, max_depth=9, min_sa

[CV]  criterion=gini, max_depth=10, min_samples_split=2, n_estimators=60, score=0.765, total=   0.8s
[CV] criterion=gini, max_depth=10, min_samples_split=2, n_estimators=60 
[CV]  criterion=gini, max_depth=10, min_samples_split=2, n_estimators=60, score=0.770, total=   0.6s
[CV] criterion=gini, max_depth=10, min_samples_split=2, n_estimators=70 
[CV]  criterion=gini, max_depth=10, min_samples_split=2, n_estimators=70, score=0.756, total=   1.0s
[CV] criterion=gini, max_depth=10, min_samples_split=2, n_estimators=70 
[CV]  criterion=gini, max_depth=10, min_samples_split=2, n_estimators=70, score=0.762, total=   1.0s
[CV] criterion=gini, max_depth=10, min_samples_split=2, n_estimators=70 
[CV]  criterion=gini, max_depth=10, min_samples_split=2, n_estimators=70, score=0.769, total=   0.9s
[CV] criterion=gini, max_depth=10, min_samples_split=2, n_estimators=80 
[CV]  criterion=gini, max_depth=10, min_samples_split=2, n_estimators=80, score=0.752, total=   0.9s
[CV] criterion=gini, max_dept

[CV]  criterion=gini, max_depth=10, min_samples_split=6, n_estimators=40, score=0.764, total=   0.5s
[CV] criterion=gini, max_depth=10, min_samples_split=6, n_estimators=40 
[CV]  criterion=gini, max_depth=10, min_samples_split=6, n_estimators=40, score=0.774, total=   0.4s
[CV] criterion=gini, max_depth=10, min_samples_split=6, n_estimators=50 
[CV]  criterion=gini, max_depth=10, min_samples_split=6, n_estimators=50, score=0.752, total=   0.6s
[CV] criterion=gini, max_depth=10, min_samples_split=6, n_estimators=50 
[CV]  criterion=gini, max_depth=10, min_samples_split=6, n_estimators=50, score=0.764, total=   0.5s
[CV] criterion=gini, max_depth=10, min_samples_split=6, n_estimators=50 
[CV]  criterion=gini, max_depth=10, min_samples_split=6, n_estimators=50, score=0.773, total=   0.6s
[CV] criterion=gini, max_depth=10, min_samples_split=6, n_estimators=60 
[CV]  criterion=gini, max_depth=10, min_samples_split=6, n_estimators=60, score=0.752, total=   0.6s
[CV] criterion=gini, max_dept

[CV]  criterion=entropy, max_depth=3, min_samples_split=4, n_estimators=20, score=0.644, total=   0.2s
[CV] criterion=entropy, max_depth=3, min_samples_split=4, n_estimators=20 
[CV]  criterion=entropy, max_depth=3, min_samples_split=4, n_estimators=20, score=0.661, total=   0.3s
[CV] criterion=entropy, max_depth=3, min_samples_split=4, n_estimators=20 
[CV]  criterion=entropy, max_depth=3, min_samples_split=4, n_estimators=20, score=0.663, total=   0.3s
[CV] criterion=entropy, max_depth=3, min_samples_split=4, n_estimators=30 
[CV]  criterion=entropy, max_depth=3, min_samples_split=4, n_estimators=30, score=0.656, total=   0.4s
[CV] criterion=entropy, max_depth=3, min_samples_split=4, n_estimators=30 
[CV]  criterion=entropy, max_depth=3, min_samples_split=4, n_estimators=30, score=0.638, total=   0.4s
[CV] criterion=entropy, max_depth=3, min_samples_split=4, n_estimators=30 
[CV]  criterion=entropy, max_depth=3, min_samples_split=4, n_estimators=30, score=0.623, total=   0.6s
[CV] cr

[CV]  criterion=entropy, max_depth=3, min_samples_split=6, n_estimators=80, score=0.664, total=   0.9s
[CV] criterion=entropy, max_depth=3, min_samples_split=6, n_estimators=90 
[CV]  criterion=entropy, max_depth=3, min_samples_split=6, n_estimators=90, score=0.660, total=   0.6s
[CV] criterion=entropy, max_depth=3, min_samples_split=6, n_estimators=90 
[CV]  criterion=entropy, max_depth=3, min_samples_split=6, n_estimators=90, score=0.669, total=   0.7s
[CV] criterion=entropy, max_depth=3, min_samples_split=6, n_estimators=90 
[CV]  criterion=entropy, max_depth=3, min_samples_split=6, n_estimators=90, score=0.662, total=   0.8s
[CV] criterion=entropy, max_depth=5, min_samples_split=2, n_estimators=10 
[CV]  criterion=entropy, max_depth=5, min_samples_split=2, n_estimators=10, score=0.675, total=   0.2s
[CV] criterion=entropy, max_depth=5, min_samples_split=2, n_estimators=10 
[CV]  criterion=entropy, max_depth=5, min_samples_split=2, n_estimators=10, score=0.695, total=   0.2s
[CV] cr

[CV]  criterion=entropy, max_depth=5, min_samples_split=4, n_estimators=60, score=0.685, total=   0.5s
[CV] criterion=entropy, max_depth=5, min_samples_split=4, n_estimators=60 
[CV]  criterion=entropy, max_depth=5, min_samples_split=4, n_estimators=60, score=0.678, total=   0.4s
[CV] criterion=entropy, max_depth=5, min_samples_split=4, n_estimators=70 
[CV]  criterion=entropy, max_depth=5, min_samples_split=4, n_estimators=70, score=0.674, total=   0.6s
[CV] criterion=entropy, max_depth=5, min_samples_split=4, n_estimators=70 
[CV]  criterion=entropy, max_depth=5, min_samples_split=4, n_estimators=70, score=0.686, total=   0.6s
[CV] criterion=entropy, max_depth=5, min_samples_split=4, n_estimators=70 
[CV]  criterion=entropy, max_depth=5, min_samples_split=4, n_estimators=70, score=0.679, total=   0.6s
[CV] criterion=entropy, max_depth=5, min_samples_split=4, n_estimators=80 
[CV]  criterion=entropy, max_depth=5, min_samples_split=4, n_estimators=80, score=0.674, total=   0.7s
[CV] cr

[CV]  criterion=entropy, max_depth=7, min_samples_split=2, n_estimators=40, score=0.726, total=   0.2s
[CV] criterion=entropy, max_depth=7, min_samples_split=2, n_estimators=40 
[CV]  criterion=entropy, max_depth=7, min_samples_split=2, n_estimators=40, score=0.720, total=   0.2s
[CV] criterion=entropy, max_depth=7, min_samples_split=2, n_estimators=50 
[CV]  criterion=entropy, max_depth=7, min_samples_split=2, n_estimators=50, score=0.710, total=   0.2s
[CV] criterion=entropy, max_depth=7, min_samples_split=2, n_estimators=50 
[CV]  criterion=entropy, max_depth=7, min_samples_split=2, n_estimators=50, score=0.725, total=   0.2s
[CV] criterion=entropy, max_depth=7, min_samples_split=2, n_estimators=50 
[CV]  criterion=entropy, max_depth=7, min_samples_split=2, n_estimators=50, score=0.723, total=   0.2s
[CV] criterion=entropy, max_depth=7, min_samples_split=2, n_estimators=60 
[CV]  criterion=entropy, max_depth=7, min_samples_split=2, n_estimators=60, score=0.705, total=   0.3s
[CV] cr

[CV]  criterion=entropy, max_depth=7, min_samples_split=6, n_estimators=20, score=0.730, total=   0.1s
[CV] criterion=entropy, max_depth=7, min_samples_split=6, n_estimators=20 
[CV]  criterion=entropy, max_depth=7, min_samples_split=6, n_estimators=20, score=0.722, total=   0.1s
[CV] criterion=entropy, max_depth=7, min_samples_split=6, n_estimators=30 
[CV]  criterion=entropy, max_depth=7, min_samples_split=6, n_estimators=30, score=0.705, total=   0.2s
[CV] criterion=entropy, max_depth=7, min_samples_split=6, n_estimators=30 
[CV]  criterion=entropy, max_depth=7, min_samples_split=6, n_estimators=30, score=0.731, total=   0.2s
[CV] criterion=entropy, max_depth=7, min_samples_split=6, n_estimators=30 
[CV]  criterion=entropy, max_depth=7, min_samples_split=6, n_estimators=30, score=0.722, total=   0.2s
[CV] criterion=entropy, max_depth=7, min_samples_split=6, n_estimators=40 
[CV]  criterion=entropy, max_depth=7, min_samples_split=6, n_estimators=40, score=0.706, total=   0.2s
[CV] cr

[CV]  criterion=entropy, max_depth=9, min_samples_split=2, n_estimators=90, score=0.737, total=   0.4s
[CV] criterion=entropy, max_depth=9, min_samples_split=2, n_estimators=90 
[CV]  criterion=entropy, max_depth=9, min_samples_split=2, n_estimators=90, score=0.755, total=   0.5s
[CV] criterion=entropy, max_depth=9, min_samples_split=2, n_estimators=90 
[CV]  criterion=entropy, max_depth=9, min_samples_split=2, n_estimators=90, score=0.754, total=   0.5s
[CV] criterion=entropy, max_depth=9, min_samples_split=4, n_estimators=10 
[CV]  criterion=entropy, max_depth=9, min_samples_split=4, n_estimators=10, score=0.745, total=   0.1s
[CV] criterion=entropy, max_depth=9, min_samples_split=4, n_estimators=10 
[CV]  criterion=entropy, max_depth=9, min_samples_split=4, n_estimators=10, score=0.751, total=   0.1s
[CV] criterion=entropy, max_depth=9, min_samples_split=4, n_estimators=10 
[CV]  criterion=entropy, max_depth=9, min_samples_split=4, n_estimators=10, score=0.747, total=   0.1s
[CV] cr

[CV]  criterion=entropy, max_depth=9, min_samples_split=6, n_estimators=60, score=0.753, total=   0.3s
[CV] criterion=entropy, max_depth=9, min_samples_split=6, n_estimators=70 
[CV]  criterion=entropy, max_depth=9, min_samples_split=6, n_estimators=70, score=0.741, total=   0.3s
[CV] criterion=entropy, max_depth=9, min_samples_split=6, n_estimators=70 
[CV]  criterion=entropy, max_depth=9, min_samples_split=6, n_estimators=70, score=0.752, total=   0.3s
[CV] criterion=entropy, max_depth=9, min_samples_split=6, n_estimators=70 
[CV]  criterion=entropy, max_depth=9, min_samples_split=6, n_estimators=70, score=0.753, total=   0.3s
[CV] criterion=entropy, max_depth=9, min_samples_split=6, n_estimators=80 
[CV]  criterion=entropy, max_depth=9, min_samples_split=6, n_estimators=80, score=0.744, total=   0.4s
[CV] criterion=entropy, max_depth=9, min_samples_split=6, n_estimators=80 
[CV]  criterion=entropy, max_depth=9, min_samples_split=6, n_estimators=80, score=0.752, total=   0.4s
[CV] cr

[CV]  criterion=entropy, max_depth=10, min_samples_split=4, n_estimators=40, score=0.753, total=   0.2s
[CV] criterion=entropy, max_depth=10, min_samples_split=4, n_estimators=40 
[CV]  criterion=entropy, max_depth=10, min_samples_split=4, n_estimators=40, score=0.763, total=   0.2s
[CV] criterion=entropy, max_depth=10, min_samples_split=4, n_estimators=40 
[CV]  criterion=entropy, max_depth=10, min_samples_split=4, n_estimators=40, score=0.771, total=   0.3s
[CV] criterion=entropy, max_depth=10, min_samples_split=4, n_estimators=50 
[CV]  criterion=entropy, max_depth=10, min_samples_split=4, n_estimators=50, score=0.752, total=   0.3s
[CV] criterion=entropy, max_depth=10, min_samples_split=4, n_estimators=50 
[CV]  criterion=entropy, max_depth=10, min_samples_split=4, n_estimators=50, score=0.764, total=   0.4s
[CV] criterion=entropy, max_depth=10, min_samples_split=4, n_estimators=50 
[CV]  criterion=entropy, max_depth=10, min_samples_split=4, n_estimators=50, score=0.769, total=   0

[Parallel(n_jobs=1)]: Done 810 out of 810 | elapsed:  4.7min finished


In [15]:
RF_model = RF_grid_model.best_estimator_

In [16]:
RF_model.fit(X_train, y_train)

RandomForestClassifier(max_depth=10, min_samples_split=4, n_estimators=10,
                       n_jobs=-1, random_state=1)

In [19]:
y_pred = RF_model.predict(X_test)
rf_cr = classification_report(y_test, y_pred)

print('Random Forest: {:s}'.format(classification_report(y_test, y_pred)))

Random Forest:               precision    recall  f1-score   support

         0.0       0.86      0.79      0.83       517
         1.0       0.61      0.72      0.66       478
         2.0       0.85      0.90      0.87       512
         3.0       0.73      0.62      0.67       509

    accuracy                           0.76      2016
   macro avg       0.76      0.76      0.76      2016
weighted avg       0.76      0.76      0.76      2016

