In [19]:
from sklearn.datasets import make_classification

X,Y = make_classification(n_samples = 200,n_classes = 2,n_features = 10,n_redundant = 0,random_state =1)

In [20]:
X.shape,Y.shape

((200, 10), (200,))

In [21]:
from sklearn.model_selection import train_test_split

X_train,X_test,Y_train,Y_test = train_test_split(X,Y,test_size=0.2)

In [22]:
X_train.shape,Y_train.shape

((160, 10), (160,))

In [23]:
X_test.shape,Y_test.shape

((40, 10), (40,))

In [24]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

rf = RandomForestClassifier(max_features = 5, n_estimators=10000)

rf.fit(X_train,Y_train)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features=5, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10000, n_jobs=None,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)

In [25]:
score = rf.score(X_test,Y_test)
score

0.925

In [26]:
Y_pred = rf.predict(X_test)

In [27]:
accuracy_score(Y_pred,Y_test)

0.925

In [28]:
from sklearn.model_selection import GridSearchCV
import numpy as np

max_feature_range = np.arange(1,6,1)
n_estimator_range = np.arange(10,210,10)
param_grid = dict(max_features = max_feature_range, n_estimators = n_estimator_range)

rf = RandomForestClassifier()

grid = GridSearchCV(estimator=rf , param_grid=param_grid, cv=5)

In [29]:
grid.fit(X_test,Y_test)

GridSearchCV(cv=5, error_score='raise-deprecating',
       estimator=RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators='warn', n_jobs=None,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'max_features': array([1, 2, 3, 4, 5]), 'n_estimators': array([ 10,  20,  30,  40,  50,  60,  70,  80,  90, 100, 110, 120, 130,
       140, 150, 160, 170, 180, 190, 200])},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [30]:
print("The best parameters are %s with a score of %0.2f"
      % (grid.best_params_, grid.best_score_))

The best parameters are {'max_features': 3, 'n_estimators': 50} with a score of 0.93


In [31]:
import pandas as pd

grid_results = pd.concat([pd.DataFrame(grid.cv_results_["params"]),pd.DataFrame(grid.cv_results_["mean_test_score"],columns = ["Accuracy"])],axis = 1)
grid_results

Unnamed: 0,max_features,n_estimators,Accuracy
0,1,10,0.725
1,1,20,0.800
2,1,30,0.800
3,1,40,0.850
4,1,50,0.850
5,1,60,0.825
6,1,70,0.850
7,1,80,0.850
8,1,90,0.900
9,1,100,0.850


In [32]:
grid_contour = grid_results.groupby(['max_features','n_estimators']).mean()
grid_contour

Unnamed: 0_level_0,Unnamed: 1_level_0,Accuracy
max_features,n_estimators,Unnamed: 2_level_1
1,10,0.725
1,20,0.800
1,30,0.800
1,40,0.850
1,50,0.850
1,60,0.825
1,70,0.850
1,80,0.850
1,90,0.900
1,100,0.850


In [33]:
grid_reset = grid_contour.reset_index()
grid_reset.columns = ['max_features', 'n_estimators', 'Accuracy']
grid_pivot = grid_reset.pivot('max_features', 'n_estimators')
grid_pivot

Unnamed: 0_level_0,Accuracy,Accuracy,Accuracy,Accuracy,Accuracy,Accuracy,Accuracy,Accuracy,Accuracy,Accuracy,Accuracy,Accuracy,Accuracy,Accuracy,Accuracy,Accuracy,Accuracy,Accuracy,Accuracy,Accuracy
n_estimators,10,20,30,40,50,60,70,80,90,100,110,120,130,140,150,160,170,180,190,200
max_features,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2
1,0.725,0.8,0.8,0.85,0.85,0.825,0.85,0.85,0.9,0.85,0.8,0.85,0.875,0.825,0.85,0.85,0.875,0.9,0.85,0.875
2,0.8,0.875,0.875,0.875,0.875,0.875,0.875,0.875,0.9,0.875,0.9,0.875,0.875,0.9,0.9,0.9,0.9,0.9,0.9,0.9
3,0.825,0.9,0.875,0.875,0.925,0.9,0.9,0.9,0.925,0.9,0.9,0.925,0.9,0.9,0.9,0.9,0.925,0.9,0.925,0.925
4,0.9,0.875,0.875,0.9,0.925,0.925,0.9,0.925,0.9,0.9,0.9,0.9,0.925,0.9,0.9,0.9,0.925,0.925,0.9,0.925
5,0.85,0.9,0.925,0.9,0.875,0.9,0.925,0.925,0.925,0.925,0.925,0.9,0.9,0.9,0.925,0.9,0.875,0.925,0.9,0.9


In [34]:
x = grid_pivot.columns.levels[1].values
y = grid_pivot.index.values
z = grid_pivot.values

In [35]:
import plotly.graph_objects as go

layout = go.Layout(
              xaxis=go.layout.XAxis(
              title=go.layout.xaxis.Title(
              text='n_estimators')
             ),
             yaxis=go.layout.YAxis(
              title=go.layout.yaxis.Title(
              text='max_features') 
            ))
fig = go.Figure(data = [go.Contour(z=z,x=x,y=y)], layout = layout)

fig.update_layout(title = 'Hyperparameter tuning', autosize = False, width = 500, height = 500, margin = dict(l=65,r=50,b=65,t=90))
fig.show()

In [42]:
import plotly.graph_objects as go


fig = go.Figure(data= [go.Surface(z=z, y=y, x=x)], layout=layout )
fig.update_layout(title='Hyperparameter tuning',
                  scene = dict(
                    xaxis_title='n_estimators',
                    yaxis_title='max_features',
                    zaxis_title='Accuracy'),
                  autosize=False,
                  width=800, height=800,
                  margin=dict(l=65, r=50, b=65, t=90))
fig.show()