<h1>Tuning Hyperparameters of ML Model </h1>

<h2> 1. Make Synthetic Dataset</h2>

In [32]:
from sklearn.datasets import make_classification
x,y = make_classification(n_samples=200, n_features=10, n_classes=2, n_redundant=0, random_state=1)

In [33]:
x.shape,y.shape

((200, 10), (200,))

In [87]:
import pandas as pd
pd.DataFrame(x)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,-1.511077,0.608749,-0.153236,0.507984,-0.324032,-2.432509,1.592056,-0.864830,-0.202901,-0.871422
1,1.445445,0.518969,0.645153,2.038777,-0.396293,1.282142,-2.170249,-1.043400,0.048547,-2.621012
2,0.371670,0.513505,-1.398813,-0.459943,0.644354,0.081768,-1.757065,0.142251,-1.132835,1.853009
3,2.565453,0.145652,1.177052,1.322694,0.194175,-0.641108,0.878631,-0.202694,-1.199798,-0.464115
4,-0.710656,1.050615,0.354602,-1.774596,-0.312230,-0.212373,0.826484,-0.621252,-1.187774,1.131129
...,...,...,...,...,...,...,...,...,...,...
195,-1.098083,-1.277636,0.419595,0.482176,-1.879287,-0.091079,-2.428480,0.032615,1.164204,0.758637
196,0.165211,1.937132,-1.307971,0.074876,-1.786935,1.472396,1.666002,-0.696028,-0.162525,0.976296
197,-0.950909,-0.218733,1.293550,0.590039,-0.679384,-0.438998,-0.188582,-0.045867,-0.972107,-0.704350
198,-0.446699,0.744885,-0.961264,0.494342,-1.494194,-1.458324,2.820244,0.612233,1.679779,0.204377


In [35]:
pd.DataFrame(y)

Unnamed: 0,0
0,1
1,0
2,0
3,1
4,1
...,...
195,0
196,1
197,0
198,1


<h2> 2. Data Split (80/20 ratio)</h2>

In [36]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x,y,test_size=0.2)

In [37]:
x_train.shape, y_train.shape

((160, 10), (160,))

In [38]:
x_test.shape, y_test.shape

((40, 10), (40,))

<h2> 3. Building a simple ML Model using RandomForest</h2>

In [39]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

rf = RandomForestClassifier(max_features= 5, n_estimators = 100)

In [48]:
rf.fit(x_train, y_train)

RandomForestClassifier(max_features=5)

In [49]:
rf.score (x_test ,y_test)

0.85

In [50]:
y_pred = rf.predict(x_test)

In [51]:
accuracy_score(y_pred, y_test)

0.85

In [52]:
y_pred, y_test

(array([1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1,
        1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1]),
 array([1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0,
        1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1]))

<h2> 4. Hyperparameter Tuning </h2>

#### Two parameters are Max_features and n_estimators features. Max_features represents the number of features used to construct a prediction model. n_estimators is the number of trees used to construct a prediction model.

In [71]:
from sklearn.model_selection import GridSearchCV
import numpy as np

max_features_range = np.arange(1,6,1)
n_estimators_range = np.arange(10,210,10)
param_grid = dict(max_features = max_features_range, n_estimators = n_estimators_range)

rf = RandomForestClassifier()

grid = GridSearchCV(estimator=rf,param_grid=param_grid, cv=5 )

In [72]:
grid.fit (x_train, y_train)

GridSearchCV(cv=5, estimator=RandomForestClassifier(),
             param_grid={'max_features': array([1, 2, 3, 4, 5]),
                         'n_estimators': array([ 10,  20,  30,  40,  50,  60,  70,  80,  90, 100, 110, 120, 130,
       140, 150, 160, 170, 180, 190, 200])})

In [78]:
print ("The best parameters are %s with a score of %0.2f" 
       % (grid.best_params_, grid.best_score_)
      )

The best parameters are {'max_features': 3, 'n_estimators': 170} with a score of 0.91


<h2> 5.Dataframe of grid search parameters and their accuracy scores </h2>

In [88]:
import pandas as pd
grid_results = pd.concat([pd.DataFrame(grid.cv_results_["params"]), pd.DataFrame(grid.cv_results_["mean_test_score"], columns = ["Accuracy"])], axis=1)
grid_results.head()

Unnamed: 0,max_features,n_estimators,Accuracy
0,1,10,0.7375
1,1,20,0.8
2,1,30,0.8125
3,1,40,0.86875
4,1,50,0.875


<h2>6. Preparing data for making contour plots <h2>

In [91]:
grid_contour = grid_results.groupby(['max_features', 'n_estimators']).mean()
grid_contour

Unnamed: 0_level_0,Unnamed: 1_level_0,Accuracy
max_features,n_estimators,Unnamed: 2_level_1
1,10,0.73750
1,20,0.80000
1,30,0.81250
1,40,0.86875
1,50,0.87500
...,...,...
5,160,0.88125
5,170,0.89375
5,180,0.89375
5,190,0.89375
