In [0]:
%matplotlib inline

In [0]:
import numpy as np
from time import time
from scipy.stats import randint as sp_randint
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from sklearn.datasets import load_digits
from sklearn.ensemble import RandomForestClassifier

In [0]:
# get data
digits = load_digits()  # Source : http://archive.ics.uci.edu/ml/datasets/Pen-Based+Recognition+of+Handwritten+Digits
X, y = digits.data, digits.target

In [0]:
# build a classifier
clf = RandomForestClassifier(n_estimators=50)

In [0]:
# specify parameters and distributions to sample from
param_dist = {"max_depth": [3, None],
              "max_features": sp_randint(1, 11),
              "min_samples_split": sp_randint(2, 11),
              "min_samples_leaf": sp_randint(1, 11),
              "bootstrap": [True, False],
              "criterion": ["gini", "entropy"]}

In [0]:
# run randomized search
samples = 10  # number of random samples 
randomCV = RandomizedSearchCV(clf, param_distributions=param_dist, n_iter=samples) #default cv = 3

In [0]:
randomCV.fit(X, y)

 
print(randomCV.best_params_)




{'bootstrap': False, 'criterion': 'entropy', 'max_depth': None, 'max_features': 6, 'min_samples_leaf': 1, 'min_samples_split': 5}


In [0]:
# use a full grid over all parameters
param_grid = {"max_depth": [3, None],
              "max_features": [1, 3, 10],
              "min_samples_split": [2, 3, 10],
              "min_samples_leaf": [1, 3, 10],
              "bootstrap": [True, False],
              "criterion": ["gini", "entropy"]}

In [0]:
# run grid search
grid_search = GridSearchCV(clf, param_grid=param_grid)
start = time()
grid_search.fit(X, y)



GridSearchCV(cv='warn', error_score='raise-deprecating',
       estimator=RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=50, n_jobs=None,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'max_depth': [3, None], 'max_features': [1, 3, 10], 'min_samples_split': [2, 3, 10], 'min_samples_leaf': [1, 3, 10], 'bootstrap': [True, False], 'criterion': ['gini', 'entropy']},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [0]:
grid_search.best_params_

{'bootstrap': False,
 'criterion': 'gini',
 'max_depth': None,
 'max_features': 3,
 'min_samples_leaf': 1,
 'min_samples_split': 3}

In [0]:
grid_search.cv_results_['mean_test_score']

array([0.82470785, 0.82915971, 0.81469115, 0.81803005, 0.7902059 ,
       0.82248191, 0.81023929, 0.81747357, 0.7968837 , 0.84307179,
       0.85030607, 0.83806344, 0.82749026, 0.84362827, 0.83528102,
       0.83416806, 0.82971619, 0.83639399, 0.81803005, 0.83528102,
       0.82749026, 0.80745687, 0.82192543, 0.83917641, 0.82025598,
       0.83583751, 0.84418475, 0.92765721, 0.92431831, 0.92209238,
       0.91318865, 0.91096272, 0.9081803 , 0.88647746, 0.87145242,
       0.88313856, 0.93989983, 0.93377852, 0.92877017, 0.92932666,
       0.92710072, 0.92209238, 0.9081803 , 0.90929327, 0.91430161,
       0.93711742, 0.93934335, 0.92598776, 0.92821369, 0.92932666,
       0.92320534, 0.91541458, 0.90595437, 0.90873678, 0.8230384 ,
       0.82470785, 0.8263773 , 0.8163606 , 0.82860323, 0.7968837 ,
       0.82582081, 0.80022259, 0.80578742, 0.83973289, 0.8491931 ,
       0.84641068, 0.84529772, 0.84974958, 0.84028937, 0.83082916,
       0.83472454, 0.8458542 , 0.83416806, 0.83027268, 0.82915

In [0]:
grid_search.best_estimator_

RandomForestClassifier(bootstrap=False, class_weight=None, criterion='gini',
            max_depth=None, max_features=3, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=3,
            min_weight_fraction_leaf=0.0, n_estimators=50, n_jobs=None,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)