In [27]:
import pandas as pd
from time import time

In [10]:
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split


# Loading Data

In [3]:
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
names = ["sepal-length", "sepal-width", "petal-length", "petal-width", "class"]
dataset = pd.read_csv(url, names=names)

In [13]:
array = dataset.values
X = array[:, 0:4]
Y = array[:, 4]
validation_size = 0.2
seed = 7
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=validation_size,
                                                                                random_state=seed)

# Setting up Classifier

In [14]:
clf = RandomForestClassifier()

Possible parameters

In [16]:
clf.get_params()

{'bootstrap': True,
 'class_weight': None,
 'criterion': 'gini',
 'max_depth': None,
 'max_features': 'auto',
 'max_leaf_nodes': None,
 'min_impurity_split': 1e-07,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'n_estimators': 10,
 'n_jobs': 1,
 'oob_score': False,
 'random_state': None,
 'verbose': 0,
 'warm_start': False}

In [28]:
parameters = [
    {"min_samples_leaf": [1, 4, 6, 8, 10, 12, 20]}
 ]

jobs = []
times = []
for n_jobs in range(1, 10):
    clf = GridSearchCV(estimator=RandomForestClassifier(), n_jobs=n_jobs, scoring="precision", param_grid=parameters)
    t0 = time()
    clf.fit(X,Y)
    times.append(time() - t0)
    jobs.append(n_jobs)

ValueError: Target is multiclass but average='binary'. Please choose another average setting.

In [24]:
clf.best_params_

{'min_samples_leaf': 8, 'n_jobs': 5}

In [25]:
clf.grid_scores_



[mean: 0.97500, std: 0.00051, params: {'n_jobs': 1, 'min_samples_leaf': 1},
 mean: 0.97500, std: 0.00051, params: {'n_jobs': 2, 'min_samples_leaf': 1},
 mean: 0.96667, std: 0.01254, params: {'n_jobs': 3, 'min_samples_leaf': 1},
 mean: 0.97500, std: 0.00051, params: {'n_jobs': 4, 'min_samples_leaf': 1},
 mean: 0.96667, std: 0.01106, params: {'n_jobs': 5, 'min_samples_leaf': 1},
 mean: 0.96667, std: 0.01106, params: {'n_jobs': 6, 'min_samples_leaf': 1},
 mean: 0.97500, std: 0.00051, params: {'n_jobs': 1, 'min_samples_leaf': 4},
 mean: 0.97500, std: 0.00051, params: {'n_jobs': 2, 'min_samples_leaf': 4},
 mean: 0.97500, std: 0.00051, params: {'n_jobs': 3, 'min_samples_leaf': 4},
 mean: 0.98333, std: 0.01180, params: {'n_jobs': 4, 'min_samples_leaf': 4},
 mean: 0.98333, std: 0.01164, params: {'n_jobs': 5, 'min_samples_leaf': 4},
 mean: 0.98333, std: 0.01180, params: {'n_jobs': 6, 'min_samples_leaf': 4},
 mean: 0.98333, std: 0.01164, params: {'n_jobs': 1, 'min_samples_leaf': 6},
 mean: 0.975

In [20]:
sorted(clf.cv_results_.keys())

['mean_fit_time',
 'mean_score_time',
 'mean_test_score',
 'mean_train_score',
 'param_min_samples_leaf',
 'param_n_jobs',
 'params',
 'rank_test_score',
 'split0_test_score',
 'split0_train_score',
 'split1_test_score',
 'split1_train_score',
 'split2_test_score',
 'split2_train_score',
 'std_fit_time',
 'std_score_time',
 'std_test_score',
 'std_train_score']