In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.ensemble import BaggingClassifier, RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import load_digits

%matplotlib inline

#scaler = StandardScaler()
#X = pd.DataFrame(data=scaler.fit_transform(X), columns=X.columns)

def estimate_accuracy(clf, X, y):
    return cross_val_score(clf, X, y, cv=10, scoring='f1_micro').mean()

digits = load_digits()
X, y = digits.data, digits.target

random_tree = DecisionTreeClassifier(splitter = 'random', random_state = 0)
print("Random tree:", estimate_accuracy(random_tree, X, y))


bagging_random_trees = BaggingClassifier(random_tree, n_estimators=100, max_features=int(np.sqrt(len(X))))
print("Random tree bagging:", estimate_accuracy(bagging_random_trees, X, y))

Random tree: 0.8280912476722533
Random tree bagging: 0.9504593420235878


In [None]:
#count = 0
#for depth in range(2, 10, 2):  #max_depth
#    for estimator in range(90, 110, 1):#n_estimators
#        for samples_split in range(2, 6, 1):#min_samples_split
#            print(depth, estimator, samples_split)
#            count += 1
#
#print(count)

In [15]:
%%time

for depth in range(2, 10, 2):  #max_depth
    clf = RandomForestClassifier(max_depth=depth, random_state=0)
    print("accuracy:", estimate_accuracy(clf, X, y), " depth:", depth)

clf = RandomForestClassifier(random_state=0)
print("accuracy:", estimate_accuracy(clf, X, y), " depth: no_limit")



accuracy: 0.7868684047175666  depth: 2
accuracy: 0.8903538175046556  depth: 4
accuracy: 0.9293047796399752  depth: 6
accuracy: 0.9376567349472376  depth: 8
accuracy: 0.9476939788950961  depth: no_limit
CPU times: user 14.3 s, sys: 127 ms, total: 14.5 s
Wall time: 14.6 s


In [16]:
for estimator in range(5, 150, 5):#n_estimators
    clf = RandomForestClassifier(n_estimators=estimator, max_depth=5, random_state=0)
    print("accuracy:", estimate_accuracy(clf, X, y), " estimator:", estimator)    

accuracy: 0.8241433891992551  estimator: 5
accuracy: 0.856415270018622  estimator: 10
accuracy: 0.8864773432650528  estimator: 15
accuracy: 0.8948199875853508  estimator: 20
accuracy: 0.8992644320297952  estimator: 25
accuracy: 0.898147113594041  estimator: 30
accuracy: 0.9059435133457481  estimator: 35
accuracy: 0.9131781502172565  estimator: 40
accuracy: 0.9131781502172565  estimator: 45
accuracy: 0.9148417132216016  estimator: 50
accuracy: 0.9165052762259467  estimator: 55
accuracy: 0.9131688392302918  estimator: 60
accuracy: 0.9164990689013036  estimator: 65
accuracy: 0.9170608317815023  estimator: 70
accuracy: 0.9192892613283676  estimator: 75
accuracy: 0.9231843575418995  estimator: 80
accuracy: 0.9231812538795781  estimator: 85
accuracy: 0.9209590316573559  estimator: 90
accuracy: 0.9181843575418996  estimator: 95
accuracy: 0.9187368094351337  estimator: 100
accuracy: 0.9170701427684669  estimator: 105
accuracy: 0.9192954686530106  estimator: 110
accuracy: 0.9192923649906891  es

In [17]:
for samples_split in range(5, 80, 4):#min_samples_split
    clf = RandomForestClassifier(min_samples_split=samples_split, random_state=0)
    print("accuracy:", estimate_accuracy(clf, X, y), " samples_split:", samples_split)

accuracy: 0.9499068901303538  samples_split: 5
accuracy: 0.9443389199255121  samples_split: 9
accuracy: 0.941548727498448  samples_split: 13
accuracy: 0.9365487274984481  samples_split: 17
accuracy: 0.9371011793916821  samples_split: 21
accuracy: 0.9387616387337058  samples_split: 25
accuracy: 0.9320825574177529  samples_split: 29
accuracy: 0.9304220980757293  samples_split: 33
accuracy: 0.9292985723153322  samples_split: 37
accuracy: 0.92707635009311  samples_split: 41
accuracy: 0.9237337057728119  samples_split: 45
accuracy: 0.9242923649906892  samples_split: 49
accuracy: 0.9181781502172566  samples_split: 53
accuracy: 0.9170670391061455  samples_split: 57
accuracy: 0.9187306021104906  samples_split: 61
accuracy: 0.9165083798882684  samples_split: 65
accuracy: 0.9131781502172565  samples_split: 69
accuracy: 0.9103879577901927  samples_split: 73
accuracy: 0.9165145872129115  samples_split: 77
