In [1]:
import numpy as np
import pandas as pd

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

import random

In [2]:
fruits = np.random.choice(["Apple", "Orange", "Pear"], size=1000)
diameter = []
weight = []
color = []

for mark in fruits:
    if mark == "Apple":
        d = random.randint(6, 8)
        diameter.append(d)
        w = random.randint(30, 40)
        weight.append(w)
        c = random.uniform(0,1.2)
        color.append(c)
    if mark == "Orange":
        d = random.randint(6, 8)
        diameter.append(d)
        w = random.randint(30, 40)
        weight.append(w)
        c = random.uniform(1,2.2)
        color.append(c)
    if mark == "Pear":
        d = random.randint(6, 8)
        diameter.append(d)
        w = random.randint(25, 35)
        weight.append(w)
        c = random.uniform(2,3.5)
        color.append(c)

In [3]:
X = np.column_stack((diameter, weight, color))

In [4]:
Y = fruits

In [5]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, 
test_size=0.2, random_state=42)

In [6]:
rf = RandomForestClassifier(max_features=5, n_estimators=100)

In [7]:
rf.fit(X_train, Y_train)

In [8]:
rf.score(X_test, Y_test)

0.905

In [9]:
max_features_range = np.arange(1,6,1)
n_estimators_range = np.arange(10,210,10)
param_grid = dict(max_features=max_features_range, n_estimators=n_estimators_range)

rf_improved = RandomForestClassifier()

grid = GridSearchCV(estimator=rf_improved, param_grid=param_grid, cv=5)

In [10]:
grid.fit(X_train, Y_train)

In [11]:
#df = pd.DataFrame()
#df['max_features_range'] = max_features_range
#df['n_estimators_range'] = n_estimators_range
#df['score'] = grid.cv_results_["Accuracy"]
#df = df.sort_values(by=['score'], ascending=False)
#df

grid_results = pd.concat([pd.DataFrame(grid.cv_results_["params"]),pd.DataFrame(grid.cv_results_["mean_test_score"], columns=["Accuracy"])],axis=1)
grid_results = grid_results.sort_values(by=['Accuracy'], ascending=False)
grid_results.head(10)

Unnamed: 0,max_features,n_estimators,Accuracy
60,4,10,0.90875
69,4,100,0.9075
2,1,30,0.90625
6,1,70,0.90625
35,2,160,0.90625
40,3,10,0.90625
23,2,40,0.905
30,2,110,0.905
34,2,150,0.905
25,2,60,0.905


In [12]:
import seaborn as sns
cm = sns.light_palette("green", as_cmap=True)
s = grid_results.style.background_gradient(cmap=cm)
s

Unnamed: 0,max_features,n_estimators,Accuracy
60,4,10,0.90875
69,4,100,0.9075
2,1,30,0.90625
6,1,70,0.90625
35,2,160,0.90625
40,3,10,0.90625
23,2,40,0.905
30,2,110,0.905
34,2,150,0.905
25,2,60,0.905
