# Day 08. Exercise 02
# Multiclass classification. One-hot encoding. Random forest

## 0. Imports

In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

## 1. Preprocessing

1. Read the file [`checker-submits.csv`](https://drive.google.com/file/d/14voc4fNJZiLEFaZyd8nEG-lQt5JjatYw/view?usp=sharing).
2. Create a dataframe `df` with the columns: `uid`, `labname`, `numTrials`, `hour`, `dayofweek` where `hour` is extracted from the `timestamp` as well as the `dayofweek` (`0` is Monday, `6` is Sunday). We will try to predict the day of the week having data about which user made a commit for which lab at which hour and which try it was.
3. Using `OneHotEncoder()` transform your categorical features, delete from the dataframe the initial columns.
4. Use `StandardScaler()` and scale your continuous features.
5. Save the dataframe as `dayofweek.csv`.
6. Before trying out different algorithms, find out the accuracy of the naive algorithm – the one that predicts everything as the most popular class.

Создадим датафрейм df со столбцами: uid, labname, numTrials, hour, dayofweek

Сначала посмотрим, что в исходном датафрейме

In [None]:
# Load the raw checker submissions; the `timestamp` column is parsed into
# datetimes at read time so the `.dt` accessor can be used on it later.
df = pd.read_csv(
    '../data/checker_submits.csv',
    parse_dates=['timestamp'],
)
df

Переделаем столбцы по заданию

In [None]:
# Derive the hour of day and the weekday number from the timestamp, then drop
# the raw timestamp column, which is no longer needed.
df = df.assign(
    hour=df['timestamp'].dt.hour,
    dayofweek=df['timestamp'].dt.dayofweek,
).drop(columns='timestamp')
df

Мы попытаемся предсказать день недели, имея данные о том, какой пользователь сделал коммит, для какой лабораторной работы, в какое время и какая это была попытка.

С помощью pd.get_dummies() (аналог OneHotEncoder()) преобразуем категориальные признаки и удалим из датафрейма исходные столбцы.

In [None]:
# One-hot encode the categorical columns; each dummy column is prefixed with
# the name of the column it was derived from.
categorical_columns = ['uid', 'labname']
df = pd.get_dummies(df, columns=categorical_columns, prefix=categorical_columns)
df

Используя StandardScaler(), масштабируем непрерывные признаки.

In [None]:
# Standardise the two continuous columns in place; the one-hot columns and
# the target are left untouched.
continuous_columns = ['numTrials', 'hour']
scaler = StandardScaler()
df[continuous_columns] = scaler.fit_transform(df[continuous_columns])
df

Сохраним датафрейм

In [None]:
# Persist the preprocessed frame; index=False keeps the row index out of the CSV.
df.to_csv('../data/dayofweek.csv', index=False)

Прежде чем пробовать разные алгоритмы, узнаем точность наивного алгоритма — того, который всё предсказывает как самый популярный класс.

In [None]:
# Split the frame into the target and the feature matrix.
y = df['dayofweek']
X = df.drop(columns='dayofweek')
# Majority-class baseline: predict the most frequent weekday for every row.
most_common_day = np.bincount(y).argmax()
y_pred_naive = np.full(len(y), most_common_day)

Разработанный классификатор предсказывает, к какому классу принадлежит объект, на основании значений его признаков. В результате возможны четыре ситуации:

Объект имеет характеристику класса 1, и классификатор определяет характеристику как 1. Это означает, что классификатор сработал верно. Величина TP (True positive) содержит общее количество таких ответов.

Объект имеет характеристику класса 0, а классификатор определяет характеристику как 1. Это означает, что классификатор сработал неверно. Величина FP (False positive) содержит общее количество таких ответов.

Объект имеет характеристику класса 0, и классификатор определяет характеристику как 0. Это означает, что классификатор сработал верно. Величина TN (True negative) содержит общее количество таких ответов.

Объект имеет характеристику класса 1, а классификатор определяет характеристику как 0. Это означает, что классификатор сработал неверно. Величина FN (False negative) содержит общее количество таких ответов.

Accuracy определяет долю правильных ответов и считается следующим образом: $Accuracy = \frac{TP + TN}{TP + TN + FP + FN}$

In [None]:
# Accuracy of the majority-class baseline over the whole dataset.
accuracy_score(y, y_pred_naive)

## 2. Algorithms

### a. Logreg

1. Train logistic regression, for the baseline model use `random_state=21`, `fit_intercept=False`. 
2. Calculate the accuracy.
3. Write a function that draws the plot (`barh`) taking coefficients of any trained models, names of the features and the number of `top-n` most important features to display.
4. Draw a plot (`barh`) for the baseline model with top-10 most important features (absolute value) for the trained model.
5. Remember that it is a multiclass classification and `coef_` returns a matrix, to calculate importance for a feature you need to sum all the individual feature importances for all the target values.

In [None]:
# Baseline logistic regression without an intercept term; fit() returns the
# estimator itself, so construction and training are chained.
lr = LogisticRegression(fit_intercept=False, random_state=21).fit(X, y)
y_pred = lr.predict(X)
# Accuracy is measured on the same data the model was trained on.
accuracy_score(y, y_pred)

Построим график (barh) на основании рассчитанных коэффициентов регрессии для n наиболее значимых фичей

In [None]:
def plot_features(coefs, features, n=10):
    """Plot the ``n`` most important features as a horizontal bar chart.

    Importance is the absolute value of each coefficient, rescaled so that
    all importances sum to 1. A 2-D ``coefs`` matrix (one row per class or
    per binary sub-problem) is reduced by summing the absolute values of
    each column.

    Args:
        coefs: 1-D array-like of per-feature weights, or a 2-D array-like
            whose columns correspond to features. The input is not modified.
        features: Array-like of feature names aligned with the coefficients.
        n: Maximum number of bars to draw; fewer are drawn when there are
            fewer than ``n`` features.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    # np.abs allocates a new array, so the caller's data is left intact, and
    # ranking by magnitude keeps strongly negative weights near the top.
    weights = np.abs(np.asarray(coefs, dtype=float))
    if weights.ndim == 2:
        weights = weights.sum(axis=0)
    total = weights.sum()
    if total > 0:  # all-zero weights cannot be normalised; plot them as is
        weights = weights / total
    labels = np.asarray(features)
    # argsort is ascending, so reverse it before keeping the first n entries.
    top = np.argsort(weights)[::-1][:max(n, 0)]
    # Size the axis from what was actually selected: it may be shorter than n.
    positions = np.arange(top.size)
    _, ax = plt.subplots(figsize=(15, 8))
    ax.barh(positions, weights[top], color='green')
    ax.set_yticks(positions)
    ax.set_yticklabels(labels[top])
    ax.invert_yaxis()  # most important feature at the top of the chart
    plt.show()

In [None]:
# coef_ has one row per class. Averaging the signed rows lets positive and
# negative per-class weights cancel each other, so a feature's importance is
# taken as the sum of its absolute coefficients over all classes instead.
plot_features(np.abs(lr.coef_).sum(axis=0), X.columns)

### b. SVC

1. Train a `SVC` model, for the baseline model use parameters `kernel='linear'`, `probability=True`, `random_state=21`. 
2. Try different kernels, calculate the accuracies.
3. Draw a plot (`barh`) for the baseline model with top-10 most important features (absolute value) for the trained model for the linear kernel *

*By default SVC uses the “one vs one” classification strategy, thus `coef_` returns a matrix. To calculate importance for a feature you need to use [OneVsRestClassifier](https://scikit-learn.org/stable/modules/generated/sklearn.multiclass.OneVsRestClassifier.html) over the SVC and sum all the individual feature importances for all the target values.

SVM — метод опорных векторов.

http://datascientist.one/support-vector-machines/

Обучим модель SVC, используя параметры kernel='linear', probability=True, random_state=21

In [None]:
# Baseline SVC with a linear kernel; fit() returns the estimator itself, so
# construction and training are chained.
svc = SVC(
    kernel='linear',
    probability=True,
    random_state=21,
).fit(X, y)
y_svc = svc.predict(X)

In [None]:
# Accuracy of the linear-kernel SVC on the data it was trained on.
accuracy_score(y, y_svc)

Нарисуем график для базовой модели с 10 наиболее важными признаками (абсолютное значение веса) для обученной модели для линейного ядра

In [None]:
# For a linear kernel coef_ holds one row per pairwise (one-vs-one)
# classifier. Averaging the signed rows lets opposite-sign weights cancel, so
# importance is taken as the sum of absolute weights over all rows instead.
plot_features(np.abs(svc.coef_).sum(axis=0), X.columns)

Пробуем разные ядра, посчитаем точность.

In [None]:
# Compare the available SVC kernels by cross-validated accuracy.
param_grid = {'kernel': ['linear', 'poly', 'rbf', 'sigmoid']}
svc = SVC(probability=True, random_state=21)
gs = GridSearchCV(estimator=svc, param_grid=param_grid, scoring='accuracy')
gs.fit(X, y)
gs.best_params_

Лучшим ядром оказалось poly

In [None]:
# Predict with the estimator that the grid search refit using the best
# kernel, then score it on the data it was trained on.
y_pred = gs.best_estimator_.predict(X)
accuracy_score(y, y_pred)

### c. Decision tree

1. Train a `DecisionTreeClassifier` using for the baseline model `max_depth=4`, `random_state=21`. 
2. Try different values of `max_depth`, calculate the accuracies.
3. Draw a plot (`barh`) for the baseline model with top-10 most important features (absolute value) for the trained model using the written function.

Обучим DecisionTreeClassifier, используя для базовой модели max_depth=4, random_state=21.

In [None]:
# Baseline decision tree limited to depth 4, scored on the training data.
dtc = DecisionTreeClassifier(random_state=21, max_depth=4).fit(X, y)
y_dts = dtc.predict(X)
accuracy_score(y, y_dts)

Попробуем разные значения max_depth, рассчитаем точность.

In [None]:
# Decision tree limited to depth 6, scored on the training data.
dtc = DecisionTreeClassifier(random_state=21, max_depth=6).fit(X, y)
y_dts = dtc.predict(X)
accuracy_score(y, y_dts)

In [None]:
# Decision tree limited to depth 8, scored on the training data.
dtc = DecisionTreeClassifier(random_state=21, max_depth=8).fit(X, y)
y_dts = dtc.predict(X)
accuracy_score(y, y_dts)

In [None]:
# Decision tree limited to depth 10, scored on the training data.
dtc = DecisionTreeClassifier(random_state=21, max_depth=10).fit(X, y)
y_dts = dtc.predict(X)
accuracy_score(y, y_dts)

In [None]:
# Decision tree limited to depth 12, scored on the training data.
dtc = DecisionTreeClassifier(random_state=21, max_depth=12).fit(X, y)
y_dts = dtc.predict(X)
accuracy_score(y, y_dts)

In [None]:
# Decision tree limited to depth 28, scored on the training data.
dtc = DecisionTreeClassifier(random_state=21, max_depth=28).fit(X, y)
y_dts = dtc.predict(X)
accuracy_score(y, y_dts)

При увеличении глубины точность на обучающей выборке увеличивается. При глубине 28 достигается 1. Система становится переобученной.

Нарисуем график для базовой модели с 10 наиболее важными фичами (по абсолютному значению) для обученной модели, используя написанную функцию.

In [None]:
# Cross-validated search over the tree depth; n_jobs=-1 uses all CPU cores.
param_grid = {'max_depth': [3, 5, 7, 10, 20, 28]}
dtc = DecisionTreeClassifier(random_state=21)
gs = GridSearchCV(
    estimator=dtc,
    param_grid=param_grid,
    scoring='accuracy',
    n_jobs=-1,
)
gs.fit(X, y)

In [None]:
# Parameter combination with the best cross-validated score in the grid search.
gs.best_params_

In [None]:
# Predict with the estimator that the grid search refit using the best
# parameters, then score it on the data it was trained on.
y_pred = gs.best_estimator_.predict(X)
accuracy_score(y, y_pred)

In [None]:
# Top-10 feature importances of the best estimator from the most recently
# fitted grid search (the default n=10 is used).
plot_features(gs.best_estimator_.feature_importances_, X.columns)

### d. Random forest

In real life forest is a set of trees. The same thing is with machine learning. Random forest is a set of individual decision trees (check the documentation for more details).

1. Train a `RandomForestClassifier` using for the baseline model parameters `n_estimators=100`, `max_depth = 25`, `random_state=21`. 
2. Try different values of `max_depth` and `n_estimators`, calculate the accuracies.
3. Draw a plot (`barh`) for the baseline model with top-10 most important features (absolute value) for the trained model using the written function.

Обучим RandomForestClassifier, используя параметры базовой модели n_estimators=100, max_depth = 25, random_state=21.

In [None]:
# Baseline random forest: 100 trees, each limited to depth 25.
forest_params = {'n_estimators': 100, 'max_depth': 25, 'random_state': 21}
rfm = RandomForestClassifier(**forest_params).fit(X, y)  # training
y_rfm = rfm.predict(X)  # prediction on the training data

accuracy_score(y, y_rfm)

Попробуем разные значения max_depth и n_estimators, рассчитаем точность.

In [None]:
# Smaller forest: 10 trees, each limited to depth 25.
forest_params = {'n_estimators': 10, 'max_depth': 25, 'random_state': 21}
rfm = RandomForestClassifier(**forest_params).fit(X, y)  # training
y_rfm = rfm.predict(X)  # prediction on the training data

accuracy_score(y, y_rfm)

In [None]:
# Shallower forest: 100 trees, each limited to depth 20.
forest_params = {'n_estimators': 100, 'max_depth': 20, 'random_state': 21}
rfm = RandomForestClassifier(**forest_params).fit(X, y)  # training
y_rfm = rfm.predict(X)  # prediction on the training data

accuracy_score(y, y_rfm)

Нарисуем график для базовой модели с 10 наиболее важными признаками (по абсолютному значению) для обученной модели, используя написанную функцию.

In [None]:
# Cross-validated search over forest size and tree depth; n_jobs=-1 uses all
# CPU cores.
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [3, 5, 7, 10, 20, 25, 30],
}
rfm = RandomForestClassifier(random_state=21)
gs = GridSearchCV(
    estimator=rfm,
    param_grid=param_grid,
    scoring='accuracy',
    n_jobs=-1,
)
gs.fit(X, y)

In [None]:
# Parameter combination with the best cross-validated score in the grid search.
gs.best_params_

In [None]:
# Top-10 feature importances of the best estimator from the most recently
# fitted grid search (the default n=10 is used).
plot_features(gs.best_estimator_.feature_importances_, X.columns)