In [1]:
from sklearn.datasets import load_breast_cancer

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [14]:
# Load the breast cancer dataset that ships with scikit-learn.
# The returned object is used below via `bc.data`, `bc.target` and `bc.feature_names`.
bc = load_breast_cancer()

In [15]:
# Show the names of the feature columns available in the dataset.
bc["feature_names"]

array(['mean radius', 'mean texture', 'mean perimeter', 'mean area',
       'mean smoothness', 'mean compactness', 'mean concavity',
       'mean concave points', 'mean symmetry', 'mean fractal dimension',
       'radius error', 'texture error', 'perimeter error', 'area error',
       'smoothness error', 'compactness error', 'concavity error',
       'concave points error', 'symmetry error', 'fractal dimension error',
       'worst radius', 'worst texture', 'worst perimeter', 'worst area',
       'worst smoothness', 'worst compactness', 'worst concavity',
       'worst concave points', 'worst symmetry', 'worst fractal dimension'],
      dtype='<U23')

In [23]:
from sklearn.model_selection import train_test_split
# Hold out 33% of the samples as a test set; the fixed seed keeps the split
# identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    bc["data"],
    bc["target"],
    test_size=0.33,
    random_state=0,
)

In [24]:
from sklearn.ensemble import RandomForestClassifier

# Random forest of 500 size-limited trees (depth <= 6, at most 20 leaves each),
# considering 10 candidate features per split. `n_jobs=-1` trains in parallel
# on all available cores and `random_state` makes the fit reproducible.
rnd_clf = RandomForestClassifier(
    n_estimators=500,
    max_features=10,
    max_depth=6,
    max_leaf_nodes=20,
    n_jobs=-1,
    random_state=73,
)
rnd_clf.fit(X_train, y_train)

# Feature importance: one "<feature name> <importance>" line per input column,
# in the dataset's original column order.
for feature, score in zip(bc.feature_names, rnd_clf.feature_importances_):
    print(feature, score)

mean radius 0.00886435164576
mean texture 0.0134590040904
mean perimeter 0.0250307027928
mean area 0.0107840940594
mean smoothness 0.00328170016452
mean compactness 0.00392455874327
mean concavity 0.057874746527
mean concave points 0.202834215946
mean symmetry 0.00367935683118
mean fractal dimension 0.00238808163768
radius error 0.00731112409401
texture error 0.00362972829377
perimeter error 0.00587011307935
area error 0.0270768687283
smoothness error 0.00322745535683
compactness error 0.0034760681177
concavity error 0.00483294386317
concave points error 0.00378381284719
symmetry error 0.00282385720463
fractal dimension error 0.00481063064988
worst radius 0.0711697748017
worst texture 0.0144591769076
worst perimeter 0.154519678422
worst area 0.0782490763347
worst smoothness 0.00719256967885
worst compactness 0.0138594891733
worst concavity 0.0207914873455
worst concave points 0.225555179618
worst symmetry 0.0105827152142
worst fractal dimension 0.0046574378308


In [30]:
# `rnd_clf` was already fitted on (X_train, y_train) in the training cell with a
# fixed random_state, so it is not refitted here: refitting would rebuild the
# same 500 trees and only cost time.

# NOTE: despite its name, `y_pred` holds the ground-truth test labels, not
# predictions. The alias is kept only for backward compatibility with any cell
# that still refers to it; prefer `y_test` in new code.
y_pred = y_test

# Predicted class labels for the held-out test set.
y_pred_rf = rnd_clf.predict(X_test)

# Accuracy = fraction of test samples whose predicted label equals the true label.
np.mean(y_test == y_pred_rf)

0.95744680851063835

In [31]:
from sklearn.metrics import accuracy_score
# Cross-check the manual accuracy with scikit-learn's implementation.
# Pass the ground-truth labels (`y_test`) and the model predictions explicitly
# by keyword so the two roles cannot be confused.
accuracy_score(y_true=y_test, y_pred=y_pred_rf)

0.95744680851063835