# Decision Tree
- https://scikit-learn.org/stable/modules/generated/sklearn.tree.DecisionTreeClassifier.html
- https://github.com/rickiepark/introduction_to_ml_with_python/blob/master/02-supervised-learning.ipynb

## 필요한 라이브러리 임포트

In [None]:
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.tree import plot_tree


## 데이터셋 준비

In [None]:

# Load the breast cancer dataset shipped with scikit-learn. Later cells read its
# `data`, `target`, `target_names` and `feature_names` attributes.
cancer = load_breast_cancer()


In [None]:
# Show the dimensions of the feature array (`cancer.data.shape`).
print("암 데이터의 형태:", cancer.data.shape)

In [None]:
# Pair every class name with the number of samples carrying that label.
# np.bincount counts occurrences of each integer label in cancer.target, so
# zipping it with target_names yields a name -> count mapping.
print(
    "클래스별 샘플 갯수:\n",
    dict(zip(cancer.target_names, np.bincount(cancer.target))),
)

In [None]:
# List the feature names; the same array is used further down to label the
# tree plot and the feature-importance chart.
print("특성 이름:\n", cancer.feature_names)

In [None]:
# Split the data into training and test sets.
# - stratify=cancer.target: split using the class labels so both parts keep
#   the class proportions of the full dataset.
# - random_state=42: fixed seed so the split is reproducible.
# No test_size/train_size is given, so scikit-learn's default proportion applies.
X_train, X_test, y_train, y_test = train_test_split(
    cancer.data, cancer.target, stratify=cancer.target, random_state=42)


## 기본 파라미터로 훈련

In [None]:
# Baseline: a tree with default settings (only the seed is fixed), fitted on
# the training split. fit() returns the estimator, so it can be chained.
tree = DecisionTreeClassifier(random_state=0).fit(X_train, y_train)

# Report accuracy on the data the tree was trained on and on the held-out data.
train_accuracy = tree.score(X_train, y_train)
test_accuracy = tree.score(X_test, y_test)
print("훈련 세트 정확도: {:.3f}".format(train_accuracy))
print("테스트 세트 정확도: {:.3f}".format(test_accuracy))

## 최대 깊이 조정 (사전 가지치기)

In [None]:
# Pre-pruned variant: same seed, but the tree may grow at most 4 levels deep.
# fit() returns the estimator, so construction and training are chained.
tree = DecisionTreeClassifier(max_depth=4, random_state=0).fit(X_train, y_train)

# Report accuracy on the training split and on the held-out test split.
train_accuracy = tree.score(X_train, y_train)
test_accuracy = tree.score(X_test, y_test)
print("훈련 세트 정확도: {:.3f}".format(train_accuracy))
print("테스트 세트 정확도: {:.3f}".format(test_accuracy))

## 훈련된 트리 시각화

In [None]:
# Draw the fitted tree on a wide canvas so the node labels stay readable.
plt.figure(figsize=(20, 10))
plot_tree(
    tree,
    # Take the labels from the dataset instead of hard-coding them, and pass
    # plain Python lists: some scikit-learn releases validate these parameters
    # as `list` and reject NumPy arrays.
    class_names=cancer.target_names.tolist(),
    feature_names=cancer.feature_names.tolist(),
    impurity=False,  # hide the impurity value in each node
    filled=True,     # colour the nodes
    rounded=True,    # rounded node boxes
    fontsize=10,
)
plt.show()

## 특성 중요도 확인

In [None]:
def plot_feature_importances_cancer(model, feature_names=None):
    """Draw a horizontal bar chart of a fitted model's feature importances.

    Args:
        model: Fitted estimator exposing ``feature_importances_``.
        feature_names: Optional sequence of labels, one per importance value.
            Defaults to ``cancer.feature_names`` (the notebook's dataset).

    Raises:
        ValueError: If the number of labels differs from the number of
            importance values.
    """
    importances = np.asarray(model.feature_importances_)
    if feature_names is None:
        feature_names = cancer.feature_names

    # Size the chart from the model itself so bars and tick labels always
    # line up, and fail before an empty figure is created if they cannot.
    n_features = len(importances)
    if len(feature_names) != n_features:
        raise ValueError(
            "expected {} feature names, got {}".format(
                n_features, len(feature_names)))

    positions = np.arange(n_features)
    plt.figure(figsize=(10, 8))
    plt.barh(positions, importances, align='center')
    plt.yticks(positions, feature_names)
    plt.xlabel("Feature Importance")
    plt.ylabel("Feature")
    # Leave one unit of padding below the first bar and above the last bar.
    plt.ylim(-1, n_features)


plot_feature_importances_cancer(tree)

## Q. 가장 성능이 좋은 max_depth는?

In [None]:
#TODO for문을 활용해 테스트해보세요!
