In [1]:
from sklearn import datasets
from sklearn.tree import DecisionTreeClassifier

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score

In [3]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import roc_auc_score
from sklearn.metrics import mean_squared_error

In [5]:
data = datasets.load_breast_cancer()
X = data.data
y = data.target

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [28]:
clf = DecisionTreeClassifier()
clf.fit(X_train, y_train)
clf

DecisionTreeClassifier()

In [29]:
y_pred = clf.predict(X_test)

In [30]:
print('Confusion Matrix')
print(confusion_matrix(y_test, y_pred))

Confusion Matrix
[[41  3]
 [ 4 66]]


In [31]:
print('Accuracy')
print(accuracy_score(y_test, y_pred, normalize=False))


Accuracy
107


In [32]:
print('Classification Report')
print(classification_report(y_test, y_pred))

Classification Report
              precision    recall  f1-score   support

           0       0.91      0.93      0.92        44
           1       0.96      0.94      0.95        70

    accuracy                           0.94       114
   macro avg       0.93      0.94      0.94       114
weighted avg       0.94      0.94      0.94       114



In [33]:
print('AUC')
print(roc_auc_score(y_test, y_pred))

AUC
0.9373376623376624


In [34]:
print('Mean Squared Error')
print(mean_squared_error(y_test, y_pred))

Mean Squared Error
0.06140350877192982


In [35]:
skf = StratifiedKFold(n_splits=10)
skf.get_n_splits(X, y)
print(skf)  

StratifiedKFold(n_splits=10, random_state=None, shuffle=False)


In [36]:
clf = DecisionTreeClassifier()
scores = cross_val_score(clf, X, y, cv=skf)
print('K Fold Cross Validation Score')
print(scores)
print("Average Accuracy")
print(scores.mean())

K Fold Cross Validation Score
[0.92982456 0.87719298 0.9122807  0.87719298 0.94736842 0.87719298
 0.85964912 0.94736842 0.92982456 0.94642857]
Average Accuracy
0.9104323308270675


In [46]:
skf_sh = StratifiedKFold(n_splits=10, shuffle=True)
skf_sh.get_n_splits(X, y)
print(skf_sh)  

StratifiedKFold(n_splits=10, random_state=None, shuffle=True)


In [47]:
clf = DecisionTreeClassifier()
scores = cross_val_score(clf, X, y, cv=skf_sh)
print('K Fold Cross Validation Score')
print(scores)
print("Average Accuracy")
print(scores.mean())

K Fold Cross Validation Score
[0.92982456 0.96491228 0.96491228 0.94736842 0.96491228 0.96491228
 0.87719298 0.89473684 0.98245614 0.92857143]
Average Accuracy
0.9419799498746867
