# Decision Tree

In [1]:
# (1) Imports: dataset loader, metrics, train/test splitter, and the model
from sklearn.datasets import load_breast_cancer
from sklearn.metrics import accuracy_score, classification_report, recall_score
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

# (2) Load the dataset and pull out the feature matrix and the label vector
breast_cancer = load_breast_cancer()
breast_cancer_data = breast_cancer.data
breast_cancer_label = breast_cancer.target

# (3) Hold out 20% of the samples as a test set; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    breast_cancer_data,
    breast_cancer_label,
    test_size=0.2,
    random_state=7,
)

# (4) Train a seeded decision tree (fit() hands back the estimator) and predict on the test set
decision_tree = DecisionTreeClassifier(random_state=32).fit(X_train, y_train)
y_pred = decision_tree.predict(X_test)

# (5) Evaluate: overall accuracy and recall, printed after the per-class report
# NOTE(review): recall_score is called with its defaults, so it scores label 1 only.
# Check breast_cancer.target_names to confirm which diagnosis label 1 stands for
# before interpreting the "recall" line.
accuracy = accuracy_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)

print(classification_report(y_test, y_pred))
print("accuracy:", accuracy)
print("recall:", recall)

              precision    recall  f1-score   support

           0       0.92      0.82      0.87        40
           1       0.91      0.96      0.93        74

    accuracy                           0.91       114
   macro avg       0.91      0.89      0.90       114
weighted avg       0.91      0.91      0.91       114

accuracy: 0.9122807017543859
recall: 0.9594594594594594


# Random Forest

In [2]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, recall_score

# Fresh 80/20 split with seed 21. The cells that follow reuse these arrays without
# re-splitting. NOTE(review): the Decision Tree cell splits with seed 7, so its scores
# come from a different test set than the ones produced from here on.
X_train, X_test, y_train, y_test = train_test_split(
    breast_cancer_data,
    breast_cancer_label,
    test_size=0.2,
    random_state=21,
)

# Train a seeded random forest (fit() hands back the estimator) and predict on the test set
random_forest = RandomForestClassifier(random_state=32).fit(X_train, y_train)
y_pred = random_forest.predict(X_test)

# Overall accuracy and recall (recall_score defaults: label 1 only), shown after the report
accuracy = accuracy_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)

print(classification_report(y_test, y_pred))
print("accuracy:", accuracy)
print("recall:", recall)

              precision    recall  f1-score   support

           0       0.97      0.95      0.96        39
           1       0.97      0.99      0.98        75

    accuracy                           0.97       114
   macro avg       0.97      0.97      0.97       114
weighted avg       0.97      0.97      0.97       114

accuracy: 0.9736842105263158
recall: 0.9866666666666667


# Support Vector Machine (SVM)

In [3]:
from sklearn import svm
from sklearn.metrics import accuracy_score, recall_score

# Support vector classifier with all-default settings, trained on the split already
# present in the kernel (X_train / y_train are not redefined in this cell).
# NOTE(review): the features are passed in as-is, with no scaling step — consider
# standardizing them before fitting and compare the scores.
svm_model = svm.SVC().fit(X_train, y_train)
y_pred = svm_model.predict(X_test)

# Overall accuracy and recall (recall_score defaults: label 1 only), shown after the report
accuracy = accuracy_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)

print(classification_report(y_test, y_pred))
print("accuracy:", accuracy)
print("recall:", recall)

              precision    recall  f1-score   support

           0       0.93      0.72      0.81        39
           1       0.87      0.97      0.92        75

    accuracy                           0.89       114
   macro avg       0.90      0.85      0.86       114
weighted avg       0.89      0.89      0.88       114

accuracy: 0.8859649122807017
recall: 0.9733333333333334


# Stochastic Gradient Descent Classifier (SGDClassifier)

In [4]:
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import accuracy_score, recall_score

# SGD shuffles the training data on every epoch, so an unseeded model gives different
# metrics on every run. Seed it with the same value (32) the other estimators in this
# notebook use so that Restart & Run All reproduces the numbers.
# NOTE(review): the features are passed in unscaled; scikit-learn's SGD documentation
# recommends standardized inputs — consider adding a scaling step and compare the scores.
sgd_model = SGDClassifier(random_state=32)

# Train on the split already present in the kernel and predict on its test part
sgd_model.fit(X_train, y_train)
y_pred = sgd_model.predict(X_test)

# Per-class precision / recall / F1 table
print(classification_report(y_test, y_pred))

# Overall accuracy and recall (recall_score defaults: label 1 only)
accuracy = accuracy_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
print("accuracy:", accuracy)
print("recall:", recall)

              precision    recall  f1-score   support

           0       1.00      0.64      0.78        39
           1       0.84      1.00      0.91        75

    accuracy                           0.88       114
   macro avg       0.92      0.82      0.85       114
weighted avg       0.90      0.88      0.87       114

accuracy: 0.8771929824561403
recall: 1.0


# Logistic Regression

In [8]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, recall_score

# Iteration cap set explicitly to 5000.
# NOTE(review): presumably raised so the solver converges on these unscaled
# features — confirm, and consider scaling the inputs instead.
logistic_model = LogisticRegression(max_iter=5000).fit(X_train, y_train)
y_pred = logistic_model.predict(X_test)

# Overall accuracy and recall (recall_score defaults: label 1 only), shown after the report
accuracy = accuracy_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)

print(classification_report(y_test, y_pred))
print("accuracy:", accuracy)
print("recall:", recall)

              precision    recall  f1-score   support

           0       0.92      0.87      0.89        39
           1       0.94      0.96      0.95        75

    accuracy                           0.93       114
   macro avg       0.93      0.92      0.92       114
weighted avg       0.93      0.93      0.93       114

accuracy: 0.9298245614035088
recall: 0.96
