유방암 여부 판단
===============
------------------------------------------------

+ 의사결정나무, 랜덤포레스트, SVM, SGD, 로지스틱 회귀(logistic regression) 모델을 학습시켜 비교합니다.

-----------------------------

+ 모델 평가에는 정확도, 정밀도, 재현율, F1-Score를 이용합니다.

+ 이 중에서 재현율이 제일 중요하다고 생각합니다.

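+ 재현율은 실제 양성(positive)인 샘플 중에서 모델이 양성으로 올바르게 예측한 비율입니다. 암 환자를 놓치는 것을 최소화해야 하기 때문입니다. 참고로 scikit-learn의 이 데이터셋에서는 레이블 0이 악성(malignant), 레이블 1이 양성 종양(benign)이므로, 암 환자를 놓치지 않는지는 클래스 0의 재현율로 확인해야 합니다.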

In [4]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score

In [5]:
# Load the scikit-learn breast cancer dataset and keep features and labels
# under their own names.
breast_cancer = load_breast_cancer()
breast_cancer_data, breast_cancer_label = breast_cancer.data, breast_cancer.target

# Hold out 20% of the samples for testing; random_state pins the split.
X_train, X_test, y_train, y_test = train_test_split(
    breast_cancer_data,
    breast_cancer_label,
    test_size=0.2,
    random_state=7,
)

In [6]:
# Decision tree
from sklearn.tree import DecisionTreeClassifier

# fit() returns the estimator itself, so construction and training are chained.
decision_tree = DecisionTreeClassifier(random_state=32).fit(X_train, y_train)
y_pred = decision_tree.predict(X_test)
# Stored but not printed in this cell; the report below has its own accuracy row.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.92      0.82      0.87        40
           1       0.91      0.96      0.93        74

    accuracy                           0.91       114
   macro avg       0.91      0.89      0.90       114
weighted avg       0.91      0.91      0.91       114



In [7]:
# Random forest
from sklearn.ensemble import RandomForestClassifier

# Same seed value (32) as the decision tree cell.
random_forest = RandomForestClassifier(random_state=32).fit(X_train, y_train)
y_pred = random_forest.predict(X_test)

print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        40
           1       1.00      1.00      1.00        74

    accuracy                           1.00       114
   macro avg       1.00      1.00      1.00       114
weighted avg       1.00      1.00      1.00       114



In [8]:
# Support vector machine (SVC constructed with its default arguments)
from sklearn import svm

svm_model = svm.SVC().fit(X_train, y_train)
y_pred = svm_model.predict(X_test)

print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      0.72      0.84        40
           1       0.87      1.00      0.93        74

    accuracy                           0.90       114
   macro avg       0.94      0.86      0.89       114
weighted avg       0.92      0.90      0.90       114



In [9]:
# SGD (linear classifier trained with stochastic gradient descent)
from sklearn.linear_model import SGDClassifier

# SGDClassifier shuffles the training data between epochs, so without a seed
# every run fits a different model and prints a different report. Use the same
# seed value (32) as the decision tree and random forest cells so the result
# is reproducible.
sgd_model = SGDClassifier(random_state=32)
sgd_model.fit(X_train, y_train)
y_pred = sgd_model.predict(X_test)
print(classification_report(y_test, y_pred))

# NOTE(review): _estimator_type is a private scikit-learn attribute;
# sklearn.base.is_classifier(sgd_model) is the public way to ask the same thing.
print(sgd_model._estimator_type)

              precision    recall  f1-score   support

           0       1.00      0.50      0.67        40
           1       0.79      1.00      0.88        74

    accuracy                           0.82       114
   macro avg       0.89      0.75      0.77       114
weighted avg       0.86      0.82      0.81       114

classifier


In [13]:
# Logistic regression
from sklearn.linear_model import LogisticRegression

# max_iter is set explicitly to 10000 rather than left at the library default.
logistic_model = LogisticRegression(max_iter=10000).fit(X_train, y_train)
# The evaluation cell further down reads y_pred, so the name must stay as is.
y_pred = logistic_model.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      0.85      0.92        40
           1       0.93      1.00      0.96        74

    accuracy                           0.95       114
   macro avg       0.96      0.93      0.94       114
weighted avg       0.95      0.95      0.95       114



In [14]:
# Evaluation: compare the most recent predictions with a constant baseline

# Imported here but not called anywhere in this cell.
from sklearn.metrics import confusion_matrix

# Baseline that assigns label 0 to every test sample, one entry per prediction.
fake_pred = [0 for _ in y_pred]

print(classification_report(y_test, y_pred))

# zero_division=0 reports 0 for metrics whose denominator is zero, which
# happens for label 1 because the baseline never predicts it.
print(classification_report(y_test, fake_pred, zero_division=0))

# Last expression of the cell: (model accuracy, baseline accuracy).
tuple(accuracy_score(y_test, predicted) for predicted in (y_pred, fake_pred))

              precision    recall  f1-score   support

           0       1.00      0.85      0.92        40
           1       0.93      1.00      0.96        74

    accuracy                           0.95       114
   macro avg       0.96      0.93      0.94       114
weighted avg       0.95      0.95      0.95       114

              precision    recall  f1-score   support

           0       0.35      1.00      0.52        40
           1       0.00      0.00      0.00        74

    accuracy                           0.35       114
   macro avg       0.18      0.50      0.26       114
weighted avg       0.12      0.35      0.18       114



(0.9473684210526315, 0.3508771929824561)