## 평가(분류)

In [2]:
# 데이터 로드
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_breast_cancer

def make_dataset(test_size=0.5, random_state=1004):
    """Load the breast-cancer dataset and split it into train and test parts.

    Args:
        test_size: Forwarded to ``train_test_split``. Defaults to 0.5, the
            value this notebook has always used.
        random_state: Seed forwarded to ``train_test_split``. Defaults to
            1004, the value this notebook has always used.

    Returns:
        A tuple ``(X_train, X_test, y_train, y_test)``. The X parts are
        DataFrames whose columns are the dataset's feature names; the y
        parts are Series named ``'target'``.
    """
    bc = load_breast_cancer()
    # Build features and target separately instead of assembling one frame
    # and dropping the target column again; both share the default index.
    features = pd.DataFrame(bc.data, columns=bc.feature_names)
    target = pd.Series(bc.target, name='target')
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=test_size, random_state=random_state)
    return X_train, X_test, y_train, y_test

# Build the split once; the cells below read these four names.
X_train, X_test, y_train, y_test = make_dataset()
# Cell output: the shape of every piece, in the same order as above.
tuple(part.shape for part in (X_train, X_test, y_train, y_test))

((284, 30), (285, 30), (284,), (285,))

In [6]:
from xgboost import XGBClassifier

# Fit an XGBoost classifier (seed 42) on the training split; fit() hands
# back the estimator itself, so construction and training are chained.
model = XGBClassifier(random_state=42).fit(X_train, y_train)

# Predicted labels for the test split, reused by the metric cells below.
pred = model.predict(X_test)

In [7]:
# Accuracy of the predicted labels against the true test labels.
from sklearn.metrics import accuracy_score
accuracy_score(y_true=y_test, y_pred=pred)

0.9614035087719298

In [8]:
# Precision of the predicted labels against the true test labels.
from sklearn.metrics import precision_score
precision_score(y_true=y_test, y_pred=pred)

0.9431818181818182

In [9]:
# Recall of the predicted labels against the true test labels.
from sklearn.metrics import recall_score
recall_score(y_true=y_test, y_pred=pred)

0.9940119760479041

In [10]:
# F1 score of the predicted labels against the true test labels.
from sklearn.metrics import f1_score
f1_score(y_true=y_test, y_pred=pred)

0.967930029154519

In [13]:
# ROC AUC is computed from scores rather than hard labels, so take the
# predicted probabilities and pass the column at index 1 as the score.
from sklearn.metrics import roc_auc_score

pred_proba = model.predict_proba(X_test)
roc_auc_score(y_true=y_test, y_score=pred_proba[:, 1])

0.9831269664061707

In [14]:
# 과정 복습

# 데이터 로드
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_breast_cancer
def make_dataset(test_size=0.5, random_state=1004):
    """Load the breast-cancer dataset and split it into train and test parts.

    Args:
        test_size: Forwarded to ``train_test_split``. Defaults to 0.5, the
            value this notebook has always used.
        random_state: Seed forwarded to ``train_test_split``. Defaults to
            1004, the value this notebook has always used.

    Returns:
        A tuple ``(X_train, X_test, y_train, y_test)``. The X parts are
        DataFrames whose columns are the dataset's feature names; the y
        parts are Series named ``'target'``.
    """
    bc = load_breast_cancer()
    # Build features and target separately instead of assembling one frame
    # and dropping the target column again; both share the default index.
    features = pd.DataFrame(bc.data, columns=bc.feature_names)
    target = pd.Series(bc.target, name='target')
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=test_size, random_state=random_state)
    return X_train, X_test, y_train, y_test

# Re-create the train/test split using the make_dataset defined in this cell.
X_train, X_test, y_train, y_test = make_dataset()

# Random forest
from sklearn.ensemble import RandomForestClassifier
# fit() returns the estimator, so build and train in a single expression.
model1 = RandomForestClassifier(
    n_estimators=500, max_depth=5, random_state=0,
).fit(X_train, y_train)
# Keep the whole probability matrix; the ensemble cell averages it later.
pred1 = model1.predict_proba(X_test)
# Round the column at index 1 into 0/1 labels before scoring accuracy.
print(accuracy_score(y_test, pred1[:, 1].round()))

# XGBoost, using the same tree count, depth and seed as the random forest.
from xgboost import XGBClassifier
# NOTE(review): `use_label_encoder` is deprecated in newer xgboost releases
# and may only produce a warning there; confirm against the installed version.
model2 = XGBClassifier(
    objective='binary:logistic',
    eval_metric='logloss',
    use_label_encoder=False,
    n_estimators=500,
    max_depth=5,
    random_state=0,
)
model2.fit(X_train, y_train)
# Keep the whole probability matrix; the ensemble cell averages it later.
pred2 = model2.predict_proba(X_test)
# Round the column at index 1 into 0/1 labels before scoring accuracy.
print(accuracy_score(y_test, pred2[:, 1].round()))

0.9473684210526315
0.9614035087719298


In [19]:
# Q. Ensemble the random forest and xgboost models above and check the result.
# (Give each model the same weight.)

# Equal weights: average the two probability matrices element-wise.
pred_ensemble = (pred1 + pred2) * 0.5
# Round the averaged column at index 1 into labels and score accuracy.
print(accuracy_score(y_test, pred_ensemble[:, 1].round()))

0.9614035087719298
