<a href="https://colab.research.google.com/github/vitaldb/examples/blob/master/predict_mortality.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#원내 사망 예측 모델
본 예제에서는 vitaldb 데이터셋으로부터 술 후 원내 사망을 예측하는 모델을 개발해 본다.

In [1]:
# 데이터를 읽고 입출력 변수를 지정
import pandas as pd
OUTCOME_VAR = 'death_inhosp'
INPUT_VARS = ['age', 'preop_hb', 'preop_alb', 'preop_ast', 'preop_cr']
df = pd.read_csv('https://api.vitaldb.net/cases')
df = df[df['asa'] < 6]
ntrain = int(len(df) * 0.7)
y_train = df.loc[:ntrain, OUTCOME_VAR]
x_train = df.loc[:ntrain, INPUT_VARS]
y_test = df.loc[ntrain:, OUTCOME_VAR]
x_test = df.loc[ntrain:, INPUT_VARS]
print('{}/{} train, {}/{} test'.format(sum(y_train), len(y_train), sum(y_test), len(y_test)))

37/4273 train, 18/1970 test


In [2]:
# ASA class
from sklearn.metrics import roc_auc_score, auc, precision_recall_curve
y_pred_asa = df.loc[ntrain:, 'asa']
auroc_asa = roc_auc_score(y_test, y_pred_asa)
prc_asa, rec_asa, thresholds = precision_recall_curve(y_test, y_pred_asa)
auprc_asa = auc(rec_asa, prc_asa)
print('ASA auroc: {:.3f}, auprc: {:.3f}'.format(auroc_asa, auprc_asa))

ASA auroc: 0.900, auprc: 0.335


In [3]:
# logistic regression
from sklearn.impute import SimpleImputer
imp = SimpleImputer().fit(x_train)
x_train_imputed = imp.transform(x_train)
x_test_imputed = imp.transform(x_test)
from sklearn.linear_model import LogisticRegression
model = LogisticRegression().fit(x_train_imputed, y_train)
y_pred_lr = model.predict_proba(x_test_imputed)[:, 1]
auroc_lr = roc_auc_score(y_test, y_pred_lr)
prc_lr, rec_lr, thresholds = precision_recall_curve(y_test, y_pred_lr)
auprc_lr = auc(rec_lr, prc_lr)
print('LR auroc: {:.3f}, auprc: {:.3f}'.format(auroc_lr, auprc_lr))

LR auroc: 0.947, auprc: 0.236


In [4]:
# gradient boosting machine
from xgboost import XGBClassifier
model = XGBClassifier(learning_rate=0.07, max_depth=4, n_estimators=50, subsample=0.8, colsample_bytree=0.8, use_label_encoder=False, eval_metric='logloss')
model.fit(x_train, y_train)
y_pred_gbm = model.predict_proba(x_test)[:, 1]
auroc_gbm = roc_auc_score(y_test, y_pred_gbm)
prc_gbm, rec_gbm, thresholds = precision_recall_curve(y_test, y_pred_gbm)
auprc_gbm = auc(rec_gbm, prc_gbm)
print('GBM auroc: {:.3f}, auprc: {:.3f}'.format(auroc_gbm, auprc_gbm))

GBM auroc: 0.964, auprc: 0.620
