In [None]:
import pandas as pd
from sklearn.metrics import (accuracy_score, recall_score, precision_score, 
                            f1_score, roc_auc_score, confusion_matrix)
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import random

# One shared seed for both the stdlib and the NumPy global generators.
SEED = 42
random.seed(SEED)

np.random.seed(SEED)
def plot_confusion_matrix(y_true, y_pred):
    """Draw an annotated heatmap of the binary confusion matrix.

    Rows are the actual classes and columns the predicted classes. The class
    order is pinned to ``[0, 1]`` so the matrix is always 2x2 and the tick
    labels (first 'Survived', then 'Died') stay aligned with classes 0 and 1
    even when one class is missing from the inputs.

    Args:
        y_true: Ground-truth binary labels (0/1).
        y_pred: Predicted binary labels (0/1).
    """
    class_names = ['Survived', 'Died']
    cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
    ax = sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                     xticklabels=class_names,
                     yticklabels=class_names)
    ax.set_xlabel('Predicted')
    ax.set_ylabel('Actual')
    ax.set_title('Confusion Matrix')
    plt.show()

def evaluate_model(y_true, y_pred, y_proba):
    """Summarise binary-classification performance as a one-column table.

    Args:
        y_true: Ground-truth binary labels (0/1).
        y_pred: Predicted hard labels (0/1).
        y_proba: Scores handed to ``roc_auc_score`` (callers in this notebook
            pass the predicted probability of class 1).

    Returns:
        pandas.DataFrame indexed by metric name with a single 'Value' column.
    """
    # Build the confusion matrix once; labels=[0, 1] pins a 2x2 layout so the
    # indexing below works even if a class is absent from this subset.
    cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
    true_neg, false_pos = cm[0, 0], cm[0, 1]
    actual_negatives = true_neg + false_pos
    # Specificity = TN / (TN + FP); reported as NaN when there are no negatives.
    specificity = true_neg / actual_negatives if actual_negatives else float('nan')

    metrics = {
        'Accuracy' : accuracy_score(y_true, y_pred),
        'Sensitivity (Recall)': recall_score(y_true, y_pred),
        'Specificity': specificity,
        'Precision': precision_score(y_true, y_pred),
        'F1-Score': f1_score(y_true, y_pred),
        'AUC-ROC': roc_auc_score(y_true, y_proba)
    }
    return pd.DataFrame.from_dict(metrics, orient='index', columns=['Value'])

In [None]:
# Import dataset: read both CSV files into DataFrames.
df_clean = pd.read_csv('df_clean.csv')
df = pd.read_csv('df_original.csv')

In [None]:
# Display the frame loaded from 'df_clean.csv'.
df_clean

In [None]:
# Drop id from dataset: remove the three identifier columns before modelling.
id_columns = ['subject_id', 'hadm_id', 'stay_id']
new_df = df_clean.drop(columns=id_columns)

In [None]:
# Display the frame with the identifier columns removed.
new_df

In [None]:
# Check class balance: count rows per value of the target column.
class_counts = new_df['in_hospital_mortality'].value_counts()
print(class_counts)

## Resample to address class imbalance

In [None]:
from imblearn.over_sampling import SMOTE

# Separate the target column from the predictors.
y = new_df['in_hospital_mortality']
X = new_df.drop(columns='in_hospital_mortality')

# NOTE(review): the whole dataset is oversampled here and only split into
# train/test in a later cell, so synthetic rows interpolated from samples that
# end up in the test set can also appear in training. Consider resampling the
# training fold only.
smote = SMOTE(random_state=42)
X_res, y_res = smote.fit_resample(X, y)

## Standardize

In [None]:
# Every column of the resampled frame is selected for scaling.
num_cols = X_res.columns

In [None]:
# Display the column labels selected for scaling.
num_cols

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardize the selected columns and write them back into X_res.
# NOTE(review): the scaler is fitted on all rows before the train/test split,
# so test-set statistics influence the transform - confirm this is acceptable.
scaler = StandardScaler()
scaled_values = scaler.fit(X_res[num_cols]).transform(X_res[num_cols])
X_res[num_cols] = scaled_values

In [None]:
# Display the resampled feature frame after scaling.
X_res

In [None]:
# Collapse 'gender' to two codes: 1 where the value is >= 0.5, otherwise -1.
# NOTE(review): when the cells run in order, 'gender' has already been
# standardized with the other columns, so 0.5 is a cut-off on the scaled
# values rather than on the raw encoding - confirm that is intended.
gender_is_high = X_res['gender'].ge(0.5)
X_res['gender'] = gender_is_high.map({True: 1, False: -1})

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score

X = X_res
y = y_res
# Stratified 80/20 split. random_state is fixed so that re-running this cell
# reproduces the same partition instead of depending on how much of the global
# NumPy random stream has already been consumed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

In [None]:
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.linear_model import LogisticRegression

# Candidate classifiers compared by cross-validation in the next cell.
# Explicit seeds keep the comparison repeatable when this cell is re-run,
# rather than relying on the state of the global NumPy generator.
models = {
    "Random Forest": RandomForestClassifier(class_weight='balanced', random_state=42),
    "XGBoost": XGBClassifier(scale_pos_weight=1, random_state=42),
    "Logistic Regression": LogisticRegression(class_weight='balanced')
}

In [None]:
from sklearn.model_selection import cross_val_score

# 10-fold cross-validated ROC AUC on the training split for each candidate.
for model_name, estimator in models.items():
    fold_auc = cross_val_score(estimator, X_train, y_train, scoring='roc_auc', cv=10)
    mean_auc, std_auc = fold_auc.mean(), fold_auc.std()
    print(f"{model_name} AUC: {mean_auc:.3f} (±{std_auc:.3f})")

In [None]:
# Negative-to-positive ratio of the labels, used as the positive-class weight.
neg_pos_ratio = (y == 0).sum() / (y == 1).sum()

# Baseline XGBoost model with hand-picked hyperparameters.
model = XGBClassifier(
    learning_rate=0.1,
    max_depth=5,
    n_estimators=200,
    eval_metric='auc',
    scale_pos_weight=neg_pos_ratio
)
model.fit(X_train, y_train)

# Class-1 probabilities and hard labels for the held-out split.
y_proba = model.predict_proba(X_test)[:,1]
y_pred = model.predict(X_test)

print(evaluate_model(y_test,y_pred,y_proba))

# Feature importances, most important first (displayed as the cell output).
baseline_importance = pd.DataFrame({
    'feature': X.columns,
    'importance': model.feature_importances_
})
baseline_importance.sort_values(by='importance', ascending=False)


In [None]:
# Confusion-matrix heatmap for the most recent test-set predictions.
plot_confusion_matrix(y_test, y_pred)

In [None]:
from sklearn.metrics import classification_report, roc_curve

print(classification_report(y_test, y_pred))

# ROC curve of the current predictions, with the chance diagonal for reference.
fpr, tpr, _ = roc_curve(y_test, y_proba)
auc_value = roc_auc_score(y_test, y_proba)

fig, ax = plt.subplots()
ax.plot(fpr, tpr, label=f'AUC = {auc_value:.2f}')
ax.plot([0, 1], [0, 1], linestyle='--')
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('ROC Curve')
ax.legend()
plt.show()

In [None]:
from sklearn.calibration import calibration_curve

# Observed vs. predicted positive rate over 10 quantile bins.
prob_true, prob_pred = calibration_curve(
    y_test, y_proba, n_bins=10, strategy='quantile'
)

# Plot the model curve against the ideal diagonal.
fig, ax = plt.subplots(figsize=(6, 6))
ax.plot(prob_pred, prob_true, marker='o', label='Model')
ax.plot([0, 1], [0, 1], linestyle='--', label='Perfectly calibrated')
ax.set_xlabel('Predicted probability')
ax.set_ylabel('Observed frequency')
ax.set_title('Calibration Plot')
ax.legend()
ax.grid(True)
plt.show()

In [None]:
import shap

# Explain the baseline model on the held-out rows only, matching the SHAP cell
# for the tuned model. Explaining X_res would mix in the training rows the
# model was fitted on as well as the SMOTE-generated samples.
explainer = shap.TreeExplainer(model)
shap_values = explainer.shap_values(X_test)
shap.summary_plot(shap_values, X_test)

In [None]:
from xgboost import XGBClassifier
from sklearn.model_selection import RandomizedSearchCV
import scipy.stats as stats

# Negative-to-positive label ratio; offered as the only scale_pos_weight option.
neg_pos_ratio = (y == 0).sum() / (y == 1).sum()

# Search space for the randomized search. scipy's uniform(loc, scale) samples
# from [loc, loc + scale]; randint(low, high) excludes `high`.
param_dist = {
    'eval_metric' : ['auc'],
    'n_estimators' : stats.randint(50, 500),
    'learning_rate': stats.uniform(0.01, 0.3),
    'max_depth': stats.randint(3, 13),
    'min_child_weight': stats.randint(1, 11),
    'subsample': stats.uniform(0.5, 0.5),
    'colsample_bytree': stats.uniform(0.5, 0.5),
    'gamma': stats.uniform(0, 0.5),
    'scale_pos_weight': [neg_pos_ratio]
}

In [None]:
# Display the training features that the search below is fitted on.
X_train

In [None]:
# NOTE: `model` is rebound here and no longer refers to the baseline model.
model = XGBClassifier(random_state=42)

# Randomized hyperparameter search scored by ROC AUC.
# n_iter=5000 with cv=10 means 50,000 model fits, so this cell is expensive.
# random_state fixes the sampled candidates so the search (and therefore
# best_params_) is repeatable when the cell is re-run.
search = RandomizedSearchCV(
    estimator=model,
    param_distributions=param_dist,
    n_iter=5000,
    scoring='roc_auc',
    cv=10,
    verbose=2,
    n_jobs=-1,
    random_state=42
)
search.fit(X_train, y_train)

In [None]:
# Report the winning configuration and keep its fitted estimator.
best_params = search.best_params_
best_model = search.best_estimator_
print(f"Best params: {best_params}")

In [None]:
# Score the tuned estimator on the held-out split.
y_proba = best_model.predict_proba(X_test)[:, 1]
y_pred = best_model.predict(X_test)

print(evaluate_model(y_test, y_pred, y_proba))

# Feature importances, most important first (displayed as the cell output).
tuned_importance = pd.DataFrame({
    'feature': X_train.columns,
    'importance': best_model.feature_importances_
})
tuned_importance.sort_values(by='importance', ascending=False)

In [None]:
# Confusion-matrix heatmap for the most recent test-set predictions.
plot_confusion_matrix(y_test, y_pred)

In [None]:
from sklearn.metrics import classification_report, roc_curve

print(classification_report(y_test, y_pred))

# ROC curve of the current predictions, with the chance diagonal for reference.
fpr, tpr, _ = roc_curve(y_test, y_proba)
auc_value = roc_auc_score(y_test, y_proba)

fig, ax = plt.subplots()
ax.plot(fpr, tpr, label=f'AUC = {auc_value:.2f}')
ax.plot([0, 1], [0, 1], linestyle='--')
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('ROC Curve')
ax.legend()
plt.show()

In [None]:
# Importance of each feature in the tuned model, largest first.
importance_table = pd.DataFrame({
    'Feature': X.columns,
    'Importance': best_model.feature_importances_
})
importance_table.sort_values(by='Importance', ascending=False)

In [None]:
import shap

# SHAP summary of the tuned model's predictions on the held-out rows.
explainer = shap.TreeExplainer(best_model)
shap_values = explainer.shap_values(X_test)
shap.summary_plot(shap_values, features=X_test)

In [None]:
from sklearn.calibration import calibration_curve

# Observed vs. predicted positive rate over 10 quantile bins.
prob_true, prob_pred = calibration_curve(
    y_test, y_proba, n_bins=10, strategy='quantile'
)

# Plot the model curve against the ideal diagonal.
fig, ax = plt.subplots(figsize=(6, 6))
ax.plot(prob_pred, prob_true, marker='o', label='Model')
ax.plot([0, 1], [0, 1], linestyle='--', label='Perfectly calibrated')
ax.set_xlabel('Predicted probability')
ax.set_ylabel('Observed frequency')
ax.set_title('Calibration Plot')
ax.legend()
ax.grid(True)
plt.show()

# Fairness analysis

In [None]:
# Boolean masks over the test rows; gender is coded 1 (male) / -1 (female).
male_mask = X_test['gender'] == 1
female_mask = X_test['gender'] == -1

X_test_male, y_test_male = X_test[male_mask], y_test[male_mask]
X_test_female, y_test_female = X_test[female_mask], y_test[female_mask]

In [None]:
# Hard labels and class-1 probabilities for the male subgroup ...
y_pred_male = best_model.predict(X_test_male)
y_prob_male = best_model.predict_proba(X_test_male)[:, 1]

# ... and for the female subgroup.
y_pred_female = best_model.predict(X_test_female)
y_prob_female = best_model.predict_proba(X_test_female)[:, 1]

In [None]:
# Print the metric table for each gender subgroup in turn.
group_reports = (
    ("Male group performance:", (y_test_male, y_pred_male, y_prob_male)),
    ("\nFemale group performance:", (y_test_female, y_pred_female, y_prob_female)),
)
for heading, group_args in group_reports:
    print(heading)
    print(evaluate_model(*group_args))


In [None]:
# Fairness metrics reported per gender group:
#   Demographic Parity (DP):               (TP + FP) / Total
#   Equality of Opportunity (Recall, TPR): TP / (TP + FN)
#   Equality of Odds:
#       FNR: FN / (TP + FN)
#       FPR: FP / (FP + TN)

from sklearn.metrics import confusion_matrix


def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or NaN when the denominator is zero."""
    return numerator / denominator if denominator else float('nan')


def _print_fairness_metrics(group_name, y_true, y_pred):
    """Print the positive-prediction rate, recall, FNR and FPR for one group.

    Args:
        group_name: Label used in the printed heading (e.g. "Male").
        y_true: Ground-truth binary labels (0/1) of the group.
        y_pred: Predicted binary labels (0/1) of the group.
    """
    # labels=[0, 1] guarantees four cells to unpack even if the group lacks
    # one of the classes.
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred, labels=[0, 1]).ravel()
    total_samples = len(y_true)

    print(f"{group_name} group:")
    print(f"Detection Prevalence (DP): {_safe_ratio(tp + fp, total_samples):.4f}")
    print(f"Equality of Opportunity: (Recall) {_safe_ratio(tp, tp + fn):.4f}")
    print("Equality of Odds:")
    print(f"       FNR: {_safe_ratio(fn, tp + fn):.4f}")
    print(f"       FPR: {_safe_ratio(fp, fp + tn):.4f}")


# Group sizes in the training split; .get() avoids a KeyError if a group is absent.
gender_counts = X_train['gender'].value_counts()
print(f"F/M ratio: {gender_counts.get(-1, 0)}/{gender_counts.get(1, 0)}")

_print_fairness_metrics("Male", y_test_male, y_pred_male)
print()
_print_fairness_metrics("Female", y_test_female, y_pred_female)

# Ablation study 

In [None]:
# Rebuild X and y from new_df (the frame built before the SMOTE and scaling
# cells) and make a seeded, stratified 80/20 split of it.
y = new_df['in_hospital_mortality']
X = new_df.drop(columns='in_hospital_mortality')

X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, test_size=0.2, random_state=42
)

In [None]:
# Hyperparameters shared by both models; only scale_pos_weight differs.
shared_params = dict(
    colsample_bytree=np.float64(0.7204789960803597),
    eval_metric='auc',
    gamma=np.float64(0.0028207188158109187),
    learning_rate=np.float64(0.09624593233299222),
    max_depth=10,
    min_child_weight=1,
    n_estimators=494,
    subsample=np.float64(0.8661020114176173),
)

# test_model weights positives by the negative/positive label ratio;
# test_model2 leaves the classes unweighted.
test_model = XGBClassifier(scale_pos_weight=sum(y==0)/sum(y==1), **shared_params)
test_model2 = XGBClassifier(scale_pos_weight=1, **shared_params)

test_model.fit(X_train, y_train)
test_model2.fit(X_train, y_train)

In [None]:
# Score the class-weighted model on the held-out split.
y_proba = test_model.predict_proba(X_test)[:, 1]
y_pred = test_model.predict(X_test)

print(evaluate_model(y_test, y_pred, y_proba))
Origin_AUC = roc_auc_score(y_test, y_proba)

# Feature importances, most important first (displayed as the cell output).
weighted_importance = pd.DataFrame({
    'feature': X.columns,
    'importance': test_model.feature_importances_
})
weighted_importance.sort_values(by='importance', ascending=False)

In [None]:
# Score the unweighted model on the held-out split.
y_proba = test_model2.predict_proba(X_test)[:, 1]
y_pred = test_model2.predict(X_test)

print(evaluate_model(y_test, y_pred, y_proba))

# Feature importances, most important first (displayed as the cell output).
unweighted_importance = pd.DataFrame({
    'feature': X.columns,
    'importance': test_model2.feature_importances_
})
unweighted_importance.sort_values(by='importance', ascending=False)

In [None]:
# Switch back to the resampled, scaled data for the feature-ablation study.
X = X_res
y = y_res
# Seeded like the other ablation split so that re-running the cell yields the
# same partition.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# NOTE: this rebinds best_model, replacing the estimator taken from the search.
best_model = XGBClassifier(
    colsample_bytree=np.float64(0.7204789960803597), 
    eval_metric= 'auc', 
    gamma=np.float64(0.0028207188158109187), 
    learning_rate=np.float64(0.09624593233299222), 
    max_depth=10, 
    min_child_weight=1, 
    n_estimators=494, 
    scale_pos_weight=1, 
    subsample=np.float64(0.8661020114176173)
)
best_model.fit(X_train,y_train)
y_pred = best_model.predict(X_test)
y_proba = best_model.predict_proba(X_test)[:, 1]

In [None]:
# Reference AUC of the model trained on all features (from the previous cell).
Origin_AUC = roc_auc_score(y_test, y_proba)
print(f"Original AUC = {Origin_AUC}")

# Smallest AUC drop that counts as "removing this feature hurts".
AUC_TOLERANCE = 0.0000000000001

all_col = X_train.columns
increase_col = []   # features whose removal did NOT lower the AUC
decrease_col = []   # features whose removal lowered the AUC

# Leave-one-feature-out: retrain without each column and compare test AUC.
# The loop uses its own model/score names so it does not overwrite best_model
# or y_proba; re-running this cell therefore still computes Origin_AUC from
# the full-feature model.
# NOTE(review): features are selected using test-set AUC, so the final test
# metrics are optimistic; a validation split would avoid this.
for col in all_col:
    X_train_tmp = X_train.drop(columns=col)
    X_test_tmp = X_test.drop(columns=col)
    ablated_model = XGBClassifier(
        colsample_bytree=np.float64(0.7204789960803597), 
        eval_metric= 'auc', 
        gamma=np.float64(0.0028207188158109187), 
        learning_rate=np.float64(0.09624593233299222), 
        max_depth=10, 
        min_child_weight=1, 
        n_estimators=494, 
        scale_pos_weight=1, 
        subsample=np.float64(0.8661020114176173)
    )
    ablated_model.fit(X_train_tmp, y_train)
    ablated_auc = roc_auc_score(y_test, ablated_model.predict_proba(X_test_tmp)[:, 1])

    print(f"Drop {col}, AUC-ROC: {ablated_auc}")
    if Origin_AUC - ablated_auc > AUC_TOLERANCE:
        decrease_col.append(col)
    else:
        increase_col.append(col)


In [None]:
# Columns whose removal lowered the AUC, then those whose removal did not.
print(f"Decrease:\n {decrease_col}")
print(f"Increase:\n {increase_col}")

In [None]:
# Keep only the features whose removal lowered the AUC in the ablation loop.
X_train_final = X_train.drop(columns=increase_col)
X_test_final = X_test.drop(columns=increase_col)

In [None]:
best_model = XGBClassifier(
    colsample_bytree=np.float64(0.7204789960803597), 
    eval_metric= 'auc', 
    gamma=np.float64(0.0028207188158109187), 
    learning_rate=np.float64(0.09624593233299222), 
    max_depth=10, 
    min_child_weight=1, 
    n_estimators=494, 
    scale_pos_weight=1, 
    subsample=np.float64(0.8661020114176173)
)
best_model.fit(X_train_tmp,y_train)
y_pred = best_model.predict(X_test_tmp)
y_proba = best_model.predict_proba(X_test_tmp)[:, 1]

print(evaluate_model(y_test, y_pred,y_proba))

In [None]:
plot_confusion_matrix(y_test, y_pred)

In [None]:
from sklearn.metrics import classification_report, roc_curve

print(classification_report(y_test, y_pred))
fpr, tpr, _ = roc_curve(y_test, y_proba)
plt.plot(fpr, tpr, label=f'AUC = {roc_auc_score(y_test, y_proba):.2f}')
plt.plot([0, 1], [0, 1], linestyle='--')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve')
plt.legend()
plt.show()

In [None]:
from sklearn.calibration import calibration_curve

# Compute calibration curve
prob_true, prob_pred = calibration_curve(y_test, y_proba, n_bins=10, strategy='quantile')

# Plot
plt.figure(figsize=(6, 6))
plt.plot(prob_pred, prob_true, marker='o', label='Model')
plt.plot([0, 1], [0, 1], linestyle='--', label='Perfectly calibrated')
plt.xlabel('Predicted probability')
plt.ylabel('Observed frequency')
plt.title('Calibration Plot')
plt.legend()
plt.grid(True)
plt.show()

In [None]:
import shap
explainer = shap.TreeExplainer(best_model)
shap_values = explainer.shap_values(X_test_tmp)
shap.summary_plot(shap_values, X_test_tmp)