## Mount Google Drive


In [4]:
import os
import sys
from google.colab import drive

# Change according to your google drive's file path
workdir = "/content/drive/MyDrive/Internship/Diabetes(NSTEMI)/Development"

try:
    # Mount Drive
    drive.mount('/content/drive', force_remount=True)

    # Change Current Directory
    os.chdir(workdir)
    print("Current Working Directory: ", os.getcwd())

except Exception as e:
    print("Error: Failed to Mount Google Drive : ", e)

Error: Failed to Mount Google Drive :  Error: credential propagation was unsuccessful


## Import Library

In [None]:
import sys
import os
import joblib
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, StackingClassifier
from sklearn.svm import SVC
from sklearn.experimental import enable_halving_search_cv
from sklearn.model_selection import HalvingGridSearchCV
from xgboost import XGBClassifier
from sklearn.ensemble import GradientBoostingClassifier

In [None]:
# Add the uitilities into path
project_root = os.path.abspath('./utilities')

if project_root not in sys.path:
    sys.path.append(project_root)

# Import evaluation from the evaluation.py
from evaluation import evaluate, nri_score


## Import and selected data

In [None]:
# Import Files
X_train = joblib.load('./split/X_train.pkl')
y_train = joblib.load('./split/y_train.pkl')
X_test = joblib.load('./split/X_test.pkl')
y_test = joblib.load('./split/y_test.pkl')
timi_nstemi_test = joblib.load('./split/timi_nstemi_test.pkl')

In [None]:
# Load Selected Features
selected_features = joblib.load('./selected_features/best_features.pkl')
print(selected_features)

['ptageatnotification', 'killipclass', 'crenal', 'cardiaccath', 'pci', 'bb', 'acei', 'arb', 'calcantagonist', 'bpsys', 'bpdias', 'ck', 'tc', 'ldlc', 'tg', 'fbg']


In [None]:
# Selected Features
X_train = X_train[selected_features]
X_test = X_test[selected_features]

In [None]:
# NRI Testing: NA value will be excluded during evaluation will not be counted for improvement #(if for AMI need combine as one person only will hv score in STEMI/NSTEMI)
timi_score_test = timi_nstemi_test

## Logistic Regression

In [None]:
# Fixing label type error
y_train = y_train.astype(int)
y_test = y_test.astype(int)

In [None]:
# Logistic Regression
lr = LogisticRegression(random_state = 88)

# grid search
param_grid = {
    'penalty': ['l1', 'l2'],
    'C': [0.4, 0.5, 0.6],
    'class_weight': ['balanced'], # adjust for class imbalance automatically
    'solver': ['liblinear'],
    'max_iter': [5, 6, 7, 8, 9, 10] # will selects the combination that produces the best ROC-AUC
}

grid_search_lr = GridSearchCV(
    estimator=lr, # model you want to optimize
    param_grid=param_grid, #  dictionary of parameters to try
    cv=5, # 5-fold cross-validation
    scoring='roc_auc', # evaluate each parameter combination based on ROC-AUC
    n_jobs=-1, #  use all available CPU cores to train as many combinations simultaneously
    verbose=2 # print detailed progress logs
)

grid_search_lr.fit(X_train, y_train)

print("Best Parameters for Logistic Regression:")
print(grid_search_lr.best_params_)

best_lr = grid_search_lr.best_estimator_

Fitting 5 folds for each of 36 candidates, totalling 180 fits
Best Parameters for Logistic Regression:
{'C': 0.4, 'class_weight': 'balanced', 'max_iter': 7, 'penalty': 'l1', 'solver': 'liblinear'}




In [None]:
# Predict probabilities (Training)
y_train_probs = best_lr.predict_proba(X_train)[:, 1] # # [:, 1]: returns only probabilities for death

# Predict probabilities (Testing)
y_prob = best_lr.predict_proba(X_test)[:, 1]

# convert missing value to -1
old_preds = timi_score_test.apply(lambda x: 1 if x >= 4 else (0 if pd.notna(x) else -1)) # TIMI score ≥ 4 → 1 (death), not NaN & < 4 → 0 (alive), NaN → -1 (missing label)

# Evaluate model and get threshold
best_threshold, metrics = evaluate(y_test, y_prob, y_train=y_train, y_train_pred_probs=y_train_probs)

# Use threshold to binarize the new model prediction
new_preds = (y_prob > best_threshold).astype(int)

# Use nri_score to compare with old predictions
nri_value = nri_score(y_test, old_preds, new_preds)


🔹 Threshold Used (Youden's J): 0.49
🧪 AUC-ROC Score (Train): 0.8253
✔️ AUC-ROC Score (Test): 0.8042 (95% CI: 0.7741 - 0.8317)
✔️ Accuracy: 0.7539
✔️ Balanced Accuracy: 0.7394
✔️ Precision: 0.2586
✔️ Recall (Sensitivity): 0.7211
✔️ Specificity: 0.7577
✔️ F1-score: 0.3807
✔️ F2-Score: 0.5311
✔️ Brier Score: 0.1655

Confusion Matrix:
 [[1623  519]
 [  70  181]]

Classification Report:
               precision    recall  f1-score   support

           0     0.9587    0.7577    0.8464      2142
           1     0.2586    0.7211    0.3807       251

    accuracy                         0.7539      2393
   macro avg     0.6086    0.7394    0.6135      2393
weighted avg     0.8852    0.7539    0.7976      2393

🧹 Removed 5 rows with Old Model Prediction = -1 (missing values).

🔹 Reclassification Summary:
✔️ Correctly Upgraded (0 → 1, actual = 1): 101
❌ Incorrectly Upgraded (0 → 1, actual = 0): 303
✔️ Correctly Downgraded (1 → 0, actual = 0): 578
❌ Incorrectly Downgraded (1 → 0, actual = 1): 3

In [None]:
# Save the best_lr model
joblib.dump(best_lr, './models/logistic_regression.pkl')

['./models/logistic_regression.pkl']

## Random Forest

In [None]:
# Random Forest
rf = RandomForestClassifier(random_state=88)

# Grid search for Random Forest
param_grid_rf = {
    'n_estimators': [800, 900, 1000], # number of trees
    'max_depth': [5, 8, 10], # maximum depth of each tree
    'min_samples_split': [12, 15, 20], # minimum number of samples needed to split a node
    'min_samples_leaf': [1, 5, 10], # minimum number of samples that a leaf node must have
    'class_weight': ['balanced'] # adjust for class imbalance automatically
}

grid_search_rf = GridSearchCV(
    estimator=rf,
    param_grid=param_grid_rf,
    cv=5,
    scoring='roc_auc',
    n_jobs=-1,
    verbose=1 # show basic progress messages
)

grid_search_rf.fit(X_train, y_train)

print("Best Parameters for Random Forest:")
print(grid_search_rf.best_params_)

best_rf = grid_search_rf.best_estimator_

Fitting 5 folds for each of 81 candidates, totalling 405 fits
Best Parameters for Random Forest:
{'class_weight': 'balanced', 'max_depth': 5, 'min_samples_leaf': 5, 'min_samples_split': 12, 'n_estimators': 1000}


In [None]:
# Predict probabilities (Training)
y_train_probs = best_rf.predict_proba(X_train)[:, 1]

# Predict probabilities (Testing)
y_prob = best_rf.predict_proba(X_test)[:, 1]

# convert missing value to -1
old_preds = timi_score_test.apply(lambda x: 1 if x >= 4 else (0 if pd.notna(x) else -1))

# Evaluate model and get threshold
best_threshold, metrics = evaluate(y_test, y_prob, y_train=y_train, y_train_pred_probs=y_train_probs)

# Use threshold to binarize the new model prediction
new_preds = (y_prob > best_threshold).astype(int)

# Use nri_score to compare with old predictions
nri_value = nri_score(y_test, old_preds, new_preds)


🔹 Threshold Used (Youden's J): 0.41
🧪 AUC-ROC Score (Train): 0.8778
✔️ AUC-ROC Score (Test): 0.8239 (95% CI: 0.7972 - 0.8488)
✔️ Accuracy: 0.7179
✔️ Balanced Accuracy: 0.7492
✔️ Precision: 0.2415
✔️ Recall (Sensitivity): 0.7888
✔️ Specificity: 0.7096
✔️ F1-score: 0.3697
✔️ F2-Score: 0.5428
✔️ Brier Score: 0.1526

Confusion Matrix:
 [[1520  622]
 [  53  198]]

Classification Report:
               precision    recall  f1-score   support

           0     0.9663    0.7096    0.8183      2142
           1     0.2415    0.7888    0.3697       251

    accuracy                         0.7179      2393
   macro avg     0.6039    0.7492    0.5940      2393
weighted avg     0.8903    0.7179    0.7713      2393

🧹 Removed 5 rows with Old Model Prediction = -1 (missing values).

🔹 Reclassification Summary:
✔️ Correctly Upgraded (0 → 1, actual = 1): 110
❌ Incorrectly Upgraded (0 → 1, actual = 0): 381
✔️ Correctly Downgraded (1 → 0, actual = 0): 552
❌ Incorrectly Downgraded (1 → 0, actual = 1): 2

In [None]:
# Save the best_lr model
joblib.dump(best_rf, './models/random_forest.pkl')

['./models/random_forest.pkl']

## Linear SVC

In [3]:
# Define SVC with balanced class weights
svc = SVC(kernel='linear', class_weight='balanced', probability=True, random_state=88)

# Parameter grid
param_grid = {
    'C': [11, 13, 15], # how strict is the model
    'gamma': ['scale', 'auto', 0.0001, 0.00001], # controls how far the model looks around each point # small = model looks wider = smoother boundary # big = model looks closer = sharp decision lines
    'kernel': ['rbf', 'poly', 'sigmoid'] # determines the shape of the decision line
}

# Halving grid search
grid = HalvingGridSearchCV( # faster than GridSearchCV, used when have many parameter combinations
    estimator=svc,
    param_grid=param_grid,
    scoring='roc_auc',
    cv=5,
    factor=2, # At each iteration, only keep the top 50% of models (1/2)
    n_jobs=-1,
    verbose=1,
    random_state=42
)

# Fit
grid.fit(X_train, y_train)

# Output best params
print("Best Parameters for SVC:")
print(grid.best_params_)

# Get best model
best_svc_linear = grid.best_estimator_

NameError: name 'SVC' is not defined

In [None]:
# Predict probabilities (Training)
y_train_probs = best_svc_linear.predict_proba(X_train)[:, 1]

# Predict probabilities (Testing)
y_prob = best_svc_linear.predict_proba(X_test)[:, 1]

# convert missing value to -1
old_preds = timi_score_test.apply(lambda x: 1 if x >= 4 else (0 if pd.notna(x) else -1))

# Evaluate model and get threshold
best_threshold, metrics = evaluate(y_test, y_prob, y_train=y_train, y_train_pred_probs=y_train_probs)

# Use threshold to binarize the new model prediction
new_preds = (y_prob > best_threshold).astype(int)

# Use nri_score to compare with old predictions
nri_value = nri_score(y_test, old_preds, new_preds)


🔹 Threshold Used (Youden's J): 0.04
🧪 AUC-ROC Score (Train): 0.8189
✔️ AUC-ROC Score (Test): 0.8138 (95% CI: 0.7858 - 0.8406)
✔️ Accuracy: 0.7363
✔️ Balanced Accuracy: 0.7437
✔️ Precision: 0.2493
✔️ Recall (Sensitivity): 0.7530
✔️ Specificity: 0.7344
✔️ F1-score: 0.3746
✔️ F2-Score: 0.5363
✔️ Brier Score: 0.0849

Confusion Matrix:
 [[1573  569]
 [  62  189]]

Classification Report:
               precision    recall  f1-score   support

           0     0.9621    0.7344    0.8329      2142
           1     0.2493    0.7530    0.3746       251

    accuracy                         0.7363      2393
   macro avg     0.6057    0.7437    0.6038      2393
weighted avg     0.8873    0.7363    0.7849      2393

🧹 Removed 5 rows with Old Model Prediction = -1 (missing values).

🔹 Reclassification Summary:
✔️ Correctly Upgraded (0 → 1, actual = 1): 104
❌ Incorrectly Upgraded (0 → 1, actual = 0): 338
✔️ Correctly Downgraded (1 → 0, actual = 0): 564
❌ Incorrectly Downgraded (1 → 0, actual = 1): 3

In [None]:
# Save the best_lr model
joblib.dump(best_svc_linear, './models/linear_svc.pkl')

['./models/linear_svc.pkl']

## XGBoost

In [None]:
# Calculate scale_pos_weight from y_train
neg_count = np.sum(y_train == 0)
pos_count = np.sum(y_train == 1)
calculated_spw = neg_count / pos_count # weighting factor

In [None]:
# Convert all object columns to category (FOR XGBoost compatibility)
for col in X_train.columns:
    if X_train[col].dtype == 'object':
        X_train[col] = X_train[col].astype('category')
        X_test[col] = X_test[col].astype('category')

# XGBoost Classifier
xgb = XGBClassifier(
    use_label_encoder=False, # disables the old label encoder to convert class labels into numbers (0, 1)
    enable_categorical=True,
    eval_metric='logloss', # measures how well the predicted probabilities match the actual labels
    random_state=88
    )

# Grid search for XGBoost
param_grid_xgb = {
    'n_estimators': [400, 600, 800],
    'max_depth': [2, 3],
    'learning_rate': [0.001, 0.01],
    'subsample': [0.01, 0.1], # percentage of data used to grow each tree.
    'colsample_bytree': [0.3, 0.5], # fraction of features (columns) used in each tree
    'scale_pos_weight': [calculated_spw],  # class imbalance weight
    'reg_lambda': [1],
    'reg_alpha': [0, 1],             # L1 regularization
    'min_child_weight': [1]           # Minimum child node size
}


grid_search_xgb = GridSearchCV(
    estimator=xgb,
    param_grid=param_grid_xgb,
    cv=5,
    scoring='roc_auc',
    n_jobs=-1,
    verbose=2
)

# Object error fix
# X_train['ptsex'] = X_train['ptsex'].astype('category')
# X_test['ptsex'] = X_test['ptsex'].astype('category')

grid_search_xgb.fit(X_train, y_train)

print("Best Parameters for XGBoost:")
print(grid_search_xgb.best_params_)

best_xgb = grid_search_xgb.best_estimator_

Fitting 5 folds for each of 96 candidates, totalling 480 fits


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Best Parameters for XGBoost:
{'colsample_bytree': 0.5, 'learning_rate': 0.01, 'max_depth': 2, 'min_child_weight': 1, 'n_estimators': 400, 'reg_alpha': 0, 'reg_lambda': 1, 'scale_pos_weight': np.float64(21.228682170542637), 'subsample': 0.1}


In [None]:
# Predict probabilities (Training)
y_train_probs = best_xgb.predict_proba(X_train)[:, 1]

# Predict probabilities (Testing)
y_prob = best_xgb.predict_proba(X_test)[:, 1]

# convert missing value to -1
old_preds = timi_score_test.apply(lambda x: 1 if x >= 4 else (0 if pd.notna(x) else -1))

# Evaluate model and get threshold
best_threshold, metrics = evaluate(y_test, y_prob, y_train=y_train, y_train_pred_probs=y_train_probs)

# Use threshold to binarize the new model prediction
new_preds = (y_prob > best_threshold).astype(int)

# Use nri_score to compare with old predictions
nri_value = nri_score(y_test, old_preds, new_preds)


🔹 Threshold Used (Youden's J): 0.44
🧪 AUC-ROC Score (Train): 0.8419
✔️ AUC-ROC Score (Test): 0.8243 (95% CI: 0.7978 - 0.8483)
✔️ Accuracy: 0.7480
✔️ Balanced Accuracy: 0.7520
✔️ Precision: 0.2596
✔️ Recall (Sensitivity): 0.7570
✔️ Specificity: 0.7470
✔️ F1-score: 0.3866
✔️ F2-Score: 0.5472
✔️ Brier Score: 0.1503

Confusion Matrix:
 [[1600  542]
 [  61  190]]

Classification Report:
               precision    recall  f1-score   support

           0     0.9633    0.7470    0.8414      2142
           1     0.2596    0.7570    0.3866       251

    accuracy                         0.7480      2393
   macro avg     0.6114    0.7520    0.6140      2393
weighted avg     0.8895    0.7480    0.7937      2393

🧹 Removed 5 rows with Old Model Prediction = -1 (missing values).

🔹 Reclassification Summary:
✔️ Correctly Upgraded (0 → 1, actual = 1): 99
❌ Incorrectly Upgraded (0 → 1, actual = 0): 329
✔️ Correctly Downgraded (1 → 0, actual = 0): 582
❌ Incorrectly Downgraded (1 → 0, actual = 1): 25

In [None]:
# Save the best_lr model
joblib.dump(best_xgb, './models/xgboost.pkl')

['./models/xgboost.pkl']

## GBM

In [None]:
gbm = GradientBoostingClassifier(random_state=88)

# Step 2: Define hyperparameter grid
param_grid_gbm = {
    'n_estimators': [400],
    'learning_rate': [0.1],
    'max_depth': [1],
    'min_samples_split': [0.001, 0.01],
    'min_samples_leaf': [1],
    'subsample': [1]
}

# Step 3: Setup GridSearchCV
grid_search_gbm = GridSearchCV(
    estimator=gbm,
    param_grid=param_grid_gbm,
    cv=5,
    scoring='roc_auc',
    n_jobs=-1,
    verbose=1
)

# Step 4: Fit the model
grid_search_gbm.fit(X_train, y_train)

# Step 5: Output best parameters and model
print("Best Parameters for Gradient Boosting:")
print(grid_search_gbm.best_params_)

best_gbm = grid_search_gbm.best_estimator_

Fitting 5 folds for each of 2 candidates, totalling 10 fits
Best Parameters for Gradient Boosting:
{'learning_rate': 0.1, 'max_depth': 1, 'min_samples_leaf': 1, 'min_samples_split': 0.001, 'n_estimators': 400, 'subsample': 1}


In [None]:
# Predict probabilities (Training)
y_train_probs = best_gbm.predict_proba(X_train)[:, 1]

# Predict probabilities (Testing)
y_prob = best_gbm.predict_proba(X_test)[:, 1]

# convert missing value to -1
old_preds = timi_score_test.apply(lambda x: 1 if x >= 4 else (0 if pd.notna(x) else -1))

# Evaluate model and get threshold
best_threshold, metrics = evaluate(y_test, y_prob, y_train=y_train, y_train_pred_probs=y_train_probs)

# Use threshold to binarize the new model prediction
new_preds = (y_prob > best_threshold).astype(int)

# Use nri_score to compare with old predictions
nri_value = nri_score(y_test, old_preds, new_preds)


🔹 Threshold Used (Youden's J): 0.03
🧪 AUC-ROC Score (Train): 0.8485
✔️ AUC-ROC Score (Test): 0.8143 (95% CI: 0.7851 - 0.8399)
✔️ Accuracy: 0.6770
✔️ Balanced Accuracy: 0.7387
✔️ Precision: 0.2200
✔️ Recall (Sensitivity): 0.8167
✔️ Specificity: 0.6606
✔️ F1-score: 0.3466
✔️ F2-Score: 0.5294
✔️ Brier Score: 0.0855

Confusion Matrix:
 [[1415  727]
 [  46  205]]

Classification Report:
               precision    recall  f1-score   support

           0     0.9685    0.6606    0.7855      2142
           1     0.2200    0.8167    0.3466       251

    accuracy                         0.6770      2393
   macro avg     0.5942    0.7387    0.5660      2393
weighted avg     0.8900    0.6770    0.7394      2393

🧹 Removed 5 rows with Old Model Prediction = -1 (missing values).

🔹 Reclassification Summary:
✔️ Correctly Upgraded (0 → 1, actual = 1): 109
❌ Incorrectly Upgraded (0 → 1, actual = 0): 430
✔️ Correctly Downgraded (1 → 0, actual = 0): 498
❌ Incorrectly Downgraded (1 → 0, actual = 1): 2

In [None]:
# Save the model
joblib.dump(best_gbm, './models/gbm.pkl')

['./models/gbm.pkl']

## Stack All Models

In [None]:
# Load Models
xgboost_model = joblib.load('./models/xgboost.pkl')
logistic_regression_model = joblib.load('./models/logistic_regression.pkl')
random_forest_model = joblib.load('./models/random_forest.pkl')
svc_linear_model = joblib.load('./models/linear_svc.pkl')
gbm_model = joblib.load('./models/gbm.pkl')

In [None]:
for col in X_train.columns:
    if X_train[col].dtype == 'object':
        X_train[col] = X_train[col].astype('category')
        X_test[col] = X_test[col].astype('category')

In [None]:
# Base Model
estimators = [
    ('lr', logistic_regression_model),
    ('rf', random_forest_model),
    ('svc', svc_linear_model),
    ('xgb', xgboost_model),
    ('gbm', gbm_model),
]

# Ensure categorical data types
# X_train['ptsex'] = X_train['ptsex'].astype('category')
# X_test['ptsex'] = X_test['ptsex'].astype('category')

# === Step 3: Generate meta-features from pre-trained base models ===
print("Generating meta-features from base models...")
meta_features_train = np.column_stack([ # np.column_stack: combines all model outputs into one feature matrix
    model.predict_proba(X_train)[:, 1] for _, model in estimators #  returns the probability of the death in loop every model
])

meta_features_test = np.column_stack([
    model.predict_proba(X_test)[:, 1] for _, model in estimators
])


Generating meta-features from base models...


In [None]:
# LR Param Grid
param_grid = {
    'penalty': ['l1', 'l2'],
    'C': [1, 2, 5],
    'class_weight': ['balanced'],
    'solver': ['liblinear'],
    'max_iter': [5, 10, 15]
}

# Tune meta-learner using GridSearchC # train new model: Meta-Learner
meta_learner_gs = GridSearchCV(
    estimator=LogisticRegression(random_state=88),
    param_grid=param_grid,
    cv=5,
    scoring='roc_auc',
    n_jobs=-1
)

meta_learner_gs.fit(meta_features_train, y_train)
print("Best meta-learner parameters:", meta_learner_gs.best_params_)

# === Step 6: Retrieve the best meta-learner ===
best_meta_learner = meta_learner_gs.best_estimator_

# === Step 7: Construct StackingClassifier with pre-fit base models === # train stacking (need meta learner first)
stacking_model_prefit = StackingClassifier(
    estimators=estimators, #  list of pre-trained base models
    final_estimator=best_meta_learner,
    cv='prefit', # all models inside estimators are already trained
    stack_method='auto',
    n_jobs=-1,
    passthrough=False # only base models’ probability predictions are used by the meta-learner, no X_train
)

# === Step 8: Fit stacking classifier on full training data (required for final_estimator) ===
print(f"\nTraining the meta-learner using X_train (shape: {X_train.shape})...")
stacking_model_prefit.fit(X_train, y_train)
print("Meta-learner training complete.")




Best meta-learner parameters: {'C': 5, 'class_weight': 'balanced', 'max_iter': 5, 'penalty': 'l2', 'solver': 'liblinear'}

Training the meta-learner using X_train (shape: (11470, 16))...
Meta-learner training complete.




In [None]:
# Predict probabilities (Training)
y_train_probs = stacking_model_prefit.predict_proba(X_train)[:, 1]

# Predict probabilities (Testing)
y_prob = stacking_model_prefit.predict_proba(X_test)[:, 1]

# convert missing value to -1
old_preds = timi_score_test.apply(lambda x: 1 if x >= 4 else (0 if pd.notna(x) else -1))

# Evaluate model and get threshold
best_threshold, metrics = evaluate(y_test, y_prob, y_train=y_train, y_train_pred_probs=y_train_probs)

# Use threshold to binarize the new model prediction
new_preds = (y_prob > best_threshold).astype(int)

# Use nri_score to compare with old predictions
nri_value = nri_score(y_test, old_preds, new_preds)


🔹 Threshold Used (Youden's J): 0.32
🧪 AUC-ROC Score (Train): 0.8936
✔️ AUC-ROC Score (Test): 0.7954 (95% CI: 0.7657 - 0.8242)
✔️ Accuracy: 0.6862
✔️ Balanced Accuracy: 0.7262
✔️ Precision: 0.2191
✔️ Recall (Sensitivity): 0.7769
✔️ Specificity: 0.6755
✔️ F1-score: 0.3418
✔️ F2-Score: 0.5148
✔️ Brier Score: 0.1539

Confusion Matrix:
 [[1447  695]
 [  56  195]]

Classification Report:
               precision    recall  f1-score   support

           0     0.9627    0.6755    0.7940      2142
           1     0.2191    0.7769    0.3418       251

    accuracy                         0.6862      2393
   macro avg     0.5909    0.7262    0.5679      2393
weighted avg     0.8847    0.6862    0.7465      2393

🧹 Removed 5 rows with Old Model Prediction = -1 (missing values).

🔹 Reclassification Summary:
✔️ Correctly Upgraded (0 → 1, actual = 1): 108
❌ Incorrectly Upgraded (0 → 1, actual = 0): 425
✔️ Correctly Downgraded (1 → 0, actual = 0): 523
❌ Incorrectly Downgraded (1 → 0, actual = 1): 2

In [None]:
# Save the model
joblib.dump(stacking_model_prefit, './models/stacked_all.pkl')

['./models/stacked_all.pkl']

## Stack Ensemble Models

In [None]:
# Base Model
estimators = [
    # ('lr', logistic_regression_model),
    ('rf', random_forest_model),
    # ('svc', svc_linear_model),
    ('xgb', xgboost_model),
    ('gbm', gbm_model),
]

# Ensure categorical data types
# X_train['ptsex'] = X_train['ptsex'].astype('category')
# X_test['ptsex'] = X_test['ptsex'].astype('category')

# === Step 3: Generate meta-features from pre-trained base models ===
print("Generating meta-features from base models...")
meta_features_train = np.column_stack([
    model.predict_proba(X_train)[:, 1] for _, model in estimators
])

meta_features_test = np.column_stack([
    model.predict_proba(X_test)[:, 1] for _, model in estimators
])


Generating meta-features from base models...


In [None]:
# LR Param Grid # LR is default
param_grid = {
    'penalty': ['l1', 'l2'],
    'C': [20, 50],
    'class_weight': ['balanced'],
    'solver': ['liblinear'],
    'max_iter': [10,25,50,70,100]
}

# Tune meta-learner using GridSearchC
meta_learner_gs = GridSearchCV(
    estimator=LogisticRegression(random_state=88),
    param_grid=param_grid,
    cv=5,
    scoring='roc_auc',
    n_jobs=-1
)

meta_learner_gs.fit(meta_features_train, y_train)
print("Best meta-learner parameters:", meta_learner_gs.best_params_)

# === Step 6: Retrieve the best meta-learner ===
best_meta_learner = meta_learner_gs.best_estimator_

# === Step 7: Construct StackingClassifier with pre-fit base models ===
stacking_ensemble_model_prefit = StackingClassifier(
    estimators=estimators,
    final_estimator=best_meta_learner,
    cv='prefit',
    stack_method='auto',
    n_jobs=-1,
    passthrough=False
)

# === Step 8: Fit stacking classifier on full training data (required for final_estimator) ===
print(f"\nTraining the meta-learner using X_train (shape: {X_train.shape})...")
stacking_ensemble_model_prefit.fit(X_train, y_train)
print("Meta-learner training complete.")


Best meta-learner parameters: {'C': 50, 'class_weight': 'balanced', 'max_iter': 70, 'penalty': 'l1', 'solver': 'liblinear'}

Training the meta-learner using X_train (shape: (11470, 16))...
Meta-learner training complete.


In [None]:
# Predict probabilities (Training)
y_train_probs = stacking_ensemble_model_prefit.predict_proba(X_train)[:, 1]

# Predict probabilities (Testing)
y_prob = stacking_ensemble_model_prefit.predict_proba(X_test)[:, 1]

# convert missing value to -1
old_preds = timi_score_test.apply(lambda x: 1 if x >= 4 else (0 if pd.notna(x) else -1))

# Evaluate model and get threshold
best_threshold, metrics = evaluate(y_test, y_prob, y_train=y_train, y_train_pred_probs=y_train_probs)

# Use threshold to binarize the new model prediction
new_preds = (y_prob > best_threshold).astype(int)

# Use nri_score to compare with old predictions
nri_value = nri_score(y_test, old_preds, new_preds)


🔹 Threshold Used (Youden's J): 0.34
🧪 AUC-ROC Score (Train): 0.8908
✔️ AUC-ROC Score (Test): 0.8026 (95% CI: 0.7738 - 0.8294)
✔️ Accuracy: 0.7183
✔️ Balanced Accuracy: 0.7266
✔️ Precision: 0.2333
✔️ Recall (Sensitivity): 0.7371
✔️ Specificity: 0.7162
✔️ F1-score: 0.3544
✔️ F2-Score: 0.5147
✔️ Brier Score: 0.1522

Confusion Matrix:
 [[1534  608]
 [  66  185]]

Classification Report:
               precision    recall  f1-score   support

           0     0.9587    0.7162    0.8199      2142
           1     0.2333    0.7371    0.3544       251

    accuracy                         0.7183      2393
   macro avg     0.5960    0.7266    0.5871      2393
weighted avg     0.8827    0.7183    0.7711      2393

🧹 Removed 5 rows with Old Model Prediction = -1 (missing values).

🔹 Reclassification Summary:
✔️ Correctly Upgraded (0 → 1, actual = 1): 105
❌ Incorrectly Upgraded (0 → 1, actual = 0): 375
✔️ Correctly Downgraded (1 → 0, actual = 0): 560
❌ Incorrectly Downgraded (1 → 0, actual = 1): 3

In [None]:
# Save the model
joblib.dump(stacking_ensemble_model_prefit, './models/stacked_ensemble.pkl')

['./models/stacked_ensemble.pkl']