In [1]:
import sys
import os

# Notebooks have no __file__, so the working directory is used as the anchor:
# its parent directory is added to the module search path so that modules
# located there can be imported by later cells.
parent_dir = os.path.dirname(os.getcwd())
# Guard the append so that re-running this cell does not pile up duplicate
# entries in sys.path.
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

In [2]:
import pandas as pd
from skopt.space import Real, Integer
from driver import Driver

# Load Cleaned Master and Base MIMIC-IV Datasets

In [3]:
# Read the cleaned master frame and the base frame from their pickle files
# (paths are relative to the notebook's working directory).
# NOTE(review): unpickling executes arbitrary code, so these files must come
# from a trusted source.
df_master, df_base = (
    pd.read_pickle(pickle_path)
    for pickle_path in (
        '../MIMIC_IV/df_ckd_master_clean.pkl',
        '../MIMIC_IV/df_ckd_base.pkl',
    )
)

# Evaluate KNN Pipeline

In [4]:
# Settings for the KNN run; the whole mapping is handed to Driver unchanged.
KNN_cfg = dict(
    tag='knn',                # label identifying the feature selection model
    response='stage_delta',   # outcome used for binary classification and Cox Proportional Hazards
    duration='duration',      # days until the outcome or censorship
    n_novel=40,               # count of ranked novel features added to the augmented KFRE dataset
    test_size=0.1,            # share of the data held out for testing
    val_size=0.2,             # share of the data held out for testing within validation sets
    n_valsets=5,              # how many training validation sets to build
    n_bayesian=10,            # iterations of Bayesian hyperparameter optimization
    n_folds=5,                # folds used for CoxPH cross validation
    penalizer=0.0007,         # penalizer for the CoxPH model
    # Search space explored by the Bayesian hyperparameter optimization
    auc_space=[Integer(1, 50, name="n_neighbors")],
    random_state=42,          # seed for reproducibility
)

# Build the driver from the config and both data frames, then run the
# classical learning pipeline.
driver = Driver(KNN_cfg, df_master, df_base)
driver.ClassicalLearningPipe()

Testing params: {'n_neighbors': 40}
AUC for params: 0.4987792206142424
Testing params: {'n_neighbors': 10}
AUC for params: 0.5116053635187476
Testing params: {'n_neighbors': 39}
AUC for params: 0.49737141119820427
Testing params: {'n_neighbors': 30}
AUC for params: 0.4956053596185189
Testing params: {'n_neighbors': 23}
AUC for params: 0.5011950134775687
Testing params: {'n_neighbors': 6}
AUC for params: 0.5123235386200508
Testing params: {'n_neighbors': 24}
AUC for params: 0.4992356361273068
Testing params: {'n_neighbors': 17}
AUC for params: 0.5075243252447466
Testing params: {'n_neighbors': 8}
AUC for params: 0.5070314457424231
Testing params: {'n_neighbors': 33}
AUC for params: 0.49643261808297723
Best parameters found:  {'n_neighbors': 6}
Best average AUC across validation sets:  0.5123235386200508
Test AUC: 0.5


Using 12610 background data samples could cause slower run times. Consider using shap.sample(data, K) or shap.kmeans(data, K) to summarize the background as K samples.


  0%|          | 0/1402 [00:00<?, ?it/s]

KeyboardInterrupt: 