In [None]:
import pickle
import os
import numpy as np
import pandas as pd
from datetime import datetime
import matplotlib.pyplot as plt

## Torch ##
import torch
print(torch.__version__)
from torch import nn, optim
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler

## SHAP ##
import shap 

## Utils ##
from deeputils.DeepLearningArchitecture import DeepLearningArchitecture
from deeputils.feature_name_map import feature_name_dict
from deeputils.SHAP_Features import model_xai

from sklearn.model_selection import train_test_split

### Use best Device (CUDA vs CPU) ###
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
## Print Device Properties ##
# Compare on the device type so no second torch.device object has to be built.
if device.type == 'cuda':
    print(torch.cuda.get_device_properties(device))

### Seed ###
# One seed shared by NumPy and every torch generator (CPU + all CUDA devices).
random_state = 1234
np.random.seed(random_state)
torch.manual_seed(random_state)
torch.cuda.manual_seed(random_state)
torch.cuda.manual_seed_all(random_state)
## CUDNN ##
torch.backends.cudnn.enabled = True
# Autotuning (benchmark=True) may select different kernels from run to run,
# which works against the seeding above; keep it off when determinism is wanted.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

In [None]:
## Run configuration ##
skip_complete=True            # flag meant for skipping variants that already have results
problem_id = 20               # index into `criteria`; also offsets the label column below
time_len = '1year'
loss_type = 'weighted_loss'

## Project root ##
# Every path below is derived from this single root. It can be overridden with
# the GL_SMART_ROOT environment variable; the default is the original location.
# os.path.join(..., '') guarantees exactly one trailing separator.
project_root = os.path.join(os.environ.get('GL_SMART_ROOT', '/home/wenqi/GL-SMART-aim2/'), '')

## Load the pre-processed table and select features / label ##
raw_train_data = pd.read_csv('{}pre-processed-data/pp_data_preop_{}.csv'.format(project_root, time_len))
feature_list = raw_train_data.columns[:172]
x = raw_train_data[feature_list].drop('PID', axis='columns')
focus_id = problem_id + 201
focus_label = raw_train_data.columns[focus_id]
# Explicit copy: the binarisation below must only touch `y`, never the raw
# frame (without the copy, whether the raw column changes depends on the
# pandas version / copy-on-write mode).
y = raw_train_data[focus_label].copy()

## Binarise the label ##
# Per-problem criterion (4 or 5) selects the cut-off: 4 -> 4, 5 -> 4.5.
criteria = [4, 4, 5, 5, 4, 5, 5, 5, 5, 5, 4, 4, 4, 5, 5, 4, 5, 4, 5, 4, 4, 5]
c = criteria[problem_id]
binarize_threshold = {4: 4, 5: 4.5}
if c in binarize_threshold:
    threshold = binarize_threshold[c]
    # Both masks are computed from the untouched scores before any write;
    # rows that satisfy neither comparison (e.g. NaN) are left as they are.
    below_threshold = y < threshold
    at_or_above_threshold = y >= threshold
    y[below_threshold] = 0
    y[at_or_above_threshold] = 1

## Folder paths ##
models_folder = '{}{}/deep-models-{}/{}/'.format(project_root, loss_type, time_len, focus_label)
results_folder = models_folder + 'results/'
results_file = results_folder + 'Validation-80Trained_HPTuneSpreadsheet.csv'
figures_folder = project_root + 'Figures/FeatureRanks/'

## File with Deep Learning models HP Tuning Information ##
hptune_csv = results_file
hptune_df = pd.read_csv(hptune_csv)

## Outcome variable ##
outcome_column='death90+vent'
## Feature Ranks (output) ##
feature_ranks_path = results_folder+'FeatureRanks_All.csv'

## SHAP Variables ##
top_n_features = 20

### Deep Learning Model Classifiers ###
# Only the uncommented variants are processed.
model_type_list = [
    'relu',
    # 'softmax',
    # 'sigmoid',
    # 'gumbel_softmax',
]

## Dictionary with Feature Ranks ##
# Filled per model variant with that variant's 'ranks' and 'shap_values'.
f_rankings_dict = {}

In [None]:
## Hold out 20% of the rows as the test set; the split is seeded with random_state ##
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=random_state,
)


In [None]:
## Iterate across model variants ##
# For each variant: rebuild the network, load its checkpoint, run model_xai
# and pickle the resulting ranks / SHAP values.
# NOTE(review): `skip_complete` is not consulted here, so every variant is
# recomputed and its pickle overwritten on each run.
for model_variant in model_type_list:
    print("Variant: %s"%model_variant)
    ## Variant Start Time ##
    variant_start = datetime.now()

    ## SHAP Results dictionary path ##
    output_path = results_folder+'FeatureRanks_'+model_variant+'.pkl'

    ## Setup Hyperparameter Values ##
    # Fixed values; the per-variant lookup in the HPTune spreadsheet is not used.
    dropout1_p = 0.0
    dropout2_p = 0.0
    dropout3_p = 0.0
    clustering_neurons = 128
    learning_rate = 1e-4
    outcome_variable = 'death90+vent'

    ## Model Name ##
    # The checkpoint file name encodes the variant and every hyperparameter above.
    model_name = 'ALLFEATURES_ACTIVATION={}_CN={}_D1P={}_D2P={}_D3P={}_LR={}_OUTCOME={}'.format(
        model_variant, clustering_neurons, dropout1_p, dropout2_p, dropout3_p,
        learning_rate, outcome_variable)

    ## Model Load ##
    model = DeepLearningArchitecture(dropout1_p=dropout1_p, dropout2_p=dropout2_p, dropout3_p=dropout3_p,
                                     clustering_neurons=clustering_neurons,
                                     penultimate_activation_type=model_variant)
    checkpoint_path = '{}{}_checkpoint.pth'.format(models_folder, model_name)
    # map_location remaps the stored tensors onto the active device, so a
    # checkpoint saved on GPU still loads when the notebook falls back to CPU.
    model.load_state_dict(torch.load(checkpoint_path, map_location=device))
    model.to(device)
    model.eval()

    ## Set SHAP Results ##
    # NOTE(review): x_train is passed for both data arguments of model_xai;
    # confirm against its signature that this is intended.
    variant_results = f_rankings_dict[model_variant] = {}
    variant_results['ranks'], variant_results['shap_values'] = model_xai(
        model, x_train, x_train,
        model_type=model_variant,
        feature_names=np.array(x_train.columns),
        top_n_features=top_n_features,
        savefigpath=figures_folder,
        random_state=random_state,
        device=device)

    ## Save Dictionary ##
    with open(output_path, 'wb') as fh:
        pickle.dump(variant_results, fh)

    ## Variant Time (minutes) ##
    variant_time= (datetime.now()-variant_start).total_seconds() / 60.
    print("Variant Time (mins): {:.2f}".format(variant_time) )