### Imports 

In [21]:
import ast
import os
import pickle as pkl

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import shap
import xgboost as xgb
from sklearn.calibration import calibration_curve
from sklearn.calibration import CalibratedClassifierCV, calibration_curve
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_curve, auc, roc_auc_score, brier_score_loss, precision_score, recall_score
from sklearn.model_selection import RandomizedSearchCV
from sklearn.multiclass import OneVsRestClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
from xgboost import XGBClassifier

import delong
import integration_images_features_utils as image_utils
import virtual_biopsy_utils as vbu

### Data paths and filenames

In [22]:
x_train_path = '../pkls/x_train_ready_for_training.pkl'
y_train_path = '../pkls/y_train_ready_for_training.pkl'

x_val_path = '../pkls/x_val_ready_for_training.pkl'
y_val_path = '../pkls/y_val_ready_for_training.pkl'

x_test_path = '../pkls/x_test_ready_for_testing.pkl'
y_test_path = '../pkls/y_test_ready_for_testing.pkl'

shap_path = '../pkls/sentara_union_shap.pkl'

### Load pickles

In [23]:
def _load_pickle(path):
    """Return the object pickled at ``path``; the file is closed before returning."""
    # NOTE: unpickling can execute arbitrary code -- only load files from a trusted source.
    with open(path, 'rb') as handle:
        return pkl.load(handle)


x_train = _load_pickle(x_train_path)
y_train = _load_pickle(y_train_path)

x_val = _load_pickle(x_val_path)
y_val = _load_pickle(y_val_path)

x_test = _load_pickle(x_test_path)
y_test = _load_pickle(y_test_path)

shap_feats = _load_pickle(shap_path)

### Define grid search for each model

In [4]:
# Hyper-parameter search spaces sampled by RandomizedSearchCV in train_model.

# XGBoost

min_child_weight = [1, 2, 3, 5, 7]
gamma = [0, 0.1, 0.2, 0.3, 0.4]
colsample_bytree = [0.3, 0.4, 0.5, 0.7, 1.0]
learning_rate = [0.05, 0.10, 0.15, 0.20, 0.25, 0.30, 0.35, 0.40, 0.50]
max_depth = [3, 4, 5, 6, 7, 8, 10]

# Random Forest

n_estimators = [50, 100, 200, 300]
# 'auto' was removed from RandomForestClassifier in scikit-learn 1.3; for
# classifiers it was an alias of 'sqrt', so the old ['auto', 'sqrt'] list only
# offered one distinct choice. 'log2' is the other named option.
max_features = ['sqrt', 'log2']
min_samples_split = [2, 5, 10]

# Logistic Regression

C = [0.001, 0.01, 0.1, 1, 10]
tol = [1e-3, 1e-4, 1e-5]


# The XGBoost model is searched directly, so its keys carry no step prefix.
grid_params_xgb = {'min_child_weight': min_child_weight,
                   'gamma': gamma,
                   'colsample_bytree': colsample_bytree,
                   'learning_rate': learning_rate,
                   'max_depth': max_depth}

# The Random Forest and Logistic Regression models are Pipelines whose final
# step is named 'clf'; nested parameters are addressed as '<step>__<param>'.
# The random forest reuses the max_depth list defined for XGBoost.
grid_params_RF = {'clf__n_estimators': n_estimators,
                  'clf__max_features': max_features,
                  'clf__max_depth': max_depth,
                  'clf__min_samples_split': min_samples_split}

# The 'clf' step is a plain LogisticRegression (no OneVsRest wrapper), so the
# parameters are 'clf__C' / 'clf__tol'; a 'clf__estimator__' prefix is rejected
# by set_params as an invalid parameter.
grid_params_lr = {'clf__C': C,
                  'clf__tol': tol}

### Define variables

In [5]:
# Default number of repeated search runs.
n_iter = 10

# Binary outcome columns, one per lesion type.
classes = [
    'outcome_cancer_type_DCIS',
    'outcome_cancer_type_Invasive',
    'outcome_cancer_type_BenignHR',
    'outcome_cancer_type_Papilloma',
    'outcome_cancer_type_Benign',
]

# Disabled: radiologist-annotation feature group.
# annotations = [x for x in x_train if 'report' in x] + [x for x in x_train if 'calcification_in' in x] + \
#     [x for x in x_train if 'findings' in x] + \
#     ['Calcification', 'Breast Assymetry', 'Tumor', 'Architectural Distortion', 'Axillary lymphadenopathy']

# Every x_train column whose name contains 'pred'.
predictions = []
for column_name in x_train:
    if 'pred' in column_name:
        predictions.append(column_name)


## Main code - TRAIN + Grid Search

In [6]:
def _build_randomized_search(algorithm):
    """Return an un-fitted RandomizedSearchCV wrapping the estimator for ``algorithm``."""
    if algorithm == 'xgboost':
        model = XGBClassifier(eval_metric='auc')
        param_dist = grid_params_xgb
    elif algorithm == 'RandomForest':
        model = Pipeline([
            ('imputation', SimpleImputer(missing_values=np.nan, strategy='mean')),
            ('scaler', MinMaxScaler()),
            ('clf', RandomForestClassifier(n_estimators=100)),
        ])
        param_dist = grid_params_RF
    elif algorithm == 'LogRes':
        # grid_params_lr keys must address this pipeline's 'clf' step.
        model = Pipeline([
            ('imputation', SimpleImputer(missing_values=np.nan, strategy='mean')),
            ('scaler', MinMaxScaler()),
            ('clf', LogisticRegression()),
        ])
        param_dist = grid_params_lr
    else:
        # Previously this fell through and failed later on an unbound PARAM_DIST.
        raise ValueError(
            "Unknown algorithm {!r}; expected 'xgboost', 'RandomForest' or 'LogRes'".format(algorithm))

    return RandomizedSearchCV(estimator=model,
                              param_distributions=param_dist,
                              scoring='roc_auc', n_iter=30,
                              cv=5, n_jobs=-1)


def _select_feature_columns(x_train, x_val, feature_set):
    """Return (x_train, x_val) restricted to ``feature_set`` without modifying the inputs."""
    if feature_set == 'clinical':
        # Non in-place drop: the caller's frames keep 'study_id' and the
        # prediction columns, so later calls with other feature sets still work.
        excluded = ['study_id'] + predictions
        x_train = x_train.drop(columns=excluded)
        x_val = x_val.drop(columns=excluded)
    elif feature_set == 'shap':
        x_train = x_train[shap_feats]
        x_val = x_val[shap_feats]
    elif feature_set == 'predictions':
        x_train = x_train[predictions]
        x_val = x_val[predictions]
    elif feature_set == 'all':
        x_train = x_train[shap_feats + predictions]
        x_val = x_val[shap_feats + predictions]
    else:
        # Unrecognised feature sets keep every column, as before.
        return x_train, x_val

    # Make the validation column order match the training frame.
    return x_train, x_val[x_train.columns]


def train_model(algorithm, x_train, x_val, y_train, y_val, feature_set, n_iter):
    """Run repeated randomized searches per outcome class and score them on validation data.

    For each of ``n_iter`` runs and each column in the module-level ``classes``,
    a RandomizedSearchCV (30 sampled candidates, 5-fold CV, ROC-AUC scoring) is
    fitted on ``x_train`` and the best estimator is scored on ``x_val``.

    Parameters
    ----------
    algorithm : str
        'xgboost', 'RandomForest' or 'LogRes'.
    x_train, x_val : DataFrame
        Feature matrices; they are not modified.
    y_train, y_val : DataFrame
        Outcome tables containing one binary column per entry of ``classes``.
    feature_set : str
        'clinical', 'shap', 'predictions' or 'all'; any other value keeps all columns.
    n_iter : int
        Number of repeated search runs (the candidates per search stay fixed at 30).

    Side effects
    ------------
    * Pickles each best estimator under
      ``../pkls/cancer_prediction_pkls/<algorithm>/<feature_set>/``.
    * Writes the validation probabilities (one row per run/class pair, run-major)
      to ``significance_tests/predict_probs_<algorithm>_<feature_set>.csv``.
    * Writes one AUC table per class to ``output_AUCs_all_models/``.
    Missing output directories are created.

    Raises
    ------
    ValueError
        If ``algorithm`` is not one of the supported names.
    """
    model_CV = _build_randomized_search(algorithm)
    x_train, x_val = _select_feature_columns(x_train, x_val, feature_set)

    model_dir = '../pkls/cancer_prediction_pkls/' + str(algorithm) + '/' + str(feature_set)
    for directory in (model_dir, 'significance_tests', 'output_AUCs_all_models'):
        os.makedirs(directory, exist_ok=True)

    # Per-class AUC statistics, one entry per run.
    auc_mean = {category: [] for category in classes}
    auc_low = {category: [] for category in classes}
    auc_high = {category: [] for category in classes}
    probabilities = []

    for i in range(n_iter):

        print('{}/{}\r'.format(i + 1, n_iter), end='', flush=True)

        for category in classes:

            print('**Processing class {} ...**'.format(category))

            model_CV.fit(x_train, y_train[category])
            prob = model_CV.predict_proba(x_val)[:, 1]

            # One DeLong evaluation yields both confidence bounds.
            ci = delong.get_delong_ci(prob, y_val[category])

            auc_mean[category].append(roc_auc_score(y_val[category], prob))
            auc_low[category].append(ci[0])
            auc_high[category].append(ci[1])
            probabilities.append(prob)

            model_path = model_dir + '/model_' + str(category) + '_run_' + str(i) + '.pkl'
            with open(model_path, 'wb') as handle:
                pkl.dump(model_CV.best_estimator_, handle)

    # Save probabilities
    np.savetxt('significance_tests/predict_probs_' + str(algorithm) + '_' + str(feature_set) + '.csv',
               probabilities, delimiter=',')

    # Create output tables with AUCs in each run (one CSV per class)
    for category in classes:
        label = category.replace('outcome_cancer_type_', '')
        table = pd.DataFrame(data={label + '_AUC_mean': auc_mean[category],
                                   label + '_AUC_lowCI': auc_low[category],
                                   label + '_AUC_highCI': auc_high[category]})
        table.to_csv('output_AUCs_all_models/' + str(algorithm) + '_' + label + '_' + str(feature_set) + '.csv')

In [13]:
# Train the XGBoost models on the combined SHAP + prediction features.
train_model(
    algorithm='xgboost',
    x_train=x_train,
    x_val=x_val,
    y_train=y_train,
    y_val=y_val,
    feature_set='all',
    n_iter=10,
)

**Processing class outcome_cancer_type_DCIS ...**
**Processing class outcome_cancer_type_Invasive ...**
**Processing class outcome_cancer_type_BenignHR ...**
**Processing class outcome_cancer_type_Papilloma ...**
**Processing class outcome_cancer_type_Benign ...**
**Processing class outcome_cancer_type_DCIS ...**
**Processing class outcome_cancer_type_Invasive ...**
**Processing class outcome_cancer_type_BenignHR ...**
**Processing class outcome_cancer_type_Papilloma ...**
**Processing class outcome_cancer_type_Benign ...**
**Processing class outcome_cancer_type_DCIS ...**
**Processing class outcome_cancer_type_Invasive ...**
**Processing class outcome_cancer_type_BenignHR ...**
**Processing class outcome_cancer_type_Papilloma ...**
**Processing class outcome_cancer_type_Benign ...**
**Processing class outcome_cancer_type_DCIS ...**
**Processing class outcome_cancer_type_Invasive ...**
**Processing class outcome_cancer_type_BenignHR ...**
**Processing class outcome_cancer_type_Papillom

# TEST

In [16]:
def test_model(algorithm, x_test, y_test, feature_set, n_iter=10):
    """Score the pickled per-class models of every training run on the test set.

    Parameters
    ----------
    algorithm : str
        Algorithm name used in the model and output paths.
    x_test : DataFrame
        Test features; each model selects the columns it was trained on.
    y_test : DataFrame
        Outcome table with one binary column per entry of ``classes``.
    feature_set : str
        Feature-set name used in the model and output paths.
    n_iter : int, default 10
        Number of runs per class to load (``model_<class>_run_<i>.pkl``).

    Side effects
    ------------
    * Writes the test probabilities to
      ``significance_tests/test_predict_probs_<algorithm>_<feature_set>.csv``;
      rows are class-major, i.e. each consecutive ``n_iter`` rows belong to one class.
    * Writes one AUC table per class to ``output_AUCs_all_models/test_*.csv``.
    Missing output directories are created.
    """
    for directory in ('significance_tests', 'output_AUCs_all_models'):
        os.makedirs(directory, exist_ok=True)

    # Per-class AUC statistics, one entry per run (no fixed 10-run slicing).
    auc_mean = {category: [] for category in classes}
    auc_low = {category: [] for category in classes}
    auc_high = {category: [] for category in classes}
    probabilities = []

    for category in classes:

        for i in range(n_iter):

            model_path = '../pkls/cancer_prediction_pkls/'+str(algorithm)+'/'+str(feature_set)+'/model_' + str(category) + '_run_' + str(i) + '.pkl'

            # NOTE: unpickling can execute arbitrary code -- only load trusted model files.
            with open(model_path, 'rb') as handle:
                model = pkl.load(handle)

            if hasattr(model, 'get_booster'):
                # XGBoost models record the training column names on the booster.
                feature_names = model.get_booster().feature_names
            else:
                # scikit-learn estimators fitted on a DataFrame expose
                # feature_names_in_ -- TODO confirm for the saved pipelines.
                feature_names = list(model.feature_names_in_)

            # Select into a fresh variable so the x_test argument is never narrowed.
            x_model = x_test[feature_names]
            prob = model.predict_proba(x_model)[:, 1]

            # One DeLong evaluation yields both confidence bounds.
            ci = delong.get_delong_ci(prob, y_test[category])

            auc_mean[category].append(roc_auc_score(y_test[category], prob))
            auc_low[category].append(ci[0])
            auc_high[category].append(ci[1])
            probabilities.append(prob)

    # Save probabilities
    np.savetxt('significance_tests/test_predict_probs_' +str(algorithm)+'_'+str(feature_set)+'.csv', probabilities, delimiter=',')

    # Create output tables with AUCs in each run (one CSV per class)
    for category in classes:
        label = category.replace('outcome_cancer_type_', '')
        table = pd.DataFrame(data={label + '_AUC_mean': auc_mean[category],
                                   label + '_AUC_lowCI': auc_low[category],
                                   label + '_AUC_highCI': auc_high[category]})
        table.to_csv('output_AUCs_all_models/test_'+str(algorithm)+'_'+label+'_'+str(feature_set)+'.csv')


In [17]:
# Evaluate the XGBoost models trained on the SHAP-selected features.
test_model(
    algorithm='xgboost',
    x_test=x_test,
    y_test=y_test,
    feature_set='shap',
    n_iter=10,
)

In [18]:
# Evaluate the XGBoost models trained on the prediction columns only.
test_model(
    algorithm='xgboost',
    x_test=x_test,
    y_test=y_test,
    feature_set='predictions',
    n_iter=10,
)

In [19]:
# Evaluate the XGBoost models trained on the combined feature set.
test_model(
    algorithm='xgboost',
    x_test=x_test,
    y_test=y_test,
    feature_set='all',
    n_iter=10,
)

### Bootstrapping

In [33]:
def add_subsampling_weights(df, outcome_freq):
    '''Add an ``outcome_weights`` column to ``df`` in place and return ``df``.

    Every row whose outcome column equals 1 receives
    ``outcome_freq[k] / (number of rows of that outcome)``, where ``k`` follows
    the order DCIS, Invasive, BenignHR, Papilloma, Benign.
    '''
    outcome_columns = (
        'outcome_cancer_type_DCIS',
        'outcome_cancer_type_Invasive',
        'outcome_cancer_type_BenignHR',
        'outcome_cancer_type_Papilloma',
        'outcome_cancer_type_Benign',
    )

    # Row masks and class sizes are computed before any assignment.
    row_masks = [df[column] == 1 for column in outcome_columns]
    class_sizes = [sum(mask) for mask in row_masks]

    per_row_weight = [outcome_freq[position] / size
                      for position, size in enumerate(class_sizes)]

    for mask, weight in zip(row_masks, per_row_weight):
        df.loc[mask, 'outcome_weights'] = weight

    return df

In [57]:
def test_ensemble_model_bootstrap(algorithm, y_test, bootstrap_rep =10000, sample_size = None, seed = None):
    """Bootstrap the test AUCs of the run-averaged (ensemble) predictions.

    Reads the test probabilities saved for the 'predictions' (imaging, suffix
    ``img``) and 'all' (suffix ``both``) feature sets, averages the runs of each
    class, then repeatedly draws weighted resamples of the test rows and records
    the ROC-AUC and DeLong confidence bounds per class and feature set.

    Parameters
    ----------
    algorithm : str
        Algorithm name used to locate
        ``significance_tests/test_predict_probs_<algorithm>_<feature_set>.csv``.
    y_test : DataFrame
        Outcome table with a 0-based positional index and one binary column per
        class; it is copied, not modified.
    bootstrap_rep : int, default 10000
        Number of bootstrap replicates.
    sample_size : int, optional
        Rows drawn per replicate; defaults to ``y_test.shape[0]`` of the frame
        passed in (resolved at call time, not at definition time).
    seed : int, optional
        Seed for the resampling; when omitted the global NumPy random state is used.

    Side effects
    ------------
    Writes one CSV with a column per statistic and a row per replicate to
    ``output_AUCs_all_models/``: ``test_boostrap_ensemble_Multiclass.csv`` when
    ``sample_size`` equals the number of test rows, otherwise
    ``test_boostrap_ensemble_Multiclass_sample_size_<sample_size>.csv``.

    Raises
    ------
    ValueError
        If a probability file does not hold a whole number of runs per class.
    """
    # (output tag, outcome column) in the row order of the probability files.
    class_columns = [('DCIS', 'outcome_cancer_type_DCIS'),
                     ('INV', 'outcome_cancer_type_Invasive'),
                     ('BENHR', 'outcome_cancer_type_BenignHR'),
                     ('PAP', 'outcome_cancer_type_Papilloma'),
                     ('BEN', 'outcome_cancer_type_Benign')]
    # (output suffix, feature-set name used in the probability file name).
    feature_sets = [('img', 'predictions'), ('both', 'all')]

    OUTCOME_FREQ = [11.33, 24.36, 2.83, 4.82, 58.07] # frequencies dcis, inv, benhr, pap, ben

    if sample_size is None:
        sample_size = y_test.shape[0]

    # Output columns in a fixed order; every statistic gets its own unique key.
    # (Repeated keys previously let the CI2 values overwrite the AUC and CI1
    # columns for BENHR, PAP and BEN.)
    stats_runs = {}
    for suffix, _ in feature_sets:
        for tag, _ in class_columns:
            stats_runs['AUC_{}_{}'.format(tag, suffix)] = []
            stats_runs['AUC_{}_CI1_{}'.format(tag, suffix)] = []
            stats_runs['AUC_{}_CI2_{}'.format(tag, suffix)] = []

    # Take ensemble of the test runs: average the predicted probabilities of all
    # runs of a class. The files hold the classes one after another, with one
    # row per run.
    ensemble_probs = {}
    for suffix, feature_set in feature_sets:
        path_prob = 'significance_tests/test_predict_probs_' + str(algorithm) + '_' + feature_set + '.csv'
        prob = pd.read_csv(path_prob, header = None)
        n_runs, leftover = divmod(prob.shape[0], len(class_columns))
        if n_runs == 0 or leftover:
            raise ValueError('{} has {} rows, which is not a positive multiple of {} classes'.format(
                path_prob, prob.shape[0], len(class_columns)))
        for position, (tag, _) in enumerate(class_columns):
            class_rows = prob.iloc[position * n_runs:(position + 1) * n_runs]
            ensemble_probs[(tag, suffix)] = class_rows.mean().to_numpy()

    # Weight rows on a copy so the caller's frame does not gain a column.
    y_weighted = add_subsampling_weights(y_test.copy(), OUTCOME_FREQ)
    norm_weights = (y_weighted['outcome_weights']/np.sum(y_weighted['outcome_weights'])).to_numpy()

    labels = {tag: y_weighted[column].to_numpy() for tag, column in class_columns}

    rng = np.random if seed is None else np.random.RandomState(seed)

    for i in range(bootstrap_rep):

        print ('{}/{}\r'.format(i+1, bootstrap_rep), end = '', flush=True)

        samp_inds = rng.choice(y_weighted.shape[0], sample_size, replace=True, p=norm_weights)

        for suffix, _ in feature_sets:
            for tag, _ in class_columns:
                y_sample = labels[tag][samp_inds]
                prob_sample = ensemble_probs[(tag, suffix)][samp_inds]

                # One DeLong evaluation yields both confidence bounds.
                ci = delong.get_delong_ci(prob_sample, y_sample)

                stats_runs['AUC_{}_{}'.format(tag, suffix)].append(roc_auc_score(y_sample, prob_sample))
                stats_runs['AUC_{}_CI1_{}'.format(tag, suffix)].append(ci[0])
                stats_runs['AUC_{}_CI2_{}'.format(tag, suffix)].append(ci[1])

    # Create the output table once, after all replicates have run.
    df = pd.DataFrame(data=stats_runs)

    if sample_size == y_test.shape[0]:
        out_name = 'test_boostrap_ensemble_Multiclass.csv'
    else:
        out_name = 'test_boostrap_ensemble_Multiclass_sample_size_{}.csv'.format(sample_size)

    os.makedirs('output_AUCs_all_models', exist_ok=True)
    df.to_csv('output_AUCs_all_models/' + out_name)

In [56]:
# Bootstrap with resamples as large as the test set.
test_ensemble_model_bootstrap(
    algorithm='xgboost',
    y_test=y_test.reset_index(drop=True),
    bootstrap_rep=10000,
    sample_size=y_test.shape[0],
)

822/10000

Degrees of freedom <= 0 for slice
divide by zero encountered in true_divide
invalid value encountered in multiply
Degrees of freedom <= 0 for slice
divide by zero encountered in true_divide
invalid value encountered in multiply


2103/10000

Degrees of freedom <= 0 for slice
divide by zero encountered in true_divide
invalid value encountered in multiply
Degrees of freedom <= 0 for slice
divide by zero encountered in true_divide
invalid value encountered in multiply


4434/10000

Degrees of freedom <= 0 for slice
divide by zero encountered in true_divide
invalid value encountered in multiply
Degrees of freedom <= 0 for slice
divide by zero encountered in true_divide
invalid value encountered in multiply


6433/10000

Degrees of freedom <= 0 for slice
divide by zero encountered in true_divide
invalid value encountered in multiply
Degrees of freedom <= 0 for slice
divide by zero encountered in true_divide
invalid value encountered in multiply


6980/10000

Degrees of freedom <= 0 for slice
divide by zero encountered in true_divide
invalid value encountered in multiply
Degrees of freedom <= 0 for slice
divide by zero encountered in true_divide
invalid value encountered in multiply


9662/10000

Degrees of freedom <= 0 for slice
divide by zero encountered in true_divide
invalid value encountered in multiply
Degrees of freedom <= 0 for slice
divide by zero encountered in true_divide
invalid value encountered in multiply


10000/10000

In [58]:
# Bootstrap with resamples of 1000 rows.
test_ensemble_model_bootstrap(
    algorithm='xgboost',
    y_test=y_test.reset_index(drop=True),
    bootstrap_rep=10000,
    sample_size=1000,
)

10000/10000