In [84]:
# importing standard packages
import pandas as pd
import numpy as np 
from scipy import stats 

# importing the plot functions
import seaborn as sns 
import matplotlib.pyplot as plt
%matplotlib inline 

# preprocessing/ model selection 
from sklearn.model_selection import train_test_split, RandomizedSearchCV, cross_val_score

# importing the classifiers 
from lightgbm import LGBMClassifier
from xgboost import XGBClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier

# importing the metrics 
from sklearn.metrics import roc_auc_score
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, classification_report,f1_score
from sklearn.metrics import roc_curve

# oversampling techniques 
from imblearn.over_sampling import SMOTE

# importing model saving package 
from joblib import dump, load

from imblearn.under_sampling import RandomUnderSampler
from imblearn.pipeline import Pipeline, make_pipeline

  import pandas.util.testing as tm
Using TensorFlow backend.


In [85]:
# grab the stored data frames
%store -r X_wids
%store -r y_wids

In [104]:
# Hold out 20% of the rows as a test set. Stratifying on y_wids keeps the
# target's class ratio the same in both partitions, and the fixed
# random_state makes the split reproducible across kernel restarts.
# train_test_split is already imported in the imports cell at the top of the
# notebook, so it is not re-imported here.
X_train, X_test, y_train, y_test = train_test_split(
    X_wids,
    y_wids,
    test_size=0.2,
    random_state=31,
    stratify=y_wids,
)

# Sanity check: every row must land in exactly one partition.
assert len(X_train) + len(X_test) == len(X_wids)

print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(73370, 598) (18343, 598) (73370, 1) (18343, 1)


In [105]:
# Remove the 'hospital_admit_source_Observation' column from the test features.
# NOTE(review): presumably the saved model was fitted without this column -
# confirm against the training notebook.
# errors='ignore' keeps the cell idempotent: without it, running the cell a
# second time raises KeyError because the column has already been dropped.
X_test = X_test.drop(columns=['hospital_admit_source_Observation'], errors='ignore')

In [106]:
# Restore the fitted classifier that was persisted with joblib.
# NOTE(review): joblib.load unpickles the file, so only load model files
# that come from a trusted source.
clf = load('LGB_u_best.joblib')

# Hard class labels for the held-out rows ...
y_pred = clf.predict(X_test)
# ... and the class-probability matrix for the same rows (column 1 is used
# in a later cell).
y_proba = clf.predict_proba(X_test)

In [107]:
# Preview the first five rows of the test features.
X_test.head(n=5)

Unnamed: 0_level_0,age,bmi,elective_surgery,height,pre_icu_los_days,readmission_status,weight,albumin_apache,apache_post_operative,arf_apache,...,apache_3j_diagnosis_desc_Sepsis__other_than_urinary,apache_3j_diagnosis_desc_Sepsis_of_urinary_tract_origin,apache_3j_diagnosis_desc_Skin_surgery,apache_3j_diagnosis_desc_Stroke,apache_3j_diagnosis_desc_Subarachnoid_haemorrhage,apache_3j_diagnosis_desc_Subdural_Epidural_haematoma,apache_3j_diagnosis_desc_Unstable_angina,apache_3j_diagnosis_desc_Valvular_heart_surgery,apache_3j_diagnosis_desc_Viral_pneumonia,apache_3j_diagnosis_desc_unknown
patient_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
51422,79.0,33.515625,0,160.0,2.385417,0,85.8,3.0,0,0.0,...,0,0,0,0,0,0,0,0,0,0
54198,57.0,61.130334,0,153.0,0.655556,0,143.1,3.0,0,0.0,...,0,1,0,0,0,0,0,0,0,0
119435,53.0,35.502662,1,175.3,1.00625,0,109.1,2.1,1,0.0,...,0,0,0,0,0,0,0,0,0,0
87853,71.0,30.948701,0,165.1,0.207639,0,84.36,3.0,0,0.0,...,0,0,0,0,0,0,0,0,0,0
108282,43.0,32.13302,0,154.9,0.025,0,77.1,3.0,0,0.0,...,0,0,0,0,0,0,0,0,0,0


In [108]:
# Store the model's score next to the features it was computed from.
# Column 1 of the predict_proba output is presumably P(class 1) - confirm
# against clf.classes_.
# NOTE: after this cell X_test is no longer a pure feature matrix; re-run the
# split and drop cells before calling clf.predict / clf.predict_proba again.
model_score = y_proba[:, 1]
X_test['y_proba'] = model_score

In [109]:
# Re-read the raw training file so the apache_4a_* death-probability columns
# can be looked up per patient; the frame is indexed by patient_id.
wids = pd.read_csv('../../data/training_v2.csv').set_index('patient_id')

In [110]:
# Look up the two APACHE IVa death probabilities for exactly the patients in
# the test split, in the same row order as X_test.
test_ids = X_test.index

# The list selector returns a one-column DataFrame ...
icu_proba = wids.loc[test_ids, ['apache_4a_icu_death_prob']]
# ... whereas the scalar selector returns a Series.
hos_proba = wids.loc[test_ids, 'apache_4a_hospital_death_prob']

In [111]:
# Append the two APACHE death probabilities to X_test as new columns.
# Both objects were selected from wids with X_test.index, so their rows
# correspond to the rows of X_test. icu_proba is a one-column DataFrame and
# hos_proba is a Series.
X_test['icu_proba'] = icu_proba
X_test['hos_proba'] = hos_proba

In [118]:
# Turn each APACHE probability into a hard 0/1 prediction: a probability
# strictly greater than 0.5 becomes 1, everything else 0. Multiplying the
# boolean mask by 1 converts it to integers.
# NOTE(review): a missing (NaN) probability compares False and is therefore
# counted as a negative prediction - confirm that this is intended.
icu_pred = (icu_proba > .5) * 1
hos_pred = (hos_proba > .5) * 1

In [119]:
# Metrics: recall of the model versus the two APACHE baselines on the test set.
recall_candidates = (
    ('our model', y_pred),
    ('icu probability', icu_pred),
    ('hospital probability', hos_pred),
)

for label, predictions in recall_candidates:
    print(label)
    print(recall_score(y_test, predictions))

our model
0.8212255211623499
icu probability
0.17435249526216046
hospital probability
0.2874289324068225


In [120]:
# Metrics: F1 score of the model versus the two APACHE baselines on the test set.
f1_candidates = (
    ('our model', y_pred),
    ('icu probability', icu_pred),
    ('hospital probability', hos_pred),
)

for label, predictions in f1_candidates:
    print(label)
    print(f1_score(y_test, predictions))

our model
0.4291137151345107
icu probability
0.27353815659068387
hospital probability
0.37775010377750096
