In [4]:
import pandas as pd
import numpy as np
import random
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import MinMaxScaler
from imblearn.under_sampling import RandomUnderSampler
from sklearn.metrics import precision_recall_curve, auc, roc_auc_score, average_precision_score
from sklearn.metrics import precision_score, recall_score, f1_score
import joblib
import warnings
warnings.filterwarnings(action='ignore')
np.random.seed(123)
random.seed(123)

EHRs_DrugRel_Lab = pd.read_csv("preprocessed_data(dummy)/(dummy)EHRs_DrugRel_Lab.csv")
Lab_col=EHRs_DrugRel_Lab.columns[101:136]
DrugRel_col=EHRs_DrugRel_Lab.columns[346:1741]
EHRs_DrugRel=EHRs_DrugRel_Lab.drop(Lab_col.values,axis=1)
EHRs=EHRs_DrugRel.drop(DrugRel_col,axis=1)

data={"EHRs":EHRs,"EHRs_DrugRel":EHRs_DrugRel,"EHRs_DrugRel_Lab":EHRs_DrugRel_Lab}

for d in data.keys():
    data_df=data[d]
    print(d,": ")
    data_df=data_df.drop(["Sepsis_Date"],axis=1)    
    padding = pd.DataFrame(0*np.ones((len(data_df), 1742-len(data_df.columns)-1)))
    data_df = pd.concat([data_df,padding],axis=1)
    test_data=data_df
    test_feature = test_data.drop(["Label"], axis=1)
    test_label = test_data[["Label"]]
    scaler = joblib.load("trained_model/"+d+"_scaler.pkl")
    test_feature = scaler.transform(test_feature)
    
    model = joblib.load("trained_model/"+d+"_lr.pkl")
    
    lr_acc = model.score(test_feature, test_label)
    lr_roc = roc_auc_score(test_label,model.predict_proba(test_feature)[:,1])
    lr_prc = average_precision_score(test_label,model.predict_proba(test_feature)[:,1])
    lr_pre = recall_score(test_label,  model.predict(test_feature))
    lr_rec = precision_score(test_label, model.predict(test_feature))
    lr_f1  =f1_score(y_true=test_label, y_pred =model.predict(test_feature))
        
    model = joblib.load("trained_model/"+d+"_rf.pkl")
    
    rf_acc = model.score(test_feature, test_label)
    rf_roc = roc_auc_score(test_label,model.predict_proba(test_feature)[:,1])
    rf_prc = average_precision_score(test_label,model.predict_proba(test_feature)[:,1])
    rf_pre = recall_score(test_label,  model.predict(test_feature))
    rf_rec = precision_score(test_label, model.predict(test_feature))
    rf_f1  =f1_score(y_true=test_label, y_pred =model.predict(test_feature))

    print("LogisticRegression(LR) : ")
    print("  Acc      : ",np.round(lr_acc,3)) 
    print("  AUROC    : ",np.round(lr_roc,3))
    print("  AUPRC    : ",np.round(lr_prc,3))
    print("  Precision: ",np.round(lr_pre,3))
    print("  Recall   : ",np.round(lr_rec,3))
    print("  F1-score : ",np.round(lr_f1 ,3))
    
    print("RandomForestClassifier(RF) : ")
    print("  Acc      : ",np.round(rf_acc,3)) 
    print("  AUROC    : ",np.round(rf_roc,3))
    print("  AUPRC    : ",np.round(rf_prc,3))
    print("  Precision: ",np.round(rf_pre,3))
    print("  Recall   : ",np.round(rf_rec,3))
    print("  F1-score : ",np.round(rf_f1 ,3))
    print("\n")

EHRs : 
LogisticRegression(LR) : 
  Acc      :  0.7
  AUROC    :  0.92
  AUPRC    :  0.943
  Precision:  1.0
  Recall   :  0.625
  F1-score :  0.769
RandomForestClassifier(RF) : 
  Acc      :  0.8
  AUROC    :  1.0
  AUPRC    :  1.0
  Precision:  1.0
  Recall   :  0.714
  F1-score :  0.833


EHRs_DrugRel : 
LogisticRegression(LR) : 
  Acc      :  1.0
  AUROC    :  1.0
  AUPRC    :  1.0
  Precision:  1.0
  Recall   :  1.0
  F1-score :  1.0
RandomForestClassifier(RF) : 
  Acc      :  1.0
  AUROC    :  1.0
  AUPRC    :  1.0
  Precision:  1.0
  Recall   :  1.0
  F1-score :  1.0


EHRs_DrugRel_Lab : 
LogisticRegression(LR) : 
  Acc      :  0.9
  AUROC    :  1.0
  AUPRC    :  1.0
  Precision:  1.0
  Recall   :  0.833
  F1-score :  0.909
RandomForestClassifier(RF) : 
  Acc      :  0.7
  AUROC    :  1.0
  AUPRC    :  1.0
  Precision:  1.0
  Recall   :  0.625
  F1-score :  0.769


