In [7]:
import pandas as pd
import numpy as np
import random
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import MinMaxScaler
from imblearn.under_sampling import RandomUnderSampler
from sklearn.metrics import precision_recall_curve, auc, roc_auc_score, average_precision_score
from sklearn.metrics import precision_score, recall_score, f1_score
import joblib
import warnings
# Silence every warning category for the remainder of the session.
warnings.filterwarnings("ignore")

# Seed the standard-library and NumPy global RNGs with the same value.
SEED = 123
random.seed(SEED)
np.random.seed(SEED)

# Widest table, read from disk; the two narrower feature sets are carved out
# of it by dropping column groups.
EHRs_DrugRel_Lab = pd.read_csv("preprocessed_data(dummy)/EHRs_DrugRel_Lab.csv")

# Column groups are selected by their position in the CSV header.
# NOTE(review): presumably positions 101-135 hold the lab columns and
# 346-1740 the drug-relation columns — confirm against the CSV header.
all_columns = EHRs_DrugRel_Lab.columns
Lab_col = all_columns[101:136]
DrugRel_col = all_columns[346:1741]

# Each narrower set is the previous one minus one column group.
EHRs_DrugRel = EHRs_DrugRel_Lab.drop(columns=Lab_col)
EHRs = EHRs_DrugRel.drop(columns=DrugRel_col)

# Five folds; each pair holds the two PT_ID last digits that form the test
# split of that fold: [1, 2], [3, 4], [5, 6], [7, 8], [9, 0].
tc = [[first, (first + 1) % 10] for first in range(1, 10, 2)]

# Feature sets to evaluate, keyed by the name printed in the report.
data = {
    "EHRs": EHRs,
    "EHRs_DrugRel": EHRs_DrugRel,
    "EHRs_DrugRel_Lab": EHRs_DrugRel_Lab,
}

# Every table is zero-padded to this many columns (label column included) so
# that all feature sets reach the models with the same width.
# NOTE(review): 1742 is presumably the column count of the full
# EHRs_DrugRel_Lab table, minus one for the dropped "Sepsis_Date" — confirm.
PADDED_N_COLUMNS = 1742 - 1

# (printed label, metric key) pairs, in the order the report lists them.
REPORT_ROWS = (
    ("  Acc      : ", "acc"),
    ("  AUROC    : ", "roc"),
    ("  AUPRC    : ", "prc"),
    ("  Precision: ", "pre"),
    ("  Recall   : ", "rec"),
    ("  F1-score : ", "f1"),
)


def _evaluate(model, features, labels):
    """Return the held-out metrics of a fitted binary classifier.

    Args:
        model: Fitted estimator exposing ``predict``, ``predict_proba`` and
            ``score``. Column 1 of ``predict_proba`` is used as the score of
            the positive class.
        features: Feature matrix of the evaluation split.
        labels: 1-D array of true labels of the evaluation split.

    Returns:
        Dict with keys ``acc``, ``roc``, ``prc``, ``pre``, ``rec`` and ``f1``.
    """
    # Predict once and reuse: every metric below needs one of these two arrays.
    predicted = model.predict(features)
    positive_score = model.predict_proba(features)[:, 1]
    return {
        "acc": model.score(features, labels),
        "roc": roc_auc_score(labels, positive_score),
        "prc": average_precision_score(labels, positive_score),
        "pre": precision_score(labels, predicted),
        "rec": recall_score(labels, predicted),
        "f1": f1_score(y_true=labels, y_pred=predicted),
    }


def _print_report(title, fold_metrics):
    """Print the mean over folds of every metric, rounded to 3 decimals."""
    print(title)
    for label, key in REPORT_ROWS:
        print(label, np.round(np.mean([fold[key] for fold in fold_metrics]), 3))


# Evaluate logistic regression and a random forest on every feature set with
# the patient-level folds listed in `tc`, then print the fold-averaged metrics.
for d, data_df in data.items():
    print(d,": ")
    data_df = data_df.drop(["Sepsis_Date"], axis=1)

    # Pad once per feature set (it is the same for every fold) and align the
    # padding to the frame's own index so no row can be mismatched.
    n_pad = max(PADDED_N_COLUMNS - len(data_df.columns), 0)
    padding = pd.DataFrame(np.zeros((len(data_df), n_pad)), index=data_df.index)
    data_df = pd.concat([data_df, padding], axis=1)

    # Folds are defined by the last digit of the patient id.
    last_digit = data_df["PT_ID"] % 10

    lr_metrics = []
    rf_metrics = []

    for tc_1, tc_2 in tc:
        in_test = last_digit.isin([tc_1, tc_2])
        train_data = data_df.loc[~in_test]
        test_data = data_df.loc[in_test]

        # Plain arrays are handed to scikit-learn: the padding columns carry
        # integer names while the CSV columns carry strings, and recent
        # scikit-learn releases reject frames whose column names mix types.
        # Labels are passed 1-D, the shape the estimators and metrics expect.
        # NOTE(review): "PT_ID" stays in the feature matrix, as in the
        # original code — confirm that an identifier is meant to be a feature.
        train_feature = train_data.drop(["Label"], axis=1).to_numpy(dtype=float)
        train_label = train_data["Label"].to_numpy()
        test_feature = test_data.drop(["Label"], axis=1).to_numpy(dtype=float)
        test_label = test_data["Label"].to_numpy()

        # The scaler is fitted on the training split only and then applied
        # to the test split.
        scaler = MinMaxScaler()
        train_feature = scaler.fit_transform(train_feature)
        test_feature = scaler.transform(test_feature)

        # Only the training split is re-balanced. `random_state` is left
        # unset here and for the forest below, so both draw from NumPy's
        # global RNG; the call order is kept so seeded runs stay comparable.
        rd = RandomUnderSampler()
        train_feature, train_label = rd.fit_resample(train_feature, train_label)

        model = LogisticRegression()
        model.fit(train_feature, train_label)
        lr_metrics.append(_evaluate(model, test_feature, test_label))

        #joblib.dump(model, "trained_model/"+d+"_"+str(tc_1)+"_lr.pkl")

        model = RandomForestClassifier()
        model.fit(train_feature, train_label)
        rf_metrics.append(_evaluate(model, test_feature, test_label))

        #joblib.dump(model, "trained_model/"+d+"_"+str(tc_1)+"_rf.pkl")
        #joblib.dump(scaler, "trained_model/"+d+"_"+str(tc_1)+"_scaler.pkl")

    _print_report("LogisticRegression(LR) : ", lr_metrics)
    _print_report("RandomForestClassifier(RF) : ", rf_metrics)
    print("\n")

EHRs : 
LogisticRegression(LR) : 
  Acc      :  0.626
  AUROC    :  0.663
  AUPRC    :  0.465
  Precision:  0.447
  Recall   :  0.647
  F1-score :  0.528
RandomForestClassifier(RF) : 
  Acc      :  0.635
  AUROC    :  0.704
  AUPRC    :  0.494
  Precision:  0.456
  Recall   :  0.679
  F1-score :  0.546


EHRs_DrugRel : 
LogisticRegression(LR) : 
  Acc      :  0.638
  AUROC    :  0.675
  AUPRC    :  0.483
  Precision:  0.457
  Recall   :  0.625
  F1-score :  0.527
RandomForestClassifier(RF) : 
  Acc      :  0.639
  AUROC    :  0.692
  AUPRC    :  0.496
  Precision:  0.458
  Recall   :  0.64
  F1-score :  0.533


EHRs_DrugRel_Lab : 
LogisticRegression(LR) : 
  Acc      :  0.655
  AUROC    :  0.715
  AUPRC    :  0.53
  Precision:  0.475
  Recall   :  0.64
  F1-score :  0.545
RandomForestClassifier(RF) : 
  Acc      :  0.693
  AUROC    :  0.755
  AUPRC    :  0.577
  Precision:  0.517
  Recall   :  0.729
  F1-score :  0.605


