In [1]:
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings(action='ignore')
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import MinMaxScaler
from imblearn.under_sampling import RandomUnderSampler 
from utils.Dataset import Dataset
from net.networks import lstm
import random
import torch.random
from sklearn.metrics import precision_recall_curve, auc, roc_auc_score,average_precision_score
from sklearn.metrics import precision_score, recall_score, f1_score
import joblib
# Seed every random number generator this notebook touches (PyTorch CPU,
# PyTorch CUDA, NumPy's global RNG and the stdlib `random` module) with the
# same fixed value so repeated runs start from the same state.
for _seed_fn in (torch.manual_seed, torch.cuda.manual_seed, np.random.seed, random.seed):
    _seed_fn(123)
del _seed_fn

# Read the widest feature table once; the two narrower feature sets below are
# obtained from it purely by dropping column ranges.
EHRs_DrugRel_Lab = pd.read_csv("preprocessed_data(dummy)/rnn_EHRs_DrugRel_Lab.csv")

# Column groups are selected by position in the CSV header:
#   positions 101..135  -> Lab_col
#   positions 346..1740 -> DrugRel_col
Lab_col = EHRs_DrugRel_Lab.columns[101:136]
DrugRel_col = EHRs_DrugRel_Lab.columns[346:1741]

# Full table minus Lab_col, and that result minus DrugRel_col.
EHRs_DrugRel = EHRs_DrugRel_Lab.drop(columns=Lab_col)
EHRs = EHRs_DrugRel.drop(columns=DrugRel_col)

# Each pair holds the two PT_ID last digits (PT_ID % 10) that make up the test
# split of one fold: [1,2], [3,4], [5,6], [7,8], [9,0].
tc = [[digit, (digit + 1) % 10] for digit in range(1, 10, 2)]

# Feature sets to evaluate, keyed by the name printed in the results.
data = dict(EHRs=EHRs, EHRs_DrugRel=EHRs_DrugRel, EHRs_DrugRel_Lab=EHRs_DrugRel_Lab)

# ---------------------------------------------------------------------------
# Cross-validated training / evaluation of the LSTM on every feature set.
#
# For each entry of `data`:
#   1. take one label per (PT_ID, Sepsis_Date) sequence (the Label of its last row),
#   2. pad the table with zero columns to N_FEATURES + 2 columns and every
#      sequence with leading zero rows to SEQ_LEN rows,
#   3. for each pair of PT_ID last digits in `tc`: hold those patients out as
#      the test split, min-max scale, randomly under-sample the training
#      sequences, train the LSTM and score it on the held-out split,
#   4. print the metrics averaged over the folds.
# ---------------------------------------------------------------------------

SEQ_LEN = 6          # rows (time steps) per sequence after padding
N_FEATURES = 1742    # feature columns per row after Label/Sepsis_Date are dropped
STOP_EPOCH = 850     # training stops after this many epochs even though n_epochs is larger
KEY_COLS = ["PT_ID", "Sepsis_Date"]   # columns identifying one sequence


def _zero_state(batch_size, hidden_size, device):
    """Return a fresh all-zero ``(h0, c0)`` pair for the LSTM.

    Each tensor has shape ``(1, batch_size, hidden_size)`` and lives on
    ``device``. Two distinct tensors are created (the original code
    accidentally reused the hidden state as the cell state).
    """
    h0 = torch.zeros(1, batch_size, hidden_size, device=device)
    c0 = torch.zeros(1, batch_size, hidden_size, device=device)
    return h0, c0


# Use the GPU when there is one (as the original hard-coded `.cuda()` did),
# otherwise fall back to the CPU so the cell still runs.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

for d in data.keys():
    data_df=data[d]
    print(d,": ")

    # Per-fold test metrics for this feature set.
    lstm_acc = []
    lstm_roc = []
    lstm_prc = []
    lstm_pre = []
    lstm_rec = []
    lstm_f1  = []

    # One label row per sequence. The rows are sorted by the sequence keys so
    # that row k corresponds to the k-th sequence emitted by the (key-sorted)
    # groupby below; without the sort the alignment silently depends on the
    # CSV already being ordered by PT_ID / Sepsis_Date.
    label_df = data_df.drop_duplicates(KEY_COLS, keep="last").sort_values(KEY_COLS)

    # Append all-zero columns so every feature set ends up with
    # N_FEATURES + 2 columns (the +2 being Label and Sepsis_Date, dropped later).
    padding = pd.DataFrame(0*np.ones((len(data_df), N_FEATURES-len(data_df.columns)+2)))
    data_df = pd.concat([data_df,padding],axis=1)

    all_columns = data_df.columns

    def padder (group):
        """Left-pad one sequence with zero rows so it has exactly SEQ_LEN rows.

        The padding rows are zero everywhere except ``PT_ID``, which is copied
        from the group so the rows follow their patient into the same split.

        Raises:
            ValueError: if the group already has more than SEQ_LEN rows.
        """
        n_missing = SEQ_LEN - len(group)
        if n_missing < 0:
            raise ValueError(
                "sequence has {} rows, more than the supported {}".format(len(group), SEQ_LEN)
            )
        filler = pd.DataFrame(np.zeros((n_missing, len(all_columns))), columns=all_columns)
        # Set PT_ID by name rather than assuming it is the first column.
        filler["PT_ID"] = group["PT_ID"].values[0]
        return pd.concat([filler,group],axis=0)

    data_df = data_df.groupby(KEY_COLS).apply(padder).reset_index(drop=True)

    for tc_1, tc_2 in tc:
        print(int(tc_1/2)+1,"fold")

        # Patients whose PT_ID ends in tc_1 or tc_2 form the test split.
        row_digit = data_df["PT_ID"] % 10
        row_is_test = (row_digit == tc_1) | (row_digit == tc_2)
        label_digit = label_df["PT_ID"] % 10
        label_is_test = (label_digit == tc_1) | (label_digit == tc_2)

        # NOTE(review): only Label and Sepsis_Date are dropped, so PT_ID stays
        # among the scaled model inputs — confirm this is intended.
        train_feature = data_df.loc[~row_is_test].drop(["Label","Sepsis_Date"], axis=1)
        test_feature = data_df.loc[row_is_test].drop(["Label","Sepsis_Date"], axis=1)
        train_label = label_df.loc[~label_is_test][["Label"]]
        test_label = label_df.loc[label_is_test][["Label"]]

        # Scale with statistics from the training rows only, then regroup the
        # flat rows into (sequence, time step, feature).
        scaler = MinMaxScaler()
        train_feature = scaler.fit_transform(train_feature)
        test_feature = scaler.transform(test_feature).reshape(-1, SEQ_LEN, N_FEATURES)

        # The sampler is given one flat row per sequence, so flatten, resample
        # and restore the 3-D shape.
        train_feature = train_feature.reshape(-1, SEQ_LEN*N_FEATURES)
        assert len(train_feature) == len(train_label), "train sequences and labels differ in count"
        assert len(test_feature) == len(test_label), "test sequences and labels differ in count"
        rd = RandomUnderSampler()
        train_feature, train_label = rd.fit_resample(train_feature,train_label)
        train_feature=np.reshape(train_feature, (-1,SEQ_LEN,N_FEATURES))

        # Two (or three, for odd sizes) batches per epoch; never a zero batch size.
        BATCH_SIZE=max(1, len(train_feature)//2)
        in_size= test_feature.shape[2]
        h_size=256
        n_epochs=1500
        LEARNING_RATE=0.00001

        train_data = Dataset(torch.FloatTensor(train_feature), torch.FloatTensor(train_label.values))
        test_data = Dataset(torch.FloatTensor(test_feature), torch.FloatTensor(test_label.values))
        train_loader = DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)
        test_loader = DataLoader(dataset=test_data, batch_size=len(test_feature), shuffle=False)

        model = lstm(in_size=in_size,h_size=h_size)
        model=model.to(device)
        loss_f = nn.BCELoss()
        optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
        # The cosine schedule is stepped once per batch and sized for the full
        # n_epochs, while training stops at STOP_EPOCH, so the learning rate
        # never decays all the way to eta_min. Kept as in the original run.
        scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, n_epochs*len(train_loader), eta_min=0)

        # Epoch-mean history, sampled every 100 epochs (plain Python floats).
        losses = []
        accur = []
        val_losses = []
        val_accur = []

        for i in range(min(n_epochs, STOP_EPOCH)):
            total_loss = 0
            total_acc = 0
            val_total_loss = 0
            val_total_acc = 0

            # ---- training pass ----
            model.train()
            for x_train,y_train in train_loader:
                x_train,y_train=x_train.to(device),y_train.to(device)
                h, c = _zero_state(len(x_train), h_size, device)
                output = model(x_train,(h,c))
                loss = loss_f(output,y_train.reshape(-1,1))
                # Accuracy is for logging only; detach so it keeps no graph.
                acc = (torch.round(output.detach().reshape(-1)) == y_train.reshape(-1)).sum()/len(y_train)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                scheduler.step()
                total_loss+=loss.item()
                total_acc+=acc.item()

            # ---- validation pass on the held-out split ----
            # eval() so layers such as dropout (if the model has any) behave
            # deterministically while scoring.
            model.eval()
            with torch.no_grad():
                for x_test,y_test in test_loader:
                    x_test,y_test=x_test.to(device),y_test.to(device)
                    h, c = _zero_state(len(x_test), h_size, device)
                    y_pre = model(x_test,(h,c))
                    val_loss = loss_f(y_pre,y_test.reshape(-1,1))
                    val_acc=(torch.round(y_pre.reshape(-1)) == y_test.reshape(-1)).sum()/len(y_test)
                    val_total_loss+=val_loss.item()
                    val_total_acc+=val_acc.item()

            total_loss = total_loss/len(train_loader)
            total_acc = total_acc/len(train_loader)
            val_total_loss = val_total_loss/len(test_loader)
            val_total_acc = val_total_acc/len(test_loader)

            if i%100 == 0:
                # Store the epoch means as floats; appending the raw tensors
                # would record only the last batch and keep its autograd graph
                # (and GPU memory) alive.
                losses.append(total_loss)
                accur.append(total_acc)
                val_losses.append(val_total_loss)
                val_accur.append(val_total_acc)
                print("epoch{}\tloss:{}\tacc:{}"
                      .format(i,np.round(total_loss,3),np.round(total_acc,3)),
                      "\tval_loss:{}\tv_acc:{}"
                      .format(np.round(val_total_loss,3),np.round(val_total_acc,3)))

        # ---- final scoring of this fold ----
        # One forward pass, without autograd, yields both the probabilities
        # and the rounded predictions.
        model.eval()
        with torch.no_grad():
            h, c = _zero_state(len(test_feature), h_size, device)
            x_all = torch.as_tensor(test_feature, dtype=torch.float32, device=device)
            proba = model(x_all,(h,c))
        y_pre = torch.round(proba).cpu().numpy()
        y_proba = proba.cpu().numpy()
        y_label = test_label.values

        # Flatten to 1-D so the comparison can never broadcast to a matrix and
        # the sklearn metrics receive plain vectors.
        y_pre_1d = y_pre.reshape(-1)
        y_proba_1d = y_proba.reshape(-1)
        y_label_1d = y_label.reshape(-1)

        test_acc  = (y_pre_1d==y_label_1d).sum()/len(y_label_1d)
        AUROC     = roc_auc_score(y_label_1d, y_proba_1d)
        AUPRC     = average_precision_score(y_label_1d, y_proba_1d)
        precision = precision_score(y_label_1d, y_pre_1d, pos_label=1)
        recall    = recall_score(y_label_1d, y_pre_1d)
        f1_score_ = f1_score(y_label_1d, y_pre_1d)

        lstm_acc.append(test_acc)
        lstm_roc.append(AUROC)
        lstm_prc.append(AUPRC)
        lstm_pre.append(precision)
        lstm_rec.append(recall)
        lstm_f1.append(f1_score_)

        # Optional persistence of the fold's model and scaler (disabled).
        #torch.save(model.state_dict(), "trained_model/"+d+"_"+str(tc_1)+"_lstm.pt")
        #joblib.dump(scaler, "trained_model/"+d+"_"+str(tc_1)+"_scaler_lstm.pkl")

    # Mean of each metric over the folds of this feature set.
    print("\n")
    print("lstm : ")
    print("  Acc      : ",np.round(np.array(lstm_acc).mean(),3))
    print("  AUROC    : ",np.round(np.array(lstm_roc).mean(),3))
    print("  AUPRC    : ",np.round(np.array(lstm_prc).mean(),3))
    print("  Precision: ",np.round(np.array(lstm_pre).mean(),3))
    print("  Recall   : ",np.round(np.array(lstm_rec).mean(),3))
    print("  F1-score : ",np.round(np.array(lstm_f1 ).mean(),3))
    print("\n")
                
                

EHRs : 
1 fold
epoch0	loss:0.695	acc:0.5 	val_loss:0.709	v_acc:0.307
epoch100	loss:0.691	acc:0.5 	val_loss:0.701	v_acc:0.307
epoch200	loss:0.687	acc:0.607 	val_loss:0.695	v_acc:0.492
epoch300	loss:0.682	acc:0.673 	val_loss:0.689	v_acc:0.591
epoch400	loss:0.677	acc:0.677 	val_loss:0.684	v_acc:0.661
epoch500	loss:0.67	acc:0.668 	val_loss:0.677	v_acc:0.657
epoch600	loss:0.661	acc:0.664 	val_loss:0.671	v_acc:0.665
epoch700	loss:0.652	acc:0.673 	val_loss:0.668	v_acc:0.665
epoch800	loss:0.644	acc:0.677 	val_loss:0.666	v_acc:0.673
2 fold
epoch0	loss:0.692	acc:0.5 	val_loss:0.701	v_acc:0.324
epoch100	loss:0.689	acc:0.618 	val_loss:0.692	v_acc:0.537
epoch200	loss:0.685	acc:0.662 	val_loss:0.687	v_acc:0.626
epoch300	loss:0.681	acc:0.649 	val_loss:0.681	v_acc:0.633
epoch400	loss:0.675	acc:0.651 	val_loss:0.675	v_acc:0.644
epoch500	loss:0.668	acc:0.662 	val_loss:0.667	v_acc:0.651
epoch600	loss:0.659	acc:0.661 	val_loss:0.659	v_acc:0.662
epoch700	loss:0.651	acc:0.656 	val_loss:0.654	v_acc:0.655
epo