In [1]:
import torch
import torch.nn as nn
from torch.nn import Parameter
from torch.autograd import Variable
from torch import optim
import torch.nn.functional as F
# Detect whether a CUDA device can be used by the rest of the notebook.
use_cuda = torch.cuda.is_available()
# Only pin the default CUDA device when one exists; calling set_device on a
# CPU-only host raises and would abort the whole first cell.
if use_cuda:
    torch.cuda.set_device(0)
import sys, random
import numpy as np
try:
    import cPickle as pickle
except:
    import pickle
from torch.nn._functions.thnn import rnnFusedPointwise as fusedBackend
import math
from sklearn.metrics import roc_auc_score

In [2]:
# Use the GPU only when one is actually available; a hard-coded True makes
# every `.cuda()` call in the notebook fail on CPU-only machines.
use_cuda = torch.cuda.is_available()

In [3]:
# SECURITY NOTE: pickle.load can execute arbitrary code embedded in the file.
# Only load these splits from a trusted location.
def _load_pickle(path):
    """Unpickle one file with ``encoding='bytes'`` and close the handle afterwards."""
    with open(path, 'rb') as fh:
        return pickle.load(fh, encoding='bytes')


train_sl = _load_pickle('/data/projects/py_ehr_2/Data/hf50_cl2_h143_ref_t.train')
test_sl = _load_pickle('/data/projects/py_ehr_2/Data/hf50_cl2_h143_ref_t.test')
valid_sl = _load_pickle('/data/projects/py_ehr_2/Data/hf50_cl2_h143_ref_t.valid')
# Report the split sizes in train / valid / test order.
print (len(train_sl),len(valid_sl),len(test_sl))

29504 4269 8956


In [4]:
class TPLSTM(nn.Module):
    """Single-layer, time-aware LSTM that is unrolled step by step in Python.

    Each time step receives a 2-D tensor whose LAST column is the elapsed time
    and whose remaining ``input_size`` columns are the features. Before the
    usual LSTM gate update, a learned projection of the previous cell state is
    removed and added back scaled by ``1 / log(elapsed + 2.7183)``.

    Args:
        input_size: number of feature columns per step (time column excluded).
        hidden_size: size of the hidden and cell states.
        bias: when False, all bias terms are registered as ``None``.
    """

    def __init__(self, input_size, hidden_size, bias=True):
        super(TPLSTM, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.bias = bias
        # The four gates (in, forget, cell, out) are stacked along dim 0.
        self.weight_ih = Parameter(torch.Tensor(4 * hidden_size, input_size))
        self.weight_hh = Parameter(torch.Tensor(4 * hidden_size, hidden_size))
        # Projection used to extract the part of the cell state that is discounted.
        self.W_decomp = Parameter(torch.Tensor(hidden_size, hidden_size))
        if bias:
            self.bias_ih = Parameter(torch.Tensor(4 * hidden_size))
            self.bias_hh = Parameter(torch.Tensor(4 * hidden_size))
            self.b_decomp = Parameter(torch.Tensor(hidden_size))
        else:
            self.register_parameter('bias_ih', None)
            self.register_parameter('bias_hh', None)
            self.register_parameter('b_decomp', None)
        self.reset_parameters()

    def reset_parameters(self):
        """Initialise every parameter uniformly in ``[-1/sqrt(hidden), 1/sqrt(hidden)]``."""
        stdv = 1.0 / math.sqrt(self.hidden_size)
        for weight in self.parameters():
            weight.data.uniform_(-stdv, stdv)

    def forward(self, input, hx):
        """Run the cell over every step along dim 0 of ``input``.

        Args:
            input: tensor indexed as ``input[step]``; each step is 2-D with the
                elapsed time in the last column.
            hx: ``(h, c)`` tuple holding the initial hidden and cell state.

        Returns:
            ``(hidden_per_step, (h_last, c_last), cell_per_step)`` where the
            first and last items are Python lists with one tensor per step.
        """
        outputh=[]
        outputc=[]
        for i in range(input.size()[0]):
            h,c = self.TPLSTMCell(input[i],hx,self.weight_ih, self.weight_hh,self.W_decomp,self.bias_ih, self.bias_hh,self.b_decomp)
            hx=(h,c)
            outputh.append(h)
            outputc.append (c)
        return outputh,hx,outputc

    def TPLSTMCell(self,input, hidden, w_ih, w_hh,w_decomp, b_ih=None, b_hh=None,b_decomp=None):
        """Compute one time-aware LSTM step and return ``(h_t, c_t)``.

        ``input`` is 2-D; its last column is the elapsed time and the other
        columns are the features. ``hidden`` is the ``(h, c)`` pair from the
        previous step. Gates are split along dim 2, so the states are expected
        to be 3-D with a leading dimension of size 1.
        """
        t = input[:, -1]            # elapsed time of every row
        input = input[:, :-1]       # features without the time column
        hx, cx = hidden
        T = self.map_elapse_time(t)
        # Learned projection of the previous cell state, then its time-discounted copy.
        C_ST = torch.tanh(F.linear(cx, w_decomp, b_decomp))
        C_ST_dis = (T * C_ST.squeeze(0)).unsqueeze(0)
        # Swap the projected part of the cell state for its discounted version.
        cpt = cx - C_ST + C_ST_dis
        gates = F.linear(input, w_ih, b_ih) + F.linear(hx, w_hh, b_hh)
        ingate, forgetgate, cellgate, outgate = gates.chunk(4, 2)
        ingate = torch.sigmoid(ingate)
        forgetgate = torch.sigmoid(forgetgate)
        cellgate = torch.tanh(cellgate)
        outgate = torch.sigmoid(outgate)
        ct = (forgetgate * cpt) + (ingate * cellgate)   # current cell state with time discount
        ht = outgate * torch.tanh(ct)

        return ht, ct

    def map_elapse_time(self, t):
        """Map elapsed times to per-unit discount factors ``1 / log(t + 2.7183)``.

        The result has shape ``(t.numel(), hidden_size)`` and lives on the same
        device as ``t``; it no longer depends on any notebook-level CUDA flag.
        With the constant 2.7183 an elapsed time of 0 gives a factor of about 1.
        """
        decay = 1.0 / torch.log(t + 2.7183)
        # Repeat the per-row factor across all hidden units (same values as
        # multiplying by a row of ones, without allocating it every step).
        return decay.view(-1, 1).expand(-1, self.hidden_size)

In [5]:
class EHR_TLSTM(nn.Module):
    """Binary classifier over patient visit sequences with a recurrent encoder.

    A mini-batch is a list of ``(key, label, visits)`` tuples, where every visit
    is indexed as ``visit[0]`` (elapsed-time value(s)) and ``visit[1]`` (list of
    integer codes). Codes are embedded and summed per visit, the time value is
    appended as an extra feature, the sequence is fed to the recurrent layer and
    the final hidden state is mapped to a sigmoid probability.

    Args:
        input_size: vocabulary size of the code embedding (index 0 is padding).
        embed_dim: embedding dimension.
        hidden_size: hidden size of the recurrent layer.
        n_layers: number of layers for the standard ``nn`` cells.
        dropout_r: dropout passed to the standard ``nn`` cells.
        cell_type: one of ``'TLSTM'``, ``'GRU'``, ``'RNN'``, ``'LSTM'``, ``'BNLSTM'``.

    Raises:
        NotImplementedError: for any other ``cell_type``.
    """

    def __init__(self, input_size,embed_dim, hidden_size, n_layers=1,dropout_r=0.1,cell_type='TLSTM'):
        super(EHR_TLSTM,self).__init__()
        self.n_layers = n_layers
        self.hidden_size = hidden_size
        self.embed_dim = embed_dim
        self.dropout_r = dropout_r
        self.cell_type = cell_type
        # Pre-trained embeddings and bidirectionality were hard-wired off;
        # the attributes are kept so existing code reading them still works.
        self.preTrainEmb = ''
        self.bi = 1

        self.embed = nn.Embedding(input_size, self.embed_dim, padding_idx=0)

        standard_cells = {"GRU": nn.GRU, "RNN": nn.RNN, "LSTM": nn.LSTM}
        if self.cell_type == "TLSTM":
            # TPLSTM strips the trailing time column itself, so it is sized
            # for the embedding features only.
            self.rnn_c = TPLSTM(self.embed_dim, hidden_size)
        elif self.cell_type == "BNLSTM":
            # NOTE(review): `bnlstm` is not imported anywhere in the visible
            # notebook; this branch needs that module to be in scope.
            self.rnn_c = bnlstm.LSTM(bnlstm.BNLSTMCell, self.embed_dim, hidden_size,num_layers=n_layers,use_bias=False, dropout= dropout_r,max_length=30)
        elif self.cell_type in standard_cells:
            # The standard cells receive the embedding PLUS the time column,
            # hence embed_dim + 1 input features.
            self.rnn_c = standard_cells[self.cell_type](self.embed_dim + 1, hidden_size,num_layers=n_layers, dropout= dropout_r, bidirectional=False)
        else:
            raise NotImplementedError

        self.out = nn.Linear(self.hidden_size*self.bi,1)
        self.sigmoid = nn.Sigmoid()

    def EmbedPatient_MB(self, input):
        """Turn a list of patients into a padded, embedded mini-batch.

        Args:
            input: list of ``(key, label, visits)`` tuples (see class docstring).

        Returns:
            ``(out_emb, labels, seq_lens)``: ``out_emb`` is
            ``(batch, max_visits, embed_dim + time_width)`` with the time
            value(s) in the trailing column(s); ``labels`` is a float tensor of
            shape ``(batch, 1, 1)``; ``seq_lens`` lists the unpadded visit
            counts. Also stores the batch size in ``self.bsize``.
        """
        # Build everything on the device the model lives on instead of
        # consulting a notebook-level flag.
        device = self.embed.weight.device
        self.bsize = len(input)
        max_visits = max(len(pt[-1]) for pt in input)
        max_codes = max(len(visit[1]) for pt in input for visit in pt[-1])

        code_batch = []
        time_batch = []
        labels = []
        seq_l = []
        for pt in input:
            _, label, visits = pt
            seq_l.append(len(visits))
            labels.append(float(label))
            visit_codes = []
            visit_times = []
            for visit in visits:
                codes = torch.from_numpy(np.asarray(visit[1], dtype=int)).long()
                # Right-pad the code list with the padding index 0.
                visit_codes.append(F.pad(codes, (0, max_codes - len(visit[1])), "constant", 0))
                # Times are cast to int first (as before), then to float.
                visit_times.append(torch.from_numpy(np.asarray(visit[0], dtype=int)).float())
            # Append all-zero visits so every patient has max_visits rows.
            row_pad = (0, 0, 0, max_visits - len(visits))
            code_batch.append(F.pad(torch.stack(visit_codes, 0), row_pad, "constant", 0))
            time_batch.append(F.pad(torch.stack(visit_times, 0), row_pad, "constant", 0))

        mb_t = torch.stack(code_batch, 0).to(device)
        mtd_t = torch.stack(time_batch, 0).to(device)
        lbt_t = torch.tensor(labels, dtype=torch.float).view(-1, 1, 1).to(device)

        # Sum the code embeddings of each visit into one vector per visit.
        embedded = torch.sum(self.embed(mb_t), dim=2)
        out_emb = torch.cat((embedded, mtd_t), dim=2)
        return out_emb, lbt_t, seq_l

    def init_hidden(self):
        """Return a uniformly random initial state for the current batch size.

        For ``'LSTM'`` and ``'TLSTM'`` the same tensor is used for both the
        hidden and the cell state. The state is drawn with ``torch.rand`` on
        every call, so repeated forward passes are not bit-identical.
        """
        # Sample on the CPU and move afterwards to keep the same RNG stream
        # as before regardless of the target device.
        h_0 = torch.rand(self.n_layers*self.bi, self.bsize, self.hidden_size)
        h_0 = h_0.to(self.embed.weight.device)
        if self.cell_type == "LSTM" or self.cell_type == "TLSTM":
            return (h_0, h_0)
        return h_0

    def forward(self, input):
        """Score a mini-batch of patients.

        Returns:
            ``(probabilities, labels)``, both squeezed. The hidden state is
            taken after the last (possibly padded) step because sequences are
            not packed.
        """
        x_in, lt, x_lens = self.EmbedPatient_MB(input)
        x_in = x_in.permute(1,0,2)   # recurrent layers here are time-major
        h_0 = self.init_hidden()
        # TPLSTM returns three values, the nn.* cells return two; the final
        # state is the second item in both cases.
        hidden = self.rnn_c(x_in, h_0)[1]
        if self.cell_type == "LSTM" or self.cell_type == "TLSTM":
            hidden = hidden[0]
        if self.bi == 2:
            output = self.sigmoid(self.out(torch.cat((hidden[-2],hidden[-1]),1)))
        else:
            output = self.sigmoid(self.out(hidden[-1]))
        return output.squeeze(), lt.squeeze()

In [6]:
# Build the time-aware LSTM classifier and, when CUDA is enabled, move it to the GPU.
model = EHR_TLSTM(
    input_size=16000,
    embed_dim=128,
    hidden_size=64,
    n_layers=1,
    dropout_r=0,
    cell_type='TLSTM',
)
model = model.cuda() if use_cuda else model


In [7]:
def train (tmodel,mini_batch, criterion, optimizer):
    """Run one optimisation step on a single mini-batch.

    The model is switched to training mode, its gradients are cleared, the
    loss between the model's predictions and the labels it returns is
    back-propagated, and the optimizer takes one step.

    Returns:
        ``(predictions, loss_value)`` where ``loss_value`` is a Python float.
    """
    tmodel.train()
    tmodel.zero_grad()

    predictions, targets = tmodel(mini_batch)
    batch_loss = criterion(predictions, targets)
    batch_loss.backward()
    optimizer.step()

    loss_value = batch_loss.item()
    return predictions, loss_value

In [8]:
# training all samples in random order
import time
import math

def timeSince(since):
    """Format the wall-clock time elapsed since ``since`` as ``'<minutes>m <seconds>s'``.

    ``since`` is a timestamp as returned by ``time.time()``.
    """
    elapsed = time.time() - since
    # divmod floors the quotient, matching floor(elapsed / 60) and its remainder.
    minutes, seconds = divmod(elapsed, 60)
    return '%dm %ds' % (minutes, seconds)

In [9]:
def run_model_train(tmodel,dataset,batch_size,learning_rate = 0.01, l2=1e-04,epsl=1e-06 ):
    """Train ``tmodel`` for one pass over ``dataset`` with Adagrad and BCE loss.

    Patients are ordered by decreasing number of visits (``len(pt[-1])``), cut
    into consecutive mini-batches of ``batch_size``, and the mini-batches are
    visited in random order.

    Args:
        tmodel: model whose call returns ``(predictions, labels)`` for a batch.
        dataset: list of patient records; it is NOT modified (a sorted copy
            is used internally).
        batch_size: number of patients per mini-batch.
        learning_rate: Adagrad learning rate.
        l2: Adagrad weight decay.
        epsl: accepted for interface compatibility; unused by Adagrad here.

    Returns:
        ``(current_loss, all_losses)``: ``all_losses`` holds the mean loss of
        every completed group of 5 mini-batches, and ``current_loss`` is the
        summed loss of the trailing mini-batches not yet averaged.
    """
    optimizer = optim.Adagrad(tmodel.parameters(), lr=learning_rate, weight_decay=l2)
    # The loss module is stateless, so build it once instead of per batch.
    criterion = nn.BCELoss()
    # Sort a copy so the caller's list keeps its original order.
    ordered = sorted(dataset, key=lambda pt: len(pt[-1]), reverse=True)

    plot_every = 5
    current_loss = 0
    all_losses = []
    n_batches = int(np.ceil(int(len(ordered)) / int(batch_size)))

    batch_order = random.sample(range(n_batches), n_batches)
    for step, index in enumerate(batch_order, start=1):
        batch = ordered[index*batch_size:(index+1)*batch_size]
        _, loss = train(tmodel, batch, criterion=criterion, optimizer=optimizer)
        current_loss += loss
        # Record the running average every `plot_every` mini-batches.
        if step % plot_every == 0:
            all_losses.append(current_loss / plot_every)
            current_loss = 0

    return current_loss,all_losses


In [10]:
def calculate_auc(test_model, dataset, batch_size=200):
    """Compute the ROC AUC of ``test_model`` over ``dataset``.

    The model is put in eval mode and run under ``torch.no_grad()`` so no
    autograd graph is built during scoring. Patients are ordered by
    decreasing number of visits (``len(pt[-1])``) and scored in consecutive
    mini-batches; ``dataset`` itself is not modified.

    Args:
        test_model: model whose call returns ``(predictions, labels)``.
        dataset: list of patient records.
        batch_size: number of patients per mini-batch.

    Returns:
        The value of ``roc_auc_score(labels, predictions)``.
    """
    test_model.eval()
    # Sort a copy so the caller's list keeps its original order.
    ordered = sorted(dataset, key=lambda pt: len(pt[-1]), reverse=True)
    n_batches = int(np.ceil(int(len(ordered)) / int(batch_size)))
    labelVec = []
    y_hat = []

    with torch.no_grad():
        for index in range(n_batches):
            batch = ordered[index*batch_size:(index+1)*batch_size]
            output, label_t = test_model(batch)
            # view(-1) also covers a final batch of size 1 (0-d after squeeze).
            y_hat.extend(output.detach().cpu().view(-1).numpy())
            labelVec.extend(label_t.detach().cpu().view(-1).numpy())

    return roc_auc_score(labelVec, y_hat)

In [None]:
import copy  # local to this cell: needed to snapshot the best model

epochs=100
batch_size=100
current_loss_l=[]
all_losses_l=[]
train_auc_allep =[]
valid_auc_allep =[]
test_auc_allep=[]
bestValidAuc = 0.0
bestTestAuc = 0.0
bestValidEpoch = 0

### Run Epochs
# Each epoch: one training pass, then AUC on train / test / valid.
# Training stops once validation AUC has not improved for more than 12 epochs.
for ep in range(epochs):

    start = time.time()
    current_loss_la,all_losses_la = run_model_train(model,train_sl,batch_size)
    train_time = timeSince(start)
    eval_start = time.time()
    train_auc = calculate_auc(model,train_sl,batch_size)
    test_auc = calculate_auc(model,test_sl,batch_size)
    valid_auc = calculate_auc(model,valid_sl,batch_size)
    eval_time = timeSince(eval_start)
    all_losses_l.append (all_losses_la)
    avg_loss = np.mean(all_losses_la)
    train_auc_allep.append(train_auc)
    valid_auc_allep.append(valid_auc)
    test_auc_allep.append(test_auc)
    current_loss_l.append(current_loss_la)
    print ("Epoch ", ep," Train_auc :", train_auc, " , Valid_auc : ", valid_auc, " ,& Test_auc : " , test_auc," Avg Loss: ", avg_loss, 'Train Time (%s) Eval Time (%s)'%(train_time,eval_time) )

    if valid_auc > bestValidAuc:
        bestValidAuc = valid_auc
        bestValidEpoch = ep
        bestTestAuc = test_auc
        # Take a real snapshot: a plain `best_model = model` only aliases the
        # live model, whose weights keep changing in later epochs.
        best_model = copy.deepcopy(model)
        # To persist it, torch.save(best_model, <path>) or
        # torch.save(best_model.state_dict(), <path>) can be called here.
    if ep - bestValidEpoch >12: break

print ('bestValidAuc %f has a TestAuc of %f at epoch %d ' % (bestValidAuc, bestTestAuc, bestValidEpoch))



Epoch  0  Train_auc : 0.7888585630703242  , Valid_auc :  0.7553419050725477  ,& Test_auc :  0.7700903216528217  Avg Loss:  0.3315429733971418 Train Time (1m 40s) Eval Time (2m 18s)
Epoch  1  Train_auc : 0.8177999458896359  , Valid_auc :  0.7879396786649973  ,& Test_auc :  0.7867326742326743  Avg Loss:  0.31018149989136196 Train Time (1m 39s) Eval Time (2m 17s)
Epoch  2  Train_auc : 0.8328543859317925  , Valid_auc :  0.7942590566176703  ,& Test_auc :  0.8036399833274833  Avg Loss:  0.29449717447919366 Train Time (1m 40s) Eval Time (2m 17s)
Epoch  3  Train_auc : 0.8468364619705285  , Valid_auc :  0.8024855062551951  ,& Test_auc :  0.8135030931905933  Avg Loss:  0.28779094989522036 Train Time (1m 39s) Eval Time (2m 18s)
Epoch  4  Train_auc : 0.8536931596892955  , Valid_auc :  0.811376142927789  ,& Test_auc :  0.8199531512031513  Avg Loss:  0.28420023837332 Train Time (1m 41s) Eval Time (2m 18s)
Epoch  5  Train_auc : 0.8571772880631728  , Valid_auc :  0.8138340295241002  ,& Test_auc :  0.8