In [990]:
import os
import itertools
import numpy as np
import nltk
import math
import pickle

import torch
import torch.nn as nn
import torch.nn.functional as F
from tqdm import tqdm

In [1042]:
class Config:
    dataset = "resource"  # change this to the right data name
    data_path = "./"
    n_topics = 50
    learning_rate = 0.00002
    vocab_size = 619
    n_stops = 22 
    lda_vocab_size = vocab_size - n_stops
    n_hidden = 200
    n_layers = 2
    input_dim = 619
    gru_embedding_dim = 100
    projector_embed_dim = 100
    generator_embed_dim = 200
    max_len = 300

In [1043]:
import pickle as cPickle
import numpy as np

def save_pkl(path, obj):
    with open(path, 'w') as f:
        cPickle.dump(obj, f)
    print(" [*] save %s" % path)

def load_pkl(path):
    with open(path, 'rb') as f:
        obj = cPickle.load(f)
    print(" [*] load %s" % path)
    return obj

def save_npy(path, obj):
    np.save(path, obj)
    print(" [*] save %s" % path)

def load_npy(path):
    obj = np.load(path)
    print(" [*] load %s" % path)
    return obj

In [1044]:
DATA_PATH = './'

In [1045]:
vocab = pickle.load(open(os.path.join(DATA_PATH,'vocab.pkl'), 'rb'))
x_train = pickle.load(open(os.path.join(DATA_PATH,'X_train.pkl'), 'rb'))
y_train = pickle.load(open(os.path.join(DATA_PATH,'Y_train.pkl'), 'rb'))
x_val = pickle.load(open(os.path.join(DATA_PATH,'X_valid.pkl'), 'rb'))
y_val = pickle.load(open(os.path.join(DATA_PATH,'Y_valid.pkl'), 'rb'))

global_config = Config()

assert len(x_train) == len(y_train)
print(len(vocab))

490


In [1046]:
from torch.utils.data import Dataset


class CustomDataset(Dataset):
    def __init__(self, x, y):
        self.x = x
        self.y = y
    
    def __len__(self):
        
        """
        TODO: Return the number of samples (i.e. patients).
        """
        
        # your code here
        #raise NotImplementedError
        return len(self.x)
    
    def __getitem__(self, index):
        
        """
        TODO: Generates one sample of data.
        
        Note that you DO NOT need to covert them to tensor as we will do this later.
        """
        
        # your code here
        #raise NotImplementedError
        return (self.x[index],self.y[index])
        
    def getVisits(self,index):
        visits =  []
        labels = []
        for i,v in enumerate(self.x[index]):
            visits.append(v)
            labels.append(self.y[index][i])
        return visits,labels

In [1047]:
train_dataset = CustomDataset(x_train, y_train)
val_dataset = CustomDataset(x_val, y_val)


## Collate Function.
This function convets the data to tensor format. Dataloader calls the collate finction after fecthing a sample set from the customdataset to collate the list of samples into batches


In [997]:
def collate_fn(data): 
    
    vocabsize= 619#global_config.vocab_size
    maxlen = 300#global_config.max_len
    seqs, labels = zip(*data)
    lengths = [len(s) for s in seqs]

    eventSeq = []

    for seq in seqs:
        t = []
        for visit in seq:
            t.extend(visit)
        eventSeq.append(t)
    eventLengths = [len(s) for s in eventSeq]


    if maxlen is not None:
        new_seqs = []
        new_lengths = []
        new_labels = []
        for l, s, la in zip(lengths, seqs, labels):
            if l < maxlen:
                new_seqs.append(s)
                new_lengths.append(l)
                new_labels.append(la)
            else:
                new_seqs.append(s[:maxlen])
                new_lengths.append(maxlen)
                new_labels.append(la[:maxlen])
        lengths = new_lengths
        seqs = new_seqs
        labels = new_labels

        if len(lengths) < 1:
            return None, None, None

    n_samples = len(seqs)
    #maxlen = np.max(lengths)

    x = torch.zeros((n_samples, maxlen, vocabsize),dtype=torch.float)
    x_mask = torch.zeros((n_samples, maxlen,vocabsize),dtype=torch.bool)
    y = torch.zeros((n_samples, maxlen),dtype=torch.float)
    for idx, s in enumerate(seqs):
        x_mask[idx, :lengths[idx]] = 1
        for j, sj in enumerate(s):
            for tsj in sj:
                x[idx, j, tsj-1] = 1
    for idx, t in enumerate(labels):
        y[idx,:lengths[idx]] = torch.tensor(t, dtype=torch.float)
        #if lengths[idx] < maxlen:
          #  y[idx,lengths[idx]:] = t[-1]

    return x, x_mask, y#, lengths, eventLengths

In [998]:
from torch.utils.data import DataLoader

loader = DataLoader(train_dataset, batch_size=1, collate_fn=collate_fn, drop_last=True)
loader_iter = iter(loader)
x, masks, y = next(loader_iter)


print(x.shape)
print(x[0][3])
print(masks.shape)
print(y.shape)

torch.Size([1, 300, 619])
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0.

In [None]:
from torch.utils.data import DataLoader

def load_data(train_dataset, val_dataset, collate_fn):
    
    '''
    TODO: Implement this function to return the data loader for  train and validation dataset. 
    Set batchsize to 32. Set `shuffle=True` only for train dataloader.
    
    Arguments:
        train dataset: train dataset of type `CustomDataset`
        val dataset: validation dataset of type `CustomDataset`
        collate_fn: collate function
        
    Outputs:
        train_loader, val_loader: train and validation dataloaders
    
    Note that you need to pass the collate function to the data loader `collate_fn()`.
    '''
    
    batch_size = 1
    # your code here
    #raise NotImplementedError
    train_loader = DataLoader(dataset = train_dataset, batch_size = batch_size,shuffle=True,collate_fn=collate_fn)
    val_loader = DataLoader(dataset = val_dataset,batch_size = batch_size,shuffle=False,collate_fn=collate_fn, drop_last = True )
   
    
   
    return train_loader, val_loader

In [None]:
train_loader, val_loader = load_data(train_dataset,val_dataset,collate_fn )
val_loader_iter = iter(val_loader)
x, masks, y = next(val_loader_iter)

In [None]:
print(x.shape)
print(masks.shape)
print(y.shape)

In [None]:
def sum_embeddings_with_mask(x, masks):
    """
    Mask select the embeddings for true visits (not padding visits) and then sum the embeddings for each visit up.

    Arguments:
        x: the embeddings of diagnosis sequence of shape (batch_size, # visits, # diagnosis codes, embedding_dim)
        masks: the padding masks of shape (batch_size, # visits, # diagnosis codes)

    Outputs:
        sum_embeddings: the sum of embeddings of shape (batch_size, # visits, embedding_dim)
    """
    #print("sum_embeddings_with_mask x.shape",x.shape)
    #print("sum_embeddings_with_mask masks.shape",masks.shape)
    #print("unsqueezed mask looks like", masks.unsqueeze(-1).shape)
    x = x * masks.unsqueeze(-1)
    #print("x.shape after masking", x.shape)
    x = torch.sum(x, dim = -2)
    #print("x after torch.sum", x.shape)
    return x

## GRU MODEL

# Building  GRU to get local context
GRU takes input is visits and gives out hidden state hn, which we need as an input to our logistic regression


Input :  visit dimensions (300 *619)
output : hx , dimensions ( 300 * 100)

In [1037]:
class GRU(nn.Module):
    def __init__(self, num_codes, embedding_dim, num_layers, output_size):
        super(GRU, self).__init__()
        self.num_layers = num_layers
        self.hidden_size = embedding_dim
        self.embedding = nn.Embedding(num_codes, embedding_dim)
        self.sig = nn.Linear(100,y.shape[1])

        self.gru  = nn.GRU(embedding_dim,embedding_dim, batch_first=True)
        
    def forward(self, x, masks):
        h0 =  torch.zeros(self.num_layers, x.size(0), self.hidden_size, dtype = torch.float)
        #print(x.shape)
        x = x.type(torch.LongTensor)
        x =  self.embedding(x)
        x = sum_embeddings_with_mask(x,masks)
        ht,_ = self.gru(x,h0)
        #Out : #catch_size, #seq_len, #hidden_size : N, num_of_visits, embed_dim
        
        #needed  out : N,embed_dim
        ht = ht[:,-1,:]
        #print(ht.shape)
        return ht
    

In [1038]:
gru_model = GRU(global_config.input_dim, 100,1,200)
out = gru_model(x,masks )
print(out)
gru_optimizer = torch.optim.Adam(gru_model.parameters(), lr=0.00002)
gru_lossFn = nn.BCELoss()


tensor([[-0.0667,  0.1270, -0.0573,  ..., -0.0619,  0.0554,  0.0395],
        [-0.0667,  0.1270, -0.0573,  ..., -0.0619,  0.0554,  0.0395],
        [-0.0667,  0.1270, -0.0573,  ..., -0.0619,  0.0554,  0.0395],
        ...,
        [-0.0667,  0.1270, -0.0573,  ..., -0.0619,  0.0554,  0.0395],
        [-0.0667,  0.1270, -0.0573,  ..., -0.0619,  0.0554,  0.0395],
        [-0.0667,  0.1270, -0.0573,  ..., -0.0619,  0.0554,  0.0395]],
       grad_fn=<SliceBackward0>)


# Loss calculation for GRU Model


In [1039]:
gru_loss_fn = nn.BCELoss()

In [1040]:
def get_GRU_loss(x_hat,  y ):
    y_hat = torch.sigmoid(x_hat)
    loss = gru_loss_fn(y_hat,y)
    return loss
    

# Train GRU Model

In [1041]:
def gru_fit(model, train_loader, val_loader,n_epochs):
    for epoch in range(n_epochs):
        model.train()
        train_loss = 0.0
        for x, masks, y in train_loader:
            gru_optimizer.zero_grad()
            hx = model(x,masks)
            
            mask =  np.where(torch.sum(y,dim=1)>0 )
            y[mask]=1
            #print("Y shape", y.shape)
            loss = get_GRU_loss(hx,y)
            train_loss += loss.item()
            loss.backward()
            optimizer.step()
            
        train_loss = train_loss / len(train_loader)  
        print('Epoch: {} \t Training Loss: {:.6f}'.format(epoch+1, train_loss))

           # print("mask looks like", mask)
           # print("y looks like: \n",y.shape,"\n ", y)
gru_fit(gru_model,train_loader,val_loader,5)           

KeyboardInterrupt: 

# Recognition Network Model

This network takes the patient data as  input. 
Patient data x has dimensions Batchsize(32) * 300 (Visit size ) * 619 (embeding dim)




In [None]:

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torchvision import transforms
from torch.utils.data import DataLoader  

In [None]:

class REC_NW(nn.Module):
    def __init__(self, input_dim, z_dim = 50, h_dim=100):
        super().__init__()
        # encoder
        self.L1 = nn.Linear(in_features = input_dim, out_features = h_dim, bias = True)
        self.L2 = nn.Linear(h_dim, h_dim)
        
        

        # one for mu and one for stds, note how we only output
        # diagonal values of covariance matrix. Here we assume
        # the pixels are conditionally independent 
        self.hid_2mu = nn.Linear(h_dim, z_dim)
        self.hid_2sigma = nn.Linear(h_dim, z_dim)#(log_var)

        # decoder
        self.z_2hid = nn.Linear(z_dim, h_dim)
        self.hid_2img = nn.Linear(h_dim, input_dim)
        self.log_scale = nn.Parameter(torch.Tensor([0.0]))
        return

    def encode(self, x):
        x = x.type(torch.FloatTensor)
        x = x.squeeze()
    
        h = F.relu(self.L1(x))
        h = F.relu(self.L2(h))
        return h

    def decode(self, z):
        h = F.relu(self.z_2hid(z))
        x_hat = torch.sigmoid(self.hid_2img(h))
        return x_hat
               
                   
    def reparametarize(self,mu, log_var):
        std = torch.exp(0.5*log_var)
        eps = torch.randn_like(std)
        sample = mu + (eps * std)
        return sample
    
    def forward(self, x):
        x = x.type(torch.FloatTensor)
        z = self.encode(x)
        mu = self.hid_2mu(z)
        log_var = self.hid_2sigma(z)
        #print("z in latent space:", z.shape)
        z = self.reparametarize(mu,log_var)
     
        #print("Rec_net Fwd: z looks like", z.shape)
        reconstruct = self.decode(z)
        
   
        return reconstruct,mu,log_var,z

# Loss Calculation for REC_NW

In [None]:
loss_fn = nn.BCELoss()

In [None]:
def get_REC_NW_loss(z,x,mu,log_var):
    bce_loss = loss_fn(z, x)
    loss = bce_loss + KLDivergence( mu, log_var)
    return loss

In [None]:
def KLDivergence( mu, logvar):
        KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
        return KLD

In [None]:
#device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
INPUT_DIM = 619
Z_DIM = 50
H_DIM = 100
NUM_EPOCHS = 10
BATCH_SIZE = 32
LR_RATE = 3e-4
OUT_DIM = 1

# Training REC_NW model

In [None]:
model = REC_NW(INPUT_DIM, Z_DIM,H_DIM)
optimizer = torch.optim.Adam(model.parameters(), lr=0.00002)
loss_fn = nn.BCELoss()

In [1008]:
def REC_NW_fit(model, dataloader):
    model.train()
    total_loss = 0.0
    running_loss =0.0
    for x, masks, y in dataloader:
        
        #x = x.view(-1, INPUT_DIM)
        x = torch.nn.functional.normalize(x, dim=2)
        #for patient in x:
        optimizer.zero_grad()
            #patient =torch.sum(patient,dim =0)
            #print(patient.shape)
        x_hat, mu, logvar ,z= model(x)
            
        bce_loss = loss_fn(x_hat, x)
        loss = bce_loss + KLDivergence( mu, logvar)
        total_loss += loss.item()
        loss.backward()
        optimizer.step()
        
    train_loss = total_loss/len(train_loader)
    print(f"Train Loss: {train_loss:.4f}")
    return train_loss
        

In [1009]:
train_loss = []
val_loss = []
epochs =50
for epoch in range(epochs):
    print(f"Epoch {epoch+1} of {epochs}")
    train_epoch_loss = REC_NW_fit(model, train_loader)
    #val_epoch_loss = validate(model, val_loader)
    train_loss.append(train_epoch_loss)
    #val_loss.append(val_epoch_loss)
    print(f"Train Loss: {train_epoch_loss:.4f}")
    #print(f"Val Loss: {val_epoch_loss:.4f}")

Epoch 1 of 50
Train Loss: 1167.4131
Train Loss: 1167.4131
Epoch 2 of 50
Train Loss: 951.8227
Train Loss: 951.8227
Epoch 3 of 50


KeyboardInterrupt: 

# Ploting latent space


Ablation 1: Recognition Network is a VAE which is projecting the patient data in latent space, using VAE in the architecture makes the data analysis more interpretable

In [None]:
import torch.nn as nn
import torch.nn.functional as F
import torch.utils
import torch.distributions
import torchvision
import numpy as np
import matplotlib.pyplot as plt; plt.rcParams['figure.dpi'] = 200


def plot_latent(model, data, num_batches=30):
    for i, (x,masks, y) in enumerate(data):
        z = model.encode(x)
        z = z.numpy()
        plt.scatter(z[:, 0], z[:, 1], c=y, cmap='tab10')
        if i > num_batches:
            plt.colorbar()
            break

# Logistic Regression layer

ref: https://towardsdatascience.com/logistic-regression-with-pytorch-3c8bbea594be

In [981]:
class LogisticRegression(torch.nn.Module):
    def __init__(self, input_dim, output_dim):
        super(LogisticRegression, self).__init__()
        self.linear = torch.nn.Linear(input_dim,output_dim)
    def forward(self,hx):
        
        out = torch.sigmoid(self.linear(hx))
        out = out.squeeze().reshape(out.shape[1],out.shape[0])
        #print(out.shape, out)
        return out

In [982]:
regression_model = LogisticRegression(100,1)
criterion = torch.nn.BCELoss()

# Calculate Logistic Regression loss


In [984]:
def get_logistic_regressionLoss(y_hat, y):
    #print("y_hat.shape", y_hat.shape)
    #print("y.shape", y.shape)
    loss = criterion(y_hat,y)
    return loss


# CONTENT MODEL
This model brings the GRU, REC_Network and Logistic Regression layers together and apply training on the assembly of networks.


In [985]:
class CONTENT(torch.nn.Module):
    def __init__(self, input_dim, out_dim):
        super(CONTENT, self).__init__()
        self.gru = GRU(global_config.input_dim, 100,1,200)
        self.rec_nw = REC_NW(global_config.input_dim, 50,100)
        self.regression = LogisticRegression(50,1)
        
        
        self.Lb = nn.Linear(in_features =input_dim, out_features =  50)
        self.simpleLinear = nn.Linear(100,50)


        
        return
    def get_Losses(self,x,y):
        rec_nw_loss = get_REC_NW_loss(self.z,x,self.mu,self.log_var)
        gru_loss = get_GRU_loss(self.hx,x)
        regressionloss = get_regression_loss(self.h,x,y)
        
    def forward(self, x,masks,y):
        self.hx = self.gru(x,masks)
        hx_for_loss = self.hx
        self.x_reconstruct,self.mu,self.log_var,z = self.rec_nw(x)
        
        
        lb_out = self.Lb(x)
      
        long_context = torch.mul(z,lb_out)
       
        #print("h shape", self.hx.shape)
        self.hx = self.simpleLinear(self.hx)
        long_context = long_context.view(long_context.shape[1],long_context.shape[0],long_context.shape[2])
        #print("long_context.shape",long_context.shape)
        self.h = torch.add(self.hx,long_context)
        #print("self.h.shape", self.h.shape)
        self.y_hat = self.regression(self.h)
        return self.y_hat,self.mu,self.log_var,self.x_reconstruct, hx_for_loss
                        
        

In [986]:
content_model = CONTENT(INPUT_DIM, OUT_DIM)

# Calculate content model loss


In [987]:
def get_total_loss(x,x_hat,y_hat, mu, log_var,y,hx):
    rec_nw_loss = get_REC_NW_loss(x_hat,x,mu,log_var)
    lr_loss = get_logistic_regressionLoss(y_hat,y)
    gru_loss = get_GRU_loss(hx,y)
    return rec_nw_loss+lr_loss+gru_loss

In [988]:
def content_fit(model, dataloader):
    model.train()
    total_loss = 0.0
    running_content_loss =0.0
    for x, masks, y in dataloader:
        
        #x = x.view(-1, INPUT_DIM)
        #print(x.shape)
        x = torch.nn.functional.normalize(x, dim=1)
        
        optimizer.zero_grad()
        y_hat, mu, log_var,x_reconstruct,hx = model(x,masks,y)
        
        loss = get_total_loss(x,x_reconstruct,y_hat,mu, log_var,y,hx)
        total_loss += loss.item()
        loss.backward()
        optimizer.step()
        
    train_loss = total_loss/len(train_loader)
    print('Epoch: {} \t Training Loss: {:.6f}'.format(epoch+1, train_loss))
    return train_loss

In [989]:
train_loss = []
val_loss = []
epochs =5
for epoch in range(epochs):
    print(f"Epoch {epoch+1} of {epochs}")
    train_epoch_loss = content_fit(content_model, train_loader)
    #val_epoch_loss = validate(model, val_loader)
    train_loss.append(train_epoch_loss)


Epoch 1 of 5


KeyboardInterrupt: 