## Data Preprocessing

In [11]:
import os
import sys
import pickle
import random
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

# Reproducibility: push one seed into every RNG this notebook relies on
# (Python's `random`, NumPy, and PyTorch on CPU and CUDA).
seed = 24
for _seed_fn in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
    _seed_fn(seed)
del _seed_fn  # keep the notebook namespace free of the loop helper

# Run configuration shared by the cells below.
device = torch.device('cpu')  # device handed to the data loaders
task = 'h'                    # label selector passed to EHRDataset as `label=`
batch_size = 32               # mini-batch size passed to EHRDataset

In [4]:
# run preprocessor
# `preprocess` comes from the project-local `run_preprocess` module. According to
# the output recorded under this cell it reports dataset statistics, builds the
# train/valid/test splits with their features, labels, neighbors and middles, and
# saves the encoded and standard data.
# NOTE(review): this runs unconditionally on every "Run All"; if it is slow,
# consider skipping it when the saved data already exists - confirm the output paths first.
from run_preprocess import preprocess
preprocess()

patient num: 7493
max admission num: 42
mean admission num: 2.66
max code num in an admission: 39
mean code num in an admission: 13.06
encoding code ...
There are 4880 codes
generating code levels ...
	100%00%
There are 5712 train, 781 valid, 1000 test samples
generating code code adjacent matrix ...
	5712 / 5712
building train codes features and labels ...
	5712 / 5712
building valid codes features and labels ...
	781 / 781
building test codes features and labels ...
	1000 / 1000
generating train neighbors ...
	5712 / 5712
generating valid neighbors ...
	781 / 781
generating test neighbors ...
	1000 / 1000
generating train middles ...
	5712 / 5712
generating valid middles ...
	781 / 781
generating test middles ...
	1000 / 1000
building train heart failure labels ...
building valid heart failure labels ...
building test heart failure labels ...
saving encoded data ...
saving standard data ...
	saving training data
	saving valid data
	saving test data


## Load Data

In [15]:

# TODO: Add a short walkthrough of each sub-dataset (code_x, visit_lens, divided, y,
#       neighbors, code_adj), ideally printing a small example of each.
# TODO: Add initial data analysis: 1) the number/ratio of heart-failure patients,
#       2) summary statistics for the neighbors.
# Possible TODO: build our own data pipeline on top of torch.utils.data.DataLoader,
#       as done in the homework assignments.

# NOTE(review): `load_sparse` is imported but not used in the cells shown here.
from preprocess import load_sparse
from utils import load_adj, EHRDataset
DATA_PATH = "data/mimic3/"

# Code-code adjacency matrix loaded from the "standard" data directory.
code_adj = load_adj(os.path.join(DATA_PATH, 'standard'), device=device)
# One EHRDataset per split; only the training split is shuffled.
train_data = EHRDataset(os.path.join(DATA_PATH, 'standard/train'), label=task, batch_size=batch_size, shuffle=True, device=device)
valid_data = EHRDataset(os.path.join(DATA_PATH, 'standard/valid'), label=task, batch_size=batch_size, shuffle=False, device=device)
test_data = EHRDataset(os.path.join(DATA_PATH, 'standard/test'), label=task, batch_size=batch_size, shuffle=False, device=device)


## Model

In [17]:
class GraphLayer(nn.Module):
    """Aggregate code embeddings over a static code-code adjacency matrix.

    For one visit the layer receives two 0/1 masks over the code vocabulary:
    the codes diagnosed in the visit (`code_x`) and their neighbor codes
    (`neighbor`). Each mask selects rows of a learnable embedding table; the
    selected embeddings are mixed through the adjacency matrix and projected
    to `graph_size` with a linear layer followed by LeakyReLU.

    Args:
        adj: code-code adjacency matrix, multiplied from the left onto a
            `(code_num, code_size)` embedding matrix.
        code_num: number of distinct codes (rows of the embedding table).
        code_size: dimensionality of each code embedding.
        graph_size: output dimensionality of the layer.
    """

    def __init__(self, adj, code_num, code_size, graph_size):
        super().__init__()
        # One embedding table shared by center (diagnosis) and neighbor codes.
        self.embedding = nn.Embedding(code_num, code_size)
        self.adj = adj
        # Fully connected layer
        self.fc = nn.Linear(code_size, graph_size)
        self.LeakyReLU = nn.LeakyReLU()

    def forward(self, code_x, neighbor):
        """Compute hidden embeddings for diagnosis codes and neighbor codes.

        Args:
            code_x: 0/1 mask whose last dimension is `code_num`, marking the
                codes present in the current visit (integer or float dtype).
            neighbor: 0/1 mask of the same shape marking the neighbor codes.

        Returns:
            Tuple `(hidden_diagnosis_embedding, hidden_neighbor_embedding)`,
            each of shape `(..., code_num, graph_size)`.
        """
        # The masks select rows of the *whole* embedding table. They must not
        # be used as lookup indices: indexing with a 0/1 mask would only ever
        # fetch rows 0 and 1, giving every present code the same embedding.
        embedding_table = self.embedding.weight

        # Add a trailing axis so the masks broadcast over the embedding
        # dimension; cast so integer and float masks behave identically.
        center_codes = torch.unsqueeze(code_x, dim=-1).to(embedding_table.dtype)
        neighbor_codes = torch.unsqueeze(neighbor, dim=-1).to(embedding_table.dtype)

        # Rows of absent codes are zeroed out.
        center_embeddings = center_codes * embedding_table
        neighbor_embeddings = neighbor_codes * embedding_table

        adj_mul_center = torch.matmul(self.adj, center_embeddings)
        adj_mul_neighbor = torch.matmul(self.adj, neighbor_embeddings)

        # Masking the product of the full adjacency matrix is used instead of
        # building a per-visit sub-graph adjacency matrix.
        # Diagnosis codes: own embedding + diagnosis context + neighbor context.
        aggregated_diagnosis_embedding = (
            center_embeddings + center_codes * adj_mul_center + center_codes * adj_mul_neighbor
        )
        # Neighbor codes: own embedding + neighbor context + diagnosis context.
        aggregated_neighbor_embedding = (
            neighbor_embeddings + neighbor_codes * adj_mul_neighbor + neighbor_codes * adj_mul_center
        )

        # Hidden embeddings of diagnoses and neighbors (shared projection).
        hidden_diagnosis_embedding = self.LeakyReLU(self.fc(aggregated_diagnosis_embedding))
        hidden_neighbor_embedding = self.LeakyReLU(self.fc(aggregated_neighbor_embedding))
        return hidden_diagnosis_embedding, hidden_neighbor_embedding


In [None]:
class EmbeddingWithAttentionLayer(nn.Module):
    """Pool a sequence of vectors into one vector with learned attention.

    Each input vector is projected to `attention_size`, scored against a
    learnable context vector, and the scores are softmax-normalised along the
    sequence axis. The output is the score-weighted sum of the input vectors.

    Args:
        value_size: dimensionality of each input vector.
        attention_size: dimensionality of the attention projection.
    """

    def __init__(self, value_size, attention_size):
        super().__init__()
        self.attention_size = attention_size
        # Learnable context vector that the projected inputs are scored against.
        self.context = nn.Parameter(data=nn.init.xavier_uniform_(torch.empty(attention_size, 1)))
        self.linear = nn.Linear(value_size, attention_size)

    def forward(self, x):
        """Return the attention-weighted sum of `x` over its sequence axis.

        Args:
            x: tensor of shape `(..., seq_len, value_size)`.

        Returns:
            Tensor of shape `(..., value_size)`.
        """
        # Project every step into the attention space.
        t = self.linear(x)
        # One score per step. squeeze(-1) removes only the trailing singleton
        # axis, so length-1 sequences and size-1 batches keep their shape.
        score = torch.softmax(torch.matmul(t, self.context).squeeze(-1), dim=-1)
        # Final hidden embedding: weight each step by its score and sum.
        output = torch.sum(x * score.unsqueeze(-1), dim=-2)
        return output

In [None]:
class Classifier(nn.Module):
    """Prediction head: dropout -> linear -> activation.

    Args:
        input_size: dimensionality of the input features.
        output_size: number of outputs.
        dropout_rate: dropout probability applied to the input features.
            Defaults to 0.5.
        activation: callable applied to the linear output. When None
            (the default) a sigmoid is used.
    """

    def __init__(self, input_size, output_size, dropout_rate=0.5, activation=None):
        super().__init__()
        self.linear = nn.Linear(input_size, output_size)
        self.activation = torch.nn.Sigmoid() if activation is None else activation
        self.dropout = nn.Dropout(p=dropout_rate)

    def forward(self, x):
        """Map features `x` of shape `(..., input_size)` to `(..., output_size)`."""
        # Dropout regularises the input features. Applying it to the logits
        # instead would randomly zero or rescale the predictions themselves
        # during training (a zeroed logit becomes sigmoid(0) = 0.5).
        output = self.activation(self.linear(self.dropout(x)))
        return output

In [None]:
# still need further editing to integrate
class Model(nn.Module):
    """End-to-end model: graph layer -> transition layer -> attention -> classifier.

    For every patient the visits are processed in order. Each visit goes
    through the graph layer and the transition layer; the per-visit outputs
    are pooled by the attention layer and the pooled vectors of all patients
    are classified together.

    NOTE(review): `TransitionLayer` is not defined in the cells shown here; its
    constructor and call signatures are taken from how this class already used it.

    Args:
        code_num: number of distinct codes.
        code_size: dimensionality of the code embeddings.
        adj: code-code adjacency matrix handed to the graph layer.
        graph_size: output dimensionality of the graph layer.
        hidden_size: size of the per-visit output that is pooled and classified.
        t_attention_size: forwarded to `TransitionLayer`.
        t_output_size: forwarded to `TransitionLayer`.
        output_size: number of outputs of the classifier.
        dropout_rate: accepted for interface compatibility; not forwarded,
            because the two-argument `Classifier(input_size, output_size)`
            constructor is used.
        activation: accepted for interface compatibility; not forwarded for
            the same reason.
    """

    def __init__(self, code_num, code_size,
                 adj, graph_size, hidden_size, t_attention_size, t_output_size,
                 output_size, dropout_rate, activation):
        super().__init__()
        # GraphLayer owns the code embedding table, so it needs code_num too.
        self.graph_layer = GraphLayer(adj, code_num, code_size, graph_size)
        self.transition_layer = TransitionLayer(code_num, graph_size, hidden_size, t_attention_size, t_output_size)
        self.attention = EmbeddingWithAttentionLayer(hidden_size, 32)
        self.classifier = Classifier(hidden_size, output_size)
        # Learnable embeddings passed to the transition layer as `u_embeddings`.
        # NOTE(review): the (code_num, graph_size) shape is an assumption based on
        # the transition layer consuming graph-layer outputs - confirm against
        # TransitionLayer.
        self.u_embeddings = nn.Parameter(data=nn.init.xavier_uniform_(torch.empty(code_num, graph_size)))

    def forward(self, code_x, divided, neighbors, lens):
        """Run the model on a batch of patients.

        Args:
            code_x: per-patient sequences of per-visit code masks.
            divided: per-patient sequences of per-visit inputs for the
                transition layer.
            neighbors: per-patient sequences of per-visit neighbor masks.
            lens: number of valid visits of each patient.

        Returns:
            Classifier output for the stacked per-patient vectors.
        """
        output = []
        for code_x_i, divided_i, neighbor_i, len_i in zip(code_x, divided, neighbors, lens):
            no_embeddings_i_prev = None
            output_i = []
            h_t = None
            # Zipping with range(len_i) truncates the loop to the first len_i visits.
            for t, (c_it, d_it, n_it, _) in enumerate(zip(code_x_i, divided_i, neighbor_i, range(len_i))):
                co_embeddings, no_embeddings = self.graph_layer(c_it, n_it)
                output_it, h_t = self.transition_layer(
                    t, co_embeddings, d_it, no_embeddings_i_prev, self.u_embeddings, h_t)
                no_embeddings_i_prev = no_embeddings
                output_i.append(output_it)
            # Pool the visit sequence of this patient into a single vector.
            output_i = self.attention(torch.vstack(output_i))
            output.append(output_i)
        output = torch.vstack(output)
        output = self.classifier(output)
        return output