In [1]:
import json

with open('GNA_Files/train.json','r') as f:
    train_data = json.loads(f.read())
    
with open('GNA_Files/test.json','r') as f:
    test_data = json.loads(f.read())

print(len(train_data))
print(len(test_data))

1017
2035


In [2]:
train_data[0].keys()

dict_keys(['title', 'abstract', 'journal', 'authors', 'pubdate', 'pmid', 'pmc', 'mesh_terms', 'publication_types', 'chemical_list', 'keywords', 'doi', 'references', 'delete', 'affiliations', 'medline_ta', 'nlm_unique_id', 'issn_linking', 'country', 'fullText', 'sentences'])

In [3]:
data = []
train_corpus_sentences = []
for x in train_data:
    train_corpus_sentences.extend(x['sentences'])
    data.extend([(x['pmid'],y) for y in x['sentences']])
    
test_corpus_sentences = []
for x in test_data:
    test_corpus_sentences.extend(x['sentences'])

In [4]:
print(data[0])

('34580243', ['continuous', 'cardiorespiratory', 'monitoring', 'is', 'a', 'dominant', 'source', 'of', 'predictive', 'signal', 'in', 'machine', 'learning', 'for', 'risk', 'stratification', 'and', 'clinical', 'decision', 'support'])


In [5]:
## https://www.analyticsvidhya.com/blog/2020/08/build-a-natural-language-generation-nlg-system-using-pytorch/#h2_1

In [6]:

import re
import pickle
import random

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F

In [7]:
# create sequences of length 5 tokens
def create_seq(sent, seq_len = 5):
    
    sequences = []

    # if the number of tokens in 'text' is greater than 5
    if len(sent) > seq_len:
        for i in range(seq_len, len(sent)):
            # select sequence of tokens
            seq = sent[i-seq_len:i+1]
            # add to the list
            sequences.append(" ".join(seq))

        return sequences

    # if the number of tokens in 'text' is less than or equal to 5
    else:
        return sent + ['<UNK>']*(len(sent)-seq_len)

In [8]:
train_seqs = [create_seq(i) for i in train_corpus_sentences]

# merge list-of-lists into a single list
train_seqs = sum(train_seqs, [])

# count of sequences
len(train_seqs)

203435

In [9]:
train_seqs[40:50]

['there should be caution in assuming',
 'should be caution in assuming that',
 'be caution in assuming that predictive',
 'caution in assuming that predictive models',
 'in assuming that predictive models could',
 'assuming that predictive models could improve',
 'that predictive models could improve clinical',
 'predictive models could improve clinical decision-making',
 'models could improve clinical decision-making beaulieu-joneset',
 'could improve clinical decision-making beaulieu-joneset al2021']

In [10]:
print(train_corpus_sentences[0:3])

[['continuous', 'cardiorespiratory', 'monitoring', 'is', 'a', 'dominant', 'source', 'of', 'predictive', 'signal', 'in', 'machine', 'learning', 'for', 'risk', 'stratification', 'and', 'clinical', 'decision', 'support'], ['beaulieu-jones', 'and', 'coworkers', 'propose', 'a', 'litmus', 'test', 'for', 'the', 'field', 'of', 'predictive', 'analytics-performance', 'improvements', 'must', 'be', 'demonstrated', 'to', 'be', 'the', 'result', 'of', 'non-clinician-initiated', 'data', 'otherwise', 'there', 'should', 'be', 'caution', 'in', 'assuming', 'that', 'predictive', 'models', 'could', 'improve', 'clinical', 'decision-making', 'beaulieu-joneset', 'al2021'], ['they', 'demonstrate', 'substantial', 'prognostic', 'information', 'in', 'unsorted', 'physician', 'orders', 'made', 'before', 'the', 'first', 'midnight', 'of', 'hospital', 'admission', 'and', 'we', 'are', 'persuaded', 'that', 'it', 'is', 'fair', 'to', 'ask-if', 'the', 'physician', 'thought', 'of', 'it', 'first', 'what', 'exactly', 'is', 'ma

In [11]:
len(max(train_corpus_sentences, key=len))

144

In [12]:
# create inputs and targets (x and y)
x = []
y = []

for s in train_seqs:
    x.append(" ".join(s.split()[:-1]))
    y.append(" ".join(s.split()[1:]))

In [13]:
# create integer-to-token mapping
int2token = {0:'<UNK>'}
cnt = 1

data = []
for sent in train_corpus_sentences+test_corpus_sentences:
    data.extend(sent)

for w in set(data):
    int2token[cnt] = w
    cnt+= 1

# create token-to-integer mapping
token2int = {t: i for i, t in int2token.items()}

print(token2int["the"], int2token[5177])

12071 0.805+/-0.056


In [14]:
# set vocabulary size
vocab_size = len(int2token)
vocab_size

26610

In [15]:
def get_integer_seq(seq):
    return [token2int[w] for w in seq.split()]

# convert text sequences to integer sequences
x_int = [get_integer_seq(i) for i in x]
y_int = [get_integer_seq(i) for i in y]

seq_len = 5
x_int = [i for i in x_int if len(i)==seq_len]
y_int = [i for i in y_int if len(i)==seq_len]

# convert lists to numpy arrays
x_int = np.array(x_int, dtype="int32")
y_int = np.array(y_int, dtype="int32")

In [16]:
print(x_int[0], y_int[0], sep='\n')

[14091 16438 21250 22784 18321]
[16438 21250 22784 18321 24688]


In [17]:
for x in y_int:
    if len(x)!=5:
        print(x)

In [18]:
y_int[0]

array([16438, 21250, 22784, 18321, 24688], dtype=int32)

In [19]:
x_int[0]

array([14091, 16438, 21250, 22784, 18321], dtype=int32)

In [20]:
def get_batches(arr_x, arr_y, batch_size):
         
    # iterate through the arrays
    prv = 0
    for n in range(batch_size, arr_x.shape[0], batch_size):
        x = arr_x[prv:n,:]
        y = arr_y[prv:n,:]
        prv = n
        yield x, y

In [21]:
class WordLSTM(nn.Module):
    
    def __init__(self, n_hidden=256, n_layers=4, drop_prob=0.3, lr=0.001):
        super().__init__()

        self.drop_prob = drop_prob
        self.n_layers = n_layers
        self.n_hidden = n_hidden
        self.lr = lr
        
        self.emb_layer = nn.Embedding(vocab_size, 200)

        ## define the LSTM
        self.lstm = nn.LSTM(200, n_hidden, n_layers, 
                            dropout=drop_prob, batch_first=True)
        
        ## define a dropout layer
        self.dropout = nn.Dropout(drop_prob)
        
        ## define the fully-connected layer
        self.fc = nn.Linear(n_hidden, vocab_size)      
    
    def forward(self, x, hidden):
        ''' Forward pass through the network. 
            These inputs are x, and the hidden/cell state `hidden`. '''
        ## pass input through embedding layer
        embedded = self.emb_layer(x)     
        
        ## Get the outputs and the new hidden state from the lstm
        lstm_output, hidden = self.lstm(embedded, hidden)
        
        ## pass through a dropout layer
        out = self.dropout(lstm_output)
        
        #out = out.contiguous().view(-1, self.n_hidden) 
        out = out.reshape(-1, self.n_hidden) 

        ## put "out" through the fully-connected layer
        out = self.fc(out)

        # return the final output and the hidden state
        return out, hidden
    
    
    def init_hidden(self, batch_size):
        ''' initializes hidden state '''
        # Create two new tensors with sizes n_layers x batch_size x n_hidden,
        # initialized to zero, for hidden state and cell state of LSTM
        weight = next(self.parameters()).data

        # if GPU is available
        if (torch.cuda.is_available()):
            hidden = (weight.new(self.n_layers, batch_size, self.n_hidden).zero_().cuda(),
                    weight.new(self.n_layers, batch_size, self.n_hidden).zero_().cuda())
        
        # if GPU is not available
        else:
            hidden = (weight.new(self.n_layers, batch_size, self.n_hidden).zero_(),
                    weight.new(self.n_layers, batch_size, self.n_hidden).zero_())
        
        return hidden

In [22]:
# instantiate the model
net = WordLSTM()

# push the model to GPU (avoid it if you are not using the GPU)
net.cuda()

print(net)

WordLSTM(
  (emb_layer): Embedding(26610, 200)
  (lstm): LSTM(200, 256, num_layers=4, batch_first=True, dropout=0.3)
  (dropout): Dropout(p=0.3, inplace=False)
  (fc): Linear(in_features=256, out_features=26610, bias=True)
)


In [21]:
def train(net, epochs=10, batch_size=32, lr=0.001, clip=1, print_every=32):
    
    # optimizer
    opt = torch.optim.Adam(net.parameters(), lr=lr)
    
    # loss
    criterion = nn.CrossEntropyLoss()
    
    # push model to GPU
    net.cuda()
    
    counter = 0

    net.train()
    
    for e in range(epochs):
        
        losses = []

        # initialize hidden state
        h = net.init_hidden(batch_size)
        
        for x, y in get_batches(x_int, y_int, batch_size):
            counter+= 1
            
            # convert numpy arrays to PyTorch arrays
            inputs, targets = torch.from_numpy(x), torch.from_numpy(y)
            targets = targets.type(torch.LongTensor)
            
            # push tensors to GPU
            inputs, targets = inputs.cuda(), targets.cuda()

            # detach hidden states
            h = tuple([each.data for each in h])

            # zero accumulated gradients
            net.zero_grad()
            
            # get the output from the model
            output, h = net(inputs, h)
            
            # calculate the loss and perform backprop
            loss = criterion(output, targets.view(-1))
            
            losses.append(loss.detach().item())

            # back-propagate error
            loss.backward()

            # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
            nn.utils.clip_grad_norm_(net.parameters(), clip)

            # update weigths
            opt.step()            
            
        print("Epoch: {}/{}...".format(e+1, epochs),"Step: {}...".format(counter),"mean_loss : %0.2f, Perplexity : %0.2f"%(np.mean(losses), np.exp(np.mean(losses))))

In [27]:
# train the model
train(net, batch_size = 32, epochs=20, print_every=256)

Epoch: 1/20... Step: 6338... mean_loss : 7.14, Perplexity : 1264.15
Epoch: 2/20... Step: 12676... mean_loss : 6.40, Perplexity : 603.84
Epoch: 3/20... Step: 19014... mean_loss : 6.00, Perplexity : 403.78
Epoch: 4/20... Step: 25352... mean_loss : 5.70, Perplexity : 298.88
Epoch: 5/20... Step: 31690... mean_loss : 5.50, Perplexity : 244.04
Epoch: 6/20... Step: 38028... mean_loss : 5.34, Perplexity : 208.73
Epoch: 7/20... Step: 44366... mean_loss : 5.21, Perplexity : 183.13
Epoch: 8/20... Step: 50704... mean_loss : 5.10, Perplexity : 164.70
Epoch: 9/20... Step: 57042... mean_loss : 5.02, Perplexity : 150.67
Epoch: 10/20... Step: 63380... mean_loss : 4.94, Perplexity : 139.45
Epoch: 11/20... Step: 69718... mean_loss : 4.87, Perplexity : 130.28
Epoch: 12/20... Step: 76056... mean_loss : 4.81, Perplexity : 122.93
Epoch: 13/20... Step: 82394... mean_loss : 4.77, Perplexity : 117.36
Epoch: 14/20... Step: 88732... mean_loss : 4.72, Perplexity : 111.94
Epoch: 15/20... Step: 95070... mean_loss : 

In [22]:
# predict next token
def predict(net, tkn, h=None):

    # tensor inputs
    x = np.array([[token2int[tkn]]])
    inputs = torch.from_numpy(x)

    # push to GPU
    inputs = inputs.cuda()

    # detach hidden state from history
    h = tuple([each.data for each in h])

    # get the output of the model
    out, h = net(inputs, h)

    # get the token probabilities
    p = F.softmax(out, dim=1).data

    p = p.cpu()

    p = p.numpy()
    p = p.reshape(p.shape[1],)

    # get indices of top 3 values
    top_n_idx = p.argsort()[-3:][::-1]

    # randomly select one of the three indices
    sampled_token_index = top_n_idx[random.sample([0,1,2],1)[0]]

    # return the encoded value of the predicted char and the hidden state
    return int2token[sampled_token_index], h

In [23]:
# function to generate text
def sample(net, size, prime='it is'):
        
    # push to GPU
    net.cuda()
    
    net.eval()

    # batch size is 1
    h = net.init_hidden(1)

    toks = prime.split()

    # predict next token
    for t in prime.split():
        token, h = predict(net, t, h)
    
    toks.append(token)

    # predict subsequent tokens
    for i in range(size-1):
        token, h = predict(net, toks[-1], h)
        toks.append(token)

    return ' '.join(toks)

In [30]:
sample(net, 15)

'it is the leading to patients with a large risk for the first day of patients and'

In [31]:
sample(net, 15, prime = "one of the")

'one of the clinical data to develop the prognostic model to identify the economic cost-effectiveness and to assess'

In [32]:
sample(net, 15, prime = "ordering system")

'ordering system for a randomized review was used to compare a large number and economic studies of'

In [33]:
sample(net, 15, prime = "clinical decision")

'clinical decision support systems for antimicrobial prescribing and patient safety and quality improvement and the basis for'

In [34]:
sample(net, 45, prime = "clinical decision support systems")

'clinical decision support systems for drug therapy and clinical data and a large basis for selecting and implementing a cdss for epilepsy and bleeding in patients and the the the clinical decision making for the clinical impact and the basis for the clinical pharmacist with no significant predictive values'

In [35]:
sample(net, 45, prime = "prescription errors")

'prescription errors of patients without the wells system for a decision support system cdss to identify patients and the second of the clinical trials with the number to be cost-effective for clinical practice in clinical care systems to identify the use of the cdss for a patient'

In [36]:
# !wget https://www.dropbox.com/s/699kgut7hdb5tg9/GoogleNews-vectors-negative300.bin.gz?dl=1
# !mv 'GoogleNews-vectors-negative300.bin.gz?dl=1' GoogleNews-vectors-negative300.bin.gz
# !gunzip GoogleNews-vectors-negative300.bin.gz

In [24]:
import io, os, re
import gensim

def init_embeddings(vocab_size, embed_dim, unif):
    return np.random.uniform(-unif, unif, (vocab_size, embed_dim))
    

class EmbeddingsReader:

    @staticmethod
    def from_text(filename, vocab, unif=0.25):
        
        with io.open(filename, "r", encoding="utf-8") as f:
            for i, line in enumerate(f):
                line = line.rstrip("\n ")
                values = line.split(" ")

                if i == 0:
                    # fastText style
                    if len(values) == 2:
                        weight = init_embeddings(len(vocab), values[1], unif)
                        continue
                    # glove style
                    else:
                        weight = init_embeddings(len(vocab), len(values[1:]), unif)
                word = values[0]
                if word in vocab:
                    vec = np.asarray(values[1:], dtype=np.float32)
                    weight[vocab[word]] = vec
        if '[PAD]' in vocab:
            weight[vocab['[PAD]']] = 0.0
        
        embeddings = nn.Embedding(weight.shape[0], weight.shape[1])
        embeddings.weight = nn.Parameter(torch.from_numpy(weight).float())
        return embeddings, weight.shape[1]
    
    @staticmethod
    def from_binary(filename, vocab, unif=0.25):
        def read_word(f):

            s = bytearray()
            ch = f.read(1)

            while ch != b' ':
                s.extend(ch)
                ch = f.read(1)
            s = s.decode('utf-8')
            # Only strip out normal space and \n not other spaces which are words.
            return s.strip(' \n')

        vocab_size = len(vocab)
        with io.open(filename, "rb") as f:
            header = f.readline()
            file_vocab_size, embed_dim = map(int, header.split())
            weight = init_embeddings(len(vocab), embed_dim, unif)
            if '[PAD]' in vocab:
                weight[vocab['[PAD]']] = 0.0
            width = 4 * embed_dim
            for i in range(file_vocab_size):
                word = read_word(f)
                raw = f.read(width)
                if word in vocab:
                    vec = np.fromstring(raw, dtype=np.float32)
                    weight[vocab[word]] = vec
        embeddings = nn.Embedding(weight.shape[0], weight.shape[1])
        embeddings.weight = nn.Parameter(torch.from_numpy(weight).float())
        return embeddings, embed_dim

model = gensim.models.KeyedVectors.load('GNA_Files/wordvectors.kv', mmap='r')
weights = torch.FloatTensor(model.vectors)
class WordLSTM_V2(nn.Module):
    
    def __init__(self, n_hidden=256, n_layers=4, drop_prob=0.3, lr=0.001):
        super().__init__()

        self.drop_prob = drop_prob
        self.n_layers = n_layers
        self.n_hidden = n_hidden
        self.lr = lr
        self.emb_layer, self.emb_dim = EmbeddingsReader.from_binary("GNA_Files/word2vec.bin", token2int)
        # self.emb_layer, self.emb_dim = EmbeddingsReader.from_binary("GoogleNews-vectors-negative300.bin", token2int) 
        #self.emb_layer = nn.Embedding.from_pretrained(weights) # nn.Embedding(vocab_size, 300)

        ## define the LSTM
        self.lstm = nn.LSTM(self.emb_dim, n_hidden, n_layers, 
                            dropout=drop_prob, batch_first=True)
        
        ## define a dropout layer
        self.dropout = nn.Dropout(drop_prob)
        
        ## define the fully-connected layer
        self.fc = nn.Linear(n_hidden, vocab_size)      
    
    def forward(self, x, hidden):
        ''' Forward pass through the network. 
            These inputs are x, and the hidden/cell state `hidden`. '''
        ## pass input through embedding layer
        embedded = self.emb_layer(x)     
        
        ## Get the outputs and the new hidden state from the lstm
        lstm_output, hidden = self.lstm(embedded, hidden)
        
        ## pass through a dropout layer
        out = self.dropout(lstm_output)
        
        #out = out.contiguous().view(-1, self.n_hidden) 
        out = out.reshape(-1, self.n_hidden) 

        ## put "out" through the fully-connected layer
        out = self.fc(out)

        # return the final output and the hidden state
        return out, hidden
    
    
    def init_hidden(self, batch_size):
        ''' initializes hidden state '''
        # Create two new tensors with sizes n_layers x batch_size x n_hidden,
        # initialized to zero, for hidden state and cell state of LSTM
        weight = next(self.parameters()).data

        # if GPU is available
        if (torch.cuda.is_available()):
            hidden = (weight.new(self.n_layers, batch_size, self.n_hidden).zero_().cuda(),
                    weight.new(self.n_layers, batch_size, self.n_hidden).zero_().cuda())
        
        # if GPU is not available
        else:
            hidden = (weight.new(self.n_layers, batch_size, self.n_hidden).zero_(),
                    weight.new(self.n_layers, batch_size, self.n_hidden).zero_())
        
        return hidden

In [38]:
# instantiate the model
net = WordLSTM_V2()

# push the model to GPU (avoid it if you are not using the GPU)
net.cuda()

print(net)

  vec = np.fromstring(raw, dtype=np.float32)


WordLSTM_V2(
  (emb_layer): Embedding(26610, 300)
  (lstm): LSTM(300, 256, num_layers=4, batch_first=True, dropout=0.3)
  (dropout): Dropout(p=0.3, inplace=False)
  (fc): Linear(in_features=256, out_features=26610, bias=True)
)


In [39]:
# for name, param in net.named_parameters():
#     print('name: ', name)
#     print(type(param))
#     print('param.shape: ', param.shape)
#     print('param.requires_grad: ', param.requires_grad)
#     print('=====')

In [40]:
# train the model
train(net, batch_size = 32, epochs=20, print_every=256)

Epoch: 1/20... Step: 6338... mean_loss : 7.32, Perplexity : 1506.26
Epoch: 2/20... Step: 12676... mean_loss : 6.88, Perplexity : 969.90
Epoch: 3/20... Step: 19014... mean_loss : 6.42, Perplexity : 615.95
Epoch: 4/20... Step: 25352... mean_loss : 5.96, Perplexity : 387.34
Epoch: 5/20... Step: 31690... mean_loss : 5.65, Perplexity : 284.04
Epoch: 6/20... Step: 38028... mean_loss : 5.45, Perplexity : 231.73
Epoch: 7/20... Step: 44366... mean_loss : 5.30, Perplexity : 199.93
Epoch: 8/20... Step: 50704... mean_loss : 5.18, Perplexity : 177.34
Epoch: 9/20... Step: 57042... mean_loss : 5.08, Perplexity : 161.25
Epoch: 10/20... Step: 63380... mean_loss : 4.99, Perplexity : 146.50
Epoch: 11/20... Step: 69718... mean_loss : 4.91, Perplexity : 135.63
Epoch: 12/20... Step: 76056... mean_loss : 4.84, Perplexity : 126.94
Epoch: 13/20... Step: 82394... mean_loss : 4.78, Perplexity : 119.38
Epoch: 14/20... Step: 88732... mean_loss : 4.73, Perplexity : 113.05
Epoch: 15/20... Step: 95070... mean_loss : 

In [41]:
 ## Pre-trained CDSS Word2Vec
print(sample(net, 45, prime = "clinical decision support systems"),"\n")
print(sample(net, 45, prime = "prescription errors"),"\n")
print(sample(net, 45, prime = "one of the"),"\n")
print(sample(net, 45, prime = "asthma management programs for primary care providers increasing adherence to asthma guidelines"))


prescription errors and the use in order to predict the incidence of venous partial vte risk factors and a positive predictive value was used for further review and the development and development of an electronic health records ehrs for a patient and clinical outcomes and their health 

one of the most important and more common in terms of clinical decision rule for resuming extremity to the use of a cdss for predicting patients who had 18,008 provoking vte is a leading cause of patients who are likely with a valid model and the use and 

asthma management programs for primary care providers increasing adherence to asthma guidelines and clinical impact of the proposed framework is to develop the clinical trial is conducted to develop a decision tree algorithm and clinical decision rule for the detection and management strategies for the use and the use of clinical decision making and clinical practice and


In [47]:
sample(net, 45, prime = "clinical decision support systems") ## Pre-trained Google-News

'clinical decision support systems cdss are used to identify patients with a large patient in patients who were randomly assigned to either the final and superficial vein thrombosis uedvt n < 0.001 and bleeding management and the clinical outcomes of the clinical practice and management of patients who had'

In [48]:
sample(net, 45, prime = "prescription errors")

'prescription errors and to identify the first step for clinical outcomes of patients with a decision rule for resuming oac prescriptions and a decision support system cdss for resuming oac therapy in patients and the cockcroft-gault y had recurrent vte vs. uesvt had a high risk for'

In [49]:
sample(net, 15, prime = "one of the")

'one of the most common risk of the number and clinical outcomes in clinical medicine in clinical medicine'

In [52]:
sample(net, 15, prime = "asthma management programs for primary care providers increasing adherence to asthma guidelines")

'asthma management programs for primary care providers increasing adherence to asthma guidelines in the emergency care applied for a clinical decision making in the management process in'

In [53]:
sample(net, 15, prime = "although a number of studies have evaluated the effectiveness of computerized decision-support systems cdss there is lack of data on user perspectives barriers and")

'although a number of studies have evaluated the effectiveness of computerized decision-support systems cdss there is lack of data on user perspectives barriers and satisfaction of clinical decision rules for a clinical decision aid with an electronic health record-based'

In [56]:
torch.save(net.state_dict(), 'GNA_Files/BILM_google_emb.pth')

In [None]:
# model_new = WordLSTM_V2()
# model_new.load_state_dict(torch.load('GNA_Files/BILM_google_emb.pth'))

In [42]:
## Bi-Directional LSTM
class WordLSTM_V3(nn.Module):
    
    def __init__(self, n_hidden=256, n_layers=4, drop_prob=0.3, lr=0.001):
        super().__init__()

        self.drop_prob = drop_prob
        self.n_layers = n_layers
        self.n_hidden = n_hidden
        self.lr = lr
        
        self.emb_layer, self.emb_dim = EmbeddingsReader.from_binary("GNA_Files/word2vec.bin", token2int)
        # self.emb_layer, self.emb_dim = EmbeddingsReader.from_binary("GoogleNews-vectors-negative300.bin", token2int) 

        ## define the LSTM
        self.lstm = nn.LSTM(self.emb_dim, n_hidden, n_layers, 
                            dropout=drop_prob, batch_first=True, bidirectional=True)
        
        ## define a dropout layer
        self.dropout = nn.Dropout(drop_prob)
        
        ## define the fully-connected layer
        self.fc = nn.Linear(n_hidden*2, vocab_size)      
    
    def forward(self, x, hidden):
        ''' Forward pass through the network. 
            These inputs are x, and the hidden/cell state `hidden`. '''
        ## pass input through embedding layer
        embedded = self.emb_layer(x)     
        
        ## Get the outputs and the new hidden state from the lstm
        lstm_output, hidden = self.lstm(embedded, hidden)
        
        ## pass through a dropout layer
        out = self.dropout(lstm_output)
        
        #out = out.contiguous().view(-1, self.n_hidden*2) 
        out = out.reshape(-1, self.n_hidden*2) 

        ## put "out" through the fully-connected layer
        out = self.fc(out)

        # return the final output and the hidden state
        return out, hidden
    
    
    def init_hidden(self, batch_size):
        ''' initializes hidden state '''
        # Create two new tensors with sizes n_layers x batch_size x n_hidden,
        # initialized to zero, for hidden state and cell state of LSTM
        weight = next(self.parameters()).data

        # if GPU is available
        if (torch.cuda.is_available()):
            hidden = (weight.new(self.n_layers*2, batch_size, self.n_hidden).zero_().cuda(),
                    weight.new(self.n_layers*2, batch_size, self.n_hidden).zero_().cuda())
        
        # if GPU is not available
        else:
            hidden = (weight.new(self.n_layers*2, batch_size, self.n_hidden).zero_(),
                    weight.new(self.n_layers*2, batch_size, self.n_hidden).zero_())
        return hidden

In [43]:
# instantiate the model
net3 = WordLSTM_V3()

# push the model to GPU (avoid it if you are not using the GPU)
net3.cuda()

print(net3)

  vec = np.fromstring(raw, dtype=np.float32)


WordLSTM_V3(
  (emb_layer): Embedding(26610, 300)
  (lstm): LSTM(300, 256, num_layers=4, batch_first=True, dropout=0.3, bidirectional=True)
  (dropout): Dropout(p=0.3, inplace=False)
  (fc): Linear(in_features=512, out_features=26610, bias=True)
)


In [44]:
# train the model
train(net3, batch_size = 32, epochs=20, print_every=256)

Epoch: 1/20... Step: 6338... mean_loss : 7.49, Perplexity : 1791.99
Epoch: 2/20... Step: 12676... mean_loss : 6.85, Perplexity : 947.29
Epoch: 3/20... Step: 19014... mean_loss : 4.55, Perplexity : 94.66
Epoch: 4/20... Step: 25352... mean_loss : 2.93, Perplexity : 18.79
Epoch: 5/20... Step: 31690... mean_loss : 2.24, Perplexity : 9.37
Epoch: 6/20... Step: 38028... mean_loss : 1.84, Perplexity : 6.33
Epoch: 7/20... Step: 44366... mean_loss : 1.59, Perplexity : 4.91
Epoch: 8/20... Step: 50704... mean_loss : 1.46, Perplexity : 4.32
Epoch: 9/20... Step: 57042... mean_loss : 1.35, Perplexity : 3.84
Epoch: 10/20... Step: 63380... mean_loss : 1.28, Perplexity : 3.60
Epoch: 11/20... Step: 69718... mean_loss : 1.23, Perplexity : 3.43
Epoch: 12/20... Step: 76056... mean_loss : 1.17, Perplexity : 3.22
Epoch: 13/20... Step: 82394... mean_loss : 1.13, Perplexity : 3.10
Epoch: 14/20... Step: 88732... mean_loss : 1.12, Perplexity : 3.06
Epoch: 15/20... Step: 95070... mean_loss : 1.08, Perplexity : 2.9

In [45]:
 ## Pre-trained CDSS Word2Vec + Bi-Directional LM with 4 LSTM layers
print(sample(net, 45, prime = "clinical decision support systems"),"\n")
print(sample(net, 45, prime = "prescription errors"),"\n")
print(sample(net, 45, prime = "one of the"),"\n")
print(sample(net, 45, prime = "ordering system"),"\n")
print(sample(net, 45, prime = "asthma management programs for primary care providers increasing adherence to asthma guidelines"))

clinical decision support systems for improving quality of life and quality and care of the clinical impact on the use on health outcomes of clinical decision support system cdss to assess whether cdss to assess the impact on quality of care in a clinical decision rule to assess if 

prescription errors and a clinical decision support systems for the management and treatment of patients with uedvt in the united states management in patients with the use of a computerized clinical decision support systems cdss and workflow and patient care to identify patients with the aim and 

one of the study was to determine a wide range of concept of the clinical decision rule in a chinese setting in a clinical decision making to identify the use of a decision rule to assess whether cdss for patients with recurrence and economic models and thus reduce 

ordering system and a clinical decision tree and clinical practice guidelines cpgs in the emergency departments in clinical trials in this paper we

In [112]:
sample(net3, 45, prime = "clinical decision support systems")

'clinical decision support systems support to use of cdss of the in the in patient management these management these study in cdss in outcomes with terms with terms as context of terms in terms from a from a had the reduction the reduction a protocol for content by challenges'

In [113]:
sample(net3, 45, prime = "ordering system")

'ordering system ordering system trigger system to it will review will be will been will most used more evaluated also identified and order the identified the effect on context we aim and effectiveness of lack of a for two by a clinical an decision medical impact support'

In [25]:
class WordLSTM_V4(nn.Module):
    
    def __init__(self, n_hidden=256, n_layers=1, drop_prob=0.3, lr=0.001):
        super().__init__()

        self.drop_prob = drop_prob
        self.n_layers = n_layers
        self.n_hidden = n_hidden
        self.lr = lr
        
        self.emb_layer, self.emb_dim = EmbeddingsReader.from_binary("GNA_Files/word2vec.bin", token2int)
        #self.emb_layer = nn.Embedding(vocab_size, 200)

        ## define the LSTM
        self.lstm = nn.LSTM(self.emb_dim, n_hidden, n_layers, 
                            dropout=drop_prob, batch_first=True, bidirectional=True)
        
        ## define a dropout layer
        self.dropout = nn.Dropout(drop_prob)
        
        ## define the fully-connected layer
        self.fc = nn.Linear(n_hidden*2, vocab_size)      
    
    def forward(self, x, hidden):
        ''' Forward pass through the network. 
            These inputs are x, and the hidden/cell state `hidden`. '''
        ## pass input through embedding layer
        embedded = self.emb_layer(x)     
        
        ## Get the outputs and the new hidden state from the lstm
        lstm_output, hidden = self.lstm(embedded, hidden)
        
        ## pass through a dropout layer
        out = self.dropout(lstm_output)
        
        #out = out.contiguous().view(-1, self.n_hidden) 
        out = out.reshape(-1, self.n_hidden*2) 

        ## put "out" through the fully-connected layer
        out = self.fc(out)

        # return the final output and the hidden state
        return out, hidden
    
    
    def init_hidden(self, batch_size):
        ''' initializes hidden state '''
        # Create two new tensors with sizes n_layers x batch_size x n_hidden,
        # initialized to zero, for hidden state and cell state of LSTM
        weight = next(self.parameters()).data

        # if GPU is available
        if (torch.cuda.is_available()):
            hidden = (weight.new(self.n_layers*2, batch_size, self.n_hidden).zero_().cuda(),
                    weight.new(self.n_layers*2, batch_size, self.n_hidden).zero_().cuda())
        
        # if GPU is not available
        else:
            hidden = (weight.new(self.n_layers*2, batch_size, self.n_hidden).zero_(),
                    weight.new(self.n_layers*2, batch_size, self.n_hidden).zero_())
        
        return hidden

In [26]:
# instantiate the model
net4 = WordLSTM_V4()

# push the model to GPU (avoid it if you are not using the GPU)
net4.cuda()

print(net4)

  vec = np.fromstring(raw, dtype=np.float32)
WordLSTM_V4(
  (emb_layer): Embedding(26610, 300)
  (lstm): LSTM(300, 256, batch_first=True, dropout=0.3, bidirectional=True)
  (dropout): Dropout(p=0.3, inplace=False)
  (fc): Linear(in_features=512, out_features=26610, bias=True)
)


In [27]:
# train the model
train(net4, batch_size = 32, epochs=20, print_every=256)

Epoch: 1/20... Step: 6338... mean_loss : 4.37, Perplexity : 79.16
Epoch: 2/20... Step: 12676... mean_loss : 2.55, Perplexity : 12.84
Epoch: 3/20... Step: 19014... mean_loss : 1.90, Perplexity : 6.68
Epoch: 4/20... Step: 25352... mean_loss : 1.54, Perplexity : 4.67
Epoch: 5/20... Step: 31690... mean_loss : 1.33, Perplexity : 3.79
Epoch: 6/20... Step: 38028... mean_loss : 1.20, Perplexity : 3.33
Epoch: 7/20... Step: 44366... mean_loss : 1.11, Perplexity : 3.04
Epoch: 8/20... Step: 50704... mean_loss : 1.04, Perplexity : 2.83
Epoch: 9/20... Step: 57042... mean_loss : 0.98, Perplexity : 2.67
Epoch: 10/20... Step: 63380... mean_loss : 0.93, Perplexity : 2.54
Epoch: 11/20... Step: 69718... mean_loss : 0.89, Perplexity : 2.43
Epoch: 12/20... Step: 76056... mean_loss : 0.85, Perplexity : 2.34
Epoch: 13/20... Step: 82394... mean_loss : 0.81, Perplexity : 2.26
Epoch: 14/20... Step: 88732... mean_loss : 0.78, Perplexity : 2.19
Epoch: 15/20... Step: 95070... mean_loss : 0.76, Perplexity : 2.13
Epo

In [29]:
 ## Pre-trained CDSS Word2Vec + Bi-Directional LM with 1 LSTM layers
print(sample(net4, 45, prime = "clinical decision support systems"),"\n")
print(sample(net4, 45, prime = "prescription errors"),"\n")
print(sample(net4, 45, prime = "one of the"),"\n")
print(sample(net4, 45, prime = "ordering system"),"\n")
print(sample(net4, 45, prime = "asthma management programs for primary care providers increasing adherence to asthma guidelines"))

clinical decision support systems and to improve treatment management treatment for treatment of treatment of treatment of type-ii alerts a case study case case case case test characteristics fell to the index the index the index order to alert to provide to provide relevant emerging tools that tools that 

prescription errors in errors in errors in errors in errors in clinical practice and workflow in the icu the icu the hospital the icu database by a university hospital from june 2014 and 2014 to december 2007 n = n and 4 and 4 of the patient 

one of the frequency and bleeding and in other in other medical centers medical centers in the first the first the development and validation study validation and two main two main two main predictors and results and the ewd identifying clinical guidelines were guidelines for the development and 

ordering system ordering system ordering system ordering through the electronic health record to ensure to extract the patient 's health state healt

In [44]:
sample(net4, 45, prime = "clinical decision support systems") ## On-the-fly word Embeddings with Bi-Directional LM

'clinical decision support systems cdsss are cdsss in cdsss in cdsss in which a system has system in the clinical setting a setting in the management and management system which system which is which is an effective form in the context and development implementation of cds for clinical decision-making'

In [118]:
sample(net4, 45, prime = "ordering system")

'ordering system ordering system ordering in cpoe into cpoe towards the cancer a less a modest the modest single capacity of probabilistic case 65 paper of time especially software 40 the knowledge-based pharmacy despite countries of countries especially pharmacy especially of target interview target interview and interview'

In [None]:
# load_fasttext_format

In [47]:
class WordLSTM_V5(nn.Module):
    
    def __init__(self, n_hidden=256, n_layers=4, drop_prob=0.3, lr=0.001):
        super().__init__()

        self.drop_prob = drop_prob
        self.n_layers = n_layers
        self.n_hidden = n_hidden
        self.lr = lr
        
        self.emb_layer, self.emb_dim = EmbeddingsReader.from_binary("GNA_Files/fastText.bin", token2int)
        #self.emb_layer = nn.Embedding(vocab_size, 200)

        ## define the LSTM
        self.lstm = nn.LSTM(self.emb_dim, n_hidden, n_layers, 
                            dropout=drop_prob, batch_first=True, bidirectional=True)
        
        ## define a dropout layer
        self.dropout = nn.Dropout(drop_prob)
        
        ## define the fully-connected layer
        self.fc = nn.Linear(n_hidden*2, vocab_size)      
    
    def forward(self, x, hidden):
        ''' Forward pass through the network. 
            These inputs are x, and the hidden/cell state `hidden`. '''
        ## pass input through embedding layer
        embedded = self.emb_layer(x)     
        
        ## Get the outputs and the new hidden state from the lstm
        lstm_output, hidden = self.lstm(embedded, hidden)
        
        ## pass through a dropout layer
        out = self.dropout(lstm_output)
        
        #out = out.contiguous().view(-1, self.n_hidden) 
        out = out.reshape(-1, self.n_hidden*2) 

        ## put "out" through the fully-connected layer
        out = self.fc(out)

        # return the final output and the hidden state
        return out, hidden
    
    
    def init_hidden(self, batch_size):
        ''' initializes hidden state '''
        # Create two new tensors with sizes n_layers x batch_size x n_hidden,
        # initialized to zero, for hidden state and cell state of LSTM
        weight = next(self.parameters()).data

        # if GPU is available
        if (torch.cuda.is_available()):
            hidden = (weight.new(self.n_layers*2, batch_size, self.n_hidden).zero_().cuda(),
                    weight.new(self.n_layers*2, batch_size, self.n_hidden).zero_().cuda())
        
        # if GPU is not available
        else:
            hidden = (weight.new(self.n_layers*2, batch_size, self.n_hidden).zero_(),
                    weight.new(self.n_layers*2, batch_size, self.n_hidden).zero_())
        
        return hidden

In [48]:
# instantiate the model
net5 = WordLSTM_V5()

# push the model to GPU (avoid it if you are not using the GPU)
net5.cuda()

print(net5)

  vec = np.fromstring(raw, dtype=np.float32)


WordLSTM_V5(
  (emb_layer): Embedding(26610, 300)
  (lstm): LSTM(300, 256, num_layers=4, batch_first=True, dropout=0.3, bidirectional=True)
  (dropout): Dropout(p=0.3, inplace=False)
  (fc): Linear(in_features=512, out_features=26610, bias=True)
)


In [50]:
# train the model - FastText pre-trained CDSS + Bi-LSTM 4 Layers
train(net5, batch_size = 32, epochs=20, print_every=256)

Epoch: 1/20... Step: 6338...
Epoch: 2/20... Step: 12676...
Epoch: 3/20... Step: 19014...
Epoch: 4/20... Step: 25352...
Epoch: 5/20... Step: 31690...
Epoch: 6/20... Step: 38028...
Epoch: 7/20... Step: 44366...
Epoch: 8/20... Step: 50704...
Epoch: 9/20... Step: 57042...
Epoch: 10/20... Step: 63380...
Epoch: 11/20... Step: 69718...
Epoch: 12/20... Step: 76056...
Epoch: 13/20... Step: 82394...
Epoch: 14/20... Step: 88732...
Epoch: 15/20... Step: 95070...
Epoch: 16/20... Step: 101408...
Epoch: 17/20... Step: 107746...
Epoch: 18/20... Step: 114084...
Epoch: 19/20... Step: 120422...
Epoch: 20/20... Step: 126760...


In [52]:
 ## Pre-trained CDSS FastText + Bi-Directional LM with 4 LSTM layers
print(sample(net, 45, prime = "clinical decision support systems"),"\n")
print(sample(net, 45, prime = "prescription errors"),"\n")
print(sample(net, 45, prime = "one of the"),"\n")
print(sample(net, 45, prime = "ordering system"),"\n")
print(sample(net, 45, prime = "asthma management programs for primary care providers increasing adherence to asthma guidelines"))

clinical decision support systems cdss can improve the efficiency and use of cdss for the field was used in a three-year database was undertaken using the final logistic regressions were selected using a clinical model and a centralized ionized calcium is the most important factor in patients with type 

prescription errors were identified in this paper was to develop and evaluate the economic cost-effectiveness of a cdss in order with the aim of a clinical decision support systems cdsss are a first ionized d-dimer model and a proof of concept we used an electronic clinical trials 

one of the development of a computerized clinical decision rules to improve patient care in a clinical setting and the cockcroft-gault standard in a large effect to identify and validate the clinical impact of the actual clinical practice is the most common type 1 and a number and 

ordering system to improve the efficiency and use of cdss in the field for patients and older of the seniors medication therap

In [159]:
from gensim.models import KeyedVectors
from gensim.test.utils import get_tmpfile

modelW2V = gensim.models.Word2Vec.load('GNA_Files/word2vec.embedding')
# path = get_tmpfile("GNA_Files/wordvectors.kv")
modelW2V.wv.save("GNA_Files/wordvectors.kv")

wv = KeyedVectors.load("GNA_Files/wordvectors.kv", mmap='r')
# >>> vector = wv['computer']
# weights = torch.FloatTensor(modelW2V.vectors)

In [166]:
weights = torch.FloatTensor(wv.vectors)

In [168]:
weights.shape

torch.Size([15755, 300])

In [36]:
from gensim.models.fasttext import FastText as FT_gensim
# modelFT = FT_gensim.load('GNA_Files/fastText.embedding')

modelFT = FT_gensim(size=300)
modelFT.build_vocab(sentences=train_corpus_sentences)
modelFT.train(
    sentences=train_corpus_sentences, epochs=modelFT.epochs,
    total_examples=modelFT.corpus_count, total_words=modelFT.corpus_total_words
)

In [37]:
len(data)

12023

In [38]:
vocab = modelFT.wv.vocab
print(vocab['continuous'].index)

652


In [39]:
print(len(vocab))

4705


In [40]:
modelFT.wv.most_similar(positive=['clinical'], topn = 10)

[('clinic', 0.9903262853622437),
 ('supporting', 0.9902169704437256),
 ('supports', 0.9857264757156372),
 ('making', 0.983847975730896),
 ('support', 0.9830864667892456),
 ('compute', 0.9816253781318665),
 ('integrate', 0.9809657335281372),
 ('technical', 0.9803340435028076),
 ('systems', 0.979055643081665),
 ('integral', 0.977682888507843)]

In [41]:
total_data = []
for x in train_corpus_sentences:
    total_data.extend(x)

In [42]:
print(total_data[0:200])

['continuous', 'cardiorespiratory', 'monitoring', 'is', 'a', 'dominant', 'source', 'of', 'predictive', 'signal', 'in', 'machine', 'learning', 'for', 'risk', 'stratification', 'and', 'clinical', 'decision', 'support', 'beaulieu-jones', 'and', 'coworkers', 'propose', 'a', 'litmus', 'test', 'for', 'the', 'field', 'of', 'predictive', 'analytics-performance', 'improvements', 'must', 'be', 'demonstrated', 'to', 'be', 'the', 'result', 'of', 'non-clinician-initiated', 'data', 'otherwise', 'there', 'should', 'be', 'caution', 'in', 'assuming', 'that', 'predictive', 'models', 'could', 'improve', 'clinical', 'decision-making', 'beaulieu-joneset', 'al2021', 'they', 'demonstrate', 'substantial', 'prognostic', 'information', 'in', 'unsorted', 'physician', 'orders', 'made', 'before', 'the', 'first', 'midnight', 'of', 'hospital', 'admission', 'and', 'we', 'are', 'persuaded', 'that', 'it', 'is', 'fair', 'to', 'ask-if', 'the', 'physician', 'thought', 'of', 'it', 'first', 'what', 'exactly', 'is', 'machine

In [43]:
vocab_cc = list(set(total_data))
word2index = {'<unk>': 0}
for vo in vocab_cc:
    if word2index.get(vo) is None:
        word2index[vo] = len(word2index)

In [44]:
len(vocab_cc)

15755

In [45]:
def prepare_sequence(seq, to_index):
    idxs = list(map(lambda w: to_index[w] if to_index.get(w) is not None else to_index["<unk>"], seq))
    return LongTensor(idxs)

In [46]:
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.optim as optim
import torch.nn.functional as F
import nltk
import random
import numpy as np
from collections import Counter, OrderedDict
import nltk
from copy import deepcopy
flatten = lambda l: [item for sublist in l for item in sublist]
random.seed(1024)

USE_CUDA = torch.cuda.is_available()
gpus = [0]
torch.cuda.set_device(gpus[0])

FloatTensor = torch.cuda.FloatTensor if USE_CUDA else torch.FloatTensor
LongTensor = torch.cuda.LongTensor if USE_CUDA else torch.LongTensor
ByteTensor = torch.cuda.ByteTensor if USE_CUDA else torch.ByteTensor

In [47]:
prepare_sequence(train_corpus_sentences[0], word2index)

tensor([ 9262,  1809, 10032,  3989, 10668,  2373,  4493, 15710,  6035,  5917,
         1103,  9198,  7785,  6642, 11958,  7816,  1540,  5392,  3037, 11699],
       device='cuda:0')

In [48]:
word2index['continuous']

9262

In [49]:
def prepare_ptb_dataset(corpus, word2index=None):
    corpus = flatten([co + ['</s>'] for co in corpus])
    
    if word2index == None:
        vocab = list(set(corpus))
        word2index = {'<unk>': 0}
        for vo in vocab:
            if word2index.get(vo) is None:
                word2index[vo] = len(word2index)
    
    return prepare_sequence(corpus, word2index), word2index

In [50]:
# borrowed code from https://github.com/pytorch/examples/tree/master/word_language_model

def batchify(data, bsz):
    # Work out how cleanly we can divide the dataset into bsz parts.
    nbatch = data.size(0) // bsz
    # Trim off any extra elements that wouldn't cleanly fit (remainders).
    data = data.narrow(0, 0, nbatch * bsz)
    # Evenly divide the data across the bsz batches.
    data = data.view(bsz, -1).contiguous()
    if USE_CUDA:
        data = data.cuda()
    return data

In [61]:
def getBatch(data, seq_length):
     for i in range(0, data.size(1) - seq_length, seq_length):
        inputs = Variable(data[:, i: i + seq_length])
        targets = Variable(data[:, (i + 1): (i + 1) + seq_length].contiguous())
        yield (inputs, targets)

In [62]:
train_data, word2index = prepare_ptb_dataset(train_corpus_sentences)
test_data, _ = prepare_ptb_dataset(test_corpus_sentences)

In [63]:
train_data

tensor([ 9263,  1809, 10033,  ...,  3037, 11700,  4632], device='cuda:0')

In [64]:
len(word2index)

15757

In [65]:
index2word = {v:k for k, v in word2index.items()}

In [66]:
class LanguageModel(nn.Module): 
    def __init__(self, vocab_size, embedding_size, hidden_size, n_layers=1, dropout_p=0.5):

        super(LanguageModel, self).__init__()
        self.n_layers = n_layers
        self.hidden_size = hidden_size
        self.embed = nn.Embedding(vocab_size, embedding_size)
        self.rnn = nn.LSTM(embedding_size, hidden_size, n_layers, batch_first=True)
        self.linear = nn.Linear(hidden_size, vocab_size)
        self.dropout = nn.Dropout(dropout_p)
        
    def init_weight(self):
        self.embed.weight = nn.init.xavier_uniform(self.embed.weight)
        self.linear.weight = nn.init.xavier_uniform(self.linear.weight)
        self.linear.bias.data.fill_(0)
        
    def init_hidden(self,batch_size):
        hidden = Variable(torch.zeros(self.n_layers,batch_size,self.hidden_size))
        context = Variable(torch.zeros(self.n_layers,batch_size,self.hidden_size))
        return (hidden.cuda(), context.cuda()) if USE_CUDA else (hidden, context)
    
    def detach_hidden(self, hiddens):
        return tuple([hidden.detach() for hidden in hiddens])
    
    def forward(self, inputs, hidden, is_training=False): 

        embeds = self.embed(inputs)
        if is_training:
            embeds = self.dropout(embeds)
        out,hidden = self.rnn(embeds, hidden)
        return self.linear(out.contiguous().view(out.size(0) * out.size(1), -1)), hidden

In [67]:
EMBED_SIZE = 128
HIDDEN_SIZE = 1024
NUM_LAYER = 1
LR = 0.01
SEQ_LENGTH = 30 # for bptt
BATCH_SIZE = 20
EPOCH = 40
RESCHEDULED = False

In [75]:
train_data = batchify(train_data, BATCH_SIZE)
test_data = batchify(test_data, BATCH_SIZE)

In [76]:
model = LanguageModel(len(word2index), EMBED_SIZE, HIDDEN_SIZE, NUM_LAYER, 0.5)
model.init_weight() 
if USE_CUDA:
    model = model.cuda()
loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LR)


  self.embed.weight = nn.init.xavier_uniform(self.embed.weight)
  self.linear.weight = nn.init.xavier_uniform(self.linear.weight)


In [77]:
for epoch in range(EPOCH):
    total_loss = 0
    losses = []
    hidden = model.init_hidden(BATCH_SIZE)
    for i,batch in enumerate(getBatch(train_data, SEQ_LENGTH)):
        inputs, targets = batch
        hidden = model.detach_hidden(hidden)
        model.zero_grad()
        preds, hidden = model(inputs, hidden, True)

        loss = loss_function(preds, targets.view(-1))
        losses.append(loss.item())
        loss.backward()
        torch.nn.utils.clip_grad_norm(model.parameters(), 0.5) # gradient clipping
        optimizer.step()

    if epoch%5==0:
        print("[%02d/%d] mean_loss : %0.2f, Perplexity : %0.2f" % (epoch,EPOCH, np.mean(losses), np.exp(np.mean(losses))))
        losses = []
        
    # learning rate anealing
    # You can use http://pytorch.org/docs/master/optim.html#how-to-adjust-learning-rate
    if RESCHEDULED == False and epoch == EPOCH//2:
        LR *= 0.1
        optimizer = optim.Adam(model.parameters(), lr=LR)
        RESCHEDULED = True

  torch.nn.utils.clip_grad_norm(model.parameters(), 0.5) # gradient clipping


[00/40] mean_loss : 6.83, Perplexity : 926.73
[05/40] mean_loss : 4.41, Perplexity : 82.11
[10/40] mean_loss : 3.03, Perplexity : 20.76
[15/40] mean_loss : 2.21, Perplexity : 9.11
[20/40] mean_loss : 1.69, Perplexity : 5.44
[25/40] mean_loss : 1.39, Perplexity : 4.00
[30/40] mean_loss : 1.20, Perplexity : 3.32
[35/40] mean_loss : 1.07, Perplexity : 2.91


In [78]:
total_loss = 0
hidden = model.init_hidden(BATCH_SIZE)
for batch in getBatch(train_data, SEQ_LENGTH):
    inputs,targets = batch
        
    hidden = model.detach_hidden(hidden)
    model.zero_grad()
    preds, hidden = model(inputs, hidden)
    total_loss += inputs.size(1) * loss_function(preds, targets.view(-1)).data

total_loss = total_loss.item()/train_data.size(1)
print("Train Perpelexity : %5.2f" % (np.exp(total_loss)))

Train Perpelexity : 15.89


In [88]:
test_data, _ = prepare_ptb_dataset(test_corpus_sentences)

RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call,so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.

In [None]:
test_data = batchify(test_data, BATCH_SIZE)

In [87]:
total_loss = 0
hidden = model.init_hidden(BATCH_SIZE)
for batch in getBatch(test_data, SEQ_LENGTH):
    inputs,targets = batch
        
    hidden = model.detach_hidden(hidden)
    model.zero_grad()
    preds, hidden = model(inputs, hidden)
    total_loss += inputs.size(1) * loss_function(preds, targets.view(-1)).data

total_loss = total_loss.item()/test_data.size(1)
print("Train Perpelexity : %5.2f" % (np.exp(total_loss)))

RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call,so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.

In [74]:
total_loss

0

In [80]:
test_data

tensor([], device='cuda:0', size=(20, 0), dtype=torch.int64)

In [82]:
test_corpus_sentences[0]

['urinary',
 'colorimetric',
 'sensor',
 'array',
 'and',
 'algorithm',
 'to',
 'distinguish',
 'kawasaki',
 'disease',
 'from',
 'other',
 'febrile',
 'illnesses']