In [1]:
##---------------------------------------------------------------------
## Summary : Implementing optimum version of NN language model
## Author  : Srinivas Venkata Vemparala
## Source  : https://github.com/neubig/nn4nlp-code
##---------------------------------------------------------------------

%matplotlib inline
import numpy as np
import pandas as pd
import dynet as dy
import time
import matplotlib.pyplot as plt

import random 
from collections import defaultdict

In [2]:
# Vocabulary: maps every word to an integer id. While the dictionary is
# "open", looking up an unseen word assigns it the next free id (the current
# size of the dictionary).
w2i = defaultdict(lambda: len(w2i))

# Reserve the first two ids for the sentence-boundary and unknown-word tokens
S = w2i['<s>']
UNK = w2i['<unk>']

# lets declare a method to read the data
def readDataset(fileName):
    """Read a corpus file and convert every line into a list of word ids.

    Each line is lower-cased, stripped of surrounding whitespace and split on
    single spaces; every token is then looked up in the module-level ``w2i``
    dictionary (which, while it is still open, assigns new ids on the fly).

    Parameters
    ----------
    fileName : str
        Path of the text file to read, one sentence per line.

    Returns
    -------
    list of list of int
        One list of word ids per line of the file, in file order.
    """
    retList = []
    # Open read-only: the file is never written to, and 'r+' would fail on
    # corpus files the user has no write permission for.
    with open(fileName,'r') as file:
        for line in file:
            words = [w2i[x] for x in line.lower().strip().split(' ')]
            retList.append(words)
            
    return retList

In [3]:
# Read the training data first: every word seen here receives its own id.
train = readDataset('../data/ptb/train.txt')

# compute the number of words in the vocabulary
nWords = len(w2i)
print('Number of words in vocabulary : ',nWords)

# Freeze the dictionary: from now on an unseen word maps to UNK instead of
# getting a fresh id. The second argument has to be the existing mapping
# itself (passing its length raises a TypeError).
w2i = defaultdict(lambda: UNK, w2i)

# Inverse mapping (id -> word). It is built before the held-out data is read
# because a defaultdict stores every missing key it is asked for, and those
# extra word -> UNK entries would otherwise overwrite i2w[UNK].
i2w = {v:k for k,v in w2i.items()}

# Read the held-out data with the frozen vocabulary, so that words not seen
# during training are mapped to UNK and all ids stay below nWords.
test = readDataset('../data/ptb/valid.txt')

Number of words in vocabulary :  10000


In [4]:
# Hyper-parameters: size of the word embeddings and of the LSTM hidden state
nEmb = 32
nHid = 32

# Parameter container and the Adam optimiser that updates everything
# registered on it
model = dy.Model()
trainer = dy.AdamTrainer(model)

# Word-embedding lookup table: one nEmb-dimensional vector per vocabulary id
W_emb = model.add_lookup_parameters((nWords,nEmb))

# Recurrent network: LSTM built with 1 layer, nEmb inputs and nHid hidden
# units; its parameters are registered on `model`
rnn = dy.LSTMBuilder(1,nEmb,nHid,model)

# Softmax (output) layer projecting the hidden state onto the vocabulary.
# Note: `(nWords)` is a plain int, not a one-element tuple.
W_sm = model.add_parameters((nWords,nHid))
b_sm = model.add_parameters((nWords))

In [None]:
def computeLoss(words):
    """Build the computation graph for one sentence and return its loss.

    The LSTM is primed with the embedding of the ``S`` token. For every word
    of the sentence the current LSTM output is projected through the softmax
    layer, the negative log-softmax of the actual word is recorded, and the
    word's embedding is then fed to the LSTM.

    Parameters
    ----------
    words : list of int
        Word ids of the sentence.

    Returns
    -------
    dynet expression
        Sum of the per-word negative log-likelihoods.
    """
    # every sentence gets a fresh computation graph
    dy.renew_cg()

    # bring the softmax parameters into the new graph
    softmaxW = dy.parameter(W_sm)
    softmaxB = dy.parameter(b_sm)

    # prime the recurrent network with the sentence-boundary token
    current = rnn.initial_state().add_input(W_emb[S])

    perWordLosses = []
    for target in words:
        # score the whole vocabulary from the current hidden state
        logits = softmaxW*current.output() + softmaxB
        perWordLosses.append(dy.pickneglogsoftmax(logits,target))

        # advance the network with the word that was just scored
        current = current.add_input(W_emb[target])

    return dy.esum(perWordLosses)

In [None]:
# Loss history: one summed loss per epoch for the training and held-out data
trainLosses = []
testLosses = []

# Sentinels that are larger than any loss we expect to see
bestTestLoss = 999999999
lastTestLoss = 999999999

print(' Starting the training ...')

for i in range(10):
    # present the training sentences in a new random order every epoch
    random.shuffle(train)

    # per-epoch bookkeeping: wall-clock start, summed loss, words seen
    startTime = time.time()
    trainLoss = 0.0
    numOfWordsProcessed = 0
    for sent in train:
        # forward pass
        loss = computeLoss(sent)
        trainLoss += loss.value()
        numOfWordsProcessed += len(sent)

        # backward pass followed by a parameter update
        loss.backward()
        trainer.update()
    trainLosses.append(trainLoss)
    print('Iteration ',i,' : ',' TrainingLoss : ',trainLoss,' Number of words processed : ',numOfWordsProcessed,' Time taken : ',
         (time.time()-startTime))

    # evaluation pass over the held-out data (no parameter updates)
    testLoss = 0.0
    for sent in test:
        loss = computeLoss(sent)
        testLoss += loss.value()
    testLosses.append(testLoss)
    print('Iteration ',i,' : ',' TestLoss : ',testLoss)

    # halve the learning rate whenever the held-out loss failed to improve
    # on the previous epoch
    if testLoss >= lastTestLoss:
        trainer.learning_rate /= 2

    lastTestLoss = testLoss

    # Saving the best model seen so far is disabled to save time.
    # To enable it again:
    #     if testLoss < bestTestLoss:
    #         model.save("nn-optimum_LM.txt")
    #         bestTestLoss = testLoss
    

 Starting the training ...
Iteration  0  :   TrainingLoss :  5392100.542980194  Number of words processed :  887521  Time taken :  601.6644756793976
Iteration  0  :   TestLoss :  406730.8803231716
Iteration  1  :   TrainingLoss :  4979143.804537058  Number of words processed :  887521  Time taken :  603.7007832527161
Iteration  1  :   TestLoss :  396416.016207695
Iteration  2  :   TrainingLoss :  4848180.005659819  Number of words processed :  887521  Time taken :  592.7751975059509
Iteration  2  :   TestLoss :  392490.11943364143
Iteration  3  :   TrainingLoss :  4785395.876128197  Number of words processed :  887521  Time taken :  909.9339401721954
Iteration  3  :   TestLoss :  390762.5452759266
Iteration  4  :   TrainingLoss :  4744179.611130238  Number of words processed :  887521  Time taken :  812.5940728187561
Iteration  4  :   TestLoss :  388961.98898887634
Iteration  5  :   TrainingLoss :  4712511.967818499  Number of words processed :  887521  Time taken :  836.5625920295715
