In [1]:
##---------------------------------------------------------------------------------
## Summary : Implementing simple LSTM for sentiment classification
## Author  : Srinivas Venkata Vemparala
## Source  : https://github.com/neubig/nn4nlp-code
##---------------------------------------------------------------------------------

%matplotlib inline
import numpy as np
import pandas as pd
import dynet as dy
import time 
import matplotlib.pyplot as plt
import random
from collections import defaultdict

In [2]:
# lets convert the words into integer
# The default dictionary takes a function as input and outupts 
# it if key is not present in the map.
w2i = defaultdict(lambda : len(w2i))
t2i = defaultdict(lambda : len(t2i))

# create an unknown token. As this is the first it will be 0
UNK =w2i['<unk>']

# Lets write a method to read the data.
# This method returns the list of [features, label].
# Here the features are integer ids of words and tags are labels
def readDataSet(fileName):
    retList = []
    with open(fileName,'r+') as f:
        for line in f:
            tag,sentence = line.lower().strip().split(' ||| ') 
            words = sentence.lower().strip().split(' ')
            features = [w2i[x] for x in words]
            label = t2i[tag]
            
            retList.append([features,label])
            
    return retList

In [3]:
train = readDataSet('../data/classes/train.txt')
test = readDataSet('../data/classes/test.txt')
nWords = len(w2i)
ntags = len(t2i)

# lets freeze the dictionary
w2i = defaultdict(lambda:UNK, len(w2i))

print(nWords,' : ',ntags)

18648  :  5


In [4]:
# lets define the embedding size and hidden size
nEmb = 64
nHid = 64

# lets define the model and the trainer
model = dy.Model()
trainer = dy.AdamTrainer(model)

# lets add the parameters to the model
W_emb = model.add_lookup_parameters((nWords,nEmb))

# lets define two LSTM rnns one for forward and other for backward
forwardRNN = dy.VanillaLSTMBuilder(1,nEmb,nHid,model)
backWardRNN = dy.VanillaLSTMBuilder(1,nEmb,nHid,model)

# lets declare the softmax weights
W_sm = model.add_parameters((ntags,2*nHid))
b_sm = model.add_parameters((ntags))

In [5]:
def computeScores(words):
    # renew the computation graph
    dy.renew_cg()
    
    # get the embeddings
    embeddings = [W_emb[x] for x in words]
    
    # get the init state for the rnns
    finit = forwardRNN.initial_state()
    binit = backWardRNN.initial_state()
    
    # lets pass the inputs through rnns one from the start and other from back
    fwdembs = finit.transduce(embeddings)
    embeddings.reverse()                  # reverse the word embeddings
    bwdembs = binit.transduce(embeddings)
    
    # get the last word in two embeddings and concatenate them together
    finalEmb = dy.concatenate([fwdembs[-1],bwdembs[-1]])
    
    # get the weights and biases of the softmax layer. 
    weightsSoftmax = dy.parameter(W_sm)
    biasesSoftmax = dy.parameter(b_sm)
    
    return (weightsSoftmax*finalEmb + biasesSoftmax)

In [6]:
# Now lets perform the the training
bestTestAccuracy = 0
lastTestAccuracy = 0

for i in range(25):
    # Perform the shuffling of the training data
    random.shuffle(train)
    
    # initialize the training loss and time
    startTime = time.time()
    trainLoss = 0
    trainCorrect = 0
    
    # train
    for words,tag in train:
        scores = computeScores(words)
        predict = np.argmax(scores.npvalue())
        if(predict==tag):
            trainCorrect = trainCorrect+1
        
        loss = dy.pickneglogsoftmax(scores,tag)
        trainLoss += loss.value()
        
        loss.backward()
        trainer.update()
    print('At iteration ',i,' the training accuracy is : ',(trainCorrect/len(train)),' time taken : ',(time.time()-startTime))
    
    testLoss = 0
    testCorrect = 0
    # test
    for words,tag in test:
        scores = computeScores(words)
        predict = np.argmax(scores.npvalue())
        if(predict==tag):
            testCorrect += 1
    testAccuracy = testCorrect/len(test)
    if(testAccuracy>bestTestAccuracy):
        bestTestAccuracy = testAccuracy
    
    if(testAccuracy<lastTestAccuracy):
        trainer.learning_rate = trainer.learning_rate/2
    lastTestAccuracy = testAccuracy
    
    print('At iteration ',i,' the test accuracy is : ',(testAccuracy),' time taken : ',(time.time()-startTime))
    

At iteration  0  the training accuracy is :  0.3240870786516854  time taken :  55.45553994178772
At iteration  0  the test accuracy is :  0.38054298642533935  time taken :  59.80601453781128
At iteration  1  the training accuracy is :  0.475187265917603  time taken :  63.124500036239624
At iteration  1  the test accuracy is :  0.3832579185520362  time taken :  67.66438913345337
At iteration  2  the training accuracy is :  0.6036985018726592  time taken :  53.11413335800171
At iteration  2  the test accuracy is :  0.39819004524886875  time taken :  57.68433690071106
At iteration  3  the training accuracy is :  0.7442649812734082  time taken :  52.96915912628174
At iteration  3  the test accuracy is :  0.40226244343891404  time taken :  57.3459734916687
At iteration  4  the training accuracy is :  0.8576779026217228  time taken :  52.303807973861694
At iteration  4  the test accuracy is :  0.3909502262443439  time taken :  56.719923973083496
At iteration  5  the training accuracy is :  0