In [1]:
##---------------------------------------------------------------------------------
## Summary : Implementing simple RNN for sentiment classification
## Author  : Srinivas Venkata Vemparala
## Source  : https://github.com/neubig/nn4nlp-code
##---------------------------------------------------------------------------------

%matplotlib inline
import numpy as np
import pandas as pd
import dynet as dy
import time 
import matplotlib.pyplot as plt
import random
from collections import defaultdict

In [2]:
# lets convert the words into integer
# The default dictionary takes a function as input and outupts 
# it if key is not present in the map.
w2i = defaultdict(lambda : len(w2i))
t2i = defaultdict(lambda : len(t2i))

# create an unknown token. As this is the first it will be 0
UNK =w2i['<unk>']

# Lets write a method to read the data.
# This method returns the list of [features, label].
# Here the features are integer ids of words and tags are labels
def readDataSet(fileName):
    retList = []
    with open(fileName,'r+') as f:
        for line in f:
            tag,sentence = line.lower().strip().split(' ||| ') 
            words = sentence.lower().strip().split(' ')
            features = [w2i[x] for x in words]
            label = t2i[tag]
            
            retList.append([features,label])
            
    return retList

In [3]:
train = readDataSet('../data/classes/train.txt')
test = readDataSet('../data/classes/test.txt')
nWords = len(w2i)
ntags = len(t2i)

# lets freeze the dictionary
w2i = defaultdict(lambda:UNK, len(w2i))

print(nWords,' : ',ntags)

18648  :  5


In [4]:
# lets define the embedding size and hidden layer size
nEmb = 64
nHid = 64

# lets define the model and trainer
model = dy.Model()
trainer = dy.AdamTrainer(model)

# lets define the model
W_emb = model.add_lookup_parameters((nWords,nEmb))

# lets add two rnns. one reads from start to end and other from end to start of sentence
forwardRNN =  dy.SimpleRNNBuilder(1,nEmb,nHid,model)
backWardRNN = dy.SimpleRNNBuilder(1,nEmb,nHid,model)

# lets declare the softmax weights
W_sm = model.add_parameters((ntags,2*nHid))
b_sm = model.add_parameters((ntags))

In [5]:
# lets write a method to compute the scores
def computeScores(words):
    # renew computation graph
    dy.renew_cg()
    
    embeddings = [W_emb[x] for x in words]
    
    # get the init state for the rnns
    finit = forwardRNN.initial_state()
    binit = backWardRNN.initial_state()
    
    # pass the embeddings throught the rnns and get the output
    fwdEmbs = finit.transduce(embeddings)
    embeddings.reverse()
    bwdEmbs = binit.transduce(embeddings)
    
    # get the last ones in the fwd and bwd embeddings. this will represent the entire sentence state
    # and concatanate them
    finalEmb = dy.concatenate([fwdEmbs[-1],bwdEmbs[-1]])
    
    # get the weights of softmax layers
    weightsSoftmax = dy.parameter(W_sm)
    biasesSoftmax = dy.parameter(b_sm)
    
    
    return weightsSoftmax*finalEmb + biasesSoftmax

In [7]:
# Now lets perform the the training
bestTestAccuracy = 0
lastTestAccuracy = 0

for i in range(25):
    # Perform the shuffling of the training data
    random.shuffle(train)
    
    # initialize the training loss and time
    startTime = time.time()
    trainLoss = 0
    trainCorrect = 0
    
    # train
    for words,tag in train:
        scores = computeScores(words)
        predict = np.argmax(scores.npvalue())
        if(predict==tag):
            trainCorrect = trainCorrect+1
        
        loss = dy.pickneglogsoftmax(scores,tag)
        trainLoss += loss.value()
        
        loss.backward()
        trainer.update()
    print('At iteration ',i,' the training accuracy is : ',(trainCorrect/len(train)),' time taken : ',(time.time()-startTime))
    
    testLoss = 0
    testCorrect = 0
    # test
    for words,tag in test:
        scores = computeScores(words)
        predict = np.argmax(scores.npvalue())
        if(predict==tag):
            testCorrect += 1
    testAccuracy = testCorrect/len(test)
    if(testAccuracy>bestTestAccuracy):
        bestTestAccuracy = testAccuracy
    
    if(testAccuracy<lastTestAccuracy):
        trainer.learning_rate = trainer.learning_rate/2
    lastTestAccuracy = testAccuracy
    
    print('At iteration ',i,' the test accuracy is : ',(testAccuracy),' time taken : ',(time.time()-startTime))
    

At iteration  0  the training accuracy is :  0.8730102996254682  time taken :  5.757597923278809
At iteration  0  the test accuracy is :  0.3565610859728507  time taken :  6.317943811416626
At iteration  1  the training accuracy is :  0.87312734082397  time taken :  5.539999485015869
At iteration  1  the test accuracy is :  0.3565610859728507  time taken :  6.00378680229187
At iteration  2  the training accuracy is :  0.8735955056179775  time taken :  5.263608932495117
At iteration  2  the test accuracy is :  0.3565610859728507  time taken :  5.734323740005493
At iteration  3  the training accuracy is :  0.8735955056179775  time taken :  5.313278913497925
At iteration  3  the test accuracy is :  0.3565610859728507  time taken :  5.779788970947266
At iteration  4  the training accuracy is :  0.8737125468164794  time taken :  5.1940436363220215
At iteration  4  the test accuracy is :  0.3565610859728507  time taken :  5.66184139251709
At iteration  5  the training accuracy is :  0.873595