In [7]:
##---------------------------------------------------------------------
## Summary : Implementing the bag of words each word vector is of len 5
## Author  : Srinivas Venkata Vemparala
## Source  : https://github.com/neubig/nn4nlp-code
##---------------------------------------------------------------------

import numpy as np
import pandas as pd
import dynet as dy
import time 

import random as random
from collections import defaultdict

In [8]:
# lets convert the words into integer
# The default dictionary takes a function as input and outupts 
# it if key is not present in the map.
w2i = defaultdict(lambda: len(w2i))
t2i = defaultdict(lambda: len(t2i))

# create an unknown token. As this is the first it will be 0
UNK =w2i['<unk>']
print(UNK)

# Lets write a method to read the data.
# This method returns the list of [features, label].
# Here the features are integer ids of words and tags are labels
def readDataSet(fileName):
    retList = []
    with open(fileName,'r+') as f:
        for line in f:
            tag,words = line.lower().strip().split(' ||| ')
            
            # now get the features which is the integerIds of words
            features = [w2i[x] for x in words.split(' ')]
            label = t2i[tag]
            
            # add the data to the list
            retList.append([features,label])
    return retList


0


In [9]:
# Read the data
train = readDataSet('../data/classes/train.txt')
dev = readDataSet("../data/classes/test.txt")
nwords = len(w2i)
ntags = len(t2i)

# lets freeze the dictionary
w2i = defaultdict(lambda:UNK, len(w2i))

print(nwords,':',ntags)
print('train[0] : \n',train[0])
print('test[0] : \n',dev[0])

18648 : 5
train[0] : 
 [[1, 2, 3, 4, 5, 6, 1, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 9, 17, 5, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33], 0]
test[0] : 
 [[1795, 71, 16582, 448], 2]


In [10]:
# Now lets create the model and trainer
# we are using the adam trainer
model = dy.Model()
trainer = dy.AdamTrainer(model)

# Create the parameters. 
W_sm = model.add_lookup_parameters((nwords,ntags)) # weights
b_sm = model.add_parameters(ntags) # biases

In [11]:
# Now lets write a method to compute the weights once we know w and b
def computeScores(words):
    # renew the Computation graph
    dy.renew_cg()
    scores = dy.esum([dy.lookup(W_sm,x) for x in words])
    biases = dy.parameter(b_sm)
    return scores+biases

In [6]:
# Now lets perform the the training
for i in range(25):
    # Perform the shuffling of the training data
    random.shuffle(train)
    
    # initialize the training loss and time
    startTime = time.time()
    trainLoss = 0
    
    # train
    for words,tag in train:
        loss = dy.pickneglogsoftmax(computeScores(words=words),tag)
        trainLoss = trainLoss+loss.value()
        
        # compute gradients and update the parameters
        loss.backward()
        trainer.update()
    print('At iteration ',i,' the training loss is : ',trainLoss,' time taken : ',(time.time()-startTime))
    
    # test
    testCorrect = 0
    for words,tag in dev:
        scores = computeScores(words=words).npvalue()
        predict = np.argmax(scores)
        if(predict == tag):
            testCorrect = testCorrect + 1
    print('At iteration ',i,' the test accuracy is : ',(testCorrect/len(dev))*100,'%')

At iteration  0  the training loss is :  19887.35503485799  time taken :  0.4295015335083008
At iteration  0  the test accuracy is :  26.42533936651584 %
At iteration  1  the training loss is :  16970.68979987502  time taken :  0.40904736518859863
At iteration  1  the test accuracy is :  27.601809954751133 %
At iteration  2  the training loss is :  15475.103072404861  time taken :  0.47750353813171387
At iteration  2  the test accuracy is :  28.3710407239819 %
At iteration  3  the training loss is :  14389.277297459543  time taken :  0.5425007343292236
At iteration  3  the test accuracy is :  29.321266968325794 %
At iteration  4  the training loss is :  13516.224422775209  time taken :  0.41250061988830566
At iteration  4  the test accuracy is :  29.86425339366516 %
At iteration  5  the training loss is :  12786.055393196642  time taken :  0.4030015468597412
At iteration  5  the test accuracy is :  30.81447963800905 %
At iteration  6  the training loss is :  12146.173193916678  time ta