In [1]:
##---------------------------------------------------------------------
## Summary : Implementing the Continuios Bag of Words
## Author  : Srinivas Venkata Vemparala
## Source  : https://github.com/neubig/nn4nlp-code
##---------------------------------------------------------------------

import numpy as np
import pandas as pd
import dynet as dy
import time
import random

from collections import defaultdict

In [2]:
# lets declare the size of the word embedding which we will use
nEmb = 64

# create the methods to convert word to integer. We will use default dict
w2i = defaultdict(lambda: len(w2i))
t2i = defaultdict(lambda: len(t2i))
UNK = w2i['<unk>']

# Now lets define the methods to read dataSet.
# This returns a list of [features,labels]
def readDataSet(fileName):
    retList = []
    
    with open(fileName,'r+') as file:
        for line in file:
            tag,words = line.lower().strip().split(' ||| ')
            
            # Get the features from words which is list of int id for each word
            features = [w2i[x] for x in words.split(' ')]
            # Get the label from tag i..e convert tag to int
            label = t2i[tag]
            retList.append([features,label])
            
    return retList

In [3]:
# read the training data and test data
train = readDataSet(fileName='../data/classes/train.txt')
test = readDataSet(fileName='../data/classes/test.txt')

nWords = len(w2i)
nTags = len(t2i)
print(nWords,' : ',nTags)

# lets freeze the dictionary
w2i = defaultdict(lambda:UNK, len(w2i))

print('train[0] :',train[0])
print('test[0] :',test[0])

18648  :  5
train[0] : [[1, 2, 3, 4, 5, 6, 1, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 9, 17, 5, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33], 0]
test[0] : [[1795, 71, 16582, 448], 2]


In [4]:
# lets declare the model and trainers
model = dy.Model()
trainer = dy.AdamTrainer(model)

# lets declare the parameters which we use
W_emb = model.add_lookup_parameters((nWords,nEmb))
W_sm = model.add_parameters((nTags,nEmb))
b_sm = model.add_parameters(nTags)


In [5]:
# lets define a method to compute the scores once the parameters are given
def computeScores(words):
    # renew the computation graph
    dy.renew_cg()
    
    biases_softMax = dy.parameter(b_sm)
    W_softMax = dy.parameter(W_sm)
    
    scores = dy.esum([dy.lookup(W_emb,x) for x in words])
    scores = W_softMax*scores + biases_softMax
    return scores

In [6]:
# lets start training 
for i in range(25):
    # let's shuffle the training examples
    random.shuffle(train)
    
    # initialize the training loss to zero and note time
    startTime = time.time()
    trainLoss = 0
    # perform training
    for words, tag in train:
        loss = dy.pickneglogsoftmax(computeScores(words=words),tag)
        trainLoss = trainLoss + loss.value()
        
        # compute the gradients and update the parameters
        loss.backward()
        trainer.update()
    print('At iteration ',i,' the training loss is : ',trainLoss,' time taken : ',(time.time()-startTime))
    
    # compute test loss
    testCorr = 0
    for words, tag in test:
        scores = computeScores(words).npvalue()
        predict = np.argmax(scores)
        
        if(predict==tag):
            testCorr = testCorr+1
    print('At iteration ',i,' the test accuracy is : ',(testCorr/len(test))*100,'%')

At iteration  0  the training loss is :  13449.677063882351  time taken :  1.4486079216003418
At iteration  0  the test accuracy is :  36.96832579185521 %
At iteration  1  the training loss is :  9663.444181382656  time taken :  1.4854578971862793
At iteration  1  the test accuracy is :  39.90950226244344 %
At iteration  2  the training loss is :  6556.493509411812  time taken :  1.4620018005371094
At iteration  2  the test accuracy is :  38.68778280542987 %
At iteration  3  the training loss is :  4243.158288538456  time taken :  1.4025015830993652
At iteration  3  the test accuracy is :  39.366515837104075 %
At iteration  4  the training loss is :  2770.2708449959755  time taken :  1.6375033855438232
At iteration  4  the test accuracy is :  37.828054298642535 %
At iteration  5  the training loss is :  1867.002669274807  time taken :  1.8270118236541748
At iteration  5  the test accuracy is :  38.1447963800905 %
At iteration  6  the training loss is :  1435.2784430384636  time taken :