In [19]:
import numpy as np
import pandas as pd
import dynet as dy
import time 

import random as random
from collections import defaultdict

In [6]:
# Vocabulary tables: words and tags are mapped to integer ids.
# A defaultdict calls its factory when a key is missing and stores the
# result, so using the current size of the table as the factory hands out
# consecutive ids (0, 1, 2, ...) in order of first appearance.
w2i = defaultdict(lambda: len(w2i))
t2i = defaultdict(lambda: len(t2i))

# Register the unknown-word token first so that it receives id 0.
UNK = w2i['<unk>']
print(UNK)

# Reader for the labelled data files.
def readDataSet(fileName):
    """Read a labelled text file into a list of ``[features, label]`` pairs.

    Each non-blank line is expected to look like ``tag ||| word word ...``.
    The line is lower-cased and stripped, then split once on ``' ||| '``.

    Parameters
    ----------
    fileName : str
        Path of the file to read.

    Returns
    -------
    list
        One ``[features, label]`` entry per non-blank line, where
        ``features`` is the list of ``w2i`` ids of the space-separated words
        and ``label`` is the ``t2i`` id of the tag.

    Raises
    ------
    ValueError
        If a non-blank line does not contain the ``' ||| '`` separator.

    Notes
    -----
    Ids are looked up in the module-level ``w2i`` / ``t2i`` tables, so
    reading a file may add new entries to them.
    """
    retList = []
    # The file is only read, so open it read-only; 'r+' needlessly required
    # write permission and failed on read-only data files.
    with open(fileName, 'r') as f:
        for line in f:
            line = line.lower().strip()
            # Skip blank lines (e.g. a trailing newline at the end of the
            # file) instead of crashing on the unpack below.
            if not line:
                continue

            # Split only on the first separator so that a sentence which
            # itself contains ' ||| ' is kept intact.
            tag, words = line.split(' ||| ', 1)

            # features are the integer ids of the words
            features = [w2i[x] for x in words.split(' ')]
            label = t2i[tag]

            retList.append([features, label])
    return retList


0


In [15]:
# Read the training data first: every word seen here gets its own id.
train = readDataSet('data/classes/train.txt')

# Freeze the vocabulary before reading the evaluation file. From here on a
# word that never occurred in training maps to UNK instead of receiving a
# fresh id whose embedding would never be trained. readDataSet looks up the
# module-level w2i at call time, so rebinding the name is enough.
w2i = defaultdict(lambda: UNK, w2i)
dev = readDataSet("data/classes/test.txt")

# NOTE: the frozen defaultdict still stores unseen evaluation words as keys
# (all mapped to UNK), so len(w2i) counts them as well. That is larger than
# the highest id in use, which keeps the lookup table big enough.
nwords = len(w2i)
ntags = len(t2i)

print(nwords,':',ntags)
print('train[0] : \n',train[0])
print('test[0] : \n',dev[0])

18648 : 5
train[0] : 
 [[1, 2, 3, 4, 5, 6, 1, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 9, 17, 5, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33], 0]
test[0] : 
 [[1795, 71, 0, 448], 2]


In [16]:
# Set up the parameter container together with the optimiser that will
# update it; Adam is the optimiser used here.
model = dy.Model()
trainer = dy.AdamTrainer(model)

# Bag-of-words parameters, created in this order:
#   W_sm - lookup table declared with dimensions (nwords, ntags)
#   b_sm - bias declared with dimension ntags
W_sm = model.add_lookup_parameters((nwords, ntags))
b_sm = model.add_parameters(ntags)

In [17]:
# Scoring function for a single sentence.
def computeScores(words):
    """Return the score expression for a sequence of word ids.

    A fresh computation graph is started, the ``W_sm`` entries of all ids in
    ``words`` are summed, and the bias ``b_sm`` is added to that sum.
    """
    # every call starts from a new computation graph
    dy.renew_cg()
    word_vectors = [dy.lookup(W_sm, word_id) for word_id in words]
    summed = dy.esum(word_vectors)
    return summed + dy.parameter(b_sm)

In [22]:
# Training: 25 passes over the training data, each followed by an
# evaluation on dev.
for i in range(25):
    # visit the training examples in a new random order on every pass
    random.shuffle(train)

    # start the clock and reset the loss accumulated over this pass
    startTime = time.time()
    trainLoss = 0

    # one parameter update per training example
    for words, tag in train:
        loss = dy.pickneglogsoftmax(computeScores(words=words), tag)
        trainLoss += loss.value()

        # back-propagate, then let the trainer apply the update
        loss.backward()
        trainer.update()
    print('At iteration ',i,' the training loss is : ',trainLoss,' time taken : ',(time.time()-startTime))

    # evaluation: count the dev examples whose highest-scoring index
    # equals the stored label
    testCorrect = 0
    for words, tag in dev:
        scores = computeScores(words=words).npvalue()
        predict = np.argmax(scores)
        if predict == tag:
            testCorrect += 1
    print('At iteration ',i,' the test accuracy is : ',(testCorrect/len(dev))*100,'%')

At iteration  0  the training loss is :  7359.954819768667  time taken :  0.3910384178161621
At iteration  0  the test accuracy is :  34.52488687782805 %
At iteration  1  the training loss is :  7185.740517809987  time taken :  0.3800036907196045
At iteration  1  the test accuracy is :  34.43438914027149 %
At iteration  2  the training loss is :  7017.566231377423  time taken :  0.3820023536682129
At iteration  2  the test accuracy is :  34.660633484162894 %
At iteration  3  the training loss is :  6861.6886631548405  time taken :  0.37200212478637695
At iteration  3  the test accuracy is :  34.47963800904977 %
At iteration  4  the training loss is :  6711.6247884482145  time taken :  0.37504005432128906
At iteration  4  the test accuracy is :  34.705882352941174 %
At iteration  5  the training loss is :  6567.915987864137  time taken :  0.3679540157318115
At iteration  5  the test accuracy is :  34.705882352941174 %
At iteration  6  the training loss is :  6432.217577777803  time take