In [1]:
##---------------------------------------------------------------------------------
## Summary : Implementing the Conv neural nets for sentence classification
## Author  : Srinivas Venkata Vemparala
## Source  : https://github.com/neubig/nn4nlp-code
##---------------------------------------------------------------------------------

%matplotlib inline
import numpy as np
import pandas as pd
import dynet as dy
import time 
import matplotlib.pyplot as plt
import random
from collections import defaultdict

In [2]:
# lets convert the words into integer
# The default dictionary takes a function as input and outupts 
# it if key is not present in the map.
w2i = defaultdict(lambda: len(w2i))
t2i = defaultdict(lambda: len(t2i))

# create an unknown token. As this is the first it will be 0
UNK =w2i['<unk>']
print(UNK)

# Lets write a method to read the data.
# This method returns the list of [features, label].
# Here the features are integer ids of words and tags are labels
def readDataSet(fileName):
    retList = []
    with open(fileName,'r+') as f:
        for line in f:
            tag,words = line.lower().strip().split(' ||| ')
            
            # now get the features which is the integerIds of words
            features = [w2i[x] for x in words.split(' ')]
            label = t2i[tag]
            
            # add the data to the list
            retList.append([features,label])
    return retList

0


In [3]:
# Read the data
train = readDataSet('../data/classes/train.txt')
test = readDataSet("../data/classes/test.txt")
nWords = len(w2i)
nTags = len(t2i)

# lets freeze the dictionary
w2i = defaultdict(lambda:UNK, len(w2i))

print(nWords,':',nTags)

18648 : 5


In [4]:
# lets define an embedding size and a filter size
nEmb = 128            # length of the embedding vector for each word
nFilter = 128         # number of filters
winSize = 3          # this is similar to N in N-grams.


# lets declare the model and the trainer.
model = dy.Model()
trainer = dy.AdamTrainer(model)


# lets declare the parameters which will be added to the model
W_emb = model.add_lookup_parameters((nWords,1,1,nEmb))   # the features are on z-axis :)

W_cnn = model.add_parameters((1,winSize,nEmb,nFilter))   # the weights of CNN layer
b_cnn = model.add_parameters((nFilter))                  # biases of the CNN layer

W_sm = model.add_parameters((nTags,nFilter))             # weights of softmax layer
b_sm = model.add_parameters((nTags))

In [5]:
# lets write a method to compute the scores given a sentence
def computeScores(sent):
    # renew the computation graph
    dy.renew_cg()
    
    # get the parameters to computation graph
    weightsCNN = dy.parameter(W_cnn)
    biasesCNN = dy.parameter(b_cnn)
    weightsSM = dy.parameter(W_sm)
    biasesSM = dy.parameter(b_sm)
    
    # if the sent is smaller than the window size then we will have problems
    # if that is case add enough UNK
    if len(sent) < winSize:
        sent += [0] * (winSize-len(sent))

    embed = [W_emb[x] for x in sent]
    cnnInput = dy.concatenate(embed,d=1) # concatenate the embeddings in column dimension
    cnnOutput = dy.conv2d_bias(cnnInput,weightsCNN,biasesCNN,is_valid=False,stride=(1,1))
    
    # do max pooling
    poolOut = dy.max_dim(cnnOutput,d=1)               # get the output of pooling
    poolOut = dy.reshape(poolOut, (nFilter,))   # reshape it to get the filter size
    
    # apply relu function
    poolOut = dy.rectify(poolOut)
    
    return (weightsSM*poolOut+biasesSM)

In [6]:
# Now lets perform the the training
bestTestAccuracy = 0
lastTestAccuracy = 0

for i in range(25):
    # Perform the shuffling of the training data
    random.shuffle(train)
    
    # initialize the training loss and time
    startTime = time.time()
    trainLoss = 0
    trainCorrect = 0
    
    # train
    for words,tag in train:
        scores = computeScores(words)
        predict = np.argmax(scores.npvalue())
        if(predict==tag):
            trainCorrect = trainCorrect+1
        
        loss = dy.pickneglogsoftmax(scores,tag)
        trainLoss += loss.value()
        
        loss.backward()
        trainer.update()
    print('At iteration ',i,' the training accuracy is : ',(trainCorrect/len(train)),' time taken : ',(time.time()-startTime))
    
    testLoss = 0
    testCorrect = 0
    # test
    for words,tag in test:
        scores = computeScores(words)
        predict = np.argmax(scores.npvalue())
        if(predict==tag):
            testCorrect += 1
    testAccuracy = testCorrect/len(test)
    if(testAccuracy>bestTestAccuracy):
        bestTestAccuracy = testAccuracy
    
    if(testAccuracy<lastTestAccuracy):
        trainer.learning_rate = trainer.learning_rate/2
    lastTestAccuracy = testAccuracy
    
    print('At iteration ',i,' the test accuracy is : ',(testAccuracy),' time taken : ',(time.time()-startTime))
    

At iteration  0  the training accuracy is :  0.31624531835205993  time taken :  88.07636022567749
At iteration  0  the test accuracy is :  0.35339366515837106  time taken :  92.71276187896729
At iteration  1  the training accuracy is :  0.5724485018726592  time taken :  88.24254965782166
At iteration  1  the test accuracy is :  0.38416289592760183  time taken :  92.86141872406006
At iteration  2  the training accuracy is :  0.8328651685393258  time taken :  88.43943214416504
At iteration  2  the test accuracy is :  0.3588235294117647  time taken :  93.04235649108887
At iteration  3  the training accuracy is :  0.9735486891385767  time taken :  91.87499523162842
At iteration  3  the test accuracy is :  0.3606334841628959  time taken :  97.42922854423523
At iteration  4  the training accuracy is :  0.995435393258427  time taken :  88.2517557144165
At iteration  4  the test accuracy is :  0.3547511312217195  time taken :  92.87037229537964
At iteration  5  the training accuracy is :  0.99