<a href="https://colab.research.google.com/github/tusharvatsa32/11-711-fall-20-projects/blob/master/BOW.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from collections import defaultdict

In [2]:
import time
import random
import numpy as np
import torch
from torch import nn
from torch.autograd import Variable


In [29]:
class BoW(torch.nn.Module):
    """Bag-of-words classifier.

    Every word id is embedded directly into a vector of ``ntags`` scores.
    The score vectors of all words in a sentence are summed and a learned
    per-tag bias is added, giving one row of unnormalised tag scores.
    """

    def __init__(self, nwords, ntags):
        """Create the embedding table and the bias.

        Args:
            nwords: Size of the vocabulary (number of embedding rows).
            ntags: Number of output classes (embedding width and bias size).
        """
        super(BoW, self).__init__()

        # The bias must be an nn.Parameter: only registered parameters are
        # returned by ``parameters()`` (and hence updated by the optimizer),
        # saved in ``state_dict()`` and moved by ``.cuda()`` / ``.to()``.
        self.bias = nn.Parameter(torch.zeros(ntags))

        self.embedding = nn.Embedding(nwords, ntags)
        print(self.embedding)
        # initialize the weights with xavier uniform (Glorot, X. & Bengio, Y. (2010))
        nn.init.xavier_uniform_(self.embedding.weight)

    def forward(self, words):
        """Score one sentence.

        Args:
            words: 1-D integer tensor of word ids.

        Returns:
            Tensor of shape ``(1, ntags)`` holding the summed word scores
            plus the bias.
        """
        emb = self.embedding(words)
        out = torch.sum(emb, dim=0) + self.bias  # size(out) = ntags
        return out.view(1, -1)  # size = 1 x ntags


In [4]:
# Word -> integer id. Looking up a word that is not yet present stores it
# with the next free id (the current size of the mapping).
w2i = defaultdict(lambda: len(w2i))

In [5]:
# Tag -> integer id. Looking up a tag that is not yet present stores it
# with the next free id (the current size of the mapping).
t2i = defaultdict(lambda: len(t2i))

In [6]:
# Reserve an id for the unknown-word token by looking it up in the
# auto-growing vocabulary before any data is read.
UNK = w2i["<unk>"]


In [7]:
def read_dataset(filename):
    """Yield ``(word_ids, tag_id)`` pairs from a ``tag ||| sentence`` file.

    Each non-blank line is lower-cased and stripped, then split into a tag
    and a sentence at the first `` ||| `` separator. Words are obtained by
    splitting the sentence on single spaces and are mapped through the
    module-level ``w2i``; the tag is mapped through ``t2i``.

    Args:
        filename: Path of the text file to read (decoded as UTF-8).

    Yields:
        Tuple ``(list of word ids, tag id)`` for every example line.

    Raises:
        ValueError: If a non-blank line does not contain the separator.
    """
    # Explicit encoding so parsing does not depend on the platform default.
    with open(filename, "r", encoding="utf-8") as f:
        for line in f:
            line = line.lower().strip()
            if not line:
                # Blank lines (e.g. a trailing newline) carry no example.
                continue
            # Split only once so a sentence that itself contains the
            # separator is kept whole instead of breaking the unpacking.
            tag, words = line.split(" ||| ", 1)
            yield ([w2i[x] for x in words.split(" ")], t2i[tag])

In [8]:
# Read the training set while the vocabulary is still growing: every new
# word or tag encountered here receives a fresh id.
train = list(read_dataset("/content/sample_data/train.txt"))

# Freeze the vocabulary: from here on an unseen word resolves to the UNK id
# instead of receiving a new id.
w2i = defaultdict(lambda: UNK, w2i)
dev = list(read_dataset("/content/sample_data/dev.txt"))

# Sizes used to build the model. A defaultdict stores keys it had to
# default, so len(w2i) also counts unseen words met while reading dev.
nwords, ntags = len(w2i), len(t2i)

In [28]:
# Sanity check: number of word ids in the first training example.
len(train[0][0])


21

In [31]:
# Build the classifier from the vocabulary size and the number of tags.
model = BoW(nwords, ntags)

Embedding(17612, 5)


In [17]:
# Cross-entropy over the raw tag scores returned by the model.
criterion = nn.CrossEntropyLoss()
# Adam with its default hyperparameters over the model's registered parameters.
optimizer = torch.optim.Adam(model.parameters())

In [18]:
# Detect GPU availability once; later cells read this flag.
use_cuda = torch.cuda.is_available()
# Default (CPU) tensor type for integer ids.
# NOTE: this name shadows the builtin ``type``; it is kept because other
# cells refer to it.
type = torch.LongTensor

In [32]:
# When a GPU is available, move the model there and switch the integer
# tensor type to its CUDA counterpart.
if use_cuda:
    model.cuda()
    type = torch.cuda.LongTensor

In [None]:
# Train for 100 epochs, one sentence per optimisation step, and report the
# accuracy on the dev set after every epoch.
#
# Input tensors are created directly on the device the model lives on, with
# an explicit integer dtype, instead of going through the legacy ``.type()``
# conversion and a global that shadows the builtin ``type``.
device = next(model.parameters()).device

for ITER in range(100):
    # Perform training
    random.shuffle(train)
    train_loss = 0.0
    start = time.time()
    for words, tag in train:
        words = torch.tensor(words, dtype=torch.long, device=device)
        tag = torch.tensor([tag], dtype=torch.long, device=device)
        scores = model(words)
        loss = criterion(scores, tag)
        train_loss += loss.item()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # max(..., 1) keeps the report from dividing by zero on an empty set.
    print("iter %r: train loss/sent=%.4f, time=%.2fs" % (
                ITER, train_loss/max(len(train), 1), time.time()-start))
    # Perform testing
    test_correct = 0.0
    # No gradients are needed for evaluation; skipping graph construction
    # saves time and memory.
    with torch.no_grad():
        for words, tag in dev:
            words = torch.tensor(words, dtype=torch.long, device=device)
            predict = torch.argmax(model(words)[0]).item()
            if predict == tag:
                test_correct += 1
    print("iter %r: test acc=%.4f" % (ITER, test_correct/max(len(dev), 1)))

