In [1]:
from __future__ import division
from __future__ import print_function

import os
import random

import torch
import torch.nn as nn
import torch.optim as optim
# IMPORT CONSTANTS
from treelstm import Constants
# NEURAL NETWORK MODULES/LAYERS
# from treelstm import SimilarityTreeLSTM
# DATA HANDLING CLASSES
from treelstm import Vocab
# DATASET CLASS FOR HATE DATASET
from treelstm import HATEDataset
# METRICS CLASS FOR EVALUATION
from treelstm import Metrics
# UTILITY FUNCTIONS
from treelstm import utils
# TRAIN AND TEST HELPER FUNCTIONS
# CONFIG PARSER
from config import parse_args

from tqdm import tqdm

import torch
import torch.nn.functional as F



In [4]:
class ChildSumTreeLSTM(nn.Module):
    """Child-Sum Tree-LSTM cell evaluated recursively over a tree.

    Every node combines its own input vector with the sum of its children's
    hidden states; a separate forget gate is computed for each child.

    Args:
        in_dim: size of the input vector looked up for each node.
        mem_dim: size of the cell and hidden state produced for each node.
    """

    def __init__(self, in_dim, mem_dim):
        super(ChildSumTreeLSTM, self).__init__()
        self.in_dim = in_dim
        self.mem_dim = mem_dim
        # The input, output and update gates share one fused projection of
        # the node input and one of the summed child hidden state.
        self.ioux = nn.Linear(self.in_dim, 3 * self.mem_dim)
        self.iouh = nn.Linear(self.mem_dim, 3 * self.mem_dim)
        # Forget-gate projections (applied once per child).
        self.fx = nn.Linear(self.in_dim, self.mem_dim)
        self.fh = nn.Linear(self.mem_dim, self.mem_dim)

    def node_forward(self, inputs, child_c, child_h):
        """Compute the (cell, hidden) state of one node.

        Args:
            inputs: input vector of the node.
            child_c: cell states of the children, stacked along dim 0.
            child_h: hidden states of the children, stacked along dim 0.

        Returns:
            Tuple ``(c, h)`` with the node's cell and hidden state.
        """
        num_children = len(child_h)
        summed_h = child_h.sum(dim=0, keepdim=True)

        gates = self.ioux(inputs) + self.iouh(summed_h)
        gate_width = gates.size(1) // 3
        in_gate, out_gate, update = torch.split(gates, gate_width, dim=1)
        in_gate = torch.sigmoid(in_gate)
        out_gate = torch.sigmoid(out_gate)
        update = torch.tanh(update)

        # One forget gate per child: the node input contribution is repeated
        # for every child row.
        forget = torch.sigmoid(
            self.fh(child_h) + self.fx(inputs).repeat(num_children, 1)
        )
        kept_memory = (forget * child_c).sum(dim=0, keepdim=True)

        c = in_gate * update + kept_memory
        h = out_gate * torch.tanh(c)
        return c, h

    def forward(self, tree, inputs):
        """Evaluate the subtree rooted at ``tree`` and return its root state.

        Children are processed first; each visited node gets its result
        stored in ``node.state``.

        Args:
            tree: node exposing ``num_children``, ``children`` and ``idx``.
            inputs: indexable collection of input vectors, addressed by
                ``tree.idx``.

        Returns:
            Tuple ``(c, h)`` for the root of ``tree``.
        """
        for child_idx in range(tree.num_children):
            self.forward(tree.children[child_idx], inputs)

        if tree.num_children != 0:
            states = [child.state for child in tree.children]
            child_c = torch.cat([c for c, _ in states], dim=0)
            child_h = torch.cat([h for _, h in states], dim=0)
        else:
            # Leaves have no children: use a single all-zero child state with
            # the same dtype/device as the inputs.
            template = inputs[0].detach()
            child_c = template.new(1, self.mem_dim).fill_(0.).requires_grad_()
            child_h = template.new(1, self.mem_dim).fill_(0.).requires_grad_()
        tree.state = self.node_forward(inputs[tree.idx], child_c, child_h)
        return tree.state


class Predict(nn.Module):
    """Two-layer classification head: Linear -> ReLU -> Linear.

    Args:
        mem_dim: size of the incoming feature vector.
        hidden_dim: width of the intermediate layer.
        num_classes: number of output scores (raw logits, no softmax).
    """

    def __init__(self, mem_dim,hidden_dim,num_classes):
        super(Predict, self).__init__()
        self.mem_dim = mem_dim
        self.hidden_dim = hidden_dim
        self.num_classes = num_classes
        self.wh = nn.Linear(self.mem_dim, self.hidden_dim)
        self.wp = nn.Linear(self.hidden_dim, self.num_classes)

    def forward(self,vec):
        """Return the un-normalised class scores for ``vec``."""
        hidden = F.relu(self.wh(vec))
        return self.wp(hidden)



# putting the whole model together
class SimilarityTreeLSTM(nn.Module):
    """Embedding layer + Child-Sum Tree-LSTM encoder + classification head.

    Args:
        vocab_size: number of rows in the embedding table.
        in_dim: embedding dimension (input size of the tree encoder).
        mem_dim: state size of the tree encoder.
        hidden_dim: hidden width of the classification head.
        num_classes: number of output scores.
        sparsity: forwarded to ``nn.Embedding(sparse=...)``.
        freeze: when truthy, the embedding weights are excluded from training.
    """

    def __init__(self, vocab_size, in_dim, mem_dim, hidden_dim, num_classes, sparsity, freeze):
        super(SimilarityTreeLSTM, self).__init__()
        self.emb = nn.Embedding(
            vocab_size,
            in_dim,
            padding_idx=Constants.PAD,
            sparse=sparsity,
        )
        if freeze:
            self.emb.weight.requires_grad_(False)
        self.childsumtreelstm = ChildSumTreeLSTM(in_dim, mem_dim)
        self.predict = Predict(mem_dim, hidden_dim, num_classes)

    def forward(self, ltree, linputs):
        """Encode one tree and return the class scores of its root.

        Args:
            ltree: root node of the tree to encode.
            linputs: token indices, looked up in the embedding table.
        """
        embedded = self.emb(linputs)
        root_cell, _root_hidden = self.childsumtreelstm(ltree, embedded)
        # NOTE(review): the classifier is fed the root *cell* state, not the
        # hidden state. Kept as-is; switching would change the model's output
        # for already-trained weights.
        return self.predict(root_cell)

In [5]:
class args_init():
    """Hard-coded run configuration used in place of command-line arguments."""

    def __init__(self):
        # reproducibility
        self.seed = 123

        # locations on disk
        self.data = 'hate_data/'
        self.glove = 'data/glove/'
        self.save = 'checkpoints/attention/'
        self.expname = 'test'

        # model sizes
        self.input_dim = 300
        self.mem_dim = 150
        self.hidden_dim = 50
        self.num_classes = 3

        # embedding behaviour
        self.sparse = False
        self.freeze_embed = False

        # optimisation
        self.epochs = 15
        self.batch_size = 15
        self.lr = 0.01
        self.wd = 1e-4
        self.optim = 'adagrad'

        # hardware
        self.cuda = True

In [28]:
class Trainer(object):
    """Per-sample training and evaluation loop with gradient accumulation.

    Args:
        args: configuration object; ``batch_size`` is read during training
            and ``num_classes`` (falling back to 3) when building targets.
        model: module called as ``model(tree, sentence)``.
        criterion: loss called as ``criterion(output, target)``.
        optimizer: object exposing ``step()`` and ``zero_grad()``.
        device: device the inputs and targets are moved to.
    """

    def __init__(self, args, model, criterion, optimizer, device):
        super(Trainer, self).__init__()
        self.args = args
        self.model = model
        self.criterion = criterion
        self.optimizer = optimizer
        self.device = device
        self.epoch = 0

    # helper function for training
    def train(self, dataset):
        """Run one epoch over ``dataset`` in random order.

        Gradients are accumulated sample by sample and applied every
        ``args.batch_size`` samples; a trailing partial batch is applied at
        the end of the epoch so none of its gradients are thrown away.

        Args:
            dataset: indexable collection whose items unpack as
                ``(tree, sentence, label, tweet)``.

        Returns:
            Mean loss per sample (0.0 for an empty dataset).
        """
        self.model.train()
        self.optimizer.zero_grad()
        total_loss = 0.0
        num_samples = len(dataset)
        num_classes = getattr(self.args, 'num_classes', 3)
        indices = torch.randperm(num_samples, dtype=torch.long, device='cpu')
        pending = 0  # samples whose gradients have not been applied yet
        for idx in range(num_samples):
            tree, sentence, label, tweet = dataset[indices[idx]]
            target = utils.map_label_to_target(label, num_classes)
            sentence = sentence.to(self.device)
            target = target.to(self.device)
            output = self.model(tree, sentence)
            loss = self.criterion(output, target)
            total_loss += loss.item()
            loss.backward()
            pending += 1
            if pending >= self.args.batch_size:
                self.optimizer.step()
                self.optimizer.zero_grad()
                pending = 0

        # Flush the last, possibly incomplete, batch.
        if pending:
            self.optimizer.step()
            self.optimizer.zero_grad()

        self.epoch += 1
        return total_loss / max(num_samples, 1)

    # helper function for testing
    def test(self, dataset):
        """Evaluate the model on ``dataset`` without tracking gradients.

        Args:
            dataset: indexable collection whose items unpack as
                ``(tree, sentence, label, tweet)``.

        Returns:
            Tuple ``(mean_loss, predictions)`` where ``predictions`` is a CPU
            float tensor holding the predicted class index of every sample.
        """
        self.model.eval()
        num_samples = len(dataset)
        num_classes = getattr(self.args, 'num_classes', 3)
        total_loss = 0.0
        predictions = torch.zeros(num_samples, dtype=torch.float, device='cpu')
        with torch.no_grad():
            for idx in tqdm(range(num_samples), desc='Testing epoch  ' + str(self.epoch) + ''):
                ltree, linput, label, tweet = dataset[idx]
                target = utils.map_label_to_target(label, num_classes)
                linput = linput.to(self.device)
                target = target.to(self.device)
                output = self.model(ltree, linput)
                loss = self.criterion(output, target)
                total_loss += loss.item()
                # softmax is monotonic, so the arg-max of the raw scores is
                # already the predicted class.
                predictions[idx] = torch.argmax(output.to('cpu'))
        return total_loss / max(num_samples, 1), predictions


In [7]:
# Build the run configuration. Every later cell reads it through the name
# `args`, which no visible cell defines, so bind both names to the same
# object to keep a fresh-kernel "Run All" from failing with a NameError.
trainargs = args_init()
args = trainargs

In [8]:

# choose the compute device and seed every RNG used in this notebook
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

torch.manual_seed(args.seed)
random.seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)
    torch.backends.cudnn.benchmark = True
if not os.path.exists(args.save):
    os.makedirs(args.save)

# one sub-directory per split below the data root
train_dir, dev_dir, test_dir = [
    os.path.join(args.data, split_name)
    for split_name in ('train/', 'dev/', 'test/')
]

# write unique words from all token files (only when no vocab file exists yet)
hate_vocab_file = os.path.join(args.data, 'hate.vocab')
if not os.path.isfile(hate_vocab_file):
    token_files = [os.path.join(split, 'data.toks') for split in [train_dir, dev_dir, test_dir]]
    utils.build_vocab(token_files, hate_vocab_file)

# get vocab object from vocab file previously written
special_words = [Constants.PAD_WORD, Constants.UNK_WORD,
                 Constants.BOS_WORD, Constants.EOS_WORD]
vocab = Vocab(filename=hate_vocab_file, data=special_words)
print('==> HATE vocabulary size : %d ' % vocab.size())


def _load_or_build_split(cache_file, split_dir):
    """Return the dataset cached at ``cache_file``; build and cache it from
    ``split_dir`` when the cache file does not exist."""
    # NOTE: torch.load unpickles the file, so only point this at cache files
    # produced by this notebook.
    if os.path.isfile(cache_file):
        return torch.load(cache_file)
    dataset = HATEDataset(split_dir, vocab, args.num_classes)
    torch.save(dataset, cache_file)
    return dataset


# load HATE dataset splits
train_file = os.path.join(args.data, 'hate_train.pth')
train_dataset = _load_or_build_split(train_file, train_dir)
print('==> Size of train data   : %d ' % len(train_dataset))

dev_file = os.path.join(args.data, 'hate_dev.pth')
dev_dataset = _load_or_build_split(dev_file, dev_dir)
print('==> Size of dev data     : %d ' % len(dev_dataset))

test_file = os.path.join(args.data, 'hate_test.pth')
test_dataset = _load_or_build_split(test_file, test_dir)
print('==> Size of test data    : %d ' % len(test_dataset))

==> HATE vocabulary size : 35288 
==> Size of train data   : 19826 
==> Size of dev data     : 2478 
==> Size of test data    : 2479 


## MODEL

In [10]:
# Build the classifier from the run configuration.
model = SimilarityTreeLSTM(
        vocab.size(),
        args.input_dim,
        args.mem_dim,
        args.hidden_dim,
        args.num_classes,
        args.sparse,
        args.freeze_embed)
criterion = nn.BCEWithLogitsLoss()
# Move the model to the device selected earlier (CUDA when available, CPU
# otherwise). An unconditional `.cuda()` raises on machines without a GPU.
# Being the last expression, this also displays the module summary.
model.to(device)

SimilarityTreeLSTM(
  (emb): Embedding(35288, 300, padding_idx=0)
  (childsumtreelstm): ChildSumTreeLSTM(
    (ioux): Linear(in_features=300, out_features=450, bias=True)
    (iouh): Linear(in_features=150, out_features=450, bias=True)
    (fx): Linear(in_features=300, out_features=150, bias=True)
    (fh): Linear(in_features=150, out_features=150, bias=True)
  )
  (predict): Predict(
    (wh): Linear(in_features=150, out_features=50, bias=True)
    (wp): Linear(in_features=50, out_features=3, bias=True)
  )
)

In [12]:
emb_file = os.path.join(args.data, 'hate_embed.pth')
if os.path.isfile(emb_file):
    # The cache may have been written from a different device (e.g. a GPU);
    # remap it so loading also works on a CPU-only machine.
    emb = torch.load(emb_file, map_location=device)
else:
    # load glove embeddings and vocab
    glove_vocab, glove_emb = utils.load_word_vectors(
        os.path.join(args.glove, 'glove.840B.300d'))
    print('==> GLOVE vocabulary size: %d ' % glove_vocab.size())
    emb = torch.zeros(vocab.size(), glove_emb.size(1), dtype=torch.float, device=device)
    emb.normal_(0, 0.05)
    # zero the leading rows, one per special word handed to Vocab(data=...)
    # (assumes those words occupy the first indices of the vocab — TODO confirm)
    for idx, _ in enumerate([Constants.PAD_WORD, Constants.UNK_WORD,
                             Constants.BOS_WORD, Constants.EOS_WORD]):
        emb[idx].zero_()
    for word in vocab.labelToIdx.keys():
        glove_idx = glove_vocab.getIndex(word)
        # Compare against None rather than relying on truthiness, otherwise
        # the word stored at GloVe index 0 is never copied.
        # (assumes getIndex returns None for unknown words — TODO confirm)
        if glove_idx is not None:
            emb[vocab.getIndex(word)] = glove_emb[glove_idx]
    torch.save(emb, emb_file)
# plug these into embedding matrix inside model
model.emb.weight.data.copy_(emb)

model.to(device)
criterion.to(device)

# only parameters that still require gradients are optimised
# (frozen embeddings are skipped)
trainable_params = [p for p in model.parameters() if p.requires_grad]
if args.optim == 'adam':
    optimizer = optim.Adam(trainable_params, lr=args.lr, weight_decay=args.wd)
elif args.optim == 'adagrad':
    optimizer = optim.Adagrad(trainable_params, lr=args.lr, weight_decay=args.wd)
elif args.optim == 'sgd':
    optimizer = optim.SGD(trainable_params, lr=args.lr, weight_decay=args.wd)
else:
    # fail here instead of with a NameError on `optimizer` further down
    raise ValueError("Unsupported optimizer: %r (expected 'adam', 'adagrad' or 'sgd')"
                     % (args.optim,))
metrics = Metrics(args.num_classes)

In [29]:
trainer = Trainer(args, model, criterion, optimizer, device)
# Restore previously trained weights and optimizer state. `map_location`
# remaps tensors saved from another device (e.g. a GPU) onto the current one,
# so the checkpoint can also be loaded on a CPU-only machine.
# NOTE: torch.load unpickles the file; only load checkpoints you trust.
checkpoint = torch.load('checkpoints/test.pt', map_location=device)

trainer.model.load_state_dict(checkpoint['model'])
trainer.optimizer.load_state_dict(checkpoint['optim'])

## Running the trained model on the test set (the model was trained beforehand)

In [33]:
# Evaluate the restored model on the held-out split: mean loss per sample and
# one predicted class index per test example.
test_loss, predictions = trainer.test(test_dataset)


Testing epoch  0: 100%|██████████| 2479/2479 [00:20<00:00, 123.35it/s]


## Accuracy of the model

In [44]:
from sklearn.metrics import f1_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
from sklearn.metrics import accuracy_score

In [45]:
# Gold labels vs. predicted class indices for the test split.
y_true = test_dataset.labels
y_pred = predictions

# NOTE(review): with average='micro' on single-label multi-class predictions,
# precision, recall and F1 are expected to coincide with plain accuracy, so
# the four numbers below carry the same information.
f = f1_score(y_true, y_pred, average='micro')
print("F1 Score: ", f)

p = precision_score(y_true, y_pred, average='micro')
print("Precision Score: ", p)

r = recall_score(y_true, y_pred, average='micro')
print("Recall Score: ", r)

a = accuracy_score(y_true, y_pred)
print("Accuracy Score: ", a)

F1 Score:  0.9205324727712787
Precision Score:  0.9205324727712787
Recall Score:  0.9205324727712787
Accuracy Score:  0.9205324727712787
