In [28]:
import torch
import numpy as np
from operator import itemgetter
import os
import pandas as pd
import matplotlib.pyplot as plt

import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset
from collections import Counter
import pickle as pkl
import random
import pdb
random.seed(42)

PAD_IDX = 0
UNK_IDX = 1
BATCH_SIZE = 32

torch.__version__

'0.4.0'

In [29]:
import os
os.chdir('/scratch/wpg205')
os.getcwd()

'/scratch/wpg205'

In [30]:
if torch.cuda.is_available and torch.has_cudnn:
    device = torch.device('cuda')
else:
    device = torch.device("cpu")

In [31]:
torch.has_cudnn

True

In [32]:
ft_home = './'
words_to_load = 50000

with open(ft_home + 'wiki-news-300d-1M.vec') as f:
    loaded_embeddings_ft = np.zeros((words_to_load+2, 300))
    words_ft = {}
    idx2words_ft = {}
    ordered_words_ft = []
    ordered_words_ft.append(PAD_IDX)
    ordered_words_ft.append(UNK_IDX)
    for t, line in enumerate(f):
        i = t + 2 
        if t >= words_to_load: 
            break
        s = line.split()
        loaded_embeddings_ft[i, :] = np.asarray(s[1:])
        words_ft[s[0]] = i
        idx2words_ft[i] = s[0]
        ordered_words_ft.append(s[0])

loaded_embeddings_ft_torch = torch.tensor(loaded_embeddings_ft, requires_grad = True)
del loaded_embeddings_ft

In [33]:
#test
print(ordered_words_ft[words_ft['the']])
print(ordered_words_ft[words_ft['test']])
print(ordered_words_ft[words_ft['is']])
print(ordered_words_ft[words_ft['passed']])

the
test
is
passed


In [34]:
snli_train = pd.read_csv('snli_train.tsv', sep='\t') 
#snli_train = snli_train.iloc[:10000,:]
snli_train['sentence1'] = snli_train['sentence1'].str.split()
snli_train['sentence2'] = snli_train['sentence2'].str.split()

snli_val = pd.read_csv('snli_val.tsv', sep='\t') 
#snli_val = snli_val.iloc[:10000,:]
snli_val['sentence1'] = snli_val['sentence1'].str.split()
snli_val['sentence2'] = snli_val['sentence2'].str.split()

snli_train['label'].replace('neutral',0, inplace=True)
snli_train['label'].replace('entailment',1, inplace = True)
snli_train['label'].replace('contradiction',2, inplace = True)

snli_val['label'].replace('neutral',0, inplace=True)
snli_val['label'].replace('entailment',1, inplace = True)
snli_val['label'].replace('contradiction',2, inplace = True)

data_tup_train = zip(snli_train.sentence1,snli_train.sentence2,snli_train.label)
data_tup_val = zip(snli_val.sentence1,snli_val.sentence2,snli_val.label)

del snli_train
del snli_val

In [35]:
Max_sen_length = 82

class VocabDataset(Dataset):
    """
    Class that represents a train/validation/test dataset that's readable for PyTorch
    Note that this class inherits torch.utils.data.Dataset
    """

    def __init__(self, data_tuple, char2id):
        """
        @param data_list: list of character
        @param target_list: list of targets

        """
        self.sent1,self.sent2,self.target_list = zip(*data_tuple)
        assert (len(self.sent1) == len(self.target_list))
        assert (len(self.sent2) == len(self.target_list))
        self.char2id = char2id

    def __len__(self):
        return len(self.sent1)

    def __getitem__(self, key):
        """
        Triggered when you call dataset[i]
        """
        char_idx_1 = [self.char2id[c] if c in self.char2id.keys() else UNK_IDX  for c in self.sent1[key][:Max_sen_length]]
        char_idx_2 = [self.char2id[c] if c in self.char2id.keys() else UNK_IDX  for c in self.sent2[key][:Max_sen_length]]
        label = self.target_list[key]
        return [char_idx_1, char_idx_2,len(char_idx_1),len(char_idx_2),label]

def vocab_collate_func(batch):
    """
    Customized function for DataLoader that dynamically pads the batch so that all
    data have the same length
    """
    data_list_1 = []
    data_list_2 = []
    label_list = []
    length_list = []
    length_list_2 = []

    for datum in batch:
        label_list.append(datum[4])
        length_list.append(datum[2])
        length_list_2.append(datum[3])
    # padding
    for datum in batch:
        padded_vec1 = np.pad(np.array(datum[0]),
                                pad_width=((0,Max_sen_length-datum[2])),
                                mode="constant", constant_values=0)
        padded_vec2 = np.pad(np.array(datum[1]),
                                pad_width=((0,Max_sen_length-datum[3])),
                                mode="constant", constant_values=0)
        data_list_1.append(padded_vec1)
        data_list_2.append(padded_vec2)
        

    ordering1 = np.linspace(0, len(data_list_1),len(data_list_1), endpoint = False)
    ordering2 = np.linspace(0, len(data_list_2),len(data_list_2), endpoint = False)
      
    ind_dec_order = np.argsort(length_list)[::-1]
    ind_dec_order_2 = np.argsort(length_list_2)[::-1]
    
    ordering1 = ordering1[ind_dec_order]
    ordering2 = ordering2[ind_dec_order_2]
        
    data_list_1 = np.array(data_list_1)[ind_dec_order]
    data_list_2 = np.array(data_list_2)[ind_dec_order_2]
                
    length_list = np.array(length_list)[ind_dec_order]
    length_list_2 = np.array(length_list_2)[ind_dec_order_2]
    
    
    mask1 = np.array(data_list_1)
    mask2 = np.array(data_list_2)
    
    mask1[mask1 > 0] = 1
    mask2[mask2 > 0] = 1
    
    
    ###need to mask padded values###
    
                
    #label_list = np.array(label_list)[ind_dec_order]
    return [torch.from_numpy(np.array(data_list_1)), torch.from_numpy(np.array(data_list_2)), \
            torch.LongTensor(length_list),torch.LongTensor(length_list_2), \
            torch.LongTensor(label_list), ordering1, ordering2]


In [36]:
train_dataset = VocabDataset(data_tup_train, words_ft)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=BATCH_SIZE,
                                           collate_fn=vocab_collate_func,
                                           shuffle=True)

val_dataset = VocabDataset(data_tup_val, words_ft)
val_loader = torch.utils.data.DataLoader(dataset=val_dataset,
                                           batch_size=BATCH_SIZE,
                                           collate_fn=vocab_collate_func,
                                           shuffle=True)

del train_dataset
del val_dataset

In [115]:
class GRU(nn.Module):
    def __init__(self, emb_size, hidden_size, num_layers, num_classes, vocab_size, bidirectional = True):
        # RNN Accepts the following hyperparams:
        # emb_size: Embedding Size
        # hidden_size: Hidden Size of layer in RNN
        # num_layers: number of layers in RNN
        # num_classes: number of output classes
        # vocab_size: vocabulary size
        super(GRU, self).__init__()

        self.num_layers, self.hidden_size = num_layers, hidden_size
        #self.embedding = nn.Embedding(vocab_size, emb_size, padding_idx=PAD_IDX)
        self.embedding = nn.Embedding.from_pretrained(loaded_embeddings_ft_torch, freeze = False).to(device)
        self.rnn = nn.GRU(emb_size, hidden_size, num_layers, batch_first=True, bidirectional = True).to(device)
        self.linear1 = nn.Linear(hidden_size*2*2, 100).to(device)
        self.linear2 = nn.Linear(100, 50).to(device)
        self.linear = nn.Linear(50, num_classes).to(device)
        #self.linear = nn.Linear(hidden_size*2*2, num_classes)

    def init_hidden(self, batch_size):
        # Function initializes the activation of recurrent neural net at timestep 0
        # Needs to be in format (num_layers, batch_size, hidden_size)
        hidden = torch.randn(self.num_layers*2, batch_size, self.hidden_size)

        return hidden.to(device)

    def forward(self, x1,x2,lengths1,lengths2, ordering1, ordering2):
        #setting mask
        """"""
        m1 = torch.zeros([x1.size(0), x1.size(1)]).to(device)
        m2 = torch.zeros([x2.size(0), x2.size(1)]).to(device)
        
        m1[x1 == 1] = 1
        m2[x2 == 1] = 1
    
        #m1 = torch.from_numpy(m1)
        #m2 = torch.from_numpy(m2)
        
        
        # reset hidden state
        batch_size, seq_len = x1.size()

        # get embedding of characters
        embed1 = self.embedding(x1.to(device))
        embed2 = self.embedding(x2.to(device))
        
        m1 = m1.unsqueeze(-1).expand_as(embed1).to(device)
        m2 = m2.unsqueeze(-1).expand_as(embed2).to(device)
        embed1 = embed1.to(torch.float).to(device)
        embed2 = embed2.to(torch.float).to(device)
        m1 = m1.to(torch.float).to(device)
        m2 = m2.to(torch.float).to(device)

        embed1 = m1 * embed1+ (1-m1) * embed1.clone().detach()
        embed2 = m2 * embed2+ (1-m2) * embed2.clone().detach()
        
        
        # pack padded sequence
        embed1 = torch.nn.utils.rnn.pack_padded_sequence(embed1, lengths1.numpy(), batch_first=True)
        embed2 = torch.nn.utils.rnn.pack_padded_sequence(embed2, lengths2.numpy(), batch_first=True)
        
        
        # fprop though RNN
        self.hidden = self.init_hidden(batch_size).to(device)
        rnn_out1, self.hidden1 = self.rnn(embed1, self.hidden)
        
        # undo packing
        rnn_out1, _ = torch.nn.utils.rnn.pad_packed_sequence(rnn_out1, batch_first=True)
        
        # fprop though RNN
        self.hidden = self.init_hidden(batch_size).to(device)
        rnn_out2, self.hidden2 = self.rnn(embed2, self.hidden)
        # undo packing
        rnn_out2, _ = torch.nn.utils.rnn.pad_packed_sequence(rnn_out2, batch_first=True)
        """        
        # fprop though RNN
        self.hidden = self.init_hidden(batch_size)
        rnn_out1, self.hidden1 = self.rnn(embed1, self.hidden)
        
        # fprop though RNN
        self.hidden = self.init_hidden(batch_size)
        rnn_out2, self.hidden2 = self.rnn(embed2, self.hidden)
        """
        reverse1 = np.argsort(ordering1)
        reverse2 = np.argsort(ordering2)
        #print(rnn_out1.size())
        #Use hidden state rather than the last one
        
        
        rnn_out1 = rnn_out1[reverse1,:,:].to(device)
        rnn_out2 = rnn_out2[reverse2,:,:].to(device)
        
        #self.hidden1 = self.hidden1[:,reverse1,:]
        #self.hidden2 = self.hidden2[:,reverse2,:]
        #print(rnn_out1)
        #print(rnn_out2.size())
        
        # sum hidden activations of RNN across time
        rnn_out1 = torch.sum(rnn_out1, dim=1).to(device)
        rnn_out2 = torch.sum(rnn_out2, dim=1).to(device)
        #rnn_out1 = rnn_out1[:,-1,:]
        #rnn_out2 = rnn_out2[:,-1,:]
        
        rnn_out = torch.cat((rnn_out1,rnn_out2), 1).to(device)
        #rnn_out = torch.cat((self.hidden1,self.hidden2), 2)

        rnn_out = F.relu(self.linear1(rnn_out))
        rnn_out = F.relu(self.linear2(rnn_out))
        logits = self.linear(rnn_out)
        return logits


In [50]:
class CNN(nn.Module):
    def __init__(self, emb_size, hidden_size, num_layers, num_classes, vocab_size,kernel = 3):

        super(CNN, self).__init__()

        self.num_layers, self.hidden_size = num_layers, hidden_size
        #self.embedding = nn.Embedding(vocab_size, emb_size, padding_idx=PAD_IDX)
        self.embedding = nn.Embedding.from_pretrained(loaded_embeddings_ft_torch, freeze = False).to(device)
    
        self.conv1 = nn.Conv1d(emb_size, hidden_size, kernel_size=kernel, padding=1).to(device)
        self.conv2 = nn.Conv1d(hidden_size, hidden_size, kernel_size=kernel, padding=1).to(device)

        self.linear1 = nn.Linear(hidden_size*2, 200).to(device)
        self.linear2 = nn.Linear(200, 100).to(device)
        self.linear = nn.Linear(100, num_classes).to(device)

    def forward(self, x1,x2,lengths1,lengths2, ordering1, ordering2):
        batch_size, seq_len = x1.size()
        """
        mask1_size1 = mask1.size(0)
        mask1_size2 = mask1.size(1)
        
        mask2_size1 = mask2.size(0)
        mask2_size2 = mask2.size(1)
        
        mask1.unsqueeze_(-1)
        mask1 = mask1.expand(mask1_size1,mask1_size2,200).to(torch.float).to(device)
        
        mask2.unsqueeze_(-1)
        mask2 = mask2.expand(mask2_size1,mask2_size2,200).to(torch.float).to(device)
        """
        
        m1 = torch.zeros([x1.size(0), x1.size(1)]).to(device)
        m2 = torch.zeros([x2.size(0), x2.size(1)]).to(device)
        
        m1[x1 == 1] = 1
        m2[x2 == 1] = 1

        embed1 = self.embedding(x1).to(device)
        embed2 = self.embedding(x2).to(device)
        
        m1 = m1.unsqueeze(-1).expand_as(embed1)
        m2 = m2.unsqueeze(-1).expand_as(embed2)
        
        embed1 = embed1.to(torch.float)
        embed2 = embed2.to(torch.float)
        
        m1 = m1.to(torch.float)
        m2 = m2.to(torch.float)

        embed1 = m1 * embed1+ (1-m1) * embed1.clone().detach()
        embed2 = m2 * embed2+ (1-m2) * embed2.clone().detach()        
        
        hidden = self.conv1(embed1.transpose(1,2)).transpose(1,2).to(device)
        #hidden = hidden*mask1.to(device)
        drop1  = nn.Dropout()
        hidden = drop1(hidden)

        hidden = F.relu(hidden.contiguous().view(-1, hidden.size(-1))).view(batch_size, -1, hidden.size(-1))
        #hidden = hidden*mask1.to(device)
        hidden = self.conv2(hidden.transpose(1,2)).transpose(1,2).to(device)
        #hidden = hidden*mask1.to(device)
        hidden = drop1(hidden)
        
        hidden = F.relu(hidden.contiguous().view(-1, hidden.size(-1))).view(batch_size, -1, hidden.size(-1))
        #hidden = hidden*mask1.to(device)
        hidden = hidden.max(dim = 1)[0]
        #hidden = torch.sum(hidden, dim=1)
        #print(hidden.size())
        
        hidden2 = self.conv1(embed2.transpose(1,2)).transpose(1,2).to(device)
        #hidden2 = hidden2*mask2.to(device)
        hidden2 = drop1(hidden2)
        hidden2 = F.relu(hidden2.contiguous().view(-1, hidden2.size(-1))).view(batch_size, -1, hidden2.size(-1))
        #hidden2 = hidden2*mask2.to(device)
        
        hidden2 = self.conv2(hidden2.transpose(1,2)).transpose(1,2).to(device)
        #hidden2 = hidden2*mask2.to(device)
        hidden2 = drop1(hidden2)
        hidden2 = F.relu(hidden2.contiguous().view(-1, hidden2.size(-1))).view(batch_size, -1, hidden2.size(-1))
        #hidden2 = hidden2*mask2.to(device)
        hidden2 = hidden2.max(dim = 1)[0]
        #hidden2 = torch.sum(hidden2, dim=1)
        
        
        reverse1 = np.argsort(ordering1)
        reverse2 = np.argsort(ordering2)
    
        cnn_out1 = hidden[reverse1,:]
        cnn_out2 = hidden2[reverse2,:]
        #print(cnn_out1.size())
        #cnn_out1 = cnn_out1[:,-1]
        #cnn_out2 = cnn_out2[:,-1]
        #print(cnn_out1.size())
        
        cnn_out = torch.cat((cnn_out1,cnn_out2), 1)
        
        cnn_out = F.relu(self.linear1(cnn_out))
        cnn_out = F.relu(self.linear2(cnn_out))
        logits = self.linear(cnn_out)
        return logits

In [52]:
def test_model(loader, model):
    """
    Help function that tests the model's performance on a dataset
    @param: loader - data loader for the dataset to test against
    """
    total_loss = 0
    correct = 0
    total = 0
    model.eval()
    with torch.no_grad():
        for data1,data2,lengths1,lengths2,labels,ordering1,ordering2 in loader:
            outputs = model(data1.to(device),data2.to(device),lengths1.to(device),lengths2.to(device),\
                        ordering1,ordering2)
            predicted = outputs.max(1, keepdim=True)[1]
            loss = criterion(outputs, labels.to(device))
            total_loss += loss/labels.size(0)
            total += labels.size(0)
            correct += predicted.eq(labels.to(device).view_as(predicted)).sum().item()
        return (100 * correct / total), total_loss

In [40]:
def rnn_hidden(hidden_dim):
    
    model = GRU(emb_size=300, hidden_size=hidden_dim, num_layers=1, num_classes=3, vocab_size=words_to_load)

    learning_rate = 3e-4
    num_epochs = 8 # number epoch to train

    # Criterion and Optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    # Train the model
    total_step = len(train_loader)

    train_loss_vals = []
    train_acc_vals = []
    val_loss_vals = []
    val_acc_vals = []
    for epoch in range(num_epochs):
        for i, (data1,data2,lengths1,lengths2,labels,ordering1,ordering2) in enumerate(train_loader):
            model.train()
            optimizer.zero_grad()
            # Forward pass
            outputs = model(data1,data2,lengths1,lengths2,ordering1,ordering2)
            #print(outputs,labels)
            loss = criterion(outputs,labels.to(device))
            # Backward and optimize
            loss.backward()
            optimizer.step()
            # validate every 100 iterations
            if i > 0 and i % 1000 == 0:
                # validate
                print("total lossing")
                val_acc, val_loss = test_model(val_loader, model)
                train_acc, train_loss = test_model(train_loader, model)
                train_loss_vals.append(train_loss)
                train_acc_vals.append(train_acc)
                val_loss_vals.append(val_loss)
                val_acc_vals.append(val_acc)
                print('Epoch: [{}/{}], Step: [{}/{}], Validation Acc: {}, Train Acc: {}'.format(
                           epoch+1, num_epochs, i+1, total_step, val_acc, train_acc )) 
                
    return train_loss_vals, train_acc_vals, val_loss_vals, val_acc_vals, model

In [41]:
criterion = torch.nn.CrossEntropyLoss()
training_losses_RNN, training_accs_RNN, val_losses_RNN, val_accs_RNN, RNN_model = rnn_hidden(100)


total lossing
Epoch: [1/8], Step: [1001/3125], Validation Acc: 50.4, Train Acc: 51.071
total lossing
Epoch: [1/8], Step: [2001/3125], Validation Acc: 53.3, Train Acc: 55.285
total lossing
Epoch: [1/8], Step: [3001/3125], Validation Acc: 58.2, Train Acc: 56.816
total lossing
Epoch: [2/8], Step: [1001/3125], Validation Acc: 58.7, Train Acc: 57.638
total lossing
Epoch: [2/8], Step: [2001/3125], Validation Acc: 59.0, Train Acc: 58.825
total lossing
Epoch: [2/8], Step: [3001/3125], Validation Acc: 58.2, Train Acc: 59.471
total lossing
Epoch: [3/8], Step: [1001/3125], Validation Acc: 59.7, Train Acc: 60.328
total lossing
Epoch: [3/8], Step: [2001/3125], Validation Acc: 59.5, Train Acc: 61.2
total lossing
Epoch: [3/8], Step: [3001/3125], Validation Acc: 62.4, Train Acc: 62.46
total lossing
Epoch: [4/8], Step: [1001/3125], Validation Acc: 61.1, Train Acc: 62.767
total lossing
Epoch: [4/8], Step: [2001/3125], Validation Acc: 60.7, Train Acc: 63.274
total lossing
Epoch: [4/8], Step: [3001/3125],

In [42]:
test_model(val_loader, RNN_model)[0]

66.9

In [43]:
def model_mistakes(dataset, model):
    """
    function to identify three correct and three incorrect
    """
    num = 0
    correct_list = []
    incorrect_list = []
    true_label_cor = []
    true_label_incor = []
    model_label = []
    model.eval()
    for data1,data2,lengths1,lengths2,labels,ordering1,ordering2 in dataset:
        outputs = F.softmax(model(data1,data2, lengths1,lengths2,ordering1,ordering2), dim=0)
        predicted = outputs.max(1, keepdim=True)[1]
        if predicted[0].item() == labels[0].item():
            correct_list.append(num)
            true_label_cor.append(labels[0].item())
        else:
            incorrect_list.append(num)
            true_label_incor.append(labels[0].item())
            
        model_label.append(predicted[0].item())    
        
        if len(correct_list) > 3 and len(incorrect_list) > 310:
            return correct_list, incorrect_list, true_label_cor, true_label_incor, model_label
        
        num += 1
    return print('fail')


In [44]:
snli_val = pd.read_csv('snli_val.tsv', sep='\t') 
#snli_val = snli_val.iloc[:10000,:]
snli_val_full = snli_val.copy()
snli_val['sentence1'] = snli_val['sentence1'].str.split()
snli_val['sentence2'] = snli_val['sentence2'].str.split()

snli_val['label'].replace('neutral',0, inplace=True)
snli_val['label'].replace('entailment',1, inplace = True)
snli_val['label'].replace('contradiction',2, inplace = True)

data_tup_val = zip(snli_val.sentence1,snli_val.sentence2,snli_val.label)

val_dataset = VocabDataset(data_tup_val, words_ft)
val_loader_final = torch.utils.data.DataLoader(dataset=val_dataset,
                                           batch_size=1,
                                           collate_fn=vocab_collate_func,
                                           shuffle=False)



In [45]:
correct_list, incorrect_list, true_label_cor, true_label_incor, model_label = model_mistakes(val_loader_final, RNN_model)

convert = {0:'neutral',1:'entailment', 2:'contradiction'}

all_correct = []
for y,x in enumerate(correct_list):
    all_correct.append((snli_val_full.iloc[x,0], snli_val_full.iloc[x,1],snli_val_full.iloc[x,2],model_label[y]))

    
import operator
all_correct.sort(key = operator.itemgetter(1))

for x in range(0,3):
    print(all_correct[x][0])
    print(all_correct[x][1])
    print("labeled as {}".format(all_correct[x][2]), '\n')
          

A group of people dressed in Santa Claus suits are looking towards an audience while a DJ runs a sound board and another person throws green balls into the air .
A band plays at a beach party .
labeled as neutral 

A female basketball player dribbling down court .
A basketball player is destroying the ball .
labeled as neutral 

A building that portrays beautiful architecture stands in the sunlight as somebody on a bike passes by .
A bicyclist rides past an abandoned warehouse on a rainy day
labeled as neutral 



In [46]:
all_incorrect = []
for y,x in enumerate(incorrect_list):
    all_incorrect.append((snli_val_full.iloc[x,0], snli_val_full.iloc[x,1],snli_val_full.iloc[x,2],model_label[y]))

all_incorrect.sort(key = operator.itemgetter(1))

for x in range(0,3):
    print(all_incorrect[x][0])
    print(all_incorrect[x][1])
    print("labeled as {}, but in fact {}".format(convert[all_incorrect[x][3]],all_incorrect[x][2]), '\n')

Two young children in black clothing and padding perform a martial arts match where they compete against the other .
2 young boys compete in martial arts .
labeled as neutral, but in fact entailment 

The baby in a blue jean hat and sunglasses looks at the camera while being held .
A baby looks at the camera .
labeled as neutral, but in fact entailment 

A live band on a lawn jamming out .
A band is practicing new tunes in the garage .
labeled as neutral, but in fact contradiction 



In [47]:
def total_params(model):
    param_total = 0
    for x in model.parameters():
        if x.requires_grad:
            store = 1
            for y in range(len(x.size())):
                store = store*x.size(y)
            param_total += store
    
    #(these are untrained embeddings)
    total_embeddings = 50001*300
    param_total -= total_embeddings
    return param_total

total_params(RNN_model)
    

286803

In [53]:
CNN_model = CNN(emb_size=300, hidden_size=400, num_layers=2, num_classes=3, vocab_size=len(words_ft), kernel = 3)

learning_rate = 3e-4
num_epochs = 8 # number epoch to train

# Criterion and Optimizer
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(CNN_model.parameters(), lr=learning_rate)

# Train the model
total_step = len(train_loader)

train_loss_vals_CNN = []
train_acc_vals_CNN = []
val_loss_vals_CNN = []
val_acc_vals_CNN = []
for epoch in range(num_epochs):
    for i, (data1,data2,lengths1,lengths2,labels,ordering1,ordering2) in enumerate(train_loader):
        CNN_model.train()
        optimizer.zero_grad()
        # Forward pass
        outputs = CNN_model(data1.to(device),data2.to(device),lengths1.to(device),lengths2.to(device),\
                        ordering1,ordering2)
        loss = criterion(outputs, labels.to(device))

        # Backward and optimize
        loss.backward()
        optimizer.step()
        # validate every 100 iterations

        if i > 0 and i % 1000 == 0:
            print("lossing")
            # validate
            val_acc, val_loss = test_model(val_loader, CNN_model)
            train_acc, train_loss = test_model(train_loader, CNN_model)
            train_loss_vals_CNN.append(train_loss)
            train_acc_vals_CNN.append(train_acc)
            val_loss_vals_CNN.append(val_loss)
            val_acc_vals_CNN.append(val_acc)
            print('Epoch: [{}/{}], Step: [{}/{}], Validation Acc: {}, Train Acc: {}'.format(
                       epoch+1, num_epochs, i+1, total_step, val_acc, train_acc ))




lossing
Epoch: [1/8], Step: [1001/3125], Validation Acc: 59.5, Train Acc: 59.298
lossing
Epoch: [1/8], Step: [2001/3125], Validation Acc: 61.6, Train Acc: 62.195
lossing
Epoch: [1/8], Step: [3001/3125], Validation Acc: 59.9, Train Acc: 63.523
lossing
Epoch: [2/8], Step: [1001/3125], Validation Acc: 63.8, Train Acc: 65.02
lossing
Epoch: [2/8], Step: [2001/3125], Validation Acc: 62.6, Train Acc: 65.541
lossing
Epoch: [2/8], Step: [3001/3125], Validation Acc: 64.3, Train Acc: 66.319
lossing
Epoch: [3/8], Step: [1001/3125], Validation Acc: 65.2, Train Acc: 67.514
lossing
Epoch: [3/8], Step: [2001/3125], Validation Acc: 65.8, Train Acc: 68.046
lossing
Epoch: [3/8], Step: [3001/3125], Validation Acc: 65.2, Train Acc: 68.124
lossing
Epoch: [4/8], Step: [1001/3125], Validation Acc: 64.4, Train Acc: 68.444
lossing
Epoch: [4/8], Step: [2001/3125], Validation Acc: 67.5, Train Acc: 69.144
lossing
Epoch: [4/8], Step: [3001/3125], Validation Acc: 65.3, Train Acc: 70.058
lossing
Epoch: [5/8], Step: [

In [57]:
total_params(CNN_model)

1021703

In [56]:
test_model(val_loader, CNN_model)[0]

68.6

In [76]:
#loading mnli data
mnli_val = pd.read_csv('mnli_val.tsv', sep='\t') 
mnli_val['sentence1'] = mnli_val['sentence1'].str.split()
mnli_val['sentence2'] = mnli_val['sentence2'].str.split()


mnli_val['label'].replace('neutral',0, inplace=True)
mnli_val['label'].replace('entailment',1, inplace = True)
mnli_val['label'].replace('contradiction',2, inplace = True)

#data_tup_train = zip(mnli_train.sentence1,mnli_train.sentence2,mnli_train.label,)
data_tup_val_mnli = zip(mnli_val.sentence1,mnli_val.sentence2,mnli_val.label,mnli_val.genre)
genres = np.unique(mnli_val.genre)

In [77]:
Max_sen_length = 82

class VocabDataset_mnli(Dataset):
    """
    Class that represents a train/validation/test dataset that's readable for PyTorch
    Note that this class inherits torch.utils.data.Dataset
    """

    def __init__(self, data_tuple, char2id):
        """
        @param data_list: list of character
        @param target_list: list of targets

        """
        self.sent1,self.sent2,self.target_list,self.genre = zip(*data_tuple)
        assert (len(self.sent1) == len(self.target_list))
        assert (len(self.sent2) == len(self.target_list))
        self.char2id = char2id

    def __len__(self):
        return len(self.sent1)

    def __getitem__(self, key):
        """
        Triggered when you call dataset[i]
        """
        char_idx_1 = [self.char2id[c] if c in self.char2id.keys() else UNK_IDX  for c in self.sent1[key][:Max_sen_length]]
        char_idx_2 = [self.char2id[c] if c in self.char2id.keys() else UNK_IDX  for c in self.sent2[key][:Max_sen_length]]
        label = self.target_list[key]
        genres = self.genre[key]
        return [char_idx_1, char_idx_2,len(char_idx_1),len(char_idx_2),label,genres]

def vocab_collate_func_mnli(batch):
    """
    Customized function for DataLoader that dynamically pads the batch so that all
    data have the same length
    """
    data_list_1 = []
    data_list_2 = []
    label_list = []
    length_list = []
    length_list_2 = []
    genre_list = []

    for datum in batch:
        label_list.append(datum[4])
        length_list.append(datum[2])
        length_list_2.append(datum[3])
        genre_list.append(datum[5])
    # padding
    for datum in batch:
        padded_vec1 = np.pad(np.array(datum[0]),
                                pad_width=((0,Max_sen_length-datum[2])),
                                mode="constant", constant_values=0)
        padded_vec2 = np.pad(np.array(datum[1]),
                                pad_width=((0,Max_sen_length-datum[3])),
                                mode="constant", constant_values=0)
        data_list_1.append(padded_vec1)
        data_list_2.append(padded_vec2)
        

    ordering1 = np.linspace(0, len(data_list_1),len(data_list_1), endpoint = False)
    ordering2 = np.linspace(0, len(data_list_2),len(data_list_2), endpoint = False)
      
    ind_dec_order = np.argsort(length_list)[::-1]
    ind_dec_order_2 = np.argsort(length_list_2)[::-1]
    
    ordering1 = ordering1[ind_dec_order]
    ordering2 = ordering2[ind_dec_order_2]
        
    data_list_1 = np.array(data_list_1)[ind_dec_order]
    data_list_2 = np.array(data_list_2)[ind_dec_order_2]
                
    length_list = np.array(length_list)[ind_dec_order]
    length_list_2 = np.array(length_list_2)[ind_dec_order_2]
    
    
    
    mask1 = np.array(data_list_1)
    mask2 = np.array(data_list_2)
    
    mask1[mask1 > 0] = 1
    mask2[mask2 > 0] = 1
    
    
    ###need to mask padded values###
    
                
    #label_list = np.array(label_list)[ind_dec_order]
    return [torch.from_numpy(np.array(data_list_1)), torch.from_numpy(np.array(data_list_2)), \
            torch.LongTensor(length_list),torch.LongTensor(length_list_2), \
            torch.LongTensor(label_list), ordering1, ordering2, genre_list]


In [78]:
val_dataset_mnli = VocabDataset_mnli(data_tup_val_mnli, words_ft)

In [88]:
val_loader_mnli = torch.utils.data.DataLoader(dataset=val_dataset_mnli,
                                           batch_size=1,
                                           collate_fn=vocab_collate_func_mnli,
                                           shuffle=False)

In [119]:
def test_model_mnli(loader, model, genre):
    """
    Help function that tests the model's performance on a dataset
    @param: loader - data loader for the dataset to test against
    """
    total_loss = 0
    correct = 0
    total = 0
    model.eval()
    with torch.no_grad():
        for data1,data2,lengths1,lengths2,labels,ordering1,ordering2,genre_true in loader:
            if genre != genre_true[0]:
                pass
            else:
                
                outputs = model(data1.to(device),data2.to(device),lengths1,lengths2,\
                            ordering1,ordering2)
                predicted = outputs.max(1, keepdim=True)[1]
                loss = criterion(outputs, labels.to(device))
                total_loss += loss/labels.size(0)
                total += labels.size(0)
                correct += predicted.eq(labels.to(device).view_as(predicted)).sum().item()
                
    return (100 * correct / total), total_loss

In [120]:
rnn_performance = []
cnn_performance = []
for x in genres:
    rnn_performance.append(test_model_mnli(val_loader_mnli, RNN_model, x)[0])
    cnn_performance.append(test_model_mnli(val_loader_mnli, CNN_model, x)[0])
    
    

In [94]:
for x in val_loader_mnli:
    print(x[4].size(0))
    break

1


In [121]:
rnn_performance

[43.71859296482412,
 44.881889763779526,
 43.41317365269461,
 47.16417910447761,
 42.66802443991853]

In [122]:
cnn_performance

[44.62311557788945,
 44.09448818897638,
 43.31337325349301,
 45.27363184079602,
 42.76985743380855]

In [129]:
mnli_perf = pd.DataFrame({"RNN":rnn_performance, "CNN":cnn_performance,"Genre":genres })
mnli_perf =mnli_perf[["Genre", "RNN","CNN"]].T
print(mnli_perf.to_latex())

\begin{tabular}{llllll}
\toprule
{} &        0 &           1 &        2 &          3 &        4 \\
\midrule
Genre &  fiction &  government &    slate &  telephone &   travel \\
RNN   &  43.7186 &     44.8819 &  43.4132 &    47.1642 &   42.668 \\
CNN   &  44.6231 &     44.0945 &  43.3134 &    45.2736 &  42.7699 \\
\bottomrule
\end{tabular}

