In [1]:
# coding: utf-8

# In[1]:

# First lets improve libraries that we are going to be used in this lab session
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.utils.rnn import pad_packed_sequence, pack_padded_sequence
from torch.utils.data import Dataset
from collections import Counter
import pickle as pkl
import random
import pdb
import csv
import pandas as pd
random.seed(134)

PAD_IDX = 0
UNK_IDX = 1
BATCH_SIZE = 32
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [2]:
# In[2]:

# Fasttext


# In[3]:

def build_vocab():
    words_to_load = 100000

    with open('wiki-news-300d-1M.vec', 'rb') as f:
        loaded_embeddings_ft = np.zeros((words_to_load+2, 300))
        words2idx_ft = {}
        idx2words_ft = {}
        ordered_words_ft = []
        #give <pad> and <unk> random vectors
        loaded_embeddings_ft[0] = np.random.rand(1,300) * 0
        loaded_embeddings_ft[1] = np.random.rand(1,300)

        idx2words_ft[PAD_IDX] =  '<pad>'
        idx2words_ft[UNK_IDX] = '<unk>'
        words2idx_ft['<pad>'] = PAD_IDX
        words2idx_ft['<unk>'] = UNK_IDX
        ordered_words_ft.append('<pad>')
        ordered_words_ft.append('<unk>')

        for i, line in enumerate(f):
            if i >= words_to_load: 
                break
            s = line.decode('utf8').split()
            loaded_embeddings_ft[i+2, :] = np.asarray(s[1:])
            words2idx_ft[s[0]] = i+2
            idx2words_ft[i+2] = s[0]
            ordered_words_ft.append(s[0])
        return words2idx_ft, idx2words_ft, loaded_embeddings_ft

# In[4]:
def convert_to_words(data):
    return [(sample[0].split(), sample[1].split(), sample[2]) for sample in data]

def read_data():
    train_dataset = 'snli_train.tsv'
    val_dataset = 'snli_val.tsv'
    with open(train_dataset) as tsvfile1:
        reader1 = csv.reader(tsvfile1, delimiter='\t')
        train_data =convert_to_words(reader1)
    max_len1 = max([len(word[0]) for word in train_data])
    max_len2 = max([len(word[0]) for word in train_data])
    max_len = max(max_len1, max_len2)
    
    with open(val_dataset) as tsvfile2:
        reader2 = csv.reader(tsvfile2, delimiter = '\t')
        val_data = convert_to_words(reader2)
    words2id, id2words, loaded_embeddings = build_vocab()
    return train_data[1:], val_data[1:], words2id, id2words, loaded_embeddings, max_len

In [3]:
def convert_to_words(data):
    return [(sample[0].split(), sample[1].split(), sample[2]) for sample in data]

def convert_to_words1(test_da):
    return [(sample[0].split(), sample[1].split(), sample[2], sample[3]) for sample in test_da]

def read_data():
    train_dataset = 'snli_train.tsv'
    val_dataset = 'snli_val.tsv'
    test_dataset = 'mnli_val.tsv'
    with open(train_dataset) as tsvfile1:
        reader1 = csv.reader(tsvfile1, delimiter='\t')
        train_data =convert_to_words(reader1)
    max_len1 = max([len(word[0]) for word in train_data])
    max_len2 = max([len(word[0]) for word in train_data])
    max_len = max(max_len1, max_len2)
    
    with open(val_dataset) as tsvfile2:
        reader2 = csv.reader(tsvfile2, delimiter = '\t')
        val_data = convert_to_words(reader2)
    
    with open(test_dataset,"r", encoding='utf-8') as tsvfile3:
        reader3 = csv.reader(tsvfile3, delimiter='\t')
        test_data =convert_to_words1(reader3)
    
    words2id, id2words, loaded_embeddings = build_vocab()
    return train_data[1:], val_data[1:],test_data[1:], words2id, id2words, loaded_embeddings, max_len

In [4]:
pretrained_embeddings = build_vocab()[2]


In [5]:
# In[5]:
train_data, val_data,test_data, words2id, id2words, loaded_embeddings, max_len =read_data()

labels_list = ["entailment", "neutral", "contradiction"]

# In[6]:

print ("Maximum word length of dataset is {}".format(max_len))
print ("Number of words in dataset is {}".format(len(id2words)))
#print ("Characters:")
#print (char2id.keys())
#print(train_data[:10])


Maximum word length of dataset is 82
Number of words in dataset is 100002


In [6]:
test_genres = ['fiction', 'government', 'slate', 'telephone', 'travel']

In [7]:
test_data[0][3]

'fiction'

In [8]:
fiction_test_data =[]
government_test_data =[]
slate_test_data = []
telephone_test_data =[]
travel_test_data = []
for x in test_data:
    if x[3] == 'fiction':
        fiction_test_data.append(x[:3])
    elif x[3] == 'government':
        government_test_data.append(x[:3])
    elif x[3] == 'slate':
        slate_test_data.append(x[:3])
    elif x[3] == 'telephone':
        telephone_test_data.append(x[:3])
    elif x[3] == 'travel':
        travel_test_data.append(x[:3])
    
        
    

In [9]:
print(len(fiction_test_data), len(government_test_data), len(slate_test_data),len(telephone_test_data),len(travel_test_data) )

995 1016 1002 1005 982


In [10]:
# In[7]:

class VocabDataset(Dataset):
    """
    Class that represents a train/validation/test dataset that's readable for PyTorch
    Note that this class inherits torch.utils.data.Dataset
    """

    def __init__(self, data_tuple, words2id):
        """
        @param data_list: list of character
        @param target_list: list of targets

        """
        self.data_list1,self.data_list2, self.target_list = zip(*data_tuple)
        assert (len(self.data_list1) ==len(self.data_list2))
        assert (len(self.data_list2) == len(self.target_list))
        self.words2id = words2id

    def __len__(self):
        return len(self.data_list1)

    def __getitem__(self, key):
        """
        Triggered when you call dataset[i]
        """
        word_idx1 = [self.words2id[c] if c in self.words2id else UNK_IDX  for c in self.data_list1[key][:max_len]]
        word_idx2 = [self.words2id[c] if c in self.words2id else UNK_IDX  for c in self.data_list2[key][:max_len]]
        label = labels_list.index(self.target_list[key])
        return [word_idx1,word_idx2, len(word_idx1),len(word_idx2), label]


# In[8]:




In [11]:
def vocab_collate_func(batch):
    """
    Customized function for DataLoader that dynamically pads the batch so that all
    data have the same length
    """
    data_list1 = []
    data_list2 = []
    label_list = []
    length_list1 = []
    length_list2 = []

    for datum in batch:
        label_list.append(datum[4])
        length_list1.append(datum[2])
        length_list2.append(datum[3])
        
    # padding
    for datum in batch:
        padded_vec1 = np.pad(np.array(datum[0]),
                                pad_width=((0,max_len -datum[2])),
                                mode="constant", constant_values=0)
        data_list1.append(padded_vec1)
        
        padded_vec2 = np.pad(np.array(datum[1]),
                                pad_width=((0, max_len -datum[3])),
                                mode="constant", constant_values=0)
        data_list2.append(padded_vec2)

    

    return (torch.LongTensor(data_list1).cuda(), 
        torch.LongTensor(data_list2).cuda(),
        torch.LongTensor(length_list1).cuda(), 
        torch.LongTensor(length_list2).cuda(),
        torch.LongTensor(label_list).cuda())

In [12]:
# In[9]:

train_dataset = VocabDataset(train_data, words2id)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=BATCH_SIZE,
                                           collate_fn=vocab_collate_func,
                                           shuffle=True)

val_dataset = VocabDataset(val_data, words2id)
val_loader = torch.utils.data.DataLoader(dataset=val_dataset,
                                           batch_size=BATCH_SIZE,
                                           collate_fn=vocab_collate_func,
                                           shuffle=True)


fiction_test_dataset = VocabDataset(fiction_test_data, words2id)
fiction_test_loader = torch.utils.data.DataLoader(dataset=fiction_test_dataset,
                                           batch_size=BATCH_SIZE,
                                           collate_fn=vocab_collate_func,
                                           shuffle=True)

government_test_dataset = VocabDataset(government_test_data, words2id)
government_test_loader = torch.utils.data.DataLoader(dataset=government_test_dataset,
                                           batch_size=BATCH_SIZE,
                                           collate_fn=vocab_collate_func,
                                           shuffle=True)

slate_test_dataset = VocabDataset(slate_test_data, words2id)
slate_test_loader = torch.utils.data.DataLoader(dataset=slate_test_dataset,
                                           batch_size=BATCH_SIZE,
                                           collate_fn=vocab_collate_func,
                                           shuffle=True)

telephone_test_dataset = VocabDataset(telephone_test_data, words2id)
telephone_test_loader = torch.utils.data.DataLoader(dataset=telephone_test_dataset,
                                           batch_size=BATCH_SIZE,
                                           collate_fn=vocab_collate_func,
                                           shuffle=True)

travel_test_dataset = VocabDataset(travel_test_data, words2id)
travel_test_loader = torch.utils.data.DataLoader(dataset=travel_test_dataset,
                                           batch_size=BATCH_SIZE,
                                           collate_fn=vocab_collate_func,
                                           shuffle=True)


# In[10]:

#CNN


# In[11]:

class CNN(nn.Module):
    def __init__(self, emb_size, hidden_size, num_layers, vocab_size):

        super(CNN, self).__init__()

        self.num_layers, self.hidden_size = num_layers, hidden_size
        self.embedding = nn.Embedding.from_pretrained(torch.FloatTensor(pretrained_embeddings), freeze=True).cuda()
        #self.embedding = nn.Embedding(vocab_size, emb_size, padding_idx=PAD_IDX)
        
        #self.embedding.weight.data.copy_(torch.from_numpy(build_vocab()[2]))
        #self.embedding.weight.requires_grad = False
    
        self.conv1 = nn.Conv1d(emb_size, hidden_size, kernel_size=3, padding=1)
        self.conv2 = nn.Conv1d(hidden_size, hidden_size, kernel_size=3, padding=1)
        

        self.maxpool = nn.MaxPool1d(82, stride = 2)
        self.fc1 = nn.Linear(hidden_size*2,64)
        
        self.fc2 = nn.Linear(64, 3)

        #self.linear = nn.Linear(hidden_size, num_classes)

    def forward(self, x1,x2, lengths1, lengths2):
        batch_size1, seq_len1 = x1.size()
        batch_size2, seq_len2 = x2.size()
        x1 = x1.long()

        embed1 = self.embedding(x1).float()
        hidden1 = self.conv1(embed1.transpose(1,2)).transpose(1,2)
        #print(hidden1.size())
        hidden1 = F.relu(hidden1.contiguous().view(-1, hidden1.size(-1))).view(batch_size1, seq_len1, hidden1.size(-1))
        #print(hidden1.size())

        hidden1 = self.conv2(hidden1.transpose(1,2)).transpose(1,2)
        #print(hidden1.size())
        hidden1 = F.relu(hidden1.contiguous().view(-1, hidden1.size(-1))).view(batch_size1, seq_len1, hidden1.size(-1))
        #print(hidden1.size())
        
        output1 = self.maxpool(hidden1.transpose(1,2)).transpose(1, 2).squeeze(1)
        #print(hidden1.size())
        #output1 = torch.max(output1, dim=1)[0]
        
        
        batch_size2, seq_len2 = x1.size()
        x2 = x2.long()

        embed2 = self.embedding(x2)
        hidden2 = self.conv1(embed2.transpose(1,2)).transpose(1,2)
        hidden2 = F.relu(hidden2.contiguous().view(-1, hidden2.size(-1))).view(batch_size2, seq_len2, hidden2.size(-1))

        hidden2 = self.conv2(hidden2.transpose(1,2)).transpose(1,2)
        hidden2 = F.relu(hidden2.contiguous().view(-1, hidden2.size(-1))).view(batch_size2, seq_len2, hidden2.size(-1))
        #print(hidden2.transpose(1,2).size())
        output2 = self.maxpool(hidden2.transpose(1,2)).transpose(1, 2).squeeze(1)
        #print(output2.size())
        
        #output2 = torch.max(output2, dim=1)[0]
        #print(output2.size())
        output = torch.cat((output1, output2), dim=1)
        #print(output.size())
        output = F.relu(self.fc1(output))
        #print(output.size())
        output = self.fc2(output)
        
        '''
        
        
        output1 = torch.max(hidden1, dim=1)[0]
        #print(hidden1.size())
        #output1 = torch.sum(output1, dim=1)
        
        
        
        x2 = x2.long()

        embed2 = self.embedding(x2).float()
        hidden2 = self.conv1(embed2.transpose(1,2)).transpose(1,2)
        hidden2 = F.relu(hidden2.contiguous().view(-1, hidden2.size(-1))).view(batch_size2, seq_len2, hidden2.size(-1))

        hidden2 = self.conv2(hidden2.transpose(1,2)).transpose(1,2)
        hidden2 = F.relu(hidden2.contiguous().view(-1, hidden2.size(-1))).view(batch_size2, seq_len2, hidden2.size(-1))
        output2 = torch.max(hidden1, dim=1)[0]
        
        #output2 = torch.sum(output2, dim=1)
        output = torch.cat((output1, output2), dim=1)
    
        #print(output.size())
        output = F.relu(self.fc1(output))
        #print(output.size())
        output = self.fc2(output)
        #print(output.size())
        #output = F.softmax(output, dim =1)
        #print(output.size())
        
        
        

        #hidden = torch.sum(hidden, dim=1)
        #logits = self.linear(hidden)
        '''
        return output
    
    

In [13]:
# In[12]:

def test_model(loader, model):
    """
    Help function that tests the model's performance on a dataset
    @param: loader - data loader for the dataset to test against
    """
    correct = 0
    total = 0
    #val_losses = 0
    #cout =0
    model.eval()
    for data1,data2, lengths1, lengths2, labels in loader:
        data_batch1,data_batch2, lengths_batch1,lengths_batch2, label_batch = data1,data2, lengths1, lengths2,labels
        outputs = F.softmax(model(data_batch1,data_batch2, lengths_batch1, lengths_batch2), dim=1)
        #outputs = model(data_batch1,data_batch2, lengths_batch1, lengths_batch2)
        predicted = outputs.max(1, keepdim=True)[1]
        
        #outs = model(data_batch1,data_batch2, lengths_batch1, lengths_batch2)
        #loss = criterion(outs, labels)
        #val_losses += loss
        #cout +=1
        total += labels.size(0)
        correct += predicted.eq(labels.view_as(predicted)).sum().item()
    return (100 * correct / total)

def earily_stop(val_acc_history, t=5, required_progress=0.001):
    """
    Stop the training if there is no non-trivial progress in k steps
    @param val_acc_history: a list contains all the historical validation acc
    @param required_progress: the next acc should be higher than the previous by 
        at least required_progress amount to be non-trivial
    @param t: number of training steps 
    @return: a boolean indicates if the model should earily stop
    """
    # TODO: Finished
    stop=False
    repeat=0
    if len(val_acc_history)<=t:
        stop=False
    else:
        for i in range(1,t+1):
            i=-i
            diff=val_acc_history[i]-val_acc_history[i-1]
            
            if diff-required_progress<=0.00001:
                repeat+=1
            if repeat==t:
                stop=True
                break
    return stop

learning_rate = 1e-4
num_epochs = 10 # number epoch to train

model = CNN(emb_size=300, hidden_size=200, num_layers=2, vocab_size=len(id2words)).cuda()

# Criterion and Optimizer
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=learning_rate)

# Train the model
total_step = len(train_loader)
validation_acc_history = []
stop_training = False

epochs = []
steps = []
train_losses =[]
#val_losses = []
train_accs = []
val_accs = []
res =[]

for epoch in range(num_epochs):
    running_loss = 0.0
    for i, (data1,data2, lengths1,lengths2, labels) in enumerate(train_loader):
        
        model.train()
        optimizer.zero_grad()
        # Forward pass
        outputs = model(data1,data2, lengths1,lengths2)
        #print(outputs.size())
        #print(labels.size())
        loss = criterion(outputs, labels)

        # Backward and optimize
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        # validate every 100 iterations
        if i > 0 and i % 1000 == 0:
            train_acc = test_model(train_loader, model)
            # validate
            val_acc = test_model(val_loader, model)
            #val_loss = test_model(val_loader, model)[1]
            print('Epoch: [{}/{}], Step: [{}/{}],  training loss: {}, Train Acc: {}, Validation Acc: {}'.format(
                       epoch+1, num_epochs, i+1, len(train_loader),  running_loss/1000, train_acc, val_acc))
            
            epochs.append(epoch+1)
            steps.append((i+1, len(train_loader)))
            train_losses.append(running_loss/1000)
            #val_losses.append(val_loss)
            train_accs.append(train_acc)
            val_accs.append(val_acc)
            
            
            running_loss = 0.0
            validation_acc_history.append(val_acc)
            # check if we need to earily stop the model
            stop_training = earily_stop(validation_acc_history)
            
            if stop_training:
                print("earily stop triggered")
                break
    # because of the the nested loop
    if stop_training:
        break
res.append(epochs, steps, train_losses, train_accs, val_accs)
res = pd.DataFrame(res, columns = ['epochs', 'steps', 'train_losses', 'train_accs', 'val_accs'])

res.to_csv('CNN_hidden200_kernel3.csv')



# In[ ]:

In [14]:
# In[ ]:

#Bidirectional GRU


# In[ ]:

class RNN(nn.Module):
    def __init__(self, emb_size, hidden_size, num_layers, num_classes, linear_size):
    
        super(RNN, self).__init__()

        self.num_layers, self.hidden_size = num_layers, hidden_size
        self.embedding = nn.Embedding.from_pretrained(torch.FloatTensor(pretrained_embeddings), freeze=True).cuda()
        #self.embedding = nn.Embedding(vocab_size, emb_size, padding_idx=PAD_IDX)
        
        #self.embedding.weight.data.copy_(torch.from_numpy(build_vocab()[2]))
        #self.embedding.weight.requires_grad = False
        self.gru= nn.GRU(emb_size, hidden_size, num_layers, batch_first=True, bidirectional=True)
        self.dropout = nn.Dropout(p=0.0)
        self.relu = nn.ReLU()
        self.linear_1 = nn.Linear(hidden_size*2, linear_size)
        self.linear_2 = nn.Linear(linear_size, num_classes)

    def init_hidden(self, batch_size):
        hidden = torch.randn(self.num_layers * 2, batch_size, self.hidden_size).cuda()
       
        return hidden
    

    def forward(self, x1, x2, length1, length2):
        # reset hidden state
        batch_size, seq_len1 = x1.size()
        batch_size, seq_len2 = x2.size()

        self.hidden = self.init_hidden(batch_size)
        
         # compute sorted sequence lengths
        _, idx_sort_1 = torch.sort(length1, dim=0, descending=True)
        _, idx_sort_2 = torch.sort(length2, dim=0, descending=True)
        
        _, idx_unsort_1 = torch.sort(idx_sort_1, dim=0)
        _, idx_unsort_2 = torch.sort(idx_sort_2, dim=0)
        
        # get embedding of characters
        embed1 = self.embedding(x1).float()
        embed2 = self.embedding(x2).float()
        # sort embeddings and lengths
        embed1 = embed1.index_select(0,idx_sort_1)
        embed2 = embed2.index_select(0,idx_sort_2)
        len1 = list(length1[idx_sort_1])
        len2 = list(length2[idx_sort_2])
        
        # pack padded sequence
        embed1 = torch.nn.utils.rnn.pack_padded_sequence(embed1, np.array(len1), batch_first=True)
        embed2 = torch.nn.utils.rnn.pack_padded_sequence(embed2, np.array(len2), batch_first=True)
        
        # fprop though RNN
        out1, hidden1 = self.gru(embed1, self.hidden)
        out2, hidden2 = self.gru(embed2, self.hidden)
        hidden1 = self.dropout(hidden1)
        hidden2 = self.dropout(hidden2)
        
        
        
        # sum the hidden state on the first dimension
        hidden1 = torch.sum(hidden1, dim=0)
        hidden2 = torch.sum(hidden2, dim=0)
        
        # unsort the hidden state and concatenate the two
        hidden1 = hidden1.index_select(0, idx_unsort_1)
        hidden2 = hidden2.index_select(0, idx_unsort_2)
        
        concat_input = torch.cat((hidden1, hidden2), dim=1)
        output = self.linear_1(concat_input)
        output = self.relu(output)
        output = self.linear_2(output)
        return output

   

In [15]:
# In[12]:

def test_model(loader, model):
    """
    Help function that tests the model's performance on a dataset
    @param: loader - data loader for the dataset to test against
    """
    correct = 0
    total = 0
    model.eval()
    for data1,data2, lengths1, lengths2, labels in loader:
        data_batch1,data_batch2, lengths_batch1,lengths_batch2, label_batch = data1,data2, lengths1, lengths2,labels
        outputs = F.softmax(model(data_batch1,data_batch2, lengths_batch1, lengths_batch2), dim=1)
        #outputs = model(data_batch1,data_batch2, lengths_batch1, lengths_batch2)
        predicted = outputs.max(1, keepdim=True)[1]

        total += labels.size(0)
        correct += predicted.eq(labels.view_as(predicted)).sum().item()
    return (100 * correct / total)

learning_rate = 3e-4
num_epochs = 10 # number epoch to train
vocab_size=len(id2words)


    
model = RNN(emb_size=300, hidden_size=300, num_layers=1, num_classes=3,linear_size =64).cuda()

# Criterion and Optimizer
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=learning_rate)

# Train the model
# Train the model
total_step = len(train_loader)
validation_acc_history = []
stop_training = False

epochs = []
steps = []
train_losses =[]
#val_losses = []
train_accs = []
val_accs = []
res =[]

for epoch in range(num_epochs):
    running_loss = 0.0
    for i, (data1,data2, lengths1,lengths2, labels) in enumerate(train_loader):
        
        model.train()
        optimizer.zero_grad()
        # Forward pass
        outputs = model(data1,data2, lengths1,lengths2)
        #print(outputs.size())
        #print(labels.size())
        loss = criterion(outputs, labels)

        # Backward and optimize
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        # validate every 100 iterations
        if i > 0 and i % 1000 == 0:
            train_acc = test_model(train_loader, model)
            # validate
            val_acc = test_model(val_loader, model)
            #val_loss = test_model(val_loader, model)[1]
            print('Epoch: [{}/{}], Step: [{}/{}],  training loss: {}, Train Acc: {}, Validation Acc: {}'.format(
                       epoch+1, num_epochs, i+1, len(train_loader),  running_loss/1000, train_acc, val_acc))
            
            epochs.append(epoch+1)
            steps.append((i+1, len(train_loader)))
            train_losses.append(running_loss/1000)
            #val_losses.append(val_loss)
            train_accs.append(train_acc)
            val_accs.append(val_acc)
            
            
            running_loss = 0.0
            validation_acc_history.append(val_acc)
            # check if we need to earily stop the model
            stop_training = earily_stop(validation_acc_history)
            
            if stop_training:
                print("earily stop triggered")
                break
    # because of the the nested loop
    if stop_training:
        break



Epoch: [1/10], Step: [1001/3125],  training loss: 0.9752178379297256, Train Acc: 57.183, Validation Acc: 56.5
Epoch: [1/10], Step: [2001/3125],  training loss: 0.9007913145422936, Train Acc: 59.837, Validation Acc: 57.7
Epoch: [1/10], Step: [3001/3125],  training loss: 0.8666259275078774, Train Acc: 61.164, Validation Acc: 60.0
Epoch: [2/10], Step: [1001/3125],  training loss: 0.8392626314163208, Train Acc: 62.797, Validation Acc: 62.1
Epoch: [2/10], Step: [2001/3125],  training loss: 0.8218069788813591, Train Acc: 64.671, Validation Acc: 63.4
Epoch: [2/10], Step: [3001/3125],  training loss: 0.7992744281589985, Train Acc: 65.503, Validation Acc: 65.1
Epoch: [3/10], Step: [1001/3125],  training loss: 0.7754831348657608, Train Acc: 65.817, Validation Acc: 64.8
Epoch: [3/10], Step: [2001/3125],  training loss: 0.772137114584446, Train Acc: 67.047, Validation Acc: 65.1
Epoch: [3/10], Step: [3001/3125],  training loss: 0.7598943712115288, Train Acc: 67.77, Validation Acc: 66.4
Epoch: [4/10

In [16]:
res = pd.DataFrame({'epochs':epochs, 'steps': steps, 'train_losses':train_losses, 'train_accs': train_accs,'val_accs': val_accs})
res.to_csv('RNN_hidden300_drop0.csv')

In [17]:
print ("After training for {} epochs".format(num_epochs))
print ("Val Acc {}".format(test_model(val_loader, model)))

print ("Fiction Test Acc {}".format(test_model(fiction_test_loader, model)))
print ("Government Test Acc {}".format(test_model(government_test_loader, model)))
print ("Slate Test Acc {}".format(test_model(slate_test_loader, model)))
print ("Telephone Test Acc {}".format(test_model(telephone_test_loader, model)))
print ("Travel Test Acc {}".format(test_model(travel_test_loader, model)))

After training for 10 epochs
Val Acc 71.8
Fiction Test Acc 50.753768844221106
Government Test Acc 47.93307086614173
Slate Test Acc 44.01197604790419
Telephone Test Acc 49.45273631840796
Travel Test Acc 45.5193482688391


In [18]:
def error_analysis(loader, model):
    model.eval()
    
    all_wrong =0
    all_correct =0
    for data1,data2, lengths1, lengths2, labels in loader:
        data_batch1,data_batch2, lengths_batch1,lengths_batch2 = data1,data2, lengths1, lengths2
        outputs = F.softmax(model(data_batch1,data_batch2, lengths_batch1, lengths_batch2), dim=1)
        predicted = outputs.max(1, keepdim=True)[1].view(-1)
        #convert to numpy arraies
        predicted_numpy = predicted.cpu().numpy()
        #print(predicted_numpy)
        label_numpy=labels.cpu().numpy()
        #print(label_numpy)

        wrong = np.where(predicted_numpy !=label_numpy)[0]
        print(wrong)
        correct = np.where(predicted_numpy ==label_numpy)[0]
        print(correct)
        
    
        
        wrong_review1=[]
        wrong_review2=[]
        
        for ind in wrong[:3]:
            data_wrong1=data_batch1[ind].cpu().numpy()
            data_wrong2=data_batch2[ind].cpu().numpy()
            for index in data_wrong1:
                word1=id2words[index]
                wrong_review1.append(word1)
            for index in data_wrong2:
                word2=id2words[index]
                wrong_review2.append(word2)   
            print('wrong_review1', wrong_review1)
            print('wrong_review2', wrong_review2)
            print('wrong_predicted', predicted_numpy[ind] )
            print('wrong_label',label_numpy[ind] )
            
            all_wrong += 1
            wrong_review1=[]
            wrong_review2=[]
            if all_wrong >3:
                correct_review1=[]    
                correct_review2=[]  
                for ind in correct[:3]:
                    data_correct1=data_batch1[ind].cpu().numpy()
                    data_correct2=data_batch2[ind].cpu().numpy()
                    for index in data_correct1:
                        word1=id2words[index]
                        correct_review1.append(word1)
                    for index in data_correct2:
                        word2=id2words[index]
                        correct_review2.append(word2)
                    print('correct_review1', correct_review1)
                    print('correct_review2', correct_review2)
                    print('correct_predicted', predicted_numpy[ind] )
                    print('correct_label',label_numpy[ind] )
                    all_correct += 1
                    correct_review1 =[]
                    correct_review2 =[]
                    if all_correct >3:
                        break

                    

In [19]:
error_analysis(val_loader, model)

[ 8 11 14 20 22 26 28]
[ 0  1  2  3  4  5  6  7  9 10 12 13 15 16 17 18 19 21 23 24 25 27 29 30
 31]
wrong_review1 ['A', 'woman', ',', 'wearing', 'a', 'white', 'shirt', 'and', 'green', 'shorts', ',', 'sitting', 'on', 'a', 'rock', 'in', 'a', 'beautiful', 'body', 'of', 'water', '.', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']
wrong_review2 ['The', 'woman', 'is', 'barefoot', '.', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>'

[ 2  3 13]
[ 0  1  4  5  6  7  8  9 10 11 12 14 15 16 17 18 19 20 21 22 23 24 25 26
 27 28 29 30 31]
wrong_review1 ['A', 'building', 'that', 'portrays', 'beautiful', 'architecture', 'stands', 'in', 'the', 'sunlight', 'as', 'somebody', 'on', 'a', 'bike', 'passes', 'by', '.', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']
wrong_review2 ['A', 'bicyclist', 'rides', 'past', 'an', 'abandoned', 'warehouse', 'on', 'a', 'rainy', 'day', '<pad>', '<pad>', '<pad>', '<pad>', '<pa

wrong_review1 ['Two', 'children', 'throw', 'dead', 'leaves', 'into', 'the', 'air', 'around', 'them', '.', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']
wrong_review2 ['The', 'banana', 'fell', 'off', 'of', 'the', 'tree', '.', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<

[ 0  1  2  7  8  9 11 16 17 21 23 28]
[ 3  4  5  6 10 12 13 14 15 18 19 20 22 24 25 26 27 29 30 31]
wrong_review1 ['A', 'running', 'back', 'running', 'with', 'the', 'football', '.', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']
wrong_review2 ['The', 'running', 'back', 'is', 'in', 'class', '.', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pa

correct_review1 ['Man', 'in', 'overalls', 'with', 'two', 'horses', '.', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']
correct_review2 ['a', 'man', 'in', 'overalls', 'with', 'two', 'horses', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '

[ 0  1  2  7 11 14 15 17 18 20 27 28]
[ 3  4  5  6  8  9 10 12 13 16 19 21 22 23 24 25 26 29 30 31]
wrong_review1 ['Three', 'sisters', ',', 'barefoot', 'in', 'pink', 'dresses', 'and', 'who', 'range', 'in', 'age', 'from', 'preschool', 'to', 'teenager', 'are', 'pictured', 'on', 'a', 'beach', 'as', 'they', 'look', 'out', 'at', 'the', 'Ocean', '.', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']
wrong_review2 ['Three', 'sisters', ',', 'barefoot', 'in', 'pink', 'dresses', 'and', 'who', 'range', 'in', 'age', 'from', 'preschool', 'to', 'teenager', '<pad>', '<pad>', '<pad>

[ 3  6  8  9 16 23 24 31]
[ 0  1  2  4  5  7 10 11 12 13 14 15 17 18 19 20 21 22 25 26 27 28 29 30]
wrong_review1 ['A', 'young', 'woman', 'sits', '<unk>', 'beside', 'her', 'purse', 'on', 'the', 'grass', 'among', 'a', 'crowd', 'of', 'dogs', '.', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']
wrong_review2 ['The', 'dogs', 'are', 'standing', 'around', 'a', '<unk>', 'woman', 'on', 'the', 'grass', '.', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>'