# Natural Language Inference using BiLSTM

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random
import time

### Data is in the form: premise, hypothesis, label
### with label being either 1 (entailment), 0 (neutral, or contradiction)

In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [4]:
# Model

# Create sentence 100d sentence embeddings for each premise and hypothesis, then concatenate them and pass them through a neural network
# Neural network will have 3 tanh layers with 100 hidden units each, with the bottom layer taking the concatenated sentence embeddings as input and top layer
# feeding a softmax layer with 2 outputs (entailment, contradiction)

# The sentence embedding model simply sums the word embeddings of the words in the sentence
# The word embeddings are initialized with GloVe embeddings and are not updated during training

class SentenceEmbeddingModel(nn.Module):
    def __init__(self, word_embeddings, hidden_size):
        super(SentenceEmbeddingModel, self).__init__()
        self.word_embeddings = nn.Embedding.from_pretrained(word_embeddings)
        self.hidden_size = hidden_size

    def forward(self, sentence):
        return torch.sum(self.word_embeddings(sentence), dim=1)
    
class NeuralNetwork(nn.Module):
    def __init__(self, sentence_embedding_model, hidden_size):
        super(NeuralNetwork, self).__init__()
        self.sentence_embedding_model = sentence_embedding_model
        self.hidden_size = hidden_size
        self.fc1 = nn.Linear(hidden_size * 2, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, 2)
        
    def forward(self, premise, hypothesis):
        premise_embedding = self.sentence_embedding_model(premise)
        hypothesis_embedding = self.sentence_embedding_model(hypothesis)
        concatenated = torch.cat((premise_embedding, hypothesis_embedding), dim=1)
        x = F.tanh(self.fc1(concatenated))
        x = F.tanh(self.fc2(x))
        x = self.fc3(x)
        return x
    
# Load GloVe embeddings
glove = pd.read_csv('./input/embeddings/glove.6B/glove.6B.100d.txt', sep=" ", quoting=3, header=None, index_col=0)
glove_embedding = {key: val.values for key, val in glove.T.items()}
glove_embedding = np.stack(list(glove_embedding.values()))
glove_embedding = torch.tensor(glove_embedding, dtype=torch.float32)

# Load the data
data = pd.read_csv('./data/train.csv')


In [7]:
# Tokenize the data
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences

tokenizer = Tokenizer()
tokenizer.fit_on_texts(data['premise'])
tokenizer.fit_on_texts(data['hypothesis'])
premise_sequences = tokenizer.texts_to_sequences(data['premise'])
hypothesis_sequences = tokenizer.texts_to_sequences(data['hypothesis'])
premise_sequences = pad_sequences(premise_sequences)
hypothesis_sequences = pad_sequences(hypothesis_sequences)

In [13]:
# train the model
hidden_size = 100
sentence_embedding_model = SentenceEmbeddingModel(glove_embedding, hidden_size)
model = NeuralNetwork(sentence_embedding_model, hidden_size)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

for i in range(3):
    for j in range(len(premise_sequences)):
        optimizer.zero_grad()
        premise = torch.tensor([premise_sequences[j]], dtype=torch.long)
        hypothesis = torch.tensor([hypothesis_sequences[j]], dtype=torch.long)
        label = torch.tensor([data['label'][j]], dtype=torch.long)
        output = model(premise, hypothesis)
        loss = criterion(output, label)
        loss.backward()
        optimizer.step()
    print(loss.item())

0.6456449627876282
0.6502424478530884
0.6496351957321167


In [18]:
# Test the model on the first 10 examples
test_data = pd.read_csv("./data/dev.csv")
test_data = test_data.head(10)
tokenizer.fit_on_texts(test_data['premise'])
tokenizer.fit_on_texts(test_data['hypothesis'])
test_premise_sequences = tokenizer.texts_to_sequences(test_data['premise'])
test_hypothesis_sequences = tokenizer.texts_to_sequences(test_data['hypothesis'])
test_premise_sequences = pad_sequences(test_premise_sequences)
test_hypothesis_sequences = pad_sequences(test_hypothesis_sequences)

for i in range(len(test_premise_sequences)):
    print(f"Premise: {test_data['premise'][i]}")
    print(f"Hypothesis: {test_data['hypothesis'][i]}")
    print(f"Label: {test_data['label'][i]}")
    premise = torch.tensor([test_premise_sequences[i]], dtype=torch.long)
    hypothesis = torch.tensor([test_hypothesis_sequences[i]], dtype=torch.long)
    label = torch.tensor([test_data['label'][i]], dtype=torch.long)
    output = model(premise, hypothesis)
    print(output, label)

Premise: Mon Dieu! 
Hypothesis: This person is speaking English.
Label: 0
tensor([[ 0.0586, -0.0787]], grad_fn=<AddmmBackward0>) tensor([0])
Premise: He really shook up my whole mindset, Broker says. 
Hypothesis: His mindset never changed, Broker said.
Label: 0
tensor([[ 0.0602, -0.0771]], grad_fn=<AddmmBackward0>) tensor([0])
Premise: Patients were asked to place themselves on a readiness scale of 1 to 10.
Hypothesis: Most patients rated themselves as a 5 on the scale.
Label: 1
tensor([[ 0.0621, -0.0752]], grad_fn=<AddmmBackward0>) tensor([1])
Premise: I managed to pick-pocket someone next to the snack-stand.
Hypothesis: I stole someone's wallet near the concession stand.
Label: 1
tensor([[ 0.0572, -0.0801]], grad_fn=<AddmmBackward0>) tensor([1])
Premise: Forty comments were received and considered prior to the issuance of the final rules.
Hypothesis: The decisions regarding the issuance of the final rules was made after careful consideration.
Label: 1
tensor([[ 0.0666, -0.0707]], gra