# Natural Language Inference using BiLSTM

### Data is in the form: premise, hypothesis, label
### with label being either 1 (entailment), 0 (neutral, or contradiction)

In [36]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
import xgboost as xgb
import sys

import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences

In [47]:
def load_glove_embeddings(embedding_path):
    print("Loading GloVe embeddings...")
    embeddings_index = {}
    with open(embedding_path, 'r', encoding='utf-8') as f:
        for line in f:
            values = line.split()
            word = values[0]
            coefs = np.asarray(values[1:], dtype='float32')
            embeddings_index[word] = coefs
    print(f"Loaded {len(embeddings_index)} word vectors.")
    return embeddings_index

def sentence_embedding(sentence, embeddings_index):
    words = sentence.split()
    embedding_dim = next(iter(embeddings_index.values())).shape[0]
    sentence_embedding = np.zeros(embedding_dim)
    for word in words:
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            sentence_embedding += embedding_vector
    return sentence_embedding

embedding_path = "./input/embeddings/glove.6B/glove.6B.300d.txt"
embeddings_index = load_glove_embeddings(embedding_path)

Loading GloVe embeddings...
Loaded 400001 word vectors.


In [48]:
data_path = "./data/train.csv"
df = pd.read_csv(data_path)

In [49]:
premise_embeddings = [sentence_embedding(sentence.lower(), embeddings_index) for sentence in df['premise']]
hypothesis_embeddings = [sentence_embedding(sentence.lower(), embeddings_index) for sentence in df['hypothesis']]

In [50]:
X = np.hstack((np.array(premise_embeddings), np.array(hypothesis_embeddings)))
y = df['label'].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [51]:
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(600,)),  # input shape is twice the GloVe embedding dimension for premise and hypothesis
    tf.keras.layers.Dense(200, activation='tanh'),
    tf.keras.layers.Dense(200, activation='tanh'),
    tf.keras.layers.Dense(200, activation='tanh'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(200, activation='tanh'),
    tf.keras.layers.Dense(200, activation='tanh'),
    tf.keras.layers.Dense(200, activation='tanh'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(200, activation='tanh'),
    tf.keras.layers.Dense(2, activation='softmax')  # 2 classes: entailment, contradiction
])

# Compile the model
model.compile(optimizer='adadelta',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Train the model
history = model.fit(X_train, y_train, epochs=150, batch_size=32, validation_split=0.1)

# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Loss: {loss}")
print(f"Test Accuracy: {accuracy}")

Epoch 1/150
[1m607/607[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.4911 - loss: 0.7898 - val_accuracy: 0.4884 - val_loss: 0.7125
Epoch 2/150
[1m607/607[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.5128 - loss: 0.7519 - val_accuracy: 0.5028 - val_loss: 0.7044
Epoch 3/150
[1m607/607[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.5119 - loss: 0.7537 - val_accuracy: 0.5241 - val_loss: 0.6985
Epoch 4/150
[1m607/607[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.5093 - loss: 0.7495 - val_accuracy: 0.5311 - val_loss: 0.6950
Epoch 5/150
[1m607/607[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.5223 - loss: 0.7410 - val_accuracy: 0.5343 - val_loss: 0.6915
Epoch 6/150
[1m607/607[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.5159 - loss: 0.7377 - val_accuracy: 0.5357 - val_loss: 0.6899
Epoch 7/150
[1m607/60

In [52]:
dev_data_path = "./data/dev.csv"
dev_df = pd.read_csv(dev_data_path)

# Print all rows in the dev dataframe where there is a NaN value
print(dev_df[dev_df.isna().any(axis=1)])

# Convert NaN values to empty strings
dev_df = dev_df.fillna('')
# Print all rows in the dev dataframe where there is a NaN value
print(dev_df[dev_df.isna().any(axis=1)])

# Test the model on the dev set
premise_embeddings = [sentence_embedding(sentence.lower(), embeddings_index) for sentence in dev_df['premise']]
hypothesis_embeddings = [sentence_embedding(sentence.lower(), embeddings_index) for sentence in dev_df['hypothesis']]
X_dev = np.hstack((np.array(premise_embeddings), np.array(hypothesis_embeddings)))
y_dev = dev_df['label'].values

loss, accuracy = model.evaluate(X_dev, y_dev)
print(f"Dev Loss: {loss}")
print(f"Dev Accuracy: {accuracy}")


                                                premise hypothesis  label
3126  Tony  Shoes (so Clinton will have Shoes and So...        NaN      1
3970                            Saint-Germain-des-Pr??s        NaN      1
Empty DataFrame
Columns: [premise, hypothesis, label]
Index: []
[1m211/211[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 823us/step - accuracy: 0.6328 - loss: 0.6442
Dev Loss: 0.6491600275039673
Dev Accuracy: 0.6271337270736694


In [55]:

# Create sentence embeddings using plain RNN
def sentence_embedding_rnn(sentence, embeddings_index, rnn_units=300):
    words = sentence.split()
    embedding_dim = next(iter(embeddings_index.values())).shape[0]
    sentence_embedding = np.zeros(embedding_dim)
    for word in words:
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            sentence_embedding += embedding_vector

    # Create plain RNN layer
    rnn_layer = tf.keras.layers.SimpleRNN(rnn_units)
    
    # Process sentence embedding through RNN
    sentence_embedding_rnn = rnn_layer(tf.expand_dims([sentence_embedding], axis=0))
    
    return tf.squeeze(sentence_embedding_rnn, axis=0)

In [56]:
embedding_path = "./input/embeddings/glove.6B/glove.6B.300d.txt"
embeddings_index = load_glove_embeddings(embedding_path)

# Create sentence embeddings for premise and hypothesis using plain RNN
premise_embeddings_rnn = [sentence_embedding_rnn(sentence.lower(), embeddings_index) for sentence in df['premise']]
hypothesis_embeddings_rnn = [sentence_embedding_rnn(sentence.lower(), embeddings_index) for sentence in df['hypothesis']]

# Combine premise and hypothesis embeddings
X_rnn = np.hstack((np.array(premise_embeddings_rnn), np.array(hypothesis_embeddings_rnn)))

# Split data into train and test sets
X_train_rnn, X_test_rnn, y_train_rnn, y_test_rnn = train_test_split(X_rnn, y, test_size=0.2, random_state=42)

Loading GloVe embeddings...
Loaded 400001 word vectors.


In [57]:
# Define model
model_rnn = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(600,)),  # input shape matches the combined dimension of premise and hypothesis embeddings
    tf.keras.layers.Dense(200, activation='tanh'),
    tf.keras.layers.Dense(200, activation='tanh'),
    tf.keras.layers.Dense(200, activation='tanh'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(200, activation='tanh'),
    tf.keras.layers.Dense(200, activation='tanh'),
    tf.keras.layers.Dense(200, activation='tanh'),
    tf.keras.layers.Dense(2, activation='softmax')  # 2 classes: entailment, contradiction
])

# Compile the model
model_rnn.compile(optimizer='adadelta',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

# Train the model
history_rnn = model_rnn.fit(X_train_rnn, y_train_rnn, epochs=150, batch_size=32, validation_split=0.1)

# Evaluate the model
loss_rnn, accuracy_rnn = model_rnn.evaluate(X_test_rnn, y_test_rnn)
print(f"Test Loss: {loss_rnn}")
print(f"Test Accuracy: {accuracy_rnn}")

Epoch 1/150
[1m607/607[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 23ms/step - accuracy: 0.5078 - loss: 0.7358 - val_accuracy: 0.5032 - val_loss: 0.7227
Epoch 2/150
[1m607/607[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.5067 - loss: 0.7298 - val_accuracy: 0.4986 - val_loss: 0.7179
Epoch 3/150
[1m607/607[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.4997 - loss: 0.7270 - val_accuracy: 0.4991 - val_loss: 0.7141
Epoch 4/150
[1m607/607[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.5108 - loss: 0.7179 - val_accuracy: 0.4958 - val_loss: 0.7114
Epoch 5/150
[1m607/607[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.5040 - loss: 0.7154 - val_accuracy: 0.4949 - val_loss: 0.7091
Epoch 6/150
[1m607/607[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.4983 - loss: 0.7157 - val_accuracy: 0.4972 - val_loss: 0.7071
Epoch 7/150
[1m607/

In [58]:
# Test the model on the dev set
dev_data_path = "./data/dev.csv"
dev_df = pd.read_csv(dev_data_path)

# Print all rows in the dev dataframe where there is a NaN value
print(dev_df[dev_df.isna().any(axis=1)])

# Convert NaN values to empty strings
dev_df = dev_df.fillna('')
# Print all rows in the dev dataframe where there is a NaN value
print(dev_df[dev_df.isna().any(axis=1)])

# Create sentence embeddings for premise and hypothesis using plain RNN
premise_embeddings_dev_rnn = [sentence_embedding_rnn(sentence.lower(), embeddings_index) for sentence in dev_df['premise']]
hypothesis_embeddings_dev_rnn = [sentence_embedding_rnn(sentence.lower(), embeddings_index) for sentence in dev_df['hypothesis']]
X_dev_rnn = np.hstack((np.array(premise_embeddings_dev_rnn), np.array(hypothesis_embeddings_dev_rnn)))
y_dev_rnn = dev_df['label'].values

loss_dev_rnn, accuracy_dev_rnn = model_rnn.evaluate(X_dev_rnn, y_dev_rnn)
print(f"Dev Loss: {loss_dev_rnn}")
print(f"Dev Accuracy: {accuracy_dev_rnn}")


                                                premise hypothesis  label
3126  Tony  Shoes (so Clinton will have Shoes and So...        NaN      1
3970                            Saint-Germain-des-Pr??s        NaN      1
Empty DataFrame
Columns: [premise, hypothesis, label]
Index: []
[1m211/211[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 894us/step - accuracy: 0.4969 - loss: 0.7046
Dev Loss: 0.7026088237762451
Dev Accuracy: 0.5034881830215454


In [63]:
def sentence_embedding_lstm(sentence, embeddings_index, lstm_units=300):
    words = sentence.split()
    embedding_dim = next(iter(embeddings_index.values())).shape[0]
    sentence_embedding = np.zeros(embedding_dim)
    for word in words:
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            sentence_embedding += embedding_vector

    # Create LSTM layer
    lstm_layer = tf.keras.layers.LSTM(lstm_units)
    
    # Process sentence embedding through LSTM
    sentence_embedding_lstm = lstm_layer(tf.expand_dims([sentence_embedding], axis=0))
    
    return tf.squeeze(sentence_embedding_lstm, axis=0)

In [64]:
# Load GloVe embeddings
embedding_path = "./input/embeddings/glove.6B/glove.6B.300d.txt"
embeddings_index = load_glove_embeddings(embedding_path)

# Create sentence embeddings for premise and hypothesis using LSTM
premise_embeddings_lstm = [sentence_embedding_lstm(sentence.lower(), embeddings_index) for sentence in df['premise']]
hypothesis_embeddings_lstm = [sentence_embedding_lstm(sentence.lower(), embeddings_index) for sentence in df['hypothesis']]

# Combine premise and hypothesis embeddings
X_lstm = np.hstack((np.array(premise_embeddings_lstm), np.array(hypothesis_embeddings_lstm)))

# Labels
y_lstm = df['label'].values

# Split data into train and test sets
X_train_lstm, X_test_lstm, y_train_lstm, y_test_lstm = train_test_split(X_lstm, y_lstm, test_size=0.2, random_state=42)

Loading GloVe embeddings...
Loaded 400001 word vectors.


In [65]:
# Define model
model_lstm = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(600,)),  # input shape matches the combined dimension of premise and hypothesis embeddings
    tf.keras.layers.Dense(200, activation='tanh'),
    tf.keras.layers.Dense(200, activation='tanh'),
    tf.keras.layers.Dense(200, activation='tanh'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(200, activation='tanh'),
    tf.keras.layers.Dense(200, activation='tanh'),
    tf.keras.layers.Dense(200, activation='tanh'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(200, activation='tanh'),
    tf.keras.layers.Dense(2, activation='softmax')  # 2 classes: entailment, contradiction
])

# Compile the model
model_lstm.compile(optimizer='adadelta',
                   loss='sparse_categorical_crossentropy',
                   metrics=['accuracy'])

# Train the model
history_lstm = model_lstm.fit(X_train_lstm, y_train_lstm, epochs=100, batch_size=32, validation_split=0.1)

# Evaluate the model
loss_lstm, accuracy_lstm = model_lstm.evaluate(X_test_lstm, y_test_lstm)
print(f"Test Loss: {loss_lstm}")
print(f"Test Accuracy: {accuracy_lstm}")

Epoch 1/100
[1m607/607[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - accuracy: 0.5000 - loss: 0.7364 - val_accuracy: 0.5186 - val_loss: 0.6995
Epoch 2/100
[1m607/607[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.4954 - loss: 0.7360 - val_accuracy: 0.5199 - val_loss: 0.6983
Epoch 3/100
[1m607/607[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.4993 - loss: 0.7310 - val_accuracy: 0.5209 - val_loss: 0.6973
Epoch 4/100
[1m607/607[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.4955 - loss: 0.7286 - val_accuracy: 0.5199 - val_loss: 0.6966
Epoch 5/100
[1m607/607[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.4929 - loss: 0.7307 - val_accuracy: 0.5167 - val_loss: 0.6962
Epoch 6/100
[1m607/607[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.4991 - loss: 0.7260 - val_accuracy: 0.5204 - val_loss: 0.6959
Epoch 7/100
[1m607/6

In [67]:
dev_data_path = "./data/dev.csv"
dev_df = pd.read_csv(dev_data_path)

# Print all rows in the dev dataframe where there is a NaN value
print(dev_df[dev_df.isna().any(axis=1)])

# Convert NaN values to empty strings
dev_df = dev_df.fillna('')
# Print all rows in the dev dataframe where there is a NaN value
print(dev_df[dev_df.isna().any(axis=1)])

# Test the model on the dev set
premise_embeddings_dev_lstm = [sentence_embedding_lstm(sentence.lower(), embeddings_index) for sentence in dev_df['premise']]
hypothesis_embeddings_dev_lstm = [sentence_embedding_lstm(sentence.lower(), embeddings_index) for sentence in dev_df['hypothesis']]
X_dev_lstm = np.hstack((np.array(premise_embeddings_dev_lstm), np.array(hypothesis_embeddings_dev_lstm)))
y_dev_lstm = dev_df['label'].values

loss_dev_lstm, accuracy_dev_lstm = model_lstm.evaluate(X_dev_lstm, y_dev_lstm)
print(f"Dev Loss: {loss_dev_lstm}")
print(f"Dev Accuracy: {accuracy_dev_lstm}")

                                                premise hypothesis  label
3126  Tony  Shoes (so Clinton will have Shoes and So...        NaN      1
3970                            Saint-Germain-des-Pr??s        NaN      1
Empty DataFrame
Columns: [premise, hypothesis, label]
Index: []
[1m211/211[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 860us/step - accuracy: 0.5018 - loss: 0.6947
Dev Loss: 0.6935579776763916
Dev Accuracy: 0.5071990489959717


In [87]:
test_premise_sentence = "A person on a horse jumps over a broken down airplane."
test_hypothesis_sentence = "A person is training his horse for a competition."
test_premise_embedding = sentence_embedding(test_premise_sentence.lower(), embeddings_index)
test_hypothesis_embedding = sentence_embedding(test_hypothesis_sentence.lower(), embeddings_index)
X_test = np.hstack((test_premise_embedding, test_hypothesis_embedding))
X_test = np.expand_dims(X_test, axis=0)
y_pred = model.predict(X_test)
y_pred_label = np.argmax(y_pred, axis=1)
print(f"Prediction: {y_pred_label[0]}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
Prediction: 1
