# Group 19 NLI(B) Deep Learning Approach without the use of Transformers - Demo

## Import Libraries

In [55]:
import os

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split

## Load GloVe Embeddings and create a dictionary

### Please note that the GloVe embeddings are not included in the dataset. You can download the embeddings from the following link: https://huggingface.co/stanfordnlp/glove/resolve/main/glove.6B.zip

### Or the mirror: https://nlp.stanford.edu/data/wordvecs/glove.6B.zip

### Please set `embedding_path` in the code below to the location of the extracted `glove.6B.300d.txt` file

In [56]:
def load_glove_embeddings(embedding_path):
    """Read a GloVe-style text file into a ``{token: vector}`` dictionary.

    Every non-blank line is split on whitespace: the first field is the token
    and the remaining fields are parsed as ``float32`` coefficients. Blank or
    whitespace-only lines (for example a trailing empty line at the end of the
    file) are skipped instead of raising ``IndexError``.

    Parameters
    ----------
    embedding_path : str or path-like
        Location of the UTF-8 encoded embeddings text file.

    Returns
    -------
    dict
        Maps each token to a 1-D ``numpy.ndarray`` of dtype ``float32``.
        If a token occurs on several lines, the last occurrence wins.

    Raises
    ------
    OSError
        If the file cannot be opened.
    ValueError
        If a coefficient field cannot be parsed as a number.
    """
    print("Loading GloVe embeddings...")
    embeddings_index = {}
    with open(embedding_path, 'r', encoding='utf-8') as f:
        for line in f:
            values = line.split()
            if not values:
                # Nothing to parse on a blank / whitespace-only line.
                continue
            word, *coefs = values
            embeddings_index[word] = np.asarray(coefs, dtype='float32')
    print(f"Loaded {len(embeddings_index)} word vectors.")
    return embeddings_index

# Define the function to convert sentences to vectors
# Same approach as in the previous notebook while training the model, from Bowman et al. (2015)
def sentence_embedding(sentence, embeddings_index):
    """Collapse a sentence into one fixed-size vector built from word vectors.

    The sentence is split on whitespace and every token is looked up verbatim
    in ``embeddings_index``; tokens without an entry contribute nothing. The
    vectors that were found are summed, 1 is added to every component, and the
    result is divided by ``(number of tokens + 1)``, where all tokens count,
    whether or not they were found.

    Parameters
    ----------
    sentence : str
        Text to embed.
    embeddings_index : dict
        Maps tokens to 1-D numpy vectors of equal length; must be non-empty
        because the vector size is read from its first entry.

    Returns
    -------
    numpy.ndarray
        1-D array with the same length as the vectors in ``embeddings_index``.
    """
    # NOTE(review): tokens are neither lowercased nor stripped of punctuation
    # before the lookup, so e.g. "The" or "snow." only match if the index holds
    # exactly those strings. This presumably mirrors the training-time
    # featurisation; confirm before changing it.
    tokens = sentence.split()
    # Vector size comes from the first entry of the index.
    dim = next(iter(embeddings_index.values())).shape[0]
    total = np.zeros(dim)
    for token in tokens:
        vector = embeddings_index.get(token)
        if vector is None:
            continue
        total += vector
    # The +1 in numerator and denominator keeps the division defined for an
    # empty sentence (which yields a vector of ones).
    return (total + 1) / (len(tokens) + 1)

In [57]:
# Path to the GloVe text file (the file name indicates the 6B, 300-d vectors);
# adjust it to wherever the archive was extracted.
embedding_path = "./input/embeddings/glove.6B/glove.6B.300d.txt"
# Parse the file into a {word: vector} dictionary used by the cells below.
embeddings_index = load_glove_embeddings(embedding_path)

Loading GloVe embeddings...
Loaded 400001 word vectors.


## Load Deep Learning Model

In [58]:
# Location of the saved Keras model file.
model_path = "./models/deep_learning/model.keras"
# Restore the model from disk.
model = tf.keras.models.load_model(model_path)
# Print the layer-by-layer summary of the loaded model.
model.summary()

## Load the test set to make predictions on

In [51]:
# CSV file holding the premise/hypothesis pairs to classify.
test_path = "./data/test.csv"
# NOTE(review): the recorded output shows an "Unnamed: 0" column, i.e. the
# file's index column is read as ordinary data; the cells below only use the
# "premise" and "hypothesis" columns.
test_df = pd.read_csv(test_path)

# Display the first rows as the cell output.
test_df.head()

Unnamed: 0,premise,hypothesis
0,"Boy wearing red hat, blue jacket pushing plow ...",The boy is surrounded by snow
1,A blond woman in a black shirt is standing beh...,The woman is standing.
2,Three people in uniform are outdoors and are o...,Uniformed people are outside
3,"A person, in a striped blue shirt and pants, i...",The person is running
4,"A man, woman, and child get their picture take...",A family on vacation is posing.


### The test data is in the format of a table with the following columns: premise and hypothesis

In [52]:
# Convert the sentences to vectors
# One sentence vector per row, built column by column (premise first).
premise_vectors, hypothesis_vectors = (
    np.array([sentence_embedding(text, embeddings_index) for text in test_df[column]])
    for column in ("premise", "hypothesis")
)

# Each model input row is the premise vector followed by the hypothesis vector.
X_test = np.hstack((premise_vectors, hypothesis_vectors))

# NOTE(review): every prediction in the recorded output is 0. If the loaded
# model ends in a single-column output, argmax over axis 1 is always 0, so
# confirm the output layer has one column per class.
class_scores = model.predict(X_test)
y_pred = np.argmax(class_scores, axis=1)

submission = pd.DataFrame({"prediction": y_pred})

[1m104/104[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step


## A small subset of the test data is shown below, along with the predictions made by the model

In [53]:
# First rows of the prediction column.
print(submission.head())

# Show up to the first ten sentence pairs next to their predicted class.
# The count is capped at the number of rows so a short test set does not raise,
# and rows are addressed by position so the preview does not depend on the
# DataFrame's index labels.
n_preview = min(10, len(test_df))
for i in range(n_preview):
    print(f"Premise: {test_df['premise'].iloc[i]}")
    print(f"Hypothesis: {test_df['hypothesis'].iloc[i]}")
    print(f"Prediction: {submission['prediction'].iloc[i]}")
    print()

   prediction
0           0
1           0
2           0
3           0
4           0
Premise: Boy wearing red hat, blue jacket pushing plow in snow.
Hypothesis: The boy is surrounded by snow
Prediction: 0

Premise: A blond woman in a black shirt is standing behind a counter.
Hypothesis: The woman is standing.
Prediction: 0

Premise: Three people in uniform are outdoors and are observing a scene which is out of the picture.
Hypothesis: Uniformed people are outside
Prediction: 0

Premise: A person, in a striped blue shirt and pants, is running along.
Hypothesis: The person is running
Prediction: 0

Premise: A man, woman, and child get their picture taken in front of the mountains.
Hypothesis: A family on vacation is posing.
Prediction: 0

Premise: A tennis player in blue shorts and a white shirt making an aggressive backhand swing towards the tennis ball.
Hypothesis: The person is taking a nap in their bed.
Prediction: 0

Premise: A boy looks on at an electric device with three batteries 

In [54]:
# Save the submission
submission_path = "./predictions/Group_19_B.csv"
# to_csv does not create missing folders, so make sure the target directory
# exists before writing (no-op when it is already there).
os.makedirs(os.path.dirname(submission_path), exist_ok=True)
submission.to_csv(submission_path, index=False)

## Using the model to make predictions on input by the user

In [43]:
# Read one premise/hypothesis pair typed in by the user

input_premise = input("Enter the premise: ")
input_hypothesis = input("Enter the hypothesis: ")


In [44]:
# Echo the entered pair.
print(f"The premise is: {input_premise}")
print(f"The hypothesis is: {input_hypothesis}")

The premise is: A man is throwing something into the road.
The hypothesis is: A man is on the road


In [45]:
def make_prediction(input_premise, input_hypothesis):
    """Classify a single premise/hypothesis pair with the loaded model.

    Both sentences are embedded with ``sentence_embedding`` using the global
    ``embeddings_index``, concatenated (premise first) into one feature row,
    and passed to the global ``model``.

    Parameters
    ----------
    input_premise : str
        Premise sentence.
    input_hypothesis : str
        Hypothesis sentence.

    Returns
    -------
    numpy integer
        Index of the highest-scoring output column for the pair.
    """
    pair_vectors = [
        sentence_embedding(text, embeddings_index)
        for text in (input_premise, input_hypothesis)
    ]
    # The model expects a 2-D batch, so the single example becomes one row.
    features = np.hstack(pair_vectors).reshape(1, -1)
    class_scores = model.predict(features)
    return np.argmax(class_scores, axis=1)[0]

# Translate the predicted class index into its label:
# index 0 -> "Contradiction", index 1 -> "Entailment".
prediction = ["Contradiction", "Entailment"][make_prediction(input_premise, input_hypothesis)]
print(f"The prediction is: {prediction}")

1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 14ms/step
The prediction is: Contradiction


## Making predictions for a user-supplied CSV file

In [46]:
test_file_path = input("Enter the path to the test file: ")
# Keep the user's data under its own names so the official test set loaded
# earlier (`test_df`, `X_test`, `y_pred`) is not overwritten; otherwise
# re-running the preview cell would pair the user's sentences with the
# official `submission`.
user_test_df = pd.read_csv(test_file_path)

# Fail early, naming the problem, if the file lacks the expected columns.
missing_columns = [
    column for column in ("premise", "hypothesis")
    if column not in user_test_df.columns
]
if missing_columns:
    raise KeyError(f"{test_file_path} is missing required column(s): {missing_columns}")

# Convert the sentences to vectors
user_premise_embeddings = [
    sentence_embedding(sentence, embeddings_index) for sentence in user_test_df["premise"]
]
user_hypothesis_embeddings = [
    sentence_embedding(sentence, embeddings_index) for sentence in user_test_df["hypothesis"]
]

# Each model input row is the premise vector followed by the hypothesis vector.
X_user = np.hstack((np.array(user_premise_embeddings), np.array(user_hypothesis_embeddings)))

# Predicted class = index of the highest-scoring output column per row.
y_user_pred = np.argmax(model.predict(X_user), axis=1)
predictions = pd.DataFrame({"prediction": y_user_pred})

predictions_path = input("Enter the path to save the predictions: ")
predictions.to_csv(predictions_path, index=False)

[1m104/104[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
