# Group 19 - NLI (A) - Traditional ML Approach (A) - Demo

## Dependency Management

In [1]:
from pathlib import Path

import pandas as pd
import scipy.sparse
from joblib import load

## Data Preparation

In [2]:
def build_test_data(test_data, vectorizer):
  """Build the sparse feature matrix for premise/hypothesis pairs.

  The premise and hypothesis columns are vectorized separately with the
  same vectorizer and the two results are stacked side by side, so each
  output row is ``[premise features | hypothesis features]``.

  Args:
    test_data: Table indexed by column name that provides 'premise' and
      'hypothesis' columns, each exposing ``.values``.
    vectorizer: Object whose ``transform`` maps an array of strings to a
      sparse matrix with one row per input string.

  Returns:
    The sparse matrix produced by ``scipy.sparse.hstack`` over the premise
    and hypothesis matrices (same number of rows as ``test_data``).
  """
  # Cast to unicode so every cell is a string before vectorizing; note that
  # missing values are stringified (e.g. NaN becomes the text 'nan') rather
  # than dropped. NOTE(review): confirm training used the same convention.
  tfidf_premise = vectorizer.transform(test_data['premise'].values.astype('U'))
  tfidf_hypothesis = vectorizer.transform(test_data['hypothesis'].values.astype('U'))

  return scipy.sparse.hstack((tfidf_premise, tfidf_hypothesis))

In [3]:
def write_to_csv(pred_labels, output_path='predictions/Group_19_A.csv'):
  """Write predicted labels to a single-column CSV file.

  Args:
    pred_labels: Sequence (or 1-D array) of predicted labels, one per row.
    output_path: Destination CSV path. Defaults to the original hard-coded
      location 'predictions/Group_19_A.csv'.

  The file has a 'prediction' header and no index column.
  """
  output_path = Path(output_path)
  # DataFrame.to_csv does not create missing directories, so make sure the
  # target folder exists instead of failing after inference has finished.
  output_path.parent.mkdir(parents=True, exist_ok=True)

  predictions_df = pd.DataFrame(pred_labels, columns=['prediction'])
  predictions_df.to_csv(output_path, index=False)

## Load Ensemble Model & TF-IDF Vectorizer

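The trained models can be downloaded from: https://drive.google.com/drive/folders/1lvrWGcUjme_sG4yvtNi0cQ4TAOil10ce?usp=sharing

Place the downloaded files so that the paths used in the next cell resolve: `models/ensemble_model.joblib` and `models/tfidf/tfidf_vectorizer.joblib`.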

In [4]:
# Restore the persisted TF-IDF vectorizer and the ensemble classifier.
# joblib.load unpickles its input, which can execute arbitrary code, so
# only load model files obtained from a trusted source.
vectorizer = load('models/tfidf/tfidf_vectorizer.joblib')
ensemble_model = load('models/ensemble_model.joblib')

## Load Test Data & Make Predictions

In [5]:
# Read the evaluation set; later cells use its 'premise' and 'hypothesis' columns.
test_data = pd.read_csv(filepath_or_buffer="./data/test.csv")

In [6]:
# Vectorize the premise/hypothesis pairs with the loaded TF-IDF vectorizer.
test_features = build_test_data(test_data=test_data, vectorizer=vectorizer)

# Predict one label per test row with the loaded ensemble.
pred_labels = ensemble_model.predict(test_features)

In [7]:
# Persist the predicted labels as the submission CSV.
write_to_csv(pred_labels=pred_labels)