<a href="https://colab.research.google.com/github/tubagokhan/RegNLPDataset/blob/main/SubTask2EvaluationV2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [14]:
import json
import numpy as np
from nltk.tokenize import sent_tokenize
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

import nltk
# sent_tokenize needs the Punkt sentence-boundary data. NLTK 3.8.2 and later
# load it from the 'punkt_tab' resource, while older releases load the pickled
# 'punkt' package, so fetch both to work on either version.
nltk.download('punkt')
nltk.download('punkt_tab')

# Checkpoint of the NLI cross-encoder used for scoring.
_NLI_MODEL_NAME = 'cross-encoder/nli-deberta-v3-xsmall'

# (model, tokenizer) pairs keyed by checkpoint name, so the weights are loaded
# once per process instead of once per scored answer.
_NLI_CACHE = {}


def _get_nli_model(model_name=_NLI_MODEL_NAME):
    """Return the ``(model, tokenizer)`` pair for *model_name*, loading it on first use."""
    if model_name not in _NLI_CACHE:
        model = AutoModelForSequenceClassification.from_pretrained(model_name)
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model.eval()  # inference only
        _NLI_CACHE[model_name] = (model, tokenizer)
    return _NLI_CACHE[model_name]


def _entailment_prob(model, tokenizer, premise, hypothesis):
    """Return the softmax probability at class index 1 for one premise/hypothesis pair."""
    features = tokenizer([(premise, hypothesis)], padding=True, truncation=True, return_tensors="pt")
    with torch.no_grad():
        logits = model(**features).logits
    # Index 1 is read as the entailment class, as in the original code
    # (NOTE(review): confirm against the checkpoint's label mapping).
    return torch.softmax(logits, dim=1)[:, 1].item()


def calculate_mean_entailment_prob(Answer, Premise):
    """Score how well the sentences of *Answer* are entailed by *Premise*.

    Both texts are split into sentences. Every (premise sentence, answer
    sentence) pair is scored with the NLI model; each answer sentence keeps
    its best score over all premise sentences, and the result is the mean of
    those best scores.

    Args:
        Answer: Text whose sentences are used as hypotheses.
        Premise: Text whose sentences are used as premises.

    Returns:
        The mean of the per-answer-sentence maximum entailment probabilities
        as a float. Returns 0.0 when either text is missing, empty, or yields
        no sentences, instead of raising or producing NaN.
    """
    if Answer and Premise:
        hypotheses = sent_tokenize(Answer)
        premises = sent_tokenize(Premise)
    else:
        hypotheses, premises = [], []

    # Nothing to compare: np.max would fail on an empty matrix and np.mean of
    # an empty vector is NaN (which is not valid JSON once saved).
    if not hypotheses or not premises:
        final_model_score = 0.0
        print("Final Model Score:", final_model_score)
        return final_model_score

    model, tokenizer = _get_nli_model()

    # Rows are premise sentences, columns are answer sentences.
    entailment_matrix = np.array([
        [_entailment_prob(model, tokenizer, premise, hypothesis) for hypothesis in hypotheses]
        for premise in premises
    ])

    # Best supporting premise for each answer sentence, then average.
    max_entailment_vector = np.max(entailment_matrix, axis=0)
    final_model_score = float(np.mean(max_entailment_vector))

    print("Final Model Score:", final_model_score)

    return final_model_score


def load_json(file_path):
    """Read the file at *file_path* and return its parsed JSON content.

    The file is decoded explicitly as UTF-8 so that non-ASCII text is read
    the same way regardless of the platform's default locale encoding.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        return json.load(file)

def save_json(data, file_path):
    """Write *data* to *file_path* as JSON with a 4-space indent.

    The data is serialized before the file is opened, so a value that cannot
    be serialized raises without truncating an existing file at *file_path*.

    Raises:
        TypeError: If *data* contains a value that is not JSON serializable.
        OSError: If the file cannot be opened for writing.
    """
    serialized = json.dumps(data, indent=4)
    with open(file_path, 'w', encoding='utf-8') as file:
        file.write(serialized)

def find_match(question_id, document):
    """Return the first entry of *document* whose "QuestionID" equals *question_id*.

    Entries are examined in order; ``None`` is returned when no entry matches.
    """
    candidates = (item for item in document if item["QuestionID"] == question_id)
    return next(candidates, None)

def process_documents(silver_standarts, participants_answers, output_path):
    """Score each silver-standard entry against its submitted answer and save the results.

    Args:
        silver_standarts: Path to a JSON list of entries providing "QuestionID"
            and "Passage" (and optionally "Question").
        participants_answers: Path to a JSON list of entries providing
            "QuestionID" and "Answer".
        output_path: Path the list of result entries is written to as JSON.

    For every silver entry, the answer with the same "QuestionID" is looked up
    and scored against the entry's passage. Entries without a matching answer
    are reported and stored with ``None`` for both "Answer" and "Model_Score".
    """
    silver_entries = load_json(silver_standarts)
    submitted = load_json(participants_answers)

    scored = []
    n_entries = len(silver_entries)

    for position, silver in enumerate(silver_entries, start=1):
        print(f"Processing item {position} of {n_entries}...")

        hit = find_match(silver["QuestionID"], submitted)
        if not hit:
            print(f"No match found for QuestionID: {silver['QuestionID']}")
            answer = None
            score = None
        else:
            score = calculate_mean_entailment_prob(hit["Answer"], silver["Passage"])
            answer = hit["Answer"]

        scored.append({
            "QuestionID": silver["QuestionID"],
            "Question": silver.get("Question", ""),
            "Passage": silver["Passage"],
            "Answer": answer,
            "Model_Score": score
        })

    save_json(scored, output_path)
    print("Processing complete.")

# Set the file paths
# silver_standarts: JSON read by process_documents as the reference entries
#   (each supplies "QuestionID" and "Passage").
# participants_answers: JSON read as the submitted answers
#   (each supplies "QuestionID" and "Answer").
# results: destination JSON that receives one scored entry per reference entry.
# NOTE(review): the /content/drive prefix looks like a mounted Google Drive in
# Colab - confirm the drive is mounted before running this cell.
silver_standarts = "/content/drive/Othercomputers/MBZUAI/MBZUAI/ADGM-Project/SharedTask/COBS_VER15.150823_entailed_questions.json"
participants_answers = "/content/drive/Othercomputers/MBZUAI/MBZUAI/ADGM-Project/SharedTask/COBS_VER15.150823_answer_entailed_questions_subtask2.json"
results = "/content/drive/Othercomputers/MBZUAI/MBZUAI/ADGM-Project/SharedTask/COBS_VER15.150823_answer_entailed_questions_subtask2_results.json"

# Execute the processing
# Scores every reference entry and writes the result list to `results`.
process_documents(silver_standarts, participants_answers, results)


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


Processing item 1 of 1518...
Final Model Score: 0.43680496886372566
Processing item 2 of 1518...
Final Model Score: 0.04363368141154448
Processing item 3 of 1518...
Final Model Score: 0.10030363441910595
Processing item 4 of 1518...
Final Model Score: 0.0124876288140917
Processing item 5 of 1518...
Final Model Score: 0.1602939685607063
Processing item 6 of 1518...
Final Model Score: 0.3031492734073915
Processing item 7 of 1518...
Final Model Score: 0.14360551169374958
Processing item 8 of 1518...
Final Model Score: 0.40210292985041934
Processing item 9 of 1518...
Final Model Score: 0.023484568780986592
Processing item 10 of 1518...
Final Model Score: 0.04402548959478736
Processing item 11 of 1518...
Final Model Score: 0.0895331776526291
Processing item 12 of 1518...
Final Model Score: 0.2717780972956322
Processing item 13 of 1518...
Final Model Score: 0.19872119460099688
Processing item 14 of 1518...
Final Model Score: 0.34673696756362915
Processing item 15 of 1518...
Final Model Score

In [15]:
# Play an audio beep. Any audio URL will do.
# Presumably used as an audible signal that the long-running cell above has
# finished. Requires the google.colab package, so this cell only runs inside
# Colab; the JavaScript snippet creates an Audio object for the URL and plays it.
from google.colab import output
output.eval_js('new Audio("https://upload.wikimedia.org/wikipedia/commons/0/05/Beep-09.ogg").play()')