<a href="https://colab.research.google.com/github/tubagokhan/ADGM/blob/main/TestSubTask2EvaluationV2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [12]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [13]:
import json
import numpy as np
from nltk.tokenize import sent_tokenize
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

import nltk
nltk.download('punkt')

def calculate_mean_entailment_prob(Answer, Premise):
    # Tokenize both the Answer and Premise texts into sentences
    Hypotheses = sent_tokenize(Answer)
    Premises = sent_tokenize(Premise)

    # Load the NLI model and tokenizer
    model = AutoModelForSequenceClassification.from_pretrained('cross-encoder/nli-deberta-v3-xsmall')
    tokenizer = AutoTokenizer.from_pretrained('cross-encoder/nli-deberta-v3-xsmall')

    # Initialize an empty matrix to store entailment probabilities
    entailment_matrix = []

    for premise in Premises:
        row_probs = []
        for hypothesis in Hypotheses:
            # Prepare the premise-hypothesis pair and tokenize
            pair = [(premise, hypothesis)]
            features = tokenizer(pair, padding=True, truncation=True, return_tensors="pt")

            # Model evaluation
            model.eval()
            with torch.no_grad():
                logits = model(**features).logits
                probs = torch.softmax(logits, dim=1)
                entailment_prob = probs[:, 1].item()  # Extract the entailment probability for this pair
                row_probs.append(entailment_prob)

        # Add the row of entailment probabilities for this premise to the matrix
        entailment_matrix.append(row_probs)

    # Convert the matrix to a NumPy array for easier manipulation
    entailment_matrix_np = np.array(entailment_matrix)

    # Check if the matrix is not empty
    if entailment_matrix_np.size > 0:
        # Reduce the matrix to a one-dimensional vector by taking the max value of each column
        max_entailment_vector = np.max(entailment_matrix_np, axis=0)

        # Calculate the mean of the reduced vector to get the final model score
        final_model_score = np.mean(max_entailment_vector)
    else:
        # Handle the empty matrix case (e.g., by setting the score to a default value)
        final_model_score = 0  # Or any other default value that makes sense

    print("Final Model Score:", final_model_score)

    return final_model_score



def load_json(file_path):
    with open(file_path, 'r') as file:
        return json.load(file)

def save_json(data, file_path):
    with open(file_path, 'w') as file:
        json.dump(data, file, indent=4)

def find_match(question_id, document):
    for entry in document:
        if entry["QuestionID"] == question_id:
            return entry
    return None


def process_documents_reference_free(input_document, output_path):

    # Load the input document which contains all the necessary information
    document = load_json(input_document)

    results = []
    total_items = len(document)
    item_counter = 0

    # Iterate through each item in the document
    for item in document:
        item_counter += 1
        print(f"Processing item {item_counter} of {total_items}...")

        # Calculate the model score only if both "Answer" and "RetrievedPassage" are present
        if "Answer" in item and "RetrievedPassage" in item:
          # Answer - hypothesis    RetrievedPassage is Premise
            model_score = calculate_mean_entailment_prob(item["Answer"], item["RetrievedPassage"])
            # Add the model score as a new item to the current entry
            item["Model_Score_of_RetrievedPassage_to_Answer"] = model_score
        else:
            print(f"Missing 'Answer' or 'RetrievedPassage' for item {item_counter}")
            # If either is missing, set the model score to None
            item["Model_Score_of_RetrievedPassage(P)_to_Answer(H)"] = None

        # Append the updated item to the results list
        results.append(item)

    # Save the updated results to the specified output path
    save_json(results, output_path)
    print("Processing complete.")



def process_documents_from_source(silver_standarts, participants_answers, output_path):
    silvers = load_json(silver_standarts)
    answers = load_json(participants_answers)

    results = []
    total_items = len(silvers)  # Focus on the first 5 items from silver standards

    item_counter = 0

    for silver in silvers:  # Process only the first 5 items
        item_counter += 1
        print(f"Processing item {item_counter} of {total_items}...")

        match = find_match(silver["QuestionID"], answers)
        print(silver["QuestionID"])
        print(silver["Passage"])
        print(match)
        if match:
          # Answer - hypothesis    Passage is Premise
            # Calculate model_score and add it directly to the matched participant answer item
            model_score = calculate_mean_entailment_prob(match["Answer"], silver["Passage"])
            match["Model_Score_of_SilverPassage(P)_to_Answer(H)"] = model_score
            results.append(match)  # Append the updated participant answer item to results
        else:
            print(f"No match found for QuestionID: {silver['QuestionID']}")
            # Optionally handle the case where no match is found

    save_json(results, output_path)
    print("Processing complete.")



# Set the file paths
silver_standarts = "/content/drive/Othercomputers/MBZUAI/MBZUAI/ADGM-Project/SharedTask/COBS_VER15.150823_entailed_questions.json"
participants_answers = "/content/drive/Othercomputers/MBZUAI/MBZUAI/ADGM-Project/SharedTask/retrieval_results.hierarchical_bm25_updated.json"
results = "/content/drive/Othercomputers/MBZUAI/MBZUAI/ADGM-Project/SharedTask/retrieval_results.hierarchical_bm25_updated_subtask2_results.json"

# Execute the processing

process_documents_reference_free(participants_answers, results)

process_documents_from_source(silver_standarts, participants_answers, results)






Processing item 1 of 1518...
Missing 'Answer' or 'RetrievedPassage' for item 1
Processing item 2 of 1518...
Missing 'Answer' or 'RetrievedPassage' for item 2
Processing item 3 of 1518...
Missing 'Answer' or 'RetrievedPassage' for item 3
Processing item 4 of 1518...
Missing 'Answer' or 'RetrievedPassage' for item 4
Processing item 5 of 1518...
Missing 'Answer' or 'RetrievedPassage' for item 5
Processing item 6 of 1518...
Missing 'Answer' or 'RetrievedPassage' for item 6
Processing item 7 of 1518...
Missing 'Answer' or 'RetrievedPassage' for item 7
Processing item 8 of 1518...
Missing 'Answer' or 'RetrievedPassage' for item 8
Processing item 9 of 1518...
Missing 'Answer' or 'RetrievedPassage' for item 9
Processing item 10 of 1518...
Missing 'Answer' or 'RetrievedPassage' for item 10
Processing item 11 of 1518...
Missing 'Answer' or 'RetrievedPassage' for item 11
Processing item 12 of 1518...
Missing 'Answer' or 'RetrievedPassage' for item 12
Processing item 13 of 1518...
Missing 'Answer'

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


Missing 'Answer' or 'RetrievedPassage' for item 326
Processing item 327 of 1518...
Missing 'Answer' or 'RetrievedPassage' for item 327
Processing item 328 of 1518...
Missing 'Answer' or 'RetrievedPassage' for item 328
Processing item 329 of 1518...
Missing 'Answer' or 'RetrievedPassage' for item 329
Processing item 330 of 1518...
Missing 'Answer' or 'RetrievedPassage' for item 330
Processing item 331 of 1518...
Missing 'Answer' or 'RetrievedPassage' for item 331
Processing item 332 of 1518...
Missing 'Answer' or 'RetrievedPassage' for item 332
Processing item 333 of 1518...
Missing 'Answer' or 'RetrievedPassage' for item 333
Processing item 334 of 1518...
Missing 'Answer' or 'RetrievedPassage' for item 334
Processing item 335 of 1518...
Missing 'Answer' or 'RetrievedPassage' for item 335
Processing item 336 of 1518...
Missing 'Answer' or 'RetrievedPassage' for item 336
Processing item 337 of 1518...
Missing 'Answer' or 'RetrievedPassage' for item 337
Processing item 338 of 1518...
Miss

KeyError: 'Answer'

In [None]:
# Play an audio beep. Any audio URL will do.
from google.colab import output
output.eval_js('new Audio("https://upload.wikimedia.org/wikipedia/commons/0/05/Beep-09.ogg").play()')

In [None]:
import json

def calculate_mean_model_score(json_file_path,value):
    # Open and read the JSON file
    with open(json_file_path, 'r') as file:
        data = json.load(file)

    # Extract "Model_Score" values, ensuring they are not None
    model_scores = []
    for item in data:
        if value in item and item[value] is not None:
            model_scores.append(item[value])

    # Calculate the mean of "Model_Score" values
    if model_scores:
        mean_score = sum(model_scores) / len(model_scores)
        return mean_score
    else:
        return "No valid Model_Score values found"

# Replace 'your_json_file.json' with the path to your actual JSON file
mean_model_score = calculate_mean_model_score('/content/drive/Othercomputers/MBZUAI/MBZUAI/ADGM-Project/SharedTask/retrieval_results.hierarchical_bm25_updated_subtask2_results.json')


value="Model_Score_of_RetrievedPassage(P)_to_Answer(H)"
print("Mean Model_Score:", mean_model_score,value)

value="Model_Score_of_SilverPassage(P)_to_Answer(H)"
print("Mean Model_Score:", mean_model_score,value)


In [None]:
# Play an audio beep. Any audio URL will do.
from google.colab import output
output.eval_js('new Audio("https://upload.wikimedia.org/wikipedia/commons/0/05/Beep-09.ogg").play()')