<a href="https://colab.research.google.com/github/tubagokhan/RegNLPDataset/blob/main/SubTask3EvaluationV3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [20]:
import json
import time
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from nltk.tokenize import sent_tokenize
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
import nltk
nltk.download('punkt')

print("Dependencies imported and NLTK punkt downloaded.")

# Load NLI model
nli_model = AutoModelForSequenceClassification.from_pretrained('cross-encoder/nli-deberta-v3-xsmall')
tokenizer = AutoTokenizer.from_pretrained('cross-encoder/nli-deberta-v3-xsmall')

print("NLI model and tokenizer loaded.")

def softmax(logits):
    e_logits = np.exp(logits - np.max(logits, axis=1, keepdims=True))
    return e_logits / np.sum(e_logits, axis=1, keepdims=True)

def get_nli_probabilities(premises, hypotheses):
    features = tokenizer(premises, hypotheses, padding=True, truncation=True, return_tensors="pt")
    nli_model.eval()
    with torch.no_grad():
        logits = nli_model(**features).logits.numpy()
    probabilities = softmax(logits)
    return probabilities

def get_nli_matrix(passages, answers):
    print(f"{len(passages)} passages and {len(answers)} answers.")
    entailment_matrix = np.zeros((len(passages), len(answers)))
    contradiction_matrix = np.zeros((len(passages), len(answers)))

    for i, pas in enumerate(passages):
        for j, ans in enumerate(answers):
            probs = get_nli_probabilities([pas], [ans])  # Get NLI probabilities
            entailment_matrix[i, j] = probs[:, 1][0]  # Access the first element to avoid deprecation warning
            contradiction_matrix[i, j] = probs[:, 0][0]  # Access the first element to avoid deprecation warning

    #print("NLI matrix calculation completed.")
    return entailment_matrix, contradiction_matrix

def calculate_scores_from_matrix(nli_matrix, score_type='entailment'):
    if nli_matrix.size == 0:
        print("Warning: NLI matrix is empty. Returning default score of 0.")
        return 0.0  # or some other default score or handling as appropriate for your use case

    if score_type == 'entailment':
        reduced_vector = np.max(nli_matrix, axis=0)
    elif score_type == 'contradiction':
        reduced_vector = np.min(nli_matrix, axis=0)
    score = np.round(np.mean(reduced_vector), 5)
    return score

def check_obligation(text):
    json_path = "/content/drive/Othercomputers/MBZUAI/MBZUAI/ADGM-Project/SharedTask/Processed_Obligations.json"
    with open(json_path, 'r') as file:
        data = json.load(file)
    texts = [item['Text'] for item in data]
    labels = [item['Obligation'] for item in data]

    X_train, _, y_train, _ = train_test_split(texts, labels, test_size=0.2, random_state=42)
    pipeline = make_pipeline(TfidfVectorizer(stop_words='english'), LogisticRegression(max_iter=1000))
    pipeline.fit(X_train, y_train)

    #print(f"Checking obligation for text: {text[:30]}...")
    return pipeline.predict([text])[0]

def calculate_coverage_score(passages, answers):
    #print("Calculating coverage score.")
    covered_obligations = 0
    total_obligations = sum(check_obligation(sent) for passage in passages for sent in sent_tokenize(passage))

    for ans in answers:
        for passage in passages:
            for sent in sent_tokenize(passage):
                if check_obligation(sent):
                    covered_obligations += 1
                    break

    coverage_score = covered_obligations / total_obligations if total_obligations > 0 else 0
    print(f"Obligation Coverage score : {np.round(coverage_score, 5)}")
    return np.round(coverage_score, 5)

def calculate_final_composite_score(passages, answers, we=0.4, wc=0.3, wo=0.3):
    #print("Calculating final composite score.")
    passage_sentences = [sent for passage in passages for sent in sent_tokenize(passage)]
    answer_sentences = [sent for answer in answers for sent in sent_tokenize(answer)]
    entailment_matrix, contradiction_matrix = get_nli_matrix(passage_sentences, answer_sentences)

    entailment_score = calculate_scores_from_matrix(entailment_matrix, 'entailment')
    contradiction_score = calculate_scores_from_matrix(contradiction_matrix, 'contradiction')
    coverage_score = calculate_coverage_score(passages, answers)

    composite_score = we * entailment_score - wc * contradiction_score + wo * coverage_score
    print(f"Final composite score: {np.round(composite_score, 5)}")
    return np.round(composite_score, 5)


# Load JSON data
with open('/content/drive/Othercomputers/MBZUAI/MBZUAI/ADGM-Project/SharedTask/retrieval_results.hierarchical_bm25_updated_subtask2.json', 'r') as file:
    data = json.load(file)

total_start_time = time.time()
item_count = 0
processed_items = []  # List to hold processed items

# Iterate over each item in the JSON data
for item in data[:200]:
    item_start_time = time.time()
    passages = [item['RetrievedPassage']]
    answers = [item['Answer']]

    try:
        # Calculate coverage score
        score = calculate_final_composite_score(passages, answers)

        # Add the calculated score to the item
        item['Score'] = np.round(score, 5)
        processed_items.append(item)  # Add the processed item to the list
        item_end_time = time.time()

        print(f"Item {item_count + 1} processed in {item_end_time - item_start_time:.2f} seconds.")
        item_count += 1
    except Exception as e:
        print(f"An error occurred while processing item {item_count + 1}: {e}")

    print("---------------------------------------------------------------")

# Save the updated JSON data with processed items to a new file
output_file = '/content/drive/Othercomputers/MBZUAI/MBZUAI/ADGM-Project/SharedTask/retrieval_results.hierarchical_bm25_updated_subtask2_with_scores.json'
with open(output_file, 'w') as outfile:
    json.dump(processed_items, outfile, indent=4)

total_end_time = time.time()
print(f"All {item_count} items processed. Total time: {total_end_time - total_start_time:.2f} seconds.")
print(f"Updated JSON data with scores has been saved to {output_file}.")

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


Dependencies imported and NLTK punkt downloaded.




NLI model and tokenizer loaded.
8 passages and 3 answers.
Obligation Coverage score : 0.125
Final composite score: 0.00438
Item 1 processed in 7.47 seconds.
---------------------------------------------------------------
2 passages and 3 answers.
Obligation Coverage score : 0.5
Final composite score: 0.2809
Item 2 processed in 1.93 seconds.
---------------------------------------------------------------
2 passages and 2 answers.
Obligation Coverage score : 0.5
Final composite score: 0.16198
Item 3 processed in 0.94 seconds.
---------------------------------------------------------------
2 passages and 8 answers.
Obligation Coverage score : 0.5
Final composite score: 0.20764
Item 4 processed in 4.12 seconds.
---------------------------------------------------------------
1 passages and 3 answers.
Obligation Coverage score : 1.0
Final composite score: 0.57095
Item 5 processed in 1.45 seconds.
---------------------------------------------------------------
1637 passages and 4 answers.
Obl

In [21]:
# Play an audio beep. Any audio URL will do.
from google.colab import output
output.eval_js('new Audio("https://upload.wikimedia.org/wikipedia/commons/0/05/Beep-09.ogg").play()')

In [22]:
import json

def calculate_mean_model_score(json_file_path):
    # Open and read the JSON file
    with open(json_file_path, 'r') as file:
        data = json.load(file)

    # Extract "Model_Score" values, ensuring they are not None
    model_scores = []
    for item in data:
        if "Score" in item and item["Score"] is not None:
            model_scores.append(item["Score"])

    # Calculate the mean of "Model_Score" values
    if model_scores:
        mean_score = sum(model_scores) / len(model_scores)
        return mean_score
    else:
        return "No valid Model_Score values found"




In [23]:
# Replace 'your_json_file.json' with the path to your actual JSON file
mean_model_score = calculate_mean_model_score('/content/drive/Othercomputers/MBZUAI/MBZUAI/ADGM-Project/SharedTask/retrieval_results.passage_only_bm25_updated_subtask2_with_scores.json')
print("Mean Model_Score:", mean_model_score)

Mean Model_Score: 0.4390223999999997


In [25]:
mean_model_score = calculate_mean_model_score('/content/drive/Othercomputers/MBZUAI/MBZUAI/ADGM-Project/SharedTask/retrieval_results.hierarchical_bm25_updated_subtask2_with_scores.json')
print("Mean Model_Score:", mean_model_score)

Mean Model_Score: 0.4505117999999997
