<a href="https://colab.research.google.com/github/tubagokhan/RegNLPDataset/blob/main/SubTask2PAssgeToAnswer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [10]:
from google.colab import drive

# Mount Google Drive inside the Colab VM; the mount point is kept in one
# named constant so it is easy to spot and change.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [11]:
!pip install openai==0.28



In [12]:
import getpass
import json
import os
import time

import openai

class GPTProcessor:
    """Generate answers to questions from retrieved passages via the OpenAI chat API.

    The implementation uses the legacy ``openai.ChatCompletion`` and
    ``openai.error`` interfaces of the ``openai`` package.
    """

    # Returned in place of an answer when a request fails or retries are exhausted.
    ERROR_RESPONSE = "An error occurred, and no response was received."

    def __init__(self, api_key):
        """Remember the OpenAI API key that is applied before every request."""
        self.api_key = api_key

    def ask_gpt_with_passages(self, question, passages, model="gpt-3.5-turbo", max_length=1024,
                              max_retries=5, retry_delay=60):
        """Ask the chat model to answer ``question`` using ``passages`` as context.

        Args:
            question: Question text, sent as the user message.
            passages: Context text, sent as a second system message.
            model: Name of the chat model to query.
            max_length: Maximum number of *characters* of ``passages`` that are
                sent. This is a character cut-off, not a token count.
            max_retries: How many times a rate-limited request is retried before
                giving up (so at most ``max_retries + 1`` requests are made).
            retry_delay: Seconds to wait between rate-limited attempts.

        Returns:
            The stripped content of the first completion choice, or
            ``ERROR_RESPONSE`` if the request failed or the rate limit persisted
            through every attempt.
        """
        openai.api_key = self.api_key

        # Truncate passages (by characters) to keep the prompt size bounded.
        if len(passages) > max_length:
            passages = passages[:max_length]

        messages = [
            {"role": "system", "content": "Given the question and these passages, generate a coherent, accurate, and complete answer."},
            {"role": "system", "content": passages},
            {"role": "user", "content": question}
        ]

        # A bounded loop replaces unbounded recursion, so a persistent rate limit
        # can neither retry forever nor keep growing the call stack.
        attempts = max(0, max_retries) + 1
        for attempt in range(1, attempts + 1):
            try:
                response = openai.ChatCompletion.create(
                    model=model,
                    messages=messages
                )
                return response.choices[0].message['content'].strip()
            except openai.error.RateLimitError:
                if attempt == attempts:
                    print(f"Rate limit still exceeded after {attempts} attempts, giving up.")
                    return self.ERROR_RESPONSE
                print(f"Rate limit exceeded, retrying in {retry_delay} seconds...")
                time.sleep(retry_delay)
            except Exception as e:
                # Best effort: report the problem and keep the batch running.
                print(f"An error occurred: {e}")
                return self.ERROR_RESPONSE

    def process_questions(self, doc1_path, doc2_path, output_path, max_passage_length=1000):
        """Answer every question that has a passage and write the results as JSON.

        Args:
            doc1_path: JSON file holding a list of objects with ``"QuestionID"``
                and ``"Question"`` keys.
            doc2_path: JSON file holding a list of objects with ``"QuestionID"``
                and ``"Answer"`` keys; ``"Answer"`` is used as the passage text.
            output_path: Destination JSON file; receives a list of
                ``{"QuestionID", "Answer"}`` objects, one per answered question.
            max_passage_length: Character limit forwarded to
                ``ask_gpt_with_passages`` as ``max_length``.

        Questions without a (non-empty) passage are reported and skipped.
        """
        with open(doc1_path, 'r', encoding='utf-8') as f:
            questions = json.load(f)

        with open(doc2_path, 'r', encoding='utf-8') as f:
            passages = json.load(f)

        # Index the passages once instead of re-scanning the list per question.
        # setdefault keeps the FIRST entry for a duplicated QuestionID, which is
        # what a front-to-back linear search would have returned.
        passage_by_id = {}
        for entry in passages:
            passage_by_id.setdefault(entry["QuestionID"], entry)

        output_data = []
        question_counter = 0  # Number of questions answered so far

        for question in questions:
            question_id = question["QuestionID"]
            question_text = question["Question"]
            entry = passage_by_id.get(question_id)
            passage = entry["Answer"] if entry is not None else None

            if passage:
                generated_answer = self.ask_gpt_with_passages(question_text, passage, max_length=max_passage_length)
                output_data.append({"QuestionID": question_id, "Answer": generated_answer})
                question_counter += 1
                print(f"Processed question {question_counter}: QuestionID {question_id}")
            else:
                print(f"No passage found for QuestionID {question_id}")

        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(output_data, f, indent=4)

        print(f"Output file created at {output_path}. Total questions processed: {question_counter}")


# Example usage:
# SECURITY: API keys must never be committed to a notebook. The key is taken from
# the OPENAI_API_KEY environment variable, falling back to an interactive prompt
# whose input is not echoed. Any key that was previously hardcoded in this
# notebook should be treated as compromised and revoked.
api_key = os.environ.get("OPENAI_API_KEY") or getpass.getpass("OpenAI API key: ")

# Directory on the mounted Drive that holds the shared-task input and output files.
SHARED_TASK_DIR = "/content/drive/Othercomputers/MBZUAI/MBZUAI/ADGM-Project/SharedTask"

processor = GPTProcessor(api_key)
processor.process_questions(
    os.path.join(SHARED_TASK_DIR, "COBS_VER15.150823_entailed_questions.json"),  # questions
    os.path.join(SHARED_TASK_DIR, "retrieval_results.passage_only_bm25.json"),  # retrieved passages
    os.path.join(SHARED_TASK_DIR, "retrieval_results.passage_only_bm25_subtask2.json"),  # output
)


Processed question 1: QuestionID 943rvf
Processed question 2: QuestionID 480fBQ
Processed question 3: QuestionID 412Gsb
Processed question 4: QuestionID 498QLc
Processed question 5: QuestionID 717Ccv
Processed question 6: QuestionID 551wbs
Processed question 7: QuestionID 444QbG
Processed question 8: QuestionID 021smm
Processed question 9: QuestionID 004VzI
Processed question 10: QuestionID 604fIU
Processed question 11: QuestionID 676pPm
Processed question 12: QuestionID 363ksV
Processed question 13: QuestionID 522OXp
Processed question 14: QuestionID 374xcL
Processed question 15: QuestionID 068FmL
Processed question 16: QuestionID 449MMK
Processed question 17: QuestionID 461TVG
Processed question 18: QuestionID 963iPV
Processed question 19: QuestionID 047Pro
Processed question 20: QuestionID 668dtU
Processed question 21: QuestionID 167EUP
Processed question 22: QuestionID 014fCP
Processed question 23: QuestionID 416Qvz
Processed question 24: QuestionID 892Nnp
Processed question 25: Qu

In [13]:
# Play a short beep in the browser by evaluating a snippet of JavaScript.
# The sound file is arbitrary: any audio URL will do.
from google.colab import output

BEEP_URL = "https://upload.wikimedia.org/wikipedia/commons/0/05/Beep-09.ogg"
output.eval_js(f'new Audio("{BEEP_URL}").play()')