In [1]:
import json
import os
import re
import json
import torch
from src.RGAR import RGAR
class QADataset:
    """Index-addressable view over one dataset stored in a ``benchmark.json`` file.

    The file is expected to hold a JSON object keyed by dataset name; each
    value is itself a mapping from item id to item. Items are exposed in the
    sorted order of their ids.
    """

    def __init__(self, data, dir="."):
        """Load the dataset named ``data`` from ``<dir>/benchmark.json``.

        Args:
            data: Dataset name. It is lower-cased and only the part before the
                first ``"_"`` is used as the lookup key.
            dir: Directory that contains ``benchmark.json``.

        Raises:
            KeyError: If the normalized name is not a key of the benchmark file.
        """
        self.data = data.lower().split("_")[0]
        # Use a context manager so the file handle is closed deterministically,
        # and read as UTF-8 rather than the platform's locale encoding.
        benchmark_path = os.path.join(dir, "benchmark.json")
        with open(benchmark_path, encoding="utf-8") as benchmark_file:
            benchmark = json.load(benchmark_file)
        if self.data not in benchmark:
            raise KeyError("{:s} not supported".format(data))
        self.dataset = benchmark[self.data]
        # Sorted ids give a stable positional order for integer indexing.
        self.index = sorted(self.dataset.keys())

    def __len__(self):
        """Return the number of items in the selected dataset."""
        return len(self.dataset)

    def __getitem__(self, key):
        """Return one item (int key) or a list of items (slice key).

        Raises:
            IndexError: If an integer key is outside the range of the index.
            KeyError: If ``key`` is neither an int nor a slice.
        """
        if isinstance(key, int):
            return self.dataset[self.index[key]]
        if isinstance(key, slice):
            return [self[i] for i in range(len(self))[key]]
        raise KeyError("Key type not supported.")

In [2]:
# Benchmark to debug; the JSON file is read from the MIRAGE directory.
dataset_name = "medqa"
dataset = QADataset(dataset_name, dir="MIRAGE")

In [None]:
# Pick a single benchmark item to inspect.
debug_idx = 3
data = dataset[debug_idx]
question, options, correct_answer = data["question"], data["options"], data["answer"]

# Print debugging information
print(
    f"Debugging Question {debug_idx + 1}:",
    f"Question: {question}",
    f"Options: {options}",
    f"Correct Answer: {correct_answer}",
    sep="\n",
)

In [None]:
# Release cached GPU memory before constructing the RGAR pipeline.
torch.cuda.empty_cache()

gar = RGAR(
    llm_name="meta-llama/Llama-3.2-3B-Instruct",
    retriever_name="MedCPT",
    corpus_name="Textbooks",
    device="cuda:0",
    cot=False,
    rag=True,
    me=1,
)
# Bare expression on the last line so the notebook echoes the answer.
gar.answer(question, options)

### How to combine different retrieval results

In [5]:
import os
import transformers
import torch
import re
import ast

def extract_factual_info(question):
    """Ask the LLM for four candidate options and extract any double-quoted spans.

    Despite its name, this function does not extract facts from ``question``:
    it sends ``question`` followed by a "give 4 options" instruction to the
    module-level ``gar`` object, collapses all whitespace runs in the reply to
    single spaces, and collects every substring enclosed in double quotes.

    Args:
        question: Question text, used as the first part of the user message.

    Returns:
        tuple: ``(matched_items, answers)`` where ``matched_items`` is the list
        of double-quoted substrings found in the reply (empty if none) and
        ``answers`` is the whitespace-normalized reply.
    """
    # NOTE(review): the prompt ends with a stray double quote. It is left
    # untouched here so generated outputs stay comparable -- confirm whether
    # it is intentional.
    prompt = '''Please give 4 options for the question. This is used to assess the students' knowledge level."'''
    messages = [
        {"role": "user", "content": question + "\n" + prompt},
    ]

    ans = gar.generate(messages)
    # Raw string: "\s" in a plain literal is an invalid escape sequence and
    # triggers a warning on recent Python versions.
    answers = re.sub(r"\s+", " ", ans)

    print(f"Generated Answer: {answers}")

    matched_items = re.findall(r'"([^"]*)"', answers)
    if matched_items:
        print(f"extract: {matched_items}")
    else:
        print("no match found")
    # findall already yields [] when nothing matched.
    return matched_items, answers

def split_sentences(text):
    """Split ``text`` into sentences and separate the last one from the rest.

    Trailing double quotes and surrounding whitespace are removed first. A
    sentence ends at ``.``, ``!``, ``?``, ``。`` or a newline, so periods inside
    numbers and bare newlines also act as boundaries. Text that follows the
    final terminator is kept as its own (last) sentence instead of being
    dropped, so an unpunctuated closing question is still returned.

    Args:
        text: Input string.

    Returns:
        tuple: ``(count, other_sentences, last_sentence)`` where ``count`` is
        the number of sentences found, ``other_sentences`` is every sentence
        except the last joined back together and stripped, and
        ``last_sentence`` is the stripped final sentence. Returns
        ``(0, "", "")`` when the cleaned text is empty.
    """
    text = text.rstrip('"').strip()

    pattern = r'(.*?[.!?。\n])'
    sentences = re.findall(pattern, text, re.DOTALL)

    # The matches are contiguous from position 0, so whatever lies past their
    # combined length is an unterminated trailing sentence.
    consumed = sum(len(sentence) for sentence in sentences)
    tail = text[consumed:]
    if tail.strip():
        sentences.append(tail)

    if not sentences:
        return 0, "", ""

    last_sentence = sentences[-1].strip()
    other_sentences = "".join(sentences[:-1]).strip()

    return len(sentences), other_sentences, last_sentence

def retrieve_me(question, options="", k=32):
    """Retrieve snippets for ``question`` using the question plus LLM-generated queries.

    Half of the budget ``k`` goes to a retrieval on the question itself. The
    other half goes to queries built from the output of
    ``extract_factual_info``: if fewer than two quoted items were extracted,
    a single query is made from the whole generated text; otherwise the budget
    is divided evenly among the extracted items. The last sentence of the
    question is appended to every generated query.

    Args:
        question: Question text.
        options: Unused; kept for interface compatibility.
        k: Total retrieval budget; ``k // 2`` is used for each half.

    Returns:
        tuple: ``(all_retrieved_snippets, all_scores)``, the concatenated
        results of every retrieval call in call order.
    """
    half_k = k // 2
    _, _, last_sentence = split_sentences(question)
    parsed_list, original_answers = extract_factual_info(question)

    all_retrieved_snippets = []
    all_scores = []

    def _collect(query, top_k):
        # Run one retrieval and append its results to the running lists.
        snippets, scores = gar.retrieval_system.retrieve(query, k=top_k, rrf_k=100)
        all_retrieved_snippets.extend(snippets)
        all_scores.extend(scores)

    _collect(question, half_k)
    if len(parsed_list) < 2:
        _collect(original_answers + last_sentence, half_k)
    else:
        # With more extracted items than budget, the integer division used to
        # yield k=0 for every query. Cap the number of queries so each one
        # retrieves at least one snippet and the total stays within half_k.
        queries = parsed_list[:max(half_k, 1)]
        queries_k = max(1, half_k // len(queries))
        for elem in queries:
            _collect(elem + last_sentence, queries_k)
    return all_retrieved_snippets, all_scores

# Run the combined retrieval on the debug question; `queries` receives the
# retrieved snippets returned by retrieve_me.
queries, scores = retrieve_me(question)

print(f"Final Result: {queries}")

Generated Answer: Based on the provided information, here are four possible answers: Option 1: Streptococcus pneumoniae The description of the phosphorylated N-acetylglucosamine dimer with 6 fatty acids attached to a polysaccharide side chain suggests Neisseria meningitidis or possibly other gram-negative bacteria but more specifically it points towards pneumococcal capsular polysaccharides. However, given that there was no mention of symptoms like headache, neck stiffness, confusion etc., this seems less likely in comparison to another option listed below. Option 2: Haemophilus influenzae type b (Hib) Haemophilus influenzae type b has been known to cause severe invasive disease including bacteremia and sepsis especially in children under five years old; however, adults can also be affected by Hib infections although they're much rarer than in younger populations. Given the clinical presentation described above, an adult case would not typically present as acute bacterial meningitis bu

### GAR without training, using prompts only

In [6]:
def _generate_single_line(question, instruction):
    """Send ``question`` plus ``instruction`` to the LLM and return a one-line reply.

    Builds a single user message of the form ``question + "\\n" + instruction``,
    passes it to the module-level ``gar.generate``, collapses every run of
    whitespace in the reply to one space, prints the result, and returns it.
    """
    messages = [
        {"role": "user", "content": question + "\n" + instruction},
    ]
    reply = gar.generate(messages)
    # Raw string: "\s" in a plain literal is an invalid escape sequence and
    # triggers a warning on recent Python versions.
    answers = re.sub(r"\s+", " ", reply)

    print(f"Generated Answer: {answers}")
    return answers


def generate_possible_content(question):
    """Ask the LLM for background knowledge that might address ``question``.

    Returns:
        str: The whitespace-normalized model reply.
    """
    prompt = '''Please generate some knowledge that might address the above question. please give me only the knowledge.'''
    return _generate_single_line(question, prompt)


def generate_possible_title(question):
    """Ask the LLM for titles of references that might address ``question``.

    The prompt requests a bracketed list of quoted titles; the reply is
    returned as plain text and is not parsed here.

    Returns:
        str: The whitespace-normalized model reply.
    """
    prompt = '''Please generate some titles of references that might address the above question. Please give me only the titles, formatted as: ["title 1", "title 2", ..., "title N"]. Please be careful not to give specific content and analysis, just the title.'''
    return _generate_single_line(question, prompt)


def generate_possible_answer(question):
    """Ask the LLM to propose four answer options for ``question``.

    Returns:
        str: The whitespace-normalized model reply.
    """
    prompt = '''Please give 4 options for the question. '''
    return _generate_single_line(question, prompt)

In [None]:
# Try the three prompt variants on the debug question. Each call prints its
# generated text; as the last bare expression, only the final call's return
# value is echoed as the cell output.
generate_possible_content(question)
generate_possible_title(question)
generate_possible_answer(question)