# mediRAG Pipeline

In [1]:
import os
import torch
import numpy as np
from prompts import *
import evaluate
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline
from langchain.docstore.document import Document
from langchain.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.chains import LLMChain
from langchain.schema.runnable import RunnablePassthrough

In [2]:
torch.cuda.set_device(0)  # have to change depending on which device u wana use
torch.cuda.current_device()

0

## Load Model and Tokenizer

In [3]:
model_name='mistralai/Mistral-7B-Instruct-v0.1'

tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir="models")

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=False,
)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    cache_dir="models"
)

bin c:\Users\ptejd\anaconda3\envs\medrag3\lib\site-packages\bitsandbytes\libbitsandbytes_cuda118.dll


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

# Load Data

In [4]:
dataset = load_dataset("bigbio/pubmed_qa", cache_dir="data")

dataset

DatasetDict({
    train: Dataset({
        features: ['QUESTION', 'CONTEXTS', 'LABELS', 'MESHES', 'YEAR', 'reasoning_required_pred', 'reasoning_free_pred', 'final_decision', 'LONG_ANSWER'],
        num_rows: 200000
    })
    validation: Dataset({
        features: ['QUESTION', 'CONTEXTS', 'LABELS', 'MESHES', 'YEAR', 'reasoning_required_pred', 'reasoning_free_pred', 'final_decision', 'LONG_ANSWER'],
        num_rows: 11269
    })
})

In [5]:
page_content_column = "CONTEXTS"

def preprocess(dataset):
    for split in dataset.keys():
        for contexts in dataset[split][page_content_column]:
            for sentence in contexts:
                yield Document(page_content=sentence)

data = list(preprocess(dataset))  # 655055

data[0]

Document(page_content='In previous work we (Fisher et al., 2011) examined the emergence of neurobehavioral disinhibition (ND) in adolescents with prenatal substance exposure. We computed ND factor scores at three age points (8/9, 11 and 13/14 years) and found that both prenatal substance exposure and early adversity predicted ND. The purpose of the current study was to determine the association between these ND scores and initiation of substance use between ages 8 and 16 in this cohort as early initiation of substance use has been related to later substance use disorders. Our hypothesis was that prenatal cocaine exposure predisposes the child to ND, which, in turn, is associated with initiation of substance use by age 16.')

## Setting up FAISS

In [6]:
embedding_model = "BAAI/bge-large-en-v1.5"
model_kwargs = {'device':'cuda'}
encode_kwargs = {'normalize_embeddings': False}

# Initialize an instance of HuggingFaceEmbeddings with the specified parameters
embeddings = HuggingFaceEmbeddings(
    model_name=embedding_model,   
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs, 
    cache_folder="models"
)

if os.path.exists("faiss_index_pubmed"):
    db = FAISS.load_local("faiss_index_pubmed", embeddings)
else:
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=128)
    docs = text_splitter.split_documents(data)  # 676307

    db = FAISS.from_documents(docs, embeddings)
    db.save_local("faiss_index_pubmed")

In [7]:
question = dataset['train']["QUESTION"][0]
context = dataset['train']["CONTEXTS"][0]

retrieved_docs = db.similarity_search(question)  # db.similarity_search_with_score(question)

print(f"Question:\n{question}")
print(f"\nContext:\n{context}")
print(f"\nRetrieved document:\n{retrieved_docs[0].page_content}\n{retrieved_docs[1].page_content}\n{retrieved_docs[2].page_content}")

Question:
Does neurobehavioral disinhibition predict initiation of substance use in children with prenatal cocaine exposure?

Context:
['In previous work we (Fisher et al., 2011) examined the emergence of neurobehavioral disinhibition (ND) in adolescents with prenatal substance exposure. We computed ND factor scores at three age points (8/9, 11 and 13/14 years) and found that both prenatal substance exposure and early adversity predicted ND. The purpose of the current study was to determine the association between these ND scores and initiation of substance use between ages 8 and 16 in this cohort as early initiation of substance use has been related to later substance use disorders. Our hypothesis was that prenatal cocaine exposure predisposes the child to ND, which, in turn, is associated with initiation of substance use by age 16.', "We studied 386 cocaine exposed and 517 unexposed children followed since birth in a longitudinal study. Five dichotomous variables were computed based 

## Initializing Pipeline


In [8]:
text_generation_pipeline = pipeline(
    model=model,
    tokenizer=tokenizer,
    task="text-generation",
    max_new_tokens=300,
    do_sample=False,
)

mistral_llm = HuggingFacePipeline(pipeline=text_generation_pipeline)

# Create prompt from prompt template 
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=PROMPT_TEMPLATE_QA_ANSWER,
)

# Create llm chain 
llm_chain = LLMChain(llm=mistral_llm, prompt=prompt)

## Running Queries

In [9]:
idx = 0

questions = dataset['train'][2:12]["QUESTION"]
contexts = dataset['train'][2:12]["CONTEXTS"]
long_answers = dataset['train'][2:12]["LONG_ANSWER"]
final_decisions = dataset['train'][2:12]["final_decision"]

## QA without Retrieval

In [10]:
pred_no_ret = []
for question in questions:
    # question = f'{question}. ALong with the answer, Explicitly state where the answer to the question is yes, no or maybe'
    print("Question: ", question)
    input_ids = tokenizer.encode(question, return_tensors="pt").to("cuda")

    with torch.no_grad():
        output = model.generate(
            input_ids=input_ids,
            do_sample=False,
            return_dict_in_generate=True,
            max_new_tokens=300,
        )

    output = tokenizer.decode(output.sequences[0][len(input_ids[0]):])
    print("Generated Answer: ", output)

    pred_no_ret.append(output)
# output

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Question:  Is heart failure with preserved ejection fraction characterized by dynamic impairment of active relaxation and contraction of the left ventricle on exercise and associated with myocardial energy deficiency?


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer:  

## Abstract

Heart failure with preserved ejection fraction (HFpEF) is a common form of heart failure characterized by impaired left ventricular (LV) diastolic function and normal or near-normal LV ejection fraction. The pathophysiology of HFpEF is complex and involves both structural and functional changes in the heart. Recent studies have shown that HFpEF is associated with dynamic impairment of active relaxation and contraction of the LV on exercise, which may be related to myocardial energy deficiency. This review summarizes the current evidence on the dynamic impairment of LV function in HFpEF and its association with myocardial energy deficiency.

## Introduction

Heart failure with preserved ejection fraction (HFpEF) is a common form of heart failure characterized by impaired left ventricular (LV) diastolic function and normal or near-normal LV ejection fraction. HFpEF is estimated to affect 50% of all patients with heart failure, and its prevalence is incre

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer:  
A: False</s>
Question:  Does obvious emphysema on computed tomography during an acute exacerbation of chronic obstructive pulmonary disease predict a poor prognosis?


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer:  

## Abstract

Background: The prognosis of acute exacerbation of chronic obstructive pulmonary disease (AECOPD) is poor, and the presence of emphysema on computed tomography (CT) is associated with a poor prognosis. However, the prognosis of AECOPD with emphysema is not well understood.

Methods: This study was conducted to investigate the prognosis of AECOPD with emphysema.

Results: A total of 100 patients with AECOPD were enrolled in this study. The patients were divided into two groups: the emphysema group (n=50) and the non-emphysema group (n=50). The emphysema group had a higher mortality rate (p=0.001) and a higher incidence of hospitalization (p=0.001) than the non-emphysema group. The emphysema group also had a higher incidence of exacerbation (p=0.001) and a higher incidence of pneumonia (p=0.001) than the non-emphysema group. The emphysema group had a higher incidence of hospitalization due to exacerbation (p=0.001) and a higher inc
Question:  Are chemoki

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer:  

Abstract:

Background: HIV-1 infection is associated with an altered chemokine expression pattern in the genital tract. However, little is known about the chemokine expression patterns in the systemic compartment of HIV-1-infected women.

Methods: We compared chemokine expression patterns in the genital tract and systemic compartment of HIV-1-infected women from Benin. We used quantitative real-time PCR to measure the expression of 12 chemokines in the genital tract and systemic compartment of 10 HIV-1-infected women and 10 HIV-1-negative women.

Results: We found that HIV-1 infection was associated with an altered chemokine expression pattern in the genital tract and systemic compartment of women from Benin. In the genital tract, HIV-1 infection was associated with an increased expression of CXCL10, CXCL12, CXCL13, CXCL16, CXCL22, CXCL23, CXCL24, CXCL25, CXCL26, CXCL27, CXCL28, and CXCL30. In the systemic compartment, HIV-1 infection was associated with an increas

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer:  
A: Yes</s>
Question:  Is dNA methylation of genes linked with retinoid signaling in gastric carcinoma : expression of the retinoid acid receptor beta , cellular retinol-binding protein 1 , and tazarotene-induced gene 1 genes associated with DNA methylation?


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer:  

Abstract:

Retinoid signaling plays an important role in the development and progression of gastric cancer. However, the relationship between retinoid signaling and DNA methylation in gastric cancer is not well understood. In this study, we analyzed the expression of retinoid acid receptor beta (RARβ), cellular retinol-binding protein 1 (CRBP1), and tazarotene-induced gene 1 (TIG1) genes in gastric cancer tissues and their association with DNA methylation. We found that the expression of RARβ, CRBP1, and TIG1 genes was significantly reduced in gastric cancer tissues compared with normal tissues. Furthermore, we found that the expression of RARβ, CRBP1, and TIG1 genes was negatively correlated with DNA methylation of their promoter regions. These results suggest that DNA methylation of genes associated with retinoid signaling may play a role in the development and progression of gastric cancer.

Introduction:

Retinoid signaling plays an important role in the develo

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer:  

Abstract:

Background: Antiangiogenic therapy has been shown to be effective in treating cerebral melanoma metastases. However, the long-term efficacy of antiangiogenic therapy is limited by the development of resistance. One possible mechanism of resistance is vessel co-option, which allows tumors to continue growing by recruiting new vessels from surrounding normal tissue.

Methods: We analyzed the clinical and imaging data of 12 patients with cerebral melanoma metastases who received antiangiogenic therapy. We assessed the response of the tumors to antiangiogenic therapy using magnetic resonance imaging (MRI) and computed tomography (CT) scans. We also evaluated the development of vessel co-option by analyzing the microvascular density (MVD) of the tumors before and after antiangiogenic therapy.

Results: Antiangiogenic therapy resulted in a significant reduction in tumor size in all patients. However, in 5 patients, the tumors continued to grow despite the redu

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer:  

Abstract:

Background: Clove and eugenol are two of the most widely used natural compounds in the world. They have been shown to have a wide range of biological activities, including antioxidant, antiviral, and anti-inflammatory effects. However, the immunomodulatory effects of clove and eugenol are not well understood.

Methods: In this study, we investigated the immunomodulatory/anti-inflammatory effects of clove and eugenol on cytokine production by murine macrophages. We used a murine macrophage cell line (RAW 264.7) and cultured the cells in the presence of clove or eugenol for 24 hours. We then measured the levels of pro-inflammatory cytokines (TNF-α, IL-6, and IL-1β) and anti-inflammatory cytokines (IL-10) in the supernatants using enzyme-linked immunosorbent assay (ELISA).

Results: Our results showed that both clove and eugenol significantly reduced the levels of pro-inflammatory cytokines (TNF-α, IL-6, and IL-1β) and increased the levels of anti-infl
Ques

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer:  












































































































































































































































































































Question:  Does lipidomics reveal multiple pathway effects of a multi-components preparation on lipid biochemistry in ApoE*3Leiden.CETP mice?
Generated Answer:  

Abstract:

Background:

ApoE*3Leiden.CETP mice are a well-established model for studying the effects of ApoE4 on lipid metabolism. However, the effects of a multi-component preparation on lipid biochemistry in this model are not well understood.

Methods:

We used lipidomics to investigate the effects of a multi-component preparation on lipid biochemistry in ApoE*3Leiden.CETP mice. The multi-component preparation consisted of a combination of omega-3 fatty acids, vitamin D, and coenzyme Q10.

Results:

Our resul

In [11]:
def find_citations(predictions):
    finalpred = []
    for output in predictions:
        output_with_citations = ""
        citations = ""
        citation_list = []

        for lines in output.split("\n"):
            lines = lines.strip()
            if len(lines.split(" ")) > 10:
                for line in lines.split("."):
                    line = line.strip()
                    docs_and_scores = db.similarity_search_with_score(line)[0]  # choosing top 1 relevant document
                    if docs_and_scores[1] < 0.5:  # returned distance score is L2 distance, a lower score is better
                        doc_content = docs_and_scores[0].page_content
                        if doc_content in citation_list:
                            idx = citation_list.index(doc_content)

                        else:
                            citation_list.append(doc_content)
                            idx = len(citation_list)
                            citations += f"[{idx}] {doc_content}\n"

                        output_with_citations += line + f" [{idx}]. "

        final_output_with_citations = output_with_citations + "\n\nCitations:\n" + citations
        finalpred.append(final_output_with_citations)
    return finalpred

final_pred_citations = find_citations(pred_no_ret)

In [12]:
print(final_pred_citations[0])

Heart failure with preserved ejection fraction (HFpEF) is a common form of heart failure characterized by impaired left ventricular (LV) diastolic function and normal or near-normal LV ejection fraction [1]. The pathophysiology of HFpEF is complex and involves both structural and functional changes in the heart [2]. Recent studies have shown that HFpEF is associated with dynamic impairment of active relaxation and contraction of the LV on exercise, which may be related to myocardial energy deficiency [3]. This review summarizes the current evidence on the dynamic impairment of LV function in HFpEF and its association with myocardial energy deficiency [1]. Heart failure with preserved ejection fraction (HFpEF) is a common form of heart failure characterized by impaired left ventricular (LV) diastolic function and normal or near-normal LV ejection fraction [0]. HFpEF is estimated to affect 50% of all patients with heart failure, and its prevalence is increasing with age [4]. The pathophy

## QA with Retrieval

In [13]:
pred_ret = []
for question in questions:
    print("Question: ", question)
    retriever = db.as_retriever(
        search_type="similarity",
        search_kwargs={'k': 3}
    )

    # retriever = db.as_retriever(search_type="similarity_score_threshold", 
    #                                  search_kwargs={"score_threshold": .5, 
    #                                                 "k": top_k})

    rag_chain = ({"context": retriever, "question": RunnablePassthrough()} | llm_chain)

    # QA with retrieval
    qa_retrieval_result = rag_chain.invoke(question)
    print("Generated Answer: ", qa_retrieval_result['text'])
    pred_ret.append(qa_retrieval_result["text"])

Question:  Is heart failure with preserved ejection fraction characterized by dynamic impairment of active relaxation and contraction of the left ventricle on exercise and associated with myocardial energy deficiency?


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer:   Heart failure with preserved ejection fraction (HFpEF) is characterized by dynamic impairment of active relaxation and contraction of the left ventricle on exercise and associated with myocardial energy deficiency. This is because nearly half of patients with heart failure have a preserved ejection fraction (HFpEF). Symptoms of exercise intolerance and dyspnea are most often attributed to diastolic dysfunction; however, impaired systolic and/or arterial vasodilator reserve under stress could also play an important role. To investigate the associations between glucose metabolism, left ventricular (LV) contractile reserve, and exercise capacity in patients with chronic systolic heart failure (HF), researchers sought to evaluate the role of exercise-related changes in LV relaxation and of LV contractile function and vasculoventricular coupling (VVC) in the pathophysiology of HFpEF and to assess myocardial energetic status in these patients.

The Answer is Yes.
Question

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer:   The provided context discusses the role of soluble (Pro) Renin Receptor (s(Pro)RR) as a biomarker for gestational diabetes mellitus (GDM). The study aimed to determine the role of s(Pro)RR in predicting GDM. However, the context does not provide a definitive answer to the question. Therefore, the answer is "Maybe".
Question:  Does obvious emphysema on computed tomography during an acute exacerbation of chronic obstructive pulmonary disease predict a poor prognosis?


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer:   The provided context discusses the relationship between emphysematous changes on computed tomography (CT) during an acute exacerbation of chronic obstructive pulmonary disease (AECOPD) and prognosis. The study aims to investigate this relationship in more detail.

The study acquired CT images for 106 patients who visited the emergency department for an AECOPD. Emphysematous change was quantified by measuring the percentage of low-attenuation areas (LAA%) in the entire lung on CT images with a threshold of -950 Hounsfield units. Other factors that could influence AECOPD prognosis were also recorded on admission and analyzed.

At follow-ups conducted in 1 year, patient survival, the modified Medical Research Council (mMRC) Dyspnoea Scale, and performance status (PS) were evaluated, and a COPD Assessment Test (CAT) was completed.

However, the provided context does not explicitly state whether obvious emphysema on CT during an AECOPD predicts a poor prognosis. Therefor

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer:   The provided information includes studies that compare chemokine expression patterns in the systemic and genital tract compartments between HIV-1-infected and HIV-1-uninfected women from Benin. However, the information does not provide a direct association between chemokine expression patterns and HIV-1 infection in women from Benin. Therefore, the answer is "Maybe".
Question:  Does [ Pathological observation of lung injury in experimental animals induced by non-ferrous metal ( tin ) dust ]?


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer:   The provided context discusses the pathological observation of lung injury in experimental animals induced by tin dust. The animals were exposed to tin dusts from smelting and refining workshops, and the pathological changes in their lungs were observed dynamically. The pathological changes observed in the lungs of the rats included scattered hoar tip size of the spots in surface and section of the lungs, scattered focal granulomatous inflammation around the small bronchi and dust particles in lung tissue, granulomatous inflammation increase, fibroblasts proliferation, collagen fibers formation, and positive VG staining. These pathological changes were found to be significantly different from positive or negative controls. The study aimed to explore the characters of lung injury induced by tin dusts and to provide the diagnosis evidence of tin pneumoconiosis.

The Answer is Yes.
Question:  Is dNA methylation of genes linked with retinoid signaling in gastric carcino

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer:   The provided context discusses the association of DNA methylation with the retinoid signaling genes in gastric carcinoma (GC). The study analyzed the methylation status of the retinoid acid receptor beta (RARbeta), cellular retinol-binding protein 1 (CRBP1), and tazarotene-induced gene 1 (TIG1) genes in 42 samples of GC from 42 patients and in 8 GC cell lines. The study found that in 7 of 8 GC cell lines, the CRBP1 gene was hypermethylated, and CRBP1 transcription was inactive. In 6 of 8 GC cell lines, the TIG1 gene was hypermethylated, and TIG1 transcription was inactive. Treatment with demethylating agent 5-aza-2'-deoxycytidine restored both CRBP1 and TIG1 transcription. DNA methylation of the RARbeta, CRBP1, and TIG1 genes was detected in 15 of 42 GC samples (36%), 14 of 42 GC samples (33%), and 4 of 42 GC samples (10%), respectively, and in 6 of 30 samples (20%), 0 of 30 samples (0%), and 1 of
Question:  Does antiangiogenic therapy of cerebral melanoma metastase

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer:   The provided context discusses the effectiveness of existing therapies on cerebral and extracerebral melanoma metastases, as well as the potential role of the brain environment on tumor progression and drug resistance. It also mentions the hyperactivation of the PI3K-AKT survival pathway in brain but not extracerebral melanoma metastases and the ability of astrocyte-conditioned medium to activate AKT in melanoma cells in vitro. However, it does not provide any information about the use of antiangiogenic therapy on cerebral melanoma metastases or the possibility of sustained tumor progression via vessel co-option.

Therefore, the answer to the question cannot be determined based on the provided information.
Question:  Do clove and eugenol in noncytotoxic concentrations exert immunomodulatory/anti-inflammatory action on cytokine production by murine macrophages?


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer:   Based on the provided information, clove and eugenol in noncytotoxic concentrations exert immunomodulatory/anti-inflammatory action on cytokine production by murine macrophages. 

The study found that clove (100µg/well) inhibited IL-1β, IL-6 and IL-10 production and exerted an efficient action either before or after LPS challenge for all cytokines. Eugenol did not affect IL-1β production but inhibited IL-6 and IL-10 production. The action of eugenol (50 or 100µg/well) on IL-6 production prevented efficiently effects of LPS either before or after its addition, whereas on IL-10 production it counteracted significantly LPS action when added after LPS incubation. 

Therefore, the answer is "Yes".
Question:  Does pretreatment with stellate ganglion blockade before ischemia reduce infarct size in rat hearts?


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer:   The provided context discusses an experimental study that investigated the role of stellate ganglion blockade (SGB) in cardio-protection against ischemia reperfusion injury in rats. The study was carried out between August and October 2008 in the Department of Anesthesia, Abant Izzet Baysal University, Bolu, Turkey. The study randomly divided 21 rats into three groups: group 1 (SGB group), group 2 (preconditioned group), and group 3 (control group). The SGB group received percutaneous ganglion blockade, the preconditioned group was subjected to ischemia and then reperfusion periods for 5 minutes, and the control group was injected with normal saline. The study investigated whether Sema3a overexpression within the left stellate ganglion (LSG) confers an antiarrhythmic effect after myocardial infarction (MI) through decreasing extra- and intra-cardiac neural remodelling. However, the provided context does not mention anything about the effect of pretreatment with SGB 

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer:   The provided context discusses the effects of a preparation called SUB885C on lipid biochemistry in ApoE*3Leiden.CETP mice. Lipidomics is mentioned as a tool to assess the effects of this preparation on lipid biochemistry. However, the context does not provide enough information to determine if lipidomics reveals multiple pathway effects of the multi-component preparation on lipid biochemistry in ApoE*3Leiden.CETP mice.

Therefore, the answer is "Maybe".


# Evaluation

In [14]:
bleu = evaluate.load("bleu", cache_dir="evaluation_metrics")  # value ranges from 0 to 1. score of 1 is better

bleu_score = bleu.compute(predictions=pred_no_ret, references=long_answers)
print(f"Vanilla QA: BLEU Score: {bleu_score}")

bleu_score = bleu.compute(predictions=final_pred_citations, references=long_answers)
print(f"QA with Citations: BLEU Score: {bleu_score}")

bleu_score = bleu.compute(predictions=pred_ret, references=long_answers)
print(f"QA with Retrieval: BLEU Score: {bleu_score}")

Vanilla QA: BLEU Score: {'bleu': 0.026240764988110548, 'precisions': [0.1092375366568915, 0.03099630996309963, 0.015601783060921248, 0.008975317875841436], 'brevity_penalty': 1.0, 'length_ratio': 3.3679012345679014, 'translation_length': 1364, 'reference_length': 405}
QA with Citations: BLEU Score: {'bleu': 0.01103251169790062, 'precisions': [0.055475296157180005, 0.015937409446537234, 0.006393490264458007, 0.0026208503203261502], 'brevity_penalty': 1.0, 'length_ratio': 8.54567901234568, 'translation_length': 3461, 'reference_length': 405}
QA with Retrieval: BLEU Score: {'bleu': 0.031105619821945752, 'precisions': [0.14513677811550152, 0.045941807044410414, 0.020061728395061727, 0.006998444790046656], 'brevity_penalty': 1.0, 'length_ratio': 3.2493827160493827, 'translation_length': 1316, 'reference_length': 405}


In [15]:
import numpy as np

bertscore = evaluate.load("bertscore", cache_dir="evaluation_metrics")

bert_score = bertscore.compute(predictions=pred_no_ret, references=long_answers , lang="en", batch_size =1)
bert_score = {key: np.mean(value) if key!= "hashcode" else value for key, value in bert_score.items()}
print(f"Vanilla QA: BERTScore: {bert_score}")

bert_score = bertscore.compute(predictions=final_pred_citations, references=long_answers , lang="en" , batch_size =1)
bert_score = {key: np.mean(value) if key!= "hashcode" else value for key, value in bert_score.items()}
print(f"QA with Citations: BERTScore: {bert_score}")

bert_score = bertscore.compute(predictions=pred_ret, references=long_answers , lang="en", batch_size =1 )
bert_score = {key: np.mean(value) if key!= "hashcode" else value for key, value in bert_score.items()}
print(f"QA with Retrieval: BERTScore: {bert_score}")

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Vanilla QA: BERTScore: {'precision': 0.7230669915676117, 'recall': 0.7510222315788269, 'f1': 0.736576783657074, 'hashcode': 'roberta-large_L17_no-idf_version=0.3.12(hug_trans=4.35.2)'}
QA with Citations: BERTScore: {'precision': 0.7833699464797974, 'recall': 0.8281284153461457, 'f1': 0.8046529650688171, 'hashcode': 'roberta-large_L17_no-idf_version=0.3.12(hug_trans=4.35.2)'}
QA with Retrieval: BERTScore: {'precision': 0.8313113331794739, 'recall': 0.8723351120948791, 'f1': 0.8508518457412719, 'hashcode': 'roberta-large_L17_no-idf_version=0.3.12(hug_trans=4.35.2)'}


In [16]:
def acc_calc_final(predictions, references):
    acc = 0
    for i in range(len(predictions)):
        # print(references[i].lower(), predictions[i].lower())
        if references[i].lower() in predictions[i].lower():
            acc += 1
    return acc / len(predictions)

acc = acc_calc_final(predictions=pred_no_ret, references=final_decisions)
print(f"Vanilla QA: acc: {acc}")

acc = acc_calc_final(predictions=final_pred_citations, references=final_decisions)
print(f"QA with Citations: acc: {acc}")

acc = acc_calc_final(predictions=pred_ret, references=final_decisions)
print(f"QA with Retrieval: acc: {acc}")


Vanilla QA: acc: 0.1
QA with Citations: acc: 0.0
QA with Retrieval: acc: 0.3
