In [10]:
import time
import os
import json
import gdown
import random
import pickle
import tarfile
import warnings
import requests
import pandas as pd
from tqdm import tqdm
from datasets import Dataset
from langchain_groq import ChatGroq
from langchain_core.documents import Document
from bs4 import BeautifulSoup, MarkupResemblesLocatorWarning
from langchain_community.embeddings import HuggingFaceEmbeddings
from ragas import evaluate
from ragas.metrics import faithfulness, answer_relevancy, context_relevancy
import groq 


In [None]:
# Read the Groq API key from the environment. ChatGroq takes the key string
# itself via `groq_api_key`, so no client object is constructed here.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
# temperature=0 is passed so the judge model is as deterministic as the API allows.
llm = ChatGroq(temperature=0, groq_api_key=GROQ_API_KEY, model_name="llama3-70b-8192")
# Embedding model built with HuggingFaceEmbeddings' default settings.
embedder = HuggingFaceEmbeddings()

In [5]:
# Directory (relative to the working directory) where the dataset files are
# downloaded, extracted and read from.
DATA_DIR = 'data'

# Adapted from Ramon Simoes
def create_dataset():
    """Download the dataset files into ``DATA_DIR`` if needed and load them.

    The context-articles archive is fetched over HTTP and unpacked; the test
    questions are fetched from Google Drive with ``gdown``. Files that already
    exist on disk are not downloaded again, and archive members that already
    exist under ``DATA_DIR`` are not extracted again.

    Returns:
        tuple: ``(context_articles, test_set)`` - the parsed contents of
        ``context_articles.json`` and ``test_questions.json``.

    Raises:
        requests.HTTPError: if the archive download answers with an error
            status (nothing is left at the final path in that case).
    """
    Context_articles = "https://iirc-dataset.s3.us-west-2.amazonaws.com/context_articles.tar.gz"
    IIRC_test = "https://drive.google.com/file/d/1hydwcbwN2-qoudoAbKIPjVruy0m8xjy2/view?usp=sharing"

    os.makedirs(DATA_DIR, exist_ok=True)

    for url in [Context_articles, IIRC_test]:
        if url.startswith('https://drive.google.com'):
            filename = 'test_questions.json'
            filepath = os.path.join(DATA_DIR, filename)
            if not os.path.exists(filepath):
                print(f"Baixando {filename}...")
                gdown.download(url, filepath, quiet=False, fuzzy=True)
                print(f"\n{filename} baixado.")
        else:
            filename = url.split('/')[-1]
            filepath = os.path.join(DATA_DIR, filename)

            if not os.path.exists(filepath):
                print(f"Baixando {filename}...")
                # Stream into a temporary ".part" file and rename only on
                # success, so a failed or interrupted download is never
                # mistaken for a complete archive on the next run.
                partial_path = filepath + '.part'
                with requests.get(url, stream=True, timeout=60) as r:
                    r.raise_for_status()
                    with open(partial_path, 'wb') as f:
                        for chunk in r.iter_content(chunk_size=1 << 20):
                            f.write(chunk)
                os.replace(partial_path, filepath)
                print(f"{filename} baixado.")

            if filename.endswith(('.tgz', '.tar.gz')):
                with tarfile.open(filepath, 'r:gz') as tar:
                    # Only extract members that are not on disk yet.
                    members = [m for m in tar.getmembers() if not os.path.exists(os.path.join(DATA_DIR, m.name))]
                    if members:
                        print(f"Extraindo {filename}...")
                        # NOTE(review): extractall trusts the member paths in
                        # the archive; acceptable only because the URL above
                        # is a fixed, known source.
                        tar.extractall(DATA_DIR, members=members)
                        print(f"{filename} extraído.")

    # Explicit UTF-8 and context managers: no leaked handles, no dependence
    # on the platform's default encoding.
    with open(f"{DATA_DIR}/context_articles.json", "r", encoding="utf-8") as f:
        context_articles = json.load(f)
    with open(f"{DATA_DIR}/test_questions.json", "r", encoding="utf-8") as f:
        test_set = json.load(f)
    return context_articles, test_set

# Taken from Ramon Simoes
def format_answer(item):
    """Render the answer stored in ``item['answer']`` as a single value.

    The rendering depends on ``item['answer']['type']``:

    * ``"span"``   - the ``text`` of every entry in ``answer_spans``, joined
      with ``", "``.
    * ``"value"``  - ``"<answer_value> <answer_unit>"``.
    * ``"binary"`` - ``answer_value`` returned unchanged.
    * ``"none"``   - the fixed string ``"Not enough information"``.

    Args:
        item: mapping with an ``'answer'`` entry shaped as described above.

    Returns:
        The formatted answer.

    Raises:
        ValueError: if the answer type is not one of the four listed above
            (previously this surfaced as an UnboundLocalError).
    """
    answer_info = item['answer']
    answer_type = answer_info['type']

    if answer_type == "span":
        return ", ".join(answer_span['text'] for answer_span in answer_info["answer_spans"])

    if answer_type == "value":
        return "{0} {1}".format(answer_info['answer_value'], answer_info['answer_unit'])

    if answer_type == "binary":
        return answer_info['answer_value']

    if answer_type == "none":
        return "Not enough information"

    raise ValueError(f"Unsupported answer type: {answer_type!r}")

def remove_html_tags(text):
    """Parse ``text`` with BeautifulSoup's ``html.parser`` and return its text content.

    ``MarkupResemblesLocatorWarning`` is silenced while parsing, and only
    within this call; the warning filters are restored afterwards.
    """
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=MarkupResemblesLocatorWarning)
        parsed = BeautifulSoup(text, "html.parser")

    plain_text = parsed.get_text()
    return plain_text

# Adapted from Ramon Simoes
def process_data(dataset, verbose = False):
    """Turn raw question items into ``question`` / ``answer`` / ``context`` records.

    For every item, the ``text`` of each entry in ``item["context"]`` is
    stripped of HTML and collected, and the answer is rendered with
    ``format_answer``.

    Args:
        dataset: iterable of items, each providing ``"question"``,
            ``"context"`` (entries with a ``"text"`` field) and the answer
            data that ``format_answer`` reads.
        verbose: when true, print the question and the contexts gathered so
            far after each context is cleaned.

    Returns:
        list[dict]: one record per item, in input order.
    """
    processed = []
    for entry in tqdm(dataset):
        cleaned = []
        for passage in entry["context"]:
            cleaned.append(remove_html_tags(passage["text"]))
            if not verbose:
                continue
            print(f"\n {entry['question']}")
            print(cleaned)

        record = {
            "question": entry["question"],
            "answer": format_answer(entry),
            "context": cleaned,
        }
        processed.append(record)

    print(f"\nQuantidade de questões: {len(processed)}")
    return processed

# Fetch (or reuse) the dataset files and report how many entries each holds.
context_articles, test_set = create_dataset()
for label, collection in (("context_articles", context_articles), ("test_set", test_set)):
    print(f"\nQuantidade de amostras do {label}: {len(collection)}")

# Model and embedder configuration
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
llm = ChatGroq(
    temperature=0,
    groq_api_key=GROQ_API_KEY,
    model_name="llama3-70b-8192",
)
embedder = HuggingFaceEmbeddings()

# Metrics handed to ragas.evaluate for every question.
metrics = [
    faithfulness,
    answer_relevancy,
    context_relevancy,
]

documents = process_data(test_set)

# Only the first N questions are evaluated; adjust N as needed.
N = 5
selected_documents = documents[:N]

question = [doc["question"] for doc in selected_documents]
context = [doc["context"] for doc in selected_documents]
# NOTE(review): the evaluated "answer" is the reference answer itself, so
# `answer` and `ground_truth` carry the same values - confirm this is intended
# rather than scoring answers produced by `llm`.
answer = [doc["answer"] for doc in selected_documents]
ground_truth = list(answer)

evaluation_columns = {
    "question": question,
    "contexts": context,
    "answer": answer,
    "ground_truth": ground_truth,
}
dataset = Dataset.from_dict(evaluation_columns)

# Adapted from Ramon Simoes
save_file = "qa_data.pickle"
MAX_RETRIES = 5            # consecutive failures tolerated for one question before it is skipped
RETRY_WAIT_SECONDS = 120   # pause between attempts after a failure

# Resume from a previous run when a checkpoint exists.
# NOTE: pickle.load can execute arbitrary code; only load a checkpoint that
# this notebook wrote itself.
if os.path.exists(save_file):
    with open(save_file, 'rb') as f:
        qa = pickle.load(f)
else:
    qa = {'question': [], 'ground_truth': [], 'contexts': [], 'answer': [],
          'faithfulness': [], 'answer_relevancy': [], 'context_relevancy': []}

last_processed_index = 0
failed_attempts = 0
skipped_indices = []
with tqdm(total=len(dataset)) as pbar:
    while last_processed_index < len(dataset):
        # Evaluate one question at a time so progress can be checkpointed.
        q = Dataset.from_dict(dataset[last_processed_index: last_processed_index + 1])

        # Already present in the checkpoint: nothing to do.
        if q['question'] in qa['question']:
            last_processed_index += 1
            pbar.update(1)
            continue

        try:
            result = evaluate(q, metrics, llm=llm, embeddings=embedder)
            scores = {name: result[name] for name in result}
        except Exception as e:
            failed_attempts += 1
            print(f"\nErro no indice {last_processed_index}: {e}")
            if failed_attempts >= MAX_RETRIES:
                # Give up on this question instead of retrying forever.
                print(f"Desistindo do indice {last_processed_index} apos {MAX_RETRIES} tentativas.")
                skipped_indices.append(last_processed_index)
                failed_attempts = 0
                last_processed_index += 1
                pbar.update(1)
                continue
            print(f"Rate limit excedido... esperando {RETRY_WAIT_SECONDS}s")
            time.sleep(RETRY_WAIT_SECONDS)
            continue

        # Record the row only after evaluation succeeded, so every column of
        # `qa` always has the same length (a failed attempt used to leave the
        # question recorded without its metric values).
        failed_attempts = 0
        qa['question'].append(q['question'])
        qa['ground_truth'].append(q['ground_truth'])
        qa['contexts'].append(q['contexts'])
        qa['answer'].append(q['answer'])
        for name, score in scores.items():
            qa[name].append(score)

        # Checkpoint every 5 recorded questions, then pause for a random
        # interval before issuing more requests.
        if len(qa['question']) % 5 == 0:
            with open(save_file, 'wb') as f:
                pickle.dump(qa, f)

            sleep_time = random.uniform(5, 20)
            time.sleep(sleep_time)

        last_processed_index += 1
        pbar.update(1)

if skipped_indices:
    print(f"\nIndices ignorados apos falhas repetidas: {skipped_indices}")

# Final save so results recorded since the last checkpoint are not lost.
with open(save_file, 'wb') as f:
    pickle.dump(qa, f)

# Report how many entries each column of the results holds.
for chave in qa:
    valor = qa[chave]
    if isinstance(valor, list):
        quantidade = len(valor)
    else:
        quantidade = 1
    print(f'Chave: {chave}, Quantidade de elementos: {quantidade}')

# Reload the checkpoint from disk and export it as CSV.
with open(save_file, 'rb') as checkpoint:
    qa = pickle.load(checkpoint)

df = pd.DataFrame(data=qa)
df.to_csv('resultado.csv', index=False)



Quantidade de amostras do context_articles: 56550

Quantidade de amostras do test_set: 50


100%|██████████| 50/50 [00:00<00:00, 14594.98it/s]



Quantidade de questões: 50


Evaluating: 100%|██████████| 3/3 [00:02<00:00,  1.33it/s]
Evaluating: 100%|██████████| 3/3 [01:24<00:00, 28.02s/it]
Evaluating: 100%|██████████| 3/3 [03:52<00:00, 77.59s/it]
Evaluating: 100%|██████████| 3/3 [04:36<00:00, 92.16s/it] 
Evaluating: 100%|██████████| 3/3 [05:11<00:00, 103.74s/it]
100%|██████████| 5/5 [15:15<00:00, 183.08s/it]

Chave: question, Quantidade de elementos: 5
Chave: ground_truth, Quantidade de elementos: 5
Chave: contexts, Quantidade de elementos: 5
Chave: answer, Quantidade de elementos: 5
Chave: faithfulness, Quantidade de elementos: 5
Chave: answer_relevancy, Quantidade de elementos: 5
Chave: context_relevancy, Quantidade de elementos: 5





In [6]:
# Display the per-question results table built from `qa`.
df

Unnamed: 0,question,ground_truth,contexts,answer,faithfulness,answer_relevancy,context_relevancy
0,[What is Zeus know for in Greek mythology?],[sky and thunder god],"[[he Palici the sons of Zeus, in Greek mytholo...",[sky and thunder god],1.0,0.724556,1.0
1,[How long had the First World War been over wh...,[5 years],[[he became aide-de-camp to King Victor Emmanu...,[5 years],1.0,0.651312,1.0
2,[How old was Messe when the First World War st...,[30 years],"[[Messe was born in Mesagne, in the Province o...",[30 years],1.0,0.929272,1.0
3,[How long had Angela Scoular been acting profe...,[2 years],[[Angela Scoular appeared as Ruby Bartlett in ...,[2 years],0.0,0.818161,0.333333
4,[What is the capacity of the stadium where Bru...,"[26,688]",[[Brunt returned to first-team action after ei...,"[26,688]",1.0,0.679309,1.0


In [7]:
# Compute and display the metrics
def calculate_metrics(df):
    """Print the mean of the three metric columns of ``df``.

    Args:
        df: DataFrame with ``faithfulness``, ``answer_relevancy`` and
            ``context_relevancy`` columns.

    Returns:
        None. The averages are written to standard output.
    """
    metric_columns = ('faithfulness', 'answer_relevancy', 'context_relevancy')
    # All means are computed before anything is printed.
    averages = {column: df[column].mean() for column in metric_columns}

    print("\nMétricas calculadas:")
    print(f"Faithfulness: {averages['faithfulness']}")
    print(f"Answer Relevance: {averages['answer_relevancy']}")
    print(f"Context Relevance: {averages['context_relevancy']}")

# Print the average of each metric over the evaluated questions.
calculate_metrics(df)


Métricas calculadas:
Faithfulness: 0.8
Answer Relevance: 0.7605219240133552
Context Relevance: 0.8666666666666668
