In [1]:
import dataclasses
import json
from dataclasses import dataclass
from pathlib import Path

import ragas
from datasets import Dataset
from ragas import evaluate
from ragas.metrics import answer_correctness, faithfulness

from agents.basic_agents import BasicAgent
from agents.rag_agents import RagAgent

  from .autonotebook import tqdm as notebook_tqdm


This notebook prototypes an agent that evaluates the RAG pipeline.

We start with the basic RAG agent.

In [48]:
from synthetic_data_generation.generate_syn_data import csv_to_list
from config.project_paths import synthetic_data_dir
import random
# Collect the synthetic Q&A CSV files. Sorting makes the choice of "first" file
# independent of filesystem enumeration order.
qna_dir = sorted(synthetic_data_dir.glob('*.csv'))
if not qna_dir:
    # Fail with an actionable message instead of a bare IndexError on qna_dir[0].
    raise FileNotFoundError(f"No CSV files found in {synthetic_data_dir}")

# Load the (question, answer) rows from the first CSV.
# NOTE(review): assumes csv_to_list returns a sequence of 2-item rows -- the later
# cells unpack each item as `question, answer`; confirm against the helper.
qnas = csv_to_list(qna_dir[0])

# Draw at most 5 rows with a dedicated, seeded RNG so that Restart & Run All
# evaluates the same questions every time (and the global `random` state is left
# untouched). The size is clamped so files with fewer than 5 rows do not raise
# ValueError.
qnas = random.Random(42).sample(qnas, min(5, len(qnas)))

In [3]:
from agents.evaluator_agents import RAGResponse

In [54]:
# Agent whose answers will be evaluated further down.
rag_agent = RagAgent(config='stdout')

# Ask the agent every sampled question, in order. The reference answer in each
# (question, answer) pair is not needed at this stage, so it is discarded.
results = [rag_agent.answer_with_rag(question) for question, _ in qnas]

In [57]:
class EvaluationAgent(RagAgent):
    """RagAgent subclass that scores RAG responses with ``ragas.evaluate``."""

    def evaluate_rag(self, rag_responses: list[RAGResponse], ground_truths: list[str], metrics=None):
        """Score RAG responses against reference answers.

        Builds a ``datasets.Dataset`` with the columns ``question``, ``answer``,
        ``contexts`` and ``ground_truth`` and passes it to ``ragas.evaluate``
        together with this agent's ``self.llm`` and
        ``self.retriever.embedding_function``.

        Args:
            rag_responses: Responses to score. Each item must expose
                ``question``, ``answer.content`` and ``documents`` (an iterable
                of objects with a ``page_content`` attribute).
            ground_truths: Reference answers, one per response, in the same
                order as ``rag_responses``.
            metrics: Optional list of ragas metrics, for example
                ``[faithfulness, answer_correctness]``. The default ``None`` is
                forwarded unchanged, which leaves the metric selection to
                ragas, exactly as when no ``metrics`` argument was passed.

        Returns:
            The result of ``to_pandas()`` on the ragas evaluation result
            (one row per response).

        Raises:
            ValueError: If ``rag_responses`` and ``ground_truths`` differ in
                length.
        """
        # Catch misaligned inputs up front with a readable message, rather than
        # letting the dataset construction fail on unequal column lengths.
        if len(rag_responses) != len(ground_truths):
            raise ValueError(
                f"rag_responses and ground_truths must have the same length "
                f"(got {len(rag_responses)} and {len(ground_truths)})"
            )

        data = {
            'question': [response.question for response in rag_responses],
            'answer': [response.answer.content for response in rag_responses],
            # One list of retrieved passages per response.
            'contexts': [
                [doc.page_content for doc in response.documents]
                for response in rag_responses
            ],
            'ground_truth': ground_truths,
        }

        dataset = Dataset.from_dict(data)

        score = evaluate(
            dataset,
            metrics=metrics,
            llm=self.llm,
            embeddings=self.retriever.embedding_function,
        )
        return score.to_pandas()

In [52]:
# Instantiate the evaluator; it is constructed with config='stdout'.
# NOTE(review): re-run this cell after editing the EvaluationAgent class cell,
# otherwise eval_agent keeps the methods of the previous class definition.
eval_agent = EvaluationAgent(config='stdout')

In [58]:
# The second element of every (question, answer) pair is the reference answer.
ground_truths = [answer for _question, answer in qnas]
eval_result = eval_agent.evaluate_rag(results, ground_truths)

Evaluating:   0%|          | 0/10 [00:00<?, ?it/s]

Evaluating: 100%|██████████| 10/10 [00:09<00:00,  1.09it/s]


In [45]:
from utilities import print_long_text

# Print the 'answer' value of the first evaluated row via the print_long_text helper.
print_long_text(eval_result['answer'][0])

Accessibility signals are notifications that indicate the presence of certain markers on the current line, such as
announcements and are used to aid visually impaired or screen reader users. The availability and configuration of these
signals can be managed through settings like `accessibility.signals.*`. They are not related to color blindness or
complementary colors.


In [46]:
# Print the 'ground_truth' value of the first evaluated row, for comparison with the answer.
print_long_text(eval_result['ground_truth'][0])

text regions or inline suggestions.


In [63]:
from config.project_paths import project_root
# Write the evaluation table to an HTML file directly under the project root.
report_path = project_root / 'eval_test.html'
eval_result.to_html(report_path)