In [None]:
from ragas.testset.generator import TestsetGenerator
from ragas.testset.evolutions import simple, reasoning, multi_context
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.document_loaders import DirectoryLoader
from dotenv import load_dotenv

import os

# Pull settings from a local .env file into the process environment.
load_dotenv()

# Folder holding the source documents. It can be overridden with the RAG_DOCS_DIR
# environment variable (read after load_dotenv(), so a .env entry works as well);
# the fallback is the path this notebook was originally written against.
docs_dir = os.environ.get("RAG_DOCS_DIR", "/Users/tomasz/plan-and-execute-rag/docs/")
loader = DirectoryLoader(docs_dir)
documents = loader.load()


# generator with openai models
generator_llm = ChatOpenAI(model="gpt-4o-mini")
critic_llm = ChatOpenAI(model="gpt-4o-mini")
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

# Build the Ragas test-set generator from the generator LLM, critic LLM and embeddings.
generator = TestsetGenerator.from_langchain(
    generator_llm,
    critic_llm,
    embeddings,
)

# generate testset
# testset = generator.generate_with_langchain_docs(documents, test_size=10, distributions={simple: 0.5, reasoning: 0.25, multi_context: 0.25})


In [None]:
from datasets import load_dataset
# Load the V2 ("english_v2") configuration of the Amnesty QA dataset and show its summary.
amnesty_qa = load_dataset(
    path="explodinggradients/amnesty_qa",
    name="english_v2",
)
amnesty_qa

In [None]:
from ragas.metrics import (
    answer_relevancy,
    faithfulness,
    context_recall,
    context_precision,
)



In [None]:
# NOTE: `result` is only assigned by cells further down in this notebook. Looking it up
# defensively keeps "Restart Kernel -> Run All" from failing here with a NameError:
# on a fresh kernel this cell shows nothing, otherwise it shows the current value.
globals().get("result")

In [None]:
from langchain_openai.chat_models import ChatOpenAI
from dotenv import load_dotenv

from langchain_core.tools import Tool
from langchain_experimental.plan_and_execute import (
    PlanAndExecute,
    load_agent_executor,
    load_chat_planner,
)


from langchain_openai import ChatOpenAI


from langchain_core.tools import Tool

# Load variables from a local .env file into the process environment
# (presumably the OpenAI credentials used by ChatOpenAI below — confirm).
load_dotenv()




def generate_response(prompt_input, model_name="gpt-4o-mini", temperature=0.7, verbose=True):
    """Answer a prompt with a LangChain plan-and-execute agent.

    A fresh chat model, planner, executor and agent are built on every call.

    Args:
        prompt_input: Text handed to the agent under the ``"input"`` key.
        model_name: Chat model name passed to ``ChatOpenAI``; used for both
            the planner and the executor.
        temperature: Sampling temperature passed to ``ChatOpenAI``. The
            default (0.7) matches the original hard-coded value; pass a lower
            value when more repeatable evaluation runs are wanted.
        verbose: Forwarded to both the agent executor and the agent.

    Returns:
        The value stored under ``"output"`` in the agent's response.
    """
    model = ChatOpenAI(model=model_name, temperature=temperature)

    planner = load_chat_planner(model)
    # No tools are supplied to the executor.
    tools = []
    executor = load_agent_executor(model, tools, verbose=verbose)

    agent = PlanAndExecute(planner=planner, executor=executor, verbose=verbose)
    response = agent.invoke({"input": prompt_input})
    return response["output"]



# Convert the "eval" split to a pandas DataFrame; each row supplies a question and its contexts.
ds_for_parse = amnesty_qa["eval"].to_pandas()
results = []  # agent answers, in row order
prompts = []  # prompts sent to the agent, in row order
# Walk the two needed columns directly instead of building a Series per row via iterrows().
for context, question in zip(ds_for_parse["contexts"], ds_for_parse["question"]):
    prompt_input = f"""Answer the question: {question}
    Use the provided context to answer the question. The relevant contexts extracted from relevant documents are listed below:
    {context}"""
    prompts.append(prompt_input)
    # Named `answer` rather than `result` so this loop temporary does not reuse the
    # notebook-level name `result`, which holds the evaluate() output elsewhere.
    answer = generate_response(prompt_input)
    # Appending inside the loop keeps the answers gathered so far if a later call fails.
    results.append(answer)




In [None]:
from pandas import Series
# Preview the collected agent answers as a pandas Series.
Series(results)

In [None]:
# Attach the generated answers as the "answer" column. The list is assigned directly
# instead of being wrapped in a Series: a Series is aligned on the index, so a `results`
# list shorter than the frame would be silently padded with NaN, whereas a list of the
# wrong length raises a ValueError.
ds_for_parse["answer"] = results

In [None]:
from datasets import Dataset

# Rebuild the evaluation frame from the "eval" split and attach the generated answers.
my_dataset = amnesty_qa["eval"].to_pandas()
# Assign the list itself rather than Series(results): a Series is aligned on the index,
# so a short `results` would be silently padded with NaN, whereas a list whose length
# does not match the frame raises a ValueError.
my_dataset["answer"] = results

# Convert back to a `datasets.Dataset`, the object passed to evaluate() in the next cell.
my_ds_for_eval = Dataset.from_pandas(my_dataset)

In [None]:
from ragas import evaluate
# Score the dataset (including the generated "answer" column) with the four Ragas
# metrics imported earlier in the notebook.
result = evaluate(
    dataset=my_ds_for_eval,
    metrics=[context_precision, faithfulness, answer_relevancy, context_recall],
)

In [None]:
# Display the value returned by the evaluate() call in the previous cell.
result