In [None]:
import os
import dotenv
from datasets import load_dataset
import pandas as pd
from zeno_client import ZenoClient, ZenoMetric
from ragas.metrics import (
    answer_relevancy,
    faithfulness,
    context_recall,
    context_precision,
)
from ragas import evaluate

# Load variables from a .env file into the process environment; the Zeno client
# cell below reads ZENO_API_KEY from os.environ. override=True is meant to let
# values from .env replace variables that are already set (see python-dotenv docs).
dotenv.load_dotenv(override=True)

In [None]:
# Load the "ragas_eval" configuration of the explodinggradients/fiqa dataset.
fiqa_eval = load_dataset(path="explodinggradients/fiqa", name="ragas_eval")

# Bare expression so the notebook displays the loaded dataset object.
fiqa_eval

In [None]:
# ragas metrics to compute, in the order they are handed to evaluate().
ragas_metrics = [
    context_precision,
    faithfulness,
    answer_relevancy,
    context_recall,
]

# Only the "baseline" entry of the loaded dataset is scored here.
baseline_split = fiqa_eval["baseline"]

result = evaluate(baseline_split, metrics=ragas_metrics)

# Bare expression so the notebook displays the evaluation result.
result

In [None]:
# Convert the ragas result into a pandas DataFrame. Later cells read its
# question, contexts, answer, ground_truths and the four metric-score columns.
df = result.to_pandas()
# Preview the first rows.
df.head()

In [None]:
# Build the Zeno client from the API key stored in the ZENO_API_KEY environment
# variable; os.environ[...] raises KeyError if the variable is not set.
client = ZenoClient(os.environ["ZENO_API_KEY"])

In [None]:
# Names of the ragas score columns. Each one becomes a Zeno metric that takes the
# mean of the column with the same name.
metric_columns = [
    "context_precision",
    "faithfulness",
    "answer_relevancy",
    "context_recall",
]

# Create the Zeno project. The dataset being evaluated is FiQA
# (explodinggradients/fiqa), so the name and description spell it that way.
project = client.create_project(
    name="ragas FiQA eval",
    description="Evaluation of RAG model using ragas on FiQA dataset",
    public=True,
    view={
        # Input side: the question followed by the list of context texts. The
        # keys match the dict built for the "data" column in the dataset cell.
        "data": {
            "type": "vstack",
            "keys": {
                "question": {"type": "markdown"},
                "texts": {
                    "type": "list",
                    "elements": {"type": "markdown"},
                    "border": True,
                    "pad": True,
                },
            },
        },
        # The label column is a single string rendered as markdown.
        "label": {
            "type": "markdown",
        },
        # Output side: the generated answer followed by the ground truths. The
        # keys match the dict built for the "output" column in the system cell.
        "output": {
            "type": "vstack",
            "keys": {
                "answer": {"type": "markdown"},
                "ground_truths": {
                    "type": "list",
                    "elements": {"type": "markdown"},
                    "border": True,
                    "pad": True,
                },
            },
        },
        "size": "large",
    },
    # One "mean" metric per score column, named after the column it aggregates.
    metrics=[
        ZenoMetric(name=column, type="mean", columns=[column])
        for column in metric_columns
    ],
)

In [None]:
def _to_data_entry(row):
    """Bundle one row's question and contexts into the "data" payload dict."""
    return {"question": row["question"], "texts": list(row["contexts"])}


# Per-row payload dicts, and the ground truths joined into one newline-separated
# string per row.
data_entries = df.apply(_to_data_entry, axis=1)
label_text = df["ground_truths"].apply(lambda truths: "\n".join(truths))

data_df = pd.DataFrame({"data": data_entries, "label": label_text})
# The DataFrame index doubles as the row identifier.
data_df["id"] = data_df.index

In [None]:
# Upload data_df to the Zeno project, naming which of its columns hold the
# row id ("id"), the input data ("data") and the label ("label").
project.upload_dataset(
    data_df, id_column="id", data_column="data", label_column="label"
)

In [None]:
# Metric-score columns carried over from the evaluation DataFrame.
score_columns = [
    "context_precision",
    "faithfulness",
    "answer_relevancy",
    "context_recall",
]

# Copy so that adding columns below does not touch df itself.
output_df = df.loc[:, score_columns].copy()

# Per-row system output: the generated answer plus its ground truths as a list.
output_df["output"] = df.apply(
    lambda row: {"answer": row["answer"], "ground_truths": list(row["ground_truths"])},
    axis=1,
)
# The DataFrame index doubles as the row identifier.
output_df["id"] = output_df.index

In [None]:
# Upload output_df to the Zeno project as the system named "Base System",
# naming which columns hold the row id ("id") and the system output ("output").
project.upload_system(
    output_df, name="Base System", id_column="id", output_column="output"
)