<a href="https://colab.research.google.com/github/winterForestStump/thesis/blob/main/evaluation/argilla_notebook.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
%%capture
%pip install argilla -U
%pip install datasets

In [2]:
import argilla as rg

rg.init(
    api_url="https://winterForestStump-thesis.hf.space",
    api_key="admin.apikey",
    workspace="admin")

In [3]:
from datasets import Dataset
import pandas as pd

# Adding datasets

In [17]:
ds=pd.read_csv('https://raw.githubusercontent.com/winterForestStump/thesis/main/evaluation/bge-reranker_x_phi3-4k/csv/eval_3M%20CO.json.csv')
data_set = Dataset.from_pandas(ds)
data_set

Dataset({
    features: ['Unnamed: 0', 'question', 'company', 'response', 'context', 'retrieval_grade', 'hallucination_grade', 'answer_grade'],
    num_rows: 35
})

In [18]:
feedback_dataset = rg.FeedbackDataset(
    guidelines="Grade the retrieval and generation results of the RAG chain",
    fields=[
        rg.TextField(name="query", title="User's question (query)"),
        rg.TextField(name="retrieved_document", title="Retrieved by the RAG chain documents (context)"),
        rg.TextField(name="generated_response", title="Generated LLM response (answer) to the question"),
    ],
    questions=[
        rg.LabelQuestion(
            name="relevancy",
            title="Are the retrieved documents relevant to the given question?",
            labels=["YES", "NO", "UNSURE"],
            required=True,
            visible_labels=None
            ),
        rg.LabelQuestion(
            name="faithfulness",
            title="Is the generated answer grounded in / supported by a context (retrieved documents)?",
            labels=["YES", "NO", "UNSURE"],
            required=True,
            visible_labels=None
            ),
        rg.LabelQuestion(
            name="usefulness",
            title="Is generated answer useful to resolve a question?",
            labels=["YES", "NO", "UNSURE"],
            required=True,
            visible_labels=None
            ),
    ]
)
feedback_dataset

FeedbackDataset(
   fields=[TextField(name='query', title="User's question (query)", required=True, type='text', use_markdown=False), TextField(name='retrieved_document', title='Retrieved by the RAG chain documents (context)', required=True, type='text', use_markdown=False), TextField(name='generated_response', title='Generated LLM response (answer) to the question', required=True, type='text', use_markdown=False)]
   questions=[LabelQuestion(name='relevancy', title='Are the retrieved documents relevant to the given question?', description=None, required=True, type='label_selection', labels=['YES', 'NO', 'UNSURE'], visible_labels=None), LabelQuestion(name='faithfulness', title='Is the generated answer grounded in / supported by a context (retrieved documents)?', description=None, required=True, type='label_selection', labels=['YES', 'NO', 'UNSURE'], visible_labels=None), LabelQuestion(name='usefulness', title='Is generated answer useful to resolve a question?', description=None, requir

In [19]:
records = []
for i, item in enumerate(data_set):
    records.append(
        rg.FeedbackRecord(
            fields={
                "query": item["question"],
                "retrieved_document": item['context'],
                "generated_response": item["response"],
            },
            external_id=f"record-{i}"
        )
    )

feedback_dataset.add_records(records)

In [None]:
remote_dataset = feedback_dataset.push_to_argilla(name="my-dataset")

# Adding records to the dataset

In [4]:
dataset = rg.FeedbackDataset.from_argilla(name="my-dataset", workspace="admin")

In [5]:
files = [
         "https://raw.githubusercontent.com/winterForestStump/thesis/main/evaluation/bge-reranker_x_phi3-4k/csv/eval_COCA%20COLA%20CO.json.csv",
         "https://raw.githubusercontent.com/winterForestStump/thesis/main/evaluation/bge-reranker_x_phi3-4k/csv/eval_AMAZON%20COM%20INC.json.csv",
         "https://raw.githubusercontent.com/winterForestStump/thesis/main/evaluation/bge-reranker_x_phi3-4k/csv/eval_JPMORGAN%20CHASE%20%26%20CO.json.csv",
         "https://raw.githubusercontent.com/winterForestStump/thesis/main/evaluation/bge-reranker_x_phi3-4k/csv/eval_LOCKHEED%20MARTIN%20CORP.json.csv",
         "https://raw.githubusercontent.com/winterForestStump/thesis/main/evaluation/bge-reranker_x_phi3-4k/csv/eval_MICROSOFT%20CORP.json.csv",
         "https://raw.githubusercontent.com/winterForestStump/thesis/main/evaluation/bge-reranker_x_phi3-4k/csv/eval_NIKE%2C%20Inc..json.csv",
         "https://github.com/winterForestStump/thesis/blob/main/evaluation/bge-reranker_x_phi3-4k/csv/eval_PayPal%20Holdings%2C%20Inc..json.csv",
         "https://raw.githubusercontent.com/winterForestStump/thesis/main/evaluation/bge-reranker_x_phi3-4k/csv/eval_VERIZON%20COMMUNICATIONS%20INC.json.csv",
         "https://raw.githubusercontent.com/winterForestStump/thesis/main/evaluation/bge-reranker_x_phi3-4k/csv/eval_Walmart%20Inc..json.csv"
         ]

In [21]:
records = []
length = 35
for file in files:
    try:
        # Attempt to read the CSV file
        ds = pd.read_csv(file)
        data_set = Dataset.from_pandas(ds)

        # Process each record in the dataset
        for i, item in enumerate(data_set):
            records.append(rg.FeedbackRecord(fields={"query": item["question"],
                                                     "retrieved_document": item['context'],
                                                     "generated_response": item["response"]
                                                     },
                                            ))
        length += len(data_set)

    except pd.errors.ParserError as e:
        print(f"Error parsing {file}: {e}")
    except Exception as e:
        print(f"An unexpected error occurred with {file}: {e}")


# Output the results
print(f"Processed {len(records)} records.")

Error parsing https://github.com/winterForestStump/thesis/blob/main/evaluation/bge-reranker_x_phi3-4k/csv/eval_PayPal%20Holdings%2C%20Inc..json.csv: Error tokenizing data. C error: Expected 1 fields in line 40, saw 20

Processed 280 records.


In [22]:
length

315

In [23]:
dataset.add_records(records)

Output()

# Collect responses

In [6]:
feedback = rg.FeedbackDataset.from_argilla("my-dataset", workspace="admin")

In [29]:
list_values = []
for record_ix,record in enumerate(feedback):
  list_values.append({"record": str(record_ix+1),
                      "id": feedback.records[record_ix].id,
                      "query": feedback.records[record_ix].fields['query'],
                      "retrieved_document": feedback.records[record_ix].fields['retrieved_document'],
                      "generated_response": feedback.records[record_ix].fields['generated_response'],
                      "relevancy_value": feedback.records[record_ix].responses[0].values["relevancy"].value,
                      "faithfulness_value": feedback.records[record_ix].responses[0].values["faithfulness"].value,
                      "usefulness_value": feedback.records[record_ix].responses[0].values["usefulness"].value
                      })

In [30]:
len(list_values)

35

In [34]:
df = pd.DataFrame(list_values)
df.to_csv('eval_resuts_argilla_generalQA.csv')