In [5]:
import os
import pandas as pd
import numpy as np

In [6]:
# Directory containing the CSV files.
# Defaults to the original local results folder; set the RAG_TEST_RESULTS_DIR
# environment variable to point the notebook at another folder without editing code.
directory = os.environ.get(
    "RAG_TEST_RESULTS_DIR",
    "C:/Users/York Yong/OneDrive - Singapore Management University/Desktop/Pokemon Game RAG Chatbot/Test Results",
)

# Metric columns read from every results CSV, grouped by the pipeline component they score.
# Each group is averaged on its own before the groups are combined into the total score.
generation_columns = ["faithfulness", "answer_relevancy"]
retrieval_columns = ["context_precision", "context_relevancy", "context_recall", "context_entity_recall"]
end_to_end_columns = ["answer_similarity", "answer_correctness"]

In [17]:
# Function to process each CSV file
def process_csv(file_path, column_groups=None):
    """Average the metric columns of one results CSV and compute its total score.

    Parameters
    ----------
    file_path : str or file-like
        Path (or readable buffer) handed to ``pd.read_csv``.
    column_groups : sequence of sequences of str, optional
        Metric column names grouped by component. Every group must be
        non-empty. Defaults to the notebook-level
        ``(generation_columns, retrieval_columns, end_to_end_columns)``.

    Returns
    -------
    dict
        One entry per metric column holding its mean, plus ``"total_score"``:
        the mean of the per-group means, so each group carries equal weight
        regardless of how many metrics it contains.

    Raises
    ------
    KeyError
        If any expected metric column is absent from the CSV.
    ValueError
        If ``column_groups`` is empty or contains an empty group.
    """
    if column_groups is None:
        # Resolved at call time so edits to the notebook-level lists are picked up.
        column_groups = (generation_columns, retrieval_columns, end_to_end_columns)
    column_groups = [list(group) for group in column_groups]
    if not column_groups or any(not group for group in column_groups):
        raise ValueError("column_groups must contain at least one non-empty group")

    df = pd.read_csv(file_path)

    metric_columns = [col for group in column_groups for col in group]
    missing = [col for col in metric_columns if col not in df.columns]
    if missing:
        # Report every absent column at once, together with the offending file.
        raise KeyError(f"{file_path}: missing metric column(s) {missing}")

    # NaN entries are deliberately counted as 0, so an unscored row lowers the mean
    # instead of being ignored.
    averages = {col: df[col].fillna(0).mean() for col in metric_columns}

    # Derive the group scores from the column groups themselves so the formula
    # cannot drift out of sync with the configured metric lists.
    group_scores = [
        sum(averages[col] for col in group) / len(group)
        for group in column_groups
    ]
    averages["total_score"] = sum(group_scores) / len(group_scores)
    return averages

In [18]:
# Column order for the summary table: file name first, then every metric, then the total
columns = ['file_name'] + generation_columns + retrieval_columns + end_to_end_columns + ["total_score"]

# List to store the summarized results
summary = []

# Process each CSV file in the directory.
# sorted() keeps the processing order reproducible; os.listdir() returns entries in arbitrary order.
for filename in sorted(os.listdir(directory)):
    if filename.endswith(".csv"):
        file_path = os.path.join(directory, filename)
        averages = process_csv(file_path)
        averages["file_name"] = filename  # Add the file name to the results
        summary.append(averages)

# Create a DataFrame from the summarized results.
# Passing columns= fixes the column order and still yields a correctly shaped (empty)
# table when the directory holds no CSV files, instead of failing on column selection.
summary_df = pd.DataFrame(summary, columns=columns)

# Remove duplicates
summary_df = summary_df.drop_duplicates()

# Sort the DataFrame by total score in descending order
summary_df = summary_df.sort_values(by='total_score', ascending=False)

# Reset the index
summary_df = summary_df.reset_index(drop=True)

# Output the summarized DataFrame
summary_df

Unnamed: 0,file_name,faithfulness,answer_relevancy,context_precision,context_relevancy,context_recall,context_entity_recall,answer_similarity,answer_correctness,total_score
0,llama3-GoogleGAI-2000-200.csv,0.831667,0.744816,0.848889,0.064941,0.691667,0.358138,0.882705,0.518816,0.65997
1,llama3-GoogleGAI-8000-800.csv,0.839444,0.730348,0.856111,0.05654,0.691667,0.342656,0.882823,0.521727,0.657972
2,llama3-GoogleGAI-4000-400.csv,0.774048,0.737667,0.862778,0.060809,0.705,0.353528,0.882641,0.509337,0.649125
3,llama3-GoogleGAI-1000-100.csv,0.802,0.680077,0.84,0.081087,0.713333,0.336663,0.881426,0.543896,0.648824
4,llama3-OpenAI-8000-800.csv,0.799397,0.737532,0.779444,0.056714,0.687333,0.291044,0.878397,0.520666,0.640543
5,llama3-OpenAI-4000-400.csv,0.800333,0.685376,0.831111,0.039586,0.61,0.340944,0.877541,0.516491,0.63176
6,llama3-OpenAI-1000-100.csv,0.866349,0.614363,0.781667,0.080045,0.64,0.349628,0.864866,0.499089,0.628389
7,llama3-OpenAI-2000-200.csv,0.744162,0.604213,0.786111,0.075973,0.62,0.32396,0.86803,0.514683,0.605685
8,gemma-GoogleGAI-4000-400.csv,0.403667,0.165674,0.825,0.049289,0.683333,0.36307,0.795302,0.277468,0.433743
9,gemma-GoogleGAI-8000-800.csv,0.423333,0.147917,0.826667,0.054788,0.663333,0.332757,0.792322,0.265034,0.427896
