In [18]:
import pandas as pd
import json

In [2]:
# Remove pandas' display limits so wide frames and long text cells are shown in full
for display_option in ('display.max_columns', 'display.max_colwidth'):
    pd.set_option(display_option, None)

In [3]:
# Base project folder; the CSV inputs are read from, and the results written to,
# its 'data' sub-folder
root_path = '/content/drive/MyDrive/Colab_Notebooks_New/LlamaIndex'
data_path = root_path + '/data'

In [7]:
# One results file per (retrieval method, prompt level) pair, named
# 'df_<retrieval_method>_<prompt_level>_gpt.csv'; all 'vector' files come first
filenames = [
    f'df_{method}_{level}_gpt.csv'
    for method in ('vector', 'graph')
    for level in ('no', 'low', 'high')
]

In [12]:
# Gather one DataFrame per results file, each tagged with the settings encoded in its name
dfs = []

for filename in filenames:
    # Names look like 'df_<retrieval_method>_<prompt_level>_gpt.csv', so the
    # second and third underscore-separated tokens identify the run
    retrieval_method, prompt_level = filename.split('_')[1:3]

    # Load the file and stamp every row with the two settings parsed above
    df = pd.read_csv(f'{data_path}/{filename}', low_memory=False).assign(
        retrieval_method=retrieval_method,
        prompt_level=prompt_level,
    )
    dfs.append(df)

# Stack the per-file frames into one table with a fresh 0..n-1 index
combined_df = pd.concat(dfs, ignore_index=True)

In [13]:
# Display the (rows, columns) dimensions of the concatenated frame
combined_df.shape

(3000, 11)

In [14]:
# Display the column labels of the concatenated frame
combined_df.columns

Index(['question', 'ground_truth', 'answer', 'contexts', '__index_level_0__',
       'context_precision', 'faithfulness', 'answer_relevancy',
       'context_recall', 'retrieval_method', 'prompt_level'],
      dtype='object')

In [15]:
# Write the concatenated results to a single CSV, leaving out the row index.
# NOTE(review): 'regas' in the file name looks like a typo for 'ragas' (compare
# 'ragas_res_gpt.json'); kept unchanged because other code may read this exact name.
combined_results_path = f'{data_path}/regas_complete_results_gpt.csv'
combined_df.to_csv(combined_results_path, index=False)

In [16]:
# Metric columns for which summary statistics are computed
cols_to_use = 'context_precision faithfulness answer_relevancy context_recall'.split()

In [None]:
# Per-file summary statistics, nested as
# ragas_res[retrieval_method][prompt_level][metric] -> {'mean': ..., 'std': ...}
ragas_res = {}

for filename in filenames:
    # File names look like 'df_<retrieval_method>_<prompt_level>_gpt.csv'
    parts = filename.split('_')
    retrieval_method = parts[1]  # 'vector' or 'graph'
    prompt_level = parts[2]      # 'no', 'low', or 'high'

    df = pd.read_csv(f'{data_path}/{filename}', low_memory=False)

    # .mean()/.std() return NumPy scalars; float() converts them to plain Python
    # floats so the displayed dict shows bare numbers rather than
    # np.float64(...) wrappers (NumPy >= 2). The values are unchanged.
    # .std() is the sample standard deviation (pandas default, ddof=1).
    ragas_res.setdefault(retrieval_method, {})[prompt_level] = {
        col: {'mean': float(df[col].mean()), 'std': float(df[col].std())}
        for col in cols_to_use
    }

ragas_res

In [19]:
# Persist the nested summary dict as JSON next to the input data
ragas_json_path = f'{data_path}/ragas_res_gpt.json'
with open(ragas_json_path, 'w') as json_file:
    json.dump(ragas_res, json_file)