In [None]:
import json
import os
import pandas as pd

# Aggregate the per-sample evaluation metrics stored in every
# "*.json" file of `folder_path` and export their means to an Excel workbook:
#   - sheet "Evaluation":      mean of each numeric column per (Model, Size)
#   - sheet "Model_Averages":  mean of each numeric column per Model

# Define the folders
test = "test_1"
folder_path = f"../../results/context_learning/{test}"  # Change this to your folder path

# Define the column names
metric_columns = ['BLEU', 'METEOR', "CHRF++", "TER", "Cosine_Similarity"]
leading_columns = ['Model', 'Size'] + metric_columns

# Collect every row first and build the DataFrame once afterwards.
# Concatenating onto an initially empty DataFrame inside the loop triggers a
# pandas FutureWarning (dtype inference with empty entries is deprecated)
# and re-copies the accumulated frame for every file.
rows = []

# Sorted so the processing (and log) order is reproducible across platforms;
# os.listdir returns entries in arbitrary order.
for filename in sorted(os.listdir(folder_path)):
    if not filename.endswith(".json"):  # Ensure it's a JSON file
        continue

    print(f"Processing: {filename}")
    file_path = os.path.join(folder_path, filename)

    # Open and load the JSON file
    with open(file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)

    # The model name is the file name without the evaluation suffix and
    # without the "-R1-Distill" marker.
    model_name = filename.replace("_shot_data_evaluate.json", "").replace("-R1-Distill", "")

    for e in data:
        # Copy the metrics into a new dict instead of mutating the loaded
        # JSON entry; "Model" and "Size" take precedence over same-named keys.
        rows.append({
            **e.get("eval", {}),
            "Model": model_name,
            "Size": e.get("size", "Unknown"),
        })

dataframe = pd.DataFrame(rows)

# Put Model/Size/metrics first (creating any that are missing as NaN) and keep
# any additional keys found in the "eval" dicts after them.
extra_columns = [col for col in dataframe.columns if col not in leading_columns]
dataframe = dataframe.reindex(columns=leading_columns + extra_columns)

# Convert numeric columns from string to float (unparseable values become NaN)
for col in metric_columns:
    dataframe[col] = pd.to_numeric(dataframe[col], errors='coerce')

# Group by Model and Size to calculate mean values
mean_values = dataframe.groupby(['Model', 'Size']).mean(numeric_only=True).reset_index()

# Compute overall mean values per Model (ignoring size)
model_avg = dataframe.groupby(['Model']).mean(numeric_only=True).reset_index()

# Save evaluation results
output_file = "mean_values_instruct.xlsx"
with pd.ExcelWriter(output_file, engine='xlsxwriter') as writer:
    mean_values.to_excel(writer, sheet_name="Evaluation", index=False)
    model_avg.to_excel(writer, sheet_name="Model_Averages", index=False)

print(f"Evaluation results saved to '{output_file}'.")


Processing: Llama-32-1B-Instruct_few_shot_data_evaluate.json
Processing: Llama-32-1B-Instruct_one_shot_data_evaluate.json
Processing: Llama-32-1B-Instruct_zero_shot_data_evaluate.json
Processing: Qwen25-05B-Instruct_few_shot_data_evaluate.json
Processing: Qwen25-05B-Instruct_one_shot_data_evaluate.json


  dataframe = pd.concat([dataframe, pd.DataFrame(temp)], ignore_index=True)


Processing: Qwen25-05B-Instruct_zero_shot_data_evaluate.json
Processing: Qwen25-15B-Instruct_few_shot_data_evaluate.json
Processing: Qwen25-15B-Instruct_one_shot_data_evaluate.json
Processing: Qwen25-15B-Instruct_zero_shot_data_evaluate.json
Processing: salamandra-2b-instruct_few_shot_data_evaluate.json
Processing: salamandra-2b-instruct_one_shot_data_evaluate.json
Processing: salamandra-2b-instruct_zero_shot_data_evaluate.json
Evaluation results saved to 'mean_values_instruct.xlsx'.
