In [2]:
import sys
import os
# Put the parent of the current working directory on the import path
# (presumably so project-local modules can be imported from a notebook
# that lives one level below the project root — TODO confirm).
root_path = os.path.abspath(os.path.join(os.getcwd(), ".."))
sys.path.append(root_path)
# Base directory that main() joins with 'test/results/overall' to find results.
# NOTE(review): hard-coded, machine-specific absolute path that is not derived
# from root_path above — confirm/adjust before running on another machine.
ROOT_DIR = r"C:\Projects\Research\SWEEP\SWEEP"

import json
import pandas as pd
from collections import defaultdict

def read_json_file(file_path):
    """Load and return the parsed contents of a JSON file.

    The file is decoded explicitly as UTF-8 instead of the platform's
    locale encoding, so non-ASCII content is read the same way on every
    operating system.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The deserialized JSON value (dict, list, str, number, bool or None).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        return json.load(f)

def extract_process_metrics(data):
    """Collect per-process metrics from a loaded results dictionary.

    Reads the parallel lists 'accuracies', 'f1_scores' and 'substep_counts'
    under data['process_level_metrics'] and pairs them up position by
    position. Processes are named "Process_1", "Process_2", ... in list
    order. If the lists differ in length, pairing stops at the shortest.

    Args:
        data: Mapping containing a 'process_level_metrics' entry with the
            three lists named above.

    Returns:
        A defaultdict(dict) mapping each process name to a dict with the
        keys 'Accuracy', 'F1 Score' and 'Substep Count'.
    """
    collected = defaultdict(dict)
    level_metrics = data['process_level_metrics']
    rows = zip(
        level_metrics['accuracies'],
        level_metrics['f1_scores'],
        level_metrics['substep_counts'],
    )
    # Process numbering is 1-based, hence start=1.
    for number, (acc, f1, substeps) in enumerate(rows, start=1):
        collected[f"Process_{number}"] = {
            'Accuracy': acc,
            'F1 Score': f1,
            'Substep Count': substeps
        }
    return collected

def create_process_comparison_tables(results_dir):
    """Build one model-comparison table per process from saved result files.

    Every file in ``results_dir`` whose name ends with
    '_overall_results.json' is treated as the results of one model; the
    model name is the filename with that suffix removed. Each file is
    loaded with read_json_file() and reduced to per-process metrics with
    extract_process_metrics().

    Args:
        results_dir: Directory containing the '*_overall_results.json' files.

    Returns:
        A dict mapping each process name to a pandas DataFrame indexed by
        model name, with the metric names as columns, sorted by 'F1 Score'
        in descending order and rounded to 4 decimals. The dict is empty
        when no matching files (or no process metrics) are found.

    Raises:
        OSError: If ``results_dir`` cannot be listed or a file cannot be read.
    """
    suffix = '_overall_results.json'
    metrics_by_process = {}

    # os.listdir() returns entries in arbitrary order; sorting makes the row
    # order fed into each table (and thus tie-breaking) reproducible.
    for filename in sorted(os.listdir(results_dir)):
        if not filename.endswith(suffix):
            continue
        # Strip only the trailing suffix; str.replace() would also remove the
        # same text if it happened to occur in the middle of the name.
        model_name = filename[:-len(suffix)]
        data = read_json_file(os.path.join(results_dir, filename))

        for process_name, metrics in extract_process_metrics(data).items():
            metrics_by_process.setdefault(process_name, {})[model_name] = metrics

    process_tables = {}
    for process_name, model_metrics in metrics_by_process.items():
        # Outer keys (models) become columns, so transpose to get one row
        # per model and one column per metric.
        df = pd.DataFrame(model_metrics).T
        # A stable sort keeps models with equal F1 in filename order instead
        # of leaving their relative order to the sort algorithm.
        df = df.sort_values('F1 Score', ascending=False, kind='mergesort')
        process_tables[process_name] = df.round(4)

    return process_tables

def main():
    """Print per-process comparison tables and write an overall summary CSV.

    Reads the results under ``ROOT_DIR/test/results/overall``, prints one
    comparison table per process, then prints a summary listing the
    top-ranked model of each process (first row of its table) and saves
    that summary as 'process_comparison_summary.csv' in the same results
    directory. If no process tables are produced, a message is printed
    and nothing is written.
    """
    results_dir = os.path.join(ROOT_DIR, 'test/results/overall')

    process_tables = create_process_comparison_tables(results_dir)

    if not process_tables:
        # An empty summary frame has no 'Best F1 Score' column, so the
        # sort below would raise KeyError; stop early instead.
        print(f"\nNo process-level results found in {results_dir}; nothing to summarize.")
        return

    for process_name, table in process_tables.items():
        print(f"\nComparison Table for {process_name}:")
        print(table.to_string())

    # Create an overall summary table: one row per process, describing the
    # model ranked first in that process's table.
    summary_data = {}
    for process_name, table in process_tables.items():
        top_model = table.index[0]
        summary_data[process_name] = {
            'Top Model': top_model,
            'Best F1 Score': table.loc[top_model, 'F1 Score'],
            'Best Accuracy': table.loc[top_model, 'Accuracy'],
            'Substep Count': table.loc[top_model, 'Substep Count']
        }

    summary_df = pd.DataFrame(summary_data).T
    summary_df = summary_df.sort_values('Best F1 Score', ascending=False)
    summary_df = summary_df.round(4)

    print("\nOverall Summary Table:")
    print(summary_df.to_string())

    # Save the summary table next to the result files it was built from.
    summary_csv_name = 'process_comparison_summary.csv'
    summary_csv_path = os.path.join(results_dir, summary_csv_name)
    summary_df.to_csv(summary_csv_path)
    print(f"\nSummary table saved to {summary_csv_path}")

# Run the comparison only when this code is executed directly (its __name__
# is "__main__"), not when it is imported as a module.
if __name__ == "__main__":
    main()


Comparison Table for Process_1:
                                                           Accuracy  F1 Score  Substep Count
GPT-3.5-Value-Classification-Lean_Expert_Detailed              0.5926    0.3767           27.0
GPT-3.5-Value-Classification-Neutral_Analyst_Comprehensive     0.5185    0.3056           27.0
GPT-3.5-Value-Classification-Customer_Advocate_ValueFocused    0.4815    0.2821           27.0
GPT-3.5-Value-Classification-Business_Consultant_Strategic     0.4444    0.2818           27.0
GPT-3.5-Value-Model                                            0.5556    0.2719           27.0
GPT-3.5-Value-Classification-Neutral_Analyst_Basic             0.4444    0.2311           27.0
GPT-3.5-Value-Classification-Process_Engineer_Detailed         0.3704    0.2135           27.0
GPT-3.5-Value-Classification-Lean_Expert_Minimal               0.4074    0.2076           27.0
GPT-3.5-Value-Classification-Process_Engineer_Technical        0.4444    0.1579           27.0

Comparison Table f