In [None]:
import os
import json
import glob

# --- Configuration ---
# Root directory scanned by main(); its entries matching "webarena*" are
# treated as per-task result folders.
EXP_ROOT_DIR = "parallel_run_indexed_results"
# Name of the JSON summary file that main() looks up for each task folder.
RESULT_FILENAME = "summary_info.json"
# --- End Configuration ---

def find_result_file(base_dir: str, filename: str = "summary_info.json") -> "str | None":
    """
    Locate ``filename`` in ``base_dir`` or in one of its immediate subdirectories.

    The file directly inside ``base_dir`` takes precedence. Otherwise the
    immediate subdirectories are tried in reverse lexicographic order of their
    names and the first one containing the file wins; if the subdirectories
    carry sortable timestamp names, that is the most recent run.

    Args:
        base_dir: Directory to search.
        filename: Name of the result file to look for.

    Returns:
        The path to the result file, or ``None`` if ``base_dir`` cannot be
        listed (missing, not a directory, unreadable) or no candidate exists.
    """
    direct_path = os.path.join(base_dir, filename)
    # isfile() rather than exists(): a directory that happens to carry the
    # result file's name must not be handed to the caller as a readable file.
    if os.path.isfile(direct_path):
        return direct_path

    try:
        subdirs = [
            entry
            for entry in os.listdir(base_dir)
            if os.path.isdir(os.path.join(base_dir, entry))
        ]
    except OSError:
        # Covers a missing base_dir, a base_dir that is a regular file, and
        # permission problems alike.
        return None

    for subdir in sorted(subdirs, reverse=True):
        nested_path = os.path.join(base_dir, subdir, filename)
        if os.path.isfile(nested_path):
            return nested_path

    return None

def _classify_result(data):
    """Return "SUCCESS" or "FAILURE" for one parsed summary document.

    A non-null "err_msg" always means failure; otherwise the task succeeded
    only if "cum_reward" (default 0.0) is strictly positive.

    Raises:
        ValueError: If ``data`` is not a JSON object, or if "cum_reward" has
            to be inspected but is not a number.
    """
    if not isinstance(data, dict):
        raise ValueError(f"expected a JSON object, got {type(data).__name__}")

    if data.get("err_msg") is not None:
        return "FAILURE"

    reward = data.get("cum_reward", 0.0)
    if not isinstance(reward, (int, float)):
        raise ValueError(f"'cum_reward' is not a number: {reward!r}")
    return "SUCCESS" if reward > 0.0 else "FAILURE"


def main():
    """Scan EXP_ROOT_DIR for "webarena*" task folders and print a summary.

    For every task folder the result file (RESULT_FILENAME) is located with
    find_result_file(), parsed as JSON and classified as SUCCESS or FAILURE.
    Folders without a result file, or whose result file cannot be read,
    parsed or interpreted, are counted as missing/corrupted. Nothing is
    returned; all output goes to stdout.
    """
    if not os.path.exists(EXP_ROOT_DIR):
        print(f"Error: Directory '{EXP_ROOT_DIR}' does not exist.")
        return

    print(f"Scanning results in '{EXP_ROOT_DIR}'...\n")

    # Only directories are task folders; stray files matching the pattern
    # (logs, archives, ...) would otherwise inflate the scanned/missing counts.
    task_dirs = sorted(
        path
        for path in glob.glob(os.path.join(EXP_ROOT_DIR, "webarena*"))
        if os.path.isdir(path)
    )

    success_count = 0
    fail_count = 0
    missing_count = 0

    for task_dir in task_dirs:
        task_id = os.path.basename(task_dir)

        json_path = find_result_file(task_dir, RESULT_FILENAME)
        if not json_path:
            missing_count += 1
            continue

        try:
            with open(json_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
            status = _classify_result(data)
        except (OSError, ValueError) as e:
            # ValueError covers json.JSONDecodeError, UnicodeDecodeError and
            # the schema problems reported by _classify_result().
            print(f"[{task_id}] JSON Read Error: {e}")
            missing_count += 1
            continue

        if status == "SUCCESS":
            success_count += 1
        else:
            fail_count += 1

    total_tasks = success_count + fail_count

    # --- Print Statistics ---
    print("=" * 60)
    print("              WEB ARENA RESULT ANALYSIS              ")
    print("=" * 60)

    if total_tasks > 0:
        success_rate = (success_count / total_tasks) * 100
        print(f"Total Folders Scanned : {len(task_dirs)}")
        print(f"Valid Results Found   : {total_tasks}")
        print(f"Missing / Corrupted   : {missing_count}")
        print("-" * 60)
        print(f"SUCCESS (Reward > 0)  : {success_count}")
        print(f"FAILURE (Reward = 0)  : {fail_count}")
        print("-" * 60)
        print(f"SUCCESS RATE          : {success_rate:.2f}%")
    else:
        print(f"No valid {RESULT_FILENAME} files found.")
        print(f"Please check if {EXP_ROOT_DIR} contains the correct data.")

    print("=" * 60)

# Run the analysis only when this file is executed directly, not when imported.
if __name__ == "__main__":
    main()

Scanning results in 'parallel_run_indexed_results'...

              WEB ARENA RESULT ANALYSIS              
Total Folders Scanned : 786
Valid Results Found   : 784
Missing / Corrupted   : 2
------------------------------------------------------------
SUCCESS (Reward > 0)  : 64
FAILURE (Reward = 0)  : 720
------------------------------------------------------------
SUCCESS RATE          : 8.16%
