In [None]:
import pandas as pd

In [None]:
import json
import os
import re
from pathlib import Path
from tqdm import tqdm

from collections import defaultdict


def analyze_loop_success(base_dir):
    """
    Compare success statistics of trajectories with and without loops.

    Expects the layout ``base_dir/<task>/<sample>/analysis.json``. Each
    ``analysis.json`` is read as a JSON object: its ``"success"`` value
    (0 when the key is absent) is the trajectory's outcome, and a non-empty
    ``"error_atomic_actions"`` entry marks the trajectory as having a loop.

    Files that cannot be read or parsed, or whose ``"success"`` value is not
    a number, are reported on stdout and left out of the statistics.

    Args:
        base_dir: Directory (str or path-like) containing one folder per task.

    Returns:
        An empty ``pd.DataFrame`` if ``base_dir`` does not exist. Otherwise a
        DataFrame indexed by "With Loop" / "Without Loop" with the columns
        "Count", "Success Count" (outcomes > 0), "Success Num" (sum of the
        outcomes) and "Success Rate" (mean outcome formatted as a percentage
        string such as "66.7%").
    """
    base_path = Path(base_dir)
    print(f"Analyzing directory: {base_path}")

    if not base_path.exists():
        print(f"Directory does not exist: {base_path}")
        return pd.DataFrame()

    def summarize(outcomes):
        # One table row; key order fixes the column order of the result.
        count = len(outcomes)
        total = sum(outcomes)
        return {
            "Count": count,
            "Success Count": sum(1 for outcome in outcomes if outcome > 0),
            "Success Num": total,
            "Success Rate": (total / count * 100) if count else 0,
        }

    # Collect statistics
    with_loop_success = []
    without_loop_success = []

    # Iterate through all task folders (materialized so tqdm knows the total)
    for task_folder in tqdm(list(base_path.iterdir()), desc="Analyzing tasks"):
        if not task_folder.is_dir():
            continue

        # Iterate through all sample folders under each task
        for sample_folder in task_folder.iterdir():
            if not sample_folder.is_dir():
                continue

            analysis_file = sample_folder / "analysis.json"
            if not analysis_file.exists():
                continue

            try:
                # Decode explicitly as UTF-8 instead of the platform locale,
                # which is not UTF-8 on every system.
                with open(analysis_file, 'r', encoding='utf-8') as f:
                    data = json.load(f)

                success = data.get("success", 0)
                # Reject e.g. null or strings here, inside the per-file guard,
                # so one bad file cannot crash the aggregation further down.
                if not isinstance(success, (int, float)):
                    raise TypeError(f"'success' must be a number, got {success!r}")

                has_loop = "error_atomic_actions" in data and len(data["error_atomic_actions"]) > 0

                if has_loop:
                    with_loop_success.append(success)
                else:
                    without_loop_success.append(success)

            except Exception as e:
                # Best effort: report the broken file and keep going.
                print(f"Error reading file {analysis_file}: {e}")

    # Calculate statistics
    results = {
        "With Loop": summarize(with_loop_success),
        "Without Loop": summarize(without_loop_success),
    }

    df = pd.DataFrame(results).T
    df['Success Rate'] = df['Success Rate'].apply(lambda x: f"{x:.1f}%")

    return df

In [None]:
def analyze_loop_success_aw(base_dir):
    """
    Compare success statistics of trajectories with and without loops
    for a flat task layout.

    Expects the layout ``base_dir/<task>/analysis.json``, i.e. the analysis
    file sits directly inside each task folder with no per-sample level.
    Each ``analysis.json`` is read as a JSON object: its ``"success"`` value
    (0 when the key is absent) is the trajectory's outcome, and a non-empty
    ``"error_atomic_actions"`` entry marks the trajectory as having a loop.

    Files that cannot be read or parsed, or whose ``"success"`` value is not
    a number, are reported on stdout and left out of the statistics.

    Args:
        base_dir: Directory (str or path-like) containing one folder per task.

    Returns:
        An empty ``pd.DataFrame`` if ``base_dir`` does not exist. Otherwise a
        DataFrame indexed by "With Loop" / "Without Loop" with the columns
        "Count", "Success Count" (outcomes > 0), "Success Num" (sum of the
        outcomes) and "Success Rate" (mean outcome formatted as a percentage
        string such as "66.7%").
    """
    base_path = Path(base_dir)
    print(f"Analyzing directory: {base_path}")

    if not base_path.exists():
        print(f"Directory does not exist: {base_path}")
        return pd.DataFrame()

    def summarize(outcomes):
        # One table row; key order fixes the column order of the result.
        count = len(outcomes)
        total = sum(outcomes)
        return {
            "Count": count,
            "Success Count": sum(1 for outcome in outcomes if outcome > 0),
            "Success Num": total,
            "Success Rate": (total / count * 100) if count else 0,
        }

    # Collect statistics
    with_loop_success = []
    without_loop_success = []

    # Iterate through all task folders (materialized so tqdm knows the total)
    for task_folder in tqdm(list(base_path.iterdir()), desc="Analyzing tasks"):
        if not task_folder.is_dir():
            continue

        analysis_file = task_folder / "analysis.json"
        if not analysis_file.exists():
            continue

        try:
            # Decode explicitly as UTF-8 instead of the platform locale,
            # which is not UTF-8 on every system.
            with open(analysis_file, 'r', encoding='utf-8') as f:
                data = json.load(f)

            success = data.get("success", 0)
            # Reject e.g. null or strings here, inside the per-file guard,
            # so one bad file cannot crash the aggregation further down.
            if not isinstance(success, (int, float)):
                raise TypeError(f"'success' must be a number, got {success!r}")

            has_loop = "error_atomic_actions" in data and len(data["error_atomic_actions"]) > 0

            if has_loop:
                with_loop_success.append(success)
            else:
                without_loop_success.append(success)

        except Exception as e:
            # Best effort: report the broken file and keep going.
            print(f"Error reading file {analysis_file}: {e}")

    # Calculate statistics
    results = {
        "With Loop": summarize(with_loop_success),
        "Without Loop": summarize(without_loop_success),
    }

    df = pd.DataFrame(results).T
    df['Success Rate'] = df['Success Rate'].apply(lambda x: f"{x:.1f}%")

    return df

In [None]:
# Loop vs. no-loop success statistics for every model of the "waa" dataset.
# Layout read by analyze_loop_success:
#   analysis_third_part/waa/<model>/<task>/<sample>/analysis.json
# Writes one tab-separated file per model plus one aggregated file into the
# dataset directory itself.
root_dir = "analysis_third_part"
dataset = "waa"
dataset_path = Path(root_dir) / dataset

if not dataset_path.exists():
    print(f"Dataset path not found: {dataset_path}")
else:
    print(f"Starting to analyze dataset directory: {dataset_path}")

    # Per-model tables, each tagged with its model name, for the final merge
    all_models_data = []

    # iterdir() yields entries in arbitrary order; sort so the aggregated
    # table has the same row order on every run and every machine.
    for model_dir in sorted(dataset_path.iterdir()):
        # Also skips the result files this cell wrote on earlier runs
        if not model_dir.is_dir():
            continue

        model_name = model_dir.name
        print(f"\n{'='*30}\nProcessing model: {model_name}")

        # Run analysis
        success_stats_df = analyze_loop_success(model_dir)

        if success_stats_df.empty:
            print(f"Model {model_name} has no data or directory is empty.")
            continue

        print(f"Loop success rate statistics ({model_name}):")
        print(success_stats_df)

        # Save single model result (tab-separated despite the .csv suffix)
        output_file = dataset_path / f"success_loop_stats_{model_name}.csv"
        success_stats_df.to_csv(output_file, sep='\t')
        print(f"Results saved to: {output_file}")

        # Add the model name as the first column; this happens after saving,
        # so the per-model file does not contain it.
        success_stats_df['Model'] = model_name
        success_stats_df = success_stats_df[['Model'] + [col for col in success_stats_df.columns if col != 'Model']]
        all_models_data.append(success_stats_df.reset_index().rename(columns={'index': 'Category'}))

    # Aggregate all model data
    if all_models_data:
        combined_df = pd.concat(all_models_data, ignore_index=True)

        # Save aggregated results
        summary_file = dataset_path / "success_loop_stats_all_models.csv"
        combined_df.to_csv(summary_file, index=False, sep='\t')
        print(f"\n{'='*50}")
        print(f"All models aggregated results saved to: {summary_file}")
        print("\nAggregated statistics:")
        print(combined_df)
    else:
        print("\nNo model data found.")

In [None]:
# Loop vs. no-loop success statistics for every model of the "androidworld"
# dataset. Layout read by analyze_loop_success_aw:
#   analysis_third_part/androidworld/<model>/<task>/analysis.json
# Writes one tab-separated file per model plus one aggregated file into the
# dataset directory itself.
root_dir = "analysis_third_part"
dataset = "androidworld"
dataset_path = Path(root_dir) / dataset

if not dataset_path.exists():
    print(f"Dataset path not found: {dataset_path}")
else:
    print(f"Starting to analyze dataset directory: {dataset_path}")

    # Per-model tables, each tagged with its model name, for the final merge
    all_models_data = []

    # iterdir() yields entries in arbitrary order; sort so the aggregated
    # table has the same row order on every run and every machine.
    for model_dir in sorted(dataset_path.iterdir()):
        # Also skips the result files this cell wrote on earlier runs
        if not model_dir.is_dir():
            continue

        model_name = model_dir.name
        print(f"\n{'='*30}\nProcessing model: {model_name}")

        # Run analysis
        success_stats_df = analyze_loop_success_aw(model_dir)

        if success_stats_df.empty:
            print(f"Model {model_name} has no data or directory is empty.")
            continue

        print(f"Loop success rate statistics ({model_name}):")
        print(success_stats_df)

        # Save single model result (tab-separated despite the .csv suffix)
        output_file = dataset_path / f"success_loop_stats_{model_name}.csv"
        success_stats_df.to_csv(output_file, sep='\t')
        print(f"Results saved to: {output_file}")

        # Add the model name as the first column; this happens after saving,
        # so the per-model file does not contain it.
        success_stats_df['Model'] = model_name
        success_stats_df = success_stats_df[['Model'] + [col for col in success_stats_df.columns if col != 'Model']]
        all_models_data.append(success_stats_df.reset_index().rename(columns={'index': 'Category'}))

    # Aggregate all model data
    if all_models_data:
        combined_df = pd.concat(all_models_data, ignore_index=True)

        # Save aggregated results
        summary_file = dataset_path / "success_loop_stats_all_models.csv"
        combined_df.to_csv(summary_file, index=False, sep='\t')
        print(f"\n{'='*50}")
        print(f"All models aggregated results saved to: {summary_file}")
        print("\nAggregated statistics:")
        print(combined_df)
    else:
        print("\nNo model data found.")