In [None]:
import pandas as pd

In [None]:
import json
import os
import re
from pathlib import Path
from tqdm import tqdm
import pandas as pd

# Captures the text between <action> and </action>, with surrounding whitespace
# excluded; DOTALL lets the captured text span multiple lines.
ACTION_PATTERN = re.compile(r"<action>\s*(.*?)\s*</action>", re.DOTALL)
# Same shape as ACTION_PATTERN, but for <tool_call>...</tool_call> tags.
TOOL_CALL_PATTERN = re.compile(r"<tool_call>\s*(.*?)\s*</tool_call>", re.DOTALL)
# Captures the text following "Action:" (used for the uitars model). The lazy
# group stops at the next newline or at the end of the text.
ACTION_COLON_PATTERN = re.compile(r"Action:\s*(.+?)(?:\n|$)", re.DOTALL)

def extract_action_blocks(text: str):
    """Collect the stripped body of every <action>...</action> tag found in text.

    An empty or None text yields an empty list.
    """
    found = []
    if text:
        for body in ACTION_PATTERN.findall(text):
            found.append(body.strip())
    return found

def extract_tool_call_blocks(text: str):
    """Collect the stripped body of every <tool_call>...</tool_call> tag found in text.

    An empty or None text yields an empty list.
    """
    # Guard first so falsy input never reaches the regex.
    return list(map(str.strip, TOOL_CALL_PATTERN.findall(text))) if text else []

def extract_action_colon(text: str):
    """Collect the stripped text following each 'Action:' marker (uitars model format).

    An empty or None text yields an empty list.
    """
    if not text:
        return []
    # group(1) is the single capture group, i.e. what findall() would return.
    return [hit.group(1).strip() for hit in ACTION_COLON_PATTERN.finditer(text)]

def extract_action_by_model(text: str, model_name: str):
    """
    Pick the extraction routine that matches the model and apply it to text

    Args:
        text: Response text
        model_name: Model name (matched case-insensitively by substring)

    Returns:
        Extracted action list
    """
    lowered = model_name.lower()

    # Checked in order; the first marker contained in the model name wins.
    extractors_by_marker = (
        ("qwen3vl-235b", extract_tool_call_blocks),  # <tool_call> tag
        ("uitars", extract_action_colon),            # "Action:" format
    )
    for marker, extractor in extractors_by_marker:
        if marker in lowered:
            return extractor(text)

    # Anything else (internvl3.5_30B_MoE, etc.) falls back to the <action> tag
    return extract_action_blocks(text)

def load_success_rates(base_path: Path):
    """
    Read a result_*.csv file and return a task -> mean_success_rate mapping

    Args:
        base_path: Directory searched (non-recursively) for result_*.csv files

    Returns:
        Dict built from the 'task' and 'mean_success_rate' columns, with the
        "========= Average =========" summary row removed. Empty dict when no
        result_*.csv file exists in base_path.
    """
    # glob() yields matches in arbitrary filesystem order; sort so the file
    # that gets picked is the same on every run and every machine.
    csv_files = sorted(base_path.glob("result_*.csv"))

    if not csv_files:
        print(f"Warning: No result_*.csv file found in {base_path}")
        return {}

    # Only one file is read: the first in sorted order
    csv_file = csv_files[0]
    if len(csv_files) > 1:
        print(
            f"Warning: {len(csv_files)} result_*.csv files found in {base_path}, "
            f"using {csv_file.name}"
        )
    print(f"Reading success rate file: {csv_file}")

    df = pd.read_csv(csv_file)

    # Drop the aggregate row so only real tasks end up in the mapping
    df = df[df['task'] != '========= Average =========']

    return dict(zip(df['task'], df['mean_success_rate']))

def transform_osworld_trajectories(base_dir: str, output_file: str):
    """
    Read every task's result.json under base_dir and write them as one jsonl file

    Each immediate sub-directory of base_dir that holds a result.json with a
    non-empty "trajectory" list becomes one record (one line) in the output.

    Args:
        base_dir: Base directory path; its last path component is used as the
            model name and selects the action extraction format
        output_file: Output jsonl file path (parent directories are created)

    Returns:
        List of the trajectory records that were written
    """
    base_path = Path(base_dir)
    model_name = base_path.name

    # Load success rate data
    success_rates = load_success_rates(base_path)

    all_trajectories = []

    # iterdir() order is arbitrary; sort so the output line order is reproducible
    for task_folder in tqdm(sorted(base_path.iterdir()), desc="Processing tasks"):
        if not task_folder.is_dir():
            continue

        task_name = task_folder.name

        traj_file = task_folder / "result.json"
        if not traj_file.exists():
            continue

        # Read result.json with an explicit encoding instead of the locale default
        with open(traj_file, 'r', encoding='utf-8') as f:
            data = json.load(f)
        steps = data.get("trajectory", [])

        if not steps:
            continue

        # Tasks missing from the success-rate file count as failed
        success = success_rates.get(task_name, 0.0)
        # Only a rate of at least 1.0 is treated as success
        is_success = bool(success >= 1.0)

        # Build trajectory data
        trajectory = []

        for i, step in enumerate(steps):
            step_idx = step.get("step", i + 1)

            # Screenshot files are looked up by 1-based position in the step list
            screenshot_name = f"screenshot_{i+1}.png"
            obs_file = task_folder / screenshot_name
            if obs_file.exists():
                observation = str(obs_file.absolute())
            else:
                observation = "empty"

            # Extract the action list using the format matching this model
            response_text = step.get("response", "")
            action_text = extract_action_by_model(response_text, model_name)

            trajectory.append({
                "step_idx": step_idx,
                "observation": observation,
                "action": action_text,
            })

        # Build complete trajectory record
        traj_record = {
            "dataset": "androidworld",
            "model": model_name,
            "task": task_name,
            "data_dir": str(task_folder.absolute()),
            "success": is_success,  # Add success identifier
            "trajectory": trajectory
        }

        all_trajectories.append(traj_record)

    # Save to jsonl file
    output_path = Path(output_file)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    # ensure_ascii=False emits raw non-ASCII characters, so the file must be
    # opened as UTF-8 rather than with the platform's default encoding
    with open(output_path, 'w', encoding='utf-8') as f:
        for traj in all_trajectories:
            f.write(json.dumps(traj, ensure_ascii=False) + '\n')

    success_count = sum(1 for t in all_trajectories if t['success'])
    print(f"\nTotal trajectories processed: {len(all_trajectories)}")
    print(f"Successful trajectories: {success_count}")
    print(f"Failed trajectories: {len(all_trajectories) - success_count}")
    print(f"Results saved to: {output_path.absolute()}")

    return all_trajectories

In [None]:
# Results directory of the model to convert; the jsonl is written next to it
base_dir = "androidworld/qwen3vl-235b"
output_file = f"{base_dir}_transformed_trajectories.jsonl"

trajectories = transform_osworld_trajectories(
    base_dir=base_dir,
    output_file=output_file,
)


In [None]:

# Pretty-print the first converted record (skipped when nothing was converted)
if trajectories:
    print(
        "\nFirst trajectory example:",
        json.dumps(trajectories[0], indent=2, ensure_ascii=False),
        sep="\n",
    )

In [None]:
# Number of trajectory records held in `trajectories`
len(trajectories)