In [33]:
import os
import json
import base64

def save_policy_info_to_task_status_folder(folder_path):
    output_base = "./train_data"

    # 遍历指定文件夹下的所有 .json 文件
    for filename in os.listdir(folder_path):
        if filename.endswith('.json'):
            file_path = os.path.join(folder_path, filename)

            try:
                with open(file_path, 'r', encoding='utf-8') as f:
                    data = json.load(f)  # 加载整个 JSON 文件内容

                if isinstance(data, list) and len(data) > 0:
                    last_element = data[-1].get('exp_data', {})  # 获取 exp_data
                    actions = last_element.get("actions", [])
                    if isinstance(actions, list) and len(actions) > 0:
                        first_action = actions[0]
                        policy_info_str = first_action.get("policy_info")
                        
                        if isinstance(policy_info_str, str):
                            try:
                                policy_info = json.loads(policy_info_str)
                                task = policy_info.get("task").split("Task: ",1)[-1]
                                status = policy_info.get("success")
                                actions = policy_info.get("actions")
                                thoughts = policy_info.get("thoughts")
                                images = policy_info.get("images")
                                if len(images) != len(actions) - 1 :  # if image can't map to action, skip.
                                    continue 
                               
                                if task is not None and status is not None:
                                    # 创建目标目录
                                    task_dir = os.path.join(output_base, task)
                                    status_dir = os.path.join(task_dir, str(status))
                                    images_dir = os.path.join(status_dir, "images")
                                    os.makedirs(images_dir, exist_ok=True)

                                    # 转换 JSON 数据
                                    output_list = []
                                    total_steps = len(actions)
                                    previous_actions = []
                                    previous_thoughts = []
                                    previous_images = []

                                    for idx, action in enumerate(actions):
                                     
                                        step = {
                                            "task": task,
                                            "n_step": total_steps,
                                            "cur_step": idx + 1,
                                            "previous_actions": previous_actions.copy(),
                                            "previous_thoughts": previous_thoughts.copy(),
                                            "previous_images": previous_images.copy(),
                                            "step_thought": thoughts[idx],
                                            "step_action": actions[idx],
                                        }

                                        # 保存图片并更新路径
                                        if idx + 1 < total_steps: # 
                                            image_data = images[idx]
                                            if image_data:
                                                # 构建文件路径（改为 .png）
                                                image_path = os.path.join(images_dir, f"{filename}_{idx}.png")
                                            
                                                # 解码 Base64 数据
                                                header, encoded = image_data.split(",", 1)
                                                image_bytes = base64.b64decode(encoded)
                                            
                                                # 用 PIL 打开图像
                                                from io import BytesIO
                                                img = Image.open(BytesIO(image_bytes))
                                                
                                                # 保存为 PNG 并设置 DPI
                                                img.save(image_path, dpi=(300, 300))  
                                            
                                                # 记录相对路径
                                                previous_images.append(os.path.relpath(image_path, status_dir))

                                            # 更新 previous_actions 和 previous_thoughts
                                            previous_actions.append(actions[idx])
                                            previous_thoughts.append(thoughts[idx])
    
                                        output_list.append(step)

                                    # 保存转换后的 JSON 文件
                                    output_json_path = os.path.join(status_dir, filename)
                                    with open(output_json_path, 'w', encoding='utf-8') as out_f:
                                        json.dump(output_list, out_f, indent=4)

                                    print(f"Saved policy_info from {filename} to {output_json_path}")
                                else:
                                    print(f"Missing 'task' or 'status' in policy_info of {filename}")

                            except json.JSONDecodeError as e:
                                print(f"Error decoding policy_info in {filename}: {e}")
                        else:
                            print(f"policy_info is not a string in {filename}")
                    else:
                        print(f"No actions or actions is not a list in {filename}")
                else:
                    print(f"Data is not a non-empty list in {filename}")

            except Exception as e:
                print(f"Error processing {filename}: {e}")

# 使用示例
folder_path = "/Users/cl/PycharmProjects/git/AWorld/aworlddistributed/trace_data/20250617/30.230.180.55/replays"  # 替换为你的目标文件夹路径(replays)
save_policy_info_to_task_status_folder(folder_path)

Saved policy_info from task_replay_52e3ee38-6191-443d-b7eb-81a0f2469244.json to ./train_data/Find the procedure to get the Doctoral Loan and when the application will open for the year 2023
/False/task_replay_52e3ee38-6191-443d-b7eb-81a0f2469244.json
Saved policy_info from task_replay_b5df3d4f-043f-4260-95e2-651334d73fca.json to ./train_data/Find the procedure to get the Doctoral Loan and when the application will open for the year 2023
/False/task_replay_b5df3d4f-043f-4260-95e2-651334d73fca.json
Saved policy_info from task_replay_786f3a91-4da3-4112-8d29-1e480b5f8fce.json to ./train_data/Find the procedure to get the Doctoral Loan and when the application will open for the year 2023
/False/task_replay_786f3a91-4da3-4112-8d29-1e480b5f8fce.json
Saved policy_info from task_replay_03250e61-1fac-4274-b8b8-e089ae948a4e.json to ./train_data/Find the procedure to get the Doctoral Loan and when the application will open for the year 2023
/False/task_replay_03250e61-1fac-4274-b8b8-e089ae948a4e.j