In [1]:
import json

In [2]:
def extract_first_true_code(file_path):
    """Pull the first successfully verified code snippet out of each section.

    The file is cut into sections on a line of 50 ``=`` followed by a blank
    line, and each section is cut into blocks on a line of 20 ``=`` followed
    by a blank line. Within a section, blocks are scanned in order; the first
    block that contains ``Result = True`` and yields at least one code line
    wins.

    Code lines are the non-blank lines that come after a line starting with
    ``Code Block`` and before the first line starting with ``Result =``.

    Args:
        file_path: Path of the UTF-8 text file to parse.

    Returns:
        A list with one entry per non-blank section: the snippet as a
        newline-joined string, or ``False`` when the section has no usable
        verified block.
    """
    section_separator = "=" * 50 + "\n\n"
    block_separator = "=" * 20 + "\n\n"

    with open(file_path, "r", encoding="utf-8") as handle:
        text = handle.read()

    results = []
    for section in text.split(section_separator):
        # Whitespace-only sections produce no entry at all.
        if section.strip() == "":
            continue

        snippet = False
        for block in section.split(block_separator):
            if "Result = True" not in block:
                continue

            kept_lines = []
            inside_code = False
            for raw_line in block.split("\n"):
                if raw_line.startswith("Code Block"):
                    # Header line itself is not part of the snippet.
                    inside_code = True
                elif raw_line.startswith("Result ="):
                    break
                elif inside_code and raw_line.strip():
                    kept_lines.append(raw_line)

            if kept_lines:
                snippet = "\n".join(kept_lines)
                break  # only the first usable block of the section counts

        results.append(snippet)

    return results

In [3]:
def initialize(dir):
    """Build ``key_nodes.json`` and ``tasks.json`` for one directory.

    Reads ``{dir}/tasks_and_key_nodes.json``, a mapping of task name to a
    dict with ``objective``, ``template`` and ``key_nodes`` (a list of key
    node descriptions). For every task it also parses
    ``{dir}/code_lists/{name}.txt`` with ``extract_first_true_code``; the
    i-th parsed entry is taken as the label function of the i-th key node.

    Key nodes are shared across tasks: a description that was already
    registered reuses the existing id. A new description is registered only
    if its label function was verified (entry is not ``False``). Ids are
    consecutive integers starting at 1, stored as strings.

    Each task's nodes are linked as a simple chain. The first node has
    parent ``"0"`` and the last node has child ``"-1"``.

    Args:
        dir: Directory holding the inputs and receiving both output files.
            The name shadows the ``dir`` builtin but is kept for callers.

    Raises:
        FileNotFoundError: If the task file or a task's code list is missing.
        KeyError: If a task entry lacks one of the expected keys.
    """
    next_id = 1
    all_key_nodes = []
    # description -> id; replaces a linear scan of all_key_nodes per key node.
    # assumes descriptions are strings (hashable) -- TODO confirm
    id_by_description = {}
    all_tasks = {}

    # Explicit UTF-8 so parsing does not depend on the platform locale,
    # matching how the code lists are read.
    with open(f'{dir}/tasks_and_key_nodes.json', 'r', encoding='utf-8') as f:
        data = json.load(f)

    for name, details in data.items():
        codes = extract_first_true_code(f"{dir}/code_lists/{name}.txt")

        objective = details['objective']
        template = details['template']
        key_nodes = details['key_nodes']

        node_ids = []
        for idx, key_node in enumerate(key_nodes):
            known_id = id_by_description.get(key_node)
            if known_id is not None:
                # Same description seen before: reuse its id.
                node_ids.append(known_id)
                continue

            # A key node with no corresponding entry in the code list is
            # treated like a failed verification instead of raising IndexError.
            label_function = codes[idx] if idx < len(codes) else False
            if label_function is False:
                continue  # only verified nodes are created

            node_id = str(next_id)
            next_id += 1
            all_key_nodes.append({
                "id": node_id,
                "description": key_node,
                "label_function": label_function,
            })
            id_by_description[key_node] = node_id
            node_ids.append(node_id)

        # Chain the task's nodes; values are lists so more links can be
        # added later.
        graph = {}
        last_index = len(node_ids) - 1
        for i, node_id in enumerate(node_ids):
            graph[node_id] = {
                "parent_node_id": ["0"] if i == 0 else [node_ids[i - 1]],
                "child_node_id": ["-1"] if i == last_index else [node_ids[i + 1]],
            }

        all_tasks[name] = {
            "objective": objective,
            "template": template,
            "key_nodes": graph,
        }

    with open(f"{dir}/key_nodes.json", 'w', encoding='utf-8') as f:
        json.dump(all_key_nodes, f, indent=4)
    with open(f"{dir}/tasks.json", 'w', encoding='utf-8') as f:
        json.dump(all_tasks, f, indent=4)

In [4]:
# 这个之后还得额外写
# 合并文件
def merge_key_nodes(key1_path, key2_path, output_path):
    """Merge two key-node lists and report how the second list's ids moved.

    Nodes from the first file are kept unchanged. A node from the second
    file whose ``description`` exactly matches one in the first file is
    dropped and mapped to the existing id. Any other node is appended with a
    fresh id that continues after the largest numeric id in the first file.

    Args:
        key1_path: JSON file with the base list of key nodes.
        key2_path: JSON file with the key nodes to merge in.
        output_path: Where the merged list is written (UTF-8, indent 2).

    Returns:
        Dict mapping every id of the second file to its id in the merged
        list.

    Raises:
        ValueError: If an id in the first file is not an integer string.
    """
    with open(key1_path, 'r', encoding='utf-8') as f:
        key1 = json.load(f)
    with open(key2_path, 'r', encoding='utf-8') as f:
        key2 = json.load(f)

    # description -> id of the first file, used to detect duplicates
    existing = {node['description']: node['id'] for node in key1}
    # default=0 lets an empty first file work: new ids then start at 1
    # instead of max() raising ValueError on an empty sequence.
    next_id = max((int(node['id']) for node in key1), default=0)

    merged = list(key1)
    id_mapping = {}

    for node in key2:
        old_id = node['id']
        description = node['description']
        if description in existing:
            # Duplicate: point the old id at the node already present.
            id_mapping[old_id] = existing[description]
        else:
            next_id += 1
            new_id = str(next_id)
            # Re-keyed copy; 'id' keeps its original position in the dict.
            merged.append({**node, 'id': new_id})
            id_mapping[old_id] = new_id

    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(merged, f, indent=2, ensure_ascii=False)

    return id_mapping

def merge_tasks(tasks1_path, tasks2_path, id_mapping, output_path):
    """Combine two task files, rewriting node ids of the second one.

    Tasks of the first file are carried over as shallow copies. For each
    task of the second file, every key of its ``key_nodes`` graph and every
    entry of the ``parent_node_id`` / ``child_node_id`` lists is translated
    through ``id_mapping``. Ids absent from the mapping are left as they
    are. A task of the second file replaces a same-named task of the first.

    The graph before and after translation is printed for each task of the
    second file.

    Args:
        tasks1_path: JSON file with the base tasks.
        tasks2_path: JSON file with the tasks whose ids need translating.
        id_mapping: Dict of old id -> new id.
        output_path: Where the combined tasks are written (UTF-8, indent 2).
    """
    def _read_json(path):
        with open(path, 'r', encoding='utf-8') as handle:
            return json.load(handle)

    base_tasks = _read_json(tasks1_path)
    incoming_tasks = _read_json(tasks2_path)

    # Tasks of the first file go in unchanged (shallow copy per task).
    combined = {task_name: body.copy() for task_name, body in base_tasks.items()}

    link_fields = ('parent_node_id', 'child_node_id')
    for task_name, body in incoming_tasks.items():
        old_graph = body.get('key_nodes', {})
        print("原来的内容")
        print(old_graph)

        new_graph = {}
        for old_id, meta in old_graph.items():
            translated = meta.copy()
            for field in link_fields:
                if field in translated:
                    translated[field] = [id_mapping.get(ref, ref) for ref in meta[field]]
            new_graph[id_mapping.get(old_id, old_id)] = translated

        print("新内容")
        print(new_graph)

        # Copy the task and swap in the translated graph.
        replacement = body.copy()
        replacement['key_nodes'] = new_graph
        combined[task_name] = replacement

    with open(output_path, 'w', encoding='utf-8') as handle:
        json.dump(combined, handle, indent=2, ensure_ascii=False)


def merge_main(dir1, dir2):
    """Merge the key nodes and tasks of ``dir1`` into the files of ``dir2``.

    ``{dir2}/key_nodes.json`` and ``{dir2}/tasks.json`` are both inputs and
    outputs: each is read, merged with its counterpart from ``dir1``, and
    then overwritten with the merged result.

    Args:
        dir1: Directory providing the base ``key_nodes.json`` / ``tasks.json``.
        dir2: Directory whose two files are merged into and rewritten.
    """
    # dir2's files are read first and then replaced by the merged output.
    keys_in_place = f'{dir2}/key_nodes.json'
    tasks_in_place = f'{dir2}/tasks.json'

    # Step 1: merge key nodes and learn how dir2's node ids were reassigned.
    id_map = merge_key_nodes(f'{dir1}/key_nodes.json', keys_in_place, keys_in_place)

    # Step 2: merge tasks, translating only the key-node references.
    merge_tasks(f'{dir1}/tasks.json', tasks_in_place, id_map, tasks_in_place)

In [None]:
# dir = "json_files/OS-Atlas/InnovAll/Iter3"
# initialize(dir)
# dir_before = "json_files/OS-Atlas/InnovAll/Iter2"
# merge_main(dir_before, dir)

# dir = "json_files/GUI-R1/InnovAll/Iter3"
# initialize(dir)
# dir_before = "json_files/GUI-R1/InnovAll/Iter2"
# merge_main(dir_before, dir)

In [None]:
def add_key_node(name, key_nodes, output_file, dir):
    """Add a chain of key nodes to an existing task and persist the result.

    Loads ``{dir}/key_nodes.json`` and ``{dir}/tasks.json``, parses
    ``{output_file}/{name}.txt`` with ``extract_first_true_code``, and then:

    1. Resolves every description in ``key_nodes`` to a node id. A
       description that matches an existing node case-insensitively reuses
       that node. Otherwise a new node is created, but only if the entry at
       the same index of the parsed code list is a verified label function
       (not ``False``).
    2. Links the resolved ids into a chain (first parent ``"0"``, last child
       ``"-1"``).
    3. Merges that chain into the task's existing graph by taking, per node,
       the union of parent ids and of child ids.
    4. Writes both JSON files back.

    Merged nodes and their parent/child lists keep first-seen order
    (existing graph first, then the new chain), so the written file is
    identical from run to run.

    Args:
        name: Task name; must already be a key of ``tasks.json``.
        key_nodes: Ordered list of key node descriptions to add.
        output_file: Directory containing ``{name}.txt`` (used as a
            directory despite its name).
        dir: Directory holding ``key_nodes.json`` and ``tasks.json``. The
            name shadows the ``dir`` builtin but is kept for callers.

    Raises:
        FileNotFoundError: If one of the input files is missing.
        KeyError: If ``name`` is not present in ``tasks.json``.
    """
    # Explicit UTF-8 so reads/writes do not depend on the platform locale.
    with open(f"{dir}/key_nodes.json", 'r', encoding='utf-8') as f:
        all_key_nodes = json.load(f)
    with open(f"{dir}/tasks.json", 'r', encoding='utf-8') as f:
        all_tasks = json.load(f)

    codes = extract_first_true_code(f"{output_file}/{name}.txt")

    # Next free id: one past the current maximum, or 1 for an empty list.
    current_id = max((int(node['id']) for node in all_key_nodes), default=0) + 1

    # Lower-cased description -> id. setdefault keeps the first node when
    # several existing nodes differ only by case.
    id_by_description = {}
    for node in all_key_nodes:
        id_by_description.setdefault(node['description'].lower(), node['id'])

    node_ids = []
    for idx, key_node in enumerate(key_nodes):
        lookup_key = key_node.lower()
        known_id = id_by_description.get(lookup_key)
        if known_id is not None:
            node_ids.append(known_id)
            continue

        # A key node with no corresponding entry in the code list is treated
        # like a failed verification instead of raising IndexError.
        label_function = codes[idx] if idx < len(codes) else False
        if label_function is False:
            continue  # only verified nodes are created

        new_id = str(current_id)
        current_id += 1
        all_key_nodes.append({
            "id": new_id,
            "description": key_node,
            "label_function": label_function,
        })
        id_by_description[lookup_key] = new_id
        node_ids.append(new_id)

    # Chain for the nodes being added.
    new_graph = {}
    last_index = len(node_ids) - 1
    for i, node_id in enumerate(node_ids):
        new_graph[node_id] = {
            "parent_node_id": ["0"] if i == 0 else [node_ids[i - 1]],
            "child_node_id": ["-1"] if i == last_index else [node_ids[i + 1]],
        }

    existing_graph = all_tasks[name]["key_nodes"]
    print("existing_graph:", existing_graph)
    print("new_graph:", new_graph)

    # Union of both graphs. dict.fromkeys de-duplicates while preserving
    # insertion order; a set would make the output order vary between runs.
    merged_graph = {}
    for node_id in dict.fromkeys([*existing_graph, *new_graph]):
        parent_nodes = []
        child_nodes = []
        for graph in (existing_graph, new_graph):
            if node_id in graph:
                parent_nodes.extend(graph[node_id]["parent_node_id"])
                child_nodes.extend(graph[node_id]["child_node_id"])
        merged_graph[node_id] = {
            "parent_node_id": list(dict.fromkeys(parent_nodes)),
            "child_node_id": list(dict.fromkeys(child_nodes)),
        }

    print("merged_graph:", merged_graph)

    all_tasks[name]["key_nodes"] = merged_graph

    with open(f"{dir}/key_nodes.json", 'w', encoding='utf-8') as f:
        json.dump(all_key_nodes, f, indent=4)
    with open(f"{dir}/tasks.json", 'w', encoding='utf-8') as f:
        json.dump(all_tasks, f, indent=4)