Heuristically counting the overcommitment and undercommitment rates from the experiments.

In [1]:
import json
from pathlib import Path

from redel.utils import read_jsonl

# Base directory holding the experiment outputs; the per-benchmark system directories are resolved relative to it.
# NOTE: this is an absolute, machine-specific path -- point it at your own experiments directory before running.
EXPERIMENTS = Path("/Users/andrew/Desktop/Code/kanpai/experiments")


def is_overcommitted(fp, overcommitment_threshold):
    """Return whether the run saved at ``fp`` counts as overcommitted.

    The file at ``fp`` is parsed as JSON and the number of entries under its
    ``"state"`` key is compared against ``overcommitment_threshold``: a run with
    that many entries or fewer is considered overcommitted.
    """
    with open(fp) as state_file:
        node_count = len(json.load(state_file)["state"])
    return node_count <= overcommitment_threshold


def is_undercommitted(fp, undercommitment_threshold):
    """Return whether the run saved at ``fp`` counts as undercommitted.

    The file at ``fp`` is parsed as JSON; its ``"state"`` list holds nodes with
    ``"id"``, ``"depth"`` and ``"children"`` (a list of child ids). Starting from
    the node whose depth is 0, the tree is walked depth-first while tracking the
    length of the current run of consecutive nodes that have at most one child.
    The run resets to zero below any node with two or more children.

    The run is only evaluated at leaves: a leaf reports True when the run ending
    at it (the leaf included) is at least ``undercommitment_threshold`` long, and
    the overall result is True if any leaf reports True.

    NOTE(review): a single-child chain that ends at a branching node rather than
    at a leaf is therefore not counted -- confirm this matches the intended
    definition of undercommitment.
    """
    with open(fp) as state_file:
        saved = json.load(state_file)

    all_nodes = saved["state"]
    by_id = {entry["id"]: entry for entry in all_nodes}
    root = next(entry for entry in all_nodes if entry["depth"] == 0)

    def ends_in_long_chain(current, run_length):
        # run_length = number of consecutive <=1-child ancestors directly above `current`
        child_ids = current["children"]
        continues_run = len(child_ids) <= 1
        if not child_ids:
            # leaf: it extends the run by one, and this is the only place the threshold is checked
            return run_length + 1 >= undercommitment_threshold

        # a branching node breaks the run for everything beneath it
        next_run = run_length + 1 if continues_run else 0
        kids = [by_id[child_id] for child_id in child_ids]
        return any(ends_in_long_chain(kid, next_run) for kid in kids)

    return ends_in_long_chain(root, 0)



In [6]:
import json


def count_system(fp, overcommitment_threshold=2, undercommitment_threshold=3):
    """Count overcommitted and undercommitted runs for one system and print the rates.

    Reads ``fp / "results.jsonl"``. For every record, the run's state file is
    expected at ``fp / <stem of the record's "log_dir"> / "state.json"`` and is
    classified with ``is_overcommitted`` and ``is_undercommitted``.

    Args:
        fp: ``Path`` of the system's experiment directory.
        overcommitment_threshold: forwarded to ``is_overcommitted``.
        undercommitment_threshold: forwarded to ``is_undercommitted``.

    Returns:
        ``(oc_count, uc_count, n)`` where ``n`` is the number of records in
        ``results.jsonl``. If ``fp`` does not exist, ``(0, 0, 0)`` is returned.
    """
    if not fp.exists():
        # Always hand back a 3-tuple: every caller unpacks the result and sums it
        # across systems, so a bare None would fail with a TypeError at the call site.
        print(f"========== {fp} ==========")
        print("Skipped: experiment directory does not exist")
        return 0, 0, 0

    # get all state paths in system
    state_paths = [
        fp / Path(result["log_dir"]).stem / "state.json"
        for result in read_jsonl(fp / "results.jsonl")
    ]

    n = len(state_paths)
    oc_count = 0
    uc_count = 0
    for state_path in state_paths:
        if is_overcommitted(state_path, overcommitment_threshold):
            oc_count += 1
        if is_undercommitted(state_path, undercommitment_threshold):
            uc_count += 1

    def rate(count):
        # an empty results file has no defined rate; report nan instead of dividing by zero
        return count / n if n else float("nan")

    print(f"========== {fp} ==========")
    print(f"Overcommitment rate: {rate(oc_count)} ({oc_count} / {n})")
    print(f"Undercommitment rate: {rate(uc_count)} ({uc_count} / {n})")
    return oc_count, uc_count, n



In [7]:
# Tally each benchmark's full system with the default thresholds.
# pathlib's "/" accepts plain strings, so the sub-paths need no Path() wrapper.
fo_oc, fo_uc, fo_n = count_system(EXPERIMENTS / "fanoutqa/dev/trial2/full")
tp_oc, tp_uc, tp_n = count_system(EXPERIMENTS / "travelplanner/validation/full")
wa_oc, wa_uc, wa_n = count_system(EXPERIMENTS / "webarena/test/full")

Overcommitment rate: 0.22653721682847897 (70 / 309)
Undercommitment rate: 0.11326860841423948 (35 / 309)
Overcommitment rate: 0.4111111111111111 (74 / 180)
Undercommitment rate: 0.005555555555555556 (1 / 180)
Overcommitment rate: 0.31343283582089554 (84 / 268)
Undercommitment rate: 0.44776119402985076 (120 / 268)


In [8]:
# Pool the per-benchmark tallies into overall counts.
n_total = sum((fo_n, tp_n, wa_n))
oc_total = sum((fo_oc, tp_oc, wa_oc))
uc_total = sum((fo_uc, tp_uc, wa_uc))

# Report the pooled rates alongside the raw counts they were computed from.
print(f"Total overcommitment rate: {oc_total / n_total} ({oc_total} / {n_total})")
print(f"Total undercommitment rate: {uc_total / n_total} ({uc_total} / {n_total})")

Total overcommitment rate: 0.3011889035667107 (228 / 757)
Total undercommitment rate: 0.20607661822985468 (156 / 757)
