In [1]:
import sys
import os
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))
from extract import LOCM2,LOCM
from utils import read_plan, read_json_file
from evaluator import ExecutabilityEvaluator
from collections import defaultdict
import json

In [2]:

def test_locm2_acceptance_rate(domain_name,train_traces, test_traces, invalid_suffixes):
    debug = {}
    try:
        locm2 = LOCM2(state_param=True, viz=False, debug=debug)
        model = locm2.extract_model(train_traces)
        learned_domain = model.to_pddl_domain(domain_name)
        gt_filename = os.path.join('../../data', 'goose-benchmarks', 'tasks', domain_name, 'domain.pddl')
        
        evaluator = ExecutabilityEvaluator(learned_domain, gt_filename, debug=False)
        valid_res = []
        invalid_res = []
        for problem, trace in test_traces.items():
            valid_acceptance, invalid_acceptance = evaluator.get_acceptance_rate(trace, invalid_suffixes[problem])
            valid_res.append(valid_acceptance)
            invalid_res.append(invalid_acceptance)
        if len(valid_res) == 0:
            valid = 0
        else:
            valid = sum(valid_res) / len(valid_res)
        if len(invalid_res) == 0:
            invalid = 0
        else:
            invalid = sum(invalid_res) / len(invalid_res)
        return valid, invalid
    except Exception as e:
        print(f"Error processing domain {domain_name}: {e}")
        return 0,1
    

def test_locm_acceptance_rate(domain_name,train_traces, test_traces, invalid_suffixes):
    debug = {}
    try:
        locm = LOCM(state_param=True, viz=False, debug=debug)
        model = locm.extract_model(train_traces)
        learned_domain = model.to_pddl_domain(domain_name)
        gt_filename = os.path.join('../../data', 'goose-benchmarks', 'tasks', domain_name, 'domain.pddl')
        
        evaluator = ExecutabilityEvaluator(learned_domain, gt_filename, debug=False)
        valid_res = []
        invalid_res = []
        for problem, trace in test_traces.items():
            valid_acceptance, invalid_acceptance = evaluator.get_acceptance_rate(trace, invalid_suffixes[problem])
            valid_res.append(valid_acceptance)
            invalid_res.append(invalid_acceptance)
        if len(valid_res) == 0:
            valid = 0
        else:
            valid = sum(valid_res) / len(valid_res)
        if len(invalid_res) == 0:
            invalid = 0
        else:
            invalid = sum(invalid_res) / len(invalid_res)
        return valid, invalid
    except Exception as e:
        print(f"Error processing domain {domain_name}: {e}")
        return 0,1

In [3]:
# Load the test plans, indexed as plain_traces[domain][problem].
# Each record is one line of "&&"-separated fields: the domain name is the
# first field, the problem name the third, and the plan text the last one.
plain_traces = defaultdict(dict)
with open("../../data/plain_traces/plain_traces.txt", "r") as f:
    # Stream the file line by line instead of materialising it with readlines().
    for line in f:
        # Skip blank lines (e.g. a trailing newline at the end of the file);
        # they would otherwise fail with IndexError on details[2].
        if not line.strip():
            continue
        details = line.split("&&")

        domain_name = details[0]
        problem_name = details[2]
        plan = details[-1]

        plain_traces[domain_name][problem_name] = read_plan(plan)
print(plain_traces['blocksworld']['p01.pddl'])

<traces.trace.Trace object at 0x00000238824356A0>


In [4]:
# Group the training records by domain: train_traces[domain] is the list of
# every record whose "domain" field names that domain, in file order.
train_traces = defaultdict(list)
data = read_json_file("../../data/training_data/traces_plan_po_r10.json")
for learning_obj in data:
    domain_name = learning_obj["domain"]
    bucket = train_traces[domain_name]  # created on first access by defaultdict
    bucket.append(learning_obj)
    


In [5]:
# Load the invalid suffixes, indexed as invalid_suffixes[domain][problem] -> list.
# Each record is one line of "&&"-separated fields: domain, problem, plan.
# Only the first element of the parsed plan is kept per record.
invalid_suffixes = defaultdict(lambda: defaultdict(list))

with open("../../data/plain_traces/invalid_suffixes.txt", "r") as f:
    # Stream the file line by line instead of materialising it with readlines().
    for line in f:
        # Skip blank lines (e.g. a trailing newline at the end of the file);
        # they would otherwise fail with IndexError on details[1].
        if not line.strip():
            continue
        details = line.split("&&")
        domain_name = details[0]
        problem_name = details[1]
        plan = details[2]

        invalid_suffixes[domain_name][problem_name].append(read_plan(plan)[0])
print(invalid_suffixes['blocksworld']['p01.pddl'])

[<traces.step.Step object at 0x00000238D40E1430>, <traces.step.Step object at 0x00000238D40E14F0>, <traces.step.Step object at 0x00000238D40E16D0>, <traces.step.Step object at 0x00000238D40E1790>, <traces.step.Step object at 0x00000238D40E1970>, <traces.step.Step object at 0x00000238D40E1A30>, <traces.step.Step object at 0x00000238D40E1B50>]


In [None]:
# Run LOCM2 on every training configuration of every domain and record one
# CSV row per configuration in ./acceptance_rate.csv.
with open("./acceptance_rate.csv", "w") as f:
    f.write("ID, Domain, len, len%, acceptance_rate, invalid_acceptance_rate\n")
    for domain, items in train_traces.items():
        print(f"Testing domain: {domain}")
        for item in items:
            traces = item["traces"]
            try:
                rate, invalid_rate = test_locm2_acceptance_rate(domain, traces, plain_traces[domain], invalid_suffixes[domain])
            except Exception as e:
                # Skip this configuration; no row is written for it.
                print(f"Error in balanced executability for domain {domain}: {e}")
                continue
            else:
                # Record the result; flush after each row so finished rows are
                # pushed out of the write buffer before the next (slow) run starts.
                f.write(f"{item['id']}, {domain}, {item['total_length']}, {item['len%']}, {rate}, {invalid_rate}\n")
                f.flush()
                print(f"Balanced Executability for {domain}: {(rate, invalid_rate)}")

Testing domain: blocksworld
Balanced Executability for blocksworld: (0.030303030303030304, 0.00404040404040404)
Balanced Executability for blocksworld: (0.030303030303030304, 0.00404040404040404)
Balanced Executability for blocksworld: (0.030303030303030304, 0.00404040404040404)
Balanced Executability for blocksworld: (0.030303030303030304, 0.00404040404040404)
Balanced Executability for blocksworld: (0.030303030303030304, 0.00404040404040404)
Balanced Executability for blocksworld: (0.030303030303030304, 0.00404040404040404)
Balanced Executability for blocksworld: (0.030303030303030304, 0.00404040404040404)
Balanced Executability for blocksworld: (1.0, 0.04888032881392957)
Balanced Executability for blocksworld: (0.030303030303030304, 0.00404040404040404)
Balanced Executability for blocksworld: (0.030303030303030304, 0.00404040404040404)
Balanced Executability for blocksworld: (0.030303030303030304, 0.00404040404040404)
Balanced Executability for blocksworld: (0.030303030303030304, 0.

In [7]:
# Run LOCM on every training configuration of every domain and record one
# CSV row per configuration in ./locm_acceptance_rate.csv.
with open("./locm_acceptance_rate.csv", "w") as f:
    f.write("ID, Domain, len, len%, valid_acceptance, invalid_valid_acceptance\n")
    for domain, items in train_traces.items():
        print(f"Testing domain: {domain}")
        for item in items:
            traces = item["traces"]
            try:
                rate, invalid_rate = test_locm_acceptance_rate(domain, traces, plain_traces[domain], invalid_suffixes[domain])
            except Exception as e:
                # Skip this configuration; no row is written for it.
                print(f"Error in balanced executability for domain {domain}: {e}")
                continue
            else:
                # Record the result; flush after each row so finished rows are
                # pushed out of the write buffer before the next (slow) run starts.
                f.write(f"{item['id']}, {domain}, {item['total_length']}, {item['len%']}, {rate}, {invalid_rate}\n")
                f.flush()
                print(f"Balanced Executability for {domain}: {(rate, invalid_rate)}")

Testing domain: blocksworld
Balanced Executability for blocksworld: (0.030303030303030304, 0.00404040404040404)
Balanced Executability for blocksworld: (0.030303030303030304, 0.00404040404040404)
Balanced Executability for blocksworld: (0.030303030303030304, 0.00404040404040404)
Balanced Executability for blocksworld: (0.030303030303030304, 0.00404040404040404)
Balanced Executability for blocksworld: (0.030303030303030304, 0.00404040404040404)
Balanced Executability for blocksworld: (0.030303030303030304, 0.00404040404040404)
Balanced Executability for blocksworld: (0.030303030303030304, 0.00404040404040404)
Balanced Executability for blocksworld: (1.0, 0.06203068561298607)
Balanced Executability for blocksworld: (0.030303030303030304, 0.00404040404040404)
Balanced Executability for blocksworld: (0.030303030303030304, 0.00404040404040404)
Balanced Executability for blocksworld: (0.030303030303030304, 0.00404040404040404)
Balanced Executability for blocksworld: (0.030303030303030304, 0.