In [1]:
import sys
import os
# Put the absolute path of the parent directory on the module search path.
# NOTE(review): presumably this is what makes the project-local `llm` module
# (imported in a later cell) importable — confirm.
sys.path.append(os.path.abspath(".."))

In [2]:
# Source file for the dataset. Each line is later split on "&&" into a
# problem name, a plan, and a natural-language description.
ds_filename = "./generated_nl_data/transport_p2nl.txt"
with open(ds_filename, 'r') as f:
    # Keep the raw lines, trailing newlines included, for the parsing step.
    data = list(f)

def plan_to_actions(plan):
    """Parse a comma-separated plan string into a list of action records.

    Each step has the form ``(name arg1 arg2 ...)``. Whitespace around a step
    and runs of whitespace between tokens are tolerated, and empty steps (an
    empty plan, a trailing comma) are skipped instead of producing a record
    with an empty action name.

    Args:
        plan: Plan text such as ``"(drive v1 l1 l2),(drop v1 l2 p1)"``.

    Returns:
        A list of ``{"act": <name>, "args": [<arg>, ...]}`` dicts, in plan
        order.
    """
    actions = []
    for step in plan.split(","):
        # Trim whitespace first so the parentheses really are at the ends of
        # the step, then split on any whitespace run to avoid empty tokens.
        tokens = step.strip().strip("()").split()
        if not tokens:
            continue
        actions.append({
            "act": tokens[0],
            "args": tokens[1:]
        })
    return actions
# Build one record per dataset line. Each line is expected to look like
# "<problem>&&<plan>&&<text>".
dataset = []
for line in data:
    # A blank line (for example a trailing empty line) carries no record.
    if not line.strip():
        continue
    # Split on the first two separators only, so a "&&" that happens to occur
    # inside the description stays part of the text instead of truncating it.
    fields = line.split("&&", 2)
    if len(fields) != 3:
        raise ValueError(
            "Malformed dataset line (expected 3 '&&'-separated fields): %r" % line
        )
    prob, plan, text = fields
    dataset.append({
        "prob": prob,
        "plan": plan_to_actions(plan),
        # The text keeps whatever line ending the file had.
        "text": text
    })

print(dataset)

[{'prob': 'p01.pddl', 'plan': [{'act': 'pick-up', 'args': ['v1?vehicle', 'l1?location', 'p1?package', 'c0?size', 'c1?size']}, {'act': 'drive', 'args': ['v1?vehicle', 'l1?location', 'l2?location']}, {'act': 'drop', 'args': ['v1?vehicle', 'l2?location', 'p1?package', 'c0?size', 'c1?size']}], 'text': 'Pick up the vehicle at location l1 with package p1 of size c0 and size c1, drive the vehicle from location l1 to location l2, and drop the vehicle at location l2 with package p1 of size c0 and size c1.\n'}, {'prob': 'p02.pddl', 'plan': [{'act': 'drive', 'args': ['v1?vehicle', 'l2?location', 'l1?location']}, {'act': 'pick-up', 'args': ['v1?vehicle', 'l1?location', 'p1?package', 'c0?size', 'c1?size']}, {'act': 'drive', 'args': ['v1?vehicle', 'l1?location', 'l2?location']}, {'act': 'drop', 'args': ['v1?vehicle', 'l2?location', 'p1?package', 'c0?size', 'c1?size']}], 'text': 'First, drive the vehicle from location l2 to location l1. Next, pick up the package p1 of size c0 at location l1. Then, dr

In [6]:
import re
import json
from llm import generate_prompt, generate_responses

def log(step, result):
    """Append `result` as one JSON line to ./logs/transport_<step>.jsonl.

    The ./logs directory is created first when it does not exist yet.

    Args:
        step: Name inserted into the log file name.
        result: Value to record; it is serialised with ``json.dumps``.
    """
    log_dir = './logs'
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    destination = os.path.join(log_dir, 'transport_%s.jsonl' % step)
    with open(destination, 'a', encoding='utf-8') as sink:
        # print() supplies the newline that terminates the JSONL record.
        print(json.dumps(result), file=sink)

def solve(paragraph, ds_name=""):
    """Run the two-stage verb extraction on `paragraph`.

    The paragraph first goes through get_verb_args; its result is then handed,
    together with the paragraph, to remove_non_eventive_verbs.

    Args:
        paragraph: Natural-language text to analyse.
        ds_name: Accepted but not used by this function.

    Returns:
        Whatever remove_non_eventive_verbs returns.
    """
    return remove_non_eventive_verbs(paragraph, get_verb_args(paragraph))

def parse_json(string):
    """Decode a JSON value from a response string.

    When the text contains a Markdown code fence (plain, ``json`` or
    ``jsonc``), the contents of the first fence are decoded; otherwise the
    whole string is decoded.

    Args:
        string: Response text. Anything that is not a ``str`` (for example a
            ``None`` response body) is treated as unparseable.

    Returns:
        The decoded JSON value, or ``None`` when the input is not a string or
        is not valid JSON. A valid JSON ``null`` also decodes to ``None``.
    """
    if not isinstance(string, str):
        # re.search would raise TypeError here; report it like any other
        # parse failure so one bad response does not abort the caller's loop.
        print("JSONDecodeError: expected a string, got", type(string).__name__)
        return None
    m = re.search(r"```(?:json|jsonc)?\s*([\s\S]*?)\s*```", string, re.I)
    payload = m.group(1).strip() if m else string
    try:
        return json.loads(payload)
    except json.JSONDecodeError as e:
        print("JSONDecodeError:", e)
        return None

def get_verb_args(paragraph):
    """Extract verbs and their arguments from `paragraph` via the LLM helpers.

    Builds the 'verb_args' prompt with generate_prompt, sends it through
    generate_responses (model "gpt-4o-mini", temperature 0), decodes the
    reply's 'content' with parse_json, and records the decoded value under
    the 'verb_args' log step.

    Args:
        paragraph: Natural-language text to analyse.

    Returns:
        The value decoded by parse_json (``None`` when decoding failed).
    """
    prompt = generate_prompt('verb_args', {'nl': paragraph})
    response = generate_responses("gpt-4o-mini", prompt, temperature=0, log=True)['content']
    obj = parse_json(response)
    # log() serialises its argument itself, so hand it the object. Passing a
    # pre-serialised string would store a JSON-encoded string, not plain JSON.
    log('verb_args', obj)
    return obj

def remove_non_eventive_verbs(paragraph, verb_args):
    """Filter `verb_args` through the 'remove_non_eventive_verbs' prompt.

    Builds the prompt with generate_prompt (the verbs are embedded as a JSON
    string), sends it through generate_responses (model "gpt-4o-mini",
    temperature 0), decodes the reply's 'content' with parse_json, and records
    the decoded value under the 'remove_non_eventive_verbs' log step.

    Args:
        paragraph: Natural-language text the verbs were extracted from.
        verb_args: JSON-serialisable verb/argument structure to filter.

    Returns:
        The value decoded by parse_json (``None`` when decoding failed).
    """
    prompt = generate_prompt('remove_non_eventive_verbs', {'nl': paragraph, 'verbs': json.dumps(verb_args)})
    response = generate_responses("gpt-4o-mini", prompt, temperature=0, log=True)['content']
    obj = parse_json(response)
    # log() serialises its argument itself, so hand it the object. Passing a
    # pre-serialised string would store a JSON-encoded string, not plain JSON.
    log('remove_non_eventive_verbs', obj)
    return obj

# Run the extraction pipeline on every example and store the outcome on the
# record itself under 'pred'; the evaluation cell reads it from there.
# Each solve() call goes through two generate_responses requests, so
# re-running this cell repeats all of them.
for item in dataset:
    result = solve(item['text'])
    item['pred'] = result

In [13]:
import spacy
# Load spaCy's "en_core_web_sm" pipeline; the evaluation code below uses it to
# lemmatise gold action names and predicted verbs before comparing them.
nlp = spacy.load("en_core_web_sm")

precisions, recalls, f1s = [], [], []


def _lemma_phrase(text):
    """Return `text` as lower-cased spaCy lemmas joined by single spaces."""
    return " ".join(token.lemma_.lower() for token in nlp(text))


def _count_ordered_matches(gold_lemmas, pred_lemmas):
    """Count gold names that find a matching prediction, scanning in order.

    Each gold name is paired with the first not-yet-consumed prediction that
    contains it or is contained in it; the search for the next gold name
    resumes right after that prediction.
    """
    matched = 0
    next_pred = 0
    for gold in gold_lemmas:
        for idx in range(next_pred, len(pred_lemmas)):
            candidate = pred_lemmas[idx]
            # "" is a substring of every string, so an empty name on either
            # side must never count as a match.
            if gold and candidate and (gold in candidate or candidate in gold):
                matched += 1
                next_pred = idx + 1
                break
    return matched


def _mean(values):
    """Arithmetic mean of `values`, or 0.0 when there are none."""
    return sum(values) / len(values) if values else 0.0


for item in dataset:
    plan = item['plan']
    # A failed parse upstream leaves 'pred' as None (or another non-list
    # value); score that as "no predictions" instead of crashing.
    pred = item['pred'] if isinstance(item['pred'], list) else []

    # Normalise every prediction to (verb, arguments). Malformed entries still
    # count as predictions (false positives) but can never match a gold action.
    pred_pairs = []
    for entry in pred:
        if isinstance(entry, dict):
            verb = entry.get('verb')
            arguments = entry.get('arguments')
        else:
            verb, arguments = None, None
        pred_pairs.append((
            verb if isinstance(verb, str) else "",
            arguments if isinstance(arguments, (list, tuple)) else [],
        ))

    acts = [f"{a['act']}({', '.join(a['args'])})" for a in plan]
    pred_acts = [f"{verb}({', '.join(map(str, arguments))})" for verb, arguments in pred_pairs]
    print("Gold Actions: ", "; ".join(acts))
    print("Predicted Actions: ", "; ".join(pred_acts))
    print()

    # Lemmatise every name once per example rather than once per comparison.
    # Gold names have their hyphen turned into a space first ("pick-up" ->
    # "pick up").
    # NOTE(review): predicted verbs are not hyphen-normalised; confirm that
    # this asymmetry is intended.
    gold_lemmas = [_lemma_phrase(a['act'].replace("-", " ")) for a in plan]
    pred_lemmas = [_lemma_phrase(verb) if verb else "" for verb, _args in pred_pairs]

    common = _count_ordered_matches(gold_lemmas, pred_lemmas)
    tp = common
    fp = len(pred_acts) - common
    fn = len(acts) - common

    precision = tp / (tp + fp) if (tp + fp) > 0 else 0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0
    f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0

    print(f"Precision: {precision}, Recall: {recall}, F1: {f1}")
    print("\n")
    precisions.append(precision)
    recalls.append(recall)
    f1s.append(f1)

# _mean() keeps an empty dataset from raising ZeroDivisionError here.
avg_precision = _mean(precisions)
avg_recall = _mean(recalls)
avg_f1 = _mean(f1s)
print(f"Average Precision: {avg_precision}, Average Recall: {avg_recall}, Average F1: {avg_f1}")

        

        



Gold Actions:  pick-up(v1?vehicle, l1?location, p1?package, c0?size, c1?size); drive(v1?vehicle, l1?location, l2?location); drop(v1?vehicle, l2?location, p1?package, c0?size, c1?size)
Predicted Actions:  Pick up(the vehicle, location l1, package p1, size c0, size c1); drive(the vehicle, location l1, location l2); drop(the vehicle, location l2, package p1, size c0, size c1)

Precision: 1.0, Recall: 1.0, F1: 1.0


Gold Actions:  drive(v1?vehicle, l2?location, l1?location); pick-up(v1?vehicle, l1?location, p1?package, c0?size, c1?size); drive(v1?vehicle, l1?location, l2?location); drop(v1?vehicle, l2?location, p1?package, c0?size, c1?size)
Predicted Actions:  drive(the vehicle, location l2, location l1); pick up(the package p1, size c0, location l1); drive(the vehicle, location l1, location l2); drop(the package p1, size c0, location l2, the vehicle)

Precision: 1.0, Recall: 1.0, F1: 1.0


Gold Actions:  drive(v1?vehicle, l1?location, l2?location); drive(v1?vehicle, l2?location, l3?locati