In [33]:
import sys
import os

sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))
from extract import POLOCM2
from utils import read_json_file, GeneralTimeOut, set_timer_throw_exc, read_plan
from collections import defaultdict
from evaluator import ExecutabilityEvaluator
import numpy as np
import pandas as pd


In [34]:
# Directory whose result CSVs are scanned for missing experiment ids further down.
INPUT_DIR = '../../output/p2'
# Directory into which write_result_to_csv appends its results_*.csv files.
OUTPUT_DIR = '../../output/p2_test'

In [35]:
def run_single_experiment(cplex_dir,cplex_threads, extraction_type, learning_obj, dod , test_data, invalid_test_suffixes, logger):
    """Run one experiment and append its metrics as a row to the results CSV.

    Args:
        cplex_dir: Forwarded unchanged to ``solve``.
        cplex_threads: Forwarded unchanged to ``solve``.
        extraction_type: Key selecting the extraction class (only ``'p2'`` is
            registered here); an unknown key raises ``KeyError``.
        learning_obj: Mapping that provides ``domain``, ``traces``,
            ``po_traces``, ``id``, ``index``, ``total_length`` and ``len%``.
            ``po_traces`` is indexed by DOD level: element ``k`` is used for
            ``dod == (k + 1) / 10``.
        dod: Requested degree of disorder, expected in steps of 0.1.
        test_data: Forwarded unchanged to ``solve``.
        invalid_test_suffixes: Forwarded unchanged to ``solve``.
        logger: Object exposing ``info`` and ``error``.

    Returns:
        None. The outcome (including timeouts and errors) is written through
        ``write_result_to_csv``.
    """
    domain = learning_obj['domain']
    traces = learning_obj['traces']
    all_po_traces = learning_obj['po_traces']

    logger.info(f"Running {domain}-lo.{learning_obj['id']}-{dod} ...")

    extractions = {
        'p2': POLOCM2,
    }
    extraction = extractions[extraction_type]

    # Defined up front so a failure before it is computed (bad index, empty
    # trace list, ...) still yields a result row instead of an unbound name.
    actual_dod = float('nan')

    try:
        # Round before converting: values such as 0.7 + 0.1 are stored as
        # 0.7999999999999999 and plain truncation would pick the wrong index.
        index_by_dod = int(round(dod * 10)) - 1
        po_traces = all_po_traces[index_by_dod]
        actual_dod = sum(poat.flex for poat in po_traces) / len(po_traces)
        runtime, accuracy_val, error_rate, acceptance_rate, invalid_acceptance_rate, remark = solve(
            cplex_dir,
            cplex_threads,
            extraction,
            po_traces,
            traces,
            domain,
            test_data,
            invalid_test_suffixes,
        )
    except GeneralTimeOut:
        # A timed-out run is charged the full 600 s budget on the first timer.
        runtime, accuracy_val, error_rate, acceptance_rate, invalid_acceptance_rate, remark = (600, 0, 0), 0, 0, 0, 0, "Timeout"
    except Exception as e:
        runtime, accuracy_val, error_rate, acceptance_rate, invalid_acceptance_rate, remark = (0, 0, 0), 0, 0, 0, 0, str(e)
        logger.error(f"Error during experiment for domain {domain}: {e}")

    polocm_time, locm2_time, locm_time = runtime
    logger.info(f"{domain}-lo.{learning_obj['id']}-{dod}  DONE")

    result_data = {
        'id': learning_obj['id'],
        'dod': dod,
        'actual_dod': actual_dod,
        'domain': domain,
        'index': learning_obj['index'],
        'total_length': learning_obj['total_length'],
        'len%': learning_obj['len%'],
        'runtime': sum(runtime),
        'polocm_time': polocm_time,
        'locm2_time': locm2_time,
        'locm_time': locm_time,
        'accuracy': accuracy_val,
        'error_rate': error_rate,
        'acceptance_rate': acceptance_rate,
        'invalid_acceptance_rate': invalid_acceptance_rate,
        'remark': remark
    }
    write_result_to_csv(dod, extraction_type, result_data)
    return

def write_result_to_csv(dod, extraction_type, result_data):
    """Append one result row to ``results_{dod}_{extraction_type}.csv`` in OUTPUT_DIR.

    The header (the keys of ``result_data``) is written first when the file is
    new or empty. Values are stringified and written with the ``csv`` module
    so that commas, quotes or newlines inside a value (e.g. an error message
    in ``remark``) are quoted instead of breaking the row layout.

    No locking is performed here; concurrent writers to the same file must
    synchronize externally.

    Args:
        dod: Degree-of-disorder value embedded in the file name.
        extraction_type: Extraction key embedded in the file name.
        result_data: Mapping of column name to value for the row.
    """
    import csv  # local import: only this function needs it

    # Create the output directory on first use instead of failing in open().
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    csv_file_path = os.path.join(OUTPUT_DIR, f"results_{dod}_{extraction_type}.csv")
    needs_header = (not os.path.exists(csv_file_path)) or os.path.getsize(csv_file_path) == 0

    # newline='' lets the csv writer control line endings itself.
    with open(csv_file_path, 'a', newline='') as csv_file:
        writer = csv.writer(csv_file, lineterminator='\n')
        if needs_header:
            writer.writerow(list(result_data.keys()))
        writer.writerow([str(value) for value in result_data.values()])


@set_timer_throw_exc(num_seconds=600, exception=GeneralTimeOut, max_time=600)
def solve(cplex_dir,cplex_threads, extraction,po_traces ,traces, domain_name, test_data, invalid_test_suffixes):
    """Extract a model from ``po_traces`` and score it.

    Args:
        cplex_dir: First argument passed to the ``extraction`` constructor.
        cplex_threads: Second argument passed to the ``extraction`` constructor.
        extraction: Callable returning an object with ``extract_model``,
            ``get_TM_list`` and ``terminate`` methods.
        po_traces: Input handed to ``extract_model``.
        traces: Input handed to ``get_TM_list`` to build the reference matrices.
        domain_name: Name given to the generated PDDL domain.
        test_data: Forwarded to ``get_acceptance_rate``.
        invalid_test_suffixes: Forwarded to ``get_acceptance_rate``.

    Returns:
        ``(runtime, accuracy, error_rate, acceptance_rate,
        invalid_acceptance_rate, remark)``. ``remark`` is ``"Success"`` or the
        space-joined notes from the scoring helpers. On an unexpected error the
        tuple is ``((0, 0, 0), 0, 0, 0, 0, exception)``.

    Raises:
        GeneralTimeOut: propagated unchanged so the caller can record a timeout.
    """
    # Bound before the try so the handlers never touch an undefined name when
    # the constructor itself fails.
    extraction_method = None
    try:
        remark = []
        extraction_method = extraction(cplex_dir, cplex_threads)

        model, TM, runtime = extraction_method.extract_model(po_traces)

        pddl_model = model.to_pddl_domain(domain_name)
        golden_TM = extraction_method.get_TM_list(traces)

        accuracy_val, error_rate, note = get_AP_accuracy(TM, golden_TM)
        if note:
            remark.append(note)
        acceptance_rate, invalid_acceptance_rate, note = get_acceptance_rate(pddl_model, test_data, invalid_test_suffixes)
        if note:
            remark.append(note)

        if len(remark) == 0:
            remark = ['Success']
    except GeneralTimeOut:
        # If the timer fires inside this body, do not let the generic handler
        # below turn it into an ordinary error result.
        if extraction_method is not None:
            extraction_method.terminate()
        raise
    except Exception as e:
        print(f"Error: {e}")
        if extraction_method is not None:
            extraction_method.terminate()
        return (0,0,0), 0,0, 0,0, e
    return runtime, accuracy_val, error_rate, acceptance_rate, invalid_acceptance_rate, " ".join(remark)


def get_AP_accuracy(TM, golden_TM):
    """Compare learned matrices against reference matrices cell by cell.

    Each entry of ``TM`` is paired with the entry of ``golden_TM`` that has the
    same set of column labels, re-indexed to the reference layout, and both are
    binarised (``> 0`` becomes 1, everything else 0) before comparison.

    Args:
        TM: Sequence of learned pandas DataFrames.
        golden_TM: Sequence of reference pandas DataFrames.

    Returns:
        ``(accuracy, error_rate, remark)``. ``accuracy`` is the mean fraction of
        matching cells, ``error_rate`` the mean fraction of cells that are 1 in
        the learned matrix but 0 in the reference. ``remark`` is ``None`` on
        success; for an empty ``TM`` or a length mismatch the result is
        ``(0, 1, <reason>)``.

    Raises:
        AssertionError: if no reference matrix shares a learned matrix's columns.
    """
    if len(TM) == 0:
        return 0, 1, "AP Empty"
    if len(TM) != len(golden_TM):
        return 0, 1, "AP Invalid Length"

    accuracies = []
    false_positive_rates = []

    for sort, learned_tm in enumerate(TM):
        learned_cols = set(learned_tm.columns)
        # First reference matrix whose column labels match, or None.
        reference_tm = next(
            (candidate for candidate in golden_TM if set(candidate.columns) == learned_cols),
            None,
        )
        assert reference_tm is not None, f"Golden TM for sort {sort}: {learned_cols} not found in golden_TM"

        # Align rows/columns with the reference; cells absent from the learned
        # matrix become NaN and therefore binarise to 0.
        aligned = learned_tm.reindex(index=reference_tm.index, columns=reference_tm.columns)
        learned_bits = np.where(aligned > 0, 1, 0).flatten()
        reference_bits = np.where(reference_tm > 0, 1, 0).flatten()
        cell_count = len(learned_bits)

        accuracies.append(sum(learned_bits == reference_bits) / cell_count)
        # One-sided error: learned says 1 where the reference says 0.
        false_positives = np.sum((reference_bits == 0) & (learned_bits == 1))
        false_positive_rates.append(false_positives / cell_count)

    mean_accuracy = sum(accuracies) / len(accuracies)
    mean_error = sum(false_positive_rates) / len(false_positive_rates)
    return mean_accuracy, mean_error, None


def get_acceptance_rate(learned_domain, test_data, invalid_test_suffixes):
    """Average the evaluator's acceptance scores over all test problems.

    For every ``problem -> trace`` pair in ``test_data`` the evaluator is asked
    for a ``(valid, invalid)`` pair of scores, using
    ``invalid_test_suffixes[problem]`` as the second argument. The invalid score
    of a problem only counts when its valid score equals 1.

    Args:
        learned_domain: Passed to the ``ExecutabilityEvaluator`` constructor.
        test_data: Mapping of problem name to trace.
        invalid_test_suffixes: Mapping of problem name to the invalid suffixes
            for that problem.

    Returns:
        ``(valid_mean, invalid_mean, remark)``. A mean over no scores is 0.
        ``remark`` is ``None`` on success; if anything raises, the result is
        ``(0, 0, <error message>)``.
    """
    def _average(scores):
        # An empty score list averages to 0 rather than dividing by zero.
        return sum(scores) / len(scores) if len(scores) > 0 else 0

    try:
        evaluator = ExecutabilityEvaluator(learned_domain)
        valid_scores, invalid_scores = [], []

        for problem, trace in test_data.items():
            accepted, wrongly_accepted = evaluator.get_acceptance_rate(
                trace, invalid_test_suffixes[problem]
            )
            valid_scores.append(accepted)
            # The invalid score is only meaningful when the valid sequence was accepted.
            if accepted == 1:
                invalid_scores.append(wrongly_accepted)

        valid = _average(valid_scores)
        invalid = _average(invalid_scores)
    except Exception as e:
        return 0,0, "Error in acceptance rate calculation" + str(e)
    return valid, invalid, None

def read_files(input_filepath, test_filepath, invalid_suffixes_filepath):
    """Load the training data, the valid test traces and the invalid suffixes.

    Both text files hold one record per line with fields separated by ``&&``.
    Blank lines are skipped so stray empty lines do not abort loading with an
    ``IndexError``.

    Args:
        input_filepath: JSON file read with ``read_json_file``.
        test_filepath: Text file where field 0 is the domain name, field 2 the
            problem name and the last field the plan.
        invalid_suffixes_filepath: Text file where field 0 is the domain name,
            field 1 the problem name and field 2 the plan.

    Returns:
        ``(TRAIN_DATA, TEST_DATA, INVALID_TEST_SUFFIXES)``.
        ``TEST_DATA[domain][problem]`` is the value returned by ``read_plan``;
        ``INVALID_TEST_SUFFIXES[domain][problem]`` is a list holding the first
        element of each parsed plan.
    """
    TRAIN_DATA = read_json_file(input_filepath)

    plain_traces = defaultdict(lambda: defaultdict())
    with open(test_filepath, 'r') as f:
        for line in f:
            if not line.strip():
                continue  # tolerate blank lines
            details = line.split("&&")

            domain_name = details[0]
            problem_name = details[2]
            plan = details[-1]

            plain_traces[domain_name][problem_name] = read_plan(plan)
    TEST_DATA = plain_traces

    invalid_suffixes = defaultdict(lambda: defaultdict(list))
    with open(invalid_suffixes_filepath, "r") as f:
        for line in f:
            if not line.strip():
                continue  # tolerate blank lines
            details = line.split("&&")
            domain_name = details[0]
            problem_name = details[1]
            plan = details[2]

            # Only the first element of the parsed plan is kept.
            invalid_suffixes[domain_name][problem_name].append(read_plan(plan)[0])
    INVALID_TEST_SUFFIXES = invalid_suffixes
    return TRAIN_DATA, TEST_DATA, INVALID_TEST_SUFFIXES

In [36]:
# Report, for every results CSV in INPUT_DIR, which experiment ids are absent.
expected_ids = set(range(1,1335))
# Sorted so the report order is stable across runs and platforms.
for result_file in sorted(os.listdir(INPUT_DIR)):
    # Skip entries that are not result CSVs (e.g. Jupyter checkpoint folders),
    # which pd.read_csv cannot parse.
    if not result_file.endswith('.csv'):
        continue
    df = pd.read_csv(os.path.join(INPUT_DIR, result_file))
    existing_ids = set(df['id'])
    missing_ids = sorted(expected_ids - existing_ids)
    print(result_file)
    print(f"Missing IDs: {missing_ids}")

results_0.1_p2.csv
Missing IDs: []
results_0.2_p2.csv
Missing IDs: []
results_0.3_p2.csv
Missing IDs: []
results_0.4_p2.csv
Missing IDs: []
results_0.5_p2.csv
Missing IDs: []
results_0.6_p2.csv
Missing IDs: []
results_0.7_p2.csv
Missing IDs: []
results_0.8_p2.csv
Missing IDs: []
results_0.9_p2.csv
Missing IDs: [884]
results_1.0_p2.csv
Missing IDs: []


In [37]:
# with open("../../data/plain_traces/plain_traces.txt", "r") as f:
#     lines = f.readlines()
#     for line in lines:
#         details = line.split("&&")
#         name = f"{details[0]}-{details[2]}-{details[3]}"
      
#         plan = details[-1]
#         trace = read_plan(plan)
#         po_trace = trace.to_partial_ordered_trace(0.3)
#         test_exe(name, po_trace)