# CAiSE 2025 experiments

In [None]:
from exptools import *

Parameters for the experiments

In [None]:
from datetime import datetime, timezone
from pathlib import Path


# --- Experiment grid ---------------------------------------------------------
# Each list below is one axis of the grid of experiments built further down.
MAX_TRACES = [100, 500, 1000, 2000, 4000, 8000]  # passed as `total_traces`
MAX_EVENTS = [20, 30, 40, 50, 60, 70]  # passed as both `min_event` and `max_event`
NOISE = [0]  # passed as positive and negative noise percentage
THRESHOLDS = [.3, .6]  # passed as `threshold` to the Hamming/Levenshtein generators

# Fixed seed passed as the `seed` init argument of every experiment.
SEED = b'\x81\x97u+'

# Model file is looked up relative to the working directory as "<MODEL_NAME>.decl".
MODEL_NAME = 'experimental_model_ivan 2'
MODEL_PATH = Path(f"{MODEL_NAME}.decl")

EXPORT_PREFIX = 'caise2025_'
# UTC timestamp at minute resolution, e.g. "2025-01-31T1234+0000".
# Formatted without ':' because colons are not valid in Windows file names
# (isoformat() would produce "...T12:34+00:00").
EXPORT_PATH = Path('output', EXPORT_PREFIX + datetime.now(timezone.utc).strftime('%Y-%m-%dT%H%M%z'))
# exist_ok=False makes a second run within the same minute fail with
# FileExistsError instead of writing into an already existing results directory.
EXPORT_PATH.mkdir(parents=True, exist_ok=False)

print(f'Results written on directory <{EXPORT_PATH.as_posix()}>')

In [None]:
from Declare4Py.ProcessMiningTasks.LogGenerator.PositionalBased.PositionalBasedLogGeneratorNG import PBLogGeneratorBaseline, PBLogGeneratorRandom, PBLogGeneratorHamming, PBLogGeneratorLevenshtein, PBLogGeneratorOrig
from Declare4Py.ProcessMiningTasks.LogGenerator.PositionalBased.PositionalBasedModel import PositionalBasedModel


Define the set of experiments to run

In [None]:
# Parse the model file once; the resulting object is shared by every experiment.
MODEL = PositionalBasedModel().parse_from_file(MODEL_PATH.as_posix())
# Registry of all experiments, keyed by experiment id.
EXPERIMENTS: dict[str, Experiment] = {}

# Generators without extra init arguments: one experiment per generator class
# for every (traces, events, noise) combination.
for model_name, model, traces, events, noise in itertools.product(
        [MODEL_NAME], [MODEL], MAX_TRACES, MAX_EVENTS, NOISE):
    # Parameters recorded alongside the experiment.
    params = {'model': model_name, 'traces': traces, 'events': events, 'noise': noise}
    # Keyword arguments, grouped under 'init' and 'run'.
    exp_args = {
        'init': {
            'total_traces': traces,
            'min_event': events,  # min == max: the same value bounds the event count on both sides
            'max_event': events,
            'process_model': model,
            'log': None,
            'verbose': False,
            'seed': SEED,
        },
        'run': {
            'equal_rule_split': True,
            'high_variability': False,
            'generate_negatives_traces': False,
            'positive_noise_percentage': noise,
            'negative_noise_percentage': noise,
            'append_results': False,
        },
    }
    # Zero-padded suffix shared by the three experiments of this combination.
    exp_id = f'_{model_name}_{traces:04}_{events:03}_{noise:02}'

    # Row order fixes the insertion order into EXPERIMENTS; the three
    # experiments share the same `params` and `exp_args` objects.
    for prefix, generator_cls, summary in (
        ('old', PBLogGeneratorOrig, 'Rewritten original code'),
        ('baseline', PBLogGeneratorBaseline, 'No attempt to introduce variability in the generated logs'),
        ('random', PBLogGeneratorRandom, 'Uses clingo randomisation to generate different models'),
    ):
        full_id = prefix + exp_id
        EXPERIMENTS[full_id] = Experiment(
            id_=full_id,
            class_=generator_cls,
            args=exp_args,
            model=model,
            parameters=params,
            description=summary,
        )

# Distance-based generators: one Hamming and one Levenshtein experiment for
# every (traces, events, noise, threshold, randomise) combination.
for model_name, model, traces, events, noise, threshold, randomise in itertools.product(
        [MODEL_NAME], [MODEL], MAX_TRACES, MAX_EVENTS, NOISE, THRESHOLDS, [False, True]):
    # Parameters recorded alongside the experiment.
    params = {
        'model': model_name,
        'traces': traces,
        'events': events,
        'noise': noise,
        'threshold': threshold,
        'randomise': randomise,
    }
    # Keyword arguments, grouped under 'init' and 'run'; compared with the
    # other generators, 'init' additionally carries `threshold` and `randomise`.
    exp_args = {
        'init': {
            'total_traces': traces,
            'min_event': events,
            'max_event': events,
            'process_model': model,
            'log': None,
            'verbose': False,
            'seed': SEED,
            'threshold': threshold,
            'randomise': randomise,
        },
        'run': {
            'equal_rule_split': True,
            'high_variability': False,
            'generate_negatives_traces': False,
            'positive_noise_percentage': noise,
            'negative_noise_percentage': noise,
            'append_results': False,
        },
    }
    # round(), not int(): a float product such as 0.29 * 100 == 28.999... would
    # be truncated to 28 and could mislabel the experiment or collide with
    # another id. For the thresholds .3 and .6 the result is the same as before.
    threshold_pct = round(threshold * 100)
    exp_id = f'_{model_name}_{traces:04}_{events:03}_{noise:02}_{threshold_pct:02}_{randomise}'

    # Row order fixes the insertion order into EXPERIMENTS; both experiments
    # share the same `params` and `exp_args` objects.
    for prefix, generator_cls, summary in (
        ('hamming', PBLogGeneratorHamming, 'Hamming distance threshold implemented in ASP'),
        ('levenshtein', PBLogGeneratorLevenshtein, 'Levenshtein distance threshold implemented in ASP'),
    ):
        full_id = prefix + exp_id
        EXPERIMENTS[full_id] = Experiment(
            id_=full_id,
            class_=generator_cls,
            args=exp_args,
            model=model,
            parameters=params,
            description=summary,
        )


# Save the full experiment registry next to the results.
with (EXPORT_PATH / 'experiments.json').open(mode='w') as fp:
    experiments_dump(EXPERIMENTS, fp, indent=2)

# Last expression of the cell: show the registry in the notebook output.
EXPERIMENTS

In [None]:
# One generator per experiment, stored under the same id as in EXPERIMENTS.
GENERATORS = {
    experiment_id: experiment.new_generator()
    for experiment_id, experiment in EXPERIMENTS.items()
}

In [None]:
from IPython import get_ipython
ipython = get_ipython()

for eid, gen, exp in ((k, GENERATORS[k], EXPERIMENTS[k]) for k in EXPERIMENTS.keys()):
    header = '-' * 5 + f' {eid} [{type(gen).__name__}] '
    print(header + '-' * (72 - len(header)))
    with log_to_file(EXPORT_PATH.joinpath(f'{eid}.log'), level=logging.DEBUG):
        %time exp.run_generator(gen)
    gen.get_results_as_dataframe().to_csv(EXPORT_PATH.joinpath(f'{eid}.csv'))


## Evaluate the variability

In [None]:
# Collect one result record per experiment, in registry order.
results = [
    experiment.get_results(GENERATORS[experiment_id], normalise=True, columns=['resource'])
    for experiment_id, experiment in EXPERIMENTS.items()
]

# Save the raw records as JSON ...
with (EXPORT_PATH / 'results.json').open(mode='w') as fp:
    json.dump(results, fp)

# ... and as a flattened table in CSV form.
df = pd.json_normalize(results)
df.to_csv(EXPORT_PATH / 'results.csv')

# Last expression of the cell: show the table in the notebook output.
df
