# Log variability experiments: tabu-based

In [None]:
from exptools import *

from datetime import datetime, timezone
from pathlib import Path
from pprint import pprint
import subprocess


Parameters for the experiments

In [None]:
# --- Experiment grid -------------------------------------------------------
# Every combination of the values below becomes one experiment configuration.

# Number of traces to generate (used as the generators' 'total_traces').
MAX_TRACES: list[int] = [100, 500, 1000, 2000, 4000, 8000]
# Trace length; the same value is used as both 'min_event' and 'max_event'.
MAX_EVENTS: list[int] = [20, 30, 40, 50, 60, 70]
# Noise level, applied to both the positive and negative noise percentages.
NOISE: list[int] = [0]
# Distance thresholds handed to the generators.
THRESHOLDS: list[float] = [0.3, 0.6]

# --- Run settings ----------------------------------------------------------
# Fixed seed so that runs are repeatable; passed to the generators and to run().
SEED: bytes = b'\x81\x97u+'
# Forwarded to the generators as their 'verbose' flag.
DEBUG: bool = False

# Base name of the model; the model file is looked up as '<MODEL_NAME>.decl'.
MODEL_NAME: str = 'variability_model'

# --- Output location -------------------------------------------------------
# Results go to '<OUTPUT_DIR>/<EXPORT_PREFIX>_<EXPORT_TAG>'.
EXPORT_PREFIX: str = 'variability_tabu'
# Leave as None to have a UTC timestamp generated when the paths are resolved.
EXPORT_TAG = None

OUTPUT_DIR: str = 'output'


In [None]:
# Resolve the model file and create a fresh export directory for this run.
MODEL_PATH = Path(f"{MODEL_NAME}.decl")

# Without an explicit tag, label the run with the current UTC timestamp.
if EXPORT_TAG is None:
    EXPORT_TAG = datetime.now(tz=timezone.utc).strftime(r'%Y-%m-%dT%H%M%SZ')

EXPORT_PATH = Path(OUTPUT_DIR) / f'{EXPORT_PREFIX}_{EXPORT_TAG}'
# exist_ok=False: fail rather than write into an already existing directory.
EXPORT_PATH.mkdir(parents=True, exist_ok=False)

print(f'Results written on directory <{EXPORT_PATH.as_posix()}>')

# Record the code version alongside the results (best effort only).
try:
    # stderr must be captured explicitly, otherwise CalledProcessError.stderr
    # is None and the message below carries no information.
    git_description = subprocess.check_output(
        ['git', 'describe', '--dirty'],
        stderr=subprocess.PIPE,
        text=True,
    ).strip()
except subprocess.CalledProcessError as e:
    # git ran but failed (e.g. not a repository, or no tag to describe).
    print(f'Git info not available: {e.stderr.strip()}')
except OSError as e:
    # git could not be started at all (e.g. the executable is not installed).
    print(f'Git info not available: {e}')
else:
    print('Git describe: ' + git_description)

In [None]:
from Declare4Py.ProcessMiningTasks.LogGenerator.PositionalBased.PositionalBasedLogGeneratorNG import PBLogGeneratorBaseline, PBLogGeneratorRandom, PBLogGeneratorHamming, PBLogGeneratorLevenshtein, PBLogGeneratorOrig
from Declare4Py.ProcessMiningTasks.LogGenerator.PositionalBased.PositionalBasedModel import PositionalBasedModel


Define the set of experiments to run

In [None]:
# Build the experiment grid: for every combination of the parameters declared
# earlier, register one Hamming-based and one Levenshtein-based experiment.
EXPERIMENTS: list[dict] = []

for model_name, model, traces, events, noise, threshold, randomise in itertools.product(
        [MODEL_NAME], [MODEL_PATH], MAX_TRACES, MAX_EVENTS, NOISE, THRESHOLDS, [False, True]):
    # Flat summary of the values that vary, stored with each experiment.
    params = {
        'model': model_name,
        'traces': traces,
        'events': events,
        'noise': noise,
        'threshold': threshold,
        'randomise': randomise,
    }
    # Arguments grouped under 'init' and 'run'.
    # NOTE(review): presumably forwarded to the generator constructor and to
    # its run call respectively -- confirm against exptools.
    exp_args = {
        'init': {
            'total_traces': traces,
            'min_event': events,  # min == max: every trace has the same length
            'max_event': events,
            'process_model': model,
            'log': None,
            'verbose': DEBUG,
            'seed': SEED,
            'threshold': threshold,
            'randomise': randomise,
        },
        'run': {
            'equal_rule_split': True,
            'high_variability': False,
            'generate_negatives_traces': False,
            'positive_noise_percentage': noise,
            'negative_noise_percentage': noise,
            'append_results': False,
        },
    }
    # round() rather than int(): float products such as 0.29 * 100 evaluate to
    # 28.999999999999996, which int() would truncate to 28, mislabelling the
    # experiment and risking id (and therefore output file) collisions.
    threshold_pct = round(threshold * 100)
    exp_id = f'_{model_name}_{traces:04}_{events:03}_{noise:02}_{threshold_pct:02}_{randomise}'

    # Same parameters for both generators; only id prefix, class and
    # description differ.
    for prefix, generator_class, description in (
            ('hamming', PBLogGeneratorHamming,
             'Hamming distance threshold implemented in ASP'),
            ('levenshtein', PBLogGeneratorLevenshtein,
             'Levenshtein distance threshold implemented in ASP'),
    ):
        EXPERIMENTS.append(dict(
            id_=prefix + exp_id,
            class_=generator_class,
            args=exp_args,
            model=model,
            parameters=params,
            description=description,
        ))


# Keep a record of the experiment definitions next to the results. Values that
# JSON cannot encode (classes, paths, bytes) are stored as their repr().
with EXPORT_PATH.joinpath('experiments.json').open('w') as fp:
    json.dump(EXPERIMENTS, fp, indent=2, default=repr)

pprint(EXPERIMENTS)

In [None]:
# Run every experiment in turn, saving each one's log, generated data and
# statistics as soon as it finishes, then write the combined results.
RESULTS: list[dict] = []

for exp_d in EXPERIMENTS:
    runner = Experiment.new(**exp_d).runner()

    # Banner line padded with dashes to 72 columns.
    banner = f'----- {runner.id} [{runner.experiment.class_.__name__}] '
    print(banner.ljust(72, '-'))

    # Log output produced during the run goes to a per-experiment file.
    log_file = EXPORT_PATH.joinpath(f'{runner.id}.log.json')
    with log_to_file(log_file, level=logging.DEBUG):
        runner.run(seed=SEED)

    results_frame = runner.generator.get_results_as_dataframe()
    results_frame.to_csv(EXPORT_PATH.joinpath(f'{runner.id}.csv'))

    run_stats = runner.stats(normalise=True, columns=['resource'])
    RESULTS.append(run_stats)
    with EXPORT_PATH.joinpath(f'{runner.id}_results.json').open('w') as out:
        json.dump(run_stats, out)

# Combined statistics of all runs, as JSON and as a flattened CSV table.
with EXPORT_PATH.joinpath('results.json').open('w') as out:
    json.dump(RESULTS, out)

df = pd.json_normalize(RESULTS)
df.to_csv(EXPORT_PATH.joinpath('results.csv'))
df