In [None]:
from exptools import *

In [None]:
from Declare4Py.ProcessMiningTasks.LogGenerator.PositionalBased.PositionalBasedLogGeneratorNG import PBLogGeneratorHamming, PBLogGeneratorRandom, PBLogGeneratorOrig, PBLogGeneratorLevenshtein


In [None]:
from datetime import datetime, timezone
from pathlib import Path


# Seed handed to the seeded generators and to the reproducibility checks in this notebook.
SEED = b'\x81\x97u+'

# Model file to parse; the path is relative to the notebook's working directory.
MODEL_NAME = 'caise2025_experimental_model'
MODEL_PATH = Path(f"{MODEL_NAME}.decl")

# Each execution writes its logs to output/tests_<UTC timestamp>.
# The timestamp is formatted with strftime rather than isoformat() so that the folder
# name contains neither ':' nor '+' (both appear in '...T12:34+00:00'), which keeps the
# path valid on filesystems that reject those characters (e.g. Windows/NTFS).
EXPORT_PREFIX = 'tests_'
EXPORT_PATH = Path('output', EXPORT_PREFIX + datetime.now(timezone.utc).strftime('%Y-%m-%dT%H%MZ'))
# exist_ok=True keeps this cell re-runnable: the timestamp only has minute resolution,
# so running the cell twice within the same minute would otherwise raise FileExistsError.
EXPORT_PATH.mkdir(parents=True, exist_ok=True)

In [None]:
# Experiment 'random': run PBLogGeneratorRandom once and display its results.
traces, events, noise = 10, 20, 0
model = PositionalBasedModel().parse_from_file(MODEL_PATH.as_posix())

# Settings grouped under 'init' and 'run'; presumably forwarded by Experiment to the
# generator's constructor and to its run call respectively -- confirm in exptools.
init_args = {
    'total_traces': traces,
    'min_event': events,   # min == max: every trace gets exactly `events` events
    'max_event': events,
    'process_model': model,
    'log': None,
    'verbose': True,
    'seed': SEED,
}
run_args = {
    'equal_rule_split': True,
    'high_variability': False,
    'generate_negatives_traces': False,
    'positive_noise_percentage': noise,
    'negative_noise_percentage': noise,
    'append_results': False,
}

exp = Experiment(
    id_='random',
    class_=PBLogGeneratorRandom,
    model=model,
    parameters={'traces': traces, 'events': events},
    args={'init': init_args, 'run': run_args},
)
g: PBLogGeneratorOrig = exp.new_generator()

# The log file is named after the experiment id and lives in this run's export folder.
log_file = EXPORT_PATH / f'{exp.id_}.log'
with log_to_file(log_file, level=logging.DEBUG):
    exp.run_generator(g)

exp.get_results(g)

In [None]:
# Experiment 'levenshtein': run PBLogGeneratorLevenshtein once and display its results.
traces, events, noise = 10, 20, 0
model = PositionalBasedModel().parse_from_file(MODEL_PATH.as_posix())

# Settings grouped under 'init' and 'run'; presumably forwarded by Experiment to the
# generator's constructor and to its run call respectively -- confirm in exptools.
init_args = {
    'total_traces': traces,
    'min_event': events,   # min == max: every trace gets exactly `events` events
    'max_event': events,
    'process_model': model,
    'log': None,
    'verbose': True,
    'seed': SEED,
    # Extra settings passed only to the distance-based generators in this notebook.
    'threshold': 0.3,
    'randomise': True,
}
run_args = {
    'equal_rule_split': True,
    'high_variability': False,
    'generate_negatives_traces': False,
    'positive_noise_percentage': noise,
    'negative_noise_percentage': noise,
    'append_results': False,
}

exp = Experiment(
    id_='levenshtein',
    class_=PBLogGeneratorLevenshtein,
    model=model,
    parameters={'traces': traces, 'events': events},
    args={'init': init_args, 'run': run_args},
)
g: PBLogGeneratorOrig = exp.new_generator()

# The log file is named after the experiment id and lives in this run's export folder.
log_file = EXPORT_PATH / f'{exp.id_}.log'
with log_to_file(log_file, level=logging.DEBUG):
    exp.run_generator(g)

exp.get_results(g)

In [None]:
# Experiment 'hamming': run PBLogGeneratorHamming once and display its results.
traces, events, noise = 10, 20, 0
model = PositionalBasedModel().parse_from_file(MODEL_PATH.as_posix())

# Settings grouped under 'init' and 'run'; presumably forwarded by Experiment to the
# generator's constructor and to its run call respectively -- confirm in exptools.
init_args = {
    'total_traces': traces,
    'min_event': events,   # min == max: every trace gets exactly `events` events
    'max_event': events,
    'process_model': model,
    'log': None,
    'verbose': True,
    'seed': SEED,
    # Extra settings passed only to the distance-based generators in this notebook.
    'threshold': 0.3,
    'randomise': True,
}
run_args = {
    'equal_rule_split': True,
    'high_variability': False,
    'generate_negatives_traces': False,
    'positive_noise_percentage': noise,
    'negative_noise_percentage': noise,
    'append_results': False,
}

exp = Experiment(
    id_='hamming',
    class_=PBLogGeneratorHamming,
    model=model,
    parameters={'traces': traces, 'events': events},
    args={'init': init_args, 'run': run_args},
)
g: PBLogGeneratorOrig = exp.new_generator()

# The log file is named after the experiment id and lives in this run's export folder.
log_file = EXPORT_PATH / f'{exp.id_}.log'
with log_to_file(log_file, level=logging.DEBUG):
    exp.run_generator(g)

exp.get_results(g)

## Testing reproducibility

In [None]:

# Experiments used by the reproducibility checks: the legacy generator ('orig'),
# its re-implementation ('redo') and the random variant ('random').
traces, events, noise = 10, 20, 0
model = PositionalBasedModel().parse_from_file(MODEL_PATH.as_posix())


def _run_args():
    """Build the 'run' settings shared by all three experiments.

    A new dict is returned on every call so that no two experiments share one object.
    """
    return {
        'equal_rule_split': True,
        'high_variability': False,
        'generate_negatives_traces': False,
        'positive_noise_percentage': noise,
        'negative_noise_percentage': noise,
        'append_results': False,
    }


REPRODUCIBILITY_EXP = {
    # The legacy class is configured with different keyword names
    # ('min_events'/'max_events'/'pb_model') and receives no 'seed' entry.
    'orig': Experiment(
        id_='orig',
        class_=PositionalBasedLogGenerator,
        model=model,
        args={
            'init': {
                'total_traces': traces,
                'min_events': events,
                'max_events': events,
                'pb_model': model,
                'verbose': False,
            },
            'run': _run_args(),
        },
    ),
    # NOTE(review): id_ is 'orig' although the dictionary key is 'redo' -- confirm
    # whether the duplicated id is intentional.
    'redo': Experiment(
        id_='orig',
        class_=PBLogGeneratorOrig,
        model=model,
        args={
            'init': {
                'total_traces': traces,
                'min_event': events,
                'max_event': events,
                'process_model': model,
                'verbose': False,
                'seed': SEED,
            },
            'run': _run_args(),
        },
    ),
    'random': Experiment(
        id_='random',
        class_=PBLogGeneratorRandom,
        model=model,
        args={
            'init': {
                'total_traces': traces,
                'min_event': events,
                'max_event': events,
                'process_model': model,
                'log': None,
                'verbose': False,
                'seed': SEED,
            },
            'run': _run_args(),
        },
    ),
}

REPRODUCIBILITY_EXP

In [None]:
# Check every experiment twice: once without a seed and once with SEED.
with log_to_file(EXPORT_PATH.joinpath(f'{EXPORT_PREFIX}.log')):
    # Iterate over (key, experiment) pairs: the 'orig' and 'redo' experiments both carry
    # id_='orig', so the dictionary key is printed as well to tell their reports apart.
    for (exp_key, exp), seed in itertools.product(REPRODUCIBILITY_EXP.items(), (None, SEED)):
        header = '-' * 5 + f' checking reproducibility of {exp_key} (id={exp.id_}) with seed={seed} '
        # Pad the header with dashes up to 72 columns; longer headers are left unpadded.
        print(header + '-' * (72 - len(header)))
        diff = exp.check_reproducibility(seed=seed)
        print(diff)


### Compare results between different implementations

In [None]:
exp1 = REPRODUCIBILITY_EXP['orig']
exp2 = REPRODUCIBILITY_EXP['redo']
g1 = exp1.new_generator()
g2 = exp2.new_generator()
with log_to_file(EXPORT_PATH.joinpath(f'{EXPORT_PREFIX}.log')):
    with random_seed(SEED):
        %time exp1.run_generator(g1)
    with random_seed(SEED):
        %time exp2.run_generator(g2)

compare_results(g1, g2, only=['concept:name:order', 'concept:name'])

In [None]:
# Show the normalised 'resource' results of both implementations in one table.
pd.json_normalize([
    experiment.get_results(generator, normalise=True, columns=['resource'])
    for experiment, generator in ((exp1, g1), (exp2, g2))
])