# Tests for the experiments code

In [None]:
from exptools import *

from datetime import datetime, timezone
from pathlib import Path
from pprint import pprint
import subprocess

import pandas as pd

Parameters for the experiments

In [None]:
# --- Parameter grids -------------------------------------------------------
# Each list holds the values combined via itertools.product further down.
MAX_TRACES = [10]   # forwarded as the generators' `total_traces`
MAX_EVENTS = [20]   # used for both `min_event` and `max_event`
NOISE = [0]         # used for both positive and negative noise percentage
THRESHOLDS = [0.3]  # NOTE(review): not referenced in the visible cells — confirm usage

# --- Run-wide switches -----------------------------------------------------
SEED = b'\x81\x97u+'  # passed as `seed` to the generators and to runner.run()
DEBUG = True          # forwarded as the generators' `verbose` flag

# --- Input model -----------------------------------------------------------
MODEL_NAME = 'variability_model'  # the model file is "<MODEL_NAME>.decl"

# --- Output location: <OUTPUT_DIR>/<EXPORT_PREFIX>_<EXPORT_TAG> ------------
OUTPUT_DIR = 'output'
EXPORT_PREFIX = 'tests'
EXPORT_TAG = None  # None is replaced by a UTC timestamp in the setup cell


In [None]:
# Resolve the model file and create the export directory for this run.
MODEL_PATH = Path(f"{MODEL_NAME}.decl")
# Without an explicit tag, fall back to the current UTC time so that each run
# gets its own directory name.
EXPORT_TAG = EXPORT_TAG if EXPORT_TAG is not None else datetime.now(tz=timezone.utc).strftime(r'%Y-%m-%dT%H%M%SZ')

EXPORT_PATH = Path(OUTPUT_DIR, EXPORT_PREFIX + '_' + EXPORT_TAG)
# exist_ok=False makes this raise FileExistsError rather than silently reuse
# a directory left over from a previous run.
EXPORT_PATH.mkdir(parents=True, exist_ok=False)

print(f'Results written on directory <{EXPORT_PATH.as_posix()}>')

# Record the code version next to the results; this is best effort and must
# never abort the notebook.
try:
    # stderr is captured so that CalledProcessError.stderr carries git's
    # message; without it the attribute is None.
    git_version = subprocess.check_output(
        ['git', 'describe', '--dirty'], stderr=subprocess.PIPE, text=True
    ).strip()
except subprocess.CalledProcessError as e:
    # git ran but failed (e.g. not a repository, or nothing to describe).
    print(f"Git info not available: {(e.stderr or '').strip()}")
except OSError as e:
    # git could not be started at all (e.g. the executable is not installed).
    print(f'Git info not available: {e}')
else:
    print('Git describe: ' + git_version)

In [None]:
from Declare4Py.ProcessMiningTasks.LogGenerator.PositionalBased.PositionalBasedLogGeneratorNG import PBLogGeneratorHamming, PBLogGeneratorRandom, PBLogGeneratorOrig, PBLogGeneratorLevenshtein


## Testing experiments definition

In [None]:
# Build one experiment for every combination of generator-class spec and
# model spec below, printing the definition, the Experiment and its runner.
generator_specs = [
    # the class object itself ...
    PBLogGeneratorRandom,
    # ... and the same class given as a dotted-path string
    'Declare4Py.ProcessMiningTasks.LogGenerator.PositionalBased.PositionalBasedLogGeneratorNG.PBLogGeneratorRandom',
]
model_specs = [
    MODEL_PATH,                                                     # a Path
    PositionalBasedModel().parse_from_file(MODEL_PATH.as_posix()),  # result of parse_from_file
    MODEL_PATH.read_text(),                                         # the file's text
]

for id_, class_, model, traces, events, noise, seed in itertools.product(
    ['random'], generator_specs, model_specs, MAX_TRACES, MAX_EVENTS, NOISE, [SEED]
):
    init_args = {
        'total_traces': traces,
        'min_event': events,
        'max_event': events,
        'process_model': None,
        'log': None,
        'verbose': DEBUG,
        'seed': seed,
    }
    run_args = {
        'equal_rule_split': True,
        'high_variability': False,
        'generate_negatives_traces': False,
        'positive_noise_percentage': noise,
        'negative_noise_percentage': noise,
        'append_results': False,
    }
    # The same mapping is printed and then expanded into Experiment.new().
    exp_def = {
        'id_': id_,
        'class_': class_,
        'model': model,
        'parameters': {'traces': traces, 'events': events, 'seed': seed},
        'args': {'init': init_args, 'run': run_args},
        'description': 'Random generation of models',
    }

    print('-' * 5 + f' {id_}, {class_}, {type(model).__name__}, {traces}, {events}, {noise}, {seed}')
    pprint(exp_def)
    exp = Experiment.new(**exp_def)
    pprint(exp)
    pprint(exp.runner())

## Testing generators

In [None]:
for id_, class_, model, traces, events, noise, seed in [
    ('random', PBLogGeneratorRandom, MODEL_PATH, 10, 20, 0, SEED),
    ('orig', PBLogGeneratorOrig, MODEL_PATH, 10, 20, 0, SEED),
]:
    exp_def = {
        'id_': id_,
        'class_': class_,
        'model': model,
        'parameters': {'traces': traces, 'events': events, 'noise': noise, 'seed': seed},
        'args': {'init': {
                    'total_traces': traces,
                    'min_event': events,
                    'max_event': events,
                    'verbose': DEBUG,
                    'seed': seed},
                'run': {'equal_rule_split': True,
                        'high_variability': False,
                        'generate_negatives_traces': False,
                        'positive_noise_percentage': noise,
                        'negative_noise_percentage': noise,
                        'append_results': False}},
        'description': 'Random generation of models'}
    print('-' * 5 + f' {id_}, {class_}, {type(model).__name__}, {traces}, {events}, {noise}, {seed}')
    runner = Experiment.new(**exp_def).runner()
    with log_to_file(EXPORT_PATH.joinpath(f'{runner.id}.log.json'), level=logging.DEBUG):
        %time runner.run(seed=seed)
    pprint(runner.stats())


In [None]:
for id_, class_, model, traces, events, noise, seed, threshold, randomise in [
    ('levenshtein', PBLogGeneratorLevenshtein, MODEL_PATH, 10, 20, 0, SEED, .3, False),
    ('hamming', PBLogGeneratorHamming, MODEL_PATH, 10, 20, 0, SEED, .3, False),
]:
    exp_def = {
        'id_': id_,
        'class_': class_,
        'model': model,
        'parameters': {'traces': traces, 'events': events, 'noise': noise, 'seed': seed},
        'args': {'init': {
                    'total_traces': traces,
                    'min_event': events,
                    'max_event': events,
                    'verbose': DEBUG,
                    'seed': seed,
                    'threshold': threshold,
                    'randomise': randomise},
                'run': {'equal_rule_split': True,
                        'high_variability': False,
                        'generate_negatives_traces': False,
                        'positive_noise_percentage': noise,
                        'negative_noise_percentage': noise,
                        'append_results': False}},
        'description': 'Random generation of models'}
    print('-' * 5 + f' {id_}, {class_}, {type(model).__name__}, {traces}, {events}, {noise}, {seed}')
    runner = Experiment.new(**exp_def).runner()
    with log_to_file(EXPORT_PATH.joinpath(f'{runner.id}.log.json'), level=logging.DEBUG):
        %time runner.run(seed=seed)
    pprint(runner.stats())


## Testing reproducibility

In [None]:
# Settings shared by every reproducibility experiment below.
traces = 10
events = 20
noise = 0
seed = SEED
model = MODEL_PATH

# Three experiment definitions, later indexed by their 'id_':
#   'orig'   -> PositionalBasedLogGenerator (init keys `min_events`/`max_events`, no seed)
#   'redo'   -> PBLogGeneratorOrig
#   'random' -> PBLogGeneratorRandom
reproducibility_defs = [
    {
        'id_': 'orig',
        'class_': PositionalBasedLogGenerator,
        'model': model,
        'parameters': {'traces': traces, 'events': events, 'noise': noise, 'seed': seed},
        'description': '',
        'args': {
            'init': {
                'total_traces': traces,
                'min_events': events,
                'max_events': events,
                'verbose': DEBUG,
            },
            'run': {
                'equal_rule_split': True,
                'high_variability': False,
                'generate_negatives_traces': False,
                'positive_noise_percentage': noise,
                'negative_noise_percentage': noise,
                'append_results': False,
            },
        },
    },
    {
        'id_': 'redo',
        'class_': PBLogGeneratorOrig,
        'model': model,
        'parameters': {'traces': traces, 'events': events, 'noise': noise, 'seed': seed},
        'description': '',
        'args': {
            'init': {
                'total_traces': traces,
                'min_event': events,
                'max_event': events,
                'verbose': DEBUG,
                'seed': seed,
            },
            'run': {
                'equal_rule_split': True,
                'high_variability': False,
                'generate_negatives_traces': False,
                'positive_noise_percentage': noise,
                'negative_noise_percentage': noise,
                'append_results': False,
            },
        },
    },
    {
        'id_': 'random',
        'class_': PBLogGeneratorRandom,
        'model': model,
        'parameters': {'traces': traces, 'events': events, 'noise': noise, 'seed': seed},
        'description': '',
        'args': {
            'init': {
                'total_traces': traces,
                'min_event': events,
                'max_event': events,
                'log': None,
                'verbose': True,
                'seed': seed,
            },
            'run': {
                'equal_rule_split': True,
                'high_variability': False,
                'generate_negatives_traces': False,
                'positive_noise_percentage': noise,
                'negative_noise_percentage': noise,
                'append_results': False,
            },
        },
    },
]

# Index the definitions by experiment id for lookup in the following cells.
REPRODUCIBILITY_EXP = {definition['id_']: definition for definition in reproducibility_defs}

pprint(REPRODUCIBILITY_EXP)

In [None]:
# Check every reproducibility experiment twice, first with seed=None and then
# with the fixed SEED, printing whatever check_reproducibility() returns.
with log_to_file(EXPORT_PATH / f'{EXPORT_PREFIX}_reprod.log.json'):
    # Build all experiments before running any check.
    experiments = [Experiment.new(**exp_def) for exp_def in REPRODUCIBILITY_EXP.values()]
    for exp in experiments:
        for seed in (None, SEED):
            header = '-' * 5 + f' checking reproducibility of {exp.id_} with seed={seed} '
            # Pad the banner with dashes to 72 columns (no-op if already longer).
            print(header.ljust(72, '-'))
            diff = exp.check_reproducibility(seed=seed)
            print(diff)


### Compare results between different implementations

In [None]:
from IPython.display import display

r1, r2 = tuple(Experiment.new(**REPRODUCIBILITY_EXP[i]).runner() for i in ('orig', 'redo'))
with log_to_file(EXPORT_PATH.joinpath(f'{EXPORT_PREFIX}_diff.log.json')):
    for r in (r1, r2):
        print('-' * 5 + f' {r.id}')
        %time r.run(seed=SEED)

display(compare_results(r1.generator, r2.generator, only=['concept:name:order', 'concept:name']))
display(pd.json_normalize([r.stats( normalise=True, columns=['resource']) for r in (r1, r2)]))