# **Quantitative evaluation script for discovery algorithms and conformance checking methods using four different dimensions for log-model comparison**

The following script evaluates security-relevant datasets using a cross-validation approach. In practice, manual splitting by certain criteria might be more valuable, but the goal of the evaluation is a discussion from the quantitative perspective, observing differences between discovery algorithms and conformance checking methods across different dimensions.

The applied method of k-fold cross validation for process mining is in line with the framework in Rozinat, Anne & Medeiros, A & Günther, C & Weijters, A. & Aalst, Wil. (2007). Towards an evaluation framework for process mining algorithms. Reactivity of Solids. 

Please note: these datasets are meant to support quantitative evaluation only; for case studies, different datasets are offered in the repository.

First, let's install the dependencies that we will require.

In [None]:
# Install PM4Py
!pip install pm4py

In [None]:
# Import XES log converting functionality from PM4Py
from pm4py.objects.conversion.log import converter as xes_converter
# Import XES log importing functionality from PM4Py
from pm4py.objects.log.importer.xes import importer as xes_importer
from pm4py.algo.filtering.pandas.cases import case_filter

# Import discovery algorithms from PM4Py
from pm4py.algo.discovery.alpha import algorithm as alpha_miner
from pm4py.algo.discovery.inductive import algorithm as inductive_miner
from pm4py.algo.discovery.heuristics import algorithm as heuristics_miner
from pm4py.algo.discovery.inductive.variants.im_clean.algorithm import Parameters
from pm4py.objects.conversion.process_tree import converter

# Import conformance checking algorithms from PM4Py
from pm4py.algo.conformance.tokenreplay import algorithm as token_replay
from pm4py.algo.conformance.alignments.petri_net import algorithm as alignments

# Import quality evaluation algorithms from PM4py
from pm4py.algo.evaluation.precision import algorithm as precision_evaluator
from pm4py.algo.evaluation.generalization import algorithm as generalization_evaluator
from pm4py.algo.evaluation.replay_fitness import algorithm as replay_fitness_evaluator
from pm4py.algo.evaluation.simplicity import algorithm as simplicity_evaluator


# Import helper functionalities
from pm4py.objects.log.obj import EventLog
from pm4py.util.xes_constants import KEY_NAME
from typing import Tuple
import random
import math
import pandas as pd
import pm4py



Next, load the datasets that will be used for testing.

In [None]:
# Load the event logs used for the evaluation.
# Adjust each path to wherever the XES files live in your environment. The
# datasets can be found in the GitHub repository of the submission or obtained
# from the original sources cited below.
# NOTE: each file is expected to be named after its dataset key with a plain
# ".xes" extension.
datasets = {}

# Dataset 1: Log of the Volvo IT incident management system
# Parent item: BPI Challenge 2013, logs of Volvo IT incident and problem management
# Source: BPI Challenge 2013, https://data.4tu.nl/articles/dataset/BPI_Challenge_2013_incidents/12693914/1
# Reference: http://www.win.tue.nl/bpi/2013/challenge
datasets['bpi_challenge_2013_incidents'] = xes_importer.apply('bpi_challenge_2013_incidents.xes')

# Dataset 2: Log of the Volvo IT problem management system (closed problems)
# Source: BPI Challenge 2013, https://www.win.tue.nl/bpi/doku.php?id=2013:challenge&redirect=1id=2013/challenge
# Reference: http://www.win.tue.nl/bpi/2013/challenge
datasets['BPI_Challenge_2013_closed_problems'] = xes_importer.apply('BPI_Challenge_2013_closed_problems.xes')

# Dataset 3: Log of a Dutch Financial Institute; the process is an application
# for a personal loan or overdraft
# Source: BPI Challenge 2012, Dutch Financial Institute
# Reference: https://www.win.tue.nl/bpi/doku.php?id=2012:challenge
datasets['BPI_Challenge_2012_Complete'] = xes_importer.apply('BPI_Challenge_2012_Complete.xes')

# Dataset 4: Log of a helpdesk process
# Source: Public Repository of Process Mining Datasets, https://github.com/ERamaM/ProcessMiningDatasets/tree/master/XES
datasets['Helpdesk'] = xes_importer.apply('Helpdesk.xes')


Preview

In [None]:
# Render every loaded event log as a pandas DataFrame so its content can be
# inspected at a glance; a few blank lines separate consecutive previews.
for key in datasets:
    ds = datasets[key]
    print(f"Loaded Dataset: {key}")
    df = xes_converter.apply(
        ds,
        variant=xes_converter.Variants.TO_DATA_FRAME,
    )
    display(df)
    print("\r\n\r\n\r\n")

In [None]:

def split(log: EventLog, test_ds_idxs=None) -> Tuple[EventLog, EventLog]:
    """
    Split an event log into a discovery (train) log and a test log.

    The discovery log is the one a discovery algorithm is applied to; the test
    log is the one that is afterwards replayed on / aligned with the discovered
    model. Splitting is done on whole cases (traces), never on single events.

    Parameters
    ----------
    log
        The event log to split.
    test_ds_idxs
        Positions (0-based indices into ``log``) of the cases that go into the
        test log. Every other case goes into the train log. Positions outside
        ``range(len(log))`` are ignored. Defaults to no test cases.

    Returns
    -------
    (train_log, test_log)
        Two new event logs that carry over the attributes, extensions,
        classifiers, omni-present attributes and properties of ``log``. The
        cases keep their relative order from ``log``.
    """
    # A ``None`` default avoids sharing one mutable list between calls.
    if test_ds_idxs is None:
        test_ds_idxs = []

    print(test_ds_idxs)

    # Set membership is O(1); testing against the list would cost
    # O(len(test_ds_idxs)) for every single case of the log.
    test_positions = set(test_ds_idxs)

    def _empty_log_like_source() -> EventLog:
        # Fresh, empty log that shares the metadata of the source log.
        return EventLog(list(), attributes=log.attributes, extensions=log.extensions,
                        classifiers=log.classifiers, omni_present=log.omni_present,
                        properties=log.properties)

    train_log = _empty_log_like_source()
    test_log = _empty_log_like_source()

    for position in range(len(log)):
        target = test_log if position in test_positions else train_log
        target.append(log[position])

    return train_log, test_log

In [None]:
# Central configuration of the evaluation run. Adjust it to choose which
# discovery algorithms, conformance checking methods and quality metrics are
# evaluated, and how many folds the cross validation uses.
#
# 'k-fold' is the K of the k-fold cross validation, e.g. 5 for a 5-fold
# cross validation.
params = {
    'k-fold': 5,
    # process mining algorithms used to discover a model
    'algo': [
        'Alpha',
        'Inductive',
        'Heuristic',
    ],
    # conformance checking methods to apply
    'conformance': [
        'Token-based replay',
        'Alignments',
    ],
    # evaluation metrics to calculate
    'evaluation': [
        'Fitness',
        'Precision',
        'Generalization',
        'Simplicity',
    ],
}

In [None]:
# crossvalidation_algotest calculates the quality metrics across the various algorithms and conformance checking methods based on k-fold cross validation methods for a given dataset
from pm4py.objects.conversion.process_tree import converter as pt_converter

def _discover_petri_net(algo: str, train_ds: EventLog):
    """Discover a Petri net from ``train_ds`` with the algorithm named ``algo``.

    ``algo`` is matched case-insensitively: "alpha" uses the alpha miner,
    "inductive" uses the inductive miner (IM_CLEAN variant, noise threshold
    0.2) and converts the resulting process tree to a Petri net, and any other
    name falls back to the heuristics miner with a dependency threshold of 0.5.

    Returns the tuple ``(net, initial_marking, final_marking)``.
    """
    name = algo.lower()
    if name == "alpha":
        return alpha_miner.apply(train_ds)
    if name == "inductive":
        ptree = inductive_miner.apply_tree(
            train_ds,
            parameters={Parameters.NOISE_THRESHOLD: 0.2},
            variant=inductive_miner.Variants.IM_CLEAN,
        )
        return pt_converter.apply(ptree, variant=pt_converter.Variants.TO_PETRI_NET)
    return heuristics_miner.apply(
        train_ds,
        parameters={heuristics_miner.Variants.CLASSIC.value.Parameters.DEPENDENCY_THRESH: 0.5},
    )


def _score_model(conformance: str, evaluation: str, test_ds: EventLog,
                 net, initial_marking, final_marking):
    """Compute one quality metric of a model against ``test_ds``.

    ``conformance`` ("token-based replay" or "alignments") selects the variant
    used for fitness and precision; ``evaluation`` selects the metric
    ("fitness", "precision", "generalization"; anything else means
    simplicity). Both are matched case-insensitively. An unrecognised
    ``conformance`` yields 0, i.e. it contributes nothing to the fold sum.
    """
    method = conformance.lower()
    if method not in ("token-based replay", "alignments"):
        return 0
    token_based = method == "token-based replay"

    metric = evaluation.lower()
    if metric == "fitness":
        if token_based:
            variant = replay_fitness_evaluator.Variants.TOKEN_BASED
        else:
            variant = replay_fitness_evaluator.Variants.ALIGNMENT_BASED
        fitness = replay_fitness_evaluator.apply(test_ds, net, initial_marking, final_marking, variant=variant)
        print(fitness)
        return fitness['average_trace_fitness']
    if metric == "precision":
        if token_based:
            variant = precision_evaluator.Variants.ETCONFORMANCE_TOKEN
        else:
            variant = precision_evaluator.Variants.ALIGN_ETCONFORMANCE
        return precision_evaluator.apply(test_ds, net, initial_marking, final_marking, variant=variant)
    if metric == "generalization":
        return generalization_evaluator.apply(test_ds, net, initial_marking, final_marking)
    # Simplicity depends on the model only, not on the test log.
    return simplicity_evaluator.apply(net)


def crossvalidation_algotest(log: EventLog):
    """Run the k-fold cross validation described by ``params`` on ``log``.

    For every discovery algorithm in ``params['algo']`` the log is split
    ``params['k-fold']`` times into a discovery (train) part and a test part.
    A model is discovered from the train part and every combination of
    ``params['conformance']`` and ``params['evaluation']`` is computed on the
    test part. The values are summed over the folds and divided by the number
    of folds.

    Each test fold holds ``len(log) // k`` consecutive cases, taken from the
    end of the log backwards; the first ``len(log) % k`` cases are therefore
    never part of a test fold.

    Parameters
    ----------
    log
        The event log to evaluate.

    Returns
    -------
    list
        ``[labels, values_algo_1, values_algo_2, ...]`` where ``labels`` holds
        one ``"<conformance>/<evaluation>"`` string per combination and each
        following list holds the averaged metric of one algorithm in the same
        order. A combination for which any fold raised an exception is
        reported as ``nan``. The list is empty when no algorithm is configured.

    Raises
    ------
    ValueError
        If ``params['k-fold']`` is smaller than 1.
    """
    k_fold = params['k-fold']
    algorithms = params['algo']
    conformance_checking = params['conformance']
    evaluations = params['evaluation']

    if k_fold < 1:
        raise ValueError(f"params['k-fold'] must be at least 1, got {k_fold}")

    cnt_logs = len(log)
    print(f'Total number of cases in the loaded event log is {cnt_logs}')

    # Number of cases in each test fold
    cnt_test = cnt_logs // k_fold

    combinations = [(conformance, evaluation)
                    for conformance in conformance_checking
                    for evaluation in evaluations]
    labels = [conformance + "/" + evaluation for conformance, evaluation in combinations]

    outputs = []
    for position, algo in enumerate(algorithms):
        totals = [0] * len(combinations)

        for i in range(k_fold):
            # Fold i covers the i-th block of cnt_test cases counted from the end
            end_idx = cnt_logs - cnt_test * i
            start_idx = end_idx - cnt_test
            test_ds_idxs = list(range(start_idx, end_idx))
            print(f'Number of cases in test event log is {len(test_ds_idxs)}')
            train_ds, test_ds = split(log, test_ds_idxs)

            # The split and the discovered model depend only on the algorithm
            # and the fold, so they are computed once here and reused for
            # every conformance/evaluation combination.
            net, initial_marking, final_marking = _discover_petri_net(algo, train_ds)

            is_sound = pm4py.objects.petri_net.utils.check_soundness.check_easy_soundness_net_in_fin_marking(net, initial_marking, final_marking)
            print(f'Is easy soundness : {is_sound}')

            for slot, (conformance, evaluation) in enumerate(combinations):
                try:
                    totals[slot] = totals[slot] + _score_model(
                        conformance, evaluation, test_ds, net, initial_marking, final_marking)
                except Exception as exc:
                    # ``Exception`` (not a bare ``except``) keeps Ctrl-C /
                    # kernel interrupts working during long evaluations.
                    print(f'Exception occurred {algo}/{conformance}/{evaluation}: {exc!r}')
                    # nan propagates through the remaining folds and the mean
                    totals[slot] = math.nan

        if position == 0:
            outputs.append(labels)
        outputs.append([total / k_fold for total in totals])

    print(outputs)
    return outputs



In [None]:
# Compute the quality metrics configured in `params` for every loaded event log.
# `result` maps the event log name to a DataFrame with one row per
# "<conformance>/<evaluation>" combination (column 'Conformance checking')
# and one column of averaged metric values per discovery algorithm.
result = {}
algorithms = params['algo']  # the configuration dict is named `params`
for key, ds in datasets.items():
    # metrics[0] holds the row labels, metrics[1:] one value list per algorithm
    metrics = crossvalidation_algotest(ds)
    df = pd.DataFrame()
    df['Conformance checking'] = metrics[0]
    for algo, values in zip(algorithms, metrics[1:]):
        df[algo] = values

    result[key] = df

In [None]:
# Show the table of quality metric values for each evaluated event log
for key in result:
    res = result[key]
    print(f"The result of '{key}' event log")
    display(res)

In [None]:
# Plot bar charts of process mining quality metrics
import matplotlib.pyplot as plt
import numpy as np

def overlapped_bar(df, show=False, width=0.25, alpha=1,
                   title='', xlabel='', ylabel='', **plot_kwargs):
    """Plot the columns of ``df`` as grouped bars, one group per row.

    Parameters
    ----------
    df
        DataFrame to plot; every column becomes one bar series and the index
        values become the x tick labels.
    show
        Call ``plt.show()`` after drawing when true.
    width
        Width of a single bar.
    alpha
        Opacity of every series except the first, which is always drawn opaque.
    title, xlabel, ylabel
        Plot title and axis labels. An empty ``xlabel`` falls back to the name
        of the index of ``df``.
    **plot_kwargs
        Extra keyword arguments forwarded to ``plt.bar``; ``color`` and
        ``label`` are always overridden per series.

    Returns
    -------
    The current matplotlib figure (``plt.gcf()``).
    """
    plt.rcParams['figure.figsize'] = (24, 5)
    xlabel = xlabel or df.index.name
    n_groups = len(df)
    n_series = len(df.columns)
    indices = np.arange(n_groups)
    colors = ['green', 'blue', 'red']

    # Centre each group of bars on its tick. For three series this is the
    # offset (i - 1); other series counts are centred the same way.
    centre = (n_series - 1) / 2

    for i, label in enumerate(df.columns):
        # Cycle through the palette so that series beyond the third are still
        # drawn instead of being silently dropped.
        bar_kwargs = {**plot_kwargs, 'color': colors[i % len(colors)], 'label': label}
        plt.bar(indices + width * (i - centre), df[label], width=width,
                alpha=alpha if i else 1, **bar_kwargs)

    # Tick labels do not depend on the series, so they are set once.
    plt.xticks(indices, ['{}'.format(idx) for idx in df.index.values])
    plt.legend()
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.grid()
    if show:
        plt.show()
    return plt.gcf()

# Draw one grouped bar chart per event log: the rows are the
# "<conformance>/<evaluation>" combinations, the bars the discovery algorithms.
for key in result:
    res = result[key]
    print(f"\nOverlay bar chart with evaluation metrics vs. conformance checking methods for '{key}' event log")
    df = res.set_index('Conformance checking')
    overlapped_bar(df, title=f'{key}', show=True)
