In [2]:
import pandas as pd

In [3]:
def calc_precision(df, col='combined'):
    """Return the precision of the predictions in ``df[col]`` against ``df['y_true']``.

    Precision is ``tp / (tp + fp)`` where a row counts as a true positive when
    both ``df[col]`` and ``df['y_true']`` equal ``True``, and as a false
    positive when ``df[col]`` equals ``True`` and ``df['y_true']`` equals
    ``False``.

    Args:
        df: DataFrame holding the prediction column ``col`` and a ``y_true``
            column.
        col: Name of the prediction column to score. Defaults to ``'combined'``.

    Returns:
        The precision as a float, or ``0.0`` when no row is predicted positive
        (``tp + fp == 0``), instead of raising ``ZeroDivisionError``.
    """
    # Explicit comparison against True/False (rather than truthiness) keeps the
    # counting rule unchanged for non-boolean columns.
    predicted_pos = df[col].eq(True)
    tp = int((predicted_pos & df['y_true'].eq(True)).sum())
    fp = int((predicted_pos & df['y_true'].eq(False)).sum())

    denominator = tp + fp
    if denominator == 0:
        # Nothing was predicted positive: precision is undefined, report 0.0.
        return 0.0
    return tp / denominator

def calc_recall(df, col='combined'):
    """Return the recall of the predictions in ``df[col]`` against ``df['y_true']``.

    Recall is ``tp / (tp + fn)`` where a row counts as a true positive when
    both ``df[col]`` and ``df['y_true']`` equal ``True``, and as a false
    negative when ``df[col]`` equals ``False`` and ``df['y_true']`` equals
    ``True``.

    Args:
        df: DataFrame holding the prediction column ``col`` and a ``y_true``
            column.
        col: Name of the prediction column to score. Defaults to ``'combined'``.

    Returns:
        The recall as a float, or ``0.0`` when there is no actual positive row
        to recover (``tp + fn == 0``), instead of raising ``ZeroDivisionError``.
    """
    # Explicit comparison against True/False (rather than truthiness) keeps the
    # counting rule unchanged for non-boolean columns.
    actual_pos = df['y_true'].eq(True)
    tp = int((df[col].eq(True) & actual_pos).sum())
    fn = int((df[col].eq(False) & actual_pos).sum())

    denominator = tp + fn
    if denominator == 0:
        # No positive ground-truth rows: recall is undefined, report 0.0.
        return 0.0
    return tp / denominator

def calc_f1(r, p):
    """Return the F1 score, the harmonic mean of recall ``r`` and precision ``p``.

    Args:
        r: Recall value.
        p: Precision value.

    Returns:
        ``2 * p * r / (p + r)``, or ``0.0`` when ``p + r == 0`` (both metrics
        are zero), instead of raising ``ZeroDivisionError``.
    """
    if p + r == 0:
        # Harmonic mean is undefined when both inputs are zero.
        return 0.0
    return 2 * ((p * r) / (p + r))

In [4]:
def calc_preformance(nn_model, process_model, rule=1):
    """Combine neural-network and process-model predictions and print the metrics.

    The two prediction columns are merged according to ``rule`` into a
    ``combined`` column, which is then scored with ``calc_precision``,
    ``calc_recall`` and ``calc_f1``. The results are printed between two
    separator lines.

    Args:
        nn_model: DataFrame with exactly two columns, which are relabelled
            positionally as ``nn_y_pred`` and ``y_true``. The input frame is
            copied and not modified.
        process_model: DataFrame with a ``y_pred`` column; pandas aligns it
            with ``nn_model`` on the index when it is assigned.
        rule: How to combine the predictions:
            0 = process model only, 1 = OR, 2 = AND, 3 = XOR.

    Returns:
        None. The metrics are only printed.

    Raises:
        ValueError: If ``rule`` is not one of 0, 1, 2 or 3.
    """
    rule_titles = {
        0: 'Only using process model',
        1: 'Using OR rule',
        2: 'Using AND rule',
        3: 'Using XOR rule',
    }
    # Validate first so an invalid rule fails before any partial report is printed.
    if rule not in rule_titles:
        raise ValueError('rule must be 0, 1, 2, or 3')

    merged_df = nn_model.copy()
    merged_df.columns = ['nn_y_pred', 'y_true']
    merged_df['process_y_pred'] = process_model['y_pred']

    print('=' * 20)
    print(rule_titles[rule])

    nn_pred = merged_df['nn_y_pred']
    process_pred = merged_df['process_y_pred']
    if rule == 0:
        merged_df['combined'] = process_pred
    elif rule == 1:
        merged_df['combined'] = nn_pred | process_pred
    elif rule == 2:
        merged_df['combined'] = nn_pred & process_pred
    else:
        merged_df['combined'] = nn_pred ^ process_pred

    p = calc_precision(merged_df)
    r = calc_recall(merged_df)
    f1 = calc_f1(r, p)

    print(f'Precision: {p}')
    print(f'Recall: {r}')
    print(f'F1: {f1}')
    print('=' * 20)

    

# Regular DAPNN

In [5]:
# Baseline: score the DAPNN predictions on their own, straight from the saved CSV.
dapnn = pd.read_csv('../save_model/dapnn_y_pred_y_true.csv')

p = calc_precision(dapnn, col='y_pred')
r = calc_recall(dapnn, col='y_pred')
f1 = calc_f1(r, p)

# Report layout: separator, title, the three metrics, separator.
print('=' * 20, 'Only using DAPNN', sep='\n')
print(f'Precision: {p}', f'Recall: {r}', f'F1: {f1}', sep='\n')
print('=' * 20)

Only using DAPNN
Precision: 0.794545213320445
Recall: 0.9156455452897364
F1: 0.8508077610001623


## IM (Inductive Miner)

In [6]:
# Load the IM predictions and score them combined with DAPNN under each
# rule (0 = process model only, 1 = OR, 2 = AND, 3 = XOR).
IM = pd.read_csv('../save_model/IM_y_pred_y_true.csv')
for rule in range(4):
    calc_preformance(dapnn, IM, rule=rule)

Only using process model
Precision: 0.48542458808618505
Recall: 0.03346220212742721
F1: 0.06260855350538425
Using OR rule
Precision: 0.7770107932283581
Recall: 0.9182993687613307
F1: 0.8417675266039983
Using AND rule
Precision: 0.7797125483692647
Recall: 0.03080837865583295
F1: 0.05927466801143049
Using XOR rule
Precision: 0.7769173406757299
Recall: 0.8874909901054977
F1: 0.8285312289716769


## Alpha Miner

In [7]:
# Load the alpha predictions and score them combined with DAPNN under each
# rule (0 = process model only, 1 = OR, 2 = AND, 3 = XOR).
alpha = pd.read_csv('../save_model/alpha_y_pred_y_true.csv')
for rule in range(4):
    calc_preformance(dapnn, alpha, rule=rule)

Only using process model
Precision: 0.4775328462575648
Recall: 0.6652687678832755
F1: 0.5559804682152146
Using OR rule
Precision: 0.5450736064398742
Recall: 0.9716925496363279
F1: 0.6983857864434317
Using AND rule
Precision: 0.79859132750204
Recall: 0.6092217635366839
F1: 0.6911701844268642
Using XOR rule
Precision: 0.3554294281430713
Recall: 0.36247078609964395
F1: 0.35891557535713126


## Heuristic Miner

In [8]:
# Load the heuristic predictions and score them combined with DAPNN under each
# rule (0 = process model only, 1 = OR, 2 = AND, 3 = XOR).
heuristic = pd.read_csv('../save_model/heuristic_y_pred_y_true.csv')
for rule in range(4):
    calc_preformance(dapnn, heuristic, rule=rule)

Only using process model
Precision: 0.47690625
Recall: 1.0
F1: 0.6458179048263897
Using OR rule
Precision: 0.47690625
Recall: 1.0
F1: 0.6458179048263897
Using AND rule
Precision: 0.794545213320445
Recall: 0.9156455452897364
F1: 0.8508077610001623
Using XOR rule
Precision: 0.08931751428108883
Recall: 0.08435445471026363
F1: 0.08676506930871021


# DAPNN Switched

In [9]:
# Score the "switched" DAPNN predictions on their own, straight from the saved CSV.
dapnn_switched = pd.read_csv('../save_model/dapnn_y_pred_y_true_switched.csv')

p = calc_precision(dapnn_switched, col='y_pred')
r = calc_recall(dapnn_switched, col='y_pred')
f1 = calc_f1(r, p)


# Title names the switched variant so this report cannot be mistaken for the
# baseline DAPNN report, which otherwise prints an identical header.
print('='*20 + '\nOnly using DAPNN Switched')
print(f'Precision: {p}')
print(f'Recall: {r}')
print(f'F1: {f1}')
print('='*20)

Only using DAPNN
Precision: 0.783751069557777
Recall: 0.8202826376602669
F1: 0.8016008537886872


## IM

In [10]:
# Switched DAPNN combined with the IM predictions, one report per rule
# (0 = process model only, 1 = OR, 2 = AND, 3 = XOR).
for rule in range(4):
    calc_preformance(dapnn_switched, IM, rule=rule)

Only using process model
Precision: 0.48542458808618505
Recall: 0.03346220212742721
F1: 0.06260855350538425
Using OR rule
Precision: 0.7651578031054518
Recall: 0.8255575213507197
F1: 0.7942109686909015
Using AND rule
Precision: 0.7699880668257757
Recall: 0.02818731843697442
F1: 0.0543837838976801
Using XOR rule
Precision: 0.7649881603486934
Recall: 0.7973702029137453
F1: 0.7808436002737851


## Alpha Miner

In [11]:
# Switched DAPNN combined with the alpha predictions, one report per rule
# (0 = process model only, 1 = OR, 2 = AND, 3 = XOR).
for rule in range(4):
    calc_preformance(dapnn_switched, alpha, rule=rule)

Only using process model
Precision: 0.4775328462575648
Recall: 0.6652687678832755
F1: 0.5559804682152146
Using OR rule
Precision: 0.538039941939036
Recall: 0.9391695607539916
F1: 0.6841422763904248
Using AND rule
Precision: 0.78705597331907
Recall: 0.5463818447895507
F1: 0.6449990975427378
Using XOR rule
Precision: 0.37361062057216465
Recall: 0.39278771596444095
F1: 0.38295924017206867


## Heuristic Miner

In [12]:
# Switched DAPNN combined with the heuristic predictions, one report per rule
# (0 = process model only, 1 = OR, 2 = AND, 3 = XOR).
for rule in range(4):
    calc_preformance(dapnn_switched, heuristic, rule=rule)

Only using process model
Precision: 0.47690625
Recall: 1.0
F1: 0.6458179048263897
Using OR rule
Precision: 0.47690625
Recall: 1.0
F1: 0.6458179048263897
Using AND rule
Precision: 0.783751069557777
Recall: 0.8202826376602669
F1: 0.8016008537886872
Using XOR rule
Precision: 0.1711207703346297
Recall: 0.1797173623397331
F1: 0.1753137451260307


# DAPNN Switched (repeated run)

In [13]:
# Reload the switched DAPNN predictions and report their stand-alone metrics.
dapnn_switched = pd.read_csv('../save_model/dapnn_y_pred_y_true_switched.csv')

# The header goes out before the metrics are computed, as in the original cell order.
print('=' * 20, 'Only using DAPNN Switched', sep='\n')

p = calc_precision(dapnn_switched, col='y_pred')
r = calc_recall(dapnn_switched, col='y_pred')
f1 = calc_f1(r, p)

print(f'Precision: {p}', f'Recall: {r}', f'F1: {f1}', sep='\n')
print('=' * 20)

Only using DAPNN Switched
Precision: 0.783751069557777
Recall: 0.8202826376602669
F1: 0.8016008537886872


## IM

In [14]:
# Switched DAPNN combined with the IM predictions, one report per rule
# (0 = process model only, 1 = OR, 2 = AND, 3 = XOR).
for rule in range(4):
    calc_preformance(dapnn_switched, IM, rule=rule)

Only using process model
Precision: 0.48542458808618505
Recall: 0.03346220212742721
F1: 0.06260855350538425
Using OR rule
Precision: 0.7651578031054518
Recall: 0.8255575213507197
F1: 0.7942109686909015
Using AND rule
Precision: 0.7699880668257757
Recall: 0.02818731843697442
F1: 0.0543837838976801
Using XOR rule
Precision: 0.7649881603486934
Recall: 0.7973702029137453
F1: 0.7808436002737851


## Alpha Miner

In [15]:
# Switched DAPNN combined with the alpha predictions, one report per rule
# (0 = process model only, 1 = OR, 2 = AND, 3 = XOR).
for rule in range(4):
    calc_preformance(dapnn_switched, alpha, rule=rule)

Only using process model
Precision: 0.4775328462575648
Recall: 0.6652687678832755
F1: 0.5559804682152146
Using OR rule
Precision: 0.538039941939036
Recall: 0.9391695607539916
F1: 0.6841422763904248
Using AND rule
Precision: 0.78705597331907
Recall: 0.5463818447895507
F1: 0.6449990975427378
Using XOR rule
Precision: 0.37361062057216465
Recall: 0.39278771596444095
F1: 0.38295924017206867


## Heuristic Miner

In [16]:
# Switched DAPNN combined with the heuristic predictions, one report per rule
# (0 = process model only, 1 = OR, 2 = AND, 3 = XOR).
for rule in range(4):
    calc_preformance(dapnn_switched, heuristic, rule=rule)

Only using process model
Precision: 0.47690625
Recall: 1.0
F1: 0.6458179048263897
Using OR rule
Precision: 0.47690625
Recall: 1.0
F1: 0.6458179048263897
Using AND rule
Precision: 0.783751069557777
Recall: 0.8202826376602669
F1: 0.8016008537886872
Using XOR rule
Precision: 0.1711207703346297
Recall: 0.1797173623397331
F1: 0.1753137451260307
