In [1]:
import os, sys
import shutil
import subprocess

# Directory that collects the learned models and prediction results of this run.
result_dir = "taysir_results_track_1"
# exist_ok=True avoids the check-then-create race and keeps the cell safe to re-run.
os.makedirs(result_dir, exist_ok=True)

# config: connector scripts handed to flexfringe and the ini files for
# the learning run (ini_path) and the prediction run (pred_ini_path)
nn_script_path = "source/active_learning/system_under_learning/python/nn_connector_taysir_track_1.py"
transformer_script_path = "source/active_learning/system_under_learning/python/nn_connector_taysir_track_1_transformer.py"
ini_path = "ini/l_sharp.ini"
pred_ini_path = "ini/predict-count-driven.ini"

# dataset and models (all paths are relative to the current working directory)
path_to_models = "data/active_learning/test_nn_queries/taysir_competition/track1/models"
path_to_test_sets = "data/active_learning/test_nn_queries/taysir_competition/track1/test_sets"
path_to_train_sets = "data/active_learning/test_nn_queries/taysir_competition/track1/datasets"

# Suffixes of the output files; they are appended to the connector script path
# when the outputs are collected after each run.
json_ending = ".ff.final.json"
dot_ending = ".ff.final.dot"
result_ending = ".ff.final.json.result"

def get_start_and_end_symbol(i, path_to_train_sets):
    """Read the start and end symbol of dataset ``i`` from its validation-words file.

    Opens ``1.<i>.taysir.valid.words`` inside ``path_to_train_sets``, skips the
    first line (presumably a header -- TODO confirm) and splits the second line
    on whitespace. The second token of that line is taken as the start symbol
    and the last token as the end symbol (the first token is ignored;
    presumably it is the sequence length -- TODO confirm).

    Args:
        i: Number of the dataset/model; inserted into the file name.
        path_to_train_sets: Directory that contains the ``*.valid.words`` files.

    Returns:
        Tuple ``(start_symbol, end_symbol)``; both are strings.

    Raises:
        FileNotFoundError: If the file does not exist (raised by ``open``).
        ValueError: If the second line holds fewer than two tokens, so no
            start/end symbol can be extracted.
    """
    train_set_path = os.path.join(path_to_train_sets, "1.{}.taysir.valid.words".format(i))
    with open(train_set_path, "rt") as f:
        f.readline()  # the first line is not needed
        tokens = f.readline().split()

    # Fail with a message that names the file instead of a bare IndexError.
    if len(tokens) < 2:
        raise ValueError(
            "Cannot read start/end symbol: second line of {} has {} token(s), "
            "at least 2 are required.".format(train_set_path, len(tokens))
        )
    return tokens[1], tokens[-1]

## Train the models and predict on the test sets

In [2]:
def _run_flexfringe(command, description, echo_stdout=True):
    """Run one flexfringe command, forward its captured output and fail on error.

    Args:
        command: Argument list passed to ``subprocess.run``.
        description: Short text naming the step; only used in the error message.
        echo_stdout: If True the captured stdout is written to ``sys.stdout``;
            the captured stderr is always written to ``sys.stderr``.

    Returns:
        The ``subprocess.CompletedProcess`` of the run.

    Raises:
        RuntimeError: If the process exits with a non-zero return code. Without
            this check a failed run would only surface later, when the expected
            output files are missing (or stale ones from an earlier run are moved).
    """
    proc = subprocess.run(command, stdout=subprocess.PIPE,
                          stderr=subprocess.PIPE, universal_newlines=True)
    # The captured streams are plain strings; write them in one call each.
    if echo_stdout:
        sys.stdout.write(proc.stdout)
    sys.stderr.write(proc.stderr)
    if proc.returncode != 0:
        raise RuntimeError(
            "flexfringe exited with code {} during {}.".format(proc.returncode, description)
        )
    return proc


# Models 2..7 are processed; the range deliberately starts at 2.
for i in range(2, 8):
    # Model 7 uses the transformer connector script, all others the default one.
    tmp_script_path = transformer_script_path if i == 7 else nn_script_path

    model_name = "1.{}.taysir.model".format(i)
    model_path = os.path.join(path_to_models, model_name)

    start_symbol, end_symbol = get_start_and_end_symbol(i, path_to_train_sets)

    # --- learning run -------------------------------------------------------
    print("Starting training model nr. {}. Start-symbol={}, end-symbol={}".format(i, start_symbol, end_symbol))
    command = ["./flexfringe", "--ini={}".format(ini_path), "--start_symbol={}".format(start_symbol),
               "--end_symbol={}".format(end_symbol), "--aptafile={}".format(model_path), tmp_script_path]
    p = _run_flexfringe(command, "training of model nr. {}".format(i))
    print("Finished training model nr. {}. Starting prediction.".format(i))

    # --- prediction run -----------------------------------------------------
    # The learned automaton is read from <script path> + json_ending.
    test_set_name = "1.{}.taysir.test.combined".format(i)
    test_set_path = os.path.join(path_to_test_sets, test_set_name)
    command = ["./flexfringe", "--ini={}".format(pred_ini_path),
               "--aptafile={}{}".format(tmp_script_path, json_ending), test_set_path]
    # Only stderr is forwarded for the prediction run.
    p = _run_flexfringe(command, "prediction of model nr. {}".format(i), echo_stdout=False)
    print("Finished prediction. Move the files to the folder now.")

    # Collect the outputs under a per-model name so the next iteration, which may
    # use the same connector script path, does not overwrite them.
    shutil.move(tmp_script_path + json_ending, os.path.join(result_dir, "taysir_model_{}.json".format(i)))
    shutil.move(tmp_script_path + dot_ending, os.path.join(result_dir, "taysir_model_{}.dot".format(i)))
    shutil.move(tmp_script_path + result_ending, os.path.join(result_dir, "taysir_model_{}.result".format(i)))

    # NOTE: to render the automaton with Graphviz, pass the output file via "-o"
    # (dot -Tpdf <model>.dot -o <model>.pdf); a shell redirection such as ">>"
    # is not interpreted inside a subprocess argument list.
    print("Done with model nr. {}".format(i))

Starting training model nr. 2. Start-symbol=16, end-symbol=17
Run in active learning mode.
Loading module: nn_connector_taysir_track_1
Loading the Neural network model in python module.
Loading alphabet in python module.
Setting internal flexfringe alphabet, inferred from the network's training alphabet
Python module source/active_learning/system_under_learning/python/nn_connector_taysir_track_1.py loaded and initialized successfully.
Using heuristic count_driven
Alphabet: seq: 0 1 10 11 12 13 14 15 2 3 4 5 6 7 8 9 
We do an extend
[0/500000] samples suggested.
Counterexample because tree not parsable
Counterexample of length 27 found: 15 7 2 2 11 5 2 2 8 7 11 14 14 13 8 4 1 9 15 9 8 4 2 8 6 12 1 
Counterexample because tree not parsable
Counterexample of length 26 found: 7 15 7 10 10 4 8 2 11 11 4 9 1 4 14 13 0 8 5 10 5 12 7 3 7 3 
[500/500000] samples suggested.
[1000/500000] samples suggested.
[1500/500000] samples suggested.
[2000/500000] samples suggested.
[2500/500000] samples su

deleted merger
deleted merger


Finished prediction. Move the files to the folder now.
Done with model nr. 2
Starting training model nr. 3. Start-symbol=16, end-symbol=17


KeyboardInterrupt: 

## Evaluate the models

In [3]:
def get_types(json_file):
    """Build a mapping from type name to its position in a model's JSON file.

    Args:
        json_file: Path to a JSON file whose top-level object has a ``"types"``
            entry that can be enumerated (e.g. a list of type names).

    Returns:
        Dict mapping every entry of ``"types"`` to its index in that sequence.

    Raises:
        RuntimeError: If ``json_file`` does not exist; the original
            ``FileNotFoundError`` is kept as the cause.
        KeyError: If the JSON content has no ``"types"`` entry.
    """
    try:
        # The context manager closes the file even if parsing fails.
        with open(json_file, "rt") as fh:
            json_content = json.load(fh)
    except FileNotFoundError as e:
        raise RuntimeError(f"Json file not found: {e.filename}.") from e
    return {key: idx for idx, key in enumerate(json_content["types"])}

def convert_type(x, type_map=None):
    """Translate a type string into its mapped value.

    Args:
        x: Type string; surrounding whitespace is stripped before the lookup.
        type_map: Optional mapping to look ``x`` up in. When omitted, the
            module-level ``types`` mapping is used, which must have been
            assigned before the call.

    Returns:
        The value stored for the stripped ``x`` in the mapping.

    Raises:
        KeyError: If the stripped ``x`` is not a key of the mapping.
    """
    if type_map is None:
        # Backward-compatible default: fall back to the global mapping.
        type_map = types
    return type_map[x.strip()]

In [4]:
import json
import pandas as pd

import numpy as np
from sklearn.metrics import balanced_accuracy_score, accuracy_score


# Evaluate the stored predictions of every model against the test labels and
# write one line per model (balanced accuracy, accuracy) to the summary file.
# The context manager closes the summary file even if a model's files are missing.
with open(os.path.join(result_dir, "results_summary.txt"), "wt") as summary_fh:
    summary_fh.write("Model nr., Balanced_acc, Acc\n")
    for i in range(2, 8):
        print("Predicting model {}".format(i))

        # Ground-truth labels: the first line is skipped, every further
        # non-blank line is parsed as one integer label.
        label_file = os.path.join(path_to_test_sets, "1.{}.taysir.test.labels".format(i))
        with open(label_file, "rt") as label_fh:
            next(label_fh, None)
            labels = [int(line) for line in label_fh if line.strip()]

        result_file = os.path.join(result_dir, "taysir_model_{}.result".format(i))
        res_df = pd.read_csv(result_file, delimiter=";")

        # `types` has to stay a module-level name: convert_type reads it.
        types = get_types(os.path.join(result_dir, "taysir_model_{}.json".format(i)))
        # NOTE(review): forces the predicted type "0" to map to label 1,
        # overriding any index taken from the JSON file -- confirm this is intended.
        types["0"] = 1
        # The column name carries a leading space in the result file header.
        res_df["types_mapped"] = res_df[" predicted trace type"].map(convert_type)

        bacc = balanced_accuracy_score(np.array(labels), res_df["types_mapped"])
        acc = accuracy_score(np.array(labels), res_df["types_mapped"])

        outstr = "{}, {}, {}\n".format(i, bacc, acc)
        summary_fh.write(outstr)
        # Flush per model so finished results survive a later failure.
        summary_fh.flush()

Predicting model 2
Predicting model 3


FileNotFoundError: [Errno 2] No such file or directory: 'taysir_results/taysir_model_3.result'