In [13]:
import os, sys
import shutil
import subprocess

# Directory that collects the learned models, prediction results and the summary.
result_dir = "taysir_results"
# exist_ok=True makes this cell safe to re-run and avoids the check-then-create race
# of a separate isdir()/mkdir() pair.
os.makedirs(result_dir, exist_ok=True)

# config: connector script and ini files handed to the flexfringe executable
nn_script_path = "source/active_learning/system_under_learning/python/nn_connector_taysir_track_1.py"
ini_path = "ini/l_sharp.ini"
pred_ini_path = "ini/predict-count-driven.ini"

# dataset and models (paths are relative to the notebook's working directory)
path_to_models = "data/active_learning/test_nn_queries/taysir_competition/track1/models"
path_to_test_sets = "data/active_learning/test_nn_queries/taysir_competition/track1/test_sets"
path_to_train_sets = "data/active_learning/test_nn_queries/taysir_competition/track1/datasets"

# Suffixes appended to nn_script_path to locate the files picked up after a run.
json_ending = ".ff.final.json"
dot_ending = ".ff.final.dot"
result_ending = ".ff.final.json.result"

def get_start_and_end_symbol(i, path_to_train_sets):
    """Read the start and end symbol for dataset ``i`` from its validation words file.

    The file ``1.<i>.taysir.valid.words`` inside ``path_to_train_sets`` is opened,
    its first line is skipped and the second line is split on whitespace.

    Args:
        i: Dataset number inserted into the file name.
        path_to_train_sets: Directory containing the ``*.valid.words`` files.

    Returns:
        Tuple ``(start_symbol, end_symbol)`` of strings: the token at index 1 and
        the last token of the second line.

    Raises:
        IndexError: If the second line has fewer than two tokens.
    """
    words_file = os.path.join(path_to_train_sets, "1.{}.taysir.valid.words".format(i))
    with open(words_file, "rt") as handle:
        handle.readline()  # first line is skipped (presumably a header -- confirm)
        # split() without arguments already ignores leading/trailing whitespace
        tokens = handle.readline().split()
    # tokens[0] is not used (presumably the sequence length -- confirm)
    return tokens[1], tokens[-1]

## Train the models

In [None]:
def _run_and_report(command, step):
    """Run ``command``, forward its stderr to ``sys.stderr`` and return the result.

    stdout is captured (which keeps it out of the notebook output) but not printed.
    A non-zero exit code is reported as a warning on ``sys.stderr`` rather than
    raised, so the remaining steps of the loop still run.

    Args:
        command: Argument list passed to ``subprocess.run`` (no shell involved).
        step: Short description of the step, used in the warning message.

    Returns:
        The ``subprocess.CompletedProcess`` of the finished command.
    """
    completed = subprocess.run(command, stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE, universal_newlines=True)
    if completed.stderr:
        # Write the captured text in one call instead of character by character.
        sys.stderr.write(completed.stderr)
    if completed.returncode != 0:
        sys.stderr.write("Warning: {} exited with code {}.\n".format(step, completed.returncode))
    return completed


for i in range(1, 12):
    model_name = "1.{}.taysir.model".format(i)
    model_path = os.path.join(path_to_models, model_name)

    start_symbol, end_symbol = get_start_and_end_symbol(i, path_to_train_sets)

    # Step 1: run flexfringe with the learning ini on the model file.
    print("Starting training model nr. {}".format(i))
    command = ["./flexfringe", "--ini={}".format(ini_path), "--start_symbol={}".format(start_symbol),
               "--end_symbol={}".format(end_symbol), "--aptafile={}".format(model_path), nn_script_path]
    _run_and_report(command, "training of model nr. {}".format(i))
    print("Finished training model nr. {}. Starting prediction.".format(i))

    # Step 2: run flexfringe with the prediction ini on the combined test set,
    # pointing --aptafile at the JSON expected next to the connector script.
    test_set_name = "1.{}.taysir.test.combined".format(i)
    test_set_path = os.path.join(path_to_test_sets, test_set_name)

    command = ["./flexfringe", "--ini={}".format(pred_ini_path),
               "--aptafile={}{}".format(nn_script_path, json_ending), test_set_path]
    _run_and_report(command, "prediction for model nr. {}".format(i))
    print("Finished prediction. Move the files to the folder now.")

    # Step 3: collect the output files under per-model names in result_dir.
    json_target = os.path.join(result_dir, "taysir_model_{}.json".format(i))
    dot_target = os.path.join(result_dir, "taysir_model_{}.dot".format(i))
    result_target = os.path.join(result_dir, "taysir_model_{}.result".format(i))
    pdf_target = os.path.join(result_dir, "taysir_model_{}.pdf".format(i))

    shutil.move(nn_script_path + json_ending, json_target)
    shutil.move(nn_script_path + dot_ending, dot_target)
    shutil.move(nn_script_path + result_ending, result_target)

    # Step 4: render the automaton. Shell redirection (">>") does not work in an
    # argument list without a shell -- dot would treat it as an input file -- so
    # the output path is passed via dot's -o option instead.
    command = ["dot", "-Tpdf", dot_target, "-o", pdf_target]
    _run_and_report(command, "dot rendering of model nr. {}".format(i))
    print("Done with model nr. {}".format(i))

Starting training model nr. 1


## Evaluate the models

In [None]:
def get_types(json_file):
    """Build a lookup from type label to index from a model JSON file.

    Args:
        json_file: Path to a JSON file that has a top-level ``"types"`` entry.

    Returns:
        Dict mapping each element of ``json_content["types"]`` to its position
        in that sequence.

    Raises:
        RuntimeError: If ``json_file`` does not exist (chained to the original
            ``FileNotFoundError``).
    """
    try:
        # The context manager closes the handle; the original left it open.
        with open(json_file, "rt") as handle:
            json_content = json.load(handle)
    except FileNotFoundError as e:
        raise RuntimeError(f"Json file not found: {e.filename}.") from e
    types = {key: i for i, key in enumerate(json_content["types"])}
    return types

def convert_type(x):
    """Translate a type label into its index via the module-level ``types`` dict.

    Surrounding whitespace is removed from ``x`` before the lookup. ``types``
    must have been assigned (see ``get_types``) before this is called; a label
    that is not a key of ``types`` raises ``KeyError``.
    """
    label = x.strip()
    return types[label]

In [None]:
import json
import pandas as pd

import numpy as np
from sklearn.metrics import balanced_accuracy_score, accuracy_score


# Write one summary line (model nr., balanced accuracy, accuracy) per model.
# The summary handle has its own name and is managed by `with`, so it is never
# confused with the per-model result path and is always closed.
summary_path = os.path.join(result_dir, "results_summary.txt")
with open(summary_path, "wt") as summary_file:
    summary_file.write("Model nr., Balanced_acc, Acc\n")
    for i in range(1, 12):
        # Ground-truth labels: skip the first line, then one integer per line.
        label_path = os.path.join(path_to_test_sets, "1.{}.taysir.test.labels".format(i))
        with open(label_path, "rt") as label_file:
            next(label_file, None)
            # Blank lines (e.g. a trailing newline) are skipped; the model index
            # `i` is no longer shadowed by a line counter.
            labels = [int(line.strip()) for line in label_file if line.strip()]

        # Predictions produced by the training/prediction cell.
        # NOTE(review): assumes a comma-separated file with a column named exactly
        # "predicted trace type" -- confirm against the actual .result format.
        result_path = os.path.join(result_dir, "taysir_model_{}.result".format(i))
        res_df = pd.read_csv(result_path)

        # convert_type() reads the module-level `types`, so rebind it for each model.
        types = get_types(os.path.join(result_dir, "taysir_model_{}.json".format(i)))
        res_df["types_mapped"] = res_df["predicted trace type"].map(convert_type)

        y_true = np.array(labels)
        bacc = balanced_accuracy_score(y_true, res_df["types_mapped"])
        acc = accuracy_score(y_true, res_df["types_mapped"])

        summary_file.write("{}, {}, {}\n".format(i, bacc, acc))