In [1]:
import os, sys
import shutil
import subprocess

# Directory that collects the learned models, the prediction results and the
# evaluation summary produced by the cells below.
result_dir = "taysir_results_track_2"
# exist_ok makes re-running this cell a no-op and avoids the check-then-create race
# of a separate isdir()/mkdir() pair.
os.makedirs(result_dir, exist_ok=True)

# config
# NOTE(review): both connector scripts are named "track_1" although the data below
# is track2 -- confirm that these are the intended scripts.
nn_script_path = "source/active_learning/system_under_learning/python/nn_connector_taysir_track_1.py"
transformer_script_path = "source/active_learning/system_under_learning/python/nn_connector_taysir_track_1_transformer.py"
ini_path = "ini/probabilistic_lsharp.ini"
pred_ini_path = "ini/predict_probabilistic_lsharp.ini"

# dataset and models
path_to_models = "data/active_learning/test_nn_queries/taysir_competition/track2/models"
path_to_test_sets = "data/active_learning/test_nn_queries/taysir_competition/track2/test_sets"
path_to_train_sets = "data/active_learning/test_nn_queries/taysir_competition/track2/datasets"

# Suffixes appended to the connector script path to locate the files that are moved
# into result_dir after each run (presumably written there by flexfringe -- confirm).
json_ending = ".ff.final.json"
dot_ending = ".ff.final.dot"
result_ending = ".ff.final.json.result"

def get_start_and_end_symbol(i, path_to_train_sets):
    """Read the start and end symbol from the validation words file of model ``i``.

    The file ``2.<i>.taysir.valid.words`` inside ``path_to_train_sets`` is opened,
    its first line is skipped and the second line is split on whitespace.

    Args:
        i: Model number, inserted into the file name.
        path_to_train_sets: Directory that contains the ``*.valid.words`` files.

    Returns:
        A ``(start_symbol, end_symbol)`` tuple of strings: the token at index 1 and
        the last token of the second line.

    Raises:
        IndexError: If the second line holds fewer than two tokens (or is missing).
    """
    words_file = os.path.join(path_to_train_sets, "2.{}.taysir.valid.words".format(i))
    with open(words_file, "rt") as handle:
        next(handle, "")  # skip the first line (presumably a header -- confirm)
        tokens = next(handle, "").split()
    # Index 0 is not used; presumably it holds the sequence length -- confirm.
    return tokens[1], tokens[-1]

## Train the models and predict on the test sets

In [None]:
# For every model: run flexfringe in learning mode against the network connector,
# run it again in prediction mode on the matching test set, then move the produced
# files into result_dir. Model 1 is deliberately excluded, hence the range starts at 2.
for i in range(2, 12):
    # Model 7 is driven through the transformer connector script, all others
    # through the default connector.
    tmp_script_path = transformer_script_path if i == 7 else nn_script_path

    model_name = "2.{}.taysir.model".format(i)
    model_path = os.path.join(path_to_models, model_name)

    start_symbol, end_symbol = get_start_and_end_symbol(i, path_to_train_sets)

    print("Starting training model nr. {}. Start-symbol={}, end-symbol={}".format(i, start_symbol, end_symbol))
    command = [
        "./flexfringe",
        "--ini={}".format(ini_path),
        "--start_symbol={}".format(start_symbol),
        "--end_symbol={}".format(end_symbol),
        "--aptafile={}".format(model_path),
        tmp_script_path,
    ]
    p = subprocess.run(command, stdout=subprocess.PIPE,
                       stderr=subprocess.PIPE, universal_newlines=True)
    # The captured streams are single strings; write each one in one call instead of
    # looping over it character by character.
    sys.stdout.write(p.stdout)
    sys.stderr.write(p.stderr)
    if p.returncode != 0:
        # Report the failure instead of ignoring it; the run continues so that the
        # remaining models are still attempted.
        sys.stderr.write("flexfringe training for model nr. {} exited with code {}\n".format(i, p.returncode))
    print("Finished training model nr. {}. Starting prediction.".format(i))

    test_set_name = "2.{}.taysir.test.combined".format(i)
    test_set_path = os.path.join(path_to_test_sets, test_set_name)
    # The learned model is looked up next to the connector script (script path + json_ending).
    command = [
        "./flexfringe",
        "--ini={}".format(pred_ini_path),
        "--aptafile={}{}".format(tmp_script_path, json_ending),
        test_set_path,
    ]
    p = subprocess.run(command, stdout=subprocess.PIPE,
                       stderr=subprocess.PIPE, universal_newlines=True)
    # Only stderr is echoed for the prediction run; its stdout is discarded.
    sys.stderr.write(p.stderr)
    if p.returncode != 0:
        sys.stderr.write("flexfringe prediction for model nr. {} exited with code {}\n".format(i, p.returncode))
    print("Finished prediction. Move the files to the folder now.")

    # Raises if an expected output file is missing, e.g. after a failed flexfringe run.
    shutil.move(tmp_script_path + json_ending, os.path.join(result_dir, "taysir_model_{}.json".format(i)))
    shutil.move(tmp_script_path + dot_ending, os.path.join(result_dir, "taysir_model_{}.dot".format(i)))
    shutil.move(tmp_script_path + result_ending, os.path.join(result_dir, "taysir_model_{}.result".format(i)))

    # PDF rendering of the .dot files is disabled. To render one manually use
    # `dot -Tpdf taysir_model_<i>.dot -o taysir_model_<i>.pdf`; a ">>" redirect does
    # not work inside a subprocess argument list because no shell interprets it.
    print("Done with model nr. {}".format(i))

Starting training model nr. 2. Start-symbol=7, end-symbol=8
Run in active learning mode.
Probabilistic L# only works with probabilistic oracle. Automatically switched to that one.        If this is undesired behavior check your input and/or source code.
Loading module: nn_connector_taysir_track_1
Loading the Neural network model in python module.
Loading alphabet in python module.
Setting internal flexfringe alphabet, inferred from the network's training alphabet
Python module source/active_learning/system_under_learning/python/nn_connector_taysir_track_1.py loaded and initialized successfully.
Using heuristic log_alergia
Alphabet: seq: 0 1 2 3 4 5 6 
Model nr 1
[0/500000] samples suggested.
Predictions of the following counterexample: The true probability: 0.01387, predicted probability: 0.0345162
Counterexample of length 1 found: 5 
Model nr 2
[500/500000] samples suggested.
[1000/500000] samples suggested.
[1500/500000] samples suggested.
[2000/500000] samples suggested.
[2500/50000

deleted merger
deleted merger


Finished prediction. Move the files to the folder now.
Done with model nr. 2
Starting training model nr. 3. Start-symbol=15, end-symbol=16
Run in active learning mode.
Probabilistic L# only works with probabilistic oracle. Automatically switched to that one.        If this is undesired behavior check your input and/or source code.
Loading module: nn_connector_taysir_track_1
Loading the Neural network model in python module.
Loading alphabet in python module.
Setting internal flexfringe alphabet, inferred from the network's training alphabet
Python module source/active_learning/system_under_learning/python/nn_connector_taysir_track_1.py loaded and initialized successfully.
Using heuristic log_alergia
Alphabet: seq: 0 1 10 11 12 13 14 2 3 4 5 6 7 8 9 
Model nr 1
[0/500000] samples suggested.
[500/500000] samples suggested.
[1000/500000] samples suggested.
[1500/500000] samples suggested.
[2000/500000] samples suggested.
[2500/500000] samples suggested.
[3000/500000] samples suggested.
[3

deleted merger
deleted merger


Finished prediction. Move the files to the folder now.
Done with model nr. 3
Starting training model nr. 4. Start-symbol=20, end-symbol=21


## Evaluate the models

In [None]:
def get_score(s: str):
    """Multiply all comma-separated numbers contained in ``s``.

    Leading and trailing quote, bracket and space characters (``'``, ``[``, ``]``,
    `` ``) are stripped before the string is split on commas, so inputs such as
    ``"[0.5, 0.25]"`` are accepted.

    Args:
        s: Textual list of numbers.

    Returns:
        The product of the parsed values, accumulated left to right.

    Raises:
        ValueError: If any comma-separated piece cannot be parsed as a float.
    """
    pieces = s.strip("'[] ").split(",")
    product = 1
    for piece in pieces:
        product = product * float(piece)
    return product

In [None]:
import json
import pandas as pd

import numpy as np
from sklearn.metrics import balanced_accuracy_score, accuracy_score


# Compare the predicted sequence scores of every model with the reference labels and
# write mean / sum / max of the absolute differences to results_summary.txt.
# Model 1 is deliberately excluded, hence the range starts at 2.
with open(os.path.join(result_dir, "results_summary.txt"), "wt") as summary_fh:
    # Plain header line; the former "{}" placeholder was never formatted.
    summary_fh.write("Model nr., Mean, Sum, Max\n")
    for i in range(2, 12):
        print("Predicting model {}".format(i))
        label_file = os.path.join(path_to_test_sets, "2.{}.taysir.test.labels".format(i))
        with open(label_file, "rt") as label_fh:
            next(label_fh, None)  # the first line is not a label and is skipped
            # Blank lines are ignored; float() tolerates surrounding whitespace.
            labels = [float(label) for label in label_fh if label.strip()]

        result_file = os.path.join(result_dir, "taysir_model_{}.result".format(i))
        res_df = pd.read_csv(result_file, delimiter=";")

        # The column name really starts with a space in the result file header.
        res_df["total_scores"] = res_df[" score sequence"].map(get_score)

        predicted = np.array(res_df["total_scores"]).reshape(-1)
        expected = np.array(labels).reshape(-1)
        if predicted.shape != expected.shape:
            # Without this check numpy would either fail with a broadcasting error or
            # silently broadcast a single value against the whole other array.
            raise ValueError(
                "Model {}: {} predictions but {} labels".format(i, predicted.size, expected.size)
            )
        diffs = np.abs(predicted - expected)

        outstr = "{}, {}, {}, {}\n".format(i, np.mean(diffs), np.sum(diffs), np.max(diffs))
        summary_fh.write(outstr)
        # Flush per model so partial results survive an interrupted run.
        summary_fh.flush()