In [6]:
import os, sys
import shutil
import subprocess

RANGE = [1, 10]

result_dir = "taysir_results_track_2"
if not os.path.isdir(result_dir):
    os.mkdir(result_dir)

# config
nn_script_path = "source/active_learning/system_under_learning/python/connectors/nn_connector_taysir_track_1.py"
transformer_script_path = "source/active_learning/system_under_learning/python/connectors/nn_connector_taysir_track_2_transformer.py"
ini_path = "ini/probabilistic_lsharp.ini"
pred_ini_path = "ini/predict_probabilistic_lsharp.ini"

# dataset and models
path_to_models = "data/active_learning/test_nn_queries/taysir_competition/track2/models"
path_to_test_sets = "data/active_learning/test_nn_queries/taysir_competition/track2/test_sets"
path_to_train_sets = "data/active_learning/test_nn_queries/taysir_competition/track2/datasets"

json_ending = ".ff.final.json"
dot_ending = ".ff.final.dot"
result_ending = ".ff.final.json.result"

def get_start_and_end_symbol(i, path_to_train_sets):
    train_set_path = os.path.join(path_to_train_sets, "2.{}.taysir.valid.words".format(i))
    with open(train_set_path, "rt") as f:
        _ = f.readline()
        line = f.readline()
        line = line.strip().split()
        start_symbol = line[1]
        end_symbol = line[-1]
    return start_symbol, end_symbol    

## Train the models

In [5]:
for i in range(RANGE[0], RANGE[-1]+1):
        
    tmp_script_path = transformer_script_path if i==10 else nn_script_path
    
    model_name = "2.{}.taysir.model".format(i)
    model_path = os.path.join(path_to_models, model_name)
    
    start_symbol, end_symbol = get_start_and_end_symbol(i, path_to_train_sets)
    
    print("Starting training model nr. {}. Start-symbol={}, end-symbol={}".format(i, start_symbol, end_symbol))
    command = ["./flexfringe", "--ini={}".format(ini_path), "--start_symbol={}".format(start_symbol), \
               "--end_symbol={}".format(end_symbol), "--aptafile={}".format(model_path), tmp_script_path]
    p = subprocess.run(command, stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE, universal_newlines=True)
    for outstr in p.stdout:
        sys.stdout.write(outstr)
    for outstr in p.stderr:
        sys.stderr.write(outstr)
    print("Finished training model nr. {}. Starting prediction.".format(i))
    
    test_set_name = "2.{}.taysir.test.combined".format(i)
    test_set_path = os.path.join(path_to_test_sets, test_set_name)
    command = ["./flexfringe", "--ini={}".format(pred_ini_path), \
               "--aptafile={}{}".format(tmp_script_path, json_ending), test_set_path]
    p = subprocess.run(command, stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE, universal_newlines=True)
    for outstr in p.stderr:
        sys.stderr.write(outstr)
    print("Finished prediction. Move the files to the folder now.")
    
    shutil.move(tmp_script_path + json_ending, os.path.join(result_dir, "taysir_model_{}.json".format(i)))
    shutil.move(tmp_script_path + dot_ending, os.path.join(result_dir, "taysir_model_{}.dot".format(i)))
    shutil.move(tmp_script_path + result_ending, os.path.join(result_dir, "taysir_model_{}.result".format(i)))
    
    #command = ["dot", "-Tpdf", os.path.join(result_dir, "taysir_model_{}.dot".format(i)), ">>", \
    #           os.path.join(result_dir, "taysir_model_{}.pdf".format(i))]
    #p = subprocess.run(command, stdout=subprocess.PIPE,
    #                            stderr=subprocess.PIPE, universal_newlines=True)
    for outstr in p.stderr:
        sys.stderr.write(outstr)
    print("Done with model nr. {}".format(i))

Starting training model nr. 2. Start-symbol=7, end-symbol=8
Run in active learning mode.
Probabilistic L# only works with probabilistic oracle. Automatically switched to that one.If this is undesired behavior check your input and/or source code.
Loading module: nn_connector_taysir_track_1
Loading the Neural network model in python module.
Loading alphabet in python module.
Setting internal flexfringe alphabet, inferred from the network's training alphabet
Python module source/active_learning/system_under_learning/python/connectors/nn_connector_taysir_track_1.py loaded and initialized successfully.
testmerge option set to true, because algorithm relies on it
Using heuristic string_probability_estimator_v2
Alphabet: seq: 0 1 2 3 4 5 6 
Processed layer 1
Reached fringe. Extend and recompute merges
Processed layer 1
Processed layer 2
Reached fringe. Extend and recompute merges
Processed layer 1
Processed layer 2
Processed layer 3
Reached fringe. Extend and recompute merges
Processed layer 

deleted merger
deleted merger
deleted merger


Run in active learning mode.
Probabilistic L# only works with probabilistic oracle. Automatically switched to that one.If this is undesired behavior check your input and/or source code.
Loading module: nn_connector_taysir_track_1
Loading the Neural network model in python module.
Loading alphabet in python module.
Setting internal flexfringe alphabet, inferred from the network's training alphabet
Python module source/active_learning/system_under_learning/python/connectors/nn_connector_taysir_track_1.py loaded and initialized successfully.
testmerge option set to true, because algorithm relies on it
Using heuristic string_probability_estimator_v2
Alphabet: seq: 0 1 10 11 12 13 14 2 3 4 5 6 7 8 9 
Processed layer 1
Reached fringe. Extend and recompute merges
Processed layer 1
Processed layer 2
Reached fringe. Extend and recompute merges
Processed layer 1
Processed layer 2
Processed layer 3
Reached fringe. Extend and recompute merges
Processed layer 1
Processed layer 2
Processed layer 3
C

deleted merger
deleted merger
deleted merger


Run in active learning mode.
Probabilistic L# only works with probabilistic oracle. Automatically switched to that one.If this is undesired behavior check your input and/or source code.
Loading module: nn_connector_taysir_track_1
Loading the Neural network model in python module.
Loading alphabet in python module.
Setting internal flexfringe alphabet, inferred from the network's training alphabet
Python module source/active_learning/system_under_learning/python/connectors/nn_connector_taysir_track_1.py loaded and initialized successfully.
testmerge option set to true, because algorithm relies on it
Using heuristic string_probability_estimator_v2
Alphabet: seq: 0 1 10 11 12 13 14 15 16 17 18 19 2 3 4 5 6 7 8 9 
Processed layer 1
Reached fringe. Extend and recompute merges
Processed layer 1
Complete basis found. Forwarding hypothesis
Searching for counterexamples
[0/500000] samples suggested.
[500/500000] samples suggested.
[1000/500000] samples suggested.
[1500/500000] samples suggested

deleted merger
deleted merger
deleted merger


Finished prediction. Move the files to the folder now.
Done with model nr. 4
Starting training model nr. 5. Start-symbol=33, end-symbol=34
Run in active learning mode.
Probabilistic L# only works with probabilistic oracle. Automatically switched to that one.If this is undesired behavior check your input and/or source code.
Loading module: nn_connector_taysir_track_1
Loading the Neural network model in python module.
Loading alphabet in python module.
Setting internal flexfringe alphabet, inferred from the network's training alphabet
Python module source/active_learning/system_under_learning/python/connectors/nn_connector_taysir_track_1.py loaded and initialized successfully.
testmerge option set to true, because algorithm relies on it
Using heuristic string_probability_estimator_v2
Alphabet: seq: 0 1 10 11 12 13 14 15 16 17 18 19 2 20 21 22 23 24 25 26 27 28 29 3 30 31 32 4 5 6 7 8 9 
Processed layer 1
Reached fringe. Extend and recompute merges
Processed layer 1
Complete basis found. 

deleted merger
deleted merger
deleted merger


Run in active learning mode.
Probabilistic L# only works with probabilistic oracle. Automatically switched to that one.If this is undesired behavior check your input and/or source code.
Loading module: nn_connector_taysir_track_1
Loading the Neural network model in python module.
Loading alphabet in python module.
Setting internal flexfringe alphabet, inferred from the network's training alphabet
Python module source/active_learning/system_under_learning/python/connectors/nn_connector_taysir_track_1.py loaded and initialized successfully.
testmerge option set to true, because algorithm relies on it
Using heuristic string_probability_estimator_v2
Alphabet: seq: 0 1 10 11 12 13 14 15 16 17 18 19 2 3 4 5 6 7 8 9 
Processed layer 1
Reached fringe. Extend and recompute merges
Processed layer 1
Processed layer 2
Reached fringe. Extend and recompute merges
Processed layer 1
Processed layer 2
Complete basis found. Forwarding hypothesis
Searching for counterexamples
[0/500000] samples suggested

deleted merger
deleted merger
deleted merger


Run in active learning mode.
Probabilistic L# only works with probabilistic oracle. Automatically switched to that one.If this is undesired behavior check your input and/or source code.
Loading module: nn_connector_taysir_track_1
Loading the Neural network model in python module.
Loading alphabet in python module.
Setting internal flexfringe alphabet, inferred from the network's training alphabet
Python module source/active_learning/system_under_learning/python/connectors/nn_connector_taysir_track_1.py loaded and initialized successfully.
testmerge option set to true, because algorithm relies on it
Using heuristic string_probability_estimator_v2
Alphabet: seq: 0 1 10 11 12 13 14 15 16 17 18 19 2 3 4 5 6 7 8 9 
Processed layer 1
Reached fringe. Extend and recompute merges
Processed layer 1
Processed layer 2
Reached fringe. Extend and recompute merges
Processed layer 1
Processed layer 2
Complete basis found. Forwarding hypothesis
Searching for counterexamples
[0/500000] samples suggested

deleted merger
deleted merger
deleted merger


Finished prediction. Move the files to the folder now.
Done with model nr. 7
Starting training model nr. 8. Start-symbol=24, end-symbol=25
Run in active learning mode.
Probabilistic L# only works with probabilistic oracle. Automatically switched to that one.If this is undesired behavior check your input and/or source code.
Loading module: nn_connector_taysir_track_1
Loading the Neural network model in python module.
Loading alphabet in python module.
Setting internal flexfringe alphabet, inferred from the network's training alphabet
Python module source/active_learning/system_under_learning/python/connectors/nn_connector_taysir_track_1.py loaded and initialized successfully.
testmerge option set to true, because algorithm relies on it
Using heuristic string_probability_estimator_v2
Alphabet: seq: 0 1 10 11 12 13 14 15 16 17 18 19 2 20 21 22 23 3 4 5 6 7 8 9 
Processed layer 1
Reached fringe. Extend and recompute merges
Processed layer 1
Complete basis found. Forwarding hypothesis
Searc

deleted merger
deleted merger
deleted merger


Run in active learning mode.
Probabilistic L# only works with probabilistic oracle. Automatically switched to that one.If this is undesired behavior check your input and/or source code.
Loading module: nn_connector_taysir_track_1
Loading the Neural network model in python module.
Loading alphabet in python module.
Setting internal flexfringe alphabet, inferred from the network's training alphabet
Python module source/active_learning/system_under_learning/python/connectors/nn_connector_taysir_track_1.py loaded and initialized successfully.
testmerge option set to true, because algorithm relies on it
Using heuristic string_probability_estimator_v2
Alphabet: seq: 0 1 10 11 12 13 14 15 16 17 18 19 2 3 4 5 6 7 8 9 
Processed layer 1
Reached fringe. Extend and recompute merges
Processed layer 1
Processed layer 2
Reached fringe. Extend and recompute merges
Processed layer 1
Processed layer 2
Complete basis found. Forwarding hypothesis
Searching for counterexamples
[0/500000] samples suggested

deleted merger
deleted merger
deleted merger


Finished prediction. Move the files to the folder now.
Done with model nr. 9
Starting training model nr. 10. Start-symbol=33, end-symbol=34
Run in active learning mode.
Probabilistic L# only works with probabilistic oracle. Automatically switched to that one.If this is undesired behavior check your input and/or source code.
Loading module: nn_connector_taysir_track_2_transformer
Loading the Neural network model in python module.
Loading alphabet in python module.
Setting internal flexfringe alphabet, inferred from the network's training alphabet
Python module source/active_learning/system_under_learning/python/connectors/nn_connector_taysir_track_2_transformer.py loaded and initialized successfully.
testmerge option set to true, because algorithm relies on it
Using heuristic string_probability_estimator_v2
Alphabet: seq: 0 1 10 11 12 13 14 15 16 17 18 19 2 20 21 22 23 24 25 26 27 28 29 3 30 31 32 4 5 6 7 8 9 
Complete basis found. Forwarding hypothesis
Searching for counterexamples
[0/

deleted merger
deleted merger
deleted merger


## Evaluate the models

In [7]:
def get_score(s: str):
    res = 1
    for value_str in s.strip( "'[] " ).split(","):
        res *= float(value_str)
    return res

In [8]:
import json
import pandas as pd

import numpy as np
from sklearn.metrics import balanced_accuracy_score, accuracy_score


summary_fh = open(os.path.join(result_dir, "results_summary.txt"), "wt")
summary_fh.write("Model nr. {}, Mean, Sum, Max, MSE\n")
for i in range(RANGE[0], RANGE[-1]+1):
    print("Predicting model {}".format(i))
    label_file = os.path.join(path_to_test_sets, "2.{}.taysir.test.labels".format(i))
    labels = list()
    for j, label in enumerate(open(label_file, "rt")):
        if j==0:
            continue
        label = label.strip().strip("\n").strip()
        labels.append(float(label))
        
    result_file = os.path.join(result_dir, "taysir_model_{}.result".format(i))
    res_df = pd.read_csv(result_file, delimiter=";")
        
    res_df["total_scores"] = res_df[" score sequence"].map(get_score)
    
    diffs = np.abs(np.array(res_df["total_scores"]).reshape(-1) - np.array(labels).reshape(-1))
    mse = np.sqrt(np.sum(diffs * diffs)) / diffs.shape[0]
    print(mse)
    outstr = "{}, {}, {}, {}, {}\n".format(i, np.mean(diffs), np.sum(diffs), np.max(diffs), mse)
    summary_fh.write(outstr)
    summary_fh.flush()
summary_fh.close()

Predicting model 1
1.9479176926288914e-07
Predicting model 2
6.48329819647788e-07
Predicting model 3
2.193717964538732e-07
Predicting model 4
6.106799396224005e-09
Predicting model 5
1.369817266577982e-08
Predicting model 6
1.860460500589625e-07
Predicting model 7
3.639958254769226e-11
Predicting model 8
5.553558609578781e-07
Predicting model 9
6.737365553681175e-11
Predicting model 10
1.59407479921577e-06
