# Test NN acoustic models
Test alignment model performance using a hand-corrected test set.
We can test multiple models in a row, possibly waiting for them to be trained.

The test set is a directory tree containing `.wav` and `.TextGrid` files, with a file `exceptions.txt` in its root. The names of the tiers holding the text and the phones are configurable below.

In [None]:
import os
# User's home directory, used as the base of the paths in the config cell.
# expanduser("~") gives the same value as $HOME when it is set, but still
# returns a string when HOME is missing from the environment (os.getenv
# would return None and the path concatenations below would fail).
home = os.path.expanduser("~")

In [None]:
# config cell - edit as needed

# Models to test. The name contains a %-style placeholder for the model
# number and will be used as "model_name_pattern % i":
model_name_pattern = home + "/f-w/prak/acmodel/man_both_training_00%02d"

# Model numbers to test; each one is substituted into model_name_pattern:
model_numbers = range(1, 20)

# Root directory of the test set:
test_set_dir = home + "/test-prak/repair_ref"
# e.g. /home/hanzl/test-prak/repair_ref/radio2_cleanTG/PETA-GVA-07-08.TextGrid

# Tier holding the manually aligned reference phones:
reference_phone_tier = "fix-phone"

# Tier holding the text to align:
reference_text_tier = "word"

# File the test results are appended to (observe it using tail -f):
logfile = "test-log-x"

In [None]:
import sys
# Put the main Prak directory (the parent directory, '..') at the front of
# the import path, unless it is already there (e.g. the cell was re-run).
if sys.path[0] != '..':
    sys.path.insert(0, '..')
from acmodel.praat_ifc import *
from acmodel.evaluate import *
from acmodel.nn_acmodel import *
import prongen

In [None]:
# Collect the paths of all .wav files anywhere under test_set_dir.
# IPython runs `find` in a shell and assigns its output lines to wavlist.
wavlist = !find {test_set_dir} -name '*.wav'
len(wavlist)  # shown as the cell output: number of recordings found

In [None]:
# just a cursory test of file counts to fail early on bad config
tmp = !find {test_set_dir} -name '*.TextGrid'
assert len(wavlist)==len(tmp)

In [None]:
# Extend the pronunciation generator's replacement table with the
# exceptions shipped in the root of the test set.
exceptions_path = test_set_dir + "/exceptions.txt"
prongen.hmm_pron.lexicon_replacements |= prongen.hmm_pron.read_lexirules_table(exceptions_path)

In [None]:
# Run tests, possibly waiting for each model to be trained.
# For every model number: wait until the model's .tsv file exists, load the
# model, align every test recording with it, accumulate the comparison
# against the manual reference tier and append a summary line to logfile.
import time  # imported here so the cell does not rely on another cell for it

# Threshold handed to compare_tiers_detailed; the same for all models and files.
max_misplace = 0.1

for i in model_numbers:
    model_name = model_name_pattern % i
    # .tsv used to get statistics for b log corr.; its presence is taken as
    # the sign that the model has been trained.
    while not os.path.exists(model_name + ".tsv"):
        print("Waiting for " + model_name)
        time.sleep(60)
    # Make sure the file is also finished:
    time.sleep(10)
    print(f"model {model_name}")
    with open(logfile, "a", encoding="utf-8") as log:
        log.write(f"model {model_name}\n")
    model = load_nn_acoustic_model(model_name, mid_size=100, varstates=False)
    total = Accumulator()
    for wav in wavlist:
        print(wav)
        tg_file = wav[:-len(".wav")] + ".TextGrid"
        tg = read_interval_tiers_from_textgrid_file(tg_file)
        man = tg[reference_phone_tier]  # reference test data
        # Text to align: the non-empty labels of the text tier, space-separated.
        labels = (label.strip() for (_begin, _end, label) in tg[reference_text_tier])
        txt = " ".join(label for label in labels if label != "")
        # align using our model:
        phone_tier, word_tier = align_wav_and_text_using_model(wav, txt, model)  # b corr?
        total.man_phones += len(man)
        compare_tiers_detailed(man, phone_tier, total, "", max_misplace)

    # All figures below are relative to the number of reference phones; fail
    # with a clear message instead of a ZeroDivisionError if there were none.
    n_phones = total.man_phones
    if not n_phones:
        raise ValueError(
            f"no reference phones found in tier {reference_phone_tier!r} "
            f"of the test set {test_set_dir!r}; cannot compute the summary"
        )

    print("Summary results:")
    # Items are separated by a single ", " (the original emitted ",, " after
    # the first item).
    report_line = (
        f"{100 * total.dif / n_phones:.3f}% mismatched"
        f", {100 * total.misplaced_50 / n_phones:.3f}% misplaced more than 0.05s"
        f", {100 * total.misplaced_100 / n_phones:.3f}% misplaced more than 0.1s"
        f", {100 * total.misplaced_200 / n_phones:.3f}% misplaced more than 0.2s"
        f", {total.midshift / n_phones:.4f} midshift"
    )
    print(report_line)
    with open(logfile, "a", encoding="utf-8") as log:
        log.write(report_line + "\n")