In [8]:
import csv
import os
import shutil
from time import perf_counter, time

from tqdm import tqdm

from demucs_utils.seperate_vocal import separate
from mfa.align import create_parser, run_align_corpus
from mfa.src.postprocessing import post_process_helper
from utils.process_label_to_txt import convert_txt

In [7]:
# --- Paths used by the alignment / submission cell ---------------------------
# Root folder holding one sub-folder per test case; each sub-folder is handed
# to the aligner as its corpus directory.
# (Presumably the vocal-separated audio, judging by the folder name -- confirm.)
FOLDER = "data/public_test_separated_optimized_mdx_extra_multi_ver3"
# Where the aligner writes its raw output (the run log reports TextGrids here).
PUBLIC_TEST_OUTPUT_RAW = "data/output/public_test_raw"
# Where the post-processed results are written; this folder is zipped at the end.
OUTPUT_DIR = "data/output/public_test_json"
# Raw lyric labels passed to the post-processing step.
RAW_LYRIC_JSON = "data/public_test/json_labels"
# CSV with the per-file processing time in milliseconds.
OUTPUT_TIME_SUBMISSION = "./result/time_submission.csv"
# Base name (without ".zip") of the archive built from OUTPUT_DIR.
OUTPUT_JUPYTER_FILE = "./result/jupyter_submission"
# Pronunciation dictionary and acoustic model given to the aligner.
DICTIONARY_PATH = "mfa/models/vietnamese_mfa_dict_ver3.dict"
ACOUSTIC_MODEL_PATH = "mfa/models/mfa_vn_vocal_train_combine_train_public_test.zip"
# Directory that must exist before the submission files are written.
SUBMISSION_DIR = "./result"

# os.listdir() returns entries in arbitrary order and may include stray files
# (e.g. ".DS_Store"). Keep only sub-directories -- each entry is later listed
# as a corpus directory -- and sort them so runs are reproducible.
test_cases = sorted(
    entry
    for entry in os.listdir(FOLDER)
    if os.path.isdir(os.path.join(FOLDER, entry))
)
len(test_cases)

264

In [9]:
# Align every test case, post-process the raw alignment, and record how long
# each case took. Afterwards write the timing CSV and zip the processed output.
all_predicted_time = []

# One parsed "align" namespace is reused for all test cases; only the corpus
# directory changes between iterations.
parser = create_parser()
args, unknown = parser.parse_known_args(["align"])
args.dictionary_path = DICTIONARY_PATH
args.acoustic_model_path = ACOUSTIC_MODEL_PATH
args.output_directory = PUBLIC_TEST_OUTPUT_RAW

for folder_name in tqdm(test_cases):
    # perf_counter() is monotonic, so the measured duration cannot be skewed
    # by system clock adjustments during this multi-hour run.
    t1 = perf_counter()
    separate_optimized_dir = os.path.join(FOLDER, folder_name)
    args.corpus_directory = separate_optimized_dir

    # Derive the base name from the corpus folder deterministically: ignore
    # hidden files, sort, and strip the extension whatever its length.
    corpus_files = sorted(
        name
        for name in os.listdir(separate_optimized_dir)
        if not name.startswith(".")
    )
    if not corpus_files:
        raise FileNotFoundError(
            f"No corpus files found in {separate_optimized_dir}"
        )
    file_name = os.path.splitext(corpus_files[0])[0]

    run_align_corpus(args, unknown)
    post_process_helper.post_process_json(
        file_name=file_name,
        raw_output=PUBLIC_TEST_OUTPUT_RAW,
        raw_lyric=RAW_LYRIC_JSON,
        output_dir=OUTPUT_DIR,
    )
    t2 = perf_counter()
    # Elapsed time for this test case, truncated to whole milliseconds.
    predicted_time = int((t2 - t1) * 1000)
    all_predicted_time.append((file_name, predicted_time))

# exist_ok avoids the check-then-create race of a separate os.path.exists().
os.makedirs(SUBMISSION_DIR, exist_ok=True)

# Save time submission
# newline="" is required by the csv module; without it Windows output gets a
# blank line after every row. UTF-8 keeps non-ASCII file names portable.
with open(OUTPUT_TIME_SUBMISSION, "w", newline="", encoding="utf-8") as f:
    write = csv.writer(f)
    fields = ["fname", "time (millisecond)"]
    write.writerow(fields)
    write.writerows(all_predicted_time)

# Save jupyter submission
shutil.make_archive(OUTPUT_JUPYTER_FILE, 'zip', OUTPUT_DIR)

  0%|          | 0/264 [00:00<?, ?it/s]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.09s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.23s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:07<00:00,  7.59s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.51s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:07<00:00,  7.48s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.35s/it]
100%|██████████| 1/1 [00:00<00:00, 22.03it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 27.280400037765503 seconds



  0%|          | 1/264 [00:28<2:05:50, 28.71s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.07s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...





[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:01<00:00,  1.28s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.71s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.33s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:01<00:00,  1.91s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.39s/it]
100%|██████████| 1/1 [00:00<00:00, 27.88it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 16.566444396972656 seconds



  1%|          | 2/264 [00:45<1:36:00, 21.99s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.08s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.55s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:06<00:00,  6.23s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.44s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:05<00:00,  5.36s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.53s/it]
100%|██████████| 1/1 [00:00<00:00, 31.80it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 26.148521423339844 seconds



  1%|          | 3/264 [01:12<1:45:25, 24.23s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.12s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.32s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.63s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.33s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.10s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.35s/it]
100%|██████████| 1/1 [00:00<00:00, 27.70it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 22.493577241897583 seconds



  2%|▏         | 4/264 [01:36<1:43:19, 23.84s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.08s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...





[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:01<00:00,  1.31s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.82s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.41s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.80s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.62s/it]
100%|██████████| 1/1 [00:00<00:00, 20.27it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 22.814887046813965 seconds



  2%|▏         | 5/264 [01:59<1:42:31, 23.75s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.06s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]


[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:01<00:00,  1.27s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.49s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.36s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.30s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.75s/it]
100%|██████████| 1/1 [00:00<00:00, 20.04it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 20.98557162284851 seconds



  2%|▏         | 6/264 [02:21<1:39:56, 23.24s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.17s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.23s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.06s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.57s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.01s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.60s/it]
100%|██████████| 1/1 [00:00<00:00, 16.00it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 21.329517602920532 seconds



  3%|▎         | 7/264 [02:44<1:38:40, 23.04s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.08s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.22s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.76s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.36s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:01<00:00,  1.72s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.85s/it]
100%|██████████| 1/1 [00:00<00:00, 19.53it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 22.315881490707397 seconds



  3%|▎         | 8/264 [03:07<1:38:33, 23.10s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.07s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.23s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.12s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.30s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.12s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.50s/it]
100%|██████████| 1/1 [00:00<00:00, 26.83it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 25.995454788208008 seconds



  3%|▎         | 9/264 [03:34<1:43:30, 24.35s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.05s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.29s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.93s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.34s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.39s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:02<00:00,  2.43s/it]
100%|██████████| 1/1 [00:00<00:00,  7.32it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 20.539979934692383 seconds



  4%|▍         | 10/264 [03:56<1:39:15, 23.45s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.10s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.30s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:05<00:00,  5.84s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.36s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.25s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:02<00:00,  2.41s/it]
100%|██████████| 1/1 [00:00<00:00, 16.52it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 23.62143325805664 seconds



  4%|▍         | 11/264 [04:21<1:40:33, 23.85s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.06s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...





[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:01<00:00,  1.45s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.57s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.36s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.04s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:02<00:00,  2.02s/it]
100%|██████████| 1/1 [00:00<00:00, 20.02it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 20.61811113357544 seconds



  5%|▍         | 12/264 [04:42<1:37:08, 23.13s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.13s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...





[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:01<00:00,  1.38s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:09<00:00,  9.23s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:02<00:00,  2.49s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:09<00:00,  9.78s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:02<00:00,  2.37s/it]
100%|██████████| 1/1 [00:00<00:00, 20.82it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 34.25655913352966 seconds



  5%|▍         | 13/264 [05:17<1:52:01, 26.78s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.09s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.24s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.09s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.44s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.72s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.34s/it]
100%|██████████| 1/1 [00:00<00:00, 27.54it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 19.683923482894897 seconds



  5%|▌         | 14/264 [05:38<1:43:35, 24.86s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.06s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.31s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:06<00:00,  6.30s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.32s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.67s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.39s/it]
100%|██████████| 1/1 [00:00<00:00, 22.55it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 24.604560136795044 seconds



  6%|▌         | 15/264 [06:03<1:43:45, 25.00s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.13s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...





[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:01<00:00,  1.29s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.69s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.39s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:05<00:00,  5.35s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.56s/it]
100%|██████████| 1/1 [00:00<00:00, 17.10it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 23.295830488204956 seconds



  6%|▌         | 16/264 [06:27<1:42:15, 24.74s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.34s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...





[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:01<00:00,  1.26s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.07s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.38s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.19s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.58s/it]
100%|██████████| 1/1 [00:00<00:00, 24.31it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 28.903162956237793 seconds



  6%|▋         | 17/264 [06:58<1:49:23, 26.57s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.23s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...





[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:01<00:00,  1.27s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.11s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.45s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.13s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.66s/it]
100%|██████████| 1/1 [00:00<00:00, 14.41it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 25.995023488998413 seconds



  7%|▋         | 18/264 [07:26<1:50:20, 26.91s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.23s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...





[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:01<00:00,  1.24s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:01<00:00,  1.99s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.37s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.30s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.59s/it]
100%|██████████| 1/1 [00:00<00:00, 17.35it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 27.3064124584198 seconds



  7%|▋         | 19/264 [07:55<1:52:45, 27.61s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.09s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]


[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:01<00:00,  1.26s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:05<00:00,  5.51s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.39s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:05<00:00,  5.21s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.65s/it]
100%|██████████| 1/1 [00:00<00:00, 21.98it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 25.518077611923218 seconds



  8%|▊         | 20/264 [08:22<1:51:18, 27.37s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.09s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.23s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:01<00:00,  1.88s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.29s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:01<00:00,  1.85s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:02<00:00,  2.14s/it]
100%|██████████| 1/1 [00:00<00:00, 27.15it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 27.78604221343994 seconds



  8%|▊         | 21/264 [08:50<1:52:20, 27.74s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.10s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...





[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:01<00:00,  1.26s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:06<00:00,  6.29s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.35s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:06<00:00,  6.35s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.35s/it]
100%|██████████| 1/1 [00:00<00:00, 20.99it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 28.77330756187439 seconds



  8%|▊         | 22/264 [09:20<1:54:08, 28.30s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.08s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...





[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:01<00:00,  1.30s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:05<00:00,  5.40s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.34s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:06<00:00,  6.45s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.43s/it]
100%|██████████| 1/1 [00:00<00:00, 18.14it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 31.1621515750885 seconds



  9%|▊         | 23/264 [09:52<1:58:03, 29.39s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.08s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.31s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:05<00:00,  5.16s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.51s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.04s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.41s/it]
100%|██████████| 1/1 [00:00<00:00, 19.93it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 23.69325280189514 seconds



  9%|▉         | 24/264 [10:17<1:52:13, 28.06s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.05s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...





[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:01<00:00,  1.48s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.93s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.53s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.99s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.60s/it]
100%|██████████| 1/1 [00:00<00:00, 23.19it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 21.51166296005249 seconds



  9%|▉         | 25/264 [10:39<1:44:51, 26.33s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.10s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.22s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.40s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.34s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:05<00:00,  5.57s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.58s/it]
100%|██████████| 1/1 [00:00<00:00, 23.97it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 21.65814518928528 seconds



 10%|▉         | 26/264 [11:01<1:39:39, 25.13s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.06s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.22s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.64s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.35s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.84s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.72s/it]
100%|██████████| 1/1 [00:00<00:00, 24.20it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 21.933936834335327 seconds



 10%|█         | 27/264 [11:24<1:36:17, 24.38s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.12s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.26s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:07<00:00,  7.72s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.37s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:05<00:00,  5.89s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.55s/it]
100%|██████████| 1/1 [00:00<00:00, 23.17it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 25.656902551651 seconds



 11%|█         | 28/264 [11:50<1:38:16, 24.99s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.08s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.30s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.99s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.33s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:05<00:00,  5.10s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.36s/it]
100%|██████████| 1/1 [00:00<00:00, 23.13it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 23.454649209976196 seconds



 11%|█         | 29/264 [12:15<1:36:53, 24.74s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.14s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.32s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:16<00:00, 16.31s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.53s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:06<00:00,  6.61s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:02<00:00,  2.19s/it]
100%|██████████| 1/1 [00:00<00:00, 12.00it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 36.66004157066345 seconds



 11%|█▏        | 30/264 [12:52<1:51:40, 28.64s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.05s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...





[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:01<00:00,  1.80s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:09<00:00,  9.97s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.40s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:11<00:00, 11.56s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:02<00:00,  2.28s/it]
100%|██████████| 1/1 [00:00<00:00, 18.34it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 39.21817231178284 seconds



 12%|█▏        | 31/264 [13:34<2:06:20, 32.54s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.27s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]


[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:02<00:00,  2.01s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:12<00:00, 12.55s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.52s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:09<00:00,  9.29s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.41s/it]
100%|██████████| 1/1 [00:00<00:00, 21.35it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 37.14014554023743 seconds



 12%|█▏        | 32/264 [14:13<2:12:49, 34.35s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.18s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...





[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:02<00:00,  2.63s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:09<00:00,  9.84s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.34s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:08<00:00,  8.13s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:04<00:00,  4.05s/it]
100%|██████████| 1/1 [00:00<00:00,  4.03it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 38.474608421325684 seconds



 12%|█▎        | 33/264 [14:52<2:17:52, 35.81s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.06s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.35s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.82s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.40s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.86s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:03<00:00,  3.10s/it]
100%|██████████| 1/1 [00:00<00:00, 28.58it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 22.817418098449707 seconds



 13%|█▎        | 34/264 [15:17<2:05:18, 32.69s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.07s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.26s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.77s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.44s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.16s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.87s/it]
100%|██████████| 1/1 [00:00<00:00, 20.46it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 21.136160135269165 seconds



 13%|█▎        | 35/264 [15:39<1:52:27, 29.46s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.21s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...





[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:01<00:00,  1.70s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.29s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:02<00:00,  2.75s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.36s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:02<00:00,  2.10s/it]
100%|██████████| 1/1 [00:00<00:00, 26.32it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 21.521278142929077 seconds



 14%|█▎        | 36/264 [16:02<1:44:05, 27.39s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.10s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...





[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:02<00:00,  2.87s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:08<00:00,  8.05s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.38s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.76s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.32s/it]
100%|██████████| 1/1 [00:00<00:00, 24.66it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 28.056148052215576 seconds



 14%|█▍        | 37/264 [16:31<1:45:17, 27.83s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.05s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.32s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.13s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.97s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:01<00:00,  1.67s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.51s/it]
100%|██████████| 1/1 [00:00<00:00, 30.50it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 19.47518825531006 seconds



 14%|█▍        | 38/264 [16:51<1:36:16, 25.56s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.07s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.21s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.87s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.64s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.13s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.40s/it]
100%|██████████| 1/1 [00:00<00:00, 37.42it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 18.752100706100464 seconds



 15%|█▍        | 39/264 [17:10<1:28:57, 23.72s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:02<00:00,  2.09s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...





[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:01<00:00,  1.56s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.61s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.96s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:01<00:00,  1.61s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.43s/it]
100%|██████████| 1/1 [00:00<00:00, 13.11it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 20.57467031478882 seconds



 15%|█▌        | 40/264 [17:32<1:25:48, 22.98s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.25s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...





[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:01<00:00,  1.84s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:07<00:00,  7.47s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.48s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:08<00:00,  8.18s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.71s/it]
100%|██████████| 1/1 [00:00<00:00, 25.45it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 30.759042739868164 seconds



 16%|█▌        | 41/264 [18:03<1:35:07, 25.60s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.21s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]


[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:01<00:00,  1.25s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:14<00:00, 14.89s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:03<00:00,  3.04s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:27<00:00, 27.44s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:05<00:00,  5.98s/it]
100%|██████████| 1/1 [00:00<00:00, 30.31it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 65.69935011863708 seconds



 16%|█▌        | 42/264 [19:10<2:20:19, 37.92s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.87s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...





[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...


  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.22s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.30s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.30s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.26s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.31s/it]
100%|██████████| 1/1 [00:00<00:00, 27.40it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 22.35025405883789 seconds



 16%|█▋        | 43/264 [19:34<2:03:51, 33.63s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.07s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...





[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:01<00:00,  1.35s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.15s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.26s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:01<00:00,  1.63s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.42s/it]
100%|██████████| 1/1 [00:00<00:00, 35.48it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 18.61207866668701 seconds



 17%|█▋        | 44/264 [19:53<1:47:29, 29.31s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.06s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.36s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.68s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.29s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.55s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.30s/it]
100%|██████████| 1/1 [00:00<00:00, 31.89it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 20.957775354385376 seconds



 17%|█▋        | 45/264 [20:14<1:38:34, 27.01s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.10s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.46s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:10<00:00, 10.07s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.31s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:05<00:00,  5.46s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.31s/it]
100%|██████████| 1/1 [00:00<00:00, 27.98it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 27.155646085739136 seconds



 17%|█▋        | 46/264 [20:42<1:39:01, 27.26s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.07s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...





[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:01<00:00,  1.38s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.46s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.36s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:01<00:00,  1.93s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:03<00:00,  3.62s/it]
100%|██████████| 1/1 [00:00<00:00, 16.65it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 18.63434386253357 seconds



 18%|█▊        | 47/264 [21:02<1:29:57, 24.87s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.15s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...





[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...


  0%|          | 0/1 [00:02<?, ?it/s]

[32mINFO[0m - Calculating CMVN...





[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:01<00:00,  1.74s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:07<00:00,  7.04s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:03<00:00,  3.23s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:05<00:00,  5.52s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:04<00:00,  4.53s/it]
100%|██████████| 1/1 [00:00<00:00,  8.72it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 33.69920992851257 seconds



 18%|█▊        | 48/264 [21:36<1:40:15, 27.85s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.15s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.45s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:08<00:00,  8.65s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.88s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:06<00:00,  6.09s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:03<00:00,  3.49s/it]
100%|██████████| 1/1 [00:00<00:00, 25.76it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 32.52716946601868 seconds



 19%|█▊        | 49/264 [22:10<1:45:47, 29.52s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.09s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...





[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:01<00:00,  1.24s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:01<00:00,  1.74s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.29s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:01<00:00,  1.59s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.39s/it]
100%|██████████| 1/1 [00:00<00:00, 31.54it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 15.1337411403656 seconds



 19%|█▉        | 50/264 [22:27<1:32:38, 25.97s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.10s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.23s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.42s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.49s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.40s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.33s/it]
100%|██████████| 1/1 [00:00<00:00, 35.41it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 17.10650086402893 seconds



 19%|█▉        | 51/264 [22:45<1:23:28, 23.51s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.10s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.21s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.30s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.30s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.43s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.54s/it]
100%|██████████| 1/1 [00:00<00:00, 29.87it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 16.34759283065796 seconds



 20%|█▉        | 52/264 [23:04<1:18:07, 22.11s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.06s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.39s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:05<00:00,  5.32s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.34s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.30s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:02<00:00,  2.24s/it]
100%|██████████| 1/1 [00:00<00:00, 24.35it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 22.1893572807312 seconds



 20%|██        | 53/264 [23:27<1:18:32, 22.33s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.07s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.21s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.36s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.32s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.99s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.35s/it]
100%|██████████| 1/1 [00:00<00:00, 20.87it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 20.04179549217224 seconds



 20%|██        | 54/264 [23:48<1:16:34, 21.88s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.09s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.21s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.80s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.44s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.49s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.32s/it]
100%|██████████| 1/1 [00:00<00:00, 26.66it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 19.074642419815063 seconds



 21%|██        | 55/264 [24:08<1:14:00, 21.25s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.08s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.35s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.34s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.42s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.24s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:02<00:00,  2.28s/it]
100%|██████████| 1/1 [00:00<00:00, 21.16it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 17.18504810333252 seconds



 21%|██        | 56/264 [24:26<1:10:19, 20.29s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.08s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.40s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.91s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.53s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.04s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:02<00:00,  3.00s/it]
100%|██████████| 1/1 [00:00<00:00, 29.25it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 22.728205919265747 seconds



 22%|██▏       | 57/264 [24:49<1:13:14, 21.23s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.09s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.64s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:09<00:00,  9.89s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.77s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:09<00:00,  9.67s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:03<00:00,  3.22s/it]
100%|██████████| 1/1 [00:00<00:00, 23.22it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 34.96605658531189 seconds



 22%|██▏       | 58/264 [25:25<1:27:49, 25.58s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.69s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.25s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.51s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.79s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.02s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.53s/it]
100%|██████████| 1/1 [00:00<00:00, 28.04it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 19.21248197555542 seconds



 22%|██▏       | 59/264 [25:45<1:21:33, 23.87s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.08s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...





[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:01<00:00,  1.65s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.26s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:04<00:00,  4.17s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:05<00:00,  5.32s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.96s/it]
100%|██████████| 1/1 [00:00<00:00, 23.98it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 24.511803150177002 seconds



 23%|██▎       | 60/264 [26:10<1:22:28, 24.26s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.08s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]


[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:01<00:00,  1.80s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.24s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.41s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:01<00:00,  1.87s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.30s/it]
100%|██████████| 1/1 [00:00<00:00, 27.22it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 18.443524599075317 seconds



 23%|██▎       | 61/264 [26:30<1:18:15, 23.13s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.07s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.53s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.69s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.87s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.53s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.41s/it]
100%|██████████| 1/1 [00:00<00:00, 39.18it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 20.123488426208496 seconds



 23%|██▎       | 62/264 [26:51<1:15:31, 22.43s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.09s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]


[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:01<00:00,  1.32s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.33s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.73s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.01s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:03<00:00,  3.01s/it]
100%|██████████| 1/1 [00:00<00:00, 25.52it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 23.590001106262207 seconds



 24%|██▍       | 63/264 [27:16<1:17:16, 23.07s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.08s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]


[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:01<00:00,  1.60s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.44s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.42s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:01<00:00,  1.88s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.59s/it]
100%|██████████| 1/1 [00:00<00:00, 30.92it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 21.288537979125977 seconds



 24%|██▍       | 64/264 [27:39<1:17:11, 23.16s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.09s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.20s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.08s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.34s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.97s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:02<00:00,  2.76s/it]
100%|██████████| 1/1 [00:00<00:00, 31.02it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 20.010366916656494 seconds



 25%|██▍       | 65/264 [28:00<1:14:25, 22.44s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.07s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.74s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:05<00:00,  5.26s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.40s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.77s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.31s/it]
100%|██████████| 1/1 [00:00<00:00, 34.22it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 21.96779155731201 seconds



 25%|██▌       | 66/264 [28:22<1:14:15, 22.50s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.09s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.70s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:06<00:00,  6.27s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.32s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.10s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.41s/it]
100%|██████████| 1/1 [00:00<00:00, 23.89it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 23.36327624320984 seconds



 25%|██▌       | 67/264 [28:46<1:15:22, 22.96s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.07s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.44s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.88s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.45s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.79s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.33s/it]
100%|██████████| 1/1 [00:00<00:00, 27.53it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 19.01812195777893 seconds



 26%|██▌       | 68/264 [29:06<1:11:47, 21.98s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.10s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.47s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.20s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.41s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:01<00:00,  1.82s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:02<00:00,  2.49s/it]
100%|██████████| 1/1 [00:00<00:00, 22.71it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 17.261810541152954 seconds



 26%|██▌       | 69/264 [29:24<1:07:28, 20.76s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.09s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.22s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.67s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:02<00:00,  2.66s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.34s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.32s/it]
100%|██████████| 1/1 [00:00<00:00, 25.71it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 19.5040066242218 seconds



 27%|██▋       | 70/264 [29:44<1:06:37, 20.60s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.07s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.21s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.63s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.73s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.47s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.33s/it]
100%|██████████| 1/1 [00:00<00:00, 26.82it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 18.677313327789307 seconds



 27%|██▋       | 71/264 [30:04<1:05:13, 20.28s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.05s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.59s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.44s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:02<00:00,  2.39s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:01<00:00,  1.85s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.41s/it]
100%|██████████| 1/1 [00:00<00:00, 34.78it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 19.358087301254272 seconds



 27%|██▋       | 72/264 [30:24<1:04:36, 20.19s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.16s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.80s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.53s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.37s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.12s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:02<00:00,  2.31s/it]
100%|██████████| 1/1 [00:00<00:00, 24.53it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 24.17333173751831 seconds



 28%|██▊       | 73/264 [30:49<1:08:43, 21.59s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.07s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.38s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.51s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.32s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:01<00:00,  1.65s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:02<00:00,  2.41s/it]
100%|██████████| 1/1 [00:00<00:00, 32.19it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 19.22392511367798 seconds



 28%|██▊       | 74/264 [31:09<1:07:06, 21.19s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.14s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...





[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:02<00:00,  2.03s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.38s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.32s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.78s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.38s/it]
100%|██████████| 1/1 [00:00<00:00, 25.14it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 21.129156589508057 seconds



 28%|██▊       | 75/264 [31:31<1:07:18, 21.37s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.07s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.21s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.19s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.33s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.02s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.66s/it]
100%|██████████| 1/1 [00:00<00:00, 18.53it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 18.78786015510559 seconds



 29%|██▉       | 76/264 [31:50<1:05:11, 20.80s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.12s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...





[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:01<00:00,  1.55s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:10<00:00, 10.09s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:02<00:00,  2.60s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:06<00:00,  6.94s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:05<00:00,  5.06s/it]
100%|██████████| 1/1 [00:00<00:00,  3.80it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 35.88281440734863 seconds



 29%|██▉       | 77/264 [32:28<1:20:29, 25.83s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.12s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.53s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:05<00:00,  5.45s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.51s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.56s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:02<00:00,  2.24s/it]
100%|██████████| 1/1 [00:00<00:00, 27.90it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 26.253984928131104 seconds



 30%|██▉       | 78/264 [32:57<1:23:31, 26.95s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.05s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.43s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.23s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:02<00:00,  2.31s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.12s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:02<00:00,  2.77s/it]
100%|██████████| 1/1 [00:00<00:00, 19.41it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 21.202404499053955 seconds



 30%|██▉       | 79/264 [33:19<1:18:40, 25.52s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.10s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...





[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:01<00:00,  1.26s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:08<00:00,  8.99s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.64s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:13<00:00, 13.69s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.37s/it]
100%|██████████| 1/1 [00:00<00:00, 24.49it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 40.41428875923157 seconds



 30%|███       | 80/264 [34:01<1:32:53, 30.29s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.08s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.32s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.19s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.55s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.29s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.47s/it]
100%|██████████| 1/1 [00:00<00:00, 23.72it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 20.464893579483032 seconds



 31%|███       | 81/264 [34:22<1:24:02, 27.55s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.10s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...





[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:01<00:00,  1.26s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.17s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.52s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:06<00:00,  6.38s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:02<00:00,  2.30s/it]
100%|██████████| 1/1 [00:00<00:00, 28.60it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 23.8753502368927 seconds



 31%|███       | 82/264 [34:47<1:20:53, 26.67s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.08s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.28s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:06<00:00,  6.88s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:02<00:00,  2.22s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.10s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:02<00:00,  2.60s/it]
100%|██████████| 1/1 [00:00<00:00,  6.93it/s]


[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 24.46138858795166 seconds


 31%|███▏      | 83/264 [35:12<1:19:17, 26.28s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.08s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.31s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:05<00:00,  5.81s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.48s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  3.00s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.36s/it]
100%|██████████| 1/1 [00:00<00:00, 23.98it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 21.940386295318604 seconds



 32%|███▏      | 84/264 [35:35<1:15:48, 25.27s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.09s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.51s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:05<00:00,  5.86s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.44s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:05<00:00,  5.92s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.46s/it]
100%|██████████| 1/1 [00:00<00:00, 16.50it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 24.81545400619507 seconds



 32%|███▏      | 85/264 [36:01<1:15:48, 25.41s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:02<00:00,  2.82s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...





[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:04<00:00,  4.12s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:10<00:00, 10.39s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.35s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.72s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.38s/it]
100%|██████████| 1/1 [00:00<00:00, 35.97it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 34.3367440700531 seconds



 33%|███▎      | 86/264 [36:36<1:24:07, 28.36s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.78s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...





[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...


  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.48s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.73s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.28s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:01<00:00,  1.92s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:02<00:00,  2.41s/it]
100%|██████████| 1/1 [00:00<00:00, 31.99it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 20.748392343521118 seconds



 33%|███▎      | 87/264 [36:58<1:17:45, 26.36s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:02<00:00,  2.07s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.22s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.39s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.40s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:01<00:00,  1.79s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:03<00:00,  3.02s/it]
100%|██████████| 1/1 [00:00<00:00, 22.51it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 19.792510509490967 seconds



 33%|███▎      | 88/264 [37:18<1:12:30, 24.72s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.06s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...





[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:01<00:00,  1.42s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:05<00:00,  5.50s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.64s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.82s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.33s/it]
100%|██████████| 1/1 [00:00<00:00, 34.40it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 23.736992597579956 seconds



 34%|███▎      | 89/264 [37:47<1:15:10, 25.77s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.07s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.22s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.27s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:02<00:00,  2.28s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.88s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.50s/it]
100%|██████████| 1/1 [00:00<00:00, 21.17it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 17.99355125427246 seconds



 34%|███▍      | 90/264 [38:06<1:09:10, 23.85s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.06s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...





[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:01<00:00,  1.79s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:10<00:00, 10.79s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.92s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:12<00:00, 12.67s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:03<00:00,  3.64s/it]
100%|██████████| 1/1 [00:00<00:00,  7.35it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 38.80785083770752 seconds



 34%|███▍      | 91/264 [38:46<1:22:23, 28.57s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.14s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.26s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.70s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.43s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.90s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:02<00:00,  2.22s/it]
100%|██████████| 1/1 [00:00<00:00, 31.34it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 18.754018306732178 seconds



 35%|███▍      | 92/264 [39:06<1:14:29, 25.99s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.07s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.20s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:05<00:00,  5.57s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:02<00:00,  2.54s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.63s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.31s/it]
100%|██████████| 1/1 [00:00<00:00, 26.53it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 22.70796489715576 seconds



 35%|███▌      | 93/264 [39:29<1:11:55, 25.24s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.08s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...





[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:01<00:00,  1.34s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.18s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.32s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:01<00:00,  1.62s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.34s/it]
100%|██████████| 1/1 [00:00<00:00, 44.15it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 16.348418474197388 seconds



 36%|███▌      | 94/264 [39:46<1:04:30, 22.77s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.12s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.26s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:06<00:00,  6.64s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.47s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:05<00:00,  5.98s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.31s/it]
100%|██████████| 1/1 [00:00<00:00, 31.45it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 24.42800521850586 seconds



 36%|███▌      | 95/264 [40:11<1:06:07, 23.48s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:02<00:00,  2.08s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...





[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:01<00:00,  1.56s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.63s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.26s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.35s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:02<00:00,  2.56s/it]
100%|██████████| 1/1 [00:00<00:00, 31.94it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 25.072115898132324 seconds



 36%|███▋      | 96/264 [40:37<1:07:36, 24.15s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.05s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]


[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:01<00:00,  1.62s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:07<00:00,  7.00s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.43s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:14<00:00, 14.76s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:02<00:00,  2.49s/it]
100%|██████████| 1/1 [00:00<00:00, 29.50it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 42.65809416770935 seconds



 37%|███▋      | 97/264 [41:20<1:23:18, 29.93s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.08s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...





[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:01<00:00,  1.45s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:09<00:00,  9.22s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.51s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:06<00:00,  6.01s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.37s/it]
100%|██████████| 1/1 [00:00<00:00, 31.43it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 27.573869466781616 seconds



 37%|███▋      | 98/264 [41:49<1:21:38, 29.51s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.05s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...





[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:01<00:00,  1.58s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.82s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.35s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.66s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.47s/it]
100%|██████████| 1/1 [00:00<00:00, 20.68it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 22.708173990249634 seconds



 38%|███▊      | 99/264 [42:13<1:16:20, 27.76s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:02<00:00,  2.17s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]


[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:01<00:00,  1.62s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:08<00:00,  8.63s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.65s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.70s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:02<00:00,  2.91s/it]
100%|██████████| 1/1 [00:00<00:00, 26.08it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 32.084404945373535 seconds



 38%|███▊      | 100/264 [42:46<1:20:07, 29.31s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.07s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.55s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.85s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.70s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.48s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:04<00:00,  4.41s/it]
100%|██████████| 1/1 [00:00<00:00, 38.69it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 25.048699378967285 seconds



 38%|███▊      | 101/264 [43:14<1:18:40, 28.96s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.10s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.52s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:12<00:00, 12.51s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.58s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:06<00:00,  6.77s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:03<00:00,  3.96s/it]
100%|██████████| 1/1 [00:00<00:00, 14.61it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 33.90534567832947 seconds



 39%|███▊      | 102/264 [43:48<1:22:48, 30.67s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.28s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...





[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...


  0%|          | 0/1 [00:02<?, ?it/s]

[32mINFO[0m - Calculating CMVN...





[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:03<00:00,  3.86s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:07<00:00,  7.94s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.76s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.01s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:02<00:00,  2.81s/it]
100%|██████████| 1/1 [00:00<00:00,  8.85it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 31.51214599609375 seconds



 39%|███▉      | 103/264 [44:24<1:26:26, 32.22s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.19s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:02<00:00,  2.10s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:05<00:00,  5.38s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.41s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.18s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.34s/it]
100%|██████████| 1/1 [00:00<00:00, 27.55it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 26.381484270095825 seconds



 39%|███▉      | 104/264 [44:52<1:22:40, 31.00s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.08s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.21s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:01<00:00,  1.83s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.40s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.57s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.77s/it]
100%|██████████| 1/1 [00:00<00:00, 18.60it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 16.12303352355957 seconds



 40%|███▉      | 105/264 [45:09<1:11:02, 26.81s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.09s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...





[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:01<00:00,  1.25s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:05<00:00,  5.72s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:02<00:00,  2.33s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.69s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:02<00:00,  2.76s/it]
100%|██████████| 1/1 [00:00<00:00, 26.33it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 24.63243079185486 seconds



 40%|████      | 106/264 [45:35<1:09:38, 26.44s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.09s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:02<00:00,  2.07s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:07<00:00,  7.37s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.51s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.58s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.37s/it]
100%|██████████| 1/1 [00:00<00:00, 23.05it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 23.636186838150024 seconds



 41%|████      | 107/264 [45:59<1:07:31, 25.81s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.07s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...





[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:01<00:00,  1.87s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:10<00:00, 10.90s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.83s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.58s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.79s/it]
100%|██████████| 1/1 [00:00<00:00, 20.69it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 30.3082058429718 seconds



 41%|████      | 108/264 [46:30<1:11:10, 27.38s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.70s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.42s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.87s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.36s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:06<00:00,  6.27s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:02<00:00,  2.93s/it]
100%|██████████| 1/1 [00:00<00:00, 18.53it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 24.186830759048462 seconds



 41%|████▏     | 109/264 [46:56<1:09:12, 26.79s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:02<00:00,  2.07s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.48s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.50s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.32s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:01<00:00,  1.93s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:02<00:00,  2.33s/it]
100%|██████████| 1/1 [00:00<00:00, 27.86it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 20.47849130630493 seconds



 42%|████▏     | 110/264 [47:17<1:04:25, 25.10s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.11s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.70s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:07<00:00,  7.30s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.46s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:08<00:00,  8.47s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.46s/it]
100%|██████████| 1/1 [00:00<00:00, 13.21it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 29.169789791107178 seconds



 42%|████▏     | 111/264 [47:47<1:07:49, 26.60s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.10s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...





[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:01<00:00,  1.45s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:14<00:00, 14.14s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:02<00:00,  2.06s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:07<00:00,  7.18s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:02<00:00,  2.38s/it]
100%|██████████| 1/1 [00:00<00:00, 12.98it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 36.180264472961426 seconds



 42%|████▏     | 112/264 [48:24<1:15:37, 29.85s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:02<00:00,  2.15s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...





[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...


  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...





[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:02<00:00,  2.49s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:08<00:00,  8.35s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.69s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.91s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.74s/it]
100%|██████████| 1/1 [00:00<00:00, 11.67it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 30.548115730285645 seconds



 43%|████▎     | 113/264 [48:57<1:17:08, 30.65s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.07s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...





[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:01<00:00,  1.93s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:09<00:00,  9.20s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:02<00:00,  2.12s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:09<00:00,  9.27s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:08<00:00,  8.35s/it]
100%|██████████| 1/1 [00:00<00:00, 11.82it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 40.96129584312439 seconds



 43%|████▎     | 114/264 [49:41<1:26:36, 34.64s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.07s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...





[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:01<00:00,  1.94s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:09<00:00,  9.82s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:02<00:00,  2.56s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:05<00:00,  5.80s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.76s/it]
100%|██████████| 1/1 [00:00<00:00, 12.72it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 34.677231788635254 seconds



 44%|████▎     | 115/264 [50:17<1:27:08, 35.09s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.10s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.43s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.98s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.35s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.52s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.35s/it]
100%|██████████| 1/1 [00:00<00:00, 20.16it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 18.305617332458496 seconds



 44%|████▍     | 116/264 [50:40<1:17:19, 31.35s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.09s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...





[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:01<00:00,  1.45s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.42s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.48s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.03s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.50s/it]
100%|██████████| 1/1 [00:00<00:00, 37.60it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 19.125116109848022 seconds



 44%|████▍     | 117/264 [51:00<1:08:22, 27.91s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.07s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.42s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.76s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.50s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:05<00:00,  5.42s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.59s/it]
100%|██████████| 1/1 [00:00<00:00, 25.09it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 22.33902883529663 seconds



 45%|████▍     | 118/264 [51:23<1:04:23, 26.46s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.07s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.30s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:05<00:00,  5.10s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.52s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.87s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.56s/it]
100%|██████████| 1/1 [00:00<00:00, 24.75it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 21.16996741294861 seconds



 45%|████▌     | 119/264 [51:45<1:00:40, 25.11s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.08s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.23s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.24s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.43s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.69s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.36s/it]
100%|██████████| 1/1 [00:00<00:00, 35.07it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 20.464253425598145 seconds



 45%|████▌     | 120/264 [52:06<57:25, 23.92s/it]  

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.31s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.92s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:05<00:00,  5.96s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.38s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.01s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.37s/it]
100%|██████████| 1/1 [00:00<00:00, 29.83it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 22.59502935409546 seconds



 46%|████▌     | 121/264 [52:29<56:37, 23.76s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.08s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]


[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:02<00:00,  2.08s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.43s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.31s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.26s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.60s/it]
100%|██████████| 1/1 [00:00<00:00, 32.89it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 23.96297597885132 seconds



 46%|████▌     | 122/264 [52:54<57:04, 24.11s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.13s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...





[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:01<00:00,  1.22s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.46s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.34s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.10s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.43s/it]
100%|██████████| 1/1 [00:00<00:00, 29.05it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 18.85798215866089 seconds



 47%|████▋     | 123/264 [53:14<53:29, 22.76s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.11s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.30s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.54s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.40s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:01<00:00,  1.64s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.84s/it]
100%|██████████| 1/1 [00:00<00:00, 25.74it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 16.93027400970459 seconds



 47%|████▋     | 124/264 [53:31<49:31, 21.23s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.81s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...





[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...


  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:02<00:00,  2.44s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:05<00:00,  5.05s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.68s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:05<00:00,  5.10s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:03<00:00,  3.08s/it]
100%|██████████| 1/1 [00:00<00:00, 20.30it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 26.95103669166565 seconds



 47%|████▋     | 125/264 [53:59<53:42, 23.19s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.11s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...





[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...


  0%|          | 0/1 [00:01<?, ?it/s]


[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:01<00:00,  1.73s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:06<00:00,  6.08s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.29s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.07s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:03<00:00,  3.12s/it]
100%|██████████| 1/1 [00:00<00:00, 36.04it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 23.080735445022583 seconds



 48%|████▊     | 126/264 [54:23<53:49, 23.40s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:02<00:00,  2.08s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.61s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.83s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.35s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.95s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.58s/it]
100%|██████████| 1/1 [00:00<00:00, 31.76it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 22.005518436431885 seconds



 48%|████▊     | 127/264 [54:46<53:10, 23.29s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.07s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.30s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:05<00:00,  5.26s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.36s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:01<00:00,  1.97s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.54s/it]
100%|██████████| 1/1 [00:00<00:00, 25.33it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 19.30795645713806 seconds



 48%|████▊     | 128/264 [55:06<50:31, 22.29s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.09s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]


[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:02<00:00,  2.82s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:06<00:00,  6.17s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.86s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.60s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:03<00:00,  3.93s/it]
100%|██████████| 1/1 [00:00<00:00,  2.02it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 28.980801343917847 seconds



 49%|████▉     | 129/264 [55:36<55:11, 24.53s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.18s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.91s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:05<00:00,  5.68s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.50s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.35s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:02<00:00,  2.50s/it]
100%|██████████| 1/1 [00:00<00:00, 27.66it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 24.292465686798096 seconds



 49%|████▉     | 130/264 [56:01<55:15, 24.74s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.08s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.80s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:07<00:00,  7.93s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.39s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.54s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.35s/it]
100%|██████████| 1/1 [00:00<00:00, 27.75it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 25.436317443847656 seconds



 50%|████▉     | 131/264 [56:27<55:58, 25.25s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.10s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.33s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:01<00:00,  1.66s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.40s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:01<00:00,  1.80s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.65s/it]
100%|██████████| 1/1 [00:00<00:00, 20.47it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 16.60938549041748 seconds



 50%|█████     | 132/264 [56:45<50:23, 22.91s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.29s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...





[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...


  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...





[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:01<00:00,  1.29s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.11s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.34s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.33s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.59s/it]
100%|██████████| 1/1 [00:00<00:00,  2.78it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 21.301595449447632 seconds



 50%|█████     | 133/264 [57:07<49:27, 22.66s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.10s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.31s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:05<00:00,  5.21s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.37s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.86s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.60s/it]
100%|██████████| 1/1 [00:00<00:00, 27.79it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 20.706307649612427 seconds



 51%|█████     | 134/264 [57:28<48:20, 22.31s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.20s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.30s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:01<00:00,  1.89s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.45s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.61s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.33s/it]
100%|██████████| 1/1 [00:00<00:00, 31.13it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 17.716622352600098 seconds



 51%|█████     | 135/264 [57:47<45:30, 21.17s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.08s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.22s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.16s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.45s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.48s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.48s/it]
100%|██████████| 1/1 [00:00<00:00, 32.38it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 19.372659921646118 seconds



 52%|█████▏    | 136/264 [58:07<44:29, 20.85s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.11s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.26s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:05<00:00,  5.85s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.41s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:05<00:00,  5.62s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.56s/it]
100%|██████████| 1/1 [00:00<00:00, 29.21it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 23.805908203125 seconds



 52%|█████▏    | 137/264 [58:32<46:30, 21.97s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:02<00:00,  2.08s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:02<00:00,  2.40s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.59s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.46s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:01<00:00,  1.70s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.97s/it]
100%|██████████| 1/1 [00:00<00:00, 34.91it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 20.903974294662476 seconds



 52%|█████▏    | 138/264 [58:53<45:57, 21.88s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.11s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.60s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:05<00:00,  5.93s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:02<00:00,  2.10s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.89s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.44s/it]
100%|██████████| 1/1 [00:00<00:00, 28.03it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 25.35860800743103 seconds



 53%|█████▎    | 139/264 [59:19<48:16, 23.18s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:02<00:00,  2.07s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]


[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:01<00:00,  1.28s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:05<00:00,  5.09s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.39s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:05<00:00,  5.31s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.33s/it]
100%|██████████| 1/1 [00:00<00:00, 31.15it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 24.827239751815796 seconds



 53%|█████▎    | 140/264 [59:45<49:23, 23.90s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.89s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...





[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...


  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.23s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.78s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.39s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.93s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.96s/it]
100%|██████████| 1/1 [00:00<00:00, 29.04it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 20.667561054229736 seconds



 53%|█████▎    | 141/264 [1:00:06<47:28, 23.16s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.26s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.29s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.72s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.45s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.76s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.44s/it]
100%|██████████| 1/1 [00:00<00:00, 30.49it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 17.563467502593994 seconds



 54%|█████▍    | 142/264 [1:00:25<44:06, 21.69s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.09s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...





[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:01<00:00,  1.47s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.87s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:03<00:00,  3.22s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.45s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.36s/it]
100%|██████████| 1/1 [00:00<00:00, 33.88it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 21.49086904525757 seconds



 54%|█████▍    | 143/264 [1:00:47<44:05, 21.86s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.06s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.50s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:06<00:00,  6.55s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:03<00:00,  3.62s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.04s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.47s/it]
100%|██████████| 1/1 [00:00<00:00, 14.88it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 26.36445903778076 seconds



 55%|█████▍    | 144/264 [1:01:14<46:55, 23.46s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.18s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...





[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...


  0%|          | 0/1 [00:02<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.56s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:05<00:00,  5.83s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.44s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.80s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:02<00:00,  2.48s/it]
100%|██████████| 1/1 [00:00<00:00, 23.43it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 29.984394788742065 seconds



 55%|█████▍    | 145/264 [1:01:45<51:10, 25.80s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.07s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...





[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:01<00:00,  1.53s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.93s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.33s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.50s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.41s/it]
100%|██████████| 1/1 [00:00<00:00, 20.05it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 25.937974452972412 seconds



 55%|█████▌    | 146/264 [1:02:15<52:40, 26.79s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.07s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.98s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.38s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.51s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.03s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.33s/it]
100%|██████████| 1/1 [00:00<00:00, 26.00it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 19.340240716934204 seconds



 56%|█████▌    | 147/264 [1:02:35<48:14, 24.74s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.07s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.48s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.78s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.52s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.05s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.62s/it]
100%|██████████| 1/1 [00:00<00:00, 23.68it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 22.109018087387085 seconds



 56%|█████▌    | 148/264 [1:02:57<46:40, 24.14s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.09s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.27s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.08s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.37s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:06<00:00,  6.16s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.37s/it]
100%|██████████| 1/1 [00:00<00:00, 34.04it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 20.472087383270264 seconds



 56%|█████▋    | 149/264 [1:03:18<44:33, 23.25s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.35s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...





[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:01<00:00,  1.24s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:05<00:00,  5.51s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.38s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:05<00:00,  5.49s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.33s/it]
100%|██████████| 1/1 [00:00<00:00, 21.92it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 23.63380193710327 seconds



 57%|█████▋    | 150/264 [1:03:43<44:47, 23.57s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.09s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.48s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:06<00:00,  6.23s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.43s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:05<00:00,  5.15s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:02<00:00,  2.88s/it]
100%|██████████| 1/1 [00:00<00:00, 23.88it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 24.498051404953003 seconds



 57%|█████▋    | 151/264 [1:04:08<45:19, 24.06s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.09s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.98s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.40s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.39s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.76s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:02<00:00,  2.09s/it]
100%|██████████| 1/1 [00:00<00:00, 26.46it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 28.422158241271973 seconds



 58%|█████▊    | 152/264 [1:04:37<47:43, 25.57s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.07s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.20s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.07s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.63s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.51s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.33s/it]
100%|██████████| 1/1 [00:00<00:00, 32.46it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 19.2868549823761 seconds



 58%|█████▊    | 153/264 [1:04:57<44:10, 23.88s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.12s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.45s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:06<00:00,  6.28s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.51s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.73s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.51s/it]
100%|██████████| 1/1 [00:00<00:00, 23.49it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 21.341843128204346 seconds



 58%|█████▊    | 154/264 [1:05:19<42:48, 23.35s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.07s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.89s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:07<00:00,  7.71s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.31s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:06<00:00,  6.03s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.43s/it]
100%|██████████| 1/1 [00:00<00:00, 30.27it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 26.81854486465454 seconds



 59%|█████▊    | 155/264 [1:05:47<44:50, 24.68s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.07s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.34s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.43s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.31s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.48s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.56s/it]
100%|██████████| 1/1 [00:00<00:00, 12.47it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 19.327707529067993 seconds



 59%|█████▉    | 156/264 [1:06:07<42:01, 23.35s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:02<00:00,  2.13s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.21s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.77s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.55s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:05<00:00,  5.12s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.49s/it]
100%|██████████| 1/1 [00:00<00:00, 29.07it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 25.093549489974976 seconds



 59%|█████▉    | 157/264 [1:06:33<42:56, 24.08s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:02<00:00,  2.06s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...





[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:01<00:00,  1.47s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.77s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.90s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.83s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.36s/it]
100%|██████████| 1/1 [00:00<00:00, 34.71it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 23.60638666152954 seconds



 60%|█████▉    | 158/264 [1:06:57<42:40, 24.15s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.05s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]


[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:01<00:00,  1.51s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:05<00:00,  5.12s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.60s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.11s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.31s/it]
100%|██████████| 1/1 [00:00<00:00, 28.24it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 28.8462336063385 seconds



 60%|██████    | 159/264 [1:07:27<45:17, 25.88s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:02<00:00,  2.63s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.29s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.82s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:02<00:00,  2.05s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.06s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:02<00:00,  2.44s/it]
100%|██████████| 1/1 [00:00<00:00, 31.66it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 20.946033716201782 seconds



 61%|██████    | 160/264 [1:07:49<42:38, 24.60s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.09s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]


[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:01<00:00,  1.94s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:05<00:00,  5.45s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.32s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.02s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.38s/it]
100%|██████████| 1/1 [00:00<00:00, 28.35it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 22.731609106063843 seconds



 61%|██████    | 161/264 [1:08:12<41:44, 24.32s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.10s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...





[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:01<00:00,  1.20s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.28s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.29s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.25s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.29s/it]
100%|██████████| 1/1 [00:00<00:00, 39.74it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 18.13007116317749 seconds



 61%|██████▏   | 162/264 [1:08:31<38:31, 22.66s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.08s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.20s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.25s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.50s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.00s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:02<00:00,  2.36s/it]
100%|██████████| 1/1 [00:00<00:00, 41.77it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 17.31700897216797 seconds



 62%|██████▏   | 163/264 [1:08:49<35:47, 21.26s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.06s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.29s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.18s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:05<00:00,  5.01s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.24s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:04<00:00,  4.49s/it]
100%|██████████| 1/1 [00:00<00:00, 18.26it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 25.998933792114258 seconds



 62%|██████▏   | 164/264 [1:09:16<38:09, 22.90s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.07s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...





[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:01<00:00,  1.21s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.82s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.28s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.98s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.29s/it]
100%|██████████| 1/1 [00:00<00:00, 35.77it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 19.1878182888031 seconds



 62%|██████▎   | 165/264 [1:09:36<36:16, 21.99s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.10s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.20s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.53s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.31s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.40s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.31s/it]
100%|██████████| 1/1 [00:00<00:00, 21.37it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 22.938528776168823 seconds



 63%|██████▎   | 166/264 [1:09:59<36:42, 22.47s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.07s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.20s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.83s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.35s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.28s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.65s/it]
100%|██████████| 1/1 [00:00<00:00, 19.63it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 16.893993616104126 seconds



 63%|██████▎   | 167/264 [1:10:17<33:56, 20.99s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.07s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.23s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.68s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.58s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.87s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:02<00:00,  2.39s/it]
100%|██████████| 1/1 [00:00<00:00, 17.35it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 20.63412642478943 seconds



 64%|██████▎   | 168/264 [1:10:38<33:43, 21.08s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.06s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.22s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.34s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.33s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.01s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.37s/it]
100%|██████████| 1/1 [00:00<00:00, 32.39it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 19.569225072860718 seconds



 64%|██████▍   | 169/264 [1:10:58<32:58, 20.82s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.09s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.21s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.17s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.27s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.14s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.30s/it]
100%|██████████| 1/1 [00:00<00:00, 35.09it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 15.84142017364502 seconds



 64%|██████▍   | 170/264 [1:11:15<30:34, 19.52s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.07s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.25s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.81s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.30s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.61s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.83s/it]
100%|██████████| 1/1 [00:00<00:00, 23.55it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 17.297962188720703 seconds



 65%|██████▍   | 171/264 [1:11:33<29:30, 19.04s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.09s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.28s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.75s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:02<00:00,  2.21s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.87s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:02<00:00,  2.94s/it]
100%|██████████| 1/1 [00:00<00:00, 32.65it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 21.72266960144043 seconds



 65%|██████▌   | 172/264 [1:11:55<30:44, 20.05s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.50s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...





[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...


  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.59s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.46s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.35s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:01<00:00,  1.83s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.36s/it]
100%|██████████| 1/1 [00:00<00:00, 36.03it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 17.547707080841064 seconds



 66%|██████▌   | 173/264 [1:12:13<29:34, 19.49s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.06s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.20s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:01<00:00,  1.74s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.22s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.42s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:02<00:00,  2.49s/it]
100%|██████████| 1/1 [00:00<00:00, 41.51it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 16.787851572036743 seconds



 66%|██████▌   | 174/264 [1:12:31<28:26, 18.96s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.05s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.21s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:11<00:00, 11.33s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.30s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:05<00:00,  5.25s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.74s/it]
100%|██████████| 1/1 [00:00<00:00, 32.45it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 30.508732080459595 seconds



 66%|██████▋   | 175/264 [1:13:02<33:34, 22.63s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.08s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.21s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.72s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.43s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.42s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.56s/it]
100%|██████████| 1/1 [00:00<00:00, 15.14it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 22.6851863861084 seconds



 67%|██████▋   | 176/264 [1:13:26<33:31, 22.86s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.10s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.21s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.41s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.30s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:05<00:00,  5.46s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.44s/it]
100%|██████████| 1/1 [00:00<00:00, 35.83it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 21.76120686531067 seconds



 67%|██████▋   | 177/264 [1:13:48<32:57, 22.73s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.07s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:02<00:00,  2.59s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.45s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.40s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:01<00:00,  1.53s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.38s/it]
100%|██████████| 1/1 [00:00<00:00, 35.25it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 19.073362350463867 seconds



 67%|██████▋   | 178/264 [1:14:08<31:19, 21.86s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.12s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.20s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.72s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.26s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.60s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.84s/it]
100%|██████████| 1/1 [00:00<00:00, 28.72it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 18.665948629379272 seconds



 68%|██████▊   | 179/264 [1:14:27<29:56, 21.13s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.07s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.20s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.46s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.88s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:05<00:00,  5.69s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.77s/it]
100%|██████████| 1/1 [00:00<00:00, 18.86it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 22.56531596183777 seconds



 68%|██████▊   | 180/264 [1:14:51<30:28, 21.77s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.07s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.26s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.14s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.32s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.99s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.30s/it]
100%|██████████| 1/1 [00:00<00:00, 37.61it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 18.796476364135742 seconds



 69%|██████▊   | 181/264 [1:15:10<29:09, 21.08s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.08s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.20s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:01<00:00,  1.91s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.82s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:01<00:00,  1.81s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:02<00:00,  2.23s/it]
100%|██████████| 1/1 [00:00<00:00, 37.81it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 16.518953323364258 seconds



 69%|██████▉   | 182/264 [1:15:27<27:12, 19.90s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.09s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.51s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.05s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.29s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.40s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:03<00:00,  3.24s/it]
100%|██████████| 1/1 [00:00<00:00, 30.39it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 19.39146399497986 seconds



 69%|██████▉   | 183/264 [1:15:47<26:56, 19.95s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.10s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.30s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:01<00:00,  1.98s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.42s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.21s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.30s/it]
100%|██████████| 1/1 [00:00<00:00, 36.92it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 19.56259036064148 seconds



 70%|██████▉   | 184/264 [1:16:08<26:52, 20.15s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.08s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...





[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:02<00:00,  2.14s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:06<00:00,  6.34s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.41s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.11s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.30s/it]
100%|██████████| 1/1 [00:00<00:00, 20.18it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 24.20789623260498 seconds



 70%|███████   | 185/264 [1:16:33<28:24, 21.57s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.06s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.24s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:05<00:00,  5.73s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.92s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.06s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.81s/it]
100%|██████████| 1/1 [00:00<00:00, 30.79it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 22.84076237678528 seconds



 70%|███████   | 186/264 [1:16:56<28:47, 22.15s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.06s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.19s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.03s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.33s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:01<00:00,  1.86s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.57s/it]
100%|██████████| 1/1 [00:00<00:00, 31.59it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 16.61526107788086 seconds



 71%|███████   | 187/264 [1:17:14<26:40, 20.78s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.12s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...





[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:01<00:00,  1.23s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:07<00:00,  7.35s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.33s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:07<00:00,  7.47s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.67s/it]
100%|██████████| 1/1 [00:00<00:00, 28.37it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 26.567855834960938 seconds



 71%|███████   | 188/264 [1:17:41<28:47, 22.73s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:02<00:00,  2.04s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.27s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.80s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.30s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.68s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.30s/it]
100%|██████████| 1/1 [00:00<00:00, 18.73it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 20.893860578536987 seconds



 72%|███████▏  | 189/264 [1:18:03<28:03, 22.45s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.06s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:02<00:00,  2.23s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.25s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.30s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:01<00:00,  1.97s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:03<00:00,  3.19s/it]
100%|██████████| 1/1 [00:00<00:00, 24.06it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 20.08840250968933 seconds



 72%|███████▏  | 190/264 [1:18:24<27:03, 21.94s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.07s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.69s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:06<00:00,  6.16s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.96s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:06<00:00,  6.06s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:02<00:00,  2.28s/it]
100%|██████████| 1/1 [00:00<00:00, 36.74it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 26.19889783859253 seconds



 72%|███████▏  | 191/264 [1:18:51<28:33, 23.47s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:02<00:00,  2.59s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.37s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:05<00:00,  5.80s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.30s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:05<00:00,  5.46s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.44s/it]
100%|██████████| 1/1 [00:00<00:00, 17.36it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 26.00013303756714 seconds



 73%|███████▎  | 192/264 [1:19:18<29:32, 24.62s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.92s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.20s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:01<00:00,  1.69s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.25s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:01<00:00,  1.70s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.36s/it]
100%|██████████| 1/1 [00:00<00:00, 29.66it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 18.59910798072815 seconds



 73%|███████▎  | 193/264 [1:19:37<27:14, 23.02s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:02<00:00,  2.04s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.21s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:01<00:00,  1.98s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.37s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.62s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:02<00:00,  2.91s/it]
100%|██████████| 1/1 [00:00<00:00, 33.41it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 20.833357572555542 seconds



 73%|███████▎  | 194/264 [1:19:59<26:18, 22.55s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.07s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.70s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.93s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.52s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.25s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.30s/it]
100%|██████████| 1/1 [00:00<00:00, 32.13it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 24.687141180038452 seconds



 74%|███████▍  | 195/264 [1:20:24<26:54, 23.39s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.05s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.24s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.67s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.30s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.86s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.35s/it]
100%|██████████| 1/1 [00:00<00:00, 19.78it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 21.132383346557617 seconds



 74%|███████▍  | 196/264 [1:20:46<25:58, 22.92s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.10s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.36s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.36s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.33s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.04s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:02<00:00,  2.60s/it]
100%|██████████| 1/1 [00:00<00:00, 35.83it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 19.268775939941406 seconds



 75%|███████▍  | 197/264 [1:21:06<24:35, 22.02s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.09s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.49s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:05<00:00,  5.30s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.27s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:01<00:00,  1.83s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:04<00:00,  4.05s/it]
100%|██████████| 1/1 [00:00<00:00, 37.37it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 24.717624187469482 seconds



 75%|███████▌  | 198/264 [1:21:31<25:19, 23.02s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:02<00:00,  2.08s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...





[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:02<00:00,  2.84s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:13<00:00, 13.28s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.68s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:09<00:00,  9.13s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.64s/it]
100%|██████████| 1/1 [00:00<00:00, 28.13it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 38.57431983947754 seconds



 75%|███████▌  | 199/264 [1:22:11<30:13, 27.90s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.10s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.20s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.23s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.32s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.19s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.35s/it]
100%|██████████| 1/1 [00:00<00:00, 20.20it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 19.213141918182373 seconds



 76%|███████▌  | 200/264 [1:22:31<27:16, 25.57s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.09s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.33s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:01<00:00,  1.99s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.62s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.91s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:09<00:00,  9.10s/it]
100%|██████████| 1/1 [00:00<00:00, 34.96it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 26.04727792739868 seconds



 76%|███████▌  | 201/264 [1:22:57<27:11, 25.90s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.06s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:04<00:00,  4.09s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.43s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.35s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.71s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.31s/it]
100%|██████████| 1/1 [00:00<00:00, 19.40it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 20.795185804367065 seconds



 77%|███████▋  | 202/264 [1:23:19<25:23, 24.58s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.87s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.20s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.75s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.28s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.64s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.69s/it]
100%|██████████| 1/1 [00:00<00:00, 34.96it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 21.38768196105957 seconds



 77%|███████▋  | 203/264 [1:23:41<24:16, 23.87s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.09s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.26s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.08s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.23s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.53s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.29s/it]
100%|██████████| 1/1 [00:00<00:00, 37.75it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 17.82496404647827 seconds



 77%|███████▋  | 204/264 [1:24:00<22:19, 22.32s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.11s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.20s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.33s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.36s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.01s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.60s/it]
100%|██████████| 1/1 [00:00<00:00, 32.96it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 19.088378429412842 seconds



 78%|███████▊  | 205/264 [1:24:20<21:11, 21.55s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.07s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.19s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:01<00:00,  1.94s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.73s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:01<00:00,  1.81s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:04<00:00,  4.25s/it]
100%|██████████| 1/1 [00:00<00:00, 17.84it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 18.664509534835815 seconds



 78%|███████▊  | 206/264 [1:24:39<20:12, 20.91s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.08s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.30s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.44s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.32s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.00s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.42s/it]
100%|██████████| 1/1 [00:00<00:00, 39.21it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 19.20437240600586 seconds



 78%|███████▊  | 207/264 [1:24:59<19:41, 20.73s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.06s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.23s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.51s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.32s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.39s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:02<00:00,  2.66s/it]
100%|██████████| 1/1 [00:00<00:00, 34.94it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 18.879009246826172 seconds



 79%|███████▉  | 208/264 [1:25:19<19:01, 20.39s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.06s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.53s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.90s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.29s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.45s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:03<00:00,  3.09s/it]
100%|██████████| 1/1 [00:00<00:00, 21.79it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 21.236186027526855 seconds



 79%|███████▉  | 209/264 [1:25:41<19:07, 20.86s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.09s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.82s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:09<00:00,  9.29s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.36s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:09<00:00,  9.03s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.41s/it]
100%|██████████| 1/1 [00:00<00:00, 20.43it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 30.79971671104431 seconds



 80%|███████▉  | 210/264 [1:26:13<21:54, 24.35s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.07s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.26s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.32s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.32s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.26s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.90s/it]
100%|██████████| 1/1 [00:00<00:00, 10.26it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 18.792118072509766 seconds



 80%|███████▉  | 211/264 [1:26:33<20:21, 23.05s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.07s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...





[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:01<00:00,  1.27s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:01<00:00,  1.75s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.26s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.12s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.60s/it]
100%|██████████| 1/1 [00:00<00:00, 44.19it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 16.825465440750122 seconds



 80%|████████  | 212/264 [1:26:51<18:31, 21.37s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.08s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.20s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.07s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.28s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.07s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:02<00:00,  2.29s/it]
100%|██████████| 1/1 [00:00<00:00, 24.60it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 16.141744375228882 seconds



 81%|████████  | 213/264 [1:27:08<17:01, 20.03s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.05s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.19s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.06s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.33s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.01s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.29s/it]
100%|██████████| 1/1 [00:00<00:00, 31.60it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 15.573993682861328 seconds



 81%|████████  | 214/264 [1:27:24<15:44, 18.90s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.09s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.20s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.14s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.30s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.72s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.35s/it]
100%|██████████| 1/1 [00:00<00:00, 44.38it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 21.05160665512085 seconds



 81%|████████▏ | 215/264 [1:27:46<16:06, 19.73s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.07s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...





[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:01<00:00,  1.82s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:05<00:00,  5.99s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.41s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:05<00:00,  5.37s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.29s/it]
100%|██████████| 1/1 [00:00<00:00, 34.95it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 23.590047359466553 seconds



 82%|████████▏ | 216/264 [1:28:10<16:57, 21.21s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.11s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:02<00:00,  2.22s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.33s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.37s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.42s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.69s/it]
100%|██████████| 1/1 [00:00<00:00, 30.11it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 24.407554626464844 seconds



 82%|████████▏ | 217/264 [1:28:35<17:30, 22.36s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.05s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...





[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:01<00:00,  1.61s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:06<00:00,  6.73s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.33s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:06<00:00,  6.34s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.31s/it]
100%|██████████| 1/1 [00:00<00:00, 21.60it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 24.88595938682556 seconds



 83%|████████▎ | 218/264 [1:29:01<17:53, 23.34s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.07s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.19s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:01<00:00,  1.78s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.28s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:01<00:00,  1.59s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.35s/it]
100%|██████████| 1/1 [00:00<00:00, 36.87it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 14.416021823883057 seconds



 83%|████████▎ | 219/264 [1:29:16<15:37, 20.83s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:02<00:00,  2.24s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.43s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.95s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.37s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.39s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.39s/it]
100%|██████████| 1/1 [00:00<00:00, 24.04it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 19.91458487510681 seconds



 83%|████████▎ | 220/264 [1:29:37<15:13, 20.76s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.05s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...





[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:01<00:00,  1.46s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:05<00:00,  5.47s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.33s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.82s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.63s/it]
100%|██████████| 1/1 [00:00<00:00, 19.29it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 25.480878829956055 seconds



 84%|████████▎ | 221/264 [1:30:03<16:02, 22.39s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.61s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...





[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:01<00:00,  1.20s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.26s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.31s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.86s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.29s/it]
100%|██████████| 1/1 [00:00<00:00, 35.32it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 21.033379077911377 seconds



 84%|████████▍ | 222/264 [1:30:25<15:37, 22.32s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.07s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.20s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.94s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.29s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.90s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.78s/it]
100%|██████████| 1/1 [00:00<00:00, 26.99it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 17.42483615875244 seconds



 84%|████████▍ | 223/264 [1:30:43<14:23, 21.06s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.10s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...





[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:02<00:00,  2.05s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.27s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.26s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.90s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.42s/it]
100%|██████████| 1/1 [00:00<00:00, 37.23it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 20.803651571273804 seconds



 85%|████████▍ | 224/264 [1:31:05<14:12, 21.31s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.07s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.47s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.35s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.25s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.29s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:02<00:00,  2.44s/it]
100%|██████████| 1/1 [00:00<00:00, 35.81it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 21.510221004486084 seconds



 85%|████████▌ | 225/264 [1:31:28<14:09, 21.78s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.08s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.33s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.58s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.42s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.39s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.87s/it]
100%|██████████| 1/1 [00:00<00:00, 27.42it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 20.85282301902771 seconds



 86%|████████▌ | 226/264 [1:31:50<13:55, 21.99s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.12s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.24s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:01<00:00,  1.79s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.46s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.14s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.60s/it]
100%|██████████| 1/1 [00:00<00:00, 28.05it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 19.7158203125 seconds



 86%|████████▌ | 227/264 [1:32:11<13:18, 21.58s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.87s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...





[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...


  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.22s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:05<00:00,  5.90s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.96s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.37s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:02<00:00,  2.91s/it]
100%|██████████| 1/1 [00:00<00:00, 24.83it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 29.499855279922485 seconds



 86%|████████▋ | 228/264 [1:32:41<14:29, 24.16s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.19s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.50s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:05<00:00,  5.43s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.48s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.64s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.46s/it]
100%|██████████| 1/1 [00:00<00:00, 33.42it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 25.323439598083496 seconds



 87%|████████▋ | 229/264 [1:33:07<14:28, 24.82s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:05<00:00,  5.70s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.28s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:06<00:00,  6.42s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.72s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:05<00:00,  5.04s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.86s/it]
100%|██████████| 1/1 [00:00<00:00, 31.36it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 30.601327896118164 seconds



 87%|████████▋ | 230/264 [1:33:39<15:12, 26.84s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.07s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...





[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:01<00:00,  1.62s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.52s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.64s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:01<00:00,  1.72s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.36s/it]
100%|██████████| 1/1 [00:00<00:00, 23.20it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 19.287367582321167 seconds



 88%|████████▊ | 231/264 [1:33:59<13:40, 24.86s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.09s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.26s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.98s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.45s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.77s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.71s/it]
100%|██████████| 1/1 [00:00<00:00, 23.80it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 21.400085926055908 seconds



 88%|████████▊ | 232/264 [1:34:21<12:49, 24.06s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.07s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.27s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.20s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.41s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.78s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:02<00:00,  2.96s/it]
100%|██████████| 1/1 [00:00<00:00, 27.23it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 20.853785753250122 seconds



 88%|████████▊ | 233/264 [1:34:43<12:03, 23.33s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.13s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.24s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.64s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.90s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.97s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.67s/it]
100%|██████████| 1/1 [00:00<00:00, 16.36it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 20.00414538383484 seconds



 89%|████████▊ | 234/264 [1:35:04<11:18, 22.63s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.69s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:02<00:00,  2.08s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:05<00:00,  5.96s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.65s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.93s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.87s/it]
100%|██████████| 1/1 [00:00<00:00, 22.56it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 25.01139760017395 seconds



 89%|████████▉ | 235/264 [1:35:30<11:25, 23.64s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.34s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...





[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...


  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.44s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:05<00:00,  5.47s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.83s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.67s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:06<00:00,  6.71s/it]
100%|██████████| 1/1 [00:00<00:00, 15.23it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 28.972068548202515 seconds



 89%|████████▉ | 236/264 [1:36:00<11:55, 25.55s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.07s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...





[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:01<00:00,  1.97s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:07<00:00,  7.17s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.55s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.45s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.60s/it]
100%|██████████| 1/1 [00:00<00:00, 19.47it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 28.600895643234253 seconds



 90%|████████▉ | 237/264 [1:36:30<12:05, 26.87s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.07s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...





[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:01<00:00,  1.35s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.74s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.40s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.58s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.42s/it]
100%|██████████| 1/1 [00:00<00:00, 29.65it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 24.794644355773926 seconds



 90%|█████████ | 238/264 [1:36:56<11:28, 26.48s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.52s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]


[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:01<00:00,  1.32s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.12s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.40s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.09s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.84s/it]
100%|██████████| 1/1 [00:00<00:00, 30.48it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 17.66055703163147 seconds



 91%|█████████ | 239/264 [1:37:14<10:01, 24.07s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.21s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.30s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.25s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.49s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.82s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.70s/it]
100%|██████████| 1/1 [00:00<00:00, 23.91it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 20.034029483795166 seconds



 91%|█████████ | 240/264 [1:37:35<09:14, 23.12s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.07s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...





[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:01<00:00,  1.85s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.66s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.67s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:05<00:00,  5.32s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.89s/it]
100%|██████████| 1/1 [00:00<00:00, 16.91it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 23.82308554649353 seconds



 91%|█████████▏| 241/264 [1:38:00<09:03, 23.61s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:03<00:00,  3.53s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:02<00:00,  2.82s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.58s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.71s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:01<00:00,  1.99s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.83s/it]
100%|██████████| 1/1 [00:00<00:00, 20.98it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 24.156042337417603 seconds



 92%|█████████▏| 242/264 [1:38:25<08:52, 24.18s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.10s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...





[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...


  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...





[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:01<00:00,  1.48s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:05<00:00,  5.55s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.37s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.71s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.74s/it]
100%|██████████| 1/1 [00:00<00:00, 31.99it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 25.051605224609375 seconds



 92%|█████████▏| 243/264 [1:38:51<08:40, 24.80s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.17s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.24s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.25s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.46s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.28s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.76s/it]
100%|██████████| 1/1 [00:00<00:00, 25.58it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 17.249424934387207 seconds



 92%|█████████▏| 244/264 [1:39:10<07:35, 22.78s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.10s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...





[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:01<00:00,  1.29s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:06<00:00,  6.93s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:02<00:00,  2.72s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.44s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.58s/it]
100%|██████████| 1/1 [00:00<00:00,  6.29it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 25.12933373451233 seconds



 93%|█████████▎| 245/264 [1:39:35<07:30, 23.70s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:02<00:00,  2.14s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]


[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:01<00:00,  1.41s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:06<00:00,  6.38s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.89s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:06<00:00,  6.88s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:02<00:00,  2.10s/it]
100%|██████████| 1/1 [00:00<00:00, 11.63it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 30.888453483581543 seconds



 93%|█████████▎| 246/264 [1:40:07<07:50, 26.13s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.22s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.31s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.03s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.47s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.05s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:02<00:00,  2.15s/it]
100%|██████████| 1/1 [00:00<00:00, 26.55it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 18.991177558898926 seconds



 94%|█████████▎| 247/264 [1:40:28<06:54, 24.41s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.75s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...





[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...


  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.30s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.66s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.43s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.02s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.57s/it]
100%|██████████| 1/1 [00:00<00:00, 18.29it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 18.926671028137207 seconds



 94%|█████████▍| 248/264 [1:40:47<06:08, 23.04s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.23s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.25s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:06<00:00,  6.01s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.37s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:05<00:00,  5.62s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.72s/it]
100%|██████████| 1/1 [00:00<00:00, 24.33it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 24.18691873550415 seconds



 94%|█████████▍| 249/264 [1:41:12<05:54, 23.63s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:02<00:00,  2.90s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...





[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...


  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:02<00:00,  2.14s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:06<00:00,  6.12s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.56s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:05<00:00,  5.51s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:02<00:00,  2.18s/it]
100%|██████████| 1/1 [00:00<00:00, 28.98it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 32.373199462890625 seconds



 95%|█████████▍| 250/264 [1:41:47<06:16, 26.87s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.09s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...





[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:01<00:00,  1.37s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.56s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.72s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.15s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.40s/it]
100%|██████████| 1/1 [00:00<00:00, 29.75it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 23.43739414215088 seconds



 95%|█████████▌| 251/264 [1:42:11<05:39, 26.09s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.09s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.23s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.87s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.36s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:01<00:00,  1.85s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.50s/it]
100%|██████████| 1/1 [00:00<00:00, 27.20it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 16.758482694625854 seconds



 95%|█████████▌| 252/264 [1:42:29<04:42, 23.55s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.06s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...





[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:01<00:00,  1.53s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:07<00:00,  7.28s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:05<00:00,  5.88s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:05<00:00,  5.29s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:11<00:00, 11.97s/it]
100%|██████████| 1/1 [00:00<00:00,  7.32it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 40.52314758300781 seconds



 96%|█████████▌| 253/264 [1:43:10<05:17, 28.87s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.09s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...





[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:01<00:00,  1.56s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.74s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.76s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.11s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.60s/it]
100%|██████████| 1/1 [00:00<00:00, 23.14it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 19.89675545692444 seconds



 96%|█████████▌| 254/264 [1:43:31<04:25, 26.56s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.57s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.35s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.40s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:03<00:00,  3.54s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.44s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.48s/it]
100%|██████████| 1/1 [00:00<00:00, 22.65it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 23.691664457321167 seconds



 97%|█████████▋| 255/264 [1:43:56<03:53, 25.93s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.08s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...





[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:01<00:00,  1.70s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.40s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:02<00:00,  2.04s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:06<00:00,  6.02s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:02<00:00,  2.85s/it]
100%|██████████| 1/1 [00:00<00:00, 13.03it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 24.8226797580719 seconds



 97%|█████████▋| 256/264 [1:44:21<03:26, 25.81s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.19s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.24s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:05<00:00,  5.23s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.46s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:01<00:00,  1.69s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:05<00:00,  5.48s/it]
100%|██████████| 1/1 [00:00<00:00, 22.84it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 23.901031970977783 seconds



 97%|█████████▋| 257/264 [1:44:46<02:59, 25.64s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.10s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]


[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:01<00:00,  1.31s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.39s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.43s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:02<00:00,  2.57s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.60s/it]
100%|██████████| 1/1 [00:00<00:00, 21.35it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 24.934802055358887 seconds



 98%|█████████▊| 258/264 [1:45:13<02:34, 25.82s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:03<00:00,  3.75s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...





[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...


  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.35s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:06<00:00,  6.34s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.71s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.54s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.53s/it]
100%|██████████| 1/1 [00:00<00:00, 21.20it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 30.62846827507019 seconds



 98%|█████████▊| 259/264 [1:45:44<02:17, 27.57s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.10s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...





[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:01<00:00,  1.71s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:05<00:00,  5.56s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.53s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:04<00:00,  4.31s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.79s/it]
100%|██████████| 1/1 [00:00<00:00,  8.50it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 24.744736671447754 seconds



 98%|█████████▊| 260/264 [1:46:10<01:47, 26.97s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.07s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.43s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:08<00:00,  8.33s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:02<00:00,  2.87s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:05<00:00,  5.31s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:03<00:00,  3.11s/it]
100%|██████████| 1/1 [00:00<00:00, 14.48it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 28.94349765777588 seconds



 99%|█████████▉| 261/264 [1:46:40<01:23, 27.83s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.10s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:01<00:00,  1.22s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.96s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.93s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.78s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.53s/it]
100%|██████████| 1/1 [00:00<00:00, 23.54it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 22.644968509674072 seconds



 99%|█████████▉| 262/264 [1:47:03<00:53, 26.55s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.10s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...
[32mINFO[0m - Compiling training graphs...



100%|██████████| 1/1 [00:02<00:00,  2.62s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:06<00:00,  6.14s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.53s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:03<00:00,  3.95s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.56s/it]
100%|██████████| 1/1 [00:00<00:00, 28.53it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 26.195139169692993 seconds



100%|█████████▉| 263/264 [1:47:30<00:26, 26.65s/it]

[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Loading corpus from source files...


100%|██████████| 1/1 [00:01<00:00,  1.10s/it]

[32mINFO[0m - Found 1 speaker across 1 file, average number of utterances per speaker: 1.0
[32mINFO[0m - Initializing multiprocessing jobs...
[32mINFO[0m - Creating corpus split for feature generation...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Generating MFCCs...



  0%|          | 0/1 [00:01<?, ?it/s]

[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Creating corpus split with features...





[32mINFO[0m - Compiling training graphs...


100%|██████████| 1/1 [00:01<00:00,  1.25s/it]

[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:07<00:00,  7.49s/it]


[32mINFO[0m - Calculating fMLLR for speaker adaptation...


100%|██████████| 1/1 [00:01<00:00,  1.40s/it]

[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating alignments...



100%|██████████| 1/1 [00:01<00:00,  1.89s/it]


[32mINFO[0m - Exporting TextGrids to data/output/public_test_raw...
[32mINFO[0m - Collecting phone and word alignments from alignment lattices...


100%|██████████| 1/1 [00:01<00:00,  1.48s/it]
100%|██████████| 1/1 [00:00<00:00, 32.14it/s]

[32mINFO[0m - Finished exporting TextGrids to data/output/public_test_raw!
[32mINFO[0m - Done! Everything took 21.267521142959595 seconds



100%|██████████| 264/264 [1:47:52<00:00, 24.52s/it]


NameError: name 'SUBMISSION_DIR' is not defined