In [1]:
import os
from omegaconf import OmegaConf, open_dict
import nemo.collections.asr as nemo_asr
from nemo.collections.asr.metrics.wer import word_error_rate
from nemo.utils import exp_manager
import nemo
from omegaconf import OmegaConf
# Manifest Utils
from tqdm.auto import tqdm
import json
# Preprocessing steps
import torch.nn as nn
import pytorch_lightning as ptl
import json
from datetime import datetime

################################################################################
###          (please add 'export KALDI_ROOT=<your_path>' in your $HOME/.profile)
###          (or run as: KALDI_ROOT=<your_path> python <your_script>.py)
################################################################################



In [2]:
# ---------------------------------------------------------------------------
# Notebook configuration: every path and switch the later cells read.
# ---------------------------------------------------------------------------

# Logging switch (defined here; not read by any cell visible in this notebook).
WANDB_LOGGER = True

# Project layout: everything below ASR_DIR is derived with os.path.join.
ASR_DIR = "/home/khoatlv/ASR-NEMO"
TOKENIZER_DIR = os.path.join(ASR_DIR, "tokenizers")

# Model artefacts: .nemo archive path and the YAML config (relative to ASR_DIR).
model_path = "/home/khoatlv/models/quarznet/stt_en_quartznet15x5.nemo"
model_config = "config/quartznet_15x5.yaml"
config_path = os.path.join(ASR_DIR, model_config)

# Manifests that load_config() injects into the train / validation / test sections.
train_manifest_cleaned, test_manifest_cleaned = (
    "/home/khoatlv/manifests/train_manifest_processed.json",
    "/home/khoatlv/manifests/test_manifest_processed.json",
)

# Placeholders; only the commented-out label overrides in load_config() mention train_set.
train_set = test_set = None

In [3]:
def enable_bn_se(m):
    """Re-enable training for batch-norm and squeeze-excite modules.

    Written to be handed to ``Module.apply`` (the setup cell calls
    ``asr_model.encoder.apply(enable_bn_se)`` right after freezing the encoder).

    Args:
        m: A module. It is modified only when its type is exactly
            ``nn.BatchNorm1d`` (subclasses are not matched) or when its class
            name contains ``'SqueezeExcite'``; any other module is left as is.

    Returns:
        None. Matching modules are switched to train mode and all of their
        parameters get ``requires_grad=True``.
    """
    is_batch_norm = type(m) is nn.BatchNorm1d
    is_squeeze_excite = 'SqueezeExcite' in type(m).__name__
    if not (is_batch_norm or is_squeeze_excite):
        return

    m.train()
    for weight in m.parameters():
        weight.requires_grad_(True)

def load_config(path):
    """Load the model YAML and point its dataset sections at the cleaned manifests.

    The file is loaded with OmegaConf, converted to a plain container with
    interpolations resolved, and wrapped in a fresh OmegaConf object before the
    manifest paths are overridden.

    Args:
        path: Location of the YAML configuration file.

    Returns:
        The OmegaConf config whose ``model.train_ds`` uses
        ``train_manifest_cleaned`` and whose ``model.validation_ds`` and
        ``model.test_ds`` both use ``test_manifest_cleaned``.
    """
    resolved = OmegaConf.to_container(OmegaConf.load(path), resolve=True)
    config = OmegaConf.create(resolved)

    # Dataset section -> manifest it should read (validation and test share one file).
    manifest_by_section = (
        ("train_ds", train_manifest_cleaned),
        ("validation_ds", test_manifest_cleaned),
        ("test_ds", test_manifest_cleaned),
    )
    for section, manifest in manifest_by_section:
        getattr(config.model, section).manifest_filepath = manifest

    # Label overrides derived from train_set are currently commented out:
    # config.model.train_ds.labels = list(train_set)
    # config.model.validation_ds.labels = list(train_set)
    # config.model.test_ds.labels = list(train_set)

    return config

In [5]:
# import torch
# check_point = torch.load(
#     "/home/khoatlv/ASR/nemo_experiments/QuartzNet12x1/2022-03-20_14-52-16/checkpoints/QuartzNet12x1--val_loss=26.3146-epoch=9-last.ckpt",
#     map_location='cuda'
# )
# print(check_point.keys())
# check_point['hyper_parameters']['cfg']['beam_search_lm']['lm_path'] = "/home/khoatlv/models/n_gram_model/4-gram-lm_large.bin"
# torch.save(check_point, "/home/khoatlv/ASR/nemo_experiments/QuartzNet12x1/2022-03-20_14-52-16/checkpoints/cepoch_9_change_lm_path.ckpt")

# print(asr_model.cfg.keys())

In [6]:

# Alternative starting point (disabled): restore the model from a .nemo archive.
# asr_model = nemo_asr.models.EncDecCTCModel.restore_from(
#     "/home/khoatlv/ASR-NEMO/models/quarznet/stt_en_quartznet15x5.nemo",
#     map_location='cuda'
# )

# Active starting point: the "-last" Lightning checkpoint of an earlier run,
# with its tensors mapped onto the GPU.
checkpoint_path = "/home/khoatlv/ASR-NEMO/experiments/ASR-Char-Model-Language-vi/2022-03-24_06-34-59/checkpoints/ASR-Char-Model-Language-vi--val_wer=0.7208-epoch=4-last.ckpt"
asr_model = nemo_asr.models.EncDecCTCModel.load_from_checkpoint(
    checkpoint_path,
    map_location='cuda',
)

# NOTE(review): export() is called without an output path and before any
# fine-tuning happens -- confirm against the installed NeMo version's
# Exportable.export signature that this call is valid and intended.
asr_model.export()

[NeMo W 2022-03-24 06:44:48 modelPT:148] If you intend to do training or fine-tuning, please call the ModelPT.setup_training_data() method and provide a valid configuration file to setup the train data loader.
    Train config : 
    manifest_filepath: /home/khoatlv/manifests/train_manifest_processed_processed.json
    sample_rate: 16000
    labels:
    - b
    - c
    - d
    - đ
    - g
    - h
    - k
    - l
    - m
    - 'n'
    - p
    - q
    - r
    - s
    - t
    - v
    - x
    - a
    - á
    - à
    - ạ
    - ã
    - ả
    - ă
    - ắ
    - ằ
    - ặ
    - ẵ
    - ẳ
    - â
    - ấ
    - ầ
    - ậ
    - ẫ
    - ẩ
    - e
    - é
    - è
    - ẹ
    - ẽ
    - ẻ
    - ê
    - ế
    - ề
    - ệ
    - ễ
    - ể
    - i
    - í
    - ì
    - ị
    - ĩ
    - ỉ
    - 'y'
    - ý
    - ỳ
    - ỵ
    - ỹ
    - ỷ
    - o
    - ó
    - ò
    - ọ
    - õ
    - ỏ
    - ô
    - ố
    - ồ
    - ộ
    - ỗ
    - ổ
    - ơ
    - ớ
    - ờ
    - ợ
    - ỡ
    - ở
    - u
    - ú
    - ù
    

[NeMo I 2022-03-24 06:44:48 features:255] PADDING: 16
[NeMo I 2022-03-24 06:44:48 features:272] STFT using torch


In [7]:
# --- Weights & Biases credentials -------------------------------------------
# The API key must never live in the notebook source. wandb reads it from the
# WANDB_API_KEY environment variable (or from an earlier `wandb login`), so no
# shell command carrying the secret is needed here.
# SECURITY: the key that used to be hardcoded in this cell has been exposed and
# should be revoked and replaced in the W&B account settings.
if not os.environ.get("WANDB_API_KEY"):
    print("WANDB_API_KEY is not set; W&B logging will rely on an existing `wandb login` session.")

# --- Encoder freezing ---------------------------------------------------------
# True: freeze the encoder, then re-enable only the modules matched by
# enable_bn_se. False: leave the whole encoder trainable.
freeze_encoder = True

if freeze_encoder:
    asr_model.encoder.freeze()
    asr_model.encoder.apply(enable_bn_se)
    print("Model encoder has been frozen, and batch normalization has been unfrozen")
else:
    asr_model.encoder.unfreeze()
    print("Model encoder has been un-frozen")

# --- Vocabulary and data ------------------------------------------------------
# Labels come from the YAML config; manifests were overridden by load_config().
config = load_config(config_path)
asr_model.change_vocabulary(new_vocabulary=list(config.model.labels))

asr_model.cfg.labels = list(config.model.labels)
asr_model.setup_training_data(config.model.train_ds)
asr_model.setup_multiple_validation_data(config.model.validation_ds)

# --- Optimizer and augmentation ----------------------------------------------
# open_dict lets these keys be assigned on the model's config.
with open_dict(asr_model.cfg):
    asr_model.cfg.optim = config.model.optim
    asr_model.cfg.spec_augment = config.model.spec_augment

asr_model.spec_augmentation = asr_model.from_config_dict(config.model.spec_augment)
# NOTE(review): this runs before set_trainer(), and the captured log shows NeMo
# warning that the scheduler cannot be instantiated at this point; the log also
# shows optimizer and scheduler being set up again once trainer.fit() starts.
asr_model.setup_optimization(config.model.optim)

# --- Metric logging -----------------------------------------------------------
# Presumably switches the WER metric to character error rate -- confirm in NeMo.
asr_model._wer.use_cer = True
# Enables the reference/predicted transcript lines seen in the training log.
asr_model._wer.log_prediction = True

# --- Trainer and experiment manager -------------------------------------------
trainer = ptl.Trainer(**config.trainer)
asr_model.set_trainer(trainer)
# Re-assign the internal config so the changes made above are stored on the model.
asr_model.cfg = asr_model._cfg

exp_config = exp_manager.ExpManagerConfig(**config.exp_manager)
exp_config = OmegaConf.structured(exp_config)
logdir = exp_manager.exp_manager(trainer, exp_config)

wandb: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Model encoder has been frozen, and batch normalization has been unfrozen


[NeMo W 2022-03-24 06:44:50 ctc_models:328] Old ['b', 'c', 'd', 'đ', 'g', 'h', 'k', 'l', 'm', 'n', 'p', 'q', 'r', 's', 't', 'v', 'x', 'a', 'á', 'à', 'ạ', 'ã', 'ả', 'ă', 'ắ', 'ằ', 'ặ', 'ẵ', 'ẳ', 'â', 'ấ', 'ầ', 'ậ', 'ẫ', 'ẩ', 'e', 'é', 'è', 'ẹ', 'ẽ', 'ẻ', 'ê', 'ế', 'ề', 'ệ', 'ễ', 'ể', 'i', 'í', 'ì', 'ị', 'ĩ', 'ỉ', 'y', 'ý', 'ỳ', 'ỵ', 'ỹ', 'ỷ', 'o', 'ó', 'ò', 'ọ', 'õ', 'ỏ', 'ô', 'ố', 'ồ', 'ộ', 'ỗ', 'ổ', 'ơ', 'ớ', 'ờ', 'ợ', 'ỡ', 'ở', 'u', 'ú', 'ù', 'ụ', 'ũ', 'ủ', 'ư', 'ứ', 'ừ', 'ự', 'ữ', 'ử', 'j', 'f', 'w', 'z', ' '] and new ['b', 'c', 'd', 'đ', 'g', 'h', 'k', 'l', 'm', 'n', 'p', 'q', 'r', 's', 't', 'v', 'x', 'a', 'á', 'à', 'ạ', 'ã', 'ả', 'ă', 'ắ', 'ằ', 'ặ', 'ẵ', 'ẳ', 'â', 'ấ', 'ầ', 'ậ', 'ẫ', 'ẩ', 'e', 'é', 'è', 'ẹ', 'ẽ', 'ẻ', 'ê', 'ế', 'ề', 'ệ', 'ễ', 'ể', 'i', 'í', 'ì', 'ị', 'ĩ', 'ỉ', 'y', 'ý', 'ỳ', 'ỵ', 'ỹ', 'ỷ', 'o', 'ó', 'ò', 'ọ', 'õ', 'ỏ', 'ô', 'ố', 'ồ', 'ộ', 'ỗ', 'ổ', 'ơ', 'ớ', 'ờ', 'ợ', 'ỡ', 'ở', 'u', 'ú', 'ù', 'ụ', 'ũ', 'ủ', 'ư', 'ứ', 'ừ', 'ự', 'ữ', 'ử', 'j', 'f', 'w', 'z', ' '] ma

[NeMo I 2022-03-24 06:44:50 collections:173] Dataset loaded with 419 files totalling 0.44 hours
[NeMo I 2022-03-24 06:44:50 collections:174] 0 files were filtered totalling 0.00 hours
[NeMo I 2022-03-24 06:44:50 collections:173] Dataset loaded with 197 files totalling 0.23 hours
[NeMo I 2022-03-24 06:44:50 collections:174] 0 files were filtered totalling 0.00 hours


[NeMo W 2022-03-24 06:44:50 modelPT:475] Trainer wasn't specified in model constructor. Make sure that you really wanted it.


[NeMo I 2022-03-24 06:44:50 modelPT:587] Optimizer config = Novograd (
    Parameter Group 0
        amsgrad: False
        betas: [0.95, 0.25]
        eps: 1e-08
        grad_averaging: False
        lr: 0.01
        weight_decay: 0.001
    )


[NeMo W 2022-03-24 06:44:50 lr_scheduler:816] Neither `max_steps` nor `iters_per_batch` were provided to `optim.sched`, cannot compute effective `max_steps` !
    Scheduler will not be instantiated !
      rank_zero_deprecation(
    
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs


[NeMo I 2022-03-24 06:44:50 exp_manager:283] Experiments will be logged at /home/khoatlv/ASR-NEMO/experiments/ASR-Char-Model-Language-vi/2022-03-24_06-44-50


[NeMo W 2022-03-24 06:44:50 exp_manager:881] The checkpoint callback was told to monitor a validation value and trainer's max_steps was set to -1. Please ensure that max_steps will run for at least 1 epochs to ensure that checkpointing will not error out.


In [8]:
# Start fine-tuning; `trainer` and `asr_model` are created in the setup cell above.
print("Starting train model")
trainer.fit(asr_model)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[NeMo W 2022-03-24 06:44:51 modelPT:496] The lightning trainer received accelerator: <pytorch_lightning.accelerators.gpu.GPUAccelerator object at 0x7fbb5b31ed60>. We recommend to use 'ddp' instead.


Starting train model
[NeMo I 2022-03-24 06:44:51 modelPT:587] Optimizer config = Novograd (
    Parameter Group 0
        amsgrad: False
        betas: [0.95, 0.25]
        eps: 1e-08
        grad_averaging: False
        lr: 0.01
        weight_decay: 0.001
    )
[NeMo I 2022-03-24 06:44:51 lr_scheduler:833] Scheduler "<nemo.core.optim.lr_scheduler.CosineAnnealing object at 0x7fbb5b259e50>" 
    will be used during training (effective maximum steps = 135) - 
    Parameters : 
    (warmup_steps: null
    warmup_ratio: 0.05
    min_lr: 1.0e-05
    last_epoch: -1
    max_steps: 135
    )



  | Name              | Type                              | Params
------------------------------------------------------------------------
0 | preprocessor      | AudioToMelSpectrogramPreprocessor | 0     
1 | encoder           | ConvASREncoder                    | 18.9 M
2 | decoder           | ConvASRDecoder                    | 97.4 K
3 | loss              | CTCLoss                           | 0     
4 | spec_augmentation | SpectrogramAugmentation           | 0     
5 | _wer              | WER                               | 0     
------------------------------------------------------------------------
174 K     Trainable params
18.8 M    Non-trainable params
19.0 M    Total params
75.968    Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

[NeMo I 2022-03-24 06:44:53 wer:244] 
    
[NeMo I 2022-03-24 06:44:53 wer:245] reference:em liền gọi to
[NeMo I 2022-03-24 06:44:53 wer:246] predicted:nh hn h
[NeMo I 2022-03-24 06:44:53 wer:244] 
    
[NeMo I 2022-03-24 06:44:53 wer:245] reference:tháng mười mộtchờ mong duyên mới
[NeMo I 2022-03-24 06:44:53 wer:246] predicted:n hn hn hn 


Training: 0it [00:00, ?it/s]

[NeMo I 2022-03-24 06:44:57 wer:244] 
    
[NeMo I 2022-03-24 06:44:57 wer:245] reference:dạ nhớ bà dã quỳ ông là
[NeMo I 2022-03-24 06:44:57 wer:246] predicted:  hn n đ n
[NeMo I 2022-03-24 06:44:59 wer:244] 
    
[NeMo I 2022-03-24 06:44:59 wer:245] reference:hay là tay sai cho quỷ thì thực sự rất là khủng khiếp
[NeMo I 2022-03-24 06:44:59 wer:246] predicted:th h chà hn hàn han han chn han cha chn h


Validating: 0it [00:00, ?it/s]

[NeMo I 2022-03-24 06:45:03 wer:244] 
    
[NeMo I 2022-03-24 06:45:03 wer:245] reference:em liền gọi to
[NeMo I 2022-03-24 06:45:03 wer:246] predicted: 
[NeMo I 2022-03-24 06:45:03 wer:244] 
    
[NeMo I 2022-03-24 06:45:03 wer:245] reference:tháng mười mộtchờ mong duyên mới
[NeMo I 2022-03-24 06:45:03 wer:246] predicted: h 
[NeMo I 2022-03-24 06:45:03 wer:244] 
    
[NeMo I 2022-03-24 06:45:03 wer:245] reference:ngày thơ đi mất
[NeMo I 2022-03-24 06:45:03 wer:246] predicted: 
[NeMo I 2022-03-24 06:45:03 wer:244] 
    
[NeMo I 2022-03-24 06:45:03 wer:245] reference:bố liền nói
[NeMo I 2022-03-24 06:45:03 wer:246] predicted: 
[NeMo I 2022-03-24 06:45:03 wer:244] 
    
[NeMo I 2022-03-24 06:45:03 wer:245] reference:trích dẫn từ báo chí
[NeMo I 2022-03-24 06:45:03 wer:246] predicted: h 
[NeMo I 2022-03-24 06:45:03 wer:244] 
    
[NeMo I 2022-03-24 06:45:03 wer:245] reference:dạ bẩm thầy giờ đã muộn rồi à
[NeMo I 2022-03-24 06:45:03 wer:246] predicted: h 
[NeMo I 2022-03-24 06:45:03 wer:2

Epoch 0, global step 26: val_wer reached 0.90281 (best 0.90281), saving model to "/home/khoatlv/ASR-NEMO/experiments/ASR-Char-Model-Language-vi/2022-03-24_06-44-50/checkpoints/ASR-Char-Model-Language-vi--val_wer=0.9028-epoch=0.ckpt" as top 3
Exception ignored in: <function _releaseLock at 0x7fbcb9d56040>
Traceback (most recent call last):
  File "/usr/lib/python3.8/logging/__init__.py", line 227, in _releaseLock
    def _releaseLock():
KeyboardInterrupt: 


[NeMo I 2022-03-24 06:45:10 wer:244] 
    
[NeMo I 2022-03-24 06:45:10 wer:245] reference:biển trời tình mẹ
[NeMo I 2022-03-24 06:45:10 wer:246] predicted: đhn h ni
[NeMo I 2022-03-24 06:45:12 wer:244] 
    
[NeMo I 2022-03-24 06:45:12 wer:245] reference:bởi đối với cô
[NeMo I 2022-03-24 06:45:12 wer:246] predicted: hng hàn h


Validating: 0it [00:00, ?it/s]

[NeMo I 2022-03-24 06:45:16 wer:244] 
    
[NeMo I 2022-03-24 06:45:16 wer:245] reference:em liền gọi to
[NeMo I 2022-03-24 06:45:16 wer:246] predicted: hàn h h
[NeMo I 2022-03-24 06:45:16 wer:244] 
    
[NeMo I 2022-03-24 06:45:16 wer:245] reference:tháng mười mộtchờ mong duyên mới
[NeMo I 2022-03-24 06:45:16 wer:246] predicted: ch h h h
[NeMo I 2022-03-24 06:45:16 wer:244] 
    
[NeMo I 2022-03-24 06:45:16 wer:245] reference:ngày thơ đi mất
[NeMo I 2022-03-24 06:45:16 wer:246] predicted: h ch h
[NeMo I 2022-03-24 06:45:16 wer:244] 
    
[NeMo I 2022-03-24 06:45:16 wer:245] reference:bố liền nói
[NeMo I 2022-03-24 06:45:16 wer:246] predicted: h
[NeMo I 2022-03-24 06:45:16 wer:244] 
    
[NeMo I 2022-03-24 06:45:16 wer:245] reference:trích dẫn từ báo chí
[NeMo I 2022-03-24 06:45:16 wer:246] predicted: ch  h
[NeMo I 2022-03-24 06:45:16 wer:244] 
    
[NeMo I 2022-03-24 06:45:16 wer:245] reference:dạ bẩm thầy giờ đã muộn rồi à
[NeMo I 2022-03-24 06:45:16 wer:246] predicted: ch h h
[NeMo 

Epoch 1, global step 53: val_wer reached 0.75558 (best 0.75558), saving model to "/home/khoatlv/ASR-NEMO/experiments/ASR-Char-Model-Language-vi/2022-03-24_06-44-50/checkpoints/ASR-Char-Model-Language-vi--val_wer=0.7556-epoch=1.ckpt" as top 3


In [None]:
# save_path = "/home/khoatlv/models/quarznet/quartznet_12x1_trained.nemo"
# asr_model.save_to(save_path)

In [None]:
# asr_model.setup_test_data(config.model.test_ds)
# trainer.test(asr_model, ckpt_path=None)