# Installing required packages

In [None]:
!pip install nemo_toolkit[all]
!pip install pydub
!pip install jsonlines
!pip install jiwer

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting nemo_toolkit[all]
  Downloading nemo_toolkit-1.18.1-py3-none-any.whl (2.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.3/2.3 MB[0m [31m26.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting huggingface-hub (from nemo_toolkit[all])
  Downloading huggingface_hub-0.14.1-py3-none-any.whl (224 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m224.5/224.5 kB[0m [31m11.5 MB/s[0m eta [36m0:00:00[0m
Collecting onnx>=1.7.0 (from nemo_toolkit[all])
  Downloading onnx-1.14.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (14.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m14.6/14.6 MB[0m [31m35.6 MB/s[0m eta [36m0:00:00[0m
Collecting ruamel.yaml (from nemo_toolkit[all])
  Downloading ruamel.yaml-0.17.26-py3-none-any.whl (109 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m109.1/109.1 kB[

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
import re
import os
import librosa
import nemo
import nemo.collections.asr as nemo_asr
from tqdm import tqdm
import jsonlines
from sklearn.model_selection import train_test_split
from tqdm import tqdm
from jiwer import wer, cer
import pytorch_lightning as pl
from omegaconf import DictConfig
import pandas as pd
import gc
import torch
import warnings
from google.colab import drive
# Attach Google Drive to the Colab filesystem; the model checkpoints and the
# manifest used below are read from paths under this mount point.
drive.mount(mountpoint='/content/drive')

Mounted at /content/drive


# Define the model

In [None]:
# Restore the previously fine-tuned RNN-T BPE checkpoint from Drive.
# This instance is used below as the source of the configuration to edit.
asr_model = nemo_asr.models.EncDecRNNTBPEModel.restore_from(
    restore_path='/content/drive/MyDrive/finetuned_conformer.nemo',
)

[NeMo I 2023-05-24 19:16:34 mixins:170] Tokenizer SentencePieceTokenizer initialized with 1024 tokens


[NeMo W 2023-05-24 19:16:35 modelPT:161] If you intend to do training or fine-tuning, please call the ModelPT.setup_training_data() method and provide a valid configuration file to setup the train data loader.
    Train config : 
    manifest_filepath: /content/train.jsonl
    sample_rate: 16000
    batch_size: 2
    shuffle: true
    num_workers: 0
    pin_memory: true
    use_start_end_token: false
    trim_silence: false
    max_duration: 37.02
    min_duration: 0.01
    is_tarred: false
    tarred_audio_filepaths: null
    shuffle_n: 2048
    bucketing_strategy: synced_randomized
    bucketing_batch_size: null
    bucketing_weights: ''
    
[NeMo W 2023-05-24 19:16:35 modelPT:168] If you intend to do validation, please call the ModelPT.setup_validation_data() or ModelPT.setup_multiple_validation_data() method and provide a valid configuration file to setup the validation data loader(s). 
    Validation config : 
    manifest_filepath: /content/test.jsonl
    sample_rate: 16000
    

[NeMo I 2023-05-24 19:16:36 features:287] PADDING: 0


    
[NeMo W 2023-05-24 19:16:38 rnnt:1211] `preserve_memory` was set for the Joint Model. Please be aware this will severely impact the forward-backward step time. It also might not solve OOM issues if the GPU simply does not have enough memory to compute the joint.


[NeMo I 2023-05-24 19:16:38 rnnt_models:206] Using RNNT Loss : warprnnt_numba
    Loss warprnnt_numba_kwargs: {'fastemit_lambda': 0.0, 'clamp': -1.0}
[NeMo I 2023-05-24 19:16:40 save_restore_connector:249] Model EncDecRNNTBPEModel was successfully restored from /content/drive/MyDrive/finetuned_conformer.nemo.


# Load the manifest

The manifest contains, for each recording, the path to the audio file, its transcription, and the duration of the audio.

In [None]:
# Read every record of the JSON Lines manifest into an in-memory list
# (one parsed object per line of the file).
with jsonlines.open('/content/drive/MyDrive/manifest.jsonl') as reader:
    manifest = list(reader)

# Train

## Define the PyTorch Lightning trainer

In [None]:
# Build the PyTorch Lightning trainer that drives fine-tuning for 5 epochs.
# The accelerator is chosen from the actual runtime instead of being hard-coded
# to "cuda", so this cell no longer raises when the notebook is opened on a
# runtime without a GPU; on a GPU runtime the CUDA accelerator is still used.
trainer = pl.Trainer(
    max_epochs=5,
    accelerator="gpu" if torch.cuda.is_available() else "cpu",
)

INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs


## Split the data into train and test sets

In [None]:
# Hold out 30% of the manifest records for evaluation; the fixed random_state
# makes the split identical on every run.
train, test = train_test_split(manifest, test_size=0.3, random_state=22)

# Write each split to the absolute path that the dataset config reads
# ('/content/train.jsonl' and '/content/test.jsonl'). Writing to a relative
# path only worked while the working directory happened to be /content.
for split_path, split_records in (
    ('/content/train.jsonl', train),
    ('/content/test.jsonl', test),
):
    with jsonlines.open(split_path, mode='w') as writer:
        writer.write_all(split_records)

## Change parameters in the config

In [None]:
# Edit the restored model's configuration so that every dataset section points
# at the manifests written above and uses a batch size of 2.
params = asr_model._cfg

for section, manifest_path in (
    ('train_ds', '/content/train.jsonl'),
    ('validation_ds', '/content/test.jsonl'),
    ('test_ds', '/content/test.jsonl'),
):
    params[section]['manifest_filepath'] = manifest_path
    params[section]['batch_size'] = 2

# Load training batches in the main process (no worker subprocesses).
params['train_ds']['num_workers'] = 0

In [None]:
# Wrap the edited parameters in a DictConfig and apply the remaining overrides.
cfg_ = DictConfig(content=params)

# Joint-network settings. NOTE(review): presumably chosen to limit GPU memory
# use during training - confirm against the NeMo RNNT joint documentation.
cfg_['joint']['fuse_loss_wer'] = True
cfg_['joint']['fused_batch_size'] = 2
cfg_['joint']['preserve_memory'] = True

# Turn off prediction logging and clear the scheduler's warmup step count.
cfg_['log_prediction'] = False
cfg_['optim']['sched']['warmup_steps'] = None

## Restore model with our new config

In [None]:
# Restore the same checkpoint again, this time with the edited configuration
# overriding the one stored inside the .nemo archive.
asr_model = nemo_asr.models.EncDecRNNTBPEModel.restore_from(
    restore_path='/content/drive/MyDrive/finetuned_conformer.nemo',
    override_config_path=cfg_,
)

# Build the train and validation data loaders from the edited dataset sections.
train_ds_cfg = cfg_['train_ds']
validation_ds_cfg = cfg_['validation_ds']
asr_model.setup_training_data(train_ds_cfg)
asr_model.setup_multiple_validation_data(validation_ds_cfg)

[NeMo I 2023-05-24 19:22:31 mixins:170] Tokenizer SentencePieceTokenizer initialized with 1024 tokens


[NeMo W 2023-05-24 19:22:31 modelPT:161] If you intend to do training or fine-tuning, please call the ModelPT.setup_training_data() method and provide a valid configuration file to setup the train data loader.
    Train config : 
    manifest_filepath: /content/train.jsonl
    sample_rate: 16000
    batch_size: 2
    shuffle: true
    num_workers: 0
    pin_memory: true
    use_start_end_token: false
    trim_silence: false
    max_duration: 37.02
    min_duration: 0.01
    is_tarred: false
    tarred_audio_filepaths: null
    shuffle_n: 2048
    bucketing_strategy: synced_randomized
    bucketing_batch_size: null
    bucketing_weights: ''
    
[NeMo W 2023-05-24 19:22:31 modelPT:168] If you intend to do validation, please call the ModelPT.setup_validation_data() or ModelPT.setup_multiple_validation_data() method and provide a valid configuration file to setup the validation data loader(s). 
    Validation config : 
    manifest_filepath: /content/test.jsonl
    sample_rate: 16000
    

[NeMo I 2023-05-24 19:22:31 features:287] PADDING: 0


    
[NeMo W 2023-05-24 19:22:32 rnnt:1211] `preserve_memory` was set for the Joint Model. Please be aware this will severely impact the forward-backward step time. It also might not solve OOM issues if the GPU simply does not have enough memory to compute the joint.


[NeMo I 2023-05-24 19:22:33 rnnt_models:206] Using RNNT Loss : warprnnt_numba
    Loss warprnnt_numba_kwargs: {'fastemit_lambda': 0.0, 'clamp': -1.0}
[NeMo I 2023-05-24 19:22:33 save_restore_connector:249] Model EncDecRNNTBPEModel was successfully restored from /content/drive/MyDrive/finetuned_conformer.nemo.
[NeMo I 2023-05-24 19:22:33 collections:193] Dataset loaded with 1579 files totalling 1.82 hours
[NeMo I 2023-05-24 19:22:33 collections:194] 0 files were filtered totalling 0.00 hours
[NeMo I 2023-05-24 19:22:34 collections:193] Dataset loaded with 677 files totalling 0.82 hours
[NeMo I 2023-05-24 19:22:34 collections:194] 0 files were filtered totalling 0.00 hours


    


In [None]:
# Release unreferenced Python objects before training starts.
gc.collect()

# Record which device type is available on this runtime.
accelerator = 'gpu' if torch.cuda.is_available() else 'cpu'

# On a GPU runtime, also release cached CUDA memory held by PyTorch.
if accelerator == 'gpu':
  torch.cuda.empty_cache()

In [None]:
warnings.filterwarnings('ignore')

## Train model

In [None]:
# Run fine-tuning with the data loaders configured on the model above.
trainer.fit(model=asr_model)

INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


[NeMo I 2023-05-24 19:24:24 modelPT:722] Optimizer config = Novograd (
    Parameter Group 0
        amsgrad: False
        betas: [0.9, 0.98]
        eps: 1e-08
        grad_averaging: False
        lr: 0.001
        weight_decay: 0.001
    )
[NeMo I 2023-05-24 19:24:24 lr_scheduler:910] Scheduler "<nemo.core.optim.lr_scheduler.CosineAnnealing object at 0x7f34219a1120>" 
    will be used during training (effective maximum steps = 3950) - 
    Parameters : 
    (warmup_steps: null
    warmup_ratio: null
    min_lr: 1.0e-06
    max_steps: 3950
    )


INFO:pytorch_lightning.callbacks.model_summary:
  | Name              | Type                              | Params
------------------------------------------------------------------------
0 | preprocessor      | AudioToMelSpectrogramPreprocessor | 0     
1 | encoder           | ConformerEncoder                  | 115 M 
2 | decoder           | RNNTDecoder                       | 3.9 M 
3 | joint             | RNNTJoint                         | 1.4 M 
4 | loss              | RNNTLoss                          | 0     
5 | spec_augmentation | SpectrogramAugmentation           | 0     
6 | wer               | RNNTBPEWER                        | 0     
------------------------------------------------------------------------
120 M     Trainable params
0         Non-trainable params
120 M     Total params
481.780   Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=5` reached.


## Save model to disk

In [None]:
# Persist the fine-tuned model to Drive as a .nemo archive.
asr_model.save_to(
    save_path='/content/drive/MyDrive/finetuned_conformer_opochka.nemo',
)

In [None]:
# Reload the archive that was just saved; evaluation below runs on this
# restored instance rather than on the in-memory training model.
model = nemo_asr.models.EncDecRNNTBPEModel.restore_from(
    restore_path='/content/drive/MyDrive/finetuned_conformer_opochka.nemo',
)

[NeMo I 2023-05-24 20:04:59 mixins:170] Tokenizer SentencePieceTokenizer initialized with 1024 tokens


[NeMo W 2023-05-24 20:04:59 modelPT:161] If you intend to do training or fine-tuning, please call the ModelPT.setup_training_data() method and provide a valid configuration file to setup the train data loader.
    Train config : 
    manifest_filepath: /content/train.jsonl
    sample_rate: 16000
    batch_size: 2
    shuffle: true
    num_workers: 0
    pin_memory: true
    use_start_end_token: false
    trim_silence: false
    max_duration: 37.02
    min_duration: 0.01
    is_tarred: false
    tarred_audio_filepaths: null
    shuffle_n: 2048
    bucketing_strategy: synced_randomized
    bucketing_batch_size: null
    bucketing_weights: ''
    
[NeMo W 2023-05-24 20:04:59 modelPT:168] If you intend to do validation, please call the ModelPT.setup_validation_data() or ModelPT.setup_multiple_validation_data() method and provide a valid configuration file to setup the validation data loader(s). 
    Validation config : 
    manifest_filepath: /content/test.jsonl
    sample_rate: 16000
    

[NeMo I 2023-05-24 20:04:59 features:287] PADDING: 0


[NeMo W 2023-05-24 20:05:01 rnnt:1211] `preserve_memory` was set for the Joint Model. Please be aware this will severely impact the forward-backward step time. It also might not solve OOM issues if the GPU simply does not have enough memory to compute the joint.


[NeMo I 2023-05-24 20:05:01 rnnt_models:206] Using RNNT Loss : warprnnt_numba
    Loss warprnnt_numba_kwargs: {'fastemit_lambda': 0.0, 'clamp': -1.0}
[NeMo I 2023-05-24 20:05:01 save_restore_connector:249] Model EncDecRNNTBPEModel was successfully restored from /content/drive/MyDrive/finetuned_conformer_opochka.nemo.


# Evaluate

In [None]:
# Collect the audio path of every held-out record, in the same order as `test`,
# and transcribe them all with the restored model.
files = [entry['audio_filepath'] for entry in test]
transcriptions = model.transcribe(paths2audio_files=files)

In [None]:
# Score each hypothesis against its reference transcript and report the mean
# per-utterance WER and CER. Scores and hypotheses are also stored on the
# records of `test` so they can be exported afterwards.
wers = []
cers = []

# The first element of the transcribe() result is used as the list of hypotheses.
hypotheses = transcriptions[0]

# Hypotheses are paired with records by position, so the two sequences must
# line up one-to-one; fail loudly instead of scoring misaligned pairs.
if len(hypotheses) != len(test):
    raise ValueError(
        f'Got {len(hypotheses)} transcriptions for {len(test)} test records'
    )

for record, transcription in zip(test, hypotheses):
    reference = record['text']
    # Skip references with no content. strip() also catches whitespace-only
    # references other than '' and ' ' (tabs, newlines, several spaces).
    # NOTE(review): jiwer is expected to reject empty references - confirm.
    if not reference.strip():
        continue
    w = wer(reference, transcription)
    c = cer(reference, transcription)
    wers.append(w)
    cers.append(c)
    record['wer'] = w
    record['cer'] = c
    record['transcript'] = transcription

# Guard the averages: with no scored utterances the division would raise
# ZeroDivisionError.
if wers:
    print('Mean WER: ', sum(wers)/len(wers))
    print('Mean CER: ', sum(cers)/len(cers))
else:
    print('No non-empty reference transcripts to score.')

Mean WER:  0.3689178250587116
Mean CER:  0.2549948754243436


In [None]:
# Turn the test records (including any scores added during evaluation) into a
# table with one row per record.
test_df = pd.DataFrame.from_records(data=test)

In [None]:
# Export the per-record results to an Excel workbook in the working directory.
test_df.to_excel(excel_writer="output_conformer_opochka.xlsx")