In [None]:
!mkdir ~/.kaggle
!cp kaggle.json ~/.kaggle
!chmod 600 /root/.kaggle/kaggle.json

In [None]:
import os
import torch
from tqdm.auto import tqdm

# ASR

In [None]:
"""
You can run this notebook either locally (if you have all the dependencies and a GPU) or on Google Colab.

Instructions for setting up Colab are as follows:
1. Open a new Python 3 notebook.
2. Import this notebook from GitHub (File -> Upload Notebook -> "GITHUB" tab -> copy/paste GitHub URL)
3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select "GPU" for hardware accelerator)
4. Run this cell to set up dependencies.
5. Restart the runtime (Runtime -> Restart Runtime) for any upgraded packages to take effect


NOTE: User is responsible for checking the content of datasets and the applicable licenses and determining if suitable for the intended use.
"""
# If you're using Google Colab and not running locally, run this cell.

## Install dependencies
# !pip install wget
# System packages installed through apt: sox, libsndfile1 and ffmpeg.
!apt-get install -y sox libsndfile1 ffmpeg
!pip install text-unidecode
# !pip install matplotlib>=3.3.2

## Install NeMo
# BRANCH is substituted into the pip command below via IPython's `$BRANCH`
# interpolation, so NeMo is installed from that git branch with the [asr] extra.
BRANCH = 'r2.0.0rc0'
!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]
# !pip install nemo_toolkit[all] --upgrade

# The string below is the last expression of the cell, so it is shown as the cell output.
"""
Remember to restart the runtime for the kernel to pick up any upgraded packages (e.g. matplotlib)!
Alternatively, you can uncomment the exit() below to crash and restart the kernel, in the case
that you want to use the "Run All Cells" (or similar) option.
"""
# exit()

## Download Checkpoint from Kaggle

The link below may have expired; please refer to the README file for the latest link.

In [None]:
# Fetch the checkpoint `epoch=215-step=21600.ckpt` with wget (saved to the current
# working directory, since no output path is given).
# NOTE(review): the URL embeds a long opaque token — presumably a time-limited signed
# link; confirm against the README before relying on it.
!wget https://www.kaggleusercontent.com/kf/185200440/eyJhbGciOiJkaXIiLCJlbmMiOiJBMTI4Q0JDLUhTMjU2In0..9j8hO9RcMQo_hMrxtQ7UPA.jILygHNPhDmiiFkzrJcsUXvt84JgTLyHfRx6lqFPjKZkImx4pQ9BFefXPewyzRArFDpelTPV57kj1wWvgxbSalqbMq_E2xPefntPwFHqcvpknzG7E4iuZOoHvrQlNSaulSJAZgmIj3K2oj4qlOXMxO8J276k0EzWNdXmMMiWkTdpdn2GnxJ0K_J7r32Ishj1cGWVASzijnB-oas6h2Zw7yKbAJMdwM0Kcfrs-__VjgYku3J7vwjEki-Jjn0ClqyegssAx2yAU26M3XEZDN9B6sT1GeB422rkynfeBHwnlw3V7VPaJZ6Ywe2x-Hv7dLMu48mjfzCourrjC70Xu5dBPHzTveTwhRu1sGfmo_oMCEgsOoWVnrQLIuAwTjs8LnJpvmtOB2p_SsnG-j7byRTs3monO5mQP3EAEHAAyzUwdUD4quEAu2FG9qteWOF67YjOOEeNBA9OQwGrkQvKAgvCHza6kP0BYUhF4yvXwrlnHv90_fOW7lftUh7ktZzrDURQvudZVtxWn5OCjG5OJ9wOTjrnDY0ThKVS-qp6X3E7shYluqtylrYpF9_cj9CmvwdWDmUHBR2U8RObQKQyvaFAAyQH22SecCs4NHHXLievwR0AKnQ4xff6Fr10X44lHVE-ETrN_oglWf_U288v-5xzfQ.MiYrc-NVZdn_bTKteMYVxQ/AIC-ASR/i9bfrpip/checkpoints/epoch=215-step=21600.ckpt

## Tokenizer Creation

In [None]:
BRANCH = 'r2.0.0rc0'
if not os.path.exists("scripts/tokenizers/process_asr_text_tokenizer.py"):
  !mkdir scripts
  !wget -P scripts/ https://raw.githubusercontent.com/NVIDIA/NeMo/$BRANCH/scripts/tokenizers/process_asr_text_tokenizer.py

!python ./scripts/process_asr_text_tokenizer.py \
  --manifest="/content/train_manifest.json" \
  --data_root="." \
  --vocab_size=256 \
  --tokenizer="spe" \
  --no_lower_case \
  --spe_type="bpe" \
  --log

## Model Configs

In [None]:
try:
    from ruamel.yaml import YAML
except ModuleNotFoundError:
    from ruamel_yaml import YAML
config_path = './configs/fast-conformer_ctc_bpe.yaml'

if not os.path.exists(config_path):
    # Grab the config we'll use in this example
    BRANCH = 'r2.0.0rc0'
    !mkdir configs
    !wget -P configs/ https://raw.githubusercontent.com/NVIDIA/NeMo/$BRANCH/examples/asr/conf/fastconformer/fast-conformer_ctc_bpe.yaml


In [None]:
from omegaconf import OmegaConf

import pytorch_lightning as pl
from pytorch_lightning.loggers import WandbLogger
import logging

# NeMo's "core" package
import nemo
# NeMo's ASR collection - this collections contains complete ASR models and
# building blocks (modules) for ASR
import nemo.collections.asr as nemo_asr


# --- Config Information ---#
# Load the FastConformer CTC-BPE YAML config into a plain dict and override the
# dataset, tokenizer and optimizer settings used by this notebook.
try:
    from ruamel.yaml import YAML
except ModuleNotFoundError:
    from ruamel_yaml import YAML
config_path = './configs/fast-conformer_ctc_bpe.yaml'

yaml = YAML(typ='safe')
with open(config_path) as f:
    params = yaml.load(f)
# Shows the config as loaded from disk, i.e. before the overrides below are applied.
print(params)

train_manifest = "/content/train_manifest.json"
adapt_manifest = "/content/train_manifest.json"


# Training data loader settings.
params['model']['train_ds']['manifest_filepath'] = train_manifest
params['model']['train_ds']['batch_size'] = 16
params['model']['train_ds']['num_workers'] = 4


# No validation manifest is provided in this notebook.
params['model']['validation_ds']['manifest_filepath'] = None
params['model']['validation_ds']['batch_size'] = 1
params['model']['validation_ds']['num_workers'] = 4


# Directory of the tokenizer built in the "Tokenizer Creation" cell.
params['model']['tokenizer']['dir'] = "/content/tokenizer_spe_bpe_v256"

# Optimizer / scheduler overrides (warmup_steps is now set once, not twice).
params['model']['optim']['lr'] = 5e-5
params['model']['optim']['weight_decay'] = 0
params['model']['optim']['sched']['warmup_steps'] = 0
params['model']['optim']['sched']['min_lr'] = 1e-6


# Drop the test dataset section; the default avoids a KeyError if the config
# does not contain one.
params['model'].pop('test_ds', None)

## Model Creation

In [None]:
# Turn the plain `params` dict into an OmegaConf object and instantiate the
# BPE-based CTC model from its `model` section.
import nemo.collections.asr as nemo_asr

conf = OmegaConf.create(params)
model = nemo_asr.models.EncDecCTCModelBPE(
    cfg=conf.model,
)

### Loading Checkpoint

In [None]:
# Restore the trained weights from the downloaded checkpoint.
# NOTE: torch.load unpickles arbitrary Python objects — only load checkpoints
# obtained from a source you trust.
# map_location="cpu" makes the load independent of the device the checkpoint was
# saved from (e.g. a GPU checkpoint on a CPU-only runtime); load_state_dict then
# copies the tensors into the model's own parameters.
ckpt = torch.load("/content/epoch=215-step=21600.ckpt", map_location="cpu")
model.load_state_dict(ckpt['state_dict'])

## Testing on train/adapt

In [None]:
import soundfile as sf
from IPython.display import Audio

# Transcribe one sample twice: once after speech enhancement and once as-is.
audio_path = "test/test_sample_3577_noisy.wav"

# Read the waveform once and reuse it for both paths (sample rate is not needed here).
noisy, _ = sf.read(audio_path, dtype='float32')

model.eval()
# model = model.cuda()
with torch.no_grad():
  # NOTE(review): `model_enhance` is not defined anywhere in the visible notebook, so
  # on a fresh kernel the original cell raised NameError. It is presumably a speech
  # enhancement model created elsewhere — confirm. When it is missing, fall back to
  # the unprocessed audio so the cell still runs.
  enhancer = globals().get("model_enhance")
  if enhancer is None:
    print("model_enhance is not defined; skipping enhancement and using the raw audio.")
    clean = torch.Tensor(noisy)
  else:
    # Input is moved to the GPU and given two leading singleton dimensions
    # before enhancement; the result is squeezed back.
    clean = enhancer(torch.Tensor(noisy).cuda().unsqueeze(0).unsqueeze(0)).squeeze()
  print(clean.shape)
  print(model.transcribe([clean]))
  print(model.transcribe([noisy]))
# print(sf.read(audio_path, dtype='float32'))
# Playback assumes a 16 kHz signal.
Audio(clean.cpu().numpy(), rate=16000)

In [None]:
# Render an audio player for the file at `audio_path` (set in the previous cell).
Audio(audio_path)

## Test results.csv generation

In [None]:
import csv
import os

import soundfile as sf
import torch

# Transcribe every file in `data_dir` and write one "audio,transcript" row per file
# to results.csv.
data_dir = "/content/data/test"

model.eval()

# Open the output once, always as UTF-8 (the per-row append previously used the
# platform default encoding). newline="" is what the csv module expects;
# lineterminator="\n" keeps the original line endings.
with open("results.csv", "w", encoding="utf-8", newline="") as fp:
    # csv.writer quotes transcripts that contain commas or quotes, so the file
    # stays a valid two-column CSV.
    writer = csv.writer(fp, lineterminator="\n")
    writer.writerow(["audio", "transcript"])

    # Sorted for a deterministic row order; os.listdir order is arbitrary.
    for filename in tqdm(sorted(os.listdir(data_dir))):
        audio, sr = sf.read(os.path.join(data_dir, filename), dtype='float32')
        with torch.no_grad():
            rv = model.transcribe([audio])
        # The file name without its extension is used as the audio id.
        writer.writerow([os.path.splitext(os.path.basename(filename))[0], rv[0]])
        # Flush per row so partial results survive an interrupted run, as the
        # original append-per-iteration did.
        fp.flush()