In [None]:
import os
import json
import librosa
from tqdm import tqdm

In [None]:
from kaggle_secrets import UserSecretsClient

# Fetch the Weights & Biases API key from Kaggle's secret store so that it is
# never hard-coded in the notebook.
user_secrets = UserSecretsClient()
wandb_token = user_secrets.get_secret("wandb-token")

# Hand the key to W&B through the environment rather than through
# `!wandb login $wandb_token`: the shell form interpolates the secret, unquoted,
# into a command line. Child processes started from this kernel inherit
# os.environ, so they receive the key too.
os.environ["WANDB_API_KEY"] = wandb_token

In [None]:
# Copy everything from the attached Kaggle input dataset into the working
# directory.
# NOTE(review): presumably this dataset holds the outputs of the earlier run
# (e.g. the checkpoint that train.py resumes from) -- confirm its contents.
!cp -r /kaggle/input/002-fastconformerl-ctc-bpev256-from-scratch/* .

In [None]:
"""
You can either run this notebook locally (if you have all the dependencies and a GPU) or on Google Colab.

Instructions for setting up Colab are as follows:
1. Open a new Python 3 notebook.
2. Import this notebook from GitHub (File -> Upload Notebook -> "GITHUB" tab -> copy/paste GitHub URL)
3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select "GPU" for hardware accelerator)
4. Run this cell to set up dependencies.
5. Restart the runtime (Runtime -> Restart Runtime) for any upgraded packages to take effect


NOTE: User is responsible for checking the content of datasets and the applicable licenses and determining if suitable for the intended use.
"""
# If you're using Google Colab and not running locally, run this cell.
# Every `!` line below is executed as a shell command by IPython.

## Install dependencies
# !pip install wget
# System packages installed through apt: sox, libsndfile1 and ffmpeg.
!apt-get install -y sox libsndfile1 ffmpeg
!pip install text-unidecode
# !pip install matplotlib>=3.3.2

## Install NeMo
# BRANCH is the NeMo git ref to install; IPython substitutes the Python
# variable for `$BRANCH` in the command below.
BRANCH = 'r2.0.0rc0'
# Install NeMo (with its `asr` extra) straight from the GitHub repository at that ref.
!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]
# !pip install nemo_toolkit[all] --upgrade
!pip install boto3 --upgrade
"""
Remember to restart the runtime for the kernel to pick up any upgraded packages (e.g. matplotlib)!
Alternatively, you can uncomment the exit() below to crash and restart the kernel, in the case
that you want to use the "Run All Cells" (or similar) option.
"""
# exit()

In [None]:
def build_manifest(data_path, output_path, split, take=-1):
    """Write a JSON-lines manifest describing the audio samples of one split.

    Reads ``{data_path}/{split}.csv``, skips its header row, and for each
    remaining row writes one JSON object per line to ``output_path`` with the
    keys ``audio_filepath``, ``duration`` and ``text``. The first CSV column is
    used as the audio file stem (``{data_path}/{split}/<stem>.wav``) and the
    second column as the transcript.

    The CSV is parsed with the ``csv`` module, so quoted transcripts that
    contain commas are kept intact instead of being cut at the first comma.
    Both files are handled as UTF-8 because the manifest is emitted with
    ``ensure_ascii=False``.

    Args:
        data_path: Directory containing ``{split}.csv`` and the ``{split}/``
            folder of ``.wav`` files.
        output_path: Path of the manifest file to create (overwritten).
        split: Split name; selects both the CSV file and the audio folder.
        take: Maximum number of samples to write. Any negative value (the
            default) means "all rows"; ``0`` writes an empty manifest.

    Raises:
        FileNotFoundError: If the split CSV does not exist. The CSV is opened
            before the output file, so an existing manifest is not truncated
            in that case.
        IndexError: If a non-empty row has fewer than two columns.
    """
    import csv  # local import keeps this cell self-contained

    # newline="" lets the csv module handle embedded newlines / CRLF itself.
    with open(f"{data_path}/{split}.csv", "r", encoding="utf-8", newline="") as fp:
        with open(output_path, "w", encoding="utf-8") as fout:
            rows = csv.reader(fp)
            next(rows, None)  # drop the header row (no-op on an empty file)

            written = 0
            for row in tqdm(rows):
                # A non-negative `take` caps the number of written samples.
                if 0 <= take <= written:
                    break
                if not row:
                    # Blank line in the CSV: nothing to describe.
                    continue

                sample_path = f"{data_path}/{split}/{row[0].strip()}.wav"
                sample = {
                    "audio_filepath": sample_path,
                    "duration": librosa.get_duration(path=sample_path),
                    "text": row[1].strip(),
                }
                json.dump(sample, fout, ensure_ascii=False)
                fout.write("\n")
                written += 1

In [None]:
# Build a manifest for each split from the raw dataset; the files are written
# to the working directory as `<split>_manifest.json`.
DATASET_ROOT = "/kaggle/input/mtc-aic-dataset/MTC-AIC-Dataset/downloaded_file"
for split_name in ("train", "adapt"):
    build_manifest(DATASET_ROOT, f"{split_name}_manifest.json", split_name)

# Sanity check: count the entries of the adapt manifest.
# NOTE(review): this reads the prebuilt manifest from the `nemo-aic-2-manifests`
# input dataset, not the `adapt_manifest.json` just written above -- confirm
# that this is intended.
# The file is read as UTF-8 explicitly (manifests are written with
# ensure_ascii=False) and blank lines are skipped so a trailing newline cannot
# raise a JSONDecodeError.
with open("/kaggle/input/nemo-aic-2-manifests/adapt_manifest.json", "r", encoding="utf-8") as fp:
    data = [json.loads(line) for line in fp if line.strip()]

len(data)

In [None]:
BRANCH = 'r2.0.0rc0'
if not os.path.exists("scripts/tokenizers/process_asr_text_tokenizer.py"):
  !mkdir scripts
  !wget -P scripts/ https://raw.githubusercontent.com/NVIDIA/NeMo/$BRANCH/scripts/tokenizers/process_asr_text_tokenizer.py
        
!python ./scripts/process_asr_text_tokenizer.py \
  --manifest="/kaggle/input/nemo-aic-2-manifests/train_manifest.json" \
  --data_root="." \
  --vocab_size=256 \
  --tokenizer="spe" \
  --no_lower_case \
  --spe_type="bpe" \
  --log

In [None]:
# --- Config Information ---#
config_path = './configs/fast-conformer_ctc_bpe.yaml'

if not os.path.exists(config_path):
    # Grab the config we'll use in this example
    BRANCH = 'r2.0.0rc0'
    !mkdir configs
    !wget -P configs/ https://raw.githubusercontent.com/NVIDIA/NeMo/$BRANCH/examples/asr/conf/fastconformer/fast-conformer_ctc_bpe.yaml


In [None]:
# Source text of the standalone training script. A later cell writes this
# string verbatim to `train.py`, which is then run as a separate process.
#
# The script:
#   * loads `./configs/fast-conformer_ctc_bpe.yaml` with ruamel's safe loader;
#   * overrides the train/validation manifests, batch sizes, worker counts,
#     tokenizer directory and optimizer/scheduler settings, and drops `test_ds`;
#   * creates a WandbLogger with a fixed `version` and a Lightning Trainer
#     (400 epochs, 16-step gradient accumulation, validation every 2 epochs);
#   * builds `EncDecCTCModelBPE` and calls `trainer.fit` with a hard-coded
#     `ckpt_path` so training continues from that checkpoint.
#
# NOTE(review): `warmup_steps` is assigned twice (same value) inside the script.
# Everything between the triple quotes is runtime payload -- editing it changes
# the generated train.py.
code = """from omegaconf import OmegaConf

import pytorch_lightning as pl
from pytorch_lightning.loggers import WandbLogger
import logging
import torch

# NeMo's "core" package
import nemo
# NeMo's ASR collection - this collections contains complete ASR models and
# building blocks (modules) for ASR
import nemo.collections.asr as nemo_asr


# --- Config Information ---#
try:
    from ruamel.yaml import YAML
except ModuleNotFoundError:
    from ruamel_yaml import YAML
config_path = './configs/fast-conformer_ctc_bpe.yaml'

yaml = YAML(typ='safe')
with open(config_path) as f:
    params = yaml.load(f)
print(params)

train_manifest = "/kaggle/input/nemo-aic-2-manifests/train_manifest.json"
adapt_manifest = "/kaggle/input/nemo-aic-2-manifests/adapt_manifest.json"


params['model']['train_ds']['manifest_filepath'] = train_manifest
params['model']['train_ds']['batch_size'] = 16
params['model']['train_ds']['num_workers'] = 4


params['model']['validation_ds']['manifest_filepath'] = adapt_manifest
params['model']['validation_ds']['batch_size'] = 1
params['model']['validation_ds']['num_workers'] = 4


params['model']['tokenizer']['dir'] = "tokenizer_spe_bpe_v256"
params['model']['optim']['sched']['warmup_steps'] = 0

params['model']['optim']['lr'] = 5e-5
params['model']['optim']['weight_decay'] = 0
params['model']['optim']['sched']['warmup_steps'] = 0
params['model']['optim']['sched']['min_lr'] = 1e-7


params['model'].pop('test_ds')

############### Medium configs ####################
## Commented for now
#params['model']['encoder']['d_model'] = 256
#params['model']['encoder']['n_heads'] = 4
#params['model']['encoder']['n_layers'] = 16



logging.getLogger('nemo_logger').setLevel(logging.ERROR)

wandb_logger = WandbLogger(project="AIC-ASR", name="002_FastConformerL_ctc_b16x16_bpev256_gpu2T4_scratch", version="t07vpj2m")

trainer = pl.Trainer(max_epochs=400, logger=wandb_logger, accumulate_grad_batches=16, check_val_every_n_epoch=2)

conf = OmegaConf.create(params)

print(OmegaConf.to_yaml(conf, resolve=True))
# print(DictConfig(OmegaConf.to_yaml(conf['model'], resolve=True)))

model = nemo_asr.models.EncDecCTCModelBPE(cfg=conf['model'], trainer=trainer)

print(model)

trainer.fit(model, ckpt_path="AIC-ASR/t07vpj2m/checkpoints/epoch=39-step=4000.ckpt")"""

In [None]:
# Materialise the training script held in `code` as `train.py` in the working
# directory, replacing any previous copy.
with open("train.py", mode="w+") as script_file:
    script_file.write(code)

In [None]:
%%bash
# Run the generated training script in a separate Python process.
python train.py