<a href="https://colab.research.google.com/github/MLo7Ghinsan/WFL-ASR/blob/main/inference.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#@title Install and Preparation
from google.colab import drive
from IPython.display import clear_output
import os
drive.mount('/content/drive')
!git clone https://github.com/MLo7Ghinsan/WFL-ASR
!wget https://github.com/MLo7Ghinsan/WFL-ASR/releases/download/model_release/ft_model.zip
!unzip ft_model.zip -d /content/model
%cd /content/WFL-ASR

clear_output()
print("Installing components for inference")

!pip install -r requirements.txt

clear_output()
print("Setup Complete")


In [None]:
#@title Apply settings
%%writefile /content/model/config.yaml

# Configuration written to /content/model/config.yaml.
# The inference cells of this notebook pass this file to infer.py via --config.
data:
  data_dir: /content/Training_dataset
  sample_rate: 16000
  num_val_files: 10
  max_seq_len: null
# NOTE(review): presumably these values must match the architecture of the
# checkpoint at /content/model/model.pt - confirm before changing any of them.
model:
  encoder_type: whisper
  whisper_model: openai/whisper-base
  wavlm_model: microsoft/wavlm-base
  freeze_encoder: false
  enable_bilstm: true
  bilstm_num_layer: 2
  enable_dilated_conv: true
  dilated_conv_depth: 2
  dilated_conv_kernel: 3
  enable_duration_prediction: true
  duration_head_dim: 128
  duration_loss_weight: 0.2
  enable_self_attn_polisher: false
  self_attn_heads: 2
  num_conformer_layers: 2
  conformer_heads: 2
  conformer_ff_expansion: 2
  conformer_kernel_size: 31
  conformer_dropout: 0.5
  lang_emb_dim: 64
  # This notebook documents two lang_id values: 0 = English, 1 = Japanese.
  num_languages: 2
# NOTE(review): training-related settings; presumably not read during
# inference - confirm against infer.py.
training:
  batch_size: 1
  num_workers: 4
  learning_rate: 0.0000001
  weight_decay: 1.0e-05
  label_smoothing: 0.1
  max_steps: 500000
  val_check_interval: 1000
  max_checkpoints: 5
  log_dir: /content/logs
output:
  save_dir: /content/model
# NOTE(review): looks like post-processing applied to the predicted labels;
# the exact meaning of each value is defined by the WFL-ASR code - confirm there.
postprocess:
  median_filter: 2
  merge_segments: previous


Language IDs (`lang_id`) used by the cells below:

- `0` — English
- `1` — Japanese

In [None]:
#@title # Single Audio Inference
import os
audio_file = "" # @param {"type":"string"}
lang_id = "0" # @param ["0","1"]
output_lab = audio_file.replace(".wav",".lab")
!python infer.py {audio_file} --checkpoint /content/model/model.pt --config /content/model/config.yaml --output {output_lab} --lang-id {lang_id}
clear_output()
print("Labels generated, check them under " + output_lab)

In [None]:
#@title # Folder Inference
folder_path = "" # @param {"type":"string"}
lang_id = "0" # @param ["0","1"]
lab_folder = True # @param {"type":"boolean"}
#@markdown <font size="-1.5"> Separate the label in their own folder
if lab_folder:
  output_path = folder_path + "/lab"
else:
  output_path = folder_path
!python infer.py {folder_path} --checkpoint /content/model/model.pt --config /content/model/config.yaml --output {output_path} --lang-id {lang_id}
clear_output()
print("Labels generated, check them under " + output_path)

In [None]:
#@title Correct Labels
#@markdown <font size="-1.5"> Folder path or wav file. Wav and label should be on the same folder
folder_path = "" # @param {"type":"string"}
#@markdown <font size="-1.5"> Save a PNG graph visualization of the label
save_png_visualization = True # @param {"type":"boolean"}
if save_png_visualization:
  !python correct_label.py {folder_path} --save_plot
else:
  !python correct_label.py {folder_path}

In [None]:
#@title # Automatic Inference, Correction and Packing
#@markdown This procress automatically unpacks wavs, process, correct and give back the labels inside a drive folder

data_zip_path = "" # @param {"type":"string"}
lang_id = "0" # @param ["0","1"]
correct_label = True # @param {"type":"boolean"}
#@markdown <font size="-1.5"> Save the original label generated by the WFL
save_original_labs= True # @param {"type":"boolean"}
#@markdown <font size="-1.5"> Save a PNG graph visualization of the label
save_png_visualization = True # @param {"type":"boolean"}

if correct_label == False:
  save_original_labs = False
  save_png_visualization = False

import os
import shutil
import subprocess
import datetime
from pathlib import Path
#Folders
temp_folder = Path("/content/temp")
temp_folder_export = Path("/content/temp_export")
wav_folder = Path("/content/wavs")
export_folder = Path("/content/drive/MyDrive/WFL/exports")
temp_folder.mkdir(exist_ok=True)
temp_folder_export.mkdir(exist_ok=True)
wav_folder.mkdir(exist_ok=True)
os.makedirs(export_folder,exist_ok=True)

#Extracting
try:
  print(f"Extracting {data_zip_path}")
  subprocess.run(['7z', 'x', data_zip_path, '-o' + str(temp_folder)], check=True)
  print(f"File {data_zip_path} extracted succesfully to {temp_folder}.")
except subprocess.CalledProcessError as e:
  print(f"Error: {e}")

#Moving
print(f"Moving Wav Files...")
for ext_files in temp_folder.rglob('*'):
  if ext_files.is_file():
    if ext_files.suffix.lower() in ['.wav']:
      dest_folder = wav_folder / ext_files.name
      shutil.move(str(ext_files), str(dest_folder))

clear_output()
print(f"Moving Complete.")

#Inference
print(f"Inferencing...")
!python infer.py {wav_folder} --checkpoint /content/model/model.pt --config /content/model/config.yaml --lang-id {lang_id}

if save_original_labs:
  og_label_folder= temp_folder_export / "original_label"
  og_label_folder.mkdir(exist_ok=True)
  print(f"Saving Original Labels...")
  for ext_files in wav_folder.rglob('*'):
    if ext_files.is_file():
      if ext_files.suffix.lower() in ['.lab']:
        dest_folder = og_label_folder / ext_files.name
        shutil.copy(str(ext_files), str(dest_folder))
  clear_output()
  print(f"Original Labels Saved.")

#Correct Label
if correct_label:
  print(f"Correcting Labels...")
  if save_png_visualization:
    !python correct_label.py {wav_folder} --save_plot
  else:
    !python correct_label.py {wav_folder}
  clear_output()
  print(f"Labels Corrected.")

# Move Correct Label and PNG to temp_folder_export
print(f"Moving Labels...")
for ext_files in wav_folder.rglob('*'):
  if ext_files.is_file():
    if ext_files.suffix.lower() in ['.lab', '.png']:
      dest_folder = temp_folder_export / ext_files.name
      shutil.move(str(ext_files), str(dest_folder))

clear_output()

# Pack and Move file to Google Drive
print(f"Packing...")
current_time = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
archive_name = f"WFL_Export_{current_time}.zip"
export_path = os.path.join(export_folder,archive_name)
try:
  print(f"Creating file: {archive_name}")
  subprocess.run(['7z', 'a', '-mx9', export_path, os.path.join(temp_folder_export, "*"),'-r'], check=True)
  print(f"File created successfully at: {export_path}")
except subprocess.CalledProcessError as e:
  print(f"Error creating file: {e}")

#Delete temp folder
shutil.rmtree(temp_folder)
shutil.rmtree(temp_folder_export)
shutil.rmtree(wav_folder)