Intunist's notebook for NNSVS rapid prototyping. **_(Colab version)_**
<br/>For NNSVS 0.0.3
<br/>Intunist cannot provide support if you encounter errors. Please ask the community for support.

Copying and modifying this notebook is **not permitted** as it creates complications.

**Click <img src='https://i.imgur.com/IfKbEiK.png'> to view files**

In [1]:
#@title Connect Google Drive
from google.colab import drive

# ANSI escape sequences: bright-white text on a grey background, then reset.
_BANNER_ON, _BANNER_OFF = '\033[97;100m', '\033[0m'
print(_BANNER_ON + 'Mounting Google Drive.' + _BANNER_OFF)

# The release cell optionally copies the packaged model under this mount point.
drive.mount("/content/drive")

[97;100mMounting Google Drive.[0m
Mounted at /content/drive


In [None]:
#@title Install NNSVS
%rm -rf sample_data
%cd -q /content
!git clone https://github.com/intunist/ETK.git ETK &> /dev/null
!bash /content/ETK/train/install.sh
!ln -sf /content/ETK/train/conf/train/acoustic/model/acoustic_conv1dresnet.yaml /content/ETK/train/conf/train/acoustic/model/acoustic_current.yaml &> /dev/null
!ln -sf /content/ETK/train/conf/train/acoustic/model/acoustic_current.yaml ACOUSTIC_SETTINGS.yaml &> /dev/null
%reload_ext tensorboard

Installing, this will take about 5.5 minutes.
installing 7z
installing nnsvs 0.0.3
installing miscellaneous packages (this will take a bit...)


In [None]:
#@title Decompress your dataset

empty_dataset_folder = False #@param {type:"boolean"}
compressed_dataset_path = "/content/dataset.zip" #@param {type:"string"}

if empty_dataset_folder == True:
  !rm -rf /content/ETK/singing_database

if compressed_dataset_path.endswith('.rar'):
  !unrar -o+ x "$compressed_dataset_path" /content/ETK/singing_database/ | tqdm --desc extracting --unit files --unit_scale > /dev/null
else:
  !7za  -bso0 -y x "$compressed_dataset_path" -o/content/ETK/singing_database/
print('\nDone!')

#@markdown If you choose to not use this cell, upload your files to `ETK/singing_database/`

In [None]:
#@title Alternative: Extract dump.
#@markdown To save time, you can do feature extraction (stage 1) and unpack it.
#@markdown </br>Note that ResF0 and non-ResF0 models need separate dumps.
compressed_dump_path = "/content/dump.*" #@param {type:"string"}
#@markdown The files will be extracted to `ETK/train/dump/`. The archived folder needs to match the singer name.

if compressed_dump_path.endswith('.rar'):
  !unrar -o+ x "$compressed_dump_path" /content/ETK/train/dump/ | tqdm --desc extracting --unit files --unit_scale > /dev/null
else:
  !7za  -bso0 -y x "$compressed_dump_path" -o/content/ETK/train/dump/
print('\nDone!')

# Configure NNSVS
---

In [None]:
#@title Change Language and model
#@markdown Changing these settings will require you to run feature generation again.

# Form parameters for this cell. Everything below them in the cell rewrites
# the ETK / NNSVS YAML configs in place (via sed) to match these choices.

# Language preset: selects the HED question set and phoneme table further down
# in this cell, and which language-support repository (if any) is cloned.
lang = "Japanese" #@param ["Japanese","Japanese_compatibility", "English", "Romance", "Polish", "Custom"]
# Also read by the training and release cells (spk, model_dir, stats_dir and
# the release archive name are all derived from it).
singer_name = "TILKE_ENUNU" #@param {type:"string"}
#@markdown No spaces in `singer_name:`. Use underscores `_`.

#@markdown if custom language, set these:
# Only used when lang == "Custom": file names looked up under
# /content/ETK/train/hed and written as dic/<custom_table> into the configs.
custom_hed = "custom.hed" #@param {type:"string"}
custom_table = "custom.table" #@param {type:"string"}
#@markdown Place your files into the `hed` and `dic` folder.
# Disabled: vowels are now derived from the HED file by get_vowels().
#vowels = "a, e ,i, o, u, N, A, E, I, O, U" #@param {type:"string"}

# Acoustic model type. The lower-cased value selects
# conf/train/acoustic/model/acoustic_<model>.yaml as acoustic_current.yaml.
model = "Conv1dResnet" #@param ["Conv1dResnet", "ResF0Conv1dResnet", "FFConvLSTM", "ResSkipF0FFConvLSTM", "ResF0VariancePredictor", "ResF0NonAttentiveTacotron", "world_multistream_ar-mgc-f0-bap"]
# Written as use_mdn: true/false into acoustic_current.yaml.
use_mdn = False #@param {type:"boolean"}
#@markdown MDN is available for all acoustic model types except Tacotron. MDN is not recommended below 1hr of human audio.

# Only sine vibrato is supported.
vibrato = "none" #@param ["none", "sine", "diff"]
# Author's notes on enabling vibrato:
# change out_dim to 206 when using vibrato
# might require ResF0
# add stream sizes to acoustic model config (none: [180, 3, 1, 15], diff: [180, 3, 1, 15, 3], sine: [180, 3, 1, 15, 6, 1])
# change acoustic features file
# this may be broken and I don't know how to fix it right now.
# may remove "diff" as an option.
# NOTE(review): the numbers in these notes look stale. The code later in this
# cell writes a fourth stream size of 5 (not 15) and out_dim 189 / 192 / 196.

# Written into conf/train/acoustic/train/myconfig.yaml (force_fix_vuv).
force_fix_vuv = False #@param {type:"boolean"}

# Written into conf/train/acoustic/data/myconfig.yaml (filter_long_segments).
filter_long_segments = True #@param {type:"boolean"}

# Kept as a string; converted with int() where the hop size is computed, and
# compared as a string when choosing the FFT size.
sample_rate = "44100" #@param ["44100", "48000", "88200", "96000"]

# Written as d4c_threshold into the prepare_features acoustic configs.
d4c_threshold = 0.25 #@param {type:"slider", min:0, max:1, step:0.05}

# Locate the installed nnsvs package at run time rather than hard-coding a
# site-packages path, so the notebook is more likely to work on Jupyter.
# `importlib.util` is a submodule: a bare `import importlib` does not guarantee
# that it is loaded, so import it explicitly.
import importlib.util
nnsvs_spec = importlib.util.find_spec("nnsvs")
# find_spec() returns None when the package cannot be found; fail with a clear
# message instead of an AttributeError on None.
if nnsvs_spec is None or not nnsvs_spec.submodule_search_locations:
    raise ModuleNotFoundError("NNSVS is not installed. Run the 'Install NNSVS' cell first.")
nnsvs_path = nnsvs_spec.submodule_search_locations[0]
print("NNSVS location: " + nnsvs_path)

def get_vowels():
  import subprocess
  vowels = subprocess.getoutput('sed -n -r \'s/^.*("C-Vowel"|"C-Vowels"|"C-Phone_Boin")\s+//p\' /content/ETK/train/hed/'+hed_file)
  junk = ['{', '}', '-', '+', '*']
  table = str.maketrans('', '', ''.join(junk))
  vowels = vowels.translate(table)
  vowels = vowels.replace(",","', '")
  print("Vowels: " + "'" + vowels + "'")
  !sed -i -r "s|(VOWELS = )(.+)|\1'{vowels}'|g" /content/ETK/train/stage0/compare_mono_align_and_mono_score.py

def get_in_dim():
    in_dim = 0
    with open("/content/ETK/train/hed/"+hed_file,'r') as site:
        for line in site.readlines():
            if line.startswith(('QS', 'CQS')):
                in_dim = in_dim + 1
    acoustic_in_dim = in_dim + int(4)
    print("in_dim:", + in_dim)
    print("acoustic_in_dim:", + acoustic_in_dim)
    !sed -i -r 's|(in_dim:)(\s+)(.+)|\1\2{in_dim}|g' /content/ETK/train/conf/train/timelag/model/timelag_mdnv2.yaml
    !sed -i -r 's|(in_dim:)(\s+)(.+)|\1\2{in_dim}|g' /content/ETK/train/conf/train/duration/model/duration_vp_mdn.yaml
    !sed -i -r 's|(in_dim:)(\s+)(.+)|\1\2{acoustic_in_dim}|g' /content/ETK/train/conf/train/acoustic/model/acoustic_current.yaml

def get_idx():
    from nnmnkwii.io import hts
    binary_dict, continuous_dict = hts.load_question_set("/content/ETK/train/hed/" + hed_file)
    for n in range(len(binary_dict)):
        if binary_dict[n][0] in ("C-Silence", "C-Silences", "C-Phone_Muon"):
            in_rest_idx = n
            print("in_rest_idx:", in_rest_idx)
            !sed -i -r 's|(in_rest_idx:)(\s+)(.+)|\1\2{in_rest_idx}|g' /content/ETK/train/conf/train/acoustic/data/myconfig.yaml
    for n in range(len(continuous_dict)):
        if continuous_dict[n][0] in ("e1", "e1_absolute_pitch"): # the absolute pitch of the current note
            in_lf0_idx = n+  len(binary_dict)
            print("in_lf0_idx:", in_lf0_idx)
            !sed -i -r 's|(in_lf0_idx:)(\s+)(.+)|\1\2{in_lf0_idx}|g' /content/ETK/train/conf/train/acoustic/model/acoustic_current.yaml
            !sed -i -r 's|(in_lf0_idx:)(\s+)(.+)|\1\2{in_lf0_idx}|g' /content/ETK/train/conf/train/acoustic/data/myconfig.yaml

!ln -sf /content/ETK/train/conf/train/acoustic/model/acoustic_{model.lower()}.yaml /content/ETK/train/conf/train/acoustic/model/acoustic_current.yaml &> /dev/null

#These will pull the "main" branch for each language to reduce maintanance.
if lang == "Japanese":
    print("Switching to Intunist Japanese")
    hed_file = "intunist_jp.hed"
    dic_file = "intunist_jp.table"
elif lang == "Japanese_compatibility":
    print("Switching to Intunist Japanese (Compatible with older HEDs)")
    !git clone https://github.com/intunist/nnsvs-japanese-plus lang-ext &> /dev/null
    hed_file = "intunist_jp_compatibility.hed"
    dic_file = "intunist_jp_compatibility.table"
elif lang == "English":
    print("Switching to Intunist English")
    !git clone https://github.com/intunist/nnsvs-english-support lang-ext &> /dev/null
    hed_file = "intunist_en.hed"
    dic_file = "blank.table"
elif lang == "Romance":
    print("Switching to Legacy Romance Family Support")
    !git clone https://github.com/DYVAUX/nnsvs-romance-language-support lang-ext &> /dev/null
    hed_file = "DVX_lat.hed"
    dic_file = "blank.table"
elif lang == "CJK":
    print("Switching to Intunist CJK")
    hed_file = "intunist_cjk.hed"
    dic_file = "intunist_cjk.table"
    print("SELECTED LANGUAGE IS UNAVAILABLE")
elif lang == "Chinese":
    print("Switching to Archivoice Chinese")
    !git clone https://github.com/Archivoice/nnsvs-chinese-support lang-ext &> /dev/null
    hed_file = "chinese.hed"
    dic_file = "chinese.table"
elif lang == "Polish":
    print("Switching to SzTP Polish")
    !git clone https://github.com/SzopaTatyJarka/nnsvs-polish-support lang-ext &> /dev/null
    hed_file = "sztj_polish.hed"
    dic_file = "sztj_polish_phoneme.table"
elif lang == "Custom":
    print("Switching to Custom Language")
    hed_file = custom_hed
    dic_file = custom_table
!cp -r lang-ext/hed /content/ETK/train &> /dev/null
!cp -r lang-ext/dic /content/ETK/train &> /dev/null
!rm -rf lang-ext

get_in_dim()
if (lang == 'Japanese' or lang == 'Japanese_compatibility'):
  vowels = "'a', 'i', 'u', 'e', 'o', 'A', 'I', 'U', 'E', 'O', 'N'"
  print("Vowels: " + vowels)
  !sed -i -r "s|(VOWELS = )(.+)|\1{vowels}|g" /content/ETK/train/stage0/compare_mono_align_and_mono_score.py
#elif lang == 'Custom':
#  print("Vowels: " + vowels)
#  !sed -i -r "s|(VOWELS = )(.+)|\1{vowels}|g" /content/ETK/train/stage0/compare_mono_align_and_mono_score.py
else:
  get_vowels()

#copy custom Intunist feature gen configuration.
!cp -r ETK/train/custom/prepare_features/acoustic/* {nnsvs_path}/bin/conf/prepare_features/acoustic/
!cp -r ETK/train/custom/prepare_features/acoustic/* {nnsvs_path}/bin/conf/prepare_static_features/acoustic
print("Acoustic model: " + model)
if model.startswith(('Res', 'world', 'world')):
    get_idx()
    !sed -i -r 's|(relative_f0:)(\s+)(.+)|\1\2false|g' /content/ETK/train/enuconfig.yaml
    !sed -i -r 's|(relative_f0:)(\s+)(.+)|\1\2false|g' {nnsvs_path}/bin/conf/prepare_features/acoustic/static_*.yaml
    !sed -i -r 's|(relative_f0:)(\s+)(.+)|\1\2false|g' {nnsvs_path}/bin/conf/prepare_features/acoustic/intunist_*.yaml
    !sed -i -r 's|(relative_f0:)(\s+)(.+)|\1\2false|g' {nnsvs_path}/bin/conf/prepare_static_features/acoustic/static_*.yaml
    !sed -i -r 's|(relative_f0:)(\s+)(.+)|\1\2false|g' {nnsvs_path}/bin/conf/prepare_static_features/acoustic/intunist_*.yaml
    !sed -i -r 's|(out_lf0_idx:)(\s+)(.+)|\1\2180|g' /content/ETK/train/conf/train/acoustic/data/myconfig.yaml
    # change to nnsvs-train-resf0
else:
    # Set relative_f0 true static_deltadelta_* and enuconfig
    # change to nnsvs-train
    # set idx to null
    !sed -i -r 's|(in_rest_idx:)(\s+)(.+)|\1\2null|g' /content/ETK/train/conf/train/acoustic/data/myconfig.yaml
    !sed -i -r 's|(in_lf0_idx:)(\s+)(.+)|\1\2null|g' /content/ETK/train/conf/train/acoustic/model/acoustic_current.yaml
    !sed -i -r 's|(in_lf0_idx:)(\s+)(.+)|\1\2null|g' /content/ETK/train/conf/train/acoustic/data/myconfig.yaml
    !sed -i -r 's|(out_lf0_idx:)(\s+)(.+)|\1\2null|g' /content/ETK/train/conf/train/acoustic/data/myconfig.yaml
    !sed -i -r 's|(relative_f0:)(\s+)(.+)|\1\2true|g' /content/ETK/train/enuconfig.yaml
    !sed -i -r 's|(relative_f0:)(\s+)(.+)|\1\2true|g' {nnsvs_path}/bin/conf/prepare_features/acoustic/static_*.yaml
    !sed -i -r 's|(relative_f0:)(\s+)(.+)|\1\2true|g' {nnsvs_path}/bin/conf/prepare_features/acoustic/intunist_*.yaml
    !sed -i -r 's|(relative_f0:)(\s+)(.+)|\1\2true|g' {nnsvs_path}/bin/conf/prepare_static_features/acoustic/static_*.yaml
    !sed -i -r 's|(relative_f0:)(\s+)(.+)|\1\2true|g' {nnsvs_path}/bin/conf/prepare_static_features/acoustic/intunist_*.yaml

#no vibrato mdeling for new model types
if vibrato != "none" and model.startswith(('world', 'mel')):
  raise Exception('WORLD and MEL models cannot be used with vibrato modeling!')
elif model.startswith('world'):
  vibrato = "world"
  !sed -i -r 's|(acoustic_features:)(\s+)(.+)|\1\2static_only|g' /content/ETK/train/config.yaml
  !sed -i -r 's|(vibrato_mode:)(\s+)(.+)|\1\2none|g' {nnsvs_path}/bin/conf/prepare_features/acoustic/static_*.yaml
elif model.startswith('mel'):
  vibrato = "mel"
#  !sed -i -r 's|(acoustic_features:)(\s+)(.+)|\1\2melf0_48k|g' /content/ETK/train/config.yaml
  !sed -i -r 's|(vibrato_mode:)(\s+)(.+)|\1\2none|g' {nnsvs_path}/bin/conf/prepare_features/acoustic/static_*.yaml

#set vibrato
print("Vibrato Mode: "+ str(vibrato))
if vibrato == "none":
  !sed -i -r 's|(stream_sizes:)(\s+)(.+)|\1\2\[180, 3, 1, 5\]|g' /content/ETK/train/conf/train/acoustic/model/acoustic_current.yaml
  !sed -i -r 's|(has_dynamic_features:)(\s+)(.+)|\1\2\[true, true, false, false\]|g' /content/ETK/train/conf/train/acoustic/model/acoustic_current.yaml
  !sed -i -r 's|(out_dim:)(\s+)(.+)|\1\2189|g' /content/ETK/train/conf/train/acoustic/model/acoustic_current.yaml
  !sed -i -r 's|(acoustic_features:)(\s+)(.+)|\1\2intunist_world_dynmgc_novib|g' /content/ETK/train/config.yaml
elif vibrato == "diff":
  !sed -i -r 's|(stream_sizes:)(\s+)(.+)|\1\2\[180, 3, 1, 5, 3\]|g' /content/ETK/train/conf/train/acoustic/model/acoustic_current.yaml
  !sed -i -r 's|(has_dynamic_features:)(\s+)(.+)|\1\2\[true, true, false, false, true\]|g' /content/ETK/train/conf/train/acoustic/model/acoustic_current.yaml
  !sed -i -r 's|(out_dim:)(\s+)(.+)|\1\2192|g' /content/ETK/train/conf/train/acoustic/model/acoustic_current.yaml
  !sed -i -r 's|(acoustic_features:)(\s+)(.+)|\1\2intunist_world_dynmgc_diffvib|g' /content/ETK/train/config.yaml
elif vibrato == "sine":
  !sed -i -r 's|(stream_sizes:)(\s+)(.+)|\1\2\[180, 3, 1, 5, 6, 1\]|g' /content/ETK/train/conf/train/acoustic/model/acoustic_current.yaml
  !sed -i -r 's|(has_dynamic_features:)(\s+)(.+)|\1\2\[true, true, false, false, true, false\]|g' /content/ETK/train/conf/train/acoustic/model/acoustic_current.yaml
  !sed -i -r 's|(out_dim:)(\s+)(.+)|\1\2196|g' /content/ETK/train/conf/train/acoustic/model/acoustic_current.yaml
  !sed -i -r 's|(acoustic_features:)(\s+)(.+)|\1\2intunist_world_dynmgc_sinevib|g' /content/ETK/train/config.yaml

print("MDN: "+ str(use_mdn))
if use_mdn:
  !sed -i -r 's|(use_mdn:)(\s+)(.+)|\1\2true|g' /content/ETK/train/conf/train/acoustic/model/acoustic_current.yaml
else:
  !sed -i -r 's|(use_mdn:)(\s+)(.+)|\1\2false|g' /content/ETK/train/conf/train/acoustic/model/acoustic_current.yaml

#set force_fix_vuv
print("Fix VUV: "+ str(force_fix_vuv))
!sed -i -r 's|(force_fix_vuv:)(\s+)(.+)|\1\2{force_fix_vuv}|g' /content/ETK/train/conf/train/acoustic/train/myconfig.yaml

# set filter_long_segments
print("Acoustic Segment Filter: "+ str(filter_long_segments))
!sed -i -r 's|(filter_long_segments:)(\s+)(.+)|\1\2{filter_long_segments}|g' /content/ETK/train/conf/train/acoustic/data/myconfig.yaml


#set samplerate
print("Sample Rate: "+ str(sample_rate))
!sed -i -r 's|(sample_rate:)(\s+)(.+)|\1\2{sample_rate}|g' /content/ETK/train/config.yaml
!sed -i -r 's|(sample_rate:)(\s+)(.+)|\1\2{sample_rate}|g' /content/ETK/train/enuconfig.yaml
!sed -i -r 's|(sample_rate:)(\s+)(.+)|\1\2{sample_rate}|g' /content/ETK/train/conf/train/acoustic/data/myconfig.yaml
!sed -i -r 's|(sample_rate:)(\s+)(.+)|\1\2{sample_rate}|g' /content/ETK/train/conf/train_postfilter/data/myconfig.yaml
!sed -i -r 's|(sample_rate:)(\s+)(.+)|\1\2{sample_rate}|g' /content/ETK/train/enuconfig.yaml
!sed -i -r 's|(sample_rate:)(\s+)(.+)|\1\2{sample_rate}|g' {nnsvs_path}/bin/conf/prepare_features/acoustic/static_*.yaml
!sed -i -r 's|(sample_rate:)(\s+)(.+)|\1\2{sample_rate}|g' {nnsvs_path}/bin/conf/prepare_features/acoustic/intunist_*.yaml

#set correct feature gen values in stati_*.yaml and melf0_48k.yaml
import math
hop_size = int(sample_rate)/200
print("hop size: "+ str(math.ceil(hop_size)))
!sed -i -r 's|(hop_size:)(\s+)(.+)|\1\2{math.ceil(hop_size)}|g' {nnsvs_path}/bin/conf/prepare_features/acoustic/static_*.yaml
#!sed -i -r 's|(hop_size:)(\s+)(.+)|\1\2{math.ceil(hop_size)}|g' {nnsvs_path}/bin/conf/prepare_features/acoustic/melf0_48k.yaml
if sample_rate in ('22050', '24000'):
  !sed -i -r 's|(fft_size:)(\s+)(.+)|\1\2512|g' {nnsvs_path}/bin/conf/prepare_features/acoustic/static_*.yaml
#  !sed -i -r 's|(fft_size:)(\s+)(.+)|\1\2512|g' {nnsvs_path}/bin/conf/prepare_features/acoustic/melf0_48k.yaml
if sample_rate in ('44100', '48000'):
  !sed -i -r 's|(fft_size:)(\s+)(.+)|\1\21024|g' {nnsvs_path}/bin/conf/prepare_features/acoustic/static_*.yaml
#  !sed -i -r 's|(fft_size:)(\s+)(.+)|\1\21024|g' {nnsvs_path}/bin/conf/prepare_features/acoustic/melf0_48k.yaml
elif sample_rate in ('88200', '96000'):
  !sed -i -r 's|(fft_size:)(\s+)(.+)|\1\22048|g' {nnsvs_path}/bin/conf/prepare_features/acoustic/static_*.yaml
#  !sed -i -r 's|(fft_size:)(\s+)(.+)|\1\22048|g' {nnsvs_path}/bin/conf/prepare_features/acoustic/melf0_48k.yaml

#set d4c
print("d4c threshold: "+ str(d4c_threshold))
!sed -i -r 's|(d4c_threshold:)(\s+)(.+)|\1\2{d4c_threshold}|g' {nnsvs_path}/bin/conf/prepare_features/acoustic/static_*.yaml
!sed -i -r 's|(d4c_threshold:)(\s+)(.+)|\1\2{d4c_threshold}|g' {nnsvs_path}/bin/conf/prepare_features/acoustic/intunist_*.yaml

!sed -i -r 's|(question_path:)(\s+)(.+)|\1\2hed/{hed_file}|g' /content/ETK/train/enuconfig.yaml
!sed -i -r 's|(table_path:)(\s+)(.+)|\1\2dic/{dic_file}|g' /content/ETK/train/config.yaml
!sed -i -r 's|(question_path:)(\s+)(.+)|\1\2hed/{hed_file}|g' /content/ETK/train/config.yaml
!sed -i -r 's|(table_path:)(\s+)(.+)|\1\2dic/{dic_file}|g' /content/ETK/train/enuconfig.yaml


In [None]:
#@title Set parameters
#@markdown This notebook uses sane defaults for all model types. Additional settings removed to make training more simple.
#@markdown <br />Settings here can be changed without re-running feature generation.

pretrained_expdir = "" #@param {type:"string"}
#@markdown Ex: `exp/singer_name/`. Settings will need to match.

disable_checkpoints = True #@param {type:"boolean"}

acoustic_epochs = 120 #@param {type:"integer"}
acoustic_loss = "mae" #@param ["mse", "mae"]
#acoustic_init = "none" #@param ["none", "kaiming_normal", "xavier_normal"]
#@markdown Due to to the difference in training parameters, you can change advanced settings in the `acoustic_current.yaml file`

#The notebook uses MDNv2 by default with optimal settings. Choose desired number of epochs.
duration_epochs = 250 #@param {type:"integer"}
#duration_init = "none" #@param ["none", "kaiming_normal", "xavier_normal"]
timelag_epochs = 250 #@param {type:"integer"}
#timelag_init = "none" #@param ["none", "kaiming_normal", "xavier_normal"]

pitch_reg_weight = 0.03 #@param {type:"number"}

!sed -i -r '/#/!s|(pretrained_expdir:)(.*)|\1 "{pretrained_expdir}"|g' /content/ETK/train/config.yaml

#set the epoch count
import math
!sed -i -r 's|(nepochs:)(\s+)(.+)|\1\2{acoustic_epochs}|g' /content/ETK/train/conf/train/acoustic/train/myconfig.yaml
acoustic_checkpoint_interval = acoustic_epochs * 0.15
acoustic_checkpoint_interval_trunc = math.trunc(acoustic_checkpoint_interval)
acoustic_checkpoint_interval_trunc_min = min(acoustic_checkpoint_interval_trunc, 80)
if disable_checkpoints:
  acoustic_checkpoint_interval_trunc_min = 999999
!sed -i -r 's|(checkpoint_epoch_interval:)(\s+)(.+)|\1\2{acoustic_checkpoint_interval_trunc_min}|g' /content/ETK/train/conf/train/acoustic/train/myconfig.yaml
#
!sed -i -r 's|(nepochs:)(\s+)(.+)|\1\2{duration_epochs}|g' /content/ETK/train/conf/train/duration/train/myconfig.yaml
duration_checkpoint_interval = duration_epochs * 0.15
duration_checkpoint_interval_trunc = math.trunc(duration_checkpoint_interval)
duration_checkpoint_interval_trunc_min = min(duration_checkpoint_interval_trunc, 80)
!sed -i -r 's|(checkpoint_epoch_interval:)(\s+)(.+)|\1\2{duration_checkpoint_interval_trunc_min}|g' /content/ETK/train/conf/train/duration/train/myconfig.yaml
#
!sed -i -r 's|(nepochs:)(\s+)(.+)|\1\2{timelag_epochs}|g' /content/ETK/train/conf/train/timelag/train/myconfig.yaml
timelag_checkpoint_interval = timelag_epochs * 0.15
timelag_checkpoint_interval_trunc = math.trunc(timelag_checkpoint_interval)
timelag_checkpoint_interval_trunc_min = min(timelag_checkpoint_interval_trunc, 80)
!sed -i -r 's|(checkpoint_epoch_interval:)(\s+)(.+)|\1\2{timelag_checkpoint_interval_trunc_min}|g' /content/ETK/train/conf/train/timelag/train/myconfig.yaml

#set init value
#removed. Instead init is tested for each model type for multiple datasets and the best is pre-set.

#set loss mode
#unsupported for MDN models.
!sed -i -r 's|(feats_criterion:)(\s+)(.+)|\1\2{acoustic_loss}|g' /content/ETK/train/conf/train/acoustic/train/myconfig.yaml

!sed -i -r 's|(pitch_reg_weight:)(\s+)(.+)|\1\2{pitch_reg_weight}|g' /content/ETK/train/conf/train/acoustic/train/myconfig.yaml

print("Values set")

# Training Steps
---

In [None]:
#@markdown Tensorboard.
#@markdown View training info. non-live.

import datetime, os
%tensorboard --load_fast=true --reload_interval=1 --reload_multifile=true --logdir=/content/ETK/train/tensorboard/

In [None]:
#@title Main Training
#@markdown GV postfilter is included with normal training. Edit `enuconfig.yaml` after training to enable.
starting_stage = 0 #@param {type:"slider", min:0, max:5, step:1.0}
stopping_stage = 5 #@param {type:"slider", min:0, max:5, step:1.0}

!sed -i -r 's|(spk:)(\s+)(.+)|\1\2{singer_name}|g' /content/ETK/train/config.yaml
!sed -i -r 's|(model_dir:)(\s+)(.+)|\1\2exp/{singer_name}\_intunist_prototyping_notebook|g' /content/ETK/train/enuconfig.yaml
!sed -i -r 's|(stats_dir:)(\s+)(.+)|\1\2dump/{singer_name}/norm|g' /content/ETK/train/enuconfig.yaml

%cd "/content/ETK/train"
if model.startswith(('Res', 'world', 'mel')):
    !bash run_resf0.sh --stage $starting_stage --stop_stage $stopping_stage
else:
    !bash run.sh --stage $starting_stage --stop_stage $stopping_stage

# Advanced Training
---

In [None]:
#@title Train vocoder
#@markdown Vocoder training in Colab is NOT POSSIBLE. This has been removed.

#from parallel_wavegan.models.nsf import HnSincNSF
#HnSincNSF(1,1)

#if sample_rate == "48000":
#  !sed -i -r 's|(vocoder_model:)(\s+)(.+)|\1\2hn-sinc-nsf_sr48k_pwgD|g' /content/ETK/train/config.yaml
#else:
#  raise SystemExit("vocoder training only supports 48000Hz audio")
#if model.startswith('Res'):
#  pass
#else:
#  raise SystemExit("vocoder training only supports ResF0 models")

#print("training vocoder for " + singer_name)
#starting_stage = 1 #@param {type:"slider", min:1, max:2, step:1}
#stopping_stage = 2 #@param {type:"slider", min:1, max:2, step:1}

#%cd "/content/ETK/train"
#!bash run_vocoder.sh --stage $starting_stage --stop_stage $stopping_stage

# Release
---

In [None]:
#@title Run release step and package voice
store_on_google_drive = False #@param {type:"boolean"}

delete_checkpoint = True #@param {type:"boolean"}
#@markdown `delete_checkpoint` deletes extra checkpoints to reduce singer size. Does not affect quality.

print("Packaging " + singer_name)
%cd "/content/ETK/train"
!bash run.sh --stage 12 --stop_stage 12

import subprocess
time = subprocess.getoutput('date +%y%m%d-%H%M-UTC')

if delete_checkpoint:
    %rm -f /content/ETK/train/release/{singer_name}_---/exp/{singer_name}_intunist_prototyping_notebook/*/checkpoint*
    %rm -f /content/ETK/train/release/{singer_name}_---/exp/{singer_name}_intunist_prototyping_notebook/*/epoch*

%cd -q "/content/ETK/train/release/"
print('\033[97;100m' + 'Compressing Model' + '\033[0m')
!7za -bso0 a "/content/{singer_name}_NNSVS_model_{time}.7z" "{singer_name}_---"
if store_on_google_drive == True:
    %mkdir -p /content/drive/MyDrive/NNSVS_Release_Models
    print('\033[97;100m' + 'Copying Singer to your Google Drive' + '\033[0m')
    !cp /content/{singer_name}_NNSVS_model_{time}.7z /content/drive/MyDrive/NNSVS_Release_Models/{singer_name}_NNSVS_model_{time}.7z
    print('Done!')
else:
    print('Done!')