# Check Device

In [None]:
# Print the GPU/driver status. A "couldn't communicate with the NVIDIA driver" message
# typically means no GPU runtime is attached (NOTE(review): confirm in the Colab runtime settings).
!nvidia-smi

NVIDIA-SMI has failed because it couldn't communicate with the NVIDIA driver. Make sure that the latest NVIDIA driver is installed and running.



## Mount Google Drive & OneDrive to Colab

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; force_remount=True requests a fresh mount
# even if one already exists, so this cell can be re-run.
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [None]:
!python -m pip install --upgrade pip setuptools wheel
# https://colab.research.google.com/github/jakiya99/Torrent-to-Onedrive/blob/main/new_torrents_download_to_onedrive.ipynb#scrollTo=Fil_bHuAD8S3
# https://github.com/jakiyaa/Torrent-to-Onedrive/blob/main/full%20tutorial.md
# https://github.com/jakiyaa/rclone-authenticate

!wget https://downloads.rclone.org/v1.60.1/rclone-v1.60.1-linux-amd64.deb
!apt install ./rclone-v1.60.1-linux-amd64.deb

In [None]:
# Interactive setup: define a remote named `onedrive`; the mount command later in this
# notebook refers to it as `onedrive:`.
!rclone config

In [None]:
import os
# Work from /content so the relative paths used by later cells (e.g. `./onedrive`) resolve there.
path = "/content"
os.chdir(path)

!pwd

/content


In [None]:
!mkdir onedrive
# 사용자 shell이 종료되어도 작동되기 위해 nohup(no hang up), & 사용
!nohup rclone --vfs-cache-mode writes mount onedrive: /content/onedrive &

nohup: appending output to 'nohup.out'


In [None]:
# List the mount point to check that the OneDrive contents are visible.
# NOTE(review): the mount is started in the background, so this may run before it is
# ready -- re-run the cell if the listing is empty.
!ls ./onedrive

# Install <img src="https://api.wandb.ai/files/cayush/images/projects/464653/2dbc1a01.png" alt="TTS" height="25" width="160" /> & jamo package
TTS documentation: https://tts.readthedocs.io/en/latest/index.html

In [None]:
# Install Coqui TTS pinned to 0.10.0.
# NOTE(review): the training cell presumably depends on this release's config API -- confirm before upgrading.
!pip install TTS==0.10.0

In [None]:
# NOTE(review): jamo is presumably required by korean.ko_cleaner for Hangul jamo handling -- confirm.
# The version is unpinned; TODO pin it for reproducibility.
!pip install jamo

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
[0m

## Create formatter to tokenize text & train model

In [None]:
# Switch to the project folder on Google Drive (uses `os` imported in an earlier cell).
# NOTE(review): the `korean.ko_cleaner` import in the next cell presumably resolves
# relative to this directory -- confirm.
os.chdir('/content/drive/MyDrive/glow-tts-dialect')
!pwd

/content/drive/MyDrive/glow-tts-dialect


In [None]:
# https://github.com/coqui-ai/TTS/blob/dev/TTS/tts/datasets/formatters.py
from korean.ko_cleaner import tokenize
import os

# formatter for jss (jejueo single speaker) speech dataset
def jss_formatter(root_path, meta_file, ignored_speakers=None):
  """Load the JSS (Jejueo Single Speaker) metadata file as a list of TTS samples.

  Each non-blank line of the metadata file is expected to look like
  ``<relative wav path>|<transcript>[|...]``; fields after the second are ignored.

  Args:
    root_path: Dataset root directory; wav paths are joined onto it.
    meta_file: Metadata file name, relative to ``root_path``.
    ignored_speakers: Unused by this formatter. Presumably accepted so the
      signature matches what the Coqui TTS dataset loader passes to
      formatters -- TODO confirm.

  Returns:
    A list of dicts, one per non-blank line, with keys ``text`` (the tokens
    returned by ``tokenize`` joined into one string), ``audio_file``,
    ``speaker_name`` (always ``""``) and ``root_path``.

  Raises:
    IndexError: If a non-blank line contains no ``|`` separator.
  """
  txt_file = os.path.join(root_path, meta_file)
  items = []
  with open(txt_file, "r", encoding="utf-8") as ttf:
    for line in ttf:
      # Skip blank / whitespace-only lines (e.g. a trailing newline at the end
      # of the file); they have no second column and would crash on cols[1].
      if not line.strip():
        continue
      cols = line.split("|")
      wav_file = os.path.join(root_path, cols[0])
      text = cols[1].strip()
      # tokenize() yields a sequence of string tokens; the model consumes them
      # as one concatenated string.
      tokenize_text = "".join(tokenize(text))
      items.append({"text": tokenize_text, "audio_file": wav_file, "speaker_name": "", "root_path": root_path})
  return items

In [None]:
from korean.ko_cleaner import PUNC, VALID_CHARS

# Show the character inventory (joined into one string) and the punctuation set
# imported from korean.ko_cleaner; both are fed to CharactersConfig in the training cell.
print( "".join(VALID_CHARS))
print(PUNC)

ᄀᄁᄂᄃᄄᄅᄆᄇᄈᄉᄊᄋᄌᄍᄎᄏᄐᄑ하ᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵᆞᆢᅌᆨᆩᆪᆫᆬᆭᆮᆯᆰᆱᆲᆳᆴᆵᆶᆷᆸᆹᆺᆻᆼᆽᆾᆿᇀᇁᇂ
!'(),-.:;? 


## Train Glow-TTS

In [None]:
from korean.ko_cleaner import PUNC, VALID_CHARS

# 🐸Coqui Glow-TTS 
from trainer import Trainer, TrainerArgs
# from TTS.encoder.utils.training import init_training

from TTS.tts.configs.glow_tts_config import GlowTTSConfig
from TTS.tts.configs.shared_configs import BaseDatasetConfig, BaseAudioConfig, CharactersConfig
from TTS.tts.datasets import load_tts_samples
from TTS.tts.models.glow_tts import GlowTTS
from TTS.tts.utils.text.tokenizer import TTSTokenizer
from TTS.utils.audio import AudioProcessor

# Output directory on the mounted Google Drive; passed to both GlowTTSConfig and Trainer below.
output_path = '/content/drive/MyDrive/glow-tts-dialect/model/glow-tts/'

# Define the dataset config. The data is read from /content/onedrive, the rclone
# mount point set up earlier in this notebook.
dataset_config = BaseDatasetConfig(dataset_name = "jss", path="/content/onedrive/jss-dataset", meta_file_train="metadata.txt")

# Define the audio config.
# NOTE(review): resample=True presumably because the source wavs are not stored at 22050 Hz -- confirm.
audio_config = BaseAudioConfig(
    sample_rate=22050,
    resample=True,
    win_length=1024,
    hop_length=256,
    num_mels=80,
    preemphasis=0.0,
    ref_level_db=20,
    log_func="np.log",
    do_trim_silence=True,
    mel_fmin=0,
    mel_fmax=None,
    spec_gain=1.0,
    signal_norm=False,
    do_amp_to_db_linear=False,
)

# Define the characters config: the symbol inventory comes from korean.ko_cleaner
# (VALID_CHARS joined into one string for characters, PUNC for punctuation); no phoneme set is given.
# characters_class="TTS.tts.models.vits.VitsCharacters",
characters_config = CharactersConfig(
    pad="_",
    eos="~",
    characters= "".join(VALID_CHARS),
    punctuations= PUNC,
    phonemes=None,
)

# Init the training config.
# text_cleaner=None and use_phonemes=False: no cleaner or phonemizer is configured here;
# jss_formatter already runs tokenize() on every transcript.
# phoneme_cache_path is still set even though use_phonemes is False.
config = GlowTTSConfig(
    batch_size=64,
    eval_batch_size=32,
    eval_split_size=0.1,
    num_loader_workers=4,
    num_eval_loader_workers=4,
    run_eval=True,
    test_delay_epochs=-1,
    epochs=1000,
    text_cleaner=None,
    use_phonemes=False,
    phoneme_language=None,
    phoneme_cache_path=os.path.join(output_path, "phoneme_cache"),
    print_step=25,
    mixed_precision=True,
    output_path=output_path,
    datasets=[dataset_config],
    audio=audio_config,
    characters=characters_config,
)


# Init the audio processor from the config.
ap = AudioProcessor.init_from_config(config)

# Init the tokenizer. init_from_config returns a (tokenizer, config) pair; `config` is
# rebound to the returned object, and that object is what the model and trainer receive.
tokenizer, config = TTSTokenizer.init_from_config(config)

# Load training samples, passing jss_formatter explicitly as the formatter.
# NOTE(review): with eval_split=True and no validation metadata file configured, the eval
# set is presumably split off the training metadata using eval_split_size -- confirm.
train_samples, eval_samples = load_tts_samples(dataset_config, eval_split=True, eval_split_size=config.eval_split_size, formatter=jss_formatter)

# Init the model; speaker_manager=None, and jss_formatter leaves speaker_name empty.
model = GlowTTS(config, ap, tokenizer, speaker_manager=None)


# Init the trainer with default TrainerArgs and start training 🚀
trainer = Trainer(TrainerArgs(), config, output_path, model=model, train_samples=train_samples, eval_samples=eval_samples)

trainer.fit()