<a href="https://colab.research.google.com/github/usamireko/G2PTrainer/blob/main/G2PTrainer_Notebook.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

This notebook trains G2P models for OpenUtau. It is based on a modified version of the G2P training code from the OpenUtau repository.

The original code can be found [here](https://github.com/stakira/OpenUtau/tree/master/py).

This notebook is an edited copy of the notebook by LotteV and Mlo7Ghinsan, edited by [usamireko](https://github.com/usamireko).

# Setup

In [None]:
#@title # Mount Google Drive and Setup

from google.colab import drive
from IPython.display import clear_output, display, HTML

!rm -rf /content/sample_data
drive.mount("/content/drive")
!git clone https://github.com/usamireko/G2PTrainer.git
#thank you lotte <3
!pip install antlr4-python3-runtime==4.9.*
!pip install hydra-core==1.3.2 omegaconf==2.3.0 -q #that warning popup is annoying
!pip install torch torchaudio
!pip install editdistance
!pip install tqdm==4.65.0
!pip install onnx
!pip install pytorch_optimizer
!pip install PyYAML

%cd /content/G2PTrainer

clear_output()
print("Done!")
display(HTML('''
<div style="text-align:center;">
  <img src="https://files.catbox.moe/qcrz23.gif" alt="Snow" style="height:30%;">
</div>
'''))

# Training

In [None]:
#@title Input dictionary
from IPython.display import clear_output
%cd /content/g2p
import os
if not os.path.exists("/content/user_g2p_data"):
    os.makedirs("/content/user_g2p_data")
clear_output()
import sys
import re
import torch
import hydra
import yaml
from omegaconf import OmegaConf
sys.path.append(os.path.abspath('.'))
from dataset import SphinxDataset
from trainer import G2pTrainer
from model import GreedyG2p

def train(trainer):
    """Announce the start of training, then run the trainer's own training loop.

    Args:
        trainer: any object exposing a ``train()`` method (a ``G2pTrainer``
            in this notebook).
    """
    print("training...")
    trainer.train()


def export(trainer, model_path, onnx_path):
    """Load a saved checkpoint into the trainer's model, export it to ONNX, then test it.

    Args:
        trainer: trainer whose ``model`` receives the checkpoint weights and
            whose ``test`` method is run after the export.
        model_path: path to the saved state dict to load.
        onnx_path: destination path handed to ``GreedyG2p.export``.
    """
    print('exporting model...')
    # map_location="cpu": a checkpoint written during GPU training holds CUDA
    # tensors, and loading it on a CPU-only runtime fails without remapping.
    # The model is moved to the CPU right below anyway.
    state_dict = torch.load(model_path, map_location="cpu")
    trainer.model.load_state_dict(state_dict)
    trainer.model.cpu()
    greedy = GreedyG2p(trainer.model.max_len,
                       trainer.model.encoder, trainer.model.decoder)
    greedy.export(onnx_path)

    print('testing...')
    # The log is written relative to the current working directory.
    trainer.test('test_log.txt')

#@markdown input dictionary (can be dict.txt or dsdict.yaml)
dict_path = "" # @param {type:"string"}

#@markdown input model save location
save_path = "" # @param {type:"string"}
if not save_path:
    raise ValueError("Empty save_path, please specify a path to save your model")
# Create the save folder up front: the generated files below are opened for
# writing inside it, which fails with FileNotFoundError if it does not exist.
os.makedirs(save_path, exist_ok=True)

user_dict_path = os.path.join(save_path, "user_dictionary.txt") # cleaned dict
user_config = os.path.join(save_path, "config.yaml") # generated config
user_phonemes = os.path.join(save_path, "phones.txt") # phoneme list

def _seq_module_config(target, vocab_key):
    """Build the settings for one side of the model (encoder or decoder).

    A new dict and a new empty symbol list are created on every call, so the
    encoder and decoder sections never share objects.
    """
    return {
        "_target_": target,
        vocab_key: [],
        "d_model": 64,
        "d_hidden": 128,
        "num_layers": 2,
        "dropout": 0.1,
    }


# Baseline training configuration; the symbol lists start empty and are
# filled in later in this cell.
default_config = {}
default_config["model"] = {
    "_target_": "model.G2p",
    "max_len": 48,
    "encoder": _seq_module_config("model.Encoder", "graphemes"),
    "decoder": _seq_module_config("model.Decoder", "phonemes"),
}
default_config["optimizer"] = {
    "_target_": "torch.optim.AdamW",
    "lr": 0.001,
    "betas": [0.9, 0.999],
    "eps": 1e-08,
    "weight_decay": 0.01,
}
default_config["lr_scheduler"] = {
    "_target_": "torch.optim.lr_scheduler.ReduceLROnPlateau",
    "mode": "min",
    "factor": 0.4,
    "patience": 5,
    "min_lr": 1e-6,
}

# Persist the baseline next to the model so it can be edited and reloaded.
with open(user_config, "w") as config_file:
    yaml.dump(default_config, config_file)

if not dict_path:
    raise ValueError("Please input path to your dictionary")

# Match the extension case-insensitively so "DICT.TXT" or "dsdict.YAML" work too.
dict_path_lower = dict_path.lower()

if dict_path_lower.endswith(".txt"):
    print("using txt format")
    with open(dict_path, "r", encoding = "utf-8") as source_file:
        content = source_file.read()
    # Entries use a two-space separator between word and phonemes, so tabs
    # are normalised to that; replace() is a no-op when there are no tabs.
    with open(user_dict_path, "w", encoding = "utf-8") as file:
        file.write(content.replace("\t", "  "))

elif dict_path_lower.endswith((".yaml", ".yml")):
    print("using yaml format")
    # BaseLoader keeps every scalar as a plain string, so entries such as
    # yes/no/on/off/true/false (in any casing) or bare digits are not turned
    # into booleans or numbers. This replaces the earlier approach of
    # regex-quoting those words and writing the result back over the user's
    # file, which modified the input dictionary on disk and could corrupt
    # values that were already double-quoted.
    with open(dict_path, "r", encoding = "utf-8") as source_file:
        content = yaml.load(source_file, Loader = yaml.BaseLoader)
    # An empty YAML document loads as None.
    entries = (content or {}).get("entries", [])
    with open(user_dict_path, "w", encoding = "utf-8") as file:
        for entry in entries:
            grapheme = entry["grapheme"]
            phonemes = " ".join(entry["phonemes"])
            file.write(f"{grapheme}  {phonemes}\n")
else:
    raise TypeError("format not supported")

graphemes = set()
phonemes = set()

# Scan the cleaned dictionary and collect the symbols that appear in it.
# NOTE(review): the whole first token of each line is stored as a single
# grapheme symbol, and digits are kept in both words and phonemes even though
# the dataset is later created with remove_word_digits/remove_phoneme_digits.
# Confirm both against what SphinxDataset and the model expect.
with open(user_dict_path, "r", encoding = "utf-8") as file:
    for line_number, line in enumerate(file, start = 1):
        line = line.strip()
        # The dataset is created with comment_prefix=';;;', so comment lines
        # are legal in the dictionary and must not be parsed as entries.
        if not line or line.startswith(";;;"):
            continue
        fields = line.split("  ")
        if len(fields) < 2:
            # Fail with the offending line instead of a bare IndexError.
            raise ValueError(
                f"{user_dict_path}:{line_number}: expected '<word>  <phonemes>' "
                f"separated by two spaces, got: {line!r}")
        graphemes.add(line.split()[0])
        phonemes.update(fields[1].split())

# Special tokens that must lead both symbol lists (per the training readme).
required_char = ["<unk>", "<pad>", "<bos>", "<eos>"]

# Drop any special token that also occurs in the dictionary so it is not
# listed twice, then put the special tokens first and the rest in sorted order.
graphemes = required_char + sorted(graphemes.difference(required_char))
phonemes = required_char + sorted(phonemes.difference(required_char))

# Phonemes tagged "vowel" in phones.txt; every other phoneme is tagged "-".
vowel_types = {"a", "i", "u", "e", "o", "N", "M", "NG"}
# Written as UTF-8, like the dictionary and config files in this cell, so
# non-ASCII phoneme symbols do not depend on the platform default encoding.
with open(user_phonemes, "w", encoding = "utf-8") as f:
    for phoneme in phonemes:
        # The special tokens are not real phonemes and are left out of the list.
        if phoneme in required_char:
            continue
        phoneme_type = "vowel" if phoneme in vowel_types else "-"
        f.write(f"{phoneme}\t{phoneme_type}\n")

# Re-open the generated config and fill in the symbol lists built from the
# dictionary.
with open(user_config, "r", encoding = "utf-8") as config_in:
    training_config = yaml.safe_load(config_in)

model_section = training_config["model"]
model_section["encoder"]["graphemes"] = graphemes
model_section["decoder"]["phonemes"] = phonemes

# allow_unicode keeps non-ASCII symbols readable instead of escaped.
with open(user_config, "w", encoding = "utf-8") as config_out:
    yaml.dump(training_config, config_out, allow_unicode = True)

# Load the finished config and build the dataset from the cleaned dictionary.
cfg = OmegaConf.load(user_config)

dataset = SphinxDataset(
    user_dict_path,
    cfg.model,
    comment_prefix=';;;',
    remove_word_digits=True,     # "RECORDS(1)" -> "RECORDS"
    remove_phoneme_digits=True,  # "R EH1 K ER0 D Z" -> "R EH K ER D Z"
)

# @markdown you may need to adjust the batch size and epochs. <br> If there's too much loss, you can try decreasing the batch size. <br> Between 50 and 150 epochs is generally recommended for training, although you can play around with this a bit. <br> You can continue training from the latest checkpoint at a later moment if you so desire.
loss_device = "cuda" # @param ["cpu", "cuda"]
batch_size = 128 # @param {type:"slider", min:1, max:300, step:1}
epochs = 100 # @param {type:"slider", min:1, max:300, step:1}


#@markdown The model saved to save_path is g2p-best.ptsd; check the G2PTrainer folder under /content for other checkpoints, if any.

In [None]:
#@title Start training
test_for_error = False # @param {type:"boolean"}
#@markdown Option to test the finished model for its word error and phoneme error rate.... very slow though, especially with big model (should be in your yaml config)
import os
import shutil
from omegaconf import OmegaConf

#@markdown This wont resume training per se, its just to retry training in case of modified parameters without having to process your dataset again
resume = False # @param {"type":"boolean"}
config_path = "" # @param {"type":"string"}
# base_dir / model_path are derived from config_path but are not used by
# this cell.
base_dir = os.path.dirname(config_path)

model_path = os.path.join(base_dir, "g2p-best.ptsd")


if resume and config_path:
    cfg = OmegaConf.load(config_path)
    print(f"Resumed from config: {config_path}")
    print("This wont resume training per se, its just to retry training in case of modified parameters")
else:
    # Load from user_config path for new runs
    cfg = OmegaConf.load(user_config)
    print(f"Loaded new config: {user_config}")

cuda_available = torch.cuda.is_available()
# Honour the loss_device chosen in the form of the previous cell (it used to
# be ignored in favour of a hard-coded "cpu"), falling back to the CPU when
# no GPU is attached to the runtime.
selected_loss_device = loss_device
if selected_loss_device == "cuda" and not cuda_available:
    print("CUDA is not available, computing the loss on cpu instead")
    selected_loss_device = "cpu"

trainer = G2pTrainer(
        device=torch.device("cuda" if cuda_available else "cpu"),
        loss_device=torch.device(selected_loss_device),
        model=hydra.utils.instantiate(cfg.model),
        dataset=dataset,
        batch_size=batch_size,
        epochs=epochs,
        optimizer_cfg=cfg.get('optimizer', None),
        lr_scheduler_cfg=cfg.get('lr_scheduler', None))

train(trainer)
print("Optimizer:")
print(trainer.optimizer)
print("\nLearning Rate Scheduler:")
print(trainer.scheduler)
print("\nLearning Rate Scheduler State:")
print(trainer.scheduler.state_dict())

# shutil.copy instead of `!cp ... $save_path`: the unquoted shell expansion
# broke on save paths containing spaces (common for Google Drive folders).
shutil.copy("/content/G2PTrainer/g2p-best.ptsd", save_path)

if test_for_error:
    trainer.test(test_log = os.path.join(save_path, "test_log.txt"))

# Export ONNX

In [None]:
#@title Convert checkpoint
# You can just drop a path for the output; the .onnx extension is appended for you
model_path = "path_to_model" # @param {"type":"string"}
output_path = "path_to_onnx" # @param {"type":"string"}
# Only append the extension when it is missing, so "model.onnx" does not
# become "model.onnx.onnx".
if not output_path.lower().endswith(".onnx"):
    output_path = output_path + ".onnx"
# Relies on `trainer` created by the "Start training" cell.
export(trainer, model_path, output_path)
