In [None]:
import argparse
import os
import subprocess
import time
import traceback
from datetime import datetime

import infolog
import numpy as np
import tensorflow as tf
from datasets import audio
from hparams import hparams_debug_string
from tacotron.feeder import Feeder
from tacotron.models import create_model
from tacotron.utils import ValueWindow, plot
from tacotron.utils.text import sequence_to_text
from tacotron.utils.symbols import symbols
from tqdm import tqdm

log = infolog.log

def time_string():
    """Return the current local time as a 'YYYY-MM-DD HH:MM' string."""
    now = datetime.now()
    return now.strftime('%Y-%m-%d %H:%M')

### Tensorboard projector

In [None]:
def add_embedding_stats(summary_writer, embedding_names, paths_to_meta, checkpoint_path):
    """Register embedding tensors with the TensorBoard projector.

    Args:
        summary_writer: writer handed to ``visualize_embeddings``.
        embedding_names: names of the embedding tensors to project.
        paths_to_meta: metadata file paths, paired positionally with
            ``embedding_names`` (pairing stops at the shorter sequence).
        checkpoint_path: stored on the projector config as ``model_checkpoint_path``.
    """
    projector = tf.contrib.tensorboard.plugins.projector

    #Create tensorboard projector config pointing at the checkpoint
    projector_config = projector.ProjectorConfig()
    projector_config.model_checkpoint_path = checkpoint_path

    #One projector entry per (tensor name, metadata file) pair
    for tensor_name, metadata_path in zip(embedding_names, paths_to_meta):
        entry = projector_config.embeddings.add()
        entry.tensor_name = tensor_name
        entry.metadata_path = metadata_path

    #Hand the finished config to the projector plugin for visualization
    projector.visualize_embeddings(summary_writer, projector_config)

def add_train_stats(model, hparams):
    """Create the training summaries for ``model`` and return the merged summary op.

    Args:
        model: object exposing the per-tower output/target lists, the loss
            tensors, ``learning_rate``, ``gradients`` and (for scheduled
            teacher forcing) ``ratio`` that are read below.
        hparams: provides ``tacotron_num_gpus``, ``predict_linear`` and
            ``tacotron_teacher_forcing_mode``.

    Returns:
        The result of ``tf.summary.merge_all()``.
    """
    with tf.variable_scope('stats'):
        #Per-tower histograms of mel predictions and targets
        for i in range(hparams.tacotron_num_gpus):
            tf.summary.histogram(f'mel_outputs {i}', model.tower_mel_outputs[i])
            tf.summary.histogram(f'mel_targets {i}', model.tower_mel_targets[i])

        for tag in ('before_loss', 'after_loss'):
            tf.summary.scalar(tag, getattr(model, tag))

        #Linear-spectrogram statistics only exist when the model predicts them
        if hparams.predict_linear:
            tf.summary.scalar('linear_loss', model.linear_loss)
            for i in range(hparams.tacotron_num_gpus):
                tf.summary.histogram(f'linear_outputs {i}', model.tower_linear_outputs[i])
                tf.summary.histogram(f'linear_targets {i}', model.tower_linear_targets[i])

        #learning_rate is tracked to monitor the decay speed
        for tag in ('regularization_loss', 'stop_token_loss', 'loss', 'learning_rate'):
            tf.summary.scalar(tag, getattr(model, tag))

        #Teacher forcing ratio decay is only meaningful when mode = 'scheduled'
        if hparams.tacotron_teacher_forcing_mode == 'scheduled':
            tf.summary.scalar('teacher_forcing_ratio', model.ratio)

        #Gradient norms, to spot exploding gradients
        gradient_norms = list(map(tf.norm, model.gradients))
        tf.summary.histogram('gradient_norm', gradient_norms)
        tf.summary.scalar('max_gradient_norm', tf.reduce_max(gradient_norms))

        return tf.summary.merge_all()
    
def add_eval_stats(summary_writer, step, linear_loss, before_loss, after_loss, stop_token_loss, loss):
    """Write the evaluation losses to ``summary_writer`` as one summary at ``step``.

    Args:
        summary_writer: object whose ``add_summary(summary, step)`` is called once.
        step: step value passed through to ``add_summary``.
        linear_loss: linear loss value, or ``None`` to leave it out of the summary.
        before_loss, after_loss, stop_token_loss, loss: scalar values to log.
    """
    #NOTE: the stop token tag has no 'eval_' prefix, unlike its siblings; it is kept as-is
    tagged_losses = [
        ('Tacotron_eval_model/eval_stats/eval_before_loss', before_loss),
        ('Tacotron_eval_model/eval_stats/eval_after_loss', after_loss),
        ('Tacotron_eval_model/eval_stats/stop_token_loss', stop_token_loss),
        ('Tacotron_eval_model/eval_stats/eval_loss', loss),
    ]
    #The linear loss is only reported when a value was supplied
    if linear_loss is not None:
        tagged_losses.append(('Tacotron_eval_model/eval_stats/eval_linear_loss', linear_loss))

    eval_summary = tf.Summary(value=[
        tf.Summary.Value(tag=tag, simple_value=value) for tag, value in tagged_losses
    ])
    summary_writer.add_summary(eval_summary, step)

### Train

In [None]:
def train(log_dir, args, hparams):
    """Training entry point; this opening part prepares directories, logging, seed and feeder.

    Args:
        log_dir: root directory under which all output folders are created.
        args: provides ``base_dir``, ``tacotron_input`` and ``model``.
        hparams: provides ``predict_linear`` and ``tacotron_random_seed`` and is
            forwarded to the ``Feeder``.
    """
    #Output locations, all rooted at log_dir
    save_dir = os.path.join(log_dir, 'taco_pretrained')
    plot_dir = os.path.join(log_dir, 'plots')
    wav_dir = os.path.join(log_dir, 'wavs')
    mel_dir = os.path.join(log_dir, 'mel-spectrograms')
    eval_dir = os.path.join(log_dir, 'eval-dir')
    eval_plot_dir = os.path.join(eval_dir, 'plots')
    eval_wav_dir = os.path.join(eval_dir, 'wavs')
    tensorboard_dir = os.path.join(log_dir, 'tacotron_events')
    meta_folder = os.path.join(log_dir, 'metas')
    for _out_dir in (save_dir, plot_dir, wav_dir, mel_dir, eval_dir,
                     eval_plot_dir, eval_wav_dir, tensorboard_dir, meta_folder):
        os.makedirs(_out_dir, exist_ok=True)

    checkpoint_path = os.path.join(save_dir, 'tacotron_model.ckpt')
    input_path = os.path.join(args.base_dir, args.tacotron_input)

    #The linear spectrogram folder is only needed when the model predicts them
    if hparams.predict_linear:
        linear_dir = os.path.join(log_dir, 'linear-spectrograms')
        os.makedirs(linear_dir, exist_ok=True)

    #Report the run configuration
    for _template, _value in (
            ('Checkpoint path: {}', checkpoint_path),
            ('Loading training data from: {}', input_path),
            ('Using model: {}', args.model)):
        log(_template.format(_value))
    log(hparams_debug_string())

    #Start by setting a seed for repeatability
    tf.set_random_seed(hparams.tacotron_random_seed)

    #Set up data feeder
    coord = tf.train.Coordinator()
    with tf.variable_scope('datafeeder') as scope:
        feeder = Feeder(coord, input_path, hparams)

`Feeder` 类的具体内容请前往 feeder.ipynb 查看：http://localhost:8888/lab/tree/TTS/Speech-Zone/Tacotron2/Tacotron2/tacotron/feeder.ipynb

In [None]:
    #Set up model:
    #global_step is a non-trainable variable starting at 0, handed to both builders below
    global_step = tf.Variable(0, name='global_step', trainable=False)
    #Training model plus its merged summary op (see model_train_mode)
    model, stats = model_train_mode(args, feeder, hparams, global_step)
    #Evaluation model, initialized from the feeder's eval_* tensors (see model_test_mode)
    eval_model = model_test_mode(args, feeder, hparams, global_step)

其中 `model_train_mode`、`model_test_mode` 在 train.py 中有定义，如下：

In [None]:
def model_train_mode(args, feeder, hparams, global_step):
    """Build the training model under the 'Tacotron_model' scope.

    Args:
        args: provides ``model``, the requested model name.
        feeder: provides the training input/target tensors and ``split_infos``.
        hparams: forwarded to ``create_model``; ``predict_linear`` decides
            whether linear targets are passed to ``initialize``.
        global_step: passed to ``initialize`` and ``add_optimizer``.

    Returns:
        ``(model, stats)`` where ``stats`` is the result of ``add_train_stats``.
    """
    #The 'Tacotron-2' name is mapped to 'Tacotron' before reaching create_model
    architecture = 'Tacotron' if args.model == 'Tacotron-2' else args.model

    with tf.variable_scope('Tacotron_model', reuse=tf.AUTO_REUSE):
        model = create_model(architecture, hparams)

        init_kwargs = dict(
            targets_lengths=feeder.targets_lengths,
            global_step=global_step,
            is_training=True,
            split_infos=feeder.split_infos,
        )
        #Linear targets are only read from the feeder when they are predicted
        if hparams.predict_linear:
            init_kwargs['linear_targets'] = feeder.linear_targets

        model.initialize(feeder.inputs, feeder.input_lengths, feeder.mel_targets,
                         feeder.token_targets, **init_kwargs)
        model.add_loss()
        model.add_optimizer(global_step)
        return model, add_train_stats(model, hparams)

In [None]:
def model_test_mode(args, feeder, hparams, global_step):
    """Build the evaluation model under the 'Tacotron_model' scope.

    Args:
        args: provides ``model``, the requested model name.
        feeder: provides the ``eval_*`` input/target tensors and ``eval_split_infos``.
        hparams: forwarded to ``create_model``; ``predict_linear`` decides
            whether eval linear targets are passed to ``initialize``.
        global_step: passed to ``initialize``.

    Returns:
        The model after ``initialize`` and ``add_loss``; no optimizer is added.
    """
    #The 'Tacotron-2' name is mapped to 'Tacotron' before reaching create_model
    architecture = 'Tacotron' if args.model == 'Tacotron-2' else args.model

    with tf.variable_scope('Tacotron_model', reuse=tf.AUTO_REUSE):
        model = create_model(architecture, hparams)

        init_kwargs = dict(
            targets_lengths=feeder.eval_targets_lengths,
            global_step=global_step,
            is_training=False,
            is_evaluating=True,
            split_infos=feeder.eval_split_infos,
        )
        #Eval linear targets are only read from the feeder when they are predicted
        if hparams.predict_linear:
            init_kwargs['linear_targets'] = feeder.eval_linear_targets

        model.initialize(feeder.eval_inputs, feeder.eval_input_lengths,
                         feeder.eval_mel_targets, feeder.eval_token_targets, **init_kwargs)
        model.add_loss()
        return model

`create_model`在 `models/__init__.py`有定义。  http://localhost:8888/lab/tree/TTS/Speech-Zone/Tacotron2/Tacotron2/tacotron/models/__init__.py

而`create_model`又调用了 Tacotron类， 此类在`models/tacotron.ipynb`有讲解。

In [None]:
def tacotron_train(args, log_dir, hparams):
    """Call ``train`` with the arguments reordered to its signature and return its result."""
    return train(log_dir=log_dir, args=args, hparams=hparams)