In [None]:
import tensorflow as tf

class EncoderConvolutions:
    """Stack of 1D convolutional layers applied to the encoder inputs to capture local context.
    """
    def __init__(self, is_training, hparams, activation=tf.nn.relu, scope=None):
        """
        Args:
            is_training: Boolean, forwarded unchanged to every conv1d call
                (presumably switches dropout / batch norm mode; conv1d is defined elsewhere)
            hparams: hyperparameter object; the attributes enc_conv_kernel_size, enc_conv_channels,
                tacotron_dropout_rate, enc_conv_num_layers and batch_norm_position are read from it
            activation: callable, activation function handed to each convolutional layer
            scope: variable scope name, 'enc_conv_layers' when None.
        """
        super(EncoderConvolutions, self).__init__()
        self.is_training = is_training

        #Per-layer convolution settings, all taken from hparams
        self.kernel_size = hparams.enc_conv_kernel_size
        self.channels = hparams.enc_conv_channels
        self.activation = activation
        self.scope = scope if scope is not None else 'enc_conv_layers'
        self.drop_rate = hparams.tacotron_dropout_rate
        self.enc_conv_num_layers = hparams.enc_conv_num_layers
        self.bnorm = hparams.batch_norm_position

    def __call__(self, inputs):
        """Feeds inputs through enc_conv_num_layers successive conv1d layers.

        Returns:
            The output of the last conv1d layer (inputs unchanged if there are zero layers).
        """
        outputs = inputs
        with tf.variable_scope(self.scope):
            #Layers are numbered from 1 in their scope names
            for layer_number in range(1, self.enc_conv_num_layers + 1):
                layer_scope = 'conv_layer_{}_'.format(layer_number) + self.scope
                outputs = conv1d(outputs, self.kernel_size, self.channels, self.activation,
                    self.is_training, self.drop_rate, self.bnorm, layer_scope)
        return outputs

In [None]:
class EncoderRNN:
    """Single-layer bidirectional LSTM encoder built from two zoneout LSTM cells.
    """
    def __init__(self, is_training, size=256, zoneout=0.1, scope=None):
        """
        Args:
            is_training: Boolean, passed to both LSTM cells to select training or inference zoneout
            size: integer, the number of LSTM units for each direction
            zoneout: the zoneout factor, used for both the cell state and the output state
            scope: EncoderRNN variable scope name, 'encoder_LSTM' when None.
        """
        super(EncoderRNN, self).__init__()
        self.is_training = is_training

        self.size = size
        self.zoneout = zoneout
        self.scope = scope if scope is not None else 'encoder_LSTM'

        #Both directions share the same zoneout settings; only the cell name differs
        zoneout_kwargs = dict(zoneout_factor_cell=zoneout, zoneout_factor_output=zoneout)

        #Forward direction
        self._fw_cell = ZoneoutLSTMCell(size, is_training, name='encoder_fw_LSTM', **zoneout_kwargs)

        #Backward direction
        self._bw_cell = ZoneoutLSTMCell(size, is_training, name='encoder_bw_LSTM', **zoneout_kwargs)

    def __call__(self, inputs, input_lengths):
        """Runs the bidirectional LSTM over inputs.

        Args:
            inputs: input sequence tensor handed to tf.nn.bidirectional_dynamic_rnn
                (time_major is left at its default)
            input_lengths: per-example sequence lengths, passed as sequence_length

        Returns:
            Forward and backward outputs concatenated along axis 2.
        """
        with tf.variable_scope(self.scope):
            #The final states are not needed, only the per-step outputs
            (fw_outputs, bw_outputs), _ = tf.nn.bidirectional_dynamic_rnn(
                cell_fw=self._fw_cell,
                cell_bw=self._bw_cell,
                inputs=inputs,
                sequence_length=input_lengths,
                dtype=tf.float32,
                swap_memory=True)

            return tf.concat([fw_outputs, bw_outputs], axis=2)
        
class ZoneoutLSTMCell(tf.nn.rnn_cell.RNNCell):
    '''Wrapper for tf LSTM to create Zoneout LSTM Cell
    inspired by:
    https://github.com/teganmaharaj/zoneout/blob/master/zoneout_tensorflow.py
    Published by one of 'https://arxiv.org/pdf/1606.01305.pdf' paper writers.
    Many thanks to @Ondal90 for pointing this out. You sir are a hero!

    The wrapped LSTMCell is run as usual; afterwards the cell state (c) and hidden state (h)
    are mixed with their previous values: randomly per element during training, and by a
    fixed weighted average at inference.
    '''
    def __init__(self, num_units, is_training, zoneout_factor_cell=0., zoneout_factor_output=0., state_is_tuple=True, name=None):
        '''Initializer with possibility to set different zoneout values for cell/hidden states.

        Args:
            num_units: integer, number of units of the wrapped LSTMCell
            is_training: Python Boolean, selects the stochastic (training) or averaged (inference) zoneout
            zoneout_factor_cell: float in [0, 1], zoneout factor applied to the cell state c
            zoneout_factor_output: float in [0, 1], zoneout factor applied to the hidden state h
            state_is_tuple: Boolean, forwarded to LSTMCell; when False the state is one tensor
                holding c and h concatenated along axis 1
            name: name forwarded to the wrapped LSTMCell

        Raises:
            ValueError: if either zoneout factor lies outside [0, 1].
        '''
        zm = min(zoneout_factor_output, zoneout_factor_cell)
        zs = max(zoneout_factor_output, zoneout_factor_cell)

        if zm < 0. or zs > 1.:
            raise ValueError('One/both provided Zoneout factors are not in [0, 1]')

        self._cell = tf.nn.rnn_cell.LSTMCell(num_units, state_is_tuple=state_is_tuple, name=name)
        self._zoneout_cell = zoneout_factor_cell
        self._zoneout_outputs = zoneout_factor_output
        self.is_training = is_training
        self.state_is_tuple = state_is_tuple

    @property
    def state_size(self):
        '''State size of the wrapped LSTMCell.'''
        return self._cell.state_size

    @property
    def output_size(self):
        '''Output size of the wrapped LSTMCell.'''
        return self._cell.output_size

    def __call__(self, inputs, state, scope=None):
        '''Runs vanilla LSTM Cell and applies zoneout.

        Args:
            inputs: input tensor for this time step, passed to the wrapped LSTMCell
            state: previous state, an LSTMStateTuple (c, h) when state_is_tuple is True,
                otherwise a single tensor with c and h concatenated along axis 1
            scope: optional scope forwarded to the wrapped LSTMCell

        Returns:
            (output, new_state): output is the wrapped cell's output, returned unchanged;
            new_state holds the zoned-out c and h in the same layout as the incoming state.
        '''
        #Apply vanilla LSTM
        output, new_state = self._cell(inputs, state, scope)

        if self.state_is_tuple:
            (prev_c, prev_h) = state
            (new_c, new_h) = new_state
        else:
            #Concatenated state layout: first num_units columns are c, the following num_proj columns are h
            num_proj = self._cell._num_units if self._cell._num_proj is None else self._cell._num_proj
            prev_c = tf.slice(state, [0, 0], [-1, self._cell._num_units])
            prev_h = tf.slice(state, [0, self._cell._num_units], [-1, num_proj])
            new_c = tf.slice(new_state, [0, 0], [-1, self._cell._num_units])
            new_h = tf.slice(new_state, [0, self._cell._num_units], [-1, num_proj])

        #Apply zoneout
        if self.is_training:
            #nn.dropout takes keep_prob (probability to keep activations) not drop_prob (probability to mask activations)!
            #Dropout rescales kept elements by 1 / keep_prob; multiplying by keep_prob cancels that,
            #so every element ends up as either the new value or the previous one.
            keep_c = 1 - self._zoneout_cell
            keep_h = 1 - self._zoneout_outputs
            c = keep_c * tf.nn.dropout(new_c - prev_c, keep_prob=keep_c) + prev_c
            h = keep_h * tf.nn.dropout(new_h - prev_h, keep_prob=keep_h) + prev_h

        else:
            #Inference: deterministic weighted average of the new and previous states
            c = (1 - self._zoneout_cell) * new_c + self._zoneout_cell * prev_c
            h = (1 - self._zoneout_outputs) * new_h + self._zoneout_outputs * prev_h

        if self.state_is_tuple:
            new_state = tf.nn.rnn_cell.LSTMStateTuple(c, h)
        else:
            #tf.concat takes (values, axis); the pre-1.0 (axis, values) order used previously fails here
            new_state = tf.concat([c, h], axis=1)

        return output, new_state

## 知识点补充

### tf.nn.dropout

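`tf.nn.dropout(x, rate, noise_shape=None, seed=None, name=None)`

注意：以上是 TF 2.x 的函数签名。本文代码使用的是 TF 1.x，其第二个位置参数是 `keep_prob`（保留概率，即 `1 - rate`），所以上面的 zoneout 实现传入的是 `1 - zoneout`。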

参数：

- rate: The probability that each element is dropped. For example, setting rate=0.1 would drop 10% of input elements.

- noise_shape: A 1-D Tensor of type int32, representing the shape for randomly generated keep/drop flags

### tf.slice()、tf.split()、tf.concat() 和 tf.stack() 详解

目标是将一个 5x4x3 的张量转换为 4x15 的张量和 20x3 的张量，具体转换效果如下图所示。

![tf.jpg](attachment:ea83cfe1-dc18-4e51-80a7-365d441269cd.jpg)

`tf.slice(input_, begin, size)`

![Screen Shot 2021-11-04 at 8.01.37 PM.png](attachment:66a49207-b1d5-4c04-ab14-60093bfaad11.png)

`tf.split(value, num_or_size_splits, axis=0, num=None)`

参数：

- num_or_size_splits: 可以是一个整数，表示沿 axis 将张量等分成若干份；也可以是一个列表，给出每一份在该维度上的大小，列表元素之和必须等于该维度的大小。

![Screen Shot 2021-11-04 at 8.05.04 PM.png](attachment:20be7b74-7d4c-4f19-90ca-f206649f6167.png)

`tf.concat(values, axis)`

![Screen Shot 2021-11-04 at 8.06.07 PM.png](attachment:02522959-51c5-4465-a928-8d46f3354cf0.png)

`tf.stack(values, axis=0)`

![Screen Shot 2021-11-04 at 8.07.32 PM.png](attachment:248dbd6a-18d8-4968-9a9e-c5c785a6c19e.png)

In [None]:
class Prenet:
    """Fully connected layers (two by default) used as an information bottleneck for the attention.
    """
    def __init__(self, is_training, layers_sizes=[256, 256], drop_rate=0.5, activation=tf.nn.relu, scope=None):
        """
        Args:
            is_training: Boolean, stored on the instance; note that dropout in __call__ is
                always active and does not depend on this flag
            layers_sizes: list of integers, the length of the list represents the number of pre-net
                layers and the list values represent the layers number of units
            drop_rate: float, dropout rate applied after every dense layer
            activation: callable, activation functions of the prenet layers.
            scope: Prenet scope, 'prenet' when None.
        """
        super(Prenet, self).__init__()
        self.drop_rate = drop_rate

        #Copy the sizes so the instance never aliases the shared default list (or the caller's list)
        self.layers_sizes = list(layers_sizes)
        self.activation = activation
        self.is_training = is_training

        self.scope = 'prenet' if scope is None else scope

    def __call__(self, inputs):
        """Applies one dense layer followed by dropout for every entry of layers_sizes.

        Returns:
            The output of the last dropout layer (inputs unchanged if layers_sizes is empty).
        """
        x = inputs

        with tf.variable_scope(self.scope):
            for i, size in enumerate(self.layers_sizes):
                dense = tf.layers.dense(x, units=size, activation=self.activation, name='dense_{}'.format(i + 1))
                #The paper discussed introducing diversity in generation at inference time
                #by using a dropout of 0.5 only in prenet layers (in both training and inference).
                #Hence training=True is hard-coded here on purpose.
                x = tf.layers.dropout(dense, rate=self.drop_rate, training=True,
                    name='dropout_{}'.format(i + 1) + self.scope)
        return x