# Dynamic Recurrent Neural Network.

TensorFlow 2.0 implementation of a Recurrent Neural Network (LSTM) that performs dynamic computation over variable-length sequences. This example uses a toy dataset to classify sequences as either linear or random. The generated sequences have variable length and are padded to a common maximum length.

- Author: Aymeric Damien
- Project: https://github.com/aymericdamien/TensorFlow-Examples/

## RNN Overview

<img src="http://colah.github.io/posts/2015-08-Understanding-LSTMs/img/RNN-unrolled.png" alt="nn" style="width: 600px;"/>

References:
- [Long Short Term Memory](http://deeplearning.cs.cmu.edu/pdfs/Hochreiter97_lstm.pdf), Sepp Hochreiter & Jurgen Schmidhuber, Neural Computation 9(8): 1735-1780, 1997.

In [1]:
from __future__ import absolute_import, division, print_function

import tensorflow as tf
from tensorflow.keras import Model, layers
import numpy as np
import random

In [2]:
# Dataset parameters.
num_classes = 2 # Number of classes: whether the sequence is linear or not.
seq_max_len = 20 # Maximum sequence length.
seq_min_len = 5 # Minimum sequence length (before padding).
masking_val = -1 # -1 marks masked positions and is used to pad sequences to the common maximum length.
max_value = 10000 # Maximum integer value.

# Training parameters.
learning_rate = 0.001
training_steps = 2000
batch_size = 64
display_step = 100

# Network parameters.
num_units = 32 # Number of units in the LSTM layer.

In [3]:
# ====================
#  TOY DATA GENERATOR
# ====================

def toy_sequence_data():
    """ Endlessly yield padded toy sequences together with their class label.

    Every sample has a random length between 'seq_min_len' and
    'seq_max_len' (both inclusive) and belongs to one of two classes:
    - Class 0: linear sequences (i.e. [1, 2, 3, 4, ...])
    - Class 1: random sequences (i.e. [9, 3, 10, 7,...])

    NOTICE:
    Values are divided by 'max_value', then each sequence is right-padded
    with 'masking_val' (here -1) up to 'seq_max_len', because a numpy array
    with inconsistent dimensions cannot be fed to TensorFlow. The padded
    positions are meant to be ignored later by the dynamic computation.

    Yields:
        A (sequence, label) tuple: 'sequence' is a float32 numpy array of
        length 'seq_max_len', 'label' is a float32 0-d numpy array holding
        0 (linear) or 1 (random).
    """
    while True:
        # Draw the length, a start value and the class selector. The start
        # value is drawn for every sample, even though only linear
        # sequences use it.
        length = random.randint(seq_min_len, seq_max_len)
        first = random.randint(0, max_value - length)
        is_linear = random.random() < .5

        if is_linear:
            # Consecutive integers starting at 'first'.
            raw = np.arange(start=first, stop=first + length)
        else:
            # Independent random integers in [0, max_value).
            raw = np.random.randint(max_value, size=length)
        label = 0 if is_linear else 1

        # Scale the values into the 0-1 range.
        scaled = raw / max_value
        # Pad on the right with the masking value up to the maximum length.
        padded = np.pad(scaled, mode='constant', pad_width=(0, seq_max_len - length), constant_values=masking_val)
        yield padded.astype(np.float32), np.array(label, dtype=np.float32)

In [4]:
# Data pipeline: stream (sequence, label) pairs from the generator, then
# repeat, shuffle with a 5000-element buffer, batch and prefetch one batch.
train_data = tf.data.Dataset.from_generator(
    toy_sequence_data, output_types=(tf.float32, tf.float32))
train_data = train_data.repeat()
train_data = train_data.shuffle(5000)
train_data = train_data.batch(batch_size)
train_data = train_data.prefetch(1)

In [5]:
# LSTM model definition.
class LSTM(Model):
    """Sequence classifier: Masking -> LSTM -> Dense.

    Inputs are padded sequences; positions equal to 'masking_val' are
    flagged by the Masking layer so the LSTM can ignore them.
    """

    def __init__(self):
        """Create the layers of the model."""
        super(LSTM, self).__init__()
        # Masking layer using 'masking_val' (-1) as the masked value.
        self.masking = layers.Masking(mask_value=masking_val)
        # LSTM layer applied on top of the masking layer; the dynamic
        # computation will automatically ignore the -1 values.
        self.lstm = layers.LSTM(num_units)
        # Fully connected output layer, one score per class.
        self.out = layers.Dense(units=num_classes)

    def call(self, x, is_training=False):
        """Run the forward pass.

        Args:
            x: batch of padded sequences; reshaped here to
                (batch_size, seq_max_len, 1).
            is_training: when True the raw logits are returned, otherwise
                softmax is applied to them.

        Returns:
            A tensor with 'num_classes' scores per sample: logits when
            'is_training' is True, softmax probabilities otherwise.
        """
        # An RNN layer expects a 3-D input (batch_size, seq_len, num_features).
        sequences = tf.reshape(x, shape=[-1, seq_max_len, 1])
        # Masking, then LSTM, then the output layer.
        logits = self.out(self.lstm(self.masking(sequences)))
        if is_training:
            # The TF cross-entropy op expects logits without softmax.
            return logits
        return tf.nn.softmax(logits)

# Build the LSTM model.
lstm_net = LSTM()

In [6]:
# Cross-entropy loss.
def cross_entropy_loss(x, y):
    """Return the batch-averaged sparse softmax cross-entropy.

    Args:
        x: logits (softmax is applied inside the TF op).
        y: class-index labels; cast to int64 before use.

    Returns:
        The mean of the per-sample losses.
    """
    # The TF cross-entropy op needs integer labels.
    labels = tf.cast(y, tf.int64)
    per_sample = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=x, labels=labels)
    # Average the loss across the batch.
    return tf.reduce_mean(per_sample)

# Accuracy metric.
def accuracy(y_pred, y_true):
    """Return the fraction of samples whose predicted class equals the label.

    Args:
        y_pred: per-class scores; the predicted class is the argmax along
            axis 1.
        y_true: class-index labels; cast to int64 for the comparison.
    """
    # Predicted class is the index of the highest score (i.e. argmax).
    predicted = tf.argmax(y_pred, 1)
    expected = tf.cast(y_true, tf.int64)
    hits = tf.cast(tf.equal(predicted, expected), tf.float32)
    return tf.reduce_mean(hits, axis=-1)

# Adam optimizer.
optimizer = tf.optimizers.Adam(learning_rate=learning_rate)

In [7]:
# Optimization process.
def run_optimization(x, y):
    """Perform one optimizer update of 'lstm_net' on the batch (x, y).

    Args:
        x: batch of input sequences passed to 'lstm_net'.
        y: labels passed to 'cross_entropy_loss'.
    """
    # Record the forward pass so the loss can be differentiated.
    with tf.GradientTape() as tape:
        # Forward pass; is_training=True yields logits for the loss.
        logits = lstm_net(x, is_training=True)
        batch_loss = cross_entropy_loss(logits, y)

    # Variables to update, i.e. the trainable ones.
    weights = lstm_net.trainable_variables

    # Compute the gradients, then update the weights with them.
    grads = tape.gradient(batch_loss, weights)
    optimizer.apply_gradients(zip(grads, weights))

In [8]:
# Run training for the given number of steps.
for step, (batch_x, batch_y) in enumerate(train_data.take(training_steps), 1):
    # Run the optimization to update the weights and biases.
    run_optimization(batch_x, batch_y)

    # Report only on the first step and on every 'display_step'-th step.
    if step % display_step != 0 and step != 1:
        continue

    # is_training=True returns logits, which the loss function expects.
    pred = lstm_net(batch_x, is_training=True)
    loss = cross_entropy_loss(pred, batch_y)
    acc = accuracy(pred, batch_y)
    print("step: %i, loss: %f, accuracy: %f" % (step, loss, acc))

step: 1, loss: 0.694514, accuracy: 0.453125
step: 100, loss: 0.673558, accuracy: 0.625000
step: 200, loss: 0.484812, accuracy: 0.796875
step: 300, loss: 0.416448, accuracy: 0.796875
step: 400, loss: 0.349169, accuracy: 0.828125
step: 500, loss: 0.376243, accuracy: 0.812500
step: 600, loss: 0.246535, accuracy: 0.906250
step: 700, loss: 0.242801, accuracy: 0.906250
step: 800, loss: 0.185866, accuracy: 0.921875
step: 900, loss: 0.190631, accuracy: 0.937500
step: 1000, loss: 0.181896, accuracy: 0.953125
step: 1100, loss: 0.185485, accuracy: 0.906250
step: 1200, loss: 0.123493, accuracy: 0.921875
step: 1300, loss: 0.089478, accuracy: 1.000000
step: 1400, loss: 0.153334, accuracy: 0.921875
step: 1500, loss: 0.057054, accuracy: 1.000000
step: 1600, loss: 0.122397, accuracy: 0.968750
step: 1700, loss: 0.086428, accuracy: 0.968750
step: 1800, loss: 0.126830, accuracy: 0.906250
step: 1900, loss: 0.067411, accuracy: 0.984375
step: 2000, loss: 0.056770, accuracy: 0.984375
