# Part2: Music Generation with RNNs

此部分为 `Part2_Music_Generation.ipynb` 的化简版本，理清主要思路，只包含核心代码.

1. 预处理数据集：
- 构建字符库 `vocab`，按出现频率从高到低分配索引，将字符串转为数字序列.
- 构建训练集batch，包含 `sequence_length` 和 `batch_size` 两个参数. 每个batch中的样本序列的开头 `start` 为 `[0,n-len-1]` 中随机选取的，其中 `n` 为数字化后数据集的总长度，`len=sequence_length`. 每个样本的特征为数据集的子串 `[start, start+len-1]`，标签为子串 `[start+1, start+len]`.

2. 搭建模型：embedding层，参数 `embedding_dimensionality` $\to$ LSTM层，参数 `rnn_units` $\to$ Dense层，参数 `units=vocab_size`.

3. 定义损失函数，使用交叉熵函数. 超参数配置：`training_iterations`，`learning_rate`. 构建训练函数：
- 使用 `tf.GradientTape` 对变量进行观测，计算 $\mathcal{L}(y, \hat{y})$.
- 求出 $\frac{\partial\mathcal{L}}{\partial W}$，$W$ 为全体可学习参数 `model.trainable_variables`.
- 使用 `optimizer` 根据梯度对参数 $W$ 进行更新.
- 开始训练：执行训练函数 `training_iterations` 次，用 `tqdm` 可视化进度条，在记录点保存模型.

4. 生成歌曲，根据启动种子 `start_text` 作为预测序列的开头，用 `tf.random.categorical` 以输出的结果作为概率分布选出一个预测值，作为下一次预测的输入值.

In [None]:
import tensorflow as tf
import tensorflow.keras as keras
import keras.layers as layers
import myTools
import numpy as np
import matplotlib.pyplot as plt
import mitdeeplearning as mdl
import IPython.display as ipythondisplay
import pathlib
from tqdm import tqdm

In [None]:
from collections import Counter  # cell-local import: only this preprocessing step needs it

# Load the training songs and merge them into a single corpus string,
# separating consecutive songs with a blank line.
# NOTE(review): assumes load_training_data() yields strings -- the join requires it.
songs = mdl.lab1.load_training_data()
dataset = '\n\n'.join(songs)

# Count every character in one pass and rank from most to least frequent.
# most_common() orders equal counts by first appearance in the corpus, so the
# resulting index assignment is reproducible between interpreter sessions
# (iterating a set of strings is not, which would silently invalidate any
# checkpoint trained under a different character <-> index mapping).
cnt = Counter(dataset).most_common()
vocab = [ch for ch, _ in cnt]  # characters ordered by descending frequency
vocab_size = len(vocab)

# Two lookup tables: character -> index and index -> character.
char2idx = {u: i for i, u in enumerate(vocab)}
idx2char = np.array(vocab)

# Numericise the corpus: replace each character by its vocabulary index.
dataset = [char2idx[c] for c in dataset]

In [None]:
def make_batch(ds, seq_length, batch_size):
    """Draw a random batch of (input, target) windows for next-token training.

    Each example is a window of ``seq_length`` consecutive items of ``ds``
    starting at a random offset; its target is the same window shifted one
    position to the right. Offsets are drawn with ``np.random.choice`` using
    its default settings, so the same offset may appear more than once.

    Args:
        ds: Sliceable sequence of token ids (the numericised corpus).
        seq_length: Number of tokens in every window.
        batch_size: Number of windows to draw.

    Returns:
        Tuple ``(x, y)`` of NumPy arrays, each with ``batch_size`` rows of
        ``seq_length`` tokens, where ``y[i]`` is ``x[i]`` shifted by one.

    Raises:
        ValueError: If ``ds`` holds fewer than ``seq_length + 1`` tokens, so
            that no window together with its shifted target fits.
    """
    n = len(ds)
    if n <= seq_length:
        # Fail with a message about the actual cause instead of the opaque
        # error np.random.choice raises for a non-positive population size.
        raise ValueError(
            f"dataset of length {n} is too short for seq_length={seq_length}; "
            f"at least {seq_length + 1} tokens are required"
        )
    # Valid starts are 0 .. n - seq_length - 1 so the target slice stays in range.
    starts = np.random.choice(n - seq_length, batch_size)
    x = np.array([ds[st:st + seq_length] for st in starts])
    y = np.array([ds[st + 1:st + seq_length + 1] for st in starts])
    return x, y

# Sanity check: draw a tiny batch (2 windows of 10 tokens) and show the
# shapes of its input and target arrays.
example = make_batch(dataset, seq_length=10, batch_size=2)
example_inputs, example_targets = example
print(example_inputs.shape, example_targets.shape)

In [None]:
def build_model(vocab_size, embedding_dim, rnn_units):
    """Assemble the character-level model: Embedding -> LSTM -> Dense.

    Args:
        vocab_size: Number of distinct tokens; used as the embedding input
            dimension and as the number of units of the output layer.
        embedding_dim: Size of each token embedding vector.
        rnn_units: Number of units in the LSTM layer.

    Returns:
        A ``keras.Sequential`` model made of the three layers, in that order.
    """
    embedding = layers.Embedding(vocab_size, embedding_dim)
    # return_sequences=True keeps one LSTM output per timestep, so the Dense
    # layer scores the next token at every position of the input.
    recurrent = layers.LSTM(rnn_units, return_sequences=True)
    # No activation: the model outputs raw scores (compute_loss treats them
    # as logits).
    projection = layers.Dense(vocab_size)
    return keras.Sequential([embedding, recurrent, projection])

def compute_loss(y, y_hat):
    """Sparse categorical cross-entropy between integer labels and logits.

    Args:
        y: Integer class labels.
        y_hat: Unnormalised model outputs (logits) for the same positions.

    Returns:
        Whatever ``keras.losses.sparse_categorical_crossentropy`` returns for
        these inputs; no further reduction is applied here.
    """
    loss_fn = keras.losses.sparse_categorical_crossentropy
    # from_logits=True: y_hat has not been passed through a softmax.
    return loss_fn(y, y_hat, from_logits=True)

# --- Optimisation hyperparameters ---
training_iterations = 5000  # number of train_step calls in the training loop
batch_size = 32             # windows per batch
seq_length = 100            # tokens per window
learning_rate = 5e-3

# --- Model size ---
embedding_dim = 256
rnn_units = 1024

# --- Checkpoint location ---
checkpoint_dir = pathlib.Path('./training_checkpoints')
checkpoint_prefix = checkpoint_dir / 'my_ckpt'

# Instantiate the model and the optimizer used by train_step.
model = build_model(vocab_size, embedding_dim, rnn_units)
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

def train_step(x, y):
    """Run one optimisation step on a single batch.

    Uses the module-level ``model`` and ``optimizer``.

    Args:
        x: Batch of input token ids.
        y: Batch of target token ids aligned with ``x``.

    Returns:
        The loss value produced by ``compute_loss`` for this batch.
    """
    # Record the forward pass so the loss can be differentiated.
    with tf.GradientTape() as tape:
        loss = compute_loss(y, model(x))
    # Read the variables only after the forward pass: the model creates its
    # weights on first call.
    trainable = model.trainable_variables
    gradients = tape.gradient(loss, trainable)
    optimizer.apply_gradients(zip(gradients, trainable))
    return loss

# The model was created without an input shape and has not been called yet,
# so it has no weights; Keras documents that summary() raises ValueError on
# an unbuilt model. Build it for (any batch size, any sequence length) first.
model.build(input_shape=(None, None))
model.summary()

In [None]:
history = []  # mean loss of every iteration, in order
# Live plot of the loss curve as training progresses.
# NOTE(review): sec=2 is presumably the refresh interval in seconds -- confirm in mdl.util.
plotter = mdl.util.PeriodicPlotter(sec=2, xlabel='Iterations', ylabel='Loss')

for step in tqdm(range(training_iterations)):
    x, y = make_batch(dataset, seq_length, batch_size)
    loss = train_step(x, y)
    # Reduce the loss returned by train_step to a single number for plotting.
    history.append(loss.numpy().mean())
    plotter.plot(history)
    # Periodic checkpoint every 100 iterations (including iteration 0).
    if step % 100 == 0:
        model.save_weights(checkpoint_prefix)
# Save the final weights under the same prefix as the periodic checkpoints;
# writing to checkpoint_dir itself would place them at a different path.
model.save_weights(checkpoint_prefix)

In [None]:
def generate_text(model, start_text, length=1000, context_length=100):
    """Sample ``length`` characters from ``model``, seeded with ``start_text``.

    The model built in this file is not stateful, so every call starts from a
    fresh recurrent state. Feeding only the most recently sampled character
    would therefore condition each prediction on a single character. Instead,
    the last ``context_length`` characters of the running sequence are fed at
    every step and only the prediction for the final position is sampled.
    This costs one forward pass over the window per generated character.

    Uses the module-level ``char2idx`` / ``idx2char`` lookup tables.

    Args:
        model: Model mapping a batch of token ids to per-timestep logits.
            NOTE(review): assumes output shape (batch, time, vocab), as the
            model returned by build_model produces -- confirm for other models.
        start_text: Non-empty seed string; every character must be a key of
            ``char2idx``.
        length: Number of characters to generate after the seed.
        context_length: Maximum number of trailing characters fed to the
            model at each step. The default of 100 equals the ``seq_length``
            value used for training in this file.

    Returns:
        ``start_text`` followed by the ``length`` generated characters.

    Raises:
        ValueError: If ``start_text`` is empty or ``context_length`` < 1.
        KeyError: If ``start_text`` contains a character missing from
            ``char2idx``.
    """
    if not start_text:
        raise ValueError("start_text must contain at least one character")
    if context_length < 1:
        raise ValueError("context_length must be at least 1")

    ids = [char2idx[c] for c in start_text]
    seed_len = len(ids)
    # Kept from the original so a model that does hold state starts clean.
    # NOTE(review): this function is designed for a non-stateful model.
    model.reset_states()
    for _ in tqdm(range(length)):
        window = np.array(ids[-context_length:]).reshape(1, -1)
        logits = model(window)
        # Only the last timestep predicts the next, not yet generated, token.
        last_logits = logits[:, -1, :]
        pred_id = tf.random.categorical(last_logits, num_samples=1)[0, 0].numpy()
        ids.append(pred_id)
    return start_text + ''.join(idx2char[i] for i in ids[seed_len:])
# Generate a song seeded with the single character 'X' and hand the
# resulting text to the playback helper.
mysong = generate_text(model, start_text='X')
myTools.play_song(mysong)