# RNN 简介

# 时间序列模型概念简介
循环神经网络（RNN）是一种神经网络类型，其神经元的输出在下一个时间步会反馈作为输入，使网络具有处理序列数据的能力。它能处理变长序列，挖掘数据中的时序信息，不过存在长期依赖问题，即难以处理长序列中相距较远的信息关联。
RNN与普通神经网络的主要区别在于其具有记忆功能，神经元的输出能作为下一步输入，可处理序列数据，且输入和输出长度不固定；普通神经网络一般处理独立同分布的数据，层与层之间是简单的前馈连接关系，输入输出的长度通常是固定的。

RNN的应用场景广泛，在自然语言处理方面，可用于语言模型来预测下一个单词的概率，还能完成机器翻译、文本生成任务；在语音识别领域，能够处理语音这种时间序列信号，提高识别准确率；在时间序列预测中，像股票价格预测、天气预测等，RNN通过学习历史数据模式预测未来值；在视频分析中，它可以处理视频帧序列，进行动作识别等操作。





# RNN网络结构图
![图1](images/rnn.png)

RNN公式：
$$
[
\boldsymbol{h}_t = tanh(\boldsymbol{h}_{t-1} \boldsymbol{W}_h + \boldsymbol{x}_t \boldsymbol{W}_x + \boldsymbol{b})
]
$$

# 观察torch.nn.RNN的输入输出

In [79]:
import torch
import torch.nn as nn

In [114]:
# 单向、单层rnn
# 1个时间步
# batch_first=True表示输入数据的维度为[batch_size, seq_len, input_dim], input_dim在后文也称为input_size
single_rnn = nn.RNN(input_size=4, hidden_size=3, num_layers=1, batch_first=True) 
input = torch.randn(1, 1, 4) # 输入数据维度为[batch_size, time_steps_num, input_dim]
output, h_n = single_rnn(input) # output维度为[batch_size, time_steps_num, hidden_size=3]，h_n维度为[num_layers=1, batch_size, hidden_size=3]
print(input,output, output.shape, h_n, h_n.shape,  sep='\n')

tensor([[[ 0.2211,  0.0713, -0.7325,  0.2592]]])
tensor([[[-0.4568, -0.2468,  0.2100]]], grad_fn=<TransposeBackward1>)
torch.Size([1, 1, 3])
tensor([[[-0.4568, -0.2468,  0.2100]]], grad_fn=<StackBackward0>)
torch.Size([1, 1, 3])


In [113]:
# 单向、单层rnn
# 2个时间步
single_rnn = nn.RNN(input_size=4, hidden_size=3, num_layers=1, batch_first=True) # 输入数据的维度为[batch_size, time_steps_num, input_dim]
input = torch.randn(1, 2, 4) # 输入数据维度为[batch_size, time_steps_num, input_dim]
output, h_n = single_rnn(input) # output维度为[batch_size, time_steps_num, hidden_size=3]，h_n维度为[num_layers=1, batch_size, hidden_size=3]
print(input,output, output.shape, h_n, h_n.shape,  sep='\n')

tensor([[[ 1.3055,  0.0048, -0.0629,  0.4583],
         [ 0.8229,  1.5621,  0.1653,  0.5145]]])
tensor([[[-0.2296, -0.4769, -0.0592],
         [ 0.0134, -0.5053, -0.6914]]], grad_fn=<TransposeBackward1>)
torch.Size([1, 2, 3])
tensor([[[ 0.0134, -0.5053, -0.6914]]], grad_fn=<StackBackward0>)
torch.Size([1, 1, 3])


output输出为不同时间步的隐状态

In [82]:
# 双向、单层rnn
bi_rnn = nn.RNN(input_size=4, hidden_size=3, num_layers=1, batch_first=True, bidirectional=True)
bi_output, bi_h_n = bi_rnn(input)
print(bi_output, bi_output.shape, bi_h_n, bi_h_n.shape, sep='\n')

tensor([[[ 0.0293, -0.2261,  0.3965, -0.8093,  0.5495, -0.2421],
         [ 0.3172, -0.4707, -0.0548, -0.9589,  0.5515,  0.0759],
         [-0.3819, -0.9026,  0.2700, -0.6062,  0.9286, -0.6791],
         [-0.9650,  0.2898,  0.9175, -0.9964,  0.3749, -0.4732],
         [ 0.4947, -0.6497,  0.0801, -0.3799,  0.8914, -0.4917]],

        [[ 0.1236,  0.6172,  0.5129, -0.9334, -0.7831,  0.1077],
         [ 0.7416,  0.5501,  0.4543, -0.8432, -0.2094, -0.3928],
         [ 0.9069, -0.6283, -0.4312, -0.5202,  0.6983, -0.2993],
         [ 0.2843, -0.9798, -0.5583, -0.0776,  0.9733,  0.1556],
         [-0.9714, -0.1158,  0.7961, -0.9926,  0.1743,  0.1932]],

        [[ 0.8565, -0.8896, -0.7905, -0.4024,  0.6848,  0.4695],
         [-0.2559,  0.0835,  0.7091, -0.7468, -0.3244, -0.6832],
         [-0.3923, -0.4974,  0.4001, -0.9646,  0.8942,  0.0540],
         [ 0.2724, -0.8785, -0.4926, -0.8918,  0.8703,  0.0652],
         [ 0.4889, -0.8752, -0.3374,  0.1035,  0.6077, -0.4534]]],
       grad_fn=<Tra

# 从零手搓 RNN 

### 自定义单向单层RNN Layer

In [83]:
import torch
import torch.nn as nn

对照RNN公式实现RNN Layer
$$
[
\boldsymbol{h}_t = tanh(\boldsymbol{h}_{t-1} \boldsymbol{W}_h + \boldsymbol{x}_t \boldsymbol{W}_x + \boldsymbol{b})
]
$$

In [84]:
class RNNLayer(nn.Module):
    def __init__(self,input_size, hidden_size, num_layers=1, batch_first=True):
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.input_size = input_size
        self.bidirectional = False
        super().__init__()
        self.W_ih = nn.Parameter(torch.rand(self.input_size, self.hidden_size))
        self.W_hh = nn.Parameter(torch.rand(self.hidden_size, self.hidden_size))
        self.b_ih = nn.Parameter(torch.zeros(self.hidden_size))
        self.b_hh = nn.Parameter(torch.zeros(self.hidden_size))
        
    def forward(self,x_t,h_prev=None):
        # part 1: torch.matmul(x_t, self.W_ih)
        # x_t包含多个时间步，形状为[batch_size, time_steps_num, input_dim]
        # W_ih形状为[input_dim, hidden_size]
        # torch.matmul(x_t, self.W_ih) 输出矩阵形状为[batch_size, time_steps_num, hidden_size]
        # part 2: torch.matmul(h_prev, self.W_hh)
        # h_prev 形状为[batch_size, time_steps_num, hidden_size]
        # W_hh形状为[hidden_size, hidden_size]
        # torch.matmul(h_prev, self.W_hh) 输出矩阵形状为[batch_size, time_steps_num, hidden_size]
        if h_prev == None:
             h_prev = torch.zeros( x_t.size(0), self.hidden_size)
        output = torch.tanh(torch.matmul(x_t, self.W_ih) + self.b_ih + torch.matmul(h_prev, self.W_hh) + self.b_hh)
        return output,output[:,-1,:].unsqueeze(0)
        

### 测试输出

In [85]:
# 单向、单层rnn
single_rnn = RNNLayer(input_size=4, hidden_size=3, num_layers=1, batch_first=True) # batch_first=True表示输入数据的维度为[batch_size, time_steps_num, input_dim]
input = torch.randn(1, 5, 4) # 输入数据维度为[batch_size, time_steps_num, input_size]
output,h_n = single_rnn(input) # output维度为[batch_size, time_steps_num, hidden_size=3]，h_n维度为[num_layers=1, batch_size, hidden_size=3]
print(output, output.shape, h_n, h_n.shape,  sep='\n')

tensor([[[-0.5654, -0.4903, -0.5631],
         [-0.9804, -0.9930, -0.9965],
         [-0.6663, -0.3654, -0.2664],
         [ 0.3566, -0.1782,  0.1626],
         [-0.3247, -0.6990, -0.6349]]], grad_fn=<TanhBackward0>)
torch.Size([1, 5, 3])
tensor([[[-0.3247, -0.6990, -0.6349]]], grad_fn=<UnsqueezeBackward0>)
torch.Size([1, 1, 3])


输出结果形状与nn.RNN一致

### 用nn.RNN建立模型

In [119]:
import torch.nn as nn

In [123]:
import torch.nn.functional as F

In [None]:
print(F.one_hot.__doc__)

In [147]:
class CustomRNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.vocab_size = output_size# 输入是One hot, output_size和vocab_size 都是词表大小
        self.num_layers = num_layers
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, X):
        # 这里的X输入为word index
        X = F.one_hot(torch.tensor(torch.tensor(X)),self.vocab_size)
        X = X.to(torch.float32)
        print(X.size())
        state_0 = torch.zeros(self.num_layers, X.size(0), self.hidden_size).to(X.device) # 隐状态的形状为[层数，batch_size,hidden_size]
        out, state = self.rnn(X, state_0) 
        out = self.fc(out[:, -1, :])  # 取最后一个时间步的输出
        return out

In [129]:
F.one_hot(torch.tensor([1,2]),20)

tensor([[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])

In [130]:
print(F.one_hot.__doc__)


one_hot(tensor, num_classes=-1) -> LongTensor

Takes LongTensor with index values of shape ``(*)`` and returns a tensor
of shape ``(*, num_classes)`` that have zeros everywhere except where the
index of last dimension matches the corresponding value of the input tensor,
in which case it will be 1.

See also `One-hot on Wikipedia`_ .

.. _One-hot on Wikipedia:
    https://en.wikipedia.org/wiki/One-hot

Arguments:
    tensor (LongTensor): class values of any shape.
    num_classes (int):  Total number of classes. If set to -1, the number
        of classes will be inferred as one greater than the largest class
        value in the input tensor.

Returns:
    LongTensor that has one more dimension with 1 values at the
    index of last dimension indicated by the input, and 0 everywhere
    else.

Examples:
    >>> F.one_hot(torch.arange(0, 5) % 3)
    tensor([[1, 0, 0],
            [0, 1, 0],
            [0, 0, 1],
            [1, 0, 0],
            [0, 1, 0]])
    >>> F.one_hot(torch.ar

### 测试模型输出

In [89]:
def predict_ch8(prefix, num_preds, net, vocab, device):
    """在prefix后面生成新字符

    Defined in :numref:`sec_rnn_scratch`"""
    state = net.begin_state(batch_size=1, device=device)
    outputs = [vocab[prefix[0]]]
    get_input =  lambda: torch.tensor([outputs[-1]], device=device).reshape((1, 1))
    for y in prefix[1:]:  # 预热期
        _, state = net(get_input(), state)
        outputs.append(vocab[y])
    for _ in range(num_preds):  # 预测num_preds步
        y, state = net(get_input(), state)
        outputs.append(int(y.argmax(dim=1).reshape(1)))
    return ''.join([vocab.idx_to_token[i] for i in outputs])

In [105]:
def load_data_time_machine(batch_size, num_steps,
                           use_random_iter=False, max_tokens=10000):
    """返回时光机器数据集的迭代器和词表

    Defined in :numref:`sec_language_model`"""
    data_iter = SeqDataLoader(
        batch_size, num_steps, use_random_iter, max_tokens)
    return data_iter, data_iter.vocab

In [None]:

def load_corpus_time_machine(max_tokens=-1):
    """返回时光机器数据集的词元索引列表和词表

    Defined in :numref:`sec_text_preprocessing`"""
    lines = read_time_machine()
    tokens = tokenize(lines, 'char')
    vocab = Vocab(tokens)
    # 因为时光机器数据集中的每个文本行不一定是一个句子或一个段落，
    # 所以将所有文本行展平到一个列表中
    corpus = [vocab[token] for line in tokens for token in line]
    if max_tokens > 0:
        corpus = corpus[:max_tokens]
    return corpus, vocab

In [None]:
def seq_data_iter_sequential(corpus, batch_size, num_steps):
    """使用顺序分区生成一个小批量子序列

    Defined in :numref:`sec_language_model`"""
    # 从随机偏移量开始划分序列
    offset = random.randint(0, num_steps)
    num_tokens = ((len(corpus) - offset - 1) // batch_size) * batch_size
    Xs = d2l.tensor(corpus[offset: offset + num_tokens])
    Ys = d2l.tensor(corpus[offset + 1: offset + 1 + num_tokens])
    Xs, Ys = Xs.reshape(batch_size, -1), Ys.reshape(batch_size, -1)
    num_batches = Xs.shape[1] // num_steps
    for i in range(0, num_steps * num_batches, num_steps):
        X = Xs[:, i: i + num_steps]
        Y = Ys[:, i: i + num_steps]
        yield X, Y

In [104]:
class SeqDataLoader:
    """加载序列数据的迭代器"""
    def __init__(self, batch_size, num_steps, use_random_iter, max_tokens):
        """Defined in :numref:`sec_language_model`"""
        if use_random_iter:
            self.data_iter_fn = d2l.seq_data_iter_random
        else:
            self.data_iter_fn = d2l.seq_data_iter_sequential
        self.corpus, self.vocab = d2l.load_corpus_time_machine(max_tokens)
        self.batch_size, self.num_steps = batch_size, num_steps

    def __iter__(self):
        return self.data_iter_fn(self.corpus, self.batch_size, self.num_steps)

In [100]:
batch_size, num_steps = 32, 35
train_iter, vocab = d2l.load_data_time_machine(batch_size, num_steps)

AttributeError: module 'd2l' has no attribute 'load_data_time_machine'

In [145]:
model = CustomRNN(256, 10,1,256)
predict_ch8('time traveller ', 10, model, vocab, d2l.try_gpu())

NameError: name 'vocab' is not defined

In [182]:
model = CustomRNN(256, 10,1,256)
Y = model([[13,12,14]])

torch.Size([1, 3, 256])


  X = F.one_hot(torch.tensor(torch.tensor(X)),self.vocab_size)


In [183]:
Y.size()

torch.Size([1, 256])

In [164]:
index = Y.argmax(dim=1)

In [165]:
idx_to_char[index]

'动'

In [190]:
def predict(init_chars,model,time_steps_num,idx_to_char,char_to_idx):
    X = []
    for c in init_chars:
        X.append(char_to_idx[c])
    output = init_chars
    print(X)
    for i in range(time_steps_num):
        Y= model([X])
        idx = Y.argmax(dim=1)
        X.append(idx)
        output+=idx_to_char[idx]
    return output

In [None]:
idx_to_char

In [192]:
predict('构', model,5,idx_to_char,char_to_idx)

[12]
torch.Size([1, 1, 256])
torch.Size([1, 2, 256])
torch.Size([1, 3, 256])
torch.Size([1, 4, 256])
torch.Size([1, 5, 256])


  X = F.one_hot(torch.tensor(torch.tensor(X)),self.vocab_size)


'构3将连将连'

In [76]:
class CustomRNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(CustomRNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.input_size = input_size
        self.rnn = RNNLayer(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers, batch_first=True)
        self.fc = nn.Linear(self.hidden_size, output_size)

    def forward(self, X, h_prev):
        
        print(x.size())
        batch_size, seq_length, _ = x.size()
        hiddens = []
        for t in range(seq_length):
            x_t = x[:, t, :]
            h_t = torch.tanh(torch.mm(x_t, self.W_ih) + self.b_ih + torch.mm(h_prev, self.W_hh) + self.b_hh)
            hiddens.append(h_t)
            h_prev = h_t
        h_final = hiddens[-1]
        output = self.fc(h_final)
        return output, h_prev

    def init_hidden(self, batch_size):
        return torch.zeros(batch_size, self.hidden_size)




In [65]:
#RNNLayer(input_size=input_size, hidden_size=hidden_size, num_layers=1, batch_first=True)
num_hiddens = 256
# rnn_layer = nn.LSTM(input_size=vocab_size, hidden_size=num_hiddens) # 已测试
rnn_layer = RNNLayer(input_size=vocab_size, hidden_size=num_hiddens)
num_hiddens = 256
# rnn_layer = nn.LSTM(input_size=vocab_size, hidden_size=num_hiddens) # 已测试
rnn_layer = nn.RNN(input_size=vocab_size, hidden_size=num_hiddens)
# 本类已保存在d2lzh_pytorch包中方便以后使用
class RNNModel(nn.Module):
    def __init__(self, rnn_layer, vocab_size):
        super(RNNModel, self).__init__()
        self.rnn = rnn_layer
        self.hidden_size = rnn_layer.hidden_size * (2 if rnn_layer.bidirectional else 1) 
        self.vocab_size = vocab_size
        self.dense = nn.Linear(self.hidden_size, vocab_size)
        self.state = None

    def forward(self, inputs, state): # inputs: (batch, seq_len)
        # 获取one-hot向量表示
        X = nn.functional.one_hot(inputs, self.vocab_size) # X是个list
        Y, self.state = self.rnn(X, state)
        # 全连接层会首先将Y的形状变成(num_steps * batch_size, num_hiddens)，它的输出
        # 形状为(num_steps * batch_size, vocab_size)
        output = self.dense(Y.view(-1, Y.shape[-1]))
        return output, self.state

In [None]:
print(nn.functional.one_hot.__doc__)

In [59]:
torch.arange(0, 6).view(3,2) % 3

tensor([[0, 1],
        [2, 0],
        [1, 2]])

In [154]:
def load_data_lyrics():
    #with zipfile.ZipFile('./test.txt') as zin:
    with open('test.txt') as f:
            corpus_chars = f.read()#.decode('utf-8')
    # corpus_chars[:40]  # '想要有直升机\n想要和你飞到宇宙去\n想要和你融化在一起\n融化在宇宙里\n我每天每天每'

    # 将换行符替换成空格；仅使用前1万个字符来训练模型
    corpus_chars = corpus_chars.replace('\n', ' ').replace('\r', ' ')
    corpus_chars = corpus_chars[0:10000]

    # 将每个字符映射成索引
    idx_to_char = list(set(corpus_chars))
    char_to_idx = dict([(char, i) for i, char in enumerate(idx_to_char)])
    vocab_size = len(char_to_idx)  # 1027
    corpus_indices = [char_to_idx[char] for char in corpus_chars]
    sample = corpus_indices[:20]
    return corpus_indices, char_to_idx, idx_to_char, vocab_size


In [155]:
(corpus_indices, char_to_idx, idx_to_char, vocab_size) = load_data_lyrics()

In [162]:
idx_to_char[:5]

['R', 'n', '节', 'A', '”']

In [163]:
char_to_idx

{'R': 0,
 'n': 1,
 '节': 2,
 'A': 3,
 '”': 4,
 '协': 5,
 '态': 6,
 '征': 7,
 '自': 8,
 'y': 9,
 '访': 10,
 '队': 11,
 '构': 12,
 '应': 13,
 '解': 14,
 '言': 15,
 '批': 16,
 '次': 17,
 '持': 18,
 '做': 19,
 '另': 20,
 '题': 21,
 '概': 22,
 '正': 23,
 '回': 24,
 '无': 25,
 '度': 26,
 '能': 27,
 '将': 28,
 '和': 29,
 '共': 30,
 '远': 31,
 '/': 32,
 '问': 33,
 '需': 34,
 '开': 35,
 'P': 36,
 '处': 37,
 '息': 38,
 '符': 39,
 '列': 40,
 '不': 41,
 '3': 42,
 '按': 43,
 'p': 44,
 '件': 45,
 '含': 46,
 '术': 47,
 '是': 48,
 '.': 49,
 '方': 50,
 '缺': 51,
 '中': 52,
 '链': 53,
 '何': 54,
 '式': 55,
 '风': 56,
 't': 57,
 '码': 58,
 '量': 59,
 '仅': 60,
 'J': 61,
 '递': 62,
 'h': 63,
 '增': 64,
 '知': 65,
 '刚': 66,
 '间': 67,
 '享': 68,
 '具': 69,
 '面': 70,
 '为': 71,
 '客': 72,
 '调': 73,
 '思': 74,
 '反': 75,
 '压': 76,
 '视': 77,
 '明': 78,
 '移': 79,
 'W': 80,
 '型': 81,
 '务': 82,
 '跳': 83,
 '成': 84,
 '足': 85,
 '性': 86,
 '计': 87,
 '够': 88,
 '公': 89,
 '操': 90,
 '念': 91,
 '）': 92,
 '始': 93,
 '直': 94,
 '行': 95,
 '子': 96,
 '体': 97,
 '名': 98,
 '缓': 99,
 '标': 100,

In [48]:
def to_onehot(X, n_class):  
    # X shape: (batch, seqd_len), output: seq_len elements of (batch, n_class)
    return [one_hot(X[:, i], n_class) for i in range(X.shape[1])]


In [77]:
# 本函数已保存在d2lzh_pytorch包中方便以后使用
def predict_rnn_pytorch(prefix, num_chars, model, vocab_size, device, idx_to_char,
                      char_to_idx):
    state = None
    output = [char_to_idx[prefix[0]]] # output会记录prefix加上输出
    for t in range(num_chars + len(prefix) - 1):
        X = torch.tensor([output[-1]], device=device).view(1, 1)
        if state is not None:
            if isinstance(state, tuple): # LSTM, state:(h, c)  
                state = (state[0].to(device), state[1].to(device))
            else:   
                state = state.to(device)
            
        (Y, state) = model(X, state)
        if t < len(prefix) - 1:
            output.append(char_to_idx[prefix[t + 1]])
        else:
            output.append(int(Y.argmax(dim=1).item()))
    return ''.join([idx_to_char[i] for i in output])

In [78]:
device='cuda:0'
model = RNNModel(rnn_layer, vocab_size).to(device)
predict_rnn_pytorch('分开', 10, model, vocab_size, device, idx_to_char, char_to_idx)

RuntimeError: mat1 and mat2 must have the same dtype

## 训练

### 数据预处理
- 读取数据集：
  首先需要获取周杰伦的歌词数据集，可以从网络上搜索整理其歌词文本，将所有歌词保存到一个文本文件中，如jaychou_lyrics.txt。使用 Python 的open()函数读取文件内容，并进行必要的字符编码转换 。
- 建立字符索引：
将歌词中的每个字符映射为一个从 0 开始的连续整数索引，构建字符到索引的字典char_to_idx以及索引到字符的字典idx_to_char。通过遍历歌词文本，找出所有不同的字符，然后为每个字符分配一个唯一的索引。同时，可以得到词典大小vocab_size，即不同字符的数量 。
- 数据采样:

  对处理后的数据进行采样，以便生成训练所需的小批量数据。常见的采样方式有随机采样和相邻采样两种 ：
- 随机采样：
  每次从数据中随机选择一定长度的连续字符序列作为一个样本，同时对应的下一个字符作为该样本的标签。例如，若设定时间步数为num_steps，则每次随机选取num_steps个连续字符作为输入样本，其后面的一个字符作为输出标签。
相邻采样：按照顺序依次选取连续的字符序列作为样本和标签，即第i个样本的输入是从i到i + num_steps - 1的字符序列，其标签则是从i + 1到i + num_steps的字符序列。

In [34]:

# 假设输入歌词维度、隐藏层维度、层数、输出维度等
input_size = 100
hidden_size = 256
num_layers = 1
output_size = 100
rnn = CustomRNN(input_size, hidden_size, num_layers, output_size)

# 模拟输入数据（实际要根据歌词进行词向量等转换），这里假设一批次2条数据，序列长度5，维度为input_size
x = torch.randn(2, 5, input_size)
optimizer = optim.Adam(rnn.parameters(), lr=0.001)
criterion = nn.MSELoss()

for epoch in range(10):
    h_prev = rnn.init_hidden(2)
    output, h_prev = rnn(x, h_prev)
    loss = criterion(output, torch.randn(2, output_size))
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    print(f'Epoch {epoch}, Loss: {loss.item()}')


NameError: name 'CustomRNN' is not defined

### 模型训练
参数初始化：初始化模型的参数，如词嵌入维度embedding_dim、隐藏层维度hidden_dim等，并定义损失函数和优化器。例如，可以使用交叉熵损失函数nn.CrossEntropyLoss()和随机梯度下降优化器torch.optim.SGD() 。
训练循环：在训练循环中，按照设定的批次大小和采样方式获取训练数据，将数据输入到模型中进行前向传播，计算损失值，然后使用优化器进行反向传播更新模型参数。在每个训练周期，可以打印出当前的损失值，以观察模型的训练进度 。

In [1]:
def train(model, data_loader, criterion, optimizer, num_epochs):
    for epoch in range(num_epochs):
        hidden = None
        total_loss = 0
        for batch_x, batch_y in data_loader:
            optimizer.zero_grad()
            output, hidden = model(batch_x, hidden)
            loss = criterion(output, batch_y.view(-1))
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=5)
            optimizer.step()
            total_loss += loss.item()
            hidden = hidden.detach()
        print(f'Epoch {epoch + 1}, Loss: {total_loss / len(data_loader)}')

### 模型测试与效果评估
- 生成歌词：训练完成后，可以使用训练好的模型来生成周杰伦风格的歌词。给定一个起始字符或字符序列，通过模型预测下一个可能的字符，然后将预测的字符作为新的输入，继续预测下一个字符，以此类推，生成一段歌词 。

In [2]:
def generate_text(model, char_to_idx, idx_to_char, start_text, length):
    model.eval()
    with torch.no_grad():
        input_text = torch.tensor([char_to_idx[char] for char in start_text]).unsqueeze(0)
        hidden = None
        generated_text = start_text
        for _ in range(length):
            output, hidden = model(input_text, hidden)
            output_probs = torch.softmax(output, dim=1)
            top_prob, top_idx = torch.topk(output_probs, k=1)
            top_char = idx_to_char[top_idx.item()]
            generated_text += top_char
            input_text = torch.tensor([top_idx]).unsqueeze(0)
        return generated_text

- 效果评估：可以从多个角度评估生成歌词的效果，如歌词的通顺性、连贯性、是否符合周杰伦的风格等。一种简单的方法是人工观察和评价生成的歌词，判断其是否具有一定的合理性和艺术感。也可以使用一些自动评估指标，如困惑度（Perplexity）等来定量地评估模型的性能，但困惑度指标并非完全能够准确反映生成文本的质量，仅供参考.

In [3]:
def calculate_perplexity(model, data_loader, criterion):
    model.eval()
    total_loss = 0
    total_count = 0
    with torch.no_grad():
        for batch_x, batch_y in data_loader:
            output, _ = model(batch_x, None)
            loss = criterion(output, batch_y.view(-1))
            total_loss += loss.item() * batch_y.numel()
            total_count += batch_y.numel()
    return torch.exp(torch.tensor(total_loss / total_count))

通过以上步骤，就可以利用周杰伦的歌词训练 PyTorch RNN 模型，并对生成歌词的效果进行测试和评估 。需要注意的是，由于歌词的生成具有一定的主观性和创造性，模型的表现可能会因多种因素而有所不同，可通过调整模型结构、参数、训练数据等方式来进一步优化模型的性能 。

In [None]:
s