<a href="https://colab.research.google.com/github/yananma/5_programs_per_day/blob/master/02152.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## 6.5 循环神经网络的简洁实现

In [0]:
import math 
import numpy as np 
import torch
from torch import nn, optim 
import torch.nn.functional as F 
import d2l 
import time 
# Run on the GPU when PyTorch can see a CUDA device, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [0]:
# Create the data directory two levels above the working directory.
# -p keeps the cell idempotent: no error when the directory already exists.
!mkdir -p ../../data

In [4]:
# Fetch the companion repository, which ships the lyrics archive used below.
# The directory test makes the cell safe to re-run; --depth 1 skips the
# history, since only the files in the working tree are needed.
![ -d Dive-into-DL-PyTorch ] || git clone --depth 1 https://github.com/ShusenTang/Dive-into-DL-PyTorch.git

Cloning into 'Dive-into-DL-PyTorch'...
remote: Enumerating objects: 1692, done.[K
remote: Total 1692 (delta 0), reused 0 (delta 0), pack-reused 1692[K
Receiving objects: 100% (1692/1692), 25.29 MiB | 33.54 MiB/s, done.
Resolving deltas: 100% (975/975), done.


In [0]:
# Copy the zipped lyrics corpus from the cloned repository into ../../data.
# NOTE(review): presumably this is where d2l.load_data_jay_lyrics() looks for it - confirm.
!cp Dive-into-DL-PyTorch/data/jaychou_lyrics.txt.zip ../../data

In [0]:
# Unpack the four values returned by the d2l loader. Judging by the names these
# are the index-encoded corpus, the two char<->index lookup tables and the
# vocabulary size (TODO confirm against the d2l helper).
corpus_indices, char_to_idx, idx_to_char, vocab_size = d2l.load_data_jay_lyrics()

### 6.5.1 定义模型

In [0]:
# Size of the recurrent layer's hidden state.
num_hiddens = 256
# Recurrent layer whose per-step input is a vocab_size-wide vector.
rnn_layer = nn.RNN(vocab_size, num_hiddens)

In [8]:
# Sanity-check the layer's output shapes on a random mini-batch laid out as
# (num_steps, batch_size, vocab_size).
num_steps, batch_size = 35, 2
state = None  # no initial hidden state is supplied to the layer
X = torch.rand(num_steps, batch_size, vocab_size)
Y, state_new = rnn_layer(X, state)
(Y.shape, len(state_new), state_new[0].shape)

(torch.Size([35, 2, 256]), 1, torch.Size([2, 256]))

In [0]:
class RNNModel(nn.Module):
    """Language model made of a recurrent layer followed by a linear decoder.

    Args:
        rnn_layer: Recurrent layer to wrap. It must expose ``hidden_size`` and
            ``bidirectional`` and is called as ``rnn_layer(X, state)`` with
            ``X`` laid out as (num_steps, batch_size, vocab_size).
        vocab_size: Number of distinct tokens. Used both as the one-hot width
            of the input and as the output size of the decoder.
    """

    def __init__(self, rnn_layer, vocab_size):
        super(RNNModel, self).__init__()
        self.rnn = rnn_layer
        # A bidirectional layer emits both directions, doubling the feature width.
        self.hidden_size = rnn_layer.hidden_size * (2 if rnn_layer.bidirectional else 1)
        self.vocab_size = vocab_size
        self.dense = nn.Linear(self.hidden_size, vocab_size)
        # Most recent state returned by the recurrent layer.
        self.state = None

    def forward(self, inputs, state=None):
        """Run the recurrent layer over a batch of token indices and decode it.

        Args:
            inputs: 2-D tensor of token indices laid out as
                (batch_size, num_steps). Any numeric dtype is accepted; the
                values are cast to integers before one-hot encoding.
            state: State handed to the recurrent layer, or ``None`` to let the
                layer use its own default initial state.

        Returns:
            A pair ``(output, state)``. ``output`` has shape
            (num_steps * batch_size, vocab_size), with rows ordered by time
            step first and batch element second. ``state`` is whatever the
            recurrent layer returned; it is also stored on ``self.state``.
        """
        # Transpose to time-major order, then one-hot encode the whole batch in
        # a single call instead of building one tensor per time step and
        # stacking them. The result is (num_steps, batch_size, vocab_size).
        X = F.one_hot(inputs.long().t(), self.vocab_size).to(self.dense.weight.dtype)
        Y, self.state = self.rnn(X, state)
        # Fold the time and batch dimensions together so the decoder sees one
        # row per (time step, batch element).
        output = self.dense(Y.reshape(-1, Y.shape[-1]))
        return output, self.state

### 6.5.2 训练模型

In [0]:
def predict_rnn_pytorch(prefix, num_chars, model, vocab_size, device, idx_to_char, char_to_idx):
    """Generate text that continues ``prefix`` by greedy decoding.

    The prefix is fed to the model one character at a time to warm up its
    state; after that, the highest-scoring character at each step is appended
    and fed back as the next input.

    Args:
        prefix: Non-empty string to continue. Every character must be a key of
            ``char_to_idx``.
        num_chars: Number of characters to generate after the prefix.
        model: Callable invoked as ``model(X, state)`` with ``X`` of shape
            (1, 1); it must return ``(scores, new_state)``.
        vocab_size: Not used here; kept so existing callers keep working.
        device: Device on which the input tensor and the state are placed.
        idx_to_char: Index-to-character lookup (indexable by int).
        char_to_idx: Character-to-index mapping.

    Returns:
        A string of ``len(prefix) + num_chars`` characters that starts with
        ``prefix``.
    """
    state = None
    output = [char_to_idx[prefix[0]]]

    # Generation is pure inference: switch to eval mode for its duration so
    # mode-dependent layers behave deterministically, and restore the caller's
    # mode afterwards. Callables without a `training` flag are left alone.
    was_training = getattr(model, 'training', False)
    if was_training:
        model.eval()
    try:
        # No gradients are needed here; this avoids building an autograd graph
        # on every generated character.
        with torch.no_grad():
            for t in range(num_chars + len(prefix) - 1):
                # Feed the most recent character as a (1, 1) batch.
                X = torch.tensor([output[-1]], device=device).view(1, 1)
                if state is not None:
                    # The state may be a single tensor or a tuple of two
                    # tensors (e.g. an LSTM-style pair); move either form.
                    if isinstance(state, tuple):
                        state = (state[0].to(device), state[1].to(device))
                    else:
                        state = state.to(device)

                (Y, state) = model(X, state)
                if t < len(prefix) - 1:
                    # Still inside the prefix: use the given character.
                    output.append(char_to_idx[prefix[t + 1]])
                else:
                    # Past the prefix: take the highest-scoring character.
                    output.append(int(Y.argmax(dim=1).item()))
    finally:
        if was_training:
            model.train()
    return ''.join([idx_to_char[i] for i in output])

In [11]:
# Wrap the recurrent layer in the language model and move it to the target device.
model = RNNModel(rnn_layer, vocab_size)
model = model.to(device)
# Generate 10 characters from the model before any training has been run.
predict_rnn_pytorch(prefix='分开', num_chars=10, model=model, vocab_size=vocab_size,
                    device=device, idx_to_char=idx_to_char, char_to_idx=char_to_idx)

'分开天辛天天如蝶封如心前'

In [0]:
def train_and_predict_rnn_pytorch(model, num_hiddens, vocab_size, device, corpus_indices, idx_to_char, char_to_idx, 
                num_epochs, num_steps, lr, clipping_theta, batch_size, pred_period, pred_len, prefixes):
    """Train ``model`` with Adam and periodically print sample generations.

    Args:
        model: Module invoked as ``model(X, state)``; it must return
            ``(output, state)``, where ``output`` holds one row of class
            scores per target in the flattened label vector.
        num_hiddens: Not used here; kept so existing callers keep working.
        vocab_size: Forwarded to ``predict_rnn_pytorch``.
        device: Device the model is moved to and the minibatches are built on.
        corpus_indices: Corpus handed to ``d2l.data_iter_consecutive``.
        idx_to_char: Index-to-character lookup, used for sample generation.
        char_to_idx: Character-to-index mapping, used for sample generation.
        num_epochs: Number of passes over the corpus.
        num_steps: Time steps per minibatch, passed to the data iterator.
        lr: Learning rate for Adam.
        clipping_theta: Threshold passed to ``d2l.grad_clipping``.
        batch_size: Minibatch size, passed to the data iterator.
        pred_period: Print perplexity and samples every this many epochs.
        pred_len: Number of characters to generate for each sample.
        prefixes: Iterable of prefix strings to generate samples from.

    Returns:
        None. Progress is reported with ``print``.
    """
    loss = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    model.to(device)
    for epoch in range(num_epochs):
        l_sum, n, start = 0.0, 0, time.time()
        data_iter = d2l.data_iter_consecutive(corpus_indices, batch_size, num_steps, device)
        # A fresh iterator is built for every epoch, so the state left over
        # from the last minibatch of the previous epoch must not seed this
        # one. Start each epoch from the layer's default initial state.
        state = None
        for X, Y in data_iter:
            if state is not None:
                # Cut the autograd graph at the minibatch boundary so that
                # backpropagation does not reach into earlier minibatches.
                # The state is either one tensor or a tuple of two tensors.
                if isinstance(state, tuple):
                    state = (state[0].detach(), state[1].detach())
                else:
                    state = state.detach()
            (output, state) = model(X, state)
            # Swap the first two axes of Y before flattening so the labels are
            # in the same order as the rows of `output`.
            y = torch.transpose(Y, 0, 1).contiguous().view(-1)
            l = loss(output, y.long())
            optimizer.zero_grad()
            l.backward()
            d2l.grad_clipping(model.parameters(), clipping_theta, device)
            optimizer.step()
            # CrossEntropyLoss averages over the batch; re-weight by the number
            # of targets so l_sum / n is the per-target mean over the epoch.
            l_sum += l.item() * y.shape[0]
            n += y.shape[0]

        try:
            perplexity = math.exp(l_sum / n)
        except OverflowError:
            # The mean loss was too large for exp(); report it as infinite.
            perplexity = float('inf')
        if (epoch + 1) % pred_period == 0:
            print('epoch %d, perplexity %f, time %.2f sec' 
                % (epoch + 1, perplexity, time.time() - start))
            for prefix in prefixes:
                print(' -', predict_rnn_pytorch(prefix, pred_len, model, vocab_size, device, 
                            idx_to_char, char_to_idx))

In [14]:
# Optimisation settings.
num_epochs = 250
batch_size = 32
lr = 1e-3
clipping_theta = 1e-2
# Reporting settings: how often to print, how many characters to generate,
# and which prefixes to generate from.
pred_period = 50
pred_len = 50
prefixes = ['分开', '不分开']
train_and_predict_rnn_pytorch(
    model, num_hiddens, vocab_size, device, corpus_indices, idx_to_char, char_to_idx,
    num_epochs, num_steps, lr, clipping_theta, batch_size, pred_period, pred_len, prefixes)

epoch 50, perplexity 10.818418, time 0.14 sec
 - 分开始我不  想 我不能再想 我不能再想 我不要再想 我不要再想 我不要再想 我不要再想 我不要再想 我
 - 不分开 我不能再想 我不要再想 我不要再想 我不要再想 我不要再想 我不要再想 我不要再想 我不要再想 我
epoch 100, perplexity 1.253825, time 0.14 sec
 - 分开始我不错搞错 拜托 我想是你的脑袋有问题 随便说说 其实我早已经猜透看透不想多说 只是我怕眼泪撑不住
 - 不分开不能承想你 是你是你 别不了 想要再这样打我妈妈 难道你手不会痛吗 其实我回家就想要阻止一切 让家庭
epoch 150, perplexity 1.063882, time 0.14 sec
 - 分开始我不多痛熬 心伤透看到  什么都会有快使用双截棍 哼哼哈兮 快使用双截棍 哼哼哈兮 如果我有轻功 
 - 不分开不了太多就我 就和你 让它一定的你 原时日记 你的手知后口让我知道 就是开不了口让她知道 就是开不了
epoch 200, perplexity 1.031009, time 0.14 sec
 - 分开 我不多痛熬  我球你爸 我打我妈妈 我说你爸你 打我妈 这样 吗干嘛这样 何必让酒牵鼻子走 瞎 说
 - 不分开不了太多就我 无和汉常 我想就你已经我的我都会你烦爱  这样没担忧伤 难多 我跟一直到口睡著一口被老
epoch 250, perplexity 1.019579, time 0.14 sec
 - 分开 我不懂 你的黑色幽默 想通 却又再考倒我 说散 你想很久了吧? 我不想拆穿你 当作 是你开的玩笑 
 - 不分开不了太多就我 一定一 从小到大你在 有的路从 时间变你 心老 看不著去想这样没担忧 唱着歌 一直走 
