In [12]:
import torch 
from torch import nn
from torch.nn import functional as F

import sys
sys.path.append('..')
from d2l_helpers import *

In [13]:
batch_size, num_steps = 32, 35
train_iter, vocab = load_data_time_machine(batch_size, num_steps)

In [14]:
X = torch.arange(10).reshape((2, 5))
F.one_hot(X.T, 28).shape

torch.Size([5, 2, 28])

In [15]:
def get_params(vocab_size, num_hiddens, device):
    num_inputs = num_outputs = vocab_size

    def normal(shape):
        return torch.randn(size=shape, device=device) * 0.01

    # 隐藏层参数
    W_xh = normal((num_inputs, num_hiddens))
    W_hh = normal((num_hiddens, num_hiddens))
    b_h = torch.zeros(num_hiddens, device=device)
    # 输出层参数
    W_hq = normal((num_hiddens, num_outputs))
    b_q = torch.zeros(num_outputs, device=device)
    # 附加梯度
    params = [W_xh, W_hh, b_h, W_hq, b_q]
    for param in params:
        param.requires_grad_(True)
    return params

In [16]:
def init_rnn_state(batch_size, num_hiddens, device):
    return (torch.zeros((batch_size, num_hiddens), device=device), )

In [17]:
def rnn(inputs, state, params):
    # inputs的形状：(时间步数量，批量大小，词表大小)
    W_xh, W_hh, b_h, W_hq, b_q = params
    H, = state
    outputs = []
    # X的形状：(批量大小，词表大小)
    for X in inputs:
        H = torch.tanh(torch.mm(X, W_xh) + torch.mm(H, W_hh) + b_h)
        Y = torch.mm(H, W_hq) + b_q
        outputs.append(Y)
    return torch.cat(outputs, dim=0), (H,)

In [18]:
class RNNModelScratch: #@save
    """从零开始实现的循环神经网络模型"""
    def __init__(self, vocab_size, num_hiddens, device,
                 get_params, init_state, forward_fn):
        self.vocab_size, self.num_hiddens = vocab_size, num_hiddens
        self.params = get_params(vocab_size, num_hiddens, device)
        self.init_state, self.forward_fn = init_state, forward_fn

    def __call__(self, X, state):
        X = F.one_hot(X.T, self.vocab_size).type(torch.float32)
        return self.forward_fn(X, state, self.params)

    def begin_state(self, batch_size, device):
        return self.init_state(batch_size, self.num_hiddens, device)

In [19]:
num_hiddens = 512
net = RNNModelScratch(len(vocab), num_hiddens, get_device(), get_params,
                      init_rnn_state, rnn)
state = net.begin_state(X.shape[0], get_device())
Y, new_state = net(X.to(get_device()), state)
Y.shape, len(new_state), new_state[0].shape

(torch.Size([10, 28]), 1, torch.Size([2, 512]))

In [20]:
predict_rnn('time traveller', 10, net, vocab, get_device())

'time travellerrrrrrrrrrr'

In [21]:
num_epochs, lr = 500, 1
train_rnn(net, train_iter, vocab, lr, num_epochs, get_device())

time traveller the the the the the the the the the the the the t
epoch 10, perplexity 13.6
time travellere the the the the the the the the the the the the 
epoch 20, perplexity 10.6
time travellere the the the the the the the the the the the the 
epoch 30, perplexity 9.6


KeyboardInterrupt: 

In [11]:
train_rnn(net, train_iter, vocab, lr, num_epochs, get_device(), use_random_iter=True)

time travellerit s against reason said the medical manour ancest
epoch 10, perplexity 2.0
time travellerit s against reason said filbycan ascuble he have 
epoch 20, perplexity 1.7
time travellerit s against reason said filby but you willnever c
epoch 30, perplexity 1.8
time traveller proc ensong bre weing wisereell simely thing he t
epoch 40, perplexity 1.8
time traveller but now you begin to seethe object of my investig
epoch 50, perplexity 1.6
time travellerit s alain th saldithis ction at reger mace ane or
epoch 60, perplexity 1.8
time travellerit s againstarthere and for the frese suin ceqvedi
epoch 70, perplexity 1.6
time travellerit s against reason said filbychat whis ho d wist 
epoch 80, perplexity 1.6
time traveller proceeded anyreal body must have extension in fou
epoch 90, perplexity 1.6
time traveller proceeded anyreal body must havelength breadth th
epoch 100, perplexity 1.5
time travellerit s against reason said filbywon a kfowr his regl
epoch 110, perplexity 1.7
time tra