## RNN原理以及API

### PyTorch API

In [2]:
import torch
import torch.nn as nn

1 单向、单层RNN

In [3]:
single_rnn = nn.RNN(4, 3, 1, batch_first=True)
input = torch.randn(1, 2, 4)  # bs * sl * fs
output, h_n = single_rnn(input)
print(output)
print(h_n)

tensor([[[-0.7447,  0.9569, -0.0550],
         [ 0.1866, -0.1136, -0.7676]]], grad_fn=<TransposeBackward1>)
tensor([[[ 0.1866, -0.1136, -0.7676]]], grad_fn=<StackBackward0>)


2 双向、单层RNN

In [4]:
bidirectional_rnn = nn.RNN(4, 3, 1, batch_first=True, bidirectional=True)
bi_output, bi_h_n = bidirectional_rnn(input)
print(bi_output)
print(bi_h_n)

tensor([[[ 0.5428,  0.3343, -0.3668,  0.4381, -0.5255,  0.6674],
         [-0.4299, -0.7727, -0.1020,  0.5534,  0.3776, -0.1801]]],
       grad_fn=<TransposeBackward1>)
tensor([[[-0.4299, -0.7727, -0.1020]],

        [[ 0.4381, -0.5255,  0.6674]]], grad_fn=<StackBackward0>)


### 实现单向RNN

定义常量

In [5]:
bs, T = 2, 3  # 批大小，输入序列长度

input_size, hidden_size = 2, 3  # 输入特征大小, 隐藏层大小
input = torch.randn(bs, T, input_size)  # 随机初始化一个输入特征序列
h_prev = torch.zeros(bs, hidden_size)  # 初始隐含状态

In [6]:
rnn = nn.RNN(input_size, hidden_size, batch_first=True)
rnn_output, state_final = rnn(input, h_prev.unsqueeze(0))

print(rnn_output)
print(state_final)

tensor([[[ 0.6003,  0.6859, -0.7012],
         [ 0.7660,  0.7241, -0.6936],
         [ 0.7001, -0.5198,  0.5499]],

        [[-0.6602,  0.6324, -0.5846],
         [-0.4041,  0.7759, -0.8506],
         [ 0.2483,  0.4718, -0.6424]]], grad_fn=<TransposeBackward1>)
tensor([[[ 0.7001, -0.5198,  0.5499],
         [ 0.2483,  0.4718, -0.6424]]], grad_fn=<StackBackward0>)


In [10]:
def rnn_forward(input, weight_ih, bias_ih, weight_hh, bias_hh, h_prev):
    bs, T, input_size = input.shape
    h_dim = weight_ih.shape[0]
    h_out = torch.zeros(bs, T, h_dim)  # 初始化输出（状态）矩阵

    for t in range(T):
        x = input[:, t, :].unsqueeze(2)  # 获取当前时刻输入 bs * input_size * 1
        w_ih_batch = weight_ih.unsqueeze(0).tile(bs, 1, 1)  # bs * h_dim * input_size
        w_hh_batch = weight_hh.unsqueeze(0).tile(bs, 1, 1)  # bs * h_dim * h_dim

        w_times_x = torch.bmm(w_ih_batch, x).squeeze()  # bs * h_dim
        w_times_h = torch.bmm(w_hh_batch, h_prev.unsqueeze(2)).squeeze()  # bs * h_dim
        h_prev = torch.tanh(w_times_x + bias_ih + w_times_h + bias_hh)

        h_out[:, t, :] = h_prev

    return h_out, h_prev.unsqueeze(0)

In [8]:
for k, v in rnn.named_parameters():
    print(k, v)

weight_ih_l0 Parameter containing:
tensor([[ 0.2882, -0.4006],
        [-0.4664, -0.2001],
        [ 0.4187,  0.2187]], requires_grad=True)
weight_hh_l0 Parameter containing:
tensor([[ 0.3319, -0.1043, -0.4763],
        [-0.0804,  0.0795, -0.3260],
        [ 0.3775, -0.0891,  0.4441]], requires_grad=True)
bias_ih_l0 Parameter containing:
tensor([ 0.2358,  0.3048, -0.3172], requires_grad=True)
bias_hh_l0 Parameter containing:
tensor([-0.0119, -0.1967,  0.1734], requires_grad=True)


将官方API创建的RNN参数放入我们实现的RNN中

In [11]:
custom_rnn_output, custom_state_final = rnn_forward(input, rnn.weight_ih_l0, rnn.bias_ih_l0,
                                                    rnn.weight_hh_l0, rnn.bias_hh_l0, h_prev)

print(custom_rnn_output)
print(custom_state_final)

tensor([[[ 0.6003,  0.6859, -0.7012],
         [ 0.7660,  0.7241, -0.6936],
         [ 0.7001, -0.5198,  0.5499]],

        [[-0.6602,  0.6324, -0.5846],
         [-0.4041,  0.7759, -0.8506],
         [ 0.2483,  0.4718, -0.6424]]], grad_fn=<CopySlices>)
tensor([[[ 0.7001, -0.5198,  0.5499],
         [ 0.2483,  0.4718, -0.6424]]], grad_fn=<UnsqueezeBackward0>)


### 实现双向RNN

In [12]:
def bidirectional_rnn_forward(input, weight_ih, bias_ih, weight_hh, bias_hh, h_prev,
                              weight_ih_reverse, bias_ih_reverse, weight_hh_reverse,
                              bias_hh_reverse, h_prev_reverse):
    bs, T, input_size = input.shape
    h_dim = weight_ih.shape[0]
    h_out = torch.zeros(bs, T, h_dim*2)  # 初始化输出（状态）矩阵, 双向是两倍的特征大小

    forward_output = rnn_forward(input, weight_ih, bias_ih, weight_hh, bias_hh, h_prev)[0]
    backward_output = rnn_forward(torch.flip(input, [1]), weight_ih_reverse, bias_ih_reverse, weight_hh_reverse,
                bias_hh_reverse, h_prev_reverse)[0]

    h_out[:, :, :h_dim] = forward_output
    h_out[:, :, h_dim:] = backward_output

    return h_out, h_out[:, -1, :].reshape(bs, 2, h_dim).transpose(0, 1)

In [14]:
bi_rnn = nn.RNN(input_size, hidden_size, batch_first=True, bidirectional=True)
h_prev = torch.zeros(2, bs, hidden_size)
bi_rnn_output, bi_state_final = bi_rnn(input, h_prev)

print(bi_rnn_output)
print(bi_state_final)

tensor([[[-0.8440, -0.0569, -0.8015, -0.4325, -0.3314,  0.5379],
         [-0.8681,  0.1661, -0.7116, -0.2285, -0.2285,  0.2005],
         [-0.9298, -0.1148,  0.1273,  0.9305,  0.8283, -0.2040]],

        [[ 0.5569, -0.2113, -0.7327,  0.5457, -0.7249,  0.4445],
         [ 0.3946, -0.6107, -0.2400,  0.2806, -0.6004,  0.2373],
         [-0.6069, -0.6419, -0.0389,  0.6437,  0.1966,  0.0750]]],
       grad_fn=<TransposeBackward1>)
tensor([[[-0.9298, -0.1148,  0.1273],
         [-0.6069, -0.6419, -0.0389]],

        [[-0.4325, -0.3314,  0.5379],
         [ 0.5457, -0.7249,  0.4445]]], grad_fn=<StackBackward0>)


In [15]:
for k, v in bi_rnn.named_parameters():
    print(k, v)

weight_ih_l0 Parameter containing:
tensor([[-0.5625,  0.4296],
        [-0.0916, -0.0864],
        [ 0.3384,  0.1789]], requires_grad=True)
weight_hh_l0 Parameter containing:
tensor([[ 0.3780,  0.2811, -0.1564],
        [-0.5480,  0.3370,  0.2068],
        [ 0.4430, -0.2981, -0.5468]], requires_grad=True)
bias_ih_l0 Parameter containing:
tensor([-0.4369,  0.1689, -0.4814], requires_grad=True)
bias_hh_l0 Parameter containing:
tensor([-0.5010, -0.4525, -0.0309], requires_grad=True)
weight_ih_l0_reverse Parameter containing:
tensor([[ 0.3608,  0.4691],
        [ 0.5268,  0.0841],
        [-0.1010, -0.1879]], requires_grad=True)
weight_hh_l0_reverse Parameter containing:
tensor([[ 0.0084, -0.2417, -0.5126],
        [-0.3220,  0.2346, -0.3049],
        [ 0.2511, -0.5149,  0.1071]], requires_grad=True)
bias_ih_l0_reverse Parameter containing:
tensor([0.5699, 0.1016, 0.4650], requires_grad=True)
bias_hh_l0_reverse Parameter containing:
tensor([ 0.1358,  0.1725, -0.3583], requires_grad=True)


In [16]:
custom_bi_rnn_output, custom_bi_state_final = bidirectional_rnn_forward(input, bi_rnn.weight_ih_l0, bi_rnn.bias_ih_l0,
                                                                  bi_rnn.weight_hh_l0, bi_rnn.bias_hh_l0, h_prev[0],
                                                                  bi_rnn.weight_ih_l0_reverse, bi_rnn.bias_ih_l0_reverse,
                                                                  bi_rnn.weight_hh_l0_reverse, bi_rnn.bias_hh_l0_reverse,
                                                                  h_prev[1])
print(custom_bi_rnn_output)
print(custom_bi_state_final)

tensor([[[-0.8440, -0.0569, -0.8015,  0.9305,  0.8283, -0.2040],
         [-0.8681,  0.1661, -0.7116, -0.2285, -0.2285,  0.2005],
         [-0.9298, -0.1148,  0.1273, -0.4325, -0.3314,  0.5379]],

        [[ 0.5569, -0.2113, -0.7327,  0.6437,  0.1966,  0.0750],
         [ 0.3946, -0.6107, -0.2400,  0.2806, -0.6004,  0.2373],
         [-0.6069, -0.6419, -0.0389,  0.5457, -0.7249,  0.4445]]],
       grad_fn=<CopySlices>)
tensor([[[-0.9298, -0.1148,  0.1273],
         [-0.6069, -0.6419, -0.0389]],

        [[-0.4325, -0.3314,  0.5379],
         [ 0.5457, -0.7249,  0.4445]]], grad_fn=<TransposeBackward0>)
