In [2]:
# Example from the official PyTorch documentation
import torch
import torch.nn as nn

# Two stacked RNN layers: 10 input features, 20 hidden units.
rnn = nn.RNN(input_size=10, hidden_size=20, num_layers=2)
# Layout here is (seq_len, batch_size, input_size).
input = torch.randn(5, 3, 10)
# Initial state is (D * num_layers, batch_size, hidden_size);
# D = 1 for a unidirectional RNN, D = 2 for a bidirectional one.
h0 = torch.randn(2, 3, 20)
output, hn = rnn(input, h0)

In [3]:
import torch
import torch.nn as nn

# 1. Unidirectional, single-layer RNN
single_rnn = nn.RNN(input_size=4, hidden_size=3, num_layers=1, batch_first=True)
# batch_first=True, so the input is (batch_size, seq_len, feature_size).
input = torch.randn(1, 2, 4)
# No initial hidden state is supplied for this call.
output, h_n = single_rnn(input)
print(output)  # (batch_size, seq_len, output_size) = (1, 2, 3)
print(h_n)  # (D * num_layers, batch_size, output_size) = (1, 1, 3)

tensor([[[ 0.2096, -0.0761,  0.5804],
         [ 0.5766, -0.2917,  0.8847]]], grad_fn=<TransposeBackward1>)
tensor([[[ 0.5766, -0.2917,  0.8847]]], grad_fn=<StackBackward>)


In [5]:
# 2. Bidirectional, single-layer RNN
# NOTE(review): this import looks like an accidental IDE auto-import. In the
# visible code `bidirectional` is only ever used as a keyword argument and
# unicodedata.bidirectional is never called -- confirm, then remove.
from unicodedata import bidirectional

bidirectional_rnn = nn.RNN(
    input_size=4,
    hidden_size=3,
    num_layers=1,
    batch_first=True,
    bidirectional=True,
)
bi_output, bi_h_n = bidirectional_rnn(input)
# (1, 2, 6): the last dimension is 6 because the outputs of the forward and
# backward layers are concatenated at the end.
print(bi_output.shape)
# (2, 1, 3)
print(bi_h_n.shape)


torch.Size([1, 2, 6])
torch.Size([2, 1, 3])


$$
h_{t}=\tanh \left(x_{t} W_{i h}^{T}+b_{i h}+h_{t-1} W_{h h}^{T}+b_{h h}\right)
$$

In [24]:
import torch
import torch.nn as nn

bs, T = 2, 3  # batch size and sequence length
input_size, hidden_size = 2, 3
# Randomly initialised input feature sequence.
input = torch.randn(bs, T, input_size)
# Initial hidden state (time step 0).
h_prev = torch.zeros(bs, hidden_size)

# Step 1: call the PyTorch RNN API
rnn = nn.RNN(input_size, hidden_size, batch_first=True)
# The initial state is passed with an extra leading axis: (1, bs, hidden_size).
rnn_output, state_final = rnn(input, h_prev[None, :, :])
print(rnn_output)
print(state_final)


# Step 2: a hand-written rnn_forward function. Since version 1.0 the core RNN
# routine is implemented in C, so the library source is not examined here.
def rnn_forward(input, weight_ih, weight_hh, bias_ih, bias_hh, h_prev):
    """Run a single-layer, unidirectional tanh RNN over a batch-first sequence.

    Every time step applies
        h_t = tanh(x_t @ weight_ih.T + bias_ih + h_{t-1} @ weight_hh.T + bias_hh)

    Args:
        input: Input sequence of shape (bs, T, input_size).
        weight_ih: Input-to-hidden weights of shape (h_dim, input_size).
        weight_hh: Hidden-to-hidden weights of shape (h_dim, h_dim).
        bias_ih: Input-to-hidden bias, broadcastable to (bs, h_dim).
        bias_hh: Hidden-to-hidden bias, broadcastable to (bs, h_dim).
        h_prev: Initial hidden state of shape (bs, h_dim).

    Returns:
        A tuple (h_out, h_final): h_out has shape (bs, T, h_dim) and holds the
        hidden state of every time step; h_final has shape (1, bs, h_dim) and
        is the hidden state after the last step (the initial state if T == 0).
    """
    bs, T, input_size = input.shape
    h_dim = weight_ih.shape[0]
    # Allocate from `input` so the buffer shares its dtype and device; a plain
    # torch.zeros would silently downcast float64 results and break on GPU input.
    h_out = input.new_zeros(bs, T, h_dim)

    for t in range(T):
        x = input[:, t, :]  # input at the current time step: (bs, input_size)
        # (bs, input_size) @ (input_size, h_dim) -> (bs, h_dim); no need to
        # replicate the weights per batch element.
        w_times_x = x @ weight_ih.T
        w_times_h = h_prev @ weight_hh.T  # (bs, h_dim)
        # Hidden state (and output) at step t.
        h_prev = torch.tanh(w_times_x + bias_ih + w_times_h + bias_hh)
        h_out[:, t, :] = h_prev

    return h_out, h_prev.unsqueeze(0)

# Check the hand-written implementation by feeding it the parameters of the
# nn.RNN layer (rnn.named_parameters() lists them by name).
custom_rnn_output, customm_state_final = rnn_forward(
    input,
    rnn.weight_ih_l0,
    rnn.weight_hh_l0,
    rnn.bias_ih_l0,
    rnn.bias_hh_l0,
    h_prev,
)
print(custom_rnn_output)
print(customm_state_final)

tensor([[[-0.0717,  0.7278,  0.1232],
         [ 0.0612,  0.2570, -0.5203],
         [-0.5873,  0.3653,  0.3944]],

        [[-0.6452,  0.1224,  0.5377],
         [-0.5038,  0.2123,  0.0262],
         [ 0.2256,  0.7519, -0.4472]]], grad_fn=<TransposeBackward1>)
tensor([[[-0.5873,  0.3653,  0.3944],
         [ 0.2256,  0.7519, -0.4472]]], grad_fn=<StackBackward>)
tensor([[[-0.0717,  0.7278,  0.1232],
         [ 0.0612,  0.2570, -0.5203],
         [-0.5873,  0.3653,  0.3944]],

        [[-0.6452,  0.1224,  0.5377],
         [-0.5038,  0.2123,  0.0262],
         [ 0.2256,  0.7519, -0.4472]]], grad_fn=<CopySlices>)
tensor([[[-0.5873,  0.3653,  0.3944],
         [ 0.2256,  0.7519, -0.4472]]], grad_fn=<UnsqueezeBackward0>)


In [29]:
# Step 3: a hand-written bidirectional_rnn_forward function that shows how a
# bidirectional RNN is computed.
def bidirectional_rnn_forward(input, weight_ih, weight_hh, bias_ih, bias_hh, h_prev,
                              weight_ih_reverse, weight_hh_reverse, bias_ih_reverse,
                              bias_hh_reverse, h_prev_reverse):
    """Run a single-layer bidirectional tanh RNN built from two rnn_forward passes.

    The forward direction reads `input` as given; the backward direction reads
    it flipped along the time axis, and its outputs are flipped back so that
    position t of both halves refers to the same input step.

    Args:
        input: Input sequence of shape (bs, T, input_size).
        weight_ih, weight_hh, bias_ih, bias_hh: Forward-direction parameters,
            passed to rnn_forward unchanged.
        h_prev: Initial forward hidden state of shape (bs, h_dim).
        weight_ih_reverse, weight_hh_reverse, bias_ih_reverse, bias_hh_reverse:
            Backward-direction parameters, passed to rnn_forward unchanged.
        h_prev_reverse: Initial backward hidden state of shape (bs, h_dim).

    Returns:
        A tuple (h_out, h_final): h_out has shape (bs, T, 2 * h_dim) with the
        forward outputs in [..., :h_dim] and the backward outputs in
        [..., h_dim:]; h_final has shape (2, bs, h_dim), with index 0 the final
        forward state and index 1 the final backward state.
    """
    forward_output, forward_state = rnn_forward(
        input, weight_ih, weight_hh, bias_ih, bias_hh, h_prev)

    # Flip the input along the sequence-length dimension for the backward pass.
    reversed_output, backward_state = rnn_forward(
        torch.flip(input, [1]),
        weight_ih_reverse, weight_hh_reverse, bias_ih_reverse, bias_hh_reverse,
        h_prev_reverse)
    # Undo the flip so the backward outputs line up with the forward ones.
    backward_output = torch.flip(reversed_output, [1])

    # The bidirectional output is twice as wide: forward half, then backward half.
    h_out = torch.cat([forward_output, backward_output], dim=-1)

    # The backward direction finishes at input position 0, so its final state is
    # NOT h_out[:, -1, h_dim:] (that is only its first step). Use the states
    # returned by the two passes instead.
    h_final = torch.cat([forward_state, backward_state], dim=0)

    return h_out, h_final

bi_rnn = nn.RNN(input_size, hidden_size, batch_first=True, bidirectional=True)
# Zero initial states: index 0 is handed to the forward direction below,
# index 1 to the reverse direction.
h_prev = torch.zeros(2, bs, hidden_size)
bi_rnn_output, bi_rnn_state_final = bi_rnn(input, h_prev)

# Feed the layer's own parameters to the hand-written implementation
# (bi_rnn.named_parameters() lists them by name).
custom_bi_rnn_output, customm_bi_state_final = bidirectional_rnn_forward(
    input,
    bi_rnn.weight_ih_l0,
    bi_rnn.weight_hh_l0,
    bi_rnn.bias_ih_l0,
    bi_rnn.bias_hh_l0,
    h_prev[0],
    bi_rnn.weight_ih_l0_reverse,
    bi_rnn.weight_hh_l0_reverse,
    bi_rnn.bias_ih_l0_reverse,
    bi_rnn.bias_hh_l0_reverse,
    h_prev[1],
)

print(bi_rnn_output)
print(custom_bi_rnn_output)
print(bi_rnn_state_final)
print(customm_bi_state_final)


torch.Size([2, 3, 2])
tensor([[[ 0.2760, -0.0069, -0.5394, -0.6023, -0.8299,  0.4788],
         [ 0.2576,  0.7831, -0.4671,  0.0680, -0.4369, -0.1669],
         [ 0.4671,  0.0510, -0.9003, -0.8580,  0.0632,  0.5948]],

        [[ 0.6516, -0.2474, -0.7960, -0.7446,  0.2857,  0.5412],
         [ 0.7751,  0.3338, -0.7345, -0.3145, -0.0457,  0.2821],
         [ 0.3365,  0.6906, -0.5649, -0.4892, -0.6540,  0.0592]]],
       grad_fn=<TransposeBackward1>)
tensor([[[ 0.2760, -0.0069, -0.5394, -0.6023, -0.8299,  0.4788],
         [ 0.2576,  0.7831, -0.4671,  0.0680, -0.4369, -0.1669],
         [ 0.4671,  0.0510, -0.9003, -0.8580,  0.0632,  0.5948]],

        [[ 0.6516, -0.2474, -0.7960, -0.7446,  0.2857,  0.5412],
         [ 0.7751,  0.3338, -0.7345, -0.3145, -0.0457,  0.2821],
         [ 0.3365,  0.6906, -0.5649, -0.4892, -0.6540,  0.0592]]],
       grad_fn=<CopySlices>)
tensor([[[ 0.4671,  0.0510, -0.9003],
         [ 0.3365,  0.6906, -0.5649]],

        [[-0.6023, -0.8299,  0.4788],
        