In [23]:
import torch
import torch.nn as nn

bs, T = 2, 3
input_size, hidden_size = 2, 3
input = torch.randn(bs, T, input_size)
h_prev = torch.zeros(bs, hidden_size)

In [26]:
# 单向、单层RNN
single_rnn = nn.RNN(input_size, hidden_size, batch_first=True)
output, h_n = single_rnn(input, h_prev.unsqueeze(0))
print("torch.nn:")
print(output, h_n, sep='\n')

torch.nn:
tensor([[[ 0.5132, -0.3790, -0.1950],
         [ 0.8378,  0.2247, -0.4548],
         [ 0.8367,  0.9223,  0.3713]],

        [[ 0.0602, -0.5196, -0.1648],
         [-0.1270,  0.0013, -0.2568],
         [-0.0058,  0.7812,  0.5089]]], grad_fn=<TransposeBackward1>)
tensor([[[ 0.8367,  0.9223,  0.3713],
         [-0.0058,  0.7812,  0.5089]]], grad_fn=<StackBackward0>)


In [6]:
# 双向、单层RNN
bidirectional_rnn = nn.RNN(4, 3, 1, batch_first=True, bidirectional=True)
bi_output, bi_h_n = bidirectional_rnn(input)
print(bi_output, bi_h_n, sep='\n')

tensor([[[-0.1368,  0.7060, -0.2620, -0.6250, -0.3227,  0.7300],
         [ 0.0046,  0.7662, -0.3822, -0.2390, -0.8976,  0.8876]]],
       grad_fn=<TransposeBackward1>)
tensor([[[ 0.0046,  0.7662, -0.3822]],

        [[-0.6250, -0.3227,  0.7300]]], grad_fn=<StackBackward0>)


In [25]:
# 手动实现一个单项RNN

def forward(input, w_ih, b_ih, w_hh, b_hh, h_prev):
    bs, T, input_size = input.shape
    h_dim = w_ih.shape[0]
    h_out = torch.zeros(bs, T, h_dim) # 初始化一个输出
    
    for t in range(T):
        x = input[:, t, :].unsqueeze(2) # 获取当前时刻输入  bs * input_size * 1
        w_ih_batch = w_ih.unsqueeze(0).tile(bs, 1, 1) # bs * h_dim * input_size
        w_hh_batch = w_hh.unsqueeze(0).tile(bs, 1, 1) # bs * h_dim * h_dim
        
        # bmm相当于第一维不动 后两维矩阵相乘
        w_times_x = torch.bmm(w_ih_batch, x).squeeze(-1)  # 得到bs * h_dim
        w_times_h = torch.bmm(w_hh_batch, h_prev.unsqueeze(2)).squeeze(-1)  # 得到bs * h_dim
        h_prev = torch.tanh(w_times_x + b_ih + w_times_h + b_hh)
        
        h_out[:, t, :] = h_prev
        
    return h_out, h_prev.unsqueeze(0)
 
# 验证rnn_forward正确性
# for k, v in single_rnn.named_parameters():
#     print(k, v)
    
custom_rnn_output, custom_state_final = forward(input, single_rnn.weight_ih_l0, \
                                               single_rnn.bias_ih_l0, single_rnn.weight_hh_l0, \
                                               single_rnn.bias_hh_l0, h_prev)

print("Custom:")
print(custom_rnn_output, custom_state_final, sep='\n')



# 手动实现一个双向RNN
def bidirectional_rnn_forward(input, w_ih, b_ih, w_hh, b_hh, h_prev, \
                             w_ih_r, ):
    

Custom:
tensor([[[ 0.4180, -0.3473,  0.4346],
         [ 0.4717, -0.0361, -0.4478],
         [ 0.8994,  0.9366,  0.4396]],

        [[ 0.3011, -0.3287,  0.7705],
         [ 0.0039, -0.0573,  0.6931],
         [ 0.3710,  0.6758,  0.8040]]], grad_fn=<CopySlices>)
tensor([[[0.8994, 0.9366, 0.4396],
         [0.3710, 0.6758, 0.8040]]], grad_fn=<UnsqueezeBackward0>)
