## RNN = Recurrent Neural Network
#### $$h_t = \tanh(w_{ih} \times x_t + b_{ih} + w_{hh} \times h_{t-1} + b_{hh})$$
##### 1. $h_{t-1}$ - 前一时刻输出的隐藏变量
##### 2. $x_t$ - 该时刻的输入向量

In [1]:
from torch import nn

In [2]:
# Single-step recurrent cell: 5 input features, 7 hidden features.
rnn_cell = nn.RNNCell(input_size=5, hidden_size=7)

In [3]:
# Display the cell's repr to confirm the configured input/hidden sizes.
rnn_cell

RNNCell(5, 7)

In [4]:
# Inspect the RNNCell's parameters: the input-to-hidden and hidden-to-hidden
# weight matrices, followed by their matching bias vectors.
print(
    rnn_cell.weight_ih,
    rnn_cell.weight_hh,
    rnn_cell.bias_ih,
    rnn_cell.bias_hh,
    sep="\n",
)

Parameter containing:
tensor([[ 0.0077,  0.1008,  0.1247,  0.0154, -0.0833],
        [ 0.3569, -0.0952,  0.3412,  0.3544, -0.3669],
        [ 0.2429, -0.1582,  0.0115,  0.1180, -0.1244],
        [-0.2614, -0.0604, -0.2257, -0.0659,  0.1029],
        [-0.1459,  0.1111,  0.1475, -0.3432, -0.3094],
        [-0.1935,  0.0313, -0.0643, -0.1423, -0.2273],
        [ 0.0744, -0.0722, -0.3413, -0.3499, -0.1177]], requires_grad=True)
Parameter containing:
tensor([[-0.1566, -0.1068,  0.2955,  0.3457, -0.3213,  0.1127,  0.2691],
        [ 0.0831,  0.2043,  0.0523, -0.1140,  0.0712, -0.0049,  0.1523],
        [ 0.0377,  0.0293, -0.3171,  0.1407, -0.0375,  0.0279,  0.0204],
        [ 0.0438,  0.1344, -0.0286,  0.1958, -0.2138, -0.2879,  0.1420],
        [ 0.0525,  0.0703,  0.2155, -0.3119,  0.0903, -0.2384,  0.2589],
        [-0.0364,  0.2902, -0.0801,  0.1708,  0.1888,  0.0107, -0.3042],
        [-0.1022, -0.3021, -0.0116,  0.2234, -0.0167,  0.3207, -0.2658]],
       requires_grad=True)
Parameter c

In [5]:
import torch

In [6]:
# One time step: a (1, 5) uniform-random input and a (1, 7) normal-random
# hidden state; the cell returns the next hidden state of shape (1, 7).
Input = torch.rand((1, 5))
Hidden = torch.randn((1, 7))
rnn_cell(Input, Hidden)

tensor([[ 0.5858, -0.2554, -0.0037,  0.1401, -0.2586, -0.5967,  0.1548]],
       grad_fn=<TanhBackward>)

In [7]:
# Unlike RNNCell, nn.RNN consumes an entire sequence in one call.
rnn = nn.RNN(input_size=5, hidden_size=7)

In [8]:
# Input is laid out as (sequence length 3, batch size 2, feature size 5).
Input = torch.rand((3, 2, 5))
# Initial hidden state: one entry per batch element, 7 hidden features each.
Hidden_0 = torch.randn((1, 2, 7))

In [9]:
# Run the whole sequence through the RNN. The call returns a pair, unpacked
# here into Output and Hidden_1; their sizes are printed in the next cell
# ([3, 2, 7] and [1, 2, 7] respectively).
Output, Hidden_1 = rnn(Input, Hidden_0)

In [10]:
# Report the shapes of the RNN output and of the returned hidden state.
print(Output.size(), Hidden_1.size(), sep="\n")

torch.Size([3, 2, 7])
torch.Size([1, 2, 7])


## LSTM = Long Short-Term Memory
### Hochreiter and Schmidhuber, 1997, 处理 "梯度消失" 现象
#### 输入: $h_{t-1}, c_{t-1}, x_t$
#### 输出: $(h_t, c_t)$

In [11]:
# Single-step LSTM cell: 5 input features, 7 hidden/cell-state features.
lstm_cell = nn.LSTMCell(input_size=5, hidden_size=7)

In [12]:
# One time step for the LSTM cell: a (1, 5) input plus the initial hidden
# state H_0 and cell state C_0, each of shape (1, 7).
Input = torch.rand((1, 5))
H_0 = torch.randn((1, 7))
C_0 = torch.randn((1, 7))

In [13]:
# One LSTM step: pass the input together with the (hidden, cell) state pair.
# The displayed result is the updated pair of tensors, each of shape (1, 7).
lstm_cell(Input, (H_0, C_0))

(tensor([[-0.4472,  0.2017, -0.2630, -0.3902, -0.1862, -0.0136,  0.1125]],
        grad_fn=<MulBackward0>),
 tensor([[-0.7371,  0.4007, -0.4013, -0.5062, -0.3430, -0.0419,  0.4852]],
        grad_fn=<AddBackward0>))

In [14]:
# Sequence-level LSTM with 5 input features and 7 hidden features.
lstm = nn.LSTM(input_size=5, hidden_size=7)

In [15]:
# Same layout as the RNN example: a (3, 2, 5) input, plus initial hidden and
# cell states of shape (1, 2, 7).
Input = torch.rand((3, 2, 5))
H_0 = torch.randn((1, 2, 7))
C_0 = torch.randn((1, 2, 7))

In [16]:
# Run the whole sequence through the LSTM. The result is unpacked into Output
# and the (H_1, C_1) state pair; their sizes are printed in the next cell
# ([3, 2, 7], [1, 2, 7] and [1, 2, 7] respectively).
Output, (H_1, C_1) = lstm(Input, (H_0, C_0))

In [17]:
# Report the shapes of the LSTM output and of the returned hidden/cell states.
print(Output.size(), H_1.size(), C_1.size(), sep="\n")

torch.Size([3, 2, 7])
torch.Size([1, 2, 7])
torch.Size([1, 2, 7])
