In [1]:
import torch
import torch.nn as nn
import pandas as pd
import numpy as np

# RNN介绍

| x1  | x2  | x3  | y |
| --- | --- | --- | - |
| 001 | 010 | 100 | 1 |
| 100 | 111 | 000 | 0 |
| 001 | 010 | 100 | 1 |
| 100 | 000 | 111 | 1 |
| 111 | 010 | 100 | 0 |
| 100 | 010 | 000 | 0 |

x1、x2、x3 是序列中的三个位置，所以序列长度 $\color{#FF0000}{seq len}$ 为 3；每个位置上是一个长度为 3 的向量，这个向量的长度就是 $\color{#FF0000}{input size}$。  
对于语句来说，一行数据就是一句话，每句话由三个单词组成，每个单词用 001 这样的 one-hot 编码表示。  

![avatar](image/rnn.jpg)

![avatar](image/rnn_nlp.png)

![avatar](image/rnn2.jpg)

从上面的图可以看出，输入数据形如 torch.ones(seq_len,batch_size,input_size)：第一个维度是序列位置，也就是 I、hate、this 这三个单词；  
batch_size 是同一序列位置上这一批次的所有样本，input_size 是每个序列位置的特征维度。  
每个时间步的计算 [batch_size,input_size] * [input_size,out_size] 就是 X*W 的矩阵乘法。

# 建立输入到输出

out 输出的是最后一层在所有时间步（h_1 到 h_N）的隐层结果，hidden 输出的是每一层在最后一个时间步计算后的隐状态。图中展开的 N 个 RNN 单元其实是同一个单元（共享同一组参数）。

batch_size是输入多少条数据  
seq_len是循环的长度，比如根据前n天天气的n，前n天股票信息的n  
input_size是每一天的信息维度  
hidden_size是隐藏层神经元的数量，也就是隐状态向量的维度

# batch_first = True

In [91]:
class Net(nn.Module):
    """Two-layer ``nn.RNN`` wrapper that consumes batch-first input.

    The recurrent layer is configured with ``input_size=12``,
    ``hidden_size=64``, ``num_layers=2`` and ``batch_first=True``.
    """

    def __init__(self):
        super().__init__()
        self.input_size = 12
        self.hidden_size = 64
        self.num_layers = 2
        self.rnn = nn.RNN(
            input_size=self.input_size,
            hidden_size=self.hidden_size,
            num_layers=self.num_layers,
            batch_first=True,
        )

    def forward(self, x):
        """Run the RNN over a batch of sequences.

        Args:
            x: tensor laid out as ``[batch, seq_len, input_size]``.

        Returns:
            ``(out, hidden)`` where ``out`` is ``[batch, seq_len, hidden_size]``
            and ``hidden`` is ``[num_layers, batch, hidden_size]``.
        """
        # Explicit zero initial hidden state. Passing it is optional: nn.RNN
        # uses zeros when no initial state is given.
        # new_zeros() inherits x's device and dtype, so the module keeps
        # working after .to(device) / .double(); a bare torch.zeros() would
        # always allocate a default-dtype CPU tensor and then mismatch.
        h0 = x.new_zeros(self.num_layers, x.size(0), self.hidden_size)
        out, hidden = self.rnn(x, h0)
        return out, hidden

In [92]:
# Instantiate the batch_first=True RNN wrapper.
model = Net()
# Input layout: [batch, seq_len, feature]
x = torch.rand(128,28,12)
output,hidden = model(x)
# Show the shapes of both returned tensors.
output.shape,hidden.shape

(torch.Size([128, 28, 64]), torch.Size([2, 128, 64]))

True的模式下  
output维度是[batch,seq_len,hidden_size]  
hidden的维度是[num_layers,batch,hidden_size]  

# batch_first = False

In [95]:
class Net2(nn.Module):
    """Two-layer ``nn.RNN`` wrapper that consumes sequence-first input.

    The recurrent layer is configured with ``input_size=12``,
    ``hidden_size=64``, ``num_layers=2`` and ``batch_first=False``.
    """

    def __init__(self):
        super().__init__()
        self.input_size = 12
        self.hidden_size = 64
        self.num_layers = 2
        self.rnn = nn.RNN(
            input_size=self.input_size,
            hidden_size=self.hidden_size,
            num_layers=self.num_layers,
            batch_first=False,
        )

    def forward(self, x):
        """Run the RNN over a batch of sequences.

        Args:
            x: tensor laid out as ``[seq_len, batch, input_size]``.

        Returns:
            ``(out, hidden)`` where ``out`` is ``[seq_len, batch, hidden_size]``
            and ``hidden`` is ``[num_layers, batch, hidden_size]``.
        """
        # Explicit zero initial hidden state. Passing it is optional: nn.RNN
        # uses zeros when no initial state is given.
        # The batch axis is dim 1 in the sequence-first layout.
        # new_zeros() inherits x's device and dtype, so the module keeps
        # working after .to(device) / .double(); a bare torch.zeros() would
        # always allocate a default-dtype CPU tensor and then mismatch.
        h0 = x.new_zeros(self.num_layers, x.size(1), self.hidden_size)
        out, hidden = self.rnn(x, h0)
        return out, hidden

In [96]:
# Instantiate the batch_first=False RNN wrapper.
model = Net2()
# Input layout: [seq_len, batch, feature]
x = torch.rand(28,128,12)
output,hidden = model(x)
# Show the shapes of both returned tensors.
output.shape,hidden.shape

(torch.Size([28, 128, 64]), torch.Size([2, 128, 64]))

False的模式下  
output维度是[seq_len,batch,hidden_size]  
hidden的维度是[num_layers,batch,hidden_size] 

In [97]:
# Index -1 along output's first axis; for the batch_first=False run this is
# the sequence axis, i.e. the last time step for every batch element.
output[-1,:,:]

tensor([[ 0.1120, -0.1544, -0.1922,  ..., -0.1320,  0.0459,  0.1618],
        [ 0.1295, -0.1396, -0.2902,  ..., -0.0764,  0.0439,  0.1843],
        [ 0.2361, -0.2360, -0.3124,  ..., -0.1758, -0.0555,  0.2236],
        ...,
        [ 0.1080, -0.2091, -0.2772,  ..., -0.3361, -0.1193,  0.2419],
        [ 0.0247, -0.1583, -0.3389,  ..., -0.2296, -0.0656,  0.1738],
        [ 0.1520, -0.1978, -0.4128,  ..., -0.1930, -0.1602,  0.2644]],
       grad_fn=<SliceBackward>)

In [99]:
# Index -1 along hidden's first axis (the num_layers axis), i.e. the final
# hidden state of the last layer; compare with output[-1,:,:].
hidden[-1,:,:]

tensor([[ 0.1120, -0.1544, -0.1922,  ..., -0.1320,  0.0459,  0.1618],
        [ 0.1295, -0.1396, -0.2902,  ..., -0.0764,  0.0439,  0.1843],
        [ 0.2361, -0.2360, -0.3124,  ..., -0.1758, -0.0555,  0.2236],
        ...,
        [ 0.1080, -0.2091, -0.2772,  ..., -0.3361, -0.1193,  0.2419],
        [ 0.0247, -0.1583, -0.3389,  ..., -0.2296, -0.0656,  0.1738],
        [ 0.1520, -0.1978, -0.4128,  ..., -0.1930, -0.1602,  0.2644]],
       grad_fn=<SliceBackward>)

# LSTM

In [64]:
# input_size: number of features per time step
# hidden_size: number of hidden units
# num_layers: number of stacked recurrent layers
# By default nn.LSTM expects input laid out as (seq_len, batch, feature)
class LSTM(nn.Module):
    """Single-layer LSTM (12 input features, 64 hidden units).

    ``flag`` is forwarded to ``nn.LSTM`` as ``batch_first``. A 64 -> 10 linear
    layer is also created as ``self.out``, but ``forward`` does not apply it.
    """

    def __init__(self,flag):
        super().__init__()
        lstm_config = dict(
            input_size=12,
            hidden_size=64,
            num_layers=1,
            batch_first=flag,
        )
        self.lstm = nn.LSTM(**lstm_config)
        self.out = nn.Linear(64, 10)

    def forward(self, x):
        """Return ``(output, (h_n, c_n))`` exactly as produced by ``nn.LSTM``.

        ``output`` holds the hidden state at every time step, while ``h_n``
        and ``c_n`` hold only the final time step's hidden state and cell
        state respectively.
        """
        output, final_state = self.lstm(x)
        h_n, c_n = final_state
        return output, (h_n, c_n)
       

In [65]:
# batch_first设置为True
model = LSTM(True)
# batch,seq_len,feature
x = torch.rand(128,28,12)
a,(b,c) = model(x)
a.shape,b.shape,c.shape

(torch.Size([128, 28, 64]), torch.Size([1, 128, 64]), torch.Size([1, 128, 64]))

True的模式下  
output维度是[batch,seq_len,hidden_size]  
h_n的维度是[num_layers,batch,hidden_size]  
c_n的维度是[num_layers,batch,hidden_size]

In [66]:
# batch_first设置为False
model = LSTM(False)
# seq_len,batch,feature
x = torch.rand(28,128,12)
a,(b,c) = model(x)
a.shape,b.shape,c.shape

(torch.Size([28, 128, 64]), torch.Size([1, 128, 64]), torch.Size([1, 128, 64]))

False的模式下  
output维度是[seq_len,batch,hidden_size]  
h_n的维度是[num_layers,batch,hidden_size]  
c_n的维度是[num_layers,batch,hidden_size]

In [70]:
# Index -1 along a's first axis; for the batch_first=False run this is the
# sequence axis, i.e. the output at the last time step.
a[-1,:,:]

tensor([[ 0.1301,  0.0610, -0.0604,  ..., -0.0548,  0.0606,  0.2497],
        [ 0.1255,  0.0589, -0.0490,  ..., -0.0105,  0.0410,  0.2842],
        [ 0.1395,  0.0280, -0.0617,  ..., -0.0073,  0.0297,  0.2154],
        ...,
        [ 0.1219,  0.0582, -0.0552,  ..., -0.0449,  0.0457,  0.2289],
        [ 0.1080,  0.0659, -0.0380,  ..., -0.0206,  0.0564,  0.2877],
        [ 0.0973,  0.0531, -0.0474,  ..., -0.0218,  0.0481,  0.2067]],
       grad_fn=<SliceBackward>)

In [71]:
# b is h_n returned by the LSTM; compare its values with a[-1,:,:].
b

tensor([[[ 0.1301,  0.0610, -0.0604,  ..., -0.0548,  0.0606,  0.2497],
         [ 0.1255,  0.0589, -0.0490,  ..., -0.0105,  0.0410,  0.2842],
         [ 0.1395,  0.0280, -0.0617,  ..., -0.0073,  0.0297,  0.2154],
         ...,
         [ 0.1219,  0.0582, -0.0552,  ..., -0.0449,  0.0457,  0.2289],
         [ 0.1080,  0.0659, -0.0380,  ..., -0.0206,  0.0564,  0.2877],
         [ 0.0973,  0.0531, -0.0474,  ..., -0.0218,  0.0481,  0.2067]]],
       grad_fn=<StackBackward>)

可以发现，output 在最后一个时间步的输出（即 a[-1]）就等于 h_n（这里是单层、单向的 LSTM）。