In [1]:
import torch
import torch.nn as nn
import pandas as pd
import numpy as np

# RNN介绍

| x1  | x2  | x3  | y |
| --- | --- | --- | - |
| 001 | 010 | 100 | 1 |
| 100 | 111 | 000 | 0 |
| 001 | 010 | 100 | 1 |
| 100 | 000 | 111 | 1 |
| 111 | 010 | 100 | 0 |
| 100 | 010 | 000 | 0 |

x1、x2、x3是序列中的三个时间步，时间步的个数就是$\color{#FF0000}{seq len}$（序列的长度，这里为3）；每个时间步由一个长度为3的向量组成，这个向量的长度就是$\color{#FF0000}{input size}$。  
对于语句来说，一行数据就是一句话，每句话由三个单词组成，每个单词用001这样的one-hot编码表示。  

![avatar](image/rnn.jpg)

![avatar](image/rnn_nlp.png)

![avatar](image/rnn2.jpg)

从上面的图可以看出，对于torch.ones(seq_len,batch_size,input_size)这样的输入数据，第一个维度是序列，也就是I、hate、this这三个单词；  
batch_size相当于该序列位置上一个批次内的所有样本，input_size是该序列位置上的特征维度。  
每个时间步上的计算 [batch_size,input_size] * [input_size,out_size] 就是X*W的矩阵乘法。

# 建立输入到输出

out输出的是最后一层在所有时间步h_1到h_N上的隐层结果，hidden输出的是每一层在最后一个时间步计算后的隐状态。这里图中展开的N个RNN单元其实就是同一个RNN单元在不同时间步上的重复使用（参数共享）。

batch_size是输入多少条数据  
seq_len是循环的长度，比如根据前n天天气的n，前n天股票信息的n  
input_size是每一天的信息维度  
hidden_size是神经元数量

In [49]:
class Net(nn.Module):
    """Stacked ``nn.RNN`` followed by a linear layer applied to every time step.

    The RNN is built with ``batch_first=False``, so inputs are laid out as
    ``[seq_len, batch, input_size]``.

    Args:
        input_size: number of features in each time step of the input.
        hidden_size: width of the RNN hidden state.
        num_layers: number of stacked RNN layers.
        num_classes: output width of the final linear layer.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.output_size = num_classes
        self.rnn = nn.RNN(
            input_size=self.input_size,
            hidden_size=self.hidden_size,
            num_layers=self.num_layers,
            batch_first=False,
        )
        # Re-initialise every RNN parameter (weights and biases) from N(0, 0.001^2).
        for p in self.rnn.parameters():
            nn.init.normal_(p, mean=0.0, std=0.001)

        self.linear = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, x):
        """Run the RNN over ``x`` and project every time step through the linear layer.

        Args:
            x: tensor of shape ``[seq_len, batch, input_size]``.

        Returns:
            A tuple ``(out, hidden)`` where ``out`` has shape
            ``[1, seq_len * batch, num_classes]`` (time steps and batch are
            flattened together) and ``hidden`` has shape
            ``[num_layers, batch, hidden_size]``.
        """
        # h0 is [num_layers, batch, hidden_size]. Building it from x keeps it on
        # the same device and dtype as the input, so the model also works after
        # .to(device) / .double() instead of only on CPU float32.
        h0 = x.new_zeros(self.num_layers, x.size(1), self.hidden_size)
        out, hidden = self.rnn(x, h0)
        # Flatten [seq_len, batch, hidden_size] -> [seq_len * batch, hidden_size]
        # so the linear layer sees one row per (time step, sample) pair.
        out = out.view(-1, self.hidden_size)
        out = self.linear(out)
        out = out.unsqueeze(dim=0)
        return out, hidden

batch_first设置为True后，输入数据要把batch_size放在第一维，即[batch_size, seq_len, input_size]；本例中batch_first=False，因此输入形状为[seq_len, batch_size, input_size]。

In [54]:
# Toy dimensions used to check the tensor shapes flowing through the network.
batch_size, seq_len = 5, 3        # samples per batch, time steps per sample
input_size, hidden_size = 4, 2    # features per time step, hidden-state width
num_layers, num_classes = 2, 1    # stacked RNN layers, linear output width
model = Net(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    num_classes=num_classes,
)

In [55]:
# All-ones dummy input laid out as [seq_len, batch_size, input_size].
x = torch.ones((seq_len, batch_size, input_size))

In [56]:
# Forward pass: per-step projections plus the final hidden state of each layer.
output, hidden = model(x)

In [57]:
# Expect [1, seq_len * batch_size, num_classes] and [num_layers, batch_size, hidden_size].
(output.shape, hidden.shape)

(torch.Size([1, 15, 1]), torch.Size([2, 5, 2]))