In [1]:
import torch

In [2]:
# Sample Chinese sentence used as raw text for the walkthrough.
# Note: the segmentation cell below passes its own literal (without the
# trailing "!") instead of reading this variable.
s = '我爱北京天安门!'

In [5]:
# Word segmentation

import jieba
# jieba.cut yields the tokens lazily (iterator); jieba.lcut returns them as a list.
# Materialising cut() with list() gives the same list that lcut() would return.
words = list(jieba.cut("我爱北京天安门"))
words

['我', '爱', '北京', '天安门']

In [6]:
# Vectorization (embedding)
# Extract sequential (time-step) features
# Tensor layout: [batch_size, seq_len, embedding_dim]
# - batch_size: number of samples in a batch
# - seq_len: sequence length
# - embedding_dim: vector dimension of each token
from torch import nn

In [7]:
# Random stand-in for an embedded batch, laid out as
# [batch_size=16, seq_len=64, embedding_dim=256].
X = torch.randn((16, 64, 256))

In [37]:
"""
入参解释：
- input_size    输入的每个词向量维度：256
- hidden_size   输出每个词的向量维度：512
- num_layers=1  RNN的层数
- nonlinearity='tanh'   内部的激活函数
- bias=True 是否有偏置
- batch_first=False 是否把批量维度放在第一个 [seq_len, batch_size, embedding_dim]
- dropout=0.0   默认不启用
- bidirectional=False   是否是双向RNN
- device=None   存储设备
- dtype=None    数据类型
"""
rnn = nn.RNN(input_size=256, 
             hidden_size=512,
             num_layers=1,
             batch_first=False,
             bidirectional=False)

In [38]:
# After embedding, the data is laid out as [batch_size, seq_len, embedding_size],
# but the RNN (built with batch_first=False) expects
# [seq_len, batch_size, embedding_size] by default,
# so the first two axes have to be swapped.
X1 = X.permute(1, 0, 2)
print(
    f"转换之前的shape {X.shape}",
    f"转换之后的shape {X1.shape}",
    sep="\n",
)

转换之前的shape torch.Size([16, 64, 256])
转换之后的shape torch.Size([64, 16, 256])


In [39]:
# nn.RNN call contract:
#   inputs:  input, hx
#   outputs: output, h_n
#
# `output` records the hidden state produced at every time step, while `h_n`
# records the hidden state at the final time step.
# `hx` is optional: when it is omitted PyTorch starts from an all-zero hidden
# state. It has to be a tensor, so the literal 0 is not a valid substitute;
# an explicit zero tensor is passed here only to show its expected shape.
#
# The initial state is shaped [num_directions * num_layers, batch_size, hidden_size].
# The sizes are read from `rnn` and `X1` instead of being hard-coded, so this
# cell still runs if `rnn` has been rebound to a different configuration
# (for example by the two-layer cell further down).
h0 = torch.zeros(
    rnn.num_layers * (2 if rnn.bidirectional else 1),
    X1.size(1),  # batch dimension of the [seq_len, batch_size, feature] input
    rnn.hidden_size,
    dtype=torch.float,
)
output, h_n = rnn(X1, h0)
print(f"输入是 {X1.shape}")
print(f"输出是 {output.shape}")
print(f"最后一个输出是 {h_n.shape}")

输入是 torch.Size([64, 16, 256])
输出是 torch.Size([64, 16, 512])
最后一个输出是 torch.Size([1, 16, 512])


In [40]:
# For this single-layer RNN the last time step of `output` holds the same
# values as `h_n`. The full slice `output[-1, :, :]` is printed next to `h_n`
# so the two can be compared by eye.
print(
    "output的最后一个其实和hn是一样的 \n"
    f"output {output[-1, :, :]}  \n"
    f" hn {h_n}"
)

output的最后一个其实和hn是一样的 
output tensor([[-0.3548, -0.0835,  0.0580,  ..., -0.7443,  0.4115,  0.6679],
        [ 0.8222,  0.5622, -0.3938,  ..., -0.5744, -0.1881, -0.1546],
        [-0.0710,  0.5078, -0.2589,  ..., -0.5885,  0.2836, -0.0554],
        ...,
        [-0.3088,  0.2612, -0.0703,  ..., -0.1197, -0.4710, -0.0349],
        [ 0.4588, -0.2597, -0.2544,  ..., -0.4350, -0.0330,  0.2473],
        [-0.3499,  0.2656,  0.1242,  ..., -0.0787, -0.3473,  0.0507]],
       grad_fn=<SliceBackward0>)  
 hn tensor([[[-0.3548, -0.0835,  0.0580,  ..., -0.7443,  0.4115,  0.6679],
         [ 0.8222,  0.5622, -0.3938,  ..., -0.5744, -0.1881, -0.1546],
         [-0.0710,  0.5078, -0.2589,  ..., -0.5885,  0.2836, -0.0554],
         ...,
         [-0.3088,  0.2612, -0.0703,  ..., -0.1197, -0.4710, -0.0349],
         [ 0.4588, -0.2597, -0.2544,  ..., -0.4350, -0.0330,  0.2473],
         [-0.3499,  0.2656,  0.1242,  ..., -0.0787, -0.3473,  0.0507]]],
       grad_fn=<StackBackward0>)


In [41]:
# The set of parameter tensors depends on how many layers the RNN has;
# the layer-0 weights and biases are listed here with their shapes.
print(
    "\n".join(
        f"{param_name}: {getattr(rnn, param_name).shape}"
        for param_name in (
            "weight_hh_l0",
            "weight_ih_l0",
            "bias_hh_l0",
            "bias_ih_l0",
        )
    )
)

weight_hh_l0: torch.Size([512, 512])
weight_ih_l0: torch.Size([512, 256])
bias_hh_l0: torch.Size([512])
bias_ih_l0: torch.Size([512])


In [47]:
"""
测试两层神经网络
"""
rnn = nn.RNN(input_size=256, 
             hidden_size=512,
             num_layers=2,
             batch_first=False,
             bidirectional=False)
X1 = torch.permute(input=X, dims=(1, 0, 2))
h0 = torch.zeros(2, 16, 512, dtype=torch.float)
output, h_n = rnn(X1,h0)
print(f"输入是 {X1.shape}")
print(f"两层的时候，这里只有最后一层的输出是 {output.shape}")
print(f"两层的时候，这里只有最后一层的最后一个输出是 {h_n.shape}")
print(f"output的最后一个其实和hn是一样的 \noutput {output[-1,:,:]}  \n hn {h_n}")

输入是 torch.Size([64, 16, 256])
两层的时候，这里只有最后一层的输出是 torch.Size([64, 16, 512])
两层的时候，这里只有最后一层的最后一个输出是 torch.Size([2, 16, 512])
output的最后一个其实和hn是一样的 
output tensor([[-0.0727,  0.2223, -0.1220,  ..., -0.0049, -0.3443,  0.1957],
        [ 0.0110, -0.1342, -0.4635,  ...,  0.2775,  0.0511,  0.2856],
        [ 0.4682, -0.1468, -0.2681,  ..., -0.4639, -0.1938,  0.0648],
        ...,
        [-0.1844, -0.2150, -0.5185,  ..., -0.2146,  0.0795, -0.2569],
        [ 0.1286, -0.1892,  0.2991,  ...,  0.1855, -0.1356,  0.3463],
        [-0.1526, -0.1138, -0.0942,  ...,  0.2970, -0.1958, -0.0390]],
       grad_fn=<SliceBackward0>)  
 hn tensor([[[-0.4653, -0.1231,  0.4320,  ..., -0.0613, -0.5454, -0.3012],
         [ 0.3392, -0.4254,  0.1615,  ..., -0.1905,  0.1031, -0.4940],
         [-0.1764,  0.7190,  0.4638,  ..., -0.1483,  0.4278,  0.1346],
         ...,
         [-0.6177,  0.7016,  0.0532,  ..., -0.6200, -0.1635, -0.1561],
         [-0.1854, -0.2854, -0.2756,  ...,  0.3003, -0.2917, -0.0071],
      

In [46]:
# The set of parameter tensors depends on how many layers the RNN has;
# with two layers both the *_l0 and *_l1 tensors are listed with their shapes.
print(
    "\n".join(
        f"rnn.{param_name} {getattr(rnn, param_name).shape}"
        for param_name in (
            "weight_hh_l0",
            "weight_hh_l1",
            "weight_ih_l0",
            "weight_ih_l1",
            "bias_hh_l0",
            "bias_hh_l1",
        )
    )
)


rnn.weight_hh_l0 torch.Size([512, 512])
rnn.weight_hh_l1 torch.Size([512, 512])
rnn.weight_ih_l0 torch.Size([512, 256])
rnn.weight_ih_l1 torch.Size([512, 512])
rnn.bias_hh_l0 torch.Size([512])
rnn.bias_hh_l1 torch.Size([512])


In [None]:
# 双向RNN
# 是针对输入的数据，正向输入一次，反向输入一次