In [138]:
import torch
import torch.nn as nn

# RNN

![title](image/rnn.png)

## nn.RNNCell

In [139]:
# Processes one word (one time step) at a time

### Single Cell

In [142]:
# x: [seq_len: 10, batch_size: 3, word_vec: 100]
# Build the input in this cell so it runs on a fresh kernel instead of relying
# on a tensor left over from another cell.
x = torch.randn(10, 3, 100)
x.shape

torch.Size([10, 3, 100])

In [143]:
# A single recurrent cell: 100 input features per step, 20 hidden units.
cell1 = nn.RNNCell(input_size=100, hidden_size=20)
cell1

RNNCell(100, 20)

In [144]:
# Names of the parameters registered on the cell (read from its internal
# `_parameters` mapping).
cell1._parameters.keys()

odict_keys(['weight_ih', 'weight_hh', 'bias_ih', 'bias_hh'])

In [145]:
# Shapes of the input-to-hidden weight, hidden-to-hidden weight and both biases.
cell1.weight_ih.shape, cell1.weight_hh.shape, cell1.bias_ih.shape, cell1.bias_hh.shape

(torch.Size([20, 100]),
 torch.Size([20, 20]),
 torch.Size([20]),
 torch.Size([20]))

In [146]:
# Zero initial hidden state, h1: [batch_size: 3, hidden_size: 20]
h1 = torch.zeros((3, 20))

In [147]:
# Walk the sequence one time step at a time.
# x is torch.Size([10, 3, 100]); splitting along dim 0 yields ten slices,
# each xt of torch.Size([3, 100]).
for xt in x.unbind(dim=0):
    # cell1 is nn.RNNCell(100, 20), so h1 stays [batch_size: 3, hidden_size: 20]
    # and is fed back in on the next step.
    h1 = cell1(xt, h1)

In [148]:
# Shape of the hidden state h1.
print(h1.shape)

torch.Size([3, 20])


### Two Cells

In [149]:
# Two stacked cells: the first maps 100 -> 30, the second consumes the first
# cell's hidden state and maps 30 -> 20.
cell1 = nn.RNNCell(input_size=100, hidden_size=30)
cell2 = nn.RNNCell(input_size=cell1.hidden_size, hidden_size=20)

In [150]:
# Zero initial hidden states, one per cell.
# h1: [batch_size: 3, hidden_size: 30]
# h2: [batch_size: 3, hidden_size: 20]
h1, h2 = torch.zeros((3, 30)), torch.zeros((3, 20))

In [151]:
# Walk the sequence one time step at a time through both cells.
# x is torch.Size([10, 3, 100]); each slice xt is torch.Size([3, 100]).
for xt in x.unbind(dim=0):
    # First cell reads the input; h1: [batch_size: 3, hidden_size: 30]
    h1 = cell1(xt, h1)
    # Second cell reads the first cell's new state; h2: [batch_size: 3, hidden_size: 20]
    h2 = cell2(h1, h2)

In [152]:
# Shapes of the hidden states of the two cells.
h1.shape, h2.shape

(torch.Size([3, 30]), torch.Size([3, 20]))

## nn.RNN

In [153]:
# Processes one whole sentence (sequence) at a time

In [154]:
# Three stacked RNN layers: 100 input features per step, 10 hidden units each.
rnn = nn.RNN(input_size=100, hidden_size=10, num_layers=3)
rnn

RNN(100, 10, num_layers=3)

In [155]:
# Parameter names
rnn._parameters.keys()

odict_keys(['weight_ih_l0', 'weight_hh_l0', 'bias_ih_l0', 'bias_hh_l0', 'weight_ih_l1', 'weight_hh_l1', 'bias_ih_l1', 'bias_hh_l1', 'weight_ih_l2', 'weight_hh_l2', 'bias_ih_l2', 'bias_hh_l2'])

In [156]:
# Parameter shapes of the first layer
rnn.weight_ih_l0.shape, rnn.weight_hh_l0.shape, rnn.bias_ih_l0.shape, rnn.bias_hh_l0.shape

(torch.Size([10, 100]),
 torch.Size([10, 10]),
 torch.Size([10]),
 torch.Size([10]))

In [157]:
# Parameter shapes of the second layer
rnn.weight_ih_l1.shape, rnn.weight_hh_l1.shape, rnn.bias_ih_l1.shape, rnn.bias_hh_l1.shape

(torch.Size([10, 10]),
 torch.Size([10, 10]),
 torch.Size([10]),
 torch.Size([10]))

In [158]:
# Parameter shapes of the third layer
rnn.weight_ih_l2.shape, rnn.weight_hh_l2.shape, rnn.bias_ih_l2.shape, rnn.bias_hh_l2.shape

(torch.Size([10, 10]),
 torch.Size([10, 10]),
 torch.Size([10]),
 torch.Size([10]))

### Single Layer RNN

In [159]:
# One-layer RNN: 100 input features per step, 20 hidden units.
rnn = nn.RNN(
    input_size=100,
    hidden_size=20,
    num_layers=1,
)
rnn

RNN(100, 20)

In [160]:
# Random input batch, x: [seq_len: 10, batch_size: 3, word_vec: 100]
#   seq_len:    sentence length
#   batch_size: batch size
#   word_vec:   length of each word vector
x = torch.randn((10, 3, 100))

In [161]:
# Run the whole sequence through the RNN without passing an initial hidden state.
# out: [seq_len: 10, batch_size: 3, hidden_size: 20]
# ht: [num_layers: 1, batch_size: 3, hidden_size: 20]
out, ht = rnn(x)
print(out.shape, ht.shape)

torch.Size([10, 3, 20]) torch.Size([1, 3, 20])


In [162]:
# Explicit zero initial hidden state.
# h0: [num_layers: 1, batch_size: 3, hidden_size: 20]
#   num_layers:  number of hidden layers
#   batch_size:  batch size
#   hidden_size: size of each hidden layer
h0 = torch.zeros((1, 3, 20))

In [163]:
# Same forward pass, this time passing the initial hidden state h0 explicitly.
# out: [seq_len: 10, batch_size: 3, hidden_size: 20]
# ht: [num_layers: 1, batch_size: 3, hidden_size: 20]
out, ht = rnn(x, h0)
print(out.shape, ht.shape)

torch.Size([10, 3, 20]) torch.Size([1, 3, 20])


### Multi Layer RNN

In [164]:
# Four stacked RNN layers: 100 input features per step, 20 hidden units each.
rnn = nn.RNN(
    input_size=100,
    hidden_size=20,
    num_layers=4,
)
rnn

RNN(100, 20, num_layers=4)

In [165]:
# Random input batch, x: [seq_len: 10, batch_size: 3, word_vec: 100]
#   seq_len:    sentence length
#   batch_size: batch size
#   word_vec:   length of each word vector
x = torch.randn((10, 3, 100))

In [166]:
# Run the whole sequence through the RNN without passing an initial hidden state.
# out: [seq_len: 10, batch_size: 3, hidden_size: 20]
# ht: [num_layers: 4, batch_size: 3, hidden_size: 20]
out, ht = rnn(x)
print(out.shape, ht.shape)

torch.Size([10, 3, 20]) torch.Size([4, 3, 20])


In [167]:
# Explicit zero initial hidden state, one slice per layer.
# h0: [num_layers: 4, batch_size: 3, hidden_size: 20]
#   num_layers:  number of hidden layers
#   batch_size:  batch size
#   hidden_size: size of each hidden layer
h0 = torch.zeros((4, 3, 20))

In [168]:
# Same forward pass, this time passing the initial hidden state h0 explicitly.
# out: [seq_len: 10, batch_size: 3, hidden_size: 20]
# ht: [num_layers: 4, batch_size: 3, hidden_size: 20]
out, ht = rnn(x, h0)
print(out.shape, ht.shape)

torch.Size([10, 3, 20]) torch.Size([4, 3, 20])


### 双向Multi Layer RNN

In [169]:
# Four stacked bidirectional RNN layers: 100 input features per step,
# 20 hidden units per direction.
rnn = nn.RNN(
    input_size=100,
    hidden_size=20,
    num_layers=4,
    bidirectional=True,
)
rnn

RNN(100, 20, num_layers=4, bidirectional=True)

In [170]:
# Random input batch laid out as [seq_len, batch_size, word_vec]
x = torch.randn((10, 3, 100))

In [171]:
# Bidirectional RNN: the feature axis of `out` and the leading axis of `ht`
# are both doubled (2 directions).
# out: [seq_len: 10, batch_size: 3, num_directions * hidden_size: 40]
# ht: [num_layers * num_directions: 8, batch_size: 3, hidden_size: 20]
# nn.RNN returns only a hidden state here; there is no cell state c.
out, ht = rnn(x)
out.shape, ht.shape

(torch.Size([10, 3, 40]), torch.Size([8, 3, 20]))

# LSTM

## nn.LSTMCell

In [172]:
# Processes one word (one time step) at a time

### Single Cell

In [173]:
# Random input batch, x: [seq_len: 10, batch_size: 3, word_vec: 100]
x = torch.randn((10, 3, 100))
x.shape

torch.Size([10, 3, 100])

In [174]:
# input_size, hidden_size
cell = nn.LSTMCell(100, 20)

In [175]:
# Names of the parameters registered on the LSTM cell (read from its internal
# `_parameters` mapping).
cell._parameters.keys()

odict_keys(['weight_ih', 'weight_hh', 'bias_ih', 'bias_hh'])

In [176]:
# Shapes of the LSTM cell's weights and biases; the leading dimension is
# 4 * hidden_size (80 = 4 * 20).
cell.weight_ih.shape, cell.weight_hh.shape, cell.bias_ih.shape, cell.bias_hh.shape

(torch.Size([80, 100]),
 torch.Size([80, 20]),
 torch.Size([80]),
 torch.Size([80]))

In [180]:
# Random initial hidden state h and cell state c,
# each [batch_size: 3, hidden_size: 20].
h = torch.randn((3, 20))
c = torch.randn((3, 20))

In [181]:
# Walk the sequence one time step at a time, carrying the (hidden, cell)
# state pair forward from step to step.
for xt in x.unbind(dim=0):
    h, c = cell(xt, (h, c))

In [182]:
# Shapes of the hidden state and the cell state.
h.shape, c.shape

(torch.Size([3, 20]), torch.Size([3, 20]))

### Two Cells

In [183]:
# Random input batch, x: [seq_len: 10, batch_size: 3, word_vec: 100]
x = torch.randn((10, 3, 100))
x.shape

torch.Size([10, 3, 100])

In [184]:
# Two stacked LSTM cells: the first maps 100 -> 30, the second consumes the
# first cell's hidden state and maps 30 -> 20.
cell1 = nn.LSTMCell(input_size=100, hidden_size=30)
cell2 = nn.LSTMCell(input_size=cell1.hidden_size, hidden_size=20)

In [185]:
# Random initial (hidden, cell) state pairs, one pair per cell.
# h1, c1: [batch_size: 3, hidden_size: 30]
h1, c1 = torch.randn((3, 30)), torch.randn((3, 30))
# h2, c2: [batch_size: 3, hidden_size: 20]
h2, c2 = torch.randn((3, 20)), torch.randn((3, 20))

In [186]:
# Walk the sequence one time step at a time through both LSTM cells.
for xt in x.unbind(dim=0):
    # First cell reads the input slice.
    h1, c1 = cell1(xt, (h1, c1))
    # Second cell reads the first cell's new hidden state.
    h2, c2 = cell2(h1, (h2, c2))

In [187]:
# Shapes of the hidden and cell states of both cells.
h1.shape, c1.shape, h2.shape, c2.shape

(torch.Size([3, 30]),
 torch.Size([3, 30]),
 torch.Size([3, 20]),
 torch.Size([3, 20]))

## nn.LSTM

In [188]:
# Processes one whole sentence (sequence) at a time

In [189]:
# Three stacked LSTM layers: 100 input features per step, 10 hidden units each.
lstm = nn.LSTM(input_size=100, hidden_size=10, num_layers=3)
lstm

LSTM(100, 10, num_layers=3)

In [190]:
# Parameter names
lstm._parameters.keys()

odict_keys(['weight_ih_l0', 'weight_hh_l0', 'bias_ih_l0', 'bias_hh_l0', 'weight_ih_l1', 'weight_hh_l1', 'bias_ih_l1', 'bias_hh_l1', 'weight_ih_l2', 'weight_hh_l2', 'bias_ih_l2', 'bias_hh_l2'])

In [191]:
# Parameter shapes of the first layer
lstm.weight_ih_l0.shape, lstm.weight_hh_l0.shape, lstm.bias_ih_l0.shape, lstm.bias_hh_l0.shape

(torch.Size([40, 100]),
 torch.Size([40, 10]),
 torch.Size([40]),
 torch.Size([40]))

In [192]:
# Parameter shapes of the second layer
lstm.weight_ih_l1.shape, lstm.weight_hh_l1.shape, lstm.bias_ih_l1.shape, lstm.bias_hh_l1.shape

(torch.Size([40, 10]),
 torch.Size([40, 10]),
 torch.Size([40]),
 torch.Size([40]))

In [193]:
# Parameter shapes of the third layer
lstm.weight_ih_l2.shape, lstm.weight_hh_l2.shape, lstm.bias_ih_l2.shape, lstm.bias_hh_l2.shape

(torch.Size([40, 10]),
 torch.Size([40, 10]),
 torch.Size([40]),
 torch.Size([40]))

### Single Layer LSTM

In [194]:
# One-layer LSTM: 100 input features per step, 20 hidden units.
lstm = nn.LSTM(
    input_size=100,
    hidden_size=20,
    num_layers=1,
)
lstm

LSTM(100, 20)

In [195]:
# Random input batch laid out as [seq_len, batch_size, word_vec]
x = torch.randn((10, 3, 100))

In [196]:
# Run the whole sequence through the LSTM without passing initial states.
# out: [seq_len: 10, batch_size: 3, hidden_size: 20]
# h: [num_layers: 1, batch_size: 3, hidden_size: 20]
# c: [num_layers: 1, batch_size: 3, hidden_size: 20]
out, (h, c) = lstm(x)
out.shape, h.shape, c.shape

(torch.Size([10, 3, 20]), torch.Size([1, 3, 20]), torch.Size([1, 3, 20]))

### Multi Layer LSTM

In [198]:
# Four stacked LSTM layers: 100 input features per step, 20 hidden units each.
lstm = nn.LSTM(
    input_size=100,
    hidden_size=20,
    num_layers=4,
)
lstm

LSTM(100, 20, num_layers=4)

In [199]:
# Random input batch laid out as [seq_len, batch_size, word_vec]
x = torch.randn((10, 3, 100))

In [200]:
# Run the whole sequence through the LSTM without passing initial states.
# out: [seq_len: 10, batch_size: 3, hidden_size: 20]
# h: [num_layers: 4, batch_size: 3, hidden_size: 20]
# c: [num_layers: 4, batch_size: 3, hidden_size: 20]
out, (h, c) = lstm(x)
out.shape, h.shape, c.shape

(torch.Size([10, 3, 20]), torch.Size([4, 3, 20]), torch.Size([4, 3, 20]))

### 双向Multi Layer LSTM

In [201]:
# Four stacked bidirectional LSTM layers: 100 input features per step,
# 20 hidden units per direction.
lstm = nn.LSTM(
    input_size=100,
    hidden_size=20,
    num_layers=4,
    bidirectional=True,
)
lstm

LSTM(100, 20, num_layers=4, bidirectional=True)

In [202]:
# Random input batch laid out as [seq_len, batch_size, word_vec]
x = torch.randn((10, 3, 100))

In [203]:
# Bidirectional LSTM: the feature axis of `out` and the leading axis of
# `h` / `c` are both doubled (2 directions).
# out: [seq_len: 10, batch_size: 3, num_directions * hidden_size: 40]
# h: [num_layers * num_directions: 8, batch_size: 3, hidden_size: 20]
# c: [num_layers * num_directions: 8, batch_size: 3, hidden_size: 20]
out, (h, c) = lstm(x)
out.shape, h.shape, c.shape

(torch.Size([10, 3, 40]), torch.Size([8, 3, 20]), torch.Size([8, 3, 20]))