In [1]:
import torch
import random
import torch.nn as nn
import numpy as np
import torch.nn.functional as F

In [2]:
def set_rand_seed(seed=1):
    """Seed every random number generator this notebook relies on.

    The same seed is handed to Python's ``random`` module, NumPy and
    PyTorch (CPU plus the CUDA seeding calls). cuDNN benchmark mode is
    then switched off and cuDNN deterministic mode is switched on.

    Args:
        seed: Value passed to each seeding function. Defaults to 1.
    """
    print("Random Seed: ", seed)

    # Apply the seed to each library in the same order as before.
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    # Disabling cuDNN altogether is intentionally left commented out:
    # torch.backends.cudnn.enabled = False
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

In [3]:
set_rand_seed()

Random Seed:  1


In [4]:
# LSTM hyperparameters; passed to nn.LSTM when the networks below are built.
embed_size = 128   # size of each input feature vector (nn.LSTM input size)
hidden_size = 512  # size of the hidden state, per direction
n_layer = 2        # number of stacked LSTM layers

## 定义LSTM
- 输入：由于 batch_first=True，输入 x 的形状为 (batch, seq_len, embed_size)。
- output保存了最后一层，每个time step的输出h，如果是双向LSTM，每个time step的输出h = [h正向, h逆向] (同一个time step的正向和逆向的h连接起来)。
- h_n保存了每一层，最后一个time step的输出h，如果是双向LSTM，单独保存前向和后向的最后一个time step的输出h（注意：后向是从序列末尾向开头处理的，所以它的“最后一个time step”对应output中的第0个位置）。
- c_n与h_n一致，只是它保存的是c的值。

In [5]:
# Stacked, bidirectional LSTM. With batch_first=True the input and output
# tensors are laid out as (batch, seq_len, features).
lstm = nn.LSTM(
    input_size=embed_size,
    hidden_size=hidden_size,
    num_layers=n_layer,
    batch_first=True,
    bidirectional=True,
)

In [6]:
# Parameters stored at index 1 of lstm.all_weights.
# NOTE(review): presumably layer 0, reverse direction — confirm against the nn.LSTM docs.
lstm.all_weights[1]

[Parameter containing:
 tensor([[-0.0286,  0.0102,  0.0196,  ...,  0.0140, -0.0306,  0.0162],
         [ 0.0073,  0.0275, -0.0246,  ..., -0.0119,  0.0383,  0.0296],
         [-0.0230, -0.0301, -0.0402,  ..., -0.0140,  0.0097,  0.0410],
         ...,
         [ 0.0088,  0.0129,  0.0204,  ...,  0.0042, -0.0166,  0.0139],
         [ 0.0035,  0.0292, -0.0052,  ..., -0.0033, -0.0247, -0.0149],
         [ 0.0304, -0.0205, -0.0235,  ..., -0.0435, -0.0082,  0.0070]],
        requires_grad=True),
 Parameter containing:
 tensor([[ 0.0332,  0.0117, -0.0223,  ..., -0.0188, -0.0090,  0.0232],
         [-0.0044,  0.0032,  0.0183,  ..., -0.0309, -0.0030,  0.0329],
         [-0.0231, -0.0378,  0.0360,  ..., -0.0179, -0.0228, -0.0330],
         ...,
         [-0.0014, -0.0154, -0.0263,  ..., -0.0001, -0.0220, -0.0228],
         [ 0.0007,  0.0211,  0.0364,  ..., -0.0040, -0.0417, -0.0221],
         [ 0.0077, -0.0442,  0.0341,  ...,  0.0277, -0.0085,  0.0205]],
        requires_grad=True),
 Parameter con

In [7]:
# Random input batch laid out as (batch=3, seq_len=1000, embed_size).
# The feature width comes from embed_size so it always matches the LSTM's
# input size instead of repeating the literal 128.
x = torch.rand(3, 1000, embed_size)

In [8]:
# output: (batch, seq_len, 2 * hidden_size) — the last layer's h at every time step.
# hn, cn: (n_layer * 2, batch, hidden_size) — final h / c for each layer and direction.
output, (hn, cn) = lstm(x)

In [9]:
hn.size()

torch.Size([4, 3, 512])

In [10]:
output.size()

torch.Size([3, 1000, 1024])

In [11]:
cn.size()

torch.Size([4, 3, 512])

In [12]:
# 1. Forward direction: the first 512 features of `output` at the LAST time step
#    should equal hn for the last layer's forward direction (compare with hn[2, 0] below).
output[0, -1, :512][:20]

tensor([-0.0253, -0.0033,  0.0074, -0.0321,  0.0448,  0.0364,  0.0274, -0.0126,
        -0.0314, -0.0074,  0.0296,  0.0046, -0.0231, -0.0112,  0.0002,  0.0588,
         0.0282,  0.0186, -0.0107,  0.0064], grad_fn=<SliceBackward>)

In [14]:
# Last layer, forward direction: the values match output[0, -1, :512] shown above.
hn[2, 0][:20]

tensor([-0.0253, -0.0033,  0.0074, -0.0321,  0.0448,  0.0364,  0.0274, -0.0126,
        -0.0314, -0.0074,  0.0296,  0.0046, -0.0231, -0.0112,  0.0002,  0.0588,
         0.0282,  0.0186, -0.0107,  0.0064], grad_fn=<SliceBackward>)

In [15]:
cn[2, 0][:20]

tensor([-0.0496, -0.0064,  0.0153, -0.0658,  0.0887,  0.0693,  0.0537, -0.0262,
        -0.0664, -0.0149,  0.0573,  0.0095, -0.0491, -0.0215,  0.0003,  0.1142,
         0.0567,  0.0350, -0.0211,  0.0126], grad_fn=<SliceBackward>)

In [19]:
# 2. Backward direction: the last 512 features of `output` at the FIRST time step
#    (index 0) should equal hn for the last layer's backward direction, because the
#    backward pass finishes at the start of the sequence (compare with hn[3, 0] below).
output[0, 0, 512:][:20]

tensor([ 0.0067,  0.0291,  0.0195, -0.0040,  0.0222, -0.0016,  0.0204,  0.0483,
         0.0391, -0.0150, -0.0355,  0.0069, -0.0433,  0.0397, -0.0110, -0.0593,
         0.0156, -0.0023,  0.0674, -0.0159], grad_fn=<SliceBackward>)

In [17]:
# Last layer, backward direction: the values match output[0, 0, 512:] shown above.
hn[3, 0][:20]

tensor([ 0.0067,  0.0291,  0.0195, -0.0040,  0.0222, -0.0016,  0.0204,  0.0483,
         0.0391, -0.0150, -0.0355,  0.0069, -0.0433,  0.0397, -0.0110, -0.0593,
         0.0156, -0.0023,  0.0674, -0.0159], grad_fn=<SliceBackward>)

In [15]:
# Join the raw inputs and the LSTM outputs along the feature axis (dim 2).
out = torch.cat([x, output], dim=2)
out.size()

torch.Size([3, 2000, 640])

In [15]:
out = F.relu(out)
out.size()

torch.Size([3, 2000, 1152])

In [16]:
# Swap the last two axes: (batch, seq_len, features) -> (batch, features, seq_len),
# so that the pooling layer defined below reduces over the time axis.
out = out.permute(0, 2, 1).contiguous()
out.size()

torch.Size([3, 1152, 2000])

In [17]:
# Global max-pool over the whole time axis. An adaptive pool with output size 1
# works for any sequence length, whereas a fixed kernel of 2000 raises an error
# on sequences shorter than 2000 steps (x is created with 1000 steps above).
# For 2000-step inputs the result is the same (batch, features, 1) tensor.
maxpool = nn.AdaptiveMaxPool1d(1)

In [18]:
# Drop only the pooled time axis (the last one). A bare squeeze() would also
# remove the batch axis whenever the batch size is 1.
out = maxpool(out).squeeze(-1)
out.size()

torch.Size([3, 1152])

In [19]:
# Input width is the concatenated feature size: 2 * hidden_size from the
# bidirectional LSTM output plus embed_size from the raw input. Derived from
# the hyperparameters rather than the literals 512 and 128. Produces 2 outputs.
fc = nn.Linear(hidden_size * 2 + embed_size, 2)

In [20]:
out = fc(out)

In [21]:
out.size()

torch.Size([3, 2])

In [12]:
output, (hn, cn) = lstm(x)

In [14]:
hn.size()

torch.Size([2, 3, 512])

In [15]:
output.size()

torch.Size([3, 2000, 1024])

In [17]:
cn.size()

torch.Size([2, 3, 512])

In [30]:
output[0][-1][:10]

tensor([ 0.0890,  0.0503, -0.1258,  0.0348,  0.0773,  0.0296,  0.0683, -0.1334,
        -0.0785,  0.1435], grad_fn=<SliceBackward>)

In [31]:
hn.size()

torch.Size([2, 1, 512])

In [32]:
cn.size()

torch.Size([2, 1, 512])

In [35]:
# Second LSTM that keeps nn.LSTM's defaults for layer count and direction.
lstm2 = nn.LSTM(
    input_size=embed_size,
    hidden_size=hidden_size,
    batch_first=True,
)

In [36]:
out, (h, c) = lstm2(x)

In [37]:
h.size()

torch.Size([1, 1, 512])

In [38]:
h

tensor([[[ 5.0007e-02,  1.1504e-01, -8.0164e-03, -2.8628e-02,  4.3624e-02,
          -4.8098e-02, -8.9852e-02, -2.0751e-02, -1.4995e-02,  2.1779e-02,
           1.4510e-01,  3.7402e-02, -2.4306e-02,  8.3935e-02,  1.3019e-01,
           5.2696e-02,  5.8331e-03,  2.3580e-02,  5.0516e-03, -1.6813e-01,
           2.0978e-02,  7.0036e-02,  2.5464e-02, -8.2757e-03, -8.2930e-02,
          -2.4754e-01,  3.6266e-02, -8.9510e-02,  1.0058e-01, -3.9296e-02,
          -3.2925e-02, -3.0055e-02, -1.1519e-01,  1.6550e-02, -1.0098e-01,
           2.4721e-02, -4.6358e-02,  1.4646e-01,  6.0281e-02, -3.3340e-02,
          -1.5491e-01, -1.3073e-01, -1.1565e-01,  4.8550e-02, -5.9586e-02,
          -1.9076e-01, -1.0272e-01, -8.1003e-02,  7.1642e-02, -7.8382e-02,
          -6.5359e-02,  1.9230e-03,  1.1971e-01,  1.7767e-01,  3.5265e-02,
           4.9145e-02, -9.9055e-02,  3.6508e-02,  1.4252e-01, -1.0448e-01,
           2.6643e-02, -1.9212e-02,  2.4304e-02, -1.3443e-01, -9.2144e-02,
           5.3559e-02, -4

In [39]:
c[-1].size()

torch.Size([1, 512])

In [45]:
# The two size-1 dimensions can be expanded to size 3 and 3 (to match y below).
x = torch.randn(2, 1, 1)
x

tensor([[[ 0.2492]],

        [[-0.5608]]])

In [46]:
y = torch.randn(2, 3, 3)

In [47]:
y

tensor([[[-1.3034,  0.2991, -0.4937],
         [-0.8327,  1.0045,  0.2131],
         [-0.8213,  0.3709, -0.2830]],

        [[ 0.8204, -2.0705,  0.1645],
         [ 1.8283,  0.9639, -0.8203],
         [-0.1718,  0.4644, -1.1035]]])

In [43]:
x = x.expand_as(y)
x

tensor([[[-2.2631, -2.2631, -2.2631],
         [-2.2631, -2.2631, -2.2631],
         [-2.2631, -2.2631, -2.2631]],

        [[ 1.2091,  1.2091,  1.2091],
         [ 1.2091,  1.2091,  1.2091],
         [ 1.2091,  1.2091,  1.2091]]])

In [48]:
# expand_as stretches x's size-1 dimensions to y's shape; .contiguous() then
# returns a contiguous tensor.
# NOTE(review): presumably this materializes the expanded view as a real copy — confirm.
z = x.expand_as(y).contiguous()

In [49]:
z

tensor([[[ 0.2492,  0.2492,  0.2492],
         [ 0.2492,  0.2492,  0.2492],
         [ 0.2492,  0.2492,  0.2492]],

        [[-0.5608, -0.5608, -0.5608],
         [-0.5608, -0.5608, -0.5608],
         [-0.5608, -0.5608, -0.5608]]])

In [51]:
z.size()

torch.Size([2, 3, 3])

In [21]:
?nn.LSTM