In [2]:
import jieba
import torch
from torch import nn

In [3]:
s1 = "我吃饭了！"
s2 = "今天天气很好！"
s3 = "这辆车很好看！"

In [4]:
jieba.lcut(s3)

Building prefix dict from the default dictionary ...
Loading model from cache /var/folders/mc/t06t7bmj32zflsscnj8kphfwjl80v8/T/jieba.cache
Loading model cost 0.377 seconds.
Prefix dict has been built successfully.


['这辆', '车', '很', '好看', '！']

In [5]:
words = {word for sentence in [s1, s2, s3] for word in jieba.lcut(sentence)}

In [6]:
words.add("<UNK>")

In [7]:
words.add("<PAD>")

In [8]:
words

{'<PAD>', '<UNK>', '了', '今天天气', '吃饭', '好', '好看', '很', '我', '车', '这辆', '！'}

In [9]:
word2idx = {word: idx for idx, word in enumerate(words)}
idx2word = {idx: word for word, idx in word2idx.items()}

In [10]:
word2idx

{'！': 0,
 '了': 1,
 '今天天气': 2,
 '我': 3,
 '这辆': 4,
 '好看': 5,
 '<UNK>': 6,
 '<PAD>': 7,
 '很': 8,
 '车': 9,
 '吃饭': 10,
 '好': 11}

In [11]:
idx2word

{0: '！',
 1: '了',
 2: '今天天气',
 3: '我',
 4: '这辆',
 5: '好看',
 6: '<UNK>',
 7: '<PAD>',
 8: '很',
 9: '车',
 10: '吃饭',
 11: '好'}

In [12]:
s1 = "我吃饭了！"
s2 = "今天天气很好！"
s3 = "这辆车很好看！"

In [13]:
idx1 = [word2idx.get(word, word2idx.get("<UNK>")) for word in jieba.lcut(s1)]
idx1

[3, 10, 1, 0]

In [14]:
idx2 = [word2idx.get(word, word2idx.get("<UNK>")) for word in jieba.lcut(s2)]
idx2

[2, 8, 11, 0]

In [15]:
idx3 = [word2idx.get(word, word2idx.get("<UNK>")) for word in jieba.lcut(s3)]
idx3

[4, 9, 8, 5, 0]

In [16]:
# 补 1 个 pad
idx1 += [word2idx.get("<PAD>")]

In [17]:
idx2 += [word2idx.get("<PAD>")]

In [18]:
idx1

[3, 10, 1, 0, 7]

In [19]:
idx2

[2, 8, 11, 0, 7]

In [20]:
idx3

[4, 9, 8, 5, 0]

In [21]:
[idx1, idx2, idx3]

[[3, 10, 1, 0, 7], [2, 8, 11, 0, 7], [4, 9, 8, 5, 0]]

In [22]:
X = torch.tensor(data=[idx1, idx2, idx3], dtype=torch.long).T

In [23]:
# [seq_len, batch_size]
X.shape

torch.Size([5, 3])

In [24]:
# word embedding
embed = nn.Embedding(num_embeddings=len(word2idx), embedding_dim=6)

In [25]:
len(word2idx)

12

In [26]:
# [3, 5, 12] --> [3, 5, 6]

In [27]:
# [batch_size, seq_len, embedding_dim]
embed(X).shape

torch.Size([5, 3, 6])

In [28]:
# [N, C, H, W]
# [N, Seq_len, Embedding_dim]

In [29]:
nn.RNN

torch.nn.modules.rnn.RNN

$h_t = \tanh(x_t W_{ih}^T + b_{ih} + h_{t-1}W_{hh}^T + b_{hh})
$

In [43]:
rnn = nn.RNN(input_size=6, hidden_size=7, batch_first=False)

In [31]:
X1 = embed(X)

In [32]:
out, hn = rnn(X1)

In [33]:
# 每一步的输出
out.shape

torch.Size([5, 3, 7])

In [34]:
# 最后一步的输出
hn.shape

torch.Size([1, 3, 7])

In [35]:
out[-1, :, :]

tensor([[ 0.4206, -0.8645,  0.1780, -0.9479, -0.5214,  0.2152,  0.2334],
        [ 0.5270, -0.8427,  0.1379, -0.9600, -0.5147,  0.1860,  0.2059],
        [ 0.1640,  0.2288, -0.9124,  0.0523, -0.2982, -0.6744, -0.7821]],
       grad_fn=<SliceBackward0>)

In [36]:
hn

tensor([[[ 0.4206, -0.8645,  0.1780, -0.9479, -0.5214,  0.2152,  0.2334],
         [ 0.5270, -0.8427,  0.1379, -0.9600, -0.5147,  0.1860,  0.2059],
         [ 0.1640,  0.2288, -0.9124,  0.0523, -0.2982, -0.6744, -0.7821]]],
       grad_fn=<StackBackward0>)

In [None]:
nn.RNNCell(input_size=128, hidden_size=256)

In [37]:
class Model(nn.Module):
    def __init__(self, dict_len=5000, embedding_dim=256, n_classes=2):
        super().__init__()
        # 嵌入：词向量
        self.embed = nn.Embedding(num_embeddings=dict_len, 
                                  embedding_dim=embedding_dim)
        # 循环神经网络提取特征
        self.rnn = nn.RNN(input_size=embedding_dim,
                         hidden_size=embedding_dim)
        # 转换输出
        self.out = nn.Linear(in_features=embedding_dim, 
                            out_features=n_classes)
        
    def forward(self, x):
        # [seq_len, batch_size] --> [seq_len, batch_size, embedding_dim]
        x = self.embed(x)
        # out: [seq_len, batch_size, embedding_dim]
        # hn: [1, batch_size, embedding_dim]
        out, hn = self.rnn(x)
        # [1, batch_size, embedding_dim] --> [batch_size, embedding_dim]
        x = torch.squeeze(input=hn, dim=0)
        # [batch_size, embedding_dim] --> [batch_size, n_classes]
        x = self.out(x)
        return x

In [38]:
model = Model(dict_len=5000, embedding_dim=256, n_classes=2)

In [39]:
model

Model(
  (embed): Embedding(5000, 256)
  (rnn): RNN(256, 256)
  (out): Linear(in_features=256, out_features=2, bias=True)
)

In [40]:
X = torch.randint(low=0, high=5000, size=(26, 3), dtype=torch.long)

In [41]:
# [seq_len, batch_size]
X.shape

torch.Size([26, 3])

In [42]:
# [batch_size, n_classes]
model(X).shape

torch.Size([3, 2])

In [46]:
lstm = nn.LSTM(input_size=128, hidden_size=256)

In [49]:
X = torch.randn(13, 2, 128)
c0 = torch.zeros(1, 2, 256, dtype=torch.float32) # 长期状态
h0 = torch.zeros(1, 2, 256, dtype=torch.float32) # 短期状态
# out, (hn, cn) = lstm(X, (c0, h0))

In [50]:
out.shape

torch.Size([13, 2, 256])

In [52]:
X = torch.randn(13, 2, 128)
c0 = torch.zeros(2, 256, dtype=torch.float32) # 长期状态
h0 = torch.zeros(2, 256, dtype=torch.float32) # 短期状态
lstm_cell = nn.LSTMCell(input_size=128, hidden_size=256)
out = []
for x in X:
    h0, c0 = lstm_cell(x, (h0, c0))
    out.append(h0)

In [53]:
hn = h0.unsqueeze(dim=0)
cn = c0.unsqueeze(dim=0)
torch.stack(tensors=out, dim=0)

tensor([[[ 0.1675, -0.0614,  0.0169,  ..., -0.0652,  0.0325, -0.1295],
         [ 0.1012, -0.0077, -0.0210,  ...,  0.0178, -0.0410, -0.0682]],

        [[-0.0122,  0.0945, -0.0944,  ...,  0.0014,  0.0194, -0.0010],
         [ 0.0690, -0.1371, -0.0003,  ...,  0.3057, -0.1202, -0.0566]],

        [[-0.0432,  0.2013, -0.1296,  ...,  0.0198, -0.0545,  0.0972],
         [ 0.0515, -0.0417, -0.0244,  ...,  0.1871, -0.0542, -0.0828]],

        ...,

        [[ 0.1728,  0.0268,  0.0503,  ..., -0.1391, -0.0713, -0.1325],
         [ 0.0502,  0.1082,  0.0370,  ..., -0.0046, -0.0384,  0.0046]],

        [[-0.0275,  0.0179,  0.0947,  ..., -0.0690, -0.1117, -0.0279],
         [ 0.0234,  0.0409, -0.0797,  ..., -0.0004, -0.0634, -0.1005]],

        [[-0.0243, -0.0414,  0.0169,  ...,  0.0033, -0.0872, -0.0816],
         [-0.1455,  0.1672,  0.0130,  ..., -0.0905,  0.1045, -0.0446]]],
       grad_fn=<StackBackward0>)

In [55]:
gru = nn.GRU(input_size=128, hidden_size=256)
X = torch.randn(13, 2, 128)
h0 = torch.randn(1, 2, 256, dtype=torch.float32)
out, hn = gru(X, h0)

(tensor([[[ 0.5625, -0.4521, -0.2904,  ..., -0.4018, -0.0016,  0.6812],
          [-0.8867, -0.3558, -0.6299,  ..., -0.5218,  0.7341, -0.6390]],
 
         [[-0.2211, -0.4959, -0.1156,  ..., -0.2546, -0.1646,  0.2154],
          [-0.6964, -0.3545, -0.3515,  ..., -0.3695,  0.4752,  0.0509]],
 
         [[-0.0044, -0.2836, -0.0336,  ..., -0.4338, -0.0371,  0.2066],
          [-0.6791, -0.1804, -0.4110,  ..., -0.4786,  0.2913, -0.1178]],
 
         ...,
 
         [[ 0.2340, -0.1078,  0.2161,  ..., -0.1712, -0.0820,  0.3054],
          [ 0.3108, -0.0156,  0.1005,  ...,  0.5452,  0.1688,  0.2013]],
 
         [[ 0.2055, -0.0862,  0.3331,  ..., -0.3964,  0.0330,  0.1954],
          [ 0.3720, -0.2097,  0.0400,  ...,  0.5283,  0.1772,  0.3714]],
 
         [[ 0.1953,  0.0301,  0.1913,  ..., -0.0159, -0.0060,  0.2923],
          [ 0.6132, -0.0329,  0.0219,  ...,  0.3170,  0.2292,  0.2638]]],
        grad_fn=<StackBackward0>),
 tensor([[[ 1.9533e-01,  3.0062e-02,  1.9127e-01, -3.2366e-02, -8.47