## LSTM by Hand

通过自定义层实现LSTM，学习自Tensorflow Codelab线下活动(20201114)  
分享内容参考      https://zhuanlan.zhihu.com/p/293208563  
自定义LSTM层来源: https://www.bilibili.com/video/BV1FV41117Uz/  
  
  
[LSTM简介](https://zh.wikipedia.org/wiki/%E9%95%B7%E7%9F%AD%E6%9C%9F%E8%A8%98%E6%86%B6)

In [1]:
import tensorflow as tf
import numpy as np
import jieba
import pandas as pd

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
pd.options.display.max_columns = None
pd.options.display.max_colwidth = 80
pd.options.display.precision = 4
pd.options.display.max_rows = 999
pd.options.display.float_format = '{:.4f}'.format  # 防止科学计数法，小数显示4位


physical_devices = tf.config.list_physical_devices('GPU') 
tf.config.experimental.set_memory_growth(physical_devices[0], True)

In [2]:
jieba.enable_paddle()

Paddle enabled successfully......


In [3]:
batch_size = 4
sequence_length = 5
input_size = 30
output_size = 20

x = tf.random.uniform((batch_size, sequence_length, input_size))

In [4]:
x.shape

TensorShape([4, 5, 30])

In [5]:
# LSTM's input: [batch_size, sequence_length, input_size]
# LSTM's output1: [batch_size, sequence_length, input_size]
#        output2: [batch_size, input_size]


In [6]:
xt = x[:, 0, :]

In [7]:
xt.shape

TensorShape([4, 30])

### 按照LSTM的公式写出计算过程

**方程**  
$ f_t = \sigma_g(W_f x_t + U_f h_{t-1} + b_f) $  
$ i_t = \sigma_g(W_i x_t + U_i h_{t-1} + b_i) $  
$ o_t = \sigma_g(W_o x_t + U_o h_{t-1} + b_o) $  
$ c_t = f_t \circ c_{t-1} + i_t \circ \sigma_c(W_c x_t + U_c h_{t-1} + b_c) $  
$ h_t = o_t \circ \sigma_h(c_t) $

**变量**

- $ x_t \in \mathbb{R}^d $: LSTM的input(输入)
- $ f_t \in \mathbb{R}^d $: forget gate(遗忘阀)
- $ i_t \in \mathbb{R}^d $: input gate(输入阀)
- $ o_t \in \mathbb{R}^d $: output gate(输出阀)
- $ c_t \in \mathbb{R}^d $: cell state(单元状态)
- $ h_t \in \mathbb{R}^d $: hidden state(隐藏状态)
- $ W \in \mathbb{R}^{h\times{d}}、U \in \mathbb{R}^{h\times{h}}、b \in \mathbb{R}^{h} $: 训练中的矩阵，网络学习计算元值


**激活函数**

- $ \sigma_g $: sigmoid function  
- $ \sigma_c $: hyperbolic tangent function  
- $ \sigma_h $: hyperbolic tangent function or, as the peephole LSTM paper suggests, $ \sigma_h(x) = x $

In [8]:
wf = tf.random.uniform((input_size, output_size))
wi = tf.random.uniform((input_size, output_size))
wo = tf.random.uniform((input_size, output_size))
wc = tf.random.uniform((input_size, output_size))

uf = tf.random.uniform((output_size, output_size))
ui = tf.random.uniform((output_size, output_size))
uo = tf.random.uniform((output_size, output_size))
uc = tf.random.uniform((output_size, output_size))

bf = tf.random.uniform((1, output_size))
bi = tf.random.uniform((1, output_size))
bo = tf.random.uniform((1, output_size))
bc = tf.random.uniform((1, output_size))



In [9]:
sequence_outputs = []
for i in range(sequence_length):

    if i == 0:
        xt = x[:, 0, :]
        ft = tf.sigmoid(tf.matmul(xt, wf) + bf)
        it = tf.sigmoid(tf.matmul(xt, wi) + bi)
        ot = tf.sigmoid(tf.matmul(xt, wo) + bo)
        cht = tf.tanh(tf.matmul(xt, wc) + bc)

        ct = it * cht
        ht = ot * tf.tanh(ct)
    
    else:
        xt = x[:, 0, :]
        ft = tf.sigmoid(tf.matmul(xt, wf) + bf)
        it = tf.sigmoid(tf.matmul(xt, wi) + bi)
        ot = tf.sigmoid(tf.matmul(xt, wo) + bo)
        cht = tf.tanh(tf.matmul(xt, wc) + bc)

        ct = ft * ct + it * cht
        ht = ot * tf.tanh(ct)
    
    sequence_outputs.append(ht)

In [10]:
sequence_outputs = tf.stack(sequence_outputs)
sequence_outputs = tf.transpose(sequence_outputs, (1, 0, 2))

In [11]:
sequence_outputs

<tf.Tensor: shape=(4, 5, 20), dtype=float32, numpy=
array([[[0.7599528 , 0.761233  , 0.7612163 , 0.7610016 , 0.7612512 ,
         0.76144326, 0.7611025 , 0.761134  , 0.7609633 , 0.761372  ,
         0.76106924, 0.7606706 , 0.761347  , 0.7611531 , 0.76136607,
         0.7607359 , 0.76124275, 0.76126325, 0.76094896, 0.76038885],
        [0.9621517 , 0.9635794 , 0.9637104 , 0.9633134 , 0.96346885,
         0.96386755, 0.96364105, 0.9637293 , 0.963295  , 0.96385753,
         0.96358603, 0.96343404, 0.96374685, 0.9635008 , 0.96385294,
         0.9632597 , 0.9637009 , 0.9636283 , 0.9633781 , 0.962844  ],
        [0.99319154, 0.9946173 , 0.9947929 , 0.9943322 , 0.9945647 ,
         0.99492985, 0.9947289 , 0.99487346, 0.9943656 , 0.9949384 ,
         0.99467885, 0.99468267, 0.9948186 , 0.99454176, 0.994932  ,
         0.9944886 , 0.9947662 , 0.99465144, 0.994463  , 0.9939454 ],
        [0.9974759 , 0.9988997 , 0.99908346, 0.99860746, 0.99887973,
         0.99921834, 0.9990179 , 0.9991819 , 0.9

### 利用LSTM计算过程创建自定义LSTM层

In [12]:
class CustomLSTM(tf.keras.layers.Layer):
    
    """
    LSTM's input: [batch_size, sequence_length, input_size]
    LSTM's output1: [batch_size, sequence_length, input_size]
           output2: [batch_size, input_size]
    """
    
    def __init__(self, output_size, return_sequence=False):
        super(CustomLSTM, self).__init__()
        self.output_size = output_size
        self.return_sequence = return_sequence
    
    def build(self, input_shape):
        super(CustomLSTM, self).build(input_shape)
        input_size = int(input_shape[-1])
        
        self.wf = self.add_weight('wf', shape=(input_size, self.output_size))
        self.wi = self.add_weight('wi', shape=(input_size, self.output_size))
        self.wo = self.add_weight('wo', shape=(input_size, self.output_size))
        self.wc = self.add_weight('wc', shape=(input_size, self.output_size))

        self.uf = self.add_weight('uf', shape=(self.output_size, self.output_size))
        self.ui = self.add_weight('ui', shape=(self.output_size, self.output_size))
        self.uo = self.add_weight('uo', shape=(self.output_size, self.output_size))
        self.uc = self.add_weight('uc', shape=(self.output_size, self.output_size))

        self.bf = self.add_weight('bf', shape=(1, self.output_size))
        self.bi = self.add_weight('bi', shape=(1, self.output_size))
        self.bo = self.add_weight('bo', shape=(1, self.output_size))
        self.bc = self.add_weight('bc', shape=(1, self.output_size))

    def call(self, x):
        sequence_outputs = []
        for i in range(sequence_length):
            if i == 0:
                xt  = x[:, 0, :]
                ft  = tf.sigmoid(tf.matmul(xt, self.wf) + self.bf)
                it  = tf.sigmoid(tf.matmul(xt, self.wi) + self.bi)
                ot  = tf.sigmoid(tf.matmul(xt, self.wo) + self.bo)
                cht = tf.tanh(   tf.matmul(xt, self.wc) + self.bc)
                ct  = it * cht
                ht  = ot * tf.tanh(ct)

            else:
                xt  = x[:, 0, :]
                ft  = tf.sigmoid(tf.matmul(xt, self.wf) + self.bf)
                it  = tf.sigmoid(tf.matmul(xt, self.wi) + self.bi)
                ot  = tf.sigmoid(tf.matmul(xt, self.wo) + self.bo)
                cht = tf.tanh(  tf.matmul(xt, self.wc) + self.bc)
                ct  = ft * ct + it * cht
                ht  = ot * tf.tanh(ct)
                
            sequence_outputs.append(ht)
            
        sequence_outputs = tf.stack(sequence_outputs)
        sequence_outputs = tf.transpose(sequence_outputs, (1, 0, 2))
        if self.return_sequence:
            return sequence_outputs
        return sequence_outputs[:, -1, :]

### 模拟数据观察自定义LSTM层的输出结果

In [13]:
x = tf.random.uniform((batch_size, sequence_length, input_size))

In [14]:
lstm = CustomLSTM(output_size=output_size)

In [15]:
lstm(x)

<tf.Tensor: shape=(4, 20), dtype=float32, numpy=
array([[ 0.14196199, -0.27651674,  0.1837907 , -0.0373537 , -0.09249178,
        -0.1844426 ,  0.00699177,  0.20219146,  0.43124267,  0.25954372,
         0.02460214,  0.16653983,  0.49788246, -0.00882451,  0.15867671,
         0.20885237,  0.3694257 , -0.45187405,  0.02587388, -0.14198577],
       [ 0.15445489, -0.3206394 , -0.05431183, -0.22153592,  0.14122206,
        -0.11209695,  0.01091626, -0.00911266,  0.4667048 ,  0.20028926,
         0.12510265,  0.11349513,  0.44742316, -0.11969211, -0.01274685,
         0.2578714 ,  0.2570484 , -0.05326088,  0.07690178, -0.1782843 ],
       [ 0.09163525, -0.20344685, -0.23980032, -0.2547594 , -0.02468296,
        -0.05470318, -0.00934831,  0.1942564 ,  0.41320494,  0.2855691 ,
         0.04259385,  0.10165824,  0.6054762 , -0.3918743 ,  0.05062461,
         0.30940577,  0.24984838, -0.14017284,  0.1513982 , -0.17503503],
       [ 0.1671571 , -0.37962854,  0.47003105, -0.0492636 ,  0.07206415,

### 使用自定义的LSTM层使用随机数据进行训练

In [16]:
model = tf.keras.Sequential([
    CustomLSTM(output_size=32), 
    tf.keras.layers.Dense(2, activation='softmax')
])

model.compile(
    loss = tf.keras.losses.SparseCategoricalCrossentropy(), 
    optimizer = tf.keras.optimizers.Adam()
)

In [17]:
x_batch = tf.random.uniform((batch_size, sequence_length, input_size))
y_batch = tf.random.uniform((batch_size,), maxval=2, dtype=tf.int32)

In [18]:
x_batch.shape

TensorShape([4, 5, 30])

In [19]:
y_batch.shape

TensorShape([4])

In [20]:
y_batch

<tf.Tensor: shape=(4,), dtype=int32, numpy=array([1, 1, 0, 1], dtype=int32)>

In [21]:
model.train_on_batch(x_batch, y_batch)



0.8380963802337646

In [22]:
x_data = tf.random.uniform((batch_size * 1000, sequence_length, input_size))
y_data = tf.random.uniform((batch_size * 1000,), maxval=2, dtype=tf.int32)

In [23]:
model.fit(x_data, y_data, batch_size=4)



<tensorflow.python.keras.callbacks.History at 0x7f83443852d0>

In [24]:
model.fit(x_data, y_data, batch_size=4)



<tensorflow.python.keras.callbacks.History at 0x7f8344364c50>

In [25]:
model.fit(x_data, y_data, batch_size=4)



<tensorflow.python.keras.callbacks.History at 0x7f8344329bd0>

### 使用自定义LSTM层对文本数据集进行实战

In [26]:
from zh_dataset_inews import title_train, label_train, content_train, title_test, label_test, content_test

数据集zh_dataset_inews是一份新闻数据集，有标题，正文，情感倾向(0-中性, 1-积极, 2-消极)  
对它进行预览。先使用tittle进行情感分类  

In [27]:
len(title_train)

5355

In [28]:
for x, y in zip(title_train[:10], label_train[:10]):
    print(x, y)

周六晚到卖场听夜场摇滚 1
北京老教授泄露，持有山河药辅节后下跌公告，速速看看！！！ 1
张滩镇积极开展基干民兵训练活动 0
俩小伙无证骑摩托，未成年还试图闯卡！ 2
不好意思，你不配做深圳人!_搜狐汽车_搜狐网 2
蔡英文元旦升旗遇抗议 民众：枪毙蔡英文 2
巢湖市绞吸机械清淤公司重在回访-照明器材项目合作–光波网 1
出租屋半年被偷8次：整栋楼共用一个锁芯 2
从林芝到拉萨，还可以这样玩! 1
为何说奇瑞是技术达人? 看了“雄狮”你就懂了 1


In [29]:
title_train_cut = [' '.join(jieba.cut(x, cut_all=False)) for x in title_train]
title_test_cut  = [' '.join(jieba.cut(x, cut_all=False)) for x in title_test]

Building prefix dict from the default dictionary ...
Loading model from cache /tmp/jieba.cache
Loading model cost 0.911 seconds.
Prefix dict has been built successfully.


In [30]:
len(title_train_cut)

5355

In [31]:
title_train_cut[:10]

['周六 晚到 卖场 听 夜场 摇滚',
 '北京 老 教授 泄露 ， 持有 山河 药辅 节后 下跌 公告 ， 速速 看看 ！ ！ ！',
 '张滩 镇 积极开展 基干民兵 训练 活动',
 '俩 小伙 无证 骑 摩托 ， 未成年 还 试图 闯卡 ！',
 '不好意思 ， 你 不配 做 深圳 人 ! _ 搜狐 汽车 _ 搜狐网',
 '蔡 英文 元旦 升旗 遇 抗议   民众 ： 枪毙 蔡 英文',
 '巢湖市 绞吸 机械 清淤 公司 重在 回访 - 照明 器材 项目 合作 – 光波 网',
 '出租屋 半年 被 偷 8 次 ： 整栋 楼 共用 一个 锁 芯',
 '从 林芝 到 拉萨 ， 还 可以 这样 玩 !',
 '为何 说 奇瑞 是 技术 达 人 ?   看 了 “ 雄狮 ” 你 就 懂 了']

In [32]:
text_vector = tf.keras.layers.experimental.preprocessing.TextVectorization()
# 学习词表
text_vector.adapt(title_train_cut)


In [33]:
vocab_size = len(text_vector.get_vocabulary())
embedding_dim = 128

In [34]:
type(x_data)

tensorflow.python.framework.ops.EagerTensor

通过 text_vector('你 好') 和  text_vector('你好')对比发现，这里没有进行分词   

In [35]:
text_vector('你 好')

<tf.Tensor: shape=(2,), dtype=int64, numpy=array([18, 98])>

In [36]:
text_vector('你好')

<tf.Tensor: shape=(1,), dtype=int64, numpy=array([2896])>

In [37]:
title_train_text_vector = text_vector(title_train_cut) # [text_vector(x) for x in title_train_cut]
title_test_text_vector  = text_vector(title_test_cut) # [text_vector(x) for x in title_test_cut]


In [38]:
test_input_dataset = tf.data.Dataset.from_tensor_slices(title_train_text_vector)

In [39]:
title_train_text_vector[:10].shape

TensorShape([10, 44])

In [40]:
x_train = tf.convert_to_tensor(title_train_text_vector)
x_test  = tf.convert_to_tensor(title_test_text_vector)

In [41]:
type(x_train)

tensorflow.python.framework.ops.EagerTensor

In [42]:
y_train = tf.convert_to_tensor(label_train)
y_test  = tf.convert_to_tensor(label_test)

In [43]:
test_embedding_layer = tf.keras.layers.Embedding(vocab_size, embedding_dim)

In [44]:
x_train.shape

TensorShape([5355, 44])

In [45]:
x_train_embedding = test_embedding_layer(x_train)

In [46]:
x_train_embedding.shape

TensorShape([5355, 44, 128])

In [47]:
model_text = tf.keras.Sequential([
    tf.keras.layers.Embedding(vocab_size, embedding_dim),
    CustomLSTM(output_size=32), 
    tf.keras.layers.Dense(3, activation='softmax')
])

model_text.compile(
    loss = tf.keras.losses.SparseCategoricalCrossentropy(), 
    optimizer = tf.keras.optimizers.Adam(),
    metrics=['accuracy']
)

In [48]:
x_train.shape

TensorShape([5355, 44])

In [49]:
y_train.shape

TensorShape([5355])

In [50]:
model_text.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, None, 128)         2328064   
_________________________________________________________________
custom_lstm_2 (CustomLSTM)   (None, 32)                20608     
_________________________________________________________________
dense_1 (Dense)              (None, 3)                 99        
Total params: 2,348,771
Trainable params: 2,348,771
Non-trainable params: 0
_________________________________________________________________


In [51]:
history_model_text = model_text.fit(
    x_train, y_train, 
    validation_split=0.1, 
    epochs=20
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [52]:
model_text_after_embedding = tf.keras.Sequential([
    CustomLSTM(output_size=32), 
    tf.keras.layers.Dense(3, activation='softmax')
])

model_text_after_embedding.compile(
    loss = tf.keras.losses.SparseCategoricalCrossentropy(), 
    optimizer = tf.keras.optimizers.Adam(),
    metrics=['accuracy']
)

In [53]:
history_model_text_after_embedding = model_text_after_embedding.fit(
    x_train_embedding, y_train, 
    validation_split=0.1, 
    epochs=20,
    batch_size=128
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [54]:
x_train_embedding_batch = x_train_embedding[:4, :, :]
x_train_embedding_batch.shape

TensorShape([4, 44, 128])

In [55]:
y_train_embedding_batch = y_train[:4]
y_train_embedding_batch.shape

TensorShape([4])

In [56]:
model_text_after_embedding.train_on_batch(x_train_embedding_batch, y_train_embedding_batch)



[0.9775608777999878, 0.48148149251937866]

In [57]:
x_train_embedding.shape

TensorShape([5355, 44, 128])

In [58]:
model_text_after_embedding.fit(x_train_embedding, y_train, batch_size = 10)



<tensorflow.python.keras.callbacks.History at 0x7f82e45de0d0>

In [59]:
x_train_embedding.shape

TensorShape([5355, 44, 128])

In [60]:
model_text.evaluate(x_test, y_test)



[1.3021190166473389, 0.5555555820465088]

In [61]:
y_test_pred = model_text.predict(x_test)

In [62]:
len(y_test_pred.argmax(axis=1))

999

In [63]:
len(x_test)

999

In [64]:
output_check = pd.DataFrame({'title_test': title_test, 'label_test': label_test, 'y_test_pred': y_test_pred.argmax(axis=1)})

In [65]:
output_check

Unnamed: 0,title_test,label_test,y_test_pred
0,寂静之地torrent迅雷资源下载(中英双语字幕)高清版,1,1
1,大巴车与汽车在雪天相撞 汽车前部受损严重,2,1
2,乌什县燃气壁挂炉十大排名介绍-土拨鼠装修网,1,1
3,朝阳法院司法拍卖拍出26套房产,1,2
4,诵读英雄事迹,0,1
5,马兰屯镇政府孔祥全贪污腐败,2,2
6,惊心!2岁女童掉进厦门火车站站台，火车即将发车...,2,2
7,精准扶贫“拔穷根”,0,0
8,非法收购滥伐林木也构成犯罪 - 青海普法网,2,2
9,妻子去世，男子携女离家出走，失联三天后发现车辆冻在冰湖下方,2,2


In [66]:
output_check.query('label_test != y_test_pred')

Unnamed: 0,title_test,label_test,y_test_pred
1,大巴车与汽车在雪天相撞 汽车前部受损严重,2,1
3,朝阳法院司法拍卖拍出26套房产,1,2
4,诵读英雄事迹,0,1
10,“清华名师”只有小学文化 看这些人如何给给苏州家长“上套”,2,1
16,威尼斯人演出:农业部部长谈转基因食品:食用不食用 由消费者选择,1,2
23,好险！行驶中的汽车后备箱，突然掉出一个小男孩！,2,1
25,他惨遭割喉焚烧 器官衰竭 警方介入调查 竟是模仿玄幻小说？,2,1
33,【独家视频】男子开车走了神 斑马线上酿事故,2,1
35,民警李嘉顺：八年八获嘉奖，成功调解纠纷5000余起,0,1
36,喜迎十九大 忠诚保平安,0,1


In [67]:
print(1)

1


### 在model中加入textVectorization层,使其直接输入文本

#### 先定义一个分词层 

据了解，tf.keras里文本的预处理，需要先分好词。所以这里分两步走  

- 分好词的文本可以直接作为输入源，不需要自定义层应该可以实现
- 自定义一个预处理层，带上分词功能  


In [None]:
class CustomLSTM(tf.keras.layers.Layer):
    
    """
    LSTM's input: [batch_size, sequence_length, input_size]
    LSTM's output1: [batch_size, sequence_length, input_size]
           output2: [batch_size, input_size]
    """
    
    def __init__(self, output_size, return_sequence=False):
        super(CustomLSTM, self).__init__()
        self.output_size = output_size
        self.return_sequence = return_sequence
    
    def build(self, input_shape):
        super(CustomLSTM, self).build(input_shape)
        input_size = int(input_shape[-1])
        
        self.wf = self.add_weight('wf', shape=(input_size, self.output_size))
        self.wi = self.add_weight('wi', shape=(input_size, self.output_size))
        self.wo = self.add_weight('wo', shape=(input_size, self.output_size))
        self.wc = self.add_weight('wc', shape=(input_size, self.output_size))

        self.uf = self.add_weight('uf', shape=(self.output_size, self.output_size))
        self.ui = self.add_weight('ui', shape=(self.output_size, self.output_size))
        self.uo = self.add_weight('uo', shape=(self.output_size, self.output_size))
        self.uc = self.add_weight('uc', shape=(self.output_size, self.output_size))

        self.bf = self.add_weight('bf', shape=(1, self.output_size))
        self.bi = self.add_weight('bi', shape=(1, self.output_size))
        self.bo = self.add_weight('bo', shape=(1, self.output_size))
        self.bc = self.add_weight('bc', shape=(1, self.output_size))

    def call(self, x):
        sequence_outputs = []
        for i in range(sequence_length):
            if i == 0:
                xt  = x[:, 0, :]
                ft  = tf.sigmoid(tf.matmul(xt, self.wf) + self.bf)
                it  = tf.sigmoid(tf.matmul(xt, self.wi) + self.bi)
                ot  = tf.sigmoid(tf.matmul(xt, self.wo) + self.bo)
                cht = tf.tanh(   tf.matmul(xt, self.wc) + self.bc)
                ct  = it * cht
                ht  = ot * tf.tanh(ct)

            else:
                xt  = x[:, 0, :]
                ft  = tf.sigmoid(tf.matmul(xt, self.wf) + self.bf)
                it  = tf.sigmoid(tf.matmul(xt, self.wi) + self.bi)
                ot  = tf.sigmoid(tf.matmul(xt, self.wo) + self.bo)
                cht = tf.tanh(  tf.matmul(xt, self.wc) + self.bc)
                ct  = ft * ct + it * cht
                ht  = ot * tf.tanh(ct)
                
            sequence_outputs.append(ht)
            
        sequence_outputs = tf.stack(sequence_outputs)
        sequence_outputs = tf.transpose(sequence_outputs, (1, 0, 2))
        if self.return_sequence:
            return sequence_outputs
        return sequence_outputs[:, -1, :]

In [69]:
# 定义一个分词层 

class CustomTextVector(tf.keras.layers.Layer):
    
    """
    输入文本数据，分词，然后转成 Embedding层 
    """
    
    def __init__(self, output_size, vocab_size=None, return_sequence=False):
        super(CustomTextVector, self).__init__()
        self.output_size = output_size
        self.vocab_size = vocab_size
        self.return_sequence = return_sequence
    
    def build(self, input_shape):
        super(CustomTextVector, self).build(input_shape)
        input_size = int(input_shape[-1])
        
        self.wf = self.add_weight('wf', shape=(input_size, self.output_size))
        self.wi = self.add_weight('wi', shape=(input_size, self.output_size))
        self.wo = self.add_weight('wo', shape=(input_size, self.output_size))
        self.wc = self.add_weight('wc', shape=(input_size, self.output_size))

        self.uf = self.add_weight('uf', shape=(self.output_size, self.output_size))
        self.ui = self.add_weight('ui', shape=(self.output_size, self.output_size))
        self.uo = self.add_weight('uo', shape=(self.output_size, self.output_size))
        self.uc = self.add_weight('uc', shape=(self.output_size, self.output_size))

        self.bf = self.add_weight('bf', shape=(1, self.output_size))
        self.bi = self.add_weight('bi', shape=(1, self.output_size))
        self.bo = self.add_weight('bo', shape=(1, self.output_size))
        self.bc = self.add_weight('bc', shape=(1, self.output_size))

    def call(self, x):
        sequence_outputs = []
        for i in range(sequence_length):
            if i == 0:
                xt  = x[:, 0, :]
                ft  = tf.sigmoid(tf.matmul(xt, self.wf) + self.bf)
                it  = tf.sigmoid(tf.matmul(xt, self.wi) + self.bi)
                ot  = tf.sigmoid(tf.matmul(xt, self.wo) + self.bo)
                cht = tf.tanh(   tf.matmul(xt, self.wc) + self.bc)
                ct  = it * cht
                ht  = ot * tf.tanh(ct)

            else:
                xt  = x[:, 0, :]
                ft  = tf.sigmoid(tf.matmul(xt, self.wf) + self.bf)
                it  = tf.sigmoid(tf.matmul(xt, self.wi) + self.bi)
                ot  = tf.sigmoid(tf.matmul(xt, self.wo) + self.bo)
                cht = tf.tanh(  tf.matmul(xt, self.wc) + self.bc)
                ct  = ft * ct + it * cht
                ht  = ot * tf.tanh(ct)
                
            sequence_outputs.append(ht)
            
        sequence_outputs = tf.stack(sequence_outputs)
        sequence_outputs = tf.transpose(sequence_outputs, (1, 0, 2))
        if self.return_sequence:
            return sequence_outputs
        return sequence_outputs[:, -1, :]

In [70]:
model_direct_text = tf.keras.Sequential(
    tf.keras.layers.Embedding(vocab_size, embedding_dim),
    CustomLSTM(output_size=32), 
    tf.keras.layers.Dense(3, activation='softmax')
])

model_direct_text.compile(
    loss = tf.keras.losses.SparseCategoricalCrossentropy(), 
    optimizer = tf.keras.optimizers.Adam(),
    metrics=['accuracy']
)