## LSTM by Hand

来源: https://www.bilibili.com/video/BV1FV41117Uz/

In [1]:
import tensorflow as tf
import numpy as np
import jieba

In [2]:
jieba.enable_paddle()

Paddle enabled successfully......


In [3]:
batch_size = 4
sequence_length = 5
input_size = 30
output_size = 20

x = tf.random.uniform((batch_size, sequence_length, input_size))

In [4]:
x.shape

TensorShape([4, 5, 30])

In [5]:
# LSTM's input: [batch_size, sequence_length, input_size]
# LSTM's output1: [batch_size, sequence_length, input_size]
#        output2: [batch_size, input_size]


In [6]:
xt = x[:, 0, :]

In [7]:
xt.shape

TensorShape([4, 30])

In [8]:
wf = tf.random.uniform((input_size, output_size))
wi = tf.random.uniform((input_size, output_size))
wo = tf.random.uniform((input_size, output_size))
wc = tf.random.uniform((input_size, output_size))

uf = tf.random.uniform((output_size, output_size))
ui = tf.random.uniform((output_size, output_size))
uo = tf.random.uniform((output_size, output_size))
uc = tf.random.uniform((output_size, output_size))

bf = tf.random.uniform((1, output_size))
bi = tf.random.uniform((1, output_size))
bo = tf.random.uniform((1, output_size))
bc = tf.random.uniform((1, output_size))



In [9]:
sequence_outputs = []
for i in range(sequence_length):

    if i == 0:
        xt = x[:, 0, :]
        ft = tf.sigmoid(tf.matmul(xt, wf) + bf)
        it = tf.sigmoid(tf.matmul(xt, wi) + bi)
        ot = tf.sigmoid(tf.matmul(xt, wo) + bo)
        cht = tf.tanh(tf.matmul(xt, wc) + bc)

        ct = it * cht
        ht = ot * tf.tanh(ct)
    
    else:
        xt = x[:, 0, :]
        ft = tf.sigmoid(tf.matmul(xt, wf) + bf)
        it = tf.sigmoid(tf.matmul(xt, wi) + bi)
        ot = tf.sigmoid(tf.matmul(xt, wo) + bo)
        cht = tf.tanh(tf.matmul(xt, wc) + bc)

        ct = ft * ct + it * cht
        ht = ot * tf.tanh(ct)
    
    sequence_outputs.append(ht)

In [10]:
sequence_outputs = tf.stack(sequence_outputs)
sequence_outputs = tf.transpose(sequence_outputs, (1, 0, 2))

In [11]:
sequence_outputs

<tf.Tensor: shape=(4, 5, 20), dtype=float32, numpy=
array([[[0.75968313, 0.7612307 , 0.7609331 , 0.7608878 , 0.76081806,
         0.7610073 , 0.760796  , 0.76098645, 0.76131004, 0.7606621 ,
         0.76035756, 0.76084065, 0.7612073 , 0.7608161 , 0.760411  ,
         0.7610628 , 0.7610523 , 0.7607056 , 0.7611117 , 0.76004195],
        [0.9616596 , 0.9637002 , 0.9633146 , 0.9633101 , 0.963115  ,
         0.96330076, 0.963086  , 0.96334845, 0.9634829 , 0.96290797,
         0.96283597, 0.9633642 , 0.9636256 , 0.9633425 , 0.96293634,
         0.9635948 , 0.96340144, 0.9630325 , 0.96349764, 0.9626773 ],
        [0.9926386 , 0.99477166, 0.9944598 , 0.99443686, 0.9941524 ,
         0.9944042 , 0.99414885, 0.9944644 , 0.9947988 , 0.99392563,
         0.99400395, 0.9945192 , 0.99468374, 0.99447703, 0.99409807,
         0.9947011 , 0.9945429 , 0.99406916, 0.9945493 , 0.9938628 ],
        [0.9969111 , 0.99905956, 0.9987818 , 0.9987481 , 0.99843496,
         0.9987157 , 0.9984424 , 0.9987761 , 0.9

In [12]:
class CustomLSTM(tf.keras.layers.Layer):
    
    """
    LSTM's input: [batch_size, sequence_length, input_size]
    LSTM's output1: [batch_size, sequence_length, input_size]
           output2: [batch_size, input_size]
    """
    
    def __init__(self, output_size, return_sequence=False):
        super(CustomLSTM, self).__init__()
        self.output_size = output_size
        self.return_sequence = return_sequence
    
    def build(self, input_shape):
        super(CustomLSTM, self).build(input_shape)
        input_size = int(input_shape[-1])
        
        self.wf = self.add_weight('wf', shape=(input_size, self.output_size))
        self.wi = self.add_weight('wi', shape=(input_size, self.output_size))
        self.wo = self.add_weight('wo', shape=(input_size, self.output_size))
        self.wc = self.add_weight('wc', shape=(input_size, self.output_size))

        self.uf = self.add_weight('uf', shape=(self.output_size, self.output_size))
        self.ui = self.add_weight('ui', shape=(self.output_size, self.output_size))
        self.uo = self.add_weight('uo', shape=(self.output_size, self.output_size))
        self.uc = self.add_weight('uc', shape=(self.output_size, self.output_size))

        self.bf = self.add_weight('bf', shape=(1, self.output_size))
        self.bi = self.add_weight('bi', shape=(1, self.output_size))
        self.bo = self.add_weight('bo', shape=(1, self.output_size))
        self.bc = self.add_weight('bc', shape=(1, self.output_size))

    def call(self, x):
        sequence_outputs = []
        for i in range(sequence_length):
            if i == 0:
                xt  = x[:, 0, :]
                ft  = tf.sigmoid(tf.matmul(xt, self.wf) + self.bf)
                it  = tf.sigmoid(tf.matmul(xt, self.wi) + self.bi)
                ot  = tf.sigmoid(tf.matmul(xt, self.wo) + self.bo)
                cht = tf.tanh(   tf.matmul(xt, self.wc) + self.bc)
                ct  = it * cht
                ht  = ot * tf.tanh(ct)

            else:
                xt  = x[:, 0, :]
                ft  = tf.sigmoid(tf.matmul(xt, self.wf) + self.bf)
                it  = tf.sigmoid(tf.matmul(xt, self.wi) + self.bi)
                ot  = tf.sigmoid(tf.matmul(xt, self.wo) + self.bo)
                cht = tf.tanh(  tf.matmul(xt, self.wc) + self.bc)
                ct  = ft * ct + it * cht
                ht  = ot * tf.tanh(ct)
                
            sequence_outputs.append(ht)
            
        sequence_outputs = tf.stack(sequence_outputs)
        sequence_outputs = tf.transpose(sequence_outputs, (1, 0, 2))
        if self.return_sequence:
            return sequence_outputs
        return sequence_outputs[:, -1, :]

In [13]:
x = tf.random.uniform((batch_size, sequence_length, input_size))

In [14]:
lstm = CustomLSTM(output_size=output_size)

In [15]:
lstm(x)

<tf.Tensor: shape=(4, 20), dtype=float32, numpy=
array([[-0.02465984,  0.24337918, -0.01038667,  0.18872677,  0.25175014,
         0.21659058, -0.3161729 , -0.02962111, -0.11237445,  0.52755046,
        -0.51885635, -0.24678676,  0.26010892, -0.19956557,  0.44766775,
         0.07805736,  0.56003   ,  0.17517935,  0.12646793, -0.17871073],
       [ 0.08170585,  0.34328038, -0.2221587 ,  0.1857989 ,  0.34343675,
         0.22039437, -0.13987061, -0.03365904, -0.04686617,  0.30995008,
        -0.48257637, -0.21904732,  0.21090023, -0.19042602,  0.5343647 ,
         0.19197959,  0.4998687 ,  0.06068008, -0.01904575, -0.0856628 ],
       [-0.20344037,  0.14708847, -0.01317312,  0.18910876,  0.2624301 ,
         0.20066454, -0.25570446,  0.04223626, -0.0492571 ,  0.4126082 ,
        -0.5265868 , -0.07792316,  0.16856897, -0.19315873,  0.61615115,
        -0.19891731,  0.55206233,  0.19993803,  0.20309655, -0.0655907 ],
       [ 0.03935883,  0.31329268, -0.13026157,  0.14405465,  0.4169662 ,

In [16]:
model = tf.keras.Sequential([
    CustomLSTM(output_size=32), 
    tf.keras.layers.Dense(2, activation='softmax')
])

model.compile(
    loss = tf.keras.losses.SparseCategoricalCrossentropy(), 
    optimizer = tf.keras.optimizers.Adam()
)

In [17]:
model.summary()

ValueError: This model has not yet been built. Build the model first by calling `build()` or calling `fit()` with some data, or specify an `input_shape` argument in the first layer(s) for automatic build.

In [None]:
x_batch = tf.random.uniform((batch_size, sequence_length, input_size))
y_batch = tf.random.uniform((batch_size,), maxval=2, dtype=tf.int32)

In [None]:
x_batch.shape

In [None]:
y_batch.shape

In [None]:
y_batch

In [None]:
model.train_on_batch(x_batch, y_batch)

In [None]:
x_data = tf.random.uniform((batch_size * 1000, sequence_length, input_size))
y_data = tf.random.uniform((batch_size * 1000,), maxval=2, dtype=tf.int32)

In [None]:
model.fit(x_data, y_data, batch_size=4)

In [None]:
model.fit(x_data, y_data, batch_size=4)

In [None]:
model.fit(x_data, y_data, batch_size=4)

In [None]:
from zh_dataset_inews import title_train, label_train, content_train, title_test, label_test, content_test

In [None]:
title_train_cut = [' '.join(jieba.cut(x, cut_all=False)) for x in title_train]
title_test_cut  = [' '.join(jieba.cut(x, cut_all=False)) for x in title_test]

In [None]:
len(title_train_cut)

In [None]:
title_train_cut[:10]

In [None]:
text_vector = tf.keras.layers.experimental.preprocessing.TextVectorization()
# 学习词表
text_vector.adapt(title_train_cut)


In [None]:
type(x_data)

通过 text_vector('你 好') 和  text_vector('你好')对比发现，这里没有进行分词   

In [None]:
text_vector('你 好')

In [None]:
text_vector('你好')

In [None]:
title_train_text_vector = text_vector(title_train_cut) # [text_vector(x) for x in title_train_cut]
title_test_text_vector  = text_vector(title_test_cut) # [text_vector(x) for x in title_test_cut]


In [None]:
title_train_text_vector[:10].shape

In [None]:
x_train = tf.convert_to_tensor(title_train_text_vector)
x_test  = tf.convert_to_tensor(title_test_text_vector)

In [None]:
type(x_train)

In [None]:
y_train = tf.convert_to_tensor(label_train)
y_test  = tf.convert_to_tensor(label_test)

In [None]:
model_text = tf.keras.Sequential([
    CustomLSTM(output_size=32), 
    tf.keras.layers.Dense(2, activation='softmax')
])

model_text.compile(
    loss = tf.keras.losses.SparseCategoricalCrossentropy(), 
    optimizer = tf.keras.optimizers.Adam()
)

In [None]:
x_train.shape

In [None]:
y_train.shape

In [None]:
model_text.summary()

In [None]:
history_model_text = model_text.fit(x_train, y_train, )