## LSTM by Hand

来源: https://www.bilibili.com/video/BV1FV41117Uz/

In [97]:
import tensorflow as tf
import numpy as np

In [3]:
batch_size = 4
sequence_length = 5
input_size = 30
output_size = 20

x = tf.random.uniform((batch_size, sequence_length, input_size))

In [4]:
x.shape

TensorShape([4, 5, 30])

In [None]:
# LSTM's input: [batch_size, sequence_length, input_size]
# LSTM's output1: [batch_size, sequence_length, input_size]
#        output2: [batch_size, input_size]


In [5]:
xt = x[:, 0, :]

In [6]:
xt.shape

TensorShape([4, 30])

In [7]:
wf = tf.random.uniform((input_size, output_size))
wi = tf.random.uniform((input_size, output_size))
wo = tf.random.uniform((input_size, output_size))
wc = tf.random.uniform((input_size, output_size))

uf = tf.random.uniform((output_size, output_size))
ui = tf.random.uniform((output_size, output_size))
uo = tf.random.uniform((output_size, output_size))
uc = tf.random.uniform((output_size, output_size))

bf = tf.random.uniform((1, output_size))
bi = tf.random.uniform((1, output_size))
bo = tf.random.uniform((1, output_size))
bc = tf.random.uniform((1, output_size))



In [15]:
sequence_outputs = []
for i in range(sequence_length):

    if i == 0:
        xt = x[:, 0, :]
        ft = tf.sigmoid(tf.matmul(xt, wf) + bf)
        it = tf.sigmoid(tf.matmul(xt, wi) + bi)
        ot = tf.sigmoid(tf.matmul(xt, wo) + bo)
        cht = tf.tanh(tf.matmul(xt, wc) + bc)

        ct = it * cht
        ht = ot * tf.tanh(ct)
    
    else:
        xt = x[:, 0, :]
        ft = tf.sigmoid(tf.matmul(xt, wf) + bf)
        it = tf.sigmoid(tf.matmul(xt, wi) + bi)
        ot = tf.sigmoid(tf.matmul(xt, wo) + bo)
        cht = tf.tanh(tf.matmul(xt, wc) + bc)

        ct = ft * ct + it * cht
        ht = ot * tf.tanh(ct)
    
    sequence_outputs.append(ht)

In [16]:
sequence_outputs = tf.stack(sequence_outputs)
sequence_outputs = tf.transpose(sequence_outputs, (1, 0, 2))

In [17]:
sequence_outputs

<tf.Tensor: shape=(4, 5, 20), dtype=float32, numpy=
array([[[0.7615513 , 0.76152027, 0.7611937 , 0.7612968 , 0.7612811 ,
         0.76152354, 0.7615123 , 0.7612592 , 0.76102453, 0.76138854,
         0.76132154, 0.7615129 , 0.76154405, 0.76156265, 0.76156324,
         0.7615773 , 0.7614139 , 0.7615132 , 0.7615583 , 0.76134163],
        [0.9639812 , 0.9639275 , 0.9637301 , 0.96366656, 0.96380603,
         0.96397704, 0.9639306 , 0.9637829 , 0.9635306 , 0.96377355,
         0.9636911 , 0.96393913, 0.963963  , 0.96398157, 0.96397525,
         0.9640142 , 0.96385056, 0.96396244, 0.963987  , 0.9638627 ],
        [0.99502003, 0.99497443, 0.99482745, 0.9946906 , 0.9948806 ,
         0.9950189 , 0.99496067, 0.9948606 , 0.994611  , 0.9947966 ,
         0.99471253, 0.99497205, 0.9950144 , 0.9950233 , 0.9950278 ,
         0.99504524, 0.99489224, 0.9950013 , 0.9950253 , 0.994935  ],
        [0.9992992 , 0.9992583 , 0.99912107, 0.998966  , 0.9991663 ,
         0.9992976 , 0.9992369 , 0.9991473 , 0.9

In [62]:
class CustomLSTM(tf.keras.layers.Layer):
    
    def __init__(self, output_size, return_sequence=False):
        super(CustomLSTM, self).__init__()
        self.output_size = output_size
        self.return_sequence = return_sequence
    
    def build(self, input_shape):
        super(CustomLSTM, self).build(input_shape)
        input_size = int(input_shape[-1])
        
        self.wf = self.add_weight('wf', shape=(input_size, self.output_size))
        self.wi = self.add_weight('wi', shape=(input_size, self.output_size))
        self.wo = self.add_weight('wo', shape=(input_size, self.output_size))
        self.wc = self.add_weight('wc', shape=(input_size, self.output_size))

        self.uf = self.add_weight('uf', shape=(self.output_size, self.output_size))
        self.ui = self.add_weight('ui', shape=(self.output_size, self.output_size))
        self.uo = self.add_weight('uo', shape=(self.output_size, self.output_size))
        self.uc = self.add_weight('uc', shape=(self.output_size, self.output_size))

        self.bf = self.add_weight('bf', shape=(1, self.output_size))
        self.bi = self.add_weight('bi', shape=(1, self.output_size))
        self.bo = self.add_weight('bo', shape=(1, self.output_size))
        self.bc = self.add_weight('bc', shape=(1, self.output_size))

    def call(self, x):
        sequence_outputs = []
        for i in range(sequence_length):
            if i == 0:
                xt  = x[:, 0, :]
                ft  = tf.sigmoid(tf.matmul(xt, self.wf) + self.bf)
                it  = tf.sigmoid(tf.matmul(xt, self.wi) + self.bi)
                ot  = tf.sigmoid(tf.matmul(xt, self.wo) + self.bo)
                cht = tf.tanh(   tf.matmul(xt, self.wc) + self.bc)
                ct  = it * cht
                ht  = ot * tf.tanh(ct)

            else:
                xt  = x[:, 0, :]
                ft  = tf.sigmoid(tf.matmul(xt, self.wf) + self.bf)
                it  = tf.sigmoid(tf.matmul(xt, self.wi) + self.bi)
                ot  = tf.sigmoid(tf.matmul(xt, self.wo) + self.bo)
                cht = tf.tanh(  tf.matmul(xt, self.wc) + self.bc)
                ct  = ft * ct + it * cht
                ht  = ot * tf.tanh(ct)
                
            sequence_outputs.append(ht)
            
        sequence_outputs = tf.stack(sequence_outputs)
        sequence_outputs = tf.transpose(sequence_outputs, (1, 0, 2))
        if self.return_sequence:
            return sequence_outputs
        return sequence_outputs[:, -1, :]

In [63]:
x = tf.random.uniform((batch_size, sequence_length, input_size))

In [64]:
lstm = CustomLSTM(output_size=output_size)

In [65]:
lstm(x)

<tf.Tensor: shape=(4, 20), dtype=float32, numpy=
array([[ 0.17559315, -0.0033465 ,  0.19498971,  0.20370719,  0.555604  ,
        -0.11497947, -0.12907541, -0.44550073, -0.31511465,  0.30270946,
         0.10547545,  0.03975537,  0.0846829 , -0.6350818 ,  0.21577433,
         0.17099082,  0.03680279,  0.01804884, -0.00656928,  0.21040186],
       [ 0.27549097,  0.38219866,  0.3293894 ,  0.12697959,  0.46211892,
         0.1328001 , -0.15074047, -0.42947242, -0.4036346 ,  0.08333913,
        -0.0364337 , -0.01039509,  0.43182087, -0.7040502 ,  0.2583863 ,
         0.0117531 , -0.1063024 ,  0.34903434,  0.2475179 ,  0.10472149],
       [ 0.13400528,  0.29809576,  0.12833905,  0.27291787,  0.30175376,
        -0.09728647, -0.10244128, -0.36168754, -0.39647517,  0.2241841 ,
        -0.1136786 , -0.01113822,  0.46097702, -0.8099291 ,  0.32965016,
         0.16833295,  0.0230644 ,  0.3228824 ,  0.04792258,  0.04600074],
       [ 0.1801043 ,  0.14192995,  0.1562702 ,  0.15627138,  0.3975749 ,

In [66]:
model = tf.keras.Sequential([
    CustomLSTM(output_size=32), 
    tf.keras.layers.Dense(2, activation='softmax')
])

model.compile(
    loss = tf.keras.losses.SparseCategoricalCrossentropy(), 
    optimizer = tf.keras.optimizers.Adam()
)

In [67]:
x_batch = tf.random.uniform((batch_size, sequence_length, input_size))
y_batch = tf.random.uniform((batch_size,), maxval=2, dtype=tf.int32)

In [70]:
model.train_on_batch(x_batch, y_batch)

0.6908512711524963

In [71]:
x_data = tf.random.uniform((batch_size * 1000, sequence_length, input_size))
y_data = tf.random.uniform((batch_size * 1000,), maxval=2, dtype=tf.int32)

In [72]:
model.fit(x_data, y_data, batch_size=4)



<tensorflow.python.keras.callbacks.History at 0x7fe53baa0d30>

In [73]:
model.fit(x_data, y_data, batch_size=4)



<tensorflow.python.keras.callbacks.History at 0x7fe53baa0b00>

In [74]:
model.fit(x_data, y_data, batch_size=4)



<tensorflow.python.keras.callbacks.History at 0x7fe53d49c588>

In [75]:
from zh_dataset_inews import title_train, label_train, content_train, title_test, label_test, content_test

In [76]:
text_vector = tf.keras.layers.experimental.preprocessing.TextVectorization()
# 学习词表
text_vector.adapt(content_train)


In [105]:
type(x_data)

tensorflow.python.framework.ops.EagerTensor

In [106]:
x_train = tf.convert_to_tensor(text_vector(content_train))
x_test  = tf.convert_to_tensor(text_vector(content_test))

In [107]:
type(x_train)

tensorflow.python.framework.ops.EagerTensor

In [114]:
y_train = tf.convert_to_tensor(label_train)
y_test  = tf.convert_to_tensor(label_test)

In [115]:
model_text = tf.keras.Sequential([
    CustomLSTM(output_size=32), 
    tf.keras.layers.Dense(3, activation='softmax')
])

model_text.compile(
    loss = tf.keras.losses.SparseCategoricalCrossentropy(), 
    optimizer = tf.keras.optimizers.Adam()
)

In [116]:
x_train.shape

TensorShape([5355, 921])

In [117]:
y_train.shape

TensorShape([5355])

In [118]:
history_model_text = model_text.fit(x_train, y_train, batch_size=100)

ValueError: in user code:

    /Users/weirain/anaconda3/envs/tf2.0/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py:806 train_function  *
        return step_function(self, iterator)
    <ipython-input-62-2df1baacde32>:31 call  *
        xt  = x[:, 0, :]
    /Users/weirain/anaconda3/envs/tf2.0/lib/python3.7/site-packages/tensorflow/python/util/dispatch.py:201 wrapper
        return target(*args, **kwargs)
    /Users/weirain/anaconda3/envs/tf2.0/lib/python3.7/site-packages/tensorflow/python/ops/array_ops.py:1024 _slice_helper
        name=name)
    /Users/weirain/anaconda3/envs/tf2.0/lib/python3.7/site-packages/tensorflow/python/util/dispatch.py:201 wrapper
        return target(*args, **kwargs)
    /Users/weirain/anaconda3/envs/tf2.0/lib/python3.7/site-packages/tensorflow/python/ops/array_ops.py:1196 strided_slice
        shrink_axis_mask=shrink_axis_mask)
    /Users/weirain/anaconda3/envs/tf2.0/lib/python3.7/site-packages/tensorflow/python/ops/gen_array_ops.py:10352 strided_slice
        shrink_axis_mask=shrink_axis_mask, name=name)
    /Users/weirain/anaconda3/envs/tf2.0/lib/python3.7/site-packages/tensorflow/python/framework/op_def_library.py:744 _apply_op_helper
        attrs=attr_protos, op_def=op_def)
    /Users/weirain/anaconda3/envs/tf2.0/lib/python3.7/site-packages/tensorflow/python/framework/func_graph.py:593 _create_op_internal
        compute_device)
    /Users/weirain/anaconda3/envs/tf2.0/lib/python3.7/site-packages/tensorflow/python/framework/ops.py:3485 _create_op_internal
        op_def=op_def)
    /Users/weirain/anaconda3/envs/tf2.0/lib/python3.7/site-packages/tensorflow/python/framework/ops.py:1975 __init__
        control_input_ops, op_def)
    /Users/weirain/anaconda3/envs/tf2.0/lib/python3.7/site-packages/tensorflow/python/framework/ops.py:1815 _create_c_op
        raise ValueError(str(e))

    ValueError: Index out of range using input dim 2; input has only 2 dims for '{{node sequential_11/custom_lstm_18/strided_slice}} = StridedSlice[Index=DT_INT32, T=DT_INT64, begin_mask=5, ellipsis_mask=0, end_mask=5, new_axis_mask=0, shrink_axis_mask=2](IteratorGetNext, sequential_11/custom_lstm_18/strided_slice/stack, sequential_11/custom_lstm_18/strided_slice/stack_1, sequential_11/custom_lstm_18/strided_slice/stack_2)' with input shapes: [?,921], [3], [3], [3] and with computed input tensors: input[3] = <1 1 1>.
