In [1]:
from __future__ import absolute_import, division, print_function

# Import TensorFlow v2.
import tensorflow as tf
from tensorflow.keras import Model, layers
import numpy as np

In [3]:
# ---- Dataset (MNIST) ----
# Ten digit classes (0-9); each image is 28x28 pixels, i.e. 784 values.
num_classes = 10
num_features = 28 * 28

# ---- Training ----
learning_rate = 1e-3   # passed to the optimizer
training_steps = 1000  # number of mini-batches drawn by the training loop
batch_size = 32        # samples per mini-batch
display_step = 100     # metrics are printed every `display_step` steps

# ---- Network ----
# A 28x28 image is treated as a sequence of 28 timesteps with 28 inputs each.
timesteps = 28
num_input = 28
num_units = 32  # number of units in the LSTM layer

In [4]:
# Prepare MNIST data.
from tensorflow.keras.datasets import mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
# Convert to float32.
x_train, x_test = np.array(x_train, np.float32), np.array(x_test, np.float32)
# Shape BOTH splits as sequences of `timesteps` rows with `num_input` values each.
# The test split was previously flattened to 784 features, which does not match
# the (batch, timesteps, num_input) layout the training split uses for the LSTM.
x_train = x_train.reshape([-1, timesteps, num_input])
x_test = x_test.reshape([-1, timesteps, num_input])
# Normalize images value from [0, 255] to [0, 1].
x_train, x_test = x_train / 255., x_test / 255.

In [12]:
# class MyRNN(Model):
#     def __init__(self,hidden_size,output_nums):
#         super(MyRNN,self).__init__()
#         self.hidden_state = layers.RNN(layers.SimpleRNNCell(units=hidden_size)) 
#         self.out = layers.Dense(output_nums)
    
#     def call(self,x,is_training=False):
#         h = self.hidden_state(x)
#         out = self.out(h)
#         if not is_training:
#             out = tf.nn.softmax(out)  
#         return out
# rnn_net = MyRNN(num_units,num_classes)

In [35]:
# Create LSTM Model.
class LSTM(Model):
    """Classifier made of one LSTM layer followed by a dense output layer.

    Layer sizes come from the module-level `num_units` and `num_classes`.
    """

    def __init__(self):
        super().__init__()
        # Recurrent hidden layer.
        self.lstm_layer = layers.LSTM(units=num_units)
        # Dense layer producing one score per class.
        self.out = layers.Dense(num_classes)

    def call(self, x, is_training=False):
        """Run the forward pass.

        Args:
            x: input batch, handed directly to the LSTM layer.
            is_training: if truthy, return the dense layer's raw outputs
                (logits); otherwise return their softmax.

        Returns:
            Logits when training, softmax of the logits otherwise.
        """
        logits = self.out(self.lstm_layer(x))
        # The cross-entropy loss takes logits, so softmax is skipped in training.
        if is_training:
            return logits
        return tf.nn.softmax(logits)


# Build LSTM model.
rnn_net = LSTM()

In [33]:
def cross_loss(x,y):
    """Return the mean sparse softmax cross-entropy of logits `x` against labels `y`.

    `y` is cast to int64 before being used as the label tensor.
    """
    labels = tf.cast(y, tf.int64)
    per_example = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=x)
    return tf.reduce_mean(per_example)
def accuracy(y_pred,y):
    """Return the fraction of entries where argmax of `y_pred` (axis 1) equals `y`.

    `y` is cast to int64 so it can be compared with the argmax indices.
    """
    predicted = tf.argmax(y_pred, axis=1)
    expected = tf.cast(y, tf.int64)
    hits = tf.cast(tf.equal(predicted, expected), tf.float32)
    return tf.reduce_mean(hits, axis=-1)

# Adam optimizer used by every call to train_step.
optimizer = tf.optimizers.Adam(learning_rate)


def train_step(x,y):
    """Apply one optimizer update to `rnn_net` from batch `x` with labels `y`."""
    # Record the forward pass so the loss can be differentiated afterwards.
    with tf.GradientTape() as tape:
        logits = rnn_net(x, is_training=True)
        batch_loss = cross_loss(logits, y)
    variables = rnn_net.trainable_variables
    gradients = tape.gradient(batch_loss, variables)
    optimizer.apply_gradients(zip(gradients, variables))
    

In [30]:
# Input pipeline: repeat the training pairs indefinitely, shuffle with a
# 5000-element buffer, then group into mini-batches of `batch_size`.
train_data = (
    tf.data.Dataset.from_tensor_slices((x_train, y_train))
    .repeat()
    .shuffle(5000)
    .batch(batch_size)
)



In [36]:
## RUN Training
# Draw exactly `training_steps` batches; steps are numbered starting at 1.
for step, (batch_x, batch_y) in enumerate(train_data.take(training_steps), start=1):
    train_step(batch_x, batch_y)
    # Metrics are only reported on every `display_step`-th step.
    if step % display_step != 0:
        continue
    # Re-run the updated model on the same batch to get logits for the metrics.
    pred = rnn_net(batch_x, is_training=True)
    loss = cross_loss(pred, batch_y)
    acc = accuracy(pred, batch_y)
    print("step: %i, loss: %f, accuracy: %f" % (step, loss, acc))

step: 100, loss: 1.042739, accuracy: 0.687500
step: 200, loss: 0.577597, accuracy: 0.843750
step: 300, loss: 0.454180, accuracy: 0.843750
step: 400, loss: 0.644746, accuracy: 0.750000
step: 500, loss: 0.363440, accuracy: 0.875000
step: 600, loss: 0.320509, accuracy: 0.968750
step: 700, loss: 0.170768, accuracy: 0.968750
step: 800, loss: 0.319935, accuracy: 0.937500
step: 900, loss: 0.246423, accuracy: 0.937500
step: 1000, loss: 0.285533, accuracy: 0.843750
