In [1]:
# importing necessary libraries
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.datasets import mnist

In [2]:
# loading the dataset
# load_data() hands back two (inputs, labels) pairs, one for training and one
# for testing; the first call downloads mnist.npz (see the cell output).
(x_train, y_train), (x_test, y_test) = mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [3]:
# Cast both image splits to float32, divide by 255 and lay them out as
# (samples, 28, 28) so the LSTM sees each image as 28 timesteps of 28 features.
# Note: the arrays are rebound under the same names, so re-running this cell
# without reloading the data divides by 255 a second time.
x_train, x_test = (
    split.reshape([-1, 28, 28]).astype("float32") / 255.0
    for split in (x_train, x_test)
)

In [4]:
# defining the model:
# Start from an empty Sequential container; the layers are appended one at a
# time with model.add() in the cells that follow.
model = keras.Sequential()

In [5]:
# adding input layer:
# The shape excludes the batch axis: None leaves the number of timesteps
# unspecified and 28 is the feature size of each step (presumably one image
# row per timestep, given the (-1, 28, 28) reshape of the inputs).
model.add(keras.Input(shape=(None, 28)))

In [6]:
# adding first LSTM layer with return sequences as True and relu activation function:
# return_sequences=True emits the output at every timestep (shape
# (None, None, 256) in the model summary) so the next LSTM receives a sequence.
# NOTE(review): the Keras LSTM docs list activation == tanh as a requirement
# for the fused cuDNN kernel, so activation="relu" presumably forces the slower
# generic implementation on GPU — confirm this trade-off is intended.
model.add(layers.LSTM(256, return_sequences=True, activation="relu"))



In [7]:
# adding second LSTM layer with name as lstm_layer2:
# return_sequences is not set here, so only the final timestep's output is
# passed on (shape (None, 256) in the model summary).
model.add(layers.LSTM(256, name="lstm_layer2"))

In [8]:
# adding output layer with 10 nodes:
# No activation is set, so this layer emits raw logits; the loss passed to
# model.compile() is built with from_logits=True to match.
model.add(layers.Dense(10))

In [9]:
# printing the model summary:
# summary() prints the layer table itself and returns None, so it is called
# directly; wrapping it in print() only appends a stray "None" line to the
# cell output.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, None, 256)         291840    
                                                                 
 lstm_layer2 (LSTM)          (None, 256)               525312    
                                                                 
 dense (Dense)               (None, 10)                2570      
                                                                 
Total params: 819,722
Trainable params: 819,722
Non-trainable params: 0
_________________________________________________________________
None


In [10]:
# compiling the model:
# - loss: integer class labels against raw logits (the Dense output layer has
#   no activation, hence from_logits=True).
# - optimizer: Adam configured through `learning_rate`; the old `lr` alias is
#   deprecated and is rejected by newer Keras releases.
# - metrics: classification accuracy reported alongside the loss.
model.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    metrics=["accuracy"],
)



In [11]:
# training the model:
# Keep the returned History object so the per-epoch loss/accuracy remain
# available (history.history) for later inspection or plotting; assigning it
# also stops the cell from echoing a memory-address-bearing History repr.
# verbose=2 prints one summary line per epoch.
history = model.fit(x_train, y_train, batch_size=64, epochs=10, verbose=2)

Epoch 1/10
938/938 - 62s - loss: 0.2813 - accuracy: 0.9077 - 62s/epoch - 66ms/step
Epoch 2/10
938/938 - 52s - loss: 0.0863 - accuracy: 0.9732 - 52s/epoch - 55ms/step
Epoch 3/10
938/938 - 52s - loss: 0.0607 - accuracy: 0.9811 - 52s/epoch - 55ms/step
Epoch 4/10
938/938 - 53s - loss: 0.0475 - accuracy: 0.9855 - 53s/epoch - 57ms/step
Epoch 5/10
938/938 - 51s - loss: 0.0385 - accuracy: 0.9879 - 51s/epoch - 54ms/step
Epoch 6/10
938/938 - 52s - loss: 0.0340 - accuracy: 0.9894 - 52s/epoch - 55ms/step
Epoch 7/10
938/938 - 52s - loss: 0.0291 - accuracy: 0.9908 - 52s/epoch - 56ms/step
Epoch 8/10
938/938 - 53s - loss: 0.0257 - accuracy: 0.9919 - 53s/epoch - 57ms/step
Epoch 9/10
938/938 - 51s - loss: 0.0221 - accuracy: 0.9931 - 51s/epoch - 54ms/step
Epoch 10/10
938/938 - 52s - loss: 0.0204 - accuracy: 0.9934 - 52s/epoch - 56ms/step


<keras.callbacks.History at 0x7f08a04c87c0>

In [12]:
# evaluating the model:
# Scores the trained model on the test split; the value displayed by the cell
# is [loss, accuracy], matching the loss and metrics given to model.compile().
model.evaluate(x_test, y_test, batch_size=64, verbose=2)

157/157 - 2s - loss: 0.0302 - accuracy: 0.9911 - 2s/epoch - 12ms/step


[0.0302263330668211, 0.991100013256073]