In [1]:
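# Import libraries. TF_CPP_MIN_LOG_LEVEL is set before TensorFlow is imported so that its C++ INFO and WARNING log messages are suppressed: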
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.datasets import mnist

In [2]:
# Fetch the MNIST digit images and labels (downloaded on first use), already
# split into training and test sets:
(
    (x_train, y_train),
    (x_test, y_test),
) = mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [3]:
# Present every image as a sequence of 28 rows with 28 features each, cast to
# float32 and divide by 255 to scale the pixel values.
# Note: this cell overwrites x_train / x_test, so running it a second time
# divides by 255 again -- re-run the loading cell first.
x_train, x_test = (
    images.reshape([-1, 28, 28]).astype("float32") / 255.0
    for images in (x_train, x_test)
)

In [4]:
# Define model architecture: start from an empty Sequential container; the
# layers are appended one at a time with model.add() in the cells below.
model = keras.Sequential()

In [5]:
# Declare the model input: sequences of any length (None time steps) whose
# steps each carry 28 features -- one image row per step.
model.add(
    keras.Input(shape=(None, 28))
)

In [6]:
# First recurrent block: a bidirectional LSTM with 256 units per direction and
# relu activation. return_sequences=True keeps the output for every time step
# so the next recurrent layer receives a full sequence.
# NOTE(review): a non-default activation likely rules out the cuDNN-fused LSTM
# kernel on GPU -- confirm against the Keras LSTM documentation.
model.add(
    layers.Bidirectional(
        layers.LSTM(units=256, return_sequences=True, activation="relu")
    )
)




In [7]:
# Second recurrent block: a bidirectional LSTM with 256 units per direction.
# return_sequences is left at its default, so only one output vector per
# sequence is passed on. The name "lstm_layer2" is given to the inner LSTM,
# not to the Bidirectional wrapper.
model.add(
    layers.Bidirectional(
        layers.LSTM(units=256, name="lstm_layer2")
    )
)

In [8]:
# Classification head: 10 units, one per digit class. No activation is applied,
# so the layer outputs raw logits (the loss is configured with from_logits=True).
model.add(
    layers.Dense(units=10)
)

In [9]:
# Show the layer-by-layer summary (output shapes and parameter counts).
# model.summary() prints the table itself and returns None, so it is called
# directly; wrapping it in print() would emit a stray "None" after the table.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 bidirectional (Bidirectiona  (None, None, 512)        583680    
 l)                                                              
                                                                 
 bidirectional_1 (Bidirectio  (None, 512)              1574912   
 nal)                                                            
                                                                 
 dense (Dense)               (None, 10)                5130      
                                                                 
Total params: 2,163,722
Trainable params: 2,163,722
Non-trainable params: 0
_________________________________________________________________
None


In [10]:

# Compile the model: sparse categorical cross-entropy computed on raw logits
# (the output layer applies no softmax), the Adam optimizer, and accuracy as
# the reported metric.
# `learning_rate` is the supported argument name; the old `lr` alias is
# deprecated and is rejected by newer Keras releases.
model.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    metrics=["accuracy"],
)



In [11]:
# Train on the training set for 10 epochs with mini-batches of 64 samples;
# verbose=2 logs one line per epoch.
# The returned History object is kept in `history` so the per-epoch loss and
# accuracy can be inspected later, instead of leaving only its repr as the
# cell output.
history = model.fit(x_train, y_train, batch_size=64, epochs=10, verbose=2)

Epoch 1/10
938/938 - 104s - loss: 0.2448 - accuracy: 0.9204 - 104s/epoch - 111ms/step
Epoch 2/10
938/938 - 89s - loss: 0.0757 - accuracy: 0.9768 - 89s/epoch - 95ms/step
Epoch 3/10
938/938 - 87s - loss: 0.0520 - accuracy: 0.9840 - 87s/epoch - 93ms/step
Epoch 4/10
938/938 - 87s - loss: 0.0417 - accuracy: 0.9871 - 87s/epoch - 93ms/step
Epoch 5/10
938/938 - 86s - loss: 0.0343 - accuracy: 0.9893 - 86s/epoch - 91ms/step
Epoch 6/10
938/938 - 85s - loss: 0.0289 - accuracy: 0.9911 - 85s/epoch - 91ms/step
Epoch 7/10
938/938 - 86s - loss: 0.0269 - accuracy: 0.9914 - 86s/epoch - 92ms/step
Epoch 8/10
938/938 - 85s - loss: 0.0207 - accuracy: 0.9935 - 85s/epoch - 91ms/step
Epoch 9/10
938/938 - 85s - loss: 0.0193 - accuracy: 0.9941 - 85s/epoch - 91ms/step
Epoch 10/10
938/938 - 86s - loss: 0.0151 - accuracy: 0.9952 - 86s/epoch - 92ms/step


<keras.callbacks.History at 0x7f99f7e91c70>

In [12]:
# Measure performance on the held-out test set in batches of 64. The call logs
# one summary line (verbose=2) and its return value -- [loss, accuracy] -- is
# displayed as the cell output.
model.evaluate(
    x_test,
    y_test,
    batch_size=64,
    verbose=2,
)

157/157 - 3s - loss: 0.0424 - accuracy: 0.9884 - 3s/epoch - 17ms/step


[0.042361415922641754, 0.9883999824523926]