 **Train a 2-layer bidirectional LSTM**

Use the IMDB movie review sentiment dataset, loaded with `keras.datasets.imdb`.

Import Libraries

In [5]:
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
from keras.models import Sequential
from keras import optimizers
from keras.preprocessing.sequence import pad_sequences
from keras.layers import Input, Dense, Dropout, Embedding, Flatten, LSTM, Bidirectional
from keras.models import Model
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report

Load the IMDB movie review sentiment data using keras.datasets.imdb

In [6]:
# Vocabulary cap: only the 20,000 most frequent words keep their own index.
# Rarer words are not removed; the IMDB loader maps them to its out-of-vocabulary index.
max_features = 20_000
# Fixed review length in tokens: longer reviews are truncated, shorter ones are padded with zeros.
maxlen = 200

In [7]:
# The loader returns two (reviews, labels) pairs. The second pair is the dataset's
# test half; this notebook uses it as the validation split (X_val / y_val).
imdb_splits = keras.datasets.imdb.load_data(num_words=max_features)
(X_train, y_train), (X_val, y_val) = imdb_splits

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz


  x_train, y_train = np.array(xs[:idx]), np.array(labels[:idx])
  x_test, y_test = np.array(xs[idx:]), np.array(labels[idx:])


In [8]:
# The dataset is divided 50/50 into train and test sets, so all four arrays
# should report the same number of entries.
for split in (X_train, X_val, y_train, y_val):
    print(split.shape)

(25000,)
(25000,)
(25000,)
(25000,)


In [9]:
# Each review is already tokenized: words have been replaced by integers that
# encode their frequency rank in the dataset, so every review is a sequence
# (Python list) of integers rather than text.
X_train

array([list([1, 14, 22, 16, 43, 530, 973, 1622, 1385, 65, 458, 4468, 66, 3941, 4, 173, 36, 256, 5, 25, 100, 43, 838, 112, 50, 670, 2, 9, 35, 480, 284, 5, 150, 4, 172, 112, 167, 2, 336, 385, 39, 4, 172, 4536, 1111, 17, 546, 38, 13, 447, 4, 192, 50, 16, 6, 147, 2025, 19, 14, 22, 4, 1920, 4613, 469, 4, 22, 71, 87, 12, 16, 43, 530, 38, 76, 15, 13, 1247, 4, 22, 17, 515, 17, 12, 16, 626, 18, 19193, 5, 62, 386, 12, 8, 316, 8, 106, 5, 4, 2223, 5244, 16, 480, 66, 3785, 33, 4, 130, 12, 16, 38, 619, 5, 25, 124, 51, 36, 135, 48, 25, 1415, 33, 6, 22, 12, 215, 28, 77, 52, 5, 14, 407, 16, 82, 10311, 8, 4, 107, 117, 5952, 15, 256, 4, 2, 7, 3766, 5, 723, 36, 71, 43, 530, 476, 26, 400, 317, 46, 7, 4, 12118, 1029, 13, 104, 88, 4, 381, 15, 297, 98, 32, 2071, 56, 26, 141, 6, 194, 7486, 18, 4, 226, 22, 21, 134, 476, 26, 480, 5, 144, 30, 5535, 18, 51, 36, 28, 224, 92, 25, 104, 4, 226, 65, 16, 38, 1334, 88, 12, 16, 283, 5, 16, 4472, 113, 103, 32, 15, 16, 5345, 19, 178, 32]),
       list([1, 194, 1153, 194, 82

In [10]:
# Binary sentiment labels (0/1), one per training review.
y_train

array([1, 0, 0, ..., 0, 1, 0])

Training and Testing Data

In [11]:
# Keras needs equal-length input vectors, so every review is truncated or
# zero-padded to exactly `maxlen` tokens (with the default settings both happen
# at the front of the sequence). The reviews still differ in real content
# length; the model is left to learn that the zero padding carries no information.
X_train, X_val = (
    keras.preprocessing.sequence.pad_sequences(split, maxlen=maxlen)
    for split in (X_train, X_val)
)

In [29]:
# After padding, the training reviews form a 2-D array: one row per review, `maxlen` columns.
X_train.shape

(25000, 200)

In [31]:
# The validation reviews were padded the same way, so the shape should match the training array.
X_val.shape

(25000, 200)

Build the model

In [24]:
# Functional-API version of the 2-layer bidirectional LSTM classifier.

# Input: variable-length sequences of integer word indices.
inputs = keras.Input(shape=(None,), dtype="int32")
# Word embedding: each word index becomes a trainable 128-dimensional real-valued
# vector, so words with similar meaning can end up close together in that space.
x = layers.Embedding(input_dim=max_features, output_dim=128)(inputs)
# Two stacked bidirectional LSTMs (64 units per direction). The first returns
# the whole sequence so the second has timesteps to consume; the second
# returns a single vector per review.
x = layers.Bidirectional(layers.LSTM(units=64, return_sequences=True))(x)
x = layers.Bidirectional(layers.LSTM(units=64))(x)
# Classifier head: a single sigmoid unit, because the task is binary classification.
outputs = layers.Dense(units=1, activation="sigmoid")(x)
model = keras.Model(inputs=inputs, outputs=outputs)
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, None)]            0         
_________________________________________________________________
embedding_6 (Embedding)      (None, None, 128)         2560000   
_________________________________________________________________
bidirectional_5 (Bidirection (None, None, 128)         98816     
_________________________________________________________________
bidirectional_6 (Bidirection (None, 128)               98816     
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 129       
Total params: 2,757,761
Trainable params: 2,757,761
Non-trainable params: 0
_________________________________________________________________


In [19]:
# Sequential-API version of the 2-layer bidirectional LSTM, with dropout
# applied after the embedding and again before the classifier.
model2 = Sequential([
    Embedding(max_features, 128, input_length=maxlen),
    Dropout(0.5),
    Bidirectional(LSTM(64, return_sequences=True)),  # 64 memory units per direction, full sequence out
    Bidirectional(LSTM(64)),
    Dropout(0.5),  # dropout to reduce overfitting
    Dense(1, activation='sigmoid'),  # single sigmoid neuron producing the 0/1 prediction
])

In [None]:
# Two models have been built:
# - model:  2-layer bidirectional LSTM defined with the Keras functional API
# - model2: 2-layer bidirectional LSTM defined with the Keras Sequential API (plus dropout)

Train and evaluate the models. Use accuracy as the evaluation metric.

In [25]:
# Adam optimizer with binary cross-entropy loss; accuracy is tracked as the metric.
model.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

In [26]:
# Train the functional-API model for 3 epochs, validating on (X_val, y_val) after each epoch.
model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=3,
    validation_data=(X_val, y_val),
)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<tensorflow.python.keras.callbacks.History at 0x7ffa09ecc7d0>

In [20]:
# Same training setup as `model`: Adam optimizer, binary cross-entropy loss, accuracy metric.
model2.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

In [21]:
# Train the Sequential model for 3 epochs, validating on (X_val, y_val) after each epoch.
model2.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=3,
    validation_data=(X_val, y_val),
)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<tensorflow.python.keras.callbacks.History at 0x7ffa055c7ad0>

In [22]:
# Returns [loss, accuracy] on the validation split; the recorded run gave 86.34% accuracy for model2.
model2.evaluate(x=X_val, y=y_val)



[0.3553614318370819, 0.8634399771690369]

In [27]:
# Returns [loss, accuracy] on the validation split; the recorded run gave 86.11% accuracy for model.
model.evaluate(x=X_val, y=y_val)



[0.38224631547927856, 0.8610799908638]