Step 1: Import Necessary Libraries

In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, LSTM
from tensorflow.keras.datasets import imdb
from tensorflow.keras.layers import Dropout

Step 2: Load and Preprocess the IMDB Dataset
TensorFlow provides easy access to the IMDB dataset, which comes preloaded as tokenized reviews. To keep the model's input manageable, we cap the vocabulary size and pad every review to the same length so that all inputs share a uniform shape.

In [2]:
# Preprocessing settings: vocabulary size and fixed review length
vocab_size = 5000
max_words = 500

# Fetch the IMDB train/test splits, restricted to the chosen vocabulary size
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=vocab_size)

# Pad both splits to the same length so every input has a uniform size
X_train, X_test = (
    sequence.pad_sequences(reviews, maxlen=max_words)
    for reviews in (X_train, X_test)
)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz


Step 3: Define the LSTM Model
We'll design the neural network with an Embedding layer, a Dropout layer, an LSTM layer (with input and recurrent dropout), and a dense output layer with a sigmoid activation function for binary classification.

In [3]:
# Model configuration
embedding_size = 32  # size of the vector each token index is embedded into

# Build the model: Embedding -> Dropout -> LSTM -> sigmoid output
model = Sequential()
model.add(Embedding(vocab_size, embedding_size, input_length=max_words))
model.add(Dropout(0.2))  # Dropout layer after embedding
model.add(LSTM(100, dropout=0.2, recurrent_dropout=0.2))  # Adding dropout to LSTM
model.add(Dense(1, activation='sigmoid'))  # one unit + sigmoid for the binary label

# Compile the model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# summary() prints the layer table itself and returns None, so it is called
# directly; wrapping it in print() would emit an extra "None" line.
model.summary()

# Multi-class variant
# For multi-class classification, REPLACE the final Dense(1, activation='sigmoid')
# layer above with the softmax layer below and compile with the categorical loss.
# Do not simply uncomment these lines after building the binary model: that would
# stack a second output layer on top of the sigmoid layer.
# `num_classes` must be defined first, and categorical_crossentropy expects
# one-hot encoded labels.
#model.add(Dense(num_classes, activation='softmax'))
#model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 500, 32)           160000    
                                                                 
 lstm (LSTM)                 (None, 100)               53200     
                                                                 
 dense (Dense)               (None, 1)                 101       
                                                                 
Total params: 213301 (833.21 KB)
Trainable params: 213301 (833.21 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None


Step 3b: Apply Learning Rate Schedules and Early Stopping
Learning rate schedules and early stopping are strategies to optimize the training process. TensorFlow allows you to easily implement these.

In [None]:
from tensorflow.keras.callbacks import EarlyStopping, LearningRateScheduler
import math

# Learning-rate schedule used by the LearningRateScheduler callback
def scheduler(epoch, lr):
    """Return the learning rate to use for the given epoch.

    Args:
        epoch: Epoch index supplied by the callback.
        lr: Current learning rate.

    Returns:
        ``lr`` unchanged while ``epoch < 10``; from epoch 10 onward,
        ``lr`` multiplied by ``exp(-0.1)``.
    """
    return lr if epoch < 10 else lr * math.exp(-0.1)

# Training configuration.
# Defined here so this cell runs on a fresh kernel: these names are otherwise
# only assigned in the later "Step 4" cell, which made this cell fail with a
# NameError under Restart & Run All. Values match Step 4.
batch_size = 64
num_epochs = 3

# Instantiate callbacks
lr_scheduler = LearningRateScheduler(scheduler)
early_stopping = EarlyStopping(monitor='val_loss', patience=3)

# NOTE: with num_epochs = 3 neither callback changes anything -- the schedule
# only starts decaying at epoch 10 and early stopping waits 3 epochs without
# improvement. Increase num_epochs to see them take effect.
# NOTE(review): the test split doubles as the validation set here, so
# early stopping is tuned on the same data later used for evaluation;
# consider a separate validation split.

# Train the model with callbacks
model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    batch_size=batch_size,
    epochs=num_epochs,
    callbacks=[lr_scheduler, early_stopping],
)

Step 4: Train the Model
We'll train the model with the training data and evaluate its initial performance on the test set.

In [4]:
# Hyperparameters for the training run
batch_size, num_epochs = 64, 3

# Fit on the training split, passing the test split as validation data
model.fit(
    x=X_train,
    y=y_train,
    batch_size=batch_size,
    epochs=num_epochs,
    validation_data=(X_test, y_test),
)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.src.callbacks.History at 0x7ac1ca663550>

Step 5: Evaluate the Model
After training, we evaluate the model using the test set to obtain metrics like accuracy, precision, recall, and F1-score.

In [5]:
# Loss and accuracy on the test split
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Test Accuracy: {accuracy * 100:.2f}%')

# Precision, recall and F1 are computed from hard class labels, so the
# model's sigmoid outputs are thresholded at 0.5 before being compared
# with the true labels.
from sklearn.metrics import classification_report

test_probabilities = model.predict(X_test)
predictions = (test_probabilities > 0.5).astype("int32")
print(classification_report(y_test, predictions))

Test Accuracy: 86.84%
              precision    recall  f1-score   support

           0       0.90      0.83      0.86     12500
           1       0.84      0.91      0.87     12500

    accuracy                           0.87     25000
   macro avg       0.87      0.87      0.87     25000
weighted avg       0.87      0.87      0.87     25000



Step 6: Advanced Model Evaluation: Confusion Matrix and ROC Curve
To evaluate your model more comprehensively, utilise the confusion matrix and ROC curve analysis.

In [None]:
from sklearn.metrics import confusion_matrix, roc_curve, auc
import matplotlib.pyplot as plt

# Confusion matrix from the thresholded predictions
cm = confusion_matrix(y_test, predictions)
print("Confusion Matrix:\n", cm)

# ROC curve from the model's raw (un-thresholded) outputs
test_scores = model.predict(X_test)
fpr, tpr, thresholds = roc_curve(y_test, test_scores)
roc_auc = auc(fpr, tpr)

# Draw the ROC curve against the diagonal reference line
fig, ax = plt.subplots()
ax.plot(fpr, tpr, color='darkorange', lw=2, label='ROC curve (area = %0.2f)' % roc_auc)
ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
ax.set_xlim([0.0, 1.0])
ax.set_ylim([0.0, 1.05])
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('Receiver Operating Characteristic')
ax.legend(loc="lower right")
plt.show()