In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.datasets import imdb
from tensorflow.keras.layers import Dense, Embedding, Input, LSTM, SimpleRNN
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing import sequence

In [None]:
# Vocabulary cap: only the 10,000 most frequent words are kept when loading the dataset.
max_words = 10_000
# Fixed review length in tokens: shorter reviews get padded, longer ones get truncated.
max_len = 500

In [None]:
# Fetch the IMDB reviews as integer-encoded word sequences, limited to the max_words vocabulary.
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=max_words)

# Bring every review in both splits to exactly max_len tokens so each split
# becomes a rectangular array that can be fed to the network.
X_train, X_test = (
    sequence.pad_sequences(split, maxlen=max_len) for split in (X_train, X_test)
)
print(f"Training Data Shape: {X_train.shape}, Testing Data Shape: {X_test.shape}")

In [None]:
# Binary sentiment classifier: token ids -> embeddings -> LSTM -> sigmoid probability.
# (Input and LSTM are imported with the other Keras layers in the notebook's import cell;
# the previous in-cell import ended in a dangling comma, which is a SyntaxError.)
model = Sequential()

# Each sample is a sequence of max_len integer word indices.
model.add(Input(shape=(max_len,)))
# Embedding layer: maps each of the max_words vocabulary indices to a trainable 128-dim vector.
model.add(Embedding(input_dim=max_words, output_dim=128))
# Recurrent layer: 128 LSTM units; recurrent_dropout regularises the recurrent state.
# NOTE: per the Keras LSTM docs, a non-zero recurrent_dropout prevents use of the fused
# cuDNN kernel on GPU, so training is slower than with recurrent_dropout=0.
model.add(LSTM(128, activation='tanh', recurrent_dropout=0.2))

# Output layer: a single sigmoid unit producing the probability of the positive class.
model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
# Halt training once the validation loss has gone 3 consecutive epochs without
# improving, then restore the best weights seen, to limit overfitting.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
)

# Train for at most 10 epochs, holding out 20% of the training data for validation.
history = model.fit(
    x=X_train,
    y=y_train,
    validation_split=0.2,
    batch_size=32,
    epochs=10,
    callbacks=[early_stopping],
)