In [3]:
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Embedding, GRU, LSTM, Dense, TextVectorization
import numpy as np

# -----------------------------
# 1. Load IMDB dataset
# -----------------------------
max_features = 10000  # vocabulary cap passed to load_data as num_words
max_len = 100         # fixed sequence length fed to the models

(X_train, y_train), (X_test, y_test) = tf.keras.datasets.imdb.load_data(
    num_words=max_features
)

# Bring every review to exactly max_len tokens so each split becomes a
# rectangular array; train and test go through the identical call.
X_train, X_test = (
    tf.keras.preprocessing.sequence.pad_sequences(split, maxlen=max_len)
    for split in (X_train, X_test)
)

# -----------------------------
# 2. Define GRU model
# -----------------------------
def _build_rnn_classifier(recurrent_layer):
    """Return a compiled binary sentiment classifier around *recurrent_layer*.

    The network is Embedding(max_features, 32) -> recurrent_layer ->
    Dense(1, sigmoid), compiled with binary cross-entropy, Adam and an
    accuracy metric. Sharing one builder keeps the GRU and LSTM variants
    identical apart from the recurrent layer, so their results are
    comparable.

    ``input_length`` is intentionally not passed to Embedding: Keras 3
    deprecates and ignores it, and the sequence length is taken from the
    data at fit time.
    """
    model = Sequential([
        Embedding(max_features, 32),
        recurrent_layer,
        Dense(1, activation="sigmoid"),
    ])
    model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])
    return model


gru_model = _build_rnn_classifier(GRU(100))

# -----------------------------
# 3. Define LSTM model
# -----------------------------
lstm_model = _build_rnn_classifier(LSTM(100))

# -----------------------------
# 4. Train both models (quick demo: 5 epochs each)
# -----------------------------
# Both networks see the same data and hyper-parameters; validation_split
# holds out 20% of the training data for per-epoch validation metrics.
for banner, model in (
    ("\nTraining GRU model...", gru_model),
    ("\nTraining LSTM model...", lstm_model),
):
    print(banner)
    model.fit(X_train, y_train, epochs=5, batch_size=64, validation_split=0.2, verbose=2)

# -----------------------------
# 5. Test custom reviews
# -----------------------------
# word_index maps word -> rank as returned by get_word_index().
word_index = tf.keras.datasets.imdb.get_word_index()

# reverse_index maps id -> word, with every rank shifted up by 3 so that
# ids 0, 1 and 2 stay free for the special tokens assigned below.
reverse_index = {rank + 3: token for token, rank in word_index.items()}
reverse_index.update({0: "<PAD>", 1: "<START>", 2: "<UNK>"})

def encode_review(text, index=None, vocab_size=None):
    """Encode raw review text into the integer ids the models were trained on.

    The text is split on whitespace and lower-cased, then each word is
    mapped the same way the training data was encoded:

    * the sequence starts with the start id ``1``;
    * a known word becomes ``rank + 3`` (ids 0-2 are reserved for
      padding / start / unknown, matching ``reverse_index``);
    * a word that is missing from the index, or whose shifted id falls
      outside the vocabulary, becomes the unknown id ``2`` so it can never
      index past the end of the Embedding table.

    Args:
        text: Review text; words are separated by whitespace.
        index: Optional word -> rank mapping. Defaults to the module-level
            ``word_index``.
        vocab_size: Optional vocabulary cap (exclusive upper bound on ids).
            Defaults to the module-level ``max_features``.

    Returns:
        A list of ints: the start id followed by one id per word.
    """
    if index is None:
        index = word_index
    if vocab_size is None:
        vocab_size = max_features

    start_id, unknown_id, offset = 1, 2, 3

    encoded = [start_id]
    for word in text.split():
        rank = index.get(word.lower())
        token_id = unknown_id if rank is None else rank + offset
        if token_id >= vocab_size:
            # Too rare to be in the trained vocabulary -> treat as unknown.
            token_id = unknown_id
        encoded.append(token_id)
    return encoded

# Each entry pairs a hand-written review with the sentiment we expect.
test_cases = [
    ("An emotional and deep plot", "Positive"),
    ("The story was dull", "Negative"),
]

# Encode and pad every review on its own (one row each), then stack the
# rows into a single batch for prediction.
X_custom = np.vstack([
    tf.keras.preprocessing.sequence.pad_sequences(
        [encode_review(review_text)], maxlen=max_len
    )
    for review_text, _expected in test_cases
])

# Predictions: threshold the sigmoid output at 0.5 to get a 0/1 label per review
gru_preds, lstm_preds = (
    (model.predict(X_custom) > 0.5).astype("int32").flatten()
    for model in (gru_model, lstm_model)
)

def decode_label(val):
    """Return "Positive" when *val* equals 1, otherwise "Negative"."""
    if val == 1:
        return "Positive"
    return "Negative"

# -----------------------------
# 6. Print results
# -----------------------------
# One table row per custom review: expected label, each model's label,
# and whether the two models agree with each other.
print(f"\n{'Review Text':40} | {'Expected':8} | {'GRU':8} | {'LSTM':8} | Same?")
print("-"*80)
for (text, expected), gru_raw, lstm_raw in zip(test_cases, gru_preds, lstm_preds):
    gru_out = decode_label(gru_raw)
    lstm_out = decode_label(lstm_raw)
    if gru_out == lstm_out:
        same = "Yes"
    else:
        same = "No"
    print(f"{text:40} | {expected:8} | {gru_out:8} | {lstm_out:8} | {same}")



Training GRU model...
Epoch 1/5
313/313 - 22s - 69ms/step - accuracy: 0.7619 - loss: 0.4729 - val_accuracy: 0.8428 - val_loss: 0.3765
Epoch 2/5
313/313 - 20s - 63ms/step - accuracy: 0.8823 - loss: 0.2870 - val_accuracy: 0.8384 - val_loss: 0.4012
Epoch 3/5
313/313 - 21s - 66ms/step - accuracy: 0.9130 - loss: 0.2207 - val_accuracy: 0.8388 - val_loss: 0.3678
Epoch 4/5
313/313 - 21s - 67ms/step - accuracy: 0.9329 - loss: 0.1811 - val_accuracy: 0.8384 - val_loss: 0.4400
Epoch 5/5
313/313 - 20s - 63ms/step - accuracy: 0.9489 - loss: 0.1388 - val_accuracy: 0.8404 - val_loss: 0.4540

Training LSTM model...
Epoch 1/5
313/313 - 23s - 73ms/step - accuracy: 0.7579 - loss: 0.4714 - val_accuracy: 0.8452 - val_loss: 0.3578
Epoch 2/5
313/313 - 21s - 68ms/step - accuracy: 0.8892 - loss: 0.2778 - val_accuracy: 0.8332 - val_loss: 0.3803
Epoch 3/5
313/313 - 41s - 130ms/step - accuracy: 0.9155 - loss: 0.2197 - val_accuracy: 0.8458 - val_loss: 0.3737
Epoch 4/5
313/313 - 21s - 68ms/step - accuracy: 0.9340 -