In [1]:
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Load dataset (keep top 10,000 words)
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=10000)

# Remove top 20 most frequent words (indices 1-20 become 0)
def filter_top_words(data):
    return [[0 if word <= 20 else word for word in review] for review in data]

x_train = filter_top_words(x_train)
x_test = filter_top_words(x_test)

# Pad sequences to a fixed length (500)
maxlen = 500
x_train_padded = pad_sequences(x_train, maxlen=maxlen, padding='post', truncating='post')
x_test_padded = pad_sequences(x_test, maxlen=maxlen, padding='post', truncating='post')

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [2]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense

model = Sequential([
    # Embedding layer (maps word indices to dense vectors)
    Embedding(
        input_dim=10001,  # Vocabulary size (0-10,000)
        output_dim=128,   # Embedding dimension
        input_length=maxlen,  # Input sequence length
        mask_zero=True     # Ignore padding
    ),
    # SimpleRNN layer
    SimpleRNN(64, activation='tanh'),
    # Output layer (binary classification)
    Dense(1, activation='sigmoid')
])

# Compile the model
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)



In [7]:
# Train the model
history = model.fit(
    x_train_padded, y_train,
    batch_size=128,
    epochs=10,
    validation_split=0.2  # 20% of training data for validation
)

Epoch 1/10
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 46ms/step - accuracy: 1.0000 - loss: 3.0724e-04 - val_accuracy: 0.8462 - val_loss: 0.7337
Epoch 2/10
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 44ms/step - accuracy: 1.0000 - loss: 2.3992e-04 - val_accuracy: 0.8450 - val_loss: 0.7479
Epoch 3/10
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 43ms/step - accuracy: 1.0000 - loss: 2.0336e-04 - val_accuracy: 0.8462 - val_loss: 0.7608
Epoch 4/10
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 44ms/step - accuracy: 1.0000 - loss: 1.6470e-04 - val_accuracy: 0.8482 - val_loss: 0.7740
Epoch 5/10
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 44ms/step - accuracy: 1.0000 - loss: 1.3828e-04 - val_accuracy: 0.8464 - val_loss: 0.7848
Epoch 6/10
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 45ms/step - accuracy: 1.0000 - loss: 1.1860e-04 - val_accuracy: 0.8470 - val_loss: 0

In [8]:
# Evaluate on test set
test_loss, test_acc = model.evaluate(x_test_padded, y_test)
print(f"Test Accuracy: {test_acc * 100:.2f}%")

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 12ms/step - accuracy: 0.8391 - loss: 0.8478
Test Accuracy: 83.96%


In [15]:
# Custom review testing
word_index = imdb.get_word_index()

def preprocess_review(review):
    words = review.lower().split()
    review_seq = [0 if word_index[word] <= 20 else word_index[word] for word in words if word in word_index and word_index[word] < 10000]
    return pad_sequences([review_seq], maxlen=500)


In [16]:
custom_review = "This movie was a complete disappointment with poor acting, a predictable plot, and terrible dialogue. The pacing was slow, and the ending felt rushed. Not worth the time."
preprocessed_review = preprocess_review(custom_review)
prediction = model.predict(preprocessed_review)

if prediction > 0.5:
    print("Positive Sentiment")
else:
    print("Negative Sentiment")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
Negative Sentiment


In [17]:
# Input custom review
custom_review ="An absolute masterpiece with brilliant acting, a gripping plot, stunning visuals, and a powerful soundtrack. The story was captivating from start to finish. Truly a must-watch experience!"
preprocessed_review = preprocess_review(custom_review)
prediction = model.predict(preprocessed_review)

if prediction > 0.5:
    print("Positive Sentiment")
else:
    print("Negative Sentiment")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
Positive Sentiment
