In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense

In [2]:
# Load the dataset
max_features = 10000  # Number of words to consider as features
maxlen = 200  # Cut texts after this number of words (among top max_features most common words)

(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=max_features)

# Pad sequences to ensure uniform input length
X_train = pad_sequences(X_train, maxlen=maxlen)
X_test = pad_sequences(X_test, maxlen=maxlen)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 0us/step


In [3]:
# Build the RNN model
model = Sequential()
model.add(Embedding(max_features, 32, input_length=maxlen))  # Embedding layer
model.add(SimpleRNN(32, activation='relu'))  # SimpleRNN layer
model.add(Dense(1, activation='sigmoid'))  # Output layer with sigmoid for binary classification

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Print model summary
model.summary()




In [4]:
# Train the model
history = model.fit(X_train, y_train, epochs=10, batch_size=64, validation_split=0.2)

Epoch 1/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 26ms/step - accuracy: 0.6029 - loss: 0.6469 - val_accuracy: 0.7736 - val_loss: 0.4874
Epoch 2/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 28ms/step - accuracy: 0.7750 - loss: 0.5025 - val_accuracy: 0.8180 - val_loss: 0.4235
Epoch 3/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 26ms/step - accuracy: 0.8710 - loss: 0.3210 - val_accuracy: 0.8472 - val_loss: 0.3616
Epoch 4/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 28ms/step - accuracy: 0.9094 - loss: 0.2365 - val_accuracy: 0.8566 - val_loss: 0.3754
Epoch 5/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 26ms/step - accuracy: 0.9331 - loss: 0.1791 - val_accuracy: 0.8518 - val_loss: 0.3605
Epoch 6/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 26ms/step - accuracy: 0.9468 - loss: 0.1430 - val_accuracy: 0.8504 - val_loss: 0.3989
Epoch 7/10
[1m313/31

In [5]:
# Evaluate the model on the test set
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f'Test accuracy: {test_acc * 100:.2f}%')

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 7ms/step - accuracy: 0.8182 - loss: 0.4784
Test accuracy: 82.09%


In [6]:
# Example prediction on a single review
sample_review = X_test[0]  # Get one review from the test set
sample_review = sample_review.reshape(1, -1)  # Reshape for prediction

prediction = model.predict(sample_review)
print(f'Predicted sentiment: {"Positive" if prediction > 0.5 else "Negative"}')

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 153ms/step
Predicted sentiment: Negative


In [11]:
# Save the entire model
model.save('sentiment_rnn_model.h5')
from tensorflow.keras.models import load_model

# Load the saved model
model = load_model('sentiment_rnn_model.h5')



In [7]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Example new review
new_review = ["This movie was fantastic! The acting was brilliant and the story was compelling."]

# Tokenize and convert to sequences
tokenizer = Tokenizer(num_words=max_features)  # Use the same max_features as during training
tokenizer.fit_on_texts(new_review)
sequences = tokenizer.texts_to_sequences(new_review)

# Pad the sequence to the same length as the training data
padded_sequence = pad_sequences(sequences, maxlen=maxlen)

In [8]:
# Make a prediction
prediction = model.predict(padded_sequence)
sentiment = "Positive" if prediction[0] > 0.5 else "Negative"
print(f'Predicted sentiment: {sentiment}')

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
Predicted sentiment: Positive
