# Importing the required libraries

In [1]:
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
import matplotlib.pyplot as plt
import numpy as np

# Loading the imdb dataset

In [2]:
# Load the IMDb dataset
max_features = 10000  # Number of words to keep in the vocabulary
maxlen = 100  # Maximum length of each sequence
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz


In [3]:
# Pad sequences to have the same length
x_train = pad_sequences(x_train, maxlen=maxlen)
x_test = pad_sequences(x_test, maxlen=maxlen)

# Building the RNN model using tensorflow

In [4]:
# Build the RNN model using TensorFlow
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(max_features, 32, input_length=maxlen),
    tf.keras.layers.SimpleRNN(32),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

# Training the model

In [5]:
# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


In [6]:
# Train the model
model.fit(x_train, y_train, epochs=5, batch_size=64, validation_split=0.2)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.src.callbacks.History at 0x78b4b9ae73d0>

In [7]:
# Evaluate the model on the test set
test_loss, test_accuracy = model.evaluate(x_test, y_test)
print(f'Test Loss: {test_loss}, Test Accuracy: {test_accuracy}')

Test Loss: 0.5195599794387817, Test Accuracy: 0.8163599967956543


# Visualizing the results

In [8]:
# Function to convert word indices back to words
def indices_to_words(indices, word_index):
    reverse_word_index = dict([(value, key) for (key, value) in word_index.items()])
    return ' '.join([reverse_word_index.get(i, '?') for i in indices])


In [9]:
def visualize_sample(model, x_test, y_test, index):
    sample_input = x_test[index]
    true_label = y_test[index]

    # Make a prediction
    prediction = model.predict(np.expand_dims(sample_input, axis=0))[0]

    # Convert indices to words
    sample_text = indices_to_words(sample_input, imdb.get_word_index())

    # Print the true sentiment and predicted probability
    sentiment = 'Positive' if true_label == 1 else 'Negative'
    prediction_prob = prediction[0] if true_label == 0 else 1 - prediction[0]
    
    print(f"True Sentiment: {sentiment}")
    print(f"Predicted Probability: {prediction_prob:.4f}")
    print("Review:")
    print(sample_text)

In [10]:
# Visualize a few sample predictions
for i in range(5):
    visualize_sample(model, x_test, y_test, i)
    print("\n---\n")

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json
True Sentiment: Negative
Predicted Probability: 0.6521
Review:
? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? the wonder own as by is sequence i i and and to of hollywood br of down shouting getting boring of ever it sadly sadly sadly i i was then does don't close faint after one carry as by are be favourites all family turn in does as three part in another some to be probably with world and her an have faint beginning own as is sequence

---

True Sentiment: Positive
Predicted Probability: 0.0056
Review:
is telling program br silliness okay and to frustration at corner and she of sequences to political clearly in of drugs keep guy i i was throwing room sugar as it by br be plot many for occasionally film verge boyfriend difficult kid as you it failed not if gerard to if woman in and is police fi spooky or of self what have pretty in can so suit you good 2 which why s