In [1]:
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, SpatialDropout1D
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [2]:
# Load the IMDb movie reviews dataset
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=10000)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz


In [3]:
# Pad sequences to a fixed length
max_len = 100
x_train = pad_sequences(x_train, maxlen=max_len)
x_test = pad_sequences(x_test, maxlen=max_len)

In [4]:
# Build the LSTM model
model = Sequential()
model.add(Embedding(input_dim=10000, output_dim=128, input_length=max_len))
model.add(SpatialDropout1D(0.2))
model.add(LSTM(100))
model.add(Dense(1, activation='sigmoid'))

In [5]:
# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [6]:
# Train the model
model.fit(x_train, y_train, epochs=5, batch_size=64, validation_split=0.2)


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.src.callbacks.History at 0x7ad8252ac8e0>

In [7]:
# Evaluate the model on the test set
test_loss, test_acc = model.evaluate(x_test, y_test)
print(f"\nTest Accuracy: {test_acc * 100:.2f}%")


Test Accuracy: 82.72%


In [18]:
# Sample sentence
sample_sentence = "I really enjoyed the movie, the plot was captivating and the acting was superb."

sample_sentence1 = "The film was a complete disappointment, with a confusing plot and unconvincing performances."

# Tokenize the sentences based on the IMDb dataset's vocabulary
sample_tokens = imdb.get_word_index()
sample_indices = [sample_tokens.get(word.lower(), 0) + 3 for word in sample_sentence.split()]
sample_indices1 = [sample_tokens.get(word.lower(), 0) + 3 for word in sample_sentence1.split()]

# Pad the sequences to the fixed length
sample_padded = pad_sequences([sample_indices], maxlen=max_len)
sample_padded1 = pad_sequences([sample_indices1], maxlen=max_len)

# Make predictions on the sample sentences
prediction = model.predict(sample_padded)
prediction1 = model.predict(sample_padded1)

# Display the predicted sentiments
get_sentiment = lambda p: "Positive" if p >= 0.5 else "Negative"

print(f"\nSample Sentence: '{sample_sentence}'")
print(f"Predicted Sentiment: {get_sentiment(prediction[0, 0])} (Probability: {prediction[0, 0] * 100:.2f}%)\n")

print(f"Sample Sentence: '{sample_sentence1}'")
print(f"Predicted Sentiment: {get_sentiment(prediction1[0, 0])} (Probability: {prediction1[0, 0] * 100:.2f}%)\n")



Sample Sentence: 'I really enjoyed the movie, the plot was captivating and the acting was superb.'
Predicted Sentiment: Positive (Probability: 88.52%)

Sample Sentence: 'The film was a complete disappointment, with a confusing plot and unconvincing performances.'
Predicted Sentiment: Negative (Probability: 19.67%)



In [19]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 100, 128)          1280000   
                                                                 
 spatial_dropout1d (Spatial  (None, 100, 128)          0         
 Dropout1D)                                                      
                                                                 
 lstm (LSTM)                 (None, 100)               91600     
                                                                 
 dense (Dense)               (None, 1)                 101       
                                                                 
Total params: 1371701 (5.23 MB)
Trainable params: 1371701 (5.23 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
