In [25]:
pip install tensorflow

Note: you may need to restart the kernel to use updated packages.


In [1]:
import tensorflow as tf                             # Tensorflow for building and training neural network models
from tensorflow.keras.datasets import imdb          # IMDb movie review dataset - movie reviews and sentiment labels
from tensorflow.keras.layers import Dense, Embedding, LSTM
from tensorflow.keras.models import Sequential      # Linear stack of layers for building neural network
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import BinaryCrossentropy    # Calculate difference between predicted and actual values in binary classification
from tensorflow.keras.preprocessing.sequence import pad_sequences   # Pad sequences to a specified length

In [2]:
embedding_output_dimensions = 15    # Size of vector space in which words will be embedded
# Define the maximum input length
max_input_length = 500

# Define the number of words in the vocabulary
vocabulary_size = 5000

In [3]:
# Load dataset
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=5000)   # Restrict the vocabulary size to 5000 words

In [4]:
# Pad all sequences
padded_inputs = pad_sequences(x_train, maxlen=500, value = 0.0) # Pads sequences to maximum length with padding value 0
padded_inputs_test = pad_sequences(x_test, maxlen=500, value = 0.0) # Pads sequences to maximum length with padding value 0

In [12]:
# Define the Keras model
model = Sequential()
model.add(Embedding(vocabulary_size, embedding_output_dimensions, input_shape = (500,) ))  # Embedding - converts integer-encoded words into dense vectors of fixed size
model.add(LSTM(10))    # LSTM layer with 10 units
model.add(Dense(1, activation='sigmoid'))

In [13]:
# Compile the model
model.compile(optimizer=Adam(), loss=BinaryCrossentropy(), metrics=["accuracy"])

In [14]:
# Give a summary of model architecture
model.summary()

In [15]:
# Train the model
history = model.fit(padded_inputs, y_train, batch_size=128, epochs=5, verbose=1, validation_split=0.2)

# Verbose: It can be 0(Silent), 1(Progress bar), 2(Epoch number, loss and metrics in one line)

Epoch 1/5
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 76ms/step - accuracy: 0.5983 - loss: 0.6586 - val_accuracy: 0.8110 - val_loss: 0.4545
Epoch 2/5
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 94ms/step - accuracy: 0.8429 - loss: 0.4013 - val_accuracy: 0.8466 - val_loss: 0.3718
Epoch 3/5
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 99ms/step - accuracy: 0.8768 - loss: 0.3227 - val_accuracy: 0.7864 - val_loss: 0.4680
Epoch 4/5
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 99ms/step - accuracy: 0.8696 - loss: 0.3327 - val_accuracy: 0.8616 - val_loss: 0.3444
Epoch 5/5
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 101ms/step - accuracy: 0.9049 - loss: 0.2593 - val_accuracy: 0.8580 - val_loss: 0.3342


In [16]:
# Test the model after training
test_results = model.evaluate(padded_inputs_test, y_test, verbose=1)
print(f'Test results - Loss: {test_results[0]} - Accuracy: {100*test_results[1]}%')

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 12ms/step - accuracy: 0.8608 - loss: 0.3415
Test results - Loss: 0.3380950689315796 - Accuracy: 86.26400232315063%


In [17]:
from tensorflow.keras.preprocessing.text import Tokenizer

In [18]:
tokenizer = Tokenizer(num_words = 5000)

In [24]:
user_input = input("Enter your text: ")
tokenizer.fit_on_texts([user_input])
tokenized_input = tokenizer.texts_to_sequences([user_input])
padded_input = pad_sequences(tokenized_input, maxlen=500, padding='post', truncating='post', value=0.0)
predictions = model.predict(padded_input)

print(predictions)

Enter your text:  the movie is good


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step
[[0.3770659]]


In [22]:
predicted_class = (predictions[0][0] > 0.5).astype("int32")

print(predicted_class)
if predicted_class == 0:
    print("Predicted sentiment: Positive")
else:
    print("Predicted sentiment: Negative")

0
Predicted sentiment: Positive
