In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, GRU, Dense, Dropout

In [2]:
vocab_size = 10000
max_len = 200  # each review will be padded/truncated to 200 words

(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=vocab_size)

print("Training samples:", len(x_train))
print("Test samples:", len(x_test))

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Training samples: 25000
Test samples: 25000


In [3]:
x_train = pad_sequences(x_train, maxlen=max_len)
x_test = pad_sequences(x_test, maxlen=max_len)


In [4]:
model = Sequential()
model.add(Embedding(vocab_size, 128, input_length=max_len))




In [5]:
model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))

In [6]:
model.add(Dense(1, activation="sigmoid"))


In [7]:
model.compile(loss="binary_crossentropy",
              optimizer="adam",
              metrics=["accuracy"])

In [8]:
history = model.fit(x_train, y_train,
                    batch_size=64,
                    epochs=3,
                    validation_data=(x_test, y_test))

Epoch 1/3
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m352s[0m 886ms/step - accuracy: 0.6981 - loss: 0.5565 - val_accuracy: 0.8451 - val_loss: 0.3670
Epoch 2/3
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m340s[0m 868ms/step - accuracy: 0.8537 - loss: 0.3499 - val_accuracy: 0.8327 - val_loss: 0.3813
Epoch 3/3
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m383s[0m 873ms/step - accuracy: 0.8587 - loss: 0.3351 - val_accuracy: 0.8560 - val_loss: 0.3602


In [9]:
loss, acc = model.evaluate(x_test, y_test, verbose=0)
print(f"Test Accuracy: {acc*100:.2f}%")

Test Accuracy: 85.60%


In [10]:
word_index = imdb.get_word_index()
reverse_word_index = {v+3: k for k, v in word_index.items()}
reverse_word_index[0] = "<PAD>"
reverse_word_index[1] = "<START>"
reverse_word_index[2] = "<UNK>"
reverse_word_index[3] = "<UNUSED>"

def decode_review(text_ids):
    return " ".join([reverse_word_index.get(i, "?") for i in text_ids])

sample_id = 10
print("Review:", decode_review(x_test[sample_id]))
print("True Sentiment:", "Positive" if y_test[sample_id]==1 else "Negative")

pred = model.predict(np.expand_dims(x_test[sample_id], axis=0))[0][0]
print("Predicted Sentiment:", "Positive" if pred > 0.5 else "Negative")

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json
[1m1641221/1641221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Review: of two men <UNK> murders in exchange for getting rid of the two people messing up their lives throw <UNK> from the train is an original and very inventive comedy take on the idea it's a credit to danny <UNK> that he both wrote and starred in this minor comedy gem br br anne <UNK> is the mother who <UNK> the film's title and it's understandable why she gets under the skin of danny <UNK> with her sharp tongue and relentlessly putting him down for any minor <UNK> billy crystal is the writer who's wife has stolen his book idea and is now being <UNK> as a great new author even appearing on the oprah show to in <UNK> he should be enjoying thus <UNK> gets the idea of <UNK> murders to rid themselves of these <UNK> factors br br of course everything and anything can happen when writer carl <UNK> lets his