In [1]:
from keras.datasets import imdb

# Load dataset with only the top 10,000 words considered
# Keras already have preprocessed dataset
(top_words, max_length) = (10000, 500)
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=top_words)


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 0us/step


In [2]:
print("First Review (encoded):", X_train[0])
print("First Review Label:", y_train[0])


First Review (encoded): [1, 14, 22, 16, 43, 530, 973, 1622, 1385, 65, 458, 4468, 66, 3941, 4, 173, 36, 256, 5, 25, 100, 43, 838, 112, 50, 670, 2, 9, 35, 480, 284, 5, 150, 4, 172, 112, 167, 2, 336, 385, 39, 4, 172, 4536, 1111, 17, 546, 38, 13, 447, 4, 192, 50, 16, 6, 147, 2025, 19, 14, 22, 4, 1920, 4613, 469, 4, 22, 71, 87, 12, 16, 43, 530, 38, 76, 15, 13, 1247, 4, 22, 17, 515, 17, 12, 16, 626, 18, 2, 5, 62, 386, 12, 8, 316, 8, 106, 5, 4, 2223, 5244, 16, 480, 66, 3785, 33, 4, 130, 12, 16, 38, 619, 5, 25, 124, 51, 36, 135, 48, 25, 1415, 33, 6, 22, 12, 215, 28, 77, 52, 5, 14, 407, 16, 82, 2, 8, 4, 107, 117, 5952, 15, 256, 4, 2, 7, 3766, 5, 723, 36, 71, 43, 530, 476, 26, 400, 317, 46, 7, 4, 2, 1029, 13, 104, 88, 4, 381, 15, 297, 98, 32, 2071, 56, 26, 141, 6, 194, 7486, 18, 4, 226, 22, 21, 134, 476, 26, 480, 5, 144, 30, 5535, 18, 51, 36, 28, 224, 92, 25, 104, 4, 226, 65, 16, 38, 1334, 88, 12, 16, 283, 5, 16, 4472, 113, 103, 32, 15, 16, 5345, 19, 178, 32]
First Review Label: 1


In [3]:
# DECODING THE REVIEW
word_index = imdb.get_word_index()

index_to_word = {index + 3: word for word, index in word_index.items()}
index_to_word[0], index_to_word[1], index_to_word[2] = "<PAD>", "<START>", "<UNK>"

decoded_review = " ".join([index_to_word.get(i, "<UNK>") for i in X_train[0]])
print("Decoded Review:", decoded_review)


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json
[1m1641221/1641221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1us/step
Decoded Review: <START> this film was just brilliant casting location scenery story direction everyone's really suited the part they played and you could just imagine being there robert <UNK> is an amazing actor and now the same being director <UNK> father came from the same scottish island as myself so i loved the fact there was a real connection with this film the witty remarks throughout the film were great it was just brilliant so much that i bought the film as soon as it was released for <UNK> and would recommend it to everyone to watch and the fly fishing was amazing really cried at the end it was so sad and you know what they say if you cry at a film it must have been good and this definitely was also <UNK> to the two little boy's that played the <UNK> of norman and paul they were just brilliant 

In [4]:
from keras.preprocessing.sequence import pad_sequences

# Pad sequences to the same length
X_train = pad_sequences(X_train, maxlen=max_length)
X_test = pad_sequences(X_test, maxlen=max_length)


In [8]:
from keras.models import Sequential
from keras.layers import Embedding, LSTM, Dense

embedding_vector_length = 32
max_length = 500

model = Sequential([
    Embedding(input_dim=10000, output_dim=embedding_vector_length, input_length=max_length),
    LSTM(100),
    Dense(1, activation='sigmoid')
])

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
print(model.summary())


None


In [9]:
model.fit(X_train, y_train, epochs=3, batch_size=64, validation_data=(X_test, y_test))


Epoch 1/3
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 30ms/step - accuracy: 0.6745 - loss: 0.5817 - val_accuracy: 0.5998 - val_loss: 0.6806
Epoch 2/3
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 30ms/step - accuracy: 0.7461 - loss: 0.5056 - val_accuracy: 0.8518 - val_loss: 0.3543
Epoch 3/3
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 30ms/step - accuracy: 0.8948 - loss: 0.2693 - val_accuracy: 0.8685 - val_loss: 0.3085


<keras.src.callbacks.history.History at 0x785f2fcab670>

In [10]:
scores = model.evaluate(X_test, y_test, verbose=0)
print("Accuracy: {:.2f}%".format(scores[1] * 100))


Accuracy: 86.85%
