In [1]:
# Movie Sentiment Analysis using Deep Neural Networks on IMDB Dataset

import numpy as np
from tensorflow import keras
from tensorflow.keras.datasets import imdb
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Embedding, Flatten

# ===============================
# Parameters Setup
# ===============================
max_words = 10000         # Only consider the top 10,000 most frequent words
max_length = 250          # Cut off reviews after 250 words
embedding_size = 50       # Dimension of the embedding vector

# ===============================
# Load IMDB Dataset
# ===============================
# The IMDB dataset is already preprocessed: each review is encoded as a sequence of word indexes
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_words)

# ===============================
# Preprocess Input Sequences
# ===============================
# Pad sequences so that all reviews have the same length
x_train = keras.preprocessing.sequence.pad_sequences(x_train, maxlen=max_length)
x_test = keras.preprocessing.sequence.pad_sequences(x_test, maxlen=max_length)

# ===============================
# Build the Model
# ===============================
model = Sequential()
model.add(Embedding(input_dim=max_words, output_dim=embedding_size, input_length=max_length))  # Embedding Layer
model.add(Flatten())                # Flatten the 3D tensor into 2D for Dense layers
model.add(Dense(128, activation='relu'))  # Hidden Dense layer
model.add(Dropout(0.5))             # Dropout for regularization
model.add(Dense(1, activation='sigmoid'))  # Output layer with sigmoid for binary classification

# ===============================
# Compile the Model
# ===============================
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

# ===============================
# Train the Model
# ===============================
batch_size = 32
epochs = 1   # You can increase epochs for better accuracy
model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          validation_data=(x_test, y_test))

# ===============================
# Evaluate the Model
# ===============================
loss, accuracy = model.evaluate(x_test, y_test)
print("Test Loss:", loss)
print("Test Accuracy:", accuracy)


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 1us/step




[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 32ms/step - accuracy: 0.6957 - loss: 0.5270 - val_accuracy: 0.8664 - val_loss: 0.3122
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 5ms/step - accuracy: 0.8647 - loss: 0.3119
Test Loss: 0.31216368079185486
Test Accuracy: 0.8663600087165833
