In [1]:
import pandas as pd
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Flatten, Dense
from sklearn.model_selection import train_test_split

In [2]:
df = pd.read_csv(r"imdb_top_1000.csv")

In [13]:
# Use 'Overview' as the review text
df = df.dropna(subset=['Overview'])
texts = df['Overview'].astype(str).values

In [14]:
# Create sentiment labels: 1 if IMDB_Rating >= 7.0 else 0
df['sentiment'] = (df['IMDB_Rating'] >= 7.0).astype(int)
labels = df['sentiment'].values

In [15]:
# Tokenize text
num_words = 10000
maxlen = 200

In [16]:
tokenizer = Tokenizer(num_words=num_words)
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)

In [17]:
x = pad_sequences(sequences, maxlen=maxlen)
y = labels

In [18]:
# Train-test split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

In [19]:
# Build model
model = Sequential([
    Embedding(input_dim=num_words, output_dim=32, input_length=maxlen),
    Flatten(),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid')
])



In [20]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [21]:
# Train model
model.fit(x_train, y_train, epochs=5, batch_size=64, validation_split=0.2)

Epoch 1/5
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 98ms/step - accuracy: 1.0000 - loss: 0.2687 - val_accuracy: 1.0000 - val_loss: 7.1285e-10
Epoch 2/5
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step - accuracy: 1.0000 - loss: 2.2124e-10 - val_accuracy: 1.0000 - val_loss: 2.9355e-16
Epoch 3/5
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step - accuracy: 1.0000 - loss: 7.7828e-17 - val_accuracy: 1.0000 - val_loss: 3.8570e-19
Epoch 4/5
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step - accuracy: 1.0000 - loss: 2.3384e-19 - val_accuracy: 1.0000 - val_loss: 2.4310e-20
Epoch 5/5
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step - accuracy: 1.0000 - loss: 4.1713e-20 - val_accuracy: 1.0000 - val_loss: 8.0787e-21


<keras.src.callbacks.history.History at 0x1b0bfedacd0>

In [22]:
# Evaluate model
loss, accuracy = model.evaluate(x_test, y_test)
print(f"Test Accuracy: {accuracy:.2f}")

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - accuracy: 1.0000 - loss: 6.8487e-21
Test Accuracy: 1.00


In [23]:
# Sample prediction
sample_review = x_test[0].reshape(1, -1)
predicted = model.predict(sample_review)[0][0]
print("Predicted Sentiment:", "Positive" if predicted > 0.5 else "Negative")
print("Actual Sentiment:", "Positive" if y_test[0] == 1 else "Negative")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 232ms/step
Predicted Sentiment: Positive
Actual Sentiment: Positive
