In [1]:
import pandas as pd
import numpy as np
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Dense, Flatten
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split

# Load the CSV file
data = pd.read_csv("imdb_dataset.csv")

# Split the data into input (reviews) and output (sentiment) columns
reviews = data["review"]
sentiments = data["sentiment"]

# Tokenize the text
tokenizer = Tokenizer()
tokenizer.fit_on_texts(reviews)
sequences = tokenizer.texts_to_sequences(reviews)

# Pad sequences to a fixed length
max_length = 250
X = pad_sequences(sequences, maxlen=max_length)

# Convert sentiments to binary labels (0 for negative, 1 for positive)
y = np.array([1 if sentiment == 'positive' else 0 for sentiment in sentiments])

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [2]:
# Build the model
model = Sequential()
model.add(Embedding(input_dim=len(tokenizer.word_index)+1, output_dim=128, input_length=max_length))
model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, batch_size=64, epochs=7, validation_data=(X_test, y_test))

# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print("Test loss:", loss)
print("Test accuracy:", accuracy)

# Make predictions
predictions = model.predict(X_test[:5])


Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7
Test loss: 0.4669923484325409
Test accuracy: 0.8834999799728394


In [3]:
# Display sample predictions and actual sentiments
print("Sample Predictions:\n")
for i in range(5):
    predicted_sentiment = "Positive" if predictions[i] >= 0.5 else "Negative"
    actual_sentiment = "Positive" if y_test[i] == 1 else "Negative"
    print("Predicted Sentiment:", predicted_sentiment)
    print("Actual Sentiment:", actual_sentiment)
    review_text = reviews.iloc[i]
    print("Review Text:\n", review_text)
    print()

Sample Predictions:

Predicted Sentiment: Positive
Actual Sentiment: Positive
Review Text:
 One of the other reviewers has mentioned that after watching just 1 Oz episode you'll be hooked. They are right, as this is exactly what happened with me.  The first thing that struck me about Oz was its brutality and unflinching scenes of violence, which set in right from the word GO. Trust me, this is not a show for the faint hearted or timid. This show pulls no punches with regards to drugs, sex or violence. Its is hardcore, in the classic use of the word.  It is called OZ as that is the nickname given to the Oswald Maximum Security State Penitentary. It focuses mainly on Emerald City, an experimental section of the prison where all the cells have glass fronts and face inwards, so privacy is not high on the agenda. Em City is home to many..Aryans, Muslims, gangstas, Latinos, Christians, Italians, Irish and more....so scuffles, death stares, dodgy dealings and shady agreements are never far aw