In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split

In [3]:
# Load the IMDB reviews CSV and peek at its last rows as a quick sanity check.
df = pd.read_csv(filepath_or_buffer='IMDB Dataset.csv')
df.tail(n=5)

Unnamed: 0,review,sentiment
49995,I thought this movie did a down right good job...,positive
49996,"Bad plot, bad dialogue, bad acting, idiotic di...",negative
49997,I am a Catholic taught in parochial elementary...,negative
49998,I'm going to have to disagree with the previou...,negative
49999,No one expects the Star Trek movies to be high...,negative


In [5]:
# Inputs are the raw review strings; targets are the sentiment labels encoded as 0/1.
label_to_int = {'positive': 1, 'negative': 0}
X = df['review']
y = df['sentiment'].map(label_to_int)

In [7]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for validation; the fixed seed keeps the split repeatable.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [9]:
from tensorflow.keras.preprocessing.text import Tokenizer
# Build the vocabulary from the training reviews only, so validation text stays unseen.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(X_train)

# Keras' Tokenizer numbers words starting at 1 (index 0 is never assigned to a word),
# so one extra slot is needed when this is used as the embedding's input_dim.
total_words = 1 + len(tokenizer.word_index)

In [10]:
# Display the vocabulary size (number of distinct training words + 1).
total_words

112281

In [13]:
from tensorflow.keras.utils import pad_sequences
# Every review is cut or zero-padded to this many tokens before reaching the network.
max_len = 500

# Training split: words -> integer ids -> fixed-length rows with zeros appended at the end.
X_train_seq = tokenizer.texts_to_sequences(X_train)
X_train_pad = pad_sequences(X_train_seq, maxlen=max_len, padding='post')

# Validation split goes through the identical pipeline.
X_val_seq = tokenizer.texts_to_sequences(X_val)
X_val_pad = pad_sequences(X_val_seq, maxlen=max_len, padding='post')

In [17]:
import numpy as np
# Sanity-check the padded training matrix: expected (number of reviews, max_len).
X_train_pad.shape

(40000, 500)

In [37]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense

from tensorflow.keras.layers import BatchNormalization

# Two stacked LSTMs over learned word embeddings, ending in a single sigmoid unit
# whose output is read as the probability of the positive class (label 1).
# NOTE(review): the Embedding leaves mask_zero at its default, so zero-padded
# positions are fed to the LSTMs like any other token — confirm that is intended.
model = Sequential([
    # `input_length` is deprecated in Keras 3 and was dropped here; the sequence
    # length is supplied through build() below instead.
    Embedding(input_dim=total_words, output_dim=100),
    LSTM(128, return_sequences=True),  # emit every timestep so the next LSTM receives a sequence
    BatchNormalization(),
    LSTM(64),  # only the final hidden state is passed on
    Dense(1, activation='sigmoid'),
])
model.build(input_shape=(None, max_len))



In [39]:
# Binary cross-entropy matches the single sigmoid output; accuracy is tracked for reporting.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [41]:
# Print the layer-by-layer architecture and parameter counts.
model.summary()


In [43]:
from tensorflow.keras.callbacks import EarlyStopping

# Stop training once validation loss has failed to improve for 3 consecutive epochs,
# then roll the model back to the weights of its best epoch.
earlystop = EarlyStopping(monitor='val_loss', patience=3, verbose=1, restore_best_weights=True)

In [45]:
# Train against the explicit hold-out set. `validation_split` was removed: Keras
# ignores it whenever `validation_data` is supplied, so passing both wrongly
# suggested that a further 20% was being carved out of the training rows.
history = model.fit(
    X_train_pad,
    y_train,
    epochs=10,
    batch_size=64,
    validation_data=(X_val_pad, y_val),
    callbacks=[earlystop],
)


Epoch 1/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m680s[0m 1s/step - accuracy: 0.5114 - loss: 0.6930 - val_accuracy: 0.5169 - val_loss: 0.6862
Epoch 2/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m594s[0m 951ms/step - accuracy: 0.5306 - loss: 0.6754 - val_accuracy: 0.6510 - val_loss: 0.6481
Epoch 3/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m744s[0m 1s/step - accuracy: 0.7658 - loss: 0.5045 - val_accuracy: 0.8775 - val_loss: 0.3011
Epoch 4/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m705s[0m 1s/step - accuracy: 0.9172 - loss: 0.2267 - val_accuracy: 0.8915 - val_loss: 0.2924
Epoch 5/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m747s[0m 1s/step - accuracy: 0.9582 - loss: 0.1329 - val_accuracy: 0.8870 - val_loss: 0.3126
Epoch 6/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m784s[0m 1s/step - accuracy: 0.9807 - loss: 0.0720 - val_accuracy: 0.8706 - val_loss: 0.3465
Epoch 7/10
[1m625/

In [47]:
# Persist the trained model to an HDF5 file in the working directory.
# NOTE(review): the fitted tokenizer is not saved here; anything that reloads this
# model needs the same word index — confirm whether it is persisted elsewhere.
model.save("lstm_sentiment_model.h5")



In [55]:
# Interactive demo: classify reviews typed by the user until they enter 'exit'.
while True:
    user_input = input("\nEnter your movie review (or type 'exit' to quit):\n> ")
    # Ignore surrounding whitespace so inputs such as " exit" still end the loop.
    if user_input.strip().lower() == 'exit':
        break

    # Preprocess input exactly as the training data was: same tokenizer, same length,
    # and zeros appended after the text. pad_sequences pre-pads by default, which
    # would put the zeros before the text — a layout the model was not trained on.
    seq = tokenizer.texts_to_sequences([user_input])
    padded = pad_sequences(seq, padding='post', maxlen=max_len)

    # Predict sentiment: the sigmoid output is read as the probability of label 1 (positive).
    prob = model.predict(padded)[0][0]
    sentiment = "Positive 😊" if prob > 0.5 else "Negative 😞"

    # Output
    print(f"\n🧠 Sentiment: {sentiment}")



Enter your movie review (or type 'exit' to quit):
>  the plot had very suspensful twists. The direction and script were fabulous


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 104ms/step

🧠 Sentiment: Positive 😊



Enter your movie review (or type 'exit' to quit):
>  exit
