## IMDB DATASET- SENTIMENT ANALYSIS

##### PROBLEM STATMENT
The IMDB dataset provides a collection of movie reviews labeled as positive or negative, ideal for analyzing audience sentiment. The goal is to create a sentiment analysis model using deep learning techniques to classify reviews accurately.

In [1]:
## Step-1:Import Libraries and Load the Model
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.datasets import imdb

In [2]:
## Step-2:Load the IMDB Dataset and Split into train and test dataset

# Vocab Size
max_features=1000

(X_train, y_train), (X_test, y_test) =imdb.load_data(num_words=max_features)


In [3]:
# Shape
print(f'Training data shape: {X_train.shape}, Training labels shape: {y_train.shape}')
print(f'Testing data shape: {X_train.shape}, Testing labels shape: {y_test.shape}')

Training data shape: (25000,), Training labels shape: (25000,)
Testing data shape: (25000,), Testing labels shape: (25000,)


In [4]:
# Check a sample review and its label
movie_review=X_train[0]
review_label=y_train[0]

print(f"Sample review (as integers):{movie_review}")
print(f'Sample label: {review_label}')

Sample review (as integers):[1, 14, 22, 16, 43, 530, 973, 2, 2, 65, 458, 2, 66, 2, 4, 173, 36, 256, 5, 25, 100, 43, 838, 112, 50, 670, 2, 9, 35, 480, 284, 5, 150, 4, 172, 112, 167, 2, 336, 385, 39, 4, 172, 2, 2, 17, 546, 38, 13, 447, 4, 192, 50, 16, 6, 147, 2, 19, 14, 22, 4, 2, 2, 469, 4, 22, 71, 87, 12, 16, 43, 530, 38, 76, 15, 13, 2, 4, 22, 17, 515, 17, 12, 16, 626, 18, 2, 5, 62, 386, 12, 8, 316, 8, 106, 5, 4, 2, 2, 16, 480, 66, 2, 33, 4, 130, 12, 16, 38, 619, 5, 25, 124, 51, 36, 135, 48, 25, 2, 33, 6, 22, 12, 215, 28, 77, 52, 5, 14, 407, 16, 82, 2, 8, 4, 107, 117, 2, 15, 256, 4, 2, 7, 2, 5, 723, 36, 71, 43, 530, 476, 26, 400, 317, 46, 7, 4, 2, 2, 13, 104, 88, 4, 381, 15, 297, 98, 32, 2, 56, 26, 141, 6, 194, 2, 18, 4, 226, 22, 21, 134, 476, 26, 480, 5, 144, 30, 2, 18, 51, 36, 28, 224, 92, 25, 104, 4, 226, 65, 16, 38, 2, 88, 12, 16, 283, 5, 16, 2, 113, 103, 32, 15, 16, 2, 19, 178, 32]
Sample label: 1


In [5]:
## Step-3:Padding - Converting every sentences into equal lengths (500 words)
from tensorflow.keras.preprocessing import sequence
max_len=500
X_train=sequence.pad_sequences(X_train,maxlen=max_len)
X_test = sequence.pad_sequences(X_test, maxlen=max_len)
X_train

array([[  0,   0,   0, ...,  19, 178,  32],
       [  0,   0,   0, ...,  16, 145,  95],
       [  0,   0,   0, ...,   7, 129, 113],
       ...,
       [  0,   0,   0, ...,   4,   2,   2],
       [  0,   0,   0, ...,  12,   9,  23],
       [  0,   0,   0, ..., 204, 131,   9]])

In [6]:
## Step-4:Train Simple RNN
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding,SimpleRNN,Dense
model=Sequential()
model.add(Embedding(max_features,128,input_length=max_len)) ## Embedding Layers
model.add(SimpleRNN(units=128,input_shape=(500, 128),activation='relu'))
model.add(Dense(1,activation="sigmoid"))

  super().__init__(**kwargs)


In [7]:
model.summary()

In [8]:
## Step-5:Configure the learning process of the model
model.compile(optimizer='adam',loss='binary_crossentropy',metrics=['accuracy'])

In [9]:
## Step-6:Create an instance of EarlyStoppping Callback
from tensorflow.keras.callbacks import EarlyStopping
earlystopping=EarlyStopping(monitor='val_loss',patience=5,restore_best_weights=True)
earlystopping

<keras.src.callbacks.early_stopping.EarlyStopping at 0x22bf1980550>

In [10]:
## Step-7:Train the model with early stopping
history=model.fit(
    X_train,y_train,epochs=10,batch_size=32,
    validation_split=0.2,
    callbacks=[earlystopping]
)

Epoch 1/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m131s[0m 203ms/step - accuracy: 0.5602 - loss: nan - val_accuracy: 0.5062 - val_loss: nan
Epoch 2/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m112s[0m 179ms/step - accuracy: 0.4967 - loss: nan - val_accuracy: 0.5062 - val_loss: nan
Epoch 3/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m104s[0m 167ms/step - accuracy: 0.4977 - loss: nan - val_accuracy: 0.5062 - val_loss: nan
Epoch 4/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m102s[0m 164ms/step - accuracy: 0.5026 - loss: nan - val_accuracy: 0.5062 - val_loss: nan
Epoch 5/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m102s[0m 162ms/step - accuracy: 0.4985 - loss: nan - val_accuracy: 0.5062 - val_loss: nan
Epoch 6/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m102s[0m 163ms/step - accuracy: 0.5043 - loss: nan - val_accuracy: 0.5062 - val_loss: nan


In [11]:
##Step-8:Save model file
model.save('imdb_rnn_model.h5')


