In [2]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN,Dense

In [3]:
max_features = 10000 #vocbulary size
(X_train,y_train),(X_test,y_test) = imdb.load_data(num_words=max_features)

#print the shape of the data

print('Training data', X_train.shape, y_train.shape)
print('Test data', X_test.shape, y_test.shape)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
Training data (25000,) (25000,)
Test data (25000,) (25000,)


In [4]:
## Inspect a sample review

sample_review=X_train[0] # we get one hot representation of words
sample_label=y_train[0] #positive or negative

print('Sample review as integers:', sample_review)
print('Sample label:', sample_label)

Sample review as integers: [1, 14, 22, 16, 43, 530, 973, 1622, 1385, 65, 458, 4468, 66, 3941, 4, 173, 36, 256, 5, 25, 100, 43, 838, 112, 50, 670, 2, 9, 35, 480, 284, 5, 150, 4, 172, 112, 167, 2, 336, 385, 39, 4, 172, 4536, 1111, 17, 546, 38, 13, 447, 4, 192, 50, 16, 6, 147, 2025, 19, 14, 22, 4, 1920, 4613, 469, 4, 22, 71, 87, 12, 16, 43, 530, 38, 76, 15, 13, 1247, 4, 22, 17, 515, 17, 12, 16, 626, 18, 2, 5, 62, 386, 12, 8, 316, 8, 106, 5, 4, 2223, 5244, 16, 480, 66, 3785, 33, 4, 130, 12, 16, 38, 619, 5, 25, 124, 51, 36, 135, 48, 25, 1415, 33, 6, 22, 12, 215, 28, 77, 52, 5, 14, 407, 16, 82, 2, 8, 4, 107, 117, 5952, 15, 256, 4, 2, 7, 3766, 5, 723, 36, 71, 43, 530, 476, 26, 400, 317, 46, 7, 4, 2, 1029, 13, 104, 88, 4, 381, 15, 297, 98, 32, 2071, 56, 26, 141, 6, 194, 7486, 18, 4, 226, 22, 21, 134, 476, 26, 480, 5, 144, 30, 5535, 18, 51, 36, 28, 224, 92, 25, 104, 4, 226, 65, 16, 38, 1334, 88, 12, 16, 283, 5, 16, 4472, 113, 103, 32, 15, 16, 5345, 19, 178, 32]
Sample label: 1


In [5]:
### Mapping of words index to words for understanding
word_index=imdb.get_word_index()
#word_index #dictornary

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json


In [6]:
reverse_word_index= {value: key for key, value in word_index.items()}

In [7]:
decoded_review = ' '.join([reverse_word_index.get(i - 3, '?') for i in sample_review])
decoded_review

"? this film was just brilliant casting location scenery story direction everyone's really suited the part they played and you could just imagine being there robert ? is an amazing actor and now the same being director ? father came from the same scottish island as myself so i loved the fact there was a real connection with this film the witty remarks throughout the film were great it was just brilliant so much that i bought the film as soon as it was released for ? and would recommend it to everyone to watch and the fly fishing was amazing really cried at the end it was so sad and you know what they say if you cry at a film it must have been good and this definitely was also ? to the two little boy's that played the ? of norman and paul they were just brilliant children are often left out of the ? list i think because the stars that play them all grown up are such a big profile for the whole film but these children are amazing and should be praised for what they have done don't you th

In [8]:
from tensorflow.keras.preprocessing import sequence

max_len = 500

X_train = sequence.pad_sequences(X_train, maxlen=max_len)
X_test = sequence.pad_sequences(X_test, maxlen=max_len)
X_train

array([[   0,    0,    0, ...,   19,  178,   32],
       [   0,    0,    0, ...,   16,  145,   95],
       [   0,    0,    0, ...,    7,  129,  113],
       ...,
       [   0,    0,    0, ...,    4, 3586,    2],
       [   0,    0,    0, ...,   12,    9,   23],
       [   0,    0,    0, ...,  204,  131,    9]], dtype=int32)

In [10]:
## Train Simple RNN
#dim = 10 previously , as it is big dataset lets put 128
dim = 128
model=Sequential()
model.add(Embedding(max_features, dim, input_length=max_len))
model.add(SimpleRNN(128,activation='relu'))
model.add(Dense(1, activation='sigmoid'))



In [11]:
model.summary()

In [12]:
model.compile(optimizer='adam', loss=tf.keras.losses.BinaryCrossentropy(), metrics=['accuracy'])

In [13]:
## Create an instance of EarlyStopping Callback
from tensorflow.keras.callbacks import EarlyStopping
earlystopping=EarlyStopping(monitor='val_loss', patience=5, restore_best_weights = True)

In [14]:
model.summary()

In [15]:
# Train the model with earlystopping
model.fit(
    X_train,y_train,epochs=10,batch_size=32, validation_split=0.2,
    callbacks=[earlystopping]
)

Epoch 1/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 57ms/step - accuracy: 0.6136 - loss: 0.8512 - val_accuracy: 0.5786 - val_loss: 0.6420
Epoch 2/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 55ms/step - accuracy: 0.7135 - loss: 0.6533 - val_accuracy: 0.7098 - val_loss: 0.5546
Epoch 3/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 54ms/step - accuracy: 0.7667 - loss: 2642083072.0000 - val_accuracy: 0.6342 - val_loss: 0.6188
Epoch 4/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 55ms/step - accuracy: 0.7415 - loss: 0.5025 - val_accuracy: 0.7166 - val_loss: 0.5439
Epoch 5/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 55ms/step - accuracy: 0.8264 - loss: 0.3889 - val_accuracy: 0.7528 - val_loss: 0.5181
Epoch 6/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 55ms/step - accuracy: 0.8615 - loss: 0.3268 - val_accuracy: 0.7570 - val_loss: 0.5318
Epoch 7

<keras.src.callbacks.history.History at 0x7bf402149930>

In [16]:
model.save('simple_rnn_imdb.h5')



In [17]:
model.summary()

In [19]:
model.get_weights()

[array([[ 0.303582  , -0.05025218,  0.04663373, ..., -0.08286288,
         -0.2956273 ,  0.00372062],
        [-0.01408651, -0.02047963,  0.0667265 , ...,  0.03920185,
         -0.04481776,  0.018064  ],
        [-0.00566717,  0.09010366,  0.13804859, ..., -0.07665639,
         -0.09472236, -0.09275725],
        ...,
        [ 0.01105624,  0.09257907,  0.02660605, ...,  0.02705771,
         -0.04121911, -0.0567257 ],
        [ 0.051004  ,  0.00394419,  0.06931485, ..., -0.00765905,
         -0.04823989,  0.00572048],
        [ 0.04768732, -0.05904706,  0.09086073, ...,  0.02673497,
         -0.04744519,  0.07095532]], dtype=float32),
 array([[ 0.10525948, -0.09288356,  0.03406724, ..., -0.18896659,
          0.10021158, -0.10961915],
        [ 0.0500736 ,  0.0408796 , -0.1960221 , ..., -0.03969699,
         -0.16432096,  0.03212856],
        [-0.11843143, -0.01471975, -0.00930662, ..., -0.08181067,
         -0.00046799,  0.0947779 ],
        ...,
        [ 0.18953615,  0.11383326,  0.0

# Prediction

In [20]:
# Step 2 Helper Funtcion
# Function to decode the reviews

def decoded_review(encode_review):
  return ' '.join([reverse_word_index.get(i - 3, '?') for i in encode_review])

# Function to preprocess user input

def preprocess_text(text):
  words = text.lower().split()
  encoded_review = [word_index.get(word, 2) + 3 for word in words]
  padded_review = sequence.pad_sequences([encoded_review], maxlen=500)
  return padded_review

In [21]:
### Prediction function

def predict_sentiment(review):
  preprocessed_input= preprocess_text(review)
  prediction = model.predict(preprocessed_input)
  sentiment = 'Positive' if prediction[0][0] > 0.5 else 'Negative'
  return sentiment, prediction[0][0]

In [24]:
## User Input and Prediction
# example Review for prediction
example_review = "This movie was boring. The acting was worst and the plot was thrilling"
sentiment,score=predict_sentiment(example_review)

print(example_review)
print('Sentiment:',sentiment)
print('Prediction Score:',score)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
This movie was boring. The acting was worst and the plot was thrilling
Sentiment: Negative
Prediction Score: 0.014334362


In [9]:
tpu = tf.distribute.cluster_resolver.TPUClusterResolver()

In [10]:
# instantiate a distribution strategy
tf.tpu.experimental.initialize_tpu_system(tpu)
tpu_strategy = tf.distribute.TPUStrategy(tpu)

In [11]:
with tpu_strategy.scope():
    dim = 128
    model=Sequential()
    model.add(Embedding(max_features, dim, input_length=max_len))
    model.add(SimpleRNN(128,activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(optimizer='adam', loss=tf.keras.losses.BinaryCrossentropy(), metrics=['accuracy'])

In [13]:
model.fit(
    X_train,y_train,epochs=50,batch_size=32, validation_split=0.2
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50

KeyboardInterrupt: 