In [24]:
# Step 1: Import Libraries and Load the Model
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import load_model

In [25]:
# Load the pre-trained model with ReLU activation
model = load_model('simple_rnn_imdb.h5')
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_1 (Embedding)     (None, 500, 128)          1280000   
                                                                 
 simple_rnn_1 (SimpleRNN)    (None, 128)               32896     
                                                                 
 dense_1 (Dense)             (None, 1)                 129       
                                                                 
Total params: 1313025 (5.01 MB)
Trainable params: 1313025 (5.01 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [26]:
model.get_weights()

[array([[-0.00821593, -0.08669226,  0.05118893, ...,  0.01493507,
         -0.05181531,  0.00339529],
        [-0.05938848, -0.07188316,  0.01662093, ...,  0.05469234,
          0.00343757,  0.0273779 ],
        [-0.00862648, -0.05349358,  0.04949524, ..., -0.05533157,
          0.01584006, -0.00528931],
        ...,
        [ 0.0094333 , -0.04153987,  0.0601604 , ...,  0.03187549,
          0.04331043, -0.00077908],
        [ 0.02972841,  0.05997285, -0.09011976, ..., -0.07237873,
          0.03321388,  0.07411703],
        [ 0.04568293, -0.05542685,  0.09943862, ...,  0.07711763,
         -0.07635488, -0.00303652]], dtype=float32),
 array([[ 0.10129602, -0.117083  ,  0.12447267, ..., -0.14288962,
         -0.0193699 ,  0.04143548],
        [ 0.01924922,  0.10695758, -0.0669086 , ..., -0.00851364,
         -0.05889199, -0.02215817],
        [ 0.0855544 ,  0.1157801 , -0.13859999, ..., -0.20546624,
          0.00794205, -0.09126207],
        ...,
        [-0.02286603,  0.02890836, -0.0

In [27]:
# Assuming model_weights is the list of arrays from model.get_weights()
model_weights = model.get_weights()

# Print the length of the list (number of arrays)
print(f"Number of arrays in the model weights: {len(model_weights)}")

# Print the shape of each array
for i, array in enumerate(model_weights):
    print(f"Shape of array {i + 1}: {array.shape}")


Number of arrays in the model weights: 6
Shape of array 1: (10000, 128)
Shape of array 2: (128, 128)
Shape of array 3: (128, 128)
Shape of array 4: (128,)
Shape of array 5: (128, 1)
Shape of array 6: (1,)


In [28]:
# Sorting the dictionary by values (ascending order)
sorted_word_index = dict(sorted(word_index.items(), key=lambda item: item[1]))


for key, value in sorted_word_index.items():
    if value <= 10:
        print(f"{key}: {value}")

the: 1
and: 2
a: 3
of: 4
to: 5
is: 6
br: 7
in: 8
it: 9
i: 10


In [29]:

# Load the IMDB dataset word index
word_index = imdb.get_word_index()
reverse_word_index = {value: key for key, value in word_index.items()}
reverse_word_index

{34701: 'fawn',
 52006: 'tsukino',
 52007: 'nunnery',
 16816: 'sonja',
 63951: 'vani',
 1408: 'woods',
 16115: 'spiders',
 2345: 'hanging',
 2289: 'woody',
 52008: 'trawling',
 52009: "hold's",
 11307: 'comically',
 40830: 'localized',
 30568: 'disobeying',
 52010: "'royale",
 40831: "harpo's",
 52011: 'canet',
 19313: 'aileen',
 52012: 'acurately',
 52013: "diplomat's",
 25242: 'rickman',
 6746: 'arranged',
 52014: 'rumbustious',
 52015: 'familiarness',
 52016: "spider'",
 68804: 'hahahah',
 52017: "wood'",
 40833: 'transvestism',
 34702: "hangin'",
 2338: 'bringing',
 40834: 'seamier',
 34703: 'wooded',
 52018: 'bravora',
 16817: 'grueling',
 1636: 'wooden',
 16818: 'wednesday',
 52019: "'prix",
 34704: 'altagracia',
 52020: 'circuitry',
 11585: 'crotch',
 57766: 'busybody',
 52021: "tart'n'tangy",
 14129: 'burgade',
 52023: 'thrace',
 11038: "tom's",
 52025: 'snuggles',
 29114: 'francesco',
 52027: 'complainers',
 52125: 'templarios',
 40835: '272',
 52028: '273',
 52130: 'zaniacs',

In [30]:
# self-note: this whole adding 3 to index while predicting,but not adding 3 to index on xtrain while training is confusing, ignore this part and focus on the rest as this is specific to imdb data

# Step 2: Helper Functions
# Function to preprocess user input
def preprocess_text(text):
    words = text.lower().split()
    encoded_review = [word_index.get(word, 2) + 3 for word in words] # shifting indices by 3 to reserve indices 0,1,2
    padded_review = sequence.pad_sequences([encoded_review], maxlen=500)
    return padded_review

# Function to decode reviews
def decode_review(encoded_review):
    return ' '.join([reverse_word_index.get(i - 3, '?') for i in encoded_review])


In [31]:
### Prediction  function

def predict_sentiment(review):
    preprocessed_input=preprocess_text(review)

    prediction=model.predict(preprocessed_input)

    sentiment = 'Positive' if prediction[0][0] > 0.5 else 'Negative'
    
    return sentiment, prediction[0][0]



In [32]:
# Step 4: User Input and Prediction
# Example review for prediction
example_review = "This movie was fantastic! The acting was great and the plot was thrilling."

sentiment,score=predict_sentiment(example_review)

print(f'Review: {example_review}')
print(f'Sentiment: {sentiment}')
print(f'Prediction Score: {score}')

Review: This movie was fantastic! The acting was great and the plot was thrilling.
Sentiment: Positive
Prediction Score: 0.5538839101791382
