In [1]:
# Step 1: Import Libraries and Load the Model
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import load_model




In [2]:
# Load the pre-trained model with ReLU activation
model = load_model('simple_rnn_imdb.h5')
model.summary()


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 500, 128)          1280000   
                                                                 
 simple_rnn (SimpleRNN)      (None, 128)               32896     
                                                                 
 dense (Dense)               (None, 1)                 129       
                                                                 
Total params: 1313025 (5.01 MB)
Trainable params: 1313025 (5.01 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [3]:
model.get_weights()

[array([[-0.00303774,  0.02717041,  0.03028799, ...,  0.00274583,
         -0.03426115,  0.02553397],
        [ 0.0393985 ,  0.02638064, -0.01498283, ..., -0.01805309,
          0.04017623, -0.04253488],
        [-0.00668607,  0.02355682,  0.01573865, ..., -0.00530959,
         -0.01508184, -0.04197519],
        ...,
        [ 0.01930814,  0.00751261, -0.02293946, ...,  0.02319631,
          0.02907575, -0.00201451],
        [-0.00192086,  0.01308901, -0.02110725, ...,  0.03130877,
          0.01971374,  0.05917965],
        [-0.03387688,  0.00189659, -0.00251738, ..., -0.01952546,
         -0.03246742,  0.02001214]], dtype=float32),
 array([[-0.07017403, -0.00491249, -0.01633602, ..., -0.05882629,
          0.05659708, -0.12134165],
        [-0.08906183,  0.08639013, -0.06495457, ...,  0.02510101,
          0.08738253,  0.10192858],
        [ 0.09211566,  0.04045962,  0.09083416, ...,  0.01300585,
         -0.17038497, -0.1310981 ],
        ...,
        [ 0.02369958, -0.14675589, -0.0

In [6]:
# Assuming model_weights is the list of arrays from model.get_weights()
model_weights = model.get_weights()

# Print the length of the list (number of arrays)
print(f"Number of arrays in the model weights: {len(model_weights)}")

# Print the shape of each array
for i, array in enumerate(model_weights):
    print(f"Shape of array {i + 1}: {array.shape}")


Number of arrays in the model weights: 6
Shape of array 1: (10000, 128)
Shape of array 2: (128, 128)
Shape of array 3: (128, 128)
Shape of array 4: (128,)
Shape of array 5: (128, 1)
Shape of array 6: (1,)


In [20]:
# Sorting the dictionary by values (ascending order)
sorted_word_index = dict(sorted(word_index.items(), key=lambda item: item[1]))


for key, value in sorted_word_index.items():
    if value <= 10:
        print(f"{key}: {value}")

the: 1
and: 2
a: 3
of: 4
to: 5
is: 6
br: 7
in: 8
it: 9
i: 10


In [8]:

# Load the IMDB dataset word index
word_index = imdb.get_word_index()
reverse_word_index = {value: key for key, value in word_index.items()}
reverse_word_index

{34701: 'fawn',
 52006: 'tsukino',
 52007: 'nunnery',
 16816: 'sonja',
 63951: 'vani',
 1408: 'woods',
 16115: 'spiders',
 2345: 'hanging',
 2289: 'woody',
 52008: 'trawling',
 52009: "hold's",
 11307: 'comically',
 40830: 'localized',
 30568: 'disobeying',
 52010: "'royale",
 40831: "harpo's",
 52011: 'canet',
 19313: 'aileen',
 52012: 'acurately',
 52013: "diplomat's",
 25242: 'rickman',
 6746: 'arranged',
 52014: 'rumbustious',
 52015: 'familiarness',
 52016: "spider'",
 68804: 'hahahah',
 52017: "wood'",
 40833: 'transvestism',
 34702: "hangin'",
 2338: 'bringing',
 40834: 'seamier',
 34703: 'wooded',
 52018: 'bravora',
 16817: 'grueling',
 1636: 'wooden',
 16818: 'wednesday',
 52019: "'prix",
 34704: 'altagracia',
 52020: 'circuitry',
 11585: 'crotch',
 57766: 'busybody',
 52021: "tart'n'tangy",
 14129: 'burgade',
 52023: 'thrace',
 11038: "tom's",
 52025: 'snuggles',
 29114: 'francesco',
 52027: 'complainers',
 52125: 'templarios',
 40835: '272',
 52028: '273',
 52130: 'zaniacs',

In [21]:
# self-note: this whole adding 3 to index while predicting,but not adding 3 to index on xtrain while training is confusing, ignore this part and focus on the rest as this is specific to imdb data

# Step 2: Helper Functions
# Function to preprocess user input
def preprocess_text(text):
    words = text.lower().split()
    encoded_review = [word_index.get(word, 2) + 3 for word in words] # shifting indices by 3 to reserve indices 0,1,2
    padded_review = sequence.pad_sequences([encoded_review], maxlen=500)
    return padded_review

# Function to decode reviews
def decode_review(encoded_review):
    return ' '.join([reverse_word_index.get(i - 3, '?') for i in encoded_review])


In [22]:
### Prediction  function

def predict_sentiment(review):
    preprocessed_input=preprocess_text(review)

    prediction=model.predict(preprocessed_input)

    sentiment = 'Positive' if prediction[0][0] > 0.5 else 'Negative'
    
    return sentiment, prediction[0][0]



In [23]:
# Step 4: User Input and Prediction
# Example review for prediction
example_review = "This movie was fantastic! The acting was great and the plot was thrilling."

sentiment,score=predict_sentiment(example_review)

print(f'Review: {example_review}')
print(f'Sentiment: {sentiment}')
print(f'Prediction Score: {score}')

Review: This movie was fantastic! The acting was great and the plot was thrilling.
Sentiment: Positive
Prediction Score: 0.6155846118927002
