In [1]:
import streamlit as st
import re
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer

import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import joblib  # For loading tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from PIL import Image


# --- Inference artifacts and runtime setup ---

# Tokenizer object persisted with joblib (presumably the fitted Keras
# Tokenizer used at training time -- confirm).
# NOTE(review): joblib.load unpickles the file, so only load artifacts that
# come from a trusted source.
tokenizer = joblib.load('tokenizer1.joblib')

# Trained sentiment classifier stored in Keras HDF5 format.
model = tf.keras.models.load_model('sentiment_model1.h5')

# Make sure the NLTK resources are available locally (the recorded output
# shows both were already up to date).
for _nltk_resource in ('stopwords', 'punkt'):
    nltk.download(_nltk_resource)

# Length passed to pad_sequences for every input before prediction.
max_sequence_length = 100







[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\hp\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\hp\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [4]:
# Function to preprocess the text data
def preprocess_text(text):
    """Normalize raw text into the cleaned form that is fed to the tokenizer.

    Steps, in order: delete URLs (``http...``), ``@mentions`` and
    ``#hashtags`` (each together with the non-whitespace run that follows),
    delete every character that is not an ASCII letter, digit or whitespace,
    lowercase the result, and drop NLTK's English stop words.

    Args:
        text: Raw input string.

    Returns:
        The remaining lowercase words joined by single spaces; an empty
        string when nothing is left.
    """
    # Patterns are applied one after another; each match is removed outright
    # (it is not replaced by a space).
    for pattern in (r'http\S+', r'@\S+', r'#\S+', r'[^a-zA-Z0-9\s]'):
        text = re.sub(pattern, '', text)

    english_stop_words = set(stopwords.words('english'))
    kept_words = []
    for token in text.lower().split():
        if token not in english_stop_words:
            kept_words.append(token)
    return ' '.join(kept_words)


def predict_sentiment(text):
    """Run the loaded model on a single piece of raw text.

    Args:
        text: Raw input string; it is cleaned with ``preprocess_text`` before
            being tokenized.

    Returns:
        Whatever ``model.predict`` returns for the one padded sequence (the
        recorded run shows a 1x4 array of class scores).
    """
    token_ids = tokenizer.texts_to_sequences([preprocess_text(text)])

    # Pad at the end of the sequence up to max_sequence_length.
    # NOTE(review): padding side and length presumably have to match what was
    # used when the model was trained -- confirm. The saved cell output shows
    # zeros at the front of the sequence, so it came from a different version
    # of this function; re-run the cell to refresh it.
    model_input = pad_sequences(
        token_ids,
        maxlen=max_sequence_length,
        padding='post',
    )
    return model.predict(model_input)


# Entry point: classify a hard-coded sample tweet and print the result
def main():
    """Classify one hard-coded sample tweet and print the scores and label.

    Prints the raw value returned by ``predict_sentiment`` first, then the
    class name at the position of the highest score.
    """
    sample_tweet = "Completed #Fustercluck this weekend, rounding off year one of #Borderlands3. I decided to go back in response and play the original Borderlands again as I had the 4K GOTY version on my PC already. The shooting feels awful. I'm going to try some suggested ini changes tomorrow."
    # NOTE(review): label order presumably mirrors the class encoding used in
    # training -- confirm.
    class_names = ["Irrelevant", "Negative", "Neutral", "Positive"]

    scores = predict_sentiment(sample_tweet)
    print(scores)

    # argmax picks the position of the largest score.
    best_index = scores.argmax()
    print(class_names[best_index])
    
# Run the main function.
# When executed as a notebook cell or as a script, __name__ is '__main__', so
# main() runs; the guard only skips it if this code is imported as a module.
if __name__ == '__main__':
    main()


[[    0     0     0     0     0     0     0     0     0     0     0     0
      0     0     0     0     0     0     0     0     0     0     0     0
      0     0     0     0     0     0     0     0     0     0     0     0
      0     0     0     0     0     0     0     0     0     0     0     0
      0     0     0     0     0     0     0     0     0     0     0     0
      0     0     0     0     0     0     0     0     0     0     0     0
      0     0     0  1886   443    89     6   614    29    35  1008     8
    582    72  1317 12696   575   168   146   771   338  1209     2    42
    229  6058   849   386]]
[[0.19845793 0.24855013 0.30066815 0.2523238 ]]
Neutral


In [11]:
# Print the loaded model's layer-by-layer architecture and parameter counts.
model.summary()

Model: "sequential_7"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 100, 100)          2000000   
                                                                 
 spatial_dropout1d (Spatial  (None, 100, 100)          0         
 Dropout1D)                                                      
                                                                 
 bidirectional (Bidirection  (None, 200)               160800    
 al)                                                             
                                                                 
 dense_14 (Dense)            (None, 4)                 804       
                                                                 
Total params: 2161604 (8.25 MB)
Trainable params: 2161604 (8.25 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
