# Sentiment Analysis Using an RNN (LSTM) Model

In [1]:
import os
import pandas as pd
import json
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.optimizers import Adam


In [2]:
# Location of the Yelp review dataset (a JSON file that is later read with
# pd.read_json(..., lines=True)).
# The YELP_REVIEW_JSON environment variable, when set, overrides the built-in
# default so the notebook can run on machines where the dataset lives elsewhere;
# when it is not set the original path is used unchanged.
file_path = os.environ.get(
    'YELP_REVIEW_JSON',
    r'C:\Users\Administrator\Documents\GitHub\Datasets\yelp_academic_dataset_review.json',
)

In [3]:
# Streaming settings: the dataset is consumed `chunk_size` JSON lines at a
# time, and `model_save_path` is the file name reserved for the saved model.
model_save_path = 'sentiment_analysis_rnn_model.h5'
chunk_size = 5_000

In [4]:
# Training state defined ahead of the chunk loop:
#   epochs - number of passes model.fit() makes over each chunk
#   model  - starts out as None; the loop checks for None before building it
epochs = 10
model = None

In [5]:
# Incrementally train one LSTM regressor over the whole dataset, one chunk at a
# time, so the full review file never has to fit in memory.
#
# Reads:  file_path, chunk_size, model_save_path, model, epochs (earlier cells)
# Writes: model (trained network), tokenizer, and x_test / y_test, which hold
#         the held-out 20% split of the LAST processed chunk for later evaluation.

# Hyper-parameters shared by every chunk (hoisted: they never change per chunk).
vocab_size = 10000     # tokenizer vocabulary cap == Embedding input_dim
max_len = 100          # every review is padded / truncated to this many tokens
embedding_dim = 50

# The tokenizer is created once and reused for all chunks. Fitting a fresh
# tokenizer per chunk would map the same word to a different integer id in each
# chunk, which makes the embedding learned on earlier chunks meaningless.
tokenizer = None

for chunk in pd.read_json(file_path, lines=True, chunksize=chunk_size):
    # Review text is the input; the star rating is the regression target.
    texts = chunk['text'].tolist()
    labels = chunk['stars'].tolist()

    # Fit the vocabulary on the first chunk only, then keep it frozen; words
    # not seen in that chunk are mapped to the '<OOV>' token afterwards.
    if tokenizer is None:
        tokenizer = Tokenizer(num_words=vocab_size, oov_token='<OOV>')
        tokenizer.fit_on_texts(texts)
    sequences = tokenizer.texts_to_sequences(texts)

    # Pad / truncate at the end so every sequence has exactly max_len ids.
    padded_sequences = pad_sequences(sequences, maxlen=max_len, padding='post', truncating='post')

    # Convert labels to numerical format
    numeric_labels = np.array(labels)

    # Hold out 20% of this chunk; the fixed seed keeps the split reproducible.
    x_train, x_test, y_train, y_test = train_test_split(
        padded_sequences, numeric_labels, test_size=0.2, random_state=42
    )

    # Build the network only once. The previous condition also rebuilt it
    # whenever the checkpoint file was missing -- which was always the case,
    # because nothing wrote that file -- so every chunk restarted from
    # untrained weights.
    if model is None:
        model = Sequential()
        model.add(Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=max_len))
        model.add(LSTM(100))
        model.add(Dense(1, activation='linear'))  # single linear unit: star rating as regression
        model.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error')

    # Continue training the same in-memory model on this chunk.
    model.fit(x_train, y_train, epochs=epochs, batch_size=128, validation_split=0.1)

    # Checkpoint after every chunk so an interrupted run keeps its progress.
    model.save(model_save_path)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10

KeyboardInterrupt: 

In [None]:
# Score the trained network on the held-out split. The model built in this
# notebook is compiled with a mean-squared-error loss and no extra metrics,
# so the value reported here is that loss.
mse = model.evaluate(x=x_test, y=y_test)
print(f'Test Mean Squared Error: {mse}')

In [None]:
# Save the entire model (architecture, weights, optimizer state) to an HDF5 file.
# The explicit '.h5' extension is what selects the HDF5 format; without an
# extension, Keras either writes a SavedModel directory (TF2 Keras) or rejects
# the path (Keras 3), neither of which matches the intent stated here.
model.save('final_rnn_model.h5')