# Sentiment Analysis Using an RNN Model

In [1]:
import pandas as pd
import json
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.optimizers import Adam

In [2]:
# Accumulator for the DataFrame pieces read from disk, and the value later
# passed as `chunksize` when the JSON file is read.
chunks = []
chunk_size = 5000

In [3]:
# Location of the review dataset; it is read as JSON Lines (one record per line).
# NOTE(review): this is an absolute, machine-specific path — point it at your local copy.
file_path = r'C:\Users\Administrator\Documents\GitHub\Datasets\yelp_academic_dataset_review.json'

# Stream the file in pieces of `chunk_size` lines rather than parsing it in one go.
# The reader is used as a context manager so the underlying file handle is
# closed even if parsing fails part-way through. `chunks` is rebuilt from
# scratch here so that re-running this cell does not load the data twice.
with pd.read_json(file_path, lines=True, chunksize=chunk_size) as reader:
    chunks = list(reader)

In [4]:
# Stitch the per-chunk frames into one DataFrame; ignore_index=True discards
# the per-chunk index labels and renumbers the rows.
data = pd.concat(objs=chunks, ignore_index=True)

In [5]:
# Pull the 'text' and 'stars' columns out of the DataFrame as plain Python lists
texts, labels = (data[column].tolist() for column in ('text', 'stars'))

In [6]:
# Fit a word-level tokenizer on the reviews, then encode each review as a
# list of integer word indices.
max_words = 10000  # vocabulary size limit handed to the tokenizer; adjust as needed
tokenizer = Tokenizer(
    oov_token='<OOV>',  # placeholder token for out-of-vocabulary words
    num_words=max_words,
)
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)

In [7]:
# Bring every encoded review to exactly max_len entries: shorter ones are
# padded at the end, longer ones are cut at the end ('post' for both).
max_len = 100  # Adjust as needed
padded_sequences = pad_sequences(
    sequences,
    maxlen=max_len,
    truncating='post',
    padding='post',
)

In [8]:
# Turn the list of star ratings into a NumPy array.
# The ratings are used unchanged as the regression target.
numeric_labels = np.asarray(labels)

In [9]:
# Hold out 20% of the samples for testing; random_state is fixed so the
# split is the same on every run.
x_train, x_test, y_train, y_test = train_test_split(
    padded_sequences,
    numeric_labels,
    random_state=42,
    test_size=0.2,
)

In [10]:
# Assemble the network: embedding -> LSTM -> single linear output
embedding_dim = 50  # Adjust as needed
model = Sequential([
    # Maps each of the max_words token ids to an embedding_dim-sized vector
    Embedding(input_dim=max_words, output_dim=embedding_dim, input_length=max_len),
    # Recurrent layer with 100 units
    LSTM(100),
    # One output with linear activation, since the target is treated as a regression value
    Dense(1, activation='linear'),
])


In [11]:
# Configure training: mean squared error as the regression loss,
# optimised with Adam at a learning rate of 0.001.
model.compile(
    loss='mean_squared_error',
    optimizer=Adam(learning_rate=0.001),
)


In [12]:
# Fit the model on the training split; validation_split=0.1 sets aside 10%
# of the training data for validation during training.
batch_size = 128  # Adjust as needed
epochs = 5  # Adjust as needed
model.fit(
    x_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_split=0.1,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.src.callbacks.History at 0x166261d64d0>

In [None]:
# Score the trained model on the held-out test split. The model was compiled
# with only a loss (mean squared error), so that is the value reported.
mse = model.evaluate(x=x_test, y=y_test)
print(f'Test Mean Squared Error: {mse}')

In [None]:
# Persist the whole model to a single file; the .h5 extension selects the HDF5 format
model.save(filepath='sentiment_analysis_rnn_model.h5')