# Text Sentiment Classification Using CNNs and RNNs

## 1. Import necessary libraries

In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import classification_report
from tensorflow.keras.datasets import imdb
from tensorflow.keras.layers import (
    Conv1D,
    Dense,
    Dropout,
    Embedding,
    GlobalMaxPooling1D,
    Input,
    LSTM,
)
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

## 2. Load and preprocess the IMDB dataset

In [None]:
# IMDb reviews come pre-encoded as lists of integer word ids; ids are
# restricted to the range below `max_words`.
max_words = 5000
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_words)

# Bring every review to one fixed length so the splits can be fed to the
# models as rectangular integer matrices.
max_sequence_length = 500
x_train_pad, x_test_pad = (
    pad_sequences(reviews, maxlen=max_sequence_length)
    for reviews in (x_train, x_test)
)

## 3. Build CNN Model

In [None]:
# Build CNN Model
def create_cnn_model():
    """Build and compile a 1D-convolutional binary sentiment classifier.

    Layer stack: integer-id input -> 128-d embedding -> Conv1D (128 filters,
    kernel size 5, ReLU) -> global max pooling -> Dense(10, ReLU) ->
    Dropout(0.5) -> single sigmoid unit.

    Reads the module-level ``max_words`` (embedding vocabulary size) and
    ``max_sequence_length`` (length of each padded input sequence).

    Returns:
        A ``Sequential`` model compiled with the Adam optimizer, binary
        cross-entropy loss and an accuracy metric.
    """
    model = Sequential([
        # An explicit Input layer replaces Embedding's deprecated
        # `input_length` argument, so the model is built immediately and
        # `summary()` can report output shapes and parameter counts.
        Input(shape=(max_sequence_length,), dtype="int32"),
        Embedding(input_dim=max_words, output_dim=128),
        Conv1D(filters=128, kernel_size=5, activation='relu'),
        GlobalMaxPooling1D(),
        Dense(10, activation='relu'),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),  # Sigmoid for binary classification
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

cnn_model = create_cnn_model()
cnn_model.summary()  # Display model summary

# Train CNN model. The returned History is kept so the per-epoch metrics can
# be inspected later, and so the cell does not end by echoing a bare
# `<History at 0x...>` repr.
cnn_history = cnn_model.fit(
    x_train_pad,
    y_train,
    epochs=5,
    batch_size=32,
    validation_split=0.2,
)



Epoch 1/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 9ms/step - accuracy: 0.6137 - loss: 0.6232 - val_accuracy: 0.8674 - val_loss: 0.3277
Epoch 2/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 6ms/step - accuracy: 0.8782 - loss: 0.3317 - val_accuracy: 0.8936 - val_loss: 0.2687
Epoch 3/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5ms/step - accuracy: 0.9327 - loss: 0.2242 - val_accuracy: 0.8956 - val_loss: 0.2649
Epoch 4/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 6ms/step - accuracy: 0.9572 - loss: 0.1484 - val_accuracy: 0.8894 - val_loss: 0.3281
Epoch 5/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 6ms/step - accuracy: 0.9682 - loss: 0.1196 - val_accuracy: 0.8924 - val_loss: 0.3317


<keras.src.callbacks.history.History at 0x789de30a3c10>

## 4. CNN Model Evaluation

In [None]:
# Score the held-out test reviews with the CNN
cnn_probabilities = cnn_model.predict(x_test_pad)
# Threshold the sigmoid outputs at 0.5 to obtain hard 0/1 labels
cnn_predictions = (cnn_probabilities > 0.5).astype("int32")

print("CNN Model Evaluation:")
cnn_report = classification_report(y_test, cnn_predictions)
print(cnn_report)

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step
CNN Model Evaluation:
              precision    recall  f1-score   support

           0       0.92      0.85      0.88     12500
           1       0.86      0.93      0.89     12500

    accuracy                           0.89     25000
   macro avg       0.89      0.89      0.89     25000
weighted avg       0.89      0.89      0.89     25000



## 5. Build RNN (LSTM) Model

In [None]:
# Build LSTM Model
def create_lstm_model():
    """Build and compile an LSTM binary sentiment classifier.

    Layer stack: integer-id input -> 128-d embedding -> LSTM(128) ->
    Dropout(0.5) -> single sigmoid unit.

    Reads the module-level ``max_words`` (embedding vocabulary size) and
    ``max_sequence_length`` (length of each padded input sequence).

    Returns:
        A ``Sequential`` model compiled with the Adam optimizer, binary
        cross-entropy loss and an accuracy metric.
    """
    model = Sequential([
        # An explicit Input layer replaces Embedding's deprecated
        # `input_length` argument, so the model is built immediately and
        # `summary()` can report output shapes and parameter counts.
        Input(shape=(max_sequence_length,), dtype="int32"),
        Embedding(input_dim=max_words, output_dim=128),
        LSTM(128),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),  # Sigmoid for binary classification
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

lstm_model = create_lstm_model()
lstm_model.summary()  # Display model summary

# Train LSTM model. The returned History is kept so the per-epoch metrics can
# be inspected later, and so the cell does not end by echoing a bare
# `<History at 0x...>` repr.
lstm_history = lstm_model.fit(
    x_train_pad,
    y_train,
    epochs=5,
    batch_size=32,
    validation_split=0.2,
)



Epoch 1/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 25ms/step - accuracy: 0.6637 - loss: 0.5888 - val_accuracy: 0.8404 - val_loss: 0.3832
Epoch 2/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 25ms/step - accuracy: 0.8547 - loss: 0.3512 - val_accuracy: 0.8594 - val_loss: 0.3277
Epoch 3/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 31ms/step - accuracy: 0.8834 - loss: 0.2957 - val_accuracy: 0.8358 - val_loss: 0.3707
Epoch 4/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 21ms/step - accuracy: 0.9146 - loss: 0.2263 - val_accuracy: 0.8740 - val_loss: 0.3373
Epoch 5/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 26ms/step - accuracy: 0.9277 - loss: 0.1898 - val_accuracy: 0.8682 - val_loss: 0.3655


<keras.src.callbacks.history.History at 0x789ed9767640>

## 6. RNN (LSTM) Model Evaluation

In [None]:
# Score the held-out test reviews with the LSTM
lstm_probabilities = lstm_model.predict(x_test_pad)
# Threshold the sigmoid outputs at 0.5 to obtain hard 0/1 labels
lstm_predictions = (lstm_probabilities > 0.5).astype("int32")

print("LSTM Model Evaluation:")
lstm_report = classification_report(y_test, lstm_predictions)
print(lstm_report)

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 8ms/step
LSTM Model Evaluation:
              precision    recall  f1-score   support

           0       0.88      0.85      0.86     12500
           1       0.85      0.89      0.87     12500

    accuracy                           0.87     25000
   macro avg       0.87      0.87      0.87     25000
weighted avg       0.87      0.87      0.87     25000



## 7. Make Predictions with Both Models (CNN and LSTM)

In [None]:
# Encode a raw-text review the way imdb.load_data() encodes the training data,
# then classify it with a trained model.
def predict_sentiment(review, model, word_index=None):
    """Classify a raw-text review as 'Positive' or 'Negative'.

    The review is converted to integer ids using the same conventions that
    ``imdb.load_data(num_words=max_words)`` applies to the training data, so
    the model sees ids with the meaning it was trained on.

    Args:
        review: Review text as a plain string.
        model: A trained model exposing ``predict`` and returning one sigmoid
            probability per input row.
        word_index: Optional word -> frequency-rank mapping. When omitted, the
            mapping from ``imdb.get_word_index()`` is loaded once and cached
            on this function for later calls.

    Returns:
        'Positive' if the predicted probability is above 0.5, else 'Negative'.
    """
    # Reserved ids under imdb.load_data()'s defaults: 0 = padding,
    # 1 = start-of-review, 2 = out-of-vocabulary. Real words are stored as
    # (frequency rank + 3), whereas get_word_index() returns the bare rank.
    start_id, oov_id, index_from = 1, 2, 3

    if word_index is None:
        # get_word_index() re-reads the vocabulary file on every call, so keep
        # the parsed mapping around after the first use.
        word_index = getattr(predict_sentiment, "_word_index", None)
        if word_index is None:
            word_index = imdb.get_word_index()
            predict_sentiment._word_index = word_index

    # Lower-case and blank out punctuation so tokens such as "I" or "great!"
    # can match vocabulary keys (the filter set mirrors the Keras Tokenizer
    # default; the vocabulary keys are presumably lower-case — verify).
    punctuation = '!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n'
    cleaned = review.lower().translate(
        str.maketrans(punctuation, " " * len(punctuation))
    )

    encoded_review = [start_id]
    for word in cleaned.split():
        rank = word_index.get(word)
        token_id = oov_id if rank is None else rank + index_from
        # Ids at or beyond max_words were replaced by the OOV id at training
        # time and would fall outside the Embedding table.
        encoded_review.append(token_id if token_id < max_words else oov_id)

    padded_review = pad_sequences([encoded_review], maxlen=max_sequence_length)
    prediction = model.predict(padded_review)
    # Reduce the (1, 1) prediction array to a plain scalar before comparing.
    probability = float(np.asarray(prediction).ravel()[0])
    return 'Positive' if probability > 0.5 else 'Negative'

# Sample text to run through both trained models
review = "I thought the movie was great!"

# Sentiment according to the CNN
cnn_sentiment = predict_sentiment(review, cnn_model)
print(f'CNN Prediction: {cnn_sentiment}')

# Sentiment according to the LSTM
lstm_sentiment = predict_sentiment(review, lstm_model)
print(f'LSTM Prediction: {lstm_sentiment}')


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 97ms/step
CNN Prediction: Positive
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
LSTM Prediction: Positive
