Install Required Libraries

In [2]:
pip install numpy pandas scikit-learn flask requests nltk





In [3]:
pip install tensorflow --upgrade


Note: you may need to restart the kernel to use updated packages.


Load and Preprocess the Data

In [5]:
# --- Data handling ---
import numpy as np
import pandas as pd

# --- Text processing ---
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

# --- Modelling ---
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from keras.layers import Dense, Dropout, Embedding, LSTM
from keras.models import Sequential
from keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical

# Make sure the NLTK stop-word corpus is available locally, then build the
# set of English stop words.
nltk.download('stopwords')
stop_words = set(stopwords.words('english'))

# Load dataset; the code below reads the 'sentence' (text) and 'sentiment'
# (label) columns.
df = pd.read_csv("dataset.csv")

# Encode the string labels as integers 0..num_classes-1.
# NOTE(review): the recorded run produced 41 distinct labels -- confirm that
# df['sentiment'] has no accidental variants (casing, stray whitespace).
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
df['encoded_label'] = le.fit_transform(df['sentiment'])
num_classes = len(le.classes_)

# Split data (80/20, fixed seed for a reproducible split)
X = df['sentence']
y = df['encoded_label']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Tokenization: the vocabulary is fitted on the training texts only, so the
# test split does not leak into it.
tokenizer = Tokenizer(num_words=10000)
tokenizer.fit_on_texts(X_train)
X_train_seq = tokenizer.texts_to_sequences(X_train)
X_test_seq = tokenizer.texts_to_sequences(X_test)

# Padding: every sequence is padded/truncated to max_len tokens.
max_len = 100
X_train_pad = pad_sequences(X_train_seq, maxlen=max_len)
X_test_pad = pad_sequences(X_test_seq, maxlen=max_len)

# One-hot encode the labels. num_classes is passed explicitly: without it,
# to_categorical infers the width from the largest label present in each
# split, so train and test could end up with different numbers of columns
# whenever a split happens to miss the highest-numbered classes.
y_train_cat = to_categorical(y_train, num_classes=num_classes)
y_test_cat = to_categorical(y_test, num_classes=num_classes)

print("X_train_pad shape:", X_train_pad.shape)
print("y_train_cat shape:", y_train_cat.shape)


X_train_pad shape: (2133, 100)
y_train_cat shape: (2133, 41)


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\VIdha\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Build and Train LSTM Model

In [13]:
from keras.callbacks import EarlyStopping
from keras.layers import Embedding, LSTM, Dense, Dropout, Bidirectional, Input
from keras.models import Sequential
from keras.utils import set_random_seed

# Seed the Python, NumPy and backend RNGs so weight initialisation, dropout
# and batch shuffling are repeatable between runs.
set_random_seed(42)

# Define constants. Everything that must agree with the preprocessing step is
# derived from its outputs instead of being typed in a second time.
vocab_size = tokenizer.num_words       # same cap the Tokenizer was created with
embedding_dim = 128
max_len = X_train_pad.shape[1]         # padded sequence length
output_dim = y_train_cat.shape[1]      # one output unit per one-hot label column

# Two stacked bidirectional LSTMs over learned word embeddings, followed by a
# small dense head with a softmax over the label classes.
# An explicit Input layer fixes the input shape, so the model is built on
# construction and summary() works without a dummy forward pass (this also
# replaces the deprecated Embedding `input_length` argument).
model = Sequential([
    Input(shape=(max_len,)),
    Embedding(input_dim=vocab_size, output_dim=embedding_dim),
    Bidirectional(LSTM(128, return_sequences=True)),
    Dropout(0.3),
    Bidirectional(LSTM(64)),
    Dense(64, activation='relu'),
    Dense(output_dim, activation='softmax')
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

# Stop once the validation loss has not improved for 3 epochs and roll back to
# the best weights; in the recorded run val_loss started rising after epoch 4
# while training accuracy kept climbing (overfitting).
early_stop = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# Train the model (the last 10% of the training data is held out for validation)
history = model.fit(
    X_train_pad,
    y_train_cat,
    validation_split=0.1,
    epochs=10,
    batch_size=32,
    callbacks=[early_stop],
)




Epoch 1/10
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 184ms/step - accuracy: 0.3129 - loss: 2.3481 - val_accuracy: 0.2897 - val_loss: 1.5490
Epoch 2/10
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 163ms/step - accuracy: 0.3392 - loss: 1.4534 - val_accuracy: 0.3411 - val_loss: 1.4661
Epoch 3/10
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 157ms/step - accuracy: 0.4433 - loss: 1.1644 - val_accuracy: 0.3598 - val_loss: 1.4243
Epoch 4/10
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 166ms/step - accuracy: 0.4942 - loss: 0.9618 - val_accuracy: 0.3785 - val_loss: 1.3846
Epoch 5/10
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 175ms/step - accuracy: 0.5805 - loss: 0.9063 - val_accuracy: 0.4579 - val_loss: 1.4344
Epoch 6/10
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 170ms/step - accuracy: 0.6526 - loss: 0.8100 - val_accuracy: 0.4579 - val_loss: 1.5262
Epoch 7/10
[1m60/60[0

In [14]:
import pickle

# Persist the trained network. The legacy HDF5 file name is kept unchanged so
# anything that already loads "emotion_model.h5" continues to work.
model.save("emotion_model.h5")

# Persist the fitted tokenizer: inference must map words to the same integer
# ids that were used during training.
with open("tokenizer.pkl", "wb") as f:
    pickle.dump(tokenizer, f)

# Persist the fitted label encoder as well: without it the class indices the
# model predicts cannot be translated back into sentiment names.
with open("label_encoder.pkl", "wb") as f:
    pickle.dump(le, f)


