In [80]:
import pandas as pd
from sklearn.model_selection import train_test_split

In [81]:
df = pd.read_csv("sentimentdataset.csv")
df.shape

(732, 15)

In [82]:
df = df[["Text","Sentiment"]]
df["Sentiment"] = df["Sentiment"].str.strip()

In [83]:
x=sorted(df['Sentiment'].unique())
len(x)

191

In [84]:
x

['Acceptance',
 'Accomplishment',
 'Admiration',
 'Adoration',
 'Adrenaline',
 'Adventure',
 'Affection',
 'Amazement',
 'Ambivalence',
 'Amusement',
 'Anger',
 'Anticipation',
 'Anxiety',
 'Appreciation',
 'Apprehensive',
 'Arousal',
 'ArtisticBurst',
 'Awe',
 'Bad',
 'Betrayal',
 'Bitter',
 'Bitterness',
 'Bittersweet',
 'Blessed',
 'Boredom',
 'Breakthrough',
 'Calmness',
 'Captivation',
 'Celebration',
 'Celestial Wonder',
 'Challenge',
 'Charm',
 'Colorful',
 'Compassion',
 'Compassionate',
 'Confidence',
 'Confident',
 'Confusion',
 'Connection',
 'Contemplation',
 'Contentment',
 'Coziness',
 'Creative Inspiration',
 'Creativity',
 'Culinary Adventure',
 'CulinaryOdyssey',
 'Curiosity',
 'Darkness',
 'Dazzle',
 'Desolation',
 'Despair',
 'Desperation',
 'Determination',
 'Devastated',
 'Disappointed',
 'Disappointment',
 'Disgust',
 'Dismissive',
 'DreamChaser',
 'Ecstasy',
 'Elation',
 'Elegance',
 'Embarrassed',
 'Emotion',
 'EmotionalStorm',
 'Empathetic',
 'Empowerment',
 'E

In [85]:
from sklearn.preprocessing import LabelEncoder
label_encoder = LabelEncoder()
df["SentimentEncoded"] = label_encoder.fit_transform(df["Sentiment"])

In [86]:
from sklearn.model_selection import train_test_split
X = df["Text"]
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

# # Download stopwords if not already downloaded
# nltk.download("stopwords")
# nltk.download("punkt")

# stop_words = set(stopwords.words("english"))

# def remove_stopwords(text):
#     words = word_tokenize(text)  # Tokenize text into words
#     filtered_text = " ".join([word for word in words if word.lower() not in stop_words])
#     return filtered_text
# X = X.apply(remove_stopwords)
y = df["SentimentEncoded"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [87]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Tokenization
tokenizer = Tokenizer(num_words=None, oov_token="<OOV>")
tokenizer.fit_on_texts(X_train)

# Convert text to sequences
X_train_seq = tokenizer.texts_to_sequences(X_train)
X_test_seq = tokenizer.texts_to_sequences(X_test)

# Padding sequences to ensure uniform input size
MAX_LEN = 100
X_train_padded = pad_sequences(X_train_seq, maxlen=MAX_LEN, padding="post")
X_test_padded = pad_sequences(X_test_seq, maxlen=MAX_LEN, padding="post")


In [88]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout, Bidirectional

# Define model
model = Sequential([
    Embedding(input_dim=5000, output_dim=256, input_length=MAX_LEN),  # Increased embedding size
    Bidirectional(LSTM(units=128, return_sequences=True)),  # More LSTM units
    Dropout(0.3),  # Regularization
    Bidirectional(LSTM(units=64, return_sequences=False)),  
    Dropout(0.3),
    Dense(128, activation="relu"),  # Intermediate dense layer
    Dense(191, activation="softmax")  # Multi-class classification
])

# Compile model
model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])

model.summary()



In [89]:
from sklearn.model_selection import KFold
import numpy as np

kf = KFold(n_splits=5, shuffle=True, random_state=42)
fold_no = 1

for train_index, val_index in kf.split(X_train_padded):
    print(f"Training on Fold {fold_no}...")
    
    X_fold_train, X_fold_val = X_train_padded[train_index], X_train_padded[val_index]
    y_fold_train, y_fold_val = np.array(y_train)[train_index], np.array(y_train)[val_index]

    model.fit(X_fold_train, y_fold_train, validation_data=(X_fold_val, y_fold_val), epochs=5, batch_size=32)
    
    fold_no += 1


Training on Fold 1...
Epoch 1/5
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 93ms/step - accuracy: 0.0175 - loss: 5.2068 - val_accuracy: 0.0427 - val_loss: 5.0228
Epoch 2/5
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 288ms/step - accuracy: 0.0623 - loss: 4.8283 - val_accuracy: 0.0427 - val_loss: 4.9452
Epoch 3/5
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 296ms/step - accuracy: 0.0579 - loss: 4.7374 - val_accuracy: 0.0513 - val_loss: 4.9518
Epoch 4/5
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 308ms/step - accuracy: 0.0995 - loss: 4.6120 - val_accuracy: 0.0427 - val_loss: 4.9926
Epoch 5/5
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 303ms/step - accuracy: 0.1042 - loss: 4.4881 - val_accuracy: 0.0855 - val_loss: 4.8888
Training on Fold 2...
Epoch 1/5
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 311ms/step - accuracy: 0.1107 - loss: 4.5074 - val_accuracy: 0.1966 - val_loss

In [90]:
y_test = np.array(y_test)
loss, acc = model.evaluate(X_test_padded, y_test)
print(f"Test Accuracy: {acc:.2f}")


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 96ms/step - accuracy: 0.1416 - loss: 7.6817
Test Accuracy: 0.16
