In [1]:
# Print a greeting banner for the notebook.
print("Welcome to Emotion Detection Project!")


Welcome to Emotion Detection Project!


In [2]:
!pip install -q kaggle


In [3]:
from google.colab import files

# Bind the return value instead of leaving it as the cell's last expression:
# the returned dict maps file name -> raw file bytes, and displaying it would
# write the contents of kaggle.json (the API key) into the saved notebook.
uploaded = files.upload()


Saving kaggle.json to kaggle.json


{'kaggle.json': b'{"username":"soumyav25","key":"<REDACTED>"}'}

In [4]:
# Copy the uploaded kaggle.json into ~/.kaggle and restrict it to
# owner read/write (mode 600).
# NOTE(review): presumably ~/.kaggle/kaggle.json is where the Kaggle CLI
# looks for credentials — confirm against the CLI docs.
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json


In [5]:
# Download the praveengovi/emotions-dataset-for-nlp archive with the Kaggle CLI
# (saved as emotions-dataset-for-nlp.zip in the working directory).
!kaggle datasets download -d praveengovi/emotions-dataset-for-nlp


Dataset URL: https://www.kaggle.com/datasets/praveengovi/emotions-dataset-for-nlp
License(s): CC-BY-SA-4.0
Downloading emotions-dataset-for-nlp.zip to /content
  0% 0.00/721k [00:00<?, ?B/s]
100% 721k/721k [00:00<00:00, 735MB/s]


In [6]:
!unzip emotions-dataset-for-nlp.zip


Archive:  emotions-dataset-for-nlp.zip
  inflating: test.txt                
  inflating: train.txt               
  inflating: val.txt                 


In [7]:
import pandas as pd

# Each split is a headerless, semicolon-separated file with one
# "<text>;<label>" record per line; load all three the same way.
train_df, test_df, val_df = (
    pd.read_csv(split_path, sep=";", names=["text", "label"])
    for split_path in ("train.txt", "test.txt", "val.txt")
)


In [8]:
# Preview the first five training rows (5 is the default for head()).
train_df.head(n=5)


Unnamed: 0,text,label
0,i didnt feel humiliated,sadness
1,i can go from feeling so hopeless to so damned...,sadness
2,im grabbing a minute to post i feel greedy wrong,anger
3,i am ever feeling nostalgic about the fireplac...,love
4,i am feeling grouchy,anger


In [9]:
# Report (rows, columns) for each split.
for split_label, split_frame in (
    ("Train:", train_df),
    ("Test:", test_df),
    ("Validation:", val_df),
):
    print(split_label, split_frame.shape)


Train: (16000, 2)
Test: (2000, 2)
Validation: (2000, 2)


In [10]:
# Class distribution of the training labels, most frequent first
# (sort=True is the value_counts default).
train_df['label'].value_counts(sort=True)


Unnamed: 0_level_0,count
label,Unnamed: 1_level_1
joy,5362
sadness,4666
anger,2159
fear,1937
love,1304
surprise,572


In [11]:
import re

def clean_text(text):
    """Normalize a sentence for tokenization.

    Lowercases the input, deletes every character that is not an ASCII
    letter, a digit or whitespace (characters are removed, not replaced by
    a space), then collapses whitespace runs to a single space and trims
    both ends.

    Args:
        text: The raw sentence as a ``str``.

    Returns:
        The normalized sentence.
    """
    lowered = text.lower()
    alnum_only = re.sub(r"[^a-zA-Z0-9\s]", "", lowered)
    return re.sub(r"\s+", " ", alnum_only).strip()

# Normalize the text column of every split with the same cleaner so that
# train, test and validation are preprocessed identically.
for split_frame in (train_df, test_df, val_df):
    split_frame['text'] = split_frame['text'].apply(clean_text)


In [12]:
# Preview the first five training rows after cleaning (5 is the head() default).
train_df.head(n=5)


Unnamed: 0,text,label
0,i didnt feel humiliated,sadness
1,i can go from feeling so hopeless to so damned...,sadness
2,im grabbing a minute to post i feel greedy wrong,anger
3,i am ever feeling nostalgic about the fireplac...,love
4,i am feeling grouchy,anger


In [13]:
# First ten cleaned training sentences.
train_df['text'].head(n=10)


Unnamed: 0,text
0,i didnt feel humiliated
1,i can go from feeling so hopeless to so damned...
2,im grabbing a minute to post i feel greedy wrong
3,i am ever feeling nostalgic about the fireplac...
4,i am feeling grouchy
5,ive been feeling a little burdened lately wasn...
6,ive been taking or milligrams or times recomme...
7,i feel as confused about life as a teenager or...
8,i have been with petronas for years i feel tha...
9,i feel romantic too


In [14]:
# Re-read the untouched training file so the raw text can be shown next to
# the cleaned text (optional sanity check).
raw_df = pd.read_csv("train.txt", names=["text", "label"], sep=";")

# First ten rows, original vs. cleaned, aligned on the row index.
comparison = pd.DataFrame(
    {
        "Before Cleaning": raw_df['text'].iloc[:10],
        "After Cleaning": train_df['text'].iloc[:10],
    }
)
comparison


Unnamed: 0,Before Cleaning,After Cleaning
0,i didnt feel humiliated,i didnt feel humiliated
1,i can go from feeling so hopeless to so damned...,i can go from feeling so hopeless to so damned...
2,im grabbing a minute to post i feel greedy wrong,im grabbing a minute to post i feel greedy wrong
3,i am ever feeling nostalgic about the fireplac...,i am ever feeling nostalgic about the fireplac...
4,i am feeling grouchy,i am feeling grouchy
5,ive been feeling a little burdened lately wasn...,ive been feeling a little burdened lately wasn...
6,ive been taking or milligrams or times recomme...,ive been taking or milligrams or times recomme...
7,i feel as confused about life as a teenager or...,i feel as confused about life as a teenager or...
8,i have been with petronas for years i feel tha...,i have been with petronas for years i feel tha...
9,i feel romantic too,i feel romantic too


In [37]:
from sklearn.preprocessing import LabelEncoder

# Fit the encoder on the training labels only, then reuse the same mapping
# for the validation and test labels.
label_encoder = LabelEncoder()
y_train = label_encoder.fit_transform(train_df['label'])
y_val = label_encoder.transform(val_df['label'])
y_test = label_encoder.transform(test_df['label'])

# Number of distinct emotion classes seen in training
num_classes = len(label_encoder.classes_)

# Integer class id -> emotion name, for decoding predictions later
label_to_emotion = dict(enumerate(label_encoder.classes_))


In [16]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Build the vocabulary from the training text only; words not seen in
# training are mapped to the "<OOV>" token.
tokenizer = Tokenizer(oov_token="<OOV>")
tokenizer.fit_on_texts(train_df['text'])

# Text -> lists of integer word ids, one list per sentence
X_train, X_test, X_val = (
    tokenizer.texts_to_sequences(split_frame['text'])
    for split_frame in (train_df, test_df, val_df)
)

# Pad every split (zeros appended at the end) to the longest training sentence
max_len = max(map(len, X_train))
X_train, X_test, X_val = (
    pad_sequences(sequences, maxlen=max_len, padding='post')
    for sequences in (X_train, X_test, X_val)
)

# +1 because word ids start at 1; id 0 is the padding value
vocab_size = 1 + len(tokenizer.word_index)


In [38]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, GlobalAveragePooling1D, Dense

# Averaged-embedding classifier: embed each token id, average the embeddings
# over the sequence, then classify with a small dense head.
# `input_length` is intentionally not passed to Embedding: the argument is
# deprecated in current Keras (it only triggers a warning) and the sequence
# length is inferred from the data on the first call.
model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=64),
    GlobalAveragePooling1D(),
    Dense(64, activation='relu'),
    Dense(num_classes, activation='softmax')  # one probability per emotion class
])




In [39]:
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.optimizers import Adam

model.compile(
    optimizer=Adam(),
    # Sparse variant: targets are integer class ids from the LabelEncoder,
    # not one-hot vectors.
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

# Callbacks
# Stop once val_loss has not improved for 3 epochs. restore_best_weights=True
# puts the best-epoch weights back into `model`, so later cells that call
# `model.predict` do not silently use the (possibly worse) last-epoch weights.
early_stop = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
# Keep only the best checkpoint on disk; it is reloaded later for evaluation.
model_ckpt = ModelCheckpoint("best_model.h5", save_best_only=True)

# Train
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=15,
    callbacks=[early_stop, model_ckpt]
)


Epoch 1/10
[1m499/500[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 9ms/step - accuracy: 0.3294 - loss: 1.6022



[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 10ms/step - accuracy: 0.3295 - loss: 1.6021 - val_accuracy: 0.4490 - val_loss: 1.5567
Epoch 2/10
[1m498/500[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 9ms/step - accuracy: 0.4229 - loss: 1.5103



[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 9ms/step - accuracy: 0.4232 - loss: 1.5099 - val_accuracy: 0.5745 - val_loss: 1.2916
Epoch 3/10
[1m498/500[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 10ms/step - accuracy: 0.6269 - loss: 1.0966



[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 11ms/step - accuracy: 0.6271 - loss: 1.0959 - val_accuracy: 0.7540 - val_loss: 0.8502
Epoch 4/10
[1m496/500[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 8ms/step - accuracy: 0.8088 - loss: 0.6396



[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 9ms/step - accuracy: 0.8089 - loss: 0.6392 - val_accuracy: 0.8285 - val_loss: 0.6262
Epoch 5/10
[1m499/500[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 10ms/step - accuracy: 0.8878 - loss: 0.4211



[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 11ms/step - accuracy: 0.8878 - loss: 0.4210 - val_accuracy: 0.8540 - val_loss: 0.5213
Epoch 6/10
[1m498/500[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 9ms/step - accuracy: 0.9145 - loss: 0.2973



[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 9ms/step - accuracy: 0.9145 - loss: 0.2973 - val_accuracy: 0.8490 - val_loss: 0.5173
Epoch 7/10
[1m496/500[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 10ms/step - accuracy: 0.9287 - loss: 0.2251



[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 10ms/step - accuracy: 0.9287 - loss: 0.2250 - val_accuracy: 0.8595 - val_loss: 0.4324
Epoch 8/10
[1m495/500[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 10ms/step - accuracy: 0.9459 - loss: 0.1767



[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 11ms/step - accuracy: 0.9459 - loss: 0.1765 - val_accuracy: 0.8710 - val_loss: 0.4194
Epoch 9/10
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 10ms/step - accuracy: 0.9627 - loss: 0.1408 - val_accuracy: 0.8635 - val_loss: 0.4539
Epoch 10/10
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 9ms/step - accuracy: 0.9660 - loss: 0.1203 - val_accuracy: 0.8740 - val_loss: 0.4267


In [40]:
from tensorflow.keras.models import load_model
import numpy as np

# Reload the checkpoint written by ModelCheckpoint during training
best_model = load_model("best_model.h5")

# Class probabilities for every test sentence, then the most likely class id
pred_probs = best_model.predict(X_test)
pred_labels = pred_probs.argmax(axis=1)

# Map integer class ids back to emotion names
pred_emotions = [label_to_emotion[class_id] for class_id in pred_labels]
actual_emotions = [label_to_emotion[class_id] for class_id in y_test]




[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step


In [41]:
# Show the first five test sentences with their true and predicted emotions.
for i in range(5):
    print(
        f"Text: {test_df['text'].iloc[i]}",
        f"Actual Emotion: {actual_emotions[i]} | Predicted Emotion: {pred_emotions[i]}",
        "----",
        sep="\n",
    )


Text: im feeling rather rotten so im not very ambitious right now
Actual Emotion: sadness | Predicted Emotion: sadness
----
Text: im updating my blog because i feel shitty
Actual Emotion: sadness | Predicted Emotion: sadness
----
Text: i never make her separate from me because i don t ever want her to feel like i m ashamed with her
Actual Emotion: sadness | Predicted Emotion: sadness
----
Text: i left with my bouquet of red and yellow tulips under my arm feeling slightly more optimistic than when i arrived
Actual Emotion: joy | Predicted Emotion: joy
----
Text: i was feeling a little vain when i did this one
Actual Emotion: sadness | Predicted Emotion: sadness
----


In [42]:
# List the emotion names known to the encoder; position i is the name for class id i.
print(label_encoder.classes_)


['anger' 'fear' 'joy' 'love' 'sadness' 'surprise']


In [43]:
!pip install gradio




In [44]:
import gradio as gr


In [45]:
def predict_emotion(text):
    """Predict the emotion name for a single sentence.

    The sentence is cleaned with ``clean_text``, converted to word ids with
    the fitted ``tokenizer``, padded to ``max_len`` and scored by ``model``.

    Args:
        text: The raw input sentence.

    Returns:
        The emotion name of the highest-probability class, decoded with
        ``label_encoder``.
    """
    # Same preprocessing chain as training: clean -> tokenize -> pad
    encoded = pad_sequences(
        tokenizer.texts_to_sequences([clean_text(text)]),
        maxlen=max_len,
        padding='post',
    )

    # Index of the most probable class for the single input row
    class_index = np.argmax(model.predict(encoded))

    # Decode numeric label back to emotion
    return label_encoder.inverse_transform([class_index])[0]


In [46]:
# Gradio UI definition wired to predict_emotion: a two-line text box in,
# plain text out. This cell only builds the interface object.
iface = gr.Interface(
    title="Emotion Detection from Text",
    description="Enter a sentence and this model will predict the emotion.",
    fn=predict_emotion,
    inputs=gr.Textbox(placeholder="Enter a sentence...", lines=2),
    outputs="text",
)


In [47]:
import gradio as gr
import numpy as np

def predict_emotion_gradio(text):
    """Gradio callback: predict the emotion name for one sentence.

    Args:
        text: The raw sentence typed into the text box.

    Returns:
        The emotion name of the highest-probability class, decoded with
        ``label_encoder``.
    """
    # Step 1: Preprocess the input exactly as the training text was
    # preprocessed. The tokenizer was fitted on clean_text() output, so raw
    # input (e.g. words containing apostrophes) would otherwise miss the
    # vocabulary and fall back to the OOV token.
    cleaned = clean_text(text)
    sequence = tokenizer.texts_to_sequences([cleaned])
    padded = pad_sequences(sequence, maxlen=max_len, padding='post')

    # Step 2: Predict
    pred = model.predict(padded)
    label = label_encoder.inverse_transform([np.argmax(pred)])

    return label[0]

# Step 3: Create Interface
# Two-line text box in, plain text out, backed by predict_emotion_gradio.
interface = gr.Interface(
    title="Emotion Detection Chatbot",
    description="Enter a sentence to detect the emotion (e.g., sadness, joy, anger, etc.).",
    fn=predict_emotion_gradio,
    inputs=gr.Textbox(placeholder="Type a sentence...", lines=2),
    outputs="text",
)

# Start the app
interface.launch()


It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()




* Running on public URL: https://20a53c6370ae618e03.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


