<a href="https://colab.research.google.com/github/vinit6085903/anpr/blob/main/Emotion_Classification_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("parulpandey/emotion-dataset")

print("Path to dataset files:", path)

Downloading from https://www.kaggle.com/api/v1/datasets/download/parulpandey/emotion-dataset?dataset_version_number=1...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 715k/715k [00:00<00:00, 66.3MB/s]

Extracting files...
Path to dataset files: /root/.cache/kagglehub/datasets/parulpandey/emotion-dataset/versions/1





In [2]:
import os
print(os.listdir(path))

['training.csv', 'validation.csv', 'test.csv']


In [3]:
import pandas as pd
import os

train = pd.read_csv(os.path.join(path, "training.csv"))
val   = pd.read_csv(os.path.join(path, "validation.csv"))
test  = pd.read_csv(os.path.join(path, "test.csv"))


In [4]:
final_dataset = pd.concat([train, val, test], axis=0)


In [5]:
final_dataset = final_dataset.reset_index(drop=True)


In [6]:
print(final_dataset.shape)
final_dataset.head()


(20000, 2)


Unnamed: 0,text,label
0,i didnt feel humiliated,0
1,i can go from feeling so hopeless to so damned...,0
2,im grabbing a minute to post i feel greedy wrong,3
3,i am ever feeling nostalgic about the fireplac...,2
4,i am feeling grouchy,3


In [7]:
final_dataset.isnull().sum()

Unnamed: 0,0
text,0
label,0


In [8]:
import pandas as pd
import re
import nltk

from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report


In [9]:
nltk.download('stopwords')
nltk.download('wordnet')


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...


True

In [10]:

final_dataset.head()


Unnamed: 0,text,label
0,i didnt feel humiliated,0
1,i can go from feeling so hopeless to so damned...,0
2,im grabbing a minute to post i feel greedy wrong,3
3,i am ever feeling nostalgic about the fireplac...,2
4,i am feeling grouchy,3


In [11]:
stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()

def clean_text(text):
    text = text.lower()
    text = re.sub(r"[^a-zA-Z\s]", "", text)

    words = text.split()
    words = [
        lemmatizer.lemmatize(word)
        for word in words
        if word not in stop_words
    ]

    return " ".join(words)

final_dataset['clean_text'] = final_dataset['text'].apply(clean_text)


In [12]:
tfidf = TfidfVectorizer(
    max_features=5000,
    ngram_range=(1,2)
)

X = tfidf.fit_transform(final_dataset['clean_text'])
y = final_dataset['label']


In [13]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=42
)


In [14]:
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)


In [15]:
y_pred = model.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))


Accuracy: 0.89125
              precision    recall  f1-score   support

           0       0.92      0.96      0.94      1195
           1       0.86      0.97      0.91      1335
           2       0.89      0.70      0.78       332
           3       0.93      0.84      0.88       540
           4       0.86      0.83      0.84       442
           5       0.89      0.48      0.62       156

    accuracy                           0.89      4000
   macro avg       0.89      0.80      0.83      4000
weighted avg       0.89      0.89      0.89      4000



In [16]:
emotion_map = {
    0: "Sad üòî",
    1: "Happy üòä",
    2: "Love ‚ù§Ô∏è",
    3: "Angry üò°",
    4: "Fear üò®",
    5: "Surprise üò≤"
}


In [17]:
def predict_emotion(text):
    text = clean_text(text)
    vector = tfidf.transform([text])
    prediction = model.predict(vector)[0]
    return emotion_map[prediction]


In [18]:
predict_emotion("I feel very lonely and broken")


'Sad üòî'

In [19]:
predict_emotion("I am very excited today")


'Happy üòä'