In [1]:
# ===============================
# 🧠 Mental Health Detection - Emotion Classification
# ===============================

# 1️⃣ Import libraries
import pandas as pd
import numpy as np
import re
import nltk
from nltk.corpus import stopwords
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import joblib
import os

# Fetch the stopword corpus only when it is not already installed, so that
# repeat runs do not need network access.
try:
    nltk.data.find('corpora/stopwords')
except LookupError:
    nltk.download('stopwords')

# 2️⃣ Load dataset
# Default CSV location; set the EMOTION_DATA_PATH environment variable to
# load the file from somewhere else without editing this script.
DATA_PATH = os.environ.get(
    "EMOTION_DATA_PATH",
    r"D:\mental-health-ai\data\Emotion_final.csv",
)
df = pd.read_csv(DATA_PATH)

# 3️⃣ Check columns
print(df.columns)
# The CSV headers are capitalised ('Text', 'Emotion'); the rest of the script
# addresses the columns in lowercase.
df = df.rename(columns={'Text':'text', 'Emotion':'emotion'})

# Fail here, with a readable message, rather than later with a bare KeyError
# when the columns are first used.
missing_columns = {'text', 'emotion'} - set(df.columns)
if missing_columns:
    raise KeyError(
        f"{DATA_PATH} is missing required column(s) {sorted(missing_columns)}; "
        f"found {list(df.columns)}"
    )

# 4️⃣ Clean text
# Compiled once at module level and reused for every row.
_URL_PATTERN = re.compile(r'http\S+')
_NON_LETTER_PATTERN = re.compile(r'[^a-zA-Z\s]')

# Filled on first use by _english_stopwords(); None means "not loaded yet".
_ENGLISH_STOPWORDS = None


def _english_stopwords():
    """Return the NLTK English stopword list as a cached frozenset."""
    global _ENGLISH_STOPWORDS
    if _ENGLISH_STOPWORDS is None:
        _ENGLISH_STOPWORDS = frozenset(stopwords.words('english'))
    return _ENGLISH_STOPWORDS


def clean_text(text, stop_words=None):
    """Normalise one raw text value for vectorisation.

    The value is converted to ``str``, lowercased, stripped of anything that
    starts with ``http`` up to the next whitespace, stripped of every
    character that is not an ASCII letter or whitespace, and finally split on
    whitespace so that stopwords can be dropped.

    Args:
        text: The raw value; any object is accepted because it is passed
            through ``str()`` first.
        stop_words: Optional container of words to remove (anything that
            supports ``in``). When omitted, the NLTK English stopword list is
            loaded once and reused on later calls.

    Returns:
        The remaining words joined by single spaces; an empty string when
        nothing is left.
    """
    if stop_words is None:
        # Previously the stopword list was rebuilt and scanned linearly for
        # every single word; a cached set makes each lookup O(1).
        stop_words = _english_stopwords()

    lowered = str(text).lower()
    without_urls = _URL_PATTERN.sub('', lowered)
    letters_only = _NON_LETTER_PATTERN.sub('', without_urls)
    # NOTE(review): punctuation is removed before the stopword filter, so any
    # stopword that contains an apostrophe (e.g. "don't", if the list has
    # such entries) can never match here - confirm whether that is intended.
    return ' '.join(word for word in letters_only.split() if word not in stop_words)

# Normalise every raw text row into the form the vectorizer will see.
df['clean_text'] = df['text'].map(clean_text)

# 5️⃣ Split data
# Features are the cleaned strings, targets are the emotion labels.
X, y = df['clean_text'], df['emotion']

# Hold out 20% for testing; stratifying on the labels keeps the class
# proportions the same in both partitions, and the fixed seed makes the
# split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# 6️⃣ TF-IDF Vectorizer
# The vocabulary is learned from the training texts only; the test texts are
# merely projected onto it.
tfidf = TfidfVectorizer(max_features=5000)
X_train_tfidf = tfidf.fit_transform(X_train)
X_test_tfidf = tfidf.transform(X_test)

# 7️⃣ Train model
model = MultinomialNB()
model.fit(X_train_tfidf, y_train)

# 8️⃣ Evaluate
y_pred = model.predict(X_test_tfidf)
accuracy_pct = round(accuracy_score(y_test, y_pred)*100, 2)
print("✅ Accuracy:", accuracy_pct, "%")
print(classification_report(y_test, y_pred))

# 9️⃣ Save model and vectorizer
# Both artifacts are needed at inference time: the vectorizer to encode new
# text, the classifier to label it.
os.makedirs("model", exist_ok=True)
for artifact, target_path in (
    (model, "model/emotion_model.pkl"),
    (tfidf, "model/tfidf_vectorizer.pkl"),
):
    joblib.dump(artifact, target_path)
print("🎉 Model and vectorizer saved in /model folder!")


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\lucky\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping corpora\stopwords.zip.


Index(['Text', 'Emotion'], dtype='object')
✅ Accuracy: 74.67 %
              precision    recall  f1-score   support

       anger       0.90      0.57      0.70       599
        fear       0.86      0.48      0.62       530
       happy       0.70      0.97      0.81      1406
        love       0.96      0.20      0.33       328
     sadness       0.73      0.93      0.82      1253
    surprise       1.00      0.07      0.14       176

    accuracy                           0.75      4292
   macro avg       0.86      0.54      0.57      4292
weighted avg       0.79      0.75      0.71      4292

🎉 Model and vectorizer saved in /model folder!
