In [1]:
import pandas as pd
import numpy as np
import re
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score

In [2]:
# Exploratory load of the raw CSV for the quick inspection cells below.
data = pd.read_csv(filepath_or_buffer=r"C:\Users\HP\Desktop\Project_emot\text0012.csv")

In [3]:
# Preview the first four rows to check the column layout.
data.head(n=4)

Unnamed: 0,text,Emotion
0,i tried to break up was based on me not being ...,Anger
1,i was trying out the free flyknit they were ge...,Fear
2,i feel sincere relationship and staggering und...,Joy
3,i feel gods pressence its a neurotic message t...,Fear


In [4]:
# (number of rows, number of columns) of the loaded frame.
data.shape

(436809, 2)

In [5]:
# Count rows that exactly repeat an earlier row (the first occurrence is not counted).
data.duplicated(keep="first").sum()

np.int64(20646)

# Load and Preprocess Data

In [6]:
def load_and_preprocess_data(file_path, drop_duplicates=False):
    """Load the emotion dataset and add a normalised ``cleaned_text`` column.

    Rows containing any missing value are dropped. Each ``text`` value is then
    reduced to lowercase ASCII letters separated by single spaces and stored in
    a new ``cleaned_text`` column; the original columns are left untouched.

    Parameters
    ----------
    file_path : str, path object or file-like
        Anything accepted by ``pandas.read_csv``. The CSV must contain a
        ``text`` column.
    drop_duplicates : bool, default False
        When True, exact duplicate rows are removed before cleaning, so the
        same example cannot land on both sides of a later random
        train/validation split.

    Returns
    -------
    pandas.DataFrame
        The remaining rows (original index labels preserved) with the extra
        ``cleaned_text`` column appended.

    Raises
    ------
    KeyError
        If the CSV has no ``text`` column.
    """
    non_letters = re.compile(r'[^a-zA-Z\s]')
    whitespace_run = re.compile(r'\s+')

    def clean_text(text):
        """Normalise one document: letters only, lowercase, single spaces."""
        # str() guards against cells that pandas parsed as numbers, which
        # would otherwise make the regex substitution raise TypeError.
        text = non_letters.sub('', str(text)).lower()
        # Collapse whitespace runs, then trim the space left at either end
        # when leading/trailing punctuation or digits were removed.
        return whitespace_run.sub(' ', text).strip()

    # Non-inplace dropna: build a new frame instead of mutating one in place.
    data = pd.read_csv(file_path).dropna()
    if drop_duplicates:
        data = data.drop_duplicates()

    # assign() returns a new frame with the column appended at the end.
    return data.assign(cleaned_text=data['text'].map(clean_text))

# Load the data and split it into training and validation sets

In [7]:
# Load and clean the dataset used for modelling.
data_path = "M:/Emot01/text001.csv"
data = load_and_preprocess_data(data_path)

# Features are the cleaned sentences; the target is the emotion label.
X, y = data['cleaned_text'], data['Emotion']

# Hold out 20% of the rows for validation; the fixed seed makes the split repeatable.
train_texts, val_texts, train_labels, val_labels = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Create the TF-IDF representation (sparse matrix)

In [8]:
# Learn the vocabulary and IDF weights from the training texts only, then
# reuse that fitted vectorizer unchanged for the validation texts.
vectorizer = TfidfVectorizer(
    max_features=5000,  # cap the vocabulary size for efficiency
)
X_train = vectorizer.fit_transform(train_texts)
X_val = vectorizer.transform(val_texts)

# Train model

In [9]:
# Logistic-regression classifier trained on the TF-IDF features.
# max_iter is set explicitly to give the solver room to converge;
# random_state pins any solver randomness.
model = LogisticRegression(
    random_state=18,
    max_iter=1000,
)
model.fit(X_train, train_labels)

# Evaluate model

In [10]:
# Score the held-out validation set: per-class metrics first, then overall accuracy.
val_predictions = model.predict(X_val)
val_report = classification_report(val_labels, val_predictions)
val_accuracy = accuracy_score(val_labels, val_predictions)

print("Classification Report:\n", val_report)
print("Accuracy Score:", val_accuracy)

Classification Report:
               precision    recall  f1-score   support

       Anger       0.89      0.91      0.90     11295
     Anxiety       1.00      1.00      1.00      1035
        Fear       0.85      0.85      0.85      9572
         Joy       0.92      0.94      0.93     29257
        Love       0.81      0.77      0.79      6893
     Sadness       0.94      0.94      0.94     25328
      Stress       1.00      1.00      1.00      1007
    Surprise       0.78      0.69      0.73      2975

    accuracy                           0.90     87362
   macro avg       0.90      0.89      0.89     87362
weighted avg       0.90      0.90      0.90     87362

Accuracy Score: 0.9044779194615508


# Cultural responses dictionary

In [11]:
# Canned chatbot replies, keyed by lowercase emotion name.
cultural_responses = dict(
    sadness="I'm here to support you. Would you like to share more?",
    anxiety="Take a deep breath. It's okay to feel this way. How can I help?",
    stress="I'm sorry you're feeling stressed. What can we do to ease it?",
    joy="That's wonderful! I'm so happy for you.",
    anger="I understand you're feeling angry. Do you want to talk about it?",
    fear="It's okay to be afraid. You're not alone.",
    love="That sounds beautiful! Love is a wonderful feeling.",
    surprise="Wow! That must have been unexpected. Tell me more.",
)

In [12]:
def respond_to_user(text):
    """Classify the emotion in ``text`` and pick a matching canned reply.

    Parameters
    ----------
    text : str
        Raw user sentence.

    Returns
    -------
    tuple
        ``(emotion, response)`` where ``emotion`` is the label predicted by
        the module-level ``model`` (returned unchanged) and ``response`` is
        the ``cultural_responses`` entry for that label, or a generic
        fallback when no entry matches.
    """
    # Same normalisation steps as the training text: keep letters only,
    # lowercase, collapse whitespace runs.
    cleaned_text = re.sub(r'[^a-zA-Z\s]', '', text.lower())
    cleaned_text = re.sub(r'\s+', ' ', cleaned_text)

    transformed_text = vectorizer.transform([cleaned_text])
    emotion = model.predict(transformed_text)[0]

    # The model returns labels exactly as they appear in the training data
    # (e.g. "Joy"), whereas the response keys are lowercase ("joy"). Compare
    # case-insensitively so the specific reply is found instead of always
    # falling through to the generic one.
    responses_by_label = {
        str(label).lower(): reply for label, reply in cultural_responses.items()
    }
    response = responses_by_label.get(
        str(emotion).lower(), "I'm here to listen. Tell me more."
    )
    return emotion, response

# Try it out: classify a sentence typed by the user

In [None]:
# Read one sentence, classify it, and show the detected emotion with its reply.
user_input = input("Enter a sentence: ")
emotion, response = respond_to_user(user_input)
print(
    f"Detected Emotion: {emotion}",
    f"Chatbot Response: {response}",
    sep="\n",
)