<a href="https://colab.research.google.com/github/vinit6085903/anpr/blob/main/SoulSupport_%E2%80%93_Machine_Learning_Based_Chatbot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("elvis23/mental-health-conversational-data")

print("Path to dataset files:", path)

Downloading from https://www.kaggle.com/api/v1/datasets/download/elvis23/mental-health-conversational-data?dataset_version_number=1...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 11.8k/11.8k [00:00<00:00, 11.9MB/s]

Extracting files...
Path to dataset files: /root/.cache/kagglehub/datasets/elvis23/mental-health-conversational-data/versions/1





In [2]:
import os
print(os.listdir(path))

['intents.json']


In [3]:
import pandas as pd

df = pd.read_json(path + "/intents.json")

print(df.head())


                                             intents
0  {'tag': 'greeting', 'patterns': ['Hi', 'Hey', ...
1  {'tag': 'morning', 'patterns': ['Good morning'...
2  {'tag': 'afternoon', 'patterns': ['Good aftern...
3  {'tag': 'evening', 'patterns': ['Good evening'...
4  {'tag': 'night', 'patterns': ['Good night'], '...


In [15]:
import json
import pandas as pd

file_path = path + "/intents.json"

with open(file_path, "r", encoding="utf-8") as f:
    data = json.load(f)

rows = []

for intent in data["intents"]:
    tag = intent["tag"]
    for pattern in intent["patterns"]:
        rows.append([pattern, tag])

df = pd.DataFrame(rows, columns=["text", "intent"])

print(df.shape)
print(df.head())


(232, 2)
               text    intent
0                Hi  greeting
1               Hey  greeting
2  Is anyone there?  greeting
3          Hi there  greeting
4             Hello  greeting


In [16]:
important_intents = [
    "anxiety", "depression", "sad",
    "stress", "lonely", "panic",
    "greeting", "goodbye"
]

df = df[df["intent"].isin(important_intents)]
print(df["intent"].value_counts())


intent
greeting    12
goodbye      8
sad          8
Name: count, dtype: int64


In [17]:
import re
import nltk
nltk.download("stopwords")

from nltk.corpus import stopwords
from nltk.stem import PorterStemmer

stemmer = PorterStemmer()
stop_words = set(stopwords.words("english"))

def clean_text(text):
    text = text.lower()
    text = re.sub(r"[^a-zA-Z]", " ", text)
    words = text.split()
    words = [stemmer.stem(w) for w in words if w not in stop_words]
    return " ".join(words)

df["clean_text"] = df["text"].apply(clean_text)


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [18]:
from sklearn.feature_extraction.text import TfidfVectorizer

vectorizer = TfidfVectorizer(
    ngram_range=(1,2),
    max_features=3000
)

X = vectorizer.fit_transform(df["clean_text"])


In [19]:
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
y = le.fit_transform(df["intent"])

print(le.classes_)


['goodbye' 'greeting' 'sad']


In [20]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=42,
    stratify=y
)


In [21]:
from sklearn.svm import LinearSVC

model = LinearSVC()
model.fit(X_train, y_train)


In [22]:
from sklearn.metrics import accuracy_score, classification_report

y_pred = model.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))

print("\nClassification Report:\n")
print(classification_report(
    y_test,
    y_pred,
    target_names=le.classes_
))


Accuracy: 0.8333333333333334

Classification Report:

              precision    recall  f1-score   support

     goodbye       1.00      0.50      0.67         2
    greeting       0.67      1.00      0.80         2
         sad       1.00      1.00      1.00         2

    accuracy                           0.83         6
   macro avg       0.89      0.83      0.82         6
weighted avg       0.89      0.83      0.82         6



In [23]:
def chatbot_predict(text):
    text = clean_text(text)
    vec = vectorizer.transform([text])
    pred = model.predict(vec)
    return le.inverse_transform(pred)[0]

print(chatbot_predict("I feel very lonely and tired"))


sad


In [24]:
important_intents = [
    "greeting",
    "goodbye",
    "sad",
    "lonely",
    "depression",
    "anxiety",
    "stress"
]


In [25]:
df = df[df["intent"].isin(important_intents)]


In [26]:
def chatbot_predict(text):
    # preprocess
    text_clean = clean_text(text)

    # vectorize
    vec = vectorizer.transform([text_clean])

    # predict
    pred = model.predict(vec)

    # convert label to intent
    intent = le.inverse_transform(pred)[0]

    return intent


In [27]:
print(chatbot_predict("hello bro"))
print(chatbot_predict("bye see you"))
print(chatbot_predict("i feel very lonely and empty"))
print(chatbot_predict("my heart is racing and i am scared"))


greeting
goodbye
sad
greeting


In [28]:
import random
import json

with open(file_path, "r", encoding="utf-8") as f:
    full_data = json.load(f)

# response dictionary
responses_dict = {}

for intent in full_data["intents"]:
    if intent["tag"] in important_intents:
        responses_dict[intent["tag"]] = intent["responses"]

print(responses_dict.keys())


dict_keys(['greeting', 'goodbye', 'sad'])


In [29]:
def chatbot_response(user_input):
    intent = chatbot_predict(user_input)

    # pick random response
    if intent in responses_dict:
        return random.choice(responses_dict[intent])
    else:
        return "I'm here to listen. Can you tell me more?"


In [30]:
print("ü§ñ Mental Health Chatbot")
print("Type 'quit' to stop\n")

while True:
    user = input("You: ")

    if user.lower() == "quit":
        print("Bot: Take care ‚ù§Ô∏è You are not alone.")
        break

    bot_reply = chatbot_response(user)
    print("Bot:", bot_reply)


ü§ñ Mental Health Chatbot
Type 'quit' to stop

You: hii
Bot: Hello there. Tell me how are you feeling today?
You: i feel very lonely
Bot: How long have you been feeling this way?
You: nothing feels good
Bot: I'm here for you. Could you tell me why you're feeling this way?
You: quit
Bot: Take care ‚ù§Ô∏è You are not alone.
