In [1]:
# Import necessary libraries
import json
import pandas as pd
import nltk
import random
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords


In [2]:
# Download necessary NLTK data
# 'wordnet' is presumably the corpus behind WordNetLemmatizer, and 'stopwords'
# supplies the list read through stopwords.words('english') when the patterns
# are cleaned. Each call returns a status flag; the second call is the cell's
# last expression, so its return value is what the notebook displays.
nltk.download('wordnet')
nltk.download('stopwords')

[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\sixni\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\sixni\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [3]:
# Initialize the WordNet lemmatizer
# One shared instance is created here and reused by the pattern-preprocessing
# step further down instead of constructing a new lemmatizer per call.
lemmatizer = WordNetLemmatizer()

In [4]:
# Load the intent definitions. The encoding is pinned to UTF-8 so that the
# file decodes the same way on every platform; without it, open() falls back
# to the locale's default encoding and non-ASCII characters in patterns or
# responses can be mis-decoded or raise UnicodeDecodeError.
with open('intents.json', 'r', encoding='utf-8') as f:
    intents_data = json.load(f)['intents']

# Index the intents by tag so a predicted tag can be mapped back to its
# responses. If two intents share a tag, the later one replaces the earlier.
intents_dict = {intent['tag']: intent for intent in intents_data}

# Load the statistical dataset. chat() reads its 'state' and 'statistic'
# columns when answering a 'stats' intent.
stats_df = pd.read_csv('mental_health_data.csv')

In [5]:
# Preprocess the intents data: flatten it to one row per (tag, pattern) pair.
# Every row carries the intent's complete list of responses, so the
# 'responses' column holds lists rather than single strings.
# The parsed intents are iterated directly; this avoids building an
# intermediate DataFrame and re-filtering it with a boolean mask for every
# field of every intent.
intent_rows = [
    {"tag": intent['tag'], "patterns": pattern, "responses": intent['responses']}
    for intent in intents_data
    for pattern in intent['patterns']
]

# Naming the columns explicitly keeps the frame's schema (and column order)
# intact even when there are no rows at all.
intents_df = pd.DataFrame(intent_rows, columns=["tag", "patterns", "responses"])

In [6]:
# Preprocess the patterns: split on whitespace, drop English stopwords,
# lemmatize the remaining words and join them back with single spaces.
# The stopword set is built once here; rebuilding it inside the lambda would
# reload the stopword list for every single word of every pattern.
# NOTE(review): the membership test is case-sensitive, so a capitalised word
# is only dropped if the stopword list contains it in that exact form —
# confirm whether lowercasing before the check is wanted.
english_stopwords = set(stopwords.words('english'))
intents_df['patterns'] = intents_df['patterns'].apply(
    lambda text: ' '.join(
        lemmatizer.lemmatize(word)
        for word in text.split()
        if word not in english_stopwords
    )
)

In [7]:
# Split the data into training and testing sets:
# features are the cleaned patterns, labels are the intent tags.
# 20% of the rows are held out, and the fixed seed makes the split repeatable.
X, y = intents_df['patterns'], intents_df['tag']
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [8]:
# Vectorize the text data using TF-IDF
# The vectorizer is fitted on the training patterns only; the test patterns
# are then transformed with that already-fitted vectorizer, so nothing is
# learned from the held-out data. The same `vectorizer` object is reused by
# chat() to encode user messages.
vectorizer = TfidfVectorizer()
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)


In [9]:
# Train a Random Forest classifier on the TF-IDF training matrix.
# The forest is seeded (same seed as the train/test split) so that a fresh
# Restart & Run All reproduces the same model and the same reported accuracy;
# without a seed every run trains a different forest.
model = RandomForestClassifier(random_state=42)
model.fit(X_train_vec, y_train)

In [10]:
# Predict intents for the testing set
# y_pred holds one predicted tag per row of X_test_vec, in the same order as
# y_test; the accuracy cell compares the two element by element.
y_pred = model.predict(X_test_vec)

In [11]:
# Names of the 50 U.S. states in alphabetical order, grouped by initial letter.
# NOTE(review): nothing in the visible cells reads this list — chat() looks
# state names up in stats_df['state'] instead.
states = [
    'Alabama', 'Alaska', 'Arizona', 'Arkansas',
    'California', 'Colorado', 'Connecticut',
    'Delaware',
    'Florida',
    'Georgia',
    'Hawaii',
    'Idaho', 'Illinois', 'Indiana', 'Iowa',
    'Kansas', 'Kentucky',
    'Louisiana',
    'Maine', 'Maryland', 'Massachusetts', 'Michigan',
    'Minnesota', 'Mississippi', 'Missouri', 'Montana',
    'Nebraska', 'Nevada', 'New Hampshire', 'New Jersey',
    'New Mexico', 'New York', 'North Carolina', 'North Dakota',
    'Ohio', 'Oklahoma', 'Oregon',
    'Pennsylvania',
    'Rhode Island',
    'South Carolina', 'South Dakota',
    'Tennessee', 'Texas',
    'Utah',
    'Vermont', 'Virginia',
    'Washington', 'West Virginia', 'Wisconsin', 'Wyoming',
]

In [12]:
def _find_state(message, candidates):
    """Return the entry of `candidates` that `message` mentions, or None.

    Matching is case-insensitive and anchored on word boundaries, so "Kansas"
    is not found inside "Arkansas". Longer names are tried first, so
    "West Virginia" wins over "Virginia" when both would match. Among names of
    equal length the order of `candidates` decides. Entries that are not
    non-empty strings (such as missing values) are skipped.
    """
    import re  # local import keeps this cell runnable on its own

    # dict.fromkeys de-duplicates while preserving the original order, which
    # keeps the stable sort below deterministic.
    names = list(dict.fromkeys(
        name for name in candidates if isinstance(name, str) and name.strip()
    ))
    for name in sorted(names, key=len, reverse=True):
        if re.search(rf"\b{re.escape(name)}\b", message, flags=re.IGNORECASE):
            return name
    return None


def chat():
    """Run an interactive console chat until the user types 'quit'.

    Each message is cleaned the same way as the training patterns (whitespace
    split, English stopwords dropped, remaining words lemmatized), vectorized
    with the fitted `vectorizer` and classified by `model`. For the 'stats'
    intent the reply reports the `statistic` value of the state named in the
    message, looked up in `stats_df`; for every other intent a random entry
    from that intent's responses in `intents_dict` is used, with a generic
    greeting as fallback when the intent has no responses.

    The loop also ends when the input stream is closed or interrupted.
    Blank messages are ignored. Returns None; all output goes to stdout.
    """
    # Built once per session rather than once per word.
    stop_words = set(stopwords.words('english'))

    print("Start chatting with the bot (type 'quit' to stop)!")
    while True:
        try:
            inp = input("You: ")
        except (EOFError, KeyboardInterrupt):
            # No more input is coming; leave the loop instead of crashing.
            print()
            break

        message = inp.strip()
        if message.lower() == "quit":
            break
        if not message:
            # Nothing to classify; ask again.
            continue

        # Mirror the preprocessing applied to the training patterns so the
        # model sees user text in the form it was trained on.
        cleaned = ' '.join(
            lemmatizer.lemmatize(word)
            for word in message.split()
            if word not in stop_words
        )

        # Vectorize the user's input
        inp_vec = vectorizer.transform([cleaned])

        # Predict the intent
        pred = model.predict(inp_vec)

        # Generate a response
        if pred[0] == 'stats':
            # Search the original message, not the cleaned one, so stopword
            # removal cannot break up a multi-word state name.
            state = _find_state(message, stats_df['state'])
            if state is not None:
                statistic = stats_df[stats_df['state'] == state]['statistic'].values[0]
                response = f"The mental health statistic for {state} is {statistic}."
            else:
                response = "I'm sorry, but I couldn't find a state in your message. Could you please specify the state you're interested in?"
        else:
            intent_responses = intents_dict.get(pred[0], {}).get('responses', [])
            if intent_responses:
                response = random.choice(intent_responses)
            else:
                response = "Hello! How can I assist you today?"

        print(f"Bot: {response}")

# Call the chat function to start a conversation
# This blocks on input() and only returns once 'quit' is entered, so a
# Run All stops at this cell until the session is ended by hand.
chat()


Start chatting with the bot (type 'quit' to stop)!
You: quit


In [13]:
# Calculate accuracy: the share of held-out patterns whose predicted tag
# equals the true tag.
matches = y_test == y_pred  # element-wise comparison, one flag per test row
correct_predictions, total_predictions = sum(matches), len(y_test)
accuracy = correct_predictions / total_predictions

print(f"Accuracy: {accuracy * 100}%")



Accuracy: 25.53191489361702%
