In [10]:
# Import necessary libraries
import json
import pandas as pd
import nltk
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords


In [11]:
# Download necessary NLTK data
nltk.download('wordnet')
nltk.download('stopwords')

[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\sixni\AppData\Roaming\nltk_data...
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\sixni\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping corpora\stopwords.zip.


True

In [12]:
# Initialize the WordNet lemmatizer
lemmatizer = WordNetLemmatizer()

In [13]:
# Load the dataset
with open('intents.json', 'r') as f:
    data = json.load(f)

In [14]:
# Convert the data to a DataFrame
df = pd.DataFrame(data['intents'])


In [15]:
# Preprocess the data
dic = {"tag":[], "patterns":[], "responses":[]}
for i in range(len(df)):
    ptrns = df[df.index == i]['patterns'].values[0]
    rspns = df[df.index == i]['responses'].values[0]
    tag = df[df.index == i]['tag'].values[0]
    for j in range(len(ptrns)):
        dic['tag'].append(tag)
        dic['patterns'].append(ptrns[j])
        dic['responses'].append(rspns)

df = pd.DataFrame.from_dict(dic)

In [16]:
# Lemmatize the patterns
df['patterns'] = df['patterns'].apply(lambda x: ' '.join([lemmatizer.lemmatize(word) for word in x.split() if word not in set(stopwords.words('english'))]))


In [17]:
# Split the data into training and testing sets
X = df['patterns']
y = df['tag']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [18]:
# Vectorize the text data using TF-IDF
vectorizer = TfidfVectorizer()
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

In [19]:
# Train a Random Forest classifier
model = RandomForestClassifier()
model.fit(X_train_vec, y_train)

In [20]:
# Predict intents for the testing set
y_pred = model.predict(X_test_vec)

In [21]:
# Evaluate the model's performance
report = classification_report(y_test, y_pred, output_dict=True, zero_division=0)
print(report)

{'about': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 0.0}, 'anxious': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 1.0}, 'casual': {'precision': 0.09523809523809523, 'recall': 1.0, 'f1-score': 0.17391304347826086, 'support': 2.0}, 'creation': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 0.0}, 'death': {'precision': 1.0, 'recall': 0.5, 'f1-score': 0.6666666666666666, 'support': 2.0}, 'default': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 3.0}, 'depressed': {'precision': 0.3333333333333333, 'recall': 1.0, 'f1-score': 0.5, 'support': 1.0}, 'done': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 1.0}, 'fact-10': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 0.0}, 'fact-19': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 1.0}, 'fact-22': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 1.0}, 'fact-29': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 