In [8]:
pip install pandas scikit-learn joblib

Note: you may need to restart the kernel to use updated packages.




In [9]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline
from sklearn.metrics import accuracy_score, classification_report
from joblib import dump

# Load your dataset
df = pd.read_csv("my-dataset.csv")
# df
# Split the data into training and testing sets
train_data, test_data, train_labels, test_labels = train_test_split(
    df['input'], df['intent'], test_size=0.2, random_state=42
)

In [10]:
# Create a pipeline with TF-IDF vectorizer and a classifier (e.g., Naive Bayes)
model = make_pipeline(TfidfVectorizer(), MultinomialNB())

In [11]:
# Train the model on the training data
model.fit(train_data, train_labels)

Pipeline(steps=[('tfidfvectorizer', TfidfVectorizer()),
                ('multinomialnb', MultinomialNB())])

In [12]:
# Make predictions on the test data
predictions = model.predict(test_data)

In [13]:
# Evaluate the model performance
accuracy = accuracy_score(test_labels, predictions)
report = classification_report(test_labels, predictions, zero_division=1)

print(f"Accuracy: {accuracy}")
print("Classification Report:\n", report)

Accuracy: 0.899581589958159
Classification Report:
                    precision    recall  f1-score   support

         add_task       1.00      0.30      0.46        10
      get_weather       1.00      0.00      0.00         1
         greeting       1.00      0.08      0.15        12
greeting_response       1.00      0.00      0.00         2
           quotes       0.91      1.00      0.95        42
         reminder       0.94      1.00      0.97        46
    search_google       1.00      0.00      0.00         2
       send_email       0.98      1.00      0.99        44
        tell_joke       0.86      1.00      0.92        48
     update_tasks       1.00      0.00      0.00         1
        wikipedia       0.79      1.00      0.89        31

         accuracy                           0.90       239
        macro avg       0.95      0.49      0.49       239
     weighted avg       0.91      0.90      0.86       239



In [15]:
# Use the model to predict the intent of new inputs
new_input = ["I'm sad, make me smile"]
predicted_intent = model.predict(new_input)
print(f"Predicted intent: {predicted_intent}")
# Save the trained model to a file
# dump(model, 'intent.joblib')

Predicted intent: ['tell_joke']
