In [None]:
pip install pandas scikit-learn nltk

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline
from sklearn.metrics import accuracy_score, classification_report
import nltk
from nltk import word_tokenize, pos_tag, ne_chunk

In [None]:
# Load the labelled dataset. Later cells read the 'input' column as the text
# to classify and the 'intent' column as the target label.
raw_df = pd.read_csv("my-dataset.csv")

# Fail early, with a readable message, if an expected column is absent
# (otherwise the KeyError only surfaces in the train/test split cell).
required_columns = ["input", "intent"]
missing_columns = [col for col in required_columns if col not in raw_df.columns]
if missing_columns:
    raise KeyError(f"my-dataset.csv is missing required column(s): {missing_columns}")

# read_csv parses empty cells as NaN, and TfidfVectorizer refuses NaN
# documents, so rows without a text or a label are dropped before training.
df = raw_df.dropna(subset=required_columns)

In [None]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split identical from one run to the next.
texts = df['input']
intents = df['intent']
train_data, test_data, train_labels, test_labels = train_test_split(
    texts,
    intents,
    random_state=42,
    test_size=0.2,
)

In [None]:
# Build the classification pipeline: raw text -> TF-IDF features -> Naive Bayes
vectorizer = TfidfVectorizer()
classifier = MultinomialNB()
model = make_pipeline(vectorizer, classifier)

In [None]:
# Fit the whole pipeline (vectorizer and classifier) on the training split
model.fit(X=train_data, y=train_labels)

In [None]:
# Evaluate the model performance on the held-out test split
predictions = model.predict(test_data)
accuracy = accuracy_score(test_labels, predictions)
# zero_division=0 scores an undefined precision/recall (for example an intent
# that is never predicted) as 0 instead of 1, so such classes do not inflate
# the per-class rows or the averaged rows of the report.
report = classification_report(test_labels, predictions, zero_division=0)

print(f"Accuracy: {accuracy}")
print("Classification Report:\n", report)

In [None]:
# Classify previously unseen inputs with the trained pipeline
new_input = [
    "make me laugh",
]
predicted_intent = model.predict(X=new_input)
print(f"Predicted intent: {predicted_intent}")

In [None]:
# Entity extraction using NLTK

# NLTK data packages used by word_tokenize, pos_tag and ne_chunk. Both the
# older package ids and the ids used by newer NLTK releases are listed so the
# fallback download works regardless of the installed version.
# NOTE(review): confirm the ids against the NLTK version pinned for this project.
_NLTK_RESOURCES = (
    "punkt",
    "punkt_tab",
    "averaged_perceptron_tagger",
    "averaged_perceptron_tagger_eng",
    "maxent_ne_chunker",
    "maxent_ne_chunker_tab",
    "words",
)


def _download_nltk_resources():
    """Download every data package listed in ``_NLTK_RESOURCES``."""
    for resource in _NLTK_RESOURCES:
        nltk.download(resource, quiet=True)


def _chunk_named_entities(text):
    """Tokenize, POS-tag and NE-chunk ``text``; return the resulting tree."""
    return ne_chunk(pos_tag(word_tokenize(text)))


def extract_entities(text: str) -> list:
    """Extract named entities from ``text`` with NLTK's ``ne_chunk``.

    Args:
        text: The sentence or utterance to analyse.

    Returns:
        A list of ``(entity, label)`` tuples in order of appearance, where
        ``entity`` is the chunk's tokens joined by single spaces and ``label``
        is the label of the chunk subtree. The list is empty when no chunk
        is found.

    Raises:
        LookupError: If the required NLTK data is still unavailable after the
            download attempt (for example when there is no network access).
    """
    try:
        tree = _chunk_named_entities(text)
    except LookupError:
        # A fresh `pip install nltk` ships without data files; fetch them once
        # and retry so the notebook runs on a clean environment.
        _download_nltk_resources()
        tree = _chunk_named_entities(text)

    # Named-entity chunks are the nltk.Tree children of the result; every
    # other child is a plain (token, pos) tuple and is skipped.
    return [
        (" ".join(token for token, _ in subtree.leaves()), subtree.label())
        for subtree in tree
        if isinstance(subtree, nltk.Tree)
    ]

In [None]:
# Example usage: pair each sample input with its extracted entities and print them
for input_text, entities in zip(new_input, map(extract_entities, new_input)):
    print(f"Entities in '{input_text}': {entities}")