In [1]:
from pathlib import Path

import joblib
import pandas as pd
from sentence_transformers import SentenceTransformer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix

# -------------------------------------------------------
# 1. Load dataset
# -------------------------------------------------------
# The CSV is read for its "text" (utterance) and "intent" (label) columns.
df = pd.read_csv("../datasets/intents.csv")

print("Dataset shape:", df.shape)
print(df.head())

# -------------------------------------------------------
# 2. Label encode targets
# -------------------------------------------------------
label_encoder = LabelEncoder()
df["intent_id"] = label_encoder.fit_transform(df["intent"])

print("\nLabel Mapping:")
# The encoded id of a class is its position in `classes_`.
for idx, cls in enumerate(label_encoder.classes_):
    print(cls, "→", idx)

# -------------------------------------------------------
# 3. Train-test split
# -------------------------------------------------------
# Stratify on the label so both splits keep the same intent proportions.
X_train, X_test, y_train, y_test = train_test_split(
    df["text"].tolist(),
    df["intent_id"].tolist(),
    test_size=0.2,
    random_state=42,
    stratify=df["intent_id"]
)

# -------------------------------------------------------
# 4. Encode text → embeddings (MiniLM)
# -------------------------------------------------------
print("\nEncoding text to embeddings…")
embedder = SentenceTransformer("all-MiniLM-L6-v2")

X_train_emb = embedder.encode(X_train, batch_size=32, convert_to_numpy=True)
X_test_emb  = embedder.encode(X_test, batch_size=32, convert_to_numpy=True)

print("Embedding shape:", X_train_emb.shape)

# -------------------------------------------------------
# 5. Train Logistic Regression classifier
# -------------------------------------------------------
clf = LogisticRegression(
    max_iter=2000,
    class_weight="balanced",    # weight classes inversely to their frequency
    n_jobs=-1
)

clf.fit(X_train_emb, y_train)

# -------------------------------------------------------
# 6. Evaluation
# -------------------------------------------------------
y_pred = clf.predict(X_test_emb)

print("\nAccuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n")
print(classification_report(y_test, y_pred, target_names=label_encoder.classes_))

print("\nConfusion Matrix:\n")
print(confusion_matrix(y_test, y_pred))

# -------------------------------------------------------
# 7. Save Model + Label Encoder
# -------------------------------------------------------
# joblib.dump does not create missing parent directories, so make sure the
# output folder exists before writing any artifact into it.
MODEL_DIR = Path("../models/intent")
MODEL_DIR.mkdir(parents=True, exist_ok=True)

joblib.dump(clf, MODEL_DIR / "intent_classifier_lr.pkl")
joblib.dump(label_encoder, MODEL_DIR / "intent_label_encoder.pkl")
# Passed as a plain string; resolves to the same directory the inference
# cell loads from ("../models/intent/intent_embedder/").
embedder.save(str(MODEL_DIR / "intent_embedder"))

print("\nModel saved as intent_classifier_lr.pkl")


  from .autonotebook import tqdm as notebook_tqdm



Dataset shape: (223, 2)
                                                text      intent
0  I'm drained today, like someone unplugged my b...  mood_input
1     Everything feels heavy and pointless right now  mood_input
2           Honestly? I’m weirdly cheerful today lol  mood_input
3         My anxiety is doing cartwheels in my chest  mood_input
4                    Feeling hyper for no reason omg  mood_input

Label Mapping:
ask_recommendation → 0
genre_preference → 1
mood_input → 2

Encoding text to embeddings…
Embedding shape: (178, 384)

Accuracy: 0.9555555555555556

Classification Report:

                    precision    recall  f1-score   support

ask_recommendation       0.93      0.93      0.93        15
  genre_preference       0.93      0.93      0.93        15
        mood_input       1.00      1.00      1.00        15

          accuracy                           0.96        45
         macro avg       0.96      0.96      0.96        45
      weighted avg       0.96      

In [2]:
import joblib
from sentence_transformers import SentenceTransformer

# Load the artifacts written by the training cell: the fitted classifier,
# the fitted label encoder, and the saved sentence-embedding model.
# NOTE(review): joblib.load unpickles arbitrary Python objects — only load
# .pkl files that come from a trusted source.
clf = joblib.load("../models/intent/intent_classifier_lr.pkl")
label_encoder = joblib.load("../models/intent/intent_label_encoder.pkl")
embedder = SentenceTransformer("../models/intent/intent_embedder/")

def predict_intent(text):
    """Return the intent label predicted for a single piece of text.

    The text is embedded with the module-level ``embedder``, classified with
    the module-level ``clf``, and the resulting class id is mapped back to
    its label through the module-level ``label_encoder``.
    """
    # The embedder is called with a batch, so wrap the text in a one-item list.
    vectors = embedder.encode([text], convert_to_numpy=True)
    predicted_ids = clf.predict(vectors)
    first_id = predicted_ids[0]
    labels = label_encoder.inverse_transform([first_id])
    return labels[0]

# Smoke-test the reloaded pipeline on three example utterances.
sample_utterances = (
    "I feel low and drained today",
    "Recommend something fun",
    "I want something fantasy related, I like harry potter something like that",
)
for utterance in sample_utterances:
    print(predict_intent(utterance))

mood_input
ask_recommendation
genre_preference
