In [39]:
import os
import warnings

import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import LinearSVC
# Install a blanket "ignore" filter: no warnings are shown from this point on.
# NOTE(review): presumably meant to keep cell output compact, but it would also
# hide any scikit-learn convergence / undefined-metric warnings — consider
# narrowing it to specific warning categories.
warnings.filterwarnings('ignore')

In [40]:
# Load the labelled utterances used to train the intent classifier.
#
# The CSV location can be overridden with the SOFMATTRESS_TRAIN_CSV environment
# variable, so the notebook can run on another machine without editing code.
# The fallback is the original machine-specific path, so existing setups keep
# working unchanged.
DATA_PATH = os.environ.get(
    "SOFMATTRESS_TRAIN_CSV",
    r"C:\Users\lenovo\intent_detection\data\sofmattress_train.csv",
)

# Drop rows with any missing value before extracting the text and label columns.
df = pd.read_csv(DATA_PATH).dropna()

# Both columns are cast to str before being split and passed on to the
# vectorizer (texts) and the classifiers (labels).
texts = df['sentence'].astype(str)
labels = df['label'].astype(str)

In [41]:
# Preview the first rows to check that the `sentence` and `label` columns
# loaded as expected.
df.head()

Unnamed: 0,sentence,label
0,You guys provide EMI option?,EMI
1,Do you offer Zero Percent EMI payment options?,EMI
2,0% EMI.,EMI
3,EMI,EMI
4,I want in installment,EMI


In [42]:
# Count distinct values per column: the number of unique sentences and the
# number of intent labels the classifier has to separate.
df.nunique()

sentence    324
label        21
dtype: int64

In [43]:
# (rows, columns) of the frame after dropna. Comparing the row count with the
# unique-sentence count from df.nunique() reveals duplicated sentences.
df.shape

(328, 2)

In [53]:

# Hold out 20% of the utterances for evaluation. Stratifying on the labels
# keeps roughly the same class proportions in both partitions, and the fixed
# random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    texts,
    labels,
    stratify=labels,
    random_state=42,
    test_size=0.2,
)

In [54]:
# TF-IDF over unigrams and bigrams, with English stop words removed and the
# vocabulary capped at 3000 features.
vectorizer = TfidfVectorizer(
    max_features=3000,
    stop_words='english',
    ngram_range=(1, 2),
)

# Fit on the training split only, then reuse the fitted vectorizer to
# transform the held-out split.
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

In [55]:
# Candidate classifiers compared on the same TF-IDF features, keyed by the
# display name used in the evaluation output and when saving the models.
models = {
    "Naive Bayes": MultinomialNB(),
    "Logistic Regression": LogisticRegression(max_iter=1000),
    # LinearSVC's dual coordinate descent solver shuffles the data randomly.
    # Pin the seed (same value as the train/test split) so repeated runs
    # produce the same fitted model and the same saved artifact.
    "Linear SVM": LinearSVC(random_state=42),
}

In [None]:
# NOTE(review): disabled scratch cell. Every line below is commented out, so
# running it does nothing and any output still displayed under it is stale.
# It sketches a standalone TF-IDF + Naive Bayes fit; consider deleting the cell.
# from sklearn.feature_extraction.text import TfidfVectorizer
# from sklearn.naive_bayes import MultinomialNB

# # Fit TF-IDF on training text
# vectorizer = TfidfVectorizer()
# X_train_tfidf = vectorizer.fit_transform(X_train)
# # Train Naive Bayes
# nb = MultinomialNB()
# nb.fit(X_train_tfidf, y_train)


0,1,2
,alpha,1.0
,force_alpha,True
,fit_prior,True
,class_prior,


In [56]:
# Train every candidate on the TF-IDF training matrix, score it on the
# held-out split, print a per-model report, and collect the accuracies in
# `results` (display name -> accuracy).
results = {}
for name, model in models.items():
    # fit() returns the fitted estimator, so training and prediction chain.
    y_pred = model.fit(X_train_tfidf, y_train).predict(X_test_tfidf)

    acc = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred)
    results[name] = acc

    print(f"\n{'='*40}\nModel: {name}")
    print(f"Accuracy: {acc:.4f}")
    print("Classification Report:\n", report)


Model: Naive Bayes
Accuracy: 0.6667
Classification Report:
                        precision    recall  f1-score   support

100_NIGHT_TRIAL_OFFER       1.00      0.75      0.86         4
   ABOUT_SOF_MATTRESS       1.00      0.50      0.67         2
         CANCEL_ORDER       1.00      1.00      1.00         2
        CHECK_PINCODE       1.00      0.50      0.67         2
                  COD       1.00      1.00      1.00         2
           COMPARISON       1.00      0.50      0.67         2
    DELAY_IN_DELIVERY       0.00      0.00      0.00         2
         DISTRIBUTORS       0.32      1.00      0.48         7
                  EMI       1.00      1.00      1.00         5
        ERGO_FEATURES       1.00      0.50      0.67         2
             LEAD_GEN       0.50      0.25      0.33         4
        MATTRESS_COST       1.00      1.00      1.00         5
               OFFERS       0.00      0.00      0.00         2
         ORDER_STATUS       0.75      0.75      0.75    

In [57]:
import joblib

# Persist the fitted TF-IDF vectorizer. Inference has to transform new text
# with this same fitted object, so it is saved next to the models.
joblib.dump(vectorizer, "tfidf_vectorizer.pkl")

# Persist each trained classifier under its own file name (paths are relative
# to the notebook's working directory). The last dump call is the cell's final
# expression, so its return value is what the notebook displays.
joblib.dump(models["Naive Bayes"], "naive_bayes_model.pkl")
joblib.dump(models["Logistic Regression"], "logistic_regression_model.pkl")
joblib.dump(models["Linear SVM"], "linear_svm_model.pkl")


['linear_svm_model.pkl']

In [63]:
import joblib

# Reload the persisted artifacts: the TF-IDF vectorizer plus one classifier
# (swap in either of the other saved model files to try a different one).
vectorizer, model = (
    joblib.load(artifact)
    for artifact in ("tfidf_vectorizer.pkl", "linear_svm_model.pkl")
)

# Vectorize a sample utterance with the loaded vectorizer, then classify it.
sample_text = ["is the mattress available ?"]
X_tfidf = vectorizer.transform(sample_text)
prediction = model.predict(X_tfidf)

print("Predicted Intent:", prediction[0])


Predicted Intent: PRODUCT_VARIANTS
