In [1]:
import os
import re

import joblib
import numpy as np
from sklearn.base import clone
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import Normalizer
from sklearn.svm import LinearSVC

# -----------------------------
# TEXT NORMALIZATION
# -----------------------------
def normalize_text(text):
    """Normalize an utterance before it is vectorized.

    Steps, in order:
      1. lower-case the text (only affects Latin letters);
      2. turn every run of whitespace (tabs, newlines, ...) into one space;
      3. drop every character that is not Devanagari (U+0900-U+097F),
         an ASCII letter or digit, or a space;
      4. squeeze repeated spaces and trim both ends.

    Args:
        text: Raw utterance as a ``str``.

    Returns:
        The cleaned string; may be empty if nothing survives the filter.
    """
    text = text.lower()
    # Whitespace must be unified BEFORE the character filter: the filter only
    # keeps the plain space, so a tab or newline between two words would
    # otherwise be deleted and the words glued together.
    text = re.sub(r"\s+", " ", text)
    text = re.sub(r"[^\u0900-\u097Fa-zA-Z0-9 ]", "", text)
    # Removing punctuation can leave doubled spaces ("a - b" -> "a  b").
    text = re.sub(r" +", " ", text).strip()
    return text

# -----------------------------
# DATASET
# -----------------------------
# Intent label -> example Hindi utterances used as training phrases.
data = {
    "time": ["समय क्या हुआ है", "कितने बजे हैं", "समय बताओ", "टाइम बताओ"],
    "date": ["तारीख बताओ", "तारीख कितनी है", "आज क्या तारीख है"],
    "weather": ["मौसम बताओ", "मौसम कैसा है", "क्या बारिश होगी"],
    "calculate": ["गणना करो", "हिसाब लगाओ", "कैलकुलेट करो"],
    "file_open": ["फ़ाइल खोलें", "टेक्स्ट फाइल बनाओ", "फाइल ओपन करो"],
    "file_close": ["फ़ाइल बंद करो", "फाइल क्लोज करो"],
    "music_play": ["गाने चलाओ", "गाने बजाओ", "म्यूजिक प्ले करो"],
    "music_next": ["अगला गाना बजाओ", "अगला संगीत चलाओ"],
    "music_stop": ["गाने बंद करो", "म्यूजिक स्टॉप"],
    "gpio_fan_on": ["पंखा शुरू करो", "पंखा चलाओ", "फैन ऑन करो"],
    "gpio_fan_off": ["पंखा बंद करो", "फैन ऑफ करो"],
    "gpio_light_on": ["लाइट शुरू करो", "लाइट जलाओ", "बल्ब ऑन करो"],
    "gpio_light_off": ["लाइट बंद करो", "लाइट ऑफ करो"],
    "translate_en": ["अंग्रेजी में अनुवाद करो", "इंग्लिश ट्रांसलेशन"],
    "translate_hi": ["हिंदी में अनुवाद करो", "इसे हिंदी में बदलो"],
    "web_search": ["गूगल खोलो", "ब्राउज़र ओपन करो", "क्रोम खोलो"],
    "reminder": ["याद दिलाओ", "अलार्म लगाओ", "रिमाइंडर"],
    "timer": ["घड़ी लगाओ", "गिनती करो"],
    "send_message": ["संदेश भेजो", "मैसेज भेज दो"],
    "tell_joke": ["चुटकुले बताओ", "मुझे हँसना है"],
    "introduce": ["अपना परिचय दो", "अपने बारे में बताओ"],
}

# Flatten the mapping into parallel lists: X holds the normalized phrases,
# y the intent label of the phrase at the same index (dict order preserved).
labelled_phrases = [
    (normalize_text(utterance), label)
    for label, utterances in data.items()
    for utterance in utterances
]
X = [cleaned for cleaned, _ in labelled_phrases]
y = [label for _, label in labelled_phrases]

# -----------------------------
# OPTIMIZED VECTORIZER
# -----------------------------
# Character n-gram TF-IDF features over the normalized phrases.
vectorizer = TfidfVectorizer(
    analyzer="char_wb",          # Better for Hindi ASR errors
    ngram_range=(2, 4),          # Robust against mis-recognition
    max_features=5000,           # Cap memory
    sublinear_tf=True,
    dtype=np.float32             # Reduce memory
)

# random_state pins the solver's internal shuffling so that re-running the
# cell reproduces the same coefficients (and the same decision scores).
classifier = LinearSVC(
    C=1.0,
    max_iter=1000,
    random_state=42
)

model = Pipeline([
    ("tfidf", vectorizer),
    # TfidfVectorizer already L2-normalizes rows by default (norm="l2"), so
    # this step should be a no-op; it is kept so the step names of the saved
    # pipeline stay unchanged.
    ("norm", Normalizer()),      # L2 normalization
    ("clf", classifier)
])

model.fit(X, y)

# joblib.dump does not create missing directories; make sure the target
# folder exists so the cell also works on a fresh checkout.
model_path = "models/intent_model_optimized.joblib"
os.makedirs(os.path.dirname(model_path), exist_ok=True)
joblib.dump(model, model_path, compress=3)
print("Optimized intent model saved.")


Optimized intent model saved.


In [2]:
import joblib
import numpy as np
import re

# Load once at startup
model = joblib.load("models/intent_model_optimized.joblib")

def normalize_text(text):
    """Normalize an utterance before it is passed to the intent model.

    Steps, in order:
      1. lower-case the text (only affects Latin letters);
      2. turn every run of whitespace (tabs, newlines, ...) into one space;
      3. drop every character that is not Devanagari (U+0900-U+097F),
         an ASCII letter or digit, or a space;
      4. squeeze repeated spaces and trim both ends.

    Args:
        text: Raw utterance as a ``str``.

    Returns:
        The cleaned string; may be empty if nothing survives the filter.
    """
    text = text.lower()
    # Whitespace must be unified BEFORE the character filter: the filter only
    # keeps the plain space, so a tab or newline between two words would
    # otherwise be deleted and the words glued together.
    text = re.sub(r"\s+", " ", text)
    text = re.sub(r"[^\u0900-\u097Fa-zA-Z0-9 ]", "", text)
    # Removing punctuation can leave doubled spaces ("a - b" -> "a  b").
    text = re.sub(r" +", " ", text).strip()
    return text

def predict_intent(text, threshold=0.009):
    """Predict the intent of a single utterance.

    The text is cleaned with ``normalize_text`` and scored with the
    module-level ``model``. The class with the highest decision score wins;
    if that score is below ``threshold`` the utterance is reported as
    ``"unknown"``.

    Note: the confidence is the raw ``decision_function`` score of the
    winning class, not a probability, so ``threshold`` is expressed in the
    same (uncalibrated) units.

    Args:
        text: Raw utterance as a ``str``.
        threshold: Minimum winning score required to accept the prediction.

    Returns:
        A ``(intent, confidence)`` tuple. ``intent`` is a class label or
        ``"unknown"``; ``confidence`` is a ``float``.
    """
    text = normalize_text(text)
    if not text:
        # Nothing usable survived normalization (empty or punctuation-only
        # input). Scoring an empty string would yield the same scores for
        # every such input, so never map it to a real intent.
        return "unknown", 0.0

    # Flatten the (1, n_classes) score matrix of the single sample.
    scores = np.ravel(model.decision_function([text]))
    best = int(np.argmax(scores))
    confidence = float(scores[best])

    if confidence < threshold:
        return "unknown", confidence

    return model.classes_[best], confidence


# Example: run a handful of phrases through the classifier and print the
# predicted intent together with its decision score.
example_phrases = [
    "समय बताओ",
    "संदेश भेजो",
    "फैन ऑन करो",
    "टाइम बताओ",
    "समय क्या हुआ है",
    "म्यूजिक स्टॉप",
    "बल्ब ऑन करो",
    "क्रोम खोलो",
    "मौसम बताओ",
]

for phrase in example_phrases:
    intent, conf = predict_intent(phrase)
    print(intent, conf)


time 0.507954396726424
send_message 0.4607662028882539
gpio_fan_on 0.07522966163071987
time 0.3501331086237167
time 0.45980400712827874
music_stop 0.2401376510857216
gpio_light_on 0.26092213423733823
web_search 0.5107751124970458
weather 0.42193109388345806


In [3]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    confusion_matrix,
    hinge_loss
)

# NOTE(review): stratify=y is not usable with this dataset as-is: the 25%
# test split holds fewer samples than there are intent classes, which
# train_test_split rejects for a stratified split. With only 2-4 phrases per
# intent, the metrics below are indicative at best.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

# Fit an unfitted copy of the pipeline on the training split. Re-fitting
# `model` itself would silently replace the full-data model that
# predict_intent() relies on with one trained on only 75% of the phrases.
eval_model = clone(model)
eval_model.fit(X_train, y_train)

# -----------------------------
# PREDICTIONS
# -----------------------------
y_pred = eval_model.predict(X_test)
decision_scores = eval_model.decision_function(X_test)

# -----------------------------
# 1. ACCURACY
# -----------------------------
accuracy = accuracy_score(y_test, y_pred)
print("\nAccuracy:", accuracy)

# -----------------------------
# 2. PRECISION / RECALL / F1
# -----------------------------
print("\nClassification Report:")
# zero_division=0 reports the same 0.00 for classes without predictions or
# support, but without one UndefinedMetricWarning per metric.
print(classification_report(y_test, y_pred, zero_division=0))

# -----------------------------
# 3. CONFUSION MATRIX
# -----------------------------
cm = confusion_matrix(y_test, y_pred, labels=eval_model.classes_)

print("\nConfusion Matrix:")
print("Labels:", eval_model.classes_)
print(cm)

# -----------------------------
# 4. HINGE LOSS (TRUE SVM LOSS)
# -----------------------------
loss = hinge_loss(y_test, decision_scores, labels=eval_model.classes_)
print("\nHinge Loss:", loss)

# -----------------------------
# 5. CONFIDENCE MATRIX (FULL CLASS SCORES)
# -----------------------------
print("\nConfidence Matrix (Decision Scores Per Class):")
for text, scores in zip(X_test, decision_scores):
    print(f"\nInput: {text}")
    for cls, score in zip(eval_model.classes_, scores):
        print(f"{cls:15s} : {score:.4f}")

# -----------------------------
# 6. SINGLE SAMPLE CONFIDENCE VECTOR
# -----------------------------
sample = "समय बताओ"
sample = normalize_text(sample)

# Scores come from the split-trained evaluation model, not the saved one.
scores = eval_model.decision_function([sample])[0]

print("\nSingle Input Confidence Vector:")
for cls, score in zip(eval_model.classes_, scores):
    print(f"{cls:15s} : {score:.4f}")

predicted_intent = eval_model.classes_[np.argmax(scores)]
confidence = np.max(scores)

print("\nPredicted Intent:", predicted_intent)
print("Confidence Score:", confidence)


Accuracy: 0.35714285714285715

Classification Report:
               precision    recall  f1-score   support

    calculate       0.00      0.00      0.00         1
         date       1.00      1.00      1.00         2
   file_close       0.00      0.00      0.00         1
    file_open       0.00      0.00      0.00         1
 gpio_fan_off       0.00      0.00      0.00         0
  gpio_fan_on       0.00      0.00      0.00         1
gpio_light_on       0.00      0.00      0.00         1
   music_play       0.00      0.00      0.00         1
   music_stop       0.00      0.00      0.00         0
     reminder       0.00      0.00      0.00         1
 send_message       1.00      1.00      1.00         1
    tell_joke       0.00      0.00      0.00         1
         time       0.00      0.00      0.00         1
        timer       0.00      0.00      0.00         0
      weather       0.50      1.00      0.67         1
   web_search       1.00      1.00      1.00         1

     acc

  y_type = type_of_target(y, input_name="y")
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
