diff --git a/config/intelligent-routing/in-tree/embedding.yaml b/config/intelligent-routing/in-tree/embedding.yaml new file mode 100644 index 000000000..f507f6997 --- /dev/null +++ b/config/intelligent-routing/in-tree/embedding.yaml @@ -0,0 +1,183 @@ +bert_model: + model_id: models/all-MiniLM-L12-v2 + threshold: 0.6 + use_cpu: true + +semantic_cache: + enabled: true + backend_type: "memory" # Options: "memory", "milvus", or "hybrid" + similarity_threshold: 0.8 + max_entries: 1000 # Only applies to memory backend + ttl_seconds: 3600 + eviction_policy: "fifo" + # HNSW index configuration (for memory backend only) + use_hnsw: true # Enable HNSW index for faster similarity search + hnsw_m: 16 # Number of bi-directional links (higher = better recall, more memory) + hnsw_ef_construction: 200 # Construction parameter (higher = better quality, slower build) + + # Hybrid cache configuration (when backend_type: "hybrid") + # Combines in-memory HNSW for fast search with Milvus for scalable storage + # max_memory_entries: 100000 # Max entries in HNSW index (default: 100,000) + # backend_config_path: "config/milvus.yaml" # Path to Milvus config + + # Embedding model for semantic similarity matching + # Options: "bert" (fast, 384-dim), "qwen3" (high quality, 1024-dim, 32K context), "gemma" (balanced, 768-dim, 8K context) + # Default: "bert" (fastest, lowest memory) + embedding_model: "bert" + +tools: + enabled: true + top_k: 3 + similarity_threshold: 0.2 + tools_db_path: "config/tools_db.json" + fallback_to_empty: true + +prompt_guard: + enabled: true # Global default - can be overridden per category with jailbreak_enabled + use_modernbert: true + model_id: "models/jailbreak_classifier_modernbert-base_model" + threshold: 0.7 + use_cpu: true + jailbreak_mapping_path: "models/jailbreak_classifier_modernbert-base_model/jailbreak_type_mapping.json" + +# vLLM Endpoints Configuration +# IMPORTANT: 'address' field must be a valid IP address (IPv4 or IPv6) +# Supported formats: 127.0.0.1, 192.168.1.1, ::1, 2001:db8::1 +# NOT supported: domain names (example.com), protocol prefixes (http://), paths (/api), ports in address (use 'port' field) +vllm_endpoints: + - name: "endpoint1" + address: "172.28.0.20" # Static IPv4 of llm-katan within docker compose network + port: 8002 + weight: 1 + +model_config: + "qwen3": + reasoning_family: "qwen3" # This model uses Qwen-3 reasoning syntax + preferred_endpoints: ["endpoint1"] + pii_policy: + allow_by_default: true + +# Classifier configuration +classifier: + category_model: + model_id: "models/category_classifier_modernbert-base_model" + use_modernbert: true + threshold: 0.6 + use_cpu: true + category_mapping_path: "models/category_classifier_modernbert-base_model/category_mapping.json" + pii_model: + model_id: "models/pii_classifier_modernbert-base_presidio_token_model" + use_modernbert: true + threshold: 0.7 + use_cpu: true + pii_mapping_path: "models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json" + +# Embedding-based classification rules +# These rules use semantic similarity between query text and keywords +embedding_rules: + - category: "technical_support" + threshold: 0.75 + keywords: + - "how to configure the system" + - "installation guide" + - "troubleshooting steps" + - "error message explanation" + - "setup instructions" + aggregation_method: "max" # Options: "max", "avg", "any" + model: "auto" # Options: "auto", "qwen3", "gemma" + dimension: 768 # Options: 128, 256, 512, 768, 1024 + quality_priority: 0.7 # 0.0-1.0, only for "auto" model + latency_priority: 0.3 # 0.0-1.0, only for "auto" model + + - category: "product_inquiry" + threshold: 0.70 + keywords: + - "product features and specifications" + - "pricing information" + - "availability and stock" + - "product comparison" + - "warranty details" + aggregation_method: "avg" + model: "gemma" + dimension: 768 + + - category: "account_management" + threshold: 0.72 + keywords: + - "password reset" + - "account settings" + - "profile update" + - "subscription management" + - "billing information" + aggregation_method: "max" + model: "qwen3" + dimension: 1024 + + - category: "general_inquiry" + threshold: 0.65 + keywords: + - "general question" + - "information request" + - "help needed" + - "customer service" + aggregation_method: "any" + model: "auto" + dimension: 512 + quality_priority: 0.5 + latency_priority: 0.5 + +# Categories with model scores +categories: + # Embedding-based categories + - name: technical_support + system_prompt: "You are a technical support specialist. Provide detailed, step-by-step guidance for technical issues. Use clear explanations and include relevant troubleshooting steps." + model_scores: + - model: qwen3 + score: 0.9 + use_reasoning: true + jailbreak_enabled: true + pii_detection_enabled: true + + - name: product_inquiry + system_prompt: "You are a product specialist. Provide accurate information about products, features, pricing, and availability. Be helpful and informative." + model_scores: + - model: qwen3 + score: 0.85 + use_reasoning: false + jailbreak_enabled: true + pii_detection_enabled: false + + - name: account_management + system_prompt: "You are an account management assistant. Help users with account-related tasks such as password resets, profile updates, and subscription management. Prioritize security and privacy." + model_scores: + - model: qwen3 + score: 0.88 + use_reasoning: false + jailbreak_enabled: true + pii_detection_enabled: true + + - name: general_inquiry + system_prompt: "You are a helpful general assistant. Answer questions clearly and concisely. If you need more information, ask clarifying questions." + model_scores: + - model: qwen3 + score: 0.75 + use_reasoning: false + jailbreak_enabled: true + pii_detection_enabled: false + +# Embedding Models Configuration +# These models provide intelligent embedding generation with automatic routing: +# - Qwen3-Embedding-0.6B: Up to 32K context, high quality, 1024-dim embeddings +# - EmbeddingGemma-300M: Up to 8K context, fast inference, Matryoshka support (768/512/256/128) +embedding_models: + qwen3_model_path: "models/Qwen3-Embedding-0.6B" + gemma_model_path: "models/embeddinggemma-300m" + use_cpu: true # Set to false for GPU acceleration (requires CUDA) + +# Default model for fallback +default_model: "qwen3" + +# Entropy-based reasoning configuration +entropy_threshold: 0.5 # Threshold for entropy-based reasoning decision +high_entropy_threshold: 0.8 # High entropy threshold for complex queries + diff --git a/src/semantic-router/pkg/classification/classifier.go b/src/semantic-router/pkg/classification/classifier.go index 58521c3ce..608132600 100644 --- a/src/semantic-router/pkg/classification/classifier.go +++ b/src/semantic-router/pkg/classification/classifier.go @@ -382,91 +382,6 @@ func (c *Classifier) initializeCategoryClassifier() error { return c.categoryInitializer.Init(c.Config.CategoryModel.ModelID, c.Config.CategoryModel.UseCPU, numClasses) } -// ClassifyCategory performs category classification on the given text -func (c *Classifier) ClassifyCategory(text string) (string, float64, error) { - // Try keyword classifier first - if c.keywordClassifier != nil { - category, confidence, err := c.keywordClassifier.Classify(text) - if err != nil { - return "", 0.0, err - } - if category != "" { - return category, confidence, nil - } - } - // TODO: more sophiscated fusion engine needs to be designed and implemented to combine classifiers' results - // Try embedding based similarity classification if properly configured - if c.keywordEmbeddingClassifier != nil { - category, confidence, err := c.keywordEmbeddingClassifier.Classify(text) - if err != nil { - return "", 0.0, err - } - if category != "" { - return category, confidence, nil - } - } - // Try in-tree first if properly configured - if c.IsCategoryEnabled() && c.categoryInference != nil { - return c.classifyCategoryInTree(text) - } - - // If in-tree classifier was initialized but config is now invalid, return specific error - if c.categoryInference != nil && !c.IsCategoryEnabled() { - return "", 0.0, fmt.Errorf("category classification is not properly configured") - } - - // Fall back to MCP - if c.IsMCPCategoryEnabled() && c.mcpCategoryInference != nil { - return c.classifyCategoryMCP(text) - } - - return "", 0.0, fmt.Errorf("no category classification method available") -} - -// classifyCategoryInTree performs category classification using in-tree model -func (c *Classifier) classifyCategoryInTree(text string) (string, float64, error) { - if !c.IsCategoryEnabled() { - return "", 0.0, fmt.Errorf("category classification is not properly configured") - } - - // Use appropriate classifier based on configuration - var result candle_binding.ClassResult - var err error - - start := time.Now() - result, err = c.categoryInference.Classify(text) - metrics.RecordClassifierLatency("category", time.Since(start).Seconds()) - - if err != nil { - return "", 0.0, fmt.Errorf("classification error: %w", err) - } - - logging.Infof("Classification result: class=%d, confidence=%.4f", result.Class, result.Confidence) - - // Check confidence threshold - if result.Confidence < c.Config.CategoryModel.Threshold { - logging.Infof("Classification confidence (%.4f) below threshold (%.4f)", - result.Confidence, c.Config.CategoryModel.Threshold) - return "", float64(result.Confidence), nil - } - - // Convert class index to category name (MMLU-Pro) - categoryName, ok := c.CategoryMapping.GetCategoryFromIndex(result.Class) - if !ok { - logging.Warnf("Class index %d not found in category mapping", result.Class) - return "", float64(result.Confidence), nil - } - - // Translate to generic category if mapping is configured - genericCategory := c.translateMMLUToGeneric(categoryName) - - // Record the category classification metric using generic name when available - metrics.RecordCategoryClassification(genericCategory) - - logging.Infof("Classified as category: %s (mmlu=%s)", genericCategory, categoryName) - return genericCategory, float64(result.Confidence), nil -} - // IsJailbreakEnabled checks if jailbreak detection is enabled and properly configured func (c *Classifier) IsJailbreakEnabled() bool { return c.Config.PromptGuard.Enabled && c.Config.PromptGuard.ModelID != "" && c.Config.PromptGuard.JailbreakMappingPath != "" && c.JailbreakMapping != nil @@ -611,6 +526,19 @@ func (c *Classifier) ClassifyCategoryWithEntropy(text string) (string, float64, } } + // Try embedding based similarity classification if properly configured + if c.keywordEmbeddingClassifier != nil { + category, confidence, err := c.keywordEmbeddingClassifier.Classify(text) + if err != nil { + return "", 0.0, entropy.ReasoningDecision{}, err + } + if category != "" { + // Keyword embedding matched - determine reasoning mode from category configuration + reasoningDecision := c.makeReasoningDecisionForKeywordCategory(category) + return category, confidence, reasoningDecision, nil + } + } + // Try in-tree first if properly configured if c.IsCategoryEnabled() && c.categoryInference != nil { return c.classifyCategoryWithEntropyInTree(text) @@ -926,29 +854,6 @@ func (c *Classifier) AnalyzeContentForPIIWithThreshold(contentList []string, thr return hasPII, analysisResults, nil } -// ClassifyAndSelectBestModel performs classification and selects the best model for the query -func (c *Classifier) ClassifyAndSelectBestModel(query string) string { - // If no categories defined, return default model - if len(c.Config.Categories) == 0 { - return c.Config.DefaultModel - } - - // First, classify the text to determine the category - categoryName, confidence, err := c.ClassifyCategory(query) - if err != nil { - logging.Errorf("Classification error: %v, falling back to default model", err) - return c.Config.DefaultModel - } - - if categoryName == "" { - logging.Infof("Classification confidence (%.4f) below threshold, using default model", confidence) - return c.Config.DefaultModel - } - - // Then select the best model from the determined category based on score and TTFT - return c.SelectBestModelForCategory(categoryName) -} - // SelectBestModelForCategory selects the best model from a category based on score and TTFT func (c *Classifier) SelectBestModelForCategory(categoryName string) string { cat := c.findCategory(categoryName) diff --git a/src/semantic-router/pkg/classification/classifier_test.go b/src/semantic-router/pkg/classification/classifier_test.go index 6d376e263..7fc4dddd1 100644 --- a/src/semantic-router/pkg/classification/classifier_test.go +++ b/src/semantic-router/pkg/classification/classifier_test.go @@ -106,7 +106,7 @@ var _ = Describe("category classification and model selection", func() { }) }) - Describe("classify category", func() { + Describe("classify category with entropy", func() { type row struct { ModelID string CategoryMappingPath string @@ -118,7 +118,7 @@ var _ = Describe("category classification and model selection", func() { classifier.Config.CategoryModel.ModelID = r.ModelID classifier.Config.CategoryMappingPath = r.CategoryMappingPath classifier.CategoryMapping = r.CategoryMapping - _, _, err := classifier.ClassifyCategory("Some text") + _, _, _, err := classifier.ClassifyCategoryWithEntropy("Some text") Expect(err).To(HaveOccurred()) Expect(err.Error()).To(ContainSubstring("category classification is not properly configured")) }, @@ -129,12 +129,14 @@ var _ = Describe("category classification and model selection", func() { Context("when classification succeeds with high confidence", func() { It("should return the correct category", func() { - mockCategoryModel.classifyResult = candle_binding.ClassResult{ - Class: 2, - Confidence: 0.95, + mockCategoryModel.classifyWithProbsResult = candle_binding.ClassResultWithProbs{ + Class: 2, + Confidence: 0.95, + Probabilities: []float32{0.02, 0.03, 0.95}, + NumClasses: 3, } - category, score, err := classifier.ClassifyCategory("This is about politics") + category, score, _, err := classifier.ClassifyCategoryWithEntropy("This is about politics") Expect(err).ToNot(HaveOccurred()) Expect(category).To(Equal("politics")) @@ -144,12 +146,14 @@ var _ = Describe("category classification and model selection", func() { Context("when classification confidence is below threshold", func() { It("should return empty category", func() { - mockCategoryModel.classifyResult = candle_binding.ClassResult{ - Class: 0, - Confidence: 0.3, + mockCategoryModel.classifyWithProbsResult = candle_binding.ClassResultWithProbs{ + Class: 0, + Confidence: 0.3, + Probabilities: []float32{0.3, 0.35, 0.35}, + NumClasses: 3, } - category, score, err := classifier.ClassifyCategory("Ambiguous text") + category, score, _, err := classifier.ClassifyCategoryWithEntropy("Ambiguous text") Expect(err).ToNot(HaveOccurred()) Expect(category).To(Equal("")) @@ -159,9 +163,9 @@ var _ = Describe("category classification and model selection", func() { Context("when model inference fails", func() { It("should return empty category with zero score", func() { - mockCategoryModel.classifyError = errors.New("model inference failed") + mockCategoryModel.classifyWithProbsError = errors.New("model inference failed") - category, score, err := classifier.ClassifyCategory("Some text") + category, score, _, err := classifier.ClassifyCategoryWithEntropy("Some text") Expect(err).To(HaveOccurred()) Expect(err.Error()).To(ContainSubstring("classification error")) @@ -172,12 +176,14 @@ var _ = Describe("category classification and model selection", func() { Context("when input is empty or invalid", func() { It("should handle empty text gracefully", func() { - mockCategoryModel.classifyResult = candle_binding.ClassResult{ - Class: 0, - Confidence: 0.8, + mockCategoryModel.classifyWithProbsResult = candle_binding.ClassResultWithProbs{ + Class: 0, + Confidence: 0.8, + Probabilities: []float32{0.8, 0.1, 0.1}, + NumClasses: 3, } - category, score, err := classifier.ClassifyCategory("") + category, score, _, err := classifier.ClassifyCategoryWithEntropy("") Expect(err).ToNot(HaveOccurred()) Expect(category).To(Equal("technology")) @@ -187,12 +193,14 @@ var _ = Describe("category classification and model selection", func() { Context("when class index is not found in category mapping", func() { It("should handle invalid category mapping gracefully", func() { - mockCategoryModel.classifyResult = candle_binding.ClassResult{ - Class: 9, - Confidence: 0.8, + mockCategoryModel.classifyWithProbsResult = candle_binding.ClassResultWithProbs{ + Class: 9, + Confidence: 0.8, + Probabilities: []float32{0.1, 0.1, 0.0}, + NumClasses: 3, } - category, score, err := classifier.ClassifyCategory("Some text") + category, score, _, err := classifier.ClassifyCategoryWithEntropy("Some text") Expect(err).ToNot(HaveOccurred()) Expect(category).To(Equal("")) @@ -384,44 +392,6 @@ var _ = Describe("category classification and model selection", func() { }) }) - Describe("classify and select best model", func() { - It("should return the best model", func() { - mockCategoryModel.classifyResult = candle_binding.ClassResult{ - Class: 0, - Confidence: 0.9, - } - model := classifier.ClassifyAndSelectBestModel("Some text") - Expect(model).To(Equal("model-a")) - }) - - Context("when the categories are empty", func() { - It("should return the default model", func() { - classifier.Config.Categories = nil - model := classifier.ClassifyAndSelectBestModel("Some text") - Expect(model).To(Equal("default-model")) - }) - }) - - Context("when the classification fails", func() { - It("should return the default model", func() { - mockCategoryModel.classifyError = errors.New("classification failed") - model := classifier.ClassifyAndSelectBestModel("Some text") - Expect(model).To(Equal("default-model")) - }) - }) - - Context("when the category name is empty", func() { - It("should return the default model", func() { - mockCategoryModel.classifyResult = candle_binding.ClassResult{ - Class: 9, - Confidence: 0.9, - } - model := classifier.ClassifyAndSelectBestModel("Some text") - Expect(model).To(Equal("default-model")) - }) - }) - }) - Describe("internal helper methods", func() { type row struct { query string @@ -1349,11 +1319,16 @@ var _ = Describe("generic category mapping (MMLU-Pro -> generic)", func() { Expect(classifier.GenericToMMLU).To(HaveKeyWithValue("politics", ConsistOf("politics"))) }) - It("translates ClassifyCategory result to generic category", func() { + It("translates ClassifyCategoryWithEntropy result to generic category", func() { // Model returns class index 0 -> "Computer Science" (MMLU) which maps to generic "tech" - mockCategoryModel.classifyResult = candle_binding.ClassResult{Class: 0, Confidence: 0.92} + mockCategoryModel.classifyWithProbsResult = candle_binding.ClassResultWithProbs{ + Class: 0, + Confidence: 0.92, + Probabilities: []float32{0.92, 0.05, 0.03}, + NumClasses: 3, + } - category, score, err := classifier.ClassifyCategory("This text is about GPUs and compilers") + category, score, _, err := classifier.ClassifyCategoryWithEntropy("This text is about GPUs and compilers") Expect(err).ToNot(HaveOccurred()) Expect(category).To(Equal("tech")) Expect(score).To(BeNumerically("~", 0.92, 0.001)) @@ -1378,9 +1353,14 @@ var _ = Describe("generic category mapping (MMLU-Pro -> generic)", func() { It("falls back to identity when no mapping exists for an MMLU label", func() { // index 2 -> "politics" (no explicit mapping provided, but present in MMLU set) - mockCategoryModel.classifyResult = candle_binding.ClassResult{Class: 2, Confidence: 0.91} + mockCategoryModel.classifyWithProbsResult = candle_binding.ClassResultWithProbs{ + Class: 2, + Confidence: 0.91, + Probabilities: []float32{0.04, 0.05, 0.91}, + NumClasses: 3, + } - category, score, err := classifier.ClassifyCategory("This is a political debate") + category, score, _, err := classifier.ClassifyCategoryWithEntropy("This is a political debate") Expect(err).ToNot(HaveOccurred()) Expect(category).To(Equal("politics")) Expect(score).To(BeNumerically("~", 0.91, 0.001)) diff --git a/src/semantic-router/pkg/config/config.go b/src/semantic-router/pkg/config/config.go index 0b41de781..cb1546edd 100644 --- a/src/semantic-router/pkg/config/config.go +++ b/src/semantic-router/pkg/config/config.go @@ -216,7 +216,7 @@ type EmbeddingRule struct { Category string `yaml:"category"` SimilarityThreshold float32 `yaml:"threshold"` Keywords []string `yaml:"keywords"` - AggregationMethodConfiged AggregationMethod `yaml:"aggregation_mathod"` + AggregationMethodConfiged AggregationMethod `yaml:"aggregation_method"` Model string `json:"model,omitempty"` // "auto" (default), "qwen3", "gemma" Dimension int `json:"dimension,omitempty"` // Target dimension: 768 (default), 512, 256, 128 QualityPriority float32 `json:"quality_priority,omitempty"` // 0.0-1.0, only for "auto" model diff --git a/src/semantic-router/pkg/extproc/req_filter_classification.go b/src/semantic-router/pkg/extproc/req_filter_classification.go index 30b78cb12..e987792e7 100644 --- a/src/semantic-router/pkg/extproc/req_filter_classification.go +++ b/src/semantic-router/pkg/extproc/req_filter_classification.go @@ -39,9 +39,8 @@ func (r *OpenAIRouter) performClassificationAndModelSelection(originalModel stri // Perform entropy-based classification once catName, confidence, reasoningDec, err := r.Classifier.ClassifyCategoryWithEntropy(classificationText) if err != nil { - logging.Errorf("Entropy-based classification error: %v, falling back to simple classification", err) - // Fall back to simple classification - categoryName = r.findCategoryForClassification(classificationText) + logging.Errorf("Entropy-based classification error: %v, using empty category", err) + categoryName = "" classificationConfidence = 0.0 reasoningDecision = entropy.ReasoningDecision{} } else { @@ -60,18 +59,3 @@ func (r *OpenAIRouter) performClassificationAndModelSelection(originalModel stri return categoryName, classificationConfidence, reasoningDecision, selectedModel } - -// findCategoryForClassification determines the category for the given text using classification -func (r *OpenAIRouter) findCategoryForClassification(query string) string { - if len(r.CategoryDescriptions) == 0 { - return "" - } - - categoryName, _, err := r.Classifier.ClassifyCategory(query) - if err != nil { - logging.Errorf("Category classification error: %v", err) - return "" - } - - return categoryName -} diff --git a/src/semantic-router/pkg/services/classification.go b/src/semantic-router/pkg/services/classification.go index c82d931e6..f83ed9e59 100644 --- a/src/semantic-router/pkg/services/classification.go +++ b/src/semantic-router/pkg/services/classification.go @@ -202,7 +202,7 @@ func (s *ClassificationService) ClassifyIntent(req IntentRequest) (*IntentRespon } // Perform classification using the existing classifier - category, confidence, err := s.classifier.ClassifyCategory(req.Text) + category, confidence, _, err := s.classifier.ClassifyCategoryWithEntropy(req.Text) if err != nil { return nil, fmt.Errorf("classification failed: %w", err) }