diff --git a/config/config.development.yaml b/config/config.development.yaml
index 86458928..9c03ecdc 100644
--- a/config/config.development.yaml
+++ b/config/config.development.yaml
@@ -47,6 +47,9 @@ classifier:
 categories:
   - name: test
     system_prompt: "You are a test assistant."
+    # Example: Category-level cache settings
+    # semantic_cache_enabled: true
+    # semantic_cache_similarity_threshold: 0.85
     model_scores:
       - model: test-model
         score: 1.0
diff --git a/config/config.e2e.yaml b/config/config.e2e.yaml
index b588849f..60362cc7 100644
--- a/config/config.e2e.yaml
+++ b/config/config.e2e.yaml
@@ -107,6 +107,9 @@ categories:
         score: 0.4
         use_reasoning: false
   - name: psychology
+    # Example: Strict cache threshold for psychology - clinical nuances matter
+    # semantic_cache_enabled: true
+    # semantic_cache_similarity_threshold: 0.92
     model_scores:
       - model: "Model-A"
         score: 0.6
@@ -156,6 +159,9 @@ categories:
         score: 0.4
         use_reasoning: false
   - name: other
+    # Example: Lower threshold for general queries - better cache hit rate
+    # semantic_cache_enabled: true
+    # semantic_cache_similarity_threshold: 0.75
     model_scores:
       - model: "Model-B"
         score: 0.8
@@ -168,6 +174,9 @@ categories:
         score: 0.6
         use_reasoning: false
   - name: health
+    # Example: Very strict cache threshold for health - word changes matter medically
+    # semantic_cache_enabled: true
+    # semantic_cache_similarity_threshold: 0.95
     model_scores:
       - model: "Model-B"
         score: 0.8
diff --git a/config/config.production.yaml b/config/config.production.yaml
index 9c4dd4f8..2651a4a7 100644
--- a/config/config.production.yaml
+++ b/config/config.production.yaml
@@ -60,12 +60,18 @@ classifier:
 categories:
   - name: math
     system_prompt: "You are a mathematics expert. Provide step-by-step solutions."
+    # Example: High threshold for math - precision matters
+    # semantic_cache_enabled: true
+    # semantic_cache_similarity_threshold: 0.92
     model_scores:
       - model: openai/gpt-oss-20b
         score: 1.0
         use_reasoning: true
   - name: other
     system_prompt: "You are a helpful assistant."
+    # Example: Lower threshold for general queries - more cache hits
+    # semantic_cache_enabled: true
+    # semantic_cache_similarity_threshold: 0.75
     model_scores:
       - model: openai/gpt-oss-20b
         score: 0.7
diff --git a/config/config.recipe-accuracy.yaml b/config/config.recipe-accuracy.yaml
index 584b0291..96bd258b 100644
--- a/config/config.recipe-accuracy.yaml
+++ b/config/config.recipe-accuracy.yaml
@@ -87,6 +87,9 @@ categories:
         use_reasoning: true  # Enable reasoning for legal analysis
   - name: psychology
     system_prompt: "You are a psychology expert with deep knowledge of cognitive processes, behavioral patterns, mental health, developmental psychology, social psychology, and therapeutic approaches. Provide evidence-based insights grounded in psychological research and theory. When discussing mental health topics, emphasize the importance of professional consultation and avoid providing diagnostic or therapeutic advice."
+    # Category-level cache override (if global cache is enabled)
+    # semantic_cache_enabled: true
+    # semantic_cache_similarity_threshold: 0.92  # Strict for clinical nuances
     model_scores:
       - model: openai/gpt-oss-20b
         score: 1.0
@@ -117,6 +120,9 @@ categories:
         use_reasoning: false  # Default queries don't need reasoning
   - name: health
     system_prompt: "You are a health and medical information expert with knowledge of anatomy, physiology, diseases, treatments, preventive care, nutrition, and wellness. Provide accurate, evidence-based health information while emphasizing that your responses are for educational purposes only and should never replace professional medical advice, diagnosis, or treatment. Always encourage users to consult healthcare professionals for medical concerns and emergencies."
+    # Category-level cache override (if global cache is enabled)
+    # semantic_cache_enabled: true
+    # semantic_cache_similarity_threshold: 0.95  # Very strict - medical accuracy critical
     model_scores:
       - model: openai/gpt-oss-20b
         score: 1.0
diff --git a/config/config.recipe-latency.yaml b/config/config.recipe-latency.yaml
index ce31a36f..56a4bf29 100644
--- a/config/config.recipe-latency.yaml
+++ b/config/config.recipe-latency.yaml
@@ -105,6 +105,9 @@ categories:
         use_reasoning: false
   - name: other
     system_prompt: "Provide helpful responses."
+    # Category-level cache (optional, already enabled globally with low threshold)
+    # semantic_cache_enabled: true
+    # semantic_cache_similarity_threshold: 0.65  # Even lower for general queries
     model_scores:
       - model: openai/gpt-oss-20b
         score: 0.7
diff --git a/config/config.recipe-token-efficiency.yaml b/config/config.recipe-token-efficiency.yaml
index 49008db5..16a71f53 100644
--- a/config/config.recipe-token-efficiency.yaml
+++ b/config/config.recipe-token-efficiency.yaml
@@ -110,6 +110,9 @@ categories:
         use_reasoning: false
   - name: other
     system_prompt: "You are a helpful assistant. Provide concise, accurate responses."
+    # Category-level cache (optional, already enabled globally)
+    # semantic_cache_enabled: true
+    # semantic_cache_similarity_threshold: 0.7  # Match global or slightly lower
     model_scores:
       - model: openai/gpt-oss-20b
         score: 0.7
diff --git a/config/config.testing.yaml b/config/config.testing.yaml
index 91722f56..8e4b631f 100644
--- a/config/config.testing.yaml
+++ b/config/config.testing.yaml
@@ -42,6 +42,9 @@ model_config:
 
 categories:
   - name: other
+    # Category-level cache settings (optional - falls back to global if not set)
+    # semantic_cache_enabled: true
+    # semantic_cache_similarity_threshold: 0.8
     model_scores:
       - model: openai/gpt-oss-20b
         score: 0.7
diff --git a/config/config.yaml b/config/config.yaml
index 667e41f8..279feb67 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -74,6 +74,8 @@ categories:
         use_reasoning: false
   - name: psychology
     system_prompt: "You are a psychology expert with deep knowledge of cognitive processes, behavioral patterns, mental health, developmental psychology, social psychology, and therapeutic approaches. Provide evidence-based insights grounded in psychological research and theory. When discussing mental health topics, emphasize the importance of professional consultation and avoid providing diagnostic or therapeutic advice."
+    semantic_cache_enabled: true
+    semantic_cache_similarity_threshold: 0.92  # High threshold for psychology - sensitive to nuances
     model_scores:
       - model: qwen3
         score: 0.6
@@ -98,12 +100,16 @@ categories:
         use_reasoning: false
   - name: other
     system_prompt: "You are a helpful and knowledgeable assistant. Provide accurate, helpful responses across a wide range of topics."
+    semantic_cache_enabled: true
+    semantic_cache_similarity_threshold: 0.75  # Lower threshold for general chat - less sensitive to wording
     model_scores:
       - model: qwen3
         score: 0.7
         use_reasoning: false
   - name: health
     system_prompt: "You are a health and medical information expert with knowledge of anatomy, physiology, diseases, treatments, preventive care, nutrition, and wellness. Provide accurate, evidence-based health information while emphasizing that your responses are for educational purposes only and should never replace professional medical advice, diagnosis, or treatment. Always encourage users to consult healthcare professionals for medical concerns and emergencies."
+    semantic_cache_enabled: true
+    semantic_cache_similarity_threshold: 0.95  # Very high threshold for health - small wording changes matter
     model_scores:
       - model: qwen3
         score: 0.5
diff --git a/src/semantic-router/pkg/cache/cache_interface.go b/src/semantic-router/pkg/cache/cache_interface.go
index f35e165c..fcdf0073 100644
--- a/src/semantic-router/pkg/cache/cache_interface.go
+++ b/src/semantic-router/pkg/cache/cache_interface.go
@@ -33,6 +33,11 @@ type CacheBackend interface {
 	// Returns the cached response, match status, and any error
 	FindSimilar(model string, query string) ([]byte, bool, error)
 
+	// FindSimilarWithThreshold searches for semantically similar cached requests using a specific threshold
+	// This allows category-specific similarity thresholds
+	// Returns the cached response, match status, and any error
+	FindSimilarWithThreshold(model string, query string, threshold float32) ([]byte, bool, error)
+
 	// Close releases all resources held by the cache backend
 	Close() error
 
diff --git a/src/semantic-router/pkg/cache/inmemory_cache.go b/src/semantic-router/pkg/cache/inmemory_cache.go
index 10386420..5820c5f8 100644
--- a/src/semantic-router/pkg/cache/inmemory_cache.go
+++ b/src/semantic-router/pkg/cache/inmemory_cache.go
@@ -207,20 +207,25 @@ func (c *InMemoryCache) AddEntry(requestID string, model string, query string, r
 	return nil
 }
 
-// FindSimilar searches for semantically similar cached requests
+// FindSimilar searches for semantically similar cached requests using the default threshold
 func (c *InMemoryCache) FindSimilar(model string, query string) ([]byte, bool, error) {
+	return c.FindSimilarWithThreshold(model, query, c.similarityThreshold)
+}
+
+// FindSimilarWithThreshold searches for semantically similar cached requests using a specific threshold
+func (c *InMemoryCache) FindSimilarWithThreshold(model string, query string, threshold float32) ([]byte, bool, error) {
 	start := time.Now()
 
 	if !c.enabled {
-		observability.Debugf("InMemoryCache.FindSimilar: cache disabled")
+		observability.Debugf("InMemoryCache.FindSimilarWithThreshold: cache disabled")
 		return nil, false, nil
 	}
 	queryPreview := query
 	if len(query) > 50 {
 		queryPreview = query[:50] + "..."
 	}
-	observability.Debugf("InMemoryCache.FindSimilar: searching for model='%s', query='%s' (len=%d chars)",
-		model, queryPreview, len(query))
+	observability.Debugf("InMemoryCache.FindSimilarWithThreshold: searching for model='%s', query='%s' (len=%d chars), threshold=%.4f",
+		model, queryPreview, len(query), threshold)
 
 	// Generate semantic embedding for similarity comparison
 	queryEmbedding, err := candle_binding.GetEmbedding(query, 0) // Auto-detect dimension
@@ -237,7 +242,7 @@ func (c *InMemoryCache) FindSimilar(model string, query string) ([]byte, bool, e
 		entriesChecked int
 		expiredCount   int
 	)
-	// Capture the lookup time after acquiring the read lock so TTL checks aren’t skewed by embedding work or lock wait
+	// Capture the lookup time after acquiring the read lock so TTL checks aren't skewed by embedding work or lock wait
 	now := time.Now()
 
 	// Compare with completed entries for the same model, tracking only the best match
@@ -292,26 +297,26 @@ func (c *InMemoryCache) FindSimilar(model string, query string) ([]byte, bool, e
 	// Handle case where no suitable entries exist
 	if bestIndex < 0 {
 		atomic.AddInt64(&c.missCount, 1)
-		observability.Debugf("InMemoryCache.FindSimilar: no entries found with responses")
+		observability.Debugf("InMemoryCache.FindSimilarWithThreshold: no entries found with responses")
 		metrics.RecordCacheOperation("memory", "find_similar", "miss", time.Since(start).Seconds())
 		metrics.RecordCacheMiss()
 		return nil, false, nil
 	}
 
 	// Check if the best match meets the similarity threshold
-	if bestSimilarity >= c.similarityThreshold {
+	if bestSimilarity >= threshold {
 		atomic.AddInt64(&c.hitCount, 1)
 
 		c.mu.Lock()
 		c.updateAccessInfo(bestIndex, bestEntry)
 		c.mu.Unlock()
 
-		observability.Debugf("InMemoryCache.FindSimilar: CACHE HIT - similarity=%.4f >= threshold=%.4f, response_size=%d bytes",
-			bestSimilarity, c.similarityThreshold, len(bestEntry.ResponseBody))
+		observability.Debugf("InMemoryCache.FindSimilarWithThreshold: CACHE HIT - similarity=%.4f >= threshold=%.4f, response_size=%d bytes",
+			bestSimilarity, threshold, len(bestEntry.ResponseBody))
 		observability.LogEvent("cache_hit", map[string]interface{}{
 			"backend":    "memory",
 			"similarity": bestSimilarity,
-			"threshold":  c.similarityThreshold,
+			"threshold":  threshold,
 			"model":      model,
 		})
 		metrics.RecordCacheOperation("memory", "find_similar", "hit", time.Since(start).Seconds())
@@ -320,12 +325,12 @@ func (c *InMemoryCache) FindSimilar(model string, query string) ([]byte, bool, e
 	}
 
 	atomic.AddInt64(&c.missCount, 1)
-	observability.Debugf("InMemoryCache.FindSimilar: CACHE MISS - best_similarity=%.4f < threshold=%.4f (checked %d entries)",
-		bestSimilarity, c.similarityThreshold, entriesChecked)
+	observability.Debugf("InMemoryCache.FindSimilarWithThreshold: CACHE MISS - best_similarity=%.4f < threshold=%.4f (checked %d entries)",
+		bestSimilarity, threshold, entriesChecked)
 	observability.LogEvent("cache_miss", map[string]interface{}{
 		"backend":         "memory",
 		"best_similarity": bestSimilarity,
-		"threshold":       c.similarityThreshold,
+		"threshold":       threshold,
 		"model":           model,
 		"entries_checked": entriesChecked,
 	})
diff --git a/src/semantic-router/pkg/cache/milvus_cache.go b/src/semantic-router/pkg/cache/milvus_cache.go
index 4af89184..372c0656 100644
--- a/src/semantic-router/pkg/cache/milvus_cache.go
+++ b/src/semantic-router/pkg/cache/milvus_cache.go
@@ -487,18 +487,23 @@ func (c *MilvusCache) addEntry(id string, requestID string, model string, query
 
-// FindSimilar searches for semantically similar cached requests
+// FindSimilar searches for semantically similar cached requests using the default threshold
 func (c *MilvusCache) FindSimilar(model string, query string) ([]byte, bool, error) {
+	return c.FindSimilarWithThreshold(model, query, c.similarityThreshold)
+}
+
+// FindSimilarWithThreshold searches for semantically similar cached requests using a specific threshold
+func (c *MilvusCache) FindSimilarWithThreshold(model string, query string, threshold float32) ([]byte, bool, error) {
 	start := time.Now()
 
 	if !c.enabled {
-		observability.Debugf("MilvusCache.FindSimilar: cache disabled")
+		observability.Debugf("MilvusCache.FindSimilarWithThreshold: cache disabled")
 		return nil, false, nil
 	}
 	queryPreview := query
 	if len(query) > 50 {
 		queryPreview = query[:50] + "..."
 	}
-	observability.Debugf("MilvusCache.FindSimilar: searching for model='%s', query='%s' (len=%d chars)",
-		model, queryPreview, len(query))
+	observability.Debugf("MilvusCache.FindSimilarWithThreshold: searching for model='%s', query='%s' (len=%d chars), threshold=%.4f",
+		model, queryPreview, len(query), threshold)
 
 	// Generate semantic embedding for similarity comparison
 	queryEmbedding, err := candle_binding.GetEmbedding(query, 0) // Auto-detect dimension
@@ -529,7 +534,7 @@ func (c *MilvusCache) FindSimilar(model string, query string) ([]byte, bool, err
 		searchParam,
 	)
 	if err != nil {
-		observability.Debugf("MilvusCache.FindSimilar: search failed: %v", err)
+		observability.Debugf("MilvusCache.FindSimilarWithThreshold: search failed: %v", err)
 		atomic.AddInt64(&c.missCount, 1)
 		metrics.RecordCacheOperation("milvus", "find_similar", "error", time.Since(start).Seconds())
 		metrics.RecordCacheMiss()
@@ -538,21 +543,21 @@ func (c *MilvusCache) FindSimilar(model string, query string) ([]byte, bool, err
 
 	if len(searchResult) == 0 || searchResult[0].ResultCount == 0 {
 		atomic.AddInt64(&c.missCount, 1)
-		observability.Debugf("MilvusCache.FindSimilar: no entries found")
+		observability.Debugf("MilvusCache.FindSimilarWithThreshold: no entries found")
 		metrics.RecordCacheOperation("milvus", "find_similar", "miss", time.Since(start).Seconds())
 		metrics.RecordCacheMiss()
 		return nil, false, nil
 	}
 
 	bestScore := searchResult[0].Scores[0]
-	if bestScore < c.similarityThreshold {
+	if bestScore < threshold {
 		atomic.AddInt64(&c.missCount, 1)
-		observability.Debugf("MilvusCache.FindSimilar: CACHE MISS - best_similarity=%.4f < threshold=%.4f",
-			bestScore, c.similarityThreshold)
+		observability.Debugf("MilvusCache.FindSimilarWithThreshold: CACHE MISS - best_similarity=%.4f < threshold=%.4f",
+			bestScore, threshold)
 		observability.LogEvent("cache_miss", map[string]interface{}{
 			"backend":         "milvus",
 			"best_similarity": bestScore,
-			"threshold":       c.similarityThreshold,
+			"threshold":       threshold,
 			"model":           model,
 			"collection":      c.collectionName,
 		})
@@ -569,7 +574,7 @@ func (c *MilvusCache) FindSimilar(model string, query string) ([]byte, bool, err
 	}
 
 	if responseBody == nil {
-		observability.Debugf("MilvusCache.FindSimilar: cache hit but response_body is missing or not a string")
+		observability.Debugf("MilvusCache.FindSimilarWithThreshold: cache hit but response_body is missing or not a string")
 		atomic.AddInt64(&c.missCount, 1)
 		metrics.RecordCacheOperation("milvus", "find_similar", "error", time.Since(start).Seconds())
 		metrics.RecordCacheMiss()
@@ -577,12 +582,12 @@ func (c *MilvusCache) FindSimilar(model string, query string) ([]byte, bool, err
 	}
 
 	atomic.AddInt64(&c.hitCount, 1)
-	observability.Debugf("MilvusCache.FindSimilar: CACHE HIT - similarity=%.4f >= threshold=%.4f, response_size=%d bytes",
-		bestScore, c.similarityThreshold, len(responseBody))
+	observability.Debugf("MilvusCache.FindSimilarWithThreshold: CACHE HIT - similarity=%.4f >= threshold=%.4f, response_size=%d bytes",
+		bestScore, threshold, len(responseBody))
 	observability.LogEvent("cache_hit", map[string]interface{}{
 		"backend":    "milvus",
 		"similarity": bestScore,
-		"threshold":  c.similarityThreshold,
+		"threshold":  threshold,
 		"model":      model,
 		"collection": c.collectionName,
 	})
diff --git a/src/semantic-router/pkg/config/config.go b/src/semantic-router/pkg/config/config.go
index e8c09e7d..9766d473 100644
--- a/src/semantic-router/pkg/config/config.go
+++ b/src/semantic-router/pkg/config/config.go
@@ -364,6 +364,12 @@ type Category struct {
 	// "replace": Replace any existing system message with the category-specific prompt
 	// "insert": Prepend the category-specific prompt to the existing system message content
 	SystemPromptMode string `yaml:"system_prompt_mode,omitempty"`
+	// SemanticCacheEnabled controls whether semantic caching is enabled for this category
+	// If nil, inherits from global SemanticCache.Enabled setting
+	SemanticCacheEnabled *bool `yaml:"semantic_cache_enabled,omitempty"`
+	// SemanticCacheSimilarityThreshold defines the minimum similarity score for cache hits (0.0-1.0)
+	// If nil, uses the global threshold from SemanticCache.SimilarityThreshold or BertModel.Threshold
+	SemanticCacheSimilarityThreshold *float32 `yaml:"semantic_cache_similarity_threshold,omitempty"`
 }
 
 // GetModelReasoningFamily returns the reasoning family configuration for a given model name
@@ -419,6 +425,11 @@ func BoolPtr(b bool) *bool {
 	return &b
 }
 
+// Float32Ptr returns a pointer to a copy of f, for setting optional *float32 config fields (companion to BoolPtr)
+func Float32Ptr(f float32) *float32 {
+	return &f
+}
+
 // validateConfigStructure performs additional validation on the parsed config
 func validateConfigStructure(cfg *RouterConfig) error {
 	// Ensure all categories have at least one model with scores
@@ -782,3 +793,25 @@ func (c *RouterConfig) GetCategoryByName(name string) *Category {
 	}
 	return nil
 }
+
+// IsCacheEnabledForCategory reports whether semantic caching applies to the named category.
+// A category's explicit SemanticCacheEnabled wins; unset or unknown categories use SemanticCache.Enabled.
+func (c *RouterConfig) IsCacheEnabledForCategory(categoryName string) bool {
+	if cat := c.GetCategoryByName(categoryName); cat != nil {
+		if override := cat.SemanticCacheEnabled; override != nil {
+			return *override
+		}
+	}
+	return c.SemanticCache.Enabled
+}
+
+// GetCacheSimilarityThresholdForCategory resolves the cache-hit similarity threshold for the named category.
+// Precedence: category SemanticCacheSimilarityThreshold > global semantic_cache > bert_model threshold.
+func (c *RouterConfig) GetCacheSimilarityThresholdForCategory(categoryName string) float32 {
+	if cat := c.GetCategoryByName(categoryName); cat != nil {
+		if override := cat.SemanticCacheSimilarityThreshold; override != nil {
+			return *override
+		}
+	}
+	return c.GetCacheSimilarityThreshold()
+}
diff --git a/src/semantic-router/pkg/config/config_test.go b/src/semantic-router/pkg/config/config_test.go
index 657d18f1..8a34f399 100644
--- a/src/semantic-router/pkg/config/config_test.go
+++ b/src/semantic-router/pkg/config/config_test.go
@@ -1753,4 +1753,161 @@ default_model: "test-model"
 			})
 		})
 	})
+
+	Describe("Category-Level Cache Settings", func() {
+		Context("with category-specific cache configuration", func() {
+			It("should use category-specific cache enabled setting", func() {
+				yamlContent := `
+bert_model:
+  model_id: "test-model"
+  threshold: 0.7
+
+semantic_cache:
+  enabled: true
+  similarity_threshold: 0.8
+
+categories:
+  - name: health
+    semantic_cache_enabled: true
+    semantic_cache_similarity_threshold: 0.95
+    model_scores:
+      - model: test-model
+        score: 1.0
+        use_reasoning: false
+  - name: general
+    semantic_cache_enabled: false
+    model_scores:
+      - model: test-model
+        score: 1.0
+        use_reasoning: false
+  - name: other
+    model_scores:
+      - model: test-model
+        score: 1.0
+        use_reasoning: false
+`
+				var cfg config.RouterConfig
+				err := yaml.Unmarshal([]byte(yamlContent), &cfg)
+				Expect(err).NotTo(HaveOccurred())
+
+				// Test category-specific enabled settings
+				Expect(cfg.IsCacheEnabledForCategory("health")).To(BeTrue())
+				Expect(cfg.IsCacheEnabledForCategory("general")).To(BeFalse())
+				// "other" should fall back to global setting
+				Expect(cfg.IsCacheEnabledForCategory("other")).To(BeTrue())
+				// Unknown category should also fall back to global
+				Expect(cfg.IsCacheEnabledForCategory("unknown")).To(BeTrue())
+			})
+
+			It("should use category-specific similarity thresholds", func() {
+				yamlContent := `
+bert_model:
+  model_id: "test-model"
+  threshold: 0.7
+
+semantic_cache:
+  enabled: true
+  similarity_threshold: 0.8
+
+categories:
+  - name: health
+    semantic_cache_similarity_threshold: 0.95
+    model_scores:
+      - model: test-model
+        score: 1.0
+        use_reasoning: false
+  - name: psychology
+    semantic_cache_similarity_threshold: 0.92
+    model_scores:
+      - model: test-model
+        score: 1.0
+        use_reasoning: false
+  - name: other
+    semantic_cache_similarity_threshold: 0.75
+    model_scores:
+      - model: test-model
+        score: 1.0
+        use_reasoning: false
+  - name: general
+    model_scores:
+      - model: test-model
+        score: 1.0
+        use_reasoning: false
+`
+				var cfg config.RouterConfig
+				err := yaml.Unmarshal([]byte(yamlContent), &cfg)
+				Expect(err).NotTo(HaveOccurred())
+
+				// Test category-specific thresholds
+				Expect(cfg.GetCacheSimilarityThresholdForCategory("health")).To(Equal(float32(0.95)))
+				Expect(cfg.GetCacheSimilarityThresholdForCategory("psychology")).To(Equal(float32(0.92)))
+				Expect(cfg.GetCacheSimilarityThresholdForCategory("other")).To(Equal(float32(0.75)))
+				// "general" should fall back to global semantic_cache threshold
+				Expect(cfg.GetCacheSimilarityThresholdForCategory("general")).To(Equal(float32(0.8)))
+				// Unknown category should also fall back
+				Expect(cfg.GetCacheSimilarityThresholdForCategory("unknown")).To(Equal(float32(0.8)))
+			})
+
+			It("should fall back to bert threshold when semantic_cache threshold is not set", func() {
+				yamlContent := `
+bert_model:
+  model_id: "test-model"
+  threshold: 0.6
+
+semantic_cache:
+  enabled: true
+
+categories:
+  - name: test
+    model_scores:
+      - model: test-model
+        score: 1.0
+        use_reasoning: false
+`
+				var cfg config.RouterConfig
+				err := yaml.Unmarshal([]byte(yamlContent), &cfg)
+				Expect(err).NotTo(HaveOccurred())
+
+				// Should fall back to bert_model.threshold
+				Expect(cfg.GetCacheSimilarityThresholdForCategory("test")).To(Equal(float32(0.6)))
+				Expect(cfg.GetCacheSimilarityThreshold()).To(Equal(float32(0.6)))
+			})
+
+			It("should handle nil pointers for optional cache settings", func() {
+				category := config.Category{
+					Name: "test",
+					ModelScores: []config.ModelScore{
+						{Model: "test", Score: 1.0, UseReasoning: config.BoolPtr(false)},
+					},
+				}
+
+				cfg := &config.RouterConfig{
+					SemanticCache: struct {
+						BackendType         string   `yaml:"backend_type,omitempty"`
+						Enabled             bool     `yaml:"enabled"`
+						SimilarityThreshold *float32 `yaml:"similarity_threshold,omitempty"`
+						MaxEntries          int      `yaml:"max_entries,omitempty"`
+						TTLSeconds          int      `yaml:"ttl_seconds,omitempty"`
+						EvictionPolicy      string   `yaml:"eviction_policy,omitempty"`
+						BackendConfigPath   string   `yaml:"backend_config_path,omitempty"`
+					}{
+						Enabled:             true,
+						SimilarityThreshold: config.Float32Ptr(0.8),
+					},
+					BertModel: struct {
+						ModelID   string  `yaml:"model_id"`
+						Threshold float32 `yaml:"threshold"`
+						UseCPU    bool    `yaml:"use_cpu"`
+					}{
+						Threshold: 0.7,
+					},
+					Categories: []config.Category{category},
+				}
+
+				// Nil values should use defaults
+				Expect(cfg.IsCacheEnabledForCategory("test")).To(BeTrue())
+				Expect(cfg.GetCacheSimilarityThresholdForCategory("test")).To(Equal(float32(0.8)))
+			})
+		})
+	})
 })
diff --git a/src/semantic-router/pkg/extproc/caching_test.go b/src/semantic-router/pkg/extproc/caching_test.go
index fcd4dc79..5f345b79 100644
--- a/src/semantic-router/pkg/extproc/caching_test.go
+++ b/src/semantic-router/pkg/extproc/caching_test.go
@@ -250,4 +250,59 @@ var _ = Describe("Caching Functionality", func() {
 			Expect(response.GetRequestBody().Response.Status).To(Equal(ext_proc.CommonResponse_CONTINUE))
 		})
 	})
+
+	Describe("Category-Specific Caching", func() {
+		It("should use category-specific cache settings", func() {
+			// Create a config with category-specific cache settings
+			cfg := CreateTestConfig()
+			cfg.SemanticCache.Enabled = true
+			cfg.SemanticCache.SimilarityThreshold = config.Float32Ptr(0.8)
+
+			// Add categories with different cache settings
+			cfg.Categories = []config.Category{
+				{
+					Name: "health",
+					ModelScores: []config.ModelScore{
+						{Model: "model-a", Score: 1.0, UseReasoning: config.BoolPtr(false)},
+					},
+					SemanticCacheEnabled:             config.BoolPtr(true),
+					SemanticCacheSimilarityThreshold: config.Float32Ptr(0.95),
+				},
+				{
+					Name: "general",
+					ModelScores: []config.ModelScore{
+						{Model: "model-a", Score: 1.0, UseReasoning: config.BoolPtr(false)},
+					},
+					SemanticCacheEnabled:             config.BoolPtr(false),
+					SemanticCacheSimilarityThreshold: config.Float32Ptr(0.7),
+				},
+			}
+
+			// Verify category cache settings are correct
+			Expect(cfg.IsCacheEnabledForCategory("health")).To(BeTrue())
+			Expect(cfg.IsCacheEnabledForCategory("general")).To(BeFalse())
+			Expect(cfg.GetCacheSimilarityThresholdForCategory("health")).To(Equal(float32(0.95)))
+			Expect(cfg.GetCacheSimilarityThresholdForCategory("general")).To(Equal(float32(0.7)))
+		})
+
+		It("should fall back to global settings when category doesn't specify", func() {
+			cfg := CreateTestConfig()
+			cfg.SemanticCache.Enabled = true
+			cfg.SemanticCache.SimilarityThreshold = config.Float32Ptr(0.8)
+
+			// Add category without cache settings
+			cfg.Categories = []config.Category{
+				{
+					Name: "test",
+					ModelScores: []config.ModelScore{
+						{Model: "model-a", Score: 1.0, UseReasoning: config.BoolPtr(false)},
+					},
+				},
+			}
+
+			// Should use global settings
+			Expect(cfg.IsCacheEnabledForCategory("test")).To(BeTrue())
+			Expect(cfg.GetCacheSimilarityThresholdForCategory("test")).To(Equal(float32(0.8)))
+		})
+	})
 })
diff --git a/src/semantic-router/pkg/extproc/request_handler.go b/src/semantic-router/pkg/extproc/request_handler.go
index 8d375ba6..e90f4745 100644
--- a/src/semantic-router/pkg/extproc/request_handler.go
+++ b/src/semantic-router/pkg/extproc/request_handler.go
@@ -401,8 +401,24 @@ func (r *OpenAIRouter) handleRequestBody(v *ext_proc.ProcessingRequest_RequestBo
 		return response, nil
 	}
 
-	// Handle caching
-	if response, shouldReturn := r.handleCaching(ctx); shouldReturn {
+	// Classify the request early to determine category for cache settings
+	var categoryName string
+	if r.Config != nil && r.Config.IsAutoModelName(originalModel) && (len(nonUserMessages) > 0 || userContent != "") {
+		// Determine text to use for classification
+		var classificationText string
+		if len(userContent) > 0 {
+			classificationText = userContent
+		} else if len(nonUserMessages) > 0 {
+			classificationText = strings.Join(nonUserMessages, " ")
+		}
+		if classificationText != "" {
+			categoryName = r.findCategoryForClassification(classificationText)
+			observability.Debugf("Classified request to category: %s", categoryName)
+		}
+	}
+
+	// Handle caching with category-specific settings
+	if response, shouldReturn := r.handleCaching(ctx, categoryName); shouldReturn {
 		return response, nil
 	}
 
@@ -476,8 +492,8 @@ func (r *OpenAIRouter) performSecurityChecks(ctx *RequestContext, userContent st
 	return nil, false
 }
 
-// handleCaching handles cache lookup and storage
-func (r *OpenAIRouter) handleCaching(ctx *RequestContext) (*ext_proc.ProcessingResponse, bool) {
+// handleCaching handles cache lookup and storage with category-specific settings
+func (r *OpenAIRouter) handleCaching(ctx *RequestContext, categoryName string) (*ext_proc.ProcessingResponse, bool) {
 	// Extract the model and query for cache lookup
 	requestModel, requestQuery, err := cache.ExtractQueryFromOpenAIRequest(ctx.OriginalRequestBody)
 	if err != nil {
@@ -489,20 +505,34 @@ func (r *OpenAIRouter) handleCaching(ctx *RequestContext) (*ext_proc.ProcessingR
 	ctx.RequestModel = requestModel
 	ctx.RequestQuery = requestQuery
 
-	if requestQuery != "" && r.Cache.IsEnabled() {
+	// Check if caching is enabled for this category
+	cacheEnabled := r.Config.SemanticCache.Enabled
+	if categoryName != "" {
+		cacheEnabled = r.Config.IsCacheEnabledForCategory(categoryName)
+	}
+
+	if requestQuery != "" && r.Cache.IsEnabled() && cacheEnabled {
+		// Get category-specific threshold
+		threshold := r.Config.GetCacheSimilarityThreshold()
+		if categoryName != "" {
+			threshold = r.Config.GetCacheSimilarityThresholdForCategory(categoryName)
+		}
+
 		// Start cache lookup span
 		spanCtx, span := observability.StartSpan(ctx.TraceContext, observability.SpanCacheLookup)
 		defer span.End()
 
 		startTime := time.Now()
-		// Try to find a similar cached response
-		cachedResponse, found, cacheErr := r.Cache.FindSimilar(requestModel, requestQuery)
+		// Try to find a similar cached response using category-specific threshold
+		cachedResponse, found, cacheErr := r.Cache.FindSimilarWithThreshold(requestModel, requestQuery, threshold)
 		lookupTime := time.Since(startTime).Milliseconds()
 
 		observability.SetSpanAttributes(span,
 			attribute.String(observability.AttrCacheKey, requestQuery),
 			attribute.Bool(observability.AttrCacheHit, found),
-			attribute.Int64(observability.AttrCacheLookupTimeMs, lookupTime))
+			attribute.Int64(observability.AttrCacheLookupTimeMs, lookupTime),
+			attribute.String(observability.AttrCategoryName, categoryName),
+			attribute.Float64("cache.threshold", float64(threshold)))
 
 		if cacheErr != nil {
 			observability.Errorf("Error searching cache: %v", cacheErr)
@@ -515,6 +545,8 @@ func (r *OpenAIRouter) handleCaching(ctx *RequestContext) (*ext_proc.ProcessingR
 				"request_id": ctx.RequestID,
 				"model":      requestModel,
 				"query":      requestQuery,
+				"category":   categoryName,
+				"threshold":  threshold,
 			})
 			// Return immediate response from cache
 			response := http.CreateCacheHitResponse(cachedResponse, ctx.ExpectStreamingResponse)
diff --git a/website/docs/installation/configuration.md b/website/docs/installation/configuration.md
index 340ad847..37d0f8e0 100644
--- a/website/docs/installation/configuration.md
+++ b/website/docs/installation/configuration.md
@@ -23,7 +23,7 @@ bert_model:
 semantic_cache:
   backend_type: "memory"  # Options: "memory" or "milvus"
   enabled: false
-  similarity_threshold: 0.8
+  similarity_threshold: 0.8  # Global default threshold
   max_entries: 1000
   ttl_seconds: 3600
   eviction_policy: "fifo"  # Options: "fifo", "lru", "lfu"
@@ -81,6 +81,9 @@ categories:
   - model: your-model
     score: 1.0
     use_reasoning: true  # Enable reasoning for math problems
+  # Optional: Category-level cache settings
+  # semantic_cache_enabled: true
+  # semantic_cache_similarity_threshold: 0.9  # Higher threshold for math
 - name: computer science
   model_scores:
   - model: your-model
@@ -91,6 +94,7 @@ categories:
   - model: your-model
     score: 0.8
     use_reasoning: false # No reasoning for general queries
+  # semantic_cache_similarity_threshold: 0.75  # Lower threshold for general queries
 
 default_model: your-model
 
@@ -457,13 +461,38 @@ Configure additional features:
 ```yaml
 # Semantic Caching
 semantic_cache:
-  enabled: true                   # Enable semantic caching
+  enabled: true                   # Enable semantic caching globally
   backend_type: "memory"          # Options: "memory" or "milvus"
-  similarity_threshold: 0.8       # Cache hit threshold
+  similarity_threshold: 0.8       # Global default cache hit threshold
   max_entries: 1000               # Maximum cache entries
   ttl_seconds: 3600               # Cache expiration time
   eviction_policy: "fifo"         # Options: "fifo", "lru", "lfu"
 
+# Category-Level Cache Configuration (New)
+# Override global cache settings for specific categories
+categories:
+  - name: health
+    semantic_cache_enabled: true
+    semantic_cache_similarity_threshold: 0.95  # Very strict - medical accuracy critical
+    model_scores:
+      - model: your-model
+        score: 0.5
+        use_reasoning: false
+  
+  - name: general_chat
+    semantic_cache_similarity_threshold: 0.75  # Relaxed for better cache hits
+    model_scores:
+      - model: your-model
+        score: 0.7
+        use_reasoning: false
+  
+  - name: troubleshooting
+    # No cache settings - uses global default (0.8)
+    model_scores:
+      - model: your-model
+        score: 0.7
+        use_reasoning: false
+
 # Tool Auto-Selection
 tools:
   enabled: true                    # Enable automatic tool selection
@@ -604,6 +633,112 @@ batch_size_ranges:
 
 Access metrics at: `http://localhost:9190/metrics`
 
+## Category-Level Cache Configuration
+
+**NEW**: Configure semantic cache settings at the category level for fine-grained control over caching behavior.
+
+### Why Use Category-Level Cache Settings?
+
+Different categories have different tolerance for semantic variations:
+
+- **Sensitive categories** (health, psychology, law): Small wording changes can significantly alter meaning, so these categories require high similarity thresholds (0.92-0.95).
+- **General categories** (chat, troubleshooting): These are less sensitive to minor wording changes and can use lower thresholds (0.75-0.82) for better cache hit rates.
+- **Privacy categories**: These may need caching disabled entirely for compliance or security reasons.
+
+### Configuration Examples
+
+#### Example 1: Mixed Thresholds for Different Categories
+
+```yaml
+semantic_cache:
+  enabled: true
+  backend_type: "memory"
+  similarity_threshold: 0.8  # Global default
+
+categories:
+  - name: health
+    system_prompt: "You are a health expert..."
+    semantic_cache_enabled: true
+    semantic_cache_similarity_threshold: 0.95  # Very strict - "headache" and "severe headache" should be treated as different queries
+    model_scores:
+      - model: your-model
+        score: 0.5
+        use_reasoning: false
+
+  - name: psychology
+    system_prompt: "You are a psychology expert..."
+    semantic_cache_similarity_threshold: 0.92  # Strict - clinical nuances matter
+    model_scores:
+      - model: your-model
+        score: 0.6
+        use_reasoning: false
+
+  - name: general_chat
+    system_prompt: "You are a helpful assistant..."
+    semantic_cache_similarity_threshold: 0.75  # Relaxed - "how's the weather" can match "what's the weather"
+    model_scores:
+      - model: your-model
+        score: 0.7
+        use_reasoning: false
+
+  - name: troubleshooting
+    system_prompt: "You are a tech support expert..."
+    # No cache settings - uses global threshold of 0.8
+    model_scores:
+      - model: your-model
+        score: 0.7
+        use_reasoning: false
+```
+
+#### Example 2: Disable Cache for Sensitive Data
+
+```yaml
+categories:
+  - name: personal_data
+    system_prompt: "Handle personal information..."
+    semantic_cache_enabled: false  # Disable cache entirely for privacy
+    model_scores:
+      - model: your-model
+        score: 0.8
+        use_reasoning: false
+```
+
+### Configuration Options
+
+**Category-Level Fields:**
+
+- `semantic_cache_enabled` (optional, boolean): Enable/disable caching for this category. If not specified, inherits from global `semantic_cache.enabled`.
+- `semantic_cache_similarity_threshold` (optional, float 0.0-1.0): Minimum similarity score for cache hits in this category. If not specified, inherits from global `semantic_cache.similarity_threshold`.
+
+**Fallback Hierarchy (similarity threshold):**
+
+1. Category-specific `semantic_cache_similarity_threshold` (if set)
+2. Global `semantic_cache.similarity_threshold` (if set)
+3. `bert_model.threshold` (final fallback)
+
+### Best Practices
+
+**Threshold Selection:**
+
+- **High precision (0.92-0.95)**: health, psychology, law, finance
+- **Medium precision (0.85-0.90)**: technical documentation, education
+- **Lower precision (0.75-0.82)**: general chat, FAQs, troubleshooting
+
+**Privacy and Compliance:**
+
+- Disable caching (`semantic_cache_enabled: false`) for categories handling:
+  - Personally identifiable information (PII)
+  - Financial data
+  - Health records
+  - Sensitive business information
+
+**Performance Tuning:**
+
+- Start with conservative (higher) thresholds
+- Monitor cache hit rates per category
+- Lower thresholds for categories with low hit rates
+- Raise thresholds for categories with incorrect cache hits
+
 ## Common Configuration Examples
 
 ### Enable All Security Features
diff --git a/website/docs/tutorials/semantic-cache/in-memory-cache.md b/website/docs/tutorials/semantic-cache/in-memory-cache.md
index 56214c87..4ba99a8a 100644
--- a/website/docs/tutorials/semantic-cache/in-memory-cache.md
+++ b/website/docs/tutorials/semantic-cache/in-memory-cache.md
@@ -44,23 +44,71 @@ graph TB
 semantic_cache:
   enabled: true
   backend_type: "memory"
-  similarity_threshold: 0.8
+  similarity_threshold: 0.8       # Global default threshold
   max_entries: 1000
   ttl_seconds: 3600
   eviction_policy: "fifo"
 ```
 
+### Category-Level Configuration (New)
+
+Configure cache settings per category for fine-grained control:
+
+```yaml
+semantic_cache:
+  enabled: true
+  backend_type: "memory"
+  similarity_threshold: 0.8       # Global default
+  max_entries: 1000
+  ttl_seconds: 3600
+  eviction_policy: "fifo"
+
+categories:
+  - name: health
+    system_prompt: "You are a health expert..."
+    semantic_cache_enabled: true
+    semantic_cache_similarity_threshold: 0.95  # Very strict for medical accuracy
+    model_scores:
+      - model: your-model
+        score: 0.5
+        use_reasoning: false
+
+  - name: general_chat
+    system_prompt: "You are a helpful assistant..."
+    semantic_cache_similarity_threshold: 0.75  # Relaxed for better hit rate
+    model_scores:
+      - model: your-model
+        score: 0.7
+        use_reasoning: false
+
+  - name: troubleshooting
+    # No cache settings - uses global default (0.8)
+    model_scores:
+      - model: your-model
+        score: 0.7
+        use_reasoning: false
+```
+
 ### Configuration Options
 
 | Parameter | Type | Default | Description |
 |-----------|------|---------|-------------|
-| `enabled` | boolean | `false` | Enable/disable semantic caching |
+| `enabled` | boolean | `false` | Enable/disable semantic caching globally |
 | `backend_type` | string | `"memory"` | Cache backend type (must be "memory") |
-| `similarity_threshold` | float | `0.8` | Minimum similarity for cache hits (0.0-1.0) |
+| `similarity_threshold` | float | `0.8` | Global minimum similarity for cache hits (0.0-1.0) |
 | `max_entries` | integer | `1000` | Maximum number of cached entries |
 | `ttl_seconds` | integer | `3600` | Time-to-live for cache entries (seconds, 0 = no expiration) |
 | `eviction_policy` | string | `"fifo"` | Eviction policy: `"fifo"`, `"lru"`, `"lfu"` |
 
+### Category-Level Configuration Options
+
+| Parameter | Type | Default | Description |
+|-----------|------|---------|-------------|
+| `semantic_cache_enabled` | boolean | (inherits global) | Enable/disable caching for this category |
+| `semantic_cache_similarity_threshold` | float | (inherits global) | Category-specific similarity threshold (0.0-1.0) |
+
+Category-level settings override global settings. If not specified, the category uses the global cache configuration.
+
 ### Environment Examples
 
 #### Development Environment
diff --git a/website/docs/tutorials/semantic-cache/overview.md b/website/docs/tutorials/semantic-cache/overview.md
index 93460c87..8a259f32 100644
--- a/website/docs/tutorials/semantic-cache/overview.md
+++ b/website/docs/tutorials/semantic-cache/overview.md
@@ -10,7 +10,15 @@ Uses embeddings and cosine similarity to match queries by meaning rather than ex
 
 ### Configurable Thresholds
 
-Adjustable similarity thresholds balance cache hit rates with response quality.
+Adjustable similarity thresholds balance cache hit rates with response quality. Thresholds can be set globally or per-category for fine-grained control.
+
+### Category-Level Control
+
+**NEW**: Configure cache settings at the category level for precise control over sensitive and general content:
+
+- **Sensitive categories** (health, psychology, law): Use high thresholds (0.92-0.95) to prevent incorrect cache hits where word nuances matter
+- **General categories** (chat, troubleshooting): Use lower thresholds (0.75-0.82) for better cache hit rates
+- **Privacy categories**: Disable caching entirely for specific categories
 
 ### Multiple Backends