diff --git a/src/semantic-router/pkg/extproc/req_filter_cache.go b/src/semantic-router/pkg/extproc/req_filter_cache.go index 7caed3144..bfa441536 100644 --- a/src/semantic-router/pkg/extproc/req_filter_cache.go +++ b/src/semantic-router/pkg/extproc/req_filter_cache.go @@ -60,6 +60,13 @@ func (r *OpenAIRouter) handleCaching(ctx *RequestContext, categoryName string) ( } else if found { // Mark this request as a cache hit ctx.VSRCacheHit = true + + // Set VSR decision context even for cache hits so headers are populated + // The categoryName passed here is the decision name from classification + if categoryName != "" { + ctx.VSRSelectedDecisionName = categoryName + } + // Log cache hit logging.LogEvent("cache_hit", map[string]interface{}{ "request_id": ctx.RequestID, @@ -69,7 +76,7 @@ func (r *OpenAIRouter) handleCaching(ctx *RequestContext, categoryName string) ( "threshold": threshold, }) // Return immediate response from cache - response := http.CreateCacheHitResponse(cachedResponse, ctx.ExpectStreamingResponse) + response := http.CreateCacheHitResponse(cachedResponse, ctx.ExpectStreamingResponse, categoryName) ctx.TraceContext = spanCtx return response, true } diff --git a/src/semantic-router/pkg/k8s/reconciler.go b/src/semantic-router/pkg/k8s/reconciler.go index ce2fc9fb3..b56fb2b2e 100644 --- a/src/semantic-router/pkg/k8s/reconciler.go +++ b/src/semantic-router/pkg/k8s/reconciler.go @@ -259,7 +259,15 @@ func (r *Reconciler) validateAndUpdate(ctx context.Context, pool *v1alpha1.Intel // Create new config by merging with static config newConfig := *r.staticConfig newConfig.BackendModels = *backendModels - newConfig.IntelligentRouting = *intelligentRouting + + // Copy IntelligentRouting fields explicitly (since it's embedded with ,inline in YAML) + // Assigning the whole struct doesn't work correctly with embedded structs + newConfig.KeywordRules = intelligentRouting.KeywordRules + newConfig.EmbeddingRules = intelligentRouting.EmbeddingRules + newConfig.Categories = intelligentRouting.Categories + newConfig.Decisions = intelligentRouting.Decisions + newConfig.Strategy = intelligentRouting.Strategy + newConfig.ReasoningConfig = intelligentRouting.ReasoningConfig // Call update callback if r.onConfigUpdate != nil { diff --git a/src/semantic-router/pkg/utils/http/response.go b/src/semantic-router/pkg/utils/http/response.go index dce194ae9..d212ffe75 100644 --- a/src/semantic-router/pkg/utils/http/response.go +++ b/src/semantic-router/pkg/utils/http/response.go @@ -233,7 +233,7 @@ func CreateJailbreakViolationResponse(jailbreakType string, confidence float32, } // CreateCacheHitResponse creates an immediate response from cache -func CreateCacheHitResponse(cachedResponse []byte, isStreaming bool) *ext_proc.ProcessingResponse { +func CreateCacheHitResponse(cachedResponse []byte, isStreaming bool, vsrDecisionName string) *ext_proc.ProcessingResponse { var responseBody []byte var contentType string @@ -283,25 +283,38 @@ func CreateCacheHitResponse(cachedResponse []byte, isStreaming bool) *ext_proc.P responseBody = cachedResponse } + // Build headers including VSR decision headers for cache hits + setHeaders := []*core.HeaderValueOption{ + { + Header: &core.HeaderValue{ + Key: "content-type", + RawValue: []byte(contentType), + }, + }, + { + Header: &core.HeaderValue{ + Key: headers.VSRCacheHit, + RawValue: []byte("true"), + }, + }, + } + + // Add VSR decision header if provided + if vsrDecisionName != "" { + setHeaders = append(setHeaders, &core.HeaderValueOption{ + Header: &core.HeaderValue{ + Key: headers.VSRSelectedDecision, + RawValue: []byte(vsrDecisionName), + }, + }) + } + immediateResponse := &ext_proc.ImmediateResponse{ Status: &typev3.HttpStatus{ Code: typev3.StatusCode_OK, }, Headers: &ext_proc.HeaderMutation{ - SetHeaders: []*core.HeaderValueOption{ - { - Header: &core.HeaderValue{ - Key: "content-type", - RawValue: []byte(contentType), - }, - }, - { - Header: &core.HeaderValue{ - Key: headers.VSRCacheHit, - RawValue: []byte("true"), - }, - }, - }, + SetHeaders: setHeaders, }, Body: responseBody, } diff --git a/src/semantic-router/pkg/utils/http/response_test.go b/src/semantic-router/pkg/utils/http/response_test.go index b53539fc9..22f3ce410 100644 --- a/src/semantic-router/pkg/utils/http/response_test.go +++ b/src/semantic-router/pkg/utils/http/response_test.go @@ -38,7 +38,7 @@ func TestCreateCacheHitResponse_NonStreaming(t *testing.T) { } // Test non-streaming response - response := CreateCacheHitResponse(cachedResponse, false) + response := CreateCacheHitResponse(cachedResponse, false, "test_decision") // Verify response structure if response == nil { @@ -121,7 +121,7 @@ func TestCreateCacheHitResponse_Streaming(t *testing.T) { } // Test streaming response - response := CreateCacheHitResponse(cachedResponse, true) + response := CreateCacheHitResponse(cachedResponse, true, "test_decision") // Verify response structure if response == nil { @@ -226,7 +226,7 @@ func TestCreateCacheHitResponse_StreamingWithInvalidJSON(t *testing.T) { // Test with invalid JSON invalidJSON := []byte("invalid json") - response := CreateCacheHitResponse(invalidJSON, true) + response := CreateCacheHitResponse(invalidJSON, true, "") // Verify response structure if response == nil {