Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion src/semantic-router/pkg/extproc/req_filter_cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,13 @@ func (r *OpenAIRouter) handleCaching(ctx *RequestContext, categoryName string) (
} else if found {
// Mark this request as a cache hit
ctx.VSRCacheHit = true

// Set VSR decision context even for cache hits so headers are populated
// The categoryName passed here is the decision name from classification
if categoryName != "" {
ctx.VSRSelectedDecisionName = categoryName
}

// Log cache hit
logging.LogEvent("cache_hit", map[string]interface{}{
"request_id": ctx.RequestID,
Expand All @@ -69,7 +76,7 @@ func (r *OpenAIRouter) handleCaching(ctx *RequestContext, categoryName string) (
"threshold": threshold,
})
// Return immediate response from cache
response := http.CreateCacheHitResponse(cachedResponse, ctx.ExpectStreamingResponse)
response := http.CreateCacheHitResponse(cachedResponse, ctx.ExpectStreamingResponse, categoryName)
ctx.TraceContext = spanCtx
return response, true
}
Expand Down
10 changes: 9 additions & 1 deletion src/semantic-router/pkg/k8s/reconciler.go
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,15 @@ func (r *Reconciler) validateAndUpdate(ctx context.Context, pool *v1alpha1.Intel
// Create new config by merging with static config
newConfig := *r.staticConfig
newConfig.BackendModels = *backendModels
newConfig.IntelligentRouting = *intelligentRouting

// Copy the IntelligentRouting fields one at a time: the struct is embedded in the config
// (tagged ",inline" for YAML), and assigning the embedded struct as a whole did not work correctly here.
newConfig.KeywordRules = intelligentRouting.KeywordRules
newConfig.EmbeddingRules = intelligentRouting.EmbeddingRules
newConfig.Categories = intelligentRouting.Categories
newConfig.Decisions = intelligentRouting.Decisions
newConfig.Strategy = intelligentRouting.Strategy
newConfig.ReasoningConfig = intelligentRouting.ReasoningConfig

// Call update callback
if r.onConfigUpdate != nil {
Expand Down
43 changes: 28 additions & 15 deletions src/semantic-router/pkg/utils/http/response.go
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@ func CreateJailbreakViolationResponse(jailbreakType string, confidence float32,
}

// CreateCacheHitResponse creates an immediate response from cache
func CreateCacheHitResponse(cachedResponse []byte, isStreaming bool) *ext_proc.ProcessingResponse {
func CreateCacheHitResponse(cachedResponse []byte, isStreaming bool, vsrDecisionName string) *ext_proc.ProcessingResponse {
var responseBody []byte
var contentType string

Expand Down Expand Up @@ -283,25 +283,38 @@ func CreateCacheHitResponse(cachedResponse []byte, isStreaming bool) *ext_proc.P
responseBody = cachedResponse
}

// Build headers including VSR decision headers for cache hits
setHeaders := []*core.HeaderValueOption{
{
Header: &core.HeaderValue{
Key: "content-type",
RawValue: []byte(contentType),
},
},
{
Header: &core.HeaderValue{
Key: headers.VSRCacheHit,
RawValue: []byte("true"),
},
},
}

// Add VSR decision header if provided
if vsrDecisionName != "" {
setHeaders = append(setHeaders, &core.HeaderValueOption{
Header: &core.HeaderValue{
Key: headers.VSRSelectedDecision,
RawValue: []byte(vsrDecisionName),
},
})
}

immediateResponse := &ext_proc.ImmediateResponse{
Status: &typev3.HttpStatus{
Code: typev3.StatusCode_OK,
},
Headers: &ext_proc.HeaderMutation{
SetHeaders: []*core.HeaderValueOption{
{
Header: &core.HeaderValue{
Key: "content-type",
RawValue: []byte(contentType),
},
},
{
Header: &core.HeaderValue{
Key: headers.VSRCacheHit,
RawValue: []byte("true"),
},
},
},
SetHeaders: setHeaders,
},
Body: responseBody,
}
Expand Down
6 changes: 3 additions & 3 deletions src/semantic-router/pkg/utils/http/response_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ func TestCreateCacheHitResponse_NonStreaming(t *testing.T) {
}

// Test non-streaming response
response := CreateCacheHitResponse(cachedResponse, false)
response := CreateCacheHitResponse(cachedResponse, false, "test_decision")

// Verify response structure
if response == nil {
Expand Down Expand Up @@ -121,7 +121,7 @@ func TestCreateCacheHitResponse_Streaming(t *testing.T) {
}

// Test streaming response
response := CreateCacheHitResponse(cachedResponse, true)
response := CreateCacheHitResponse(cachedResponse, true, "test_decision")

// Verify response structure
if response == nil {
Expand Down Expand Up @@ -226,7 +226,7 @@ func TestCreateCacheHitResponse_StreamingWithInvalidJSON(t *testing.T) {
// Test with invalid JSON
invalidJSON := []byte("invalid json")

response := CreateCacheHitResponse(invalidJSON, true)
response := CreateCacheHitResponse(invalidJSON, true, "")

// Verify response structure
if response == nil {
Expand Down
Loading