From 76c9e25d747f8d8edbaace5c42b995a4db9784ab Mon Sep 17 00:00:00 2001 From: Senan Zedan Date: Wed, 19 Nov 2025 14:26:37 +0200 Subject: [PATCH] Change the test location to e2e/testcases and remove unneeded code that was for other test cases that are not done yet. Signed-off-by: Senan Zedan --- .github/workflows/integration-test-k8s.yml | 2 +- .github/workflows/unit-test-e2e-testcases.yml | 322 --------- e2e-tests/testcases/go.mod | 48 -- e2e-tests/testcases/go.sum | 88 --- e2e-tests/testcases/helpers.go | 163 ----- e2e-tests/testcases/keyword_routing_test.go | 505 -------------- e2e-tests/testcases/suite_test.go | 25 - e2e/cmd/e2e/main.go | 4 + e2e/profiles/routing-strategies/config.yaml | 638 ++++++++++++++++++ e2e/profiles/routing-strategies/profile.go | 325 +++++++++ e2e/profiles/routing-strategies/values.yaml | 343 ++++++++++ e2e/testcases/keyword_routing.go | 328 +++++++++ .../testdata/keyword_routing_cases.json | 40 ++ 13 files changed, 1679 insertions(+), 1152 deletions(-) delete mode 100644 .github/workflows/unit-test-e2e-testcases.yml delete mode 100644 e2e-tests/testcases/go.mod delete mode 100644 e2e-tests/testcases/go.sum delete mode 100644 e2e-tests/testcases/helpers.go delete mode 100644 e2e-tests/testcases/keyword_routing_test.go delete mode 100644 e2e-tests/testcases/suite_test.go create mode 100644 e2e/profiles/routing-strategies/config.yaml create mode 100644 e2e/profiles/routing-strategies/profile.go create mode 100644 e2e/profiles/routing-strategies/values.yaml create mode 100644 e2e/testcases/keyword_routing.go rename {e2e-tests => e2e}/testcases/testdata/keyword_routing_cases.json (83%) diff --git a/.github/workflows/integration-test-k8s.yml b/.github/workflows/integration-test-k8s.yml index aa1d14109..a4f56d485 100644 --- a/.github/workflows/integration-test-k8s.yml +++ b/.github/workflows/integration-test-k8s.yml @@ -16,7 +16,7 @@ jobs: strategy: fail-fast: false # Continue testing other profiles even if one fails matrix: - profile: 
[ai-gateway, aibrix] + profile: [ai-gateway, aibrix, routing-strategies] steps: - name: Check out the repo diff --git a/.github/workflows/unit-test-e2e-testcases.yml b/.github/workflows/unit-test-e2e-testcases.yml deleted file mode 100644 index 8a6776a25..000000000 --- a/.github/workflows/unit-test-e2e-testcases.yml +++ /dev/null @@ -1,322 +0,0 @@ -name: E2E Testcases Unit Tests - -on: - pull_request: - paths: - - "e2e-tests/testcases/**" - - "src/semantic-router/pkg/classification/**" - - "src/semantic-router/pkg/config/**" - - "candle-binding/**" - - ".github/workflows/unit-test-e2e-testcases.yml" - push: - branches: - - main - workflow_dispatch: - -env: - GO_VERSION: '1.24' - RUST_VERSION: '1.90.0' - -jobs: - test-keyword-routing: - name: Keyword Routing Tests - runs-on: ubuntu-latest - - steps: - - name: Checkout code - uses: actions/checkout@v4 - - - name: Set up Go - uses: actions/setup-go@v5 - with: - go-version: ${{ env.GO_VERSION }} - cache: true - cache-dependency-path: e2e-tests/testcases/go.sum - - - name: Set up Rust - uses: actions-rust-lang/setup-rust-toolchain@v1 - with: - toolchain: ${{ env.RUST_VERSION }} - - - name: Cache Rust dependencies - uses: actions/cache@v4 - with: - path: | - ~/.cargo/bin/ - ~/.cargo/registry/index/ - ~/.cargo/registry/cache/ - ~/.cargo/git/db/ - candle-binding/target/ - key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} - restore-keys: | - ${{ runner.os }}-cargo- - - - name: Build Rust Candle Bindings - run: | - cd candle-binding - cargo build --release --no-default-features - ls -la target/release/ - - - name: Verify Rust library - run: | - if [ -f "candle-binding/target/release/libcandle_semantic_router.so" ]; then - echo "✅ Rust library built successfully" - ls -lh candle-binding/target/release/libcandle_semantic_router.so - else - echo "❌ Rust library not found" - exit 1 - fi - - - name: Run Keyword Routing Tests - env: - LD_LIBRARY_PATH: ${{ github.workspace }}/candle-binding/target/release - run: | - cd 
e2e-tests/testcases - echo "Running keyword routing tests..." - go test -v -coverprofile=coverage-keyword.out -covermode=atomic -coverpkg=github.com/vllm-project/semantic-router/src/semantic-router/pkg/classification - - - name: Generate coverage report - if: always() - run: | - cd e2e-tests/testcases - go tool cover -func=coverage-keyword.out > coverage-summary.txt - echo "=== Full Coverage Summary ===" - cat coverage-summary.txt - - echo "" - echo "=== Keyword Classifier Coverage ===" - grep "keyword_classifier.go" coverage-summary.txt || echo "No keyword_classifier.go coverage found" - - # Extract coverage for keyword_classifier.go only - # Filter lines containing keyword_classifier.go, extract percentage, calculate average - KEYWORD_COVERAGE=$(grep "keyword_classifier.go" coverage-summary.txt | awk '{gsub(/%/, "", $NF); sum+=$NF; count++} END {if(count>0) printf "%.1f", sum/count; else print "0.0"}') - echo "Keyword Classifier Average Coverage: ${KEYWORD_COVERAGE}%" - echo "COVERAGE=${KEYWORD_COVERAGE}%" >> $GITHUB_ENV - - - name: Check coverage threshold - if: always() - run: | - cd e2e-tests/testcases - COVERAGE_PERCENT=$(echo $COVERAGE | sed 's/%//') - THRESHOLD=80 - - if (( $(echo "$COVERAGE_PERCENT < $THRESHOLD" | bc -l) )); then - echo "❌ Coverage $COVERAGE is below threshold ${THRESHOLD}%" - exit 1 - else - echo "✅ Coverage $COVERAGE meets threshold ${THRESHOLD}%" - fi - - - name: Upload coverage to Codecov - uses: codecov/codecov-action@v4 - if: always() - with: - files: ./e2e-tests/testcases/coverage-keyword.out - flags: e2e-testcases-keyword - name: keyword-routing-coverage - fail_ci_if_error: false - - - name: Test Summary - if: always() - run: | - echo "### Keyword Routing Test Results :test_tube:" >> $GITHUB_STEP_SUMMARY - echo "" >> $GITHUB_STEP_SUMMARY - echo "**Coverage:** $COVERAGE" >> $GITHUB_STEP_SUMMARY - echo "" >> $GITHUB_STEP_SUMMARY - echo "#### Test Categories" >> $GITHUB_STEP_SUMMARY - echo "- ✅ OR operator tests" >> 
$GITHUB_STEP_SUMMARY - echo "- ✅ AND operator tests" >> $GITHUB_STEP_SUMMARY - echo "- ✅ NOR operator tests" >> $GITHUB_STEP_SUMMARY - echo "- ✅ Case sensitivity tests" >> $GITHUB_STEP_SUMMARY - echo "- ✅ Word boundary tests" >> $GITHUB_STEP_SUMMARY - echo "- ✅ Regex special character tests" >> $GITHUB_STEP_SUMMARY - echo "- ✅ Edge case tests" >> $GITHUB_STEP_SUMMARY - echo "- ✅ Multiple rule matching" >> $GITHUB_STEP_SUMMARY - echo "- ✅ Confidence score validation" >> $GITHUB_STEP_SUMMARY - echo "- ✅ JSON test data loading" >> $GITHUB_STEP_SUMMARY - echo "- ✅ Error handling" >> $GITHUB_STEP_SUMMARY - - test-embedding-routing: - name: Embedding Routing Tests - runs-on: ubuntu-latest - # Only run if embedding tests exist (for future PRs) - if: | - contains(github.event.pull_request.changed_files, 'e2e-tests/testcases/embedding_routing_test.go') || - github.event_name == 'workflow_dispatch' - - steps: - - name: Checkout code - uses: actions/checkout@v4 - - - name: Set up Go - uses: actions/setup-go@v5 - with: - go-version: ${{ env.GO_VERSION }} - cache: true - - - name: Set up Rust - uses: actions-rust-lang/setup-rust-toolchain@v1 - with: - toolchain: ${{ env.RUST_VERSION }} - - - name: Build Rust Candle Bindings - run: | - cd candle-binding - cargo build --release --no-default-features - - - name: Run Embedding Routing Tests - env: - LD_LIBRARY_PATH: ${{ github.workspace }}/candle-binding/target/release - run: | - cd e2e-tests/testcases - if [ -f "embedding_routing_test.go" ] && ! [[ "$(basename embedding_routing_test.go)" =~ \.skip$ ]]; then - echo "Running embedding routing tests..." 
- go test -v -run "Embedding Routing" -coverprofile=coverage-embedding.out -covermode=atomic - else - echo "⏭️ Embedding routing tests not ready yet (skipped)" - fi - - test-hybrid-routing: - name: Hybrid Routing Tests - runs-on: ubuntu-latest - # Only run if hybrid tests exist (for future PRs) - if: | - contains(github.event.pull_request.changed_files, 'e2e-tests/testcases/hybrid_routing_test.go') || - github.event_name == 'workflow_dispatch' - - steps: - - name: Checkout code - uses: actions/checkout@v4 - - - name: Set up Go - uses: actions/setup-go@v5 - with: - go-version: ${{ env.GO_VERSION }} - cache: true - - - name: Set up Rust - uses: actions-rust-lang/setup-rust-toolchain@v1 - with: - toolchain: ${{ env.RUST_VERSION }} - - - name: Build Rust Candle Bindings - run: | - cd candle-binding - cargo build --release --no-default-features - - - name: Run Hybrid Routing Tests - env: - LD_LIBRARY_PATH: ${{ github.workspace }}/candle-binding/target/release - run: | - cd e2e-tests/testcases - if [ -f "hybrid_routing_test.go" ] && ! [[ "$(basename hybrid_routing_test.go)" =~ \.skip$ ]]; then - echo "Running hybrid routing tests..." - go test -v -run "Hybrid Routing" -coverprofile=coverage-hybrid.out -covermode=atomic - else - echo "⏭️ Hybrid routing tests not ready yet (skipped)" - fi - - race-detection: - name: Race Condition Detection - runs-on: ubuntu-latest - - steps: - - name: Checkout code - uses: actions/checkout@v4 - - - name: Set up Go - uses: actions/setup-go@v5 - with: - go-version: ${{ env.GO_VERSION }} - cache: true - - - name: Set up Rust - uses: actions-rust-lang/setup-rust-toolchain@v1 - with: - toolchain: ${{ env.RUST_VERSION }} - - - name: Build Rust Candle Bindings - run: | - cd candle-binding - cargo build --release --no-default-features - - - name: Run tests with race detector - env: - LD_LIBRARY_PATH: ${{ github.workspace }}/candle-binding/target/release - run: | - cd e2e-tests/testcases - echo "Running tests with race detector..." 
- go test -race -v || { - echo "❌ Race conditions detected!" - exit 1 - } - echo "✅ No race conditions detected" - - lint: - name: Lint Go Code - runs-on: ubuntu-latest - - steps: - - name: Checkout code - uses: actions/checkout@v4 - - - name: Set up Go - uses: actions/setup-go@v5 - with: - go-version: ${{ env.GO_VERSION }} - cache: true - - - name: Run golangci-lint - uses: golangci/golangci-lint-action@v6 - with: - version: latest - working-directory: e2e-tests/testcases - args: --timeout=5m - - summary: - name: Test Summary - if: always() - runs-on: ubuntu-latest - needs: [test-keyword-routing, race-detection, lint] - - steps: - - name: Check test results - run: | - echo "=== E2E Testcases Summary ===" - echo "Keyword Routing Tests: ${{ needs.test-keyword-routing.result }}" - echo "Race Detection: ${{ needs.race-detection.result }}" - echo "Lint: ${{ needs.lint.result }}" - - # Count failures - FAILURES=0 - if [[ "${{ needs.test-keyword-routing.result }}" == "failure" ]]; then - echo "❌ Keyword routing tests failed" - FAILURES=$((FAILURES + 1)) - fi - if [[ "${{ needs.race-detection.result }}" == "failure" ]]; then - echo "❌ Race detection failed" - FAILURES=$((FAILURES + 1)) - fi - if [[ "${{ needs.lint.result }}" == "failure" ]]; then - echo "❌ Lint failed" - FAILURES=$((FAILURES + 1)) - fi - - echo "" - echo "=== Test Coverage (Issue #667) ===" - echo "✅ OR operator - any keyword matches" - echo "✅ AND operator - all keywords must match" - echo "✅ NOR operator - no keywords match" - echo "✅ Case-sensitive vs case-insensitive matching" - echo "✅ Regex pattern matching" - echo "✅ Word boundary detection" - echo "✅ Priority over embedding and intent-based routing" - - if [ $FAILURES -gt 0 ]; then - echo "" - echo "❌ $FAILURES test(s) failed. Check the logs for details." - exit 1 - else - echo "" - echo "✅ All E2E testcases passed!" 
- fi diff --git a/e2e-tests/testcases/go.mod b/e2e-tests/testcases/go.mod deleted file mode 100644 index 3f6d8736b..000000000 --- a/e2e-tests/testcases/go.mod +++ /dev/null @@ -1,48 +0,0 @@ -module github.com/vllm-project/semantic-router/e2e-tests/testcases - -go 1.24.1 - -require ( - github.com/onsi/ginkgo/v2 v2.23.4 - github.com/onsi/gomega v1.38.0 - github.com/vllm-project/semantic-router/src/semantic-router v0.0.0 -) - -require ( - github.com/bahlo/generic-list-go v0.2.0 // indirect - github.com/beorn7/perks v1.0.1 // indirect - github.com/buger/jsonparser v1.1.1 // indirect - github.com/cespare/xxhash/v2 v2.3.0 // indirect - github.com/go-logr/logr v1.4.3 // indirect - github.com/go-task/slim-sprig/v3 v3.0.0 // indirect - github.com/google/go-cmp v0.7.0 // indirect - github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 // indirect - github.com/google/uuid v1.6.0 // indirect - github.com/invopop/jsonschema v0.13.0 // indirect - github.com/mailru/easyjson v0.7.7 // indirect - github.com/mark3labs/mcp-go v0.42.0-beta.1 // indirect - github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect - github.com/prometheus/client_golang v1.23.0 // indirect - github.com/prometheus/client_model v0.6.2 // indirect - github.com/prometheus/common v0.65.0 // indirect - github.com/prometheus/procfs v0.16.1 // indirect - github.com/spf13/cast v1.7.1 // indirect - github.com/vllm-project/semantic-router/candle-binding v0.0.0-00010101000000-000000000000 // indirect - github.com/wk8/go-ordered-map/v2 v2.1.8 // indirect - github.com/yosida95/uritemplate/v3 v3.0.2 // indirect - go.uber.org/automaxprocs v1.6.0 // indirect - go.uber.org/multierr v1.11.0 // indirect - go.uber.org/zap v1.27.0 // indirect - golang.org/x/net v0.43.0 // indirect - golang.org/x/sys v0.37.0 // indirect - golang.org/x/text v0.28.0 // indirect - golang.org/x/tools v0.35.0 // indirect - google.golang.org/protobuf v1.36.9 // indirect - gopkg.in/yaml.v2 v2.4.0 // indirect - gopkg.in/yaml.v3 
v3.0.1 // indirect -) - -replace ( - github.com/vllm-project/semantic-router/candle-binding => ../../candle-binding - github.com/vllm-project/semantic-router/src/semantic-router => ../../src/semantic-router -) diff --git a/e2e-tests/testcases/go.sum b/e2e-tests/testcases/go.sum deleted file mode 100644 index 60e1c796d..000000000 --- a/e2e-tests/testcases/go.sum +++ /dev/null @@ -1,88 +0,0 @@ -github.com/bahlo/generic-list-go v0.2.0 h1:5sz/EEAK+ls5wF+NeqDpk5+iNdMDXrh3z3nPnH1Wvgk= -github.com/bahlo/generic-list-go v0.2.0/go.mod h1:2KvAjgMlE5NNynlg/5iLrrCCZ2+5xWbdbCW3pNTGyYg= -github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= -github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= -github.com/buger/jsonparser v1.1.1 h1:2PnMjfWD7wBILjqQbt530v576A/cAbQvEW9gGIpYMUs= -github.com/buger/jsonparser v1.1.1/go.mod h1:6RYKKt7H4d4+iWqouImQ9R2FZql3VbhNgx27UK13J/0= -github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= -github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= -github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= -github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8= -github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0= -github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= -github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= -github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI= -github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= -github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= -github.com/google/go-cmp v0.7.0/go.mod 
h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= -github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 h1:BHT72Gu3keYf3ZEu2J0b1vyeLSOYI8bm5wbJM/8yDe8= -github.com/google/pprof v0.0.0-20250403155104-27863c87afa6/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA= -github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= -github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/invopop/jsonschema v0.13.0 h1:KvpoAJWEjR3uD9Kbm2HWJmqsEaHt8lBUpd0qHcIi21E= -github.com/invopop/jsonschema v0.13.0/go.mod h1:ffZ5Km5SWWRAIN6wbDXItl95euhFz2uON45H2qjYt+0= -github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= -github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= -github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= -github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= -github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= -github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= -github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= -github.com/mark3labs/mcp-go v0.42.0-beta.1 h1:jXCUOg7vHwSuknzy4hPvOXASnzmLluM3AMx1rPh/OYM= -github.com/mark3labs/mcp-go v0.42.0-beta.1/go.mod h1:T7tUa2jO6MavG+3P25Oy/jR7iCeJPHImCZHRymCn39g= -github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= -github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= -github.com/onsi/ginkgo/v2 v2.23.4 h1:ktYTpKJAVZnDT4VjxSbiBenUjmlL/5QkBEocaWXiQus= -github.com/onsi/ginkgo/v2 v2.23.4/go.mod h1:Bt66ApGPBFzHyR+JO10Zbt0Gsp4uWxu5mIOTusL46e8= -github.com/onsi/gomega v1.38.0 h1:c/WX+w8SLAinvuKKQFh77WEucCnPk4j2OTUr7lt7BeY= -github.com/onsi/gomega v1.38.0/go.mod h1:OcXcwId0b9QsE7Y49u+BTrL4IdKOBOKnD6VQNTJEB6o= -github.com/pmezard/go-difflib 
v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= -github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/prashantv/gostub v1.1.0 h1:BTyx3RfQjRHnUWaGF9oQos79AlQ5k8WNktv7VGvVH4g= -github.com/prashantv/gostub v1.1.0/go.mod h1:A5zLQHz7ieHGG7is6LLXLz7I8+3LZzsrV0P1IAHhP5U= -github.com/prometheus/client_golang v1.23.0 h1:ust4zpdl9r4trLY/gSjlm07PuiBq2ynaXXlptpfy8Uc= -github.com/prometheus/client_golang v1.23.0/go.mod h1:i/o0R9ByOnHX0McrTMTyhYvKE4haaf2mW08I+jGAjEE= -github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk= -github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE= -github.com/prometheus/common v0.65.0 h1:QDwzd+G1twt//Kwj/Ww6E9FQq1iVMmODnILtW1t2VzE= -github.com/prometheus/common v0.65.0/go.mod h1:0gZns+BLRQ3V6NdaerOhMbwwRbNh9hkGINtQAsP5GS8= -github.com/prometheus/procfs v0.16.1 h1:hZ15bTNuirocR6u0JZ6BAHHmwS1p8B4P6MRqxtzMyRg= -github.com/prometheus/procfs v0.16.1/go.mod h1:teAbpZRB1iIAJYREa1LsoWUXykVXA1KlTmWl8x/U+Is= -github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII= -github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o= -github.com/samber/lo v1.52.0 h1:Rvi+3BFHES3A8meP33VPAxiBZX/Aws5RxrschYGjomw= -github.com/samber/lo v1.52.0/go.mod h1:4+MXEGsJzbKGaUEQFKBq2xtfuznW9oz/WrgyzMzRoM0= -github.com/spf13/cast v1.7.1 h1:cuNEagBQEHWN1FnbGEjCXL2szYEXqfJPbP2HNUaca9Y= -github.com/spf13/cast v1.7.1/go.mod h1:ancEpBxwJDODSW/UG4rDrAqiKolqNNh2DX3mk86cAdo= -github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= -github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= -github.com/wk8/go-ordered-map/v2 v2.1.8 h1:5h/BUHu93oj4gIdvHHHGsScSTMijfx5PeYkE/fJgbpc= -github.com/wk8/go-ordered-map/v2 v2.1.8/go.mod h1:5nJHM5DyteebpVlHnWMV0rPz6Zp7+xBAnxjb1X5vnTw= 
-github.com/yosida95/uritemplate/v3 v3.0.2 h1:Ed3Oyj9yrmi9087+NczuL5BwkIc4wvTb5zIM+UJPGz4= -github.com/yosida95/uritemplate/v3 v3.0.2/go.mod h1:ILOh0sOhIJR3+L/8afwt/kE++YT040gmv5BQTMR2HP4= -go.uber.org/automaxprocs v1.6.0 h1:O3y2/QNTOdbF+e/dpXNNW7Rx2hZ4sTIPyybbxyNqTUs= -go.uber.org/automaxprocs v1.6.0/go.mod h1:ifeIMSnPZuznNm6jmdzmU3/bfk01Fe2fotchwEFJ8r8= -go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= -go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= -go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= -go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= -go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8= -go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E= -golang.org/x/net v0.43.0 h1:lat02VYK2j4aLzMzecihNvTlJNQUq316m2Mr9rnM6YE= -golang.org/x/net v0.43.0/go.mod h1:vhO1fvI4dGsIjh73sWfUVjj3N7CA9WkKJNQm2svM6Jg= -golang.org/x/sys v0.37.0 h1:fdNQudmxPjkdUTPnLn5mdQv7Zwvbvpaxqs831goi9kQ= -golang.org/x/sys v0.37.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= -golang.org/x/text v0.28.0 h1:rhazDwis8INMIwQ4tpjLDzUhx6RlXqZNPEM0huQojng= -golang.org/x/text v0.28.0/go.mod h1:U8nCwOR8jO/marOQ0QbDiOngZVEBB7MAiitBuMjXiNU= -golang.org/x/tools v0.35.0 h1:mBffYraMEf7aa0sB+NuKnuCy8qI/9Bughn8dC2Gu5r0= -golang.org/x/tools v0.35.0/go.mod h1:NKdj5HkL/73byiZSJjqJgKn3ep7KjFkBOkR/Hps3VPw= -google.golang.org/protobuf v1.36.9 h1:w2gp2mA27hUeUzj9Ex9FBjsBm40zfaDtEWow293U7Iw= -google.golang.org/protobuf v1.36.9/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU= -gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= -gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= -gopkg.in/yaml.v2 v2.4.0 
h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= -gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= -gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= -gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/e2e-tests/testcases/helpers.go b/e2e-tests/testcases/helpers.go deleted file mode 100644 index 02acc3658..000000000 --- a/e2e-tests/testcases/helpers.go +++ /dev/null @@ -1,163 +0,0 @@ -package testcases - -import ( - "encoding/json" - "os" - - "github.com/vllm-project/semantic-router/src/semantic-router/pkg/classification" - "github.com/vllm-project/semantic-router/src/semantic-router/pkg/config" -) - -// KeywordTestCase represents a test case for keyword routing -type KeywordTestCase struct { - Name string `json:"name"` - Description string `json:"description"` - Query string `json:"query"` - ExpectedCategory string `json:"expected_category"` - ExpectedConfidence float64 `json:"expected_confidence"` - MatchedKeywords []string `json:"matched_keywords"` -} - -// EmbeddingTestCase represents a test case for embedding-based routing -type EmbeddingTestCase struct { - Name string `json:"name"` - Description string `json:"description"` - Query string `json:"query"` - ExpectedCategory string `json:"expected_category"` - MinSimilarity float64 `json:"min_similarity"` - AggregationMethod string `json:"aggregation_method"` - ModelType string `json:"model_type"` -} - -// HybridTestCase represents a test case for hybrid routing (priority testing) -type HybridTestCase struct { - Name string `json:"name"` - Description string `json:"description"` - Query string `json:"query"` - ExpectedCategory string `json:"expected_category"` - ExpectedRoutingMethod string `json:"expected_routing_method"` // "keyword", "embedding", "mcp" - ExpectedConfidence float64 `json:"expected_confidence"` -} - -// EntropyTestCase represents a test case for entropy-based routing -type EntropyTestCase struct { - Name 
string `json:"name"` - Description string `json:"description"` - Query string `json:"query"` - ExpectedEntropy float64 `json:"expected_entropy"` - ExpectedReasoning bool `json:"expected_reasoning"` - EntropyThreshold float64 `json:"entropy_threshold"` -} - -// ReasoningControlTestCase represents a test case for reasoning control -type ReasoningControlTestCase struct { - Name string `json:"name"` - Description string `json:"description"` - Query string `json:"query"` - Category string `json:"category"` - ExpectedReasoning bool `json:"expected_reasoning"` - EffortLevel string `json:"effort_level"` - ModelFamily string `json:"model_family"` -} - -// ToolSelectionTestCase represents a test case for tool selection -type ToolSelectionTestCase struct { - Name string `json:"name"` - Description string `json:"description"` - Query string `json:"query"` - ExpectedTools []string `json:"expected_tools"` - TopK int `json:"top_k"` - SimilarityThreshold float64 `json:"similarity_threshold"` -} - -// LoadKeywordTestCases loads keyword test cases from a JSON file -func LoadKeywordTestCases(path string) ([]KeywordTestCase, error) { - data, err := os.ReadFile(path) - if err != nil { - return nil, err - } - - var cases []KeywordTestCase - err = json.Unmarshal(data, &cases) - return cases, err -} - -// LoadEmbeddingTestCases loads embedding test cases from a JSON file -func LoadEmbeddingTestCases(path string) ([]EmbeddingTestCase, error) { - data, err := os.ReadFile(path) - if err != nil { - return nil, err - } - - var cases []EmbeddingTestCase - err = json.Unmarshal(data, &cases) - return cases, err -} - -// LoadHybridTestCases loads hybrid routing test cases from a JSON file -func LoadHybridTestCases(path string) ([]HybridTestCase, error) { - data, err := os.ReadFile(path) - if err != nil { - return nil, err - } - - var cases []HybridTestCase - err = json.Unmarshal(data, &cases) - return cases, err -} - -// CreateKeywordTestRules creates standard keyword rules for testing -// Note: 
Rules are evaluated in order. NOR rule is last to avoid matching everything. -func CreateKeywordTestRules() []config.KeywordRule { - return []config.KeywordRule{ - { - Name: "urgent_request", - Operator: "OR", - Keywords: []string{"urgent", "immediate", "asap", "emergency"}, - CaseSensitive: false, - }, - { - Name: "sensitive_data", - Operator: "AND", - Keywords: []string{"SSN", "credit card"}, - CaseSensitive: false, - }, - { - Name: "case_sensitive_test", - Operator: "OR", - Keywords: []string{"SECRET"}, - CaseSensitive: true, - }, - { - Name: "secret_detection", - Operator: "OR", - Keywords: []string{"secret"}, - CaseSensitive: false, - }, - { - Name: "version_check", - Operator: "OR", - Keywords: []string{"1.0", "2.0", "3.0"}, - CaseSensitive: false, - }, - { - Name: "wildcard_test", - Operator: "OR", - Keywords: []string{"*"}, - CaseSensitive: false, - }, - // NOR rule at end - matches when NO spam keywords present - // This will match most text, so it's placed last - { - Name: "spam", - Operator: "NOR", - Keywords: []string{"buy now", "free money", "click here"}, - CaseSensitive: false, - }, - } -} - -// CreateTestKeywordClassifier creates a keyword classifier instance for testing -func CreateTestKeywordClassifier(rules []config.KeywordRule) (*classification.KeywordClassifier, error) { - return classification.NewKeywordClassifier(rules) -} diff --git a/e2e-tests/testcases/keyword_routing_test.go b/e2e-tests/testcases/keyword_routing_test.go deleted file mode 100644 index 29e97b4fc..000000000 --- a/e2e-tests/testcases/keyword_routing_test.go +++ /dev/null @@ -1,505 +0,0 @@ -package testcases - -import ( - . "github.com/onsi/ginkgo/v2" - . 
"github.com/onsi/gomega" - - "github.com/vllm-project/semantic-router/src/semantic-router/pkg/classification" - "github.com/vllm-project/semantic-router/src/semantic-router/pkg/config" -) - -var _ = Describe("Keyword Routing", func() { - var ( - classifier *classification.KeywordClassifier - rules []config.KeywordRule - rulesWithoutNOR []config.KeywordRule - ) - - BeforeEach(func() { - // Get all rules including NOR - allRules := CreateKeywordTestRules() - - // Create version without NOR for tests that expect empty results - rulesWithoutNOR = []config.KeywordRule{} - for _, rule := range allRules { - if rule.Operator != "NOR" { - rulesWithoutNOR = append(rulesWithoutNOR, rule) - } - } - - // By default, use rules without NOR - // Tests that specifically test NOR will create their own classifier - rules = rulesWithoutNOR - var err error - classifier, err = CreateTestKeywordClassifier(rules) - Expect(err).NotTo(HaveOccurred()) - Expect(classifier).NotTo(BeNil()) - }) - - Context("OR Operator", func() { - It("should match when any keyword is present", func() { - testCases := []struct { - query string - expectedCategory string - }{ - {"I need urgent help", "urgent_request"}, - {"This is an immediate issue", "urgent_request"}, - {"Please respond asap", "urgent_request"}, - {"This is an emergency situation", "urgent_request"}, - } - - for _, tc := range testCases { - category, confidence, err := classifier.Classify(tc.query) - Expect(err).NotTo(HaveOccurred(), "Query: %s", tc.query) - Expect(category).To(Equal(tc.expectedCategory), - "Query '%s' should match category %s", tc.query, tc.expectedCategory) - Expect(confidence).To(Equal(1.0), "Keyword matches should have 100%% confidence") - } - }) - - It("should not match when no keywords are present", func() { - category, _, err := classifier.Classify("Just a normal query") - Expect(err).NotTo(HaveOccurred()) - Expect(category).To(BeEmpty()) - }) - - It("should be case-insensitive when configured", func() { - testCases := 
[]string{ - "This is URGENT", - "This is Urgent", - "This is urgent", - "This is UrGeNt", - } - - for _, query := range testCases { - category, _, err := classifier.Classify(query) - Expect(err).NotTo(HaveOccurred()) - Expect(category).To(Equal("urgent_request"), - "Query '%s' should match case-insensitively", query) - } - }) - - It("should match keyword at beginning of text", func() { - category, _, err := classifier.Classify("Urgent: please help") - Expect(err).NotTo(HaveOccurred()) - Expect(category).To(Equal("urgent_request")) - }) - - It("should match keyword at end of text", func() { - category, _, err := classifier.Classify("Please help, this is urgent") - Expect(err).NotTo(HaveOccurred()) - Expect(category).To(Equal("urgent_request")) - }) - - It("should match keyword in middle of text", func() { - category, _, err := classifier.Classify("This is an urgent matter that needs attention") - Expect(err).NotTo(HaveOccurred()) - Expect(category).To(Equal("urgent_request")) - }) - }) - - Context("AND Operator", func() { - It("should match when all keywords are present", func() { - query := "My SSN and credit card were stolen" - category, confidence, err := classifier.Classify(query) - - Expect(err).NotTo(HaveOccurred()) - Expect(category).To(Equal("sensitive_data")) - Expect(confidence).To(Equal(1.0)) - }) - - It("should not match when only some keywords are present", func() { - queries := []string{ - "My SSN was stolen", // Only SSN - "My credit card was stolen", // Only credit card - "Something else entirely", // Neither - } - - for _, query := range queries { - category, _, err := classifier.Classify(query) - Expect(err).NotTo(HaveOccurred()) - Expect(category).NotTo(Equal("sensitive_data"), - "Query '%s' should not match AND rule", query) - } - }) - - It("should match regardless of keyword order", func() { - queries := []string{ - "My SSN and credit card", - "My credit card and SSN", - "SSN credit card stolen", - "credit card and SSN compromised", - } - - for 
_, query := range queries { - category, _, err := classifier.Classify(query) - Expect(err).NotTo(HaveOccurred()) - Expect(category).To(Equal("sensitive_data")) - } - }) - - It("should match with keywords far apart in text", func() { - query := "My SSN was compromised yesterday, and today I noticed my credit card was also affected" - category, _, err := classifier.Classify(query) - Expect(err).NotTo(HaveOccurred()) - Expect(category).To(Equal("sensitive_data")) - }) - - It("should match with repeated keywords", func() { - query := "SSN SSN credit card credit card" - category, _, err := classifier.Classify(query) - Expect(err).NotTo(HaveOccurred()) - Expect(category).To(Equal("sensitive_data")) - }) - }) - - Context("NOR Operator", func() { - var norClassifier *classification.KeywordClassifier - - BeforeEach(func() { - // Create classifier with ALL rules including NOR for these tests - allRules := CreateKeywordTestRules() - var err error - norClassifier, err = CreateTestKeywordClassifier(allRules) - Expect(err).NotTo(HaveOccurred()) - Expect(norClassifier).NotTo(BeNil()) - }) - - It("should match spam when no forbidden keywords are present", func() { - // NOR matches when NONE of the keywords are found - queries := []string{ - "How do I reset my password?", - "What is the capital of France?", - "Can you help me with my account?", - } - - for _, query := range queries { - category, confidence, err := norClassifier.Classify(query) - Expect(err).NotTo(HaveOccurred()) - Expect(category).To(Equal("spam"), - "Query '%s' should match spam via NOR (no spam keywords present)", query) - Expect(confidence).To(Equal(1.0)) - } - }) - - It("should not match spam when any forbidden keyword is present", func() { - // NOR does NOT match when any keyword is found - queries := []string{ - "Buy now and save!", - "Click here for free money", - "Free money available now", - "Buy now, click here for free money", - } - - for _, query := range queries { - category, _, err := 
norClassifier.Classify(query) - Expect(err).NotTo(HaveOccurred()) - Expect(category).NotTo(Equal("spam"), - "Query '%s' should NOT match spam via NOR (spam keywords present)", query) - } - }) - }) - - Context("Case Sensitivity", func() { - It("should match exact case when case-sensitive enabled", func() { - category, _, err := classifier.Classify("This is SECRET") - Expect(err).NotTo(HaveOccurred()) - Expect(category).To(Equal("case_sensitive_test")) - }) - - It("should not match different case when case-sensitive enabled", func() { - queries := []string{ - "This is secret", - "This is Secret", - "This is sEcReT", - "This is seCRet", - } - - for _, query := range queries { - category, _, err := classifier.Classify(query) - Expect(err).NotTo(HaveOccurred()) - Expect(category).NotTo(Equal("case_sensitive_test"), - "Query '%s' should not match case-sensitive rule", query) - } - }) - - It("should handle case-insensitive rules correctly", func() { - // secret_detection has case_sensitive: false - // Use lowercase to avoid matching case_sensitive_test first - queries := []string{ - "This is secret", - "This is Secret", - "This is sEcReT", - } - - for _, query := range queries { - category, _, err := classifier.Classify(query) - Expect(err).NotTo(HaveOccurred()) - Expect(category).To(Equal("secret_detection"), - "Query '%s' should match case-insensitive secret_detection", query) - } - }) - }) - - Context("Word Boundaries", func() { - It("should respect word boundaries - positive case", func() { - queries := []string{ - "This is a secret", - "The secret is safe", - "secret meeting", - "A secret!", - } - - for _, query := range queries { - category, _, err := classifier.Classify(query) - Expect(err).NotTo(HaveOccurred()) - Expect(category).To(Equal("secret_detection"), - "Query '%s' should match secret as whole word", query) - } - }) - - It("should respect word boundaries - negative case", func() { - queries := []string{ - "Talk to my secretary", - "The secretariat is 
here", - "Secretive behavior", - } - - for _, query := range queries { - category, _, err := classifier.Classify(query) - Expect(err).NotTo(HaveOccurred()) - Expect(category).NotTo(Equal("secret_detection"), - "Query '%s' should not match secret in partial word", query) - } - }) - - It("should handle word boundaries with punctuation", func() { - queries := []string{ - "secret.", - "secret!", - "secret?", - "secret,", - "(secret)", - "\"secret\"", - } - - for _, query := range queries { - category, _, err := classifier.Classify(query) - Expect(err).NotTo(HaveOccurred()) - Expect(category).To(Equal("secret_detection"), - "Query '%s' should match secret with punctuation", query) - } - }) - }) - - Context("Regex Special Characters", func() { - It("should handle dots literally", func() { - queries := []string{ - "Version 1.0 released", - "Using 2.0 now", - "3.0 is coming", - } - - for _, query := range queries { - category, _, err := classifier.Classify(query) - Expect(err).NotTo(HaveOccurred()) - Expect(category).To(Equal("version_check"), - "Query '%s' should match version with literal dot", query) - } - }) - - It("should not match dots as wildcard", func() { - // 1.0 should match literally, not 1X0 - category, _, err := classifier.Classify("Version 1X0") - Expect(err).NotTo(HaveOccurred()) - Expect(category).NotTo(Equal("version_check")) - }) - - It("should handle asterisks literally", func() { - queries := []string{ - "The symbol * is here", - "Use * wildcard", - "asterisk *", - } - - for _, query := range queries { - category, _, err := classifier.Classify(query) - Expect(err).NotTo(HaveOccurred()) - Expect(category).To(Equal("wildcard_test"), - "Query '%s' should match asterisk literally", query) - } - }) - }) - - Context("Edge Cases", func() { - It("should handle empty text", func() { - category, _, err := classifier.Classify("") - Expect(err).NotTo(HaveOccurred()) - Expect(category).To(BeEmpty()) - }) - - It("should handle whitespace-only text", func() { - 
queries := []string{ - " ", - "\t\t", - "\n\n", - " \t\n ", - } - - for _, query := range queries { - category, _, err := classifier.Classify(query) - Expect(err).NotTo(HaveOccurred()) - Expect(category).To(BeEmpty()) - } - }) - - It("should handle very long text", func() { - longPrefix := "This is normal text that goes on and on. " - longSuffix := "More normal text. " - var longText string - for i := 0; i < 100; i++ { - longText += longPrefix - } - longText += "urgent " - for i := 0; i < 100; i++ { - longText += longSuffix - } - - category, _, err := classifier.Classify(longText) - Expect(err).NotTo(HaveOccurred()) - Expect(category).To(Equal("urgent_request")) - }) - - It("should handle Unicode characters", func() { - queries := []string{ - "需要 urgent 帮助", - "緊急 urgent 事項", - "срочно urgent помощь", - } - - for _, query := range queries { - category, _, err := classifier.Classify(query) - Expect(err).NotTo(HaveOccurred()) - Expect(category).To(Equal("urgent_request"), - "Query '%s' should match with Unicode", query) - } - }) - - It("should handle emoji", func() { - queries := []string{ - "🚨 urgent 🚨", - "😱 urgent help 😱", - "⚠️ urgent ⚠️", - } - - for _, query := range queries { - category, _, err := classifier.Classify(query) - Expect(err).NotTo(HaveOccurred()) - Expect(category).To(Equal("urgent_request"), - "Query '%s' should match with emoji", query) - } - }) - - It("should handle newlines in text", func() { - query := "This is\nurgent\nhelp" - category, _, err := classifier.Classify(query) - Expect(err).NotTo(HaveOccurred()) - Expect(category).To(Equal("urgent_request")) - }) - - It("should handle tabs in text", func() { - query := "This is\turgent\thelp" - category, _, err := classifier.Classify(query) - Expect(err).NotTo(HaveOccurred()) - Expect(category).To(Equal("urgent_request")) - }) - }) - - Context("Multiple Rule Matching", func() { - It("should use first matching rule when multiple rules match", func() { - // Add overlapping rules - overlappingRules 
:= []config.KeywordRule{ - {Name: "rule1", Operator: "OR", Keywords: []string{"urgent"}, CaseSensitive: false}, - {Name: "rule2", Operator: "OR", Keywords: []string{"urgent"}, CaseSensitive: false}, - } - newClassifier, err := CreateTestKeywordClassifier(overlappingRules) - Expect(err).NotTo(HaveOccurred()) - - category, _, err := newClassifier.Classify("urgent request") - Expect(err).NotTo(HaveOccurred()) - Expect(category).To(Equal("rule1"), "Should match first rule") - }) - - It("should handle multiple different keywords matching", func() { - query := "This is urgent and also an emergency" - category, _, err := classifier.Classify(query) - Expect(err).NotTo(HaveOccurred()) - Expect(category).To(Equal("urgent_request")) - }) - }) - - Context("Confidence Scores", func() { - It("should always return confidence 1.0 for keyword matches", func() { - testCases := []string{ - "urgent", - "This is urgent", - "URGENT", - "My SSN and credit card", - } - - for _, query := range testCases { - _, confidence, err := classifier.Classify(query) - Expect(err).NotTo(HaveOccurred()) - Expect(confidence).To(Equal(1.0), - "Query '%s' should have confidence 1.0", query) - } - }) - }) - - Context("Loading from JSON test data", func() { - It("should pass all test cases from JSON file", func() { - testCases, err := LoadKeywordTestCases("testdata/keyword_routing_cases.json") - if err != nil { - Skip("Test data file not found: " + err.Error()) - return - } - - for _, tc := range testCases { - category, confidence, err := classifier.Classify(tc.Query) - Expect(err).NotTo(HaveOccurred(), "Test: %s - %s", tc.Name, tc.Description) - - if tc.ExpectedCategory != "" { - Expect(category).To(Equal(tc.ExpectedCategory), - "Test: %s - Query: %s", tc.Name, tc.Query) - } - - if tc.ExpectedConfidence > 0 { - Expect(confidence).To(Equal(tc.ExpectedConfidence), - "Test: %s - Query: %s", tc.Name, tc.Query) - } - } - }) - }) - - Context("Error Handling", func() { - It("should handle invalid operator 
gracefully", func() { - invalidRules := []config.KeywordRule{ - {Name: "invalid", Operator: "INVALID", Keywords: []string{"test"}, CaseSensitive: false}, - } - _, err := CreateTestKeywordClassifier(invalidRules) - Expect(err).To(HaveOccurred()) - }) - - It("should handle empty keywords array", func() { - emptyRules := []config.KeywordRule{ - {Name: "empty", Operator: "OR", Keywords: []string{}, CaseSensitive: false}, - } - newClassifier, err := CreateTestKeywordClassifier(emptyRules) - Expect(err).NotTo(HaveOccurred()) - - category, _, err := newClassifier.Classify("any text") - Expect(err).NotTo(HaveOccurred()) - Expect(category).To(BeEmpty()) - }) - }) -}) diff --git a/e2e-tests/testcases/suite_test.go b/e2e-tests/testcases/suite_test.go deleted file mode 100644 index 086554309..000000000 --- a/e2e-tests/testcases/suite_test.go +++ /dev/null @@ -1,25 +0,0 @@ -package testcases - -import ( - "testing" - - . "github.com/onsi/ginkgo/v2" - . "github.com/onsi/gomega" -) - -// TestE2ETestcases is the entry point for the Ginkgo test suite -func TestE2ETestcases(t *testing.T) { - RegisterFailHandler(Fail) - RunSpecs(t, "E2E Testcases Suite") -} - -// Suite-level setup -var _ = BeforeSuite(func() { - // Initialize any suite-level resources - // e.g., models, databases, etc. 
-}) - -// Suite-level cleanup -var _ = AfterSuite(func() { - // Cleanup suite-level resources -}) diff --git a/e2e/cmd/e2e/main.go b/e2e/cmd/e2e/main.go index 54ff691f0..3d2e6863b 100644 --- a/e2e/cmd/e2e/main.go +++ b/e2e/cmd/e2e/main.go @@ -12,10 +12,12 @@ import ( aigateway "github.com/vllm-project/semantic-router/e2e/profiles/ai-gateway" aibrix "github.com/vllm-project/semantic-router/e2e/profiles/aibrix" dynamicconfig "github.com/vllm-project/semantic-router/e2e/profiles/dynamic-config" + routingstrategies "github.com/vllm-project/semantic-router/e2e/profiles/routing-strategies" // Import profiles to register test cases _ "github.com/vllm-project/semantic-router/e2e/profiles/ai-gateway" _ "github.com/vllm-project/semantic-router/e2e/profiles/aibrix" + _ "github.com/vllm-project/semantic-router/e2e/profiles/routing-strategies" ) const version = "v1.0.0" @@ -103,6 +105,8 @@ func getProfile(name string) (framework.Profile, error) { return dynamicconfig.NewProfile(), nil case "aibrix": return aibrix.NewProfile(), nil + case "routing-strategies": + return routingstrategies.NewProfile(), nil // Add more profiles here as they are implemented // case "istio": // return istio.NewProfile(), nil diff --git a/e2e/profiles/routing-strategies/config.yaml b/e2e/profiles/routing-strategies/config.yaml new file mode 100644 index 000000000..167aa5136 --- /dev/null +++ b/e2e/profiles/routing-strategies/config.yaml @@ -0,0 +1,638 @@ +bert_model: + model_id: models/all-MiniLM-L12-v2 + threshold: 0.6 + use_cpu: true + +semantic_cache: + enabled: true + backend_type: "memory" # Options: "memory", "milvus", or "hybrid" + similarity_threshold: 0.8 + max_entries: 1000 # Only applies to memory backend + ttl_seconds: 3600 + eviction_policy: "fifo" + # HNSW index configuration (for memory backend only) + use_hnsw: true # Enable HNSW index for faster similarity search + hnsw_m: 16 # Number of bi-directional links (higher = better recall, more memory) + hnsw_ef_construction: 200 # 
Construction parameter (higher = better quality, slower build) + + # Hybrid cache configuration (when backend_type: "hybrid") + # Combines in-memory HNSW for fast search with Milvus for scalable storage + # max_memory_entries: 100000 # Max entries in HNSW index (default: 100,000) + # backend_config_path: "config/milvus.yaml" # Path to Milvus config + + # Embedding model for semantic similarity matching + # Options: "bert" (fast, 384-dim), "qwen3" (high quality, 1024-dim, 32K context), "gemma" (balanced, 768-dim, 8K context) + # Default: "bert" (fastest, lowest memory) + embedding_model: "bert" + +tools: + enabled: true + top_k: 3 + similarity_threshold: 0.2 + tools_db_path: "config/tools_db.json" + fallback_to_empty: true + +prompt_guard: + enabled: true # Global default - can be overridden per category with jailbreak_enabled + use_modernbert: true + model_id: "models/jailbreak_classifier_modernbert-base_model" + threshold: 0.7 + use_cpu: true + jailbreak_mapping_path: "models/jailbreak_classifier_modernbert-base_model/jailbreak_type_mapping.json" + +# vLLM Endpoints Configuration +# IMPORTANT: 'address' field must be a valid IP address (IPv4 or IPv6) +# Supported formats: 127.0.0.1, 192.168.1.1, ::1, 2001:db8::1 +# NOT supported: domain names (example.com), protocol prefixes (http://), paths (/api), ports in address (use 'port' field) +vllm_endpoints: + - name: "endpoint1" + address: "172.28.0.20" # Static IPv4 of llm-katan within docker compose network + port: 8002 + weight: 1 + +model_config: + "qwen3": + reasoning_family: "qwen3" # This model uses Qwen-3 reasoning syntax + preferred_endpoints: ["endpoint1"] + +# Classifier configuration +classifier: + category_model: + model_id: "models/category_classifier_modernbert-base_model" + use_modernbert: true + threshold: 0.6 + use_cpu: true + category_mapping_path: "models/category_classifier_modernbert-base_model/category_mapping.json" + pii_model: + model_id: 
"models/pii_classifier_modernbert-base_presidio_token_model" + use_modernbert: true + threshold: 0.7 + use_cpu: true + pii_mapping_path: "models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json" + +keyword_rules: + - category: "urgent_request" + operator: "OR" + keywords: ["urgent", "immediate", "asap"] + case_sensitive: false + - category: "sensitive_data" + operator: "AND" + keywords: ["SSN", "social security number", "credit card"] + case_sensitive: false + - category: "exclude_spam" + operator: "NOR" + keywords: ["buy now", "free money"] + case_sensitive: false + - category: "regex_pattern_match" + operator: "OR" + keywords: ["user\\.name@domain\\.com", "C:\\Program Files\\\\"] # Keywords are treated as regex + case_sensitive: false + +# Categories define domain metadata only (no routing logic) +categories: + - name: urgent_request + description: "Urgent and time-sensitive requests" + mmlu_categories: ["urgent_request"] + - name: sensitive_data + description: "Requests involving sensitive personal data" + mmlu_categories: ["sensitive_data"] + - name: exclude_spam + description: "Potential spam or suspicious requests" + mmlu_categories: ["exclude_spam"] + - name: regex_pattern_match + description: "Structured data and pattern-based requests" + mmlu_categories: ["regex_pattern_match"] + - name: business + description: "Business and management related queries" + mmlu_categories: ["business"] + - name: law + description: "Legal questions and law-related topics" + mmlu_categories: ["law"] + - name: psychology + description: "Psychology and mental health topics" + mmlu_categories: ["psychology"] + - name: biology + description: "Biology and life sciences questions" + mmlu_categories: ["biology"] + - name: chemistry + description: "Chemistry and chemical sciences questions" + mmlu_categories: ["chemistry"] + - name: history + description: "Historical questions and cultural topics" + mmlu_categories: ["history"] + - name: other + description: 
"General knowledge and miscellaneous topics" + mmlu_categories: ["other"] + - name: health + description: "Health and medical information queries" + mmlu_categories: ["health"] + - name: economics + description: "Economics and financial topics" + mmlu_categories: ["economics"] + - name: math + description: "Mathematics and quantitative reasoning" + mmlu_categories: ["math"] + - name: physics + description: "Physics and physical sciences" + mmlu_categories: ["physics"] + - name: computer_science + description: "Computer science and programming" + mmlu_categories: ["computer_science"] + - name: philosophy + description: "Philosophy and ethical questions" + mmlu_categories: ["philosophy"] + - name: engineering + description: "Engineering and technical problem-solving" + mmlu_categories: ["engineering"] + +strategy: "priority" + +decisions: + - name: "urgent_request_decision" + description: "Urgent and time-sensitive requests" + priority: 150 + rules: + operator: "AND" + conditions: + - type: "domain" + name: "urgent_request" + modelRefs: + - model: "qwen3" + use_reasoning: false + plugins: + - type: "system_prompt" + configuration: + system_prompt: "You are a highly responsive assistant specialized in handling urgent requests. Prioritize speed and efficiency while maintaining accuracy. Provide concise, actionable responses and focus on immediate solutions." + - type: "pii" + configuration: + enabled: true + pii_types_allowed: [] + + - name: "sensitive_data_decision" + description: "Requests involving sensitive personal data" + priority: 150 + rules: + operator: "AND" + conditions: + - type: "domain" + name: "sensitive_data" + modelRefs: + - model: "qwen3" + use_reasoning: false + plugins: + - type: "system_prompt" + configuration: + system_prompt: "You are a security-conscious assistant specialized in handling sensitive data. Exercise extreme caution with personal information, follow data protection best practices, and remind users about privacy considerations." 
+ - type: "jailbreak" + configuration: + enabled: true + threshold: 0.6 + - type: "pii" + configuration: + enabled: true + pii_types_allowed: [] + + - name: "exclude_spam_decision" + description: "Potential spam or suspicious requests" + priority: 150 + rules: + operator: "AND" + conditions: + - type: "domain" + name: "exclude_spam" + modelRefs: + - model: "qwen3" + use_reasoning: false + plugins: + - type: "system_prompt" + configuration: + system_prompt: "You are a content moderation assistant. This request has been flagged as potential spam. Please verify the legitimacy of the request before proceeding." + - type: "pii" + configuration: + enabled: true + pii_types_allowed: [] + + - name: "regex_pattern_match_decision" + description: "Structured data and pattern-based requests" + priority: 150 + rules: + operator: "AND" + conditions: + - type: "domain" + name: "regex_pattern_match" + modelRefs: + - model: "qwen3" + use_reasoning: false + plugins: + - type: "system_prompt" + configuration: + system_prompt: "You are a technical assistant specialized in handling structured data and pattern-based requests. Provide precise, format-aware responses." + - type: "pii" + configuration: + enabled: true + pii_types_allowed: [] + + # Standard category decisions (similar to config.yaml) + - name: "business_decision" + description: "Business and management related queries" + priority: 100 + rules: + operator: "AND" + conditions: + - type: "domain" + name: "business" + modelRefs: + - model: "qwen3" + use_reasoning: false + plugins: + - type: "system_prompt" + configuration: + system_prompt: "You are a senior business consultant and strategic advisor with expertise in corporate strategy, operations management, financial analysis, marketing, and organizational development. Provide practical, actionable business advice backed by proven methodologies and industry best practices. Consider market dynamics, competitive landscape, and stakeholder interests in your recommendations." 
+ - type: "pii" + configuration: + enabled: true + pii_types_allowed: [] + + # Remaining standard category decisions (law through engineering) + - name: "law_decision" + description: "Legal questions and law-related topics" + priority: 100 + rules: + operator: "AND" + conditions: + - type: "domain" + name: "law" + modelRefs: + - model: "qwen3" + use_reasoning: false + plugins: + - type: "system_prompt" + configuration: + system_prompt: "You are a knowledgeable legal expert with comprehensive understanding of legal principles, case law, statutory interpretation, and legal procedures across multiple jurisdictions. Provide accurate legal information and analysis while clearly stating that your responses are for informational purposes only and do not constitute legal advice. Always recommend consulting with qualified legal professionals for specific legal matters." + - type: "pii" + configuration: + enabled: true + pii_types_allowed: [] + + - name: "psychology_decision" + description: "Psychology and mental health topics" + priority: 100 + rules: + operator: "AND" + conditions: + - type: "domain" + name: "psychology" + modelRefs: + - model: "qwen3" + use_reasoning: false + plugins: + - type: "system_prompt" + configuration: + system_prompt: "You are a psychology expert with deep knowledge of cognitive processes, behavioral patterns, mental health, developmental psychology, social psychology, and therapeutic approaches. Provide evidence-based insights grounded in psychological research and theory. When discussing mental health topics, emphasize the importance of professional consultation and avoid providing diagnostic or therapeutic advice."
+ - type: "semantic-cache" + configuration: + enabled: true + similarity_threshold: 0.92 + - type: "pii" + configuration: + enabled: true + pii_types_allowed: [] + + - name: "biology_decision" + description: "Biology and life sciences questions" + priority: 100 + rules: + operator: "AND" + conditions: + - type: "domain" + name: "biology" + modelRefs: + - model: "qwen3" + use_reasoning: false + plugins: + - type: "system_prompt" + configuration: + system_prompt: "You are a biology expert with comprehensive knowledge spanning molecular biology, genetics, cell biology, ecology, evolution, anatomy, physiology, and biotechnology. Explain biological concepts with scientific accuracy, use appropriate terminology, and provide examples from current research. Connect biological principles to real-world applications and emphasize the interconnectedness of biological systems." + - type: "pii" + configuration: + enabled: true + pii_types_allowed: [] + + - name: "chemistry_decision" + description: "Chemistry and chemical sciences questions" + priority: 100 + rules: + operator: "AND" + conditions: + - type: "domain" + name: "chemistry" + modelRefs: + - model: "qwen3" + use_reasoning: true + plugins: + - type: "system_prompt" + configuration: + system_prompt: "You are a chemistry expert specializing in chemical reactions, molecular structures, and laboratory techniques. Provide detailed, step-by-step explanations." + - type: "pii" + configuration: + enabled: true + pii_types_allowed: [] + + - name: "history_decision" + description: "Historical questions and cultural topics" + priority: 100 + rules: + operator: "AND" + conditions: + - type: "domain" + name: "history" + modelRefs: + - model: "qwen3" + use_reasoning: false + plugins: + - type: "system_prompt" + configuration: + system_prompt: "You are a historian with expertise across different time periods and cultures. Provide accurate historical context and analysis." 
+ - type: "pii" + configuration: + enabled: true + pii_types_allowed: [] + + - name: "health_decision" + description: "Health and medical information queries" + priority: 100 + rules: + operator: "AND" + conditions: + - type: "domain" + name: "health" + modelRefs: + - model: "qwen3" + use_reasoning: false + plugins: + - type: "system_prompt" + configuration: + system_prompt: "You are a health and medical information expert with knowledge of anatomy, physiology, diseases, treatments, preventive care, nutrition, and wellness. Provide accurate, evidence-based health information while emphasizing that your responses are for educational purposes only and should never replace professional medical advice, diagnosis, or treatment. Always encourage users to consult healthcare professionals for medical concerns and emergencies." + - type: "semantic-cache" + configuration: + enabled: true + similarity_threshold: 0.95 + - type: "pii" + configuration: + enabled: true + pii_types_allowed: [] + + - name: "economics_decision" + description: "Economics and financial topics" + priority: 100 + rules: + operator: "AND" + conditions: + - type: "domain" + name: "economics" + modelRefs: + - model: "qwen3" + use_reasoning: false + plugins: + - type: "system_prompt" + configuration: + system_prompt: "You are an economics expert with deep understanding of microeconomics, macroeconomics, econometrics, financial markets, monetary policy, fiscal policy, international trade, and economic theory. Analyze economic phenomena using established economic principles, provide data-driven insights, and explain complex economic concepts in accessible terms. Consider both theoretical frameworks and real-world applications in your responses." 
+ - type: "pii" + configuration: + enabled: true + pii_types_allowed: [] + + - name: "math_decision" + description: "Mathematics and quantitative reasoning" + priority: 100 + rules: + operator: "AND" + conditions: + - type: "domain" + name: "math" + modelRefs: + - model: "qwen3" + use_reasoning: true + plugins: + - type: "system_prompt" + configuration: + system_prompt: "You are a mathematics expert. Provide step-by-step solutions, show your work clearly, and explain mathematical concepts in an understandable way." + - type: "pii" + configuration: + enabled: true + pii_types_allowed: [] + + - name: "physics_decision" + description: "Physics and physical sciences" + priority: 100 + rules: + operator: "AND" + conditions: + - type: "domain" + name: "physics" + modelRefs: + - model: "qwen3" + use_reasoning: true + plugins: + - type: "system_prompt" + configuration: + system_prompt: "You are a physics expert with deep understanding of physical laws and phenomena. Provide clear explanations with mathematical derivations when appropriate." + - type: "pii" + configuration: + enabled: true + pii_types_allowed: [] + + - name: "computer_science_decision" + description: "Computer science and programming" + priority: 100 + rules: + operator: "AND" + conditions: + - type: "domain" + name: "computer_science" + modelRefs: + - model: "qwen3" + use_reasoning: false + plugins: + - type: "system_prompt" + configuration: + system_prompt: "You are a computer science expert with knowledge of algorithms, data structures, programming languages, and software engineering. Provide clear, practical solutions with code examples when helpful." 
+ - type: "pii" + configuration: + enabled: true + pii_types_allowed: [] + + - name: "philosophy_decision" + description: "Philosophy and ethical questions" + priority: 100 + rules: + operator: "AND" + conditions: + - type: "domain" + name: "philosophy" + modelRefs: + - model: "qwen3" + use_reasoning: false + plugins: + - type: "system_prompt" + configuration: + system_prompt: "You are a philosophy expert with comprehensive knowledge of philosophical traditions, ethical theories, logic, metaphysics, epistemology, political philosophy, and the history of philosophical thought. Engage with complex philosophical questions by presenting multiple perspectives, analyzing arguments rigorously, and encouraging critical thinking. Draw connections between philosophical concepts and contemporary issues while maintaining intellectual honesty about the complexity and ongoing nature of philosophical debates." + - type: "pii" + configuration: + enabled: true + pii_types_allowed: [] + + - name: "engineering_decision" + description: "Engineering and technical problem-solving" + priority: 100 + rules: + operator: "AND" + conditions: + - type: "domain" + name: "engineering" + modelRefs: + - model: "qwen3" + use_reasoning: false + plugins: + - type: "system_prompt" + configuration: + system_prompt: "You are an engineering expert with knowledge across multiple engineering disciplines including mechanical, electrical, civil, chemical, software, and systems engineering. Apply engineering principles, design methodologies, and problem-solving approaches to provide practical solutions. Consider safety, efficiency, sustainability, and cost-effectiveness in your recommendations. Use technical precision while explaining concepts clearly, and emphasize the importance of proper engineering practices and standards." 
+ - type: "pii" + configuration: + enabled: true + pii_types_allowed: [] + + - name: "general_decision" + description: "General knowledge and miscellaneous topics" + priority: 50 + rules: + operator: "AND" + conditions: + - type: "domain" + name: "other" + modelRefs: + - model: "qwen3" + use_reasoning: false + plugins: + - type: "system_prompt" + configuration: + system_prompt: "You are a helpful and knowledgeable assistant. Provide accurate, helpful responses across a wide range of topics." + - type: "semantic-cache" + configuration: + enabled: true + similarity_threshold: 0.75 + - type: "pii" + configuration: + enabled: true + pii_types_allowed: [] + + +# Router Configuration for Dual-Path Selection +router: + # High confidence threshold for automatic LoRA selection + high_confidence_threshold: 0.99 + # Low latency threshold in milliseconds for LoRA path selection + low_latency_threshold_ms: 2000 + # Baseline scores for path evaluation + lora_baseline_score: 0.8 + traditional_baseline_score: 0.7 + embedding_baseline_score: 0.75 + # Success rate calculation threshold + success_confidence_threshold: 0.8 + # Large batch size threshold for parallel processing + large_batch_threshold: 4 + # Default performance metrics (milliseconds) + lora_default_execution_time_ms: 1345 + traditional_default_execution_time_ms: 4567 + # Default processing requirements + default_confidence_threshold: 0.95 + default_max_latency_ms: 5000 + default_batch_size: 4 + default_avg_execution_time_ms: 3000 + # Default confidence and success rates + lora_default_confidence: 0.99 + traditional_default_confidence: 0.95 + lora_default_success_rate: 0.98 + traditional_default_success_rate: 0.95 + # Scoring weights for intelligent path selection (balanced approach) + multi_task_lora_weight: 0.30 # LoRA advantage for multi-task processing + single_task_traditional_weight: 0.30 # Traditional advantage for single tasks + large_batch_lora_weight: 0.25 # LoRA advantage for large batches (≥4) + 
small_batch_traditional_weight: 0.25 # Traditional advantage for single items + medium_batch_weight: 0.10 # Neutral weight for medium batches (2-3) + high_confidence_lora_weight: 0.25 # LoRA advantage for high confidence (≥0.99) + low_confidence_traditional_weight: 0.25 # Traditional for lower confidence (≤0.9) + low_latency_lora_weight: 0.30 # LoRA advantage for low latency (≤2000ms) + high_latency_traditional_weight: 0.10 # Traditional acceptable for relaxed timing + performance_history_weight: 0.20 # Historical performance comparison factor + # Traditional model specific configurations + traditional_bert_confidence_threshold: 0.95 # Traditional BERT confidence threshold + traditional_modernbert_confidence_threshold: 0.8 # Traditional ModernBERT confidence threshold + traditional_pii_detection_threshold: 0.5 # Traditional PII detection confidence threshold + traditional_token_classification_threshold: 0.9 # Traditional token classification threshold + traditional_dropout_prob: 0.1 # Traditional model dropout probability + traditional_attention_dropout_prob: 0.1 # Traditional model attention dropout probability + tie_break_confidence: 0.5 # Confidence value for tie-breaking situations + +default_model: qwen3 + +# Reasoning family configurations +reasoning_families: + deepseek: + type: "chat_template_kwargs" + parameter: "thinking" + + qwen3: + type: "chat_template_kwargs" + parameter: "enable_thinking" + + gpt-oss: + type: "reasoning_effort" + parameter: "reasoning_effort" + gpt: + type: "reasoning_effort" + parameter: "reasoning_effort" + +# Global default reasoning effort level +default_reasoning_effort: high + +# API Configuration +api: + batch_classification: + max_batch_size: 100 + concurrency_threshold: 5 + max_concurrency: 8 + metrics: + enabled: true + detailed_goroutine_tracking: true + high_resolution_timing: false + sample_rate: 1.0 + duration_buckets: + [0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30] + size_buckets: [1, 2, 5, 10, 
20, 50, 100, 200] + +# Embedding Models Configuration +# These models provide intelligent embedding generation with automatic routing: +# - Qwen3-Embedding-0.6B: Up to 32K context, high quality, +# - EmbeddingGemma-300M: Up to 8K context, fast inference, Matryoshka support (768/512/256/128) +embedding_models: + qwen3_model_path: "models/Qwen3-Embedding-0.6B" + gemma_model_path: "models/embeddinggemma-300m" + use_cpu: true # Set to false for GPU acceleration (requires CUDA) + +# Observability Configuration +observability: + tracing: + enabled: true # Enable distributed tracing for docker-compose stack + provider: "opentelemetry" # Provider: opentelemetry, openinference, openllmetry + exporter: + type: "otlp" # Export spans to Jaeger (via OTLP gRPC) + endpoint: "jaeger:4317" # Jaeger collector inside compose network + insecure: true # Use insecure connection (no TLS) + sampling: + type: "always_on" # Sampling: always_on, always_off, probabilistic + rate: 1.0 # Sampling rate for probabilistic (0.0-1.0) + resource: + service_name: "vllm-semantic-router" + service_version: "v0.1.0" + deployment_environment: "development" diff --git a/e2e/profiles/routing-strategies/profile.go b/e2e/profiles/routing-strategies/profile.go new file mode 100644 index 000000000..70d1be7c8 --- /dev/null +++ b/e2e/profiles/routing-strategies/profile.go @@ -0,0 +1,325 @@ +package routingstrategies + +import ( + "context" + "fmt" + "os" + "os/exec" + "time" + + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/tools/clientcmd" + + "github.com/vllm-project/semantic-router/e2e/pkg/framework" + "github.com/vllm-project/semantic-router/e2e/pkg/helm" + "github.com/vllm-project/semantic-router/e2e/pkg/helpers" + + // Import testcases package to register all test cases via their init() functions + _ "github.com/vllm-project/semantic-router/e2e/testcases" +) + +// Profile implements the Routing Strategies test profile +type Profile struct { + verbose bool +} + +// NewProfile creates a new Routing 
Strategies profile +func NewProfile() *Profile { + return &Profile{} +} + +// Name returns the profile name +func (p *Profile) Name() string { + return "routing-strategies" +} + +// Description returns the profile description +func (p *Profile) Description() string { + return "Tests different routing strategies including keyword-based routing" +} + +// Setup deploys all required components for Routing Strategies testing +func (p *Profile) Setup(ctx context.Context, opts *framework.SetupOptions) error { + p.verbose = opts.Verbose + p.log("Setting up Routing Strategies test environment") + + deployer := helm.NewDeployer(opts.KubeConfig, opts.Verbose) + + // Step 1: Deploy Semantic Router with keyword routing configuration + p.log("Step 1/4: Deploying Semantic Router with keyword routing config") + if err := p.deploySemanticRouter(ctx, deployer, opts); err != nil { + return fmt.Errorf("failed to deploy semantic router: %w", err) + } + + // Step 2: Deploy Envoy Gateway + p.log("Step 2/4: Deploying Envoy Gateway") + if err := p.deployEnvoyGateway(ctx, deployer, opts); err != nil { + return fmt.Errorf("failed to deploy envoy gateway: %w", err) + } + + // Step 3: Deploy Envoy AI Gateway + p.log("Step 3/4: Deploying Envoy AI Gateway") + if err := p.deployEnvoyAIGateway(ctx, deployer, opts); err != nil { + return fmt.Errorf("failed to deploy envoy ai gateway: %w", err) + } + + // Step 4: Deploy Demo LLM and Gateway API Resources + p.log("Step 4/5: Deploying Demo LLM and Gateway API Resources") + if err := p.deployGatewayResources(ctx, opts); err != nil { + return fmt.Errorf("failed to deploy gateway resources: %w", err) + } + + // Step 5: Verify all components are ready + p.log("Step 5/5: Verifying all components are ready") + if err := p.verifyEnvironment(ctx, opts); err != nil { + return fmt.Errorf("failed to verify environment: %w", err) + } + + p.log("Routing Strategies test environment setup complete") + return nil +} + +// Teardown cleans up all deployed resources 
+func (p *Profile) Teardown(ctx context.Context, opts *framework.TeardownOptions) error { + p.verbose = opts.Verbose + p.log("Tearing down Routing Strategies test environment") + + deployer := helm.NewDeployer(opts.KubeConfig, opts.Verbose) + + // Clean up in reverse order + p.log("Cleaning up Gateway API resources") + p.cleanupGatewayResources(ctx, opts) + + p.log("Uninstalling Envoy AI Gateway") + deployer.Uninstall(ctx, "aieg-crd", "envoy-ai-gateway-system") + deployer.Uninstall(ctx, "aieg", "envoy-ai-gateway-system") + + p.log("Uninstalling Envoy Gateway") + deployer.Uninstall(ctx, "eg", "envoy-gateway-system") + + p.log("Uninstalling Semantic Router") + deployer.Uninstall(ctx, "semantic-router", "vllm-semantic-router-system") + + p.log("Routing Strategies test environment teardown complete") + return nil +} + +// GetTestCases returns the list of test cases for this profile +func (p *Profile) GetTestCases() []string { + return []string{ + "keyword-routing", + } +} + +// GetServiceConfig returns the service configuration for accessing the deployed service +func (p *Profile) GetServiceConfig() framework.ServiceConfig { + return framework.ServiceConfig{ + LabelSelector: "gateway.envoyproxy.io/owning-gateway-namespace=default,gateway.envoyproxy.io/owning-gateway-name=semantic-router", + Namespace: "envoy-gateway-system", + PortMapping: "8080:80", + } +} + +func (p *Profile) deploySemanticRouter(ctx context.Context, deployer *helm.Deployer, opts *framework.SetupOptions) error { + // Use local Helm chart with keyword routing configuration + chartPath := "deploy/helm/semantic-router" + valuesFile := "e2e/profiles/routing-strategies/values.yaml" + + // Override image to use locally built image + imageRepo := "ghcr.io/vllm-project/semantic-router/extproc" + imageTag := opts.ImageTag + + installOpts := helm.InstallOptions{ + ReleaseName: "semantic-router", + Chart: chartPath, + Namespace: "vllm-semantic-router-system", + ValuesFiles: []string{valuesFile}, + Set: 
map[string]string{ + "image.repository": imageRepo, + "image.tag": imageTag, + "image.pullPolicy": "Never", // Use local image, don't pull from registry + }, + Wait: true, + Timeout: "30m", + } + + if err := deployer.Install(ctx, installOpts); err != nil { + return err + } + + return deployer.WaitForDeployment(ctx, "vllm-semantic-router-system", "semantic-router", 10*time.Minute) +} + +func (p *Profile) deployEnvoyGateway(ctx context.Context, deployer *helm.Deployer, _ *framework.SetupOptions) error { + installOpts := helm.InstallOptions{ + ReleaseName: "eg", + Chart: "oci://docker.io/envoyproxy/gateway-helm", + Namespace: "envoy-gateway-system", + Version: "v0.0.0-latest", + ValuesFiles: []string{"https://raw.githubusercontent.com/envoyproxy/ai-gateway/main/manifests/envoy-gateway-values.yaml"}, + Wait: true, + Timeout: "10m", + } + + if err := deployer.Install(ctx, installOpts); err != nil { + return err + } + + return deployer.WaitForDeployment(ctx, "envoy-gateway-system", "envoy-gateway", 10*time.Minute) +} + +func (p *Profile) deployEnvoyAIGateway(ctx context.Context, deployer *helm.Deployer, _ *framework.SetupOptions) error { + // Install AI Gateway CRDs + crdOpts := helm.InstallOptions{ + ReleaseName: "aieg-crd", + Chart: "oci://docker.io/envoyproxy/ai-gateway-crds-helm", + Namespace: "envoy-ai-gateway-system", + Version: "v0.0.0-latest", + Wait: true, + Timeout: "10m", + } + + if err := deployer.Install(ctx, crdOpts); err != nil { + return err + } + + // Install AI Gateway + installOpts := helm.InstallOptions{ + ReleaseName: "aieg", + Chart: "oci://docker.io/envoyproxy/ai-gateway-helm", + Namespace: "envoy-ai-gateway-system", + Version: "v0.0.0-latest", + Wait: true, + Timeout: "10m", + } + + if err := deployer.Install(ctx, installOpts); err != nil { + return err + } + + return deployer.WaitForDeployment(ctx, "envoy-ai-gateway-system", "ai-gateway-controller", 10*time.Minute) +} + +func (p *Profile) deployGatewayResources(ctx context.Context, opts 
*framework.SetupOptions) error { + // Apply base model + if err := p.kubectlApply(ctx, opts.KubeConfig, "deploy/kubernetes/ai-gateway/aigw-resources/base-model.yaml"); err != nil { + return fmt.Errorf("failed to apply base model: %w", err) + } + + // Apply gateway API resources + if err := p.kubectlApply(ctx, opts.KubeConfig, "deploy/kubernetes/ai-gateway/aigw-resources/gwapi-resources.yaml"); err != nil { + return fmt.Errorf("failed to apply gateway API resources: %w", err) + } + + return nil +} + +func (p *Profile) verifyEnvironment(ctx context.Context, opts *framework.SetupOptions) error { + // Create Kubernetes client + config, err := clientcmd.BuildConfigFromFlags("", opts.KubeConfig) + if err != nil { + return fmt.Errorf("failed to build kubeconfig: %w", err) + } + + client, err := kubernetes.NewForConfig(config) + if err != nil { + return fmt.Errorf("failed to create kube client: %w", err) + } + + // Wait for Envoy Gateway service to be ready with retry + retryTimeout := 10 * time.Minute + retryInterval := 5 * time.Second + startTime := time.Now() + + p.log("Waiting for Envoy Gateway service to be ready...") + + // Label selector for the semantic-router gateway service + labelSelector := "gateway.envoyproxy.io/owning-gateway-namespace=default,gateway.envoyproxy.io/owning-gateway-name=semantic-router" + + var envoyService string + for { + // Try to get Envoy service name + envoyService, err = helpers.GetEnvoyServiceName(ctx, client, labelSelector, p.verbose) + if err == nil { + // Verify that the service has exactly 1 pod running with all containers ready + podErr := helpers.VerifyServicePodsRunning(ctx, client, "envoy-gateway-system", envoyService, p.verbose) + if podErr == nil { + p.log("Envoy Gateway service is ready: %s", envoyService) + break + } + if p.verbose { + p.log("Envoy service found but pods not ready: %v", podErr) + } + err = fmt.Errorf("service pods not ready: %w", podErr) + } + + if time.Since(startTime) >= retryTimeout { + return 
fmt.Errorf("failed to get Envoy service with running pods after %v: %w", retryTimeout, err) + } + + if p.verbose { + p.log("Envoy service not ready, retrying in %v... (elapsed: %v)", + retryInterval, time.Since(startTime).Round(time.Second)) + } + + select { + case <-ctx.Done(): + return ctx.Err() + case <-time.After(retryInterval): + // Continue retry + } + } + + // Check all deployments are healthy + p.log("Verifying all deployments are healthy...") + + // Check semantic-router deployment + if err := helpers.CheckDeployment(ctx, client, "vllm-semantic-router-system", "semantic-router", p.verbose); err != nil { + return fmt.Errorf("semantic-router deployment not healthy: %w", err) + } + + // Check envoy-gateway deployment + if err := helpers.CheckDeployment(ctx, client, "envoy-gateway-system", "envoy-gateway", p.verbose); err != nil { + return fmt.Errorf("envoy-gateway deployment not healthy: %w", err) + } + + // Check ai-gateway-controller deployment + if err := helpers.CheckDeployment(ctx, client, "envoy-ai-gateway-system", "ai-gateway-controller", p.verbose); err != nil { + return fmt.Errorf("ai-gateway-controller deployment not healthy: %w", err) + } + + p.log("All deployments are healthy") + + return nil +} + +func (p *Profile) cleanupGatewayResources(ctx context.Context, opts *framework.TeardownOptions) error { + // Delete in reverse order + p.kubectlDelete(ctx, opts.KubeConfig, "deploy/kubernetes/ai-gateway/aigw-resources/gwapi-resources.yaml") + p.kubectlDelete(ctx, opts.KubeConfig, "deploy/kubernetes/ai-gateway/aigw-resources/base-model.yaml") + return nil +} + +func (p *Profile) kubectlApply(ctx context.Context, kubeConfig, manifest string) error { + return p.runKubectl(ctx, kubeConfig, "apply", "-f", manifest) +} + +func (p *Profile) kubectlDelete(ctx context.Context, kubeConfig, manifest string) error { + return p.runKubectl(ctx, kubeConfig, "delete", "-f", manifest) +} + +func (p *Profile) runKubectl(ctx context.Context, kubeConfig string, args 
...string) error { + args = append(args, "--kubeconfig", kubeConfig) + cmd := exec.CommandContext(ctx, "kubectl", args...) + if p.verbose { + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + } + return cmd.Run() +} + +func (p *Profile) log(format string, args ...interface{}) { + if p.verbose { + fmt.Printf("[Routing-Strategies] "+format+"\n", args...) + } +} diff --git a/e2e/profiles/routing-strategies/values.yaml b/e2e/profiles/routing-strategies/values.yaml new file mode 100644 index 000000000..81b4d7695 --- /dev/null +++ b/e2e/profiles/routing-strategies/values.yaml @@ -0,0 +1,343 @@ +# Semantic Router Configuration for Routing Strategies E2E Tests +config: + bert_model: + model_id: models/all-MiniLM-L12-v2 + threshold: 0.6 + use_cpu: true + + semantic_cache: + enabled: true + backend_type: "memory" # Options: "memory", "milvus", or "hybrid" + similarity_threshold: 0.8 + max_entries: 1000 # Only applies to memory backend + ttl_seconds: 3600 + eviction_policy: "fifo" + # HNSW index configuration (for memory backend only) + use_hnsw: true # Enable HNSW index for faster similarity search + hnsw_m: 16 # Number of bi-directional links (higher = better recall, more memory) + hnsw_ef_construction: 200 # Construction parameter (higher = better quality, slower build) + + # Embedding model for semantic similarity matching + # Options: "bert" (fast, 384-dim), "qwen3" (high quality, 1024-dim, 32K context), "gemma" (balanced, 768-dim, 8K context) + # Default: "bert" (fastest, lowest memory) + embedding_model: "bert" + + tools: + enabled: true + top_k: 3 + similarity_threshold: 0.2 + tools_db_path: "config/tools_db.json" + fallback_to_empty: true + + prompt_guard: + enabled: true # Global default - can be overridden per category with jailbreak_enabled + use_modernbert: true + model_id: "models/jailbreak_classifier_modernbert-base_model" + threshold: 0.7 + use_cpu: true + jailbreak_mapping_path: "models/jailbreak_classifier_modernbert-base_model/jailbreak_type_mapping.json" + 
+ # Classifier configuration + classifier: + category_model: + model_id: "models/category_classifier_modernbert-base_model" + use_modernbert: true + threshold: 0.6 + use_cpu: true + category_mapping_path: "models/category_classifier_modernbert-base_model/category_mapping.json" + pii_model: + model_id: "models/pii_classifier_modernbert-base_presidio_token_model" + use_modernbert: true + threshold: 0.7 + use_cpu: true + pii_mapping_path: "models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json" + + keyword_rules: + - category: "urgent_request" + operator: "OR" + keywords: ["urgent", "immediate", "asap"] + case_sensitive: false + - category: "sensitive_data" + operator: "AND" + keywords: ["SSN", "social security number", "credit card"] + case_sensitive: false + - category: "exclude_spam" + operator: "NOR" + keywords: ["buy now", "free money"] + case_sensitive: false + - category: "regex_pattern_match" + operator: "OR" + keywords: ["user\\.name@domain\\.com", "C:\\Program Files\\\\"] # Keywords are treated as regex + case_sensitive: false + + # Categories define domain metadata only (no routing logic) + categories: + - name: urgent_request + description: "Urgent and time-sensitive requests" + mmlu_categories: ["urgent_request"] + - name: sensitive_data + description: "Requests involving sensitive personal data" + mmlu_categories: ["sensitive_data"] + - name: exclude_spam + description: "Potential spam or suspicious requests" + mmlu_categories: ["exclude_spam"] + - name: regex_pattern_match + description: "Structured data and pattern-based requests" + mmlu_categories: ["regex_pattern_match"] + - name: business + description: "Business and management related queries" + mmlu_categories: ["business"] + - name: law + description: "Legal questions and law-related topics" + mmlu_categories: ["law"] + - name: psychology + description: "Psychology and mental health topics" + mmlu_categories: ["psychology"] + - name: biology + description: "Biology and 
life sciences questions" + mmlu_categories: ["biology"] + - name: chemistry + description: "Chemistry and chemical sciences questions" + mmlu_categories: ["chemistry"] + - name: history + description: "Historical questions and cultural topics" + mmlu_categories: ["history"] + - name: other + description: "General knowledge and miscellaneous topics" + mmlu_categories: ["other"] + - name: health + description: "Health and medical information queries" + mmlu_categories: ["health"] + - name: economics + description: "Economics and financial topics" + mmlu_categories: ["economics"] + - name: math + description: "Mathematics and quantitative reasoning" + mmlu_categories: ["math"] + - name: physics + description: "Physics and physical sciences" + mmlu_categories: ["physics"] + - name: computer_science + description: "Computer science and programming" + mmlu_categories: ["computer_science"] + - name: philosophy + description: "Philosophy and ethical questions" + mmlu_categories: ["philosophy"] + - name: engineering + description: "Engineering and technical problem-solving" + mmlu_categories: ["engineering"] + + strategy: "priority" + + # NOTE: vLLM endpoints will be set via environment variables in the container + # For E2E tests, we don't need to configure them in the Helm values + vllm_endpoints: [] + + model_config: {} + + decisions: + - name: "urgent_request_decision" + description: "Urgent and time-sensitive requests" + priority: 150 + rules: + operator: "AND" + conditions: + - type: "domain" + name: "urgent_request" + modelRefs: + - model: "qwen3" + use_reasoning: false + plugins: + - type: "system_prompt" + configuration: + system_prompt: "You are a highly responsive assistant specialized in handling urgent requests. Prioritize speed and efficiency while maintaining accuracy. Provide concise, actionable responses and focus on immediate solutions." 
+ - type: "pii" + configuration: + enabled: true + pii_types_allowed: [] + + - name: "sensitive_data_decision" + description: "Requests involving sensitive personal data" + priority: 150 + rules: + operator: "AND" + conditions: + - type: "domain" + name: "sensitive_data" + modelRefs: + - model: "qwen3" + use_reasoning: false + plugins: + - type: "system_prompt" + configuration: + system_prompt: "You are a security-conscious assistant specialized in handling sensitive data. Exercise extreme caution with personal information, follow data protection best practices, and remind users about privacy considerations." + - type: "jailbreak" + configuration: + enabled: true + threshold: 0.6 + - type: "pii" + configuration: + enabled: true + pii_types_allowed: [] + + - name: "exclude_spam_decision" + description: "Potential spam or suspicious requests" + priority: 150 + rules: + operator: "AND" + conditions: + - type: "domain" + name: "exclude_spam" + modelRefs: + - model: "qwen3" + use_reasoning: false + plugins: + - type: "system_prompt" + configuration: + system_prompt: "You are a content moderation assistant. This request has been flagged as potential spam. Please verify the legitimacy of the request before proceeding." + - type: "pii" + configuration: + enabled: true + pii_types_allowed: [] + + - name: "regex_pattern_match_decision" + description: "Structured data and pattern-based requests" + priority: 150 + rules: + operator: "AND" + conditions: + - type: "domain" + name: "regex_pattern_match" + modelRefs: + - model: "qwen3" + use_reasoning: false + plugins: + - type: "system_prompt" + configuration: + system_prompt: "You are a technical assistant specialized in handling structured data and pattern-based requests. Provide precise, format-aware responses." 
+ - type: "pii" + configuration: + enabled: true + pii_types_allowed: [] + + # Standard category decisions + - name: "business_decision" + description: "Business and management related queries" + priority: 100 + rules: + operator: "AND" + conditions: + - type: "domain" + name: "business" + modelRefs: + - model: "qwen3" + use_reasoning: false + plugins: + - type: "system_prompt" + configuration: + system_prompt: "You are a senior business consultant and strategic advisor with expertise in corporate strategy, operations management, financial analysis, marketing, and organizational development. Provide practical, actionable business advice backed by proven methodologies and industry best practices." + - type: "pii" + configuration: + enabled: true + pii_types_allowed: [] + + - name: "general_decision" + description: "General knowledge and miscellaneous topics" + priority: 50 + rules: + operator: "AND" + conditions: + - type: "domain" + name: "other" + modelRefs: + - model: "qwen3" + use_reasoning: false + plugins: + - type: "system_prompt" + configuration: + system_prompt: "You are a helpful and knowledgeable assistant. Provide accurate, helpful responses across a wide range of topics." 
+ - type: "semantic-cache" + configuration: + enabled: true + similarity_threshold: 0.75 + - type: "pii" + configuration: + enabled: true + pii_types_allowed: [] + + # Router Configuration for Dual-Path Selection + router: + high_confidence_threshold: 0.99 + low_latency_threshold_ms: 2000 + lora_baseline_score: 0.8 + traditional_baseline_score: 0.7 + embedding_baseline_score: 0.75 + success_confidence_threshold: 0.8 + large_batch_threshold: 4 + lora_default_execution_time_ms: 1345 + traditional_default_execution_time_ms: 4567 + default_confidence_threshold: 0.95 + default_max_latency_ms: 5000 + default_batch_size: 4 + default_avg_execution_time_ms: 3000 + lora_default_confidence: 0.99 + traditional_default_confidence: 0.95 + lora_default_success_rate: 0.98 + traditional_default_success_rate: 0.95 + + default_model: qwen3 + + # Reasoning family configurations + reasoning_families: + deepseek: + type: "chat_template_kwargs" + parameter: "thinking" + qwen3: + type: "chat_template_kwargs" + parameter: "enable_thinking" + gpt-oss: + type: "reasoning_effort" + parameter: "reasoning_effort" + gpt: + type: "reasoning_effort" + parameter: "reasoning_effort" + + default_reasoning_effort: high + + # API Configuration + api: + batch_classification: + max_batch_size: 100 + concurrency_threshold: 5 + max_concurrency: 8 + metrics: + enabled: true + detailed_goroutine_tracking: true + high_resolution_timing: false + sample_rate: 1.0 + duration_buckets: + [0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30] + size_buckets: [1, 2, 5, 10, 20, 50, 100, 200] + + # Embedding Models Configuration + embedding_models: + qwen3_model_path: "models/Qwen3-Embedding-0.6B" + gemma_model_path: "models/embeddinggemma-300m" + use_cpu: true + + # Observability Configuration + observability: + tracing: + enabled: true + provider: "opentelemetry" + exporter: + type: "otlp" + endpoint: "jaeger:4317" + insecure: true + sampling: + type: "always_on" + rate: 1.0 + resource: + service_name: 
"vllm-semantic-router" + service_version: "v0.1.0" + deployment_environment: "development" diff --git a/e2e/testcases/keyword_routing.go b/e2e/testcases/keyword_routing.go new file mode 100644 index 000000000..21d71a1bc --- /dev/null +++ b/e2e/testcases/keyword_routing.go @@ -0,0 +1,328 @@ +package testcases + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "os" + "reflect" + "strings" + "time" + + pkgtestcases "github.com/vllm-project/semantic-router/e2e/pkg/testcases" + "k8s.io/client-go/kubernetes" +) + +func init() { + pkgtestcases.Register("keyword-routing", pkgtestcases.TestCase{ + Description: "Test keyword routing accuracy and verify matched keywords", + Tags: []string{"ai-gateway", "routing", "keyword"}, + Fn: testKeywordRouting, + }) +} + +// KeywordRoutingCase represents a test case for keyword routing +type KeywordRoutingCase struct { + Name string `json:"name"` + Description string `json:"description"` + Query string `json:"query"` + ExpectedCategory string `json:"expected_category"` + ExpectedConfidence float64 `json:"expected_confidence"` + MatchedKeywords []string `json:"matched_keywords"` +} + +// KeywordRoutingResult tracks the result of a single keyword routing test +type KeywordRoutingResult struct { + Name string + Query string + ExpectedCategory string + ActualCategory string + ExpectedKeywords []string + ActualKeywords []string + Correct bool + KeywordsMatch bool + Error string +} + +func testKeywordRouting(ctx context.Context, client *kubernetes.Clientset, opts pkgtestcases.TestCaseOptions) error { + if opts.Verbose { + fmt.Println("[Test] Testing keyword routing accuracy") + } + + // Setup service connection and get local port + localPort, stopPortForward, err := setupServiceConnection(ctx, client, opts) + if err != nil { + return err + } + defer stopPortForward() // Ensure port forwarding is stopped when test completes + + // Load test cases from JSON file + testCases, err := 
loadKeywordRoutingCases("e2e/testcases/testdata/keyword_routing_cases.json") + if err != nil { + return fmt.Errorf("failed to load test cases: %w", err) + } + + // Run keyword routing tests + var results []KeywordRoutingResult + totalTests := 0 + correctTests := 0 + keywordsCorrect := 0 + + for _, testCase := range testCases { + totalTests++ + result := testSingleKeywordRouting(ctx, testCase, localPort, opts.Verbose) + results = append(results, result) + if result.Correct { + correctTests++ + } + if result.KeywordsMatch { + keywordsCorrect++ + } + } + + // Calculate accuracy + accuracy := float64(correctTests) / float64(totalTests) * 100 + keywordAccuracy := float64(keywordsCorrect) / float64(totalTests) * 100 + + // Set details for reporting + if opts.SetDetails != nil { + opts.SetDetails(map[string]interface{}{ + "total_tests": totalTests, + "correct_tests": correctTests, + "accuracy_rate": fmt.Sprintf("%.2f%%", accuracy), + "keywords_correct": keywordsCorrect, + "keyword_accuracy": fmt.Sprintf("%.2f%%", keywordAccuracy), + "failed_tests": totalTests - correctTests, + }) + } + + // Print results + printKeywordRoutingResults(results, totalTests, correctTests, keywordsCorrect, accuracy, keywordAccuracy) + + if opts.Verbose { + fmt.Printf("[Test] Keyword routing test completed: %d/%d correct (%.2f%% accuracy), %d/%d keywords matched (%.2f%%)\n", + correctTests, totalTests, accuracy, keywordsCorrect, totalTests, keywordAccuracy) + } + + // Return error if accuracy is 0% + if correctTests == 0 { + return fmt.Errorf("keyword routing test failed: 0%% accuracy (0/%d correct)", totalTests) + } + + return nil +} + +func loadKeywordRoutingCases(filepath string) ([]KeywordRoutingCase, error) { + data, err := os.ReadFile(filepath) + if err != nil { + return nil, fmt.Errorf("failed to read test cases file: %w", err) + } + + var cases []KeywordRoutingCase + if err := json.Unmarshal(data, &cases); err != nil { + return nil, fmt.Errorf("failed to parse test cases: %w", err) + } + 
+ return cases, nil +} + +func testSingleKeywordRouting(ctx context.Context, testCase KeywordRoutingCase, localPort string, verbose bool) KeywordRoutingResult { + result := KeywordRoutingResult{ + Name: testCase.Name, + Query: testCase.Query, + ExpectedCategory: testCase.ExpectedCategory, + ExpectedKeywords: testCase.MatchedKeywords, + } + + // Create chat completion request + requestBody := map[string]interface{}{ + "model": "MoM", + "messages": []map[string]string{ + {"role": "user", "content": testCase.Query}, + }, + } + + jsonData, err := json.Marshal(requestBody) + if err != nil { + result.Error = fmt.Sprintf("failed to marshal request: %v", err) + return result + } + + // Send request + url := fmt.Sprintf("http://localhost:%s/v1/chat/completions", localPort) + req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewBuffer(jsonData)) + if err != nil { + result.Error = fmt.Sprintf("failed to create request: %v", err) + return result + } + req.Header.Set("Content-Type", "application/json") + + httpClient := &http.Client{Timeout: 30 * time.Second} + resp, err := httpClient.Do(req) + if err != nil { + result.Error = fmt.Sprintf("failed to send request: %v", err) + return result + } + defer resp.Body.Close() + + // Check response status + if resp.StatusCode != http.StatusOK { + bodyBytes, _ := io.ReadAll(resp.Body) + + // Log detailed error information including headers + var errorMsg strings.Builder + errorMsg.WriteString(fmt.Sprintf("Unexpected status code: %d\n", resp.StatusCode)) + errorMsg.WriteString(fmt.Sprintf("Response body: %s\n", string(bodyBytes))) + errorMsg.WriteString("Response headers:\n") + errorMsg.WriteString(formatResponseHeaders(resp.Header)) + + result.Error = errorMsg.String() + + // Print detailed error to console for debugging + if verbose { + fmt.Printf("[Test] ✗ HTTP %d Error for test case: %s\n", resp.StatusCode, testCase.Name) + fmt.Printf(" Query: %s\n", testCase.Query) + fmt.Printf(" Expected category: %s\n", 
testCase.ExpectedCategory) + fmt.Printf(" Response Headers:\n%s", formatResponseHeaders(resp.Header)) + fmt.Printf(" Response Body: %s\n", string(bodyBytes)) + } + + return result + } + + // Extract routing headers + result.ActualCategory = resp.Header.Get("x-vsr-selected-category") + + // Parse matched keywords from header (assuming comma-separated) + keywordsHeader := resp.Header.Get("x-vsr-matched-keywords") + if keywordsHeader != "" { + result.ActualKeywords = strings.Split(keywordsHeader, ",") + // Trim whitespace from each keyword + for i, kw := range result.ActualKeywords { + result.ActualKeywords[i] = strings.TrimSpace(kw) + } + } else { + result.ActualKeywords = []string{} + } + + // Check if category is correct + result.Correct = (result.ActualCategory == testCase.ExpectedCategory) + + // Check if matched keywords are correct + // For empty expected keywords, also expect empty actual keywords + if len(testCase.MatchedKeywords) == 0 && len(result.ActualKeywords) == 0 { + result.KeywordsMatch = true + } else { + // Compare keyword lists (order-independent) + result.KeywordsMatch = keywordListsMatch(testCase.MatchedKeywords, result.ActualKeywords) + } + + if verbose && (!result.Correct || !result.KeywordsMatch) { + fmt.Printf("[Test] Test case failed: %s\n", testCase.Name) + if !result.Correct { + fmt.Printf(" Category mismatch: query='%s', expected=%s, actual=%s\n", + testCase.Query, testCase.ExpectedCategory, result.ActualCategory) + } + if !result.KeywordsMatch { + fmt.Printf(" Keywords mismatch: expected=%v, actual=%v\n", + testCase.MatchedKeywords, result.ActualKeywords) + } + } + + return result +} + +// keywordListsMatch checks if two keyword lists match (order-independent) +func keywordListsMatch(expected, actual []string) bool { + if len(expected) != len(actual) { + return false + } + + // Create maps for order-independent comparison + expectedMap := make(map[string]bool) + for _, kw := range expected { + expectedMap[kw] = true + } + + actualMap := 
make(map[string]bool) + for _, kw := range actual { + actualMap[kw] = true + } + + return reflect.DeepEqual(expectedMap, actualMap) +} + +func printKeywordRoutingResults(results []KeywordRoutingResult, totalTests, correctTests, keywordsCorrect int, accuracy, keywordAccuracy float64) { + separator := "================================================================================" + fmt.Println("\n" + separator) + fmt.Println("KEYWORD ROUTING TEST RESULTS") + fmt.Println(separator) + fmt.Printf("Total Tests: %d\n", totalTests) + fmt.Printf("Correct Categories: %d (%.2f%%)\n", correctTests, accuracy) + fmt.Printf("Correct Keyword Matches: %d (%.2f%%)\n", keywordsCorrect, keywordAccuracy) + fmt.Println(separator) + + // Print failed category matches + categoryFailures := 0 + for _, result := range results { + if !result.Correct && result.Error == "" { + categoryFailures++ + } + } + + if categoryFailures > 0 { + fmt.Println("\nFailed Category Matches:") + for _, result := range results { + if !result.Correct && result.Error == "" { + fmt.Printf(" - Test: %s\n", result.Name) + fmt.Printf(" Query: %s\n", result.Query) + fmt.Printf(" Expected Category: %s\n", result.ExpectedCategory) + fmt.Printf(" Actual Category: %s\n", result.ActualCategory) + } + } + } + + // Print failed keyword matches + keywordFailures := 0 + for _, result := range results { + if !result.KeywordsMatch && result.Error == "" { + keywordFailures++ + } + } + + if keywordFailures > 0 { + fmt.Println("\nFailed Keyword Matches:") + for _, result := range results { + if !result.KeywordsMatch && result.Error == "" { + fmt.Printf(" - Test: %s\n", result.Name) + fmt.Printf(" Query: %s\n", result.Query) + fmt.Printf(" Expected Keywords: %v\n", result.ExpectedKeywords) + fmt.Printf(" Actual Keywords: %v\n", result.ActualKeywords) + } + } + } + + // Print errors + errorCount := 0 + for _, result := range results { + if result.Error != "" { + errorCount++ + } + } + + if errorCount > 0 { + 
fmt.Println("\nErrors:") + for _, result := range results { + if result.Error != "" { + fmt.Printf(" - Test: %s\n", result.Name) + fmt.Printf(" Query: %s\n", result.Query) + fmt.Printf(" Error: %s\n", result.Error) + } + } + } + + fmt.Println(separator + "\n") +} diff --git a/e2e-tests/testcases/testdata/keyword_routing_cases.json b/e2e/testcases/testdata/keyword_routing_cases.json similarity index 83% rename from e2e-tests/testcases/testdata/keyword_routing_cases.json rename to e2e/testcases/testdata/keyword_routing_cases.json index c28a4a8cb..01a99d6b9 100644 --- a/e2e-tests/testcases/testdata/keyword_routing_cases.json +++ b/e2e/testcases/testdata/keyword_routing_cases.json @@ -206,5 +206,45 @@ "expected_category": "urgent_request", "expected_confidence": 1.0, "matched_keywords": ["urgent"] + }, + { + "name": "NOR operator - clean text passes", + "description": "Test NOR operator with text containing no spam keywords", + "query": "I need help with my account", + "expected_category": "exclude_spam", + "expected_confidence": 1.0, + "matched_keywords": [] + }, + { + "name": "NOR operator - buy now fails", + "description": "Test NOR operator rejects text with 'buy now' spam keyword", + "query": "Click here to buy now and save money", + "expected_category": "", + "expected_confidence": 0.0, + "matched_keywords": [] + }, + { + "name": "NOR operator - free money fails", + "description": "Test NOR operator rejects text with 'free money' spam keyword", + "query": "Get free money with this amazing offer", + "expected_category": "", + "expected_confidence": 0.0, + "matched_keywords": [] + }, + { + "name": "NOR operator - multiple spam keywords fails", + "description": "Test NOR operator rejects text with multiple spam keywords", + "query": "Buy now and get free money today", + "expected_category": "", + "expected_confidence": 0.0, + "matched_keywords": [] + }, + { + "name": "NOR operator - partial word no match", + "description": "Test NOR operator allows text with partial 
keyword match (buying vs buy)", + "query": "I am buying a new product", + "expected_category": "exclude_spam", + "expected_confidence": 1.0, + "matched_keywords": [] } ]