From 4e3f91ec75906d03349262b5a1a5d14f9248d972 Mon Sep 17 00:00:00 2001 From: Yehudit Kerido Date: Tue, 18 Nov 2025 09:25:03 +0200 Subject: [PATCH 1/2] feat: Add aibrix profile for E2E testing framework Signed-off-by: Yehudit Kerido --- .github/workflows/integration-test-k8s.yml | 21 +- e2e/README.md | 2 +- e2e/cmd/e2e/main.go | 4 + e2e/profiles/aibrix/profile.go | 474 +++++++++++++++++++++ tools/make/e2e.mk | 1 + 5 files changed, 493 insertions(+), 9 deletions(-) create mode 100644 e2e/profiles/aibrix/profile.go diff --git a/.github/workflows/integration-test-k8s.yml b/.github/workflows/integration-test-k8s.yml index 45c4ee59e..aa1d14109 100644 --- a/.github/workflows/integration-test-k8s.yml +++ b/.github/workflows/integration-test-k8s.yml @@ -13,6 +13,10 @@ jobs: integration-test: runs-on: ubuntu-latest timeout-minutes: 60 + strategy: + fail-fast: false # Continue testing other profiles even if one fails + matrix: + profile: [ai-gateway, aibrix] steps: - name: Check out the repo @@ -61,11 +65,11 @@ jobs: run: | make build-e2e - - name: Run Integration E2E tests + - name: Run Integration E2E tests (${{ matrix.profile }}) id: e2e-test run: | set +e # Don't exit on error, we want to capture the result - make e2e-test E2E_PROFILE=ai-gateway E2E_VERBOSE=true E2E_KEEP_CLUSTER=false + make e2e-test E2E_PROFILE=${{ matrix.profile }} E2E_VERBOSE=true E2E_KEEP_CLUSTER=false TEST_EXIT_CODE=$? echo "test_exit_code=${TEST_EXIT_CODE}" >> $GITHUB_OUTPUT exit ${TEST_EXIT_CODE} @@ -74,7 +78,7 @@ jobs: if: always() uses: actions/upload-artifact@v4 with: - name: test-reports + name: test-reports-${{ matrix.profile }} path: | test-report.json test-report.md @@ -123,25 +127,26 @@ jobs: fi # Add additional context - cat >> $GITHUB_STEP_SUMMARY << 'EOF' + cat >> $GITHUB_STEP_SUMMARY << EOF --- ### 📚 Additional Resources + - **Profile:** \`${{ matrix.profile }}\` - **Trigger:** ${{ github.event_name }} - - **Branch:** `${{ github.ref_name }}` - - **Commit:** `${{ github.sha }}` + - **Branch:** \`${{ github.ref_name }}\` + - **Commit:** \`${{ github.sha }}\` - **Workflow Run:** [${{ github.run_id }}](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}) - [E2E Test Framework Documentation](https://github.com/${{ github.repository }}/tree/main/e2e) - - [AI Gateway Profile](https://github.com/${{ github.repository }}/tree/main/e2e/profiles/ai-gateway) + - [${{ matrix.profile }} Profile](https://github.com/${{ github.repository }}/tree/main/e2e/profiles/${{ matrix.profile }}) ### 📦 Artifacts - **test-report.json** - Detailed test results in JSON format - **test-report.md** - Human-readable test report - **semantic-router-logs.txt** - Complete semantic-router pod logs - - All artifacts are retained for 30 days + - All artifacts are retained for 30 days as \`test-reports-${{ matrix.profile }}\` EOF else echo "⚠️ Test report file not found!" >> $GITHUB_STEP_SUMMARY diff --git a/e2e/README.md b/e2e/README.md index d71fb043a..777169dc3 100644 --- a/e2e/README.md +++ b/e2e/README.md @@ -13,11 +13,11 @@ The framework follows a **separation of concerns** design: ### Supported Profiles - **ai-gateway**: Tests Semantic Router with Envoy AI Gateway integration +- **aibrix**: Tests Semantic Router with vLLM AIBrix integration - **istio**: Tests Semantic Router with Istio Gateway (future) - **production-stack**: Tests vLLM Production Stack configurations (future) - **llm-d**: Tests with LLM-D (future) - **dynamo**: Tests with Nvidia Dynamo (future) -- **aibrix**: Tests with vLLM AIBrix (future) ## Directory Structure diff --git a/e2e/cmd/e2e/main.go b/e2e/cmd/e2e/main.go index a7f8b53eb..54ff691f0 100644 --- a/e2e/cmd/e2e/main.go +++ b/e2e/cmd/e2e/main.go @@ -10,10 +10,12 @@ import ( "github.com/vllm-project/semantic-router/e2e/pkg/banner" "github.com/vllm-project/semantic-router/e2e/pkg/framework" aigateway "github.com/vllm-project/semantic-router/e2e/profiles/ai-gateway" + aibrix "github.com/vllm-project/semantic-router/e2e/profiles/aibrix" dynamicconfig "github.com/vllm-project/semantic-router/e2e/profiles/dynamic-config" // Import profiles to register test cases _ "github.com/vllm-project/semantic-router/e2e/profiles/ai-gateway" + _ "github.com/vllm-project/semantic-router/e2e/profiles/aibrix" ) const version = "v1.0.0" @@ -99,6 +101,8 @@ func getProfile(name string) (framework.Profile, error) { return aigateway.NewProfile(), nil case "dynamic-config": return dynamicconfig.NewProfile(), nil + case "aibrix": + return aibrix.NewProfile(), nil // Add more profiles here as they are implemented // case "istio": // return istio.NewProfile(), nil diff --git a/e2e/profiles/aibrix/profile.go b/e2e/profiles/aibrix/profile.go new file mode 100644 index 000000000..a8910569d --- /dev/null +++ b/e2e/profiles/aibrix/profile.go @@ -0,0 +1,474 @@ +package aibrix + +import ( + "context" + "fmt" + "os" + "os/exec" + "time" + + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/tools/clientcmd" + + "github.com/vllm-project/semantic-router/e2e/pkg/framework" + "github.com/vllm-project/semantic-router/e2e/pkg/helm" + "github.com/vllm-project/semantic-router/e2e/pkg/helpers" + + // Import testcases package to register all test cases via their init() functions + _ "github.com/vllm-project/semantic-router/e2e/testcases" +) + +const ( + // Version Configuration + // AIBrix version - can be overridden via AIBRIX_VERSION environment variable + defaultAIBrixVersion = "v0.4.1" + + // Kubernetes Namespaces - used frequently throughout + namespaceSemanticRouter = "vllm-semantic-router-system" + namespaceEnvoyGateway = "envoy-gateway-system" + namespaceAIBrix = "aibrix-system" + + // Deployment Names - used in multiple verification steps + deploymentSemanticRouter = "semantic-router" + deploymentEnvoyGateway = "envoy-gateway" + deploymentAIBrixGatewayPlugins = "aibrix-gateway-plugins" + deploymentAIBrixMetadataService = "aibrix-metadata-service" + deploymentAIBrixControllerManager = "aibrix-controller-manager" + deploymentDemoLLM = "vllm-llama3-8b-instruct" + + // Label Selectors - complex values + labelSelectorAIBrixGateway = "gateway.envoyproxy.io/owning-gateway-namespace=aibrix-system,gateway.envoyproxy.io/owning-gateway-name=aibrix-eg" + + // Timeouts - configuration values for tuning + timeoutSemanticRouterDeploy = 20 * time.Minute + timeoutComponentDeploy = 2 * time.Minute // For Envoy Gateway, AIBrix plugins/metadata, Demo LLM + timeoutWebhookDeploy = 5 * time.Minute // For webhook-enabled components (controller-manager) + timeoutEnvoyServiceReady = 10 * time.Minute + timeoutStabilization = 60 * time.Second // Increased for CI environments + retryInterval = 5 * time.Second +) + +// Profile implements the AIBrix test profile +type Profile struct { + verbose bool + aibrixVersion string +} + +// NewProfile creates a new AIBrix profile +func NewProfile() *Profile { + // Allow version override via environment variable + version := os.Getenv("AIBRIX_VERSION") + if version == "" { + version = defaultAIBrixVersion + } + return &Profile{ + aibrixVersion: version, + } +} + +// Name returns the profile name +func (p *Profile) Name() string { + return "aibrix" +} + +// Description returns the profile description +func (p *Profile) Description() string { + return "Tests Semantic Router with vLLM AIBrix integration" +} + +// Setup deploys all required components for AIBrix testing +func (p *Profile) Setup(ctx context.Context, opts *framework.SetupOptions) error { + p.verbose = opts.Verbose + p.log("Setting up AIBrix test environment") + + deployer := helm.NewDeployer(opts.KubeConfig, opts.Verbose) + + // Track what we've deployed for cleanup on error + var ( + semanticRouterDeployed bool + aibrixDepsDeployed bool + aibrixCoreDeployed bool + gatewayResourcesDeployed bool + ) + + // Ensure cleanup on error + defer func() { + if r := recover(); r != nil { + p.log("Panic during setup, cleaning up...") + p.cleanupPartialDeployment(ctx, opts, semanticRouterDeployed, aibrixDepsDeployed, aibrixCoreDeployed, gatewayResourcesDeployed) + panic(r) // Re-panic after cleanup + } + }() + + // Step 1: Deploy Semantic Router + p.log("Step 1/5: Deploying Semantic Router") + if err := p.deploySemanticRouter(ctx, deployer, opts); err != nil { + return fmt.Errorf("failed to deploy semantic router: %w", err) + } + semanticRouterDeployed = true + + // Step 2: Deploy AIBrix Dependencies + p.log("Step 2/5: Deploying AIBrix Dependencies") + if err := p.deployAIBrixDependencies(ctx, opts); err != nil { + p.cleanupPartialDeployment(ctx, opts, semanticRouterDeployed, false, false, false) + return fmt.Errorf("failed to deploy AIBrix dependencies: %w", err) + } + aibrixDepsDeployed = true + + // Step 3: Deploy AIBrix Core + p.log("Step 3/5: Deploying AIBrix Core") + if err := p.deployAIBrixCore(ctx, opts); err != nil { + p.cleanupPartialDeployment(ctx, opts, semanticRouterDeployed, aibrixDepsDeployed, false, false) + return fmt.Errorf("failed to deploy AIBrix core: %w", err) + } + aibrixCoreDeployed = true + + // Step 4: Deploy Demo LLM and Gateway API Resources + p.log("Step 4/5: Deploying Demo LLM and Gateway API Resources") + if err := p.deployGatewayResources(ctx, opts); err != nil { + p.cleanupPartialDeployment(ctx, opts, semanticRouterDeployed, aibrixDepsDeployed, aibrixCoreDeployed, false) + return fmt.Errorf("failed to deploy gateway resources: %w", err) + } + gatewayResourcesDeployed = true + + // Step 5: Verify all components are ready + p.log("Step 5/5: Verifying all components are ready") + if err := p.verifyEnvironment(ctx, opts); err != nil { + p.log("ERROR: Environment verification failed: %v", err) + p.cleanupPartialDeployment(ctx, opts, semanticRouterDeployed, aibrixDepsDeployed, aibrixCoreDeployed, gatewayResourcesDeployed) + return fmt.Errorf("failed to verify environment: %w", err) + } + + p.log("AIBrix test environment setup complete") + return nil +} + +// Teardown cleans up all deployed resources +func (p *Profile) Teardown(ctx context.Context, opts *framework.TeardownOptions) error { + p.verbose = opts.Verbose + p.log("Tearing down AIBrix test environment") + + deployer := helm.NewDeployer(opts.KubeConfig, opts.Verbose) + + // Clean up in reverse order + p.log("Cleaning up Gateway API resources") + p.cleanupGatewayResources(ctx, opts) + + p.log("Cleaning up AIBrix components") + p.cleanupAIBrix(ctx, opts) + + p.log("Uninstalling Semantic Router") + deployer.Uninstall(ctx, deploymentSemanticRouter, namespaceSemanticRouter) + + p.log("AIBrix test environment teardown complete") + return nil +} + +// GetTestCases returns the list of test cases for this profile +func (p *Profile) GetTestCases() []string { + return []string{ + "chat-completions-request", + "chat-completions-stress-request", + "domain-classify", + "semantic-cache", + "pii-detection", + "jailbreak-detection", + "chat-completions-progressive-stress", + } +} + +// GetServiceConfig returns the service configuration for accessing the deployed service +func (p *Profile) GetServiceConfig() framework.ServiceConfig { + return framework.ServiceConfig{ + LabelSelector: labelSelectorAIBrixGateway, + Namespace: namespaceEnvoyGateway, + PortMapping: "8080:80", + } +} + +func (p *Profile) deploySemanticRouter(ctx context.Context, deployer *helm.Deployer, opts *framework.SetupOptions) error { + // Use local Helm chart instead of remote OCI registry + installOpts := helm.InstallOptions{ + ReleaseName: deploymentSemanticRouter, + Chart: "deploy/helm/semantic-router", + Namespace: namespaceSemanticRouter, + ValuesFiles: []string{"deploy/kubernetes/aibrix/semantic-router-values/values.yaml"}, + Set: map[string]string{ + "image.repository": "ghcr.io/vllm-project/semantic-router/extproc", + "image.tag": opts.ImageTag, + "image.pullPolicy": "Never", // Use local image, don't pull from registry + }, + Wait: true, + Timeout: "20m", // Increased timeout for model downloads + } + + if err := deployer.Install(ctx, installOpts); err != nil { + return err + } + + return deployer.WaitForDeployment(ctx, namespaceSemanticRouter, deploymentSemanticRouter, timeoutSemanticRouterDeploy) +} + +func (p *Profile) deployAIBrixDependencies(ctx context.Context, opts *framework.SetupOptions) error { + // Apply AIBrix dependency components from GitHub release + dependencyURL := fmt.Sprintf("https://github.com/vllm-project/aibrix/releases/download/%s/aibrix-dependency-%s.yaml", + p.aibrixVersion, p.aibrixVersion) + + p.log("Deploying AIBrix dependencies (version: %s)", p.aibrixVersion) + if err := p.kubectlApply(ctx, opts.KubeConfig, dependencyURL); err != nil { + return fmt.Errorf("failed to apply AIBrix dependencies: %w", err) + } + + // Wait for Envoy Gateway to be ready + return p.waitForDeployment(ctx, opts, namespaceEnvoyGateway, deploymentEnvoyGateway, timeoutComponentDeploy) +} + +func (p *Profile) deployAIBrixCore(ctx context.Context, opts *framework.SetupOptions) error { + // Apply AIBrix core components from GitHub release + coreURL := fmt.Sprintf("https://github.com/vllm-project/aibrix/releases/download/%s/aibrix-core-%s.yaml", + p.aibrixVersion, p.aibrixVersion) + + p.log("Deploying AIBrix core (version: %s)", p.aibrixVersion) + if err := p.kubectlApply(ctx, opts.KubeConfig, coreURL); err != nil { + return fmt.Errorf("failed to apply AIBrix core: %w", err) + } + + // Patch aibrix-gateway-plugins to reduce resource requests for CI environments + // The default requests (2 CPU, 8Gi memory) are too high for GitHub Actions runners + p.log("Patching aibrix-gateway-plugins resource requests for CI compatibility...") + patchCmd := exec.CommandContext(ctx, "kubectl", "--kubeconfig", opts.KubeConfig, + "patch", "deployment", deploymentAIBrixGatewayPlugins, + "-n", namespaceAIBrix, + "--type", "json", + "-p", `[ + {"op": "replace", "path": "/spec/template/spec/containers/0/resources/requests/cpu", "value": "500m"}, + {"op": "replace", "path": "/spec/template/spec/containers/0/resources/requests/memory", "value": "1Gi"}, + {"op": "replace", "path": "/spec/template/spec/containers/0/resources/limits/cpu", "value": "1"}, + {"op": "replace", "path": "/spec/template/spec/containers/0/resources/limits/memory", "value": "2Gi"} + ]`) + if p.verbose { + patchCmd.Stdout = os.Stdout + patchCmd.Stderr = os.Stderr + } + if err := patchCmd.Run(); err != nil { + p.log("Warning: Failed to patch resource requests (proceeding anyway): %v", err) + } + + // Wait for AIBrix core components to be ready + deployments := []struct { + namespace string + name string + timeout time.Duration + }{ + {namespaceAIBrix, deploymentAIBrixGatewayPlugins, timeoutComponentDeploy}, + {namespaceAIBrix, deploymentAIBrixMetadataService, timeoutComponentDeploy}, + {namespaceAIBrix, deploymentAIBrixControllerManager, timeoutWebhookDeploy}, // Longer timeout for webhook setup + } + + for _, dep := range deployments { + p.log("Waiting for %s/%s to be ready (timeout: %v)...", dep.namespace, dep.name, dep.timeout) + if err := p.waitForDeployment(ctx, opts, dep.namespace, dep.name, dep.timeout); err != nil { + return fmt.Errorf("deployment %s/%s not ready: %w", dep.namespace, dep.name, err) + } + } + + return nil +} + +func (p *Profile) deployGatewayResources(ctx context.Context, opts *framework.SetupOptions) error { + // Apply base model (Demo LLM) + if err := p.kubectlApply(ctx, opts.KubeConfig, "deploy/kubernetes/aibrix/aigw-resources/base-model.yaml"); err != nil { + return fmt.Errorf("failed to apply base model: %w", err) + } + + // Wait for Demo LLM deployment + if err := p.waitForDeployment(ctx, opts, "default", deploymentDemoLLM, timeoutComponentDeploy); err != nil { + return fmt.Errorf("demo LLM deployment not ready: %w", err) + } + + // Apply gateway API resources + if err := p.kubectlApply(ctx, opts.KubeConfig, "deploy/kubernetes/aibrix/aigw-resources/gwapi-resources.yaml"); err != nil { + return fmt.Errorf("failed to apply gateway API resources: %w", err) + } + + return nil +} + +func (p *Profile) verifyEnvironment(ctx context.Context, opts *framework.SetupOptions) error { + // Create Kubernetes client + config, err := clientcmd.BuildConfigFromFlags("", opts.KubeConfig) + if err != nil { + return fmt.Errorf("failed to build kubeconfig: %w", err) + } + + client, err := kubernetes.NewForConfig(config) + if err != nil { + return fmt.Errorf("failed to create kube client: %w", err) + } + + // Give deployments extra time to stabilize after initial readiness + p.log("Waiting for deployments to stabilize...") + time.Sleep(timeoutStabilization) + + // Wait for Envoy Gateway service to be ready with retry + startTime := time.Now() + + p.log("Waiting for Envoy Gateway service to be ready...") + + var envoyService string + for { + // Try to get Envoy service name + envoyService, err = helpers.GetEnvoyServiceName(ctx, client, labelSelectorAIBrixGateway, p.verbose) + if err == nil { + // Verify that the service has exactly 1 pod running with all containers ready + podErr := helpers.VerifyServicePodsRunning(ctx, client, namespaceEnvoyGateway, envoyService, p.verbose) + if podErr == nil { + p.log("Envoy Gateway service is ready: %s", envoyService) + break + } + if p.verbose { + p.log("Envoy service found but pods not ready: %v", podErr) + } + err = fmt.Errorf("service pods not ready: %w", podErr) + } + + if time.Since(startTime) >= timeoutEnvoyServiceReady { + return fmt.Errorf("failed to get Envoy service with running pods after %v: %w", timeoutEnvoyServiceReady, err) + } + + if p.verbose { + p.log("Envoy service not ready, retrying in %v... (elapsed: %v)", + retryInterval, time.Since(startTime).Round(time.Second)) + } + + select { + case <-ctx.Done(): + return ctx.Err() + case <-time.After(retryInterval): + // Continue retry + } + } + + // Check all deployments are healthy + p.log("Verifying all deployments are healthy...") + + // Check semantic-router deployment + p.log("Checking semantic-router deployment...") + if err := helpers.CheckDeployment(ctx, client, namespaceSemanticRouter, deploymentSemanticRouter, p.verbose); err != nil { + return fmt.Errorf("semantic-router deployment not healthy: %w", err) + } + + // Check AIBrix deployments + aibrixDeployments := []struct { + namespace string + name string + }{ + {namespaceAIBrix, deploymentAIBrixGatewayPlugins}, + {namespaceAIBrix, deploymentAIBrixMetadataService}, + {namespaceAIBrix, deploymentAIBrixControllerManager}, + } + + for _, dep := range aibrixDeployments { + p.log("Checking %s deployment...", dep.name) + if err := helpers.CheckDeployment(ctx, client, dep.namespace, dep.name, p.verbose); err != nil { + return fmt.Errorf("%s deployment not healthy: %w", dep.name, err) + } + } + + // Check envoy-gateway deployment + p.log("Checking envoy-gateway deployment...") + if err := helpers.CheckDeployment(ctx, client, namespaceEnvoyGateway, deploymentEnvoyGateway, p.verbose); err != nil { + return fmt.Errorf("envoy-gateway deployment not healthy: %w", err) + } + + // Check demo LLM deployment + p.log("Checking demo LLM deployment...") + if err := helpers.CheckDeployment(ctx, client, "default", deploymentDemoLLM, p.verbose); err != nil { + return fmt.Errorf("demo LLM deployment not healthy: %w", err) + } + + p.log("All deployments are healthy") + + return nil +} + +func (p *Profile) cleanupGatewayResources(ctx context.Context, opts *framework.TeardownOptions) error { + // Delete in reverse order + p.kubectlDelete(ctx, opts.KubeConfig, "deploy/kubernetes/aibrix/aigw-resources/gwapi-resources.yaml") + p.kubectlDelete(ctx, opts.KubeConfig, "deploy/kubernetes/aibrix/aigw-resources/base-model.yaml") + return nil +} + +func (p *Profile) cleanupPartialDeployment(ctx context.Context, opts *framework.SetupOptions, semanticRouter, aibrixDeps, aibrixCore, gatewayResources bool) { + p.log("Cleaning up partial deployment (semanticRouter=%v, aibrixDeps=%v, aibrixCore=%v, gatewayResources=%v)", + semanticRouter, aibrixDeps, aibrixCore, gatewayResources) + + // Create TeardownOptions from SetupOptions + teardownOpts := &framework.TeardownOptions{ + KubeClient: opts.KubeClient, + KubeConfig: opts.KubeConfig, + ClusterName: opts.ClusterName, + Verbose: opts.Verbose, + } + + // Clean up in reverse order + if gatewayResources { + p.log("Cleaning up Gateway API resources...") + p.cleanupGatewayResources(ctx, teardownOpts) + } + + if aibrixCore || aibrixDeps { + p.log("Cleaning up AIBrix components...") + p.cleanupAIBrix(ctx, teardownOpts) + } + + if semanticRouter { + p.log("Uninstalling Semantic Router...") + deployer := helm.NewDeployer(opts.KubeConfig, opts.Verbose) + deployer.Uninstall(ctx, deploymentSemanticRouter, namespaceSemanticRouter) + } + + p.log("Partial deployment cleanup complete") +} + +func (p *Profile) cleanupAIBrix(ctx context.Context, opts *framework.TeardownOptions) error { + // Delete AIBrix core and dependencies + coreURL := fmt.Sprintf("https://github.com/vllm-project/aibrix/releases/download/%s/aibrix-core-%s.yaml", + p.aibrixVersion, p.aibrixVersion) + dependencyURL := fmt.Sprintf("https://github.com/vllm-project/aibrix/releases/download/%s/aibrix-dependency-%s.yaml", + p.aibrixVersion, p.aibrixVersion) + + p.kubectlDelete(ctx, opts.KubeConfig, coreURL) + p.kubectlDelete(ctx, opts.KubeConfig, dependencyURL) + + return nil +} + +func (p *Profile) waitForDeployment(ctx context.Context, opts *framework.SetupOptions, namespace, name string, timeout time.Duration) error { + deployer := helm.NewDeployer(opts.KubeConfig, opts.Verbose) + return deployer.WaitForDeployment(ctx, namespace, name, timeout) +} + +func (p *Profile) kubectlApply(ctx context.Context, kubeConfig, manifest string) error { + return p.runKubectl(ctx, kubeConfig, "apply", "--server-side", "-f", manifest) +} + +func (p *Profile) kubectlDelete(ctx context.Context, kubeConfig, manifest string) error { + return p.runKubectl(ctx, kubeConfig, "delete", "-f", manifest) +} + +func (p *Profile) runKubectl(ctx context.Context, kubeConfig string, args ...string) error { + args = append(args, "--kubeconfig", kubeConfig) + cmd := exec.CommandContext(ctx, "kubectl", args...) + if p.verbose { + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + } + return cmd.Run() +} + +func (p *Profile) log(format string, args ...interface{}) { + if p.verbose { + fmt.Printf("[AIBrix] "+format+"\n", args...) + } +} diff --git a/tools/make/e2e.mk b/tools/make/e2e.mk index 2beae9771..cd8981b89 100644 --- a/tools/make/e2e.mk +++ b/tools/make/e2e.mk @@ -95,6 +95,7 @@ e2e-help: ## Show help for E2E testing @echo "" @echo "Available Profiles:" @echo " ai-gateway - Test Semantic Router with Envoy AI Gateway" + @echo " aibrix - Test Semantic Router with vLLM AIBrix" @echo " istio - Test Semantic Router with Istio (coming soon)" @echo "" @echo "Environment Variables:" From a4a5e9382cb211d7e8fe719725a815a1a8978911 Mon Sep 17 00:00:00 2001 From: yehudit1987 <34643974+yehudit1987@users.noreply.github.com> Date: Tue, 18 Nov 2025 16:45:11 +0200 Subject: [PATCH 2/2] Update e2e/profiles/aibrix/profile.go Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Signed-off-by: yehudit1987 <34643974+yehudit1987@users.noreply.github.com> --- .../aibrix/semantic-router-values/values.yaml | 410 ++++++++++++++---- e2e/profiles/aibrix/profile.go | 2 +- 2 files changed, 338 insertions(+), 74 deletions(-) diff --git a/deploy/kubernetes/aibrix/semantic-router-values/values.yaml b/deploy/kubernetes/aibrix/semantic-router-values/values.yaml index e853beb8b..5b0a242c9 100644 --- a/deploy/kubernetes/aibrix/semantic-router-values/values.yaml +++ b/deploy/kubernetes/aibrix/semantic-router-values/values.yaml @@ -19,107 +19,371 @@ config: - name: "general-expert" description: "General-purpose adapter for diverse topics" - # Categories with LoRA routing - # Each category uses the vllm-llama3-8b-instruct model with a specific LoRA adapter + # Categories - now only contain metadata for domain classification categories: - name: business - system_prompt: "You are a senior business consultant and strategic advisor with expertise in corporate strategy, operations management, financial analysis, marketing, and organizational development. Provide practical, actionable business advice backed by proven methodologies and industry best practices. Consider market dynamics, competitive landscape, and stakeholder interests in your recommendations." - # jailbreak_enabled: true # Optional: Override global jailbreak detection per category - # jailbreak_threshold: 0.8 # Optional: Override global jailbreak threshold per category - model_scores: - - model: vllm-llama3-8b-instruct # Base model name (for endpoint selection and PII policy) - score: 0.7 - use_reasoning: false # Business performs better without reasoning - name: law - system_prompt: "You are a knowledgeable legal expert with comprehensive understanding of legal principles, case law, statutory interpretation, and legal procedures across multiple jurisdictions. Provide accurate legal information and analysis while clearly stating that your responses are for informational purposes only and do not constitute legal advice. Always recommend consulting with qualified legal professionals for specific legal matters." - model_scores: + - name: psychology + - name: biology + - name: chemistry + - name: history + - name: other + - name: health + - name: economics + - name: math + - name: physics + - name: computer science + - name: philosophy + - name: engineering + - name: thinking + + # Decisions - define routing logic with rules, model selection, and plugins + decisions: + - name: business_decision + description: "Business and management related queries" + priority: 10 + rules: + operator: "OR" + conditions: + - type: "domain" + name: "business" + modelRefs: - model: vllm-llama3-8b-instruct - score: 0.4 use_reasoning: false - - name: psychology - system_prompt: "You are a psychology expert with deep knowledge of cognitive processes, behavioral patterns, mental health, developmental psychology, social psychology, and therapeutic approaches. Provide evidence-based insights grounded in psychological research and theory. When discussing mental health topics, emphasize the importance of professional consultation and avoid providing diagnostic or therapeutic advice." - semantic_cache_enabled: true - semantic_cache_similarity_threshold: 0.92 # High threshold for psychology - sensitive to nuances - model_scores: + plugins: + - type: "pii" + configuration: + enabled: true + pii_types_allowed: [] + - type: "system_prompt" + configuration: + enabled: true + system_prompt: "You are a senior business consultant and strategic advisor with expertise in corporate strategy, operations management, financial analysis, marketing, and organizational development. Provide practical, actionable business advice backed by proven methodologies and industry best practices. Consider market dynamics, competitive landscape, and stakeholder interests in your recommendations." + mode: "replace" + + - name: law_decision + description: "Legal questions and law-related topics" + priority: 10 + rules: + operator: "OR" + conditions: + - type: "domain" + name: "law" + modelRefs: - model: vllm-llama3-8b-instruct - score: 0.6 use_reasoning: false - - name: biology - system_prompt: "You are a biology expert with comprehensive knowledge spanning molecular biology, genetics, cell biology, ecology, evolution, anatomy, physiology, and biotechnology. Explain biological concepts with scientific accuracy, use appropriate terminology, and provide examples from current research. Connect biological principles to real-world applications and emphasize the interconnectedness of biological systems." - model_scores: + plugins: + - type: "pii" + configuration: + enabled: true + pii_types_allowed: [] + - type: "system_prompt" + configuration: + enabled: true + system_prompt: "You are a knowledgeable legal expert with comprehensive understanding of legal principles, case law, statutory interpretation, and legal procedures across multiple jurisdictions. Provide accurate legal information and analysis while clearly stating that your responses are for informational purposes only and do not constitute legal advice. Always recommend consulting with qualified legal professionals for specific legal matters." + mode: "replace" + + - name: psychology_decision + description: "Psychology and mental health topics" + priority: 10 + rules: + operator: "OR" + conditions: + - type: "domain" + name: "psychology" + modelRefs: - model: vllm-llama3-8b-instruct - score: 0.9 use_reasoning: false - - name: chemistry - system_prompt: "You are a chemistry expert specializing in chemical reactions, molecular structures, and laboratory techniques. Provide detailed, step-by-step explanations." - model_scores: + plugins: + - type: "pii" + configuration: + enabled: true + pii_types_allowed: [] + - type: "semantic-cache" + configuration: + enabled: true + similarity_threshold: 0.92 + - type: "system_prompt" + configuration: + enabled: true + system_prompt: "You are a psychology expert with deep knowledge of cognitive processes, behavioral patterns, mental health, developmental psychology, social psychology, and therapeutic approaches. Provide evidence-based insights grounded in psychological research and theory. When discussing mental health topics, emphasize the importance of professional consultation and avoid providing diagnostic or therapeutic advice." + mode: "replace" + + - name: biology_decision + description: "Biology and life sciences questions" + priority: 10 + rules: + operator: "OR" + conditions: + - type: "domain" + name: "biology" + modelRefs: - model: vllm-llama3-8b-instruct - score: 0.6 - use_reasoning: true # Enable reasoning for complex chemistry - - name: history - system_prompt: "You are a historian with expertise across different time periods and cultures. Provide accurate historical context and analysis." - model_scores: + use_reasoning: false + plugins: + - type: "pii" + configuration: + enabled: true + pii_types_allowed: [] + - type: "system_prompt" + configuration: + enabled: true + system_prompt: "You are a biology expert with comprehensive knowledge spanning molecular biology, genetics, cell biology, ecology, evolution, anatomy, physiology, and biotechnology. Explain biological concepts with scientific accuracy, use appropriate terminology, and provide examples from current research. Connect biological principles to real-world applications and emphasize the interconnectedness of biological systems." + mode: "replace" + + - name: chemistry_decision + description: "Chemistry and chemical sciences questions" + priority: 10 + rules: + operator: "OR" + conditions: + - type: "domain" + name: "chemistry" + modelRefs: + - model: vllm-llama3-8b-instruct + use_reasoning: true + plugins: + - type: "pii" + configuration: + enabled: true + pii_types_allowed: [] + - type: "system_prompt" + configuration: + enabled: true + system_prompt: "You are a chemistry expert specializing in chemical reactions, molecular structures, and laboratory techniques. Provide detailed, step-by-step explanations." + mode: "replace" + + - name: history_decision + description: "Historical questions and cultural topics" + priority: 10 + rules: + operator: "OR" + conditions: + - type: "domain" + name: "history" + modelRefs: - model: vllm-llama3-8b-instruct - score: 0.7 use_reasoning: false - - name: other - system_prompt: "You are a helpful and knowledgeable assistant. Provide accurate, helpful responses across a wide range of topics." - semantic_cache_enabled: true - semantic_cache_similarity_threshold: 0.75 # Lower threshold for general chat - less sensitive - model_scores: + plugins: + - type: "pii" + configuration: + enabled: true + pii_types_allowed: [] + - type: "system_prompt" + configuration: + enabled: true + system_prompt: "You are a historian with expertise across different time periods and cultures. Provide accurate historical context and analysis." + mode: "replace" + + - name: other_decision + description: "General knowledge and miscellaneous topics" + priority: 5 + rules: + operator: "OR" + conditions: + - type: "domain" + name: "other" + modelRefs: - model: vllm-llama3-8b-instruct - score: 0.7 use_reasoning: false - - name: health - system_prompt: "You are a health and medical information expert with knowledge of anatomy, physiology, diseases, treatments, preventive care, nutrition, and wellness. Provide accurate, evidence-based health information while emphasizing that your responses are for educational purposes only and should never replace professional medical advice, diagnosis, or treatment. Always encourage users to consult healthcare professionals for medical concerns and emergencies." - semantic_cache_enabled: true - semantic_cache_similarity_threshold: 0.95 # High threshold for health - very sensitive to word changes - model_scores: + plugins: + - type: "pii" + configuration: + enabled: true + pii_types_allowed: [] + - type: "semantic-cache" + configuration: + enabled: true + similarity_threshold: 0.75 + - type: "system_prompt" + configuration: + enabled: true + system_prompt: "You are a helpful and knowledgeable assistant. Provide accurate, helpful responses across a wide range of topics." + mode: "replace" + + - name: health_decision + description: "Health and medical information queries" + priority: 10 + rules: + operator: "OR" + conditions: + - type: "domain" + name: "health" + modelRefs: - model: vllm-llama3-8b-instruct - score: 0.5 use_reasoning: false - - name: economics - system_prompt: "You are an economics expert with deep understanding of microeconomics, macroeconomics, econometrics, financial markets, monetary policy, fiscal policy, international trade, and economic theory. Analyze economic phenomena using established economic principles, provide data-driven insights, and explain complex economic concepts in accessible terms. Consider both theoretical frameworks and real-world applications in your responses." - model_scores: + plugins: + - type: "pii" + configuration: + enabled: true + pii_types_allowed: [] + - type: "semantic-cache" + configuration: + enabled: true + similarity_threshold: 0.95 + - type: "system_prompt" + configuration: + enabled: true + system_prompt: "You are a health and medical information expert with knowledge of anatomy, physiology, diseases, treatments, preventive care, nutrition, and wellness. Provide accurate, evidence-based health information while emphasizing that your responses are for educational purposes only and should never replace professional medical advice, diagnosis, or treatment. Always encourage users to consult healthcare professionals for medical concerns and emergencies." + mode: "replace" + + - name: economics_decision + description: "Economics and financial topics" + priority: 10 + rules: + operator: "OR" + conditions: + - type: "domain" + name: "economics" + modelRefs: - model: vllm-llama3-8b-instruct - score: 1.0 use_reasoning: false - - name: math - system_prompt: "You are a mathematics expert. Provide step-by-step solutions, show your work clearly, and explain mathematical concepts in an understandable way." - model_scores: + plugins: + - type: "pii" + configuration: + enabled: true + pii_types_allowed: [] + - type: "system_prompt" + configuration: + enabled: true + system_prompt: "You are an economics expert with deep understanding of microeconomics, macroeconomics, econometrics, financial markets, monetary policy, fiscal policy, international trade, and economic theory. Analyze economic phenomena using established economic principles, provide data-driven insights, and explain complex economic concepts in accessible terms. Consider both theoretical frameworks and real-world applications in your responses." + mode: "replace" + + - name: math_decision + description: "Mathematics and quantitative reasoning" + priority: 10 + rules: + operator: "OR" + conditions: + - type: "domain" + name: "math" + modelRefs: - model: vllm-llama3-8b-instruct - score: 1.0 - use_reasoning: true # Enable reasoning for complex math - - name: physics - system_prompt: "You are a physics expert with deep understanding of physical laws and phenomena. Provide clear explanations with mathematical derivations when appropriate." - model_scores: + use_reasoning: true + plugins: + - type: "pii" + configuration: + enabled: true + pii_types_allowed: [] + - type: "system_prompt" + configuration: + enabled: true + system_prompt: "You are a mathematics expert. Provide step-by-step solutions, show your work clearly, and explain mathematical concepts in an understandable way." + mode: "replace" + + - name: physics_decision + description: "Physics and physical sciences" + priority: 10 + rules: + operator: "OR" + conditions: + - type: "domain" + name: "physics" + modelRefs: - model: vllm-llama3-8b-instruct - score: 0.7 - use_reasoning: true # Enable reasoning for physics - - name: computer science - system_prompt: "You are a computer science expert with knowledge of algorithms, data structures, programming languages, and software engineering. Provide clear, practical solutions with code examples when helpful." - model_scores: + use_reasoning: true + plugins: + - type: "pii" + configuration: + enabled: true + pii_types_allowed: [] + - type: "system_prompt" + configuration: + enabled: true + system_prompt: "You are a physics expert with deep understanding of physical laws and phenomena. Provide clear explanations with mathematical derivations when appropriate." + mode: "replace" + + - name: computer_science_decision + description: "Computer science and programming" + priority: 10 + rules: + operator: "OR" + conditions: + - type: "domain" + name: "computer science" + modelRefs: - model: vllm-llama3-8b-instruct - score: 0.6 use_reasoning: false - - name: philosophy - system_prompt: "You are a philosophy expert with comprehensive knowledge of philosophical traditions, ethical theories, logic, metaphysics, epistemology, political philosophy, and the history of philosophical thought. Engage with complex philosophical questions by presenting multiple perspectives, analyzing arguments rigorously, and encouraging critical thinking. Draw connections between philosophical concepts and contemporary issues while maintaining intellectual honesty about the complexity and ongoing nature of philosophical debates." - model_scores: + plugins: + - type: "pii" + configuration: + enabled: true + pii_types_allowed: [] + - type: "system_prompt" + configuration: + enabled: true + system_prompt: "You are a computer science expert with knowledge of algorithms, data structures, programming languages, and software engineering. Provide clear, practical solutions with code examples when helpful." + mode: "replace" + + - name: philosophy_decision + description: "Philosophy and ethical questions" + priority: 10 + rules: + operator: "OR" + conditions: + - type: "domain" + name: "philosophy" + modelRefs: - model: vllm-llama3-8b-instruct - score: 0.5 use_reasoning: false - - name: engineering - system_prompt: "You are an engineering expert with knowledge across multiple engineering disciplines including mechanical, electrical, civil, chemical, software, and systems engineering. Apply engineering principles, design methodologies, and problem-solving approaches to provide practical solutions. Consider safety, efficiency, sustainability, and cost-effectiveness in your recommendations. Use technical precision while explaining concepts clearly, and emphasize the importance of proper engineering practices and standards." - model_scores: + plugins: + - type: "pii" + configuration: + enabled: true + pii_types_allowed: [] + - type: "system_prompt" + configuration: + enabled: true + system_prompt: "You are a philosophy expert with comprehensive knowledge of philosophical traditions, ethical theories, logic, metaphysics, epistemology, political philosophy, and the history of philosophical thought. Engage with complex philosophical questions by presenting multiple perspectives, analyzing arguments rigorously, and encouraging critical thinking. Draw connections between philosophical concepts and contemporary issues while maintaining intellectual honesty about the complexity and ongoing nature of philosophical debates." + mode: "replace" + + - name: engineering_decision + description: "Engineering and technical problem-solving" + priority: 10 + rules: + operator: "OR" + conditions: + - type: "domain" + name: "engineering" + modelRefs: - model: vllm-llama3-8b-instruct - score: 0.7 use_reasoning: false - - name: thinking - system_prompt: "You are a thinking expert, should think multiple steps before answering. Please answer the question step by step." - model_scores: + plugins: + - type: "pii" + configuration: + enabled: true + pii_types_allowed: [] + - type: "system_prompt" + configuration: + enabled: true + system_prompt: "You are an engineering expert with knowledge across multiple engineering disciplines including mechanical, electrical, civil, chemical, software, and systems engineering. Apply engineering principles, design methodologies, and problem-solving approaches to provide practical solutions. Consider safety, efficiency, sustainability, and cost-effectiveness in your recommendations. Use technical precision while explaining concepts clearly, and emphasize the importance of proper engineering practices and standards." + mode: "replace" + + - name: thinking_decision + description: "Complex reasoning and multi-step thinking" + priority: 15 + rules: + operator: "OR" + conditions: + - type: "domain" + name: "thinking" + modelRefs: - model: vllm-llama3-8b-instruct - score: 0.7 use_reasoning: true + plugins: + - type: "pii" + configuration: + enabled: true + pii_types_allowed: [] + - type: "system_prompt" + configuration: + enabled: true + system_prompt: "You are a thinking expert, should think multiple steps before answering. Please answer the question step by step." + mode: "replace" + + # Strategy for selecting between multiple matching decisions + # Options: "priority" (use decision with highest priority) or "confidence" (use decision with highest confidence) + strategy: "priority" default_model: vllm-llama3-8b-instruct diff --git a/e2e/profiles/aibrix/profile.go b/e2e/profiles/aibrix/profile.go index a8910569d..e25480197 100644 --- a/e2e/profiles/aibrix/profile.go +++ b/e2e/profiles/aibrix/profile.go @@ -458,7 +458,7 @@ func (p *Profile) kubectlDelete(ctx context.Context, kubeConfig, manifest string } func (p *Profile) runKubectl(ctx context.Context, kubeConfig string, args ...string) error { - args = append(args, "--kubeconfig", kubeConfig) + args = append([]string{"--kubeconfig", kubeConfig}, args...) cmd := exec.CommandContext(ctx, "kubectl", args...) if p.verbose { cmd.Stdout = os.Stdout