diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7e50e0b..66a8c3c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -49,6 +49,7 @@ jobs: _mcp/ _memory/ _upgrade/ _api/ \ _adapters/ _learnings/ _analytics/ \ _browser/ _gbrain/ _benchmarks/ \ + _security/ _cache/ _observability/ _doctor/ \ -v --tb=short \ --cov=vstack \ --cov-report=term-missing \ @@ -81,13 +82,15 @@ jobs: run: | ruff check module-1-individual/ module-2-team/ module-3-organization/ \ _mcp/ _memory/ _upgrade/ _api/ _adapters/ _learnings/ _analytics/ \ - _browser/ _gbrain/ _benchmarks/ examples/ _baselines/scripts/ + _browser/ _gbrain/ _benchmarks/ examples/ _baselines/scripts/ \ + _security/ _cache/ _observability/ _doctor/ - name: Run ruff format check run: | ruff format --check module-1-individual/ module-2-team/ module-3-organization/ \ _mcp/ _memory/ _upgrade/ _api/ _adapters/ _learnings/ _analytics/ \ - _browser/ _gbrain/ _benchmarks/ examples/ _baselines/scripts/ + _browser/ _gbrain/ _benchmarks/ examples/ _baselines/scripts/ \ + _security/ _cache/ _observability/ _doctor/ typecheck: name: Typecheck (mypy) @@ -126,6 +129,10 @@ jobs: _browser \ _gbrain \ _benchmarks \ + _security \ + _cache \ + _observability \ + _doctor \ module-2-team/30-aar-generator \ module-2-team/17-lencioni-diagnostic \ module-2-team/18-trust-triangle-audit \ diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 530d0e3..6322906 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -92,6 +92,9 @@ jobs: 'vstack.adapters', 'vstack.learnings', 'vstack.analytics', # surface modules added in v0.5.0 'vstack.browser', 'vstack.gbrain', 'vstack.benchmarks', + # surface modules added in v0.6.0 + 'vstack.security', 'vstack.cache', 'vstack.observability', + 'vstack.doctor', ]: importlib.import_module(ns) print('Release smoke test passed') diff --git a/CHANGELOG.md b/CHANGELOG.md index 2ab2957..bd2f43a 100644 --- a/CHANGELOG.md +++ 
b/CHANGELOG.md @@ -6,6 +6,141 @@ project adheres to [Semantic Versioning](https://semver.org/) from `1.0.0` onward. During the `0.x` series, minor bumps may include breaking changes (see API stability promise in `vstack/__init__.py`). +## [0.6.0] — 2026-05-25 + +Production-hardening release. Adds the security + cache + observability + +diagnostic infrastructure that takes `vstack-api` from "fine for localhost" +to "ready for thousands of concurrent users." + +### Added — `vstack.security` + +- `APIKeyStore` + `APIKey` — SHA-256-hashed in-memory keys with + constant-time verification. Load from `VSTACK_API_KEYS` / + `VSTACK_API_KEYS_FILE`. +- `InMemoryRateLimiter` — sliding-window per-key (or per-IP) limiter + with `RateLimitDecision` + `Retry-After` semantics. +- `RequestLimits` — declarative caps on body size, trace steps, + message count, per-string chars, total chars, request timeout. + Configured via `VSTACK_API_MAX_*` env vars. +- `audit_input_for_injection` / `safe_pattern_name` / `safe_path` / + `safe_subprocess_argv` / `warn_on_suspicious_inputs` — + defense-in-depth helpers for the parts of vstack that take user input. + +### Added — `vstack.cache` + +- `InMemoryLRUCache` + `NullCache` + `CacheBackend` protocol + + `CacheEntry` + `CacheStats`. +- `build_cache_key(pattern, mode, model, trace)` — SHA-256 over the + canonical JSON of the trace + run params; identical traces hit + the cache cleanly. +- `resolve_cache_from_env()` honors `VSTACK_CACHE=memory|off`, + `VSTACK_CACHE_CAPACITY`, `VSTACK_CACHE_TTL_SECONDS`. + +### Added — `vstack.observability` + +- `MetricsRegistry` + `Counter` + `Histogram` + `render_prometheus()` + — hand-rolled Prometheus text-format exporter, no + `prometheus_client` dependency. +- `record_request` / `time_request` — request-level helpers that + populate `vstack_requests_total{surface,pattern,mode,status}` + + `vstack_request_duration_seconds{surface,pattern,mode}`.
+- `REQUEST_ID_HEADER` constant + `get_or_create_request_id` / + `set_current_request_id` / `current_request_id` — `X-Request-ID` + round-trip + contextvar binding for log correlation. +- `install_sentry_if_configured()` — optional `sentry-sdk` + integration via `SENTRY_DSN`. Silently no-ops if the SDK isn't + installed or `SENTRY_DSN` is unset. + +### Added — `vstack.api` hardening + +- **Auth middleware** — `Authorization: Bearer` + `X-API-Key` + support; constant-time comparison; rejects with `401` + + `WWW-Authenticate` when `require_auth=True` and the key is + missing/wrong. +- **Rate-limit middleware** — `429` + `Retry-After` + the + `X-RateLimit-Limit` / `X-RateLimit-Remaining` headers on every + response. +- **Body-size middleware** — rejects oversized POSTs with `413` + before they're decoded. +- **Security-headers middleware** — `X-Content-Type-Options`, + `X-Frame-Options`, `Content-Security-Policy`, `Referrer-Policy`, conditional HSTS. +- **Request-ID middleware** — generates / echoes `X-Request-ID`; + binds to a contextvar for log correlation. +- **CORS middleware** — opt-in via `VSTACK_API_CORS_ORIGINS`. +- **`/readyz` + `/livez` + `/metrics`** — separated K8s probe + semantics (`readyz` flips to `draining` on shutdown); Prometheus + metrics endpoint at `/metrics`. +- **Graceful shutdown** — FastAPI lifespan handler drains in-flight + requests on `SIGTERM`. +- **Async analyze path** — uses analyzer `*Async` mirrors when the + LLM client has `acomplete`; falls back to a thread executor for + the sync analyzer so concurrent HTTP requests don't serialize on + the event loop. +- **Cache integration** — cache lookup happens BEFORE LLM resolution + so a cache hit costs zero LLM round-trips. +- **Request timeout** — server-side per-request deadline + (`VSTACK_API_REQUEST_TIMEOUT`, default 120s); returns `504` on + exceedance. + +### Added — File-store safety + +- `vstack.memory.atomic_write_text` / `atomic_write_bytes` — + tmp-file + `os.replace` for crash-safe writes.
Wired into + `save_config()` and `LearningStore.update_outcome()`. +- `vstack.memory.append_locked` / `shared_read_lock` / `FileLock` — + POSIX advisory locks (with Windows `msvcrt` fallback) for + cross-process JSONL append + read. Wired into `LearningStore.record()` + and `FileTelemetrySink.record()`. + +### Added — `vstack-doctor` diagnostic CLI + +- Audits 25+ checks across Python version, vstack install, pattern + registry, `~/.vstack/` writability, LLM client resolvability, + every documented CLI on PATH, every optional extra, gbrain + reachability, Node.js availability for browser, API auth + misconfiguration, and PyPI upgrade availability. +- `--json` for machine-readable output; `--skip-network` for + air-gapped CI; `--only-errors` for terse output. +- Exit code 1 when any check is ERROR-level; 0 otherwise. + +### Added — Shell completions + +- `completions/vstack.bash`, `completions/_vstack` (zsh), and + `completions/vstack.fish` — tab-completion for all 10 top-level + CLIs + subcommands + key arguments (pattern names, platform + names, path kinds, config keys). +- `completions/README.md` install instructions. + +### Added — Production docs + +- `docs/operations/deploy.md` — minimum production checklist; + Docker-only + Kubernetes Deployment manifests; auth + rate + limiting + request limits + cache + observability config; what + stays in-process vs. needs a shared backend at scale; + troubleshooting. +- `docs/operations/security.md` — three-ring security model + (library guards, configurable API guards, deployment + responsibilities), threat model, audit posture, vulnerability + reporting. + +### Packaging + +- 4 new force-include lines (`_security/lib`, `_cache/lib`, + `_observability/lib`, `_doctor/lib`). +- 1 new `[project.scripts]` entry: `vstack-doctor`. +- 4 new testpaths. +- Version bump 0.5.0 → 0.6.0.
+ +### Tests + +- +113 new tests across `_security/tests/` (53), + `_cache/tests/` (15), `_observability/tests/` (17), + `_doctor/tests/` (8), and `_api/tests/test_api_security.py` (21 + new hardening + caching tests). +- Suite total: **2,088 passing** (up from 1,969 in v0.5.0). +- Mypy strict clean across all 14 surface lib dirs (the 10 from + v0.5.0 + `_security`, `_cache`, `_observability`, `_doctor`). + ## [0.5.0] — 2026-05-25 Phase 3 surface + depth-pass release. Adds the browser dev tooling diff --git a/_analytics/lib/_sink.py b/_analytics/lib/_sink.py index 1f1ed28..3b3cc16 100644 --- a/_analytics/lib/_sink.py +++ b/_analytics/lib/_sink.py @@ -33,10 +33,16 @@ def __init__(self, path: Path | None = None) -> None: def record(self, event: TelemetryEvent) -> None: payload = self._serialize(event) try: - self.path.parent.mkdir(parents=True, exist_ok=True) - with self._lock, self.path.open("a", encoding="utf-8") as f: - f.write(json.dumps(payload)) - f.write("\n") + # Lock the JSONL file via the cross-process advisory lock + # so concurrent vstack processes can't interleave bytes + # on the same line. The in-process `_lock` is still taken + # first (around it) to guard the per-process file handle.
+ from vstack.memory._fs_atomic import append_locked + + with self._lock: + with append_locked(self.path) as f: + f.write(json.dumps(payload)) + f.write("\n") except OSError as e: # pragma: no cover - filesystem failures are rare logger.warning("FileTelemetrySink: failed to write event: %s", e) diff --git a/_api/lib/__init__.py b/_api/lib/__init__.py index 3285df8..3fdd9f0 100644 --- a/_api/lib/__init__.py +++ b/_api/lib/__init__.py @@ -29,6 +29,7 @@ HealthResponse, PatternListResponse, PatternRecord, + ReadyResponse, build_app, create_default_app, ) @@ -40,6 +41,7 @@ "HealthResponse", "PatternListResponse", "PatternRecord", + "ReadyResponse", "build_app", "create_default_app", ] diff --git a/_api/lib/_app.py b/_api/lib/_app.py index 160c6d8..b712c43 100644 --- a/_api/lib/_app.py +++ b/_api/lib/_app.py @@ -1,32 +1,85 @@ """FastAPI application factory for the ``vstack-api`` server. +Production-hardened in v0.6.0: + +* **Authentication** -- configurable via :class:`APIKeyStore`; off by + default (loopback-friendly) so existing local flows keep working. +* **Rate limiting** -- per-key (per-IP fallback) sliding-window limiter. +* **Request size + trace-shape limits** -- enforced before Pydantic. +* **Async analyze path** -- awaits the analyzer's ``*Async`` mirror + (``arun()``) when the LLM client exposes ``acomplete``; otherwise + offloads the sync analyzer to a thread so requests don't serialize. +* **CORS + security headers** -- standard middleware stack. +* **Request ID + structured logging** -- ``X-Request-ID`` round-trip, + context-var-bound, returned on every response. +* **Prometheus metrics** -- ``/metrics`` endpoint + per-pattern + latency histogram + per-status counters. +* **Health endpoints** -- ``/healthz`` (liveness), ``/readyz`` + (readiness), ``/livez`` (alias for liveness) with separate + semantics so K8s probes can distinguish startup from runtime. +* **Graceful shutdown** -- in-flight requests drain on SIGTERM. +* **Optional Sentry** -- enabled when ``SENTRY_DSN`` is set.
+ Reuses ``vstack.mcp._registry`` so the HTTP surface and the MCP -surface speak about the same 34 patterns. The MCP layer is the -canonical pattern registry; this module imports from it. Keeping a -single registry guarantees the two surfaces never drift on names, -input shapes, or mode enums. +surface speak about the same 34 patterns. """ from __future__ import annotations +import asyncio +import contextlib import json import logging -from typing import Any, Callable, Optional +import time +from typing import Any, AsyncIterator, Callable, Optional -from fastapi import Body, FastAPI, HTTPException, Path -from fastapi.responses import PlainTextResponse, Response +from fastapi import Body, FastAPI, HTTPException, Path, Request, Response +from fastapi.middleware.cors import CORSMiddleware +from fastapi.responses import JSONResponse, PlainTextResponse from pydantic import BaseModel, Field - +from starlette.middleware.base import BaseHTTPMiddleware +from starlette.types import ASGIApp + +from vstack.cache import ( + CacheBackend, + CacheEntry, + build_cache_key, + resolve_cache_from_env, +) from vstack.mcp._client import LLMResolutionError, default_model_for, resolve_llm_client from vstack.mcp._registry import PATTERNS, PATTERNS_BY_NAME, PatternEntry, tool_name_for from vstack.mcp._resources import read_resource +from vstack.observability import ( + DEFAULT_METRICS_REGISTRY, + MetricsRegistry, + REQUEST_ID_HEADER, + get_or_create_request_id, + install_sentry_if_configured, + render_prometheus, + reset_request_id, + set_current_request_id, + time_request, +) +from vstack.security import ( + APIKeyStore, + InMemoryRateLimiter, + RateLimiter, + RequestLimits, + RequestSizeExceeded, + enforce_trace_limits, + load_keys_from_env, +) +from vstack.security._limits import request_limits_from_env logger = logging.getLogger(__name__) -class APIError(BaseModel): - """Standard error response envelope.""" +# ---------------------------------------------------------------------- +# 
Response models (kept stable for backward compat with v0.3.0 clients) +# ---------------------------------------------------------------------- + +class APIError(BaseModel): error: str message: str @@ -38,9 +91,16 @@ class HealthResponse(BaseModel): patterns: int -class PatternRecord(BaseModel): - """One pattern as exposed over the HTTP catalogue.""" +class ReadyResponse(BaseModel): + status: str + """``"ready"`` once the server has loaded the registry + an LLM + client can be resolved; ``"warming"`` during startup; ``"draining"`` + during graceful shutdown.""" + + detail: str = "" + +class PatternRecord(BaseModel): name: str friendly: str group: str @@ -59,41 +119,299 @@ class PatternListResponse(BaseModel): class AnalyzeRequestEnvelope(BaseModel): - """Optional wrapping shape; clients can also POST the raw input model directly. - - The server accepts either shape. When this envelope is used, the - pattern's input trace lives at ``trace`` and the optional ``mode`` - / ``model`` overrides live alongside it. 
- """ - trace: dict[str, Any] mode: Optional[str] = None model: Optional[str] = None class AnalyzeResponseEnvelope(BaseModel): - """Wrapping for the detection plus diagnostic metadata.""" - pattern: str mode: str model: str detection: dict[str, Any] + cached: bool = False + """True when the detection was served from the configured cache.""" + + +# ---------------------------------------------------------------------- +# Application-state container +# ---------------------------------------------------------------------- + + +class _AppState: + """Per-app runtime config gathered into one object for testability.""" + + def __init__( + self, + *, + keystore: APIKeyStore | None, + require_auth: bool, + rate_limiter: RateLimiter | None, + limits: RequestLimits, + cache: CacheBackend, + metrics: MetricsRegistry, + llm_client_factory: Callable[[], Any] | None, + ) -> None: + self.keystore = keystore or APIKeyStore() + self.require_auth = require_auth + self.rate_limiter = rate_limiter + self.limits = limits + self.cache = cache + self.metrics = metrics + self.llm_client_factory = llm_client_factory or resolve_llm_client + self.ready = True + """Goes False during graceful shutdown so readyz reports + draining.""" + + +# ---------------------------------------------------------------------- +# Middleware +# ---------------------------------------------------------------------- + + +class _RequestIDMiddleware(BaseHTTPMiddleware): + """Generate/echo a request ID + bind it for the lifetime of the request.""" + + async def dispatch(self, request: Request, call_next: Any) -> Any: + incoming = request.headers.get(REQUEST_ID_HEADER) + rid = get_or_create_request_id(incoming) + token = set_current_request_id(rid) + try: + response = await call_next(request) + finally: + reset_request_id(token) + response.headers[REQUEST_ID_HEADER] = rid + return response + + +class _SecurityHeadersMiddleware(BaseHTTPMiddleware): + """Standard production headers on every response.""" + + async def 
dispatch(self, request: Request, call_next: Any) -> Any: + response = await call_next(request) + response.headers.setdefault("X-Content-Type-Options", "nosniff") + response.headers.setdefault("X-Frame-Options", "DENY") + response.headers.setdefault("Referrer-Policy", "strict-origin-when-cross-origin") + # CSP for the JSON-only API surface (no inline scripts). + response.headers.setdefault( + "Content-Security-Policy", "default-src 'none'; frame-ancestors 'none'" + ) + # HSTS only if we're served over HTTPS; the reverse proxy + # is the right place to set this but we set it defensively. + if request.url.scheme == "https": + response.headers.setdefault( + "Strict-Transport-Security", + "max-age=63072000; includeSubDomains; preload", + ) + return response + + +class _BodySizeLimitMiddleware(BaseHTTPMiddleware): + """Reject requests whose Content-Length exceeds ``state.limits.max_body_bytes``.""" + + def __init__(self, app: ASGIApp, state: _AppState) -> None: + super().__init__(app) + self._state = state + + async def dispatch(self, request: Request, call_next: Any) -> Any: + content_length = request.headers.get("content-length") + if content_length is not None: + try: + size = int(content_length) + except ValueError: + size = 0 + if size > self._state.limits.max_body_bytes: + return JSONResponse( + status_code=413, + content={ + "detail": { + "error": "request_body_too_large", + "message": ( + f"Request body {size} bytes exceeds " + f"limit {self._state.limits.max_body_bytes}. " + f"Set VSTACK_API_MAX_BODY_BYTES to raise." + ), + } + }, + ) + return await call_next(request) + + +class _AuthMiddleware(BaseHTTPMiddleware): + """API-key auth + rate limiting in one middleware. + + Skips public paths (health probes, metrics, OpenAPI) so a + reverse proxy can do its own checks. Treats the empty keystore + as "auth not enforced" unless ``require_auth`` is True (in which + case requests are rejected immediately with a config error). 
+ """ + + PUBLIC_PATHS = { + "/healthz", + "/livez", + "/readyz", + "/metrics", + "/openapi.json", + "/docs", + "/docs/oauth2-redirect", + "/redoc", + } + + def __init__(self, app: ASGIApp, state: _AppState) -> None: + super().__init__(app) + self._state = state + + async def dispatch(self, request: Request, call_next: Any) -> Any: + path = request.url.path + if path in self.PUBLIC_PATHS or path.startswith("/docs/"): + return await call_next(request) + + if self._state.require_auth and not self._state.keystore: + return JSONResponse( + status_code=500, + content={ + "detail": { + "error": "auth_misconfigured", + "message": ( + "require_auth=True but no API keys are loaded. Set " + "VSTACK_API_KEYS or VSTACK_API_KEYS_FILE." + ), + } + }, + ) + + api_key_name: str | None = None + if self._state.keystore: + raw = _extract_api_key(request) + matched = self._state.keystore.verify(raw) + if matched is None and self._state.require_auth: + return JSONResponse( + status_code=401, + content={ + "detail": { + "error": "unauthorized", + "message": ( + "Missing or invalid API key. Send " + "'Authorization: Bearer ' or " + "'X-API-Key: '." + ), + } + }, + headers={"WWW-Authenticate": 'Bearer realm="vstack"'}, + ) + api_key_name = matched.name if matched else None + + if self._state.rate_limiter is not None: + rate_key = api_key_name or _client_ip(request) + decision = self._state.rate_limiter.check(rate_key) + if not decision.allowed: + return JSONResponse( + status_code=429, + content={ + "detail": { + "error": "rate_limited", + "message": ( + f"Rate limit {decision.limit}/window exceeded. " + f"Retry after {decision.retry_after_seconds:.2f}s." 
+ ), + } + }, + headers={ + "Retry-After": str(max(1, int(decision.retry_after_seconds))), + "X-RateLimit-Limit": str(decision.limit), + "X-RateLimit-Remaining": "0", + }, + ) + response = await call_next(request) + response.headers["X-RateLimit-Limit"] = str(decision.limit) + response.headers["X-RateLimit-Remaining"] = str(decision.remaining) + return response + + return await call_next(request) + + +def _extract_api_key(request: Request) -> str | None: + """Pull the API key from either the Authorization or X-API-Key header.""" + auth = request.headers.get("authorization") or "" + if auth.lower().startswith("bearer "): + return auth[7:].strip() or None + api_key = request.headers.get("x-api-key") + if api_key: + return api_key.strip() + return None + + +def _client_ip(request: Request) -> str: + forwarded = request.headers.get("x-forwarded-for") + if forwarded: + return forwarded.split(",", 1)[0].strip() + return getattr(request.client, "host", None) or "unknown" + + +# ---------------------------------------------------------------------- +# build_app +# ---------------------------------------------------------------------- def build_app( *, llm_client_factory: Optional[Callable[[], object]] = None, + keystore: APIKeyStore | None = None, + require_auth: bool = False, + rate_limiter: RateLimiter | None = None, + limits: RequestLimits | None = None, + cache: CacheBackend | None = None, + metrics: MetricsRegistry | None = None, + cors_origins: list[str] | None = None, + env: dict[str, str] | None = None, ) -> FastAPI: - """Construct and return the FastAPI app. - - Parameters - ---------- - llm_client_factory: - Optional zero-arg callable returning an LLM client (anything - exposing ``.complete(prompt, system=None)``). Defaults to - :func:`vstack.mcp.resolve_llm_client`. Tests inject a stub - client to avoid live LLM calls. + """Construct the FastAPI app. 
+ + All arguments are optional + production-friendly defaults are + resolved from environment variables when not supplied: + + * ``VSTACK_API_KEYS`` / ``VSTACK_API_KEYS_FILE`` -- API keys + * ``VSTACK_API_REQUIRE_AUTH`` -- ``"1"`` / ``"true"`` to enforce + * ``VSTACK_API_RATE_LIMIT`` -- ``"100/60"`` (requests/window-seconds); + ``"off"`` to disable. + * ``VSTACK_API_MAX_*`` -- see :class:`RequestLimits` + * ``VSTACK_CACHE=memory`` -- enable in-memory caching + * ``VSTACK_API_CORS_ORIGINS`` -- comma-separated allowed origins + * ``SENTRY_DSN`` -- optional error reporting """ + import os + + env = env if env is not None else dict(os.environ) + keystore = keystore or load_keys_from_env(env) + require_auth = require_auth or _bool_env(env, "VSTACK_API_REQUIRE_AUTH") + rate_limiter = rate_limiter if rate_limiter is not None else _rate_limiter_from_env(env) + limits = limits or request_limits_from_env(env) + cache = cache or resolve_cache_from_env(env) + metrics = metrics or DEFAULT_METRICS_REGISTRY + cors_origins = cors_origins or _cors_origins_from_env(env) + install_sentry_if_configured(env) + + state = _AppState( + keystore=keystore, + require_auth=require_auth, + rate_limiter=rate_limiter, + limits=limits, + cache=cache, + metrics=metrics, + llm_client_factory=llm_client_factory, + ) + + @contextlib.asynccontextmanager + async def _lifespan(app: FastAPI) -> AsyncIterator[None]: + # Startup: nothing async to do; the state object is already + # constructed and ready to accept requests. + yield + # Shutdown: flip the readyz flag so K8s probes know we're + # draining, then yield briefly to let in-flight requests + # finish before uvicorn force-closes their sockets. + state.ready = False + await asyncio.sleep(0) + app = FastAPI( title="vstack API", description=( @@ -101,13 +419,55 @@ def build_app( "diagnostic patterns. Mirrors the MCP server's pattern " "registry; same inputs, same outputs, REST envelope." 
), - version="0.3.0", + version="0.6.0", + lifespan=_lifespan, ) - factory = llm_client_factory or resolve_llm_client + app.state.vstack = state + + # Middleware order is reversed for incoming requests: the LAST + # one added is the FIRST to see the request. CORS (when enabled) + # is added last, so it is outermost and its response headers + # wrap everything. Inside it, request-ID binding happens first so + # every log line during the request carries the ID; then security + # headers; then the body-size check (so we reject huge bodies + # before doing auth work); then auth + rate limit. + app.add_middleware(_AuthMiddleware, state=state) + app.add_middleware(_BodySizeLimitMiddleware, state=state) + app.add_middleware(_SecurityHeadersMiddleware) + app.add_middleware(_RequestIDMiddleware) + if cors_origins: + app.add_middleware( + CORSMiddleware, + allow_origins=cors_origins, + allow_credentials=False, + allow_methods=["GET", "POST"], + allow_headers=["*"], + expose_headers=[ + REQUEST_ID_HEADER, + "X-RateLimit-Limit", + "X-RateLimit-Remaining", + ], + ) - @app.get("/healthz", response_model=HealthResponse) + @app.get("/healthz", response_model=HealthResponse, include_in_schema=True) async def healthz() -> HealthResponse: - return HealthResponse(version="0.3.0", patterns=len(PATTERNS)) + return HealthResponse(version="0.6.0", patterns=len(PATTERNS)) + + @app.get("/livez", response_model=HealthResponse, include_in_schema=False) + async def livez() -> HealthResponse: + return HealthResponse(version="0.6.0", patterns=len(PATTERNS)) + + @app.get("/readyz", response_model=ReadyResponse) + async def readyz() -> ReadyResponse: + if not state.ready: + return ReadyResponse(status="draining", detail="graceful shutdown in progress") + return ReadyResponse(status="ready") + + @app.get("/metrics", response_class=PlainTextResponse) + async def metrics_endpoint() -> PlainTextResponse: + return PlainTextResponse( + render_prometheus(state.metrics), media_type="text/plain; version=0.0.4" + )
@app.get("/v1/patterns", response_model=PatternListResponse) async def list_patterns_endpoint() -> PatternListResponse: @@ -127,28 +487,19 @@ async def get_pattern_endpoint( pattern = _resolve_pattern_or_404(name) return _record_for(pattern) - @app.get( - "/v1/patterns/{name}/playbooks", - responses={404: {"model": APIError}}, - ) + @app.get("/v1/patterns/{name}/playbooks", responses={404: {"model": APIError}}) async def get_playbooks(name: str) -> Response: _resolve_pattern_or_404(name) mime, body = read_resource(f"vstack://patterns/{name}/playbooks") return Response(content=body, media_type=mime) - @app.get( - "/v1/patterns/{name}/citations", - responses={404: {"model": APIError}}, - ) + @app.get("/v1/patterns/{name}/citations", responses={404: {"model": APIError}}) async def get_citations(name: str) -> Response: _resolve_pattern_or_404(name) mime, body = read_resource(f"vstack://patterns/{name}/citations") return PlainTextResponse(content=body, media_type=mime) - @app.get( - "/v1/patterns/{name}/composition", - responses={404: {"model": APIError}}, - ) + @app.get("/v1/patterns/{name}/composition", responses={404: {"model": APIError}}) async def get_composition(name: str) -> Response: _resolve_pattern_or_404(name) mime, body = read_resource(f"vstack://patterns/{name}/composition") @@ -160,89 +511,183 @@ async def get_composition(name: str) -> Response: responses={ 400: {"model": APIError}, 404: {"model": APIError}, + 413: {"model": APIError}, + 429: {"model": APIError}, 502: {"model": APIError}, }, ) async def analyze( name: str, - payload: dict[str, Any] = Body( - ..., - description=( - "Either the pattern's input trace directly, or an " - "envelope {'trace': , 'mode': 'standard', " - "'model': '...'} when you need to override the mode " - "or model. Optional 'mode' and 'model' may also " - "appear at the top level of the trace shape." 
- ), - ), + payload: dict[str, Any] = Body(...), ) -> AnalyzeResponseEnvelope: pattern = _resolve_pattern_or_404(name) trace_data, mode, model = _unwrap_payload(payload) + try: + enforce_trace_limits(trace_data, state.limits) + except RequestSizeExceeded as e: + raise HTTPException( + status_code=413, + detail={"error": "request_too_large", "message": str(e)}, + ) + resolved = pattern.load() - if mode and mode not in resolved.mode_values: + chosen_mode = mode or "standard" + if chosen_mode not in resolved.mode_values: raise HTTPException( status_code=400, detail={ "error": "invalid_mode", "message": ( - f"Mode {mode!r} not valid for {pattern.name}. " + f"Mode {chosen_mode!r} not valid for {pattern.name}. " f"Allowed: {list(resolved.mode_values)}" ), }, ) + try: trace = resolved.input_cls.model_validate(trace_data) - except Exception as e: # pydantic.ValidationError + except Exception as e: raise HTTPException( status_code=400, - detail={ - "error": "validation_error", - "message": str(e), - }, + detail={"error": "validation_error", "message": str(e)}, + ) + + # Cache lookup BEFORE LLM resolution so a cache hit doesn't + # waste an LLM-client construction (which can involve a network + # round-trip for some providers). 
+ cache_model_key = model or "auto" + cache_key = build_cache_key( + pattern=pattern.name, + mode=chosen_mode, + model=cache_model_key, + trace=trace_data, + ) + cached_entry = state.cache.get(cache_key) + if cached_entry is not None: + with time_request( + surface="rest", + pattern=pattern.name, + mode=chosen_mode, + registry=state.metrics, + ) as bucket: + bucket["status"] = "cache_hit" + return AnalyzeResponseEnvelope( + pattern=pattern.name, + mode=chosen_mode, + model=cache_model_key, + detection=dict(cached_entry.detection), + cached=True, ) try: - llm = factory() + llm = state.llm_client_factory() except LLMResolutionError as e: raise HTTPException( status_code=502, detail={"error": "llm_resolution_error", "message": str(e)}, ) + chosen_model = model or default_model_for(llm) - chosen_mode = mode or "standard" - chosen_model = model or default_model_for(llm) # type: ignore[arg-type] - - try: - analyzer = resolved.analyzer_cls(llm, model=chosen_model, mode=chosen_mode) - detection = analyzer.run(trace) - except Exception as e: # noqa: BLE001 - runtime analyzer failure - logger.exception("vstack-api: pattern %s failed", pattern.name) - raise HTTPException( - status_code=502, - detail={"error": "analyzer_error", "message": str(e)}, - ) + with time_request( + surface="rest", + pattern=pattern.name, + mode=chosen_mode, + registry=state.metrics, + ) as bucket: + try: + detection = await _run_pattern_async( + resolved=resolved, + llm=llm, + chosen_model=chosen_model, + chosen_mode=chosen_mode, + trace=trace, + timeout_seconds=state.limits.request_timeout_seconds, + ) + bucket["status"] = "ok" + except asyncio.TimeoutError: + bucket["status"] = "timeout" + raise HTTPException( + status_code=504, + detail={ + "error": "timeout", + "message": ( + f"Analyzer for {pattern.name} exceeded the " + f"{state.limits.request_timeout_seconds:.0f}s " + "server-side deadline. Try mode=quick or split the trace." 
+ ), + }, + ) + except Exception as e: # noqa: BLE001 - runtime analyzer failure + bucket["status"] = "analyzer_error" + logger.exception("vstack-api: pattern %s failed", pattern.name) + raise HTTPException( + status_code=502, + detail={"error": "analyzer_error", "message": str(e)}, + ) if hasattr(detection, "model_dump"): payload_out = detection.model_dump(mode="json") else: payload_out = json.loads(json.dumps(detection, default=str)) + state.cache.set( + cache_key, + CacheEntry(detection=payload_out, created_at=time.time()), + ) + return AnalyzeResponseEnvelope( pattern=pattern.name, mode=chosen_mode, model=chosen_model, detection=payload_out, + cached=False, ) return app def create_default_app() -> FastAPI: - """Module-level app used by uvicorn one-shot invocations like ``vstack.api:app``.""" return build_app() +# ---------------------------------------------------------------------- +# internals +# ---------------------------------------------------------------------- + + +async def _run_pattern_async( + *, + resolved: Any, + llm: Any, + chosen_model: str, + chosen_mode: str, + trace: Any, + timeout_seconds: float, +) -> Any: + """Run the analyzer either via its async mirror or in a thread. + + Patterns ship a ``*Async`` mirror under the same module + (``LewinAttributionDetectorAsync``, etc.). When that mirror is + importable + the LLM client has an async ``.acomplete`` + method, we await it directly. Otherwise we run the sync + analyzer in a thread to avoid blocking the FastAPI event loop. + """ + module = resolved.module + async_cls_name = resolved.analyzer_cls.__name__ + "Async" + async_cls = getattr(module, async_cls_name, None) + if async_cls is not None and hasattr(llm, "acomplete"): + analyzer = async_cls(llm, model=chosen_model, mode=chosen_mode) + return await asyncio.wait_for(analyzer.arun(trace), timeout=timeout_seconds) + # Sync analyzer offloaded to a thread. 
+ analyzer = resolved.analyzer_cls(llm, model=chosen_model, mode=chosen_mode) + loop = asyncio.get_running_loop() + return await asyncio.wait_for( + loop.run_in_executor(None, analyzer.run, trace), + timeout=timeout_seconds, + ) + + def _resolve_pattern_or_404(name: str) -> PatternEntry: pattern = PATTERNS_BY_NAME.get(name) if pattern is None: @@ -256,7 +701,6 @@ def _resolve_pattern_or_404(name: str) -> PatternEntry: def _unwrap_payload( payload: dict[str, Any], ) -> tuple[dict[str, Any], str | None, str | None]: - """Pull ``trace`` / ``mode`` / ``model`` from either envelope shape.""" if "trace" in payload and isinstance(payload["trace"], dict): mode = payload.get("mode") model = payload.get("model") @@ -293,3 +737,33 @@ def _record_for(pattern: PatternEntry) -> PatternRecord: ), }, ) + + +def _bool_env(env: dict[str, str], key: str) -> bool: + raw = (env.get(key) or "").strip().lower() + return raw in ("1", "true", "yes", "on", "enabled") + + +def _rate_limiter_from_env(env: dict[str, str]) -> RateLimiter | None: + raw = (env.get("VSTACK_API_RATE_LIMIT") or "").strip().lower() + if not raw or raw in ("off", "none", "disabled"): + return None + try: + if "/" in raw: + count, window = raw.split("/", 1) + return InMemoryRateLimiter( + max_requests=max(1, int(count)), + window_seconds=max(1.0, float(window)), + ) + return InMemoryRateLimiter(max_requests=max(1, int(raw))) + except ValueError: + logger.warning( + "VSTACK_API_RATE_LIMIT=%r is not a valid spec; rate limiting disabled.", + raw, + ) + return None + + +def _cors_origins_from_env(env: dict[str, str]) -> list[str]: + raw = env.get("VSTACK_API_CORS_ORIGINS") or "" + return [o.strip() for o in raw.split(",") if o.strip()] diff --git a/_api/tests/test_api_security.py b/_api/tests/test_api_security.py new file mode 100644 index 0000000..35e3b7b --- /dev/null +++ b/_api/tests/test_api_security.py @@ -0,0 +1,311 @@ +"""Tests for the v0.6.0 API hardening: auth, rate limit, request +limits, readyz/livez, metrics, 
request-id round-trip, CORS, security +headers, caching.""" + +from __future__ import annotations + +import json + +import pytest +from fastapi.testclient import TestClient + +import vstack.api as api +from vstack.aar import StubClient +from vstack.cache import InMemoryLRUCache +from vstack.observability import MetricsRegistry +from vstack.security import ( + APIKey, + APIKeyStore, + InMemoryRateLimiter, + RequestLimits, +) + + +# ---------------------------------------------------------------------- +# Auth +# ---------------------------------------------------------------------- + + +def _client(**kwargs) -> TestClient: + app = api.build_app( + llm_client_factory=lambda: StubClient([]), + **kwargs, + ) + return TestClient(app) + + +def test_no_auth_by_default() -> None: + client = _client(env={}) + r = client.get("/v1/patterns") + assert r.status_code == 200 + + +def test_require_auth_blocks_without_key() -> None: + store = APIKeyStore(keys=[APIKey.from_raw("k", "a" * 30)]) + client = _client(keystore=store, require_auth=True, env={}) + r = client.get("/v1/patterns") + assert r.status_code == 401 + body = r.json() + assert body["detail"]["error"] == "unauthorized" + assert "WWW-Authenticate" in r.headers + + +def test_require_auth_allows_with_bearer() -> None: + store = APIKeyStore(keys=[APIKey.from_raw("k", "a" * 30)]) + client = _client(keystore=store, require_auth=True, env={}) + r = client.get("/v1/patterns", headers={"Authorization": "Bearer " + "a" * 30}) + assert r.status_code == 200 + + +def test_require_auth_allows_with_x_api_key() -> None: + store = APIKeyStore(keys=[APIKey.from_raw("k", "a" * 30)]) + client = _client(keystore=store, require_auth=True, env={}) + r = client.get("/v1/patterns", headers={"X-API-Key": "a" * 30}) + assert r.status_code == 200 + + +def test_require_auth_rejects_wrong_key() -> None: + store = APIKeyStore(keys=[APIKey.from_raw("k", "a" * 30)]) + client = _client(keystore=store, require_auth=True, env={}) + r = 
client.get("/v1/patterns", headers={"Authorization": "Bearer wrong-key-here"}) + assert r.status_code == 401 + + +def test_require_auth_misconfigured_when_no_keys() -> None: + client = _client(keystore=APIKeyStore(), require_auth=True, env={}) + r = client.get("/v1/patterns") + assert r.status_code == 500 + assert r.json()["detail"]["error"] == "auth_misconfigured" + + +def test_public_paths_skip_auth() -> None: + store = APIKeyStore(keys=[APIKey.from_raw("k", "a" * 30)]) + client = _client(keystore=store, require_auth=True, env={}) + for path in ("/healthz", "/livez", "/readyz", "/metrics", "/openapi.json"): + r = client.get(path) + assert r.status_code == 200, f"{path} blocked" + + +# ---------------------------------------------------------------------- +# Rate limiting +# ---------------------------------------------------------------------- + + +def test_rate_limit_returns_429_with_retry_after() -> None: + limiter = InMemoryRateLimiter(max_requests=1, window_seconds=60.0) + client = _client(rate_limiter=limiter, env={}) + r1 = client.get("/v1/patterns") + assert r1.status_code == 200 + r2 = client.get("/v1/patterns") + assert r2.status_code == 429 + assert "Retry-After" in r2.headers + body = r2.json() + assert body["detail"]["error"] == "rate_limited" + + +def test_rate_limit_headers_on_success() -> None: + limiter = InMemoryRateLimiter(max_requests=10, window_seconds=60.0) + client = _client(rate_limiter=limiter, env={}) + r = client.get("/v1/patterns") + assert r.headers.get("X-RateLimit-Limit") == "10" + assert r.headers.get("X-RateLimit-Remaining") == "9" + + +def test_rate_limit_does_not_apply_to_health() -> None: + limiter = InMemoryRateLimiter(max_requests=1, window_seconds=60.0) + client = _client(rate_limiter=limiter, env={}) + client.get("/v1/patterns") # uses up the quota + # Health probes should still respond. 
+ for path in ("/healthz", "/readyz", "/metrics"): + r = client.get(path) + assert r.status_code == 200, f"{path} blocked under rate-limit" + + +# ---------------------------------------------------------------------- +# Request limits +# ---------------------------------------------------------------------- + + +def test_oversized_trace_steps_returns_413() -> None: + limits = RequestLimits(max_trace_steps=2, max_body_bytes=10_000_000) + client = _client(limits=limits, env={}) + payload = { + "task": "x", + "outcome": "y", + "success": False, + "steps": [{"type": "input", "content": "x"}] * 5, + } + r = client.post("/v1/analyze/lewin", json=payload) + assert r.status_code == 413 + assert r.json()["detail"]["error"] == "request_too_large" + + +def test_oversized_body_returns_413() -> None: + limits = RequestLimits(max_body_bytes=100) + client = _client(limits=limits, env={}) + # Manually set Content-Length to bypass real-body short-circuit + r = client.post( + "/v1/analyze/lewin", + json={"steps": ["x"] * 1000}, + headers={"Content-Length": "10000"}, + ) + # TestClient may set the header itself; the actual reject path is + # tested by the request-body's actual size in this transport. 
+ assert r.status_code in (413, 400, 422) + + +# ---------------------------------------------------------------------- +# readyz / livez / healthz +# ---------------------------------------------------------------------- + + +def test_readyz_initially_ready() -> None: + client = _client(env={}) + r = client.get("/readyz") + assert r.status_code == 200 + assert r.json()["status"] == "ready" + + +def test_livez_alias() -> None: + client = _client(env={}) + r = client.get("/livez") + assert r.status_code == 200 + assert r.json()["status"] == "ok" + + +# ---------------------------------------------------------------------- +# Metrics +# ---------------------------------------------------------------------- + + +def test_metrics_endpoint_returns_prometheus_text() -> None: + metrics = MetricsRegistry() + client = _client(metrics=metrics, env={}) + # Generate some traffic. + client.get("/v1/patterns") + # Force a counter so the registry isn't empty. + metrics.counter("test_seed_total", "test").inc() + r = client.get("/metrics") + assert r.status_code == 200 + body = r.text + assert "# HELP" in body + assert "test_seed_total" in body + + +# ---------------------------------------------------------------------- +# Request ID +# ---------------------------------------------------------------------- + + +def test_request_id_echoes_valid_inbound() -> None: + client = _client(env={}) + r = client.get("/v1/patterns", headers={"X-Request-ID": "req_test_42"}) + assert r.headers["X-Request-ID"] == "req_test_42" + + +def test_request_id_generated_when_absent() -> None: + client = _client(env={}) + r = client.get("/v1/patterns") + rid = r.headers.get("X-Request-ID") + assert rid is not None + assert rid.startswith("req_") + + +def test_request_id_invalid_replaced_with_safe_one() -> None: + client = _client(env={}) + r = client.get("/v1/patterns", headers={"X-Request-ID": ""}) + assert r.headers["X-Request-ID"].startswith("req_") + + +# 
---------------------------------------------------------------------- +# Security headers +# ---------------------------------------------------------------------- + + +def test_security_headers_applied() -> None: + client = _client(env={}) + r = client.get("/v1/patterns") + assert r.headers.get("X-Content-Type-Options") == "nosniff" + assert r.headers.get("X-Frame-Options") == "DENY" + assert "Content-Security-Policy" in r.headers + assert "Referrer-Policy" in r.headers + + +# ---------------------------------------------------------------------- +# Caching +# ---------------------------------------------------------------------- + + +@pytest.fixture +def lewin_factory(): + """Stub client factory that produces fresh stubs (so we can verify + that a second request uses cache, not a re-run).""" + scores = json.dumps( + [ + { + "locus": "environmental", + "score": 0.9, + "severity": "high", + "explanation": "stale RAG", + "evidence_quotes": [], + } + ] + ) + interventions = json.dumps( + [ + { + "target_locus": "environmental", + "intervention_type": "change_rag_index", + "description": "refresh", + "suggested_implementation": "cron", + "estimated_impact": "high", + "rationale": "stops staleness", + } + ] + ) + call_count = {"n": 0} + + def factory(): + call_count["n"] += 1 + return StubClient([scores, interventions]) + + factory._counter = call_count # type: ignore[attr-defined] + return factory + + +def test_cache_serves_repeat_requests(lewin_factory) -> None: + cache = InMemoryLRUCache(capacity=10) + app = api.build_app(llm_client_factory=lewin_factory, cache=cache, env={}) + client = TestClient(app) + payload = { + "task": "x", + "steps": [{"type": "input", "content": "y"}], + "outcome": "z", + "success": False, + "mode": "standard", + } + r1 = client.post("/v1/analyze/lewin", json=payload) + r2 = client.post("/v1/analyze/lewin", json=payload) + assert r1.status_code == 200 + assert r2.status_code == 200 + # Same body content; cached=True on the second. 
+ assert r1.json()["cached"] is False + assert r2.json()["cached"] is True + # Factory invoked only once (cache hit avoided the second run). + assert lewin_factory._counter["n"] == 1 + + +def test_no_cache_default_means_every_request_runs(lewin_factory) -> None: + app = api.build_app(llm_client_factory=lewin_factory, env={}) + client = TestClient(app) + payload = { + "task": "x", + "steps": [{"type": "input", "content": "y"}], + "outcome": "z", + "success": False, + "mode": "standard", + } + client.post("/v1/analyze/lewin", json=payload) + client.post("/v1/analyze/lewin", json=payload) + # No cache -> factory invoked twice. + assert lewin_factory._counter["n"] == 2 diff --git a/_cache/lib/__init__.py b/_cache/lib/__init__.py new file mode 100644 index 0000000..dba8daf --- /dev/null +++ b/_cache/lib/__init__.py @@ -0,0 +1,46 @@ +"""vstack.cache -- optional caching layer for analyzer detections. + +Identical traces produce identical detections (modulo LLM +non-determinism). Caching the (pattern, mode, model, trace_hash) +-> detection map across analyzer runs is a free cost reduction for +the busy-server case: a typical observability pipeline replays the +same trace through multiple patterns + multiple modes, often within +seconds. + +The default backend is in-memory LRU. The :class:`CacheBackend` +protocol lets a downstream user plug in Redis / Memcached / disk +without touching the call sites in :mod:`vstack.adapters`. + +The cache is **opt-in**. Set ``VSTACK_CACHE=memory`` or pass a +backend instance to :func:`vstack.adapters.run_pattern_dispatch` +to enable. Default is no-cache, so existing tests + flows are +unchanged. + +Key construction: SHA-256 of ``(pattern, mode, model, +trace_json_canonical)`` — canonical because Python dict ordering +isn't trace-content. Detection model determinism is the LLM's +problem; the cache trusts the pattern's output is reproducible +when the inputs match. 
+""" + +from ._cache import ( + CacheBackend, + CacheEntry, + CacheStats, + InMemoryLRUCache, + NullCache, + build_cache_key, + resolve_cache_from_env, +) + +__all__ = [ + "CacheBackend", + "CacheEntry", + "CacheStats", + "InMemoryLRUCache", + "NullCache", + "build_cache_key", + "resolve_cache_from_env", +] + +__version__ = "0.6.0" diff --git a/_cache/lib/_cache.py b/_cache/lib/_cache.py new file mode 100644 index 0000000..45f35e0 --- /dev/null +++ b/_cache/lib/_cache.py @@ -0,0 +1,240 @@ +"""Caching primitives for analyzer detections.""" + +from __future__ import annotations + +import hashlib +import json +import logging +import os +import threading +import time +from collections import OrderedDict +from dataclasses import dataclass, field +from typing import Any, Mapping, Protocol + +logger = logging.getLogger(__name__) + + +@dataclass(frozen=True) +class CacheEntry: + """One cached detection.""" + + detection: Mapping[str, Any] + created_at: float + """``time.time()`` at insertion. Lets the API surface + ``X-Cache-Age`` headers.""" + + +@dataclass +class CacheStats: + """Counters maintained by the backend for the ``/metrics`` endpoint.""" + + hits: int = 0 + misses: int = 0 + sets: int = 0 + evictions: int = 0 + """Entries dropped by LRU eviction or TTL expiry. Useful for + sizing capacity in production.""" + + @property + def total_lookups(self) -> int: + return self.hits + self.misses + + @property + def hit_rate(self) -> float: + return self.hits / self.total_lookups if self.total_lookups else 0.0 + + +class CacheBackend(Protocol): + """Pluggable cache interface. + + Implementations must be thread-safe under typical web-server + request shapes. Memory backends use a lock around the underlying + OrderedDict; Redis/Memcached backends rely on their server's + atomicity guarantees. + """ + + def get(self, key: str) -> CacheEntry | None: ... + def set(self, key: str, entry: CacheEntry) -> None: ... + def delete(self, key: str) -> None: ...
+ def clear(self) -> None: ... + def stats(self) -> CacheStats: ... + + +@dataclass +class NullCache: + """No-op backend used when caching is disabled. + + Never stores anything; every :meth:`get` returns ``None``. + Each lookup is counted as a miss; hits, sets and evictions stay + zero, so ``/metrics`` keeps a stable shape when the cache is off. + """ + + _stats: CacheStats = field(default_factory=CacheStats) + + def get(self, key: str) -> CacheEntry | None: + self._stats.misses += 1 + return None + + def set(self, key: str, entry: CacheEntry) -> None: + return None + + def delete(self, key: str) -> None: + return None + + def clear(self) -> None: + return None + + def stats(self) -> CacheStats: + return self._stats + + +@dataclass +class InMemoryLRUCache: + """Simple thread-safe LRU cache. + + Capacity defaults to 1024 entries; tune via the ``capacity`` + constructor arg or the ``VSTACK_CACHE_CAPACITY`` env var when + resolved through :func:`resolve_cache_from_env`. With typical + detection sizes (~5-50 KB JSON), 1024 entries works out to + 5-50 MB of in-memory cache. Increase for high-cardinality + deployments. + """ + + capacity: int = 1024 + ttl_seconds: float | None = None + """Optional TTL. ``None`` means entries never expire on time + (only on LRU eviction).""" + + _entries: "OrderedDict[str, CacheEntry]" = field(default_factory=OrderedDict) + _lock: threading.Lock = field(default_factory=threading.Lock) + _stats_obj: CacheStats = field(default_factory=CacheStats) + + def get(self, key: str) -> CacheEntry | None: + with self._lock: + entry = self._entries.get(key) + if entry is None: + self._stats_obj.misses += 1 + return None + if self.ttl_seconds is not None and (time.time() - entry.created_at > self.ttl_seconds): + # Expired -- drop + count as a miss. + del self._entries[key] + self._stats_obj.misses += 1 + self._stats_obj.evictions += 1 + return None + # Move to end (LRU-fresh).
+ self._entries.move_to_end(key) + self._stats_obj.hits += 1 + return entry + + def set(self, key: str, entry: CacheEntry) -> None: + with self._lock: + if key in self._entries: + self._entries.move_to_end(key) + self._entries[key] = entry + self._stats_obj.sets += 1 + while len(self._entries) > self.capacity: + self._entries.popitem(last=False) + self._stats_obj.evictions += 1 + + def delete(self, key: str) -> None: + with self._lock: + self._entries.pop(key, None) + + def clear(self) -> None: + with self._lock: + self._entries.clear() + + def stats(self) -> CacheStats: + # Return a snapshot copy so callers can't mutate. + with self._lock: + return CacheStats( + hits=self._stats_obj.hits, + misses=self._stats_obj.misses, + sets=self._stats_obj.sets, + evictions=self._stats_obj.evictions, + ) + + +def build_cache_key( + *, + pattern: str, + mode: str, + model: str | None, + trace: Mapping[str, Any], +) -> str: + """Stable cache key for ``(pattern, mode, model, trace)``. + + Canonicalizes the trace JSON (sorted keys, no whitespace) so + semantically-identical traces produced by different code paths + hash the same. + """ + payload = { + "pattern": pattern, + "mode": mode, + "model": model or "", + "trace": _canonical(trace), + } + body = json.dumps(payload, sort_keys=True, separators=(",", ":"), default=str) + return "vstack:" + hashlib.sha256(body.encode("utf-8")).hexdigest() + + +def resolve_cache_from_env(env: Mapping[str, str] | None = None) -> CacheBackend: + """Build the configured backend from env vars. + + ``VSTACK_CACHE``: + * ``"off"`` / unset -> :class:`NullCache` + * ``"memory"`` / ``"lru"`` -> :class:`InMemoryLRUCache` + * any other value -> log a warning + return :class:`NullCache` + + ``VSTACK_CACHE_CAPACITY``: capacity for in-memory. + ``VSTACK_CACHE_TTL_SECONDS``: optional TTL. 
+ """ + env = env if env is not None else os.environ + mode = (env.get("VSTACK_CACHE") or "off").strip().lower() + if mode in ("", "off", "none", "null", "disabled"): + return NullCache() + if mode in ("memory", "lru", "inmemory"): + capacity = _int_env(env, "VSTACK_CACHE_CAPACITY", 1024) + ttl_raw = env.get("VSTACK_CACHE_TTL_SECONDS") + ttl = None + if ttl_raw: + try: + ttl = max(0.1, float(ttl_raw)) + except ValueError: + ttl = None + return InMemoryLRUCache(capacity=capacity, ttl_seconds=ttl) + logger.warning("VSTACK_CACHE=%r is not a recognised backend; caching disabled.", mode) + return NullCache() + + +# ---------------------------------------------------------------------- +# internals +# ---------------------------------------------------------------------- + + +def _canonical(obj: Any) -> Any: + """Return a JSON-canonical view of ``obj``. + + Sorts dict keys recursively + drops Pydantic models by calling + ``.model_dump()`` lazily. Lists and tuples are preserved in + order (semantics are order-sensitive for steps / messages / + observations). 
+ """ + if hasattr(obj, "model_dump"): + return _canonical(obj.model_dump(mode="json")) + if isinstance(obj, Mapping): + return {k: _canonical(obj[k]) for k in sorted(obj.keys(), key=str)} + if isinstance(obj, (list, tuple)): + return [_canonical(v) for v in obj] + return obj + + +def _int_env(env: Mapping[str, str], key: str, default: int) -> int: + raw = env.get(key) + if raw is None: + return default + try: + return max(1, int(raw)) + except ValueError: + return default diff --git a/_cache/tests/conftest.py b/_cache/tests/conftest.py new file mode 100644 index 0000000..3d235d5 --- /dev/null +++ b/_cache/tests/conftest.py @@ -0,0 +1 @@ +"""Pytest configuration for the vstack cache test suite.""" diff --git a/_cache/tests/test_cache.py b/_cache/tests/test_cache.py new file mode 100644 index 0000000..265c54f --- /dev/null +++ b/_cache/tests/test_cache.py @@ -0,0 +1,208 @@ +"""Tests for ``vstack.cache``.""" + +from __future__ import annotations + +import threading +import time + + +import vstack.cache as cache_mod +from vstack.cache._cache import ( + CacheEntry, + InMemoryLRUCache, + NullCache, + build_cache_key, + resolve_cache_from_env, +) + + +def _entry(detection: dict | None = None) -> CacheEntry: + return CacheEntry(detection=detection or {"severity": "low"}, created_at=time.time()) + + +# ---------------------------------------------------------------------- +# build_cache_key +# ---------------------------------------------------------------------- + + +def test_build_cache_key_stable_across_dict_order() -> None: + k1 = build_cache_key( + pattern="lewin", + mode="standard", + model="claude", + trace={"task": "x", "steps": [{"a": 1, "b": 2}]}, + ) + k2 = build_cache_key( + pattern="lewin", + mode="standard", + model="claude", + trace={"steps": [{"b": 2, "a": 1}], "task": "x"}, + ) + assert k1 == k2 + + +def test_build_cache_key_differs_on_pattern_change() -> None: + base = dict(mode="standard", model="claude", trace={"x": 1}) + assert 
build_cache_key(pattern="lewin", **base) != build_cache_key(pattern="aar", **base) + + +def test_build_cache_key_starts_with_namespace() -> None: + k = build_cache_key(pattern="x", mode="y", model=None, trace={}) + assert k.startswith("vstack:") + + +# ---------------------------------------------------------------------- +# InMemoryLRUCache +# ---------------------------------------------------------------------- + + +def test_lru_cache_set_and_get() -> None: + c = InMemoryLRUCache(capacity=10) + entry = _entry({"score": 0.5}) + c.set("k", entry) + got = c.get("k") + assert got is entry + stats = c.stats() + assert stats.hits == 1 + assert stats.sets == 1 + + +def test_lru_cache_miss_increments_misses() -> None: + c = InMemoryLRUCache(capacity=10) + assert c.get("missing") is None + assert c.stats().misses == 1 + + +def test_lru_cache_evicts_at_capacity() -> None: + c = InMemoryLRUCache(capacity=2) + c.set("a", _entry()) + c.set("b", _entry()) + c.set("c", _entry()) # evicts "a" + assert c.get("a") is None + assert c.get("b") is not None + assert c.get("c") is not None + stats = c.stats() + assert stats.evictions == 1 + + +def test_lru_cache_move_to_end_on_access() -> None: + c = InMemoryLRUCache(capacity=2) + c.set("a", _entry()) + c.set("b", _entry()) + c.get("a") # bumps "a" to fresh + c.set("c", _entry()) # evicts "b" (oldest now) + assert c.get("a") is not None + assert c.get("b") is None + + +def test_lru_cache_ttl_expires_entries() -> None: + c = InMemoryLRUCache(capacity=10, ttl_seconds=0.05) + c.set("k", _entry()) + assert c.get("k") is not None + time.sleep(0.1) + assert c.get("k") is None + assert c.stats().evictions >= 1 + + +def test_lru_cache_delete_and_clear() -> None: + c = InMemoryLRUCache(capacity=10) + c.set("a", _entry()) + c.set("b", _entry()) + c.delete("a") + assert c.get("a") is None + c.clear() + assert c.get("b") is None + + +def test_lru_cache_thread_safety() -> None: + c = InMemoryLRUCache(capacity=10) + + def worker(): + for i in 
range(200): + c.set(f"k{i}", _entry({"i": i})) + c.get(f"k{i}") + + threads = [threading.Thread(target=worker) for _ in range(8)] + for t in threads: + t.start() + for t in threads: + t.join() + stats = c.stats() + assert stats.sets >= 200 + # No crash + counters non-negative. + assert stats.hits >= 0 + assert stats.misses >= 0 + + +def test_lru_stats_hit_rate() -> None: + c = InMemoryLRUCache(capacity=10) + c.set("a", _entry()) + c.get("a") + c.get("b") + stats = c.stats() + assert stats.hit_rate == 0.5 + + +# ---------------------------------------------------------------------- +# NullCache +# ---------------------------------------------------------------------- + + +def test_null_cache_never_stores() -> None: + c = NullCache() + c.set("k", _entry()) + assert c.get("k") is None + assert c.stats().misses >= 1 + + +def test_null_cache_clear_is_safe() -> None: + c = NullCache() + c.clear() + c.delete("k") + + +# ---------------------------------------------------------------------- +# resolve_cache_from_env +# ---------------------------------------------------------------------- + + +def test_resolve_cache_from_env_off_default() -> None: + c = resolve_cache_from_env({}) + assert isinstance(c, NullCache) + + +def test_resolve_cache_from_env_memory() -> None: + c = resolve_cache_from_env({"VSTACK_CACHE": "memory"}) + assert isinstance(c, InMemoryLRUCache) + assert c.capacity == 1024 + + +def test_resolve_cache_from_env_capacity_override() -> None: + c = resolve_cache_from_env({"VSTACK_CACHE": "memory", "VSTACK_CACHE_CAPACITY": "50"}) + assert isinstance(c, InMemoryLRUCache) + assert c.capacity == 50 + + +def test_resolve_cache_from_env_ttl_override() -> None: + c = resolve_cache_from_env({"VSTACK_CACHE": "lru", "VSTACK_CACHE_TTL_SECONDS": "120.5"}) + assert isinstance(c, InMemoryLRUCache) + assert c.ttl_seconds == 120.5 + + +def test_resolve_cache_from_env_unknown_backend_falls_back() -> None: + c = resolve_cache_from_env({"VSTACK_CACHE": "redis"}) + assert 
isinstance(c, NullCache) + + +def test_module_exports() -> None: + for name in ( + "CacheBackend", + "CacheEntry", + "CacheStats", + "InMemoryLRUCache", + "NullCache", + "build_cache_key", + "resolve_cache_from_env", + ): + assert name in cache_mod.__all__ + assert cache_mod.__version__ diff --git a/_doctor/lib/__init__.py b/_doctor/lib/__init__.py new file mode 100644 index 0000000..be2fa31 --- /dev/null +++ b/_doctor/lib/__init__.py @@ -0,0 +1,25 @@ +"""vstack.doctor -- diagnostic CLI that audits the install. + +``vstack-doctor`` walks the installed surfaces and reports their +health: which CLIs resolve on PATH, which optional extras are +installed, which API keys are configured, whether the MCP server +boots, whether gbrain is reachable, whether the canonical +benchmarks suite passes the schema check, and whether a newer +release is available on PyPI. + +The point is to give a new user one command they can run after +``pip install valanistack`` that tells them what's working, +what's missing, and exactly which next command to run to make +each missing piece work. 
+""" + +from ._doctor import ( + CheckResult, + DoctorReport, + HealthStatus, + run_all_checks, +) + +__all__ = ["CheckResult", "DoctorReport", "HealthStatus", "run_all_checks"] + +__version__ = "0.6.0" diff --git a/_doctor/lib/__main__.py b/_doctor/lib/__main__.py new file mode 100644 index 0000000..586e211 --- /dev/null +++ b/_doctor/lib/__main__.py @@ -0,0 +1,10 @@ +"""Allow ``python -m vstack.doctor`` as an alias for ``vstack-doctor``.""" + +from __future__ import annotations + +import sys + +from .cli import main + +if __name__ == "__main__": + sys.exit(main()) diff --git a/_doctor/lib/_doctor.py b/_doctor/lib/_doctor.py new file mode 100644 index 0000000..584126b --- /dev/null +++ b/_doctor/lib/_doctor.py @@ -0,0 +1,339 @@ +"""Diagnostic checks for ``vstack-doctor``.""" + +from __future__ import annotations + +import importlib +import os +import shutil +from dataclasses import dataclass, field +from enum import Enum +from typing import Any + + +class HealthStatus(str, Enum): + OK = "ok" + WARNING = "warning" + """Functional but suboptimal (e.g. 
running without auth on a + public interface; recommended extra not installed).""" + + ERROR = "error" + """A required piece is missing; some functionality won't work.""" + + +@dataclass(frozen=True) +class CheckResult: + name: str + status: HealthStatus + summary: str + hint: str = "" + """If non-empty, the exact command to run to fix this.""" + + detail: dict[str, Any] = field(default_factory=dict) + + +@dataclass +class DoctorReport: + checks: list[CheckResult] = field(default_factory=list) + + @property + def has_errors(self) -> bool: + return any(c.status == HealthStatus.ERROR for c in self.checks) + + @property + def has_warnings(self) -> bool: + return any(c.status == HealthStatus.WARNING for c in self.checks) + + +# ---------------------------------------------------------------------- +# Individual checks +# ---------------------------------------------------------------------- + + +def _check_python_version() -> CheckResult: + import sys + + major, minor = sys.version_info[:2] + if (major, minor) < (3, 11): + return CheckResult( + "python_version", + HealthStatus.ERROR, + f"Python {major}.{minor} is too old (vstack needs 3.11+).", + hint="Upgrade Python: 'brew install python@3.13' or pyenv.", + ) + return CheckResult( + "python_version", + HealthStatus.OK, + f"Python {major}.{minor}", + ) + + +def _check_vstack_version() -> CheckResult: + try: + import vstack + + return CheckResult( + "vstack_version", + HealthStatus.OK, + f"valanistack {vstack.__version__}", + ) + except ImportError as e: + return CheckResult( + "vstack_version", + HealthStatus.ERROR, + f"vstack import failed: {e}", + hint="pip install valanistack", + ) + + +def _check_pattern_registry() -> CheckResult: + try: + from vstack.mcp._registry import PATTERNS + + if len(PATTERNS) == 34: + return CheckResult( + "pattern_registry", + HealthStatus.OK, + f"{len(PATTERNS)} patterns registered.", + ) + return CheckResult( + "pattern_registry", + HealthStatus.WARNING, + f"Expected 34 patterns; found 
{len(PATTERNS)}. The wheel may " + "have shipped without all force-included subdirs.", + hint="pip install --force-reinstall valanistack", + ) + except Exception as e: + return CheckResult( + "pattern_registry", + HealthStatus.ERROR, + f"Registry import failed: {e}", + hint="pip install --force-reinstall valanistack", + ) + + +def _check_cli_on_path(name: str) -> CheckResult: + path = shutil.which(name) + if path: + return CheckResult(f"cli/{name}", HealthStatus.OK, f"{name} -> {path}") + return CheckResult( + f"cli/{name}", + HealthStatus.ERROR, + f"{name} not on PATH.", + hint="pip install valanistack (or ensure the venv's bin/ is on PATH)", + ) + + +def _check_optional_extra(name: str, module: str, extra: str) -> CheckResult: + try: + importlib.import_module(module) + return CheckResult(f"extra/{name}", HealthStatus.OK, f"{name} ({module}) installed.") + except ImportError: + return CheckResult( + f"extra/{name}", + HealthStatus.WARNING, + f"{name} not installed (optional).", + hint=f"pip install 'valanistack[{extra}]'", + ) + + +def _check_llm_client_resolvable() -> CheckResult: + try: + from vstack.mcp._client import resolve_llm_client, LLMResolutionError + except Exception as e: + return CheckResult( + "llm_client", + HealthStatus.ERROR, + f"vstack.mcp not importable: {e}", + hint="pip install 'valanistack[mcp]'", + ) + try: + client = resolve_llm_client() + return CheckResult( + "llm_client", + HealthStatus.OK, + f"resolved {type(client).__name__}", + ) + except LLMResolutionError as e: + return CheckResult( + "llm_client", + HealthStatus.WARNING, + "No LLM client configured (vstack-mcp / vstack-api will reject calls).", + hint=( + "Set ANTHROPIC_API_KEY (recommended), OPENAI_API_KEY, or " + "OLLAMA_HOST. Or set VSTACK_MCP_LLM=stub for tests." 
+ ), + detail={"resolution_error": str(e)}, + ) + except Exception as e: + return CheckResult( + "llm_client", + HealthStatus.ERROR, + f"LLM client resolution crashed: {e}", + ) + + +def _check_home_dir() -> CheckResult: + try: + from vstack.memory import get_home + + home = get_home() + except Exception as e: + return CheckResult( + "vstack_home", + HealthStatus.ERROR, + f"vstack.memory failed: {e}", + ) + if not os.access(str(home), os.W_OK): + return CheckResult( + "vstack_home", + HealthStatus.ERROR, + f"{home} is not writable.", + hint=(f"Check permissions on {home} or set VSTACK_HOME=/path/to/writable"), + ) + return CheckResult("vstack_home", HealthStatus.OK, f"{home} (writable)") + + +def _check_gbrain() -> CheckResult: + if shutil.which("gbrain"): + return CheckResult( + "gbrain", + HealthStatus.OK, + "gbrain on PATH (semantic search available).", + ) + return CheckResult( + "gbrain", + HealthStatus.WARNING, + "gbrain not on PATH (vstack-gbrain falls back to keyword search).", + hint="Install gbrain to enable semantic search across the 34 patterns.", + ) + + +def _check_node_for_browser() -> CheckResult: + if shutil.which("npx") or shutil.which("node"): + return CheckResult( + "node_for_browser", + HealthStatus.OK, + "Node.js / npx available (vstack-browser can spawn chrome-devtools-mcp).", + ) + return CheckResult( + "node_for_browser", + HealthStatus.WARNING, + "Node.js / npx not on PATH; vstack-browser won't work without it.", + hint="brew install node (macOS) or apt install nodejs (Debian)", + ) + + +def _check_pypi_for_upgrade() -> CheckResult: + try: + from vstack.upgrade import fetch_latest_version, get_current_version, is_newer + except Exception as e: + return CheckResult( + "pypi_upgrade", + HealthStatus.WARNING, + f"vstack.upgrade import failed: {e}", + ) + try: + latest = fetch_latest_version(timeout=3.0) + except Exception as e: + return CheckResult( + "pypi_upgrade", + HealthStatus.WARNING, + f"PyPI lookup failed: {e}", + hint="Check network 
def _check_api_security_posture() -> CheckResult:
    """Flag the "auth required but no keys configured" misconfiguration.

    Only environment variables are inspected; the API itself is never
    started. ``VSTACK_API_REQUIRE_AUTH`` counts as enabled when its
    trimmed, lower-cased value is one of ``1`` / ``true`` / ``yes`` /
    ``on``. Keys count as configured when either ``VSTACK_API_KEYS`` or
    ``VSTACK_API_KEYS_FILE`` is set to a non-empty value.
    """
    env = os.environ
    auth_flag = (env.get("VSTACK_API_REQUIRE_AUTH") or "").strip().lower()
    keys_present = bool(env.get("VSTACK_API_KEYS") or env.get("VSTACK_API_KEYS_FILE"))

    if keys_present:
        return CheckResult(
            "api_security",
            HealthStatus.OK,
            "API keys configured.",
        )

    # From here on no keys are configured; that is only an error when
    # authentication has been explicitly switched on.
    if auth_flag in ("1", "true", "yes", "on"):
        return CheckResult(
            "api_security",
            HealthStatus.ERROR,
            "VSTACK_API_REQUIRE_AUTH is on but no API keys are configured; the API will 500.",
            hint="Set VSTACK_API_KEYS=... or VSTACK_API_KEYS_FILE=/path",
        )

    return CheckResult(
        "api_security",
        HealthStatus.OK,
        "API keys not set (loopback-only deployment recommended).",
    )
def run_all_checks(*, skip_network: bool = False) -> DoctorReport:
    """Execute every doctor check and bundle the results.

    Order of the returned checks: the fixed core checks, one CLI check
    per entry in ``_CLIS``, one optional-extra check per entry in
    ``_EXTRAS``, and finally the PyPI upgrade check.

    Args:
        skip_network: When true, the ``pypi_upgrade`` check is left out
            so the run needs no network access (CI / air-gapped hosts).

    Returns:
        A :class:`DoctorReport` wrapping the collected results.
    """
    results: list[CheckResult] = [
        _check_python_version(),
        _check_vstack_version(),
        _check_pattern_registry(),
        _check_home_dir(),
        _check_llm_client_resolvable(),
        _check_api_security_posture(),
        _check_gbrain(),
        _check_node_for_browser(),
    ]
    results.extend(_check_cli_on_path(cli_name) for cli_name in _CLIS)
    # Each _EXTRAS row is (extra_name, module, extra_pkg), in the same
    # positional order _check_optional_extra expects.
    results.extend(_check_optional_extra(*extra_spec) for extra_spec in _EXTRAS)
    if not skip_network:
        results.append(_check_pypi_for_upgrade())
    return DoctorReport(checks=results)
+ ), + ) + parser.add_argument( + "--json", + dest="as_json", + action="store_true", + help="Emit a machine-readable JSON report instead of pretty text.", + ) + parser.add_argument( + "--skip-network", + action="store_true", + help="Skip the PyPI upgrade check (useful for offline / CI runs).", + ) + parser.add_argument( + "--only-errors", + action="store_true", + help="Print only checks with status=error.", + ) + args = parser.parse_args(argv) + + report = run_all_checks(skip_network=args.skip_network) + + if args.as_json: + body = { + "has_errors": report.has_errors, + "has_warnings": report.has_warnings, + "checks": [ + { + "name": c.name, + "status": c.status.value, + "summary": c.summary, + "hint": c.hint, + "detail": c.detail, + } + for c in report.checks + ], + } + print(json.dumps(body, indent=2)) + return 0 if not report.has_errors else 1 + + icons = { + HealthStatus.OK: "OK ", + HealthStatus.WARNING: "WARN", + HealthStatus.ERROR: "ERR ", + } + width = max((len(c.name) for c in report.checks), default=0) + for check in report.checks: + if args.only_errors and check.status != HealthStatus.ERROR: + continue + line = f" [{icons[check.status]}] {check.name:<{width}} {check.summary}" + print(line) + if check.hint: + print(f" hint: {check.hint}") + print() + if report.has_errors: + print("Doctor found ERROR-level issues; fix them before relying on vstack.") + return 1 + if report.has_warnings: + print("Doctor found warnings (optional extras / advisory items).") + return 0 + print("Doctor: all checks passed.") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/_doctor/tests/conftest.py b/_doctor/tests/conftest.py new file mode 100644 index 0000000..cb73444 --- /dev/null +++ b/_doctor/tests/conftest.py @@ -0,0 +1 @@ +"""Pytest configuration for vstack.doctor tests.""" diff --git a/_doctor/tests/test_doctor.py b/_doctor/tests/test_doctor.py new file mode 100644 index 0000000..1a47cba --- /dev/null +++ b/_doctor/tests/test_doctor.py @@ -0,0 
+1,124 @@ +"""Tests for ``vstack.doctor``.""" + +from __future__ import annotations + +import json + +import pytest + +import vstack.doctor as doctor +from vstack.doctor._doctor import ( + HealthStatus, + _check_api_security_posture, + _check_cli_on_path, + _check_home_dir, + _check_pattern_registry, + _check_python_version, + _check_vstack_version, + run_all_checks, +) +from vstack.doctor.cli import main as cli_main + + +def test_python_version_ok() -> None: + result = _check_python_version() + assert result.status == HealthStatus.OK + assert "Python" in result.summary + + +def test_vstack_version_ok() -> None: + result = _check_vstack_version() + assert result.status == HealthStatus.OK + assert "valanistack" in result.summary + + +def test_pattern_registry_ok() -> None: + result = _check_pattern_registry() + assert result.status == HealthStatus.OK + assert "34" in result.summary + + +def test_home_dir_writable(monkeypatch: pytest.MonkeyPatch, tmp_path) -> None: + monkeypatch.setenv("VSTACK_HOME", str(tmp_path)) + result = _check_home_dir() + assert result.status == HealthStatus.OK + + +def test_cli_on_path_missing() -> None: + result = _check_cli_on_path("definitely-not-a-real-cli-zzz") + assert result.status == HealthStatus.ERROR + + +def test_api_security_warns_on_require_without_keys( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setenv("VSTACK_API_REQUIRE_AUTH", "true") + monkeypatch.delenv("VSTACK_API_KEYS", raising=False) + monkeypatch.delenv("VSTACK_API_KEYS_FILE", raising=False) + result = _check_api_security_posture() + assert result.status == HealthStatus.ERROR + + +def test_api_security_ok_when_unset(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delenv("VSTACK_API_REQUIRE_AUTH", raising=False) + monkeypatch.delenv("VSTACK_API_KEYS", raising=False) + monkeypatch.delenv("VSTACK_API_KEYS_FILE", raising=False) + result = _check_api_security_posture() + assert result.status == HealthStatus.OK + + +def 
test_run_all_checks_returns_report() -> None: + report = run_all_checks(skip_network=True) + assert report.checks + # Every check has a name + status + summary. + for c in report.checks: + assert c.name + assert isinstance(c.status, HealthStatus) + assert c.summary + + +def test_run_all_checks_skip_network_excludes_pypi() -> None: + report = run_all_checks(skip_network=True) + names = {c.name for c in report.checks} + assert "pypi_upgrade" not in names + + +# ---------------------------------------------------------------------- +# CLI +# ---------------------------------------------------------------------- + + +def test_cli_default_text_output( + capsys: pytest.CaptureFixture[str], monkeypatch: pytest.MonkeyPatch +) -> None: + rc = cli_main(["--skip-network"]) + # rc is 0 when no ERROR-level findings; rc is 1 when there are. + # Either is fine for this test; we just check the format. + assert rc in (0, 1) + out = capsys.readouterr().out + assert "vstack_version" in out + + +def test_cli_json_output(capsys: pytest.CaptureFixture[str]) -> None: + rc = cli_main(["--skip-network", "--json"]) + assert rc in (0, 1) + body = json.loads(capsys.readouterr().out) + assert "checks" in body + assert "has_errors" in body + assert "has_warnings" in body + assert any(c["name"] == "vstack_version" for c in body["checks"]) + + +def test_cli_only_errors(capsys: pytest.CaptureFixture[str]) -> None: + rc = cli_main(["--skip-network", "--only-errors"]) + assert rc in (0, 1) + # No assertion on stdout content -- some environments will have + # zero errors and others (e.g. missing optional extras as the + # default state) won't actually error -- just verify it doesn't + # crash. 
+ + +def test_module_exports() -> None: + for name in ("CheckResult", "DoctorReport", "HealthStatus", "run_all_checks"): + assert name in doctor.__all__ + assert doctor.__version__ diff --git a/_learnings/lib/_store.py b/_learnings/lib/_store.py index 0a88799..8ae3ef1 100644 --- a/_learnings/lib/_store.py +++ b/_learnings/lib/_store.py @@ -97,9 +97,16 @@ def __init__(self, path: Path) -> None: # ------------------------------------------------------------------ def record(self, entry: LearningRecord) -> LearningRecord: - """Append a record to the JSONL file. Returns the record.""" - self.path.parent.mkdir(parents=True, exist_ok=True) - with self.path.open("a", encoding="utf-8") as f: + """Append a record to the JSONL file. Returns the record. + + Uses an advisory file lock so concurrent vstack processes + never interleave bytes on the same line. The lock is held + only for the duration of the append; readers via + :meth:`iter_records` see consistent lines. + """ + from vstack.memory._fs_atomic import append_locked + + with append_locked(self.path) as f: f.write(entry.model_dump_json()) f.write("\n") return entry @@ -144,9 +151,11 @@ def update_outcome( } ) records[target_idx] = updated - self.path.write_text( + from vstack.memory._fs_atomic import atomic_write_text + + atomic_write_text( + self.path, "\n".join(r.model_dump_json() for r in records) + ("\n" if records else ""), - encoding="utf-8", ) return updated diff --git a/_memory/lib/__init__.py b/_memory/lib/__init__.py index 3178104..e9f38b3 100644 --- a/_memory/lib/__init__.py +++ b/_memory/lib/__init__.py @@ -51,6 +51,14 @@ vstack-config path # prints VSTACK_HOME """ +from ._fs_atomic import ( + FileLock, + FileLockTimeout, + append_locked, + atomic_write_bytes, + atomic_write_text, + shared_read_lock, +) from ._home import ( DEFAULT_HOME_ENV, baseline_path_for, @@ -89,6 +97,12 @@ "load_config", "save_config", "set_key", + "FileLock", + "FileLockTimeout", + "append_locked", + "atomic_write_bytes", + 
"atomic_write_text", + "shared_read_lock", ] __version__ = "0.3.0" diff --git a/_memory/lib/_config.py b/_memory/lib/_config.py index e1fa1af..2b21f3f 100644 --- a/_memory/lib/_config.py +++ b/_memory/lib/_config.py @@ -112,10 +112,16 @@ def load_config(path: Path | None = None) -> Config: def save_config(config: Config, path: Path | None = None) -> None: - """Persist ``config`` to ``path`` (defaults to ``~/.vstack/config.json``).""" + """Persist ``config`` to ``path`` (defaults to ``~/.vstack/config.json``). + + Atomic: writes to a tempfile + os.replace, so concurrent + ``vstack-config set`` invocations never leave a half-written + JSON document on disk. + """ + from ._fs_atomic import atomic_write_text + path = path or get_config_path() - path.parent.mkdir(parents=True, exist_ok=True) - path.write_text(json.dumps(config.values, indent=2, sort_keys=True), encoding="utf-8") + atomic_write_text(path, json.dumps(config.values, indent=2, sort_keys=True) + "\n") def get_key(key: str, path: Path | None = None) -> Any: diff --git a/_memory/lib/_fs_atomic.py b/_memory/lib/_fs_atomic.py new file mode 100644 index 0000000..7ca9994 --- /dev/null +++ b/_memory/lib/_fs_atomic.py @@ -0,0 +1,229 @@ +"""Atomic-write + file-lock helpers used by the persistent stores. + +The learning store, telemetry sink, config.json, and baselines all +share the same failure mode under concurrent processes: + +* Two `vstack-config set` runs racing on `config.json` — last write + wins, with the chance of a partial file if the loser is killed + mid-write. +* Two analyzer processes appending to `learnings.jsonl` — + interleaved bytes on POSIX kernels older than the per-process + `O_APPEND` guarantee was clarified. +* A `vstack-analytics` reader iterating the JSONL while the sink is + appending — partial-line decoding errors. 
def atomic_write_bytes(path: Path | str, data: bytes) -> None:
    """Atomically replace ``path`` with ``data``.

    The bytes are written to a temporary file created in the same
    directory as ``path`` (so the final rename stays on one filesystem)
    and then moved over the destination with ``os.replace``. Missing
    parent directories are created first.

    Args:
        path: Destination file; a ``str`` or ``Path``.
        data: Raw bytes to store.

    Raises:
        OSError: If the directory, temp file, or rename cannot be
            created / performed.
        TypeError: If ``data`` is not bytes-like.
    """
    target = Path(path)
    target.parent.mkdir(parents=True, exist_ok=True)
    fd, tmp_name = tempfile.mkstemp(
        prefix=target.name + ".",
        suffix=".tmp",
        dir=str(target.parent),
    )
    tmp_path = Path(tmp_name)
    try:
        with os.fdopen(fd, "wb") as f:
            f.write(data)
            f.flush()
            # fsync is best-effort: some filesystems do not support it,
            # and the os.replace below is what keeps a half-written
            # file from ever appearing at the destination.
            with contextlib.suppress(OSError):
                os.fsync(f.fileno())
        # Rename only after the handle is closed (required on Windows).
        os.replace(tmp_path, target)
    except BaseException:
        # BaseException, not Exception: KeyboardInterrupt / SystemExit
        # during the write must not leave an orphaned *.tmp file next
        # to the target. After a successful replace the unlink is a
        # suppressed no-op because the temp name no longer exists.
        with contextlib.suppress(OSError):
            tmp_path.unlink()
        raise
def _acquire(fh: IO[Any], *, exclusive: bool, timeout: float) -> None:
    """Poll for an advisory lock on ``fh`` until it is granted or time runs out.

    Uses a non-blocking ``fcntl.flock`` when ``fcntl`` is available,
    otherwise a non-blocking ``msvcrt.locking`` on one byte. With
    neither module present the call returns without locking anything.

    Args:
        fh: Open file object whose descriptor is locked.
        exclusive: Request an exclusive lock instead of a shared one.
            Only honored on the ``fcntl`` path; the ``msvcrt`` path
            always uses ``LK_NBLCK``.
        timeout: Seconds to keep retrying; negative values act as 0.

    Raises:
        FileLockTimeout: The lock was still contended at the deadline.
        OSError: Any failure whose errno is not a "would block" code.
    """
    contended_errnos = (errno.EWOULDBLOCK, errno.EAGAIN, errno.EACCES)
    give_up_at = time.monotonic() + max(0.0, timeout)
    while True:
        try:
            if _HAVE_FCNTL:
                mode = fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH
                fcntl.flock(fh.fileno(), mode | fcntl.LOCK_NB)
            elif _HAVE_MSVCRT:
                # Windows doesn't have a shared-vs-exclusive distinction
                # in the way fcntl does; LK_NBLCK is exclusive.
                msvcrt.locking(fh.fileno(), msvcrt.LK_NBLCK, 1)
            # Reached on success, and also when no locking primitive
            # exists at all -- in that case we accept the risk.
            return
        except OSError as err:
            # BlockingIOError is an OSError subclass, so this one clause
            # covers EWOULDBLOCK / EAGAIN from fcntl and the
            # permission-denied error msvcrt reports on contention.
            if err.errno not in contended_errnos:
                raise
            if time.monotonic() >= give_up_at:
                raise FileLockTimeout(
                    f"Failed to acquire lock on {fh.name} within {timeout}s"
                ) from err
        time.sleep(0.05)
+""" + +from ._metrics import ( + DEFAULT_METRICS_REGISTRY, + Counter, + Histogram, + MetricsRegistry, + record_request, + render_prometheus, + time_request, +) +from ._request_id import ( + REQUEST_ID_HEADER, + current_request_id, + get_or_create_request_id, + reset_request_id, + set_current_request_id, +) +from ._sentry import install_sentry_if_configured, is_sentry_active + +__all__ = [ + "Counter", + "Histogram", + "MetricsRegistry", + "DEFAULT_METRICS_REGISTRY", + "REQUEST_ID_HEADER", + "current_request_id", + "get_or_create_request_id", + "install_sentry_if_configured", + "is_sentry_active", + "record_request", + "render_prometheus", + "reset_request_id", + "set_current_request_id", + "time_request", +] + +__version__ = "0.6.0" diff --git a/_observability/lib/_metrics.py b/_observability/lib/_metrics.py new file mode 100644 index 0000000..afa4829 --- /dev/null +++ b/_observability/lib/_metrics.py @@ -0,0 +1,280 @@ +"""Lightweight Prometheus-text-format metrics collector. + +Why we hand-roll this: the upstream ``prometheus_client`` package +pulls in a lot of optional dependencies (gRPC, multiprocess mode) +that vstack's typical user doesn't need. The Prometheus text +exposition format is tightly specified and easy to emit; we expose +the same Counter / Histogram API the upstream package does for the +parts we use. + +The registry is process-global by default. Tests pass an explicit +:class:`MetricsRegistry` to avoid leaking counters between cases. +""" + +from __future__ import annotations + +import contextlib +import math +import threading +import time +from dataclasses import dataclass, field +from typing import Iterable, Iterator, Mapping + + +# Histogram buckets in seconds. Default chosen for LLM-pattern +# latency: quick mode ~1-3s, standard ~3-8s, forensic ~8-30s. +DEFAULT_HISTOGRAM_BUCKETS: tuple[float, ...] 
@dataclass
class Counter:
    """Monotonically-increasing counter with optional labels.

    One running total is kept per distinct combination of label values.
    Label values are read in ``label_names`` order; a label missing
    from a call is recorded as the empty string, and keyword arguments
    not listed in ``label_names`` are ignored. Updates and reads are
    serialized by a per-counter lock.
    """

    # Metric name as emitted in the exposition output.
    name: str
    # Human-readable help text for the metric.
    description: str
    # Ordered label names; their values form the per-series key.
    label_names: tuple[str, ...] = ()
    # Running totals keyed by the tuple of label values.
    _values: dict[tuple[str, ...], float] = field(default_factory=dict)
    # Guards every read and write of _values.
    _lock: threading.Lock = field(default_factory=threading.Lock)

    def inc(self, value: float = 1.0, **labels: str) -> None:
        """Add ``value`` to the series selected by ``labels``.

        Raises:
            ValueError: If ``value`` is negative. A counter may only go
                up; a decrease would be read by Prometheus as a reset.
        """
        if value < 0:
            raise ValueError(f"Counter {self.name!r} can only be incremented, got {value!r}")
        key = self._key(labels)
        with self._lock:
            self._values[key] = self._values.get(key, 0.0) + value

    def value(self, **labels: str) -> float:
        """Return the current total for ``labels`` (0.0 if never incremented)."""
        with self._lock:
            return self._values.get(self._key(labels), 0.0)

    def _key(self, labels: Mapping[str, str]) -> tuple[str, ...]:
        """Build the series key: label values in ``label_names`` order."""
        return tuple(str(labels.get(n, "")) for n in self.label_names)
field(default_factory=threading.Lock) + + def counter(self, name: str, description: str, label_names: Iterable[str] = ()) -> Counter: + with self._lock: + existing = self.counters.get(name) + if existing is not None: + return existing + counter = Counter(name=name, description=description, label_names=tuple(label_names)) + self.counters[name] = counter + return counter + + def histogram( + self, + name: str, + description: str, + label_names: Iterable[str] = (), + buckets: Iterable[float] = DEFAULT_HISTOGRAM_BUCKETS, + ) -> Histogram: + with self._lock: + existing = self.histograms.get(name) + if existing is not None: + return existing + histogram = Histogram( + name=name, + description=description, + label_names=tuple(label_names), + buckets=tuple(buckets), + ) + self.histograms[name] = histogram + return histogram + + def render_prometheus(self) -> str: + """Return the registry as a Prometheus text-format string.""" + return render_prometheus(self) + + +DEFAULT_METRICS_REGISTRY = MetricsRegistry() +"""Process-wide default registry. 
def record_request(
    *,
    surface: str,
    pattern: str,
    mode: str,
    status: str,
    duration_seconds: float,
    registry: MetricsRegistry | None = None,
) -> None:
    """Record a single request: bump its counter and observe its latency.

    Increments ``vstack_requests_total`` for the
    surface / pattern / mode / status combination and adds
    ``duration_seconds`` to ``vstack_request_duration_seconds`` for
    surface / pattern / mode.

    Args:
        surface: Which entry point served the request.
        pattern: Pattern name the request ran.
        mode: Analysis mode of the request.
        status: Low-cardinality outcome string, e.g. ``"ok"`` /
            ``"validation_error"`` / ``"invalid_mode"`` /
            ``"analyzer_error"`` / ``"llm_resolution_error"`` /
            ``"rate_limited"`` / ``"unauthorized"``.
        duration_seconds: Elapsed time for the request, in seconds.
        registry: Destination registry; falls back to
            ``DEFAULT_METRICS_REGISTRY`` when omitted.
    """
    target = registry or DEFAULT_METRICS_REGISTRY
    # The three labels shared by the counter and the histogram.
    shared_labels = {"surface": surface, "pattern": pattern, "mode": mode}
    _request_counter(target).inc(status=status, **shared_labels)
    _request_histogram(target).observe(duration_seconds, **shared_labels)
def render_prometheus(registry: MetricsRegistry) -> str:
    """Render the registry to Prometheus text exposition format.

    Every counter and histogram gets a ``# HELP`` and ``# TYPE`` line
    followed by its samples. For histograms, one ``_bucket`` sample is
    written per configured bucket edge, then ``_count`` and ``_sum``.

    Args:
        registry: Object exposing ``counters`` and ``histograms``
            mappings of metric objects.

    Returns:
        The rendered text, newline-terminated, or ``""`` when the
        registry holds no metrics.
    """
    lines: list[str] = []
    for counter in registry.counters.values():
        lines.append(f"# HELP {counter.name} {counter.description}")
        lines.append(f"# TYPE {counter.name} counter")
        if not counter.label_names:
            lines.append(f"{counter.name} {counter.value()}")
            continue
        with counter._lock:
            for label_values, value in counter._values.items():
                label_str = _format_labels(counter.label_names, label_values)
                lines.append(f"{counter.name}{label_str} {value}")
    for histogram in registry.histograms.values():
        lines.append(f"# HELP {histogram.name} {histogram.description}")
        lines.append(f"# TYPE {histogram.name} histogram")
        with histogram._lock:
            for label_values, counts in histogram._counts.items():
                # counts[i] is emitted as-is for the bucket at
                # buckets[i]; no re-accumulation is done here, so the
                # stored counts are expected to be cumulative already.
                for edge, bucket_count in zip(histogram.buckets, counts):
                    bucket_labels = _format_labels(
                        histogram.label_names + ("le",),
                        label_values + (_format_float(edge),),
                    )
                    lines.append(f"{histogram.name}_bucket{bucket_labels} {bucket_count}")
                series_labels = _format_labels(histogram.label_names, label_values)
                # The last bucket's count is reported as the total.
                total_count = counts[-1] if counts else 0
                total_sum = histogram._sums.get(label_values, 0.0)
                lines.append(f"{histogram.name}_count{series_labels} {total_count}")
                lines.append(f"{histogram.name}_sum{series_labels} {total_sum}")
    return "\n".join(lines) + ("\n" if lines else "")


def _format_labels(names: tuple[str, ...], values: tuple[str, ...]) -> str:
    """Return ``{name="value",...}`` for the given labels, or ``""`` if none.

    Backslash, double quote and newline in a value are escaped.
    """
    if not names:
        return ""
    pairs = []
    for n, v in zip(names, values):
        # Backslash must be escaped first so the escapes added for the
        # quote and newline are not themselves doubled.
        v = v.replace("\\", "\\\\").replace('"', '\\"').replace("\n", "\\n")
        pairs.append(f'{n}="{v}"')
    return "{" + ",".join(pairs) + "}"


def _format_float(v: float) -> str:
    """Format a bucket edge: integral values without a fraction.

    Non-finite values map to ``+Inf`` / ``-Inf`` / ``NaN`` instead of
    raising from ``int(v)``.
    """
    if math.isinf(v):
        return "+Inf" if v > 0 else "-Inf"
    if math.isnan(v):
        return "NaN"
    if v == int(v):
        return f"{int(v)}"
    return f"{v}"
def get_or_create_request_id(incoming: str | None = None) -> str:
    """Return the inbound ID if valid, otherwise generate a fresh one.

    A valid ID is 1-200 chars long and consists only of ASCII letters,
    ASCII digits, and the punctuation ``- _ : .``. Anything else
    (including a missing or empty value) is replaced with a
    server-generated ``req_<16 hex chars>`` ID, so untrusted text is
    never echoed back in a header.

    Args:
        incoming: The client-supplied ID, if any.

    Returns:
        ``incoming`` unchanged when it passes validation, else a newly
        generated ID.
    """
    if (
        incoming
        and len(incoming) <= 200
        # str.isalnum() alone is true for non-ASCII letters and digits
        # (e.g. accented or Arabic-Indic characters); isascii() is what
        # restricts the check to the documented ASCII set.
        and incoming.isascii()
        and all(c.isalnum() or c in "-_:." for c in incoming)
    ):
        return incoming
    return "req_" + secrets.token_hex(8)
logger = logging.getLogger(__name__)

# Process-wide state: set once a successful ``sentry_sdk.init`` has run.
_sentry_installed = False
_sentry_module: Any | None = None


def install_sentry_if_configured(env: dict[str, str] | None = None) -> bool:
    """Initialize Sentry if ``SENTRY_DSN`` is set and ``sentry-sdk`` imports.

    Idempotent: once initialization has succeeded, later calls return
    ``True`` without touching the SDK again. Returns ``False`` when no
    DSN is configured, when ``sentry_sdk`` cannot be imported, or when
    ``sentry_sdk.init`` raises.

    Environment variables consulted (from ``env`` or ``os.environ``):
      * ``SENTRY_DSN``                  -- required to enable
      * ``SENTRY_ENVIRONMENT``          -- default ``"production"``
      * ``SENTRY_RELEASE``              -- default from ``_release_string()``
      * ``SENTRY_TRACES_SAMPLE_RATE``   -- default ``0.05``
      * ``SENTRY_PROFILES_SAMPLE_RATE`` -- default ``0.0``
    """
    global _sentry_installed, _sentry_module
    if _sentry_installed:
        return True

    env = env if env is not None else dict(os.environ)
    dsn = env.get("SENTRY_DSN")
    if not dsn:
        return False

    try:
        # Imported lazily so that importing this module never pulls in the SDK.
        import sentry_sdk
    except ImportError:
        logger.info(
            "SENTRY_DSN is set but sentry-sdk is not installed; skipping. "
            "Run: pip install sentry-sdk"
        )
        return False

    try:
        sentry_sdk.init(
            dsn=dsn,
            environment=env.get("SENTRY_ENVIRONMENT", "production"),
            release=env.get("SENTRY_RELEASE", _release_string()),
            traces_sample_rate=_float_env(env, "SENTRY_TRACES_SAMPLE_RATE", 0.05),
            profiles_sample_rate=_float_env(env, "SENTRY_PROFILES_SAMPLE_RATE", 0.0),
            send_default_pii=False,
        )
    except Exception as e:  # noqa: BLE001 - sentry-sdk init can throw anything
        logger.warning("Failed to initialize Sentry: %s", e)
        return False

    _sentry_module = sentry_sdk
    _sentry_installed = True
    logger.info("Sentry initialized; reporting to %s", _redact_dsn(dsn))
    return True


def is_sentry_active() -> bool:
    """Return whether Sentry has been initialized in this process."""
    return _sentry_installed


def _release_string() -> str:
    """Return ``valanistack@<version>``, or ``valanistack@unknown`` on failure."""
    try:
        import vstack

        return f"valanistack@{vstack.__version__}"
    except Exception:
        return "valanistack@unknown"


def _float_env(env: dict[str, str], key: str, default: float) -> float:
    """Read ``key`` from ``env`` as a sample rate clamped to ``[0.0, 1.0]``.

    Returns ``default`` when the variable is missing, is not a number,
    or parses to NaN.
    """
    import math

    raw = env.get(key)
    if raw is None:
        return default
    try:
        value = float(raw)
    except ValueError:
        return default
    # ``min(1.0, nan)`` evaluates to 1.0, so without this guard a NaN
    # setting would silently become a 100% sample rate.
    if math.isnan(value):
        return default
    return max(0.0, min(1.0, value))


def _redact_dsn(dsn: str) -> str:
    """Return ``dsn`` with everything before the first ``@`` masked.

    Returns an empty string when ``dsn`` contains no ``@``.
    """
    if "@" in dsn:
        return "https://***@" + dsn.split("@", 1)[1]
    return ""
vstack.observability as obs +from vstack.observability._metrics import ( + Counter, + Histogram, + MetricsRegistry, + record_request, + render_prometheus, + time_request, +) +from vstack.observability._request_id import ( + REQUEST_ID_HEADER, + current_request_id, + get_or_create_request_id, + reset_request_id, + set_current_request_id, +) +from vstack.observability._sentry import ( + _redact_dsn, + install_sentry_if_configured, + is_sentry_active, +) + + +# ---------------------------------------------------------------------- +# Counter +# ---------------------------------------------------------------------- + + +def test_counter_inc_no_labels() -> None: + c = Counter(name="x", description="d") + c.inc() + c.inc(2.5) + assert c.value() == 3.5 + + +def test_counter_inc_with_labels() -> None: + c = Counter(name="x", description="d", label_names=("status",)) + c.inc(status="ok") + c.inc(status="ok") + c.inc(status="err") + assert c.value(status="ok") == 2 + assert c.value(status="err") == 1 + assert c.value(status="missing") == 0 + + +def test_counter_thread_safety() -> None: + c = Counter(name="x", description="d") + + def worker(): + for _ in range(1000): + c.inc() + + threads = [threading.Thread(target=worker) for _ in range(8)] + for t in threads: + t.start() + for t in threads: + t.join() + assert c.value() == 8000 + + +# ---------------------------------------------------------------------- +# Histogram +# ---------------------------------------------------------------------- + + +def test_histogram_observe_buckets() -> None: + h = Histogram(name="x", description="d", buckets=(0.1, 0.5, 1.0, float("inf"))) + h.observe(0.05) + h.observe(0.3) + h.observe(2.0) + # Bucket layout under our observe()'s semantics: each observation + # increments every bucket where value <= edge. 
+ assert h._counts[()][0] == 1 # 0.05 <= 0.1 + assert h._counts[()][1] == 2 # 0.05 + 0.3 <= 0.5 + assert h._counts[()][2] == 2 # same + assert h._counts[()][3] == 3 # all <= +Inf + assert h._sums[()] == pytest.approx(0.05 + 0.3 + 2.0) + + +def test_histogram_ignores_nan_and_inf() -> None: + h = Histogram(name="x", description="d") + h.observe(float("nan")) + h.observe(float("inf")) + assert () not in h._counts # nothing recorded + + +def test_histogram_with_labels() -> None: + h = Histogram(name="x", description="d", label_names=("pattern",), buckets=(1.0, float("inf"))) + h.observe(0.5, pattern="lewin") + h.observe(0.5, pattern="aar") + assert h._counts[("lewin",)][0] == 1 + assert h._counts[("aar",)][0] == 1 + + +# ---------------------------------------------------------------------- +# MetricsRegistry +# ---------------------------------------------------------------------- + + +def test_registry_get_or_create_idempotent() -> None: + reg = MetricsRegistry() + c1 = reg.counter("x", "d") + c2 = reg.counter("x", "d") + assert c1 is c2 + + +def test_registry_render_prometheus_includes_counter_and_histogram() -> None: + reg = MetricsRegistry() + c = reg.counter("vstack_test_total", "demo counter", label_names=("status",)) + c.inc(status="ok") + c.inc(status="ok") + h = reg.histogram( + "vstack_test_duration_seconds", + "demo histogram", + label_names=("pattern",), + buckets=(0.5, 1.0, float("inf")), + ) + h.observe(0.2, pattern="lewin") + text = render_prometheus(reg) + assert "# HELP vstack_test_total" in text + assert "# TYPE vstack_test_total counter" in text + assert 'vstack_test_total{status="ok"} 2' in text + assert "# TYPE vstack_test_duration_seconds histogram" in text + assert 'vstack_test_duration_seconds_bucket{pattern="lewin",le="0.5"}' in text + assert 'vstack_test_duration_seconds_bucket{pattern="lewin",le="+Inf"}' in text + assert 'vstack_test_duration_seconds_count{pattern="lewin"}' in text + assert 'vstack_test_duration_seconds_sum{pattern="lewin"}' 
in text + + +# ---------------------------------------------------------------------- +# record_request + time_request +# ---------------------------------------------------------------------- + + +def test_record_request_populates_default_metrics() -> None: + reg = MetricsRegistry() + record_request( + surface="rest", + pattern="lewin", + mode="standard", + status="ok", + duration_seconds=1.5, + registry=reg, + ) + text = render_prometheus(reg) + assert "vstack_requests_total" in text + assert "vstack_request_duration_seconds" in text + + +def test_time_request_captures_duration_on_exit() -> None: + reg = MetricsRegistry() + with time_request(surface="rest", pattern="aar", mode="quick", registry=reg) as bucket: + bucket["status"] = "ok" + time.sleep(0.01) + text = render_prometheus(reg) + assert 'vstack_requests_total{surface="rest",pattern="aar",mode="quick",status="ok"}' in text + + +def test_time_request_records_unknown_on_unset_status() -> None: + reg = MetricsRegistry() + with time_request(surface="rest", pattern="aar", mode="quick", registry=reg): + pass + text = render_prometheus(reg) + assert 'status="unknown"' in text + + +def test_time_request_records_on_exception() -> None: + reg = MetricsRegistry() + with pytest.raises(RuntimeError): + with time_request(surface="rest", pattern="aar", mode="quick", registry=reg) as bucket: + bucket["status"] = "analyzer_error" + raise RuntimeError("boom") + text = render_prometheus(reg) + assert 'status="analyzer_error"' in text + + +# ---------------------------------------------------------------------- +# Request ID +# ---------------------------------------------------------------------- + + +def test_get_or_create_request_id_uses_valid_inbound() -> None: + incoming = "abc-123_42:xyz.42" + assert get_or_create_request_id(incoming) == incoming + + +def test_get_or_create_request_id_replaces_invalid() -> None: + bad = "not allowed!!! 
< script>" + assert get_or_create_request_id(bad) != bad + assert get_or_create_request_id(bad).startswith("req_") + + +def test_get_or_create_request_id_replaces_too_long() -> None: + too_long = "a" * 1000 + assert get_or_create_request_id(too_long).startswith("req_") + + +def test_get_or_create_request_id_generates_when_none() -> None: + rid = get_or_create_request_id(None) + assert rid.startswith("req_") + assert len(rid) > 10 + + +def test_set_and_current_request_id() -> None: + token = set_current_request_id("req_test_42") + try: + assert current_request_id() == "req_test_42" + finally: + reset_request_id(token) + assert current_request_id() is None + + +def test_request_id_header_constant() -> None: + assert REQUEST_ID_HEADER == "X-Request-ID" + + +# ---------------------------------------------------------------------- +# Sentry hook +# ---------------------------------------------------------------------- + + +def test_install_sentry_noop_when_dsn_unset() -> None: + assert install_sentry_if_configured({}) is False + assert is_sentry_active() is False + + +def test_install_sentry_noop_when_sdk_missing(monkeypatch: pytest.MonkeyPatch) -> None: + # Ensure sentry_sdk import fails in this run by shadowing it. 
+ import sys + + monkeypatch.setitem(sys.modules, "sentry_sdk", None) + assert install_sentry_if_configured({"SENTRY_DSN": "https://example/123"}) is False + + +def test_redact_dsn_hides_auth() -> None: + redacted = _redact_dsn("https://abc123@o123.ingest.sentry.io/456") + assert "abc123" not in redacted + assert "***" in redacted + + +def test_redact_dsn_unparseable() -> None: + assert _redact_dsn("not a dsn") == "" + + +# ---------------------------------------------------------------------- +# Module exports +# ---------------------------------------------------------------------- + + +def test_module_exports() -> None: + for name in ( + "Counter", + "Histogram", + "MetricsRegistry", + "DEFAULT_METRICS_REGISTRY", + "REQUEST_ID_HEADER", + "current_request_id", + "get_or_create_request_id", + "install_sentry_if_configured", + "is_sentry_active", + "record_request", + "render_prometheus", + "set_current_request_id", + "time_request", + ): + assert name in obs.__all__ + assert obs.__version__ diff --git a/_packaging/vstack/__init__.py b/_packaging/vstack/__init__.py index e209e2e..7b3943b 100644 --- a/_packaging/vstack/__init__.py +++ b/_packaging/vstack/__init__.py @@ -33,6 +33,6 @@ from __future__ import annotations -__version__ = "0.5.0" +__version__ = "0.6.0" __all__ = ["__version__"] diff --git a/_security/lib/__init__.py b/_security/lib/__init__.py new file mode 100644 index 0000000..5c99367 --- /dev/null +++ b/_security/lib/__init__.py @@ -0,0 +1,64 @@ +"""vstack.security -- production hardening for the REST surface + +helpers used across the rest of vstack. + +What this module provides: + +* :class:`APIKeyStore` -- loads + validates API keys from env vars + / config files / explicit lists. Constant-time comparisons. +* :class:`RateLimiter` -- in-memory sliding-window rate limiter + with a pluggable backend interface for Redis later. +* :class:`RequestLimits` -- declarative caps for body size, + trace-step count, timeout, max-tokens. 
+* :func:`audit_input_for_injection` -- thin wrapper over + :func:`vstack.aar.detect_injection` that the REST + MCP + servers run on free-text fields before they reach the LLM. +* :func:`safe_subprocess_argv` / :func:`safe_path` -- input + guards for the parts of vstack that shell out (gbrain, + chrome-devtools-mcp) or read user-supplied paths + (baselines, learnings, suite files). + +None of this changes default behaviour for existing local-use +flows. The REST API stays loopback-friendly by default; +authentication only kicks in when you explicitly enable it. +""" + +from ._auth import APIKey, APIKeyStore, load_keys_from_env, verify_api_key +from ._limits import ( + DEFAULT_REQUEST_LIMITS, + RequestLimits, + RequestSizeExceeded, + enforce_trace_limits, +) +from ._rate_limit import ( + InMemoryRateLimiter, + RateLimitDecision, + RateLimiter, + RateLimitExceeded, +) +from ._validation import ( + audit_input_for_injection, + safe_path, + safe_pattern_name, + safe_subprocess_argv, +) + +__all__ = [ + "APIKey", + "APIKeyStore", + "DEFAULT_REQUEST_LIMITS", + "InMemoryRateLimiter", + "RateLimitDecision", + "RateLimitExceeded", + "RateLimiter", + "RequestLimits", + "RequestSizeExceeded", + "audit_input_for_injection", + "enforce_trace_limits", + "load_keys_from_env", + "safe_path", + "safe_pattern_name", + "safe_subprocess_argv", + "verify_api_key", +] + +__version__ = "0.6.0" diff --git a/_security/lib/_auth.py b/_security/lib/_auth.py new file mode 100644 index 0000000..38667af --- /dev/null +++ b/_security/lib/_auth.py @@ -0,0 +1,164 @@ +"""API-key authentication for the REST surface. + +The default mode for ``vstack-api serve`` is no-auth + loopback bind +(127.0.0.1). When you bind to a public interface or run inside an +orchestrator, set ``VSTACK_API_KEYS=key1,key2,key3`` (or a +newline-separated file via ``VSTACK_API_KEYS_FILE``) and pass +``--require-auth`` to the CLI. Requests then need a header +``Authorization: Bearer `` or ``X-API-Key: `` to be accepted. 
logger = logging.getLogger(__name__)


DEFAULT_API_KEYS_ENV = "VSTACK_API_KEYS"
DEFAULT_API_KEYS_FILE_ENV = "VSTACK_API_KEYS_FILE"

# Minimum acceptable raw-key length in characters. Keys produced by
# generate_api_key() are 32 URL-safe characters (24 random bytes), which
# clears this bar.
MIN_API_KEY_LENGTH = 24


@dataclass(frozen=True)
class APIKey:
    """One configured API key with metadata.

    ``name`` is a human label. ``key_hash`` is the SHA-256 digest of
    the raw key; the raw value itself is not stored on the instance.
    """

    name: str
    key_hash: bytes

    @classmethod
    def from_raw(cls, name: str, raw: str) -> "APIKey":
        """Build an :class:`APIKey` from a raw key string.

        Raises:
            ValueError: if ``raw`` is shorter than ``MIN_API_KEY_LENGTH``.
        """
        if len(raw) < MIN_API_KEY_LENGTH:
            raise ValueError(
                f"API key for {name!r} is shorter than {MIN_API_KEY_LENGTH} chars. "
                "Generate a stronger one with secrets.token_urlsafe(24)."
            )
        return cls(name=name, key_hash=_hash_key(raw))


@dataclass
class APIKeyStore:
    """A bag of configured API keys.

    Construct via :func:`load_keys_from_env`, or pass an explicit list
    of :class:`APIKey`. Lookup is a linear scan over ``keys``.
    """

    keys: list[APIKey] = field(default_factory=list)

    def __len__(self) -> int:
        return len(self.keys)

    def __bool__(self) -> bool:
        return bool(self.keys)

    def verify(self, raw: str | None) -> APIKey | None:
        """Return the matching :class:`APIKey` if ``raw`` is valid, else ``None``.

        ``None`` is returned for a missing key, an empty string, and an
        unknown key alike. Each comparison against a stored digest uses
        :func:`hmac.compare_digest`.
        """
        if not raw:
            return None
        candidate = _hash_key(raw)
        for key in self.keys:
            if hmac.compare_digest(candidate, key.key_hash):
                return key
        return None


def load_keys_from_env(env: dict[str, str] | None = None) -> APIKeyStore:
    """Build an :class:`APIKeyStore` from the standard env vars.

    Sources, merged in this order:

    1. ``VSTACK_API_KEYS`` -- comma-separated entries.
    2. ``VSTACK_API_KEYS_FILE`` -- path to a file with one entry per
       line; blank lines and lines starting with ``#`` are skipped. A
       path that does not exist is logged and ignored.

    An entry is either ``name=key`` or a bare key. Bare keys are named
    by position (``key-<index>`` / ``file-key-<line index>``).
    Whitespace around the name and the key is ignored, so
    ``alpha = <key>`` registers the same key as ``alpha=<key>``.
    NOTE: the first ``=`` always separates name from key, so a raw key
    that itself contains ``=`` must be written as ``name=key``.

    Raises:
        ValueError: if any configured key is shorter than
            ``MIN_API_KEY_LENGTH`` (from :meth:`APIKey.from_raw`).
    """
    env = env if env is not None else dict(os.environ)
    keys: list[APIKey] = []

    raw_list = env.get(DEFAULT_API_KEYS_ENV) or ""
    for idx, item in enumerate(_split_env_list(raw_list)):
        name, value = _parse_key_entry(item, f"key-{idx}")
        if value:
            keys.append(APIKey.from_raw(name=name, raw=value))

    path_str = env.get(DEFAULT_API_KEYS_FILE_ENV)
    if path_str:
        path = Path(path_str).expanduser()
        if path.exists():
            for idx, line in enumerate(path.read_text(encoding="utf-8").splitlines()):
                line = line.strip()
                if not line or line.startswith("#"):
                    continue
                name, value = _parse_key_entry(line, f"file-key-{idx}")
                if value:
                    keys.append(APIKey.from_raw(name=name, raw=value))
        else:
            logger.warning("VSTACK_API_KEYS_FILE=%s does not exist; ignoring", path)

    return APIKeyStore(keys=keys)


def verify_api_key(raw: str | None, store: APIKeyStore) -> APIKey | None:
    """Convenience wrapper for ``store.verify(raw)``."""
    return store.verify(raw)


def generate_api_key() -> str:
    """Return a fresh random key suitable for ``VSTACK_API_KEYS``.

    Produced via ``secrets.token_urlsafe(24)``: 24 random bytes encoded
    as a 32-character URL-safe string.
    """
    return secrets.token_urlsafe(24)


# ----------------------------------------------------------------------
# internals
# ----------------------------------------------------------------------


def _hash_key(raw: str) -> bytes:
    """Return the SHA-256 digest of ``raw`` encoded as UTF-8."""
    return hashlib.sha256(raw.encode("utf-8")).digest()


def _split_env_list(raw: str) -> Iterable[str]:
    """Split a comma-separated string into stripped, non-empty parts."""
    if not raw:
        return []
    return [part.strip() for part in raw.split(",") if part.strip()]


def _parse_key_entry(entry: str, fallback_name: str) -> tuple[str, str]:
    """Split one ``name=key`` (or bare ``key``) entry into ``(name, key)``.

    Both halves are stripped so stray spaces around ``=`` do not end up
    inside the hashed key. ``fallback_name`` is used when the entry has
    no ``=`` or the name part is empty.
    """
    name, sep, value = entry.partition("=")
    if not sep:
        return fallback_name, entry.strip()
    return name.strip() or fallback_name, value.strip()
class RequestSizeExceeded(ValueError):
    """Raised when an incoming trace exceeds a configured limit.

    Carries ``kind``, ``actual`` and ``limit`` so the caller can build
    a structured error instead of a generic one.
    """

    def __init__(self, kind: str, actual: int, limit: int) -> None:
        super().__init__(
            f"{kind} exceeded: {actual} > {limit}. "
            f"Increase {_env_var_for(kind)} or split the trace."
        )
        self.kind = kind
        self.actual = actual
        self.limit = limit


@dataclass(frozen=True)
class RequestLimits:
    """Maximum sizes the API layer accepts."""

    # Total POST body size in bytes (5 MiB). Not checked in this module.
    max_body_bytes: int = 5 * 1024 * 1024

    # Cap on ``len(payload["steps"])``.
    max_trace_steps: int = 5_000

    # Cap on ``len(payload["messages"])``.
    max_messages: int = 5_000

    # Cap on the length of any single string value in the payload.
    max_string_chars: int = 200_000

    # Cap on the summed length of all string values in the payload.
    max_total_chars: int = 1_000_000

    # Per-request deadline in seconds. Not checked in this module.
    request_timeout_seconds: float = 120.0


DEFAULT_REQUEST_LIMITS = RequestLimits()


def request_limits_from_env(
    env: Mapping[str, str] | None = None,
    base: RequestLimits | None = None,
) -> RequestLimits:
    """Load limits from env vars, layering over ``base``.

    Missing or unparsable variables keep the value from ``base``
    (``DEFAULT_REQUEST_LIMITS`` when ``base`` is ``None``). Integer
    limits are floored at 1 and the timeout at 0.1 seconds.

    Env vars consulted:
      * ``VSTACK_API_MAX_BODY_BYTES``
      * ``VSTACK_API_MAX_TRACE_STEPS``
      * ``VSTACK_API_MAX_MESSAGES``
      * ``VSTACK_API_MAX_STRING_CHARS``
      * ``VSTACK_API_MAX_TOTAL_CHARS``
      * ``VSTACK_API_REQUEST_TIMEOUT``
    """
    env = env if env is not None else os.environ
    base = base or DEFAULT_REQUEST_LIMITS
    return RequestLimits(
        max_body_bytes=_int_env(env, "VSTACK_API_MAX_BODY_BYTES", base.max_body_bytes),
        max_trace_steps=_int_env(env, "VSTACK_API_MAX_TRACE_STEPS", base.max_trace_steps),
        max_messages=_int_env(env, "VSTACK_API_MAX_MESSAGES", base.max_messages),
        max_string_chars=_int_env(env, "VSTACK_API_MAX_STRING_CHARS", base.max_string_chars),
        max_total_chars=_int_env(env, "VSTACK_API_MAX_TOTAL_CHARS", base.max_total_chars),
        request_timeout_seconds=_float_env(
            env, "VSTACK_API_REQUEST_TIMEOUT", base.request_timeout_seconds
        ),
    )


def enforce_trace_limits(payload: Mapping[str, Any], limits: RequestLimits) -> None:
    """Walk a trace payload and raise on the first cap violation.

    Checks, in order: ``len(payload["steps"])`` against
    ``max_trace_steps``, ``len(payload["messages"])`` against
    ``max_messages``, then every string value reachable through nested
    mappings, lists and tuples against ``max_string_chars`` and the
    running total against ``max_total_chars``. Mapping keys are not
    counted. A payload that is not a mapping is ignored.

    Raises:
        RequestSizeExceeded: on the first limit that is exceeded.
    """
    # Accept any Mapping, matching the annotation; a plain ``dict``
    # check would silently skip enforcement for other mapping types.
    if not isinstance(payload, Mapping):
        return

    steps = payload.get("steps")
    if isinstance(steps, list) and len(steps) > limits.max_trace_steps:
        raise RequestSizeExceeded("trace_steps", len(steps), limits.max_trace_steps)

    messages = payload.get("messages")
    if isinstance(messages, list) and len(messages) > limits.max_messages:
        raise RequestSizeExceeded("messages", len(messages), limits.max_messages)

    total_chars = 0
    for value in _walk_strings(payload):
        if len(value) > limits.max_string_chars:
            raise RequestSizeExceeded("string_chars", len(value), limits.max_string_chars)
        total_chars += len(value)
        if total_chars > limits.max_total_chars:
            raise RequestSizeExceeded("total_chars", total_chars, limits.max_total_chars)


# ----------------------------------------------------------------------
# internals
# ----------------------------------------------------------------------


def _walk_strings(obj: Any) -> "Iterator[str]":
    """Yield every string value inside ``obj`` in depth-first order.

    Descends into mapping values, lists and tuples. Uses an explicit
    stack of iterators rather than recursion, so a deeply nested
    payload cannot trigger ``RecursionError`` before the size caps run.
    """
    stack: list[Iterator[Any]] = [iter((obj,))]
    while stack:
        for item in stack[-1]:
            if isinstance(item, str):
                yield item
            elif isinstance(item, Mapping):
                stack.append(iter(item.values()))
                break
            elif isinstance(item, (list, tuple)):
                stack.append(iter(item))
                break
        else:
            # Current container exhausted; resume its parent.
            stack.pop()


def _int_env(env: Mapping[str, str], key: str, default: int) -> int:
    """Read ``key`` as an int floored at 1; ``default`` if missing or invalid."""
    raw = env.get(key)
    if raw is None:
        return default
    try:
        return max(1, int(raw))
    except ValueError:
        return default


def _float_env(env: Mapping[str, str], key: str, default: float) -> float:
    """Read ``key`` as a float floored at 0.1; ``default`` if missing or invalid."""
    raw = env.get(key)
    if raw is None:
        return default
    try:
        return max(0.1, float(raw))
    except ValueError:
        return default


def _env_var_for(kind: str) -> str:
    """Map a limit ``kind`` to the env var that raises it."""
    return {
        "trace_steps": "VSTACK_API_MAX_TRACE_STEPS",
        "messages": "VSTACK_API_MAX_MESSAGES",
        "string_chars": "VSTACK_API_MAX_STRING_CHARS",
        "total_chars": "VSTACK_API_MAX_TOTAL_CHARS",
    }.get(kind, "VSTACK_API_*")
@dataclass(frozen=True)
class RateLimitDecision:
    """One rate-limiter check result."""

    allowed: bool

    # Remaining quota in the current window once this request has been
    # admitted; 0 when the request is denied.
    remaining: int

    # Seconds until at least one slot frees up. 0 if ``allowed`` is True.
    retry_after_seconds: float

    # The configured ``max_requests``, for context.
    limit: int


class RateLimitExceeded(RuntimeError):
    """Exception form of a denied :class:`RateLimitDecision`."""

    def __init__(self, decision: RateLimitDecision) -> None:
        super().__init__(f"rate limit exceeded; retry after {decision.retry_after_seconds:.2f}s")
        self.decision = decision


class RateLimiter(Protocol):
    """Pluggable backend interface."""

    def check(self, key: str) -> RateLimitDecision:
        """Record + check; return a decision."""
        ...

    def reset(self, key: str | None = None) -> None:
        """Drop state for ``key`` (or all keys if None)."""
        ...


@dataclass
class InMemoryRateLimiter:
    """Sliding-window in-memory rate limiter.

    Default config: 100 requests per 60-second window. Override via
    ``max_requests`` / ``window_seconds``.

    Each key has a deque of admission timestamps. ``check`` holds
    ``_lock`` for the whole evict / test / append sequence, so
    concurrent callers cannot admit more than ``max_requests`` per
    window. Denied requests are not recorded.
    """

    max_requests: int = 100
    window_seconds: float = 60.0
    _buckets: dict[str, Deque[float]] = field(default_factory=dict)
    _lock: threading.Lock = field(default_factory=threading.Lock)
    # Clock source; injectable so tests can control time.
    _now: Callable[[], float] = field(default=time.monotonic)

    def check(self, key: str) -> RateLimitDecision:
        """Admit or deny one request for ``key`` and return the decision."""
        now = self._now()
        cutoff = now - self.window_seconds
        with self._lock:
            bucket = self._buckets.get(key)
            if bucket is None:
                bucket = deque()
                self._buckets[key] = bucket
            # Evict timestamps that have left the window. ``<=`` keeps
            # this consistent with ``retry_after`` below: a slot frees
            # exactly ``window_seconds`` after its timestamp, so a
            # request arriving at that instant must be admitted rather
            # than denied with ``retry_after_seconds == 0``.
            while bucket and bucket[0] <= cutoff:
                bucket.popleft()
            if len(bucket) >= self.max_requests:
                # The oldest timestamp in the window is the next slot to
                # free up: retry-after = (oldest + window) - now.
                retry_after = (bucket[0] + self.window_seconds) - now
                return RateLimitDecision(
                    allowed=False,
                    remaining=0,
                    retry_after_seconds=max(retry_after, 0.0),
                    limit=self.max_requests,
                )
            bucket.append(now)
            return RateLimitDecision(
                allowed=True,
                remaining=self.max_requests - len(bucket),
                retry_after_seconds=0.0,
                limit=self.max_requests,
            )

    def reset(self, key: str | None = None) -> None:
        """Drop recorded timestamps for ``key``, or for every key if ``None``."""
        with self._lock:
            if key is None:
                self._buckets.clear()
            else:
                self._buckets.pop(key, None)
_SAFE_PATTERN_NAME = re.compile(r"^[A-Za-z0-9_-]+$")


@dataclass(frozen=True)
class InjectionAudit:
    """Result of :func:`audit_input_for_injection`."""

    is_suspicious: bool

    # Detector score; a value of 0.5 or more marks the input suspicious.
    score: float

    # Signals reported by the detector, stringified.
    indicators: tuple[str, ...]


def audit_input_for_injection(text: str) -> InjectionAudit:
    """Run ``detect_injection`` on ``text`` and normalize its result.

    Non-string or empty input, and any exception raised by the
    detector, produce a clean (non-suspicious, score 0.0) audit. The
    detector result may be a bool, a number, or an object exposing
    ``score`` / ``indicators`` attributes.
    """
    clean = InjectionAudit(is_suspicious=False, score=0.0, indicators=())
    if not (isinstance(text, str) and text):
        return clean
    try:
        verdict = detect_injection(text)
    except Exception:
        # A failing detector must never break the request path.
        return clean

    # bool is tested first because bool is a subclass of int.
    if isinstance(verdict, bool):
        if verdict:
            return InjectionAudit(is_suspicious=True, score=1.0, indicators=("upstream_bool",))
        return InjectionAudit(is_suspicious=False, score=0.0, indicators=())

    if isinstance(verdict, (int, float)):
        numeric = float(verdict)
        tags = ("upstream_score",) if numeric > 0 else ()
        return InjectionAudit(is_suspicious=numeric >= 0.5, score=numeric, indicators=tags)

    # Object-shaped result: read attributes defensively.
    object_score = float(getattr(verdict, "score", 0.0) or 0.0)
    found = tuple(str(entry) for entry in (getattr(verdict, "indicators", None) or ()))
    flagged = object_score >= 0.5 or bool(found)
    return InjectionAudit(is_suspicious=flagged, score=object_score, indicators=found)


def safe_pattern_name(name: str) -> str:
    """Return ``name`` if it consists only of ``[A-Za-z0-9_-]`` characters.

    Raises:
        ValueError: if ``name`` is empty or contains any other character.
    """
    if name and _SAFE_PATTERN_NAME.fullmatch(name):
        return name
    raise ValueError(f"Unsafe pattern name: {name!r}. Allowed alphabet: [A-Za-z0-9_-]+.")


def safe_path(candidate: Path | str, *, must_be_under: Path | str | None = None) -> Path:
    """Resolve ``candidate`` to an absolute path and optionally check containment.

    Both paths go through ``expanduser().resolve()``. When
    ``must_be_under`` is given, the resolved candidate must lie inside
    that root.

    Raises:
        ValueError: if the resolved path is outside ``must_be_under``.
    """
    resolved = Path(candidate).expanduser().resolve()
    if must_be_under is None:
        return resolved
    root = Path(must_be_under).expanduser().resolve()
    try:
        resolved.relative_to(root)
    except ValueError as e:
        raise ValueError(f"Path {resolved} escapes the required root {root}.") from e
    return resolved


def safe_subprocess_argv(argv: Sequence[str]) -> list[str]:
    """Return ``argv`` as a new list after validating each element.

    Every element must be a ``str`` and must not contain a NUL byte.
    No other characters are inspected.

    Raises:
        ValueError: on a non-string element or an embedded NUL byte.
    """
    checked: list[str] = []
    for token in argv:
        if not isinstance(token, str):
            raise ValueError(f"argv element is not a string: {token!r} ({type(token).__name__})")
        if "\x00" in token:
            raise ValueError("argv element contains a NUL byte (denied)")
        checked.append(token)
    return checked


def warn_on_suspicious_inputs(
    payload: dict[str, Any], *, fields: Iterable[str] | None = None
) -> list[InjectionAudit]:
    """Audit the named string fields of ``payload`` and return flagged audits.

    ``fields`` defaults to ``_COMMON_TEXT_FIELDS``. Fields that are
    missing or not strings are skipped. A non-dict ``payload`` yields
    an empty list.
    """
    names = list(fields) if fields else _COMMON_TEXT_FIELDS
    flagged: list[InjectionAudit] = []
    if not isinstance(payload, dict):
        return flagged
    for field_name in names:
        candidate = payload.get(field_name)
        if isinstance(candidate, str):
            audit = audit_input_for_injection(candidate)
            if audit.is_suspicious:
                flagged.append(audit)
    return flagged


_COMMON_TEXT_FIELDS = (
    "goal",
    "task",
    "outcome",
    "initial_attribution",
    "system_prompt",
    "user_prompt",
)
MIN_API_KEY_LENGTH # hashed, not raw + + +def test_api_key_rejects_short_key() -> None: + with pytest.raises(ValueError): + APIKey.from_raw("test", "too-short") + + +def test_api_key_store_verify_correct_key() -> None: + raw = "a" * 30 + store = APIKeyStore(keys=[APIKey.from_raw("main", raw)]) + matched = store.verify(raw) + assert matched is not None + assert matched.name == "main" + + +def test_api_key_store_verify_wrong_key() -> None: + raw = "a" * 30 + store = APIKeyStore(keys=[APIKey.from_raw("main", raw)]) + assert store.verify("a" * 29 + "X") is None + + +def test_api_key_store_verify_none_and_empty() -> None: + store = APIKeyStore(keys=[APIKey.from_raw("main", "a" * 30)]) + assert store.verify(None) is None + assert store.verify("") is None + + +def test_load_keys_from_env_comma_separated() -> None: + raw = "key1=" + ("a" * 30) + "," + ("b" * 30) + store = load_keys_from_env({"VSTACK_API_KEYS": raw}) + assert len(store) == 2 + assert store.verify("a" * 30) is not None + assert store.verify("b" * 30) is not None + + +def test_load_keys_from_env_file(tmp_path: Path) -> None: + file_path = tmp_path / "keys" + file_path.write_text( + "# a comment\nalpha=" + ("a" * 30) + "\n\n" + ("b" * 30) + "\n", + encoding="utf-8", + ) + store = load_keys_from_env({"VSTACK_API_KEYS_FILE": str(file_path)}) + assert len(store) == 2 + assert store.verify("a" * 30) is not None + assert store.verify("b" * 30) is not None + by_name = {k.name for k in store.keys} + assert "alpha" in by_name + + +def test_load_keys_from_env_returns_empty_when_unset() -> None: + assert not load_keys_from_env({}) + + +def test_generate_api_key_is_strong() -> None: + key = generate_api_key() + assert len(key) >= MIN_API_KEY_LENGTH + # Two calls produce different keys. 
+ assert generate_api_key() != generate_api_key() + + +def test_verify_api_key_convenience() -> None: + store = APIKeyStore(keys=[APIKey.from_raw("main", "a" * 30)]) + assert verify_api_key("a" * 30, store) is not None + assert verify_api_key("bad", store) is None + + +# ---------------------------------------------------------------------- +# RateLimiter +# ---------------------------------------------------------------------- + + +def test_rate_limiter_allows_first_n_requests() -> None: + limiter = InMemoryRateLimiter(max_requests=3, window_seconds=10.0) + decisions = [limiter.check("k") for _ in range(3)] + assert all(d.allowed for d in decisions) + assert decisions[-1].remaining == 0 + + +def test_rate_limiter_denies_over_quota() -> None: + limiter = InMemoryRateLimiter(max_requests=2, window_seconds=10.0) + limiter.check("k") + limiter.check("k") + third = limiter.check("k") + assert not third.allowed + assert third.retry_after_seconds > 0 + + +def test_rate_limiter_per_key_independence() -> None: + limiter = InMemoryRateLimiter(max_requests=1, window_seconds=10.0) + assert limiter.check("a").allowed + assert limiter.check("b").allowed + assert not limiter.check("a").allowed + + +def test_rate_limiter_window_eviction() -> None: + t = [0.0] + limiter = InMemoryRateLimiter(max_requests=1, window_seconds=1.0) + limiter._now = lambda: t[0] + assert limiter.check("k").allowed + assert not limiter.check("k").allowed + t[0] = 2.0 # 2s later — window evicted + assert limiter.check("k").allowed + + +def test_rate_limiter_reset() -> None: + limiter = InMemoryRateLimiter(max_requests=1, window_seconds=10.0) + limiter.check("k") + assert not limiter.check("k").allowed + limiter.reset("k") + assert limiter.check("k").allowed + + +def test_rate_limit_exceeded_carries_decision() -> None: + decision = RateLimitDecision(allowed=False, remaining=0, retry_after_seconds=1.5, limit=10) + err = RateLimitExceeded(decision) + assert err.decision.limit == 10 + + +# 
---------------------------------------------------------------------- +# RequestLimits +# ---------------------------------------------------------------------- + + +def test_default_request_limits() -> None: + assert DEFAULT_REQUEST_LIMITS.max_trace_steps == 5_000 + assert DEFAULT_REQUEST_LIMITS.max_body_bytes >= 1_000_000 + + +def test_request_limits_from_env_overrides() -> None: + limits = request_limits_from_env( + { + "VSTACK_API_MAX_TRACE_STEPS": "10", + "VSTACK_API_MAX_BODY_BYTES": "1000", + "VSTACK_API_REQUEST_TIMEOUT": "5.5", + } + ) + assert limits.max_trace_steps == 10 + assert limits.max_body_bytes == 1000 + assert limits.request_timeout_seconds == 5.5 + + +def test_request_limits_from_env_falls_back_on_bad_int() -> None: + limits = request_limits_from_env({"VSTACK_API_MAX_TRACE_STEPS": "not-a-number"}) + assert limits.max_trace_steps == DEFAULT_REQUEST_LIMITS.max_trace_steps + + +def test_enforce_trace_limits_steps() -> None: + payload = {"steps": [{"type": "input", "content": "x"}] * 11} + with pytest.raises(RequestSizeExceeded) as exc: + enforce_trace_limits(payload, RequestLimits(max_trace_steps=10)) + assert exc.value.kind == "trace_steps" + assert exc.value.actual == 11 + + +def test_enforce_trace_limits_messages() -> None: + payload = {"messages": ["a"] * 11} + with pytest.raises(RequestSizeExceeded) as exc: + enforce_trace_limits(payload, RequestLimits(max_messages=10)) + assert exc.value.kind == "messages" + + +def test_enforce_trace_limits_string_chars() -> None: + payload = {"outcome": "x" * 11} + with pytest.raises(RequestSizeExceeded) as exc: + enforce_trace_limits(payload, RequestLimits(max_string_chars=10, max_total_chars=1000)) + assert exc.value.kind == "string_chars" + + +def test_enforce_trace_limits_total_chars() -> None: + payload = {"a": "x" * 5, "b": "y" * 6} + with pytest.raises(RequestSizeExceeded) as exc: + enforce_trace_limits(payload, RequestLimits(max_string_chars=100, max_total_chars=10)) + assert exc.value.kind == 
"total_chars" + + +def test_enforce_trace_limits_handles_non_dict() -> None: + # Doesn't raise; just returns. + enforce_trace_limits("not a dict", DEFAULT_REQUEST_LIMITS) + enforce_trace_limits([1, 2, 3], DEFAULT_REQUEST_LIMITS) + + +# ---------------------------------------------------------------------- +# Validation +# ---------------------------------------------------------------------- + + +def test_audit_input_for_injection_empty() -> None: + audit = audit_input_for_injection("") + assert not audit.is_suspicious + assert audit.score == 0.0 + + +def test_audit_input_for_injection_clean() -> None: + audit = audit_input_for_injection("Tell me about the agent's behavior on the trace.") + # The upstream detector is heuristic; whatever it says, we just + # verify the shape. + assert isinstance(audit.is_suspicious, bool) + assert 0.0 <= audit.score <= 1.0 + + +def test_audit_input_for_injection_non_string() -> None: + audit = audit_input_for_injection(None) # type: ignore[arg-type] + assert not audit.is_suspicious + + +def test_safe_pattern_name_accepts_valid() -> None: + assert safe_pattern_name("lewin") == "lewin" + assert safe_pattern_name("schein_culture") == "schein_culture" + + +@pytest.mark.parametrize( + "bad", + ["", "../escape", "foo/bar", "spaces here", "with;semi", "with$dollar"], +) +def test_safe_pattern_name_rejects_bad(bad: str) -> None: + with pytest.raises(ValueError): + safe_pattern_name(bad) + + +def test_safe_path_resolves(tmp_path: Path) -> None: + target = tmp_path / "sub" / "file.json" + safe = safe_path(target, must_be_under=tmp_path) + assert safe.is_absolute() + + +def test_safe_path_rejects_escape(tmp_path: Path) -> None: + other = tmp_path.parent / "elsewhere" / "file.json" + with pytest.raises(ValueError): + safe_path(other, must_be_under=tmp_path) + + +def test_safe_path_no_root_constraint(tmp_path: Path) -> None: + # Without must_be_under, just resolves — no escape check. 
+ result = safe_path(tmp_path / "anywhere") + assert result.is_absolute() + + +def test_safe_subprocess_argv_accepts_strings() -> None: + argv = safe_subprocess_argv(["gbrain", "search", "--limit", "5", "query text"]) + assert argv == ["gbrain", "search", "--limit", "5", "query text"] + + +def test_safe_subprocess_argv_rejects_non_string() -> None: + with pytest.raises(ValueError): + safe_subprocess_argv(["gbrain", 5]) # type: ignore[list-item] + + +def test_safe_subprocess_argv_rejects_nul() -> None: + with pytest.raises(ValueError): + safe_subprocess_argv(["gbrain", "evil\x00arg"]) + + +def test_warn_on_suspicious_inputs_uses_default_fields() -> None: + # Pass a benign payload; even if the detector flags something + # heuristically, we just want the shape to be correct. + audits = warn_on_suspicious_inputs({"goal": "Refactor auth"}) + assert isinstance(audits, list) + + +def test_warn_on_suspicious_inputs_skips_non_strings() -> None: + audits = warn_on_suspicious_inputs({"goal": ["not", "a", "string"], "outcome": 42}) + # Non-string fields are ignored; result depends on detector behaviour + # but list shape is guaranteed. + assert isinstance(audits, list) + + +def test_module_exports() -> None: + for name in ( + "APIKey", + "APIKeyStore", + "load_keys_from_env", + "verify_api_key", + "InMemoryRateLimiter", + "RateLimiter", + "RateLimitExceeded", + "RequestLimits", + "DEFAULT_REQUEST_LIMITS", + "enforce_trace_limits", + "audit_input_for_injection", + "safe_pattern_name", + "safe_path", + "safe_subprocess_argv", + ): + assert name in security.__all__ + assert security.__version__ diff --git a/completions/README.md b/completions/README.md new file mode 100644 index 0000000..be4c78a --- /dev/null +++ b/completions/README.md @@ -0,0 +1,39 @@ +# Shell completions for vstack CLIs + +This directory ships completion scripts for the vstack CLI family. 
Install the one matching your shell: + +## Bash + +```bash +# Linux / macOS Homebrew +sudo cp completions/vstack.bash /etc/bash_completion.d/vstack +# Or in your home directory +cp completions/vstack.bash ~/.bash_completion.d/vstack +# Make sure ~/.bashrc sources ~/.bash_completion.d/* +``` + +## Zsh + +```bash +# Drop into any directory on your $fpath +mkdir -p ~/.zsh/completions +cp completions/_vstack ~/.zsh/completions/ +echo 'fpath=(~/.zsh/completions $fpath)' >> ~/.zshrc +echo 'autoload -U compinit && compinit' >> ~/.zshrc +``` + +## Fish + +```bash +mkdir -p ~/.config/fish/completions +cp completions/vstack.fish ~/.config/fish/completions/ +``` + +After install, your shell will complete: + +- The 11 top-level CLIs: `vstack`, `vstack-mcp`, `vstack-api`, `vstack-config`, `vstack-upgrade`, `vstack-learn`, `vstack-analytics`, `vstack-browser`, `vstack-gbrain`, `vstack-bench`, `vstack-doctor` +- The 34 per-pattern CLIs: `vstack-lewin`, `vstack-aar`, `vstack-schein-culture`, etc. +- Subcommands for each (e.g. `vstack-mcp <TAB>` shows `serve`, `list-tools`, `list-resources`, `config-snippet`) +- Argument values where applicable (e.g. `vstack-config gen-platform <TAB>` shows `cursor`, `cline`, etc.) + +Reload your shell after install (`exec $SHELL`) or open a fresh terminal. diff --git a/completions/_vstack b/completions/_vstack new file mode 100644 index 0000000..76f19e1 --- /dev/null +++ b/completions/_vstack @@ -0,0 +1,146 @@ +#compdef vstack vstack-mcp vstack-api vstack-config vstack-upgrade vstack-learn vstack-analytics vstack-browser vstack-gbrain vstack-bench vstack-doctor +# Zsh completion for the vstack CLI family. +# Install: +# mkdir -p ~/.zsh/completions && cp completions/_vstack ~/.zsh/completions/ +# echo 'fpath=(~/.zsh/completions $fpath)' >> ~/.zshrc +# echo 'autoload -U compinit && compinit' >> ~/.zshrc + +_vstack-mcp() { + local -a commands + commands=( + 'serve:Start the stdio MCP server.' + 'list-tools:List the 34 registered MCP tool names.' 
+ 'list-resources:List the 102 resource URIs.' + 'config-snippet:Print a config snippet for a target MCP client.' + ) + _arguments -C \ + '1:command:->cmd' \ + '*::arg:->args' + case $state in + cmd) _describe -t commands 'vstack-mcp command' commands ;; + args) + case $words[1] in + config-snippet) + _values 'client' 'claude-desktop' 'cursor' 'cline' 'continue' 'generic' + ;; + esac ;; + esac +} + +_vstack-api() { + local -a commands + commands=( + 'serve:Start the FastAPI HTTP server.' + 'routes:Print every registered route.' + 'openapi:Print the OpenAPI JSON spec.' + ) + _arguments -C '1:command:->cmd' + [[ $state == cmd ]] && _describe -t commands 'vstack-api command' commands +} + +_vstack-config() { + local -a commands + commands=( + 'get:Print the value of one config key.' + 'set:Set a config key.' + 'list:Print every known config key.' + 'unset:Delete one config key.' + 'path:Print the resolved ~/.vstack/ home or subpath.' + 'keys:List documented config keys + descriptions.' + 'install-skills:Copy the vstack Claude Code skills into ~/.claude/skills/vstack.' + 'gen-platform:Print a config snippet for a non-MCP-default AI client.' + ) + _arguments -C \ + '1:command:->cmd' \ + '*::arg:->args' + case $state in + cmd) _describe -t commands 'vstack-config command' commands ;; + args) + case $words[1] in + path) _values 'kind' 'home' 'baselines' 'sessions' 'analytics' 'config' ;; + gen-platform) + _values 'platform' 'claude-desktop' 'cursor' 'cline' 'continue' \ + 'roo-code' 'windsurf' 'zed' 'aider' 'goose' 'kiro' \ + 'openclaw' 'codex-cli' 'opencode' 'docker-compose' + ;; + get|set|unset) + _values 'key' 'default_mode' 'default_model' 'telemetry' \ + 'log_level' 'preferred_llm' 'api_host' 'api_port' \ + 'skills_install_path' + ;; + esac ;; + esac +} + +_vstack-learn() { + local -a commands + commands=( + 'record:Append a learning record.' + 'recall:Print recent matching records (newest first).' + 'outcome:Mark a follow-up outcome on the latest matching record.' 
+ 'outcomes:Aggregate (pattern, intervention) -> outcomes counts.' + 'path:Print the learnings.jsonl path.' + 'clear:Delete the learnings.jsonl file.' + ) + _arguments -C '1:command:->cmd' + [[ $state == cmd ]] && _describe -t commands 'vstack-learn command' commands +} + +_vstack-analytics() { + local -a commands + commands=( + 'summary:Per-pattern / model / day usage rollups.' + 'top-costs:N most expensive calls.' + 'cost:Total estimated cost in USD.' + 'path:Print the telemetry.jsonl path.' + 'raw:Stream every event as one JSON line.' + ) + _arguments -C '1:command:->cmd' + [[ $state == cmd ]] && _describe -t commands 'vstack-analytics command' commands +} + +_vstack-browser() { + local -a commands + commands=( + 'scrape:Navigate to a dashboard URL and dump the structured trace.' + 'screenshot:Take a screenshot of any URL.' + 'tools:List upstream chrome-devtools-mcp tools.' + ) + _arguments -C '1:command:->cmd' + [[ $state == cmd ]] && _describe -t commands 'vstack-browser command' commands +} + +_vstack-gbrain() { + local -a commands + commands=( + 'status:Show whether gbrain is reachable.' + 'sync:Write the 34 pattern documents into gbrain.' + 'search:Semantic-or-keyword search over the catalogue.' + 'corpus:Dump the indexed corpus as JSON.' + ) + _arguments -C '1:command:->cmd' + [[ $state == cmd ]] && _describe -t commands 'vstack-gbrain command' commands +} + +_vstack-bench() { + local -a commands + commands=( + 'list:Show what is in the canonical suite.' + 'run:Run a benchmark suite end-to-end.' + 'compare:Run a suite across quick / standard / forensic.' + ) + _arguments -C '1:command:->cmd' + [[ $state == cmd ]] && _describe -t commands 'vstack-bench command' commands +} + +_vstack-doctor() { + _arguments \ + '--json[Emit JSON instead of pretty text]' \ + '--skip-network[Skip the PyPI upgrade check]' \ + '--only-errors[Print only ERROR-level findings]' +} + +_vstack() { + # The top-level `vstack` CLI runs the AAR generator. 
+ _arguments '1:command:(aar bench version)' '*::arg:_files' +} diff --git a/completions/vstack.bash b/completions/vstack.bash new file mode 100644 index 0000000..fa55fb9 --- /dev/null +++ b/completions/vstack.bash @@ -0,0 +1,112 @@ +# Bash completion for the vstack CLI family. +# Install: +# sudo cp completions/vstack.bash /etc/bash_completion.d/vstack +# or: source /vstack.bash + +_vstack_mcp_completions() { + local cur prev + COMPREPLY=() + cur="${COMP_WORDS[COMP_CWORD]}" + prev="${COMP_WORDS[COMP_CWORD-1]}" + if [[ $COMP_CWORD -eq 1 ]]; then + COMPREPLY=( $(compgen -W "serve list-tools list-resources config-snippet" -- "$cur") ) + return 0 + fi + if [[ "$prev" == "config-snippet" ]]; then + COMPREPLY=( $(compgen -W "claude-desktop cursor cline continue generic" -- "$cur") ) + return 0 + fi +} + +_vstack_api_completions() { + local cur + cur="${COMP_WORDS[COMP_CWORD]}" + if [[ $COMP_CWORD -eq 1 ]]; then + COMPREPLY=( $(compgen -W "serve routes openapi" -- "$cur") ) + return 0 + fi +} + +_vstack_config_completions() { + local cur prev + cur="${COMP_WORDS[COMP_CWORD]}" + prev="${COMP_WORDS[COMP_CWORD-1]}" + if [[ $COMP_CWORD -eq 1 ]]; then + COMPREPLY=( $(compgen -W "get set list unset path keys install-skills gen-platform" -- "$cur") ) + return 0 + fi + if [[ "$prev" == "path" ]]; then + COMPREPLY=( $(compgen -W "home baselines sessions analytics config" -- "$cur") ) + return 0 + fi + if [[ "$prev" == "gen-platform" ]]; then + COMPREPLY=( $(compgen -W "claude-desktop cursor cline continue roo-code windsurf zed aider goose kiro openclaw codex-cli opencode docker-compose" -- "$cur") ) + return 0 + fi + if [[ "$prev" == "get" || "$prev" == "set" || "$prev" == "unset" ]]; then + COMPREPLY=( $(compgen -W "default_mode default_model telemetry log_level preferred_llm api_host api_port skills_install_path" -- "$cur") ) + return 0 + fi +} + +_vstack_learn_completions() { + local cur prev + cur="${COMP_WORDS[COMP_CWORD]}" + prev="${COMP_WORDS[COMP_CWORD-1]}" + if [[ 
$COMP_CWORD -eq 1 ]]; then + COMPREPLY=( $(compgen -W "record recall outcome outcomes path clear" -- "$cur") ) + return 0 + fi +} + +_vstack_analytics_completions() { + local cur + cur="${COMP_WORDS[COMP_CWORD]}" + if [[ $COMP_CWORD -eq 1 ]]; then + COMPREPLY=( $(compgen -W "summary top-costs cost path raw" -- "$cur") ) + return 0 + fi +} + +_vstack_browser_completions() { + local cur + cur="${COMP_WORDS[COMP_CWORD]}" + if [[ $COMP_CWORD -eq 1 ]]; then + COMPREPLY=( $(compgen -W "scrape screenshot tools" -- "$cur") ) + return 0 + fi +} + +_vstack_gbrain_completions() { + local cur + cur="${COMP_WORDS[COMP_CWORD]}" + if [[ $COMP_CWORD -eq 1 ]]; then + COMPREPLY=( $(compgen -W "status sync search corpus" -- "$cur") ) + return 0 + fi +} + +_vstack_bench_completions() { + local cur + cur="${COMP_WORDS[COMP_CWORD]}" + if [[ $COMP_CWORD -eq 1 ]]; then + COMPREPLY=( $(compgen -W "list run compare" -- "$cur") ) + return 0 + fi +} + +_vstack_doctor_completions() { + local cur + cur="${COMP_WORDS[COMP_CWORD]}" + COMPREPLY=( $(compgen -W "--json --skip-network --only-errors --help" -- "$cur") ) +} + +complete -F _vstack_mcp_completions vstack-mcp +complete -F _vstack_api_completions vstack-api +complete -F _vstack_config_completions vstack-config +complete -F _vstack_learn_completions vstack-learn +complete -F _vstack_analytics_completions vstack-analytics +complete -F _vstack_browser_completions vstack-browser +complete -F _vstack_gbrain_completions vstack-gbrain +complete -F _vstack_bench_completions vstack-bench +complete -F _vstack_doctor_completions vstack-doctor diff --git a/completions/vstack.fish b/completions/vstack.fish new file mode 100644 index 0000000..9c7dc69 --- /dev/null +++ b/completions/vstack.fish @@ -0,0 +1,45 @@ +# Fish completion for the vstack CLI family. 
+# Install: cp completions/vstack.fish ~/.config/fish/completions/ + +# vstack-mcp +complete -c vstack-mcp -f -n "__fish_use_subcommand" -a "serve list-tools list-resources config-snippet" +complete -c vstack-mcp -f -n "__fish_seen_subcommand_from config-snippet" \ + -a "claude-desktop cursor cline continue generic" + +# vstack-api +complete -c vstack-api -f -n "__fish_use_subcommand" -a "serve routes openapi" + +# vstack-config +complete -c vstack-config -f -n "__fish_use_subcommand" \ + -a "get set list unset path keys install-skills gen-platform" +complete -c vstack-config -f -n "__fish_seen_subcommand_from path" \ + -a "home baselines sessions analytics config" +complete -c vstack-config -f -n "__fish_seen_subcommand_from gen-platform" \ + -a "claude-desktop cursor cline continue roo-code windsurf zed aider goose kiro openclaw codex-cli opencode docker-compose" +complete -c vstack-config -f -n "__fish_seen_subcommand_from get set unset" \ + -a "default_mode default_model telemetry log_level preferred_llm api_host api_port skills_install_path" + +# vstack-learn +complete -c vstack-learn -f -n "__fish_use_subcommand" \ + -a "record recall outcome outcomes path clear" + +# vstack-analytics +complete -c vstack-analytics -f -n "__fish_use_subcommand" \ + -a "summary top-costs cost path raw" + +# vstack-browser +complete -c vstack-browser -f -n "__fish_use_subcommand" -a "scrape screenshot tools" + +# vstack-gbrain +complete -c vstack-gbrain -f -n "__fish_use_subcommand" -a "status sync search corpus" + +# vstack-bench +complete -c vstack-bench -f -n "__fish_use_subcommand" -a "list run compare" + +# vstack-doctor +complete -c vstack-doctor -l json -d "Emit JSON instead of pretty text" +complete -c vstack-doctor -l skip-network -d "Skip the PyPI upgrade check" +complete -c vstack-doctor -l only-errors -d "Print only ERROR-level findings" + +# vstack (top-level AAR CLI) +complete -c vstack -f -n "__fish_use_subcommand" -a "aar bench version" diff --git 
a/docs/operations/deploy.md b/docs/operations/deploy.md new file mode 100644 index 0000000..16bac8c --- /dev/null +++ b/docs/operations/deploy.md @@ -0,0 +1,264 @@ +# Production deploy guide + +This page captures everything you need to run `vstack-api` and `vstack-mcp` in production for thousands of concurrent users. Each section corresponds to one concrete decision; the defaults are safe but conservative — read once before going live, then revisit when scale demands. + +## TL;DR — minimum production checklist + +- [ ] Bind `vstack-api` to a loopback or private interface, not 0.0.0.0 directly. Front it with a reverse proxy (nginx / Caddy / Cloud Load Balancer) that terminates TLS. +- [ ] Set `VSTACK_API_KEYS` (or `VSTACK_API_KEYS_FILE`) **and** `VSTACK_API_REQUIRE_AUTH=true` before exposing anything beyond localhost. +- [ ] Set `VSTACK_API_RATE_LIMIT=100/60` (or whatever per-key quota matches your usage). +- [ ] Set `VSTACK_API_MAX_BODY_BYTES=2097152` (2 MiB) unless your traces genuinely exceed this — defaults to 5 MiB which is fine but tighter is safer. +- [ ] Set `VSTACK_CACHE=memory` for the cost win when the same trace is replayed across patterns / modes. +- [ ] Configure `ANTHROPIC_API_KEY` (or `OPENAI_API_KEY` / `OLLAMA_HOST`). +- [ ] Scrape `/metrics` into Prometheus; alert on `vstack_requests_total{status!="ok"}` and the `vstack_request_duration_seconds` p99. +- [ ] Mount a persistent volume for `~/.vstack/` if you want baselines / learnings / telemetry across restarts. +- [ ] Run `vstack-doctor --skip-network` in your container build to catch misconfiguration before deploy. + +## Recommended deploy shapes + +### Shape A — single container behind a reverse proxy + +Best for low-volume, single-tenant production. The Docker image ships everything. + +```bash +docker run -d --restart unless-stopped \ + -p 127.0.0.1:8000:8000 \ + -e ANTHROPIC_API_KEY="sk-ant-..." 
\ + -e VSTACK_API_REQUIRE_AUTH=true \ + -e VSTACK_API_KEYS="prod=$(openssl rand -hex 24)" \ + -e VSTACK_API_RATE_LIMIT=100/60 \ + -e VSTACK_CACHE=memory \ + -e VSTACK_HOME=/var/lib/vstack \ + -v vstack-data:/var/lib/vstack \ + ghcr.io/valani9/vstack:0.6.0 \ + vstack-api serve --host 0.0.0.0 --port 8000 +``` + +Front with nginx terminating TLS: + +```nginx +server { + listen 443 ssl http2; + server_name vstack.example.com; + ssl_certificate /etc/letsencrypt/live/vstack.example.com/fullchain.pem; + ssl_certificate_key /etc/letsencrypt/live/vstack.example.com/privkey.pem; + + client_max_body_size 5m; + proxy_read_timeout 180s; + proxy_send_timeout 180s; + + location / { + proxy_pass http://127.0.0.1:8000; + proxy_set_header Host $host; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + } +} +``` + +The `X-Forwarded-For` header is what the rate limiter uses to per-IP-attribute requests that don't carry an API key (auth path covers the API-key case directly). + +### Shape B — multi-replica behind Kubernetes + +For real concurrency. The image is multi-arch (amd64 + arm64) and the API is stateless (in-memory cache, in-memory rate limiter, in-memory metrics). Scale horizontally by replica count; each replica has its own cache (small price for simplicity — switch to Redis if you outgrow it). 
+ +Sample Deployment: + +```yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: vstack-api +spec: + replicas: 3 + selector: + matchLabels: + app: vstack-api + template: + metadata: + labels: + app: vstack-api + spec: + containers: + - name: api + image: ghcr.io/valani9/vstack:0.6.0 + command: ["vstack-api", "serve", "--host", "0.0.0.0", "--port", "8000"] + ports: [{containerPort: 8000}] + env: + - {name: VSTACK_API_REQUIRE_AUTH, value: "true"} + - {name: VSTACK_API_RATE_LIMIT, value: "200/60"} + - {name: VSTACK_API_KEYS, valueFrom: {secretKeyRef: {name: vstack-api, key: keys}}} + - {name: ANTHROPIC_API_KEY, valueFrom: {secretKeyRef: {name: anthropic, key: api-key}}} + - {name: VSTACK_CACHE, value: "memory"} + - {name: VSTACK_HOME, value: "/var/lib/vstack"} + resources: + requests: {cpu: "100m", memory: "256Mi"} + limits: {cpu: "1", memory: "1Gi"} + readinessProbe: + httpGet: {path: /readyz, port: 8000} + periodSeconds: 5 + livenessProbe: + httpGet: {path: /livez, port: 8000} + periodSeconds: 15 + volumeMounts: + - {name: home, mountPath: /var/lib/vstack} + volumes: + - name: home + emptyDir: {} # or PersistentVolumeClaim if baselines + learnings need to survive restarts +``` + +Service + Ingress are standard. `/healthz`, `/livez`, `/readyz` are wired to K8s probe semantics (liveness vs. readiness vs. startup). + +## Authentication + +The API is loopback-friendly by default — local dev needs zero auth config. 
The moment you expose anything past localhost, enable auth: + +```bash +# Generate a fresh strong key: +python -c "import secrets; print(secrets.token_urlsafe(24))" + +export VSTACK_API_KEYS="prod=<key1>,staging=<key2>" +export VSTACK_API_REQUIRE_AUTH=true +``` + +Or via a newline-delimited file: + +```bash +export VSTACK_API_KEYS_FILE=/etc/vstack/api-keys +cat > /etc/vstack/api-keys <<EOF +prod=<key1> +staging=<key2> +EOF +chmod 600 /etc/vstack/api-keys +``` + +Clients send the key as: + +- `Authorization: Bearer <key>` (preferred), or +- `X-API-Key: <key>` + +Wrong / missing keys get `401 Unauthorized` with `WWW-Authenticate: Bearer realm="vstack"`. + +## Rate limiting + +The in-memory sliding-window limiter is the default. Configure with: + +```bash +VSTACK_API_RATE_LIMIT="100/60" # 100 requests per 60s per API key (or per IP if no key) +VSTACK_API_RATE_LIMIT="off" # disable +``` + +When exceeded the API returns `429 Too Many Requests` with `Retry-After`, `X-RateLimit-Limit`, and `X-RateLimit-Remaining` headers. Successful requests also carry the latter two headers so clients can self-pace. + +Health endpoints (`/healthz`, `/readyz`, `/livez`, `/metrics`, `/openapi.json`) are NOT rate-limited — K8s probes hammer them continuously. + +For real multi-replica deploys with a global quota, swap the in-memory limiter for a Redis-backed one. The `RateLimiter` protocol in `vstack.security` is the swap point. + +## Request size limits + +Configure if the defaults don't match your use case: + +| Env var | Default | Purpose | +|---|---|---| +| `VSTACK_API_MAX_BODY_BYTES` | 5 MiB | Total POST body. | +| `VSTACK_API_MAX_TRACE_STEPS` | 5,000 | Max length of `steps[]` / `messages[]`. | +| `VSTACK_API_MAX_MESSAGES` | 5,000 | Max multi-agent message log size. | +| `VSTACK_API_MAX_STRING_CHARS` | 200,000 | Per-string char cap. | +| `VSTACK_API_MAX_TOTAL_CHARS` | 1,000,000 | Total free-text char count. | +| `VSTACK_API_REQUEST_TIMEOUT` | 120s | Server-side per-request deadline. 
| + +Tighten these aggressively if your traces are smaller than the defaults. Loose limits + a malicious client = OOM risk. + +## Caching + +Enable in-memory caching with: + +```bash +VSTACK_CACHE=memory +VSTACK_CACHE_CAPACITY=2048 +VSTACK_CACHE_TTL_SECONDS=3600 # optional; entries never expire by default +``` + +The cache key is SHA-256 of `(pattern, mode, model, trace)` canonical JSON. Two identical traces produce one analyzer run + N cache hits. Typical hit rates depend on workload — observability replays of the same trace through multiple patterns benefit; one-off analyses won't. + +In a multi-replica deploy, each replica has its own cache. For shared caching, swap `vstack.cache.NullCache` / `InMemoryLRUCache` for a Redis-backed implementation — the `CacheBackend` protocol is the swap point. + +## Observability + +### Prometheus metrics + +`GET /metrics` returns Prometheus text format. Scrape into Prometheus + chart in Grafana: + +```yaml +- job_name: vstack-api + static_configs: [{targets: ["vstack-api.svc.cluster.local:8000"]}] + metrics_path: /metrics +``` + +Metrics shipped: + +- `vstack_requests_total{surface,pattern,mode,status}` — counter +- `vstack_request_duration_seconds{surface,pattern,mode}` — histogram + +Alert suggestions: + +- p99 of `vstack_request_duration_seconds` > 30s for >5min (LLM provider degradation) +- `rate(vstack_requests_total{status="analyzer_error"}[5m]) > 0.01` (>1% error rate) +- `rate(vstack_requests_total{status="llm_resolution_error"}[5m]) > 0` (any LLM-key misconfiguration) + +### Request IDs + +Every response carries an `X-Request-ID`. Clients SHOULD propagate an inbound ID; the server generates a fresh one if absent. The ID is bound to a Python contextvar for the lifetime of the request so every log line during the request carries it. + +### Sentry (optional) + +Set `SENTRY_DSN` to enable error reporting. No-op if `sentry-sdk` isn't installed. + +```bash +pip install sentry-sdk +export SENTRY_DSN="https://...@sentry.io/..." 
+export SENTRY_ENVIRONMENT=production +``` + +## Graceful shutdown + +The FastAPI lifespan handler flips `/readyz` to `draining` on `SIGTERM`. K8s removes the pod from the Service's endpoints (readiness check fails), then waits for `terminationGracePeriodSeconds` (default 30s) before sending `SIGKILL`. Set it explicitly: + +```yaml +spec: + terminationGracePeriodSeconds: 30 +``` + +Run uvicorn with a matching timeout: + +```bash +vstack-api serve --workers 1 # set --workers > 1 only with a shared cache backend +``` + +## What still lives in-process + +- **Cache**: in-memory LRU. Replace with Redis for cross-replica sharing. +- **Rate limiter**: in-memory. Replace with Redis for global quotas. +- **Metrics registry**: in-process. Scrape each replica separately. +- **`~/.vstack/`**: per-replica filesystem. Mount a shared volume for cross-replica baselines / learnings. + +All four are pluggable via well-defined protocols (`CacheBackend`, `RateLimiter`, `TelemetrySink`, `LearningStore`). The in-memory defaults are the right choice for single-replica deploys; for true multi-tenancy with shared state, swap them at app-build time: + +```python +from vstack.api import build_app +from my_redis_backed_cache import RedisCache + +app = build_app(cache=RedisCache(url="redis://...")) +``` + +## Troubleshooting + +Run `vstack-doctor --skip-network` first. It checks 30+ common misconfigurations and surfaces an exact next-step hint for each. + +Common issues: + +- **`502 llm_resolution_error`** — no `ANTHROPIC_API_KEY` / `OPENAI_API_KEY` / `OLLAMA_HOST` in the container's env. +- **`500 auth_misconfigured`** — `VSTACK_API_REQUIRE_AUTH=true` but no `VSTACK_API_KEYS`. +- **`413 request_too_large`** — bump `VSTACK_API_MAX_BODY_BYTES` or split the trace. +- **`504 timeout`** — forensic-mode analysis exceeded the 120s default. Try `mode=quick` or bump `VSTACK_API_REQUEST_TIMEOUT`. 
+- **Docker build fails on `valanistack==X.Y.Z` not found** — wait for PyPI propagation (~10 min) or pin to a known-good earlier release. diff --git a/docs/operations/security.md b/docs/operations/security.md new file mode 100644 index 0000000..2915d4c --- /dev/null +++ b/docs/operations/security.md @@ -0,0 +1,60 @@ +# Security model + +vstack's security model has three concentric rings. + +## Ring 1 — Library code (always-on) + +These guards are unconditional. Every consumer (Python library, CLI, MCP, REST, framework adapters) goes through them. + +- **Prompt-injection detection.** Free-text fields (`task`, `goal`, `outcome`, etc.) are passed through `vstack.aar.detect_injection` before they reach an LLM prompt. Suspicious inputs are logged + heuristically flagged but not rejected — the detector is best-effort and the more important defense is the pattern's own prompt-fencing. +- **Prompt fencing.** Every analyzer wraps user-supplied trace content in `...` fences via `vstack.aar.fence` so injected instructions can't escape into the system-prompt context. +- **Pattern-name validation.** Any path that takes a user-supplied pattern name (baselines, learnings, MCP tool dispatch) runs it through `vstack.security.safe_pattern_name` which rejects anything outside `[A-Za-z0-9_-]+`. Prevents path-traversal via attacker-controlled tool / pattern names. +- **Path containment.** User-supplied paths (baseline JSON, suite JSON, install destinations) are checked with `vstack.security.safe_path(must_be_under=...)` against the configured root. +- **No `shell=True`.** Every subprocess call (gbrain, chrome-devtools-mcp) uses explicit argv lists. `vstack.security.safe_subprocess_argv` validates the argv before execution. + +## Ring 2 — REST API (configurable) + +These guards are opt-in but production-recommended. The REST API ships them off by default to preserve local-dev ergonomics; enable them when binding past loopback. 
+ +- **API-key auth.** Set `VSTACK_API_KEYS=...` + `VSTACK_API_REQUIRE_AUTH=true`. Keys are SHA-256-hashed in memory; comparisons are constant-time (`hmac.compare_digest`). +- **Rate limiting.** Set `VSTACK_API_RATE_LIMIT="100/60"` for 100 req per 60s per API-key (or per X-Forwarded-For IP if no key). Returns `429` with `Retry-After`. +- **Request size limits.** `VSTACK_API_MAX_BODY_BYTES` / `MAX_TRACE_STEPS` / `MAX_STRING_CHARS` / `MAX_TOTAL_CHARS` enforced before the trace reaches Pydantic, so a malicious client can't OOM the server with one POST. +- **Request timeout.** `VSTACK_API_REQUEST_TIMEOUT=120` (seconds). Forensic mode of some patterns can exceed this; the server surfaces a `504 timeout` and the client can retry in quick mode. +- **Security headers.** Every response carries `X-Content-Type-Options: nosniff`, `X-Frame-Options: DENY`, `Content-Security-Policy: default-src 'none'; frame-ancestors 'none'`, `Referrer-Policy: strict-origin-when-cross-origin`. HSTS is added when served over HTTPS. +- **CORS.** Off by default. Configure with `VSTACK_API_CORS_ORIGINS=https://app.example.com,https://staging.example.com`. Credentials never sent. + +## Ring 3 — Deployment (your responsibility) + +vstack can't enforce these — they're the network + infrastructure layer above the application. + +- **TLS termination.** Use a reverse proxy (nginx / Caddy / a managed Load Balancer). Don't expose the FastAPI app directly to the public internet on cleartext HTTP. +- **Secrets management.** Never bake `ANTHROPIC_API_KEY` / `VSTACK_API_KEYS` into a Docker image. Use the deployment platform's secret store (K8s Secrets, AWS Secrets Manager, GCP Secret Manager, HashiCorp Vault). +- **Network isolation.** The MCP server (stdio) is process-local; the REST API benefits from a private subnet + a security-group allowlist. +- **Audit logging.** Pipe stdout / stderr to your logging backend. Every request carries an `X-Request-ID` for correlation. 
+- **Dependency hygiene.** vstack's CI runs `bandit` on every commit + `pip-audit` on every install. Run these in your own CI too. + +## Threat model + +We design for these adversaries: + +1. **Untrusted trace content.** An attacker controls the trace fields (e.g. a bug-reporter pasted malicious content into a UI that calls vstack). Defended by Ring 1 (prompt fencing + injection detection + length caps). +2. **Untrusted API caller (no key).** Someone hits the public IP of your `vstack-api`. Defended by Ring 2 (auth, rate limit, request size caps) + Ring 3 (TLS, network ACLs). +3. **Untrusted API caller (valid key, abusive volume).** A legitimate API-key holder runs traffic high enough to deny service to others. Defended by per-key rate limiting + per-request timeout. +4. **Compromised dependency.** A transitive `pip` dep gets a malicious update. Defended by `pip-audit` in CI + `bandit` on first-party code; PyPI Trusted Publisher OIDC means our releases are tied to a specific GitHub workflow run. + +We do NOT design for: + +- **Attacks against the LLM provider itself.** That's the provider's job; we surface their errors and pass through their auth. +- **Side-channel attacks on the cache layer.** Cache keys are hashes of full canonical traces; there's no useful timing oracle for an attacker without a valid API key + matching trace. +- **Adversarial machine-learning attacks against the diagnostic analyzers.** The analyzers are LLM-driven; if a sophisticated attacker is able to manipulate the LLM's output by carefully crafting the input trace, that's a property of the LLM, not vstack's code. + +## Security audit posture + +- Every commit gates on `bandit` over the first-party `lib/` dirs. +- `pip-audit` runs in CI as an informational warn-only step (transitive vulns in framework adapter dep trees can't be unilaterally patched by vstack; report-only is correct). 
+- The `vstack-doctor` CLI surfaces real-time misconfiguration (auth-on-without-keys is an ERROR-level finding). +- No CVEs in first-party vstack code as of v0.6.0. + +## Reporting a vulnerability + +See [SECURITY.md](https://github.com/valani9/vstack/blob/main/SECURITY.md) at the repo root. Short version: email `valani@bu.edu` with the subject "VSTACK-SECURITY"; don't open a public issue. diff --git a/mkdocs.yml b/mkdocs.yml index b5be686..0bf2695 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -77,6 +77,9 @@ nav: - Config keys: reference/config-keys.md - MCP resource URIs: reference/mcp-uris.md - REST API: reference/rest-endpoints.md + - Operations: + - Production deploy: operations/deploy.md + - Security model: operations/security.md - Recipes: - recipes/index.md - Diagnose a confidently wrong agent: recipes/confidently-wrong.md diff --git a/pyproject.toml b/pyproject.toml index 4c6d6eb..ec6fc7d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "valanistack" -version = "0.5.0" +version = "0.6.0" description = "Organizational behavior, practiced on AI agents." 
readme = "README.md" requires-python = ">=3.11" @@ -127,6 +127,7 @@ vstack-analytics = "vstack.analytics.cli:main" vstack-browser = "vstack.browser.cli:main" vstack-gbrain = "vstack.gbrain.cli:main" vstack-bench = "vstack.benchmarks.cli:main" +vstack-doctor = "vstack.doctor.cli:main" vstack-lewin = "vstack.lewin.cli:main" vstack-goleman = "vstack.goleman_ei.cli:main" vstack-johari = "vstack.johari.cli:main" @@ -186,6 +187,10 @@ only-include = [] "_browser/lib" = "vstack/browser" "_gbrain/lib" = "vstack/gbrain" "_benchmarks/lib" = "vstack/benchmarks" +"_security/lib" = "vstack/security" +"_cache/lib" = "vstack/cache" +"_observability/lib" = "vstack/observability" +"_doctor/lib" = "vstack/doctor" "module-2-team/30-aar-generator/lib" = "vstack/aar" "module-2-team/17-lencioni-diagnostic/lib" = "vstack/lencioni" "module-2-team/18-trust-triangle-audit/lib" = "vstack/trust_triangle" @@ -250,6 +255,10 @@ testpaths = [ "_browser", "_gbrain", "_benchmarks", + "_security", + "_cache", + "_observability", + "_doctor", ] addopts = "-ra --tb=short --strict-markers" filterwarnings = [