From 5a53d3bc0ca793169e1a7ebb92bf086afc24396a Mon Sep 17 00:00:00 2001 From: Adam Fisk Date: Tue, 21 Apr 2026 20:48:35 -0600 Subject: [PATCH 1/5] worker: move from wick-pro, add /v1/events + /v1/stats/summary Moves the Cloudflare Worker backing releases.getwick.dev from the archived wick-pro repo into the public wick repo. No behavior change to the legacy /ping, /analytics/:key, or release-download paths. New endpoints for the per-fetch telemetry work (pairs with PR #5): - POST /v1/events Accepts {host, strategy, escalated_from, ok, status, timing_ms, version, os}. Writes to a new Cloudflare Analytics Engine dataset (`wick_events`) bound as `WICK_EVENTS`. Does not persist caller IP as a data point. - GET /v1/stats/summary Queries AE via the SQL API for a 7-day rollup of fetches + success rate + p50 timing per (host, strategy). Caches the JSON response for 5 minutes in the existing SUBSCRIPTIONS KV. Public (no auth). Requires two new secrets on the Worker (see worker/README.md): - CF_ANALYTICS_ACCOUNT_ID - CF_ANALYTICS_TOKEN (AE read) Plus one new binding in wrangler.toml: - [[analytics_engine_datasets]] binding = WICK_EVENTS Co-Authored-By: Claude Opus 4.6 (1M context) --- worker/README.md | 95 ++++++ worker/src/index.js | 705 +++++++++++++++++++++++++++++++++++++++++++ worker/wrangler.toml | 27 ++ 3 files changed, 827 insertions(+) create mode 100644 worker/README.md create mode 100644 worker/src/index.js create mode 100644 worker/wrangler.toml diff --git a/worker/README.md b/worker/README.md new file mode 100644 index 0000000..81de4d9 --- /dev/null +++ b/worker/README.md @@ -0,0 +1,95 @@ +# wick-releases Worker + +Cloudflare Worker backing `releases.getwick.dev`. Handles: + +- **Release distribution** — signed downloads of prebuilt binaries from R2. +- **Usage telemetry ingest** — two endpoints: + - `POST /ping` (legacy) — daily usage pings + failure reports, aggregated into KV. + - `POST /v1/events` — per-fetch telemetry `{host, strategy, ok, status, timing_ms, …}` written to Cloudflare Analytics Engine. +- **Public stats** — `GET /v1/stats/summary` serves shaped rows from Analytics Engine for `https://getwick.dev/stats.html`. Cached 5 min in KV. +- **Legacy analytics dashboard** — `GET /analytics/:key` (KV-based, auth-gated). + +## Development + +```bash +cd worker +npx wrangler dev # local preview +npx wrangler tail # live logs from the deployed worker +``` + +## Deployment + +```bash +npx wrangler deploy +``` + +### One-time setup + +Bindings declared in `wrangler.toml`: + +- `RELEASES` — R2 bucket `wick-releases` +- `SUBSCRIPTIONS` — KV namespace (also used as the 5-min cache for stats) +- `WICK_EVENTS` — Analytics Engine dataset `wick_events` + +Secrets (set via `wrangler secret put`): + +- `API_KEYS` — JSON object of Pro customer keys (legacy, kept for existing customers). +- `CF_ANALYTICS_ACCOUNT_ID` — Cloudflare account ID used for the AE SQL API. +- `CF_ANALYTICS_TOKEN` — API token with `Analytics Engine:Read` and `Workers:Read` (or scoped to the `wick_events` dataset). + +```bash +echo 'cc1234...' | npx wrangler secret put CF_ANALYTICS_ACCOUNT_ID +echo '' | npx wrangler secret put CF_ANALYTICS_TOKEN +``` + +## Telemetry schema + +`POST /v1/events` accepts JSON: + +```json +{ + "host": "nytimes.com", + "strategy": "cef", + "escalated_from": "cronet", + "ok": true, + "status": 200, + "timing_ms": 1840, + "version": "0.9.2", + "os": "macos" +} +``` + +Stored in `wick_events` as: + +| Column | Meaning | +|---|---| +| `blob1` | host | +| `blob2` | strategy (`cronet`, `cef`, `cef-after-cronet`, `captcha-auto`, …) | +| `blob3` | escalated_from (empty if none) | +| `blob4` | wick version | +| `blob5` | OS | +| `double1` | ok (0 or 1) | +| `double2` | HTTP status | +| `double3` | timing_ms | +| `index1` | host truncated to 32 bytes (used as shard key) | + +No IP, no path, no content. + +## Querying + +```bash +npx wrangler queues # (unrelated — just to confirm wrangler auth) + +# via SQL API (requires CF_ANALYTICS_TOKEN env var): +curl -fsSL https://api.cloudflare.com/client/v4/accounts/$ACCOUNT/analytics_engine/sql \ + -H "Authorization: Bearer $TOKEN" \ + --data-raw "SELECT blob1 AS host, blob2 AS strategy, + SUM(_sample_interval) AS fetches, + SUM(double1 * _sample_interval) AS successes + FROM wick_events + WHERE timestamp > NOW() - INTERVAL '1' DAY + GROUP BY host, strategy + ORDER BY fetches DESC LIMIT 50 FORMAT JSON" +``` + +See `site/stats.html` for the public version. diff --git a/worker/src/index.js b/worker/src/index.js new file mode 100644 index 0000000..ad2eda3 --- /dev/null +++ b/worker/src/index.js @@ -0,0 +1,705 @@ +/** + * Wick Pro — Release Server + Subscription Management + * + * Public routes: + * GET /install-pro.sh, /install-pro-mac.sh, /wick-tunnel + * GET /financial-data + * + * Pro subscription: + * POST /pro/checkout → creates Stripe checkout, returns URL + * GET /pro/status/:session → polls for API key after payment + * POST /pro/webhook → Stripe webhook (payment confirmed) + * POST /pro/validate/:key → validates a Pro API key + * + * Protected (API key required): + * GET /releases/:key/:file + * POST /solve/:key → CAPTCHA proxy + * POST /proxy/:key → geo-proxy + */ + +export default { + async fetch(request, env) { + const url = new URL(request.url); + const path = url.pathname; + + // CORS headers for browser requests + const headers = { + "Access-Control-Allow-Origin": "*", + "Access-Control-Allow-Methods": "GET", + }; + + // Public: serve the install script (no key needed) + if (path === "/install-pro.sh") { + const script = await env.RELEASES.get("install-pro.sh"); + if (!script) { + return new Response("Install script not found. Contact hello@getwick.dev\n", { + status: 404, + headers, + }); + } + return new Response(script.body, { + headers: { + ...headers, + "Content-Type": "text/plain; charset=utf-8", + "Cache-Control": "public, max-age=300", + }, + }); + } + + // Private client pages (no key needed, just unlisted) + if (path === "/financial-data") { + const page = await env.RELEASES.get("financial-data.html"); + if (!page) { + return new Response("Not found\n", { status: 404, headers }); + } + return new Response(page.body, { + headers: { ...headers, "Content-Type": "text/html; charset=utf-8", "Cache-Control": "private, no-cache" }, + }); + } + + // ── Pro Subscription ───────────────────────────────────── + + // Create Stripe checkout session + if (request.method === "POST" && path === "/pro/checkout") { + if (!env.STRIPE_SECRET_KEY) { + return new Response("Stripe not configured\n", { status: 503, headers }); + } + + const body = await request.json().catch(() => ({})); + const sessionId = crypto.randomUUID(); + + // Create Stripe checkout session + const stripeResp = await fetch("https://api.stripe.com/v1/checkout/sessions", { + method: "POST", + headers: { + "Authorization": `Bearer ${env.STRIPE_SECRET_KEY}`, + "Content-Type": "application/x-www-form-urlencoded", + }, + body: new URLSearchParams({ + "mode": "subscription", + "line_items[0][price]": env.STRIPE_PRICE_ID || "price_placeholder", + "line_items[0][quantity]": "1", + "success_url": `https://releases.getwick.dev/pro/success?session=${sessionId}`, + "cancel_url": "https://getwick.dev", + "metadata[wick_session]": sessionId, + "allow_promotion_codes": "true", + }), + }); + + const session = await stripeResp.json(); + if (session.error) { + return new Response(JSON.stringify({ error: session.error.message }), { + status: 400, + headers: { ...headers, "Content-Type": "application/json" }, + }); + } + + // Store session → pending + await env.SUBSCRIPTIONS.put(`session:${sessionId}`, JSON.stringify({ + status: "pending", + stripeSessionId: session.id, + created: new Date().toISOString(), + }), { expirationTtl: 3600 }); // 1 hour expiry + + return new Response(JSON.stringify({ + checkoutUrl: session.url, + sessionId, + }), { + headers: { ...headers, "Content-Type": "application/json" }, + }); + } + + // Poll for API key after checkout + if (path.match(/^\/pro\/status\/([^/]+)$/)) { + const sessionId = path.match(/^\/pro\/status\/([^/]+)$/)[1]; + const data = await env.SUBSCRIPTIONS.get(`session:${sessionId}`, "json"); + + if (!data) { + return new Response(JSON.stringify({ status: "unknown" }), { + headers: { ...headers, "Content-Type": "application/json" }, + }); + } + + return new Response(JSON.stringify({ + status: data.status, + key: data.key || null, + }), { + headers: { ...headers, "Content-Type": "application/json" }, + }); + } + + // Stripe webhook — payment confirmed + if (request.method === "POST" && path === "/pro/webhook") { + const payload = await request.text(); + + // In production, verify Stripe signature with env.STRIPE_WEBHOOK_SECRET + // For now, parse the event directly + let event; + try { + event = JSON.parse(payload); + } catch { + return new Response("Invalid payload\n", { status: 400, headers }); + } + + if (event.type === "checkout.session.completed") { + const session = event.data.object; + const wickSession = session.metadata?.wick_session; + const email = session.customer_email || session.customer_details?.email || "unknown"; + + if (wickSession) { + // Generate API key + const keyBytes = new Uint8Array(16); + crypto.getRandomValues(keyBytes); + const key = "wk_" + Array.from(keyBytes).map(b => b.toString(16).padStart(2, "0")).join(""); + + // Store key in KV + await env.SUBSCRIPTIONS.put(`key:${key}`, JSON.stringify({ + email, + stripeCustomerId: session.customer, + stripeSubscriptionId: session.subscription, + active: true, + created: new Date().toISOString(), + })); + + // Update session status + await env.SUBSCRIPTIONS.put(`session:${wickSession}`, JSON.stringify({ + status: "active", + key, + email, + })); + + // Also add to the legacy API_KEYS for backward compat + // (existing endpoints validate against API_KEYS secret) + // In the future, validate against KV instead + + console.log(JSON.stringify({ + event: "subscription", + email, + key: key.substring(0, 10) + "...", + timestamp: new Date().toISOString(), + })); + } + } + + return new Response("ok\n", { status: 200, headers }); + } + + // Validate a Pro API key (used by wick CLI) + if (path.match(/^\/pro\/validate\/([^/]+)$/)) { + const key = path.match(/^\/pro\/validate\/([^/]+)$/)[1]; + + // Check KV subscriptions first + const sub = await env.SUBSCRIPTIONS.get(`key:${key}`, "json"); + if (sub && sub.active) { + return new Response(JSON.stringify({ valid: true, email: sub.email }), { + headers: { ...headers, "Content-Type": "application/json" }, + }); + } + + // Fall back to legacy API_KEYS secret + try { + const keys = JSON.parse(env.API_KEYS || "{}"); + if (keys[key] && keys[key].active) { + return new Response(JSON.stringify({ valid: true, customer: keys[key].customer }), { + headers: { ...headers, "Content-Type": "application/json" }, + }); + } + } catch {} + + return new Response(JSON.stringify({ valid: false }), { + status: 403, + headers: { ...headers, "Content-Type": "application/json" }, + }); + } + + // Success page after Stripe checkout + if (path === "/pro/success") { + const sessionId = url.searchParams.get("session"); + return new Response(` +Wick Pro - Activated + +
+

Wick Pro Activated

+

Setting up your API key...

+ +
`, { + headers: { ...headers, "Content-Type": "text/html; charset=utf-8" }, + }); + } + + // ── Analytics ────────────────────────────────────────────── + + // Usage ping — lightweight, no PII. Tracks installs + active users. + // POST /ping with { "event": "install|fetch|activate", "version": "0.4.0", "os": "darwin" } + if (request.method === "POST" && path === "/ping") { + const body = await request.json().catch(() => ({})); + const event = body.event || "unknown"; + const version = body.version || "unknown"; + const os = body.os || "unknown"; + const date = new Date().toISOString().split("T")[0]; // YYYY-MM-DD + + // Increment counters in KV + const key = `ping:${date}:${event}:${os}:${version}`; + const current = parseInt(await env.SUBSCRIPTIONS.get(key) || "0"); + await env.SUBSCRIPTIONS.put(key, String(current + 1), { expirationTtl: 90 * 86400 }); + + // Track daily totals + const totalKey = `ping:${date}:total`; + const total = parseInt(await env.SUBSCRIPTIONS.get(totalKey) || "0"); + await env.SUBSCRIPTIONS.put(totalKey, String(total + 1), { expirationTtl: 90 * 86400 }); + + // For error events, track which domains fail most + if (event === "error" && body.domain) { + const domainKey = `errors:${date}:${body.domain}:${body.error || "unknown"}`; + const domainCount = parseInt(await env.SUBSCRIPTIONS.get(domainKey) || "0"); + await env.SUBSCRIPTIONS.put(domainKey, String(domainCount + 1), { expirationTtl: 90 * 86400 }); + + // Append to daily error log (last 100 errors) + const logKey = `errorlog:${date}`; + const log = await env.SUBSCRIPTIONS.get(logKey) || ""; + const entry = `${body.domain}|${body.status}|${body.error}|${body.version}|${body.os}|${body.pro}\n`; + if (log.length < 50000) { // cap at ~50KB per day + await env.SUBSCRIPTIONS.put(logKey, log + entry, { expirationTtl: 90 * 86400 }); + } + } + + return new Response("ok\n", { status: 200, headers }); + } + + // ── Per-fetch telemetry (Analytics Engine) ─────────────────── + // + // POST /v1/events with body: + // { "host": "nytimes.com", "strategy": "cef", "escalated_from": null|"cronet", + // "ok": true, "status": 200, "timing_ms": 1840, + // "version": "0.9.2", "os": "macos" } + // + // What's stored: only the fields in the body. Cloudflare sees the + // caller IP at ingest but we don't persist it as a data point. + // Retention: Analytics Engine default (~92 days). + if (request.method === "POST" && path === "/v1/events") { + if (!env.WICK_EVENTS) { + // Binding not configured — silently accept so old clients don't error. + return new Response("", { status: 204, headers }); + } + + let body; + try { + body = await request.json(); + } catch { + return new Response("bad json\n", { status: 400, headers }); + } + + // Reject absurdly long fields — RFC 1035 max hostname is 253 chars. + const host = String(body.host || "").slice(0, 253); + const strategy = String(body.strategy || "").slice(0, 32); + const escalatedFrom = body.escalated_from == null + ? "" + : String(body.escalated_from).slice(0, 32); + const version = String(body.version || "").slice(0, 16); + const os = String(body.os || "").slice(0, 16); + + env.WICK_EVENTS.writeDataPoint({ + blobs: [host, strategy, escalatedFrom, version, os], + doubles: [ + body.ok ? 1 : 0, + Number(body.status) || 0, + Number(body.timing_ms) || 0, + ], + // The index column is used for partitioning/sharding — use host so + // per-host queries are fast. Capped at 32 bytes per AE constraints. + indexes: [host.slice(0, 32)], + }); + + return new Response("", { status: 204, headers }); + } + + // ── Public stats summary ───────────────────────────────────── + // + // GET /v1/stats/summary — returns aggregated per-host success stats + // for the public stats page. No auth required. Response is cached for + // 5 minutes to keep Analytics Engine query volume down. + if (request.method === "GET" && path === "/v1/stats/summary") { + if (!env.CF_ANALYTICS_ACCOUNT_ID || !env.CF_ANALYTICS_TOKEN) { + return new Response( + JSON.stringify({ error: "analytics not configured" }), + { status: 503, headers: { ...headers, "Content-Type": "application/json" } }, + ); + } + + const cacheKey = "stats:summary:v1"; + const cached = await env.SUBSCRIPTIONS.get(cacheKey); + if (cached) { + return new Response(cached, { + headers: { + ...headers, + "Content-Type": "application/json", + "Cache-Control": "public, max-age=300", + }, + }); + } + + // Query Analytics Engine via SQL API. + // blob1=host, blob2=strategy, double1=ok, double2=status, double3=timing_ms + const sql = ` + SELECT + blob1 AS host, + blob2 AS strategy, + SUM(_sample_interval) AS fetches, + SUM(double1 * _sample_interval) AS successes, + quantileWeighted(0.5)(double3, _sample_interval) AS p50_ms + FROM wick_events + WHERE timestamp > NOW() - INTERVAL '7' DAY + AND blob1 != '' + GROUP BY host, strategy + ORDER BY fetches DESC + LIMIT 200 + FORMAT JSON + `.trim(); + + const resp = await fetch( + `https://api.cloudflare.com/client/v4/accounts/${env.CF_ANALYTICS_ACCOUNT_ID}/analytics_engine/sql`, + { + method: "POST", + headers: { + "Authorization": `Bearer ${env.CF_ANALYTICS_TOKEN}`, + "Content-Type": "text/plain", + }, + body: sql, + }, + ); + + if (!resp.ok) { + return new Response( + JSON.stringify({ error: "query failed", status: resp.status }), + { status: 502, headers: { ...headers, "Content-Type": "application/json" } }, + ); + } + + const raw = await resp.json(); + // Shape the response for the public stats page. + const rows = (raw.data || []).map(r => ({ + host: r.host, + strategy: r.strategy, + fetches: Number(r.fetches) || 0, + successes: Number(r.successes) || 0, + success_rate: (Number(r.fetches) || 0) > 0 + ? (Number(r.successes) || 0) / Number(r.fetches) + : 0, + p50_ms: Number(r.p50_ms) || 0, + })); + + const payload = JSON.stringify({ + generated_at: new Date().toISOString(), + window_days: 7, + rows, + }); + + await env.SUBSCRIPTIONS.put(cacheKey, payload, { expirationTtl: 300 }); + + return new Response(payload, { + headers: { + ...headers, + "Content-Type": "application/json", + "Cache-Control": "public, max-age=300", + }, + }); + } + + // Analytics dashboard — simple KV-based metrics + // GET /analytics/:key (requires API key) + if (path.match(/^\/analytics\/([^/]+)$/)) { + const analyticsKey = path.match(/^\/analytics\/([^/]+)$/)[1]; + + // Validate key + let keys; + try { keys = JSON.parse(env.API_KEYS || "{}"); } catch { keys = {}; } + const sub = await env.SUBSCRIPTIONS.get(`key:${analyticsKey}`, "json"); + if ((!keys[analyticsKey] || !keys[analyticsKey].active) && !sub) { + return new Response("Unauthorized\n", { status: 403, headers }); + } + + // Get last 7 days of data + // Check all known OS/version combos since pings store as ping:date:event:os:version + const osVersions = ["macos:0.5.0", "darwin:0.5.0", "macos:unknown", "darwin:unknown", "linux:0.5.0", "linux:unknown"]; + const days = []; + for (let i = 0; i < 7; i++) { + const d = new Date(Date.now() - i * 86400000).toISOString().split("T")[0]; + const total = parseInt(await env.SUBSCRIPTIONS.get(`ping:${d}:total`) || "0"); + let installs = 0, fetches = 0; + for (const ov of osVersions) { + installs += parseInt(await env.SUBSCRIPTIONS.get(`ping:${d}:install:${ov}`) || "0"); + fetches += parseInt(await env.SUBSCRIPTIONS.get(`ping:${d}:fetch:${ov}`) || "0"); + } + days.push({ date: d, total, installs, fetches }); + } + + return new Response(JSON.stringify({ days }, null, 2), { + headers: { ...headers, "Content-Type": "application/json" }, + }); + } + + // Public: serve macOS install script + if (path === "/install-pro-mac.sh") { + const script = await env.RELEASES.get("install-pro-mac.sh"); + if (!script) { + return new Response("macOS install script not found\n", { status: 404, headers }); + } + return new Response(script.body, { + headers: { ...headers, "Content-Type": "text/plain; charset=utf-8", "Cache-Control": "public, max-age=300" }, + }); + } + + // Public: serve wick-tunnel script (no key needed) + if (path === "/wick-tunnel") { + const script = await env.RELEASES.get("wick-tunnel"); + if (!script) { + return new Response("wick-tunnel not found. Contact hello@getwick.dev\n", { + status: 404, + headers, + }); + } + return new Response(script.body, { + headers: { + ...headers, + "Content-Type": "text/plain; charset=utf-8", + "Cache-Control": "public, max-age=300", + }, + }); + } + + // Protected: CAPTCHA solve proxy — POST /solve/:key + // Proxies to CapSolver using our API key. Customer never sees it. + if (request.method === "POST" && path.match(/^\/solve\/([^/]+)$/)) { + const solveKey = path.match(/^\/solve\/([^/]+)$/)[1]; + + let keys; + try { keys = JSON.parse(env.API_KEYS || "{}"); } catch { + return new Response("Server error\n", { status: 500, headers }); + } + if (!keys[solveKey] || !keys[solveKey].active) { + return new Response("Invalid API key\n", { status: 403, headers }); + } + + if (!env.CAPSOLVER_API_KEY) { + return new Response("CAPTCHA solving not configured\n", { status: 503, headers }); + } + + // Read the request body + const body = await request.json().catch(() => null); + if (!body) { + return new Response("Missing request body\n", { status: 400, headers }); + } + if (!body.task && !body.taskId) { + return new Response("Missing task or taskId in request body\n", { status: 400, headers }); + } + + const action = body.action || "createTask"; + const capsolverUrl = `https://api.capsolver.com/${action}`; + + // Build CapSolver request — inject our API key + const capBody = { clientKey: env.CAPSOLVER_API_KEY }; + if (action === "createTask") { + capBody.task = body.task; + } else if (action === "getTaskResult") { + capBody.taskId = body.taskId; + } + + const capResp = await fetch(capsolverUrl, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify(capBody), + }); + + const capResult = await capResp.text(); + + console.log(JSON.stringify({ + event: "captcha_solve", + customer: keys[solveKey].customer, + action, + timestamp: new Date().toISOString(), + })); + + return new Response(capResult, { + status: capResp.status, + headers: { ...headers, "Content-Type": "application/json" }, + }); + } + + // Protected: geo-proxy — fetch URLs from Cloudflare's edge network. + // Bypasses geo-restrictions by originating from Cloudflare's regional PoPs + // (Tokyo, Taipei, etc.) instead of the customer's server location. + // POST /proxy/:key with JSON body { "url": "https://..." } + if (request.method === "POST" && path.match(/^\/proxy\/([^/]+)$/)) { + const proxyKey = path.match(/^\/proxy\/([^/]+)$/)[1]; + + let keys; + try { keys = JSON.parse(env.API_KEYS || "{}"); } catch { + return new Response("Server error\n", { status: 500, headers }); + } + if (!keys[proxyKey] || !keys[proxyKey].active) { + return new Response("Invalid API key\n", { status: 403, headers }); + } + + const body = await request.json().catch(() => null); + if (!body || !body.url) { + return new Response("Missing url in request body\n", { status: 400, headers }); + } + + // Validate URL (only http/https, no internal IPs) + let targetUrl; + try { + targetUrl = new URL(body.url); + if (!["http:", "https:"].includes(targetUrl.protocol)) { + return new Response("Only http/https URLs\n", { status: 400, headers }); + } + } catch { + return new Response("Invalid URL\n", { status: 400, headers }); + } + + // Fetch from Cloudflare's edge — exits from nearest PoP to target + const proxyHeaders = { + "User-Agent": body.userAgent || "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36", + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", + "Accept-Language": body.acceptLanguage || "en-US,en;q=0.9", + }; + + // Forward custom headers if provided + if (body.headers) { + for (const [k, v] of Object.entries(body.headers)) { + proxyHeaders[k] = v; + } + } + + try { + const resp = await fetch(body.url, { + headers: proxyHeaders, + redirect: "follow", + cf: { + // Hint Cloudflare to use a PoP near the target + cacheTtl: 0, + cacheEverything: false, + }, + }); + + const contentType = resp.headers.get("content-type") || "text/html"; + const responseBody = await resp.text(); + + console.log(JSON.stringify({ + event: "proxy", + customer: keys[proxyKey].customer, + url: body.url, + status: resp.status, + bytes: responseBody.length, + timestamp: new Date().toISOString(), + })); + + return new Response(responseBody, { + status: resp.status, + headers: { + ...headers, + "Content-Type": contentType, + "X-Proxy-Status": resp.status.toString(), + "X-Proxy-Url": body.url, + }, + }); + } catch (e) { + return new Response(`Proxy fetch failed: ${e.message}\n`, { + status: 502, + headers, + }); + } + } + + // Protected: /releases/:key/:filename + const releaseMatch = path.match(/^\/releases\/([^/]+)\/(.+)$/); + if (!releaseMatch) { + return new Response("Not found\n", { status: 404, headers }); + } + + const [, apiKey, filename] = releaseMatch; + + // Validate API key + let keys; + try { + keys = JSON.parse(env.API_KEYS || "{}"); + } catch { + return new Response("Server configuration error\n", { status: 500, headers }); + } + + const keyInfo = keys[apiKey]; + if (!keyInfo || !keyInfo.active) { + return new Response( + "Invalid or expired API key.\n" + + "Contact hello@getwick.dev for Wick Pro access.\n", + { status: 403, headers } + ); + } + + // Validate filename (prevent path traversal) + const allowedFiles = [ + "wick-pro-linux-x86_64.tar.gz", + "wick-pro-linux-aarch64.tar.gz", + "cef-runtime-linux-x86_64.tar.bz2", + "cef-runtime-linux-aarch64.tar.bz2", + ]; + if (!allowedFiles.includes(filename)) { + return new Response("File not found\n", { status: 404, headers }); + } + + // Fetch from R2 + const object = await env.RELEASES.get(filename); + if (!object) { + return new Response( + "Release not available yet. Contact hello@getwick.dev\n", + { status: 404, headers } + ); + } + + // Log download for tracking + console.log(JSON.stringify({ + event: "download", + customer: keyInfo.customer, + file: filename, + ip: request.headers.get("CF-Connecting-IP"), + timestamp: new Date().toISOString(), + })); + + return new Response(object.body, { + headers: { + ...headers, + "Content-Type": "application/gzip", + "Content-Disposition": `attachment; filename="${filename}"`, + "Cache-Control": "private, no-cache", + }, + }); + }, +}; diff --git a/worker/wrangler.toml b/worker/wrangler.toml new file mode 100644 index 0000000..f6e4c86 --- /dev/null +++ b/worker/wrangler.toml @@ -0,0 +1,27 @@ +name = "wick-releases" +main = "src/index.js" +compatibility_date = "2024-01-01" +workers_dev = false + +# Custom domain: releases.getwick.dev +routes = [{ pattern = "releases.getwick.dev/*", zone_name = "getwick.dev" }] + +# R2 bucket for storing release tarballs +[[r2_buckets]] +binding = "RELEASES" +bucket_name = "wick-releases" + +# KV for Pro subscriptions (email → key → status) +[[kv_namespaces]] +binding = "SUBSCRIPTIONS" +id = "cc2ea7f5a022431dbb5361c3741786d1" + +# API keys stored as a JSON object in a secret: +# { "key1": { "customer": "Acme Corp", "active": true }, ... } +# Set with: npx wrangler secret put API_KEYS + +# Analytics Engine dataset for per-fetch telemetry events. +# Written by POST /v1/events, queried by GET /v1/stats/summary. +[[analytics_engine_datasets]] +binding = "WICK_EVENTS" +dataset = "wick_events" From 0c5d8440bd0130929df1d32ca6d8f7f39235cd8e Mon Sep 17 00:00:00 2001 From: Adam Fisk Date: Tue, 21 Apr 2026 21:09:27 -0600 Subject: [PATCH 2/5] worker: switch /v1/events to KV (drop Analytics Engine dependency) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Analytics Engine needs Workers Paid ($5/mo) to bind. KV works on Workers Free and matches the storage pattern the existing /ping endpoint already uses — so there's no reason to pull in a paid product for the scale Wick currently operates at. Storage model: - Key: evt:{YYYY-MM-DD}:{host}:{strategy} - Value: {"fetches": N, "successes": M, "total_ms": T} (JSON) - TTL: 30 days Each POST /v1/events is a read-modify-write on a single key. At high concurrency a few increments may be lost, which is fine for telemetry — mirrors the behavior of the existing ping counters. GET /v1/stats/summary scans the last 7 days of evt:* keys, aggregates per (host, strategy), and returns shaped JSON. Cached 5 minutes in the same KV namespace so the scan only runs ~288 times/day. Removed: - Analytics Engine binding in wrangler.toml - CF_ANALYTICS_ACCOUNT_ID / CF_ANALYTICS_TOKEN secrets - AE SQL query in /v1/stats/summary Tradeoffs: - No real p50 timing — using mean_ms (total_ms / fetches) as a reasonable approximation. Good enough for the public stats page. - Cap: ~5000 KV keys scanned per day during summary refresh. Well inside Workers Free limits for expected Wick scale. Co-Authored-By: Claude Opus 4.6 (1M context) --- worker/README.md | 58 ++++++--------- worker/src/index.js | 169 ++++++++++++++++++++++--------------------- worker/wrangler.toml | 8 +- 3 files changed, 112 insertions(+), 123 deletions(-) diff --git a/worker/README.md b/worker/README.md index 81de4d9..3a3038d 100644 --- a/worker/README.md +++ b/worker/README.md @@ -5,10 +5,12 @@ Cloudflare Worker backing `releases.getwick.dev`. Handles: - **Release distribution** — signed downloads of prebuilt binaries from R2. - **Usage telemetry ingest** — two endpoints: - `POST /ping` (legacy) — daily usage pings + failure reports, aggregated into KV. - - `POST /v1/events` — per-fetch telemetry `{host, strategy, ok, status, timing_ms, …}` written to Cloudflare Analytics Engine. -- **Public stats** — `GET /v1/stats/summary` serves shaped rows from Analytics Engine for `https://getwick.dev/stats.html`. Cached 5 min in KV. + - `POST /v1/events` — per-fetch telemetry `{host, strategy, ok, status, timing_ms, …}` stored in KV. +- **Public stats** — `GET /v1/stats/summary` aggregates 7 days of KV-stored events for `https://getwick.dev/stats.html`. Cached 5 min in KV. - **Legacy analytics dashboard** — `GET /analytics/:key` (KV-based, auth-gated). +Everything here runs on Workers Free — no Analytics Engine, no paid Workers plan required. If Wick grows past the free KV limits, the `/v1/events` handler can be swapped for Analytics Engine by flipping a binding. + ## Development ```bash @@ -28,19 +30,13 @@ npx wrangler deploy Bindings declared in `wrangler.toml`: - `RELEASES` — R2 bucket `wick-releases` -- `SUBSCRIPTIONS` — KV namespace (also used as the 5-min cache for stats) -- `WICK_EVENTS` — Analytics Engine dataset `wick_events` +- `SUBSCRIPTIONS` — KV namespace (holds Pro keys, legacy ping counters, per-fetch event counters under `evt:` prefix, and the 5-min stats cache) Secrets (set via `wrangler secret put`): - `API_KEYS` — JSON object of Pro customer keys (legacy, kept for existing customers). -- `CF_ANALYTICS_ACCOUNT_ID` — Cloudflare account ID used for the AE SQL API. -- `CF_ANALYTICS_TOKEN` — API token with `Analytics Engine:Read` and `Workers:Read` (or scoped to the `wick_events` dataset). -```bash -echo 'cc1234...' | npx wrangler secret put CF_ANALYTICS_ACCOUNT_ID -echo '' | npx wrangler secret put CF_ANALYTICS_TOKEN -``` +No additional secrets needed for the telemetry endpoints. ## Telemetry schema @@ -59,37 +55,25 @@ echo '' | npx wrangler secret put CF_ANALYTICS_TOKEN } ``` -Stored in `wick_events` as: +Stored in `SUBSCRIPTIONS` KV as one key per `(date, host, strategy)`: -| Column | Meaning | +| | | |---|---| -| `blob1` | host | -| `blob2` | strategy (`cronet`, `cef`, `cef-after-cronet`, `captcha-auto`, …) | -| `blob3` | escalated_from (empty if none) | -| `blob4` | wick version | -| `blob5` | OS | -| `double1` | ok (0 or 1) | -| `double2` | HTTP status | -| `double3` | timing_ms | -| `index1` | host truncated to 32 bytes (used as shard key) | - -No IP, no path, no content. +| Key | `evt:YYYY-MM-DD:{host}:{strategy}` | +| Value | `{"fetches": N, "successes": M, "total_ms": T}` (JSON) | +| TTL | 30 days | + +Increments are read-modify-write, same pattern as the legacy `/ping` counters. Under heavy concurrency a small number of increments may be lost; this is fine for telemetry. + +What's **not** stored: URL paths or query strings, request/response bodies, page titles, caller IP (Cloudflare sees it at ingest, but it's never persisted as a data point), user identifier, machine ID. ## Querying +`GET /v1/stats/summary` does the aggregation and returns shaped JSON. See `site/stats.html` for the public renderer. + +For ad-hoc debugging you can list KV keys directly: + ```bash -npx wrangler queues # (unrelated — just to confirm wrangler auth) - -# via SQL API (requires CF_ANALYTICS_TOKEN env var): -curl -fsSL https://api.cloudflare.com/client/v4/accounts/$ACCOUNT/analytics_engine/sql \ - -H "Authorization: Bearer $TOKEN" \ - --data-raw "SELECT blob1 AS host, blob2 AS strategy, - SUM(_sample_interval) AS fetches, - SUM(double1 * _sample_interval) AS successes - FROM wick_events - WHERE timestamp > NOW() - INTERVAL '1' DAY - GROUP BY host, strategy - ORDER BY fetches DESC LIMIT 50 FORMAT JSON" +npx wrangler kv key list --binding=SUBSCRIPTIONS --prefix='evt:' | head -20 +npx wrangler kv key get --binding=SUBSCRIPTIONS 'evt:2026-04-21:example.com:cronet' ``` - -See `site/stats.html` for the public version. diff --git a/worker/src/index.js b/worker/src/index.js index ad2eda3..c1cb4a5 100644 --- a/worker/src/index.js +++ b/worker/src/index.js @@ -295,22 +295,23 @@ poll(); return new Response("ok\n", { status: 200, headers }); } - // ── Per-fetch telemetry (Analytics Engine) ─────────────────── + // ── Per-fetch telemetry (KV-backed) ────────────────────────── // // POST /v1/events with body: // { "host": "nytimes.com", "strategy": "cef", "escalated_from": null|"cronet", // "ok": true, "status": 200, "timing_ms": 1840, // "version": "0.9.2", "os": "macos" } // - // What's stored: only the fields in the body. Cloudflare sees the - // caller IP at ingest but we don't persist it as a data point. - // Retention: Analytics Engine default (~92 days). + // Storage model: one KV key per (date, host, strategy) with a merged + // JSON value `{ fetches, successes, total_ms }`. Each event is a + // read-modify-write — matches the pattern the existing /ping counters + // use. Eventually consistent at high concurrency (some increments may + // be lost if two writes race in the same second), which is fine for + // telemetry. + // + // Cloudflare sees the caller IP at ingest but we don't persist it. + // Retention: 30 days via KV TTL. if (request.method === "POST" && path === "/v1/events") { - if (!env.WICK_EVENTS) { - // Binding not configured — silently accept so old clients don't error. - return new Response("", { status: 204, headers }); - } - let body; try { body = await request.json(); @@ -321,22 +322,26 @@ poll(); // Reject absurdly long fields — RFC 1035 max hostname is 253 chars. const host = String(body.host || "").slice(0, 253); const strategy = String(body.strategy || "").slice(0, 32); - const escalatedFrom = body.escalated_from == null - ? "" - : String(body.escalated_from).slice(0, 32); - const version = String(body.version || "").slice(0, 16); - const os = String(body.os || "").slice(0, 16); - - env.WICK_EVENTS.writeDataPoint({ - blobs: [host, strategy, escalatedFrom, version, os], - doubles: [ - body.ok ? 1 : 0, - Number(body.status) || 0, - Number(body.timing_ms) || 0, - ], - // The index column is used for partitioning/sharding — use host so - // per-host queries are fast. Capped at 32 bytes per AE constraints. - indexes: [host.slice(0, 32)], + if (!host || !strategy) { + return new Response("", { status: 204, headers }); + } + + // Normalize date to YYYY-MM-DD UTC to keep keys sortable. + const date = new Date().toISOString().split("T")[0]; + const key = `evt:${date}:${host}:${strategy}`; + + const existingRaw = await env.SUBSCRIPTIONS.get(key); + const existing = existingRaw + ? JSON.parse(existingRaw) + : { fetches: 0, successes: 0, total_ms: 0 }; + + existing.fetches += 1; + if (body.ok) existing.successes += 1; + const ms = Number(body.timing_ms) || 0; + if (ms > 0) existing.total_ms += Math.min(ms, 600000); // clamp at 10 min to avoid runaway sums + + await env.SUBSCRIPTIONS.put(key, JSON.stringify(existing), { + expirationTtl: 30 * 86400, }); return new Response("", { status: 204, headers }); @@ -344,17 +349,10 @@ poll(); // ── Public stats summary ───────────────────────────────────── // - // GET /v1/stats/summary — returns aggregated per-host success stats - // for the public stats page. No auth required. Response is cached for - // 5 minutes to keep Analytics Engine query volume down. + // GET /v1/stats/summary — 7-day aggregate of the KV event counters, + // cached 5 minutes. Public, no auth. Refreshing on a cache miss + // scans up to 7*1000 KV keys so keep the cache honest. if (request.method === "GET" && path === "/v1/stats/summary") { - if (!env.CF_ANALYTICS_ACCOUNT_ID || !env.CF_ANALYTICS_TOKEN) { - return new Response( - JSON.stringify({ error: "analytics not configured" }), - { status: 503, headers: { ...headers, "Content-Type": "application/json" } }, - ); - } - const cacheKey = "stats:summary:v1"; const cached = await env.SUBSCRIPTIONS.get(cacheKey); if (cached) { @@ -367,55 +365,64 @@ poll(); }); } - // Query Analytics Engine via SQL API. - // blob1=host, blob2=strategy, double1=ok, double2=status, double3=timing_ms - const sql = ` - SELECT - blob1 AS host, - blob2 AS strategy, - SUM(_sample_interval) AS fetches, - SUM(double1 * _sample_interval) AS successes, - quantileWeighted(0.5)(double3, _sample_interval) AS p50_ms - FROM wick_events - WHERE timestamp > NOW() - INTERVAL '7' DAY - AND blob1 != '' - GROUP BY host, strategy - ORDER BY fetches DESC - LIMIT 200 - FORMAT JSON - `.trim(); - - const resp = await fetch( - `https://api.cloudflare.com/client/v4/accounts/${env.CF_ANALYTICS_ACCOUNT_ID}/analytics_engine/sql`, - { - method: "POST", - headers: { - "Authorization": `Bearer ${env.CF_ANALYTICS_TOKEN}`, - "Content-Type": "text/plain", - }, - body: sql, - }, - ); + // Aggregate across the last 7 days. + const now = new Date(); + const dates = []; + for (let i = 0; i < 7; i++) { + const d = new Date(now.getTime() - i * 86400_000); + dates.push(d.toISOString().split("T")[0]); + } - if (!resp.ok) { - return new Response( - JSON.stringify({ error: "query failed", status: resp.status }), - { status: 502, headers: { ...headers, "Content-Type": "application/json" } }, - ); + // Keep the accumulation small: one entry per (host, strategy). + const agg = new Map(); // key: `${host}|${strategy}` → { host, strategy, fetches, successes, total_ms } + + for (const date of dates) { + let cursor = undefined; + let scanned = 0; + do { + const list = await env.SUBSCRIPTIONS.list({ + prefix: `evt:${date}:`, + limit: 1000, + cursor, + }); + for (const k of list.keys) { + scanned++; + if (scanned > 5000) break; // safety cap per day + const raw = await env.SUBSCRIPTIONS.get(k.name); + if (!raw) continue; + let v; + try { v = JSON.parse(raw); } catch { continue; } + // Key format: evt:YYYY-MM-DD:host:strategy + const rest = k.name.slice(`evt:${date}:`.length); + const lastColon = rest.lastIndexOf(":"); + if (lastColon < 0) continue; + const host = rest.slice(0, lastColon); + const strategy = rest.slice(lastColon + 1); + const aggKey = `${host}|${strategy}`; + const cur = agg.get(aggKey) || { + host, strategy, fetches: 0, successes: 0, total_ms: 0, + }; + cur.fetches += v.fetches || 0; + cur.successes += v.successes || 0; + cur.total_ms += v.total_ms || 0; + agg.set(aggKey, cur); + } + cursor = list.list_complete ? undefined : list.cursor; + } while (cursor); } - const raw = await resp.json(); - // Shape the response for the public stats page. - const rows = (raw.data || []).map(r => ({ - host: r.host, - strategy: r.strategy, - fetches: Number(r.fetches) || 0, - successes: Number(r.successes) || 0, - success_rate: (Number(r.fetches) || 0) > 0 - ? (Number(r.successes) || 0) / Number(r.fetches) - : 0, - p50_ms: Number(r.p50_ms) || 0, - })); + const rows = [...agg.values()] + .map(r => ({ + host: r.host, + strategy: r.strategy, + fetches: r.fetches, + successes: r.successes, + success_rate: r.fetches > 0 ? r.successes / r.fetches : 0, + // No real p50 without raw samples — use mean_ms as an approximation. + p50_ms: r.fetches > 0 ? Math.round(r.total_ms / r.fetches) : 0, + })) + .sort((a, b) => b.fetches - a.fetches) + .slice(0, 500); const payload = JSON.stringify({ generated_at: new Date().toISOString(), diff --git a/worker/wrangler.toml b/worker/wrangler.toml index f6e4c86..00d4340 100644 --- a/worker/wrangler.toml +++ b/worker/wrangler.toml @@ -20,8 +20,6 @@ id = "cc2ea7f5a022431dbb5361c3741786d1" # { "key1": { "customer": "Acme Corp", "active": true }, ... } # Set with: npx wrangler secret put API_KEYS -# Analytics Engine dataset for per-fetch telemetry events. -# Written by POST /v1/events, queried by GET /v1/stats/summary. -[[analytics_engine_datasets]] -binding = "WICK_EVENTS" -dataset = "wick_events" +# Per-fetch telemetry is stored in the same SUBSCRIPTIONS KV namespace +# under the `evt:` prefix. No additional binding required — keeping this +# on Workers Free avoids pulling in the Analytics Engine paid tier. From 1c6d726a8a17b12d4279b148e3ca5ee1b726b186 Mon Sep 17 00:00:00 2001 From: Adam Fisk Date: Wed, 22 Apr 2026 06:51:01 -0600 Subject: [PATCH 3/5] Address PR #6 review comments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Critical (security + correctness): - Stripe webhook now verifies `Stripe-Signature` via HMAC-SHA256 (5-min timestamp tolerance, constant-time compare). Before this, anyone who could reach /pro/webhook could mint API keys by POSTing a forged `checkout.session.completed` payload. - Geo-proxy (/proxy/:key) now rejects private/loopback/link-local IP literals so a paid key can't probe our internal networks. Catches IPv4 (0/8, 10/8, 127/8, 169.254/16, 172.16/12, 192.168/16, 100.64/10, 192.0.2/24) and IPv6 (::, ::1, fc00::/7, fe80::/10, IPv4-mapped ::ffff:). DNS-based targets are not resolved here — documented as a known limitation. - /pro/success page no longer interpolates the `?session=` query param into an inline `, { - headers: { ...headers, "Content-Type": "text/html; charset=utf-8" }, - }); - } - // ── Analytics ────────────────────────────────────────────── // Usage ping — lightweight, no PII. Tracks installs + active users. diff --git a/worker/wrangler.toml b/worker/wrangler.toml index 00d4340..386eb5e 100644 --- a/worker/wrangler.toml +++ b/worker/wrangler.toml @@ -11,12 +11,16 @@ routes = [{ pattern = "releases.getwick.dev/*", zone_name = "getwick.dev" }] binding = "RELEASES" bucket_name = "wick-releases" -# KV for Pro subscriptions (email → key → status) +# KV used for telemetry counters + (legacy) subscription records. +# Binding name is kept as SUBSCRIPTIONS for continuity with existing +# KV entries; prefixes are `ping:`, `evt:`, `errors:`, `errorlog:`, +# `stats:`, and `key:`/`session:` for any remaining legacy records. [[kv_namespaces]] binding = "SUBSCRIPTIONS" id = "cc2ea7f5a022431dbb5361c3741786d1" -# API keys stored as a JSON object in a secret: +# API keys for the protected endpoints (/solve, /proxy, /analytics, +# /releases) are a JSON object in a secret: # { "key1": { "customer": "Acme Corp", "active": true }, ... } # Set with: npx wrangler secret put API_KEYS From b02d8a9ee331bc883891ec7820823afba66b122c Mon Sep 17 00:00:00 2001 From: Adam Fisk Date: Wed, 22 Apr 2026 07:13:05 -0600 Subject: [PATCH 5/5] Address PR #6 review comments (round 2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Worker hardening: - /v1/events validates the parsed KV value's shape (must be a plain object with numeric fields) before incrementing — null, strings, or manually-edited corrupt values reset to zero rather than throw or write back a poisoned record. - /v1/events uses `body.ok === true` (strict boolean) so the unauthenticated endpoint can't be skewed by `"false"` (a string) being truthy. - /v1/stats/summary coerces stored fields via Number() and ignores non-finite values, so a stringly-typed `"1"` can't turn arithmetic into string concatenation. - /proxy/:key no longer reflects the user-controlled URL back as `X-Proxy-Url` (newlines could trigger invalid-header errors; query-string secrets could leak into downstream tooling that records response headers). - /proxy/:key log entry records `host` + `path` (from the parsed URL) instead of the raw `body.url`, so signed-URL tokens and other query-string secrets stay out of worker logs. - /releases/:key/:filename picks Content-Type by extension — `.tar.bz2` files now serve as `application/x-bzip2` instead of being mislabeled `application/gzip`. Unknown extensions fall back to `application/octet-stream`. Doc/comment fixes: - Stats handler comment now says "single global cap of 5000 KV keys" to match the actual SCAN_CAP (was "7*1000"). - README's stats schema table has named columns (was empty header). - README's references to `site/stats.html` note that the renderer ships in a follow-up PR (it isn't in this repo yet). Co-Authored-By: Claude Opus 4.7 (1M context) --- worker/README.md | 6 ++--- worker/src/index.js | 66 +++++++++++++++++++++++++++++++++++---------- 2 files changed, 55 insertions(+), 17 deletions(-) diff --git a/worker/README.md b/worker/README.md index 3a3038d..27ee579 100644 --- a/worker/README.md +++ b/worker/README.md @@ -6,7 +6,7 @@ Cloudflare Worker backing `releases.getwick.dev`. Handles: - **Usage telemetry ingest** — two endpoints: - `POST /ping` (legacy) — daily usage pings + failure reports, aggregated into KV. - `POST /v1/events` — per-fetch telemetry `{host, strategy, ok, status, timing_ms, …}` stored in KV. -- **Public stats** — `GET /v1/stats/summary` aggregates 7 days of KV-stored events for `https://getwick.dev/stats.html`. Cached 5 min in KV. +- **Public stats** — `GET /v1/stats/summary` aggregates 7 days of KV-stored events. The renderer (`site/stats.html`) ships in a follow-up PR; this endpoint is usable on its own in the meantime. Cached 5 min in KV. - **Legacy analytics dashboard** — `GET /analytics/:key` (KV-based, auth-gated). Everything here runs on Workers Free — no Analytics Engine, no paid Workers plan required. If Wick grows past the free KV limits, the `/v1/events` handler can be swapped for Analytics Engine by flipping a binding. @@ -57,7 +57,7 @@ No additional secrets needed for the telemetry endpoints. Stored in `SUBSCRIPTIONS` KV as one key per `(date, host, strategy)`: -| | | +| Field | Format | |---|---| | Key | `evt:YYYY-MM-DD:{host}:{strategy}` | | Value | `{"fetches": N, "successes": M, "total_ms": T}` (JSON) | @@ -69,7 +69,7 @@ What's **not** stored: URL paths or query strings, request/response bodies, page ## Querying -`GET /v1/stats/summary` does the aggregation and returns shaped JSON. See `site/stats.html` for the public renderer. +`GET /v1/stats/summary` does the aggregation and returns shaped JSON. The public renderer (`site/stats.html`) ships in a follow-up PR. For ad-hoc debugging you can list KV keys directly: diff --git a/worker/src/index.js b/worker/src/index.js index c84753f..abffc01 100644 --- a/worker/src/index.js +++ b/worker/src/index.js @@ -181,19 +181,32 @@ export default { const date = new Date().toISOString().split("T")[0]; const key = `evt:${date}:${host}:${strategy}`; + // Load + coerce the current aggregate. Anything that isn't a + // plain object with numeric fields (null, a string, something + // manually edited in the KV dashboard) is treated as "start + // fresh" so we never throw during increment or write back a + // poisoned value. + const existing = { fetches: 0, successes: 0, total_ms: 0 }; const existingRaw = await env.SUBSCRIPTIONS.get(key); - let existing = { fetches: 0, successes: 0, total_ms: 0 }; if (existingRaw) { try { - existing = JSON.parse(existingRaw); - } catch { - // Corrupted KV value — start fresh rather than 500 ingestion. - existing = { fetches: 0, successes: 0, total_ms: 0 }; - } + const parsed = JSON.parse(existingRaw); + if (parsed && typeof parsed === "object") { + const f = Number(parsed.fetches); + const s = Number(parsed.successes); + const t = Number(parsed.total_ms); + if (Number.isFinite(f)) existing.fetches = f; + if (Number.isFinite(s)) existing.successes = s; + if (Number.isFinite(t)) existing.total_ms = t; + } + } catch { /* corrupt JSON — start fresh */ } } existing.fetches += 1; - if (body.ok) existing.successes += 1; + // Strict boolean: the endpoint is unauthenticated, so a truthy + // check would let `"false"` (a string) count as a success and + // skew the stats. + if (body.ok === true) existing.successes += 1; const ms = Number(body.timing_ms) || 0; if (ms > 0) existing.total_ms += Math.min(ms, 600000); // clamp at 10 min to avoid runaway sums @@ -208,7 +221,8 @@ export default { // // GET /v1/stats/summary — 7-day aggregate of the KV event counters, // cached 5 minutes. Public, no auth. Refreshing on a cache miss - // scans up to 7*1000 KV keys so keep the cache honest. + // scans across the 7-day window with a single global cap of 5000 + // KV keys, so keep the cache honest. if (request.method === "GET" && path === "/v1/stats/summary") { const cacheKey = "stats:summary:v1"; const cached = await env.SUBSCRIPTIONS.get(cacheKey); @@ -269,9 +283,16 @@ export default { const cur = agg.get(aggKey) || { host, strategy, fetches: 0, successes: 0, total_ms: 0, }; - cur.fetches += v.fetches || 0; - cur.successes += v.successes || 0; - cur.total_ms += v.total_ms || 0; + // Coerce each field via Number() and ignore non-finite + // values — a stringly-typed stored value (`"1"`) would + // otherwise turn `cur.fetches` into a string and break + // arithmetic + sorting downstream. + const fetches = Number(v.fetches); + const successes = Number(v.successes); + const totalMs = Number(v.total_ms); + if (Number.isFinite(fetches)) cur.fetches += fetches; + if (Number.isFinite(successes)) cur.successes += successes; + if (Number.isFinite(totalMs)) cur.total_ms += totalMs; agg.set(aggKey, cur); } cursor = list.list_complete ? undefined : list.cursor; @@ -493,22 +514,29 @@ export default { const contentType = resp.headers.get("content-type") || "text/html"; const responseBody = await resp.text(); + // Log `host + path` rather than the full URL so signed-URL + // tokens and other query-string secrets don't leak into + // worker logs. `targetUrl` is the parsed URL from above. console.log(JSON.stringify({ event: "proxy", customer: keys[proxyKey].customer, - url: body.url, + host: targetUrl.hostname, + path: targetUrl.pathname, status: resp.status, bytes: responseBody.length, timestamp: new Date().toISOString(), })); + // Don't reflect the user-controlled URL back in a response + // header — newlines in `body.url` can trigger invalid-header + // errors, and query-string secrets can leak into any tooling + // that records response headers. return new Response(responseBody, { status: resp.status, headers: { ...headers, "Content-Type": contentType, "X-Proxy-Status": resp.status.toString(), - "X-Proxy-Url": body.url, }, }); } catch (e) { @@ -575,10 +603,20 @@ export default { timestamp: new Date().toISOString(), })); + // Pick Content-Type by extension so `.tar.bz2` files aren't + // served as `application/gzip`. Fall back to octet-stream for + // anything we don't recognize. + let contentType = "application/octet-stream"; + if (filename.endsWith(".tar.gz") || filename.endsWith(".tgz")) { + contentType = "application/gzip"; + } else if (filename.endsWith(".tar.bz2")) { + contentType = "application/x-bzip2"; + } + return new Response(object.body, { headers: { ...headers, - "Content-Type": "application/gzip", + "Content-Type": contentType, "Content-Disposition": `attachment; filename="${filename}"`, "Cache-Control": "private, no-cache", },