From 7fb2bb7c7fae8a849f52673096a1486aceb621b8 Mon Sep 17 00:00:00 2001 From: Peter Wielander Date: Fri, 22 May 2026 12:28:43 +0200 Subject: [PATCH 1/2] [tests] Surface HTTP status + elapsed time in abort-fetch e2e diagnostics When abortFetchInFlightWorkflow flakes in CI, the assertion failure ("expected 'fetch' to be 'timeout'") gave us no signal as to why httpbin.org/delay/30 returned early. The step now also reports `status` and `elapsedMs`, and the test passes the full returnValue JSON as the assertion message so the next flake reveals the response status and timing. Same treatment for abortVoidSleepTimeoutWorkflow which uses the same step + URL and flakes for the same upstream reason. Co-Authored-By: Claude Opus 4.7 (1M context) --- .changeset/upset-ghosts-rush.md | 4 ++++ packages/core/e2e/e2e.test.ts | 19 ++++++++++++++----- workbench/example/workflows/99_e2e.ts | 24 +++++++++++++++++++++--- 3 files changed, 39 insertions(+), 8 deletions(-) create mode 100644 .changeset/upset-ghosts-rush.md diff --git a/.changeset/upset-ghosts-rush.md b/.changeset/upset-ghosts-rush.md new file mode 100644 index 0000000000..864621d664 --- /dev/null +++ b/.changeset/upset-ghosts-rush.md @@ -0,0 +1,4 @@ +--- +--- + +chore(tests): surface HTTP status and elapsed time in abort-fetch e2e diagnostics so flaky failures of `abortFetchInFlightWorkflow` and `abortVoidSleepTimeoutWorkflow` reveal why the slow upstream returned early. diff --git a/packages/core/e2e/e2e.test.ts b/packages/core/e2e/e2e.test.ts index 23021b4107..5f00f28ae1 100644 --- a/packages/core/e2e/e2e.test.ts +++ b/packages/core/e2e/e2e.test.ts @@ -3016,11 +3016,16 @@ describe('e2e', () => { const run = await start(await e2e('abortFetchInFlightWorkflow'), []); const returnValue = await run.returnValue; - expect(returnValue.winner).toBe('timeout'); + // Include the full returnValue (status + elapsedMs from the step) in + // the assertion message so a flaky failure surfaces *why* fetch won + // the race — e.g. 
httpbin returning a 5xx in <1s — instead of just + // "expected 'fetch' to be 'timeout'". + const summary = JSON.stringify(returnValue); + expect(returnValue.winner, summary).toBe('timeout'); // The step's catch path returned aborted=true (fetch threw AbortError), // not the natural-completion path (which would set ok=true,aborted=false). - expect(returnValue.fetchResult.aborted).toBe(true); - expect(returnValue.fetchResult.ok).toBe(false); + expect(returnValue.fetchResult.aborted, summary).toBe(true); + expect(returnValue.fetchResult.ok, summary).toBe(false); } ); @@ -3041,8 +3046,12 @@ describe('e2e', () => { const run = await start(await e2e('abortVoidSleepTimeoutWorkflow'), []); const returnValue = await run.returnValue; - expect(returnValue.aborted).toBe(true); - expect(returnValue.ok).toBe(false); + // Same diagnostic treatment as abortFetchInFlightWorkflow: when the + // slow upstream returns early the step result includes status and + // elapsedMs, which are what we'll need to triage the next flake. + const summary = JSON.stringify(returnValue); + expect(returnValue.aborted, summary).toBe(true); + expect(returnValue.ok, summary).toBe(false); } ); diff --git a/workbench/example/workflows/99_e2e.ts b/workbench/example/workflows/99_e2e.ts index af4d52af0f..ac92e6f73e 100644 --- a/workbench/example/workflows/99_e2e.ts +++ b/workbench/example/workflows/99_e2e.ts @@ -1727,18 +1727,36 @@ async function abortFromStep( /** * Step that uses fetch with an AbortSignal. * Uses a URL that intentionally delays, so the abort cancels it. + * + * Also reports `status` and `elapsedMs` to make flaky failures of the + * abort-fetch tests actionable: when the upstream slow endpoint (e.g. + * httpbin.org/delay/N) returns early — sometimes in well under a second + * from GitHub Actions runners — the diagnostic surfaces the actual HTTP + * status and timing instead of leaving us guessing why the race winner + * was `fetch` instead of `timeout`. 
*/ async function fetchWithSignal( url: string, signal: AbortSignal -): Promise<{ ok: boolean; aborted: boolean }> { +): Promise<{ + ok: boolean; + aborted: boolean; + status?: number; + elapsedMs: number; +}> { 'use step'; + const startedAt = Date.now(); try { const response = await globalThis.fetch(url, { signal }); - return { ok: response.ok, aborted: false }; + return { + ok: response.ok, + aborted: false, + status: response.status, + elapsedMs: Date.now() - startedAt, + }; } catch (err: any) { if (err.name === 'AbortError') { - return { ok: false, aborted: true }; + return { ok: false, aborted: true, elapsedMs: Date.now() - startedAt }; } throw err; } From 017645f84369a06b88696a3b127e12a33b196754 Mon Sep 17 00:00:00 2001 From: Peter Wielander Date: Fri, 22 May 2026 13:05:01 +0200 Subject: [PATCH 2/2] [tests] Fall back to httpbin only when postman-echo fails MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The diagnostic patch in the previous commit confirmed the abort-fetch flake: httpbin.org/delay/30 returns HTTP 502 from GH Actions runners in 56ms-1709ms intermittently — observed 4 times in a single CI run. Switch the abort-fetch e2e tests to a primary slow endpoint (postman-echo.com/delay/10) with httpbin.org/delay/10 as a fallback that only kicks in when the primary returns a non-2xx response or fails with a non-AbortError network error. Both upstreams would need to be unhealthy at the same instant to flake the test, which is far less likely than either failing alone.
Co-Authored-By: Claude Opus 4.7 (1M context) --- workbench/example/workflows/99_e2e.ts | 97 ++++++++++++++++++--------- 1 file changed, 65 insertions(+), 32 deletions(-) diff --git a/workbench/example/workflows/99_e2e.ts b/workbench/example/workflows/99_e2e.ts index ac92e6f73e..41ed11de25 100644 --- a/workbench/example/workflows/99_e2e.ts +++ b/workbench/example/workflows/99_e2e.ts @@ -1728,40 +1728,77 @@ async function abortFromStep( * Step that uses fetch with an AbortSignal. * Uses a URL that intentionally delays, so the abort cancels it. * - * Also reports `status` and `elapsedMs` to make flaky failures of the - * abort-fetch tests actionable: when the upstream slow endpoint (e.g. - * httpbin.org/delay/N) returns early — sometimes in well under a second - * from GitHub Actions runners — the diagnostic surfaces the actual HTTP - * status and timing instead of leaving us guessing why the race winner - * was `fetch` instead of `timeout`. + * Accepts a list of URLs and tries them in order, falling back to the + * next on any non-2xx response (or non-AbortError network failure) so a + * single bad upstream doesn't flake the abort-fetch tests. Empirically, + * httpbin.org returns 502 from GH Actions runners often enough to dominate + * CI flakiness; a second slow endpoint gives us belt and suspenders. + * + * Reports `elapsedMs` and per-URL `attempts` (plus `status` and `url` on + * success) so that when the abort-fetch tests do fail, the assertion + * message shows exactly what the upstream(s) returned instead of leaving + * us guessing why the race winner was `fetch` instead of `timeout`.
*/ async function fetchWithSignal( - url: string, + urls: readonly string[], signal: AbortSignal ): Promise<{ ok: boolean; aborted: boolean; status?: number; + url?: string; elapsedMs: number; + attempts: { url: string; status?: number; error?: string }[]; }> { 'use step'; const startedAt = Date.now(); - try { - const response = await globalThis.fetch(url, { signal }); - return { - ok: response.ok, - aborted: false, - status: response.status, - elapsedMs: Date.now() - startedAt, - }; - } catch (err: any) { - if (err.name === 'AbortError') { - return { ok: false, aborted: true, elapsedMs: Date.now() - startedAt }; + const attempts: { url: string; status?: number; error?: string }[] = []; + for (const url of urls) { + try { + const response = await globalThis.fetch(url, { signal }); + attempts.push({ url, status: response.status }); + if (response.ok) { + return { + ok: true, + aborted: false, + status: response.status, + url, + elapsedMs: Date.now() - startedAt, + attempts, + }; + } + // Non-2xx — fall through and try the next URL. + } catch (err: any) { + if (err.name === 'AbortError') { + attempts.push({ url, error: 'AbortError' }); + return { + ok: false, + aborted: true, + elapsedMs: Date.now() - startedAt, + attempts, + }; + } + attempts.push({ url, error: err?.message ?? String(err) }); + // Network error — fall through and try the next URL. } - throw err; } + return { + ok: false, + aborted: false, + elapsedMs: Date.now() - startedAt, + attempts, + }; } +// Slow endpoints used by the abort-fetch e2e tests. Tried in order; postman- +// echo first because httpbin.org has historically returned 502s from GH +// Actions. Both cap at /delay/10 in practice, which is comfortably longer +// than the 2s race threshold these tests use. +const SLOW_FETCH_URLS = [ + 'https://postman-echo.com/delay/10', + 'https://httpbin.org/delay/10', +] as const; + /** * E2E: Basic timeout cancellation. * Creates controller in workflow, races step vs sleep, aborts on timeout. 
@@ -2212,15 +2249,14 @@ export async function abortFetchInFlightWorkflow() { 'use workflow'; const controller = new AbortController(); - // httpbin.org/delay/N holds the response open for N seconds — used here - // as a slow endpoint that the abort can cancel mid-flight. Same external- - // service pattern as other e2e workflows in this file (jsonplaceholder, - // example.com). Avoids needing a per-workbench /api/delay route, which - // would only exist on the one workbench it was added to. - const fetchPromise = fetchWithSignal( - 'https://httpbin.org/delay/30', - controller.signal - ); + // Each SLOW_FETCH_URLS endpoint holds the response open for ~10s — used here + // as a slow endpoint that the abort can cancel mid-flight. Same external- + // service pattern as other e2e workflows in this file (jsonplaceholder, + // example.com). Avoids needing a per-workbench /api/delay route, which would + // only exist on the one workbench it was added to. The step falls back to + // the second URL only if the first returns a non-2xx response or a + // non-AbortError network failure, so one bad upstream doesn't flake the test. + const fetchPromise = fetchWithSignal(SLOW_FETCH_URLS, controller.signal); // Race the fetch against a 2s sleep. Sleep wins; abort fires. const winner = await Promise.race([ @@ -2263,10 +2299,7 @@ export async function abortVoidSleepTimeoutWorkflow() { const controller = new AbortController(); void sleep('2s').then(() => controller.abort()); - return await fetchWithSignal( - 'https://httpbin.org/delay/30', - controller.signal - ); + return await fetchWithSignal(SLOW_FETCH_URLS, controller.signal); } /**