From 8913c1ad8a6f1c5b95a0af9db7ed8dffbe53fa16 Mon Sep 17 00:00:00 2001 From: Zax Shen Date: Mon, 27 Apr 2026 00:29:28 -0700 Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9B=20fix(ci):=20wire=20TMB=5FCLAUDE?= =?UTF-8?q?=5FTIMEOUT=3D600=20into=20l5-dogfood=20+=20release-canary?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit v0.5.0-rc.2 L5 dogfood + release-canary failed on 3 flows because runs hit the default 180s claude -p timeout mid-SWE chain. The full planning + SWE chain takes ~190s locally even on a clean path. We added the TMB_CLAUDE_TIMEOUT env override in #172 and wired it through ab-scenario.yml in #177 — but missed l5-dogfood.yml and release-canary.Dockerfile. This PR fixes that gap. Failure pattern (rc.2): - 02-simple-task: 180.4s elapsed — SWE cut off before file_registry + last_verified_sha update - 10-codebase-memory-cold-start: 180.0s — same - 11-codebase-memory-verify-on-drift: 180s — same - 12-source-edit-attempt: 180s in canary, planning_complete missing (12 in L5 dogfood ALSO showed trajectory_required failure, but the underlying chain ran — that's the separate #164 / #179 stream-json refactor territory, not a timeout issue.) Cost impact: per-flow worst case rises from 180s → 600s. Most flows finish in ~200s; budget is rarely consumed in full. Total CI time expected +0-5 minutes. Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/l5-dogfood.yml | 5 +++++ tests/docker/release-canary.Dockerfile | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/.github/workflows/l5-dogfood.yml b/.github/workflows/l5-dogfood.yml index 8e1a84c6..cb831076 100644 --- a/.github/workflows/l5-dogfood.yml +++ b/.github/workflows/l5-dogfood.yml @@ -53,6 +53,11 @@ jobs: env: CLAUDE_CODE_OAUTH_TOKEN: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }} L5_KEEP_ARTIFACTS: '1' + # Default 180s is too tight for code-touching flows that spawn SWE + # (the full planning + SWE chain runs ~190s locally even on a clean + # path; v0.5.0-rc.2 had 3 flows fail because runs hit the 180s cap + # mid-SWE). 600s gives headroom without ballooning total CI time. + TMB_CLAUDE_TIMEOUT: '600' run: bash tests/dogfood/run-l5.sh - name: Upload trajectory dumps on failure diff --git a/tests/docker/release-canary.Dockerfile b/tests/docker/release-canary.Dockerfile index 5204f584..7c2f8a05 100644 --- a/tests/docker/release-canary.Dockerfile +++ b/tests/docker/release-canary.Dockerfile @@ -76,6 +76,11 @@ USER node WORKDIR /plugin ENV TMB_DEBUG_TRAJECTORY=1 ENV HOME=/home/node +# Default 180s is too tight for code-touching flows that spawn SWE +# (the full planning + SWE chain runs ~190s locally even on a clean path; +# v0.5.0-rc.2 had 3 flows fail because runs hit the 180s cap mid-SWE). +# 600s gives headroom without ballooning total CI time. +ENV TMB_CLAUDE_TIMEOUT=600 # The runner reads CLAUDE_CODE_OAUTH_TOKEN from env. BuildKit secrets are # mounted at /run/secrets/; uid=1000 makes the file readable by node.