tscircuit · AnasSarkiz · May 29, 2026 · May 27, 2026 · May 29, 2026
diff --git a/.github/workflows/benchmark-instructions.yml b/.github/workflows/benchmark-instructions.yml
@@ -29,7 +29,7 @@ jobs:
               '```',
               '',
               'Everything after `/benchmark` is forwarded directly to `./benchmark.sh`.',
-              'The result comment shows the latest stored main-branch benchmark artifact beside the PR run; main is not rerun for each PR benchmark request.',
+              'The result comment shows the matching dataset from the latest stored main-branch benchmark artifact beside the PR run; main is not rerun for each PR benchmark request.',
               '',
               'Examples:',
               '- `/benchmark` -> hg07, all samples, core solver',

diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
@@ -220,22 +220,85 @@ jobs:
       - name: Run benchmark
         env:
           BENCHMARK_ARGS_JSON: ${{ steps.parse.outputs.benchmark_args_json }}
+          GITHUB_EVENT_NAME: ${{ github.event_name }}
+          GITHUB_REF_NAME: ${{ github.ref_name }}
         run: |
           chmod +x ./benchmark.sh
           set +e
           node <<'NODE'
+          const fs = require('node:fs')
           const { spawnSync } = require('node:child_process')
           const args = JSON.parse(process.env.BENCHMARK_ARGS_JSON || '[]')
-          const renderedArgs = args.map((arg) => JSON.stringify(arg)).join(' ')
-          console.log(`Running benchmark command: ./benchmark.sh${renderedArgs ? ` ${renderedArgs}` : ''}`)
+          const isMainPush = process.env.GITHUB_EVENT_NAME === 'push' && process.env.GITHUB_REF_NAME === 'main' // true only for push events on the main ref
+          const mainBranchDatasets = ['hg07', 'srj18'] // datasets benchmarked, in this order, on every main push
+          // Return a copy of baseArgs with every `--dataset` flag and the element following it removed.
+          const withoutDataset = (baseArgs) => {
+            const nextArgs = []
+            for (let index = 0; index < baseArgs.length; index += 1) {
+              if (baseArgs[index] === '--dataset') { // exact token match only; a combined `--dataset=name` token is kept
+                index += 1 // also skip the flag's value (the next element)
+                continue
+              }
+              nextArgs.push(baseArgs[index])
+            }
+            return nextArgs
+          }
+          // Build an argument list that selects exactly one dataset: drop existing `--dataset` pairs, then append the new one.
+          const withDataset = (baseArgs, datasetName) => [
+            ...withoutDataset(baseArgs),
+            '--dataset',
+            datasetName,
+          ]
+          // Run ./benchmark.sh synchronously with benchmarkArgs, logging the command first; returns the exit status.
+          const runBenchmark = (benchmarkArgs) => {
+            const renderedArgs = benchmarkArgs.map((arg) => JSON.stringify(arg)).join(' ') // JSON-quoted for the log line only
+            console.log(`Running benchmark command: ./benchmark.sh${renderedArgs ? ` ${renderedArgs}` : ''}`)
+
+            const result = spawnSync('./benchmark.sh', benchmarkArgs, {
+              stdio: 'inherit',
+              env: process.env,
+            })
+
+            if (result.error) throw result.error // the child process could not be spawned or run
+            return result.status ?? 1 // a null status (no exit code available) is reported as failure
+          }
+
+          if (isMainPush) {
+            const reports = []
+            const textReports = []
+            let status = 0
+
+            for (const datasetName of mainBranchDatasets) {
+              status = runBenchmark(withDataset(args, datasetName))
+              if (status !== 0) break
+
+              const report = JSON.parse(fs.readFileSync('benchmark-result.json', 'utf8'))
+              const text = fs.readFileSync('benchmark-result.txt', 'utf8').trim()
+              reports.push(report)
+              textReports.push(`## Dataset: ${report.datasetName || datasetName}\n\n${text}`)
+            }
+
+            if (reports.length > 0) {
+              fs.writeFileSync(
+                'benchmark-result.json',
+                JSON.stringify(
+                  {
+                    version: 2,
+                    kind: 'benchmark-report-collection',
+                    generatedFor: 'main',
+                    reports,
+                  },
+                  null,
+                  2,
+                ),
+              )
+              fs.writeFileSync('benchmark-result.txt', `${textReports.join('\n\n---\n\n')}\n`)
+            }
 
-          const result = spawnSync('./benchmark.sh', args, {
-            stdio: 'inherit',
-            env: process.env,
-          })
+            process.exit(status)
+          }
 
-          if (result.error) throw result.error
-          process.exit(result.status ?? 1)
+          process.exit(runBenchmark(args)) // not a main push: run once with the parsed args unchanged and exit with that status
           NODE
           STATUS=$?
           set -e
@@ -414,10 +477,42 @@ jobs:
               '</details>',
             ]
 
+            const getBenchmarkReports = (report) => { // normalize a report collection or a single report into an array of reports
+              if (Array.isArray(report?.reports)) {
+                return report.reports.filter(Boolean) // collection shape: drop null/empty entries
+              }
+              return report ? [report] : [] // single report is wrapped; a missing report yields an empty list
+            }
+            // Pick the main-branch report whose datasetName equals the PR report's; returns null when none matches.
+            const selectMainReportForPr = (mainReport, prReport) => {
+              const reports = getBenchmarkReports(mainReport)
+              if (reports.length === 0) return null
+
+              const prDatasetName = prReport?.datasetName
+              if (!prDatasetName) return reports[0] ?? null // PR dataset unknown: fall back to the first main report
+
+              return reports.find((report) => report?.datasetName === prDatasetName) ?? null
+            }
+            // Build the placeholder text used instead of the main-branch text when no matching main report was selected.
+            const getMissingMainMessage = (mainReport, prReport) => {
+              const prDatasetName = prReport?.datasetName
+              if (!prDatasetName) return '(no main branch benchmark result available)'
+              const reports = getBenchmarkReports(mainReport)
+              if (reports.length === 0) { // no main reports at all
+                return `(no main branch benchmark result available for dataset ${prDatasetName})`
+              }
+              const availableDatasets = reports.map((report) => report?.datasetName).filter(Boolean).join(', ') // names of the main reports that do exist
+              return `(no cached main branch benchmark result for dataset ${prDatasetName}; cached datasets: ${availableDatasets || 'none'})`
+            }
+
             const mainText = readText('benchmark-result-main.txt')
             const prText = readText('benchmark-result-pr.txt') || readText('benchmark-result.txt')
-            const mainReport = readJson('benchmark-result-main.json')
+            const rawMainReport = readJson('benchmark-result-main.json') // may be a single report or a report collection
             const prReport = readJson('benchmark-result-pr.json') || readJson('benchmark-result.json')
+            const mainReport = selectMainReportForPr(rawMainReport, prReport) // only the report matching the PR's dataset is used below
+            const mainFallbackText = mainReport
+              ? mainText
+              : getMissingMainMessage(rawMainReport, prReport) // explains why no main report is available for this dataset
             const title = jobStatus === 'success'
               ? '## Tiny Hypergraph Benchmark Results'
               : '## Tiny Hypergraph Benchmark Failed'
@@ -427,7 +522,7 @@ jobs:
               '',
               ...renderComparison(mainReport, prReport),
               '',
-              ...renderDetails('Main Branch Details', renderReport(mainReport, mainText, options)),
+              ...renderDetails('Main Branch Details', renderReport(mainReport, mainFallbackText, options)),
               '',
               ...renderDetails('PR Details', renderReport(prReport, prText, options), true),
               '',

diff --git a/README.md b/README.md
@@ -51,7 +51,6 @@ If you want to call the converter directly:
 import { convertToSerializedHyperGraph } from "lib/compat/convertToSerializedHyperGraph"
 
 const solvedGraph = convertToSerializedHyperGraph(solver)
-
 ```
 
 The converter expects the solver to be fully solved and not failed.