import 'dotenv/config'
import { createGateway } from '@ai-sdk/gateway'
import { generateText, stepCountIs, tool, ToolLoopAgent } from 'ai'
import { z } from 'zod'
/** Command-line arguments with the node binary and script path removed. */
const args = process.argv.slice(2)

/**
 * Reads a `--name=value` option from the command line.
 *
 * @param name - Option name without the leading dashes.
 * @param defaultValue - Value returned when no `--name=` argument is present.
 * @returns Everything after `--name=` in the first matching argument, or
 *   `defaultValue` when there is no match. An argument of exactly `--name=`
 *   yields the empty string.
 */
function getArg(name: string, defaultValue: string): string {
  const prefix = `--${name}=`
  const match = args.find((a) => a.startsWith(prefix))
  // Slice off the prefix rather than splitting on '=': splitting kept only the
  // text up to the second '=', truncating values that contain '=' themselves.
  return match === undefined ? defaultValue : match.slice(prefix.length)
}
/** Requests fired per model (`--count`, default 20). */
const COUNT_PER_MODEL = parseInt(getArg('count', '20'))

/** Number of requests started together in one batch (`--concurrency`, default 10). */
const CONCURRENCY = parseInt(getArg('concurrency', '10'))

/**
 * Model ids to exercise (`--models`, comma-separated).
 *
 * Entries are trimmed and empty entries are dropped, so input such as
 * `--models=a, b,` does not produce ids like `" b"` or `""`.
 */
const MODELS = getArg(
  'models',
  'gpt-4.1-mini,gemini-3-flash-preview,claude-sonnet-4-6',
)
  .split(',')
  .map((id) => id.trim())
  .filter((id) => id.length > 0)
/**
 * Options spread into both the `generateText` call and the agent's `generate`
 * call in this script: `maxRetries` of 3 and `timeout.stepMs` of 90 000 ms.
 */
const AI_CALL_DEFAULTS = {
  maxRetries: 3,
  timeout: { stepMs: 90_000 },
} as const
/**
 * Turns a bare model id into a `provider/model` gateway id.
 *
 * Ids that already contain a `/` are returned untouched. Otherwise the
 * provider is chosen from the id's prefix (`gemini-`, `claude-`, `grok-`),
 * falling back to `openai` when no prefix matches.
 *
 * @param modelId - Bare or already-qualified model id.
 * @returns The gateway-qualified model id.
 */
function toGatewayModelId(modelId: string): string {
  if (modelId.includes('/')) {
    return modelId
  }

  const providerByPrefix: ReadonlyArray<readonly [string, string]> = [
    ['gemini-', 'google'],
    ['claude-', 'anthropic'],
    ['grok-', 'xai'],
  ]
  const matched = providerByPrefix.find(([prefix]) => modelId.startsWith(prefix))
  const provider = matched ? matched[1] : 'openai'

  return `${provider}/${modelId}`
}
/**
 * Builds a gateway provider configured with `AI_GATEWAY_API_KEY` and returns
 * the model handle for `model`. A fresh provider is created on every call.
 *
 * @param model - Bare or provider-qualified model id.
 */
function createGatewayModel(model: string) {
  const { AI_GATEWAY_API_KEY: apiKey } = process.env
  return createGateway({ apiKey })(toGatewayModelId(model))
}
/** Outcome of a single request fired by the stress test. */
interface RequestResult {
  /** Model id the request was sent to, as given on the command line. */
  model: string
  /** Which kind of request was fired. */
  mode: 'simple' | 'tool-loop'
  /** Caller-supplied request number; `main` numbers requests from 0 within each mode. */
  index: number
  /** Elapsed milliseconds between starting the request and it settling. */
  durationMs: number
  /** `true` when the call resolved, `false` when it threw. */
  success: boolean
  /** Length of the agent result's `steps` array; only set for successful tool-loop requests. */
  steps?: number
  /** Name of the caught error; only set on failure. */
  errorName?: string
  /** Caught error's message, cut to 200 characters; only set on failure. */
  error?: string
}
/**
 * Canned tools handed to the tool-loop agent. Each declares a zod input schema
 * and an `execute` that returns fabricated data built from its input.
 */
const dummyTools = {
  get_weather: tool({
    description: 'Get current weather for a city',
    inputSchema: z.object({ city: z.string() }),
    execute: async ({ city }) => {
      // Random whole-number temperature in the range 0..30.
      const temperature = Math.round(Math.random() * 30)
      return { city, temperature, condition: 'sunny' }
    },
  }),

  lookup_order: tool({
    description: 'Look up an order by ID',
    inputSchema: z.object({ orderId: z.string() }),
    execute: async ({ orderId }) => {
      // Up to six random base-36 characters serve as a fake tracking suffix.
      const suffix = Math.random().toString(36).slice(2, 8)
      return { orderId, status: 'shipped', trackingNumber: `TRK-${suffix}` }
    },
  }),

  search_knowledge: tool({
    description: 'Search the knowledge base',
    inputSchema: z.object({ query: z.string() }),
    execute: async ({ query }) => {
      // The same two FAQ hits are returned regardless of the query.
      const results = [
        { title: 'FAQ: Returns', snippet: 'Return within 14 days...' },
        { title: 'FAQ: Shipping', snippet: 'Free shipping over 50€...' },
      ]
      return { results, query }
    },
  }),
}
/**
 * Sends one minimal `generateText` request for `model` and times it.
 *
 * Failures are caught and reported in the returned result instead of being
 * thrown, so a batch of these can be awaited together.
 *
 * @param index - Request number, echoed in the prompt and in the result.
 * @param model - Bare or provider-qualified model id.
 * @returns A `simple`-mode result carrying the duration and, on failure, the
 *   error's name and its message cut to 200 characters.
 */
async function fireSimpleRequest(index: number, model: string): Promise<RequestResult> {
  const start = Date.now()
  try {
    await generateText({
      model: createGatewayModel(model),
      prompt: `Reply with exactly: "OK ${index}"`,
      maxOutputTokens: 20,
      ...AI_CALL_DEFAULTS,
    })
    return { index, model, mode: 'simple', success: true, durationMs: Date.now() - start }
  } catch (error: unknown) {
    const durationMs = Date.now() - start
    // A rejection is not guaranteed to be an Error: read `message`/`name` only
    // when they really are strings, and fall back to the stringified value so
    // the failure is still described (and `.substring` can never throw here).
    const details =
      typeof error === 'object' && error !== null
        ? (error as { message?: unknown; name?: unknown })
        : undefined
    const message = typeof details?.message === 'string' ? details.message : String(error)
    const errorName = typeof details?.name === 'string' ? details.name : undefined
    return {
      index,
      model,
      mode: 'simple',
      success: false,
      durationMs,
      error: message.substring(0, 200),
      errorName,
    }
  }
}
/**
 * Runs one `ToolLoopAgent` conversation for `model` using the dummy tools and
 * times it. The agent is configured with `stopWhen: stepCountIs(5)`.
 *
 * Failures are caught and reported in the returned result instead of being
 * thrown, so a batch of these can be awaited together.
 *
 * @param index - Request number, echoed in the result.
 * @param model - Bare or provider-qualified model id.
 * @returns A `tool-loop`-mode result carrying the duration, the number of
 *   steps on success, and on failure the error's name and its message cut to
 *   200 characters.
 */
async function fireToolLoopRequest(index: number, model: string): Promise<RequestResult> {
  const start = Date.now()
  try {
    const agent = new ToolLoopAgent({
      model: createGatewayModel(model),
      instructions: 'You are a helpful agent. Use the available tools to answer. You MUST call at least one tool.',
      tools: dummyTools,
      stopWhen: stepCountIs(5),
    })
    const result = await agent.generate({
      messages: [{ role: 'user', content: 'What is the weather in Berlin and check order ORD-12345? Also search for return policy.' }],
      ...AI_CALL_DEFAULTS,
    })
    return {
      index,
      model,
      mode: 'tool-loop',
      success: true,
      durationMs: Date.now() - start,
      steps: result.steps?.length,
    }
  } catch (error: unknown) {
    const durationMs = Date.now() - start
    // A rejection is not guaranteed to be an Error: read `message`/`name` only
    // when they really are strings, and fall back to the stringified value so
    // the failure is still described (and `.substring` can never throw here).
    const details =
      typeof error === 'object' && error !== null
        ? (error as { message?: unknown; name?: unknown })
        : undefined
    const message = typeof details?.message === 'string' ? details.message : String(error)
    const errorName = typeof details?.name === 'string' ? details.name : undefined
    return {
      index,
      model,
      mode: 'tool-loop',
      success: false,
      durationMs,
      error: message.substring(0, 200),
      errorName,
    }
  }
}
/**
 * Entry point: validates configuration, then for each model fires
 * `COUNT_PER_MODEL` requests (half simple, half tool-loop, simple getting the
 * extra one when the count is odd) in batches of `CONCURRENCY`, and prints a
 * summary.
 *
 * Exits the process with code 1 when configuration is invalid or any request
 * failed, and with code 0 otherwise.
 */
async function main(): Promise<void> {
  if (!process.env.AI_GATEWAY_API_KEY) {
    console.error('Missing AI_GATEWAY_API_KEY')
    process.exit(1)
  }
  // Reject unusable numeric options up front. A concurrency of 0 or less would
  // keep the batching loop below from ever advancing, and NaN would run
  // nothing and report "NaN%".
  if (!Number.isInteger(COUNT_PER_MODEL) || COUNT_PER_MODEL < 1) {
    console.error(`Invalid --count: expected a positive integer, got ${COUNT_PER_MODEL}`)
    process.exit(1)
  }
  if (!Number.isInteger(CONCURRENCY) || CONCURRENCY < 1) {
    console.error(`Invalid --concurrency: expected a positive integer, got ${CONCURRENCY}`)
    process.exit(1)
  }
  if (MODELS.length === 0) {
    console.error('Invalid --models: expected at least one model id')
    process.exit(1)
  }

  // One label for nameless errors everywhere, so per-batch lines agree with
  // the final breakdown instead of printing "undefined".
  const errorLabel = (r: RequestResult): string => r.errorName || 'Unknown'

  console.log(`\n=== AI Gateway Stress Test ===`)
  console.log(`Models: ${MODELS.join(', ')} | Count: ${COUNT_PER_MODEL} | Concurrency: ${CONCURRENCY}\n`)

  const allResults: RequestResult[] = []
  for (const model of MODELS) {
    console.log(`--- ${model} ---`)
    const simpleCount = Math.ceil(COUNT_PER_MODEL / 2)
    const toolLoopCount = COUNT_PER_MODEL - simpleCount
    // Tasks are thunks so that nothing starts until its batch is reached.
    const tasks = [
      ...Array.from({ length: simpleCount }, (_, i) => () => fireSimpleRequest(i, model)),
      ...Array.from({ length: toolLoopCount }, (_, i) => () => fireToolLoopRequest(i, model)),
    ]
    const batchCount = Math.ceil(tasks.length / CONCURRENCY)

    for (let i = 0; i < tasks.length; i += CONCURRENCY) {
      const batch = tasks.slice(i, i + CONCURRENCY)
      process.stdout.write(` Batch ${Math.floor(i / CONCURRENCY) + 1}/${batchCount} (${batch.length} reqs)... `)
      // The whole batch is awaited before the next one starts.
      const results = await Promise.all(batch.map((t) => t()))
      const fails = results.filter((r) => !r.success)
      const ok = results.length - fails.length
      console.log(`${ok}/${batch.length} OK` + (fails.length ? ` | FAIL: ${fails.map((f) => `${f.mode}#${f.index}:${errorLabel(f)}`).join(', ')}` : ''))
      allResults.push(...results)
    }
  }

  const failures = allResults.filter((r) => !r.success)
  const succeeded = allResults.length - failures.length
  console.log(`\nSuccess: ${succeeded}/${allResults.length} (${((succeeded / allResults.length) * 100).toFixed(1)}%)`)

  if (failures.length > 0) {
    const byError = new Map<string, number>()
    for (const f of failures) byError.set(errorLabel(f), (byError.get(errorLabel(f)) ?? 0) + 1)
    console.log('Error breakdown:', Object.fromEntries(byError))
    for (const f of failures) console.log(` FAIL ${f.mode}#${f.index} ${f.model} (${f.durationMs}ms): [${errorLabel(f)}] ${f.error}`)
  }

  process.exit(failures.length > 0 ? 1 : 0)
}
// Anything that escapes main() is fatal: log it and exit with a failure code.
main().catch((fatalError) => {
  console.error('Fatal:', fatalError)
  process.exit(1)
})
Description
We're seeing three distinct intermittent error types from `@ai-sdk/gateway` in production, affecting all models (OpenAI, Gemini, Anthropic). These are not rate-limit or quota errors — they happen under normal production load (~50-100 concurrent AI calls) and affect both `generateText` and `ToolLoopAgent` tool-loop workflows.

Error types observed
1. GatewayResponseError (related to #9579)
2. GatewayTimeoutError / HeadersTimeoutError
This happens even with `timeout: { stepMs: 90_000 }` (90s). The error fires in ~900ms, suggesting the gateway itself is rejecting/dropping connections, not a real timeout.

3. AbortError
Reproduction script
Self-contained stress test. Errors are intermittent — may need multiple runs or higher concurrency to trigger.
stress-test-gateway.ts
Workaround
We wrap every model from `createGateway()` with a `doGenerate`/`doStream` proxy that catches these transient errors and retries via a different gateway. This works but shouldn't be necessary:

Environment
- `@ai-sdk/gateway`: 3.0.66
- `ai`: 6.0.78
- 22.22.0 (label lost in extraction; presumably the Node.js version)