diff --git a/.changeset/honor-retry-after-header.md b/.changeset/honor-retry-after-header.md new file mode 100644 index 000000000..0fa2fec3a --- /dev/null +++ b/.changeset/honor-retry-after-header.md @@ -0,0 +1,5 @@ +--- +"@voltagent/core": patch +--- + +Honor the provider's `Retry-After` header on retried model calls. The retry loop in `executeWithModelFallback` previously always used local exponential backoff capped at 10 seconds, regardless of what the server asked for; this caused concurrent agents under shared 429/503 contention to converge their retry windows. The delay now uses `Retry-After` (delta-seconds or HTTP-date, RFC 7231) as a floor, keeps the exponential floor as a backpressure baseline, and caps at 5 minutes for safety. diff --git a/packages/core/src/agent/agent.spec.ts b/packages/core/src/agent/agent.spec.ts index ca7bf4ac8..8439f573f 100644 --- a/packages/core/src/agent/agent.spec.ts +++ b/packages/core/src/agent/agent.spec.ts @@ -3689,6 +3689,71 @@ Use pandas and summarize findings.`.split("\n"), } }); + it("should honor the Retry-After header on retried model calls", async () => { + const agent = new Agent({ + name: "RetryAfterAgent", + instructions: "Test", + model: mockModel as any, + maxRetries: 1, + }); + + const mockResponse = { + text: "Retry response", + content: [{ type: "text", text: "Retry response" }], + reasoning: [], + files: [], + sources: [], + toolCalls: [], + toolResults: [], + finishReason: "stop", + usage: { inputTokens: 10, outputTokens: 5, totalTokens: 15 }, + warnings: [], + request: {}, + response: { + id: "retry-after-response", + modelId: "test-model", + timestamp: new Date(), + messages: [], + }, + steps: [], + }; + + const observedDelays: number[] = []; + const originalSetTimeout = global.setTimeout; + const setTimeoutSpy = vi.spyOn(global, "setTimeout").mockImplementation((( + cb: any, + delay: any, + ...args: any[] + ) => { + if (typeof delay === "number") observedDelays.push(delay); + return originalSetTimeout(cb, 0, ...args); + }) as typeof setTimeout); + + let callCount = 0; + vi.mocked(ai.generateText).mockImplementation(async () => { + callCount += 1; + if (callCount < 2) { + const error = new Error("Rate limited"); + (error as any).isRetryable = true; + (error as any).statusCode = 429; + (error as any).responseHeaders = { "retry-after": "30" }; + throw error; + } + return mockResponse as any; + }); + + try { + const result = await agent.generateText("Test"); + + expect(result.text).toBe("Retry response"); + expect(vi.mocked(ai.generateText)).toHaveBeenCalledTimes(2); + // attemptIndex=0: exponential floor=1000ms, server hint=30000ms ⇒ delay=30000. + expect(observedDelays).toContain(30_000); + } finally { + setTimeoutSpy.mockRestore(); + } + }); + it("should handle model errors gracefully", async () => { const agent = new Agent({ name: "TestAgent", diff --git a/packages/core/src/agent/agent.ts b/packages/core/src/agent/agent.ts index 6c1a1a25b..abd863747 100644 --- a/packages/core/src/agent/agent.ts +++ b/packages/core/src/agent/agent.ts @@ -120,6 +120,7 @@ import type { ToolExecuteOptions, UsageInfo, } from "./providers/base/types"; +import { computeRetryDelayMs } from "./retry-after"; import { coerceStringifiedJsonToolArgs } from "./tool-input-coercion"; export type { AgentHooks } from "./hooks"; export type { @@ -5885,7 +5886,7 @@ export class Agent { const canRetry = retryEligible && !isLastAttempt; if (canRetry) { - const retryDelayMs = Math.min(1000 * 2 ** attemptIndex, 10000); + const retryDelayMs = computeRetryDelayMs(error, attemptIndex); logger.debug(`[Agent:${this.name}] - Model attempt failed, retrying`, { operation, modelName, diff --git a/packages/core/src/agent/retry-after.spec.ts b/packages/core/src/agent/retry-after.spec.ts new file mode 100644 index 000000000..bcdca4e00 --- /dev/null +++ b/packages/core/src/agent/retry-after.spec.ts @@ -0,0 +1,116 @@ +import { describe, expect, it } from "vitest"; +import { computeRetryDelayMs, getRetryAfterMs, parseRetryAfter } from "./retry-after"; + +const FIXED_NOW = Date.parse("2026-05-14T20:00:00Z"); + +describe("parseRetryAfter", () => { + it("returns null for missing values", () => { + expect(parseRetryAfter(undefined)).toBeNull(); + expect(parseRetryAfter(null)).toBeNull(); + expect(parseRetryAfter("")).toBeNull(); + expect(parseRetryAfter(" ")).toBeNull(); + }); + + it("parses delta-seconds form", () => { + expect(parseRetryAfter("0")).toBe(0); + expect(parseRetryAfter("1")).toBe(1000); + expect(parseRetryAfter("120")).toBe(120 * 1000); + expect(parseRetryAfter(" 30 ")).toBe(30 * 1000); + }); + + it("rejects non-integer delta-seconds forms", () => { + expect(parseRetryAfter("1.5")).toBeNull(); + expect(parseRetryAfter("10ms")).toBeNull(); + expect(parseRetryAfter("-5")).toBeNull(); + expect(parseRetryAfter("0x10")).toBeNull(); + }); + + it("parses HTTP-date form into a relative delay", () => { + const fiveSecondsLater = new Date(FIXED_NOW + 5000).toUTCString(); + expect(parseRetryAfter(fiveSecondsLater, FIXED_NOW)).toBe(5000); + }); + + it("returns 0 when the HTTP-date has already passed", () => { + const pastDate = new Date(FIXED_NOW - 60_000).toUTCString(); + expect(parseRetryAfter(pastDate, FIXED_NOW)).toBe(0); + }); + + it("returns null for malformed HTTP-date strings", () => { + expect(parseRetryAfter("Definitely not a date")).toBeNull(); + expect(parseRetryAfter("Fri, 99 Foo 9999 99:99:99 GMT")).toBeNull(); + }); + + it("clamps very large delta-seconds to the 5-minute safety cap", () => { + expect(parseRetryAfter("3600")).toBe(5 * 60 * 1000); + expect(parseRetryAfter("999999")).toBe(5 * 60 * 1000); + }); + + it("clamps very far-future HTTP-dates to the 5-minute safety cap", () => { + const farFuture = new Date(FIXED_NOW + 60 * 60 * 1000).toUTCString(); + expect(parseRetryAfter(farFuture, FIXED_NOW)).toBe(5 * 60 * 1000); + }); +}); + +describe("getRetryAfterMs", () => { + it("reads lowercased header from responseHeaders", () => { + const err = { responseHeaders: { "retry-after": "10" } }; + expect(getRetryAfterMs(err)).toBe(10_000); + }); + + it("accepts the canonical-case spelling too", () => { + const err = { responseHeaders: { "Retry-After": "10" } }; + expect(getRetryAfterMs(err)).toBe(10_000); + }); + + it("prefers lowercase over canonical when both are present", () => { + const err = { responseHeaders: { "retry-after": "5", "Retry-After": "999" } }; + expect(getRetryAfterMs(err)).toBe(5_000); + }); + + it("returns null when the header is absent", () => { + expect(getRetryAfterMs({ responseHeaders: {} })).toBeNull(); + expect(getRetryAfterMs({ responseHeaders: { "x-foo": "bar" } })).toBeNull(); + }); + + it("returns null when there are no response headers at all", () => { + expect(getRetryAfterMs({})).toBeNull(); + expect(getRetryAfterMs(null)).toBeNull(); + expect(getRetryAfterMs(undefined)).toBeNull(); + expect(getRetryAfterMs(new Error("plain"))).toBeNull(); + }); +}); + +describe("computeRetryDelayMs", () => { + it("falls back to exponential when no Retry-After is provided", () => { + const err = new Error("transient"); + expect(computeRetryDelayMs(err, 0)).toBe(1000); + expect(computeRetryDelayMs(err, 1)).toBe(2000); + expect(computeRetryDelayMs(err, 2)).toBe(4000); + expect(computeRetryDelayMs(err, 3)).toBe(8000); + expect(computeRetryDelayMs(err, 4)).toBe(10_000); + expect(computeRetryDelayMs(err, 10)).toBe(10_000); + }); + + it("uses the server's Retry-After as a floor when it exceeds the exponential floor", () => { + const err = { responseHeaders: { "retry-after": "30" } }; + expect(computeRetryDelayMs(err, 0)).toBe(30_000); + expect(computeRetryDelayMs(err, 4)).toBe(30_000); + }); + + it("keeps the exponential floor when Retry-After is shorter", () => { + const err = { responseHeaders: { "retry-after": "0" } }; + expect(computeRetryDelayMs(err, 0)).toBe(1000); + expect(computeRetryDelayMs(err, 3)).toBe(8000); + }); + + it("honors HTTP-date Retry-After values", () => { + const tenSecondsLater = new Date(FIXED_NOW + 10_000).toUTCString(); + const err = { responseHeaders: { "retry-after": tenSecondsLater } }; + expect(computeRetryDelayMs(err, 0, FIXED_NOW)).toBe(10_000); + }); + + it("respects the 5-minute safety cap even when the server asks for longer", () => { + const err = { responseHeaders: { "retry-after": "999999" } }; + expect(computeRetryDelayMs(err, 0)).toBe(5 * 60 * 1000); + }); +}); diff --git a/packages/core/src/agent/retry-after.ts b/packages/core/src/agent/retry-after.ts new file mode 100644 index 000000000..30eaac71a --- /dev/null +++ b/packages/core/src/agent/retry-after.ts @@ -0,0 +1,106 @@ +/** + * Cap how long we'll honor a server-supplied `Retry-After` header. + * A misconfigured or hostile server can otherwise pin an agent for hours. + */ +const MAX_RETRY_AFTER_MS = 5 * 60 * 1000; + +/** + * Parse an HTTP `Retry-After` header value (RFC 7231 §7.1.3) into milliseconds. + * + * Accepts the two RFC-defined forms: + * - delta-seconds: a non-negative integer (e.g. `Retry-After: 120`) + * - HTTP-date: a fixed-form HTTP date (e.g. `Retry-After: Fri, 31 Dec 1999 23:59:59 GMT`) + * + * Returns `null` when the value is absent, empty, malformed, or negative. + * The result is clamped to {@link MAX_RETRY_AFTER_MS}. + * + * @param value The raw header value, or `undefined`/`null` when absent. + * @param nowMs Current time in milliseconds, injected for tests. Defaults to `Date.now()`. + */ +export function parseRetryAfter( + value: string | undefined | null, + nowMs: number = Date.now(), +): number | null { + if (value == null) { + return null; + } + + const trimmed = value.trim(); + if (trimmed === "") { + return null; + } + + if (/^\d+$/.test(trimmed)) { + const seconds = Number.parseInt(trimmed, 10); + if (!Number.isFinite(seconds) || seconds < 0) { + return null; + } + return Math.min(seconds * 1000, MAX_RETRY_AFTER_MS); + } + + // HTTP-date form mandates a day-name and month-name (RFC 7231 §7.1.1.1), + // so an HTTP-date always contains ASCII letters. Reject numeric-looking + // values like "1.5", "10ms", or "-5" before falling into `Date.parse`, + // which is permissive enough to coerce some of them into past dates. + if (!/[A-Za-z]/.test(trimmed)) { + return null; + } + + const dateMs = Date.parse(trimmed); + if (Number.isNaN(dateMs)) { + return null; + } + + const delta = dateMs - nowMs; + if (delta <= 0) { + return 0; + } + return Math.min(delta, MAX_RETRY_AFTER_MS); +} + +/** + * Read the `Retry-After` header off an error's `responseHeaders` bag and return + * its parsed value in milliseconds, or `null` when absent. + * + * AI SDK populates `responseHeaders` on `APICallError` and similar provider errors + * with lowercased header names. Both lowercase and canonical-case forms are + * accepted to stay robust against providers that don't normalize. + */ +export function getRetryAfterMs(error: unknown, nowMs: number = Date.now()): number | null { + const headers = (error as { responseHeaders?: Record } | undefined) + ?.responseHeaders; + if (!headers || typeof headers !== "object") { + return null; + } + const raw = headers["retry-after"] ?? headers["Retry-After"]; + return parseRetryAfter(raw, nowMs); +} + +/** + * Compute the wait between two retry attempts. + * + * When the provider supplies a `Retry-After` header (typical on 429 and 503), + * use it as the floor — the server has just told us the earliest moment it's + * willing to serve another request, and ignoring that signal causes + * coordinated retry-storms across concurrent agents. + * + * In every case we keep the exponential floor as a backpressure baseline so a + * `Retry-After: 0` (or an absent header on transient errors) still spaces + * subsequent attempts out. + * + * @param error The error thrown by the model invocation. + * @param attemptIndex Zero-based retry attempt index. + * @param nowMs Current time in ms, injected for tests. + */ +export function computeRetryDelayMs( + error: unknown, + attemptIndex: number, + nowMs: number = Date.now(), +): number { + const exponentialMs = Math.min(1000 * 2 ** attemptIndex, 10000); + const serverHintMs = getRetryAfterMs(error, nowMs); + if (serverHintMs == null) { + return exponentialMs; + } + return Math.max(serverHintMs, exponentialMs); +}