Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/honor-retry-after-header.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@voltagent/core": patch
---

Honor the provider's `Retry-After` header on retried model calls. The retry loop in `executeWithModelFallback` previously always used local exponential backoff capped at 10 seconds, regardless of what the server asked for; this caused concurrent agents under shared 429/503 contention to converge their retry windows. The delay now uses `Retry-After` (delta-seconds or HTTP-date, RFC 7231) as a floor, keeps the exponential floor as a backpressure baseline, and caps at 5 minutes for safety.
65 changes: 65 additions & 0 deletions packages/core/src/agent/agent.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3689,6 +3689,71 @@ Use pandas and summarize findings.`.split("\n"),
}
});

it("should honor the Retry-After header on retried model calls", async () => {
const agent = new Agent({
name: "RetryAfterAgent",
instructions: "Test",
model: mockModel as any,
maxRetries: 1,
});

const mockResponse = {
text: "Retry response",
content: [{ type: "text", text: "Retry response" }],
reasoning: [],
files: [],
sources: [],
toolCalls: [],
toolResults: [],
finishReason: "stop",
usage: { inputTokens: 10, outputTokens: 5, totalTokens: 15 },
warnings: [],
request: {},
response: {
id: "retry-after-response",
modelId: "test-model",
timestamp: new Date(),
messages: [],
},
steps: [],
};

const observedDelays: number[] = [];
const originalSetTimeout = global.setTimeout;
const setTimeoutSpy = vi.spyOn(global, "setTimeout").mockImplementation(((
cb: any,
delay: any,
...args: any[]
) => {
if (typeof delay === "number") observedDelays.push(delay);
return originalSetTimeout(cb, 0, ...args);
}) as typeof setTimeout);

let callCount = 0;
vi.mocked(ai.generateText).mockImplementation(async () => {
callCount += 1;
if (callCount < 2) {
const error = new Error("Rate limited");
(error as any).isRetryable = true;
(error as any).statusCode = 429;
(error as any).responseHeaders = { "retry-after": "30" };
throw error;
}
return mockResponse as any;
});

try {
const result = await agent.generateText("Test");

expect(result.text).toBe("Retry response");
expect(vi.mocked(ai.generateText)).toHaveBeenCalledTimes(2);
// attemptIndex=0: exponential floor=1000ms, server hint=30000ms ⇒ delay=30000.
expect(observedDelays).toContain(30_000);
} finally {
setTimeoutSpy.mockRestore();
}
});

it("should handle model errors gracefully", async () => {
const agent = new Agent({
name: "TestAgent",
Expand Down
3 changes: 2 additions & 1 deletion packages/core/src/agent/agent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ import type {
ToolExecuteOptions,
UsageInfo,
} from "./providers/base/types";
import { computeRetryDelayMs } from "./retry-after";
import { coerceStringifiedJsonToolArgs } from "./tool-input-coercion";
export type { AgentHooks } from "./hooks";
export type {
Expand Down Expand Up @@ -5885,7 +5886,7 @@ export class Agent {
const canRetry = retryEligible && !isLastAttempt;

if (canRetry) {
const retryDelayMs = Math.min(1000 * 2 ** attemptIndex, 10000);
const retryDelayMs = computeRetryDelayMs(error, attemptIndex);
logger.debug(`[Agent:${this.name}] - Model attempt failed, retrying`, {
operation,
modelName,
Expand Down
116 changes: 116 additions & 0 deletions packages/core/src/agent/retry-after.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
import { describe, expect, it } from "vitest";
import { computeRetryDelayMs, getRetryAfterMs, parseRetryAfter } from "./retry-after";

const FIXED_NOW = Date.parse("2026-05-14T20:00:00Z");

describe("parseRetryAfter", () => {
it("returns null for missing values", () => {
expect(parseRetryAfter(undefined)).toBeNull();
expect(parseRetryAfter(null)).toBeNull();
expect(parseRetryAfter("")).toBeNull();
expect(parseRetryAfter(" ")).toBeNull();
});

it("parses delta-seconds form", () => {
expect(parseRetryAfter("0")).toBe(0);
expect(parseRetryAfter("1")).toBe(1000);
expect(parseRetryAfter("120")).toBe(120 * 1000);
expect(parseRetryAfter(" 30 ")).toBe(30 * 1000);
});

it("rejects non-integer delta-seconds forms", () => {
expect(parseRetryAfter("1.5")).toBeNull();
expect(parseRetryAfter("10ms")).toBeNull();
expect(parseRetryAfter("-5")).toBeNull();
expect(parseRetryAfter("0x10")).toBeNull();
});

it("parses HTTP-date form into a relative delay", () => {
const fiveSecondsLater = new Date(FIXED_NOW + 5000).toUTCString();
expect(parseRetryAfter(fiveSecondsLater, FIXED_NOW)).toBe(5000);
});

it("returns 0 when the HTTP-date has already passed", () => {
const pastDate = new Date(FIXED_NOW - 60_000).toUTCString();
expect(parseRetryAfter(pastDate, FIXED_NOW)).toBe(0);
});

it("returns null for malformed HTTP-date strings", () => {
expect(parseRetryAfter("Definitely not a date")).toBeNull();
expect(parseRetryAfter("Fri, 99 Foo 9999 99:99:99 GMT")).toBeNull();
});

it("clamps very large delta-seconds to the 5-minute safety cap", () => {
expect(parseRetryAfter("3600")).toBe(5 * 60 * 1000);
expect(parseRetryAfter("999999")).toBe(5 * 60 * 1000);
});

it("clamps very far-future HTTP-dates to the 5-minute safety cap", () => {
const farFuture = new Date(FIXED_NOW + 60 * 60 * 1000).toUTCString();
expect(parseRetryAfter(farFuture, FIXED_NOW)).toBe(5 * 60 * 1000);
});
});

describe("getRetryAfterMs", () => {
it("reads lowercased header from responseHeaders", () => {
const err = { responseHeaders: { "retry-after": "10" } };
expect(getRetryAfterMs(err)).toBe(10_000);
});

it("accepts the canonical-case spelling too", () => {
const err = { responseHeaders: { "Retry-After": "10" } };
expect(getRetryAfterMs(err)).toBe(10_000);
});

it("prefers lowercase over canonical when both are present", () => {
const err = { responseHeaders: { "retry-after": "5", "Retry-After": "999" } };
expect(getRetryAfterMs(err)).toBe(5_000);
});

it("returns null when the header is absent", () => {
expect(getRetryAfterMs({ responseHeaders: {} })).toBeNull();
expect(getRetryAfterMs({ responseHeaders: { "x-foo": "bar" } })).toBeNull();
});

it("returns null when there are no response headers at all", () => {
expect(getRetryAfterMs({})).toBeNull();
expect(getRetryAfterMs(null)).toBeNull();
expect(getRetryAfterMs(undefined)).toBeNull();
expect(getRetryAfterMs(new Error("plain"))).toBeNull();
});
});

describe("computeRetryDelayMs", () => {
it("falls back to exponential when no Retry-After is provided", () => {
const err = new Error("transient");
expect(computeRetryDelayMs(err, 0)).toBe(1000);
expect(computeRetryDelayMs(err, 1)).toBe(2000);
expect(computeRetryDelayMs(err, 2)).toBe(4000);
expect(computeRetryDelayMs(err, 3)).toBe(8000);
expect(computeRetryDelayMs(err, 4)).toBe(10_000);
expect(computeRetryDelayMs(err, 10)).toBe(10_000);
});

it("uses the server's Retry-After as a floor when it exceeds the exponential floor", () => {
const err = { responseHeaders: { "retry-after": "30" } };
expect(computeRetryDelayMs(err, 0)).toBe(30_000);
expect(computeRetryDelayMs(err, 4)).toBe(30_000);
});

it("keeps the exponential floor when Retry-After is shorter", () => {
const err = { responseHeaders: { "retry-after": "0" } };
expect(computeRetryDelayMs(err, 0)).toBe(1000);
expect(computeRetryDelayMs(err, 3)).toBe(8000);
});

it("honors HTTP-date Retry-After values", () => {
const tenSecondsLater = new Date(FIXED_NOW + 10_000).toUTCString();
const err = { responseHeaders: { "retry-after": tenSecondsLater } };
expect(computeRetryDelayMs(err, 0, FIXED_NOW)).toBe(10_000);
});

it("respects the 5-minute safety cap even when the server asks for longer", () => {
const err = { responseHeaders: { "retry-after": "999999" } };
expect(computeRetryDelayMs(err, 0)).toBe(5 * 60 * 1000);
});
});
106 changes: 106 additions & 0 deletions packages/core/src/agent/retry-after.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
/**
* Cap how long we'll honor a server-supplied `Retry-After` header.
* A misconfigured or hostile server can otherwise pin an agent for hours.
*/
const MAX_RETRY_AFTER_MS = 5 * 60 * 1000;

/**
* Parse an HTTP `Retry-After` header value (RFC 7231 §7.1.3) into milliseconds.
*
* Accepts the two RFC-defined forms:
* - delta-seconds: a non-negative integer (e.g. `Retry-After: 120`)
* - HTTP-date: a fixed-form HTTP date (e.g. `Retry-After: Fri, 31 Dec 1999 23:59:59 GMT`)
*
* Returns `null` when the value is absent, empty, malformed, or negative.
* The result is clamped to {@link MAX_RETRY_AFTER_MS}.
*
* @param value The raw header value, or `undefined`/`null` when absent.
* @param nowMs Current time in milliseconds, injected for tests. Defaults to `Date.now()`.
*/
export function parseRetryAfter(
value: string | undefined | null,
nowMs: number = Date.now(),
): number | null {
if (value == null) {
return null;
}

const trimmed = value.trim();
if (trimmed === "") {
return null;
}

if (/^\d+$/.test(trimmed)) {
const seconds = Number.parseInt(trimmed, 10);
if (!Number.isFinite(seconds) || seconds < 0) {
return null;
}
return Math.min(seconds * 1000, MAX_RETRY_AFTER_MS);
}

// HTTP-date form mandates a day-name and month-name (RFC 7231 §7.1.1.1),
// so an HTTP-date always contains ASCII letters. Reject numeric-looking
// values like "1.5", "10ms", or "-5" before falling into `Date.parse`,
// which is permissive enough to coerce some of them into past dates.
if (!/[A-Za-z]/.test(trimmed)) {
return null;
}

const dateMs = Date.parse(trimmed);
if (Number.isNaN(dateMs)) {
return null;
}

const delta = dateMs - nowMs;
if (delta <= 0) {
return 0;
}
return Math.min(delta, MAX_RETRY_AFTER_MS);
}

/**
* Read the `Retry-After` header off an error's `responseHeaders` bag and return
* its parsed value in milliseconds, or `null` when absent.
*
* AI SDK populates `responseHeaders` on `APICallError` and similar provider errors
* with lowercased header names. Both lowercase and canonical-case forms are
* accepted to stay robust against providers that don't normalize.
*/
export function getRetryAfterMs(error: unknown, nowMs: number = Date.now()): number | null {
const headers = (error as { responseHeaders?: Record<string, string> } | undefined)
?.responseHeaders;
if (!headers || typeof headers !== "object") {
return null;
}
const raw = headers["retry-after"] ?? headers["Retry-After"];
return parseRetryAfter(raw, nowMs);
Comment on lines +69 to +76
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor | ⚡ Quick win

Handle Retry-After header names fully case-insensitively.

Line 75 only checks two exact key spellings. A mixed-case header key will be missed, so the server hint can be ignored unexpectedly.

Proposed fix
 export function getRetryAfterMs(error: unknown, nowMs: number = Date.now()): number | null {
   const headers = (error as { responseHeaders?: Record<string, string> } | undefined)
     ?.responseHeaders;
   if (!headers || typeof headers !== "object") {
     return null;
   }
-  const raw = headers["retry-after"] ?? headers["Retry-After"];
+  const raw = Object.entries(headers).find(
+    ([key]) => key.toLowerCase() === "retry-after",
+  )?.[1];
   return parseRetryAfter(raw, nowMs);
 }
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
export function getRetryAfterMs(error: unknown, nowMs: number = Date.now()): number | null {
const headers = (error as { responseHeaders?: Record<string, string> } | undefined)
?.responseHeaders;
if (!headers || typeof headers !== "object") {
return null;
}
const raw = headers["retry-after"] ?? headers["Retry-After"];
return parseRetryAfter(raw, nowMs);
export function getRetryAfterMs(error: unknown, nowMs: number = Date.now()): number | null {
const headers = (error as { responseHeaders?: Record<string, string> } | undefined)
?.responseHeaders;
if (!headers || typeof headers !== "object") {
return null;
}
const raw = Object.entries(headers).find(
([key]) => key.toLowerCase() === "retry-after",
)?.[1];
return parseRetryAfter(raw, nowMs);
}
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@packages/core/src/agent/retry-after.ts` around lines 69 - 76, getRetryAfterMs
currently only checks headers["retry-after"] and headers["Retry-After"], which
misses mixed-case names; change the lookup to be case-insensitive by normalizing
header keys (e.g., iterate Object.keys(responseHeaders) and compare
key.toLowerCase() === "retry-after") or build a lower-cased map before fetching
the value, then pass the found raw value to parseRetryAfter; update references
in getRetryAfterMs to use the normalized lookup of responseHeaders rather than
the two exact keys.

}

/**
* Compute the wait between two retry attempts.
*
* When the provider supplies a `Retry-After` header (typical on 429 and 503),
* use it as the floor — the server has just told us the earliest moment it's
* willing to serve another request, and ignoring that signal causes
* coordinated retry-storms across concurrent agents.
*
* In every case we keep the exponential floor as a backpressure baseline so a
* `Retry-After: 0` (or an absent header on transient errors) still spaces
* subsequent attempts out.
*
* @param error The error thrown by the model invocation.
* @param attemptIndex Zero-based retry attempt index.
* @param nowMs Current time in ms, injected for tests.
*/
export function computeRetryDelayMs(
error: unknown,
attemptIndex: number,
nowMs: number = Date.now(),
): number {
const exponentialMs = Math.min(1000 * 2 ** attemptIndex, 10000);
const serverHintMs = getRetryAfterMs(error, nowMs);
if (serverHintMs == null) {
return exponentialMs;
}
return Math.max(serverHintMs, exponentialMs);
}
Loading