Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/friendly-agents-wait.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@voltagent/core": patch
---

Honor provider Retry-After headers when retrying failed model calls.
89 changes: 89 additions & 0 deletions packages/core/src/agent/agent.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3689,6 +3689,95 @@ Use pandas and summarize findings.`.split("\n"),
}
});

// Verifies that a retryable provider error carrying `Retry-After: 3` makes the
// agent schedule its retry with a 3000 ms timer, and that the second model call
// only happens once that time has elapsed.
it("should honor Retry-After when retrying provider rate limits", async () => {
// Fake timers let the test advance the 3 s wait instantly; the spy records the
// delay passed to setTimeout so it can be asserted below.
vi.useFakeTimers();
const setTimeoutSpy = vi.spyOn(globalThis, "setTimeout");

// `retrySeen` settles the first time the onRetry hook is invoked, giving the
// test a deterministic point to inspect state between the two attempts.
let resolveRetry!: () => void;
const retrySeen = new Promise<void>((resolve) => {
resolveRetry = resolve;
});
const onRetry = vi.fn(() => {
resolveRetry();
});
// maxRetries: 1 -> one retry is allowed after the initial failed attempt.
const agent = new Agent({
name: "RetryAfterAgent",
instructions: "Test",
model: mockModel as any,
maxRetries: 1,
hooks: createHooks({ onRetry }),
});

// Successful payload returned by the mocked second call.
const mockResponse = {
text: "Retry response",
content: [{ type: "text", text: "Retry response" }],
reasoning: [],
files: [],
sources: [],
toolCalls: [],
toolResults: [],
finishReason: "stop",
usage: {
inputTokens: 10,
outputTokens: 5,
totalTokens: 15,
},
warnings: [],
request: {},
response: {
id: "retry-response",
modelId: "test-model",
timestamp: new Date(),
messages: [],
},
steps: [],
};

// First call throws a 429-style retryable error with `retry-after: 3`;
// every later call resolves with mockResponse.
let callCount = 0;
vi.mocked(ai.generateText).mockImplementation(async () => {
callCount += 1;
if (callCount === 1) {
const error = new Error("Rate limited");
(error as any).isRetryable = true;
(error as any).statusCode = 429;
(error as any).headers = new Headers({ "retry-after": "3" });
throw error;
}
return mockResponse as any;
});

// Deliberately not awaited yet: the call must stay pending while timers are frozen.
const resultPromise = agent.generateText("Test");

try {
await retrySeen;
// NOTE(review): a single microtask yield is presumably enough for the agent to
// reach its setTimeout call after the onRetry hook returns — confirm against
// the retry loop; this ordering is what the setTimeout assertion relies on.
await Promise.resolve();

// Before time advances: hook fired once, only the failing call was made, and
// the retry was scheduled with the header-derived delay (3 s -> 3000 ms).
expect(onRetry).toHaveBeenCalledTimes(1);
expect(vi.mocked(ai.generateText)).toHaveBeenCalledTimes(1);
expect(setTimeoutSpy).toHaveBeenCalledWith(expect.any(Function), 3000);

// Advancing the fake clock releases the retry, which should succeed.
await vi.advanceTimersByTimeAsync(3000);
await expect(resultPromise).resolves.toMatchObject({ text: "Retry response" });
expect(vi.mocked(ai.generateText)).toHaveBeenCalledTimes(2);
} finally {
// Always undo the spy and fake timers so later tests see real timers.
setTimeoutSpy.mockRestore();
vi.useRealTimers();
}
});

// A Retry-After far larger than a 32-bit millisecond timer can hold must be
// clamped to 2_147_483_647 ms rather than passed through unchanged.
it("should clamp oversized Retry-After values to Node's max timer delay", async () => {
  // 9,999,999,999 s is well past the clamp ceiling once converted to ms.
  const rateLimitError = Object.assign(new Error("Rate limited"), {
    headers: new Headers({ "retry-after": "9999999999" }),
  });

  const clampAgent = new Agent({
    name: "RetryAfterClampAgent",
    instructions: "Test",
    model: mockModel as any,
  });

  // The helper is private, so reach it through an `any` cast.
  const delayMs = (clampAgent as any).getRetryAfterDelayMs(rateLimitError);

  expect(delayMs).toBe(2_147_483_647);
});

it("should handle model errors gracefully", async () => {
const agent = new Agent({
name: "TestAgent",
Expand Down
30 changes: 29 additions & 1 deletion packages/core/src/agent/agent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,7 @@ const DEFAULT_CONVERSATION_TITLE_MAX_OUTPUT_TOKENS = 32;
const DEFAULT_CONVERSATION_TITLE_MAX_CHARS = 80;
const CONVERSATION_TITLE_INPUT_MAX_CHARS = 2000;
const DEFAULT_TOOL_SEARCH_TOP_K = 1;
// Upper bound applied to retry delays before they are handed to a timer:
// 2^31 - 1 ms, the largest delay a signed 32-bit timer can represent. Node's
// timers documentation states that larger delays are reset to 1 ms, so
// server-supplied Retry-After values are clamped to this ceiling.
const MAX_NODE_TIMER_MS = 2_147_483_647;

type ResolvedConversationPersistenceOptions = {
mode: AgentConversationPersistenceMode;
Expand Down Expand Up @@ -5738,6 +5739,30 @@ export class Agent {
return true;
}

/**
 * Derives a retry delay from the `Retry-After` header attached to a failed
 * model call, if one is present.
 *
 * Both header forms are understood: a non-negative number of seconds
 * (e.g. `"3"`, and `"0"` meaning "retry immediately") and an absolute date
 * (e.g. `"Wed, 21 Oct 2015 07:28:00 GMT"`), which is converted to the time
 * remaining from now.
 *
 * @param error - The thrown value. Only its optional `headers` property is
 *   read; it may be a `Headers` instance or a plain object whose keys are
 *   matched case-insensitively.
 * @returns The delay in milliseconds, clamped to `[0, MAX_NODE_TIMER_MS]`, or
 *   `undefined` when the header is missing, empty, or not parseable (callers
 *   then fall back to their own backoff).
 */
private getRetryAfterDelayMs(error: unknown): number | undefined {
  const headers = (error as { headers?: unknown } | null | undefined)?.headers;
  if (typeof headers !== "object" || headers === null) {
    return undefined;
  }

  let rawValue: unknown;
  // Guard the global so runtimes without a fetch `Headers` class do not throw.
  if (typeof Headers !== "undefined" && headers instanceof Headers) {
    rawValue = headers.get("retry-after");
  } else {
    // Header names are case-insensitive; plain-object bags may use any casing.
    const entry = Object.entries(headers as Record<string, unknown>).find(
      ([name]) => name.toLowerCase() === "retry-after",
    );
    rawValue = entry?.[1];
  }

  // Some header bags store repeated headers as arrays; the first value wins.
  const firstValue: unknown = Array.isArray(rawValue) ? rawValue[0] : rawValue;
  if (typeof firstValue !== "string" && typeof firstValue !== "number") {
    return undefined;
  }

  const retryAfter = String(firstValue).trim();
  if (retryAfter === "") {
    return undefined;
  }

  // Delay-in-seconds form. The whole value must be numeric so that inputs such
  // as "3abc" or "2025-01-01" are not misread as a second count. Rounding up
  // ensures the retry never fires earlier than the server asked for.
  if (/^\d+(?:\.\d+)?$/.test(retryAfter)) {
    return Math.min(Math.ceil(Number(retryAfter) * 1000), MAX_NODE_TIMER_MS);
  }

  // Negative or otherwise malformed numbers are rejected outright instead of
  // letting engine-specific Date.parse heuristics reinterpret them as dates.
  if (/^[+-]?[\d.]+$/.test(retryAfter)) {
    return undefined;
  }

  // Absolute-date form: wait for the remaining time, never a negative amount.
  const retryAt = Date.parse(retryAfter);
  if (Number.isFinite(retryAt)) {
    return Math.min(Math.max(retryAt - Date.now(), 0), MAX_NODE_TIMER_MS);
  }

  return undefined;
}

private async executeWithModelFallback<T>({
oc,
operation,
Expand Down Expand Up @@ -5885,7 +5910,10 @@ export class Agent {
const canRetry = retryEligible && !isLastAttempt;

if (canRetry) {
const retryDelayMs = Math.min(1000 * 2 ** attemptIndex, 10000);
const retryDelayMs = Math.min(
this.getRetryAfterDelayMs(error) ?? Math.min(1000 * 2 ** attemptIndex, 10000),
MAX_NODE_TIMER_MS,
);
logger.debug(`[Agent:${this.name}] - Model attempt failed, retrying`, {
operation,
modelName,
Expand Down