diff --git a/.changeset/honest-news-create.md b/.changeset/honest-news-create.md
new file mode 100644
index 000000000..d32e4c31d
--- /dev/null
+++ b/.changeset/honest-news-create.md
@@ -0,0 +1,5 @@
+---
+'@livekit/agents-plugin-openai': patch
+---
+
+Add Reasoning param for gpt-realtime-2\* model family
diff --git a/plugins/openai/src/realtime/api_proto.ts b/plugins/openai/src/realtime/api_proto.ts
index 1d4c775c8..df003e483 100644
--- a/plugins/openai/src/realtime/api_proto.ts
+++ b/plugins/openai/src/realtime/api_proto.ts
@@ -10,6 +10,20 @@ export const OUT_FRAME_SIZE = 1200; // 50ms
 export const BASE_URL = 'wss://api.openai.com/v1';
 
 export type Model = 'gpt-4o-realtime-preview-2024-10-01' | string; // Open-ended, for future models
+
+/**
+ * Models that support the `reasoning` configuration on the Realtime API.
+ * Currently only the `gpt-realtime-2` family supports it.
+ *
+ * Ref: https://developers.openai.com/api/reference/resources/realtime/subresources/calls/methods/accept
+ */
+export type ReasoningCapableModel = 'gpt-realtime-2' | `gpt-realtime-2-${string}`;
+
+export type ReasoningEffort = 'minimal' | 'low' | 'medium' | 'high' | 'xhigh';
+
+export interface Reasoning {
+  effort?: ReasoningEffort;
+}
 export type Voice =
   | 'alloy'
   | 'shimmer'
@@ -328,6 +342,7 @@ export interface SessionUpdateEvent extends BaseClientEvent {
     audio?: RealtimeAudioConfig; // GA: nested audio config
     max_output_tokens?: number | 'inf'; // GA: renamed from max_response_output_tokens
     tracing?: TracingConfig | null; // GA: tracing config
+    reasoning?: Reasoning | null; // GA: reasoning config (gpt-realtime-2 only)
     // Common fields
     model: Model;
     instructions: string;
diff --git a/plugins/openai/src/realtime/realtime_model.ts b/plugins/openai/src/realtime/realtime_model.ts
index 662448112..d3987447b 100644
--- a/plugins/openai/src/realtime/realtime_model.ts
+++ b/plugins/openai/src/realtime/realtime_model.ts
@@ -48,6 +48,7 @@ interface RealtimeOptions {
   maxResponseOutputTokens?: number | 'inf';
   speed?: number;
   tracing?: api_proto.TracingConfig | null;
+  reasoning?: api_proto.Reasoning;
   apiKey?: string;
   baseURL: string;
   isAzure: boolean;
@@ -158,8 +159,10 @@ export class RealtimeModel extends llm.RealtimeModel {
   }
 
   constructor(
-    options: {
-      model?: string;
+    options: (
+      | { model: api_proto.ReasoningCapableModel; reasoning?: api_proto.Reasoning }
+      | { model?: string; reasoning?: never }
+    ) & {
       voice?: string;
       /** @deprecated Unused in GA API (v1). Temperature is no longer supported. */
       temperature?: number;
@@ -514,6 +517,11 @@ export class RealtimeSession extends llm.RealtimeSession {
     // GA format (OpenAI or Azure GA)
     const audioFormat: api_proto.AudioFormat = { type: 'audio/pcm', rate: SAMPLE_RATE };
     const modality: Modality = opts.modalities.includes('audio') ? 'audio' : 'text';
+    // Mirror api_proto.ReasoningCapableModel exactly: the bare `gpt-realtime-2` id or a
+    // `gpt-realtime-2-*` variant. A bare prefix test would also match `gpt-realtime-2025-08-28`.
+    const includeReasoning =
+      opts.reasoning &&
+      (opts.model === 'gpt-realtime-2' || opts.model.startsWith('gpt-realtime-2-'));
     return {
       type: 'session.update',
       session: {
@@ -537,6 +545,7 @@ export class RealtimeSession extends llm.RealtimeSession {
         tool_choice: toOaiToolChoice(opts.toolChoice),
         tracing: opts.tracing,
         instructions: this.instructions,
+        ...(includeReasoning ? { reasoning: opts.reasoning } : {}),
       },
     };
   }