diff --git a/.changeset/lemonslice-audio-output-order.md b/.changeset/lemonslice-audio-output-order.md new file mode 100644 index 000000000..73b0cc211 --- /dev/null +++ b/.changeset/lemonslice-audio-output-order.md @@ -0,0 +1,5 @@ +--- +'@livekit/agents-plugin-lemonslice': patch +--- + +fix(lemonslice): bind avatar audio output before starting upstream session diff --git a/plugins/lemonslice/src/avatar.ts b/plugins/lemonslice/src/avatar.ts index e4d6a970b..d86f5d9b2 100644 --- a/plugins/lemonslice/src/avatar.ts +++ b/plugins/lemonslice/src/avatar.ts @@ -186,8 +186,8 @@ export class AvatarSession extends voice.AvatarSession { * * This method: * 1. Creates a LiveKit token for the avatar participant - * 2. Calls the LemonSlice API to start the avatar session - * 3. Configures the agent's audio output to stream to the avatar + * 2. Configures the agent's audio output to stream to the avatar + * 3. Calls the LemonSlice API to start the avatar session * * @param agentSession - The agent session to connect to the avatar * @param room - The LiveKit room where the avatar will join @@ -249,9 +249,8 @@ export class AvatarSession extends voice.AvatarSession { const livekitToken = await at.toJwt(); - this.#logger.debug('starting avatar session'); - const sessionId = await this.startAgent(livekitUrl, livekitToken); - + // Bind audio output before the upstream HTTP call so subsequent generations route to + // the avatar identity while DataStreamAudioOutput waits for the video track. agentSession.output.audio = new voice.DataStreamAudioOutput({ room, destinationIdentity: this.avatarIdentity, @@ -260,6 +259,9 @@ export class AvatarSession extends voice.AvatarSession { waitPlaybackStart: true, }); + this.#logger.debug('starting avatar session'); + const sessionId = await this.startAgent(livekitUrl, livekitToken); + return sessionId; }