Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/assemblyai-continuous-partials.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@livekit/agents-plugin-assemblyai": patch
---

feat(assemblyai): add continuousPartials and interruptionDelay streaming options
32 changes: 32 additions & 0 deletions plugins/assemblyai/src/stt.ts
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,17 @@ export interface STTOptions {
/** Maximum silence (ms) before end-of-turn is forced regardless of confidence. */
maxTurnSilence?: number;
formatTurns?: boolean;
/**
* Whether to emit additional partial transcripts during long turns at a steady
* cadence. Only supported with the `u3-rt-pro` model. Defaults to true for
* `u3-rt-pro`.
*/
continuousPartials?: boolean;
/**
* How soon the first early partial is emitted, in milliseconds. Only supported
* with the `u3-rt-pro` model.
*/
interruptionDelay?: number;
keytermsPrompt?: string[];
/** Only supported with the `u3-rt-pro` model. */
prompt?: string;
Expand Down Expand Up @@ -121,6 +132,18 @@ export class STT extends stt.STT {
throw new Error("The 'prompt' parameter is only supported with the 'u3-rt-pro' model.");
}

if (opts.continuousPartials !== undefined && opts.speechModel !== 'u3-rt-pro') {
throw new Error(
"The 'continuousPartials' parameter is only supported with the 'u3-rt-pro' model.",
);
}

if (opts.interruptionDelay !== undefined && opts.speechModel !== 'u3-rt-pro') {
throw new Error(
"The 'interruptionDelay' parameter is only supported with the 'u3-rt-pro' model.",
);
}

const apiKey = opts.apiKey ?? defaultSTTOptions.apiKey;
if (!apiKey) {
throw new Error(
Expand All @@ -130,12 +153,15 @@ export class STT extends stt.STT {

// Minimize latency; matches LiveKit's end-of-turn detector well.
const minTurnSilence = opts.minTurnSilence ?? 100;
const continuousPartials =
opts.continuousPartials ?? (opts.speechModel === 'u3-rt-pro' ? true : undefined);

this.#opts = {
...defaultSTTOptions,
...opts,
apiKey,
minTurnSilence,
continuousPartials,
};
}

Expand Down Expand Up @@ -210,6 +236,10 @@ export class SpeechStream extends stt.SpeechStream {
if (opts.endOfTurnConfidenceThreshold !== undefined) {
configMsg.end_of_turn_confidence_threshold = opts.endOfTurnConfidenceThreshold;
}
if (opts.continuousPartials !== undefined) {
configMsg.continuous_partials = opts.continuousPartials;
}
if (opts.interruptionDelay !== undefined) configMsg.interruption_delay = opts.interruptionDelay;
if (opts.vadThreshold !== undefined) configMsg.vad_threshold = opts.vadThreshold;

// Only send if any actual fields (besides `type`) were specified.
Expand Down Expand Up @@ -280,6 +310,8 @@ export class SpeechStream extends stt.SpeechStream {
encoding: this.#opts.encoding,
speech_model: this.#opts.speechModel,
format_turns: this.#opts.formatTurns,
continuous_partials: this.#opts.continuousPartials,
interruption_delay: this.#opts.interruptionDelay,
end_of_turn_confidence_threshold: this.#opts.endOfTurnConfidenceThreshold,
min_turn_silence: minSilence,
max_turn_silence: maxSilence,
Expand Down
Loading