Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions livekit-plugins/livekit-plugins-kittentts/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# LiveKit Plugins KittenTTS

Support for local KittenTTS synthesis in LiveKit Agents.

## Installation

```bash
pip install livekit-plugins-kittentts
```

## Usage

```python
from livekit.plugins import kittentts

tts = kittentts.TTS(
model="KittenML/kitten-tts-nano-0.8",
voice="expr-voice-5-m",
)
```
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Copyright 2026 LiveKit, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""KittenTTS plugin for LiveKit Agents."""

from livekit.agents import Plugin

from .log import logger
from .tts import TTS
from .version import __version__

__all__ = ["TTS", "__version__"]


class KittenTTSPlugin(Plugin):
    """Plugin descriptor for the KittenTTS integration.

    Hands this module's name, version, package and logger to the base ``Plugin``.
    """

    def __init__(self) -> None:
        super().__init__(
            __name__,
            __version__,
            __package__,
            logger,
        )


Plugin.register_plugin(KittenTTSPlugin())

# Snapshot the module namespace as it stands right now; names bound after this
# line (NOT_IN_ALL, __pdoc__) are not part of the snapshot.
_module = dir()
NOT_IN_ALL = [m for m in _module if m not in __all__]

# Mark every name that is not exported through __all__ as hidden for pdoc.
# dict.fromkeys builds the mapping without a module-level loop, so no stray loop
# variable is left behind as a module attribute.
__pdoc__ = dict.fromkeys(NOT_IN_ALL, False)
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Copyright 2026 LiveKit, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging

# Logger for the KittenTTS plugin, named after the plugin's package path.
logger = logging.getLogger("livekit.plugins.kittentts")
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
# Copyright 2026 LiveKit, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations

import asyncio
import importlib
import uuid
from dataclasses import dataclass, replace
from typing import Any, cast

import numpy as np

from livekit.agents import APIConnectOptions, tts
from livekit.agents.types import DEFAULT_API_CONNECT_OPTIONS, NOT_GIVEN, NotGivenOr
from livekit.agents.utils import is_given

# Audio format reported to the base TTS class and to the audio emitter.
# NOTE(review): assumes KittenTTS produces 24 kHz mono audio — confirm against the model.
SAMPLE_RATE = 24000
NUM_CHANNELS = 1
# Defaults applied by TTS.__init__ when the caller does not override them.
DEFAULT_MODEL = "KittenML/kitten-tts-nano-0.8"
DEFAULT_VOICE = "expr-voice-5-m"
DEFAULT_SPEED = 1.0


@dataclass
class _TTSOptions:
    """Mutable bundle of the synthesis settings held by a ``TTS`` instance.

    ``model`` and ``cache_dir`` are used when the KittenTTS model is constructed;
    ``voice``, ``speed`` and ``clean_text`` are forwarded on each synthesis call.
    """

    # Model identifier handed to the KittenTTS constructor.
    model: str
    # Voice name forwarded to the model's generate_stream call.
    voice: str
    # Speed value forwarded to the model's generate_stream call.
    speed: float
    # Forwarded to generate_stream as its clean_text argument.
    clean_text: bool
    # Forwarded to the KittenTTS constructor as cache_dir; None leaves it unset.
    cache_dir: str | None


def _audio_to_pcm16(audio: Any) -> bytes:
    """Convert floating-point audio samples to little-endian signed 16-bit PCM bytes.

    Args:
        audio: Anything ``numpy.asarray`` can turn into a float32 array. Length-1
            axes are squeezed away before conversion.

    Returns:
        The samples as ``<i2`` bytes, or ``b""`` when ``audio`` holds no samples.
    """
    samples = np.asarray(audio, dtype=np.float32).squeeze()
    if samples.size == 0:
        return b""
    # NaN survives np.clip and has no defined integer value, so casting it yields
    # platform-dependent output plus a RuntimeWarning. Map it to silence first;
    # infinities are pinned to full scale, which is what clipping would give them.
    samples = np.nan_to_num(samples, nan=0.0, posinf=1.0, neginf=-1.0)
    samples = np.clip(samples, -1.0, 1.0)
    # Scale [-1.0, 1.0] onto the symmetric int16 range; the cast truncates toward zero.
    return (samples * 32767.0).astype("<i2").tobytes()


def _next_chunk(iterator: Any) -> Any | None:
    """Advance ``iterator`` by one item, returning ``None`` once it is exhausted.

    Exhaustion is reported as ``None`` instead of ``StopIteration``; an iterator
    that yields ``None`` itself is therefore indistinguishable from one that ended.
    """
    return next(iterator, None)


class TTS(tts.TTS):
    """Text-to-speech backed by a locally loaded KittenTTS model.

    The model is loaded lazily, in a worker thread, the first time it is needed and
    is cached until ``update_options`` changes ``model`` or ``cache_dir``.
    """

    def __init__(
        self,
        *,
        model: str = DEFAULT_MODEL,
        voice: str = DEFAULT_VOICE,
        speed: float = DEFAULT_SPEED,
        clean_text: bool = True,
        cache_dir: str | None = None,
    ) -> None:
        """Create a KittenTTS text-to-speech instance.

        Args:
            model: Model identifier passed to the ``KittenTTS`` constructor.
            voice: Voice name forwarded to the model on each synthesis.
            speed: Speed value forwarded to the model on each synthesis.
            clean_text: Forwarded to the model as ``clean_text`` on each synthesis.
            cache_dir: Passed to the ``KittenTTS`` constructor as ``cache_dir``.
        """
        super().__init__(
            capabilities=tts.TTSCapabilities(streaming=False),
            sample_rate=SAMPLE_RATE,
            num_channels=NUM_CHANNELS,
        )
        self._opts = _TTSOptions(
            model=model,
            voice=voice,
            speed=speed,
            clean_text=clean_text,
            cache_dir=cache_dir,
        )
        # Loaded on demand by _ensure_model(); None means "not loaded yet".
        self._model: Any | None = None
        # Bumped whenever model/cache_dir change so an in-flight load can detect
        # that the options it started from are stale.
        self._opts_revision = 0
        self._model_lock = asyncio.Lock()

    @property
    def model(self) -> str:
        """The currently configured model identifier."""
        return self._opts.model

    @property
    def provider(self) -> str:
        """The provider name reported for this TTS."""
        return "KittenML"

    def update_options(
        self,
        *,
        model: NotGivenOr[str] = NOT_GIVEN,
        voice: NotGivenOr[str] = NOT_GIVEN,
        speed: NotGivenOr[float] = NOT_GIVEN,
        clean_text: NotGivenOr[bool] = NOT_GIVEN,
        cache_dir: NotGivenOr[str | None] = NOT_GIVEN,
    ) -> None:
        """Update any subset of the synthesis options in place.

        Arguments left as ``NOT_GIVEN`` keep their current value. A change to
        ``model`` or ``cache_dir`` drops the cached model so the next call to
        ``_ensure_model`` loads a fresh one.
        """
        reset_model = False
        if is_given(model) and model != self._opts.model:
            self._opts.model = model
            reset_model = True
        if is_given(cache_dir) and cache_dir != self._opts.cache_dir:
            self._opts.cache_dir = cache_dir
            reset_model = True
        if is_given(voice):
            self._opts.voice = voice
        if is_given(speed):
            self._opts.speed = speed
        if is_given(clean_text):
            self._opts.clean_text = clean_text
        if reset_model:
            self._opts_revision += 1
            self._model = None

    @staticmethod
    def _load_model(opts: _TTSOptions) -> Any:
        """Import ``kittentts`` and construct the model described by ``opts``.

        Raises:
            ModuleNotFoundError: With installation instructions when the
                ``kittentts`` package itself is missing. A missing module with any
                other name is re-raised unchanged.
        """
        try:
            kittentts_module = importlib.import_module("kittentts")
        except ModuleNotFoundError as e:
            if e.name != "kittentts":
                # Some other module is missing (for example one that kittentts
                # imports). Telling the user to install KittenTTS would be
                # misleading, so let the original error through.
                raise
            raise ModuleNotFoundError(
                "KittenTTS is required. Install it with "
                "`pip install "
                "https://github.com/KittenML/KittenTTS/releases/download/0.8.1/"
                "kittentts-0.8.1-py3-none-any.whl`.",
                name=e.name,
            ) from e

        KittenTTS = cast(Any, kittentts_module).KittenTTS
        return KittenTTS(opts.model, cache_dir=opts.cache_dir)

    async def _ensure_model(self) -> Any:
        """Return the cached model, loading it in a worker thread if necessary.

        Loads are serialized by ``_model_lock``. If ``update_options`` bumps the
        revision while a load is in progress, the freshly loaded model is discarded
        and the load is retried with the current options.
        """
        if self._model is not None:
            return self._model

        while True:
            async with self._model_lock:
                # Another task may have finished loading while we waited for the lock.
                if self._model is not None:
                    return self._model
                # Copy the options so the worker thread sees a stable snapshot.
                opts = replace(self._opts)
                opts_revision = self._opts_revision

                model = await asyncio.to_thread(self._load_model, opts)
                if opts_revision == self._opts_revision:
                    self._model = model
                    return self._model

    def synthesize(
        self, text: str, *, conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS
    ) -> tts.ChunkedStream:
        """Return a ``ChunkedStream`` that synthesizes ``text`` with the current options."""
        return ChunkedStream(tts=self, input_text=text, conn_options=conn_options)


class ChunkedStream(tts.ChunkedStream):
    """Synthesizes one text input with the parent ``TTS`` instance's KittenTTS model."""

    def __init__(self, *, tts: TTS, input_text: str, conn_options: APIConnectOptions) -> None:
        super().__init__(tts=tts, input_text=input_text, conn_options=conn_options)
        self._tts: TTS = tts
        # Copy the options now so later changes on the TTS do not alter this request.
        self._opts = replace(tts._opts)

    async def _run(self, output_emitter: tts.AudioEmitter) -> None:
        """Generate audio for ``input_text`` and push it to ``output_emitter`` as PCM."""
        request_id = str(uuid.uuid4())
        output_emitter.initialize(
            request_id=request_id,
            sample_rate=SAMPLE_RATE,
            num_channels=NUM_CHANNELS,
            mime_type="audio/pcm",
        )

        engine = await self._tts._ensure_model()
        opts = self._opts
        chunks = engine.generate_stream(
            self.input_text,
            voice=opts.voice,
            speed=opts.speed,
            clean_text=opts.clean_text,
        )

        # Each step of the iterator runs in a worker thread; None marks the end.
        while (chunk := await asyncio.to_thread(_next_chunk, chunks)) is not None:
            # Empty conversions are skipped rather than pushed.
            if pcm := _audio_to_pcm16(chunk):
                output_emitter.push(pcm)

        output_emitter.flush()
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Copyright 2026 LiveKit, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Version string of the KittenTTS plugin package.
__version__ = "1.5.12"
49 changes: 49 additions & 0 deletions livekit-plugins/livekit-plugins-kittentts/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[project]
name = "livekit-plugins-kittentts"
dynamic = ["version"]
description = "Agent Framework plugin for KittenTTS"
readme = "README.md"
license = "Apache-2.0"
requires-python = ">=3.10.0"
authors = [{ name = "LiveKit", email = "hello@livekit.io" }]
keywords = ["voice", "ai", "realtime", "audio", "video", "livekit", "tts", "kittentts"]
classifiers = [
"Intended Audience :: Developers",
"License :: OSI Approved :: Apache Software License",
"Topic :: Multimedia :: Sound/Audio",
"Topic :: Multimedia :: Video",
"Topic :: Scientific/Engineering :: Artificial Intelligence",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3 :: Only",
]
dependencies = [
"livekit-agents>=1.5.12",
"numpy>=1.26",
"kittentts @ https://github.com/KittenML/KittenTTS/releases/download/0.8.1/kittentts-0.8.1-py3-none-any.whl",
]

[project.urls]
Documentation = "https://docs.livekit.io"
Website = "https://livekit.io/"
Source = "https://github.com/livekit/agents"

[tool.hatch.version]
path = "livekit/plugins/kittentts/version.py"

[tool.hatch.metadata]
allow-direct-references = true

[tool.hatch.build.targets.wheel]
packages = ["livekit"]

[tool.hatch.build.targets.sdist]
include = ["/livekit"]

[tool.uv]
exclude-newer = "7 days"
exclude-newer-package = { livekit-agents = "0 days" }
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ livekit-plugins-hume = { workspace = true }
livekit-plugins-inworld = { workspace = true }
livekit-plugins-krisp = { workspace = true }
livekit-plugins-keyframe = { workspace = true }
livekit-plugins-kittentts = { workspace = true }
livekit-plugins-langchain = { workspace = true }
livekit-plugins-lemonslice = { workspace = true}
livekit-plugins-liveavatar = { workspace = true }
Expand Down
Loading
Loading