diff --git a/astrbot/core/config/default.py b/astrbot/core/config/default.py
index 7e65489368..8b8b1bd479 100644
--- a/astrbot/core/config/default.py
+++ b/astrbot/core/config/default.py
@@ -1627,6 +1627,8 @@
"mimo-tts-style-prompt": "",
"mimo-tts-dialect": "",
"mimo-tts-seed-text": "Hello, MiMo, have you had lunch?",
+ "mimo-tts-user-prompt": "",
+ "mimo-tts-voice-audio-path": "",
"timeout": "20",
"proxy": "",
},
@@ -2590,18 +2592,28 @@
"mimo-tts-style-prompt": {
"description": "风格提示词",
"type": "string",
- "hint": "会以 标签形式添加到待合成文本开头,用于控制语速、情绪、角色或风格,例如 开心、变快、孙悟空、悄悄话。可留空。",
+ "hint": "用于控制语速、情绪、角色或风格,例如 开心、变快、孙悟空、悄悄话。v2 系列会以 标签拼接到文本开头,v2.5 系列会以(...)括号形式拼接。可留空。",
},
"mimo-tts-dialect": {
"description": "方言",
"type": "string",
- "hint": "会与风格提示词一起写入开头的 标签中,例如 东北话、四川话、河南话、粤语。可留空。",
+ "hint": "与风格提示词一起拼接到文本开头,例如 东北话、四川话、河南话、粤语。v2 系列使用 "
+ # v2.5 uses parentheses; v2 uses "
def _build_assistant_content(self, text: str) -> str:
+ # Return `text` with the result of self._build_style_prefix() prepended.
return f"{self._build_style_prefix()}{text}"
+ def _read_voice_audio_base64(self) -> str:
+ # Read the file at self.voice_audio_path and return its bytes encoded as a
+ # "data:<mime>;base64,<payload>" string. Returns "" when the configured
+ # path is blank, the file does not exist, or reading it fails; the latter
+ # two cases also log a warning.
+ if not self.voice_audio_path.strip():
+ return ""
+ path = Path(self.voice_audio_path.strip())
+ if not path.exists():
+ logger.warning("Voice audio file not found: %s", path)
+ return ""
+ try:
+ # The MIME type is chosen from the lower-cased file extension;
+ # extensions missing from the map fall back to audio/wav.
+ suffix = path.suffix.lower().lstrip(".")
+ mime_map = {"wav": "audio/wav", "mp3": "audio/mpeg", "ogg": "audio/ogg"}
+ mime = mime_map.get(suffix, "audio/wav")
+ b64 = base64.b64encode(path.read_bytes()).decode("utf-8")
+ return f"data:{mime};base64,{b64}"
+ # Best effort: any exception raised above is logged and turned into an
+ # empty string instead of propagating to the caller.
+ except Exception as exc:
+ logger.warning("Failed to read voice audio file %s: %s", path, exc)
+ return ""
+
def _build_payload(self, text: str) -> dict:
messages: list[dict[str, str]] = []
@@ -88,10 +124,16 @@ def _build_payload(self, text: str) -> dict:
}
)
- audio_params = {"format": self.audio_format}
- # voice design 模型不支持 audio.voice 参数
+ audio_params: dict[str, str] = {"format": self.audio_format}
if "voicedesign" not in self.model_name:
- audio_params["voice"] = self.voice
+ if "voiceclone" in self.model_name:
+ voice_audio_b64 = self._read_voice_audio_base64()
+ if voice_audio_b64:
+ audio_params["voice"] = voice_audio_b64
+ else:
+ audio_params["voice"] = self.voice
+ else:
+ audio_params["voice"] = self.voice
return {
"model": self.model_name,
diff --git a/dashboard/src/i18n/locales/en-US/features/config-metadata.json b/dashboard/src/i18n/locales/en-US/features/config-metadata.json
index 6363b71e31..9c1bd74f8a 100644
--- a/dashboard/src/i18n/locales/en-US/features/config-metadata.json
+++ b/dashboard/src/i18n/locales/en-US/features/config-metadata.json
@@ -1536,16 +1536,24 @@
},
"mimo-tts-style-prompt": {
"description": "Style prompt",
- "hint": "Prepended to the synthesis target text as a tag to control speed, emotion, character, or style, such as happy, faster, Sun Wukong, or whispering. Optional."
+ "hint": "Controls speed, emotion, character, or style, such as happy, faster, Sun Wukong, or whispering. v2 series prepends a tag; v2.5 series uses (...) parentheses. Optional."
},
"mimo-tts-dialect": {
"description": "Dialect",
- "hint": "Combined with the style prompt inside the leading tag, for example Northeastern Mandarin, Sichuan dialect, Henan dialect, or Cantonese. Optional."
+ "hint": "Combined with the style prompt at the beginning of the text, for example Northeastern Mandarin, Sichuan dialect, Henan dialect, or Cantonese. v2 series uses и управляет скоростью, эмоцией, ролью или манерой речи. Необязательно."
+ "hint": "Управляет скоростью, эмоцией, ролью или манерой речи. Серия v2 добавляет тег ; серия v2.5 использует скобки (...). Необязательно."
},
"mimo-tts-dialect": {
"description": "Диалект",
- "hint": "Объединяется с подсказкой стиля внутри начального тега , например северо-восточный, сычуаньский, хэнаньский или кантонский вариант речи. Необязательно."
+ "hint": "Объединяется с подсказкой стиля в начале текста, например северо-восточный, сычуаньский, хэнаньский или кантонский. Серия v2 использует теги 标签形式添加到待合成文本开头,用于控制语速、情绪、角色或风格,例如 开心、变快、孙悟空、悄悄话。可留空。"
+ "hint": "用于控制语速、情绪、角色或风格,例如 开心、变快、孙悟空、悄悄话。v2 系列会以 标签拼接到文本开头,v2.5 系列会以(...)括号形式拼接。可留空。"
},
"mimo-tts-dialect": {
"description": "方言",
- "hint": "会与风格提示词一起写入开头的 标签中,例如 东北话、四川话、河南话、粤语。可留空。"
+ "hint": "与风格提示词一起拼接到文本开头,例如 东北话、四川话、河南话、粤语。v2 系列使用