diff --git a/astrbot/core/config/default.py b/astrbot/core/config/default.py index 7e65489368..8b8b1bd479 100644 --- a/astrbot/core/config/default.py +++ b/astrbot/core/config/default.py @@ -1627,6 +1627,8 @@ "mimo-tts-style-prompt": "", "mimo-tts-dialect": "", "mimo-tts-seed-text": "Hello, MiMo, have you had lunch?", + "mimo-tts-user-prompt": "", + "mimo-tts-voice-audio-path": "", "timeout": "20", "proxy": "", }, @@ -2590,18 +2592,28 @@ "mimo-tts-style-prompt": { "description": "风格提示词", "type": "string", - "hint": "会以 标签形式添加到待合成文本开头,用于控制语速、情绪、角色或风格,例如 开心、变快、孙悟空、悄悄话。可留空。", + "hint": "用于控制语速、情绪、角色或风格,例如 开心、变快、孙悟空、悄悄话。v2 系列会以 标签拼接到文本开头,v2.5 系列会以(...)括号形式拼接。可留空。", }, "mimo-tts-dialect": { "description": "方言", "type": "string", - "hint": "会与风格提示词一起写入开头的 标签中,例如 东北话、四川话、河南话、粤语。可留空。", + "hint": "与风格提示词一起拼接到文本开头,例如 东北话、四川话、河南话、粤语。v2 系列使用 " + # v2.5 uses parentheses; v2 uses " def _build_assistant_content(self, text: str) -> str: return f"{self._build_style_prefix()}{text}" + def _read_voice_audio_base64(self) -> str: + if not self.voice_audio_path.strip(): + return "" + path = Path(self.voice_audio_path.strip()) + if not path.exists(): + logger.warning("Voice audio file not found: %s", path) + return "" + try: + suffix = path.suffix.lower().lstrip(".") + mime_map = {"wav": "audio/wav", "mp3": "audio/mpeg", "ogg": "audio/ogg"} + mime = mime_map.get(suffix, "audio/wav") + b64 = base64.b64encode(path.read_bytes()).decode("utf-8") + return f"data:{mime};base64,{b64}" + except Exception as exc: + logger.warning("Failed to read voice audio file %s: %s", path, exc) + return "" + def _build_payload(self, text: str) -> dict: messages: list[dict[str, str]] = [] @@ -88,10 +124,16 @@ def _build_payload(self, text: str) -> dict: } ) - audio_params = {"format": self.audio_format} - # voice design 模型不支持 audio.voice 参数 + audio_params: dict[str, str] = {"format": self.audio_format} if "voicedesign" not in self.model_name: - audio_params["voice"] = self.voice + if "voiceclone" in self.model_name: + voice_audio_b64 = self._read_voice_audio_base64() + if voice_audio_b64: + audio_params["voice"] = voice_audio_b64 + else: + audio_params["voice"] = self.voice + else: + audio_params["voice"] = self.voice return { "model": self.model_name, diff --git a/dashboard/src/i18n/locales/en-US/features/config-metadata.json b/dashboard/src/i18n/locales/en-US/features/config-metadata.json index 6363b71e31..9c1bd74f8a 100644 --- a/dashboard/src/i18n/locales/en-US/features/config-metadata.json +++ b/dashboard/src/i18n/locales/en-US/features/config-metadata.json @@ -1536,16 +1536,24 @@ }, "mimo-tts-style-prompt": { "description": "Style prompt", - "hint": "Prepended to the synthesis target text as a tag to control speed, emotion, character, or style, such as happy, faster, Sun Wukong, or whispering. Optional." + "hint": "Controls speed, emotion, character, or style, such as happy, faster, Sun Wukong, or whispering. v2 series prepends a tag; v2.5 series uses (...) parentheses. Optional." }, "mimo-tts-dialect": { "description": "Dialect", - "hint": "Combined with the style prompt inside the leading tag, for example Northeastern Mandarin, Sichuan dialect, Henan dialect, or Cantonese. Optional." + "hint": "Combined with the style prompt at the beginning of the text, for example Northeastern Mandarin, Sichuan dialect, Henan dialect, or Cantonese. v2 series uses и управляет скоростью, эмоцией, ролью или манерой речи. Необязательно." + "hint": "Управляет скоростью, эмоцией, ролью или манерой речи. Серия v2 добавляет тег ; серия v2.5 использует скобки (...). Необязательно." }, "mimo-tts-dialect": { "description": "Диалект", - "hint": "Объединяется с подсказкой стиля внутри начального тега , например северо-восточный, сычуаньский, хэнаньский или кантонский вариант речи. Необязательно." + "hint": "Объединяется с подсказкой стиля в начале текста, например северо-восточный, сычуаньский, хэнаньский или кантонский. Серия v2 использует теги 标签形式添加到待合成文本开头,用于控制语速、情绪、角色或风格,例如 开心、变快、孙悟空、悄悄话。可留空。" + "hint": "用于控制语速、情绪、角色或风格,例如 开心、变快、孙悟空、悄悄话。v2 系列会以 标签拼接到文本开头,v2.5 系列会以(...)括号形式拼接。可留空。" }, "mimo-tts-dialect": { "description": "方言", - "hint": "会与风格提示词一起写入开头的 标签中,例如 东北话、四川话、河南话、粤语。可留空。" + "hint": "与风格提示词一起拼接到文本开头,例如 东北话、四川话、河南话、粤语。v2 系列使用