From 0c6ae244adc26faeefb6d7ee3dfc2e30131a838d Mon Sep 17 00:00:00 2001 From: Windpicker-owo <3431391539@qq.com> Date: Fri, 24 Oct 2025 23:22:04 +0800 Subject: [PATCH 1/3] =?UTF-8?q?feat(chat):=20=E7=BB=9F=E4=B8=80=E6=B6=88?= =?UTF-8?q?=E6=81=AF=E6=A0=BC=E5=BC=8F=E4=BF=A1=E6=81=AF=E5=A4=84=E7=90=86?= =?UTF-8?q?=E9=80=BB=E8=BE=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 在多个消息处理模块中统一了 format_info 的处理方式,确保适配器支持的消息类型能够正确传递给 action_modifier: - 在 bot.py、chat_stream.py、optimized_chat_stream.py 中新增 _prepare_additional_config 方法 - 将 format_info 嵌入到 additional_config 中,确保数据库存储一致性 - 增强 action_modifier 中的适配器类型检查逻辑,添加更详细的错误日志 - 修复 storage.py 中的 additional_config 处理逻辑,避免覆盖原始配置 这些改进确保了 Action 能够正确检查适配器支持的消息类型,避免因缺少 format_info 导致的类型检查失败。 --- src/chat/message_receive/bot.py | 36 ++++++++++++ src/chat/message_receive/chat_stream.py | 57 +++++++++++++++++-- .../message_receive/optimized_chat_stream.py | 55 +++++++++++++++++- src/chat/message_receive/storage.py | 18 ++++-- src/chat/planner_actions/action_modifier.py | 57 +++++++++++++++---- 5 files changed, 199 insertions(+), 24 deletions(-) diff --git a/src/chat/message_receive/bot.py b/src/chat/message_receive/bot.py index dc6634f65..544dec94f 100644 --- a/src/chat/message_receive/bot.py +++ b/src/chat/message_receive/bot.py @@ -659,6 +659,41 @@ class ChatBot: group_name = getattr(group_info, "group_name", None) group_platform = getattr(group_info, "platform", None) + # 准备 additional_config,将 format_info 嵌入其中 + additional_config_str = None + try: + import orjson + + additional_config_data = {} + + # 首先获取adapter传递的additional_config + if hasattr(message_info, 'additional_config') and message_info.additional_config: + if isinstance(message_info.additional_config, dict): + additional_config_data = message_info.additional_config.copy() + elif isinstance(message_info.additional_config, str): + try: + additional_config_data = orjson.loads(message_info.additional_config) + except Exception as e: + logger.warning(f"无法解析 additional_config JSON: {e}") + additional_config_data = {} + + # 然后添加format_info到additional_config中 + if hasattr(message_info, 'format_info') and message_info.format_info: + try: + format_info_dict = message_info.format_info.to_dict() + additional_config_data["format_info"] = format_info_dict + logger.debug(f"[bot.py] 嵌入 format_info 到 additional_config: {format_info_dict}") + except Exception as e: + logger.warning(f"将 format_info 转换为字典失败: {e}") + else: + logger.warning(f"[bot.py] [问题] 消息缺少 format_info: message_id={message_id}") + + # 序列化为JSON字符串 + if additional_config_data: + additional_config_str = orjson.dumps(additional_config_data).decode("utf-8") + except Exception as e: + logger.error(f"准备 additional_config 失败: {e}") + # 创建数据库消息对象 db_message = DatabaseMessages( message_id=message_id, @@ -674,6 +709,7 @@ class ChatBot: is_notify=bool(message.is_notify), is_public_notice=bool(message.is_public_notice), notice_type=message.notice_type, + additional_config=additional_config_str, user_id=user_id, user_nickname=user_nickname, user_cardname=user_cardname, diff --git a/src/chat/message_receive/chat_stream.py b/src/chat/message_receive/chat_stream.py index d3d418648..1cc3e548b 100644 --- a/src/chat/message_receive/chat_stream.py +++ b/src/chat/message_receive/chat_stream.py @@ -213,8 +213,8 @@ class ChatStream: priority_info=json.dumps(getattr(message, "priority_info", None)) if getattr(message, "priority_info", None) else None, - # 额外配置 - additional_config=getattr(message_info, "additional_config", None), + # 额外配置 - 需要将 format_info 嵌入到 additional_config 中 + additional_config=self._prepare_additional_config(message_info), # 用户信息 user_id=str(getattr(user_info, "user_id", "")), user_nickname=getattr(user_info, "user_nickname", ""), @@ -253,8 +253,59 @@ class ChatStream: f"interest_value: {db_message.interest_value}" ) + def _prepare_additional_config(self, message_info) -> str | None: + """ + 准备 additional_config,将 format_info 嵌入其中 + + 这个方法模仿 storage.py 中的逻辑,确保 DatabaseMessages 中的 additional_config + 包含 format_info,使得 action_modifier 能够正确获取适配器支持的消息类型 + + Args: + message_info: BaseMessageInfo 对象 + + Returns: + str | None: JSON 字符串格式的 additional_config,如果为空则返回 None + """ + import orjson + + # 首先获取adapter传递的additional_config + additional_config_data = {} + if hasattr(message_info, 'additional_config') and message_info.additional_config: + if isinstance(message_info.additional_config, dict): + additional_config_data = message_info.additional_config.copy() + elif isinstance(message_info.additional_config, str): + # 如果是字符串,尝试解析 + try: + additional_config_data = orjson.loads(message_info.additional_config) + except Exception as e: + logger.warning(f"无法解析 additional_config JSON: {e}") + additional_config_data = {} + + # 然后添加format_info到additional_config中 + if hasattr(message_info, 'format_info') and message_info.format_info: + try: + format_info_dict = message_info.format_info.to_dict() + additional_config_data["format_info"] = format_info_dict + logger.debug(f"嵌入 format_info 到 additional_config: {format_info_dict}") + except Exception as e: + logger.warning(f"将 format_info 转换为字典失败: {e}") + else: + logger.warning(f"[问题] 消息缺少 format_info: message_id={getattr(message_info, 'message_id', 'unknown')}") + logger.warning("[问题] 这可能导致 Action 无法正确检查适配器支持的类型") + + # 序列化为JSON字符串 + if additional_config_data: + try: + return orjson.dumps(additional_config_data).decode("utf-8") + except Exception as e: + logger.error(f"序列化 additional_config 失败: {e}") + return None + return None + def _safe_get_actions(self, message: "MessageRecv") -> list | None: """安全获取消息的actions字段""" + import json + try: actions = getattr(message, "actions", None) if actions is None: @@ -263,8 +314,6 @@ class ChatStream: # 如果是字符串,尝试解析为JSON if isinstance(actions, str): try: - import json - actions = json.loads(actions) except json.JSONDecodeError: logger.warning(f"无法解析actions JSON字符串: {actions}") diff --git a/src/chat/message_receive/optimized_chat_stream.py b/src/chat/message_receive/optimized_chat_stream.py index bc59631e6..2f7059a3e 100644 --- a/src/chat/message_receive/optimized_chat_stream.py +++ b/src/chat/message_receive/optimized_chat_stream.py @@ -230,7 +230,7 @@ class OptimizedChatStream: priority_info=json.dumps(getattr(message, "priority_info", None)) if getattr(message, "priority_info", None) else None, - additional_config=getattr(message_info, "additional_config", None), + additional_config=self._prepare_additional_config(message_info), user_id=str(getattr(user_info, "user_id", "")), user_nickname=getattr(user_info, "user_nickname", ""), user_cardname=getattr(user_info, "user_cardname", None), @@ -342,8 +342,59 @@ class OptimizedChatStream: return instance + def _prepare_additional_config(self, message_info) -> str | None: + """ + 准备 additional_config,将 format_info 嵌入其中 + + 这个方法模仿 storage.py 中的逻辑,确保 DatabaseMessages 中的 additional_config + 包含 format_info,使得 action_modifier 能够正确获取适配器支持的消息类型 + + Args: + message_info: BaseMessageInfo 对象 + + Returns: + str | None: JSON 字符串格式的 additional_config,如果为空则返回 None + """ + import orjson + + # 首先获取adapter传递的additional_config + additional_config_data = {} + if hasattr(message_info, 'additional_config') and message_info.additional_config: + if isinstance(message_info.additional_config, dict): + additional_config_data = message_info.additional_config.copy() + elif isinstance(message_info.additional_config, str): + # 如果是字符串,尝试解析 + try: + additional_config_data = orjson.loads(message_info.additional_config) + except Exception as e: + logger.warning(f"无法解析 additional_config JSON: {e}") + additional_config_data = {} + + # 然后添加format_info到additional_config中 + if hasattr(message_info, 'format_info') and message_info.format_info: + try: + format_info_dict = message_info.format_info.to_dict() + additional_config_data["format_info"] = format_info_dict + logger.debug(f"嵌入 format_info 到 additional_config: {format_info_dict}") + except Exception as e: + logger.warning(f"将 format_info 转换为字典失败: {e}") + else: + logger.warning(f"[问题] 消息缺少 format_info: message_id={getattr(message_info, 'message_id', 'unknown')}") + logger.warning("[问题] 这可能导致 Action 无法正确检查适配器支持的类型") + + # 序列化为JSON字符串 + if additional_config_data: + try: + return orjson.dumps(additional_config_data).decode("utf-8") + except Exception as e: + logger.error(f"序列化 additional_config 失败: {e}") + return None + return None + def _safe_get_actions(self, message: "MessageRecv") -> list | None: """安全获取消息的actions字段""" + import json + try: actions = getattr(message, "actions", None) if actions is None: @@ -351,8 +402,6 @@ class OptimizedChatStream: if isinstance(actions, str): try: - import json - actions = json.loads(actions) except json.JSONDecodeError: logger.warning(f"无法解析actions JSON字符串: {actions}") diff --git a/src/chat/message_receive/storage.py b/src/chat/message_receive/storage.py index 6b03bb1dd..174001411 100644 --- a/src/chat/message_receive/storage.py +++ b/src/chat/message_receive/storage.py @@ -100,16 +100,22 @@ class MessageStorage: priority_info_json = orjson.dumps(priority_info).decode("utf-8") if priority_info else None # 准备additional_config,包含format_info和其他配置 - additional_config_data = {} + additional_config_data = None - # 保存format_info到additional_config中 + # 首先获取adapter传递的additional_config + if hasattr(message.message_info, 'additional_config') and message.message_info.additional_config: + additional_config_data = message.message_info.additional_config.copy() # 避免修改原始对象 + else: + additional_config_data = {} + + # 然后添加format_info到additional_config中 if hasattr(message.message_info, 'format_info') and message.message_info.format_info: format_info_dict = message.message_info.format_info.to_dict() additional_config_data["format_info"] = format_info_dict - - # 合并adapter传递的其他additional_config - if hasattr(message.message_info, 'additional_config') and message.message_info.additional_config: - additional_config_data.update(message.message_info.additional_config) + logger.debug(f"保存format_info: {format_info_dict}") + else: + logger.warning(f"[问题] 消息缺少format_info: message_id={getattr(message.message_info, 'message_id', 'unknown')}") + logger.warning("[问题] 这可能导致Action无法正确检查适配器支持的类型") # 序列化为JSON字符串以便存储 additional_config_json = orjson.dumps(additional_config_data).decode("utf-8") if additional_config_data else None diff --git a/src/chat/planner_actions/action_modifier.py b/src/chat/planner_actions/action_modifier.py index 69fc902de..d48af9761 100644 --- a/src/chat/planner_actions/action_modifier.py +++ b/src/chat/planner_actions/action_modifier.py @@ -223,12 +223,32 @@ class ActionModifier: list[str]: 支持的输出类型列表 """ # 检查additional_config是否存在且不为空 - if (chat_context.current_message - and hasattr(chat_context.current_message, "additional_config") - and chat_context.current_message.additional_config): + additional_config = None + has_additional_config = False + # 先检查 current_message 是否存在 + if not chat_context.current_message: + logger.warning(f"{self.log_prefix} [问题] chat_context.current_message 为 None,无法获取适配器支持的类型") + return ["text", "emoji"] # 返回基础类型 + + if hasattr(chat_context.current_message, "additional_config"): + additional_config = chat_context.current_message.additional_config + + # 更准确的非空判断 + if additional_config is not None: + if isinstance(additional_config, str) and additional_config.strip(): + has_additional_config = True + elif isinstance(additional_config, dict): + # 字典存在就可以,即使为空也可能有format_info字段 + has_additional_config = True + else: + logger.warning(f"{self.log_prefix} [问题] current_message 没有 additional_config 属性") + + logger.debug(f"{self.log_prefix} [调试] has_additional_config: {has_additional_config}") + + if has_additional_config: try: - additional_config = chat_context.current_message.additional_config + logger.debug(f"{self.log_prefix} [调试] 开始解析 additional_config") format_info = None # 处理additional_config可能是字符串或字典的情况 @@ -237,8 +257,7 @@ class ActionModifier: try: config = orjson.loads(additional_config) format_info = config.get("format_info") - except (orjson.JSONDecodeError, AttributeError, TypeError): - logger.debug("无法解析additional_config JSON字符串") + except (orjson.JSONDecodeError, AttributeError, TypeError) as e: format_info = None elif isinstance(additional_config, dict): @@ -247,7 +266,6 @@ class ActionModifier: # 如果找到了format_info,从中提取支持的类型 if format_info: - # 优先检查accept_format字段 if "accept_format" in format_info: accept_format = format_info["accept_format"] if isinstance(accept_format, str): @@ -258,11 +276,13 @@ class ActionModifier: accept_format = list(accept_format) if hasattr(accept_format, "__iter__") else [] # 合并基础类型和适配器特定类型 - return list(set(accept_format)) + result = list(set(accept_format)) + return result # 备用检查content_format字段 elif "content_format" in format_info: content_format = format_info["content_format"] + logger.debug(f"{self.log_prefix} [调试] 找到 content_format: {content_format}") if isinstance(content_format, str): content_format = [content_format] elif isinstance(content_format, list): @@ -270,10 +290,25 @@ class ActionModifier: else: content_format = list(content_format) if hasattr(content_format, "__iter__") else [] - return list(set(content_format)) - + result = list(set(content_format)) + return result + else: + logger.warning(f"{self.log_prefix} [问题] additional_config 中没有 format_info 字段") except Exception as e: - logger.debug(f"解析适配器格式信息失败,使用默认支持类型: {e}") + logger.error(f"{self.log_prefix} [问题] 解析适配器格式信息失败: {e}", exc_info=True) + else: + logger.warning(f"{self.log_prefix} [问题] additional_config 不存在或为空") + + # 如果无法获取格式信息,返回默认支持的基础类型 + default_types = ["text", "emoji"] + logger.warning( + f"{self.log_prefix} [问题] 无法从适配器获取支持的消息类型,使用默认类型: {default_types}" + ) + logger.warning( + f"{self.log_prefix} [问题] 这可能导致某些 Action 被错误地过滤。" + f"请检查适配器是否正确设置了 format_info。" + ) + return default_types async def _get_deactivated_actions_by_type( From b9e6caadc6454797f98a9d9fb8306424cd09c834 Mon Sep 17 00:00:00 2001 From: tt-P607 <68868379+tt-P607@users.noreply.github.com> Date: Sat, 25 Oct 2025 03:00:48 +0800 Subject: [PATCH 2/3] =?UTF-8?q?feat(tts):=20=E9=87=8D=E6=9E=84TTS=20Action?= =?UTF-8?q?=EF=BC=8C=E5=AE=9E=E7=8E=B0LLM=E5=AF=B9=E8=AF=AD=E9=9F=B3?= =?UTF-8?q?=E9=A3=8E=E6=A0=BC=E5=92=8C=E8=AF=AD=E8=A8=80=E7=9A=84=E7=B2=BE?= =?UTF-8?q?=E7=A1=AE=E6=8E=A7=E5=88=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 本次更新对TTS插件进行了重大重构,旨在赋予规划模型(LLM)对语音合成过程更直接、更精确的控制能力,从而显著提升语音输出的质量、灵活性和响应速度。 主要变更包括: 1. **LLM直控模式**: - 移除了原有的“主模型重写文本”步骤,TTS Action现在直接使用规划器在 `text` 参数中提供的最终文本进行合成。 - **原因**: 减少了不必要的API调用和处理延迟,同时确保LLM的意图能够被无损地传达到语音生成环节。 2. **增强的参数化**: - Action新增了 `voice_style` 和 `text_language` 参数,允许LLM根据对话上下文动态选择最合适的语音风格和语言模式。 - **原因**: 使语音能够更好地匹配情感和场景,并解决了以往自动语言检测在多语言混合场景下(如中日、中粤)可能出错的问题。 3. **动态风格加载**: - 可用的语音风格列表不再硬编码,而是从插件的 `config.toml` 配置文件中动态读取。 - **原因**: 极大地增强了插件的可配置性和可维护性,用户可以轻松地通过修改配置文件来添加或调整语音风格。 4. **优化的语言决策**: - 在 `TTSService` 中实现了更智能的语言选择逻辑,其优先级为:LLM直接指定 > 风格配置默认 > 内容自动检测。 - **原因**: 提供了多层次的控制,确保在各种情况下都能选择最优的语言模式进行合成。 5. **提示词强化**: - 更新了Action的描述和规则,特别是增加了对标点符号使用的“铁则”,以引导LLM生成更规范、更适合语音合成的文本。 - **原因**: 从源头上提升输入文本的质量,以确保语音停顿自然,避免合成失败。 --- .../tts_voice_plugin/actions/tts_action.py | 131 ++++++++++++------ .../tts_voice_plugin/services/tts_service.py | 53 +++++-- 2 files changed, 129 insertions(+), 55 deletions(-) diff --git a/src/plugins/built_in/tts_voice_plugin/actions/tts_action.py b/src/plugins/built_in/tts_voice_plugin/actions/tts_action.py index 9ea6a87f3..795891c02 100644 --- a/src/plugins/built_in/tts_voice_plugin/actions/tts_action.py +++ b/src/plugins/built_in/tts_voice_plugin/actions/tts_action.py @@ -2,8 +2,10 @@ TTS 语音合成 Action """ +import toml +from pathlib import Path + from src.common.logger import get_logger -from src.plugin_system.apis import generator_api from src.plugin_system.base.base_action import ActionActivationType, BaseAction, ChatMode from ..services.manager import get_service @@ -11,24 +13,96 @@ from ..services.manager import get_service logger = get_logger("tts_voice_plugin.action") +def _get_available_styles() -> list[str]: + """动态读取配置文件,获取所有可用的TTS风格名称""" + try: + # 这个路径构建逻辑是为了确保无论从哪里启动,都能准确定位到配置文件 + plugin_file = Path(__file__).resolve() + # Bot/src/plugins/built_in/tts_voice_plugin/actions -> Bot + bot_root = plugin_file.parent.parent.parent.parent.parent.parent + config_file = bot_root / "config" / "plugins" / "tts_voice_plugin" / "config.toml" + + if not config_file.is_file(): + logger.warning("在 tts_action 中未找到 tts_voice_plugin 的配置文件,无法动态加载风格列表。") + return ["default"] + + config = toml.loads(config_file.read_text(encoding="utf-8")) + + styles_config = config.get("tts_styles", []) + if not isinstance(styles_config, list): + return ["default"] + + # 使用显式循环和类型检查来提取 style_name,以确保 Pylance 类型检查通过 + style_names: list[str] = [] + for style in styles_config: + if isinstance(style, dict): + name = style.get("style_name") + # 确保 name 是一个非空字符串 + if isinstance(name, str) and name: + style_names.append(name) + + return style_names if style_names else ["default"] + except Exception as e: + logger.error(f"动态加载TTS风格列表时出错: {e}", exc_info=True) + return ["default"] # 出现任何错误都回退 + + +# 在类定义之前执行函数,获取风格列表 +AVAILABLE_STYLES = _get_available_styles() +STYLE_OPTIONS_DESC = ", ".join(f"'{s}'" for s in AVAILABLE_STYLES) + + class TTSVoiceAction(BaseAction): """ 通过关键词或规划器自动触发 TTS 语音合成 """ action_name = "tts_voice_action" - action_description = "使用GPT-SoVITS将文本转换为语音并发送" + action_description = "将你生成好的文本转换为语音并发送。你必须提供要转换的文本。" mode_enable = ChatMode.ALL parallel_action = False + action_parameters = { + "text": { + "type": "string", + "description": "需要转换为语音并发送的完整、自然、适合口语的文本内容。", + "required": True + }, + "voice_style": { + "type": "string", + "description": f"语音的风格。可用选项: [{STYLE_OPTIONS_DESC}]。请根据对话的情感和上下文选择一个最合适的风格。如果未提供,将使用默认风格。", + "required": False + }, + "text_language": { + "type": "string", + "description": ( + "指定用于合成的语言模式,请务必根据文本内容选择最精确、范围最小的选项以获得最佳效果。" + "可用选项说明:\n" + "- 'zh': 中文与英文混合 (最优选)\n" + "- 'ja': 日文与英文混合 (最优选)\n" + "- 'yue': 粤语与英文混合 (最优选)\n" + "- 'ko': 韩文与英文混合 (最优选)\n" + "- 'en': 纯英文\n" + "- 'all_zh': 纯中文\n" + "- 'all_ja': 纯日文\n" + "- 'all_yue': 纯粤语\n" + "- 'all_ko': 纯韩文\n" + "- 'auto': 多语种混合自动识别 (备用选项,当前两种语言时优先使用上面的精确选项)\n" + "- 'auto_yue': 多语种混合自动识别(包含粤语)(备用选项)" + ), + "required": False + } + } + action_require = [ + "在调用此动作时,你必须在 'text' 参数中提供要合成语音的完整回复内容。这是强制性的。", "当用户明确请求使用语音进行回复时,例如‘发个语音听听’、‘用语音说’等。", "当对话内容适合用语音表达,例如讲故事、念诗、撒嬌或进行角色扮演时。", "在表达特殊情感(如安慰、鼓励、庆祝)的场景下,可以主动使用语音来增强感染力。", "不要在日常的、简短的问答或闲聊中频繁使用语音,避免打扰用户。", - "文本内容必须是纯粹的对话,不能包含任何括号或方括号括起来的动作、表情、或场景描述(例如,不要出现 '(笑)' 或 '[歪头]')", - "必须使用标准、完整的标点符号(如逗号、句号、问号)来进行自然的断句,以确保语音停顿自然,避免生成一长串没有停顿的文本。" + "提供的 'text' 内容必须是纯粹的对话,不能包含任何括号或方括号括起来的动作、表情、或场景描述(例如,不要出现 '(笑)' 或 '[歪头]')", + "【**铁则**】为了确保语音停顿自然,'text' 参数中的所有断句【必须】使用且仅能使用以下标准标点符号:','、'。'、'?'、'!'。严禁使用 '~'、'...' 或其他任何非标准符号来分隔句子,否则将导致语音合成失败。" ] def __init__(self, *args, **kwargs): @@ -80,16 +154,23 @@ class TTSVoiceAction(BaseAction): initial_text = self.action_data.get("text", "").strip() voice_style = self.action_data.get("voice_style", "default") - logger.info(f"{self.log_prefix} 接收到规划器的初步文本: '{initial_text[:70]}...'") + # 新增:从决策模型获取指定的语言模式 + text_language = self.action_data.get("text_language") # 如果模型没给,就是 None + logger.info(f"{self.log_prefix} 接收到规划器初步文本: '{initial_text[:70]}...', 指定风格: {voice_style}, 指定语言: {text_language}") - # 1. 请求主回复模型生成高质量文本 - text = await self._generate_final_text(initial_text) + # 1. 使用规划器提供的文本 + text = initial_text if not text: - logger.warning(f"{self.log_prefix} 最终生成的文本为空,静默处理。") - return False, "最终生成的文本为空" + logger.warning(f"{self.log_prefix} 规划器提供的文本为空,静默处理。") + return False, "规划器提供的文本为空" # 2. 调用 TTSService 生成语音 - audio_b64 = await self.tts_service.generate_voice(text, voice_style) + logger.info(f"{self.log_prefix} 使用最终文本进行语音合成: '{text[:70]}...'") + audio_b64 = await self.tts_service.generate_voice( + text=text, + style_hint=voice_style, + language_hint=text_language # 新增:将决策模型指定的语言传递给服务 + ) if audio_b64: await self.send_custom(message_type="voice", content=audio_b64) @@ -115,33 +196,3 @@ class TTSVoiceAction(BaseAction): ) return False, f"语音合成出错: {e!s}" - async def _generate_final_text(self, initial_text: str) -> str: - """请求主回复模型生成或优化文本""" - try: - generation_reason = ( - "这是一个为语音消息(TTS)生成文本的特殊任务。" - "请基于规划器提供的初步文本,结合对话历史和自己的人设,将它优化成一句自然、富有感情、适合用语音说出的话。" - "最终指令:请务-必确保文本听起来像真实的、自然的口语对话,而不是书面语。" - ) - - logger.info(f"{self.log_prefix} 请求主回复模型(replyer)全新生成TTS文本...") - success, response_set, _ = await generator_api.rewrite_reply( - chat_stream=self.chat_stream, - reply_data={"raw_reply": initial_text, "reason": generation_reason}, - request_type="replyer" - ) - - if success and response_set: - text = "".join(str(seg[1]) if isinstance(seg, tuple) else str(seg) for seg in response_set).strip() - logger.info(f"{self.log_prefix} 成功生成高质量TTS文本: {text}") - return text - - if initial_text: - logger.warning(f"{self.log_prefix} 主模型生成失败,使用规划器原始文本作为兜底。") - return initial_text - - raise Exception("主模型未能生成回复,且规划器也未提供兜底文本。") - - except Exception as e: - logger.error(f"{self.log_prefix} 生成高质量回复内容时失败: {e}", exc_info=True) - return "" diff --git a/src/plugins/built_in/tts_voice_plugin/services/tts_service.py b/src/plugins/built_in/tts_voice_plugin/services/tts_service.py index c00eb31dd..d11dbd925 100644 --- a/src/plugins/built_in/tts_voice_plugin/services/tts_service.py +++ b/src/plugins/built_in/tts_voice_plugin/services/tts_service.py @@ -80,21 +80,34 @@ class TTSService: "prompt_language": style_cfg.get("prompt_language", "zh"), "gpt_weights": style_cfg.get("gpt_weights", default_gpt_weights), "sovits_weights": style_cfg.get("sovits_weights", default_sovits_weights), - "speed_factor": style_cfg.get("speed_factor"), # 读取独立的语速配置 + "speed_factor": style_cfg.get("speed_factor"), + "text_language": style_cfg.get("text_language", "auto"), # 新增:读取文本语言模式 } return styles - # ... [其他方法保持不变] ... - def _detect_language(self, text: str) -> str: - chinese_chars = len(re.findall(r"[\u4e00-\u9fff]", text)) - english_chars = len(re.findall(r"[a-zA-Z]", text)) + def _determine_final_language(self, text: str, mode: str) -> str: + """根据配置的语言策略和文本内容,决定最终发送给API的语言代码""" + # 如果策略是具体的语言(如 all_zh, ja),直接使用 + if mode not in ["auto", "auto_yue"]: + return mode + + # 对于 auto 和 auto_yue 策略,进行内容检测 + # 优先检测粤语 + if mode == "auto_yue": + cantonese_keywords = ["嘅", "喺", "咗", "唔", "係", "啲", "咩", "乜", "喂"] + if any(keyword in text for keyword in cantonese_keywords): + logger.info("在 auto_yue 模式下检测到粤语关键词,最终语言: yue") + return "yue" + + # 检测日语(简单启发式规则) japanese_chars = len(re.findall(r"[\u3040-\u309f\u30a0-\u30ff]", text)) - total_chars = chinese_chars + english_chars + japanese_chars - if total_chars == 0: return "zh" - if chinese_chars / total_chars > 0.3: return "zh" - elif japanese_chars / total_chars > 0.3: return "ja" - elif english_chars / total_chars > 0.8: return "en" - else: return "zh" + if japanese_chars > 5 and japanese_chars > len(re.findall(r"[\u4e00-\u9fff]", text)) * 0.5: + logger.info("检测到日语字符,最终语言: ja") + return "ja" + + # 默认回退到中文 + logger.info(f"在 {mode} 模式下未检测到特定语言,默认回退到: zh") + return "zh" def _clean_text_for_tts(self, text: str) -> str: # 1. 基本清理 @@ -259,7 +272,7 @@ class TTSService: logger.error(f"应用空间效果时出错: {e}", exc_info=True) return audio_data # 如果出错,返回原始音频 - async def generate_voice(self, text: str, style_hint: str = "default") -> str | None: + async def generate_voice(self, text: str, style_hint: str = "default", language_hint: str | None = None) -> str | None: self._load_config() if not self.tts_styles: @@ -282,11 +295,21 @@ class TTSService: clean_text = self._clean_text_for_tts(text) if not clean_text: return None - text_language = self._detect_language(clean_text) - logger.info(f"开始TTS语音合成,文本:{clean_text[:50]}..., 风格:{style}") + # 语言决策流程: + # 1. 优先使用决策模型直接指定的 language_hint (最高优先级) + if language_hint: + final_language = language_hint + logger.info(f"使用决策模型指定的语言: {final_language}") + else: + # 2. 如果模型未指定,则使用风格配置的 language_policy + language_policy = server_config.get("text_language", "auto") + final_language = self._determine_final_language(clean_text, language_policy) + logger.info(f"决策模型未指定语言,使用策略 '{language_policy}' -> 最终语言: {final_language}") + + logger.info(f"开始TTS语音合成,文本:{clean_text[:50]}..., 风格:{style}, 最终语言: {final_language}") audio_data = await self._call_tts_api( - server_config=server_config, text=clean_text, text_language=text_language, + server_config=server_config, text=clean_text, text_language=final_language, refer_wav_path=server_config.get("refer_wav_path"), prompt_text=server_config.get("prompt_text"), prompt_language=server_config.get("prompt_language"), From b72090f0242233624715c2d3b5358e9e95a52045 Mon Sep 17 00:00:00 2001 From: tt-P607 <68868379+tt-P607@users.noreply.github.com> Date: Sat, 25 Oct 2025 03:29:14 +0800 Subject: [PATCH 3/3] =?UTF-8?q?fix(chat):=20=E4=BF=AE=E5=A4=8D=E5=8A=A8?= =?UTF-8?q?=E4=BD=9C=E6=89=A7=E8=A1=8C=E5=BC=82=E5=B8=B8=E6=97=B6=20is=5Fr?= =?UTF-8?q?eplying=20=E7=8A=B6=E6=80=81=E6=9C=AA=E9=87=8D=E7=BD=AE?= =?UTF-8?q?=E7=9A=84=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 在 `execute_action` 方法中引入 `try...finally` 结构,以确保无论动作执行成功与否,`is_replying` 状态最终都能被可靠地重置为 `False`。 此更改解决了在动作执行期间发生意外错误时,聊天流可能被永久锁定在“正在回复”状态的问题,从而提高了系统的健壮性。 --- src/chat/planner_actions/action_manager.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/chat/planner_actions/action_manager.py b/src/chat/planner_actions/action_manager.py index e7ff21ad4..90d2b265e 100644 --- a/src/chat/planner_actions/action_manager.py +++ b/src/chat/planner_actions/action_manager.py @@ -165,6 +165,7 @@ class ChatterActionManager: 执行结果 """ + chat_stream = None try: logger.debug(f"🎯 [ActionManager] execute_action接收到 target_message: {target_message}") # 通过chat_id获取chat_stream @@ -180,6 +181,9 @@ class ChatterActionManager: "error": "chat_stream not found", } + # 设置正在回复的状态 + chat_stream.context_manager.context.is_replying = True + if action_name == "no_action": return {"action_type": "no_action", "success": True, "reply_text": "", "command": ""} @@ -205,7 +209,7 @@ class ChatterActionManager: action_build_into_prompt=False, action_prompt_display=reason, action_done=True, - thinking_id=thinking_id, + thinking_id=thinking_id or "", action_data={"reason": reason}, action_name="no_reply", ) @@ -298,6 +302,10 @@ class ChatterActionManager: "loop_info": None, "error": str(e), } + finally: + # 确保重置正在回复的状态 + if chat_stream: + chat_stream.context_manager.context.is_replying = False async def _record_action_to_message(self, chat_stream, action_name, target_message, action_data): """