refactor(chat): overhaul response processing for robustness and integrity

This commit redesigns the LLM response handling pipeline to gracefully manage complex, long, and structured outputs, shifting from a restrictive filtering model to a more resilient, content-aware approach. The previous implementation was brittle: it often discarded valid responses or corrupted formatted content.

This refactor introduces two core changes to ensure response integrity:

1. **Adaptive Message Merging:** Replaces the rigid message count and length limits, which caused valid long-form answers to be silently dropped. When a response is split into more messages than the configured maximum, the new logic repeatedly merges the adjacent pair of sentences with the smallest combined length until the count is within the limit, so the full thought is preserved while still respecting display limits.
2. **Content-Aware Pre-processing:** Implements a protection mechanism for special text formats. It identifies Markdown code blocks, LaTeX-style math equations, and other code-like segments and replaces them with placeholders before splitting or typo generation, then restores them afterwards. This prevents formatting corruption and ensures technical content is rendered correctly.

Additionally, the LLM prompt for splitting has been refined to emphasize logical coherence over simple fragmentation.

Finally, this commit hardens the `LLMRequest` class against race conditions by adding an `asyncio.Lock` that serializes generation requests, improving the system's stability under concurrent workloads.
2025-10-17 02:34:39 +08:00
parent 514da0b56f
commit 1f26238368
3 changed files with 146 additions and 59 deletions
--- a/src/chat/replyer/default_generator.py
+++ b/src/chat/replyer/default_generator.py
@@ -1474,19 +1474,19 @@ class DefaultReplyer:
        # --- 动态添加分割指令 ---
        if global_config.response_splitter.enable and global_config.response_splitter.split_mode == "llm":
            split_instruction = """
-## 消息分段指导
+## 关于回复分割的一些小建议
 你的任务是将回复分割成多段发送，以模仿人类自然、富有节奏感的聊天方式。
-**核心指导**:
+这个指令的**唯一目的**是为了**提高可读性**，将一个**单一、完整的回复**拆分成视觉上更易读的短句，**而不是让你生成多个不同的回复**。
 1.  **短句优先**: 请尽量将回复拆分成 20-30 字左右的短句，模拟逐步思考和表达的节奏。
 2.  **智能分割时机**: 为了让分割更自然，**推荐**在以下时机插入 `[SPLIT]` 标记：
    - **标点符号后**: 这是最常见的分割点，如句号、逗号、问号、感叹号之后。
    - **逻辑转折处**: 当意思发生转变时，例如使用“但是”、“不过”、“而且”、“所以”等词语后。
    - **话题切换时**: 当你准备从一个话题跳到另一个相关话题时。
 3.  **保持流畅**: 如果一句话本身就很短，或者分割后会显得不自然，就不要强行分割。
-**任务**:
+请在思考好的、连贯的回复中，找到合适的停顿点插入 `[SPLIT]` 标记。
-请像一个真正在聊天的朋友一样，参考以上建议，自然地决定在哪里插入 `[SPLIT]` 标记，让对话显得更生动。
+
 **最重要的原则：**
 - **禁止内容重复**：分割后的各个部分必须是**一个连贯思想的不同阶段**，绝不能是相似意思的重复表述。
 **一些可以参考的分割时机：**
 1.  **短句优先**: 整体上，让每个分割后的句子长度在 20-30 字左右会显得很自然。
 2.  **自然停顿**: 在自然的标点符号（如逗号、问号）后，或者在逻辑转折词（如“而且”、“不过”）后，都是不错的分割点。
 3.  **保留连贯性**: 请确保所有被 `[SPLIT]` 分隔的句子能无缝拼接成一个逻辑通顺的完整回复。如果一句话很短，或者分割会破坏语感，就不要分割。
 """
            # 将分段指令添加到提示词顶部
            prompt_text = f"{split_instruction}\n{prompt_text}"
--- a/src/chat/utils/utils.py
+++ b/src/chat/utils/utils.py
@@ -295,35 +295,91 @@ def random_remove_punctuation(text: str) -> str:
    return result
 def protect_special_blocks(text: str) -> tuple[str, dict[str, str]]:
    """Replace code blocks, math blocks and code-like spans with placeholders.

    Two layers of protection are applied in order:

    1. Standard Markdown fences: triple-backtick code blocks and ``$$...$$``
       math blocks (matched with ``re.S`` so they may span several lines).
    2. Any run of five or more characters drawn from ASCII letters, digits,
       whitespace, common punctuation/operators and a few math symbols,
       i.e. text that is probably a formula or a code snippet.

    Every protected span is swapped for a unique ``__SPECIAL_<KIND>_<n>__``
    token, where ``<n>`` is a counter shared by both layers.

    Args:
        text: The raw text to protect.

    Returns:
        A ``(protected_text, placeholder_map)`` tuple. ``placeholder_map``
        maps each inserted placeholder to the original span it replaced, in
        insertion order.
    """
    placeholder_map: dict[str, str] = {}

    def _stash(kind: str, block: str) -> str:
        # One entry is added per placeholder, so the map size doubles as the
        # running index shared by all layers.
        placeholder = f"__SPECIAL_{kind}_{len(placeholder_map)}__"
        placeholder_map[placeholder] = block
        return placeholder

    # Layer 1: standard Markdown blocks. re.S lets "." match newlines.
    markdown_patterns = {
        "code": r"```.*?```",
        "math": r"\$\$.*?\$\$",
    }
    for block_type, pattern in markdown_patterns.items():
        kind = block_type.upper()
        text = re.sub(pattern, lambda m, kind=kind: _stash(kind, m.group(0)), text, flags=re.S)

    # Layer 2: non-standard fragments that look like formulas or code.
    general_pattern = r"(?:[a-zA-Z0-9\s.,;:(){}\[\]_+\-*/=<>^|&%?!'\"√²³ⁿ∑∫≠≥≤]){5,}"

    def _protect_general(m: "re.Match[str]") -> str:
        fragment = m.group(0)
        # Placeholders consist solely of characters in the class above, so a
        # run touching one swallows it; leave such runs alone rather than
        # protecting an already protected span a second time.
        if "__SPECIAL_" in fragment:
            return fragment
        return _stash("GENERAL", fragment)

    try:
        # re.sub substitutes by match position in a single pass. Looking the
        # matched text up again with str.replace could hit the inside of a
        # placeholder inserted moments earlier (e.g. a later match "GENERAL"
        # landing in "__SPECIAL_GENERAL_0__") and corrupt the text beyond
        # recovery.
        text = re.sub(general_pattern, _protect_general, text)
    except re.error as e:
        # re.error is raised when the pattern is compiled, before any
        # substitution, so text and placeholder_map stay consistent.
        logger.error(f"特殊区域防护正则表达式错误: {e}")

    return text, placeholder_map
 def recover_special_blocks(sentences: list[str], placeholder_map: dict[str, str]) -> list[str]:
    """Put protected blocks back into each sentence.

    Args:
        sentences: Text fragments that may contain placeholder tokens.
        placeholder_map: Mapping from placeholder token to the original text
            it stands for.

    Returns:
        A new list with every occurrence of every placeholder in every
        sentence replaced by its original text. Placeholders are applied in
        the map's iteration order.
    """
    def _restore(fragment: str) -> str:
        # Apply the substitutions one after another on the same fragment.
        for token, block in placeholder_map.items():
            fragment = fragment.replace(token, block)
        return fragment

    return [_restore(fragment) for fragment in sentences]
 def process_llm_response(text: str, enable_splitter: bool = True, enable_chinese_typo: bool = True) -> list[str]:
    if not global_config.response_post_process.enable_response_post_process:
        return [text]
-    # 先保护颜文字
+    # --- 双层防护系统 ---
-    if global_config.response_splitter.enable_kaomoji_protection:
+    # 第一层：保护颜文字
-        protected_text, kaomoji_mapping = protect_kaomoji(text)
+    protected_text, kaomoji_mapping = protect_kaomoji(text) if global_config.response_splitter.enable_kaomoji_protection else (text, {})
-        logger.debug(f"保护颜文字后的文本: {protected_text}")
+    
-    else:
+    # 第二层：保护数学公式和代码块
-        protected_text = text
+    protected_text, special_blocks_mapping = protect_special_blocks(protected_text)
-        kaomoji_mapping = {}
+    
    # 提取被 () 或 [] 或 （）包裹且包含中文的内容
    pattern = re.compile(r"[(\[（](?=.*[一-鿿]).*?[)\]）]")
-    _extracted_contents = pattern.findall(protected_text)  # 在保护后的文本上查找
+    _extracted_contents = pattern.findall(protected_text)
    # 去除 () 和 [] 及其包裹的内容
    cleaned_text = pattern.sub("", protected_text)
-    if cleaned_text == "":
+    if cleaned_text.strip() == "":
        # 如果清理后只剩下特殊块，直接恢复并返回
        if special_blocks_mapping:
             recovered = recover_special_blocks([protected_text], special_blocks_mapping)
             return recover_kaomoji(recovered, kaomoji_mapping)
        return ["呃呃"]
    logger.debug(f"{text}去除括号处理后的文本: {cleaned_text}")
    # 对清理后的文本进行进一步处理
    max_length = global_config.response_splitter.max_length * 2
    max_sentence_num = global_config.response_splitter.max_sentence_num
-    # 如果基本上是中文，则进行长度过滤
+    
-    if get_western_ratio(cleaned_text) < 0.1 and len(cleaned_text) > max_length:
+    # --- 移除总长度检查 ---
-        logger.warning(f"回复过长 ({len(cleaned_text)} 字符)，返回默认回复")
+    # 原有的总长度检查会导致长回复被直接丢弃，现已移除，由后续的智能合并逻辑处理。
-        return ["懒得说"]
+    # max_length = global_config.response_splitter.max_length * 2
    # if get_western_ratio(cleaned_text) < 0.1 and len(cleaned_text) > max_length:
    #     logger.warning(f"回复过长 ({len(cleaned_text)} 字符)，返回默认回复")
    #     return ["懒得说"]
    typo_generator = ChineseTypoGenerator(
        error_rate=global_config.chinese_typo.error_rate,
@@ -364,15 +420,44 @@ def process_llm_response(text: str, enable_splitter: bool = True, enable_chinese
        else:
            sentences.append(sentence)
    # 如果分割后的句子数量超过上限，则启动智能合并逻辑
    if len(sentences) > max_sentence_num:
-        logger.warning(f"分割后消息数量过多 ({len(sentences)} 条)，返回默认回复")
+        logger.info(f"分割后消息数量 ({len(sentences)}) 超过上限 ({max_sentence_num})，启动智能合并...")
-        return [f"{global_config.bot.nickname}不知道哦"]
+
        # 计算需要合并的次数
        num_to_merge = len(sentences) - max_sentence_num
        for _ in range(num_to_merge):
            # 如果句子数量已经达标，提前退出
            if len(sentences) <= max_sentence_num:
                break
            # 寻找最短的相邻句子对
            min_len = float('inf')
            merge_idx = -1
            for i in range(len(sentences) - 1):
                combined_len = len(sentences[i]) + len(sentences[i+1])
                if combined_len < min_len:
                    min_len = combined_len
                    merge_idx = i
            # 如果找到了可以合并的对，则执行合并
            if merge_idx != -1:
                # 将后一个句子合并到前一个句子
                # 我们在合并时保留原始标点（如果有的话），或者添加一个逗号来确保可读性
                merged_sentence = sentences[merge_idx] + "，" + sentences[merge_idx + 1]
                sentences[merge_idx] = merged_sentence
                # 删除后一个句子
                del sentences[merge_idx + 1]
        logger.info(f"智能合并完成，最终消息数量: {len(sentences)}")
    # if extracted_contents:
    #     for content in extracted_contents:
    #         sentences.append(content)
-    # 在所有句子处理完毕后，对包含占位符的列表进行恢复
+    # --- 恢复所有被保护的内容 ---
    sentences = recover_special_blocks(sentences, special_blocks_mapping)
    if global_config.response_splitter.enable_kaomoji_protection:
        sentences = recover_kaomoji(sentences, kaomoji_mapping)
--- a/src/llm_models/utils_model.py
+++ b/src/llm_models/utils_model.py
@@ -802,6 +802,7 @@ class LLMRequest:
            for model in self.model_for_task.model_list
        }
        """模型使用量记录"""
        self._lock = asyncio.Lock()
        # 初始化辅助类
        self._model_selector = _ModelSelector(self.model_for_task.model_list, self.model_usage)
@@ -930,42 +931,43 @@ class LLMRequest:
        tools: list[dict[str, Any]] | None = None,
        raise_when_empty: bool = True,
    ) -> tuple[str, tuple[str, str, list[ToolCall] | None]]:
-        """
+        async with self._lock:
-        执行单次文本生成请求的内部方法。
+            """
-        这是 `generate_response_async` 的核心实现，处理单个请求的完整生命周期，
+            执行单次文本生成请求的内部方法。
-        包括工具构建、故障转移执行和用量记录。
+            这是 `generate_response_async` 的核心实现，处理单个请求的完整生命周期，
            包括工具构建、故障转移执行和用量记录。
-        Args:
+            Args:
-            prompt (str): 用户的提示。
+                prompt (str): 用户的提示。
-            temperature (Optional[float]): 生成温度。
+                temperature (Optional[float]): 生成温度。
-            max_tokens (Optional[int]): 最大生成令牌数。
+                max_tokens (Optional[int]): 最大生成令牌数。
-            tools (Optional[List[Dict[str, Any]]]): 可用工具列表。
+                tools (Optional[List[Dict[str, Any]]]): 可用工具列表。
-            raise_when_empty (bool): 如果响应为空是否引发异常。
+                raise_when_empty (bool): 如果响应为空是否引发异常。
-        Returns:
+            Returns:
-            Tuple[str, Tuple[str, str, Optional[List[ToolCall]]]]:
+                Tuple[str, Tuple[str, str, Optional[List[ToolCall]]]]:
-                (响应内容, (推理过程, 模型名称, 工具调用))
+                    (响应内容, (推理过程, 模型名称, 工具调用))
-        """
+            """
-        start_time = time.time()
+            start_time = time.time()
-        tool_options = await self._build_tool_options(tools)
+            tool_options = await self._build_tool_options(tools)
-        response, model_info = await self._strategy.execute_with_failover(
+            response, model_info = await self._strategy.execute_with_failover(
-            RequestType.RESPONSE,
+                RequestType.RESPONSE,
-            raise_when_empty=raise_when_empty,
+                raise_when_empty=raise_when_empty,
-            prompt=prompt,  # 传递原始prompt，由strategy处理
+                prompt=prompt,  # 传递原始prompt，由strategy处理
-            tool_options=tool_options,
+                tool_options=tool_options,
-            temperature=self.model_for_task.temperature if temperature is None else temperature,
+                temperature=self.model_for_task.temperature if temperature is None else temperature,
-            max_tokens=self.model_for_task.max_tokens if max_tokens is None else max_tokens,
+                max_tokens=self.model_for_task.max_tokens if max_tokens is None else max_tokens,
-        )
+            )
-        await self._record_usage(model_info, response.usage, time.time() - start_time, "/chat/completions")
+            await self._record_usage(model_info, response.usage, time.time() - start_time, "/chat/completions")
-        if not response.content and not response.tool_calls:
+            if not response.content and not response.tool_calls:
-            if raise_when_empty:
+                if raise_when_empty:
-                raise RuntimeError("所选模型生成了空回复。")
+                    raise RuntimeError("所选模型生成了空回复。")
-            response.content = "生成的响应为空"
+                response.content = "生成的响应为空"
-        return response.content or "", (response.reasoning_content or "", model_info.name, response.tool_calls)
+            return response.content or "", (response.reasoning_content or "", model_info.name, response.tool_calls)
    async def get_embedding(self, embedding_input: str) -> tuple[list[float], str]:
        """