diff --git a/src/chat/replyer/default_generator.py b/src/chat/replyer/default_generator.py
index 4d6d7294c..5afaaca75 100644
--- a/src/chat/replyer/default_generator.py
+++ b/src/chat/replyer/default_generator.py
@@ -1474,19 +1474,19 @@ class DefaultReplyer:
         # --- 动态添加分割指令 ---
         if global_config.response_splitter.enable and global_config.response_splitter.split_mode == "llm":
             split_instruction = """
-## 消息分段指导
-你的任务是将回复分割成多段发送，以模仿人类自然、富有节奏感的聊天方式。
+## 关于回复分割的一些小建议
 
-**核心指导**:
-1.  **短句优先**: 请尽量将回复拆分成 20-30 字左右的短句，模拟逐步思考和表达的节奏。
-2.  **智能分割时机**: 为了让分割更自然，**推荐**在以下时机插入 `[SPLIT]` 标记：
-    - **标点符号后**: 这是最常见的分割点，如句号、逗号、问号、感叹号之后。
-    - **逻辑转折处**: 当意思发生转变时，例如使用“但是”、“不过”、“而且”、“所以”等词语后。
-    - **话题切换时**: 当你准备从一个话题跳到另一个相关话题时。
-3.  **保持流畅**: 如果一句话本身就很短，或者分割后会显得不自然，就不要强行分割。
+这个指令的**唯一目的**是为了**提高可读性**，将一个**单一、完整的回复**拆分成视觉上更易读的短句，**而不是让你生成多个不同的回复**。
 
-**任务**:
-请像一个真正在聊天的朋友一样，参考以上建议，自然地决定在哪里插入 `[SPLIT]` 标记，让对话显得更生动。
+请在思考好的、连贯的回复中，找到合适的停顿点插入 `[SPLIT]` 标记。
+
+**最重要的原则：**
+- **禁止内容重复**：分割后的各个部分必须是**一个连贯思想的不同阶段**，绝不能是相似意思的重复表述。
+
+**一些可以参考的分割时机：**
+1.  **短句优先**: 整体上，让每个分割后的句子长度在 20-30 字左右会显得很自然。
+2.  **自然停顿**: 在自然的标点符号（如逗号、问号）后，或者在逻辑转折词（如“而且”、“不过”）后，都是不错的分割点。
+3.  **保留连贯性**: 请确保所有被 `[SPLIT]` 分隔的句子能无缝拼接成一个逻辑通顺的完整回复。如果一句话很短，或者分割会破坏语感，就不要分割。
 """
             # 将分段指令添加到提示词顶部
             prompt_text = f"{split_instruction}\n{prompt_text}"
diff --git a/src/chat/utils/utils.py b/src/chat/utils/utils.py
index 6f2854f38..905b0f0cb 100644
--- a/src/chat/utils/utils.py
+++ b/src/chat/utils/utils.py
@@ -295,35 +295,91 @@ def random_remove_punctuation(text: str) -> str:
     return result
 
 
+def protect_special_blocks(text: str) -> tuple[str, dict[str, str]]:
+    """Replace code blocks, math blocks and formula-like runs with placeholders.
+
+    Fenced code (```...```) and display math ($$...$$) are protected first; after
+    that, any run of 5+ characters from an ASCII/math-symbol set is protected too.
+    Each substitution happens in a single ``re.sub`` pass at the exact match
+    position, so a later fragment (e.g. the word "GENERAL") can never be replaced
+    inside a placeholder that was inserted earlier.
+
+    Args:
+        text: The text to protect.
+
+    Returns:
+        ``(protected_text, placeholder_map)``, where the map goes from each
+        placeholder to the original fragment it stands for.
+    """
+    placeholder_map: dict[str, str] = {}
+
+    def make_replacer(block_type: str, skip_protected: bool = False):
+        """Build a ``re.sub`` callback that records and swaps one kind of block."""
+
+        def replace(match: re.Match[str]) -> str:
+            original = match.group(0)
+            # A fragment that already holds a placeholder must stay untouched.
+            if skip_protected and "__SPECIAL_" in original:
+                return original
+            # The running count keeps placeholder numbers unique across layers.
+            placeholder = f"__SPECIAL_{block_type}_{len(placeholder_map)}__"
+            placeholder_map[placeholder] = original
+            return placeholder
+
+        return replace
+
+    # Layer 1: standard Markdown blocks; re.S lets "." match across newlines.
+    for block_type, pattern in (("CODE", r"```.*?```"), ("MATH", r"\$\$.*?\$\$")):
+        text = re.sub(pattern, make_replacer(block_type), text, flags=re.S)
+
+    # Layer 2: non-standard fragments that may be formulas or code.
+    general_pattern = r"(?:[a-zA-Z0-9\s.,;:(){}\[\]_+\-*/=<>^|&%?!'\"√²³ⁿ∑∫≠≥≤]){5,}"
+    text = re.sub(general_pattern, make_replacer("GENERAL", skip_protected=True), text)
+    return text, placeholder_map
+
+def recover_special_blocks(sentences: list[str], placeholder_map: dict[str, str]) -> list[str]:
+    """Restore protected blocks, newest placeholder first so wrapped (nested) ones expand too."""
+    recovered_sentences = []
+    for sentence in sentences:
+        for placeholder, original_block in reversed(placeholder_map.items()):
+            sentence = sentence.replace(placeholder, original_block)
+        recovered_sentences.append(sentence)
+    return recovered_sentences
+
 def process_llm_response(text: str, enable_splitter: bool = True, enable_chinese_typo: bool = True) -> list[str]:
     if not global_config.response_post_process.enable_response_post_process:
         return [text]
 
-    # 先保护颜文字
-    if global_config.response_splitter.enable_kaomoji_protection:
-        protected_text, kaomoji_mapping = protect_kaomoji(text)
-        logger.debug(f"保护颜文字后的文本: {protected_text}")
-    else:
-        protected_text = text
-        kaomoji_mapping = {}
+    # --- 双层防护系统 ---
+    # 第一层：保护颜文字
+    protected_text, kaomoji_mapping = protect_kaomoji(text) if global_config.response_splitter.enable_kaomoji_protection else (text, {})
+    
+    # 第二层：保护数学公式和代码块
+    protected_text, special_blocks_mapping = protect_special_blocks(protected_text)
+    
     # 提取被 () 或 [] 或 （）包裹且包含中文的内容
     pattern = re.compile(r"[(\[（](?=.*[一-鿿]).*?[)\]）]")
-    _extracted_contents = pattern.findall(protected_text)  # 在保护后的文本上查找
-    # 去除 () 和 [] 及其包裹的内容
+    _extracted_contents = pattern.findall(protected_text)
     cleaned_text = pattern.sub("", protected_text)
 
-    if cleaned_text == "":
+    if cleaned_text.strip() == "":
+        # 如果清理后只剩下特殊块，直接恢复并返回
+        if special_blocks_mapping:
+             recovered = recover_special_blocks([protected_text], special_blocks_mapping)
+             return recover_kaomoji(recovered, kaomoji_mapping)
         return ["呃呃"]
 
     logger.debug(f"{text}去除括号处理后的文本: {cleaned_text}")
 
     # 对清理后的文本进行进一步处理
-    max_length = global_config.response_splitter.max_length * 2
     max_sentence_num = global_config.response_splitter.max_sentence_num
-    # 如果基本上是中文，则进行长度过滤
-    if get_western_ratio(cleaned_text) < 0.1 and len(cleaned_text) > max_length:
-        logger.warning(f"回复过长 ({len(cleaned_text)} 字符)，返回默认回复")
-        return ["懒得说"]
+    
+    # --- 移除总长度检查 ---
+    # 原有的总长度检查会导致长回复被直接丢弃，现已移除，由后续的智能合并逻辑处理。
+    # max_length = global_config.response_splitter.max_length * 2
+    # if get_western_ratio(cleaned_text) < 0.1 and len(cleaned_text) > max_length:
+    #     logger.warning(f"回复过长 ({len(cleaned_text)} 字符)，返回默认回复")
+    #     return ["懒得说"]
 
     typo_generator = ChineseTypoGenerator(
         error_rate=global_config.chinese_typo.error_rate,
@@ -364,15 +420,44 @@ def process_llm_response(text: str, enable_splitter: bool = True, enable_chinese
         else:
             sentences.append(sentence)
 
+    # 如果分割后的句子数量超过上限，则启动智能合并逻辑
     if len(sentences) > max_sentence_num:
-        logger.warning(f"分割后消息数量过多 ({len(sentences)} 条)，返回默认回复")
-        return [f"{global_config.bot.nickname}不知道哦"]
+        logger.info(f"分割后消息数量 ({len(sentences)}) 超过上限 ({max_sentence_num})，启动智能合并...")
+
+        # 计算需要合并的次数
+        num_to_merge = len(sentences) - max_sentence_num
+
+        for _ in range(num_to_merge):
+            # 如果句子数量已经达标，提前退出
+            if len(sentences) <= max_sentence_num:
+                break
+
+            # 寻找最短的相邻句子对
+            min_len = float('inf')
+            merge_idx = -1
+            for i in range(len(sentences) - 1):
+                combined_len = len(sentences[i]) + len(sentences[i+1])
+                if combined_len < min_len:
+                    min_len = combined_len
+                    merge_idx = i
+
+            # 如果找到了可以合并的对，则执行合并
+            if merge_idx != -1:
+                # 将后一个句子合并到前一个句子
+                # NOTE: a full-width comma "，" is always inserted between the two parts, even when the first already ends with punctuation
+                merged_sentence = sentences[merge_idx] + "，" + sentences[merge_idx + 1]
+                sentences[merge_idx] = merged_sentence
+                # 删除后一个句子
+                del sentences[merge_idx + 1]
+        
+        logger.info(f"智能合并完成，最终消息数量: {len(sentences)}")
 
     # if extracted_contents:
     #     for content in extracted_contents:
     #         sentences.append(content)
 
-    # 在所有句子处理完毕后，对包含占位符的列表进行恢复
+    # --- 恢复所有被保护的内容 ---
+    sentences = recover_special_blocks(sentences, special_blocks_mapping)
     if global_config.response_splitter.enable_kaomoji_protection:
         sentences = recover_kaomoji(sentences, kaomoji_mapping)
 
diff --git a/src/llm_models/utils_model.py b/src/llm_models/utils_model.py
index b2b4ba928..f60f8c205 100644
--- a/src/llm_models/utils_model.py
+++ b/src/llm_models/utils_model.py
@@ -802,6 +802,7 @@ class LLMRequest:
             for model in self.model_for_task.model_list
         }
         """模型使用量记录"""
+        self._lock = asyncio.Lock()
 
         # 初始化辅助类
         self._model_selector = _ModelSelector(self.model_for_task.model_list, self.model_usage)
@@ -930,42 +931,43 @@ class LLMRequest:
         tools: list[dict[str, Any]] | None = None,
         raise_when_empty: bool = True,
     ) -> tuple[str, tuple[str, str, list[ToolCall] | None]]:
-        """
-        执行单次文本生成请求的内部方法。
-        这是 `generate_response_async` 的核心实现，处理单个请求的完整生命周期，
-        包括工具构建、故障转移执行和用量记录。
+        async with self._lock:
+            """
+            执行单次文本生成请求的内部方法。
+            这是 `generate_response_async` 的核心实现，处理单个请求的完整生命周期，
+            包括工具构建、故障转移执行和用量记录。
 
-        Args:
-            prompt (str): 用户的提示。
-            temperature (Optional[float]): 生成温度。
-            max_tokens (Optional[int]): 最大生成令牌数。
-            tools (Optional[List[Dict[str, Any]]]): 可用工具列表。
-            raise_when_empty (bool): 如果响应为空是否引发异常。
+            Args:
+                prompt (str): 用户的提示。
+                temperature (Optional[float]): 生成温度。
+                max_tokens (Optional[int]): 最大生成令牌数。
+                tools (Optional[List[Dict[str, Any]]]): 可用工具列表。
+                raise_when_empty (bool): 如果响应为空是否引发异常。
 
-        Returns:
-            Tuple[str, Tuple[str, str, Optional[List[ToolCall]]]]:
-                (响应内容, (推理过程, 模型名称, 工具调用))
-        """
-        start_time = time.time()
-        tool_options = await self._build_tool_options(tools)
+            Returns:
+                Tuple[str, Tuple[str, str, Optional[List[ToolCall]]]]:
+                    (响应内容, (推理过程, 模型名称, 工具调用))
+            """
+            start_time = time.time()
+            tool_options = await self._build_tool_options(tools)
 
-        response, model_info = await self._strategy.execute_with_failover(
-            RequestType.RESPONSE,
-            raise_when_empty=raise_when_empty,
-            prompt=prompt,  # 传递原始prompt，由strategy处理
-            tool_options=tool_options,
-            temperature=self.model_for_task.temperature if temperature is None else temperature,
-            max_tokens=self.model_for_task.max_tokens if max_tokens is None else max_tokens,
-        )
+            response, model_info = await self._strategy.execute_with_failover(
+                RequestType.RESPONSE,
+                raise_when_empty=raise_when_empty,
+                prompt=prompt,  # 传递原始prompt，由strategy处理
+                tool_options=tool_options,
+                temperature=self.model_for_task.temperature if temperature is None else temperature,
+                max_tokens=self.model_for_task.max_tokens if max_tokens is None else max_tokens,
+            )
 
-        await self._record_usage(model_info, response.usage, time.time() - start_time, "/chat/completions")
+            await self._record_usage(model_info, response.usage, time.time() - start_time, "/chat/completions")
 
-        if not response.content and not response.tool_calls:
-            if raise_when_empty:
-                raise RuntimeError("所选模型生成了空回复。")
-            response.content = "生成的响应为空"
+            if not response.content and not response.tool_calls:
+                if raise_when_empty:
+                    raise RuntimeError("所选模型生成了空回复。")
+                response.content = "生成的响应为空"
 
-        return response.content or "", (response.reasoning_content or "", model_info.name, response.tool_calls)
+            return response.content or "", (response.reasoning_content or "", model_info.name, response.tool_calls)
 
     async def get_embedding(self, embedding_input: str) -> tuple[list[float], str]:
         """