From 62c548ad2b6682dc772c62bc77a9d64751195097 Mon Sep 17 00:00:00 2001
From: tt-P607 <68868379+tt-P607@users.noreply.github.com>
Date: Thu, 11 Sep 2025 16:09:48 +0800
Subject: [PATCH] =?UTF-8?q?feat(chat):=20=E5=AE=9E=E7=8E=B0=E7=94=B1=20LLM?=
 =?UTF-8?q?=20=E6=8E=A7=E5=88=B6=E7=9A=84=E8=87=AA=E7=84=B6=E5=9B=9E?=
 =?UTF-8?q?=E5=A4=8D=E5=88=86=E5=89=B2?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

之前基于标点符号的自动分割逻辑较为僵硬，有时会破坏回复的连贯性，导致对话体验不佳。

本次更新引入了一种由 LLM 主导的回复分割机制：
1.  在 Prompt 中增加了明确的分割指令，引导 LLM 在需要模拟人类对话停顿或转折时，使用 `[SPLIT]` 标记。
2.  后端回复处理逻辑相应更新：不再按标点符号自动分句，改为仅根据 `[SPLIT]` 标记分割消息。
3.  若 LLM 未提供 `[SPLIT]` 标记，则将整段回复作为单条消息发送，避免了不必要的拆分。

此项改动旨在让消息的发送节奏更贴近真实人类的聊天习惯，从而提升交互的自然感和流畅度。
---
 src/chat/replyer/default_generator.py | 17 ++++++++++++++++-
 src/chat/utils/utils.py               | 11 ++++++++++-
 2 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/src/chat/replyer/default_generator.py b/src/chat/replyer/default_generator.py
index ef9cce84d..3ad209aa4 100644
--- a/src/chat/replyer/default_generator.py
+++ b/src/chat/replyer/default_generator.py
@@ -136,7 +136,7 @@ def init_prompt():
 4.  不要浮夸，不要夸张修辞，不要输出多余内容(包括前后缀，冒号和引号，括号()，表情包，at或 @等 )。
 最终请输出一条简短、完整且口语化的回复。
 
---------------------------------
+ --------------------------------
 {time_block}
 
 {reply_target_block}
@@ -1024,6 +1024,21 @@ class DefaultReplyer:
         prompt = Prompt(template=template_prompt.template, parameters=prompt_parameters)
         prompt_text = await prompt.build()
 
+        # --- 动态添加分割指令 ---
+        if global_config.response_splitter.enable:
+            split_instruction = """
+## 分割指令
+你正在通过一个即时聊天软件发送消息。请模仿一个真实人类的打字和发送习惯：
+- **简洁明了**: 如果一句话能说清楚，就一次性发出去，不要添加任何标记。
+- **自然断句**: 当你想表达一个转折、一个停顿，或者想补充说明时，就像正常人会先发一部分再发另一部分一样，请在断句处插入 `[SPLIT]` 标记。
+- **动机**: 使用 `[SPLIT]` 的目的是为了让对话节奏更自然，更有层次感，而不是为了分割而分割。
+示例: "我刚刚看到一个超好笑的视频！[SPLIT]等我找找发给你~"
+"""
+            # 在 "现在，你说：" 之前插入
+            parts = prompt_text.rsplit("现在，你说：", 1)
+            if len(parts) == 2:
+                prompt_text = f"{parts[0]}{split_instruction}\n现在，你说：{parts[1]}"
+
         return prompt_text
 
     async def build_prompt_rewrite_context(
diff --git a/src/chat/utils/utils.py b/src/chat/utils/utils.py
index 501bf382d..19f3ced99 100644
--- a/src/chat/utils/utils.py
+++ b/src/chat/utils/utils.py
@@ -331,8 +331,17 @@ def process_llm_response(text: str, enable_splitter: bool = True, enable_chinese
     )
 
     if global_config.response_splitter.enable and enable_splitter:
-        split_sentences = split_into_sentences_w_remove_punctuation(cleaned_text)
+        logger.info("回复分割器已启用。")
+        if "[SPLIT]" in cleaned_text:
+            split_sentences_raw = cleaned_text.split("[SPLIT]")
+            # 清理每个句子首尾可能由LLM添加的空格或换行符，并移除空句子
+            split_sentences = [s.strip() for s in split_sentences_raw if s.strip()]
+            logger.debug(f"LLM 自定义分割结果: {split_sentences}")
+        else:
+            # 如果没有 [SPLIT] 标记，则不进行任何分割
+            split_sentences = [cleaned_text]
     else:
+        logger.debug("回复分割器已禁用。")
         split_sentences = [cleaned_text]
 
     sentences = []