From 3cded7220a471b3b41c437c2ed0ccb1ecefb1699 Mon Sep 17 00:00:00 2001 From: minecraft1024a Date: Sat, 27 Sep 2025 14:37:06 +0800 Subject: [PATCH] =?UTF-8?q?fix(chat):=20=E5=AE=8C=E5=96=84LLM=E5=88=86?= =?UTF-8?q?=E5=8F=A5=E9=80=BB=E8=BE=91=EF=BC=8C=E5=9C=A8=E6=97=A0=E5=88=86?= =?UTF-8?q?=E5=89=B2=E6=A0=87=E8=AE=B0=E6=97=B6=E5=9B=9E=E9=80=80=E8=87=B3?= =?UTF-8?q?=E6=A0=87=E7=82=B9=E5=88=86=E5=89=B2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 当使用 "llm" 模式进行分句时,如果模型未能按预期生成 `[SPLIT]` 标记,之前的逻辑会直接返回整个未分割的文本。 这可能导致过长的句子被发送到下游模块(如TTS),影响体验。本次修改添加了回退机制,当未检测到 `[SPLIT]` 标记时,会自动切换到基于标点的传统分句方法,以提高分句的鲁棒性。 --- src/chat/utils/utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/chat/utils/utils.py b/src/chat/utils/utils.py index 85e665328..746b13e63 100644 --- a/src/chat/utils/utils.py +++ b/src/chat/utils/utils.py @@ -341,9 +341,9 @@ def process_llm_response(text: str, enable_splitter: bool = True, enable_chinese split_sentences = [s.strip() for s in split_sentences_raw if s.strip()] else: if split_mode == "llm": - logger.debug("未检测到 [SPLIT] 标记,本次不进行分割。") - split_sentences = [cleaned_text] - else: # mode == "punctuation" + logger.debug("未检测到 [SPLIT] 标记,回退到基于标点的传统模式进行分割。") + split_sentences = split_into_sentences_w_remove_punctuation(cleaned_text) + else: # mode == "punctuation" logger.debug("使用基于标点的传统模式进行分割。") split_sentences = split_into_sentences_w_remove_punctuation(cleaned_text) else: