From 375a51e01f7babe6e882b0127f20085bfd5e65d7 Mon Sep 17 00:00:00 2001
From: minecraft1024a <wwwww95915@qq.com>
Date: Fri, 26 Sep 2025 19:50:06 +0800
Subject: [PATCH] =?UTF-8?q?fix(llm):=20=E4=BF=AE=E5=A4=8D=20<think>=20?=
 =?UTF-8?q?=E6=A0=87=E7=AD=BE=E8=A7=A3=E6=9E=90=E5=90=8E=E5=8F=AF=E8=83=BD?=
 =?UTF-8?q?=E6=AE=8B=E7=95=99=E7=A9=BA=E7=99=BD=E7=9A=84=E9=97=AE=E9=A2=98?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

之前的 <think> 标签解析逻辑在移除标签内容后，没有处理紧随其后的空白字符，这可能导致清理后的内容开头有多余的空格或换行符。

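本次更新使用更精确的正则表达式 `<think>(.*?)</think>\s*`，可以在一次操作中同时移除 <think> 标签块和其后的所有空白字符，确保返回的内容格式正确，提高了处理的鲁棒性。

注意行为变化：旧表达式 `(?:<think>)?(.*?)</think>` 中起始标签是可选的，会从内容开头一直匹配到第一个 </think>；新表达式要求必须存在起始 <think> 标签，因此只有 </think> 而缺少 <think> 的输出将不再被解析（内容仅做 strip 后原样返回），并且 <think> 之前的文本现在会被保留，而不是连同标签一并移除。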
---
 src/llm_models/prompt_processor.py | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)
diff --git a/src/llm_models/prompt_processor.py b/src/llm_models/prompt_processor.py
index 94a0a2ef5..0ae944369 100644
--- a/src/llm_models/prompt_processor.py
+++ b/src/llm_models/prompt_processor.py
@@ -154,10 +154,17 @@ class PromptProcessor:
                 - 清理后的内容（移除了<think>标签及其内容）。
                 - 提取出的思考过程文本（如果没有则为空字符串）。
         """
-        # 使用正则表达式查找<think>标签
-        match = re.search(r"(?:<think>)?(.*?)</think>", content, re.DOTALL)
-        # 从内容中移除<think>标签及其包裹的所有内容
-        clean_content = re.sub(r"(?:<think>)?.*?</think>", "", content, flags=re.DOTALL, count=1).strip()
-        # 如果找到匹配项，则提取思考过程
-        reasoning = match.group(1).strip() if match else ""
+        # 使用正则表达式精确查找 <think>...</think> 标签及其内容
+        think_pattern = re.compile(r"<think>(.*?)</think>\s*", re.DOTALL)
+        match = think_pattern.search(content)
+
+        if match:
+            # 提取思考过程
+            reasoning = match.group(1).strip()
+            # 从原始内容中移除匹配到的整个部分（包括标签和后面的空白）
+            clean_content = think_pattern.sub("", content, count=1).strip()
+        else:
+            reasoning = ""
+            clean_content = content.strip()
+            
         return clean_content, reasoning