From dcc8f6477ed3b86f9a3b310f311ad871dbdfc925 Mon Sep 17 00:00:00 2001 From: tt-P607 <68868379+tt-P607@users.noreply.github.com> Date: Thu, 13 Nov 2025 20:38:59 +0800 Subject: [PATCH] =?UTF-8?q?refactor(chat):=20=E7=AE=80=E5=8C=96=E6=8B=AC?= =?UTF-8?q?=E5=8F=B7=E5=86=85=E5=AE=B9=E8=BF=87=E6=BB=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 将用于过滤 `[表情包:...]` 和 `[图片:...]` 等内容的多个特定正则表达式模式替换为一个更通用的 `\[.*?\]` 模式。 此更改简化了代码,并通过处理方括号内的任何内容(包括未处理的格式如 `[at=...]`)提高了过滤的稳健性。 --- src/chat/utils/utils.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/src/chat/utils/utils.py b/src/chat/utils/utils.py index 38205477e..71d2d1861 100644 --- a/src/chat/utils/utils.py +++ b/src/chat/utils/utils.py @@ -956,16 +956,11 @@ def filter_system_format_content(content: str | None) -> str: cleaned_content = cleaned_content[last_bracket_index + 1 :].strip() # 在处理完回复格式后,再清理其他简单的格式 - # 移除表情包格式:[表情包:xxx] - cleaned_content = re.sub(r"\[表情包:[^\]]*\]", "", cleaned_content) - # 移除图片格式:[图片:xxx] - cleaned_content = re.sub(r"\[图片:[^\]]*\]", "", cleaned_content) + # 新增:移除所有残余的 [...] 格式,例如 [at=...] 等 + cleaned_content = re.sub(r"\[.*?\]", "", cleaned_content) + # 移除@格式:@ cleaned_content = re.sub(r"@<[^>]*>", "", cleaned_content) - # [表情包(描述生成失败)] 等错误格式 - cleaned_content = re.sub(r"\[表情包\([^)]*\)\]", "", cleaned_content) - # [图片(描述生成失败)] 等错误格式 - cleaned_content = re.sub(r"\[图片\([^)]*\)\]", "", cleaned_content) # 记录过滤操作 if cleaned_content != original_content.strip():