From d302baff5f8340628ad2527b57a3d582f3c5294d Mon Sep 17 00:00:00 2001
From: Gardel <gardel741@outlook.com>
Date: Sat, 6 Dec 2025 08:39:58 +0800
Subject: [PATCH] =?UTF-8?q?fix:=20=E4=BF=AE=E5=A4=8D=20VLM=20=E8=A7=A3?=
 =?UTF-8?q?=E6=9E=90?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/chat/emoji_system/emoji_manager.py | 55 ++++++++++++++++++++------
 1 file changed, 43 insertions(+), 12 deletions(-)

diff --git a/src/chat/emoji_system/emoji_manager.py b/src/chat/emoji_system/emoji_manager.py
index 125907a6d..05b2d2de9 100644
--- a/src/chat/emoji_system/emoji_manager.py
+++ b/src/chat/emoji_system/emoji_manager.py
@@ -4,6 +4,7 @@ import binascii
 import hashlib
 import io
 import json
+import json_repair
 import os
 import random
 import re
@@ -1023,6 +1024,15 @@ class EmojiManager:
     -   必须是表情包，非普通截图。
     -   图中文字不超过5个。
 请确保你的最终输出是严格的JSON对象，不要添加任何额外解释或文本。
+输出格式:
+```json
+{{
+  "detailed_description": "",
+  "keywords": [],
+  "refined_sentence": "",
+  "is_compliant": true
+}}
+```
 """
 
                 image_data_for_vlm, image_format_for_vlm = image_base64, image_format
@@ -1042,16 +1052,14 @@ class EmojiManager:
                         if not vlm_response_str:
                             continue
 
-                        match = re.search(r"\{.*\}", vlm_response_str, re.DOTALL)
-                        if match:
-                            vlm_response_json = json.loads(match.group(0))
-                            description = vlm_response_json.get("detailed_description", "")
-                            emotions = vlm_response_json.get("keywords", [])
-                            refined_description = vlm_response_json.get("refined_sentence", "")
-                            is_compliant = vlm_response_json.get("is_compliant", False)
-                            if description and emotions and refined_description:
-                                logger.info("[VLM分析] 成功解析VLM返回的JSON数据。")
-                                break
+                        vlm_response_json = self._parse_json_response(vlm_response_str)
+                        description = vlm_response_json.get("detailed_description", "")
+                        emotions = vlm_response_json.get("keywords", [])
+                        refined_description = vlm_response_json.get("refined_sentence", "")
+                        is_compliant = vlm_response_json.get("is_compliant", False)
+                        if description and emotions and refined_description:
+                            logger.info("[VLM分析] 成功解析VLM返回的JSON数据。")
+                            break
                         logger.warning("[VLM分析] VLM返回的JSON数据不完整或格式错误，准备重试。")
                     except (json.JSONDecodeError, AttributeError) as e:
                         logger.error(f"VLM JSON解析失败 (第 {i+1}/3 次): {e}")
@@ -1122,7 +1130,7 @@ class EmojiManager:
                 if emoji_base64 is None:  # 再次检查读取
                     logger.error(f"[注册失败] 无法读取图片以生成描述: {filename}")
                     return False
-                
+
                 # 等待描述生成完成
                 description, emotions = await self.build_emoji_description(emoji_base64)
 
@@ -1135,7 +1143,7 @@ class EmojiManager:
                     except Exception as e:
                         logger.error(f"[错误] 删除描述生成失败文件时出错: {e!s}")
                     return False
-                
+
                 new_emoji.description = description
                 new_emoji.emotion = emotions
             except Exception as build_desc_error:
@@ -1196,6 +1204,29 @@ class EmojiManager:
                     logger.error(f"[错误] 删除异常处理文件时出错: {remove_error}")
             return False
 
+    @classmethod
+    def _parse_json_response(cls, response: str) -> dict[str, Any] | None:
+        """Parse an LLM reply into a JSON object; return None when the result is not a dict."""
+        try:
+            # Prefer the body of a ```json fenced block; otherwise use the whole stripped reply.
+            json_match = re.search(r"```json\s*(.*?)\s*```", response, re.DOTALL)
+            if json_match:
+                json_str = json_match.group(1)
+            else:
+                json_str = response.strip()
+
+            # Drop // and /* */ comments. NOTE(review): not string-aware, a "//" inside a value is cut too.
+            json_str = re.sub(r"//.*", "", json_str)
+            json_str = re.sub(r"/\*.*?\*/", "", json_str, flags=re.DOTALL)
+
+            data = json_repair.loads(json_str)
+            # json_repair may hand back a non-dict (e.g. a list or an empty string); honour the return type.
+            return data if isinstance(data, dict) else None
+
+        except json.JSONDecodeError as e:
+            logger.warning(f"JSON 解析失败: {e}, 响应: {response[:200]}")
+            return None
+
 
 emoji_manager = None