From d302baff5f8340628ad2527b57a3d582f3c5294d Mon Sep 17 00:00:00 2001 From: Gardel Date: Sat, 6 Dec 2025 08:39:58 +0800 Subject: [PATCH] =?UTF-8?q?fix:=20=E4=BF=AE=E5=A4=8D=20VLM=20=E8=A7=A3?= =?UTF-8?q?=E6=9E=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/chat/emoji_system/emoji_manager.py | 55 ++++++++++++++++++++------ 1 file changed, 43 insertions(+), 12 deletions(-) diff --git a/src/chat/emoji_system/emoji_manager.py b/src/chat/emoji_system/emoji_manager.py index 125907a6d..05b2d2de9 100644 --- a/src/chat/emoji_system/emoji_manager.py +++ b/src/chat/emoji_system/emoji_manager.py @@ -4,6 +4,7 @@ import binascii import hashlib import io import json +import json_repair import os import random import re @@ -1023,6 +1024,15 @@ class EmojiManager: - 必须是表情包,非普通截图。 - 图中文字不超过5个。 请确保你的最终输出是严格的JSON对象,不要添加任何额外解释或文本。 +输出格式: +```json +{{ + "detailed_description": "", + "keywords": [], + "refined_sentence": "", + "is_compliant": true +}} +``` """ image_data_for_vlm, image_format_for_vlm = image_base64, image_format @@ -1042,16 +1052,14 @@ class EmojiManager: if not vlm_response_str: continue - match = re.search(r"\{.*\}", vlm_response_str, re.DOTALL) - if match: - vlm_response_json = json.loads(match.group(0)) - description = vlm_response_json.get("detailed_description", "") - emotions = vlm_response_json.get("keywords", []) - refined_description = vlm_response_json.get("refined_sentence", "") - is_compliant = vlm_response_json.get("is_compliant", False) - if description and emotions and refined_description: - logger.info("[VLM分析] 成功解析VLM返回的JSON数据。") - break + vlm_response_json = self._parse_json_response(vlm_response_str) + description = vlm_response_json.get("detailed_description", "") + emotions = vlm_response_json.get("keywords", []) + refined_description = vlm_response_json.get("refined_sentence", "") + is_compliant = vlm_response_json.get("is_compliant", False) + if description and emotions and refined_description: + logger.info("[VLM分析] 成功解析VLM返回的JSON数据。") + break logger.warning("[VLM分析] VLM返回的JSON数据不完整或格式错误,准备重试。") except (json.JSONDecodeError, AttributeError) as e: logger.error(f"VLM JSON解析失败 (第 {i+1}/3 次): {e}") @@ -1122,7 +1130,7 @@ class EmojiManager: if emoji_base64 is None: # 再次检查读取 logger.error(f"[注册失败] 无法读取图片以生成描述: {filename}") return False - + # 等待描述生成完成 description, emotions = await self.build_emoji_description(emoji_base64) @@ -1135,7 +1143,7 @@ class EmojiManager: except Exception as e: logger.error(f"[错误] 删除描述生成失败文件时出错: {e!s}") return False - + new_emoji.description = description new_emoji.emotion = emotions except Exception as build_desc_error: @@ -1196,6 +1204,29 @@ class EmojiManager: logger.error(f"[错误] 删除异常处理文件时出错: {remove_error}") return False + @classmethod + def _parse_json_response(cls, response: str) -> dict[str, Any] | None: + """解析 LLM 的 JSON 响应""" + try: + # 尝试提取 JSON 代码块 + json_match = re.search(r"```json\s*(.*?)\s*```", response, re.DOTALL) + if json_match: + json_str = json_match.group(1) + else: + # 尝试直接解析 + json_str = response.strip() + + # 移除可能的注释 + json_str = re.sub(r"//.*", "", json_str) + json_str = re.sub(r"/\*.*?\*/", "", json_str, flags=re.DOTALL) + + data = json_repair.loads(json_str) + return data + + except json.JSONDecodeError as e: + logger.warning(f"JSON 解析失败: {e}, 响应: {response[:200]}") + return None + emoji_manager = None