From d2d0cfc4db0f5af08aee48ece923c951d4e56cdc Mon Sep 17 00:00:00 2001 From: tt-P607 <68868379+tt-P607@users.noreply.github.com> Date: Fri, 28 Nov 2025 16:00:55 +0800 Subject: [PATCH] =?UTF-8?q?feat(image):=20=E4=B8=BA=20VLM=20=E5=A4=84?= =?UTF-8?q?=E7=90=86=E6=B7=BB=E5=8A=A0=E8=87=AA=E5=8A=A8=20GIF=20=E8=BD=AC?= =?UTF-8?q?=20JPG=20=E8=BD=AC=E6=8D=A2=E5=8A=9F=E8=83=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 此提交引入了在将 GIF 图像发送到视觉语言模型 (VLM) 进行描述之前,自动检测并转换为 JPG 格式的功能。这确保了与不支持动画格式的 VLM 的兼容性,防止处理失败。 此外,还添加了若干空值检查断言,以提高代码的健壮性,并防止配置对象未正确初始化时可能出现的运行时错误。还实现了对 `global_config.emoji` 的防御性检查。 --- src/chat/utils/utils_image.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/src/chat/utils/utils_image.py b/src/chat/utils/utils_image.py index 8e649f197..f0ae224c8 100644 --- a/src/chat/utils/utils_image.py +++ b/src/chat/utils/utils_image.py @@ -54,6 +54,7 @@ class ImageManager: self._ensure_image_dir() self._initialized = True + assert model_config is not None self.vlm = LLMRequest(model_set=model_config.model_task_config.vlm, request_type="image") # try: @@ -189,7 +190,7 @@ class ImageManager: return "[表情包(描述生成失败)]" # 4. (可选) 如果启用了“偷表情包”,则将图片和完整描述存入待注册区 - if global_config.emoji.steal_emoji: + if global_config and global_config.emoji and global_config.emoji.steal_emoji: logger.debug(f"偷取表情包功能已开启,保存待注册表情包: {image_hash}") try: image_format = (Image.open(io.BytesIO(image_bytes)).format or "jpeg").lower() @@ -226,6 +227,22 @@ class ImageManager: image_bytes = base64.b64decode(image_base64) image_hash = hashlib.md5(image_bytes).hexdigest() + # 1.5. 如果是GIF,先转换为JPG + try: + image_format_check = (Image.open(io.BytesIO(image_bytes)).format or "jpeg").lower() + if image_format_check == "gif": + logger.info(f"检测到GIF图片 (Hash: {image_hash[:8]}...),正在转换为JPG...") + if transformed_b64 := self.transform_gif(image_base64): + image_base64 = transformed_b64 + image_bytes = base64.b64decode(image_base64) + logger.info("GIF转换成功,将使用转换后的图片进行描述") + else: + logger.error("GIF转换失败,无法生成描述") + return "[图片(GIF转换失败)]" + except Exception as e: + logger.warning(f"图片格式检测失败: {e!s},将按原格式处理") + + # 2. 优先查询 Images 表缓存 async with get_db_session() as session: result = await session.execute(select(Images).where(Images.emoji_hash == image_hash)) @@ -242,6 +259,8 @@ class ImageManager: # 4. 如果都未命中,则同步调用VLM生成新描述 logger.info(f"[新图片识别] 无缓存 (Hash: {image_hash[:8]}...),调用VLM生成描述") description = None + assert global_config is not None + assert global_config.custom_prompt is not None prompt = global_config.custom_prompt.image_prompt logger.info(f"[识图VLM调用] Prompt: {prompt}") for i in range(3): # 重试3次