From 1e785a117d9452d55b7c65507f6066830ef561e1 Mon Sep 17 00:00:00 2001 From: minecraft1024a Date: Wed, 13 Aug 2025 13:20:15 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BC=98=E5=8C=96=E5=9B=BE=E7=89=87=E6=A0=BC?= =?UTF-8?q?=E5=BC=8F=E5=A4=84=E7=90=86=EF=BC=8C=E7=BB=9F=E4=B8=80=E5=B0=86?= =?UTF-8?q?'jpg'=E6=A0=BC=E5=BC=8F=E8=BD=AC=E6=8D=A2=E4=B8=BA'jpeg'?= =?UTF-8?q?=EF=BC=8C=E6=96=B0=E5=A2=9E=E6=A0=87=E5=87=86=E5=8C=96=E5=92=8C?= =?UTF-8?q?MIME=E7=B1=BB=E5=9E=8B=E8=BD=AC=E6=8D=A2=E5=8A=9F=E8=83=BD?= =?UTF-8?q?=EF=BC=8C=E4=BB=A5=E7=A1=AE=E4=BF=9DAPI=E5=85=BC=E5=AE=B9?= =?UTF-8?q?=E6=80=A7=E5=92=8C=E6=8F=90=E9=AB=98=E4=BB=A3=E7=A0=81=E5=8F=AF?= =?UTF-8?q?=E8=AF=BB=E6=80=A7(=E6=9C=80=E4=B8=BB=E8=A6=81=E7=9A=84?= =?UTF-8?q?=E8=BF=98=E6=98=AF=E4=B8=BA=E4=BA=86gemini)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/chat/emoji_system/emoji_manager.py | 2 +- src/chat/utils/utils_image.py | 2 +- .../model_client/aiohttp_gemini_client.py | 25 ++++++++++++- src/llm_models/model_client/gemini_client.py | 21 ++++++++++- src/llm_models/utils_model.py | 37 ++++++++++++++++++- 5 files changed, 82 insertions(+), 5 deletions(-) diff --git a/src/chat/emoji_system/emoji_manager.py b/src/chat/emoji_system/emoji_manager.py index 2b5b5f715..df82b7205 100644 --- a/src/chat/emoji_system/emoji_manager.py +++ b/src/chat/emoji_system/emoji_manager.py @@ -957,7 +957,7 @@ class EmojiManager: raise RuntimeError("GIF表情包转换失败") prompt = "这是一个动态图表情包,每一张图代表了动态图的某一帧,黑色背景代表透明,描述一下表情包表达的情感和内容,描述细节,从互联网梗,meme的角度去分析" description, _ = await self.vlm.generate_response_for_image( - prompt, image_base64, "jpg", temperature=0.3, max_tokens=1000 + prompt, image_base64, "jpeg", temperature=0.3, max_tokens=1000 ) else: prompt = ( diff --git a/src/chat/utils/utils_image.py b/src/chat/utils/utils_image.py index 8b514959e..2bb52395e 100644 --- a/src/chat/utils/utils_image.py +++ b/src/chat/utils/utils_image.py @@ -161,7 +161,7 @@ class ImageManager: return "[表情包(GIF处理失败)]" vlm_prompt = "这是一个动态图表情包,每一张图代表了动态图的某一帧,黑色背景代表透明,描述一下表情包表达的情感和内容,描述细节,从互联网梗,meme的角度去分析" detailed_description, _ = await self.vlm.generate_response_for_image( - vlm_prompt, image_base64_processed, "jpg", temperature=0.4, max_tokens=300 + vlm_prompt, image_base64_processed, "jpeg", temperature=0.4, max_tokens=300 ) else: vlm_prompt = ( diff --git a/src/llm_models/model_client/aiohttp_gemini_client.py b/src/llm_models/model_client/aiohttp_gemini_client.py index 0dae4defa..d2acc5f2a 100644 --- a/src/llm_models/model_client/aiohttp_gemini_client.py +++ b/src/llm_models/model_client/aiohttp_gemini_client.py @@ -20,6 +20,29 @@ from ..payload_content.tool_option import ToolOption, ToolParam, ToolCall logger = get_logger("AioHTTP-Gemini客户端") +def _format_to_mime_type(image_format: str) -> str: + """ + 将图片格式转换为正确的MIME类型 + + Args: + image_format (str): 图片格式 (如 'jpg', 'png' 等) + + Returns: + str: 对应的MIME类型 + """ + format_mapping = { + "jpg": "image/jpeg", + "jpeg": "image/jpeg", + "png": "image/png", + "webp": "image/webp", + "gif": "image/gif", + "heic": "image/heic", + "heif": "image/heif" + } + + return format_mapping.get(image_format.lower(), f"image/{image_format.lower()}") + + def _convert_messages(messages: list[Message]) -> tuple[list[dict], list[str] | None]: """ 转换消息格式 - 将消息转换为Gemini REST API所需的格式 @@ -46,7 +69,7 @@ def _convert_messages(messages: list[Message]) -> tuple[list[dict], list[str] | if isinstance(item, tuple): # (format, base64_data) parts.append({ "inline_data": { - "mime_type": f"image/{item[0].lower()}", + "mime_type": _format_to_mime_type(item[0]), "data": item[1] } }) diff --git a/src/llm_models/model_client/gemini_client.py b/src/llm_models/model_client/gemini_client.py index db6f085ec..506b64af0 100644 --- a/src/llm_models/model_client/gemini_client.py +++ b/src/llm_models/model_client/gemini_client.py @@ -61,6 +61,25 @@ def _convert_messages( :param messages: 消息列表 :return: 转换后的消息列表(和可能存在的system消息) """ + + def _get_correct_mime_type(image_format: str) -> str: + """ + 获取正确的MIME类型,修复jpg到jpeg的映射问题 + :param image_format: 图片格式 + :return: 正确的MIME类型 + """ + # 标准化格式名称,解决jpg/jpeg兼容性问题 + format_mapping = { + "jpg": "jpeg", + "jpeg": "jpeg", + "png": "png", + "webp": "webp", + "heic": "heic", + "heif": "heif", + "gif": "gif" + } + normalized_format = format_mapping.get(image_format.lower(), image_format.lower()) + return f"image/{normalized_format}" def _convert_message_item(message: Message) -> Content: """ @@ -84,7 +103,7 @@ def _convert_messages( if isinstance(item, tuple): image_format = "jpeg" if item[0].lower() == "jpg" else item[0].lower() content.append( - Part.from_bytes(data=base64.b64decode(item[1]), mime_type=f"image/{image_format}") + Part.from_bytes(data=base64.b64decode(item[1]), mime_type=_get_correct_mime_type(item[0])) ) elif isinstance(item, str): content.append(Part.from_text(text=item)) diff --git a/src/llm_models/utils_model.py b/src/llm_models/utils_model.py index b5029bc32..f2ade08b5 100644 --- a/src/llm_models/utils_model.py +++ b/src/llm_models/utils_model.py @@ -33,6 +33,38 @@ error_code_mapping = { } +def _normalize_image_format(image_format: str) -> str: + """ + 标准化图片格式名称,确保与各种API的兼容性 + + Args: + image_format (str): 原始图片格式 + + Returns: + str: 标准化后的图片格式 + """ + format_mapping = { + "jpg": "jpeg", + "JPG": "jpeg", + "JPEG": "jpeg", + "jpeg": "jpeg", + "png": "png", + "PNG": "png", + "webp": "webp", + "WEBP": "webp", + "gif": "gif", + "GIF": "gif", + "heic": "heic", + "HEIC": "heic", + "heif": "heif", + "HEIF": "heif" + } + + normalized = format_mapping.get(image_format, image_format.lower()) + logger.debug(f"图片格式标准化: {image_format} -> {normalized}") + return normalized + + class RequestType(Enum): """请求类型枚举""" @@ -78,6 +110,9 @@ class LLMRequest: Returns: (Tuple[str, str, str, Optional[List[ToolCall]]]): 响应内容、推理内容、模型名称、工具调用列表 """ + # 标准化图片格式以确保API兼容性 + normalized_format = _normalize_image_format(image_format) + # 模型选择 start_time = time.time() model_info, api_provider, client = self._select_model() @@ -86,7 +121,7 @@ class LLMRequest: message_builder = MessageBuilder() message_builder.add_text_content(prompt) message_builder.add_image_content( - image_base64=image_base64, image_format=image_format, support_formats=client.get_support_image_formats() + image_base64=image_base64, image_format=normalized_format, support_formats=client.get_support_image_formats() ) messages = [message_builder.build()]