优化图片格式处理,统一将'jpg'格式转换为'jpeg',新增标准化和MIME类型转换功能,以确保API兼容性和提高代码可读性(最主要的还是为了gemini)

This commit is contained in:
minecraft1024a
2025-08-13 13:20:15 +08:00
committed by Windpicker-owo
parent e6976e4e50
commit 1e785a117d
5 changed files with 82 additions and 5 deletions

View File

@@ -957,7 +957,7 @@ class EmojiManager:
raise RuntimeError("GIF表情包转换失败") raise RuntimeError("GIF表情包转换失败")
prompt = "这是一个动态图表情包,每一张图代表了动态图的某一帧,黑色背景代表透明,描述一下表情包表达的情感和内容,描述细节,从互联网梗,meme的角度去分析" prompt = "这是一个动态图表情包,每一张图代表了动态图的某一帧,黑色背景代表透明,描述一下表情包表达的情感和内容,描述细节,从互联网梗,meme的角度去分析"
description, _ = await self.vlm.generate_response_for_image( description, _ = await self.vlm.generate_response_for_image(
prompt, image_base64, "jpg", temperature=0.3, max_tokens=1000 prompt, image_base64, "jpeg", temperature=0.3, max_tokens=1000
) )
else: else:
prompt = ( prompt = (

View File

@@ -161,7 +161,7 @@ class ImageManager:
return "[表情包(GIF处理失败)]" return "[表情包(GIF处理失败)]"
vlm_prompt = "这是一个动态图表情包,每一张图代表了动态图的某一帧,黑色背景代表透明,描述一下表情包表达的情感和内容,描述细节,从互联网梗,meme的角度去分析" vlm_prompt = "这是一个动态图表情包,每一张图代表了动态图的某一帧,黑色背景代表透明,描述一下表情包表达的情感和内容,描述细节,从互联网梗,meme的角度去分析"
detailed_description, _ = await self.vlm.generate_response_for_image( detailed_description, _ = await self.vlm.generate_response_for_image(
vlm_prompt, image_base64_processed, "jpg", temperature=0.4, max_tokens=300 vlm_prompt, image_base64_processed, "jpeg", temperature=0.4, max_tokens=300
) )
else: else:
vlm_prompt = ( vlm_prompt = (

View File

@@ -20,6 +20,29 @@ from ..payload_content.tool_option import ToolOption, ToolParam, ToolCall
logger = get_logger("AioHTTP-Gemini客户端") logger = get_logger("AioHTTP-Gemini客户端")
def _format_to_mime_type(image_format: str) -> str:
    """Return the MIME type string for an image format name.

    The name is lower-cased first. ``jpg`` is rewritten to the ``jpeg``
    subtype; every other name (``png``, ``webp``, ``gif``, ``heic``,
    ``heif`` or anything else) is used as-is as the subtype after
    ``image/``.

    Args:
        image_format (str): Image format name, e.g. ``"jpg"`` or ``"png"``.

    Returns:
        str: The corresponding MIME type, e.g. ``"image/jpeg"``.
    """
    subtype = image_format.lower()
    # Of the handled formats, only "jpg" differs from its MIME subtype.
    if subtype == "jpg":
        subtype = "jpeg"
    return "image/" + subtype
def _convert_messages(messages: list[Message]) -> tuple[list[dict], list[str] | None]: def _convert_messages(messages: list[Message]) -> tuple[list[dict], list[str] | None]:
""" """
转换消息格式 - 将消息转换为Gemini REST API所需的格式 转换消息格式 - 将消息转换为Gemini REST API所需的格式
@@ -46,7 +69,7 @@ def _convert_messages(messages: list[Message]) -> tuple[list[dict], list[str] |
if isinstance(item, tuple): # (format, base64_data) if isinstance(item, tuple): # (format, base64_data)
parts.append({ parts.append({
"inline_data": { "inline_data": {
"mime_type": f"image/{item[0].lower()}", "mime_type": _format_to_mime_type(item[0]),
"data": item[1] "data": item[1]
} }
}) })

View File

@@ -61,6 +61,25 @@ def _convert_messages(
:param messages: 消息列表 :param messages: 消息列表
:return: 转换后的消息列表(和可能存在的system消息) :return: 转换后的消息列表(和可能存在的system消息)
""" """
def _get_correct_mime_type(image_format: str) -> str:
    """Build the MIME type for an image format, treating ``jpg`` as ``jpeg``.

    :param image_format: image format name; compared case-insensitively
    :return: MIME type of the form ``image/<subtype>``
    """
    lowered = image_format.lower()
    # "jpg" is the only name whose MIME subtype is spelled differently;
    # all other names pass through unchanged after lower-casing.
    aliases = {"jpg": "jpeg"}
    return f"image/{aliases.get(lowered, lowered)}"
def _convert_message_item(message: Message) -> Content: def _convert_message_item(message: Message) -> Content:
""" """
@@ -84,7 +103,7 @@ def _convert_messages(
if isinstance(item, tuple): if isinstance(item, tuple):
image_format = "jpeg" if item[0].lower() == "jpg" else item[0].lower() image_format = "jpeg" if item[0].lower() == "jpg" else item[0].lower()
content.append( content.append(
Part.from_bytes(data=base64.b64decode(item[1]), mime_type=f"image/{image_format}") Part.from_bytes(data=base64.b64decode(item[1]), mime_type=_get_correct_mime_type(item[0]))
) )
elif isinstance(item, str): elif isinstance(item, str):
content.append(Part.from_text(text=item)) content.append(Part.from_text(text=item))

View File

@@ -33,6 +33,38 @@ error_code_mapping = {
} }
def _normalize_image_format(image_format: str) -> str:
    """Normalize an image format name to its lower-case canonical form.

    The name is lower-cased and ``jpg`` is rewritten to ``jpeg``; any
    other name is returned lower-cased and otherwise unchanged. Matching
    is fully case-insensitive, so mixed-case spellings such as ``"Jpg"``
    are normalized the same way as ``"jpg"`` and ``"JPG"``.

    Args:
        image_format (str): Original image format name.

    Returns:
        str: The normalized image format name.
    """
    lowered = image_format.lower()
    # Compare the lower-cased name rather than the raw input: a lookup
    # keyed on the raw string misses mixed-case spellings ("Jpg", "Jpeg")
    # and would return "jpg" un-normalized through the fallback path.
    normalized = "jpeg" if lowered == "jpg" else lowered
    logger.debug(f"图片格式标准化: {image_format} -> {normalized}")
    return normalized
class RequestType(Enum): class RequestType(Enum):
"""请求类型枚举""" """请求类型枚举"""
@@ -78,6 +110,9 @@ class LLMRequest:
Returns: Returns:
(Tuple[str, str, str, Optional[List[ToolCall]]]): 响应内容、推理内容、模型名称、工具调用列表 (Tuple[str, str, str, Optional[List[ToolCall]]]): 响应内容、推理内容、模型名称、工具调用列表
""" """
# 标准化图片格式以确保API兼容性
normalized_format = _normalize_image_format(image_format)
# 模型选择 # 模型选择
start_time = time.time() start_time = time.time()
model_info, api_provider, client = self._select_model() model_info, api_provider, client = self._select_model()
@@ -86,7 +121,7 @@ class LLMRequest:
message_builder = MessageBuilder() message_builder = MessageBuilder()
message_builder.add_text_content(prompt) message_builder.add_text_content(prompt)
message_builder.add_image_content( message_builder.add_image_content(
image_base64=image_base64, image_format=image_format, support_formats=client.get_support_image_formats() image_base64=image_base64, image_format=normalized_format, support_formats=client.get_support_image_formats()
) )
messages = [message_builder.build()] messages = [message_builder.build()]