From 876f20e847eef1fae7441ef31efe8a05e0fc6502 Mon Sep 17 00:00:00 2001
From: tt-P607 <68868379+tt-P607@users.noreply.github.com>
Date: Fri, 28 Nov 2025 12:00:40 +0800
Subject: [PATCH] =?UTF-8?q?feat(llm):=20=E4=B8=BA=E4=B8=8D=E6=94=AF?=
 =?UTF-8?q?=E6=8C=81=E7=9A=84=E6=A8=A1=E5=9E=8B=E6=B7=BB=E5=8A=A0=E8=87=AA?=
 =?UTF-8?q?=E5=8A=A8=E5=B0=86=20GIF=20=E8=BD=AC=E6=8D=A2=E4=B8=BA=20PNG=20?=
 =?UTF-8?q?=E5=B8=A7=E7=9A=84=E5=8A=9F=E8=83=BD?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

当语言模型不支持用于图像输入的 GIF 格式时,此功能会自动将 GIF 转换为一系列 PNG 帧。它智能地从 GIF 中采样最多 4 帧,确保即使是动画图像也可以被仅支持静态格式(如 PNG)的模型处理。

这增强了模型的多功能性,让用户无需担心原生格式支持问题即可提交 GIF,从而提供更顺畅和强大的用户体验。
---
 src/llm_models/payload_content/message.py | 78 +++++++++++++++++++++--
 1 file changed, 71 insertions(+), 7 deletions(-)

diff --git a/src/llm_models/payload_content/message.py b/src/llm_models/payload_content/message.py
index 7a34349a3..2cda33727 100644
--- a/src/llm_models/payload_content/message.py
+++ b/src/llm_models/payload_content/message.py
@@ -1,5 +1,9 @@
+import base64
+import io
 from enum import Enum
 
+from PIL import Image
+
 
 # 设计这系列类的目的是为未来可能的扩展做准备
 
@@ -53,6 +57,46 @@ class MessageBuilder:
         self.__content.append(text)
         return self
 
+    def _convert_gif_to_png_frames(self, gif_base64: str, max_frames: int = 4) -> list[str]:
+        """Split a Base64-encoded GIF into Base64-encoded PNG frames.
+
+        :param gif_base64: Base64 encoding of the GIF data
+        :param max_frames: upper bound on the number of frames returned
+        :return: PNG frames as Base64 strings, in ascending frame order
+        """
+        gif_bytes = base64.b64decode(gif_base64)
+        frames = []
+
+        # Opening through a context manager closes the image once sampling is done.
+        with Image.open(io.BytesIO(gif_bytes)) as gif_image:
+            # Images without an n_frames attribute are treated as a single frame.
+            total_frames = getattr(gif_image, "n_frames", 1)
+
+            if total_frames <= max_frames:
+                # Few enough frames: take every one of them.
+                indices = range(total_frames)
+            elif max_frames == 1:
+                # The spacing formula below divides by max_frames - 1, so a
+                # single-frame budget is served with the first frame.
+                indices = [0]
+            else:
+                # Pick max_frames indices spread evenly from first to last frame.
+                indices = [i * (total_frames - 1) // (max_frames - 1) for i in range(max_frames)]
+
+            for i in indices:
+                try:
+                    gif_image.seek(i)
+                    frame = gif_image.convert("RGBA")
+
+                    output_buffer = io.BytesIO()
+                    frame.save(output_buffer, format="PNG")
+                    png_bytes = output_buffer.getvalue()
+                    frames.append(base64.b64encode(png_bytes).decode("utf-8"))
+                except EOFError:
+                    # Ran past the last decodable frame; stop extracting.
+                    break
+        return frames
+
     def add_image_content(
         self,
         image_format: str,
@@ -60,18 +104,38 @@ class MessageBuilder:
         support_formats=None,  # 默认支持格式
     ) -> "MessageBuilder":
         """
-        添加图片内容
-        :param image_format: 图片格式
-        :param image_base64: 图片的base64编码
-        :return: MessageBuilder对象
+        Add image content; a GIF the model does not support is split into at most 4 PNG frames.
+        :param image_format: image format, compared case-insensitively
+        :param image_base64: Base64 encoding of the image
+        :param support_formats: accepted formats; SUPPORTED_IMAGE_FORMATS is used when None
+        :return: this MessageBuilder
+        :raises ValueError: if the format cannot be used or image_base64 is empty
         """
         if support_formats is None:
             support_formats = SUPPORTED_IMAGE_FORMATS
-        if image_format.lower() not in support_formats:
-            raise ValueError("不受支持的图片格式")
+
+        current_format = image_format.lower()
+
+        # A GIF the model cannot take is sent as sampled PNG frames instead.
+        if current_format == "gif" and "gif" not in support_formats:
+            if "png" not in support_formats:
+                raise ValueError("模型不支持GIF, 且无法转换为PNG")
+            # Reject empty data here as well, so the caller gets the same ValueError
+            # as on the direct path instead of a decoder error from Pillow.
+            if not image_base64:
+                raise ValueError("图片的base64编码不能为空")
+            for frame_base64 in self._convert_gif_to_png_frames(image_base64):
+                if frame_base64:
+                    self.__content.append(("png", frame_base64))
+            return self
+
+        # Every other format, or a GIF the model accepts natively.
+        if current_format not in support_formats:
+            raise ValueError(f"不受支持的图片格式: {current_format}")
         if not image_base64:
             raise ValueError("图片的base64编码不能为空")
-        self.__content.append((image_format, image_base64))
+
+        self.__content.append((current_format, image_base64))
         return self
 
     def add_tool_call(self, tool_call_id: str) -> "MessageBuilder":