better:优化表情包识别
This commit is contained in:
@@ -5,6 +5,7 @@ import hashlib
|
|||||||
from typing import Optional
|
from typing import Optional
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
import io
|
import io
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
|
||||||
from ...common.database import db
|
from ...common.database import db
|
||||||
@@ -231,14 +232,16 @@ class ImageManager:
|
|||||||
return "[图片]"
|
return "[图片]"
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def transform_gif(gif_base64: str) -> str:
|
def transform_gif(gif_base64: str, similarity_threshold: float = 1000.0, max_frames: int = 15) -> Optional[str]:
|
||||||
"""将GIF转换为水平拼接的静态图像
|
"""将GIF转换为水平拼接的静态图像, 跳过相似的帧
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
gif_base64: GIF的base64编码字符串
|
gif_base64: GIF的base64编码字符串
|
||||||
|
similarity_threshold: 判定帧相似的阈值 (MSE),与上一选中帧的MSE大于该值才算不同帧;阈值越大表示要求差异越大(被跳过的帧越多),默认1000.0
|
||||||
|
max_frames: 最大抽取的帧数,默认15
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
str: 拼接后的JPG图像的base64编码字符串
|
Optional[str]: 拼接后的JPG图像的base64编码字符串, 或者在失败时返回None
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
# 解码base64
|
# 解码base64
|
||||||
@@ -246,41 +249,90 @@ class ImageManager:
|
|||||||
gif = Image.open(io.BytesIO(gif_data))
|
gif = Image.open(io.BytesIO(gif_data))
|
||||||
|
|
||||||
# 收集所有帧
|
# 收集所有帧
|
||||||
frames = []
|
all_frames = []
|
||||||
try:
|
try:
|
||||||
while True:
|
while True:
|
||||||
gif.seek(len(frames))
|
gif.seek(len(all_frames))
|
||||||
|
# 确保是RGB格式方便比较
|
||||||
frame = gif.convert("RGB")
|
frame = gif.convert("RGB")
|
||||||
frames.append(frame.copy())
|
all_frames.append(frame.copy())
|
||||||
except EOFError:
|
except EOFError:
|
||||||
pass
|
pass # 读完啦
|
||||||
|
|
||||||
if not frames:
|
if not all_frames:
|
||||||
raise ValueError("No frames found in GIF")
|
logger.warning("GIF中没有找到任何帧")
|
||||||
|
return None # 空的GIF直接返回None
|
||||||
|
|
||||||
# 计算需要抽取的帧的索引
|
# --- 新的帧选择逻辑 ---
|
||||||
total_frames = len(frames)
|
selected_frames = []
|
||||||
if total_frames <= 15:
|
last_selected_frame_np = None
|
||||||
selected_frames = frames
|
|
||||||
else:
|
|
||||||
# 均匀抽取10帧
|
|
||||||
indices = [int(i * (total_frames - 1) / 14) for i in range(15)]
|
|
||||||
selected_frames = [frames[i] for i in indices]
|
|
||||||
|
|
||||||
# 获取单帧的尺寸
|
for i, current_frame in enumerate(all_frames):
|
||||||
|
current_frame_np = np.array(current_frame)
|
||||||
|
|
||||||
|
# 第一帧总是要选的
|
||||||
|
if i == 0:
|
||||||
|
selected_frames.append(current_frame)
|
||||||
|
last_selected_frame_np = current_frame_np
|
||||||
|
continue
|
||||||
|
|
||||||
|
# 计算和上一张选中帧的差异(均方误差 MSE)
|
||||||
|
if last_selected_frame_np is not None:
|
||||||
|
mse = np.mean((current_frame_np - last_selected_frame_np) ** 2)
|
||||||
|
# logger.trace(f"帧 {i} 与上一选中帧的 MSE: {mse}") # 可以取消注释来看差异值
|
||||||
|
|
||||||
|
# 如果差异够大,就选它!
|
||||||
|
if mse > similarity_threshold:
|
||||||
|
selected_frames.append(current_frame)
|
||||||
|
last_selected_frame_np = current_frame_np
|
||||||
|
# 检查是不是选够了
|
||||||
|
if len(selected_frames) >= max_frames:
|
||||||
|
# logger.debug(f"已选够 {max_frames} 帧,停止选择。")
|
||||||
|
break
|
||||||
|
# 如果差异不大就跳过这一帧啦
|
||||||
|
|
||||||
|
# --- 帧选择逻辑结束 ---
|
||||||
|
|
||||||
|
# 如果选择后连一帧都没有(比如GIF只有一帧且后续处理失败?)或者原始GIF就没帧,也返回None
|
||||||
|
if not selected_frames:
|
||||||
|
logger.warning("处理后没有选中任何帧")
|
||||||
|
return None
|
||||||
|
|
||||||
|
# logger.debug(f"总帧数: {len(all_frames)}, 选中帧数: {len(selected_frames)}")
|
||||||
|
|
||||||
|
# 获取选中的第一帧的尺寸(假设所有帧尺寸一致)
|
||||||
frame_width, frame_height = selected_frames[0].size
|
frame_width, frame_height = selected_frames[0].size
|
||||||
|
|
||||||
# 计算目标尺寸,保持宽高比
|
# 计算目标尺寸,保持宽高比
|
||||||
target_height = 200 # 固定高度
|
target_height = 200 # 固定高度
|
||||||
|
# 防止除以零
|
||||||
|
if frame_height == 0:
|
||||||
|
logger.error("帧高度为0,无法计算缩放尺寸")
|
||||||
|
return None
|
||||||
target_width = int((target_height / frame_height) * frame_width)
|
target_width = int((target_height / frame_height) * frame_width)
|
||||||
|
# 宽度也不能是0
|
||||||
|
if target_width == 0:
|
||||||
|
logger.warning(f"计算出的目标宽度为0 (原始尺寸 {frame_width}x{frame_height}),调整为1")
|
||||||
|
target_width = 1
|
||||||
|
|
||||||
# 调整所有帧的大小
|
|
||||||
|
# 调整所有选中帧的大小
|
||||||
resized_frames = [
|
resized_frames = [
|
||||||
frame.resize((target_width, target_height), Image.Resampling.LANCZOS) for frame in selected_frames
|
frame.resize((target_width, target_height), Image.Resampling.LANCZOS) for frame in selected_frames
|
||||||
]
|
]
|
||||||
|
|
||||||
# 创建拼接图像
|
# 创建拼接图像
|
||||||
total_width = target_width * len(resized_frames)
|
total_width = target_width * len(resized_frames)
|
||||||
|
# 防止总宽度为0
|
||||||
|
if total_width == 0 and len(resized_frames) > 0:
|
||||||
|
logger.warning("计算出的总宽度为0,但有选中帧,可能目标宽度太小")
|
||||||
|
# 至少给点宽度吧
|
||||||
|
total_width = len(resized_frames)
|
||||||
|
elif total_width == 0:
|
||||||
|
logger.error("计算出的总宽度为0且无选中帧")
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
combined_image = Image.new("RGB", (total_width, target_height))
|
combined_image = Image.new("RGB", (total_width, target_height))
|
||||||
|
|
||||||
# 水平拼接图像
|
# 水平拼接图像
|
||||||
@@ -289,14 +341,17 @@ class ImageManager:
|
|||||||
|
|
||||||
# 转换为base64
|
# 转换为base64
|
||||||
buffer = io.BytesIO()
|
buffer = io.BytesIO()
|
||||||
combined_image.save(buffer, format="JPEG", quality=85)
|
combined_image.save(buffer, format="JPEG", quality=85) # 保存为JPEG
|
||||||
result_base64 = base64.b64encode(buffer.getvalue()).decode("utf-8")
|
result_base64 = base64.b64encode(buffer.getvalue()).decode("utf-8")
|
||||||
|
|
||||||
return result_base64
|
return result_base64
|
||||||
|
|
||||||
|
except MemoryError:
|
||||||
|
logger.error("GIF转换失败: 内存不足,可能是GIF太大或帧数太多")
|
||||||
|
return None # 内存不够啦
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"GIF转换失败: {str(e)}")
|
logger.error(f"GIF转换失败: {str(e)}", exc_info=True) # 记录详细错误信息
|
||||||
return None
|
return None # 其他错误也返回None
|
||||||
|
|
||||||
|
|
||||||
# 创建全局单例
|
# 创建全局单例
|
||||||
|
|||||||
@@ -195,7 +195,7 @@ class EmojiManager:
|
|||||||
self._scan_task = None
|
self._scan_task = None
|
||||||
self.vlm = LLMRequest(model=global_config.vlm, temperature=0.3, max_tokens=1000, request_type="emoji")
|
self.vlm = LLMRequest(model=global_config.vlm, temperature=0.3, max_tokens=1000, request_type="emoji")
|
||||||
self.llm_emotion_judge = LLMRequest(
|
self.llm_emotion_judge = LLMRequest(
|
||||||
model=global_config.llm_emotion_judge, max_tokens=600, temperature=0.8, request_type="emoji"
|
model=global_config.llm_summary, max_tokens=600, temperature=0.8, request_type="emoji"
|
||||||
) # 更高的温度,更少的token(后续可以根据情绪来调整温度)
|
) # 更高的温度,更少的token(后续可以根据情绪来调整温度)
|
||||||
|
|
||||||
self.emoji_num = 0
|
self.emoji_num = 0
|
||||||
@@ -719,10 +719,10 @@ class EmojiManager:
|
|||||||
# 调用AI获取描述
|
# 调用AI获取描述
|
||||||
if image_format == "gif" or image_format == "GIF":
|
if image_format == "gif" or image_format == "GIF":
|
||||||
image_base64 = image_manager.transform_gif(image_base64)
|
image_base64 = image_manager.transform_gif(image_base64)
|
||||||
prompt = "这是一个动态图表情包,每一张图代表了动态图的某一帧,黑色背景代表透明,详细描述一下表情包表达的情感和内容,请关注其幽默和讽刺意味"
|
prompt = "这是一个动态图表情包,每一张图代表了动态图的某一帧,黑色背景代表透明,描述一下表情包表达的情感和内容,你可以关注其幽默和讽刺意味,必须从互联网梗,meme的角度去分析"
|
||||||
description, _ = await self.vlm.generate_response_for_image(prompt, image_base64, "jpg")
|
description, _ = await self.vlm.generate_response_for_image(prompt, image_base64, "jpg")
|
||||||
else:
|
else:
|
||||||
prompt = "这是一个表情包,请详细描述一下表情包所表达的情感和内容,请关注其幽默和讽刺意味"
|
prompt = "这是一个表情包,请详细描述一下表情包所表达的情感和内容,你可以关注其幽默和讽刺意味,必须从互联网梗,meme的角度去分析"
|
||||||
description, _ = await self.vlm.generate_response_for_image(prompt, image_base64, image_format)
|
description, _ = await self.vlm.generate_response_for_image(prompt, image_base64, image_format)
|
||||||
|
|
||||||
# 审核表情包
|
# 审核表情包
|
||||||
@@ -742,10 +742,10 @@ class EmojiManager:
|
|||||||
# 分析情感含义
|
# 分析情感含义
|
||||||
emotion_prompt = f"""
|
emotion_prompt = f"""
|
||||||
基于这个表情包的描述:'{description}',请列出1-2个可能的情感标签,每个标签用一个词组表示,格式如下:
|
基于这个表情包的描述:'{description}',请列出1-2个可能的情感标签,每个标签用一个词组表示,格式如下:
|
||||||
幽默的讽刺
|
幽默的讽刺,适用于调侃或吐槽场景
|
||||||
悲伤的无奈
|
悲伤的无奈,适用于表达无力感或失望
|
||||||
愤怒的抗议
|
愤怒的抗议,适用于表达不满或反对
|
||||||
愤怒的讽刺
|
愤怒的讽刺,适用于尖锐批评或反讽
|
||||||
直接输出词组,词组检用逗号分隔。"""
|
直接输出词组,词组检用逗号分隔。"""
|
||||||
emotions_text, _ = await self.llm_emotion_judge.generate_response_async(emotion_prompt, temperature=0.7)
|
emotions_text, _ = await self.llm_emotion_judge.generate_response_async(emotion_prompt, temperature=0.7)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user