feat(emoji): 重构表情包识别、描述与缓存系统
本次更新全面重构了表情包处理逻辑,引入了包含“精炼描述”、“关键词”和“详细分析”的三层描述系统,以提升识别的深度与广度。同时,统一并修复了缓存机制,解决了旧缓存数据格式不兼容及新内容无法被缓存的问题,并通过增加校验逻辑增强了系统的健壮性。 注意:更新后,用户必须手动清空 emojis、images、image_descriptions 三个数据库表,并将 data/emoji_registed/ 目录下的所有文件移至 data/emoji/ 目录,然后重启程序,以确保所有表情包能被新系统正确地重新识别
This commit is contained in:
@@ -1,4 +1,5 @@
|
||||
import random
|
||||
import re
|
||||
|
||||
from src.chat.emoji_system.emoji_history import add_emoji_to_history, get_recent_emojis
|
||||
from src.chat.emoji_system.emoji_manager import MaiEmoji, get_emoji_manager
|
||||
@@ -223,7 +224,13 @@ class EmojiAction(BaseAction):
|
||||
)
|
||||
|
||||
# 准备表情描述列表
|
||||
emoji_descriptions = [desc for _, desc in all_emojis_data]
|
||||
# 提取精炼描述和关键词用于LLM选择
|
||||
def extract_refined_info(full_desc: str) -> str:
|
||||
# 新格式: [精炼描述] Keywords: [关键词] Desc: [详细描述]
|
||||
# 我们只需要 Desc: 之前的部分
|
||||
return full_desc.split(" Desc:")[0].strip()
|
||||
|
||||
emoji_descriptions = [extract_refined_info(desc) for _, desc in all_emojis_data]
|
||||
|
||||
# 构建prompt让LLM选择描述
|
||||
prompt = f"""
|
||||
@@ -256,32 +263,40 @@ class EmojiAction(BaseAction):
|
||||
chosen_emotion = chosen_description # 在描述模式下,用描述作为情感标签
|
||||
logger.info(f"{self.log_prefix} LLM选择的描述: {chosen_description}")
|
||||
|
||||
# 简单关键词匹配
|
||||
matched_emoji = next(
|
||||
(
|
||||
item
|
||||
for item in all_emojis_data
|
||||
if chosen_description.lower() in item[1].lower()
|
||||
or item[1].lower() in chosen_description.lower()
|
||||
),
|
||||
None,
|
||||
)
|
||||
# 优化匹配逻辑:优先在精炼描述中精确匹配,然后进行关键词匹配
|
||||
def extract_refined_info(full_desc: str) -> str:
|
||||
return full_desc.split(" Desc:")[0].strip()
|
||||
|
||||
# 如果包含匹配失败,尝试关键词匹配
|
||||
if not matched_emoji:
|
||||
keywords = ["惊讶", "困惑", "呆滞", "震惊", "懵", "无语", "萌", "可爱"]
|
||||
for keyword in keywords:
|
||||
if keyword in chosen_description:
|
||||
for item in all_emojis_data:
|
||||
if any(k in item[1] for k in ["呆", "萌", "惊", "困惑", "无语"]):
|
||||
matched_emoji = item
|
||||
break
|
||||
if matched_emoji:
|
||||
break
|
||||
# 1. 尝试在精炼描述中找到最匹配的表情
|
||||
# 我们假设LLM返回的是精炼描述的一部分或全部
|
||||
matched_emoji = None
|
||||
best_match_score = 0
|
||||
|
||||
for item in all_emojis_data:
|
||||
refined_info = extract_refined_info(item[1])
|
||||
# 计算一个简单的匹配分数
|
||||
score = 0
|
||||
if chosen_description.lower() in refined_info.lower():
|
||||
score += 2 # 包含匹配
|
||||
if refined_info.lower() in chosen_description.lower():
|
||||
score += 2 # 包含匹配
|
||||
|
||||
# 关键词匹配加分
|
||||
chosen_keywords = re.findall(r'\w+', chosen_description.lower())
|
||||
item_keywords = re.findall(r'\[(.*?)\]', refined_info)
|
||||
if item_keywords:
|
||||
item_keywords_set = {k.strip().lower() for k in item_keywords[0].split(',')}
|
||||
for kw in chosen_keywords:
|
||||
if kw in item_keywords_set:
|
||||
score += 1
|
||||
|
||||
if score > best_match_score:
|
||||
best_match_score = score
|
||||
matched_emoji = item
|
||||
|
||||
if matched_emoji:
|
||||
emoji_base64, emoji_description = matched_emoji
|
||||
logger.info(f"{self.log_prefix} 找到匹配描述的表情包: {emoji_description}")
|
||||
logger.info(f"{self.log_prefix} 找到匹配描述的表情包: {extract_refined_info(emoji_description)}")
|
||||
else:
|
||||
logger.warning(f"{self.log_prefix} LLM选择的描述无法匹配任何表情包, 将随机选择")
|
||||
emoji_base64, emoji_description = random.choice(all_emojis_data)
|
||||
|
||||
Reference in New Issue
Block a user