diff --git a/src/plugins/chat/emoji_manager.py b/src/plugins/chat/emoji_manager.py index 2311b2459..a0164d065 100644 --- a/src/plugins/chat/emoji_manager.py +++ b/src/plugins/chat/emoji_manager.py @@ -14,10 +14,12 @@ import asyncio import time from PIL import Image import io +from loguru import logger from nonebot import get_driver from ..chat.config import global_config from ..models.utils_model import LLM_request +from utils import get_embedding driver = get_driver() config = driver.config @@ -27,16 +29,6 @@ class EmojiManager: _instance = None EMOJI_DIR = "data/emoji" # 表情包存储目录 - EMOTION_KEYWORDS = { - 'happy': ['开心', '快乐', '高兴', '欢喜', '笑', '喜悦', '兴奋', '愉快', '乐', '好'], - 'angry': ['生气', '愤怒', '恼火', '不爽', '火大', '怒', '气愤', '恼怒', '发火', '不满'], - 'sad': ['伤心', '难过', '悲伤', '痛苦', '哭', '忧伤', '悲痛', '哀伤', '委屈', '失落'], - 'surprised': ['惊讶', '震惊', '吃惊', '意外', '惊', '诧异', '惊奇', '惊喜', '不敢相信', '目瞪口呆'], - 'disgusted': ['恶心', '讨厌', '厌恶', '反感', '嫌弃', '恶', '嫌恶', '憎恶', '不喜欢', '烦'], - 'fearful': ['害怕', '恐惧', '惊恐', '担心', '怕', '惊吓', '惊慌', '畏惧', '胆怯', '惧'], - 'neutral': ['普通', '一般', '还行', '正常', '平静', '平淡', '一般般', '凑合', '还好', '就这样'] - } - def __new__(cls): if cls._instance is None: cls._instance = super().__new__(cls) @@ -64,7 +56,7 @@ class EmojiManager: # 启动时执行一次完整性检查 self.check_emoji_file_integrity() except Exception as e: - print(f"\033[1;31m[错误]\033[0m 初始化表情管理器失败: {str(e)}") + logger.error(f"初始化表情管理器失败: {str(e)}") def _ensure_db(self): """确保数据库已初始化""" @@ -77,7 +69,7 @@ class EmojiManager: """确保emoji集合存在并创建索引""" if 'emoji' not in self.db.db.list_collection_names(): self.db.db.create_collection('emoji') - self.db.db.emoji.create_index([('tags', 1)]) + self.db.db.emoji.create_index([('embedding', '2dsphere')]) self.db.db.emoji.create_index([('filename', 1)], unique=True) def record_usage(self, emoji_id: str): @@ -89,79 +81,8 @@ class EmojiManager: {'$inc': {'usage_count': 1}} ) except Exception as e: - print(f"\033[1;31m[错误]\033[0m 记录表情使用失败: {str(e)}") + logger.error(f"记录表情使用失败: {str(e)}") - async def _get_emotion_from_text(self, text: str) -> List[str]: - """从文本中识别情感关键词 - Args: - text: 输入文本 - Returns: - List[str]: 匹配到的情感标签列表 - """ - try: - prompt = f'分析这段文本:"{text}",从"happy,angry,sad,surprised,disgusted,fearful,neutral"中选出最匹配的1个情感标签。只需要返回标签,不要输出其他任何内容。' - - content, _ = await self.llm.generate_response(prompt) - emotion = content.strip().lower() - - if emotion in self.EMOTION_KEYWORDS: - print(f"\033[1;32m[成功]\033[0m 识别到的情感: {emotion}") - return [emotion] - - return ['neutral'] - - except Exception as e: - print(f"\033[1;31m[错误]\033[0m 情感分析失败: {str(e)}") - return ['neutral'] - - async def get_emoji_for_emotion(self, emotion_tag: str) -> Optional[str]: - try: - self._ensure_db() - - # 构建查询条件:标签匹配任一情感 - query = {'tags': {'$in': emotion_tag}} - - # print(f"\033[1;34m[调试]\033[0m 表情查询条件: {query}") - - try: - # 随机获取一个匹配的表情 - emoji = self.db.db.emoji.aggregate([ - {'$match': query}, - {'$sample': {'size': 1}} - ]).next() - print(f"\033[1;32m[成功]\033[0m 找到匹配的表情") - if emoji and 'path' in emoji: - # 更新使用次数 - self.db.db.emoji.update_one( - {'_id': emoji['_id']}, - {'$inc': {'usage_count': 1}} - ) - return emoji['path'] - except StopIteration: - # 如果没有匹配的表情,从所有表情中随机选择一个 - print(f"\033[1;33m[提示]\033[0m 未找到匹配的表情,随机选择一个") - try: - emoji = self.db.db.emoji.aggregate([ - {'$sample': {'size': 1}} - ]).next() - if emoji and 'path' in emoji: - # 更新使用次数 - self.db.db.emoji.update_one( - {'_id': emoji['_id']}, - {'$inc': {'usage_count': 1}} - ) - return emoji['path'] - except StopIteration: - print(f"\033[1;31m[错误]\033[0m 数据库中没有任何表情") - return None - - return None - - except Exception as e: - print(f"\033[1;31m[错误]\033[0m 获取表情包失败: {str(e)}") - return None - - async def get_emoji_for_text(self, text: str) -> Optional[str]: """根据文本内容获取相关表情包 Args: @@ -171,77 +92,84 @@ class EmojiManager: """ try: self._ensure_db() - # 获取情感标签 - emotions = await self._get_emotion_from_text(text) - print("为 ‘"+ str(text) + "’ 获取到的情感标签为:" + str(emotions)) - if not emotions: + + # 获取文本的embedding + text_embedding = get_embedding(text) + if not text_embedding: + logger.error("无法获取文本的embedding") return None - # 构建查询条件:标签匹配任一情感 - query = {'tags': {'$in': emotions}} - - print(f"\033[1;34m[调试]\033[0m 表情查询条件: {query}") - print(f"\033[1;34m[调试]\033[0m 匹配到的情感: {emotions}") + # 使用embedding进行相似度搜索,获取最相似的3个表情包 + pipeline = [ + { + "$search": { + "index": "default", + "knnBeta": { + "vector": text_embedding, + "path": "embedding", + "k": 3 + } + } + } + ] try: - # 随机获取一个匹配的表情 - emoji = self.db.db.emoji.aggregate([ - {'$match': query}, - {'$sample': {'size': 1}} - ]).next() - print(f"\033[1;32m[成功]\033[0m 找到匹配的表情") - if emoji and 'path' in emoji: + # 获取搜索结果 + results = list(self.db.db.emoji.aggregate(pipeline)) + + if not results: + logger.warning("未找到匹配的表情包,尝试随机选择") + # 如果没有匹配的表情,随机选择一个 + try: + emoji = self.db.db.emoji.aggregate([ + {'$sample': {'size': 1}} + ]).next() + if emoji and 'path' in emoji: + # 更新使用次数 + self.db.db.emoji.update_one( + {'_id': emoji['_id']}, + {'$inc': {'usage_count': 1}} + ) + return emoji['path'] + except StopIteration: + logger.error("数据库中没有任何表情") + return None + + # 从最相似的3个表情包中随机选择一个 + selected_emoji = random.choice(results) + + if selected_emoji and 'path' in selected_emoji: # 更新使用次数 self.db.db.emoji.update_one( - {'_id': emoji['_id']}, + {'_id': selected_emoji['_id']}, {'$inc': {'usage_count': 1}} ) - return emoji['path'] - except StopIteration: - # 如果没有匹配的表情,从所有表情中随机选择一个 - print(f"\033[1;33m[提示]\033[0m 未找到匹配的表情,随机选择一个") - try: - emoji = self.db.db.emoji.aggregate([ - {'$sample': {'size': 1}} - ]).next() - if emoji and 'path' in emoji: - # 更新使用次数 - self.db.db.emoji.update_one( - {'_id': emoji['_id']}, - {'$inc': {'usage_count': 1}} - ) - return emoji['path'] - except StopIteration: - print(f"\033[1;31m[错误]\033[0m 数据库中没有任何表情") - return None + logger.success(f"找到匹配的表情包: {selected_emoji.get('discription', '无描述')}") + return selected_emoji['path'] + + except Exception as search_error: + logger.error(f"搜索表情包失败: {str(search_error)}") + return None return None except Exception as e: - print(f"\033[1;31m[错误]\033[0m 获取表情包失败: {str(e)}") + logger.error(f"获取表情包失败: {str(e)}") return None async def _get_emoji_tag(self, image_base64: str) -> str: """获取表情包的标签""" try: - prompt = '这是一个表情包,请从"happy", "angry", "sad", "surprised", "disgusted", "fearful", "neutral"中选出1个情感标签。只输出标签,不要输出其他任何内容,只输出情感标签就好' + prompt = '这是一个表情包,请为其生成简洁的描述,同时生成表情包所蕴含的情绪的描述。' content, _ = await self.llm.generate_response_for_image(prompt, image_base64) - tag_result = content.strip().lower() - - valid_tags = ["happy", "angry", "sad", "surprised", "disgusted", "fearful", "neutral"] - for tag_match in valid_tags: - if tag_match in tag_result or tag_match == tag_result: - return tag_match - print(f"\033[1;33m[警告]\033[0m 无效的标签: {tag_result}, 跳过") + logger.debug(f"输出描述: {content}") + return content except Exception as e: - print(f"\033[1;31m[错误]\033[0m 获取标签失败: {str(e)}") - return "skip" + logger.error(f"获取标签失败: {str(e)}") + return None - print(f"\033[1;32m[调试信息]\033[0m 使用默认标签: neutral") - return "skip" # 默认标签 - async def _compress_image(self, image_path: str, target_size: int = 0.8 * 1024 * 1024) -> Optional[str]: """压缩图片并返回base64编码 Args: @@ -303,12 +231,12 @@ class EmojiManager: # 获取压缩后的数据并转换为base64 compressed_data = output_buffer.getvalue() - print(f"\033[1;32m[成功]\033[0m 压缩图片: {os.path.basename(image_path)} ({original_width}x{original_height} -> {new_width}x{new_height})") + logger.success(f"压缩图片: {os.path.basename(image_path)} ({original_width}x{original_height} -> {new_width}x{new_height})") return base64.b64encode(compressed_data).decode('utf-8') except Exception as e: - print(f"\033[1;31m[错误]\033[0m 压缩图片失败: {os.path.basename(image_path)}, 错误: {str(e)}") + logger.error(f"压缩图片失败: {os.path.basename(image_path)}, 错误: {str(e)}") return None async def scan_new_emojis(self): @@ -334,35 +262,29 @@ class EmojiManager: os.remove(image_path) continue - # 获取表情包的情感标签 - tag = await self._get_emoji_tag(image_base64) - if not tag == "skip": + # 获取表情包的描述 + discription = await self._get_emoji_tag(image_base64) + embedding = get_embedding(discription) + if discription is not None: # 准备数据库记录 emoji_record = { 'filename': filename, 'path': image_path, - 'tags': [tag], + 'embedding':embedding, + 'discription': discription, 'timestamp': int(time.time()) } # 保存到数据库 self.db.db['emoji'].insert_one(emoji_record) - print(f"\033[1;32m[成功]\033[0m 注册新表情包: {filename}") - print(f"标签: {tag}") + logger.success(f"注册新表情包: {filename}") + logger.info(f"描述: {discription}") else: - print(f"\033[1;33m[警告]\033[0m 跳过表情包: {filename}") + logger.warning(f"跳过表情包: {filename}") except Exception as e: - print(f"\033[1;31m[错误]\033[0m 扫描表情包失败: {str(e)}") - import traceback - print(traceback.format_exc()) - - async def _periodic_scan(self, interval_MINS: int = 10): - """定期扫描新表情包""" - while True: - print(f"\033[1;36m[表情包]\033[0m 开始扫描新表情包...") - await self.scan_new_emojis() - await asyncio.sleep(interval_MINS * 60) # 每600秒扫描一次 + logger.error(f"扫描表情包失败: {str(e)}") + logger.error(traceback.format_exc()) def check_emoji_file_integrity(self): """检查表情包文件完整性 @@ -378,44 +300,42 @@ class EmojiManager: for emoji in all_emojis: try: if 'path' not in emoji: - print(f"\033[1;33m[提示]\033[0m 发现无效记录(缺少path字段),ID: {emoji.get('_id', 'unknown')}") + logger.warning(f"发现无效记录(缺少path字段),ID: {emoji.get('_id', 'unknown')}") + self.db.db.emoji.delete_one({'_id': emoji['_id']}) + removed_count += 1 + continue + + if 'embedding' not in emoji: + logger.warning(f"发现过时记录(缺少embedding字段),ID: {emoji.get('_id', 'unknown')}") self.db.db.emoji.delete_one({'_id': emoji['_id']}) removed_count += 1 continue # 检查文件是否存在 if not os.path.exists(emoji['path']): - print(f"\033[1;33m[提示]\033[0m 表情包文件已被删除: {emoji['path']}") + logger.warning(f"表情包文件已被删除: {emoji['path']}") # 从数据库中删除记录 result = self.db.db.emoji.delete_one({'_id': emoji['_id']}) if result.deleted_count > 0: - print(f"\033[1;32m[成功]\033[0m 成功删除数据库记录: {emoji['_id']}") + logger.success(f"成功删除数据库记录: {emoji['_id']}") removed_count += 1 else: - print(f"\033[1;31m[错误]\033[0m 删除数据库记录失败: {emoji['_id']}") + logger.error(f"删除数据库记录失败: {emoji['_id']}") except Exception as item_error: - print(f"\033[1;31m[错误]\033[0m 处理表情包记录时出错: {str(item_error)}") + logger.error(f"处理表情包记录时出错: {str(item_error)}") continue # 验证清理结果 remaining_count = self.db.db.emoji.count_documents({}) if removed_count > 0: - print(f"\033[1;32m[成功]\033[0m 已清理 {removed_count} 个失效的表情包记录") - print(f"\033[1;34m[统计]\033[0m 清理前总数: {total_count} | 清理后总数: {remaining_count}") - # print(f"\033[1;34m[统计]\033[0m 应删除数量: {removed_count} | 实际删除数量: {total_count - remaining_count}") - # 执行数据库压缩 - try: - self.db.db.command({"compact": "emoji"}) - print(f"\033[1;32m[成功]\033[0m 数据库集合压缩完成") - except Exception as compact_error: - print(f"\033[1;31m[错误]\033[0m 数据库压缩失败: {str(compact_error)}") + logger.success(f"已清理 {removed_count} 个失效的表情包记录") + logger.info(f"清理前总数: {total_count} | 清理后总数: {remaining_count}") else: - print(f"\033[1;36m[表情包]\033[0m 已检查 {total_count} 个表情包记录") + logger.info(f"已检查 {total_count} 个表情包记录") except Exception as e: - print(f"\033[1;31m[错误]\033[0m 检查表情包完整性失败: {str(e)}") - import traceback - print(f"\033[1;31m[错误追踪]\033[0m\n{traceback.format_exc()}") + logger.error(f"检查表情包完整性失败: {str(e)}") + logger.error(traceback.format_exc()) async def start_periodic_check(self, interval_MINS: int = 120): while True: