From 669f9e400a31d4ea7a624bbf198aa7d754b136ef Mon Sep 17 00:00:00 2001 From: tcmofashi Date: Wed, 5 Mar 2025 09:49:19 +0800 Subject: [PATCH 1/7] =?UTF-8?q?feat:=20=E4=BF=AE=E6=94=B9emoji=E4=B8=BAemb?= =?UTF-8?q?edding=E5=8C=B9=E9=85=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/plugins/chat/emoji_manager.py | 260 +++++++++++------------------- 1 file changed, 90 insertions(+), 170 deletions(-) diff --git a/src/plugins/chat/emoji_manager.py b/src/plugins/chat/emoji_manager.py index 2311b2459..a0164d065 100644 --- a/src/plugins/chat/emoji_manager.py +++ b/src/plugins/chat/emoji_manager.py @@ -14,10 +14,12 @@ import asyncio import time from PIL import Image import io +from loguru import logger from nonebot import get_driver from ..chat.config import global_config from ..models.utils_model import LLM_request +from utils import get_embedding driver = get_driver() config = driver.config @@ -27,16 +29,6 @@ class EmojiManager: _instance = None EMOJI_DIR = "data/emoji" # 表情包存储目录 - EMOTION_KEYWORDS = { - 'happy': ['开心', '快乐', '高兴', '欢喜', '笑', '喜悦', '兴奋', '愉快', '乐', '好'], - 'angry': ['生气', '愤怒', '恼火', '不爽', '火大', '怒', '气愤', '恼怒', '发火', '不满'], - 'sad': ['伤心', '难过', '悲伤', '痛苦', '哭', '忧伤', '悲痛', '哀伤', '委屈', '失落'], - 'surprised': ['惊讶', '震惊', '吃惊', '意外', '惊', '诧异', '惊奇', '惊喜', '不敢相信', '目瞪口呆'], - 'disgusted': ['恶心', '讨厌', '厌恶', '反感', '嫌弃', '恶', '嫌恶', '憎恶', '不喜欢', '烦'], - 'fearful': ['害怕', '恐惧', '惊恐', '担心', '怕', '惊吓', '惊慌', '畏惧', '胆怯', '惧'], - 'neutral': ['普通', '一般', '还行', '正常', '平静', '平淡', '一般般', '凑合', '还好', '就这样'] - } - def __new__(cls): if cls._instance is None: cls._instance = super().__new__(cls) @@ -64,7 +56,7 @@ class EmojiManager: # 启动时执行一次完整性检查 self.check_emoji_file_integrity() except Exception as e: - print(f"\033[1;31m[错误]\033[0m 初始化表情管理器失败: {str(e)}") + logger.error(f"初始化表情管理器失败: {str(e)}") def _ensure_db(self): """确保数据库已初始化""" @@ -77,7 +69,7 @@ class EmojiManager: """确保emoji集合存在并创建索引""" if 'emoji' not in self.db.db.list_collection_names(): self.db.db.create_collection('emoji') - self.db.db.emoji.create_index([('tags', 1)]) + self.db.db.emoji.create_index([('embedding', '2dsphere')]) self.db.db.emoji.create_index([('filename', 1)], unique=True) def record_usage(self, emoji_id: str): @@ -89,79 +81,8 @@ class EmojiManager: {'$inc': {'usage_count': 1}} ) except Exception as e: - print(f"\033[1;31m[错误]\033[0m 记录表情使用失败: {str(e)}") + logger.error(f"记录表情使用失败: {str(e)}") - async def _get_emotion_from_text(self, text: str) -> List[str]: - """从文本中识别情感关键词 - Args: - text: 输入文本 - Returns: - List[str]: 匹配到的情感标签列表 - """ - try: - prompt = f'分析这段文本:"{text}",从"happy,angry,sad,surprised,disgusted,fearful,neutral"中选出最匹配的1个情感标签。只需要返回标签,不要输出其他任何内容。' - - content, _ = await self.llm.generate_response(prompt) - emotion = content.strip().lower() - - if emotion in self.EMOTION_KEYWORDS: - print(f"\033[1;32m[成功]\033[0m 识别到的情感: {emotion}") - return [emotion] - - return ['neutral'] - - except Exception as e: - print(f"\033[1;31m[错误]\033[0m 情感分析失败: {str(e)}") - return ['neutral'] - - async def get_emoji_for_emotion(self, emotion_tag: str) -> Optional[str]: - try: - self._ensure_db() - - # 构建查询条件:标签匹配任一情感 - query = {'tags': {'$in': emotion_tag}} - - # print(f"\033[1;34m[调试]\033[0m 表情查询条件: {query}") - - try: - # 随机获取一个匹配的表情 - emoji = self.db.db.emoji.aggregate([ - {'$match': query}, - {'$sample': {'size': 1}} - ]).next() - print(f"\033[1;32m[成功]\033[0m 找到匹配的表情") - if emoji and 'path' in emoji: - # 更新使用次数 - self.db.db.emoji.update_one( - {'_id': emoji['_id']}, - {'$inc': {'usage_count': 1}} - ) - return emoji['path'] - except StopIteration: - # 如果没有匹配的表情,从所有表情中随机选择一个 - print(f"\033[1;33m[提示]\033[0m 未找到匹配的表情,随机选择一个") - try: - emoji = self.db.db.emoji.aggregate([ - {'$sample': {'size': 1}} - ]).next() - if emoji and 'path' in emoji: - # 更新使用次数 - self.db.db.emoji.update_one( - {'_id': emoji['_id']}, - {'$inc': {'usage_count': 1}} - ) - return emoji['path'] - except StopIteration: - print(f"\033[1;31m[错误]\033[0m 数据库中没有任何表情") - return None - - return None - - except Exception as e: - print(f"\033[1;31m[错误]\033[0m 获取表情包失败: {str(e)}") - return None - - async def get_emoji_for_text(self, text: str) -> Optional[str]: """根据文本内容获取相关表情包 Args: @@ -171,77 +92,84 @@ class EmojiManager: """ try: self._ensure_db() - # 获取情感标签 - emotions = await self._get_emotion_from_text(text) - print("为 ‘"+ str(text) + "’ 获取到的情感标签为:" + str(emotions)) - if not emotions: + + # 获取文本的embedding + text_embedding = get_embedding(text) + if not text_embedding: + logger.error("无法获取文本的embedding") return None - # 构建查询条件:标签匹配任一情感 - query = {'tags': {'$in': emotions}} - - print(f"\033[1;34m[调试]\033[0m 表情查询条件: {query}") - print(f"\033[1;34m[调试]\033[0m 匹配到的情感: {emotions}") + # 使用embedding进行相似度搜索,获取最相似的3个表情包 + pipeline = [ + { + "$search": { + "index": "default", + "knnBeta": { + "vector": text_embedding, + "path": "embedding", + "k": 3 + } + } + } + ] try: - # 随机获取一个匹配的表情 - emoji = self.db.db.emoji.aggregate([ - {'$match': query}, - {'$sample': {'size': 1}} - ]).next() - print(f"\033[1;32m[成功]\033[0m 找到匹配的表情") - if emoji and 'path' in emoji: + # 获取搜索结果 + results = list(self.db.db.emoji.aggregate(pipeline)) + + if not results: + logger.warning("未找到匹配的表情包,尝试随机选择") + # 如果没有匹配的表情,随机选择一个 + try: + emoji = self.db.db.emoji.aggregate([ + {'$sample': {'size': 1}} + ]).next() + if emoji and 'path' in emoji: + # 更新使用次数 + self.db.db.emoji.update_one( + {'_id': emoji['_id']}, + {'$inc': {'usage_count': 1}} + ) + return emoji['path'] + except StopIteration: + logger.error("数据库中没有任何表情") + return None + + # 从最相似的3个表情包中随机选择一个 + selected_emoji = random.choice(results) + + if selected_emoji and 'path' in selected_emoji: # 更新使用次数 self.db.db.emoji.update_one( - {'_id': emoji['_id']}, + {'_id': selected_emoji['_id']}, {'$inc': {'usage_count': 1}} ) - return emoji['path'] - except StopIteration: - # 如果没有匹配的表情,从所有表情中随机选择一个 - print(f"\033[1;33m[提示]\033[0m 未找到匹配的表情,随机选择一个") - try: - emoji = self.db.db.emoji.aggregate([ - {'$sample': {'size': 1}} - ]).next() - if emoji and 'path' in emoji: - # 更新使用次数 - self.db.db.emoji.update_one( - {'_id': emoji['_id']}, - {'$inc': {'usage_count': 1}} - ) - return emoji['path'] - except StopIteration: - print(f"\033[1;31m[错误]\033[0m 数据库中没有任何表情") - return None + logger.success(f"找到匹配的表情包: {selected_emoji.get('discription', '无描述')}") + return selected_emoji['path'] + + except Exception as search_error: + logger.error(f"搜索表情包失败: {str(search_error)}") + return None return None except Exception as e: - print(f"\033[1;31m[错误]\033[0m 获取表情包失败: {str(e)}") + logger.error(f"获取表情包失败: {str(e)}") return None async def _get_emoji_tag(self, image_base64: str) -> str: """获取表情包的标签""" try: - prompt = '这是一个表情包,请从"happy", "angry", "sad", "surprised", "disgusted", "fearful", "neutral"中选出1个情感标签。只输出标签,不要输出其他任何内容,只输出情感标签就好' + prompt = '这是一个表情包,请为其生成简洁的描述,同时生成表情包所蕴含的情绪的描述。' content, _ = await self.llm.generate_response_for_image(prompt, image_base64) - tag_result = content.strip().lower() - - valid_tags = ["happy", "angry", "sad", "surprised", "disgusted", "fearful", "neutral"] - for tag_match in valid_tags: - if tag_match in tag_result or tag_match == tag_result: - return tag_match - print(f"\033[1;33m[警告]\033[0m 无效的标签: {tag_result}, 跳过") + logger.debug(f"输出描述: {content}") + return content except Exception as e: - print(f"\033[1;31m[错误]\033[0m 获取标签失败: {str(e)}") - return "skip" + logger.error(f"获取标签失败: {str(e)}") + return None - print(f"\033[1;32m[调试信息]\033[0m 使用默认标签: neutral") - return "skip" # 默认标签 - async def _compress_image(self, image_path: str, target_size: int = 0.8 * 1024 * 1024) -> Optional[str]: """压缩图片并返回base64编码 Args: @@ -303,12 +231,12 @@ class EmojiManager: # 获取压缩后的数据并转换为base64 compressed_data = output_buffer.getvalue() - print(f"\033[1;32m[成功]\033[0m 压缩图片: {os.path.basename(image_path)} ({original_width}x{original_height} -> {new_width}x{new_height})") + logger.success(f"压缩图片: {os.path.basename(image_path)} ({original_width}x{original_height} -> {new_width}x{new_height})") return base64.b64encode(compressed_data).decode('utf-8') except Exception as e: - print(f"\033[1;31m[错误]\033[0m 压缩图片失败: {os.path.basename(image_path)}, 错误: {str(e)}") + logger.error(f"压缩图片失败: {os.path.basename(image_path)}, 错误: {str(e)}") return None async def scan_new_emojis(self): @@ -334,35 +262,29 @@ class EmojiManager: os.remove(image_path) continue - # 获取表情包的情感标签 - tag = await self._get_emoji_tag(image_base64) - if not tag == "skip": + # 获取表情包的描述 + discription = await self._get_emoji_tag(image_base64) + embedding = get_embedding(discription) + if discription is not None: # 准备数据库记录 emoji_record = { 'filename': filename, 'path': image_path, - 'tags': [tag], + 'embedding':embedding, + 'discription': discription, 'timestamp': int(time.time()) } # 保存到数据库 self.db.db['emoji'].insert_one(emoji_record) - print(f"\033[1;32m[成功]\033[0m 注册新表情包: {filename}") - print(f"标签: {tag}") + logger.success(f"注册新表情包: {filename}") + logger.info(f"描述: {discription}") else: - print(f"\033[1;33m[警告]\033[0m 跳过表情包: {filename}") + logger.warning(f"跳过表情包: {filename}") except Exception as e: - print(f"\033[1;31m[错误]\033[0m 扫描表情包失败: {str(e)}") - import traceback - print(traceback.format_exc()) - - async def _periodic_scan(self, interval_MINS: int = 10): - """定期扫描新表情包""" - while True: - print(f"\033[1;36m[表情包]\033[0m 开始扫描新表情包...") - await self.scan_new_emojis() - await asyncio.sleep(interval_MINS * 60) # 每600秒扫描一次 + logger.error(f"扫描表情包失败: {str(e)}") + logger.error(traceback.format_exc()) def check_emoji_file_integrity(self): """检查表情包文件完整性 @@ -378,44 +300,42 @@ class EmojiManager: for emoji in all_emojis: try: if 'path' not in emoji: - print(f"\033[1;33m[提示]\033[0m 发现无效记录(缺少path字段),ID: {emoji.get('_id', 'unknown')}") + logger.warning(f"发现无效记录(缺少path字段),ID: {emoji.get('_id', 'unknown')}") + self.db.db.emoji.delete_one({'_id': emoji['_id']}) + removed_count += 1 + continue + + if 'embedding' not in emoji: + logger.warning(f"发现过时记录(缺少embedding字段),ID: {emoji.get('_id', 'unknown')}") self.db.db.emoji.delete_one({'_id': emoji['_id']}) removed_count += 1 continue # 检查文件是否存在 if not os.path.exists(emoji['path']): - print(f"\033[1;33m[提示]\033[0m 表情包文件已被删除: {emoji['path']}") + logger.warning(f"表情包文件已被删除: {emoji['path']}") # 从数据库中删除记录 result = self.db.db.emoji.delete_one({'_id': emoji['_id']}) if result.deleted_count > 0: - print(f"\033[1;32m[成功]\033[0m 成功删除数据库记录: {emoji['_id']}") + logger.success(f"成功删除数据库记录: {emoji['_id']}") removed_count += 1 else: - print(f"\033[1;31m[错误]\033[0m 删除数据库记录失败: {emoji['_id']}") + logger.error(f"删除数据库记录失败: {emoji['_id']}") except Exception as item_error: - print(f"\033[1;31m[错误]\033[0m 处理表情包记录时出错: {str(item_error)}") + logger.error(f"处理表情包记录时出错: {str(item_error)}") continue # 验证清理结果 remaining_count = self.db.db.emoji.count_documents({}) if removed_count > 0: - print(f"\033[1;32m[成功]\033[0m 已清理 {removed_count} 个失效的表情包记录") - print(f"\033[1;34m[统计]\033[0m 清理前总数: {total_count} | 清理后总数: {remaining_count}") - # print(f"\033[1;34m[统计]\033[0m 应删除数量: {removed_count} | 实际删除数量: {total_count - remaining_count}") - # 执行数据库压缩 - try: - self.db.db.command({"compact": "emoji"}) - print(f"\033[1;32m[成功]\033[0m 数据库集合压缩完成") - except Exception as compact_error: - print(f"\033[1;31m[错误]\033[0m 数据库压缩失败: {str(compact_error)}") + logger.success(f"已清理 {removed_count} 个失效的表情包记录") + logger.info(f"清理前总数: {total_count} | 清理后总数: {remaining_count}") else: - print(f"\033[1;36m[表情包]\033[0m 已检查 {total_count} 个表情包记录") + logger.info(f"已检查 {total_count} 个表情包记录") except Exception as e: - print(f"\033[1;31m[错误]\033[0m 检查表情包完整性失败: {str(e)}") - import traceback - print(f"\033[1;31m[错误追踪]\033[0m\n{traceback.format_exc()}") + logger.error(f"检查表情包完整性失败: {str(e)}") + logger.error(traceback.format_exc()) async def start_periodic_check(self, interval_MINS: int = 120): while True: From 97bddb83e71cfcd2a965ddaaef29c3970a4d21df Mon Sep 17 00:00:00 2001 From: tcmofashi Date: Wed, 5 Mar 2025 10:43:08 +0800 Subject: [PATCH 2/7] =?UTF-8?q?feat:=20=E8=A1=A8=E6=83=85=E5=8C=85?= =?UTF-8?q?=E5=8C=B9=E9=85=8D=E4=BB=8E=E6=83=85=E7=BB=AA=E5=8C=B9=E9=85=8D?= =?UTF-8?q?=E6=94=B9=E6=88=90=E5=B5=8C=E5=85=A5=E7=9B=B8=E4=BC=BC=E5=BA=A6?= =?UTF-8?q?=E5=8C=B9=E9=85=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/plugins/chat/bot.py | 2 +- src/plugins/chat/emoji_manager.py | 84 +++++++++++++++++-------------- 2 files changed, 47 insertions(+), 39 deletions(-) diff --git a/src/plugins/chat/bot.py b/src/plugins/chat/bot.py index 6b0e76db5..f9488b96f 100644 --- a/src/plugins/chat/bot.py +++ b/src/plugins/chat/bot.py @@ -174,7 +174,7 @@ class ChatBot: bot_response_time = tinking_time_point if random() < global_config.emoji_chance: - emoji_path = await emoji_manager.get_emoji_for_emotion(emotion) + emoji_path = await emoji_manager.get_emoji_for_text(response) if emoji_path: emoji_cq = CQCode.create_emoji_cq(emoji_path) diff --git a/src/plugins/chat/emoji_manager.py b/src/plugins/chat/emoji_manager.py index a0164d065..aa0bc1fb5 100644 --- a/src/plugins/chat/emoji_manager.py +++ b/src/plugins/chat/emoji_manager.py @@ -15,11 +15,12 @@ import time from PIL import Image import io from loguru import logger +import traceback from nonebot import get_driver from ..chat.config import global_config from ..models.utils_model import LLM_request -from utils import get_embedding +from ..chat.utils import get_embedding driver = get_driver() config = driver.config @@ -39,7 +40,7 @@ class EmojiManager: def __init__(self): self.db = Database.get_instance() self._scan_task = None - self.llm = LLM_request(model=global_config.vlm, temperature=0.3, max_tokens=50) + self.llm = LLM_request(model=global_config.vlm, temperature=0.3, max_tokens=1000) def _ensure_emoji_dir(self): """确保表情存储目录存在""" @@ -98,45 +99,44 @@ class EmojiManager: if not text_embedding: logger.error("无法获取文本的embedding") return None - - # 使用embedding进行相似度搜索,获取最相似的3个表情包 - pipeline = [ - { - "$search": { - "index": "default", - "knnBeta": { - "vector": text_embedding, - "path": "embedding", - "k": 3 - } - } - } - ] try: - # 获取搜索结果 - results = list(self.db.db.emoji.aggregate(pipeline)) + # 获取所有表情包 + all_emojis = list(self.db.db.emoji.find({}, {'_id': 1, 'path': 1, 'embedding': 1, 'discription': 1})) - if not results: - logger.warning("未找到匹配的表情包,尝试随机选择") - # 如果没有匹配的表情,随机选择一个 - try: - emoji = self.db.db.emoji.aggregate([ - {'$sample': {'size': 1}} - ]).next() - if emoji and 'path' in emoji: - # 更新使用次数 - self.db.db.emoji.update_one( - {'_id': emoji['_id']}, - {'$inc': {'usage_count': 1}} - ) - return emoji['path'] - except StopIteration: - logger.error("数据库中没有任何表情") - return None + if not all_emojis: + logger.warning("数据库中没有任何表情包") + return None - # 从最相似的3个表情包中随机选择一个 - selected_emoji = random.choice(results) + # 计算余弦相似度并排序 + def cosine_similarity(v1, v2): + if not v1 or not v2: + return 0 + dot_product = sum(a * b for a, b in zip(v1, v2)) + norm_v1 = sum(a * a for a in v1) ** 0.5 + norm_v2 = sum(b * b for b in v2) ** 0.5 + if norm_v1 == 0 or norm_v2 == 0: + return 0 + return dot_product / (norm_v1 * norm_v2) + + # 计算所有表情包与输入文本的相似度 + emoji_similarities = [ + (emoji, cosine_similarity(text_embedding, emoji.get('embedding', []))) + for emoji in all_emojis + ] + + # 按相似度降序排序 + emoji_similarities.sort(key=lambda x: x[1], reverse=True) + + # 获取前3个最相似的表情包 + top_3_emojis = emoji_similarities[:3] + + if not top_3_emojis: + logger.warning("未找到匹配的表情包") + return None + + # 从前3个中随机选择一个 + selected_emoji, similarity = random.choice(top_3_emojis) if selected_emoji and 'path' in selected_emoji: # 更新使用次数 @@ -144,7 +144,7 @@ class EmojiManager: {'_id': selected_emoji['_id']}, {'$inc': {'usage_count': 1}} ) - logger.success(f"找到匹配的表情包: {selected_emoji.get('discription', '无描述')}") + logger.success(f"找到匹配的表情包: {selected_emoji.get('discription', '无描述')} (相似度: {similarity:.4f})") return selected_emoji['path'] except Exception as search_error: @@ -285,6 +285,14 @@ class EmojiManager: except Exception as e: logger.error(f"扫描表情包失败: {str(e)}") logger.error(traceback.format_exc()) + + async def _periodic_scan(self, interval_MINS: int = 10): + """定期扫描新表情包""" + while True: + print(f"\033[1;36m[表情包]\033[0m 开始扫描新表情包...") + await self.scan_new_emojis() + await asyncio.sleep(interval_MINS * 60) # 每600秒扫描一次 + def check_emoji_file_integrity(self): """检查表情包文件完整性 From a896cf5ec4d31d358bf3cef3e5721f01da817d07 Mon Sep 17 00:00:00 2001 From: tcmofashi Date: Thu, 6 Mar 2025 02:13:54 +0800 Subject: [PATCH 3/7] =?UTF-8?q?fix:=20=E5=85=BC=E5=AE=B9tag?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/plugins/chat/emoji_manager.py | 38 +++++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/src/plugins/chat/emoji_manager.py b/src/plugins/chat/emoji_manager.py index aa0bc1fb5..2a74e8b02 100644 --- a/src/plugins/chat/emoji_manager.py +++ b/src/plugins/chat/emoji_manager.py @@ -29,6 +29,16 @@ config = driver.config class EmojiManager: _instance = None EMOJI_DIR = "data/emoji" # 表情包存储目录 + + EMOTION_KEYWORDS = { + 'happy': ['开心', '快乐', '高兴', '欢喜', '笑', '喜悦', '兴奋', '愉快', '乐', '好'], + 'angry': ['生气', '愤怒', '恼火', '不爽', '火大', '怒', '气愤', '恼怒', '发火', '不满'], + 'sad': ['伤心', '难过', '悲伤', '痛苦', '哭', '忧伤', '悲痛', '哀伤', '委屈', '失落'], + 'surprised': ['惊讶', '震惊', '吃惊', '意外', '惊', '诧异', '惊奇', '惊喜', '不敢相信', '目瞪口呆'], + 'disgusted': ['恶心', '讨厌', '厌恶', '反感', '嫌弃', '恶', '嫌恶', '憎恶', '不喜欢', '烦'], + 'fearful': ['害怕', '恐惧', '惊恐', '担心', '怕', '惊吓', '惊慌', '畏惧', '胆怯', '惧'], + 'neutral': ['普通', '一般', '还行', '正常', '平静', '平淡', '一般般', '凑合', '还好', '就这样'] + } def __new__(cls): if cls._instance is None: @@ -71,6 +81,7 @@ class EmojiManager: if 'emoji' not in self.db.db.list_collection_names(): self.db.db.create_collection('emoji') self.db.db.emoji.create_index([('embedding', '2dsphere')]) + self.db.db.emoji.create_index([('tags', 1)]) self.db.db.emoji.create_index([('filename', 1)], unique=True) def record_usage(self, emoji_id: str): @@ -160,7 +171,28 @@ class EmojiManager: async def _get_emoji_tag(self, image_base64: str) -> str: """获取表情包的标签""" try: - prompt = '这是一个表情包,请为其生成简洁的描述,同时生成表情包所蕴含的情绪的描述。' + prompt = '这是一个表情包,请从"happy", "angry", "sad", "surprised", "disgusted", "fearful", "neutral"中选出1个情感标签。只输出标签,不要输出其他任何内容,只输出情感标签就好' + + content, _ = await self.llm.generate_response_for_image(prompt, image_base64) + tag_result = content.strip().lower() + + valid_tags = ["happy", "angry", "sad", "surprised", "disgusted", "fearful", "neutral"] + for tag_match in valid_tags: + if tag_match in tag_result or tag_match == tag_result: + return tag_match + print(f"\033[1;33m[警告]\033[0m 无效的标签: {tag_result}, 跳过") + + except Exception as e: + print(f"\033[1;31m[错误]\033[0m 获取标签失败: {str(e)}") + return "neutral" + + print(f"\033[1;32m[调试信息]\033[0m 使用默认标签: neutral") + return "neutral" # 默认标签 + + async def _get_emoji_discription(self, image_base64: str) -> str: + """获取表情包的标签""" + try: + prompt = '这是一个表情包,简洁的描述一下表情包的内容和表情包所表达的情感' content, _ = await self.llm.generate_response_for_image(prompt, image_base64) logger.debug(f"输出描述: {content}") @@ -263,7 +295,8 @@ class EmojiManager: continue # 获取表情包的描述 - discription = await self._get_emoji_tag(image_base64) + discription = await self._get_emoji_discription(image_base64) + tag = await self._get_emoji_tag(image_base64) embedding = get_embedding(discription) if discription is not None: # 准备数据库记录 @@ -272,6 +305,7 @@ class EmojiManager: 'path': image_path, 'embedding':embedding, 'discription': discription, + 'tag':tag, 'timestamp': int(time.time()) } From fea3285d2012d572a856a0bdf1aaf692eeb15fc7 Mon Sep 17 00:00:00 2001 From: tcmofashi Date: Thu, 6 Mar 2025 06:30:27 +0800 Subject: [PATCH 4/7] =?UTF-8?q?feat:=20emoji=E9=80=89=E6=8B=A9=E9=80=BB?= =?UTF-8?q?=E8=BE=91=E4=BC=98=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/plugins/chat/emoji_manager.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/src/plugins/chat/emoji_manager.py b/src/plugins/chat/emoji_manager.py index 2a74e8b02..f2bee4fb5 100644 --- a/src/plugins/chat/emoji_manager.py +++ b/src/plugins/chat/emoji_manager.py @@ -51,6 +51,7 @@ class EmojiManager: self.db = Database.get_instance() self._scan_task = None self.llm = LLM_request(model=global_config.vlm, temperature=0.3, max_tokens=1000) + self.lm = LLM_request(model=global_config.llm_reasoning_minor, max_tokens=1000) def _ensure_emoji_dir(self): """确保表情存储目录存在""" @@ -106,7 +107,8 @@ class EmojiManager: self._ensure_db() # 获取文本的embedding - text_embedding = get_embedding(text) + text_for_search= await self._get_kimoji_for_text(text) + text_embedding = get_embedding(text_for_search) if not text_embedding: logger.error("无法获取文本的embedding") return None @@ -202,6 +204,18 @@ class EmojiManager: logger.error(f"获取标签失败: {str(e)}") return None + async def _get_kimoji_for_text(self, text:str): + try: + prompt = f'这是{global_config.BOT_NICKNAME}将要发送的消息内容:\n{text}\n若要为其配上表情包,请你输出这个表情包应该表达怎样的情感,应该给人什么样的感觉,不要太简洁也不要太长,注意不要输出任何对内容的分析内容,只输出\"一种什么样的感觉\"中间的形容词部分。' + + content, _ = await self.llm.generate_response_async(prompt) + logger.info(f"输出描述: {content}") + return content + + except Exception as e: + logger.error(f"获取标签失败: {str(e)}") + return None + async def _compress_image(self, image_path: str, target_size: int = 0.8 * 1024 * 1024) -> Optional[str]: """压缩图片并返回base64编码 Args: From a612519d56c57ddcfc22a07e4235fa4b2924af85 Mon Sep 17 00:00:00 2001 From: tcmofashi Date: Thu, 6 Mar 2025 07:22:36 +0800 Subject: [PATCH 5/7] =?UTF-8?q?feat:=20=E6=96=B0=E5=A2=9E=E8=A1=A8?= =?UTF-8?q?=E6=83=85=E5=8C=85=E8=BF=87=E6=BB=A4=EF=BC=8C=E5=A5=B6=E9=BE=99?= =?UTF-8?q?=E5=86=8D=E8=A7=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/plugins/chat/emoji_manager.py | 23 +++++++++++++++++++++-- src/plugins/chat/llm_generator.py | 3 ++- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/src/plugins/chat/emoji_manager.py b/src/plugins/chat/emoji_manager.py index f2bee4fb5..e7ff85803 100644 --- a/src/plugins/chat/emoji_manager.py +++ b/src/plugins/chat/emoji_manager.py @@ -194,7 +194,19 @@ class EmojiManager: async def _get_emoji_discription(self, image_base64: str) -> str: """获取表情包的标签""" try: - prompt = '这是一个表情包,简洁的描述一下表情包的内容和表情包所表达的情感' + prompt = '这是一个表情包,使用中文简洁的描述一下表情包的内容和表情包所表达的情感' + + content, _ = await self.llm.generate_response_for_image(prompt, image_base64) + logger.debug(f"输出描述: {content}") + return content + + except Exception as e: + logger.error(f"获取标签失败: {str(e)}") + return None + + async def _check_emoji(self, image_base64: str) -> str: + try: + prompt = '这是一个表情包,请回答这个表情包是否满足\"动漫风格,画风可爱\"的要求,是则回答是,否则回答否,不要出现任何其他内容' content, _ = await self.llm.generate_response_for_image(prompt, image_base64) logger.debug(f"输出描述: {content}") @@ -208,7 +220,7 @@ class EmojiManager: try: prompt = f'这是{global_config.BOT_NICKNAME}将要发送的消息内容:\n{text}\n若要为其配上表情包,请你输出这个表情包应该表达怎样的情感,应该给人什么样的感觉,不要太简洁也不要太长,注意不要输出任何对内容的分析内容,只输出\"一种什么样的感觉\"中间的形容词部分。' - content, _ = await self.llm.generate_response_async(prompt) + content, _ = await self.lm.generate_response_async(prompt) logger.info(f"输出描述: {content}") return content @@ -310,6 +322,13 @@ class EmojiManager: # 获取表情包的描述 discription = await self._get_emoji_discription(image_base64) + check = await self._check_emoji(image_base64) + if '是' not in check: + os.remove(image_path) + logger.info(f"描述: {discription}") + logger.info(f"其不满足过滤规则,被剔除 {check}") + continue + logger.info(f"check通过 {check}") tag = await self._get_emoji_tag(image_base64) embedding = get_embedding(discription) if discription is not None: diff --git a/src/plugins/chat/llm_generator.py b/src/plugins/chat/llm_generator.py index ab0f4e12c..a2f981c9e 100644 --- a/src/plugins/chat/llm_generator.py +++ b/src/plugins/chat/llm_generator.py @@ -24,6 +24,7 @@ class ResponseGenerator: self.model_r1 = LLM_request(model=global_config.llm_reasoning, temperature=0.7,max_tokens=1000) self.model_v3 = LLM_request(model=global_config.llm_normal, temperature=0.7,max_tokens=1000) self.model_r1_distill = LLM_request(model=global_config.llm_reasoning_minor, temperature=0.7,max_tokens=1000) + self.model_v25 = LLM_request(model=global_config.llm_normal_minor, temperature=0.7,max_tokens=1000) self.db = Database.get_instance() self.current_model_type = 'r1' # 默认使用 R1 @@ -138,7 +139,7 @@ class ResponseGenerator: 内容:{content} 输出: ''' - content, _ = await self.model_v3.generate_response(prompt) + content, _ = await self.model_v25.generate_response(prompt) content=content.strip() if content in ['happy','angry','sad','surprised','disgusted','fearful','neutral']: return [content] From 90e72db87b34d17063a01e0fe1bb73a22aae8b1e Mon Sep 17 00:00:00 2001 From: tcmofashi Date: Thu, 6 Mar 2025 21:11:22 +0800 Subject: [PATCH 6/7] =?UTF-8?q?fix:=20=E8=B0=83=E6=95=B4api=E6=8E=A5?= =?UTF-8?q?=E5=8F=A3=E7=9A=84=E6=89=93=E5=8D=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/plugins/models/utils_model.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/plugins/models/utils_model.py b/src/plugins/models/utils_model.py index 2801a3553..793a89290 100644 --- a/src/plugins/models/utils_model.py +++ b/src/plugins/models/utils_model.py @@ -41,7 +41,7 @@ class LLM_request: # 发送请求到完整的chat/completions端点 api_url = f"{self.base_url.rstrip('/')}/chat/completions" - logger.info(f"发送请求到URL: {api_url}{self.model_name}") # 记录请求的URL + logger.info(f"发送请求到URL: {api_url}/{self.model_name}") # 记录请求的URL max_retries = 3 base_wait_time = 15 @@ -123,7 +123,7 @@ class LLM_request: # 发送请求到完整的chat/completions端点 api_url = f"{self.base_url.rstrip('/')}/chat/completions" - logger.info(f"发送请求到URL: {api_url}{self.model_name}") # 记录请求的URL + logger.info(f"发送请求到URL: {api_url}/{self.model_name}") # 记录请求的URL max_retries = 3 base_wait_time = 15 @@ -273,7 +273,7 @@ class LLM_request: # 发送请求到完整的chat/completions端点 api_url = f"{self.base_url.rstrip('/')}/chat/completions" - logger.info(f"发送请求到URL: {api_url}{self.model_name}") # 记录请求的URL + logger.info(f"发送请求到URL: {api_url}/{self.model_name}") # 记录请求的URL max_retries = 2 base_wait_time = 6 @@ -339,7 +339,7 @@ class LLM_request: } api_url = f"{self.base_url.rstrip('/')}/embeddings" - logger.info(f"发送请求到URL: {api_url}{self.model_name}") # 记录请求的URL + logger.info(f"发送请求到URL: {api_url}/{self.model_name}") # 记录请求的URL max_retries = 2 base_wait_time = 6 @@ -396,7 +396,7 @@ class LLM_request: } api_url = f"{self.base_url.rstrip('/')}/embeddings" - logger.info(f"发送请求到URL: {api_url}{self.model_name}") # 记录请求的URL + logger.info(f"发送请求到URL: {api_url}/{self.model_name}") # 记录请求的URL max_retries = 3 base_wait_time = 15 From e3c7fae61d7b3ccaf34040c67533da6b38c962ff Mon Sep 17 00:00:00 2001 From: tcmofashi Date: Thu, 6 Mar 2025 21:18:35 +0800 Subject: [PATCH 7/7] =?UTF-8?q?fix:=20=E5=A2=9E=E5=8A=A0=E8=AE=BE=E7=BD=AE?= =?UTF-8?q?=E9=A1=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/plugins/chat/config.py | 2 ++ src/plugins/chat/emoji_manager.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/plugins/chat/config.py b/src/plugins/chat/config.py index d5ee364ce..e044edc5e 100644 --- a/src/plugins/chat/config.py +++ b/src/plugins/chat/config.py @@ -30,6 +30,7 @@ class BotConfig: forget_memory_interval: int = 300 # 记忆遗忘间隔(秒) EMOJI_CHECK_INTERVAL: int = 120 # 表情包检查间隔(分钟) EMOJI_REGISTER_INTERVAL: int = 10 # 表情包注册间隔(分钟) + EMOJI_CHECK_PROMPT: str = "不要包含违反公序良俗的内容" # 表情包过滤要求 ban_words = set() @@ -94,6 +95,7 @@ class BotConfig: emoji_config = toml_dict["emoji"] config.EMOJI_CHECK_INTERVAL = emoji_config.get("check_interval", config.EMOJI_CHECK_INTERVAL) config.EMOJI_REGISTER_INTERVAL = emoji_config.get("register_interval", config.EMOJI_REGISTER_INTERVAL) + config.EMOJI_CHECK_PROMPT = emoji_config.get('check_prompt',config.EMOJI_CHECK_PROMPT) if "cq_code" in toml_dict: cq_code_config = toml_dict["cq_code"] diff --git a/src/plugins/chat/emoji_manager.py b/src/plugins/chat/emoji_manager.py index e7ff85803..4b81302b1 100644 --- a/src/plugins/chat/emoji_manager.py +++ b/src/plugins/chat/emoji_manager.py @@ -206,7 +206,7 @@ class EmojiManager: async def _check_emoji(self, image_base64: str) -> str: try: - prompt = '这是一个表情包,请回答这个表情包是否满足\"动漫风格,画风可爱\"的要求,是则回答是,否则回答否,不要出现任何其他内容' + prompt = f'这是一个表情包,请回答这个表情包是否满足\"{global_config.EMOJI_CHECK_PROMPT}\"的要求,是则回答是,否则回答否,不要出现任何其他内容' content, _ = await self.llm.generate_response_for_image(prompt, image_base64) logger.debug(f"输出描述: {content}")