diff --git a/changelogs/changelog.md b/changelogs/changelog.md index c56426a72..1aa33a995 100644 --- a/changelogs/changelog.md +++ b/changelogs/changelog.md @@ -2,13 +2,15 @@ ## [0.9.1] - 2025-7-25 +- 修复reply导致的planner异常空跳 - 修复表达方式迁移空目录问题 - 修复reply_to空字段问题 - 将metioned bot 和 at应用到focus prompt中 +- 更好的兴趣度计算 +- 修复部分模型由于enable_thinking导致的400问题 +- 优化关键词提取 - - -## [0.9.0] - 2025-7-25 +## [0.9.0] - 2025-7-24 ### 摘要 MaiBot 0.9.0 重磅升级!本版本带来两大核心突破:**全面重构的插件系统**提供更强大的扩展能力和管理功能;**normal和focus模式统一化处理**大幅简化架构并提升性能。同时新增s4u prompt模式优化、语音消息支持、全新情绪系统和mais4u直播互动功能,为MaiBot带来更自然、更智能的交互体验! diff --git a/src/chat/chat_loop/heartFC_chat.py b/src/chat/chat_loop/heartFC_chat.py index 41101b2dd..ac8c7d2df 100644 --- a/src/chat/chat_loop/heartFC_chat.py +++ b/src/chat/chat_loop/heartFC_chat.py @@ -330,13 +330,13 @@ class HeartFChatting: if self.loop_mode == ChatMode.NORMAL: if action_type == "no_action": - logger.info(f"[{self.log_prefix}] {global_config.bot.nickname} 决定进行回复") + logger.info(f"{self.log_prefix}{global_config.bot.nickname} 决定进行回复") elif is_parallel: logger.info( - f"[{self.log_prefix}] {global_config.bot.nickname} 决定进行回复, 同时执行{action_type}动作" + f"{self.log_prefix}{global_config.bot.nickname} 决定进行回复, 同时执行{action_type}动作" ) else: - logger.info(f"[{self.log_prefix}] {global_config.bot.nickname} 决定执行{action_type}动作") + logger.info(f"{self.log_prefix}{global_config.bot.nickname} 决定执行{action_type}动作") if action_type == "no_action": # 等待回复生成完毕 @@ -351,15 +351,15 @@ class HeartFChatting: # 模型炸了,没有回复内容生成 if not response_set: - logger.warning(f"[{self.log_prefix}] 模型未生成回复内容") + logger.warning(f"{self.log_prefix}模型未生成回复内容") return False elif action_type not in ["no_action"] and not is_parallel: logger.info( - f"[{self.log_prefix}] {global_config.bot.nickname} 原本想要回复:{content},但选择执行{action_type},不发表回复" + f"{self.log_prefix}{global_config.bot.nickname} 原本想要回复:{content},但选择执行{action_type},不发表回复" ) return False - logger.info(f"[{self.log_prefix}] {global_config.bot.nickname} 决定的回复内容: 
{content}") + logger.info(f"{self.log_prefix}{global_config.bot.nickname} 决定的回复内容: {content}") # 发送回复 (不再需要传入 chat) reply_text = await self._send_response(response_set, reply_to_str, loop_start_time,message_data) @@ -563,7 +563,7 @@ class HeartFChatting: return reply_set except Exception as e: - logger.error(f"[{self.log_prefix}] 回复生成出现错误:{str(e)} {traceback.format_exc()}") + logger.error(f"{self.log_prefix}回复生成出现错误:{str(e)} {traceback.format_exc()}") return None async def _send_response(self, reply_set, reply_to, thinking_start_time, message_data): diff --git a/src/chat/heart_flow/heartflow_message_processor.py b/src/chat/heart_flow/heartflow_message_processor.py index 57b52ae69..95b059892 100644 --- a/src/chat/heart_flow/heartflow_message_processor.py +++ b/src/chat/heart_flow/heartflow_message_processor.py @@ -12,6 +12,7 @@ from src.chat.message_receive.storage import MessageStorage from src.chat.heart_flow.heartflow import heartflow from src.chat.utils.utils import is_mentioned_bot_in_message from src.chat.utils.timer_calculator import Timer +from src.chat.utils.chat_message_builder import replace_user_references_in_content from src.common.logger import get_logger from src.person_info.relationship_manager import get_relationship_manager from src.mood.mood_manager import mood_manager @@ -56,6 +57,7 @@ async def _calculate_interest(message: MessageRecv) -> Tuple[float, bool]: with Timer("记忆激活"): interested_rate = await hippocampus_manager.get_activate_from_text( message.processed_plain_text, + max_depth= 5, fast_retrieval=False, ) logger.debug(f"记忆激活率: {interested_rate:.2f}") @@ -147,6 +149,14 @@ class HeartFCMessageReceiver: # 如果消息中包含图片标识,则将 [picid:...] 
替换为 [图片] picid_pattern = r"\[picid:([^\]]+)\]" processed_plain_text = re.sub(picid_pattern, "[图片]", message.processed_plain_text) + + # 应用用户引用格式替换,将回复和@格式转换为可读格式 + processed_plain_text = replace_user_references_in_content( + processed_plain_text, + message.message_info.platform, + is_async=False, + replace_bot_name=True + ) logger.info(f"[{mes_name}]{userinfo.user_nickname}:{processed_plain_text}[兴趣度:{interested_rate:.2f}]") # type: ignore diff --git a/src/chat/memory_system/Hippocampus.py b/src/chat/memory_system/Hippocampus.py index c1cf61797..13cf53f22 100644 --- a/src/chat/memory_system/Hippocampus.py +++ b/src/chat/memory_system/Hippocampus.py @@ -224,10 +224,15 @@ class Hippocampus: return hash((source, target)) @staticmethod - def find_topic_llm(text, topic_num): - # sourcery skip: inline-immediately-returned-variable + def find_topic_llm(text:str, topic_num:int|list[int]): + topic_num_str = "" + if isinstance(topic_num, list): + topic_num_str = f"{topic_num[0]}-{topic_num[1]}" + else: + topic_num_str = topic_num + prompt = ( - f"这是一段文字:\n{text}\n\n请你从这段话中总结出最多{topic_num}个关键的概念,可以是名词,动词,或者特定人物,帮我列出来," + f"这是一段文字:\n{text}\n\n请你从这段话中总结出最多{topic_num_str}个关键的概念,可以是名词,动词,或者特定人物,帮我列出来," f"将主题用逗号隔开,并加上<>,例如<主题1>,<主题2>......尽可能精简。只需要列举最多{topic_num}个话题就好,不要有序号,不要告诉我其他内容。" f"如果确定找不出主题或者没有明显主题,返回。" ) @@ -300,7 +305,7 @@ class Hippocampus: memories.sort(key=lambda x: x[2], reverse=True) return memories - async def get_keywords_from_text(self, text: str, fast_retrieval: bool = False) -> list: + async def get_keywords_from_text(self, text: str) -> list: """从文本中提取关键词。 Args: @@ -312,49 +317,45 @@ class Hippocampus: if not text: return [] - if fast_retrieval: - # 使用jieba分词提取关键词 + # 使用LLM提取关键词 - 根据详细文本长度分布优化topic_num计算 + text_length = len(text) + topic_num:str|list[int] = None + if text_length <= 5: words = jieba.cut(text) - # 过滤掉停用词和单字词 keywords = [word for word in words if len(word) > 1] - # 去重 - keywords = list(set(keywords)) - # 限制关键词数量 - logger.debug(f"提取关键词: 
{keywords}") - + keywords = list(set(keywords))[:3] # 限制最多3个关键词 + logger.info(f"提取关键词: {keywords}") + return keywords + elif text_length <= 10: + topic_num = [1,3] # 6-10字符: 1个关键词 (27.18%的文本) + elif text_length <= 20: + topic_num = [2,4] # 11-20字符: 2个关键词 (22.76%的文本) + elif text_length <= 30: + topic_num = [3,5] # 21-30字符: 3个关键词 (10.33%的文本) + elif text_length <= 50: + topic_num = [4,5] # 31-50字符: 4个关键词 (9.79%的文本) else: - # 使用LLM提取关键词 - 根据详细文本长度分布优化topic_num计算 - text_length = len(text) - if text_length <= 5: - topic_num = 1 # 1-5字符: 1个关键词 (26.57%的文本) - elif text_length <= 10: - topic_num = 1 # 6-10字符: 1个关键词 (27.18%的文本) - elif text_length <= 20: - topic_num = 2 # 11-20字符: 2个关键词 (22.76%的文本) - elif text_length <= 30: - topic_num = 3 # 21-30字符: 3个关键词 (10.33%的文本) - elif text_length <= 50: - topic_num = 4 # 31-50字符: 4个关键词 (9.79%的文本) - else: - topic_num = 5 # 51+字符: 5个关键词 (其余长文本) - - # logger.info(f"提取关键词数量: {topic_num}") - topics_response, (reasoning_content, model_name) = await self.model_summary.generate_response_async( - self.find_topic_llm(text, topic_num) - ) + topic_num = 5 # 51+字符: 5个关键词 (其余长文本) + + + topics_response, (reasoning_content, model_name) = await self.model_summary.generate_response_async( + self.find_topic_llm(text, topic_num) + ) - # 提取关键词 - keywords = re.findall(r"<([^>]+)>", topics_response) - if not keywords: - keywords = [] - else: - keywords = [ - keyword.strip() - for keyword in ",".join(keywords).replace(",", ",").replace("、", ",").replace(" ", ",").split(",") - if keyword.strip() - ] - - return keywords + # 提取关键词 + keywords = re.findall(r"<([^>]+)>", topics_response) + if not keywords: + keywords = [] + else: + keywords = [ + keyword.strip() + for keyword in ",".join(keywords).replace(",", ",").replace("、", ",").replace(" ", ",").split(",") + if keyword.strip() + ] + + logger.info(f"提取关键词: {keywords}") + + return keywords async def get_memory_from_text( @@ -382,7 +383,7 @@ class Hippocampus: - memory_items: list, 该主题下的记忆项列表 - similarity: float, 
与文本的相似度 """ - keywords = await self.get_keywords_from_text(text, fast_retrieval) + keywords = await self.get_keywords_from_text(text) # 过滤掉不存在于记忆图中的关键词 valid_keywords = [keyword for keyword in keywords if keyword in self.memory_graph.G] @@ -704,7 +705,7 @@ class Hippocampus: Returns: float: 激活节点数与总节点数的比值 """ - keywords = await self.get_keywords_from_text(text, fast_retrieval) + keywords = await self.get_keywords_from_text(text) # 过滤掉不存在于记忆图中的关键词 valid_keywords = [keyword for keyword in keywords if keyword in self.memory_graph.G] @@ -721,7 +722,7 @@ class Hippocampus: for keyword in valid_keywords: logger.debug(f"开始以关键词 '{keyword}' 为中心进行扩散检索 (最大深度: {max_depth}):") # 初始化激活值 - activation_values = {keyword: 1.0} + activation_values = {keyword: 1.5} # 记录已访问的节点 visited_nodes = {keyword} # 待处理的节点队列,每个元素是(节点, 激活值, 当前深度) diff --git a/src/chat/replyer/default_generator.py b/src/chat/replyer/default_generator.py index 9d75671c6..efefa0934 100644 --- a/src/chat/replyer/default_generator.py +++ b/src/chat/replyer/default_generator.py @@ -17,7 +17,7 @@ from src.chat.message_receive.uni_message_sender import HeartFCSender from src.chat.utils.timer_calculator import Timer # <--- Import Timer from src.chat.utils.utils import get_chat_type_and_target_info from src.chat.utils.prompt_builder import Prompt, global_prompt_manager -from src.chat.utils.chat_message_builder import build_readable_messages, get_raw_msg_before_timestamp_with_chat +from src.chat.utils.chat_message_builder import build_readable_messages, get_raw_msg_before_timestamp_with_chat, replace_user_references_in_content from src.chat.express.expression_selector import expression_selector from src.chat.knowledge.knowledge_lib import qa_manager from src.chat.memory_system.memory_activator import MemoryActivator @@ -629,6 +629,14 @@ class DefaultReplyer: mood_prompt = "" sender, target = self._parse_reply_target(reply_to) + + target = replace_user_references_in_content( + target, + chat_stream.platform, + is_async=False, + 
# Reference markers embedded in message text: "回复<nickname:user_id>" and "@<nickname:user_id>".
_REPLY_REFERENCE_PATTERN = r"回复<([^:<>]+):([^:<>]+)>"
_AT_REFERENCE_PATTERN = r"@<([^:<>]+):([^:<>]+)>"


def replace_user_references_in_content(
    content: str,
    platform: str,
    name_resolver: Optional[Callable[[str, str], Any]] = None,
    is_async: bool = False,
    replace_bot_name: bool = True,
) -> Union[str, Any]:
    """Replace reply/@ user-reference markers in *content* with readable names.

    Two marker shapes are handled: the first ``回复<nickname:user_id>`` marker
    becomes ``回复 <name>`` and every ``@<nickname:user_id>`` marker becomes
    ``@<name>``.

    Args:
        content: Text to process.
        platform: Platform identifier, passed through to the resolver.
        name_resolver: Callable taking ``(platform, user_id)`` and returning a
            display name. A falsy result (or a raised exception) falls back to
            the nickname stored in the marker. When ``None``, names are looked
            up through the person-info manager. In async mode the resolver may
            return either a plain value or an awaitable.
        is_async: When true, return an awaitable instead of the final string.
        replace_bot_name: When true, references to the bot's own account are
            rendered as ``"<bot nickname>(你)"`` without calling the resolver.

    Returns:
        The processed string (sync mode) or an awaitable yielding it (async mode).
    """
    if is_async:
        return _replace_user_references_async(content, platform, name_resolver, replace_bot_name)
    return _replace_user_references_sync(content, platform, name_resolver, replace_bot_name)


def _is_bot_account(user_id: str) -> bool:
    """Return True when *user_id* is the bot's own configured account."""
    # Compare as strings so the check also works if the configured account is an int.
    return str(user_id) == str(global_config.bot.qq_account)


def _bot_display_name() -> str:
    """Return the label used for references that point at the bot itself."""
    return f"{global_config.bot.nickname}(你)"


def _splice_replacements(content: str, replacements: List[Tuple[int, int, str]]) -> str:
    """Rebuild *content* with each ``(start, end, text)`` span replaced literally.

    Spans must be in ascending, non-overlapping order. Splicing by position
    (instead of ``re.sub`` with a replacement template) keeps backslashes and
    group references inside resolved names from being interpreted.
    """
    pieces = []
    cursor = 0
    for start, end, text in replacements:
        pieces.append(content[cursor:start])
        pieces.append(text)
        cursor = end
    pieces.append(content[cursor:])
    return "".join(pieces)


def _replace_user_references_sync(
    content: str,
    platform: str,
    name_resolver: Optional[Callable[[str, str], str]] = None,
    replace_bot_name: bool = True,
) -> str:
    """Synchronous implementation of :func:`replace_user_references_in_content`."""
    if not content:
        return content

    if name_resolver is None:
        person_info_manager = get_person_info_manager()

        def default_resolver(platform: str, user_id: str) -> str:
            person_id = PersonInfoManager.get_person_id(platform, user_id)
            # Return the stored name as-is; an empty result lets the caller
            # fall back to the nickname carried in the marker rather than
            # exposing the raw account id.
            return person_info_manager.get_value_sync(person_id, "person_name")

        name_resolver = default_resolver

    def resolve(nickname: str, user_id: str) -> str:
        try:
            if replace_bot_name and _is_bot_account(user_id):
                return _bot_display_name()
            return name_resolver(platform, user_id) or nickname
        except Exception:
            # Best effort: if the lookup fails, keep the original nickname.
            return nickname

    # Only the first reply marker is rewritten.
    reply_match = re.search(_REPLY_REFERENCE_PATTERN, content)
    if reply_match:
        reply_name = resolve(reply_match.group(1), reply_match.group(2))
        content = _splice_replacements(
            content, [(reply_match.start(), reply_match.end(), f"回复 {reply_name}")]
        )

    # Every @ marker is rewritten; scanning happens after the reply rewrite.
    at_replacements = [
        (m.start(), m.end(), f"@{resolve(m.group(1), m.group(2))}")
        for m in re.finditer(_AT_REFERENCE_PATTERN, content)
    ]
    if at_replacements:
        content = _splice_replacements(content, at_replacements)

    return content


async def _replace_user_references_async(
    content: str,
    platform: str,
    name_resolver: Optional[Callable[[str, str], Any]] = None,
    replace_bot_name: bool = True,
) -> str:
    """Asynchronous implementation of :func:`replace_user_references_in_content`."""
    import inspect  # local import: only needed to accept both sync and async resolvers

    if not content:
        return content

    if name_resolver is None:
        person_info_manager = get_person_info_manager()

        async def default_resolver(platform: str, user_id: str) -> str:
            person_id = PersonInfoManager.get_person_id(platform, user_id)
            # Return the stored name as-is; an empty result lets the caller
            # fall back to the nickname carried in the marker rather than
            # exposing the raw account id.
            return await person_info_manager.get_value(person_id, "person_name")

        name_resolver = default_resolver

    async def resolve(nickname: str, user_id: str) -> str:
        try:
            if replace_bot_name and _is_bot_account(user_id):
                return _bot_display_name()
            resolved = name_resolver(platform, user_id)
            # Accept plain callables as well as coroutine functions.
            if inspect.isawaitable(resolved):
                resolved = await resolved
            return resolved or nickname
        except Exception:
            # Best effort: if the lookup fails, keep the original nickname.
            return nickname

    # Only the first reply marker is rewritten.
    reply_match = re.search(_REPLY_REFERENCE_PATTERN, content)
    if reply_match:
        reply_name = await resolve(reply_match.group(1), reply_match.group(2))
        content = _splice_replacements(
            content, [(reply_match.start(), reply_match.end(), f"回复 {reply_name}")]
        )

    # Every @ marker is rewritten; names are resolved one at a time, in order.
    at_replacements = []
    for m in re.finditer(_AT_REFERENCE_PATTERN, content):
        at_name = await resolve(m.group(1), m.group(2))
        at_replacements.append((m.start(), m.end(), f"@{at_name}"))
    if at_replacements:
        content = _splice_replacements(content, at_replacements)

    return content
r"@<([^:<>]+):([^:<>]+)>" - at_matches = list(re.finditer(at_pattern, content)) - if at_matches: - new_content = "" - last_end = 0 - for m in at_matches: - new_content += content[last_end : m.start()] - aaa = m.group(1) - bbb = m.group(2) - at_person_id = PersonInfoManager.get_person_id(platform, bbb) - at_person_name = person_info_manager.get_value_sync(at_person_id, "person_name") or aaa - new_content += f"@{at_person_name}" - last_end = m.end() - new_content += content[last_end:] - content = new_content + # 使用独立函数处理用户引用格式 + content = replace_user_references_in_content(content, platform, is_async=False, replace_bot_name=replace_bot_name) target_str = "这是QQ的一个功能,用于提及某人,但没那么明显" if target_str in content and random.random() < 0.6: @@ -916,38 +1040,14 @@ async def build_anonymous_messages(messages: List[Dict[str, Any]]) -> str: anon_name = get_anon_name(platform, user_id) # print(f"anon_name:{anon_name}") - # 处理 回复 - reply_pattern = r"回复<([^:<>]+):([^:<>]+)>" - match = re.search(reply_pattern, content) - if match: - # print(f"发现回复match:{match}") - bbb = match.group(2) + # 使用独立函数处理用户引用格式,传入自定义的匿名名称解析器 + def anon_name_resolver(platform: str, user_id: str) -> str: try: - anon_reply = get_anon_name(platform, bbb) - # print(f"anon_reply:{anon_reply}") + return get_anon_name(platform, user_id) except Exception: - anon_reply = "?" - content = re.sub(reply_pattern, f"回复 {anon_reply}", content, count=1) - - # 处理 @,无嵌套def - at_pattern = r"@<([^:<>]+):([^:<>]+)>" - at_matches = list(re.finditer(at_pattern, content)) - if at_matches: - # print(f"发现@match:{at_matches}") - new_content = "" - last_end = 0 - for m in at_matches: - new_content += content[last_end : m.start()] - bbb = m.group(2) - try: - anon_at = get_anon_name(platform, bbb) - # print(f"anon_at:{anon_at}") - except Exception: - anon_at = "?" - new_content += f"@{anon_at}" - last_end = m.end() - new_content += content[last_end:] - content = new_content + return "?" 
+ + content = replace_user_references_in_content(content, platform, anon_name_resolver, is_async=False, replace_bot_name=False) header = f"{anon_name}说 " output_lines.append(header) diff --git a/src/mood/mood_manager.py b/src/mood/mood_manager.py index 38ed39bcc..eae0ea713 100644 --- a/src/mood/mood_manager.py +++ b/src/mood/mood_manager.py @@ -78,7 +78,7 @@ class ChatMood: if interested_rate <= 0: interest_multiplier = 0 else: - interest_multiplier = 3 * math.pow(interested_rate, 0.25) + interest_multiplier = 2 * math.pow(interested_rate, 0.25) logger.debug( f"base_probability: {base_probability}, time_multiplier: {time_multiplier}, interest_multiplier: {interest_multiplier}" diff --git a/src/plugin_system/apis/send_api.py b/src/plugin_system/apis/send_api.py index 352ccdb45..f7b3092ef 100644 --- a/src/plugin_system/apis/send_api.py +++ b/src/plugin_system/apis/send_api.py @@ -19,6 +19,7 @@ await send_api.custom_message("video", video_data, "123456", True) """ +import asyncio import traceback import time import difflib @@ -30,7 +31,7 @@ from src.common.logger import get_logger from src.chat.message_receive.chat_stream import get_chat_manager from src.chat.message_receive.uni_message_sender import HeartFCSender from src.chat.message_receive.message import MessageSending, MessageRecv -from src.chat.utils.chat_message_builder import get_raw_msg_before_timestamp_with_chat +from src.chat.utils.chat_message_builder import get_raw_msg_before_timestamp_with_chat, replace_user_references_in_content from src.person_info.person_info import get_person_info_manager from maim_message import Seg, UserInfo from src.config.config import global_config @@ -183,32 +184,8 @@ async def _find_reply_message(target_stream, reply_to: str) -> Optional[MessageR if person_name == sender: translate_text = message["processed_plain_text"] - # 检查是否有 回复 字段 - reply_pattern = r"回复<([^:<>]+):([^:<>]+)>" - if match := re.search(reply_pattern, translate_text): - aaa = match.group(1) - bbb = 
match.group(2) - reply_person_id = get_person_info_manager().get_person_id(platform, bbb) - reply_person_name = await get_person_info_manager().get_value(reply_person_id, "person_name") or aaa - # 在内容前加上回复信息 - translate_text = re.sub(reply_pattern, f"回复 {reply_person_name}", translate_text, count=1) - - # 检查是否有 @ 字段 - at_pattern = r"@<([^:<>]+):([^:<>]+)>" - at_matches = list(re.finditer(at_pattern, translate_text)) - if at_matches: - new_content = "" - last_end = 0 - for m in at_matches: - new_content += translate_text[last_end : m.start()] - aaa = m.group(1) - bbb = m.group(2) - at_person_id = get_person_info_manager().get_person_id(platform, bbb) - at_person_name = await get_person_info_manager().get_value(at_person_id, "person_name") or aaa - new_content += f"@{at_person_name}" - last_end = m.end() - new_content += translate_text[last_end:] - translate_text = new_content + # 使用独立函数处理用户引用格式 + translate_text = await replace_user_references_in_content(translate_text, platform, is_async=True) similarity = difflib.SequenceMatcher(None, text, translate_text).ratio() if similarity >= 0.9: