diff --git a/src/config/config.py b/src/config/config.py index acbc0cb2a..ebfc444c1 100644 --- a/src/config/config.py +++ b/src/config/config.py @@ -234,6 +234,10 @@ class BotConfig: forget_memory_interval: int = 600 # 记忆遗忘间隔(秒) memory_forget_time: int = 24 # 记忆遗忘时间(小时) memory_forget_percentage: float = 0.01 # 记忆遗忘比例 + + consolidate_memory_interval: int = 1000 # 记忆整合间隔(秒) + consolidation_similarity_threshold: float = 0.7 # 相似度阈值 + consolidate_memory_percentage: float = 0.01 # 检查节点比例 memory_ban_words: list = field( default_factory=lambda: ["表情包", "图片", "回复", "聊天记录"] @@ -594,6 +598,16 @@ class BotConfig: config.build_memory_sample_length = memory_config.get( "build_memory_sample_length", config.build_memory_sample_length ) + if config.INNER_VERSION in SpecifierSet(">=1.5.1"): + config.consolidate_memory_interval = memory_config.get( + "consolidate_memory_interval", config.consolidate_memory_interval + ) + config.consolidation_similarity_threshold = memory_config.get( + "consolidation_similarity_threshold", config.consolidation_similarity_threshold + ) + config.consolidate_memory_percentage = memory_config.get( + "consolidate_memory_percentage", config.consolidate_memory_percentage + ) def remote(parent: dict): remote_config = parent["remote"] diff --git a/src/main.py b/src/main.py index 4e9c2abc4..047e075f4 100644 --- a/src/main.py +++ b/src/main.py @@ -121,6 +121,7 @@ class MainSystem: tasks = [ self.build_memory_task(), self.forget_memory_task(), + self.consolidate_memory_task(), self.print_mood_task(), self.remove_recalled_message_task(), emoji_manager.start_periodic_check_register(), @@ -145,6 +146,15 @@ class MainSystem: print("\033[1;32m[记忆遗忘]\033[0m 开始遗忘记忆...") await HippocampusManager.get_instance().forget_memory(percentage=global_config.memory_forget_percentage) print("\033[1;32m[记忆遗忘]\033[0m 记忆遗忘完成") + + @staticmethod + async def consolidate_memory_task(): + """记忆整合任务""" + while True: + await asyncio.sleep(global_config.consolidate_memory_interval) + 
print("\033[1;32m[记忆整合]\033[0m 开始整合记忆...") + await HippocampusManager.get_instance().consolidate_memory() + print("\033[1;32m[记忆整合]\033[0m 记忆整合完成") async def print_mood_task(self): """打印情绪状态""" diff --git a/src/plugins/heartFC_chat/heartflow_prompt_builder.py b/src/plugins/heartFC_chat/heartflow_prompt_builder.py index a0fb6d88b..d876d6abd 100644 --- a/src/plugins/heartFC_chat/heartflow_prompt_builder.py +++ b/src/plugins/heartFC_chat/heartflow_prompt_builder.py @@ -221,9 +221,6 @@ class PromptBuilder: moderation_prompt=await global_prompt_manager.get_prompt_async("moderation_prompt"), ) - prompt = await relationship_manager.convert_all_person_sign_to_person_name(prompt) - prompt = parse_text_timestamps(prompt, mode="lite") - return prompt async def _build_prompt_normal(self, chat_stream, message_txt: str, sender_name: str = "某人") -> tuple[str, str]: diff --git a/src/plugins/memory_system/Hippocampus.py b/src/plugins/memory_system/Hippocampus.py index 710fc1421..5cca0d074 100644 --- a/src/plugins/memory_system/Hippocampus.py +++ b/src/plugins/memory_system/Hippocampus.py @@ -195,6 +195,7 @@ class Hippocampus: self.config = None def initialize(self, global_config): + # 使用导入的 MemoryConfig dataclass 和其 from_global_config 方法 self.config = MemoryConfig.from_global_config(global_config) # 初始化子组件 self.entorhinal_cortex = EntorhinalCortex(self) @@ -237,7 +238,7 @@ class Hippocampus: # 不再需要 time_info 参数 prompt = ( f'这是一段文字:\n{text}\n\n我想让你基于这段文字来概括"{topic}"这个概念,帮我总结成一句自然的话,' - f"可以包含时间和人物,以及具体的观点。只输出这句话就好" + f"要求包含对这个概念的定义,内容,知识,可以包含时间和人物。只输出这句话就好" ) return prompt @@ -795,7 +796,7 @@ class EntorhinalCortex: def get_memory_sample(self): """从数据库获取记忆样本""" # 硬编码:每条消息最大记忆次数 - max_memorized_time_per_msg = 3 + max_memorized_time_per_msg = 2 # 创建双峰分布的记忆调度器 sample_scheduler = MemoryBuildScheduler( @@ -1337,26 +1338,56 @@ class ParahippocampalGyrus: logger.info("[遗忘] 开始检查节点...") node_check_start = time.time() for node in nodes_to_check: + # 检查节点是否存在,以防在迭代中被移除(例如边移除导致) + if node not 
in self.memory_graph.G: + continue + node_data = self.memory_graph.G.nodes[node] + + # 首先获取记忆项 + memory_items = node_data.get("memory_items", []) + if not isinstance(memory_items, list): + memory_items = [memory_items] if memory_items else [] + + # 新增:检查节点是否为空 + if not memory_items: + try: + self.memory_graph.G.remove_node(node) + node_changes["removed"].append(f"{node}(空节点)") # 标记为空节点移除 + logger.debug(f"[遗忘] 移除了空的节点: {node}") + except nx.NetworkXError as e: + logger.warning(f"[遗忘] 移除空节点 {node} 时发生错误(可能已被移除): {e}") + continue # 处理下一个节点 + + # --- 如果节点不为空,则执行原来的不活跃检查和随机移除逻辑 --- last_modified = node_data.get("last_modified", current_time) - + # 条件1:检查是否长时间未修改 (超过24小时) if current_time - last_modified > 3600 * 24: - memory_items = node_data.get("memory_items", []) - if not isinstance(memory_items, list): - memory_items = [memory_items] if memory_items else [] - + # 条件2:再次确认节点包含记忆项(理论上已确认,但作为保险) if memory_items: current_count = len(memory_items) - removed_item = random.choice(memory_items) - memory_items.remove(removed_item) + # 如果列表非空,才进行随机选择 + if current_count > 0: + removed_item = random.choice(memory_items) + try: + memory_items.remove(removed_item) - if memory_items: - self.memory_graph.G.nodes[node]["memory_items"] = memory_items - self.memory_graph.G.nodes[node]["last_modified"] = current_time - node_changes["reduced"].append(f"{node} (数量: {current_count} -> {len(memory_items)})") - else: - self.memory_graph.G.remove_node(node) - node_changes["removed"].append(node) + # 条件3:检查移除后 memory_items 是否变空 + if memory_items: # 如果移除后列表不为空 + # self.memory_graph.G.nodes[node]["memory_items"] = memory_items # 直接修改列表即可 + self.memory_graph.G.nodes[node]["last_modified"] = current_time # 更新修改时间 + node_changes["reduced"].append(f"{node} (数量: {current_count} -> {len(memory_items)})") + else: # 如果移除后列表为空 + # 尝试移除节点,处理可能的错误 + try: + self.memory_graph.G.remove_node(node) + node_changes["removed"].append(f"{node}(遗忘清空)") # 标记为遗忘清空 + logger.debug(f"[遗忘] 节点 {node} 因移除最后一项而被清空。") + except 
nx.NetworkXError as e: + logger.warning(f"[遗忘] 尝试移除节点 {node} 时发生错误(可能已被移除):{e}") + except ValueError: + # 这个错误理论上不应发生,因为 removed_item 来自 memory_items + logger.warning(f"[遗忘] 尝试从节点 '{node}' 移除不存在的项目 '{removed_item[:30]}...'") node_check_end = time.time() logger.info(f"[遗忘] 节点检查耗时: {node_check_end - node_check_start:.2f}秒") @@ -1395,6 +1426,116 @@ class ParahippocampalGyrus: end_time = time.time() logger.info(f"[遗忘] 总耗时: {end_time - start_time:.2f}秒") + async def operation_consolidate_memory(self): + """整合记忆:合并节点内相似的记忆项""" + start_time = time.time() + percentage = self.config.consolidate_memory_percentage + similarity_threshold = self.config.consolidation_similarity_threshold + logger.info(f"[整合] 开始检查记忆节点... 检查比例: {percentage:.2%}, 合并阈值: {similarity_threshold}") + + # 获取所有至少有2条记忆项的节点 + eligible_nodes = [] + for node, data in self.memory_graph.G.nodes(data=True): + memory_items = data.get("memory_items", []) + if isinstance(memory_items, list) and len(memory_items) >= 2: + eligible_nodes.append(node) + + if not eligible_nodes: + logger.info("[整合] 没有找到包含多个记忆项的节点,无需整合。") + return + + # 计算需要检查的节点数量 + check_nodes_count = max(1, min(len(eligible_nodes), int(len(eligible_nodes) * percentage))) + + # 随机抽取节点进行检查 + try: + nodes_to_check = random.sample(eligible_nodes, check_nodes_count) + except ValueError as e: + logger.error(f"[整合] 抽样节点时出错: {e}") + return + + logger.info(f"[整合] 将检查 {len(nodes_to_check)} / {len(eligible_nodes)} 个符合条件的节点。") + + merged_count = 0 + nodes_modified = set() + current_timestamp = datetime.datetime.now().timestamp() + + for node in nodes_to_check: + node_data = self.memory_graph.G.nodes[node] + memory_items = node_data.get("memory_items", []) + if not isinstance(memory_items, list) or len(memory_items) < 2: + continue # 双重检查,理论上不会进入 + + items_copy = list(memory_items) # 创建副本以安全迭代和修改 + + # 遍历所有记忆项组合 + for item1, item2 in combinations(items_copy, 2): + # 确保 item1 和 item2 仍然存在于原始列表中(可能已被之前的合并移除) + if item1 not in memory_items or item2 not in 
memory_items: + continue + + similarity = self._calculate_item_similarity(item1, item2) + + if similarity >= similarity_threshold: + logger.debug(f"[整合] 节点 '{node}' 中发现相似项 (相似度: {similarity:.2f}):") + logger.trace(f" - '{item1}'") + logger.trace(f" - '{item2}'") + + # 比较信息量 + info1 = calculate_information_content(item1) + info2 = calculate_information_content(item2) + + if info1 >= info2: + item_to_keep = item1 + item_to_remove = item2 + else: + item_to_keep = item2 + item_to_remove = item1 + + # 从原始列表中移除信息量较低的项 + try: + memory_items.remove(item_to_remove) + logger.info(f"[整合] 已合并节点 '{node}' 中的记忆,保留: '{item_to_keep[:60]}...', 移除: '{item_to_remove[:60]}...'" ) + merged_count += 1 + nodes_modified.add(node) + node_data['last_modified'] = current_timestamp # 更新修改时间 + _merged_in_this_node = True + break # 每个节点每次检查只合并一对 + except ValueError: + # 如果项已经被移除(例如,在之前的迭代中作为 item_to_keep),则跳过 + logger.warning(f"[整合] 尝试移除节点 '{node}' 中不存在的项 '{item_to_remove[:30]}...',可能已被合并。") + continue + # # 如果节点内发生了合并,更新节点数据 (这种方式不安全,会丢失其他属性) + # if merged_in_this_node: + # self.memory_graph.G.nodes[node]["memory_items"] = memory_items + + + if merged_count > 0: + logger.info(f"[整合] 共合并了 {merged_count} 对相似记忆项,分布在 {len(nodes_modified)} 个节点中。") + sync_start = time.time() + logger.info("[整合] 开始将变更同步到数据库...") + # 使用 resync 更安全地处理删除和添加 + await self.hippocampus.entorhinal_cortex.resync_memory_to_db() + sync_end = time.time() + logger.info(f"[整合] 数据库同步耗时: {sync_end - sync_start:.2f}秒") + else: + logger.info("[整合] 本次检查未发现需要合并的记忆项。") + + end_time = time.time() + logger.info(f"[整合] 整合检查完成,总耗时: {end_time - start_time:.2f}秒") + + @staticmethod + def _calculate_item_similarity(item1: str, item2: str) -> float: + """计算两条记忆项文本的余弦相似度""" + words1 = set(jieba.cut(item1)) + words2 = set(jieba.cut(item2)) + all_words = words1 | words2 + if not all_words: + return 0.0 + v1 = [1 if word in words1 else 0 for word in all_words] + v2 = [1 if word in words2 else 0 for word in all_words] + return cosine_similarity(v1, 
v2) + class HippocampusManager: _instance = None @@ -1433,12 +1574,12 @@ class HippocampusManager: edge_count = len(memory_graph.edges()) logger.success(f"""-------------------------------- - 记忆系统参数配置: - 构建间隔: {global_config.build_memory_interval}秒|样本数: {config.build_memory_sample_num},长度: {config.build_memory_sample_length}|压缩率: {config.memory_compress_rate} - 记忆构建分布: {config.memory_build_distribution} - 遗忘间隔: {global_config.forget_memory_interval}秒|遗忘比例: {global_config.memory_forget_percentage}|遗忘: {config.memory_forget_time}小时之后 - 记忆图统计信息: 节点数量: {node_count}, 连接数量: {edge_count} - --------------------------------""") # noqa: E501 + 记忆系统参数配置: + 构建间隔: {global_config.build_memory_interval}秒|样本数: {config.build_memory_sample_num},长度: {config.build_memory_sample_length}|压缩率: {config.memory_compress_rate} + 记忆构建分布: {config.memory_build_distribution} + 遗忘间隔: {global_config.forget_memory_interval}秒|遗忘比例: {global_config.memory_forget_percentage}|遗忘: {config.memory_forget_time}小时之后 + 记忆图统计信息: 节点数量: {node_count}, 连接数量: {edge_count} + --------------------------------""") # noqa: E501 return self._hippocampus @@ -1453,6 +1594,14 @@ class HippocampusManager: if not self._initialized: raise RuntimeError("HippocampusManager 尚未初始化,请先调用 initialize 方法") return await self._hippocampus.parahippocampal_gyrus.operation_forget_topic(percentage) + + async def consolidate_memory(self): + """整合记忆的公共接口""" + if not self._initialized: + raise RuntimeError("HippocampusManager 尚未初始化,请先调用 initialize 方法") + # 注意:目前 operation_consolidate_memory 内部直接读取配置,percentage 参数暂时无效 + # 如果需要外部控制比例,需要修改 operation_consolidate_memory + return await self._hippocampus.parahippocampal_gyrus.operation_consolidate_memory() async def get_memory_from_text( self, diff --git a/src/plugins/memory_system/memory_config.py b/src/plugins/memory_system/memory_config.py index 73f9c1dbd..8f7e1ffe7 100644 --- a/src/plugins/memory_system/memory_config.py +++ b/src/plugins/memory_system/memory_config.py @@ -18,19 +18,29 @@ class 
MemoryConfig: # 记忆过滤相关配置 memory_ban_words: List[str] # 记忆过滤词列表 + # 新增:记忆整合相关配置 + consolidation_similarity_threshold: float # 相似度阈值 + consolidate_memory_percentage: float # 检查节点比例 + consolidate_memory_interval: int # 记忆整合间隔 + llm_topic_judge: str # 话题判断模型 llm_summary_by_topic: str # 话题总结模型 @classmethod def from_global_config(cls, global_config): """从全局配置创建记忆系统配置""" + # 使用 getattr 提供默认值,防止全局配置缺少这些项 return cls( - memory_build_distribution=global_config.memory_build_distribution, - build_memory_sample_num=global_config.build_memory_sample_num, - build_memory_sample_length=global_config.build_memory_sample_length, - memory_compress_rate=global_config.memory_compress_rate, - memory_forget_time=global_config.memory_forget_time, - memory_ban_words=global_config.memory_ban_words, - llm_topic_judge=global_config.llm_topic_judge, - llm_summary_by_topic=global_config.llm_summary_by_topic, + memory_build_distribution=getattr(global_config, "memory_build_distribution", (24, 12, 0.5, 168, 72, 0.5)), # 添加默认值 + build_memory_sample_num=getattr(global_config, "build_memory_sample_num", 5), + build_memory_sample_length=getattr(global_config, "build_memory_sample_length", 30), + memory_compress_rate=getattr(global_config, "memory_compress_rate", 0.1), + memory_forget_time=getattr(global_config, "memory_forget_time", 24 * 7), + memory_ban_words=getattr(global_config, "memory_ban_words", []), + # 新增加载整合配置,并提供默认值 + consolidation_similarity_threshold=getattr(global_config, "consolidation_similarity_threshold", 0.7), + consolidate_memory_percentage=getattr(global_config, "consolidate_memory_percentage", 0.01), + consolidate_memory_interval=getattr(global_config, "consolidate_memory_interval", 1000), + llm_topic_judge=getattr(global_config, "llm_topic_judge", "default_judge_model"), # 添加默认模型名 + llm_summary_by_topic=getattr(global_config, "llm_summary_by_topic", "default_summary_model"), # 添加默认模型名 ) diff --git a/src/plugins/person_info/relationship_manager.py 
b/src/plugins/person_info/relationship_manager.py index 2400da8be..6ae7c16e3 100644 --- a/src/plugins/person_info/relationship_manager.py +++ b/src/plugins/person_info/relationship_manager.py @@ -101,36 +101,6 @@ class RelationshipManager: # await person_info_manager.update_one_field(person_id, "user_avatar", user_avatar) await person_info_manager.qv_person_name(person_id, user_nickname, user_cardname, user_avatar) - @staticmethod - async def convert_all_person_sign_to_person_name(input_text: str): - """将所有人的格式转换为person_name""" - try: - # 使用正则表达式匹配格式 - all_person = person_info_manager.person_name_list - - pattern = r"<([^:]+):(\d+):([^:]+):([^>]+)>" - matches = re.findall(pattern, input_text) - - # 遍历匹配结果,将替换为person_name - result_text = input_text - for platform, user_id, nickname, cardname in matches: - person_id = person_info_manager.get_person_id(platform, user_id) - # 默认使用昵称作为人名 - person_name = nickname.strip() if nickname.strip() else cardname.strip() - - if person_id in all_person: - if all_person[person_id] is not None: - person_name = all_person[person_id] - - # print(f"将<{platform}:{user_id}:{nickname}:{cardname}>替换为{person_name}") - - result_text = result_text.replace(f"<{platform}:{user_id}:{nickname}:{cardname}>", person_name) - - return result_text - except Exception: - logger.error(traceback.format_exc()) - return input_text - async def calculate_update_relationship_value(self, chat_stream: ChatStream, label: str, stance: str) -> tuple: """计算并变更关系值 新的关系值变更计算方式: diff --git a/template/bot_config_template.toml b/template/bot_config_template.toml index 03eb0e6ca..486c150ff 100644 --- a/template/bot_config_template.toml +++ b/template/bot_config_template.toml @@ -1,5 +1,5 @@ [inner] -version = "1.5.0" +version = "1.5.1" #----以下是给开发人员阅读的,如果你只是部署了麦麦,不需要阅读---- #如果你想要修改配置文件,请在修改后将version的值进行变更 @@ -127,15 +127,19 @@ check_prompt = "符合公序良俗" # 表情包过滤要求,只有符合该要 [memory] build_memory_interval = 2000 # 记忆构建间隔 单位秒 间隔越低,麦麦学习越多,但是冗余信息也会增多 -build_memory_distribution = 
[4.0,2.0,0.6,24.0,8.0,0.4] # 记忆构建分布,参数:分布1均值,标准差,权重,分布2均值,标准差,权重 -build_memory_sample_num = 10 # 采样数量,数值越高记忆采样次数越多 -build_memory_sample_length = 20 # 采样长度,数值越高一段记忆内容越丰富 +build_memory_distribution = [6.0,3.0,0.6,32.0,12.0,0.4] # 记忆构建分布,参数:分布1均值,标准差,权重,分布2均值,标准差,权重 +build_memory_sample_num = 8 # 采样数量,数值越高记忆采样次数越多 +build_memory_sample_length = 40 # 采样长度,数值越高一段记忆内容越丰富 memory_compress_rate = 0.1 # 记忆压缩率 控制记忆精简程度 建议保持默认,调高可以获得更多信息,但是冗余信息也会增多 forget_memory_interval = 1000 # 记忆遗忘间隔 单位秒 间隔越低,麦麦遗忘越频繁,记忆更精简,但更难学习 memory_forget_time = 24 #多长时间后的记忆会被遗忘 单位小时 memory_forget_percentage = 0.01 # 记忆遗忘比例 控制记忆遗忘程度 越大遗忘越多 建议保持默认 +consolidate_memory_interval = 1000 # 记忆整合间隔 单位秒 间隔越低,麦麦整合越频繁,记忆更精简 +consolidation_similarity_threshold = 0.7 # 相似度阈值 +consolidate_memory_percentage = 0.01 # 检查节点比例 + #不希望记忆的词,已经记忆的不会受到影响 memory_ban_words = [ # "403","张三"