From 2dbe615057fb4d747bf762fd8f9ce2da69edc71c Mon Sep 17 00:00:00 2001
From: Windpicker-owo <3431391539@qq.com>
Date: Thu, 6 Nov 2025 21:53:55 +0800
Subject: [PATCH] feat(memory-graph): add memory graph system configuration, integrate node dedup merger with memory manager
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/config/official_configs.py         |  53 +++++
 src/memory_graph/config.py             | 272 -------------------------
 src/memory_graph/core/node_merger.py   |  19 +-
 src/memory_graph/manager.py            |  13 +-
 src/memory_graph/tools/memory_tools.py |   2 +
 5 files changed, 74 insertions(+), 285 deletions(-)
 delete mode 100644 src/memory_graph/config.py

diff --git a/src/config/official_configs.py b/src/config/official_configs.py
index adb3062ee..3c0a701f4 100644
--- a/src/config/official_configs.py
+++ b/src/config/official_configs.py
@@ -401,6 +401,59 @@ class MemoryConfig(ValidatedConfigBase):
     memory_system_load_balancing: bool = Field(default=True, description="启用记忆系统负载均衡")
     memory_build_throttling: bool = Field(default=True, description="启用记忆构建节流")
     memory_priority_queue_enabled: bool = Field(default=True, description="启用记忆优先级队列")
+
+    # === 记忆图系统配置 (Memory Graph System) ===
+    # 新一代记忆系统的配置项
+    enable: bool = Field(default=True, description="启用记忆图系统")
+    data_dir: str = Field(default="data/memory_graph", description="记忆数据存储目录")
+
+    # 向量存储配置
+    vector_collection_name: str = Field(default="memory_nodes", description="向量集合名称")
+    vector_db_path: str = Field(default="data/memory_graph/chroma_db", description="向量数据库路径")
+
+    # 检索配置
+    search_top_k: int = Field(default=10, description="默认检索返回数量")
+    search_min_importance: float = Field(default=0.3, description="最小重要性阈值")
+    search_similarity_threshold: float = Field(default=0.5, description="向量相似度阈值")
+    search_max_expand_depth: int = Field(default=2, description="检索时图扩展深度(0-3)")
+    enable_query_optimization: bool = Field(default=True, description="启用查询优化")
+
+    # 检索权重配置 (记忆图系统)
+    search_vector_weight: float = Field(default=0.4, description="向量相似度权重")
+    search_graph_distance_weight: float = Field(default=0.2, description="图距离权重")
+    search_importance_weight: float = Field(default=0.2, description="重要性权重")
+    search_recency_weight: float = Field(default=0.2, description="时效性权重")
+
+    # 记忆整合配置
+    consolidation_enabled: bool = Field(default=False, description="是否启用记忆整合")
+    consolidation_interval_hours: float = Field(default=6.0, description="整合任务执行间隔(小时)")
+    consolidation_similarity_threshold: float = Field(default=0.92, description="相似记忆去重阈值")
+    consolidation_time_window_hours: float = Field(default=6.0, description="整合时间窗口(小时)")
+    consolidation_max_batch_size: int = Field(default=50, description="单次最多处理的记忆数量")
+
+    # 自动关联配置
+    auto_link_enabled: bool = Field(default=True, description="是否启用自动关联")
+    auto_link_max_candidates: int = Field(default=5, description="每个记忆最多关联候选数")
+    auto_link_min_confidence: float = Field(default=0.7, description="最低置信度阈值")
+
+    # 遗忘配置 (记忆图系统)
+    forgetting_enabled: bool = Field(default=True, description="是否启用自动遗忘")
+    forgetting_activation_threshold: float = Field(default=0.1, description="激活度阈值")
+    forgetting_min_importance: float = Field(default=0.8, description="最小保护重要性")
+
+    # 激活配置
+    activation_decay_rate: float = Field(default=0.9, description="激活度衰减率")
+    activation_propagation_strength: float = 
Field(default=0.5, description="激活传播强度") + activation_propagation_depth: int = Field(default=2, description="激活传播深度") + + # 性能配置 + max_memory_nodes_per_memory: int = Field(default=10, description="每个记忆最多包含的节点数") + max_related_memories: int = Field(default=5, description="相关记忆最大数量") + + # 节点去重合并配置 + node_merger_similarity_threshold: float = Field(default=0.85, description="节点去重相似度阈值") + node_merger_context_match_required: bool = Field(default=True, description="节点合并是否要求上下文匹配") + node_merger_merge_batch_size: int = Field(default=50, description="节点合并批量处理大小") class MoodConfig(ValidatedConfigBase): diff --git a/src/memory_graph/config.py b/src/memory_graph/config.py deleted file mode 100644 index 4aa8c94da..000000000 --- a/src/memory_graph/config.py +++ /dev/null @@ -1,272 +0,0 @@ -""" -记忆图系统配置管理 -""" - -from __future__ import annotations - -from dataclasses import dataclass, field -from pathlib import Path -from typing import Dict, Optional - - -@dataclass -class ConsolidationConfig: - """记忆整理配置""" - - interval_hours: int = 6 # 整理间隔(小时) - batch_size: int = 100 # 每次处理记忆数量 - enable_auto_discovery: bool = True # 是否启用自动关联发现 - enable_conflict_detection: bool = True # 是否启用冲突检测 - - -@dataclass -class RetrievalConfig: - """记忆检索配置""" - - default_mode: str = "auto" # auto/fast/deep - max_expand_depth: int = 2 # 最大图扩展深度 - vector_weight: float = 0.4 # 向量相似度权重 - graph_distance_weight: float = 0.2 # 图距离权重 - importance_weight: float = 0.2 # 重要性权重 - recency_weight: float = 0.2 # 时效性权重 - - def __post_init__(self): - """验证权重总和""" - total = self.vector_weight + self.graph_distance_weight + self.importance_weight + self.recency_weight - if abs(total - 1.0) > 0.01: - raise ValueError(f"权重总和必须为1.0,当前为 {total}") - - -@dataclass -class NodeMergerConfig: - """节点去重配置""" - - similarity_threshold: float = 0.85 # 相似度阈值 - context_match_required: bool = True # 是否要求上下文匹配 - merge_batch_size: int = 50 # 批量处理大小 - - def __post_init__(self): - """验证阈值范围""" - if not 0.0 <= self.similarity_threshold <= 1.0: - raise ValueError(f"相似度阈值必须在 [0, 1] 范围内,当前为 {self.similarity_threshold}") - - -@dataclass -class StorageConfig: - """存储配置""" - - data_dir: Path = field(default_factory=lambda: Path("data/memory_graph")) - vector_collection_name: str = "memory_nodes" - graph_file_name: str = "memory_graph.json" - enable_persistence: bool = True # 是否启用持久化 - auto_save_interval: int = 300 # 自动保存间隔(秒) - - -@dataclass -class MemoryGraphConfig: - """记忆图系统总配置""" - - # 基础配置 - enable: bool = True # 是否启用记忆图系统 - data_dir: Path = field(default_factory=lambda: Path("data/memory_graph")) - - # 向量存储配置 - vector_collection_name: str = "memory_nodes" - vector_db_path: Path = field(default_factory=lambda: Path("data/memory_graph/chroma_db")) - - # 检索配置 - search_top_k: int = 10 - search_min_importance: float = 0.3 - search_similarity_threshold: float = 0.5 - enable_query_optimization: bool = True - - # 整合配置 - consolidation_enabled: bool = True - consolidation_interval_hours: float = 1.0 - consolidation_similarity_threshold: float = 0.85 - consolidation_time_window_hours: int = 24 - - # 自动关联配置 - auto_link_enabled: bool = True # 是否启用自动关联 - auto_link_max_candidates: int = 5 # 每个记忆最多关联候选数 - auto_link_min_confidence: float = 0.7 # 最低置信度阈值 - - # 遗忘配置 - forgetting_enabled: bool = True - forgetting_activation_threshold: float = 0.1 - forgetting_min_importance: float = 0.8 - - # 激活配置 - activation_decay_rate: float = 0.9 - activation_propagation_strength: float = 0.5 - activation_propagation_depth: int = 1 - - # 性能配置 - max_memory_nodes_per_memory: int = 10 - 
max_related_memories: int = 5 - - # 旧配置(向后兼容) - consolidation: ConsolidationConfig = field(default_factory=ConsolidationConfig) - retrieval: RetrievalConfig = field(default_factory=RetrievalConfig) - node_merger: NodeMergerConfig = field(default_factory=NodeMergerConfig) - storage: StorageConfig = field(default_factory=StorageConfig) - - # 时间衰减配置 - decay_rates: Dict[str, float] = field( - default_factory=lambda: { - "EVENT": 0.05, # 事件衰减较快 - "FACT": 0.01, # 事实衰减慢 - "RELATION": 0.005, # 关系衰减很慢 - "OPINION": 0.03, # 观点中等衰减 - } - ) - - # 嵌入模型配置 - embedding_model: Optional[str] = None # 如果为None,则使用系统默认 - embedding_dimension: int = 384 # 默认使用 sentence-transformers 的维度 - - # 调试和日志 - enable_debug_logging: bool = False - enable_visualization: bool = False # 是否启用记忆可视化 - - @classmethod - def from_bot_config(cls, bot_config) -> MemoryGraphConfig: - """从bot_config加载配置""" - try: - # 尝试获取配置(优先使用memory,兼容memory_graph) - if hasattr(bot_config, 'memory') and bot_config.memory is not None: - mg_config = bot_config.memory - elif hasattr(bot_config, 'memory_graph'): - mg_config = bot_config.memory_graph - - config = cls( - enable=getattr(mg_config, 'enable', True), - data_dir=Path(getattr(mg_config, 'data_dir', 'data/memory_graph')), - vector_collection_name=getattr(mg_config, 'vector_collection_name', 'memory_nodes'), - vector_db_path=Path(getattr(mg_config, 'vector_db_path', 'data/memory_graph/chroma_db')), - search_top_k=getattr(mg_config, 'search_top_k', 10), - search_min_importance=getattr(mg_config, 'search_min_importance', 0.3), - search_similarity_threshold=getattr(mg_config, 'search_similarity_threshold', 0.5), - enable_query_optimization=getattr(mg_config, 'enable_query_optimization', True), - consolidation_enabled=getattr(mg_config, 'consolidation_enabled', True), - consolidation_interval_hours=getattr(mg_config, 'consolidation_interval_hours', 1.0), - consolidation_similarity_threshold=getattr(mg_config, 'consolidation_similarity_threshold', 0.85), - consolidation_time_window_hours=getattr(mg_config, 'consolidation_time_window_hours', 24), - auto_link_enabled=getattr(mg_config, 'auto_link_enabled', True), - auto_link_max_candidates=getattr(mg_config, 'auto_link_max_candidates', 5), - auto_link_min_confidence=getattr(mg_config, 'auto_link_min_confidence', 0.7), - forgetting_enabled=getattr(mg_config, 'forgetting_enabled', True), - forgetting_activation_threshold=getattr(mg_config, 'forgetting_activation_threshold', 0.1), - forgetting_min_importance=getattr(mg_config, 'forgetting_min_importance', 0.8), - activation_decay_rate=getattr(mg_config, 'activation_decay_rate', 0.9), - activation_propagation_strength=getattr(mg_config, 'activation_propagation_strength', 0.5), - activation_propagation_depth=getattr(mg_config, 'activation_propagation_depth', 1), - max_memory_nodes_per_memory=getattr(mg_config, 'max_memory_nodes_per_memory', 10), - max_related_memories=getattr(mg_config, 'max_related_memories', 5), - # 检索配置 - retrieval=RetrievalConfig( - max_expand_depth=getattr(mg_config, 'search_max_expand_depth', 2), - vector_weight=getattr(mg_config, 'search_vector_weight', 0.4), - graph_distance_weight=getattr(mg_config, 'search_graph_distance_weight', 0.2), - importance_weight=getattr(mg_config, 'search_importance_weight', 0.2), - recency_weight=getattr(mg_config, 'search_recency_weight', 0.2), - ), - ) - - return config - else: - # 没有找到memory_graph配置,使用默认值 - return cls() - - except Exception as e: - import logging - logger = logging.getLogger(__name__) - 
logger.warning(f"从bot_config加载memory_graph配置失败,使用默认配置: {e}") - return cls() - - @classmethod - def from_dict(cls, config_dict: Dict) -> MemoryGraphConfig: - """从字典创建配置""" - return cls( - # 新配置字段 - enable=config_dict.get("enable", True), - data_dir=Path(config_dict.get("data_dir", "data/memory_graph")), - vector_collection_name=config_dict.get("vector_collection_name", "memory_nodes"), - vector_db_path=Path(config_dict.get("vector_db_path", "data/memory_graph/chroma_db")), - search_top_k=config_dict.get("search_top_k", 10), - search_min_importance=config_dict.get("search_min_importance", 0.3), - search_similarity_threshold=config_dict.get("search_similarity_threshold", 0.5), - enable_query_optimization=config_dict.get("enable_query_optimization", True), - consolidation_enabled=config_dict.get("consolidation_enabled", True), - consolidation_interval_hours=config_dict.get("consolidation_interval_hours", 1.0), - consolidation_similarity_threshold=config_dict.get("consolidation_similarity_threshold", 0.85), - consolidation_time_window_hours=config_dict.get("consolidation_time_window_hours", 24), - auto_link_enabled=config_dict.get("auto_link_enabled", True), - auto_link_max_candidates=config_dict.get("auto_link_max_candidates", 5), - auto_link_min_confidence=config_dict.get("auto_link_min_confidence", 0.7), - forgetting_enabled=config_dict.get("forgetting_enabled", True), - forgetting_activation_threshold=config_dict.get("forgetting_activation_threshold", 0.1), - forgetting_min_importance=config_dict.get("forgetting_min_importance", 0.8), - activation_decay_rate=config_dict.get("activation_decay_rate", 0.9), - activation_propagation_strength=config_dict.get("activation_propagation_strength", 0.5), - activation_propagation_depth=config_dict.get("activation_propagation_depth", 1), - max_memory_nodes_per_memory=config_dict.get("max_memory_nodes_per_memory", 10), - max_related_memories=config_dict.get("max_related_memories", 5), - # 旧配置字段(向后兼容) - consolidation=ConsolidationConfig(**config_dict.get("consolidation", {})), - retrieval=RetrievalConfig( - max_expand_depth=config_dict.get("search_max_expand_depth", 2), - vector_weight=config_dict.get("search_vector_weight", 0.4), - graph_distance_weight=config_dict.get("search_graph_distance_weight", 0.2), - importance_weight=config_dict.get("search_importance_weight", 0.2), - recency_weight=config_dict.get("search_recency_weight", 0.2), - **config_dict.get("retrieval", {}) - ), - node_merger=NodeMergerConfig(**config_dict.get("node_merger", {})), - storage=StorageConfig(**config_dict.get("storage", {})), - decay_rates=config_dict.get("decay_rates", cls().decay_rates), - embedding_model=config_dict.get("embedding_model"), - embedding_dimension=config_dict.get("embedding_dimension", 384), - enable_debug_logging=config_dict.get("enable_debug_logging", False), - enable_visualization=config_dict.get("enable_visualization", False), - ) - - def to_dict(self) -> Dict: - """转换为字典""" - return { - "consolidation": { - "interval_hours": self.consolidation.interval_hours, - "batch_size": self.consolidation.batch_size, - "enable_auto_discovery": self.consolidation.enable_auto_discovery, - "enable_conflict_detection": self.consolidation.enable_conflict_detection, - }, - "retrieval": { - "default_mode": self.retrieval.default_mode, - "max_expand_depth": self.retrieval.max_expand_depth, - "vector_weight": self.retrieval.vector_weight, - "graph_distance_weight": self.retrieval.graph_distance_weight, - "importance_weight": self.retrieval.importance_weight, - 
"recency_weight": self.retrieval.recency_weight, - }, - "node_merger": { - "similarity_threshold": self.node_merger.similarity_threshold, - "context_match_required": self.node_merger.context_match_required, - "merge_batch_size": self.node_merger.merge_batch_size, - }, - "storage": { - "data_dir": str(self.storage.data_dir), - "vector_collection_name": self.storage.vector_collection_name, - "graph_file_name": self.storage.graph_file_name, - "enable_persistence": self.storage.enable_persistence, - "auto_save_interval": self.storage.auto_save_interval, - }, - "decay_rates": self.decay_rates, - "embedding_model": self.embedding_model, - "embedding_dimension": self.embedding_dimension, - "enable_debug_logging": self.enable_debug_logging, - "enable_visualization": self.enable_visualization, - } - - -# 默认配置实例 -DEFAULT_CONFIG = MemoryGraphConfig() diff --git a/src/memory_graph/core/node_merger.py b/src/memory_graph/core/node_merger.py index 378aa5f83..e8b790f1e 100644 --- a/src/memory_graph/core/node_merger.py +++ b/src/memory_graph/core/node_merger.py @@ -4,12 +4,13 @@ from __future__ import annotations +from dataclasses import dataclass from typing import List, Optional, Tuple import numpy as np from src.common.logger import get_logger -from src.memory_graph.config import NodeMergerConfig +from src.config.official_configs import MemoryConfig from src.memory_graph.models import MemoryNode, NodeType from src.memory_graph.storage.graph_store import GraphStore from src.memory_graph.storage.vector_store import VectorStore @@ -31,7 +32,7 @@ class NodeMerger: self, vector_store: VectorStore, graph_store: GraphStore, - config: Optional[NodeMergerConfig] = None, + config: MemoryConfig, ): """ 初始化节点合并器 @@ -39,15 +40,15 @@ class NodeMerger: Args: vector_store: 向量存储 graph_store: 图存储 - config: 配置对象 + config: 记忆配置对象 """ self.vector_store = vector_store self.graph_store = graph_store - self.config = config or NodeMergerConfig() + self.config = config logger.info( - f"初始化节点合并器: threshold={self.config.similarity_threshold}, " - f"context_match={self.config.context_match_required}" + f"初始化节点合并器: threshold={self.config.node_merger_similarity_threshold}, " + f"context_match={self.config.node_merger_context_match_required}" ) async def find_similar_nodes( @@ -71,7 +72,7 @@ class NodeMerger: logger.warning(f"节点 {node.id} 没有 embedding,无法查找相似节点") return [] - threshold = threshold or self.config.similarity_threshold + threshold = threshold or self.config.node_merger_similarity_threshold try: # 在向量存储中搜索相似节点 @@ -121,7 +122,7 @@ class NodeMerger: 是否应该合并 """ # 1. 检查相似度阈值 - if similarity < self.config.similarity_threshold: + if similarity < self.config.node_merger_similarity_threshold: return False # 2. 非常高的相似度(>0.95)直接合并 @@ -130,7 +131,7 @@ class NodeMerger: return True # 3. 如果不要求上下文匹配,则通过相似度判断 - if not self.config.context_match_required: + if not self.config.node_merger_context_match_required: return True # 4. 
检查上下文匹配 diff --git a/src/memory_graph/manager.py b/src/memory_graph/manager.py index 9c6ab4e94..5def3c2c9 100644 --- a/src/memory_graph/manager.py +++ b/src/memory_graph/manager.py @@ -15,6 +15,7 @@ from pathlib import Path from typing import Any, Dict, List, Optional, Set, Tuple from src.config.config import global_config +from src.config.official_configs import MemoryConfig from src.memory_graph.core.builder import MemoryBuilder from src.memory_graph.core.extractor import MemoryExtractor from src.memory_graph.models import Memory, MemoryEdge, MemoryNode, MemoryType, NodeType, EdgeType @@ -53,7 +54,7 @@ class MemoryManager: if not global_config.memory or not getattr(global_config.memory, 'enable', False): raise ValueError("记忆系统未启用,请在配置文件中启用 [memory] enable = true") - self.config = global_config.memory + self.config: MemoryConfig = global_config.memory self.data_dir = data_dir or Path(getattr(self.config, 'data_dir', 'data/memory_graph')) # 存储组件 @@ -132,12 +133,16 @@ class MemoryManager: embedding_generator=self.embedding_generator, ) + # 检查配置值 + expand_depth = self.config.search_max_expand_depth + logger.info(f"📊 配置检查: search_max_expand_depth={expand_depth}") + self.tools = MemoryTools( vector_store=self.vector_store, graph_store=self.graph_store, persistence_manager=self.persistence, embedding_generator=self.embedding_generator, - max_expand_depth=getattr(self.config, 'search_max_expand_depth', 1), # 从配置读取默认深度 + max_expand_depth=expand_depth, # 从配置读取图扩展深度 ) self._initialized = True @@ -433,7 +438,7 @@ class MemoryManager: min_importance: float = 0.0, include_forgotten: bool = False, use_multi_query: bool = True, - expand_depth: int = 1, + expand_depth: int | None = None, context: Optional[Dict[str, Any]] = None, ) -> List[Memory]: """ @@ -468,7 +473,7 @@ class MemoryManager: "query": query, "top_k": top_k, "use_multi_query": use_multi_query, - "expand_depth": expand_depth, # 传递图扩展深度 + "expand_depth": expand_depth or global_config.memory.search_max_expand_depth, # 传递图扩展深度 "context": context, } diff --git a/src/memory_graph/tools/memory_tools.py b/src/memory_graph/tools/memory_tools.py index e8b600540..798a36268 100644 --- a/src/memory_graph/tools/memory_tools.py +++ b/src/memory_graph/tools/memory_tools.py @@ -51,6 +51,8 @@ class MemoryTools: self.persistence_manager = persistence_manager self._initialized = False self.max_expand_depth = max_expand_depth # 保存配置的默认值 + + logger.info(f"MemoryTools 初始化: max_expand_depth={max_expand_depth}") # 初始化组件 self.extractor = MemoryExtractor()
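
Note on validation: the deleted src/memory_graph/config.py enforced two invariants in __post_init__ — RetrievalConfig required the four search weights to sum to 1.0, and NodeMergerConfig required the similarity threshold to lie in [0, 1]. The flat MemoryConfig fields added above do not show equivalent checks in this hunk. Below is a minimal sketch (not part of the patch, and the helper name is illustrative) of how those invariants could be re-checked against the new field names:

# Illustrative sketch, not part of the patch: re-creates the invariant checks
# from the deleted RetrievalConfig / NodeMergerConfig dataclasses against the
# flat MemoryConfig fields added in official_configs.py. "cfg" is expected to
# be the MemoryConfig instance, e.g. global_config.memory.
def validate_memory_graph_config(cfg) -> None:
    """Raise ValueError if the memory-graph settings are inconsistent."""
    weight_total = (
        cfg.search_vector_weight
        + cfg.search_graph_distance_weight
        + cfg.search_importance_weight
        + cfg.search_recency_weight
    )
    if abs(weight_total - 1.0) > 0.01:
        raise ValueError(f"search weights must sum to 1.0, got {weight_total}")
    if not 0.0 <= cfg.node_merger_similarity_threshold <= 1.0:
        raise ValueError(
            "node_merger_similarity_threshold must be in [0, 1], got "
            f"{cfg.node_merger_similarity_threshold}"
        )

With the patch applied, NodeMerger and MemoryManager read these values directly from global_config.memory, so a check like this could be run once during MemoryManager initialization, before MemoryTools is constructed.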