feat(memory-graph): 添加记忆图系统配置,整合节点去重合并器与记忆管理器

This commit is contained in:
Windpicker-owo
2025-11-06 21:53:55 +08:00
parent 16be633dc2
commit 2dbe615057
5 changed files with 74 additions and 285 deletions

View File

@@ -402,6 +402,59 @@ class MemoryConfig(ValidatedConfigBase):
memory_build_throttling: bool = Field(default=True, description="启用记忆构建节流") memory_build_throttling: bool = Field(default=True, description="启用记忆构建节流")
memory_priority_queue_enabled: bool = Field(default=True, description="启用记忆优先级队列") memory_priority_queue_enabled: bool = Field(default=True, description="启用记忆优先级队列")
# === Memory Graph System configuration ===
# Settings for the next-generation memory system
enable: bool = Field(default=True, description="启用记忆图系统")
data_dir: str = Field(default="data/memory_graph", description="记忆数据存储目录")
# Vector store settings
vector_collection_name: str = Field(default="memory_nodes", description="向量集合名称")
vector_db_path: str = Field(default="data/memory_graph/chroma_db", description="向量数据库路径")
# Retrieval settings
search_top_k: int = Field(default=10, description="默认检索返回数量")
search_min_importance: float = Field(default=0.3, description="最小重要性阈值")
search_similarity_threshold: float = Field(default=0.5, description="向量相似度阈值")
search_max_expand_depth: int = Field(default=2, description="检索时图扩展深度0-3")
enable_query_optimization: bool = Field(default=True, description="启用查询优化")
# Retrieval score weights (memory graph system)
# NOTE(review): downstream ranking appears to require these four weights to sum to 1.0 — verify
search_vector_weight: float = Field(default=0.4, description="向量相似度权重")
search_graph_distance_weight: float = Field(default=0.2, description="图距离权重")
search_importance_weight: float = Field(default=0.2, description="重要性权重")
search_recency_weight: float = Field(default=0.2, description="时效性权重")
# Memory consolidation settings
consolidation_enabled: bool = Field(default=False, description="是否启用记忆整合")
consolidation_interval_hours: float = Field(default=6.0, description="整合任务执行间隔(小时)")
consolidation_similarity_threshold: float = Field(default=0.92, description="相似记忆去重阈值")
consolidation_time_window_hours: float = Field(default=6.0, description="整合时间窗口(小时)")
consolidation_max_batch_size: int = Field(default=50, description="单次最多处理的记忆数量")
# Auto-linking settings
auto_link_enabled: bool = Field(default=True, description="是否启用自动关联")
auto_link_max_candidates: int = Field(default=5, description="每个记忆最多关联候选数")
auto_link_min_confidence: float = Field(default=0.7, description="最低置信度阈值")
# Forgetting settings (memory graph system)
forgetting_enabled: bool = Field(default=True, description="是否启用自动遗忘")
forgetting_activation_threshold: float = Field(default=0.1, description="激活度阈值")
forgetting_min_importance: float = Field(default=0.8, description="最小保护重要性")
# Activation settings
activation_decay_rate: float = Field(default=0.9, description="激活度衰减率")
activation_propagation_strength: float = Field(default=0.5, description="激活传播强度")
activation_propagation_depth: int = Field(default=2, description="激活传播深度")
# Performance settings
max_memory_nodes_per_memory: int = Field(default=10, description="每个记忆最多包含的节点数")
max_related_memories: int = Field(default=5, description="相关记忆最大数量")
# Node deduplication / merge settings
node_merger_similarity_threshold: float = Field(default=0.85, description="节点去重相似度阈值")
node_merger_context_match_required: bool = Field(default=True, description="节点合并是否要求上下文匹配")
node_merger_merge_batch_size: int = Field(default=50, description="节点合并批量处理大小")
class MoodConfig(ValidatedConfigBase): class MoodConfig(ValidatedConfigBase):
"""情绪配置类""" """情绪配置类"""

View File

@@ -1,272 +0,0 @@
"""
记忆图系统配置管理
"""
from __future__ import annotations
from dataclasses import dataclass, field
from pathlib import Path
from typing import Dict, Optional
@dataclass
class ConsolidationConfig:
    """Memory consolidation settings."""

    interval_hours: int = 6  # interval between consolidation runs (hours)
    batch_size: int = 100  # number of memories processed per run
    enable_auto_discovery: bool = True  # whether to auto-discover links between memories
    enable_conflict_detection: bool = True  # whether to detect conflicting memories
@dataclass
class RetrievalConfig:
    """Memory retrieval settings.

    The four scoring weights must sum to 1.0 (within a 0.01 tolerance);
    violating this raises ``ValueError`` at construction time.
    """

    default_mode: str = "auto"  # one of: auto / fast / deep
    max_expand_depth: int = 2  # maximum graph expansion depth
    vector_weight: float = 0.4  # weight of vector similarity
    graph_distance_weight: float = 0.2  # weight of graph distance
    importance_weight: float = 0.2  # weight of importance
    recency_weight: float = 0.2  # weight of recency

    def __post_init__(self):
        """Reject weight combinations that do not sum to 1.0 (±0.01)."""
        total = sum(
            (
                self.vector_weight,
                self.graph_distance_weight,
                self.importance_weight,
                self.recency_weight,
            )
        )
        deviation = abs(total - 1.0)
        if deviation > 0.01:
            raise ValueError(f"权重总和必须为1.0,当前为 {total}")
@dataclass
class NodeMergerConfig:
    """Settings controlling deduplication/merging of memory nodes."""

    similarity_threshold: float = 0.85  # similarity cutoff for merge candidates
    context_match_required: bool = True  # require a context match before merging
    merge_batch_size: int = 50  # number of nodes processed per merge batch

    def __post_init__(self):
        """Reject thresholds outside the closed interval [0, 1]."""
        threshold = self.similarity_threshold
        if not 0.0 <= threshold <= 1.0:
            raise ValueError(f"相似度阈值必须在 [0, 1] 范围内,当前为 {threshold}")
@dataclass
class StorageConfig:
    """Storage settings."""

    # default_factory avoids sharing one mutable Path default across instances
    data_dir: Path = field(default_factory=lambda: Path("data/memory_graph"))
    vector_collection_name: str = "memory_nodes"
    graph_file_name: str = "memory_graph.json"
    enable_persistence: bool = True  # whether to persist the graph to disk
    auto_save_interval: int = 300  # auto-save interval (seconds)
@dataclass
class MemoryGraphConfig:
    """Aggregate configuration for the memory graph system.

    Flat fields mirror the new ``[memory]`` config section; the nested
    ``consolidation`` / ``retrieval`` / ``node_merger`` / ``storage``
    objects are kept for backward compatibility with older call sites.
    """

    # Basic settings
    enable: bool = True  # master switch for the memory graph system
    data_dir: Path = field(default_factory=lambda: Path("data/memory_graph"))
    # Vector store settings
    vector_collection_name: str = "memory_nodes"
    vector_db_path: Path = field(default_factory=lambda: Path("data/memory_graph/chroma_db"))
    # Retrieval settings
    search_top_k: int = 10
    search_min_importance: float = 0.3
    search_similarity_threshold: float = 0.5
    enable_query_optimization: bool = True
    # Consolidation settings
    consolidation_enabled: bool = True
    consolidation_interval_hours: float = 1.0
    consolidation_similarity_threshold: float = 0.85
    consolidation_time_window_hours: int = 24
    # Auto-linking settings
    auto_link_enabled: bool = True  # whether auto-linking is enabled
    auto_link_max_candidates: int = 5  # max link candidates per memory
    auto_link_min_confidence: float = 0.7  # minimum confidence threshold
    # Forgetting settings
    forgetting_enabled: bool = True
    forgetting_activation_threshold: float = 0.1
    forgetting_min_importance: float = 0.8
    # Activation settings
    activation_decay_rate: float = 0.9
    activation_propagation_strength: float = 0.5
    activation_propagation_depth: int = 1
    # Performance settings
    max_memory_nodes_per_memory: int = 10
    max_related_memories: int = 5
    # Legacy nested configs (backward compatibility)
    consolidation: ConsolidationConfig = field(default_factory=ConsolidationConfig)
    retrieval: RetrievalConfig = field(default_factory=RetrievalConfig)
    node_merger: NodeMergerConfig = field(default_factory=NodeMergerConfig)
    storage: StorageConfig = field(default_factory=StorageConfig)
    # Per-memory-type time-decay rates
    decay_rates: Dict[str, float] = field(
        default_factory=lambda: {
            "EVENT": 0.05,  # events decay fastest
            "FACT": 0.01,  # facts decay slowly
            "RELATION": 0.005,  # relations decay very slowly
            "OPINION": 0.03,  # opinions decay at a medium rate
        }
    )
    # Embedding model settings
    embedding_model: Optional[str] = None  # None -> use the system default model
    embedding_dimension: int = 384  # default sentence-transformers dimension
    # Debugging and logging
    enable_debug_logging: bool = False
    enable_visualization: bool = False  # whether memory visualization is enabled

    @classmethod
    def from_bot_config(cls, bot_config) -> MemoryGraphConfig:
        """Build a config from ``bot_config``.

        Prefers ``bot_config.memory`` and falls back to the legacy
        ``bot_config.memory_graph`` attribute.  Returns a default instance
        when neither section exists or when loading fails for any reason.
        """
        try:
            # Prefer the new `memory` section; fall back to legacy `memory_graph`.
            if hasattr(bot_config, 'memory') and bot_config.memory is not None:
                mg_config = bot_config.memory
            elif hasattr(bot_config, 'memory_graph'):
                mg_config = bot_config.memory_graph
            else:
                # No memory_graph configuration found: use defaults.
                return cls()
            # Bug fix: build and return the config for BOTH branches above.
            # Previously the construction lived inside the `elif` branch, so
            # the primary `memory` branch fell through and returned None.
            return cls(
                enable=getattr(mg_config, 'enable', True),
                data_dir=Path(getattr(mg_config, 'data_dir', 'data/memory_graph')),
                vector_collection_name=getattr(mg_config, 'vector_collection_name', 'memory_nodes'),
                vector_db_path=Path(getattr(mg_config, 'vector_db_path', 'data/memory_graph/chroma_db')),
                search_top_k=getattr(mg_config, 'search_top_k', 10),
                search_min_importance=getattr(mg_config, 'search_min_importance', 0.3),
                search_similarity_threshold=getattr(mg_config, 'search_similarity_threshold', 0.5),
                enable_query_optimization=getattr(mg_config, 'enable_query_optimization', True),
                consolidation_enabled=getattr(mg_config, 'consolidation_enabled', True),
                consolidation_interval_hours=getattr(mg_config, 'consolidation_interval_hours', 1.0),
                consolidation_similarity_threshold=getattr(mg_config, 'consolidation_similarity_threshold', 0.85),
                consolidation_time_window_hours=getattr(mg_config, 'consolidation_time_window_hours', 24),
                auto_link_enabled=getattr(mg_config, 'auto_link_enabled', True),
                auto_link_max_candidates=getattr(mg_config, 'auto_link_max_candidates', 5),
                auto_link_min_confidence=getattr(mg_config, 'auto_link_min_confidence', 0.7),
                forgetting_enabled=getattr(mg_config, 'forgetting_enabled', True),
                forgetting_activation_threshold=getattr(mg_config, 'forgetting_activation_threshold', 0.1),
                forgetting_min_importance=getattr(mg_config, 'forgetting_min_importance', 0.8),
                activation_decay_rate=getattr(mg_config, 'activation_decay_rate', 0.9),
                activation_propagation_strength=getattr(mg_config, 'activation_propagation_strength', 0.5),
                activation_propagation_depth=getattr(mg_config, 'activation_propagation_depth', 1),
                max_memory_nodes_per_memory=getattr(mg_config, 'max_memory_nodes_per_memory', 10),
                max_related_memories=getattr(mg_config, 'max_related_memories', 5),
                # Retrieval sub-config assembled from the flat search_* fields
                retrieval=RetrievalConfig(
                    max_expand_depth=getattr(mg_config, 'search_max_expand_depth', 2),
                    vector_weight=getattr(mg_config, 'search_vector_weight', 0.4),
                    graph_distance_weight=getattr(mg_config, 'search_graph_distance_weight', 0.2),
                    importance_weight=getattr(mg_config, 'search_importance_weight', 0.2),
                    recency_weight=getattr(mg_config, 'search_recency_weight', 0.2),
                ),
            )
        except Exception as e:
            import logging
            logger = logging.getLogger(__name__)
            logger.warning(f"从bot_config加载memory_graph配置失败使用默认配置: {e}")
            return cls()

    @classmethod
    def from_dict(cls, config_dict: Dict) -> MemoryGraphConfig:
        """Create a config from a plain dict.

        Flat ``search_*`` keys provide defaults for the nested retrieval
        sub-config; an explicit nested ``"retrieval"`` dict overrides them.
        """
        # Bug fix: previously both the flat keys and **config_dict["retrieval"]
        # were passed as keyword arguments, raising TypeError on duplicates.
        retrieval_kwargs = {
            "max_expand_depth": config_dict.get("search_max_expand_depth", 2),
            "vector_weight": config_dict.get("search_vector_weight", 0.4),
            "graph_distance_weight": config_dict.get("search_graph_distance_weight", 0.2),
            "importance_weight": config_dict.get("search_importance_weight", 0.2),
            "recency_weight": config_dict.get("search_recency_weight", 0.2),
        }
        retrieval_kwargs.update(config_dict.get("retrieval", {}))
        return cls(
            # New flat fields
            enable=config_dict.get("enable", True),
            data_dir=Path(config_dict.get("data_dir", "data/memory_graph")),
            vector_collection_name=config_dict.get("vector_collection_name", "memory_nodes"),
            vector_db_path=Path(config_dict.get("vector_db_path", "data/memory_graph/chroma_db")),
            search_top_k=config_dict.get("search_top_k", 10),
            search_min_importance=config_dict.get("search_min_importance", 0.3),
            search_similarity_threshold=config_dict.get("search_similarity_threshold", 0.5),
            enable_query_optimization=config_dict.get("enable_query_optimization", True),
            consolidation_enabled=config_dict.get("consolidation_enabled", True),
            consolidation_interval_hours=config_dict.get("consolidation_interval_hours", 1.0),
            consolidation_similarity_threshold=config_dict.get("consolidation_similarity_threshold", 0.85),
            consolidation_time_window_hours=config_dict.get("consolidation_time_window_hours", 24),
            auto_link_enabled=config_dict.get("auto_link_enabled", True),
            auto_link_max_candidates=config_dict.get("auto_link_max_candidates", 5),
            auto_link_min_confidence=config_dict.get("auto_link_min_confidence", 0.7),
            forgetting_enabled=config_dict.get("forgetting_enabled", True),
            forgetting_activation_threshold=config_dict.get("forgetting_activation_threshold", 0.1),
            forgetting_min_importance=config_dict.get("forgetting_min_importance", 0.8),
            activation_decay_rate=config_dict.get("activation_decay_rate", 0.9),
            activation_propagation_strength=config_dict.get("activation_propagation_strength", 0.5),
            activation_propagation_depth=config_dict.get("activation_propagation_depth", 1),
            max_memory_nodes_per_memory=config_dict.get("max_memory_nodes_per_memory", 10),
            max_related_memories=config_dict.get("max_related_memories", 5),
            # Legacy nested fields (backward compatibility)
            consolidation=ConsolidationConfig(**config_dict.get("consolidation", {})),
            retrieval=RetrievalConfig(**retrieval_kwargs),
            node_merger=NodeMergerConfig(**config_dict.get("node_merger", {})),
            storage=StorageConfig(**config_dict.get("storage", {})),
            decay_rates=config_dict.get("decay_rates", cls().decay_rates),
            embedding_model=config_dict.get("embedding_model"),
            embedding_dimension=config_dict.get("embedding_dimension", 384),
            enable_debug_logging=config_dict.get("enable_debug_logging", False),
            enable_visualization=config_dict.get("enable_visualization", False),
        )

    def to_dict(self) -> Dict:
        """Serialize the legacy nested sub-configs and shared fields to a dict.

        NOTE(review): the flat search_*/consolidation_* fields are not
        serialized here — only the nested legacy sections are; verify
        against whoever consumes this dict before round-tripping.
        """
        return {
            "consolidation": {
                "interval_hours": self.consolidation.interval_hours,
                "batch_size": self.consolidation.batch_size,
                "enable_auto_discovery": self.consolidation.enable_auto_discovery,
                "enable_conflict_detection": self.consolidation.enable_conflict_detection,
            },
            "retrieval": {
                "default_mode": self.retrieval.default_mode,
                "max_expand_depth": self.retrieval.max_expand_depth,
                "vector_weight": self.retrieval.vector_weight,
                "graph_distance_weight": self.retrieval.graph_distance_weight,
                "importance_weight": self.retrieval.importance_weight,
                "recency_weight": self.retrieval.recency_weight,
            },
            "node_merger": {
                "similarity_threshold": self.node_merger.similarity_threshold,
                "context_match_required": self.node_merger.context_match_required,
                "merge_batch_size": self.node_merger.merge_batch_size,
            },
            "storage": {
                "data_dir": str(self.storage.data_dir),
                "vector_collection_name": self.storage.vector_collection_name,
                "graph_file_name": self.storage.graph_file_name,
                "enable_persistence": self.storage.enable_persistence,
                "auto_save_interval": self.storage.auto_save_interval,
            },
            "decay_rates": self.decay_rates,
            "embedding_model": self.embedding_model,
            "embedding_dimension": self.embedding_dimension,
            "enable_debug_logging": self.enable_debug_logging,
            "enable_visualization": self.enable_visualization,
        }


# Default configuration instance
DEFAULT_CONFIG = MemoryGraphConfig()

View File

@@ -4,12 +4,13 @@
from __future__ import annotations from __future__ import annotations
from dataclasses import dataclass
from typing import List, Optional, Tuple from typing import List, Optional, Tuple
import numpy as np import numpy as np
from src.common.logger import get_logger from src.common.logger import get_logger
from src.memory_graph.config import NodeMergerConfig from src.config.official_configs import MemoryConfig
from src.memory_graph.models import MemoryNode, NodeType from src.memory_graph.models import MemoryNode, NodeType
from src.memory_graph.storage.graph_store import GraphStore from src.memory_graph.storage.graph_store import GraphStore
from src.memory_graph.storage.vector_store import VectorStore from src.memory_graph.storage.vector_store import VectorStore
@@ -31,7 +32,7 @@ class NodeMerger:
self, self,
vector_store: VectorStore, vector_store: VectorStore,
graph_store: GraphStore, graph_store: GraphStore,
config: Optional[NodeMergerConfig] = None, config: MemoryConfig,
): ):
""" """
初始化节点合并器 初始化节点合并器
@@ -39,15 +40,15 @@ class NodeMerger:
Args: Args:
vector_store: 向量存储 vector_store: 向量存储
graph_store: 图存储 graph_store: 图存储
config: 配置对象 config: 记忆配置对象
""" """
self.vector_store = vector_store self.vector_store = vector_store
self.graph_store = graph_store self.graph_store = graph_store
self.config = config or NodeMergerConfig() self.config = config
logger.info( logger.info(
f"初始化节点合并器: threshold={self.config.similarity_threshold}, " f"初始化节点合并器: threshold={self.config.node_merger_similarity_threshold}, "
f"context_match={self.config.context_match_required}" f"context_match={self.config.node_merger_context_match_required}"
) )
async def find_similar_nodes( async def find_similar_nodes(
@@ -71,7 +72,7 @@ class NodeMerger:
logger.warning(f"节点 {node.id} 没有 embedding无法查找相似节点") logger.warning(f"节点 {node.id} 没有 embedding无法查找相似节点")
return [] return []
threshold = threshold or self.config.similarity_threshold threshold = threshold or self.config.node_merger_similarity_threshold
try: try:
# 在向量存储中搜索相似节点 # 在向量存储中搜索相似节点
@@ -121,7 +122,7 @@ class NodeMerger:
是否应该合并 是否应该合并
""" """
# 1. 检查相似度阈值 # 1. 检查相似度阈值
if similarity < self.config.similarity_threshold: if similarity < self.config.node_merger_similarity_threshold:
return False return False
# 2. 非常高的相似度(>0.95)直接合并 # 2. 非常高的相似度(>0.95)直接合并
@@ -130,7 +131,7 @@ class NodeMerger:
return True return True
# 3. 如果不要求上下文匹配,则通过相似度判断 # 3. 如果不要求上下文匹配,则通过相似度判断
if not self.config.context_match_required: if not self.config.node_merger_context_match_required:
return True return True
# 4. 检查上下文匹配 # 4. 检查上下文匹配

View File

@@ -15,6 +15,7 @@ from pathlib import Path
from typing import Any, Dict, List, Optional, Set, Tuple from typing import Any, Dict, List, Optional, Set, Tuple
from src.config.config import global_config from src.config.config import global_config
from src.config.official_configs import MemoryConfig
from src.memory_graph.core.builder import MemoryBuilder from src.memory_graph.core.builder import MemoryBuilder
from src.memory_graph.core.extractor import MemoryExtractor from src.memory_graph.core.extractor import MemoryExtractor
from src.memory_graph.models import Memory, MemoryEdge, MemoryNode, MemoryType, NodeType, EdgeType from src.memory_graph.models import Memory, MemoryEdge, MemoryNode, MemoryType, NodeType, EdgeType
@@ -53,7 +54,7 @@ class MemoryManager:
if not global_config.memory or not getattr(global_config.memory, 'enable', False): if not global_config.memory or not getattr(global_config.memory, 'enable', False):
raise ValueError("记忆系统未启用,请在配置文件中启用 [memory] enable = true") raise ValueError("记忆系统未启用,请在配置文件中启用 [memory] enable = true")
self.config = global_config.memory self.config: MemoryConfig = global_config.memory
self.data_dir = data_dir or Path(getattr(self.config, 'data_dir', 'data/memory_graph')) self.data_dir = data_dir or Path(getattr(self.config, 'data_dir', 'data/memory_graph'))
# 存储组件 # 存储组件
@@ -132,12 +133,16 @@ class MemoryManager:
embedding_generator=self.embedding_generator, embedding_generator=self.embedding_generator,
) )
# 检查配置值
expand_depth = self.config.search_max_expand_depth
logger.info(f"📊 配置检查: search_max_expand_depth={expand_depth}")
self.tools = MemoryTools( self.tools = MemoryTools(
vector_store=self.vector_store, vector_store=self.vector_store,
graph_store=self.graph_store, graph_store=self.graph_store,
persistence_manager=self.persistence, persistence_manager=self.persistence,
embedding_generator=self.embedding_generator, embedding_generator=self.embedding_generator,
max_expand_depth=getattr(self.config, 'search_max_expand_depth', 1), # 从配置读取默认深度 max_expand_depth=expand_depth, # 从配置读取图扩展深度
) )
self._initialized = True self._initialized = True
@@ -433,7 +438,7 @@ class MemoryManager:
min_importance: float = 0.0, min_importance: float = 0.0,
include_forgotten: bool = False, include_forgotten: bool = False,
use_multi_query: bool = True, use_multi_query: bool = True,
expand_depth: int = 1, expand_depth: int | None = None,
context: Optional[Dict[str, Any]] = None, context: Optional[Dict[str, Any]] = None,
) -> List[Memory]: ) -> List[Memory]:
""" """
@@ -468,7 +473,7 @@ class MemoryManager:
"query": query, "query": query,
"top_k": top_k, "top_k": top_k,
"use_multi_query": use_multi_query, "use_multi_query": use_multi_query,
"expand_depth": expand_depth, # 传递图扩展深度 "expand_depth": expand_depth or global_config.memory.search_max_expand_depth, # 传递图扩展深度
"context": context, "context": context,
} }

View File

@@ -52,6 +52,8 @@ class MemoryTools:
self._initialized = False self._initialized = False
self.max_expand_depth = max_expand_depth # 保存配置的默认值 self.max_expand_depth = max_expand_depth # 保存配置的默认值
logger.info(f"MemoryTools 初始化: max_expand_depth={max_expand_depth}")
# 初始化组件 # 初始化组件
self.extractor = MemoryExtractor() self.extractor = MemoryExtractor()
self.builder = MemoryBuilder( self.builder = MemoryBuilder(