feat(memory-graph): 添加记忆图系统配置,整合节点去重合并器与记忆管理器

This commit is contained in:
Windpicker-owo
2025-11-06 21:53:55 +08:00
parent 28d41acc51
commit a2ce020099
5 changed files with 74 additions and 285 deletions

View File

@@ -401,6 +401,59 @@ class MemoryConfig(ValidatedConfigBase):
memory_system_load_balancing: bool = Field(default=True, description="启用记忆系统负载均衡")
memory_build_throttling: bool = Field(default=True, description="启用记忆构建节流")
memory_priority_queue_enabled: bool = Field(default=True, description="启用记忆优先级队列")
# === Memory graph system configuration (Memory Graph System) ===
# Settings for the new-generation memory system.
enable: bool = Field(default=True, description="启用记忆图系统")
data_dir: str = Field(default="data/memory_graph", description="记忆数据存储目录")
# Vector store settings
vector_collection_name: str = Field(default="memory_nodes", description="向量集合名称")
vector_db_path: str = Field(default="data/memory_graph/chroma_db", description="向量数据库路径")
# Retrieval settings
search_top_k: int = Field(default=10, description="默认检索返回数量")
search_min_importance: float = Field(default=0.3, description="最小重要性阈值")
search_similarity_threshold: float = Field(default=0.5, description="向量相似度阈值")
# NOTE(review): the description mentions a 0-3 range, but no ge/le bound is declared on this Field.
search_max_expand_depth: int = Field(default=2, description="检索时图扩展深度0-3")
enable_query_optimization: bool = Field(default=True, description="启用查询优化")
# Retrieval scoring weights (memory graph system)
# NOTE(review): the defaults add up to 1.0; no sum check is visible here - confirm whether one is enforced elsewhere.
search_vector_weight: float = Field(default=0.4, description="向量相似度权重")
search_graph_distance_weight: float = Field(default=0.2, description="图距离权重")
search_importance_weight: float = Field(default=0.2, description="重要性权重")
search_recency_weight: float = Field(default=0.2, description="时效性权重")
# Memory consolidation settings
consolidation_enabled: bool = Field(default=False, description="是否启用记忆整合")
consolidation_interval_hours: float = Field(default=6.0, description="整合任务执行间隔(小时)")
consolidation_similarity_threshold: float = Field(default=0.92, description="相似记忆去重阈值")
consolidation_time_window_hours: float = Field(default=6.0, description="整合时间窗口(小时)")
consolidation_max_batch_size: int = Field(default=50, description="单次最多处理的记忆数量")
# Automatic linking settings
auto_link_enabled: bool = Field(default=True, description="是否启用自动关联")
auto_link_max_candidates: int = Field(default=5, description="每个记忆最多关联候选数")
auto_link_min_confidence: float = Field(default=0.7, description="最低置信度阈值")
# Forgetting settings (memory graph system)
forgetting_enabled: bool = Field(default=True, description="是否启用自动遗忘")
forgetting_activation_threshold: float = Field(default=0.1, description="激活度阈值")
forgetting_min_importance: float = Field(default=0.8, description="最小保护重要性")
# Activation settings
activation_decay_rate: float = Field(default=0.9, description="激活度衰减率")
activation_propagation_strength: float = Field(default=0.5, description="激活传播强度")
activation_propagation_depth: int = Field(default=2, description="激活传播深度")
# Performance settings
max_memory_nodes_per_memory: int = Field(default=10, description="每个记忆最多包含的节点数")
max_related_memories: int = Field(default=5, description="相关记忆最大数量")
# Node de-duplication / merge settings (read by NodeMerger as config.node_merger_*)
node_merger_similarity_threshold: float = Field(default=0.85, description="节点去重相似度阈值")
node_merger_context_match_required: bool = Field(default=True, description="节点合并是否要求上下文匹配")
node_merger_merge_batch_size: int = Field(default=50, description="节点合并批量处理大小")
class MoodConfig(ValidatedConfigBase):

View File

@@ -1,272 +0,0 @@
"""
记忆图系统配置管理
"""
from __future__ import annotations
from dataclasses import dataclass, field
from pathlib import Path
from typing import Dict, Optional
@dataclass
class ConsolidationConfig:
    """Settings for the memory consolidation (tidy-up) pass."""

    # Interval between two consolidation runs, in hours.
    interval_hours: int = 6
    # Number of memories processed per run.
    batch_size: int = 100
    # Whether automatic link discovery is enabled.
    enable_auto_discovery: bool = True
    # Whether conflict detection is enabled.
    enable_conflict_detection: bool = True
@dataclass
class RetrievalConfig:
    """Settings for memory retrieval and result scoring.

    The four ``*_weight`` fields must add up to 1.0 (tolerance 0.01);
    this is checked right after construction.
    """

    # Retrieval mode: auto / fast / deep (the value itself is not validated here).
    default_mode: str = "auto"
    # Maximum graph expansion depth.
    max_expand_depth: int = 2
    # Weight of vector similarity.
    vector_weight: float = 0.4
    # Weight of graph distance.
    graph_distance_weight: float = 0.2
    # Weight of importance.
    importance_weight: float = 0.2
    # Weight of recency.
    recency_weight: float = 0.2

    def __post_init__(self):
        """Validate that the scoring weights sum to 1.0.

        Raises:
            ValueError: if the sum deviates from 1.0 by more than 0.01.
        """
        # Keep the explicit left-to-right addition so the value shown in the
        # error message is the plain float sum.
        total = (
            self.vector_weight
            + self.graph_distance_weight
            + self.importance_weight
            + self.recency_weight
        )
        deviation = abs(total - 1.0)
        if deviation > 0.01:
            raise ValueError(f"权重总和必须为1.0,当前为 {total}")
@dataclass
class NodeMergerConfig:
    """Settings for node de-duplication."""

    # Similarity threshold; must lie in [0, 1].
    similarity_threshold: float = 0.85
    # Whether a context match is required.
    context_match_required: bool = True
    # Batch size used when processing in bulk.
    merge_batch_size: int = 50

    def __post_init__(self):
        """Validate the threshold range.

        Raises:
            ValueError: if ``similarity_threshold`` is outside [0, 1].
        """
        within_bounds = 0.0 <= self.similarity_threshold <= 1.0
        if not within_bounds:
            raise ValueError(f"相似度阈值必须在 [0, 1] 范围内,当前为 {self.similarity_threshold}")
@dataclass
class StorageConfig:
    """Storage settings.

    ``data_dir`` is always held as a ``pathlib.Path``; a plain string (for
    example one read back from a serialized dict) is converted on construction.
    """

    # Directory holding the memory graph data.
    data_dir: Path = field(default_factory=lambda: Path("data/memory_graph"))
    # Name of the vector collection.
    vector_collection_name: str = "memory_nodes"
    # File name of the persisted graph.
    graph_file_name: str = "memory_graph.json"
    # Whether persistence is enabled.
    enable_persistence: bool = True
    # Auto-save interval, in seconds.
    auto_save_interval: int = 300

    def __post_init__(self):
        """Normalize ``data_dir`` so the field always matches its ``Path`` annotation."""
        # Path(Path(...)) is a no-op, so callers already passing a Path are unaffected.
        self.data_dir = Path(self.data_dir)
@dataclass
class MemoryGraphConfig:
    """Top-level configuration of the memory graph system.

    The flat attributes carry the current settings. The nested
    ``consolidation`` / ``retrieval`` / ``node_merger`` / ``storage`` sections
    are the older layout, kept for backward compatibility.
    """

    # --- Basic settings ---
    enable: bool = True  # whether the memory graph system is enabled
    data_dir: Path = field(default_factory=lambda: Path("data/memory_graph"))

    # --- Vector store settings ---
    vector_collection_name: str = "memory_nodes"
    vector_db_path: Path = field(default_factory=lambda: Path("data/memory_graph/chroma_db"))

    # --- Retrieval settings ---
    search_top_k: int = 10
    search_min_importance: float = 0.3
    search_similarity_threshold: float = 0.5
    enable_query_optimization: bool = True

    # --- Consolidation settings ---
    consolidation_enabled: bool = True
    consolidation_interval_hours: float = 1.0
    consolidation_similarity_threshold: float = 0.85
    consolidation_time_window_hours: int = 24

    # --- Automatic linking settings ---
    auto_link_enabled: bool = True  # whether automatic linking is enabled
    auto_link_max_candidates: int = 5  # max link candidates per memory
    auto_link_min_confidence: float = 0.7  # minimum confidence threshold

    # --- Forgetting settings ---
    forgetting_enabled: bool = True
    forgetting_activation_threshold: float = 0.1
    forgetting_min_importance: float = 0.8

    # --- Activation settings ---
    activation_decay_rate: float = 0.9
    activation_propagation_strength: float = 0.5
    activation_propagation_depth: int = 1

    # --- Performance settings ---
    max_memory_nodes_per_memory: int = 10
    max_related_memories: int = 5

    # --- Legacy nested sections (backward compatibility) ---
    consolidation: ConsolidationConfig = field(default_factory=ConsolidationConfig)
    retrieval: RetrievalConfig = field(default_factory=RetrievalConfig)
    node_merger: NodeMergerConfig = field(default_factory=NodeMergerConfig)
    storage: StorageConfig = field(default_factory=StorageConfig)

    # --- Time decay rates per memory kind ---
    decay_rates: Dict[str, float] = field(
        default_factory=lambda: {
            "EVENT": 0.05,  # events decay comparatively fast
            "FACT": 0.01,  # facts decay slowly
            "RELATION": 0.005,  # relations decay very slowly
            "OPINION": 0.03,  # opinions decay at a medium rate
        }
    )

    # --- Embedding model settings ---
    embedding_model: Optional[str] = None  # None means: use the system default
    embedding_dimension: int = 384  # per the original note: the sentence-transformers dimension

    # --- Debugging and logging ---
    enable_debug_logging: bool = False
    enable_visualization: bool = False  # whether memory visualization is enabled

    # Flat settings that every loader/serializer copies one-to-one. Unannotated on
    # purpose so the dataclass machinery does not treat it as a field.
    _FLAT_FIELDS = (
        "enable",
        "data_dir",
        "vector_collection_name",
        "vector_db_path",
        "search_top_k",
        "search_min_importance",
        "search_similarity_threshold",
        "enable_query_optimization",
        "consolidation_enabled",
        "consolidation_interval_hours",
        "consolidation_similarity_threshold",
        "consolidation_time_window_hours",
        "auto_link_enabled",
        "auto_link_max_candidates",
        "auto_link_min_confidence",
        "forgetting_enabled",
        "forgetting_activation_threshold",
        "forgetting_min_importance",
        "activation_decay_rate",
        "activation_propagation_strength",
        "activation_propagation_depth",
        "max_memory_nodes_per_memory",
        "max_related_memories",
    )

    # (flat source name, RetrievalConfig field) pairs.
    _RETRIEVAL_FIELDS = (
        ("search_max_expand_depth", "max_expand_depth"),
        ("search_vector_weight", "vector_weight"),
        ("search_graph_distance_weight", "graph_distance_weight"),
        ("search_importance_weight", "importance_weight"),
        ("search_recency_weight", "recency_weight"),
    )

    def __post_init__(self):
        """Normalize path-typed fields so they always match their ``Path`` annotation."""
        self.data_dir = Path(self.data_dir)
        self.vector_db_path = Path(self.vector_db_path)

    @classmethod
    def from_bot_config(cls, bot_config) -> MemoryGraphConfig:
        """Build a configuration from a bot config object.

        ``bot_config.memory`` is preferred; ``bot_config.memory_graph`` is the
        fallback. Settings missing on the chosen section fall back to this
        class's defaults.

        Args:
            bot_config: object that may expose a ``memory`` or ``memory_graph`` attribute.

        Returns:
            The loaded configuration. A default instance is returned when neither
            section is available or when loading fails for any reason (a warning
            is logged in the failure case).
        """
        try:
            section = getattr(bot_config, "memory", None)
            if section is None:
                section = getattr(bot_config, "memory_graph", None)
            if section is None:
                # Neither section present: use defaults.
                return cls()

            defaults = cls()
            flat = {
                name: getattr(section, name, getattr(defaults, name))
                for name in cls._FLAT_FIELDS
            }
            retrieval = RetrievalConfig(
                **{
                    target: getattr(section, source, getattr(defaults.retrieval, target))
                    for source, target in cls._RETRIEVAL_FIELDS
                }
            )
            return cls(retrieval=retrieval, **flat)
        except Exception as e:
            # Deliberate best-effort: a broken section must not prevent start-up.
            import logging

            logger = logging.getLogger(__name__)
            logger.warning(f"从bot_config加载memory_graph配置失败使用默认配置: {e}")
            return cls()

    @classmethod
    def from_dict(cls, config_dict: Dict) -> MemoryGraphConfig:
        """Build a configuration from a plain dictionary.

        Accepts both the flat keys and the legacy nested sections, so the output
        of ``to_dict`` can be fed back in. For retrieval settings a flat
        ``search_*`` key, when present, overrides the same setting inside the
        nested ``"retrieval"`` section.

        Args:
            config_dict: mapping of setting names to values; missing keys use defaults.

        Returns:
            The resulting configuration.

        Raises:
            ValueError: if a nested section fails its own validation.
            TypeError: if a nested section contains an unknown key.
        """
        defaults = cls()
        flat = {
            name: config_dict.get(name, getattr(defaults, name))
            for name in cls._FLAT_FIELDS
        }

        # Start from the nested section and layer explicit flat keys on top, so
        # no keyword is ever passed twice to RetrievalConfig.
        retrieval_kwargs = dict(config_dict.get("retrieval") or {})
        for source, target in cls._RETRIEVAL_FIELDS:
            if source in config_dict:
                retrieval_kwargs[target] = config_dict[source]

        # to_dict() writes the storage directory as a string; restore the Path.
        storage_kwargs = dict(config_dict.get("storage") or {})
        if "data_dir" in storage_kwargs:
            storage_kwargs["data_dir"] = Path(storage_kwargs["data_dir"])

        return cls(
            consolidation=ConsolidationConfig(**(config_dict.get("consolidation") or {})),
            retrieval=RetrievalConfig(**retrieval_kwargs),
            node_merger=NodeMergerConfig(**(config_dict.get("node_merger") or {})),
            storage=StorageConfig(**storage_kwargs),
            decay_rates=config_dict.get("decay_rates", defaults.decay_rates),
            embedding_model=config_dict.get("embedding_model", defaults.embedding_model),
            embedding_dimension=config_dict.get("embedding_dimension", defaults.embedding_dimension),
            enable_debug_logging=config_dict.get("enable_debug_logging", defaults.enable_debug_logging),
            enable_visualization=config_dict.get("enable_visualization", defaults.enable_visualization),
            **flat,
        )

    def to_dict(self) -> Dict:
        """Serialize the configuration to a plain dictionary.

        The legacy keys come first, in their original order, followed by the
        flat settings. Path values are written as strings. The result can be
        passed to ``from_dict``.
        """
        flat = {name: getattr(self, name) for name in self._FLAT_FIELDS}
        flat["data_dir"] = str(self.data_dir)
        flat["vector_db_path"] = str(self.vector_db_path)

        return {
            "consolidation": {
                "interval_hours": self.consolidation.interval_hours,
                "batch_size": self.consolidation.batch_size,
                "enable_auto_discovery": self.consolidation.enable_auto_discovery,
                "enable_conflict_detection": self.consolidation.enable_conflict_detection,
            },
            "retrieval": {
                "default_mode": self.retrieval.default_mode,
                "max_expand_depth": self.retrieval.max_expand_depth,
                "vector_weight": self.retrieval.vector_weight,
                "graph_distance_weight": self.retrieval.graph_distance_weight,
                "importance_weight": self.retrieval.importance_weight,
                "recency_weight": self.retrieval.recency_weight,
            },
            "node_merger": {
                "similarity_threshold": self.node_merger.similarity_threshold,
                "context_match_required": self.node_merger.context_match_required,
                "merge_batch_size": self.node_merger.merge_batch_size,
            },
            "storage": {
                "data_dir": str(self.storage.data_dir),
                "vector_collection_name": self.storage.vector_collection_name,
                "graph_file_name": self.storage.graph_file_name,
                "enable_persistence": self.storage.enable_persistence,
                "auto_save_interval": self.storage.auto_save_interval,
            },
            # Copy so callers cannot mutate this instance through the result.
            "decay_rates": dict(self.decay_rates),
            "embedding_model": self.embedding_model,
            "embedding_dimension": self.embedding_dimension,
            "enable_debug_logging": self.enable_debug_logging,
            "enable_visualization": self.enable_visualization,
            **flat,
        }
# Default configuration instance
DEFAULT_CONFIG = MemoryGraphConfig()

View File

@@ -4,12 +4,13 @@
from __future__ import annotations
from dataclasses import dataclass
from typing import List, Optional, Tuple
import numpy as np
from src.common.logger import get_logger
from src.memory_graph.config import NodeMergerConfig
from src.config.official_configs import MemoryConfig
from src.memory_graph.models import MemoryNode, NodeType
from src.memory_graph.storage.graph_store import GraphStore
from src.memory_graph.storage.vector_store import VectorStore
@@ -31,7 +32,7 @@ class NodeMerger:
self,
vector_store: VectorStore,
graph_store: GraphStore,
config: Optional[NodeMergerConfig] = None,
config: MemoryConfig,
):
"""
初始化节点合并器
@@ -39,15 +40,15 @@ class NodeMerger:
Args:
vector_store: 向量存储
graph_store: 图存储
config: 配置对象
config: 记忆配置对象
"""
self.vector_store = vector_store
self.graph_store = graph_store
self.config = config or NodeMergerConfig()
self.config = config
logger.info(
f"初始化节点合并器: threshold={self.config.similarity_threshold}, "
f"context_match={self.config.context_match_required}"
f"初始化节点合并器: threshold={self.config.node_merger_similarity_threshold}, "
f"context_match={self.config.node_merger_context_match_required}"
)
async def find_similar_nodes(
@@ -71,7 +72,7 @@ class NodeMerger:
logger.warning(f"节点 {node.id} 没有 embedding无法查找相似节点")
return []
threshold = threshold or self.config.similarity_threshold
threshold = threshold or self.config.node_merger_similarity_threshold
try:
# 在向量存储中搜索相似节点
@@ -121,7 +122,7 @@ class NodeMerger:
是否应该合并
"""
# 1. 检查相似度阈值
if similarity < self.config.similarity_threshold:
if similarity < self.config.node_merger_similarity_threshold:
return False
# 2. 非常高的相似度(>0.95)直接合并
@@ -130,7 +131,7 @@ class NodeMerger:
return True
# 3. 如果不要求上下文匹配,则通过相似度判断
if not self.config.context_match_required:
if not self.config.node_merger_context_match_required:
return True
# 4. 检查上下文匹配

View File

@@ -15,6 +15,7 @@ from pathlib import Path
from typing import Any, Dict, List, Optional, Set, Tuple
from src.config.config import global_config
from src.config.official_configs import MemoryConfig
from src.memory_graph.core.builder import MemoryBuilder
from src.memory_graph.core.extractor import MemoryExtractor
from src.memory_graph.models import Memory, MemoryEdge, MemoryNode, MemoryType, NodeType, EdgeType
@@ -53,7 +54,7 @@ class MemoryManager:
if not global_config.memory or not getattr(global_config.memory, 'enable', False):
raise ValueError("记忆系统未启用,请在配置文件中启用 [memory] enable = true")
self.config = global_config.memory
self.config: MemoryConfig = global_config.memory
self.data_dir = data_dir or Path(getattr(self.config, 'data_dir', 'data/memory_graph'))
# 存储组件
@@ -132,12 +133,16 @@ class MemoryManager:
embedding_generator=self.embedding_generator,
)
# 检查配置值
expand_depth = self.config.search_max_expand_depth
logger.info(f"📊 配置检查: search_max_expand_depth={expand_depth}")
self.tools = MemoryTools(
vector_store=self.vector_store,
graph_store=self.graph_store,
persistence_manager=self.persistence,
embedding_generator=self.embedding_generator,
max_expand_depth=getattr(self.config, 'search_max_expand_depth', 1), # 从配置读取默认深度
max_expand_depth=expand_depth, # 从配置读取图扩展深度
)
self._initialized = True
@@ -433,7 +438,7 @@ class MemoryManager:
min_importance: float = 0.0,
include_forgotten: bool = False,
use_multi_query: bool = True,
expand_depth: int = 1,
expand_depth: int | None = None,
context: Optional[Dict[str, Any]] = None,
) -> List[Memory]:
"""
@@ -468,7 +473,7 @@ class MemoryManager:
"query": query,
"top_k": top_k,
"use_multi_query": use_multi_query,
"expand_depth": expand_depth, # 传递图扩展深度
"expand_depth": expand_depth or global_config.memory.search_max_expand_depth, # 传递图扩展深度
"context": context,
}

View File

@@ -51,6 +51,8 @@ class MemoryTools:
self.persistence_manager = persistence_manager
self._initialized = False
self.max_expand_depth = max_expand_depth # 保存配置的默认值
logger.info(f"MemoryTools 初始化: max_expand_depth={max_expand_depth}")
# 初始化组件
self.extractor = MemoryExtractor()