feat(memory-graph): Phase 1 基础架构实现

- 定义核心数据模型 (MemoryNode, MemoryEdge, Memory)
- 实现配置管理系统 (MemoryGraphConfig)
- 实现向量存储层 (VectorStore with ChromaDB)
- 实现图存储层 (GraphStore with NetworkX)
- 创建设计文档大纲
- 添加基础测试并验证通过

待完成:
- 持久化管理
- 节点去重逻辑
- 记忆构建器
- 记忆检索器
This commit is contained in:
Windpicker-owo
2025-11-05 16:46:53 +08:00
parent b950ddba13
commit dd58f5da20
7 changed files with 2694 additions and 0 deletions

145
src/memory_graph/config.py Normal file
View File

@@ -0,0 +1,145 @@
"""
Configuration management for the memory graph system.
"""
from __future__ import annotations
from dataclasses import dataclass, field
from pathlib import Path
from typing import Dict, Optional
@dataclass
class ConsolidationConfig:
    """Settings for the memory consolidation pass.

    Attributes:
        interval_hours: Interval between consolidation runs, in hours.
        batch_size: Number of memories processed per run.
        enable_auto_discovery: Whether automatic association discovery is
            enabled.
        enable_conflict_detection: Whether conflict detection is enabled.
    """

    interval_hours: int = 6
    batch_size: int = 100
    enable_auto_discovery: bool = True
    enable_conflict_detection: bool = True
@dataclass
class RetrievalConfig:
    """Settings for memory retrieval and result scoring.

    Attributes:
        default_mode: Retrieval mode. The documented values are "auto",
            "fast" and "deep"; the value is not validated here.
        max_expand_depth: Maximum graph expansion depth.
        vector_weight: Weight of the vector-similarity score.
        graph_distance_weight: Weight of the graph-distance score.
        importance_weight: Weight of the importance score.
        recency_weight: Weight of the recency score.

    Raises:
        ValueError: If any weight is negative or NaN, or if the four weights
            do not sum to 1.0 within a tolerance of 0.01.
    """

    default_mode: str = "auto"
    max_expand_depth: int = 2
    vector_weight: float = 0.4
    graph_distance_weight: float = 0.2
    importance_weight: float = 0.2
    recency_weight: float = 0.2

    def __post_init__(self):
        """Validate the individual weights and their sum."""
        weights = {
            "vector_weight": self.vector_weight,
            "graph_distance_weight": self.graph_distance_weight,
            "importance_weight": self.importance_weight,
            "recency_weight": self.recency_weight,
        }
        for name, value in weights.items():
            # Written as `not value >= 0` (rather than `value < 0`) so that
            # NaN, for which every comparison is False, is rejected as well.
            if not value >= 0.0:
                raise ValueError(f"权重必须为非负数,{name} 当前为 {value}")

        total = self.vector_weight + self.graph_distance_weight + self.importance_weight + self.recency_weight
        # Negated `<=` instead of `>`: a NaN total must fail validation.
        if not abs(total - 1.0) <= 0.01:
            raise ValueError(f"权重总和必须为1.0,当前为 {total}")
@dataclass
class NodeMergerConfig:
    """Settings for node de-duplication (merging).

    Attributes:
        similarity_threshold: Similarity threshold; must lie in [0, 1].
        context_match_required: Whether a context match is required.
        merge_batch_size: Batch size used when processing merges.

    Raises:
        ValueError: If ``similarity_threshold`` is outside [0, 1].
    """

    similarity_threshold: float = 0.85
    context_match_required: bool = True
    merge_batch_size: int = 50

    def __post_init__(self):
        """Check that the similarity threshold is within range."""
        threshold = self.similarity_threshold
        # Keep the chained comparison: it is False for NaN, so NaN is
        # rejected just like any other out-of-range value.
        if 0.0 <= threshold <= 1.0:
            return
        raise ValueError(f"相似度阈值必须在 [0, 1] 范围内,当前为 {self.similarity_threshold}")
@dataclass
class StorageConfig:
    """Settings for on-disk storage.

    Attributes:
        data_dir: Data directory. A plain string is accepted and converted
            to ``Path`` so that values coming from a serialized config
            (where the path is stored as text) keep the annotated type.
        vector_collection_name: Name of the vector collection.
        graph_file_name: File name used for the graph.
        enable_persistence: Whether persistence is enabled.
        auto_save_interval: Auto-save interval, in seconds.
    """

    data_dir: Path = field(default_factory=lambda: Path("data/memory_graph"))
    vector_collection_name: str = "memory_nodes"
    graph_file_name: str = "memory_graph.json"
    enable_persistence: bool = True
    auto_save_interval: int = 300

    def __post_init__(self):
        """Normalize ``data_dir`` to a ``Path`` when it was given as text."""
        # Only strings are coerced; any other value is left untouched so that
        # inputs which were accepted before are still accepted.
        if isinstance(self.data_dir, str):
            self.data_dir = Path(self.data_dir)
@dataclass
class MemoryGraphConfig:
    """Top-level configuration for the memory graph system.

    Groups the per-subsystem configurations (consolidation, retrieval, node
    merging, storage) together with the time-decay rates, embedding settings
    and debug switches, and converts to and from plain dictionaries.
    """

    consolidation: ConsolidationConfig = field(default_factory=ConsolidationConfig)
    retrieval: RetrievalConfig = field(default_factory=RetrievalConfig)
    node_merger: NodeMergerConfig = field(default_factory=NodeMergerConfig)
    storage: StorageConfig = field(default_factory=StorageConfig)

    # Time-decay rate per memory type.
    decay_rates: Dict[str, float] = field(
        default_factory=lambda: {
            "EVENT": 0.05,  # events decay relatively fast
            "FACT": 0.01,  # facts decay slowly
            "RELATION": 0.005,  # relations decay very slowly
            "OPINION": 0.03,  # opinions decay at a moderate rate
        }
    )

    # Embedding model settings.
    embedding_model: Optional[str] = None  # None means "use the system default"
    embedding_dimension: int = 384  # defaults to the sentence-transformers dimension

    # Debugging and logging.
    enable_debug_logging: bool = False
    enable_visualization: bool = False  # whether memory visualization is enabled

    @classmethod
    def from_dict(cls, config_dict: Dict) -> MemoryGraphConfig:
        """Build a configuration from a plain dictionary.

        Any key that is absent falls back to the dataclass default. A
        section or ``decay_rates`` entry that is present but ``None`` is
        treated as absent. Mapping values are copied, so the resulting
        configuration does not share state with ``config_dict``.

        Args:
            config_dict: Dictionary in the layout produced by ``to_dict``.

        Returns:
            A new ``MemoryGraphConfig``.

        Raises:
            TypeError: If a section contains a key its config class does not
                accept.
            ValueError: If a section fails its own validation.
        """

        def section(name: str) -> Dict:
            # `or {}` covers both a missing section and an explicit null.
            return dict(config_dict.get(name) or {})

        storage_kwargs = section("storage")
        # `to_dict` stores the path as text; restore the annotated type.
        if storage_kwargs.get("data_dir") is not None:
            storage_kwargs["data_dir"] = Path(storage_kwargs["data_dir"])

        kwargs = {
            "consolidation": ConsolidationConfig(**section("consolidation")),
            "retrieval": RetrievalConfig(**section("retrieval")),
            "node_merger": NodeMergerConfig(**section("node_merger")),
            "storage": StorageConfig(**storage_kwargs),
        }

        decay_rates = config_dict.get("decay_rates")
        if decay_rates is not None:
            # Copy so later edits to the caller's dict do not leak in.
            kwargs["decay_rates"] = dict(decay_rates)

        # Scalars are forwarded only when present, so their defaults live in
        # exactly one place: the field declarations above.
        for key in (
            "embedding_model",
            "embedding_dimension",
            "enable_debug_logging",
            "enable_visualization",
        ):
            if key in config_dict:
                kwargs[key] = config_dict[key]

        return cls(**kwargs)

    def to_dict(self) -> Dict:
        """Convert the configuration into a plain dictionary.

        ``storage.data_dir`` is emitted as a string, and ``decay_rates`` is
        returned as a copy so that mutating the result cannot change this
        configuration.

        Returns:
            A nested dictionary accepted by ``from_dict``.
        """
        consolidation = self.consolidation
        retrieval = self.retrieval
        node_merger = self.node_merger
        storage = self.storage
        return {
            "consolidation": {
                "interval_hours": consolidation.interval_hours,
                "batch_size": consolidation.batch_size,
                "enable_auto_discovery": consolidation.enable_auto_discovery,
                "enable_conflict_detection": consolidation.enable_conflict_detection,
            },
            "retrieval": {
                "default_mode": retrieval.default_mode,
                "max_expand_depth": retrieval.max_expand_depth,
                "vector_weight": retrieval.vector_weight,
                "graph_distance_weight": retrieval.graph_distance_weight,
                "importance_weight": retrieval.importance_weight,
                "recency_weight": retrieval.recency_weight,
            },
            "node_merger": {
                "similarity_threshold": node_merger.similarity_threshold,
                "context_match_required": node_merger.context_match_required,
                "merge_batch_size": node_merger.merge_batch_size,
            },
            "storage": {
                "data_dir": str(storage.data_dir),
                "vector_collection_name": storage.vector_collection_name,
                "graph_file_name": storage.graph_file_name,
                "enable_persistence": storage.enable_persistence,
                "auto_save_interval": storage.auto_save_interval,
            },
            "decay_rates": dict(self.decay_rates),
            "embedding_model": self.embedding_model,
            "embedding_dimension": self.embedding_dimension,
            "enable_debug_logging": self.enable_debug_logging,
            "enable_visualization": self.enable_visualization,
        }
# Default configuration instance, built with every field at its default value.
# NOTE: the config dataclasses are not frozen, so mutating this module-level
# object changes it for every importer; create a separate MemoryGraphConfig
# when custom settings are needed.
DEFAULT_CONFIG = MemoryGraphConfig()