# Mofox-Core/src/memory_graph/manager.py
"""
记忆管理器 - Phase 3
统一的记忆系统管理接口,整合所有组件:
- 记忆创建、检索、更新、删除
- 记忆生命周期管理(激活、遗忘)
- 记忆整合与维护
- 多策略检索优化
"""
import asyncio
import logging
from datetime import datetime, timedelta
from pathlib import Path
from typing import Any, Dict, List, Optional, Set, Tuple
from src.config.config import global_config
from src.memory_graph.core.builder import MemoryBuilder
from src.memory_graph.core.extractor import MemoryExtractor
from src.memory_graph.models import Memory, MemoryEdge, MemoryNode, MemoryType, NodeType, EdgeType
from src.memory_graph.storage.graph_store import GraphStore
from src.memory_graph.storage.persistence import PersistenceManager
from src.memory_graph.storage.vector_store import VectorStore
from src.memory_graph.tools.memory_tools import MemoryTools
from src.memory_graph.utils.embeddings import EmbeddingGenerator
import uuid
logger = logging.getLogger(__name__)
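# Minimal usage sketch (illustrative only; parameter values are hypothetical and
# assume [memory] enable = true in the configuration):
#
#     manager = MemoryManager()
#     await manager.initialize()  # sets up storage, embeddings, tools, scheduler
#     mem = await manager.create_memory(
#         subject="小明", memory_type="事实", topic="喜欢喝咖啡", importance=0.6
#     )
#     hits = await manager.search_memories("小明的饮品偏好", top_k=5)
#     await manager.shutdown()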
class MemoryManager:
"""
记忆管理器
核心管理类,提供记忆系统的统一接口:
- 记忆 CRUD 操作
- 记忆生命周期管理
- 智能检索与推荐
- 记忆维护与优化
"""
def __init__(
self,
data_dir: Optional[Path] = None,
):
"""
初始化记忆管理器
Args:
data_dir: 数据目录可选默认从global_config读取
"""
# 直接使用 global_config.memory
if not global_config.memory or not getattr(global_config.memory, 'enable', False):
raise ValueError("记忆系统未启用,请在配置文件中启用 [memory] enable = true")
self.config = global_config.memory
self.data_dir = data_dir or Path(getattr(self.config, 'data_dir', 'data/memory_graph'))
# Storage components
self.vector_store: Optional[VectorStore] = None
self.graph_store: Optional[GraphStore] = None
self.persistence: Optional[PersistenceManager] = None
# Core components
self.embedding_generator: Optional[EmbeddingGenerator] = None
self.extractor: Optional[MemoryExtractor] = None
self.builder: Optional[MemoryBuilder] = None
self.tools: Optional[MemoryTools] = None
# State
self._initialized = False
self._last_maintenance = datetime.now()
self._maintenance_task: Optional[asyncio.Task] = None
self._maintenance_interval_hours = getattr(self.config, 'consolidation_interval_hours', 1.0)
self._maintenance_schedule_id: Optional[str] = None # Scheduled task ID
logger.info(f"记忆管理器已创建 (data_dir={self.data_dir}, enable={getattr(self.config, 'enable', False)})")
async def initialize(self) -> None:
"""
初始化所有组件
按照依赖顺序初始化:
1. 存储层(向量存储、图存储、持久化)
2. 工具层(嵌入生成器、提取器)
3. 管理层(构建器、工具接口)
"""
if self._initialized:
logger.warning("记忆管理器已经初始化")
return
try:
logger.info("开始初始化记忆管理器...")
# 1. Initialize the storage layer
self.data_dir.mkdir(parents=True, exist_ok=True)
# Fetch storage configuration
storage_config = getattr(self.config, 'storage', None)
vector_collection_name = getattr(storage_config, 'vector_collection_name', 'memory_graph') if storage_config else 'memory_graph'
self.vector_store = VectorStore(
collection_name=vector_collection_name,
data_dir=self.data_dir,
)
await self.vector_store.initialize()
self.persistence = PersistenceManager(data_dir=self.data_dir)
# Try to load existing graph data
self.graph_store = await self.persistence.load_graph_store()
if not self.graph_store:
logger.info("未找到现有图数据,创建新的图存储")
self.graph_store = GraphStore()
else:
stats = self.graph_store.get_statistics()
logger.info(
f"加载图数据: {stats['total_memories']} 条记忆, "
f"{stats['total_nodes']} 个节点, {stats['total_edges']} 条边"
)
# 2. Initialize the utility layer
self.embedding_generator = EmbeddingGenerator()
# EmbeddingGenerator is lazily initialized; it sets itself up on first use
self.extractor = MemoryExtractor()
# 3. Initialize the management layer
self.builder = MemoryBuilder(
vector_store=self.vector_store,
graph_store=self.graph_store,
embedding_generator=self.embedding_generator,
)
self.tools = MemoryTools(
vector_store=self.vector_store,
graph_store=self.graph_store,
persistence_manager=self.persistence,
embedding_generator=self.embedding_generator,
max_expand_depth=getattr(self.config, 'max_expand_depth', 1), # default expansion depth from config
)
self._initialized = True
logger.info("✅ 记忆管理器初始化完成")
# Start the background maintenance scheduler
await self.start_maintenance_scheduler()
except Exception as e:
logger.error(f"记忆管理器初始化失败: {e}", exc_info=True)
raise
async def shutdown(self) -> None:
"""
关闭记忆管理器
执行清理操作:
- 停止维护调度任务
- 保存所有数据
- 关闭存储组件
"""
if not self._initialized:
logger.warning("记忆管理器未初始化,无需关闭")
return
try:
logger.info("正在关闭记忆管理器...")
# 1. Stop the scheduled task
await self.stop_maintenance_scheduler()
# 2. Perform a final maintenance step (save data)
if self.graph_store and self.persistence:
logger.info("执行最终数据保存...")
await self.persistence.save_graph_store(self.graph_store)
# 3. Close storage components
if self.vector_store:
# VectorStore is backed by chromadb and needs no explicit close
pass
self._initialized = False
logger.info("✅ 记忆管理器已关闭")
except Exception as e:
logger.error(f"关闭记忆管理器失败: {e}", exc_info=True)
# ==================== Memory CRUD operations ====================
async def create_memory(
self,
subject: str,
memory_type: str,
topic: str,
object: Optional[str] = None,
attributes: Optional[Dict[str, str]] = None,
importance: float = 0.5,
**kwargs,
) -> Optional[Memory]:
"""
创建新记忆
Args:
subject: 主体(谁)
memory_type: 记忆类型(事件/观点/事实/关系)
topic: 主题(做什么/想什么)
object: 客体(对谁/对什么)
attributes: 属性字典(时间、地点、原因等)
importance: 重要性 (0.0-1.0)
**kwargs: 其他参数
Returns:
创建的记忆对象,失败返回 None
"""
if not self._initialized:
await self.initialize()
try:
result = await self.tools.create_memory(
subject=subject,
memory_type=memory_type,
topic=topic,
object=object,
attributes=attributes,
importance=importance,
**kwargs,
)
if result["success"]:
memory_id = result["memory_id"]
memory = self.graph_store.get_memory_by_id(memory_id)
logger.info(f"记忆创建成功: {memory_id}")
return memory
else:
logger.error(f"记忆创建失败: {result.get('error', 'Unknown error')}")
return None
except Exception as e:
logger.error(f"创建记忆时发生异常: {e}", exc_info=True)
return None
async def get_memory(self, memory_id: str) -> Optional[Memory]:
"""
根据 ID 获取记忆
Args:
memory_id: 记忆 ID
Returns:
记忆对象,不存在返回 None
"""
if not self._initialized:
await self.initialize()
return self.graph_store.get_memory_by_id(memory_id)
async def update_memory(
self,
memory_id: str,
**updates,
) -> bool:
"""
更新记忆
Args:
memory_id: 记忆 ID
**updates: 要更新的字段
Returns:
是否更新成功
"""
if not self._initialized:
await self.initialize()
try:
memory = self.graph_store.get_memory_by_id(memory_id)
if not memory:
logger.warning(f"记忆不存在: {memory_id}")
return False
# Update metadata
if "importance" in updates:
memory.importance = updates["importance"]
if "metadata" in updates:
memory.metadata.update(updates["metadata"])
memory.updated_at = datetime.now()
# Persist the update
await self.persistence.save_graph_store(self.graph_store)
logger.info(f"记忆更新成功: {memory_id}")
return True
except Exception as e:
logger.error(f"更新记忆失败: {e}", exc_info=True)
return False
async def delete_memory(self, memory_id: str) -> bool:
"""
删除记忆
Args:
memory_id: 记忆 ID
Returns:
是否删除成功
"""
if not self._initialized:
await self.initialize()
try:
memory = self.graph_store.get_memory_by_id(memory_id)
if not memory:
logger.warning(f"记忆不存在: {memory_id}")
return False
# Remove the memory's nodes from the vector store
for node in memory.nodes:
if node.embedding is not None:
await self.vector_store.delete_node(node.id)
# Remove the memory from the graph store
self.graph_store.remove_memory(memory_id)
# Persist the update
await self.persistence.save_graph_store(self.graph_store)
logger.info(f"记忆删除成功: {memory_id}")
return True
except Exception as e:
logger.error(f"删除记忆失败: {e}", exc_info=True)
return False
# ==================== Memory retrieval ====================
async def generate_multi_queries(
self,
query: str,
context: Optional[Dict[str, Any]] = None,
) -> List[Tuple[str, float]]:
"""
使用小模型生成多个查询语句(用于多路召回)
简化版多查询策略直接让小模型生成3-5个不同角度的查询
避免复杂的查询分解和组合逻辑。
Args:
query: 原始查询
context: 上下文信息(聊天历史、发言人、参与者等)
Returns:
List of (query_string, weight) - 查询语句和权重
"""
try:
from src.llm_models.utils_model import LLMRequest
from src.config.config import model_config
llm = LLMRequest(
model_set=model_config.model_task_config.utils_small,
request_type="memory.multi_query_generator"
)
# Build context information
chat_history = context.get("chat_history", "") if context else ""
prompt = f"""你是记忆检索助手。为提高检索准确率请为查询生成3-5个不同角度的搜索语句。
**核心原则(重要!):**
对于包含多个概念的复杂查询(如"小明如何评价新的记忆系统"),应该生成:
1. 完整查询(包含所有要素)- 权重1.0
2. 每个关键概念的独立查询(如"新的记忆系统"- 权重0.8,避免被主体淹没!
3. 主体+动作组合(如"小明 评价"- 权重0.6
4. 泛化查询(如"记忆系统"- 权重0.7
**要求:**
- 第一个必须是原始查询或同义改写
- 识别查询中的所有重要概念,为每个概念生成独立查询
- 查询简洁5-20字
- 直接输出JSON不要添加说明
**对话上下文:** {chat_history[-300:] if chat_history else ""}
**输出JSON格式**
```json
{{
"queries": [
{{"text": "完整查询", "weight": 1.0}},
{{"text": "关键概念1", "weight": 0.8}},
{{"text": "关键概念2", "weight": 0.8}},
{{"text": "组合查询", "weight": 0.6}}
]
}}
```"""
response, _ = await llm.generate_response_async(prompt, temperature=0.3, max_tokens=300)
# Parse the JSON response
import json, re
response = re.sub(r'```json\s*', '', response)
response = re.sub(r'```\s*$', '', response).strip()
try:
data = json.loads(response)
queries = data.get("queries", [])
result = []
for item in queries:
text = item.get("text", "").strip()
weight = float(item.get("weight", 0.5))
if text:
result.append((text, weight))
if result:
logger.info(f"生成 {len(result)} 个查询: {[q for q, _ in result]}")
return result
except json.JSONDecodeError as e:
logger.warning(f"解析失败: {e}, response={response[:100]}")
except Exception as e:
logger.warning(f"多查询生成失败: {e}")
# Fall back to the original query
return [(query, 1.0)]
async def search_memories(
self,
query: str,
top_k: int = 10,
memory_types: Optional[List[str]] = None,
time_range: Optional[Tuple[datetime, datetime]] = None,
min_importance: float = 0.0,
include_forgotten: bool = False,
optimize_query: bool = True,
use_multi_query: bool = True,
expand_depth: int = 1,
context: Optional[Dict[str, Any]] = None,
) -> List[Memory]:
"""
搜索记忆
使用多策略检索优化,解决复杂查询问题。
例如:"杰瑞喵如何评价新的记忆系统" 会被分解为多个子查询,
确保同时匹配"杰瑞喵""新的记忆系统"两个关键概念。
同时支持图扩展:从初始检索结果出发,沿图结构查找语义相关的邻居记忆。
Args:
query: 搜索查询
top_k: 返回结果数
memory_types: 记忆类型过滤
time_range: 时间范围过滤 (start, end)
min_importance: 最小重要性
include_forgotten: 是否包含已遗忘的记忆
optimize_query: 是否使用小模型优化查询(已弃用,被 use_multi_query 替代)
use_multi_query: 是否使用多查询策略推荐默认True
expand_depth: 图扩展深度0=禁用, 1=推荐, 2-3=深度探索)
context: 查询上下文(用于优化)
Returns:
记忆列表
"""
if not self._initialized:
await self.initialize()
try:
# Prepare search parameters
params = {
"query": query,
"top_k": top_k,
"use_multi_query": use_multi_query,
"expand_depth": expand_depth, # 传递图扩展深度
"context": context,
}
if memory_types:
params["memory_types"] = memory_types
# Execute the search
result = await self.tools.search_memories(**params)
if not result["success"]:
logger.error(f"搜索失败: {result.get('error', 'Unknown error')}")
return []
memories = result.get("results", [])
# Post-filtering
filtered_memories = []
for mem_dict in memories:
# Rebuild the Memory object from the dict
memory_id = mem_dict.get("memory_id", "")
if not memory_id:
continue
memory = self.graph_store.get_memory_by_id(memory_id)
if not memory:
continue
# Importance filter
if min_importance is not None and memory.importance < min_importance:
continue
# Forgotten-state filter
if not include_forgotten and memory.metadata.get("forgotten", False):
continue
# Time range filter
if time_range:
mem_time = memory.created_at
if not (time_range[0] <= mem_time <= time_range[1]):
continue
filtered_memories.append(memory)
strategy = result.get("strategy", "unknown")
logger.info(
f"搜索完成: 找到 {len(filtered_memories)} 条记忆 (策略={strategy})"
)
return filtered_memories[:top_k]
except Exception as e:
logger.error(f"搜索记忆失败: {e}", exc_info=True)
return []
async def link_memories(
self,
source_description: str,
target_description: str,
relation_type: str,
importance: float = 0.5,
) -> bool:
"""
关联两条记忆
Args:
source_description: 源记忆描述
target_description: 目标记忆描述
relation_type: 关系类型(导致/引用/相似/相反)
importance: 关系重要性
Returns:
是否关联成功
"""
if not self._initialized:
await self.initialize()
try:
result = await self.tools.link_memories(
source_memory_description=source_description,
target_memory_description=target_description,
relation_type=relation_type,
importance=importance,
)
if result["success"]:
logger.info(
f"记忆关联成功: {result['source_memory_id']} -> "
f"{result['target_memory_id']} ({relation_type})"
)
return True
else:
logger.error(f"记忆关联失败: {result.get('error', 'Unknown error')}")
return False
except Exception as e:
logger.error(f"关联记忆失败: {e}", exc_info=True)
return False
# ==================== Memory lifecycle management ====================
async def activate_memory(self, memory_id: str, strength: float = 1.0) -> bool:
"""
激活记忆
更新记忆的激活度,并传播到相关记忆
Args:
memory_id: 记忆 ID
strength: 激活强度 (0.0-1.0)
Returns:
是否激活成功
"""
if not self._initialized:
await self.initialize()
try:
memory = self.graph_store.get_memory_by_id(memory_id)
if not memory:
logger.warning(f"记忆不存在: {memory_id}")
return False
# Update activation info
now = datetime.now()
activation_info = memory.metadata.get("activation", {})
# Update the activation level (with time decay)
last_access = activation_info.get("last_access")
if last_access:
# Compute time decay
last_access_dt = datetime.fromisoformat(last_access)
hours_passed = (now - last_access_dt).total_seconds() / 3600
decay_rate = getattr(self.config, 'activation_decay_rate', 0.95)
decay_factor = decay_rate ** (hours_passed / 24)
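# Worked example (hypothetical numbers): with decay_rate=0.95 and 48 hours since the
# last access, decay_factor = 0.95 ** (48 / 24) = 0.95 ** 2 ≈ 0.9025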
current_activation = activation_info.get("level", 0.0) * decay_factor
else:
current_activation = 0.0
# New activation = current activation + activation strength
new_activation = min(1.0, current_activation + strength)
activation_info.update({
"level": new_activation,
"last_access": now.isoformat(),
"access_count": activation_info.get("access_count", 0) + 1,
})
memory.metadata["activation"] = activation_info
memory.last_accessed = now
# Activation propagation: activate related memories
if strength > 0.1: # only propagate sufficiently strong activations
propagation_depth = getattr(self.config, 'activation_propagation_depth', 2)
related_memories = self._get_related_memories(
memory_id,
max_depth=propagation_depth
)
propagation_strength_factor = getattr(self.config, 'activation_propagation_strength', 0.5)
propagation_strength = strength * propagation_strength_factor
max_related = getattr(self.config, 'max_related_memories', 5)
for related_id in related_memories[:max_related]:
await self.activate_memory(related_id, propagation_strength)
# Persist the update
await self.persistence.save_graph_store(self.graph_store)
logger.debug(f"记忆已激活: {memory_id} (level={new_activation:.3f})")
return True
except Exception as e:
logger.error(f"激活记忆失败: {e}", exc_info=True)
return False
def _get_related_memories(self, memory_id: str, max_depth: int = 1) -> List[str]:
"""
获取相关记忆 ID 列表(旧版本,保留用于激活传播)
Args:
memory_id: 记忆 ID
max_depth: 最大遍历深度
Returns:
相关记忆 ID 列表
"""
memory = self.graph_store.get_memory_by_id(memory_id)
if not memory:
return []
related_ids = set()
# Iterate over the memory's nodes
for node in memory.nodes:
# Get the node's neighbors
neighbors = list(self.graph_store.graph.neighbors(node.id))
for neighbor_id in neighbors:
# Get the memories that the neighbor node belongs to
neighbor_node = self.graph_store.graph.nodes.get(neighbor_id)
if neighbor_node:
neighbor_memory_ids = neighbor_node.get("memory_ids", [])
for mem_id in neighbor_memory_ids:
if mem_id != memory_id:
related_ids.add(mem_id)
return list(related_ids)
async def expand_memories_with_semantic_filter(
self,
initial_memory_ids: List[str],
query_embedding: "np.ndarray",
max_depth: int = 2,
semantic_threshold: float = 0.5,
max_expanded: int = 20
) -> List[Tuple[str, float]]:
"""
从初始记忆集合出发,沿图结构扩展,并用语义相似度过滤
这个方法解决了纯向量搜索可能遗漏的"语义相关且图结构相关"的记忆。
Args:
initial_memory_ids: 初始记忆ID集合由向量搜索得到
query_embedding: 查询向量
max_depth: 最大扩展深度1-3推荐
semantic_threshold: 语义相似度阈值0.5推荐)
max_expanded: 最多扩展多少个记忆
Returns:
List[(memory_id, relevance_score)] 按相关度排序
"""
if not initial_memory_ids or query_embedding is None:
return []
try:
import numpy as np
# Track visited memories to avoid duplicates
visited_memories = set(initial_memory_ids)
# Expanded memories and their scores
expanded_memories: Dict[str, float] = {}
# BFS expansion
current_level = initial_memory_ids
for depth in range(max_depth):
next_level = []
for memory_id in current_level:
memory = self.graph_store.get_memory_by_id(memory_id)
if not memory:
continue
# Iterate over all nodes of this memory
for node in memory.nodes:
if not node.has_embedding():
continue
# Get neighbor nodes
try:
neighbors = list(self.graph_store.graph.neighbors(node.id))
except Exception:
continue
for neighbor_id in neighbors:
# Fetch neighbor node info
neighbor_node_data = self.graph_store.graph.nodes.get(neighbor_id)
if not neighbor_node_data:
continue
# Fetch the neighbor node's embedding (from the vector store)
neighbor_vector_data = await self.vector_store.get_node_by_id(neighbor_id)
if not neighbor_vector_data or neighbor_vector_data.get("embedding") is None:
continue
neighbor_embedding = neighbor_vector_data["embedding"]
# Compute semantic similarity to the query
semantic_sim = self._cosine_similarity(
query_embedding,
neighbor_embedding
)
# Get the edge weight
try:
edge_data = self.graph_store.graph.get_edge_data(node.id, neighbor_id)
edge_importance = edge_data.get("importance", 0.5) if edge_data else 0.5
except Exception:
edge_importance = 0.5
# Combined score: semantic similarity (70%) + graph edge weight (20%) + depth decay (10%)
depth_decay = 1.0 / (depth + 1) # deeper levels get less weight
relevance_score = (
semantic_sim * 0.7 +
edge_importance * 0.2 +
depth_decay * 0.1
)
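# Worked example (hypothetical numbers): semantic_sim=0.8, edge_importance=0.5 at depth 0
# gives relevance_score = 0.8*0.7 + 0.5*0.2 + 1.0*0.1 = 0.76, which passes a 0.5 threshold.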
# Keep only nodes above the threshold
if relevance_score < semantic_threshold:
continue
# Extract the memories that the neighbor node belongs to
neighbor_memory_ids = neighbor_node_data.get("memory_ids", [])
if isinstance(neighbor_memory_ids, str):
import json
try:
neighbor_memory_ids = json.loads(neighbor_memory_ids)
except Exception:
neighbor_memory_ids = [neighbor_memory_ids]
for neighbor_mem_id in neighbor_memory_ids:
if neighbor_mem_id in visited_memories:
continue
# Record this expanded memory
if neighbor_mem_id not in expanded_memories:
expanded_memories[neighbor_mem_id] = relevance_score
visited_memories.add(neighbor_mem_id)
next_level.append(neighbor_mem_id)
else:
# If already present, keep the highest score
expanded_memories[neighbor_mem_id] = max(
expanded_memories[neighbor_mem_id],
relevance_score
)
# Terminate early if there are no new nodes or the limit has been reached
if not next_level or len(expanded_memories) >= max_expanded:
break
current_level = next_level[:max_expanded] # limit expansions per level
# Sort and return
sorted_results = sorted(
expanded_memories.items(),
key=lambda x: x[1],
reverse=True
)[:max_expanded]
logger.info(
f"图扩展完成: 初始{len(initial_memory_ids)}个 → "
f"扩展{len(sorted_results)}个新记忆 "
f"(深度={max_depth}, 阈值={semantic_threshold:.2f})"
)
return sorted_results
except Exception as e:
logger.error(f"语义图扩展失败: {e}", exc_info=True)
return []
def _cosine_similarity(self, vec1: "np.ndarray", vec2: "np.ndarray") -> float:
"""计算余弦相似度"""
try:
import numpy as np
# Ensure numpy arrays
if not isinstance(vec1, np.ndarray):
vec1 = np.array(vec1)
if not isinstance(vec2, np.ndarray):
vec2 = np.array(vec2)
# Compute norms
vec1_norm = np.linalg.norm(vec1)
vec2_norm = np.linalg.norm(vec2)
if vec1_norm == 0 or vec2_norm == 0:
return 0.0
# Cosine similarity
similarity = np.dot(vec1, vec2) / (vec1_norm * vec2_norm)
return float(similarity)
except Exception as e:
logger.warning(f"计算余弦相似度失败: {e}")
return 0.0
async def forget_memory(self, memory_id: str) -> bool:
"""
遗忘记忆(标记为已遗忘,不删除)
Args:
memory_id: 记忆 ID
Returns:
是否遗忘成功
"""
if not self._initialized:
await self.initialize()
try:
memory = self.graph_store.get_memory_by_id(memory_id)
if not memory:
logger.warning(f"记忆不存在: {memory_id}")
return False
memory.metadata["forgotten"] = True
memory.metadata["forgotten_at"] = datetime.now().isoformat()
# Persist the update
await self.persistence.save_graph_store(self.graph_store)
logger.info(f"记忆已遗忘: {memory_id}")
return True
except Exception as e:
logger.error(f"遗忘记忆失败: {e}", exc_info=True)
return False
async def auto_forget_memories(self, threshold: float = 0.1) -> int:
"""
自动遗忘低激活度的记忆
Args:
threshold: 激活度阈值
Returns:
遗忘的记忆数量
"""
if not self._initialized:
await self.initialize()
try:
forgotten_count = 0
all_memories = self.graph_store.get_all_memories()
for memory in all_memories:
# Skip memories that are already forgotten
if memory.metadata.get("forgotten", False):
continue
# Skip high-importance memories
min_importance = getattr(self.config, 'forgetting_min_importance', 7.0)
if memory.importance >= min_importance:
continue
# Compute the current activation level
activation_info = memory.metadata.get("activation", {})
last_access = activation_info.get("last_access")
if last_access:
last_access_dt = datetime.fromisoformat(last_access)
days_passed = (datetime.now() - last_access_dt).days
# Apply time decay for memories not accessed for a long time
decay_factor = 0.9 ** days_passed
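# e.g. 7 days without access: 0.9 ** 7 ≈ 0.478, so a stored level of 0.2 decays to
# ≈ 0.096, which falls below the default threshold of 0.1 (hypothetical numbers)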
current_activation = activation_info.get("level", 0.0) * decay_factor
# Forget it if activation falls below the threshold
if current_activation < threshold:
await self.forget_memory(memory.id)
forgotten_count += 1
logger.info(f"自动遗忘完成: 遗忘了 {forgotten_count} 条记忆")
return forgotten_count
except Exception as e:
logger.error(f"自动遗忘失败: {e}", exc_info=True)
return 0
# ==================== Statistics & maintenance ====================
def get_statistics(self) -> Dict[str, Any]:
"""
获取记忆系统统计信息
Returns:
统计信息字典
"""
if not self._initialized or not self.graph_store:
return {}
stats = self.graph_store.get_statistics()
# Add activation statistics
all_memories = self.graph_store.get_all_memories()
activation_levels = []
forgotten_count = 0
for memory in all_memories:
if memory.metadata.get("forgotten", False):
forgotten_count += 1
else:
activation_info = memory.metadata.get("activation", {})
activation_levels.append(activation_info.get("level", 0.0))
if activation_levels:
stats["avg_activation"] = sum(activation_levels) / len(activation_levels)
stats["max_activation"] = max(activation_levels)
else:
stats["avg_activation"] = 0.0
stats["max_activation"] = 0.0
stats["forgotten_memories"] = forgotten_count
stats["active_memories"] = stats["total_memories"] - forgotten_count
return stats
async def consolidate_memories(
self,
similarity_threshold: float = 0.85,
time_window_hours: int = 24,
) -> Dict[str, Any]:
"""
整理记忆:合并相似记忆
Args:
similarity_threshold: 相似度阈值
time_window_hours: 时间窗口(小时)
Returns:
整理结果
"""
if not self._initialized:
await self.initialize()
try:
logger.info(f"开始记忆整理 (similarity_threshold={similarity_threshold}, time_window={time_window_hours}h)...")
result = {
"merged_count": 0,
"checked_count": 0,
}
# Fetch recently created memories
cutoff_time = datetime.now() - timedelta(hours=time_window_hours)
all_memories = self.graph_store.get_all_memories()
recent_memories = [
mem for mem in all_memories
if mem.created_at >= cutoff_time and not mem.metadata.get("forgotten", False)
]
if not recent_memories:
logger.info("没有需要整理的记忆")
return result
logger.info(f"找到 {len(recent_memories)} 条待整理记忆")
result["checked_count"] = len(recent_memories)
# Group memories by type
memories_by_type: Dict[str, List[Memory]] = {}
for mem in recent_memories:
mem_type = mem.metadata.get("memory_type", "")
if mem_type not in memories_by_type:
memories_by_type[mem_type] = []
memories_by_type[mem_type].append(mem)
# Run pairwise similarity checks within each type
for mem_type, memories in memories_by_type.items():
if len(memories) < 2:
continue
logger.debug(f"检查类型 '{mem_type}'{len(memories)} 条记忆")
# Vector-similarity check
for i in range(len(memories)):
for j in range(i + 1, len(memories)):
mem_i = memories[i]
mem_j = memories[j]
# Get the topic nodes' embeddings
topic_i = next((n for n in mem_i.nodes if n.node_type == NodeType.TOPIC), None)
topic_j = next((n for n in mem_j.nodes if n.node_type == NodeType.TOPIC), None)
if not topic_i or not topic_j:
continue
if topic_i.embedding is None or topic_j.embedding is None:
continue
# Compute cosine similarity
import numpy as np
similarity = np.dot(topic_i.embedding, topic_j.embedding) / (
np.linalg.norm(topic_i.embedding) * np.linalg.norm(topic_j.embedding)
)
if similarity >= similarity_threshold:
# Merge: keep the more important memory and delete the other
if mem_i.importance >= mem_j.importance:
keep_mem, remove_mem = mem_i, mem_j
else:
keep_mem, remove_mem = mem_j, mem_i
logger.info(
f"合并相似记忆 (similarity={similarity:.3f}): "
f"保留 {keep_mem.id}, 删除 {remove_mem.id}"
)
# Boost the importance of the kept memory
keep_mem.importance = min(1.0, keep_mem.importance + 0.1)
keep_mem.activation = min(1.0, keep_mem.activation + 0.1)
# Delete the similar memory
await self.delete_memory(remove_mem.id)
result["merged_count"] += 1
logger.info(f"记忆整理完成: {result}")
return result
except Exception as e:
logger.error(f"记忆整理失败: {e}", exc_info=True)
return {"error": str(e), "merged_count": 0, "checked_count": 0}
async def auto_link_memories(
self,
time_window_hours: Optional[float] = None,
max_candidates: Optional[int] = None,
min_confidence: Optional[float] = None,
) -> Dict[str, Any]:
"""
自动关联记忆
使用LLM分析记忆之间的关系自动建立关联边。
Args:
time_window_hours: 分析时间窗口(小时)
max_candidates: 每个记忆最多关联的候选数
min_confidence: 最低置信度阈值
Returns:
关联结果统计
"""
if not self._initialized:
await self.initialize()
# Use configured values unless overridden by arguments
time_window_hours = time_window_hours if time_window_hours is not None else 24
max_candidates = max_candidates if max_candidates is not None else getattr(self.config, 'auto_link_max_candidates', 10)
min_confidence = min_confidence if min_confidence is not None else getattr(self.config, 'auto_link_min_confidence', 0.7)
try:
logger.info(f"开始自动关联记忆 (时间窗口={time_window_hours}h)...")
result = {
"checked_count": 0,
"linked_count": 0,
"relation_stats": {}, # 关系类型统计 {类型: 数量}
"relations": {}, # 详细关系 {source_id: [关系列表]}
}
# 1. Fetch memories within the time window
time_threshold = datetime.now() - timedelta(hours=time_window_hours)
all_memories = self.graph_store.get_all_memories()
recent_memories = [
mem for mem in all_memories
if mem.created_at >= time_threshold
and not mem.metadata.get("forgotten", False)
]
if len(recent_memories) < 2:
logger.info("记忆数量不足,跳过自动关联")
return result
logger.info(f"找到 {len(recent_memories)} 条待关联记忆")
# 2. Find link candidates for each memory
for memory in recent_memories:
result["checked_count"] += 1
# Skip memories that already have many connections
existing_edges = len([
e for e in memory.edges
if e.edge_type == EdgeType.RELATION
])
if existing_edges >= 10:
continue
# 3. Find candidate memories via vector search
candidates = await self._find_link_candidates(
memory,
exclude_ids={memory.id},
max_results=max_candidates
)
if not candidates:
continue
# 4. Analyze relations with the LLM
relations = await self._analyze_memory_relations(
source_memory=memory,
candidate_memories=candidates,
min_confidence=min_confidence
)
# 5. Create the links
for relation in relations:
try:
# Create the relation edge
edge = MemoryEdge(
id=f"edge_{uuid.uuid4().hex[:12]}",
source_id=memory.subject_id,
target_id=relation["target_memory"].subject_id,
relation=relation["relation_type"],
edge_type=EdgeType.RELATION,
importance=relation["confidence"],
metadata={
"auto_linked": True,
"confidence": relation["confidence"],
"reasoning": relation["reasoning"],
"created_at": datetime.now().isoformat(),
}
)
# Add to the graph
self.graph_store.graph.add_edge(
edge.source_id,
edge.target_id,
edge_id=edge.id,
relation=edge.relation,
edge_type=edge.edge_type.value,
importance=edge.importance,
metadata=edge.metadata,
)
# Also append to the memory's edge list
memory.edges.append(edge)
result["linked_count"] += 1
# Update statistics
result["relation_stats"][relation["relation_type"]] = \
result["relation_stats"].get(relation["relation_type"], 0) + 1
# Record relation details
if memory.id not in result["relations"]:
result["relations"][memory.id] = []
result["relations"][memory.id].append({
"target_id": relation["target_memory"].id,
"relation_type": relation["relation_type"],
"confidence": relation["confidence"],
"reasoning": relation["reasoning"],
})
logger.info(
f"建立关联: {memory.id[:8]} --[{relation['relation_type']}]--> "
f"{relation['target_memory'].id[:8]} "
f"(置信度={relation['confidence']:.2f})"
)
except Exception as e:
logger.warning(f"建立关联失败: {e}")
continue
# Save the updated graph data
if result["linked_count"] > 0:
await self.persistence.save_graph_store(self.graph_store)
logger.info(f"已保存 {result['linked_count']} 条自动关联边")
logger.info(f"自动关联完成: {result}")
return result
except Exception as e:
logger.error(f"自动关联失败: {e}", exc_info=True)
return {"error": str(e), "checked_count": 0, "linked_count": 0}
async def _find_link_candidates(
self,
memory: Memory,
exclude_ids: Set[str],
max_results: int = 5,
) -> List[Memory]:
"""
为记忆寻找关联候选
使用向量相似度 + 时间接近度找到潜在相关记忆
"""
try:
# Get the memory's topic node
topic_node = next(
(n for n in memory.nodes if n.node_type == NodeType.TOPIC),
None
)
if not topic_node or not topic_node.content:
return []
# Search for similar memories using the topic content
candidates = await self.search_memories(
query=topic_node.content,
top_k=max_results * 2,
include_forgotten=False,
optimize_query=False,
)
# Filter: exclude the memory itself and already linked memories
existing_targets = {
e.target_id for e in memory.edges
if e.edge_type == EdgeType.RELATION
}
filtered = [
c for c in candidates
if c.id not in exclude_ids
and c.id not in existing_targets
]
return filtered[:max_results]
except Exception as e:
logger.warning(f"查找候选失败: {e}")
return []
async def _analyze_memory_relations(
self,
source_memory: Memory,
candidate_memories: List[Memory],
min_confidence: float = 0.7,
) -> List[Dict[str, Any]]:
"""
使用LLM分析记忆之间的关系
Args:
source_memory: 源记忆
candidate_memories: 候选记忆列表
min_confidence: 最低置信度
Returns:
关系列表,每项包含:
- target_memory: 目标记忆
- relation_type: 关系类型
- confidence: 置信度
- reasoning: 推理过程
"""
try:
from src.llm_models.utils_model import LLMRequest
from src.config.config import model_config
# Build the LLM request
llm = LLMRequest(
model_set=model_config.model_task_config.utils_small,
request_type="memory.relation_analysis"
)
# Format memory info
source_desc = self._format_memory_for_llm(source_memory)
candidates_desc = "\n\n".join([
f"记忆{i+1}:\n{self._format_memory_for_llm(mem)}"
for i, mem in enumerate(candidate_memories)
])
# Build the prompt
prompt = f"""你是一个记忆关系分析专家。请分析源记忆与候选记忆之间是否存在有意义的关系。
**关系类型说明:**
- 导致: A的发生导致了B的发生因果关系
- 引用: A提到或涉及B引用关系
- 相似: A和B描述相似的内容相似关系
- 相反: A和B表达相反的观点对立关系
- 关联: A和B存在某种关联但不属于以上类型一般关联
**源记忆:**
{source_desc}
**候选记忆:**
{candidates_desc}
**任务要求:**
1. 对每个候选记忆,判断是否与源记忆存在关系
2. 如果存在关系,指定关系类型和置信度(0.0-1.0)
3. 简要说明判断理由
4. 只返回置信度 >= {min_confidence} 的关系
**输出格式JSON**
```json
[
{{
"candidate_id": 1,
"has_relation": true,
"relation_type": "导致",
"confidence": 0.85,
"reasoning": "记忆1是记忆源的结果"
}},
{{
"candidate_id": 2,
"has_relation": false,
"reasoning": "两者无明显关联"
}}
]
```
请分析并输出JSON结果"""
# Call the LLM
response, _ = await llm.generate_response_async(
prompt,
temperature=0.3,
max_tokens=1000,
)
# Parse the response
import json
import re
# Extract the JSON block
json_match = re.search(r'```json\s*(.*?)\s*```', response, re.DOTALL)
if json_match:
json_str = json_match.group(1)
else:
json_str = response.strip()
try:
analysis_results = json.loads(json_str)
except json.JSONDecodeError:
logger.warning(f"LLM返回格式错误尝试修复: {response[:200]}")
# Try a simple fix
json_str = re.sub(r'[\r\n\t]', '', json_str)
analysis_results = json.loads(json_str)
# Convert to the result format
relations = []
for result in analysis_results:
if not result.get("has_relation", False):
continue
confidence = result.get("confidence", 0.0)
if confidence < min_confidence:
continue
candidate_id = result.get("candidate_id", 0) - 1
if 0 <= candidate_id < len(candidate_memories):
relations.append({
"target_memory": candidate_memories[candidate_id],
"relation_type": result.get("relation_type", "关联"),
"confidence": confidence,
"reasoning": result.get("reasoning", ""),
})
logger.debug(f"LLM分析完成: 发现 {len(relations)} 个关系")
return relations
except Exception as e:
logger.error(f"LLM关系分析失败: {e}", exc_info=True)
return []
def _format_memory_for_llm(self, memory: Memory) -> str:
"""格式化记忆为LLM可读的文本"""
try:
# Get the key nodes
subject_node = next(
(n for n in memory.nodes if n.node_type == NodeType.SUBJECT),
None
)
topic_node = next(
(n for n in memory.nodes if n.node_type == NodeType.TOPIC),
None
)
object_node = next(
(n for n in memory.nodes if n.node_type == NodeType.OBJECT),
None
)
parts = []
parts.append(f"类型: {memory.memory_type.value}")
if subject_node:
parts.append(f"主体: {subject_node.content}")
if topic_node:
parts.append(f"主题: {topic_node.content}")
if object_node:
parts.append(f"对象: {object_node.content}")
parts.append(f"重要性: {memory.importance:.2f}")
parts.append(f"时间: {memory.created_at.strftime('%Y-%m-%d %H:%M')}")
return " | ".join(parts)
except Exception as e:
logger.warning(f"格式化记忆失败: {e}")
return f"记忆ID: {memory.id}"
async def maintenance(self) -> Dict[str, Any]:
"""
执行维护任务
包括:
- 记忆整理(合并相似记忆)
- 清理过期记忆
- 自动遗忘低激活度记忆
- 保存数据
Returns:
维护结果
"""
if not self._initialized:
await self.initialize()
try:
logger.info("开始执行记忆系统维护...")
result = {
"consolidated": 0,
"forgotten": 0,
"deleted": 0,
"saved": False,
}
# 1. Memory consolidation (merge similar memories)
if getattr(self.config, 'consolidation_enabled', False):
consolidate_result = await self.consolidate_memories(
similarity_threshold=getattr(self.config, 'consolidation_similarity_threshold', 0.9),
time_window_hours=getattr(self.config, 'consolidation_time_window_hours', 24.0)
)
result["consolidated"] = consolidate_result.get("merged_count", 0)
# 2. Automatically link memories (discover and create relations)
if getattr(self.config, 'auto_link_enabled', True):
link_result = await self.auto_link_memories()
result["linked"] = link_result.get("linked_count", 0)
# 3. Automatic forgetting
if getattr(self.config, 'forgetting_enabled', True):
forgotten_count = await self.auto_forget_memories(
threshold=getattr(self.config, 'forgetting_activation_threshold', 0.1)
)
result["forgotten"] = forgotten_count
# 4. Clean up very old forgotten memories (optional)
# TODO: implement cleanup logic
# 5. Save data
await self.persistence.save_graph_store(self.graph_store)
result["saved"] = True
self._last_maintenance = datetime.now()
logger.info(f"维护完成: {result}")
return result
except Exception as e:
logger.error(f"维护失败: {e}", exc_info=True)
return {"error": str(e)}
async def start_maintenance_scheduler(self) -> None:
"""
启动记忆维护调度任务
使用 unified_scheduler 定期执行维护任务:
- 记忆整合(合并相似记忆)
- 自动遗忘低激活度记忆
- 保存数据
默认间隔1小时
"""
try:
from src.schedule.unified_scheduler import TriggerType, unified_scheduler
# If a scheduled task already exists, remove it first
if self._maintenance_schedule_id:
await unified_scheduler.remove_schedule(self._maintenance_schedule_id)
logger.info("移除旧的维护调度任务")
# Create a new scheduled task
interval_seconds = self._maintenance_interval_hours * 3600
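# e.g. with the default consolidation_interval_hours = 1.0 this is 3600 seconds; the same
# value is reused below for both the initial delay and the recurring interval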
self._maintenance_schedule_id = await unified_scheduler.create_schedule(
callback=self.maintenance,
trigger_type=TriggerType.TIME,
trigger_config={
"delay_seconds": interval_seconds, # 首次延迟启动后1小时
"interval_seconds": interval_seconds, # 循环间隔
},
is_recurring=True,
task_name="memory_maintenance",
)
logger.info(
f"✅ 记忆维护调度任务已启动 "
f"(间隔={self._maintenance_interval_hours}小时, "
f"schedule_id={self._maintenance_schedule_id[:8]}...)"
)
except ImportError:
logger.warning("无法导入 unified_scheduler维护调度功能不可用")
except Exception as e:
logger.error(f"启动维护调度任务失败: {e}", exc_info=True)
async def stop_maintenance_scheduler(self) -> None:
"""
停止记忆维护调度任务
"""
if not self._maintenance_schedule_id:
return
try:
from src.schedule.unified_scheduler import unified_scheduler
success = await unified_scheduler.remove_schedule(self._maintenance_schedule_id)
if success:
logger.info(f"✅ 记忆维护调度任务已停止 (schedule_id={self._maintenance_schedule_id[:8]}...)")
else:
logger.warning(f"停止维护调度任务失败 (schedule_id={self._maintenance_schedule_id[:8]}...)")
self._maintenance_schedule_id = None
except ImportError:
logger.warning("无法导入 unified_scheduler")
except Exception as e:
logger.error(f"停止维护调度任务失败: {e}", exc_info=True)