Files
Mofox-Core/src/memory_graph/models.py
2025-11-19 23:35:35 +08:00

300 lines
11 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
记忆图系统核心数据模型
定义节点、边、记忆等核心数据结构
"""
from __future__ import annotations
import uuid
from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum
from typing import Any
import numpy as np
class NodeType(Enum):
    """Kinds of node that can appear in a memory graph.

    Each member's value is the string label used when a node is serialized.
    """

    # Subject of the memory (e.g. "I", "Xiao Ming", "the teacher").
    SUBJECT = "主体"

    # Action or state (e.g. eating, mood, studying).
    TOPIC = "主题"

    # Object of the action (e.g. white rice, school, a book).
    OBJECT = "客体"

    # Extended attribute (e.g. time, place, reason).
    ATTRIBUTE = "属性"

    # Concrete value of an attribute (e.g. 2025-11-05, "unhappy").
    # NOTE(review): unlike its siblings this label is an empty string;
    # confirm that is intended before relying on it in stored data.
    VALUE = ""
class MemoryType(Enum):
    """Categories of memory; the value is the label used in serialized form."""

    # An action that happened at a point in time.
    EVENT = "事件"

    # A relatively stable state.
    FACT = "事实"

    # An interpersonal relationship.
    RELATION = "关系"

    # A subjective evaluation.
    OPINION = "观点"
class EdgeType(Enum):
    """Kinds of edge in a memory graph; the value is the serialized label."""

    # Subject -> topic.
    MEMORY_TYPE = "记忆类型"

    # Topic -> object (is / does / has).
    CORE_RELATION = "核心关系"

    # Any node -> attribute.
    ATTRIBUTE = "属性关系"

    # Memory -> memory (cause and effect).
    CAUSALITY = "因果关系"

    # Memory -> memory (reported / paraphrased).
    REFERENCE = "引用关系"

    # Memory -> memory (relationship found by automatic association).
    RELATION = "关联关系"
class MemoryStatus(Enum):
    """Lifecycle states of a memory; the value is the serialized label."""

    # Temporary state, not yet consolidated.
    STAGED = "staged"

    # Already consolidated.
    CONSOLIDATED = "consolidated"

    # Archived (low value, rarely accessed).
    ARCHIVED = "archived"
@dataclass
class MemoryNode:
    """A single node of the memory graph.

    Equality is implemented explicitly instead of relying on the
    dataclass-generated ``__eq__``: that version compares ``embedding`` with
    ``==``, which yields an array and raises ``ValueError`` when two
    otherwise-equal nodes both carry multi-element embeddings.
    """

    id: str  # Unique node ID; a falsy value is replaced by a UUID4 string.
    content: str  # Node text, e.g. "I", "eating", "white rice".
    node_type: NodeType  # Kind of node.
    embedding: np.ndarray | None = None  # Semantic vector (only needed for topic/object nodes).
    metadata: dict[str, Any] = field(default_factory=dict)  # Extension metadata.
    created_at: datetime = field(default_factory=datetime.now)  # Defaults to naive local "now".

    def __post_init__(self):
        """Generate an ID when none (or an empty one) was supplied."""
        if not self.id:
            self.id = str(uuid.uuid4())

    def __eq__(self, other: object) -> bool:
        """Compare two nodes field by field, treating embeddings as arrays.

        Returns ``NotImplemented`` for objects of a different class, mirroring
        what the dataclass-generated method would do.
        """
        if other.__class__ is not self.__class__:
            return NotImplemented
        if (
            self.id != other.id
            or self.content != other.content
            or self.node_type != other.node_type
            or self.metadata != other.metadata
            or self.created_at != other.created_at
        ):
            return False
        # A missing embedding only equals another missing embedding.
        if self.embedding is None or other.embedding is None:
            return self.embedding is None and other.embedding is None
        # array_equal collapses the element-wise result to one bool and
        # returns False (instead of raising) on a shape mismatch.
        return bool(np.array_equal(self.embedding, other.embedding))

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict representation suitable for serialization.

        The embedding is converted to a nested list, the node type to its
        enum value and the timestamp to an ISO-8601 string.
        """
        return {
            "id": self.id,
            "content": self.content,
            "node_type": self.node_type.value,
            "embedding": self.embedding.tolist() if self.embedding is not None else None,
            "metadata": self.metadata,
            "created_at": self.created_at.isoformat(),
        }

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> MemoryNode:
        """Build a node from a dict shaped like the output of ``to_dict``.

        Raises:
            KeyError: if ``id``, ``content``, ``node_type`` or ``created_at``
                is missing.
            ValueError: if ``node_type`` is not a valid ``NodeType`` value or
                ``created_at`` is not an ISO-8601 string.
        """
        embedding = None
        if data.get("embedding") is not None:
            embedding = np.array(data["embedding"])
        return cls(
            id=data["id"],
            content=data["content"],
            node_type=NodeType(data["node_type"]),
            embedding=embedding,
            metadata=data.get("metadata", {}),
            created_at=datetime.fromisoformat(data["created_at"]),
        )

    def has_embedding(self) -> bool:
        """Return True when a semantic vector is attached to this node."""
        return self.embedding is not None

    def __str__(self) -> str:
        return f"Node({self.node_type.value}: {self.content})"
@dataclass
class MemoryEdge:
    """A directed, labelled relation between two nodes (or memories)."""

    id: str  # Unique edge ID; a falsy value is replaced by a UUID4 string.
    source_id: str  # ID of the source node.
    target_id: str  # ID of the target node or target memory.
    relation: str  # Relation label, e.g. "is", "does", "time", "because".
    edge_type: EdgeType  # Kind of edge.
    importance: float = 0.5  # Importance, clamped into [0, 1] after init.
    metadata: dict[str, Any] = field(default_factory=dict)  # Extension metadata.
    created_at: datetime = field(default_factory=datetime.now)  # Defaults to naive local "now".

    def __post_init__(self):
        """Fill in a missing ID and clamp ``importance`` into [0, 1]."""
        if not self.id:
            self.id = str(uuid.uuid4())
        # Cap at 1.0 first, then floor at 0.0.
        capped = min(1.0, self.importance)
        self.importance = max(0.0, capped)

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict representation suitable for serialization."""
        payload: dict[str, Any] = {}
        payload["id"] = self.id
        payload["source_id"] = self.source_id
        payload["target_id"] = self.target_id
        payload["relation"] = self.relation
        payload["edge_type"] = self.edge_type.value
        payload["importance"] = self.importance
        payload["metadata"] = self.metadata
        payload["created_at"] = self.created_at.isoformat()
        return payload

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> MemoryEdge:
        """Build an edge from a dict shaped like the output of ``to_dict``.

        ``importance`` falls back to 0.5 and ``metadata`` to an empty dict
        when absent; every other key is required.
        """
        kwargs: dict[str, Any] = {
            "id": data["id"],
            "source_id": data["source_id"],
            "target_id": data["target_id"],
            "relation": data["relation"],
            "edge_type": EdgeType(data["edge_type"]),
            "importance": data.get("importance", 0.5),
            "metadata": data.get("metadata", {}),
            "created_at": datetime.fromisoformat(data["created_at"]),
        }
        return cls(**kwargs)

    def __str__(self) -> str:
        src, dst = self.source_id, self.target_id
        return f"Edge({src} --{self.relation}--> {dst})"
@dataclass
class Memory:
    """A complete memory: a sub-graph made of nodes and edges."""

    id: str  # Unique memory ID; a falsy value is replaced by a UUID4 string.
    subject_id: str  # ID of the subject node.
    memory_type: MemoryType  # Category of the memory.
    nodes: list[MemoryNode]  # Every node that belongs to this memory.
    edges: list[MemoryEdge]  # Every edge that belongs to this memory.
    importance: float = 0.5  # Overall importance, clamped into [0, 1] after init.
    activation: float = 0.0  # Activation level in [0, 1]; used for consolidation and forgetting.
    status: MemoryStatus = MemoryStatus.STAGED  # Lifecycle state.
    created_at: datetime = field(default_factory=datetime.now)
    last_accessed: datetime = field(default_factory=datetime.now)  # Time of the last access.
    access_count: int = 0  # Number of recorded accesses.
    decay_factor: float = 1.0  # Decay factor (changes over time).
    metadata: dict[str, Any] = field(default_factory=dict)  # Extension metadata.

    def __post_init__(self):
        """Fill in a missing ID and clamp ``importance``/``activation`` into [0, 1]."""
        if not self.id:
            self.id = str(uuid.uuid4())
        for attr in ("importance", "activation"):
            setattr(self, attr, max(0.0, min(1.0, getattr(self, attr))))

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict representation suitable for serialization.

        Nodes and edges are serialized through their own ``to_dict``; enums
        become their values and timestamps ISO-8601 strings.
        """
        payload: dict[str, Any] = {
            "id": self.id,
            "subject_id": self.subject_id,
            "memory_type": self.memory_type.value,
        }
        payload["nodes"] = [item.to_dict() for item in self.nodes]
        payload["edges"] = [item.to_dict() for item in self.edges]
        payload.update(
            importance=self.importance,
            activation=self.activation,
            status=self.status.value,
            created_at=self.created_at.isoformat(),
            last_accessed=self.last_accessed.isoformat(),
            access_count=self.access_count,
            decay_factor=self.decay_factor,
            metadata=self.metadata,
        )
        return payload

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> Memory:
        """Build a memory from a dict shaped like the output of ``to_dict``.

        ``id``, ``subject_id``, ``memory_type``, ``nodes``, ``edges`` and
        ``created_at`` are required. ``last_accessed`` falls back to
        ``created_at``; the remaining keys fall back to the field defaults.
        """
        kwargs: dict[str, Any] = {
            "id": data["id"],
            "subject_id": data["subject_id"],
            "memory_type": MemoryType(data["memory_type"]),
            "nodes": [MemoryNode.from_dict(raw) for raw in data["nodes"]],
            "edges": [MemoryEdge.from_dict(raw) for raw in data["edges"]],
            "importance": data.get("importance", 0.5),
            "activation": data.get("activation", 0.0),
            "status": MemoryStatus(data.get("status", "staged")),
            "created_at": datetime.fromisoformat(data["created_at"]),
            "last_accessed": datetime.fromisoformat(data.get("last_accessed", data["created_at"])),
            "access_count": data.get("access_count", 0),
            "decay_factor": data.get("decay_factor", 1.0),
            "metadata": data.get("metadata", {}),
        }
        return cls(**kwargs)

    def update_access(self) -> None:
        """Record one access: stamp the current time and bump the counter."""
        self.last_accessed = datetime.now()
        self.access_count = self.access_count + 1

    def get_node_by_id(self, node_id: str) -> MemoryNode | None:
        """Return the first node whose ID equals ``node_id``, or None."""
        return next((candidate for candidate in self.nodes if candidate.id == node_id), None)

    def get_subject_node(self) -> MemoryNode | None:
        """Return the node referenced by ``subject_id``, or None if absent."""
        return self.get_node_by_id(self.subject_id)

    def to_text(self) -> str:
        """Render the memory as short text (for display and LLM processing).

        The result is "<subject> <topic> <relation> <object>", with trailing
        pieces omitted when they cannot be resolved. When the subject node
        itself is missing, a placeholder built from the first 8 characters of
        the memory ID is returned.
        """
        subject = self.get_subject_node()
        if subject is None:
            return f"[记忆 {self.id[:8]}]"

        pieces = [f"{subject.content}"]

        # Topic: target of the first memory-type edge leaving the subject.
        # Only that first edge is considered, even if its target is unknown.
        link = next(
            (
                e
                for e in self.edges
                if e.edge_type == EdgeType.MEMORY_TYPE and e.source_id == self.subject_id
            ),
            None,
        )
        topic = self.get_node_by_id(link.target_id) if link is not None else None
        if topic is None:
            return " ".join(pieces)

        pieces.append(topic.content)

        # Object: first core-relation edge from the topic whose target resolves.
        # NOTE(review): the reviewed copy had lost its indentation; stopping
        # only after a successful lookup is the assumed reading — confirm.
        for e in self.edges:
            if e.edge_type != EdgeType.CORE_RELATION or e.source_id != topic.id:
                continue
            target = self.get_node_by_id(e.target_id)
            if target is None:
                continue
            pieces.append(f"{e.relation} {target.content}")
            break

        return " ".join(pieces)

    def __str__(self) -> str:
        return f"Memory({self.memory_type.value}: {self.to_text()})"
@dataclass
class StagedMemory:
    """A memory held in the temporary (not yet consolidated) stage."""

    memory: Memory  # The original memory object.
    status: MemoryStatus = MemoryStatus.STAGED  # Current state.
    created_at: datetime = field(default_factory=datetime.now)
    consolidated_at: datetime | None = None  # When consolidation happened, if it has.
    merge_history: list[str] = field(default_factory=list)  # IDs of nodes that were merged.

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict representation suitable for serialization."""
        payload: dict[str, Any] = {
            "memory": self.memory.to_dict(),
            "status": self.status.value,
            "created_at": self.created_at.isoformat(),
        }
        if self.consolidated_at:
            payload["consolidated_at"] = self.consolidated_at.isoformat()
        else:
            payload["consolidated_at"] = None
        payload["merge_history"] = self.merge_history
        return payload

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> StagedMemory:
        """Build a staged memory from a dict shaped like ``to_dict`` output.

        ``memory`` and ``created_at`` are required. ``status`` falls back to
        "staged", ``merge_history`` to an empty list, and a missing or falsy
        ``consolidated_at`` becomes None.
        """
        kwargs: dict[str, Any] = {
            "memory": Memory.from_dict(data["memory"]),
            "status": MemoryStatus(data.get("status", "staged")),
            "created_at": datetime.fromisoformat(data["created_at"]),
        }
        if data.get("consolidated_at"):
            kwargs["consolidated_at"] = datetime.fromisoformat(data["consolidated_at"])
        else:
            kwargs["consolidated_at"] = None
        kwargs["merge_history"] = data.get("merge_history", [])
        return cls(**kwargs)