style: unify code style and adopt modern type annotations

A comprehensive style cleanup and modernization pass over the entire codebase, mainly covering:

- Removed trailing whitespace in all files.
- Updated type hints to the modern syntax introduced by PEP 585 and PEP 604 (e.g., `list` instead of `List`, `|` instead of `Optional`).
- Removed unused import statements in several modules.
- Removed redundant f-strings that contain no interpolated variables.
- Adjusted the order of `__all__` exports in some `__init__.py` files for consistency.

These changes are intended to improve readability and maintainability and bring the code in line with modern Python best practices; no core logic was modified.
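
As an illustration of the type-hint and f-string items above, here is a minimal before/after sketch (a hypothetical function, not taken from this diff; the new annotation syntax assumes Python 3.10+ for `X | None`):

```python
from typing import Dict, List, Optional

# Before: typing-module generics, Optional, and an f-string with no placeholders
def load_memories_old(ids: Optional[List[str]] = None) -> Dict[str, float]:
    print(f"loading memories")  # redundant f-string, nothing interpolated
    return {}

# After: PEP 585 built-in generics, PEP 604 union syntax, plain string literal
def load_memories(ids: list[str] | None = None) -> dict[str, float]:
    print("loading memories")
    return {}
```
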
minecraft1024a
2025-11-12 12:49:40 +08:00
parent daf8ea7e6a
commit 0e1e9935b2
33 changed files with 227 additions and 229 deletions

View File

@@ -128,7 +128,7 @@ class MemoryBuilder:
# 6. 构建 Memory 对象
# 新记忆应该有较高的初始激活度
initial_activation = 0.75 # 新记忆初始激活度为 0.75
memory = Memory(
id=memory_id,
subject_id=subject_node.id,

View File

@@ -149,7 +149,7 @@ class MemoryManager:
# 读取阈值过滤配置
search_min_importance = self.config.search_min_importance
search_similarity_threshold = self.config.search_similarity_threshold
logger.info(
f"📊 配置检查: search_max_expand_depth={expand_depth}, "
f"search_expand_semantic_threshold={expand_semantic_threshold}, "
@@ -415,7 +415,7 @@ class MemoryManager:
# 使用配置的默认值
if top_k is None:
top_k = getattr(self.config, "search_top_k", 10)
# 准备搜索参数
params = {
"query": query,
@@ -948,7 +948,7 @@ class MemoryManager:
)
else:
logger.debug(f"记忆已删除: {memory_id} (删除了 {deleted_vectors} 个向量)")
# 4. 保存更新
await self.persistence.save_graph_store(self.graph_store)
return True
@@ -981,7 +981,7 @@ class MemoryManager:
try:
forgotten_count = 0
all_memories = self.graph_store.get_all_memories()
# 获取配置参数
min_importance = getattr(self.config, "forgetting_min_importance", 0.8)
decay_rate = getattr(self.config, "activation_decay_rate", 0.9)
@@ -1007,10 +1007,10 @@ class MemoryManager:
try:
last_access_dt = datetime.fromisoformat(last_access)
days_passed = (datetime.now() - last_access_dt).days
# 应用指数衰减:activation = base * (decay_rate ^ days)
current_activation = base_activation * (decay_rate ** days_passed)
logger.debug(
f"记忆 {memory.id[:8]}: 基础激活度={base_activation:.3f}, "
f"经过{days_passed}天衰减后={current_activation:.3f}"
@@ -1032,20 +1032,20 @@ class MemoryManager:
# 批量遗忘记忆(不立即清理孤立节点)
if memories_to_forget:
logger.info(f"开始批量遗忘 {len(memories_to_forget)} 条记忆...")
for memory_id, activation in memories_to_forget:
# cleanup_orphans=False暂不清理孤立节点
success = await self.forget_memory(memory_id, cleanup_orphans=False)
if success:
forgotten_count += 1
# 统一清理孤立节点和边
logger.info("批量遗忘完成,开始统一清理孤立节点和边...")
orphan_nodes, orphan_edges = await self._cleanup_orphan_nodes_and_edges()
# 保存最终更新
await self.persistence.save_graph_store(self.graph_store)
logger.info(
f"✅ 自动遗忘完成: 遗忘了 {forgotten_count} 条记忆, "
f"清理了 {orphan_nodes} 个孤立节点, {orphan_edges} 条孤立边"
@@ -1076,31 +1076,31 @@ class MemoryManager:
# 1. 清理孤立节点
# graph_store.node_to_memories 记录了每个节点属于哪些记忆
nodes_to_remove = []
for node_id, memory_ids in list(self.graph_store.node_to_memories.items()):
# 如果节点不再属于任何记忆,标记为删除
if not memory_ids:
nodes_to_remove.append(node_id)
# 从图中删除孤立节点
for node_id in nodes_to_remove:
if self.graph_store.graph.has_node(node_id):
self.graph_store.graph.remove_node(node_id)
orphan_nodes_count += 1
# 从映射中删除
if node_id in self.graph_store.node_to_memories:
del self.graph_store.node_to_memories[node_id]
# 2. 清理孤立边(指向已删除节点的边)
edges_to_remove = []
-for source, target, edge_id in self.graph_store.graph.edges(data='edge_id'):
+for source, target, edge_id in self.graph_store.graph.edges(data="edge_id"):
# 检查边的源节点和目标节点是否还存在于node_to_memories中
if source not in self.graph_store.node_to_memories or \
target not in self.graph_store.node_to_memories:
edges_to_remove.append((source, target))
# 删除孤立边
for source, target in edges_to_remove:
try:
@@ -1108,12 +1108,12 @@ class MemoryManager:
orphan_edges_count += 1
except Exception as e:
logger.debug(f"删除边失败 {source} -> {target}: {e}")
if orphan_nodes_count > 0 or orphan_edges_count > 0:
logger.info(
f"清理完成: {orphan_nodes_count} 个孤立节点, {orphan_edges_count} 条孤立边"
)
return orphan_nodes_count, orphan_edges_count
except Exception as e:
@@ -1255,7 +1255,7 @@ class MemoryManager:
mem for mem in recent_memories
if mem.importance >= min_importance_for_consolidation
]
result["importance_filtered"] = len(recent_memories) - len(important_memories)
logger.info(
f"📊 步骤2: 重要性过滤 (阈值={min_importance_for_consolidation:.2f}): "
@@ -1379,26 +1379,26 @@ class MemoryManager:
# ===== 步骤4: 向量检索关联记忆 + LLM分析关系 =====
# 过滤掉已删除的记忆
remaining_memories = [m for m in important_memories if m.id not in deleted_ids]
if not remaining_memories:
logger.info("✅ 记忆整理完成: 去重后无剩余记忆")
return
logger.info(f"📍 步骤4: 开始关联分析 ({len(remaining_memories)} 条记忆)...")
# 分批处理记忆关联
llm_batch_size = getattr(self.config, "consolidation_llm_batch_size", 10)
max_candidates_per_memory = getattr(self.config, "consolidation_max_candidates", 5)
min_confidence = getattr(self.config, "consolidation_min_confidence", 0.6)
all_new_edges = [] # 收集所有新建的边
for batch_start in range(0, len(remaining_memories), llm_batch_size):
batch_end = min(batch_start + llm_batch_size, len(remaining_memories))
batch = remaining_memories[batch_start:batch_end]
logger.debug(f"处理批次 {batch_start//llm_batch_size + 1}/{(len(remaining_memories)-1)//llm_batch_size + 1}")
for memory in batch:
# 跳过已经有很多连接的记忆
existing_edges = len([
@@ -1451,14 +1451,14 @@ class MemoryManager:
except Exception as e:
logger.warning(f"创建关联边失败: {e}")
continue
# 每个批次后让出控制权
await asyncio.sleep(0.01)
# ===== 步骤5: 统一更新记忆数据 =====
if all_new_edges:
logger.info(f"📍 步骤5: 统一更新 {len(all_new_edges)} 条新关联边...")
for memory, edge, relation in all_new_edges:
try:
# 添加到图
@@ -2298,7 +2298,7 @@ class MemoryManager:
# 使用 asyncio.wait_for 来支持取消
await asyncio.wait_for(
asyncio.sleep(initial_delay),
-timeout=float('inf') # 允许随时取消
+timeout=float("inf") # 允许随时取消
)
# 检查是否仍然需要运行

View File

@@ -482,7 +482,7 @@ class GraphStore:
for node in memory.nodes:
if node.id in self.node_to_memories:
self.node_to_memories[node.id].discard(memory_id)
# 可选:立即清理孤立节点
if cleanup_orphans:
# 如果该节点不再属于任何记忆,从图中移除节点

View File

@@ -70,12 +70,12 @@ class MemoryTools:
self.max_expand_depth = max_expand_depth
self.expand_semantic_threshold = expand_semantic_threshold
self.search_top_k = search_top_k
# 保存权重配置
self.base_vector_weight = search_vector_weight
self.base_importance_weight = search_importance_weight
self.base_recency_weight = search_recency_weight
# 保存阈值过滤配置
self.search_min_importance = search_min_importance
self.search_similarity_threshold = search_similarity_threshold
@@ -511,14 +511,14 @@ class MemoryTools:
# 1. 根据策略选择检索方式
llm_prefer_types = [] # LLM识别的偏好节点类型
if use_multi_query:
# 多查询策略(返回节点列表 + 偏好类型)
similar_nodes, llm_prefer_types = await self._multi_query_search(query, top_k, context)
else:
# 传统单查询策略
similar_nodes = await self._single_query_search(query, top_k)
# 合并用户指定的偏好类型和LLM识别的偏好类型
all_prefer_types = list(set(prefer_node_types + llm_prefer_types))
if all_prefer_types:
@@ -546,7 +546,7 @@ class MemoryTools:
# 记录最高分数
if mem_id not in memory_scores or similarity > memory_scores[mem_id]:
memory_scores[mem_id] = similarity
# 🔥 详细日志:检查初始召回情况
logger.info(
f"初始向量搜索: 返回{len(similar_nodes)}个节点 → "
@@ -554,8 +554,8 @@ class MemoryTools:
)
if len(initial_memory_ids) == 0:
logger.warning(
f"⚠️ 向量搜索未找到任何记忆!"
f"可能原因1) 嵌入模型理解问题 2) 记忆节点未建立索引 3) 查询表达与存储内容差异过大"
"⚠️ 向量搜索未找到任何记忆!"
"可能原因1) 嵌入模型理解问题 2) 记忆节点未建立索引 3) 查询表达与存储内容差异过大"
)
# 输出相似节点的详细信息用于调试
if similar_nodes:
@@ -613,7 +613,7 @@ class MemoryTools:
key=lambda x: final_scores[x],
reverse=True
) # 🔥 不再提前截断,让所有候选参与详细评分
# 🔍 统计初始记忆的相似度分布(用于诊断)
if memory_scores:
similarities = list(memory_scores.values())
@@ -628,7 +628,7 @@ class MemoryTools:
# 5. 获取完整记忆并进行最终排序(优化后的动态权重系统)
memories_with_scores = []
filter_stats = {"importance": 0, "similarity": 0, "total_checked": 0} # 过滤统计
for memory_id in sorted_memory_ids: # 遍历所有候选
memory = self.graph_store.get_memory_by_id(memory_id)
if memory:
@@ -636,7 +636,7 @@ class MemoryTools:
# 基础分数
similarity_score = final_scores[memory_id]
importance_score = memory.importance
# 🆕 区分记忆来源(用于过滤)
is_initial_memory = memory_id in memory_scores # 是否来自初始向量搜索
true_similarity = memory_scores.get(memory_id, 0.0) if is_initial_memory else None
@@ -659,16 +659,16 @@ class MemoryTools:
activation_score = memory.activation
# 🆕 动态权重计算:使用配置的基础权重 + 根据记忆类型微调
-memory_type = memory.memory_type.value if hasattr(memory.memory_type, 'value') else str(memory.memory_type)
+memory_type = memory.memory_type.value if hasattr(memory.memory_type, "value") else str(memory.memory_type)
# 检测记忆的主要节点类型
node_types_count = {}
for node in memory.nodes:
-nt = node.node_type.value if hasattr(node.node_type, 'value') else str(node.node_type)
+nt = node.node_type.value if hasattr(node.node_type, "value") else str(node.node_type)
node_types_count[nt] = node_types_count.get(nt, 0) + 1
dominant_node_type = max(node_types_count.items(), key=lambda x: x[1])[0] if node_types_count else "unknown"
# 根据记忆类型和节点类型计算调整系数(在配置权重基础上微调)
if dominant_node_type in ["ATTRIBUTE", "REFERENCE"] or memory_type == "FACT":
# 事实性记忆:提升相似度权重,降低时效性权重
@@ -698,41 +698,41 @@ class MemoryTools:
"importance": 1.0,
"recency": 1.0,
}
# 应用调整后的权重(基于配置的基础权重)
weights = {
"similarity": self.base_vector_weight * type_adjustments["similarity"],
"importance": self.base_importance_weight * type_adjustments["importance"],
"recency": self.base_recency_weight * type_adjustments["recency"],
}
# 归一化权重确保总和为1.0
total_weight = sum(weights.values())
if total_weight > 0:
weights = {k: v / total_weight for k, v in weights.items()}
# 综合分数计算(🔥 移除激活度影响)
final_score = (
similarity_score * weights["similarity"] +
importance_score * weights["importance"] +
recency_score * weights["recency"]
)
# 🆕 阈值过滤策略:
# 1. 重要性过滤:应用于所有记忆(过滤极低质量)
if memory.importance < self.search_min_importance:
filter_stats["importance"] += 1
logger.debug(f"❌ 过滤 {memory.id[:8]}: 重要性 {memory.importance:.2f} < 阈值 {self.search_min_importance}")
continue
# 2. 相似度过滤:不再对初始向量搜索结果过滤(信任向量搜索的排序)
# 理由:向量搜索已经按相似度排序,返回的都是最相关结果
# 如果再用阈值过滤,会导致"最相关的也不够相关"的矛盾
#
#
# 注意:如果未来需要对扩展记忆过滤,可以在这里添加逻辑
# if not is_initial_memory and some_score < threshold:
# continue
# 记录通过过滤的记忆(用于调试)
if is_initial_memory:
logger.debug(
@@ -744,11 +744,11 @@ class MemoryTools:
f"✅ 保留 {memory.id[:8]} [扩展]: 重要性={memory.importance:.2f}, "
f"综合分数={final_score:.4f}"
)
# 🆕 节点类型加权对REFERENCE/ATTRIBUTE节点额外加分促进事实性信息召回
if "REFERENCE" in node_types_count or "ATTRIBUTE" in node_types_count:
final_score *= 1.1 # 10% 加成
# 🆕 用户指定的优先节点类型额外加权
if prefer_node_types:
for prefer_type in prefer_node_types:
@@ -756,7 +756,7 @@ class MemoryTools:
final_score *= 1.15 # 15% 额外加成
logger.debug(f"记忆 {memory.id[:8]} 包含优先节点类型 {prefer_type},加权后分数: {final_score:.4f}")
break
memories_with_scores.append((memory, final_score, dominant_node_type))
# 按综合分数排序
@@ -766,7 +766,7 @@ class MemoryTools:
# 统计过滤情况
total_candidates = len(all_memory_ids)
filtered_count = total_candidates - len(memories_with_scores)
# 6. 格式化结果(包含调试信息)
results = []
for memory, score, node_type in memories_with_scores[:top_k]:
@@ -787,7 +787,7 @@ class MemoryTools:
f"过滤{filtered_count}个 (重要性过滤) → "
f"最终返回{len(results)}条记忆"
)
# 如果过滤率过高,发出警告
if total_candidates > 0:
filter_rate = filtered_count / total_candidates
@@ -1000,20 +1000,21 @@ class MemoryTools:
response, _ = await llm.generate_response_async(prompt, temperature=0.3, max_tokens=300)
import re
import orjson
# 清理Markdown代码块
response = re.sub(r"```json\s*", "", response)
response = re.sub(r"```\s*$", "", response).strip()
# 解析JSON
data = orjson.loads(response)
# 提取查询列表
queries = data.get("queries", [])
result_queries = [(item.get("text", "").strip(), float(item.get("weight", 0.5)))
for item in queries if item.get("text", "").strip()]
# 提取偏好节点类型
prefer_node_types = data.get("prefer_node_types", [])
# 确保类型正确且有效
@@ -1062,7 +1063,7 @@ class MemoryTools:
limit=top_k * 5, # 🔥 从2倍提升到5倍提高初始召回率
min_similarity=0.0, # 不在这里过滤,交给后续评分
)
logger.debug(f"单查询向量搜索: 查询='{query}', 返回节点数={len(similar_nodes)}")
if similar_nodes:
logger.debug(f"Top 3相似度: {[f'{sim:.3f}' for _, sim, _ in similar_nodes[:3]]}")

View File

@@ -62,7 +62,7 @@ async def expand_memories_with_semantic_filter(
try:
import time
start_time = time.time()
# 记录已访问的记忆,避免重复
visited_memories = set(initial_memory_ids)
# 记录扩展的记忆及其分数
@@ -87,17 +87,17 @@ async def expand_memories_with_semantic_filter(
# 获取该记忆的邻居记忆(通过边关系)
neighbor_memory_ids = set()
# 🆕 遍历记忆的所有边,收集邻居记忆(带边类型权重)
edge_weights = {} # 记录通过不同边类型到达的记忆的权重
for edge in memory.edges:
# 获取边的目标节点
target_node_id = edge.target_id
source_node_id = edge.source_id
# 🆕 根据边类型设置权重优先扩展REFERENCE、ATTRIBUTE相关的边
-edge_type_str = edge.edge_type.value if hasattr(edge.edge_type, 'value') else str(edge.edge_type)
+edge_type_str = edge.edge_type.value if hasattr(edge.edge_type, "value") else str(edge.edge_type)
if edge_type_str == "REFERENCE":
edge_weight = 1.3 # REFERENCE边权重最高引用关系
elif edge_type_str in ["ATTRIBUTE", "HAS_PROPERTY"]:
@@ -108,18 +108,18 @@ async def expand_memories_with_semantic_filter(
edge_weight = 0.9 # 一般关系适中降权
else:
edge_weight = 1.0 # 默认权重
# 通过节点找到其他记忆
for node_id in [target_node_id, source_node_id]:
if node_id in graph_store.node_to_memories:
for neighbor_id in graph_store.node_to_memories[node_id]:
if neighbor_id not in edge_weights or edge_weights[neighbor_id] < edge_weight:
edge_weights[neighbor_id] = edge_weight
# 将权重高的邻居记忆加入候选
for neighbor_id, edge_weight in edge_weights.items():
neighbor_memory_ids.add((neighbor_id, edge_weight))
# 过滤掉已访问的和自己
filtered_neighbors = []
for neighbor_id, edge_weight in neighbor_memory_ids:
@@ -129,7 +129,7 @@ async def expand_memories_with_semantic_filter(
# 批量评估邻居记忆
for neighbor_mem_id, edge_weight in filtered_neighbors:
candidates_checked += 1
neighbor_memory = graph_store.get_memory_by_id(neighbor_mem_id)
if not neighbor_memory:
continue
@@ -139,7 +139,7 @@ async def expand_memories_with_semantic_filter(
(n for n in neighbor_memory.nodes if n.has_embedding()),
None
)
if not topic_node or topic_node.embedding is None:
continue
@@ -179,11 +179,11 @@ async def expand_memories_with_semantic_filter(
if len(expanded_memories) >= max_expanded:
logger.debug(f"⏹️ 提前停止:已达到最大扩展数量 {max_expanded}")
break
# 早停检查
if len(expanded_memories) >= max_expanded:
break
# 记录本层统计
depth_stats.append({
"depth": depth + 1,
@@ -199,20 +199,20 @@ async def expand_memories_with_semantic_filter(
# 限制下一层的记忆数量,避免爆炸性增长
current_level_memories = next_level_memories[:max_expanded]
# 每层让出控制权
await asyncio.sleep(0.001)
# 排序并返回
sorted_results = sorted(expanded_memories.items(), key=lambda x: x[1], reverse=True)[:max_expanded]
elapsed = time.time() - start_time
logger.info(
f"✅ 图扩展完成: 初始{len(initial_memory_ids)}个 → "
f"扩展{len(sorted_results)}个新记忆 "
f"(深度={max_depth}, 阈值={semantic_threshold:.2f}, 耗时={elapsed:.3f}s)"
)
# 输出每层统计
for stat in depth_stats:
logger.debug(