style: unify code style and adopt modern type annotations

A comprehensive style cleanup and modernization pass over the entire codebase, mainly covering:

- Removed trailing whitespace in all files.
- Updated type hints to the modern syntax introduced by PEP 585 and PEP 604 (e.g., `list` instead of `List`, `|` instead of `Optional`).
- Removed unused import statements in several modules.
- Removed redundant f-strings that contain no interpolated variables.
- Adjusted the order of `__all__` exports in some `__init__.py` files for consistency.

These changes are intended to improve readability and maintainability and bring the code in line with modern Python best practices; no core logic was modified.
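
As an illustration of the type-hint and f-string items above, here is a minimal before/after sketch (a hypothetical function, not taken from this diff; the new annotation syntax assumes Python 3.10+ for `X | None`):

```python
from typing import Dict, List, Optional

# Before: typing-module generics, Optional, and an f-string with no placeholders
def load_memories_old(ids: Optional[List[str]] = None) -> Dict[str, float]:
    print(f"loading memories")  # redundant f-string, nothing interpolated
    return {}

# After: PEP 585 built-in generics, PEP 604 union syntax, plain string literal
def load_memories(ids: list[str] | None = None) -> dict[str, float]:
    print("loading memories")
    return {}
```
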
minecraft1024a
2025-11-12 12:49:40 +08:00
parent daf8ea7e6a
commit 0e1e9935b2
33 changed files with 227 additions and 229 deletions

View File

@@ -128,7 +128,7 @@ class MemoryBuilder:
# 6. 构建 Memory 对象
# 新记忆应该有较高的初始激活度
initial_activation = 0.75 # 新记忆初始激活度为 0.75
memory = Memory(
id=memory_id,
subject_id=subject_node.id,

View File

@@ -149,7 +149,7 @@ class MemoryManager:
# 读取阈值过滤配置
search_min_importance = self.config.search_min_importance
search_similarity_threshold = self.config.search_similarity_threshold
logger.info(
f"📊 配置检查: search_max_expand_depth={expand_depth}, "
f"search_expand_semantic_threshold={expand_semantic_threshold}, "
@@ -415,7 +415,7 @@ class MemoryManager:
# 使用配置的默认值
if top_k is None:
top_k = getattr(self.config, "search_top_k", 10)
# 准备搜索参数
params = {
"query": query,
@@ -948,7 +948,7 @@ class MemoryManager:
)
else:
logger.debug(f"记忆已删除: {memory_id} (删除了 {deleted_vectors} 个向量)")
# 4. 保存更新
await self.persistence.save_graph_store(self.graph_store)
return True
@@ -981,7 +981,7 @@ class MemoryManager:
try:
forgotten_count = 0
all_memories = self.graph_store.get_all_memories()
# 获取配置参数
min_importance = getattr(self.config, "forgetting_min_importance", 0.8)
decay_rate = getattr(self.config, "activation_decay_rate", 0.9)
@@ -1007,10 +1007,10 @@ class MemoryManager:
try:
last_access_dt = datetime.fromisoformat(last_access)
days_passed = (datetime.now() - last_access_dt).days
# 应用指数衰减:activation = base * (decay_rate ^ days)
current_activation = base_activation * (decay_rate ** days_passed)
logger.debug(
f"记忆 {memory.id[:8]}: 基础激活度={base_activation:.3f}, "
f"经过{days_passed}天衰减后={current_activation:.3f}"
@@ -1032,20 +1032,20 @@ class MemoryManager:
# 批量遗忘记忆(不立即清理孤立节点)
if memories_to_forget:
logger.info(f"开始批量遗忘 {len(memories_to_forget)} 条记忆...")
for memory_id, activation in memories_to_forget:
# cleanup_orphans=False暂不清理孤立节点
success = await self.forget_memory(memory_id, cleanup_orphans=False)
if success:
forgotten_count += 1
# 统一清理孤立节点和边
logger.info("批量遗忘完成,开始统一清理孤立节点和边...")
orphan_nodes, orphan_edges = await self._cleanup_orphan_nodes_and_edges()
# 保存最终更新
await self.persistence.save_graph_store(self.graph_store)
logger.info(
f"✅ 自动遗忘完成: 遗忘了 {forgotten_count} 条记忆, "
f"清理了 {orphan_nodes} 个孤立节点, {orphan_edges} 条孤立边"
@@ -1076,31 +1076,31 @@ class MemoryManager:
# 1. 清理孤立节点
# graph_store.node_to_memories 记录了每个节点属于哪些记忆
nodes_to_remove = []
for node_id, memory_ids in list(self.graph_store.node_to_memories.items()):
# 如果节点不再属于任何记忆,标记为删除
if not memory_ids:
nodes_to_remove.append(node_id)
# 从图中删除孤立节点
for node_id in nodes_to_remove:
if self.graph_store.graph.has_node(node_id):
self.graph_store.graph.remove_node(node_id)
orphan_nodes_count += 1
# 从映射中删除
if node_id in self.graph_store.node_to_memories:
del self.graph_store.node_to_memories[node_id]
# 2. 清理孤立边(指向已删除节点的边)
edges_to_remove = []
-for source, target, edge_id in self.graph_store.graph.edges(data='edge_id'):
+for source, target, edge_id in self.graph_store.graph.edges(data="edge_id"):
# 检查边的源节点和目标节点是否还存在于node_to_memories中
if source not in self.graph_store.node_to_memories or \
target not in self.graph_store.node_to_memories:
edges_to_remove.append((source, target))
# 删除孤立边
for source, target in edges_to_remove:
try:
@@ -1108,12 +1108,12 @@ class MemoryManager:
orphan_edges_count += 1
except Exception as e:
logger.debug(f"删除边失败 {source} -> {target}: {e}")
if orphan_nodes_count > 0 or orphan_edges_count > 0:
logger.info(
f"清理完成: {orphan_nodes_count} 个孤立节点, {orphan_edges_count} 条孤立边"
)
return orphan_nodes_count, orphan_edges_count
except Exception as e:
@@ -1255,7 +1255,7 @@ class MemoryManager:
mem for mem in recent_memories
if mem.importance >= min_importance_for_consolidation
]
result["importance_filtered"] = len(recent_memories) - len(important_memories)
logger.info(
f"📊 步骤2: 重要性过滤 (阈值={min_importance_for_consolidation:.2f}): "
@@ -1379,26 +1379,26 @@ class MemoryManager:
# ===== 步骤4: 向量检索关联记忆 + LLM分析关系 =====
# 过滤掉已删除的记忆
remaining_memories = [m for m in important_memories if m.id not in deleted_ids]
if not remaining_memories:
logger.info("✅ 记忆整理完成: 去重后无剩余记忆")
return
logger.info(f"📍 步骤4: 开始关联分析 ({len(remaining_memories)} 条记忆)...")
# 分批处理记忆关联
llm_batch_size = getattr(self.config, "consolidation_llm_batch_size", 10)
max_candidates_per_memory = getattr(self.config, "consolidation_max_candidates", 5)
min_confidence = getattr(self.config, "consolidation_min_confidence", 0.6)
all_new_edges = [] # 收集所有新建的边
for batch_start in range(0, len(remaining_memories), llm_batch_size):
batch_end = min(batch_start + llm_batch_size, len(remaining_memories))
batch = remaining_memories[batch_start:batch_end]
logger.debug(f"处理批次 {batch_start//llm_batch_size + 1}/{(len(remaining_memories)-1)//llm_batch_size + 1}")
for memory in batch:
# 跳过已经有很多连接的记忆
existing_edges = len([
@@ -1451,14 +1451,14 @@ class MemoryManager:
except Exception as e:
logger.warning(f"创建关联边失败: {e}")
continue
# 每个批次后让出控制权
await asyncio.sleep(0.01)
# ===== 步骤5: 统一更新记忆数据 =====
if all_new_edges:
logger.info(f"📍 步骤5: 统一更新 {len(all_new_edges)} 条新关联边...")
for memory, edge, relation in all_new_edges:
try:
# 添加到图
@@ -2298,7 +2298,7 @@ class MemoryManager:
# 使用 asyncio.wait_for 来支持取消
await asyncio.wait_for(
asyncio.sleep(initial_delay),
-timeout=float('inf') # 允许随时取消
+timeout=float("inf") # 允许随时取消
)
# 检查是否仍然需要运行

View File

@@ -482,7 +482,7 @@ class GraphStore:
for node in memory.nodes:
if node.id in self.node_to_memories:
self.node_to_memories[node.id].discard(memory_id)
# 可选:立即清理孤立节点
if cleanup_orphans:
# 如果该节点不再属于任何记忆,从图中移除节点

View File

@@ -70,12 +70,12 @@ class MemoryTools:
self.max_expand_depth = max_expand_depth
self.expand_semantic_threshold = expand_semantic_threshold
self.search_top_k = search_top_k
# 保存权重配置
self.base_vector_weight = search_vector_weight
self.base_importance_weight = search_importance_weight
self.base_recency_weight = search_recency_weight
# 保存阈值过滤配置
self.search_min_importance = search_min_importance
self.search_similarity_threshold = search_similarity_threshold
@@ -511,14 +511,14 @@ class MemoryTools:
# 1. 根据策略选择检索方式
llm_prefer_types = [] # LLM识别的偏好节点类型
if use_multi_query:
# 多查询策略(返回节点列表 + 偏好类型)
similar_nodes, llm_prefer_types = await self._multi_query_search(query, top_k, context)
else:
# 传统单查询策略
similar_nodes = await self._single_query_search(query, top_k)
# 合并用户指定的偏好类型和LLM识别的偏好类型
all_prefer_types = list(set(prefer_node_types + llm_prefer_types))
if all_prefer_types:
@@ -546,7 +546,7 @@ class MemoryTools:
# 记录最高分数
if mem_id not in memory_scores or similarity > memory_scores[mem_id]:
memory_scores[mem_id] = similarity
# 🔥 详细日志:检查初始召回情况
logger.info(
f"初始向量搜索: 返回{len(similar_nodes)}个节点 → "
@@ -554,8 +554,8 @@ class MemoryTools:
)
if len(initial_memory_ids) == 0:
logger.warning(
f"⚠️ 向量搜索未找到任何记忆!"
f"可能原因1) 嵌入模型理解问题 2) 记忆节点未建立索引 3) 查询表达与存储内容差异过大"
"⚠️ 向量搜索未找到任何记忆!"
"可能原因1) 嵌入模型理解问题 2) 记忆节点未建立索引 3) 查询表达与存储内容差异过大"
)
# 输出相似节点的详细信息用于调试
if similar_nodes:
@@ -613,7 +613,7 @@ class MemoryTools:
key=lambda x: final_scores[x],
reverse=True
) # 🔥 不再提前截断,让所有候选参与详细评分
# 🔍 统计初始记忆的相似度分布(用于诊断)
if memory_scores:
similarities = list(memory_scores.values())
@@ -628,7 +628,7 @@ class MemoryTools:
# 5. 获取完整记忆并进行最终排序(优化后的动态权重系统)
memories_with_scores = []
filter_stats = {"importance": 0, "similarity": 0, "total_checked": 0} # 过滤统计
for memory_id in sorted_memory_ids: # 遍历所有候选
memory = self.graph_store.get_memory_by_id(memory_id)
if memory:
@@ -636,7 +636,7 @@ class MemoryTools:
# 基础分数
similarity_score = final_scores[memory_id]
importance_score = memory.importance
# 🆕 区分记忆来源(用于过滤)
is_initial_memory = memory_id in memory_scores # 是否来自初始向量搜索
true_similarity = memory_scores.get(memory_id, 0.0) if is_initial_memory else None
@@ -659,16 +659,16 @@ class MemoryTools:
activation_score = memory.activation
# 🆕 动态权重计算:使用配置的基础权重 + 根据记忆类型微调
-memory_type = memory.memory_type.value if hasattr(memory.memory_type, 'value') else str(memory.memory_type)
+memory_type = memory.memory_type.value if hasattr(memory.memory_type, "value") else str(memory.memory_type)
# 检测记忆的主要节点类型
node_types_count = {}
for node in memory.nodes:
-nt = node.node_type.value if hasattr(node.node_type, 'value') else str(node.node_type)
+nt = node.node_type.value if hasattr(node.node_type, "value") else str(node.node_type)
node_types_count[nt] = node_types_count.get(nt, 0) + 1
dominant_node_type = max(node_types_count.items(), key=lambda x: x[1])[0] if node_types_count else "unknown"
# 根据记忆类型和节点类型计算调整系数(在配置权重基础上微调)
if dominant_node_type in ["ATTRIBUTE", "REFERENCE"] or memory_type == "FACT":
# 事实性记忆:提升相似度权重,降低时效性权重
@@ -698,41 +698,41 @@ class MemoryTools:
"importance": 1.0,
"recency": 1.0,
}
# 应用调整后的权重(基于配置的基础权重)
weights = {
"similarity": self.base_vector_weight * type_adjustments["similarity"],
"importance": self.base_importance_weight * type_adjustments["importance"],
"recency": self.base_recency_weight * type_adjustments["recency"],
}
# 归一化权重确保总和为1.0
total_weight = sum(weights.values())
if total_weight > 0:
weights = {k: v / total_weight for k, v in weights.items()}
# 综合分数计算(🔥 移除激活度影响)
final_score = (
similarity_score * weights["similarity"] +
importance_score * weights["importance"] +
recency_score * weights["recency"]
)
# 🆕 阈值过滤策略:
# 1. 重要性过滤:应用于所有记忆(过滤极低质量)
if memory.importance < self.search_min_importance:
filter_stats["importance"] += 1
logger.debug(f"❌ 过滤 {memory.id[:8]}: 重要性 {memory.importance:.2f} < 阈值 {self.search_min_importance}")
continue
# 2. 相似度过滤:不再对初始向量搜索结果过滤(信任向量搜索的排序)
# 理由:向量搜索已经按相似度排序,返回的都是最相关结果
# 如果再用阈值过滤,会导致"最相关的也不够相关"的矛盾
#
#
# 注意:如果未来需要对扩展记忆过滤,可以在这里添加逻辑
# if not is_initial_memory and some_score < threshold:
# continue
# 记录通过过滤的记忆(用于调试)
if is_initial_memory:
logger.debug(
@@ -744,11 +744,11 @@ class MemoryTools:
f"✅ 保留 {memory.id[:8]} [扩展]: 重要性={memory.importance:.2f}, "
f"综合分数={final_score:.4f}"
)
# 🆕 节点类型加权对REFERENCE/ATTRIBUTE节点额外加分促进事实性信息召回
if "REFERENCE" in node_types_count or "ATTRIBUTE" in node_types_count:
final_score *= 1.1 # 10% 加成
# 🆕 用户指定的优先节点类型额外加权
if prefer_node_types:
for prefer_type in prefer_node_types:
@@ -756,7 +756,7 @@ class MemoryTools:
final_score *= 1.15 # 15% 额外加成
logger.debug(f"记忆 {memory.id[:8]} 包含优先节点类型 {prefer_type},加权后分数: {final_score:.4f}")
break
memories_with_scores.append((memory, final_score, dominant_node_type))
# 按综合分数排序
@@ -766,7 +766,7 @@ class MemoryTools:
# 统计过滤情况
total_candidates = len(all_memory_ids)
filtered_count = total_candidates - len(memories_with_scores)
# 6. 格式化结果(包含调试信息)
results = []
for memory, score, node_type in memories_with_scores[:top_k]:
@@ -787,7 +787,7 @@ class MemoryTools:
f"过滤{filtered_count}个 (重要性过滤) → "
f"最终返回{len(results)}条记忆"
)
# 如果过滤率过高,发出警告
if total_candidates > 0:
filter_rate = filtered_count / total_candidates
@@ -1000,20 +1000,21 @@ class MemoryTools:
response, _ = await llm.generate_response_async(prompt, temperature=0.3, max_tokens=300)
import re
import orjson
# 清理Markdown代码块
response = re.sub(r"```json\s*", "", response)
response = re.sub(r"```\s*$", "", response).strip()
# 解析JSON
data = orjson.loads(response)
# 提取查询列表
queries = data.get("queries", [])
result_queries = [(item.get("text", "").strip(), float(item.get("weight", 0.5)))
for item in queries if item.get("text", "").strip()]
# 提取偏好节点类型
prefer_node_types = data.get("prefer_node_types", [])
# 确保类型正确且有效
@@ -1062,7 +1063,7 @@ class MemoryTools:
limit=top_k * 5, # 🔥 从2倍提升到5倍提高初始召回率
min_similarity=0.0, # 不在这里过滤,交给后续评分
)
logger.debug(f"单查询向量搜索: 查询='{query}', 返回节点数={len(similar_nodes)}")
if similar_nodes:
logger.debug(f"Top 3相似度: {[f'{sim:.3f}' for _, sim, _ in similar_nodes[:3]]}")

View File

@@ -62,7 +62,7 @@ async def expand_memories_with_semantic_filter(
try:
import time
start_time = time.time()
# 记录已访问的记忆,避免重复
visited_memories = set(initial_memory_ids)
# 记录扩展的记忆及其分数
@@ -87,17 +87,17 @@ async def expand_memories_with_semantic_filter(
# 获取该记忆的邻居记忆(通过边关系)
neighbor_memory_ids = set()
# 🆕 遍历记忆的所有边,收集邻居记忆(带边类型权重)
edge_weights = {} # 记录通过不同边类型到达的记忆的权重
for edge in memory.edges:
# 获取边的目标节点
target_node_id = edge.target_id
source_node_id = edge.source_id
# 🆕 根据边类型设置权重优先扩展REFERENCE、ATTRIBUTE相关的边
-edge_type_str = edge.edge_type.value if hasattr(edge.edge_type, 'value') else str(edge.edge_type)
+edge_type_str = edge.edge_type.value if hasattr(edge.edge_type, "value") else str(edge.edge_type)
if edge_type_str == "REFERENCE":
edge_weight = 1.3 # REFERENCE边权重最高引用关系
elif edge_type_str in ["ATTRIBUTE", "HAS_PROPERTY"]:
@@ -108,18 +108,18 @@ async def expand_memories_with_semantic_filter(
edge_weight = 0.9 # 一般关系适中降权
else:
edge_weight = 1.0 # 默认权重
# 通过节点找到其他记忆
for node_id in [target_node_id, source_node_id]:
if node_id in graph_store.node_to_memories:
for neighbor_id in graph_store.node_to_memories[node_id]:
if neighbor_id not in edge_weights or edge_weights[neighbor_id] < edge_weight:
edge_weights[neighbor_id] = edge_weight
# 将权重高的邻居记忆加入候选
for neighbor_id, edge_weight in edge_weights.items():
neighbor_memory_ids.add((neighbor_id, edge_weight))
# 过滤掉已访问的和自己
filtered_neighbors = []
for neighbor_id, edge_weight in neighbor_memory_ids:
@@ -129,7 +129,7 @@ async def expand_memories_with_semantic_filter(
# 批量评估邻居记忆
for neighbor_mem_id, edge_weight in filtered_neighbors:
candidates_checked += 1
neighbor_memory = graph_store.get_memory_by_id(neighbor_mem_id)
if not neighbor_memory:
continue
@@ -139,7 +139,7 @@ async def expand_memories_with_semantic_filter(
(n for n in neighbor_memory.nodes if n.has_embedding()),
None
)
if not topic_node or topic_node.embedding is None:
continue
@@ -179,11 +179,11 @@ async def expand_memories_with_semantic_filter(
if len(expanded_memories) >= max_expanded:
logger.debug(f"⏹️ 提前停止:已达到最大扩展数量 {max_expanded}")
break
# 早停检查
if len(expanded_memories) >= max_expanded:
break
# 记录本层统计
depth_stats.append({
"depth": depth + 1,
@@ -199,20 +199,20 @@ async def expand_memories_with_semantic_filter(
# 限制下一层的记忆数量,避免爆炸性增长
current_level_memories = next_level_memories[:max_expanded]
# 每层让出控制权
await asyncio.sleep(0.001)
# 排序并返回
sorted_results = sorted(expanded_memories.items(), key=lambda x: x[1], reverse=True)[:max_expanded]
elapsed = time.time() - start_time
logger.info(
f"✅ 图扩展完成: 初始{len(initial_memory_ids)}个 → "
f"扩展{len(sorted_results)}个新记忆 "
f"(深度={max_depth}, 阈值={semantic_threshold:.2f}, 耗时={elapsed:.3f}s)"
)
# 输出每层统计
for stat in depth_stats:
logger.debug(