"""Generate embedding vectors for existing graph nodes.

Scans the memory graph for nodes of the requested types that have no
embedding and are not yet present in the vector index, generates the
embeddings in batches, indexes them, saves the graph and finally runs a
small search smoke test.

Typical situations:
1. Historical memory nodes were stored without embeddings.
2. The embedding generator was not configured earlier and must be back-filled.
3. The vector index is incomplete.

NOTE(review): nodes that already carry an embedding but are missing from the
vector index are skipped by the scan (only ``not node.has_embedding()`` nodes
are queued), so a fully rebuilt index needs a different tool.

Usage:
    python scripts/generate_missing_embeddings.py [--node-types 主题,客体] [--batch-size 50]

Arguments:
    --node-types: comma-separated node type values to process (default: 主题,客体)
    --batch-size: number of nodes embedded per batch (default: 50)
"""

import asyncio
import sys
from pathlib import Path
from typing import List, Optional

# Make the project root importable when the script is run directly.
sys.path.insert(0, str(Path(__file__).parent.parent))

# Queries used by the post-run search smoke test when the caller supplies none.
DEFAULT_TEST_QUERIES = ["小红帽蕾克", "拾风", "杰瑞喵"]


def parse_node_types(raw: str) -> List[str]:
    """Split a comma-separated node type list, dropping blanks and padding."""
    return [part.strip() for part in raw.split(",") if part.strip()]


def coverage_percent(covered: int, total: int) -> float:
    """Return ``covered / total`` as a percentage, or 0.0 when ``total`` is not positive."""
    return covered / total * 100 if total > 0 else 0.0


async def generate_missing_embeddings(
    target_node_types: Optional[List[str]] = None,
    batch_size: int = 50,
    test_queries: Optional[List[str]] = None,
):
    """Generate and index embeddings for nodes that are missing them.

    Args:
        target_node_types: Node type values to process (e.g. ``["主题", "客体"]``).
            Defaults to the TOPIC and OBJECT node type values.
        batch_size: Number of nodes sent to the embedding generator per call.
        test_queries: Queries for the final search smoke test. Defaults to
            ``DEFAULT_TEST_QUERIES``.

    Raises:
        ValueError: If ``batch_size`` is not a positive integer.
    """
    # Fail fast: a zero step would crash the batching loop after the manager is
    # already initialised, and a negative one would silently process nothing.
    if batch_size <= 0:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

    from src.common.logger import get_logger
    from src.memory_graph.manager_singleton import get_memory_manager, initialize_memory_manager
    from src.memory_graph.models import NodeType

    logger = get_logger("generate_missing_embeddings")

    if target_node_types is None:
        target_node_types = [NodeType.TOPIC.value, NodeType.OBJECT.value]
    if test_queries is None:
        test_queries = DEFAULT_TEST_QUERIES

    print(f"\n{'='*80}")
    print("🔧 为节点生成嵌入向量")
    print(f"{'='*80}\n")
    print(f"目标节点类型: {', '.join(target_node_types)}")
    print(f"批处理大小: {batch_size}\n")

    # 1. Initialise the memory manager.
    print("🔧 正在初始化记忆管理器...")
    await initialize_memory_manager()
    manager = get_memory_manager()

    if manager is None:
        print("❌ 记忆管理器初始化失败")
        return

    print("✅ 记忆管理器已初始化\n")

    # 2. Collect the ids that are already present in the vector index.
    print("🔍 检查现有向量索引...")
    existing_node_ids = set()
    try:
        vector_count = manager.vector_store.collection.count()
        if vector_count > 0:
            # Page through the collection so one huge response is never built.
            page_limit = 1000
            for offset in range(0, vector_count, page_limit):
                result = manager.vector_store.collection.get(
                    limit=min(page_limit, vector_count - offset),
                    offset=offset,
                )
                if result and "ids" in result:
                    existing_node_ids.update(result["ids"])

        print(f"✅ 发现 {len(existing_node_ids)} 个已索引节点\n")
    except Exception as e:
        # Best effort: without the id list, duplicates are skipped at insert time.
        logger.warning(f"获取已索引节点ID失败: {e}")
        print("⚠️ 无法获取已索引节点,将尝试跳过重复项\n")

    # 3. Find the nodes that still need an embedding.
    print("🔍 扫描需要生成嵌入的节点...")
    all_memories = manager.graph_store.get_all_memories()

    nodes_to_process = []
    total_target_nodes = 0
    type_stats = {nt: {"total": 0, "need_emb": 0, "already_indexed": 0} for nt in target_node_types}

    for memory in all_memories:
        for node in memory.nodes:
            counters = type_stats.get(node.node_type.value)
            if counters is None:
                continue  # not one of the requested node types

            total_target_nodes += 1
            counters["total"] += 1

            if node.id in existing_node_ids:
                counters["already_indexed"] += 1
                continue

            if not node.has_embedding():
                nodes_to_process.append({"node": node, "memory_id": memory.id})
                counters["need_emb"] += 1

    print("\n📊 扫描结果:")
    for node_type in target_node_types:
        stats = type_stats[node_type]
        coverage = coverage_percent(stats["total"] - stats["need_emb"], stats["total"])
        print(f" - {node_type}: {stats['total']} 个节点, {stats['need_emb']} 个缺失嵌入, "
              f"{stats['already_indexed']} 个已索引 (覆盖率: {coverage:.1f}%)")

    print(f"\n 总计: {total_target_nodes} 个目标节点, {len(nodes_to_process)} 个需要生成嵌入\n")

    if not nodes_to_process:
        print("✅ 所有节点已有嵌入向量,无需生成")
        return

    # 4. Generate embeddings batch by batch and index them.
    print("🚀 开始生成嵌入向量...\n")

    total_batches = (len(nodes_to_process) + batch_size - 1) // batch_size
    success_count = 0
    failed_count = 0
    indexed_count = 0

    for start in range(0, len(nodes_to_process), batch_size):
        batch = nodes_to_process[start : start + batch_size]
        batch_num = start // batch_size + 1

        print(f"📦 批次 {batch_num}/{total_batches} ({len(batch)} 个节点)...")

        try:
            texts = [item["node"].content for item in batch]
            embeddings = list(await manager.embedding_generator.generate_batch(texts))

            # zip() below would silently drop nodes when the generator returns
            # fewer vectors than requested; count those as failures instead.
            batch_failed = max(len(batch) - len(embeddings), 0)
            if batch_failed:
                logger.warning(f" ⚠️ 嵌入生成器仅返回 {len(embeddings)}/{len(batch)} 个结果")

            batch_nodes_for_index = []
            for item, embedding in zip(batch, embeddings):
                node = item["node"]
                if embedding is not None:
                    node.embedding = embedding
                    batch_nodes_for_index.append(node)
                else:
                    batch_failed += 1
                    logger.warning(f" ⚠️ 节点 {node.id[:8]}... '{node.content[:30]}' 嵌入生成失败")

            # Commit the counters only once the whole batch was inspected, so the
            # outer handler never double-counts a half-processed batch.
            success_count += len(batch_nodes_for_index)
            failed_count += batch_failed

            if batch_nodes_for_index:
                try:
                    await manager.vector_store.add_nodes_batch(batch_nodes_for_index)
                    indexed_count += len(batch_nodes_for_index)
                    print(f" ✅ 成功: {len(batch_nodes_for_index)}/{len(batch)} 个节点已生成并索引")
                except Exception as e:
                    # Fall back to one-by-one inserts so a single duplicate id
                    # does not discard the rest of the batch.
                    logger.warning(f" 批量索引失败,尝试逐个添加: {e}")
                    individual_success = 0
                    for node in batch_nodes_for_index:
                        try:
                            await manager.vector_store.add_node(node)
                            individual_success += 1
                            indexed_count += 1
                        except Exception as e2:
                            if "Expected IDs to be unique" in str(e2):
                                logger.debug(f" 跳过已存在节点: {node.id}")
                            else:
                                logger.error(f" 节点 {node.id} 索引失败: {e2}")
                    print(f" ⚠️ 逐个索引: {individual_success}/{len(batch_nodes_for_index)} 个成功")

        except Exception as e:
            failed_count += len(batch)
            logger.error(f"批次 {batch_num} 处理失败", exc_info=True)
            print(f" ❌ 批次处理失败: {e}")

        total_processed = min(start + batch_size, len(nodes_to_process))
        progress = total_processed / len(nodes_to_process) * 100
        print(f" 📊 总进度: {total_processed}/{len(nodes_to_process)} ({progress:.1f}%)\n")

    # 5. Persist the graph so the new node.embedding values are saved.
    print("💾 保存图数据...")
    try:
        await manager.persistence.save_graph_store(manager.graph_store)
        print("✅ 图数据已保存\n")
    except Exception as e:
        logger.error("保存图数据失败", exc_info=True)
        print(f"❌ 保存失败: {e}\n")

    # 6. Report the final state of the index.
    print("🔍 验证向量索引...")
    final_vector_count = manager.vector_store.collection.count()
    stats = manager.graph_store.get_statistics()
    total_nodes = stats["total_nodes"]

    print(f"\n{'='*80}")
    print("📊 生成完成")
    print(f"{'='*80}")
    print(f"处理节点数: {len(nodes_to_process)}")
    print(f"成功生成: {success_count}")
    print(f"失败数量: {failed_count}")
    print(f"成功索引: {indexed_count}")
    print(f"向量索引节点数: {final_vector_count}")
    print(f"图存储节点数: {total_nodes}")
    print(f"索引覆盖率: {coverage_percent(final_vector_count, total_nodes):.1f}%\n")

    # 7. Search smoke test.
    print("🧪 测试搜索功能...")

    for query in test_queries:
        results = await manager.search_memories(query=query, top_k=3)
        if results:
            print(f"\n✅ 查询 '{query}' 找到 {len(results)} 条记忆:")
            for rank, memory in enumerate(results[:2], 1):
                subject_node = memory.get_subject_node()
                # The topic is the first TOPIC-typed node of the memory, if any.
                topic_nodes = [n for n in memory.nodes if n.node_type == NodeType.TOPIC]
                subject = subject_node.content if subject_node else "?"
                topic = topic_nodes[0].content if topic_nodes else "?"
                print(f" {rank}. {subject} - {topic} (重要性: {memory.importance:.2f})")
        else:
            print(f"\n⚠️ 查询 '{query}' 返回 0 条结果")


async def main():
    """Parse the command line and run the embedding back-fill."""
    import argparse

    parser = argparse.ArgumentParser(description="为节点生成嵌入向量")
    parser.add_argument(
        "--node-types",
        type=str,
        default="主题,客体",
        help="需要生成嵌入的节点类型,逗号分隔(默认:主题,客体)",
    )
    parser.add_argument(
        "--batch-size",
        type=int,
        default=50,
        help="批处理大小(默认:50)",
    )

    args = parser.parse_args()

    # An empty list (e.g. --node-types "") falls back to the built-in defaults.
    target_types = parse_node_types(args.node_types)
    await generate_missing_embeddings(
        target_node_types=target_types or None,
        batch_size=args.batch_size,
    )


if __name__ == "__main__":
    asyncio.run(main())
@router.get("/api/graph/paginated")
async def get_paginated_graph(
    page: int = Query(1, ge=1, description="页码"),
    page_size: int = Query(500, ge=100, le=2000, description="每页节点数"),
    min_importance: float = Query(0.0, ge=0.0, le=1.0, description="最小重要性阈值"),
    node_types: Optional[str] = Query(None, description="节点类型过滤,逗号分隔"),
):
    """Return one page of the graph, ranked by node importance.

    A node's importance is the number of edges touching it divided by the
    total edge count. Nodes below ``min_importance`` are dropped, the rest are
    sorted by importance (descending) and sliced into pages. Only edges whose
    two endpoints are both on the returned page are included.

    Args:
        page: 1-based page number.
        page_size: Number of nodes per page.
        min_importance: Minimum importance a node needs to be listed.
        node_types: Optional comma-separated list of node ``group`` values to keep.

    Returns:
        A JSON response with ``nodes``, ``edges``, ``pagination`` and ``stats``,
        or a 500 response carrying the error message when anything fails.
    """
    try:
        from collections import Counter

        from src.memory_graph.manager_singleton import get_memory_manager

        memory_manager = get_memory_manager()

        # Prefer live data; fall back to the last saved file.
        if memory_manager and memory_manager._initialized:
            full_data = _format_graph_data_from_manager(memory_manager)
        else:
            full_data = load_graph_data_from_file()

        nodes = full_data.get("nodes", [])
        edges = full_data.get("edges", [])

        if node_types:
            # Tolerate "A, B" style input by trimming each entry.
            allowed_types = {t.strip() for t in node_types.split(",") if t.strip()}
            nodes = [n for n in nodes if n.get("group") in allowed_types]

        # Count touching edges in one pass instead of rescanning every edge for
        # every node. A self-loop counts once, like an "or" test on both ends.
        degree = Counter()
        for edge in edges:
            src, dst = edge.get("from"), edge.get("to")
            degree[src] += 1
            if dst != src:
                degree[dst] += 1

        edge_total = max(len(edges), 1)
        ranked_nodes = []
        for node in nodes:
            importance = degree[node["id"]] / edge_total
            if importance >= min_importance:
                # Copy so the source data (possibly shared or cached) is not mutated.
                ranked_nodes.append({**node, "importance": importance})

        ranked_nodes.sort(key=lambda n: n["importance"], reverse=True)

        total_nodes = len(ranked_nodes)
        total_pages = (total_nodes + page_size - 1) // page_size
        start_idx = (page - 1) * page_size
        end_idx = min(start_idx + page_size, total_nodes)

        paginated_nodes = ranked_nodes[start_idx:end_idx]
        node_ids = {n["id"] for n in paginated_nodes}

        # Keep only edges that connect two nodes of this page.
        paginated_edges = [
            e for e in edges
            if e.get("from") in node_ids and e.get("to") in node_ids
        ]

        return JSONResponse(content={"success": True, "data": {
            "nodes": paginated_nodes,
            "edges": paginated_edges,
            "pagination": {
                "page": page,
                "page_size": page_size,
                "total_nodes": total_nodes,
                "total_pages": total_pages,
                "has_next": page < total_pages,
                "has_prev": page > 1,
            },
            "stats": {
                "total_nodes": total_nodes,
                "total_edges": len(paginated_edges),
                "total_memories": full_data.get("stats", {}).get("total_memories", 0),
            },
        }})
    except Exception as e:
        import traceback
        traceback.print_exc()
        return JSONResponse(content={"success": False, "error": str(e)}, status_code=500)
def _cluster_graph_data(nodes: List[Dict], edges: List[Dict], max_nodes: int, cluster_threshold: int) -> Dict:
    """Simplify a graph by folding low-degree nodes of each type into cluster nodes.

    Nodes are grouped by their ``group`` value ("UNKNOWN" when absent). A group
    with at most ``cluster_threshold`` nodes is kept unchanged. For a larger
    group the ``max_nodes // number_of_groups`` best-connected nodes are kept
    and the remainder is replaced by one synthetic cluster node. Edges are
    re-targeted to the surviving or cluster nodes, self-loops are dropped and
    duplicates between the same pair are merged (the first edge's label wins).

    The result is not strictly capped at ``max_nodes``: small groups are kept
    whole and every clustered group adds one extra cluster node.

    Args:
        nodes: Node dicts; each needs an ``id`` and may carry a ``group``.
        edges: Edge dicts with ``from`` and ``to``; edges missing either
            endpoint are ignored.
        max_nodes: Node budget shared evenly between the groups.
        cluster_threshold: Largest group size that is never clustered.

    Returns:
        ``{"nodes": [...], "edges": [...]}`` describing the simplified graph.
    """
    # Undirected neighbour sets, used only to rank nodes by degree.
    adjacency = defaultdict(set)
    for edge in edges:
        src, dst = edge.get("from"), edge.get("to")
        if src is None or dst is None:
            continue
        adjacency[src].add(dst)
        adjacency[dst].add(src)

    type_groups = defaultdict(list)
    for node in nodes:
        type_groups[node.get("group", "UNKNOWN")].append(node)

    # Loop-invariant share of the budget per group (guarded for empty input).
    per_type_budget = max_nodes // max(len(type_groups), 1)

    clustered_nodes = []
    node_mapping = {}  # original node id -> id of the node that represents it

    for node_type, type_nodes in type_groups.items():
        if len(type_nodes) <= cluster_threshold:
            kept, folded = type_nodes, []
        else:
            # Stable sort: equally connected nodes keep their input order.
            ranked = sorted(
                type_nodes,
                key=lambda n: len(adjacency.get(n["id"], ())),
                reverse=True,
            )
            keep_count = min(len(type_nodes), per_type_budget)
            kept, folded = ranked[:keep_count], ranked[keep_count:]

        for node in kept:
            clustered_nodes.append(node)
            node_mapping[node["id"]] = node["id"]

        if folded:
            folded_ids = [n["id"] for n in folded]
            cluster_id = f"cluster_{node_type}_{len(clustered_nodes)}"

            clustered_nodes.append({
                "id": cluster_id,
                "label": f"{node_type} 集群 ({len(folded_ids)}个节点)",
                "group": node_type,
                "title": f"包含 {len(folded_ids)} 个{node_type}节点",
                "is_cluster": True,
                "cluster_size": len(folded_ids),
                "clustered_nodes": folded_ids[:10],  # preview of the first 10 members only
            })

            for node_id in folded_ids:
                node_mapping[node_id] = cluster_id

    # Rebuild edges against the surviving nodes, one edge per unordered pair.
    clustered_edges = []
    seen_pairs = set()
    for edge in edges:
        from_id = node_mapping.get(edge.get("from"))
        to_id = node_mapping.get(edge.get("to"))

        # Compare with None so falsy ids such as 0 or "" are not discarded.
        if from_id is None or to_id is None or from_id == to_id:
            continue

        # frozenset needs no ordering, so int node ids and str cluster ids can mix.
        pair = frozenset((from_id, to_id))
        if pair in seen_pairs:
            continue
        seen_pairs.add(pair)

        clustered_edges.append({
            "id": f"{from_id}_{to_id}",
            "from": from_id,
            "to": to_id,
            "label": edge.get("label", ""),
            "arrows": "to",
        })

    return {
        "nodes": clustered_nodes,
        "edges": clustered_edges,
    }
+

⚡ 性能模式

+
+

数据加载模式

+
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+
+ 💡 性能提示: + • 节点 >500: 优先使用聚类或分页模式
+ • 加载完成后点击"禁用物理"按钮
+ • 鼠标悬停可查看节点标签
+ • 点击节点高亮关联路径(完整模式1跳) +
+
+
+

ℹ️ 节点信息

@@ -499,6 +596,12 @@ + +
+ @@ -538,6 +641,16 @@ memories: [] }; let originalData = null; + let currentLoadMode = 'auto'; + let currentPage = 1; + let totalPages = 1; + let isLoading = false; + let hoveredNodeId = null; // 当前鼠标悬停的节点ID + let highlightedNodeIds = new Set(); // 当前高亮的节点ID集合 + + // 邻接表缓存,用于高亮优化 + let adjacencyCache = null; + let nodeDegreeCache = null; // 节点颜色配置 const nodeColors = { @@ -548,75 +661,130 @@ 'VALUE': '#98D8C8' }; + // 切换加载模式 + function changeLoadMode() { + const selected = document.querySelector('input[name="loadMode"]:checked'); + if (selected) { + currentLoadMode = selected.value; + console.log('切换加载模式:', currentLoadMode); + loadGraph(); + } + } + + // 构建邻接表缓存 + function buildAdjacencyCache(edges) { + adjacencyCache = new Map(); + nodeDegreeCache = new Map(); + + edges.forEach(edge => { + const from = edge.from; + const to = edge.to; + + if (!adjacencyCache.has(from)) { + adjacencyCache.set(from, []); + nodeDegreeCache.set(from, 0); + } + if (!adjacencyCache.has(to)) { + adjacencyCache.set(to, []); + nodeDegreeCache.set(to, 0); + } + + adjacencyCache.get(from).push({ nodeId: to, edgeId: edge.id }); + adjacencyCache.get(to).push({ nodeId: from, edgeId: edge.id }); + nodeDegreeCache.set(from, nodeDegreeCache.get(from) + 1); + nodeDegreeCache.set(to, nodeDegreeCache.get(to) + 1); + }); + + console.log(`邻接表缓存构建完成: ${adjacencyCache.size} 个节点`); + } + // 初始化图形 function initNetwork() { const container = document.getElementById('memory-graph'); - const options = { - nodes: { - shape: 'dot', - size: 20, - font: { - size: 14, - color: '#333', - face: 'Microsoft YaHei' - }, - borderWidth: 2, - borderWidthSelected: 4, - shadow: true - }, - edges: { - width: 2, - color: { - color: '#848484', - highlight: '#667eea', - hover: '#764ba2' - }, - arrows: { - to: { - enabled: true, - scaleFactor: 0.5 + // 动态配置:大数据集使用极简配置 + const getNetworkOptions = (nodeCount) => { + const isLargeDataset = nodeCount > 500; + const isVeryLargeDataset = nodeCount > 2000; + + return { + nodes: { + 
shape: 'dot', + size: isVeryLargeDataset ? 10 : (isLargeDataset ? 15 : 20), + font: { + size: isVeryLargeDataset ? 0 : (isLargeDataset ? 10 : 14), // 超大数据集隐藏文字 + color: '#333', + face: 'Microsoft YaHei' + }, + borderWidth: isVeryLargeDataset ? 0 : 2, + borderWidthSelected: 3, + shadow: false, // 始终禁用阴影 + scaling: { + min: isVeryLargeDataset ? 5 : 10, + max: isVeryLargeDataset ? 15 : 30 } }, - smooth: { - enabled: true, - type: 'dynamic' + edges: { + width: isVeryLargeDataset ? 0.5 : (isLargeDataset ? 1 : 2), + color: { + color: '#848484', + highlight: '#667eea', + hover: '#764ba2', + opacity: isVeryLargeDataset ? 0.3 : 0.7 // 超大数据集边半透明 + }, + arrows: { + to: { + enabled: !isVeryLargeDataset, // 超大数据集禁用箭头 + scaleFactor: 0.3 + } + }, + smooth: false, // 始终禁用平滑以提升性能 + font: { + size: 0, // 始终隐藏边标签 + strokeWidth: 0 + }, + shadow: false, + selectionWidth: 2 }, - font: { - size: 11, - color: '#666', - face: 'Microsoft YaHei', - align: 'middle' + physics: { + enabled: true, + barnesHut: { + gravitationalConstant: isVeryLargeDataset ? -800 : (isLargeDataset ? -2000 : -8000), + centralGravity: isVeryLargeDataset ? 0.1 : 0.3, + springLength: isVeryLargeDataset ? 50 : (isLargeDataset ? 95 : 150), + springConstant: isVeryLargeDataset ? 0.001 : (isLargeDataset ? 0.02 : 0.04), + damping: isVeryLargeDataset ? 0.3 : 0.09, + avoidOverlap: 0 + }, + stabilization: { + enabled: true, + iterations: isVeryLargeDataset ? 50 : (isLargeDataset ? 100 : 300), + updateInterval: isVeryLargeDataset ? 100 : (isLargeDataset ? 50 : 25), + onlyDynamicEdges: false, + fit: true + }, + solver: 'barnesHut', + timestep: isVeryLargeDataset ? 1.0 : 0.5, + adaptiveTimestep: true, + maxVelocity: isVeryLargeDataset ? 100 : 50, + minVelocity: isVeryLargeDataset ? 
5 : 0.75 + }, + interaction: { + hover: true, // 始终启用hover - 我们用自定义事件处理标签显示 + tooltipDelay: 300, + zoomView: true, + dragView: true, + hideEdgesOnDrag: true, // 始终在拖拽时隐藏边 + hideEdgesOnZoom: true, // 始终在缩放时隐藏边 + hideNodesOnDrag: isVeryLargeDataset, // 超大数据集拖拽时也隐藏节点 + navigationButtons: false, + keyboard: false + }, + layout: { + improvedLayout: !isVeryLargeDataset, // 超大数据集禁用改进布局 + randomSeed: 2 // 固定随机种子以获得一致的布局 } - }, - physics: { - enabled: true, - barnesHut: { - gravitationalConstant: -8000, - centralGravity: 0.3, - springLength: 150, - springConstant: 0.04, - damping: 0.09, - avoidOverlap: 0.1 - }, - stabilization: { - enabled: true, - iterations: 300, - updateInterval: 25, - onlyDynamicEdges: false, - fit: true - }, - // 稳定后自动停止物理引擎 - solver: 'barnesHut', - timestep: 0.5, - adaptiveTimestep: true - }, - interaction: { - hover: true, - tooltipDelay: 100, - zoomView: true, - dragView: true - } + }; }; const data = { @@ -624,7 +792,13 @@ edges: new vis.DataSet([]) }; - network = new vis.Network(container, data, options); + // 初始使用中等配置 + network = new vis.Network(container, data, getNetworkOptions(500)); + + // 保存配置函数供后续使用 + network.updateOptions = (nodeCount) => { + network.setOptions(getNetworkOptions(nodeCount)); + }; // 添加事件监听 network.on('click', function(params) { @@ -638,76 +812,395 @@ } }); + // 添加鼠标悬停事件(仅在小数据集启用) + network.on('hoverNode', function(params) { + hoveredNodeId = params.node; + updateNodeLabel(params.node, true); // 显示悬停节点的标签 + }); + + network.on('blurNode', function(params) { + // 如果节点不在高亮集合中,隐藏标签 + if (!highlightedNodeIds.has(params.node)) { + updateNodeLabel(params.node, false); + } + hoveredNodeId = null; + }); + // 稳定化完成后停止物理引擎 network.on('stabilizationIterationsDone', function() { - console.log('初始稳定化完成,停止物理引擎'); + console.log('稳定化完成,停止物理引擎'); network.setOptions({ physics: { enabled: false } }); + updateLoadingProgress('布局完成'); }); // 添加稳定化进度监听 network.on('stabilizationProgress', function(params) { const progress = 
Math.round((params.iterations / params.total) * 100); - if (progress % 10 === 0) { // 每10%打印一次 - console.log(`稳定化进度: ${progress}%`); - } + updateLoadingProgress(`布局中: ${progress}%`); }); } - // 加载图形数据 - async function loadGraph() { - try { - document.getElementById('loading').style.display = 'block'; - - const response = await fetch('/visualizer/api/graph/full'); - const result = await response.json(); + // 更新加载进度提示 + function updateLoadingProgress(message) { + const progressDiv = document.querySelector('.loading-progress'); + if (progressDiv) { + progressDiv.textContent = message; + } else { + const loadingDiv = document.getElementById('loading'); + if (loadingDiv) { + const existing = loadingDiv.querySelector('.loading-progress'); + if (!existing) { + const newDiv = document.createElement('div'); + newDiv.className = 'loading-progress'; + newDiv.textContent = message; + loadingDiv.appendChild(newDiv); + } else { + existing.textContent = message; + } + } + } + } - if (result.success) { + // 动态更新节点标签显示 + function updateNodeLabel(nodeId, show) { + if (!network || !graphData) return; + + const nodes = network.body.data.nodes; + const node = nodes.get(nodeId); + if (!node) return; + + // 获取原始节点数据 + const originalNode = graphData.nodes.find(n => n.id === nodeId); + if (!originalNode) return; + + // 更新节点标签 + nodes.update({ + id: nodeId, + label: show ? originalNode.label : '', + font: show ? { size: 14, color: '#333' } : { size: 0 } + }); + } + + // 批量更新边标签显示 + function updateEdgeLabels(edgeIds, show) { + if (!network || !graphData || edgeIds.size === 0) return; + + const edges = network.body.data.edges; + const edgeUpdates = []; + + edgeIds.forEach(edgeId => { + const edge = edges.get(edgeId); + if (!edge) return; + + // 获取原始边数据 + const originalEdge = graphData.edges.find(e => e.id === edgeId); + if (!originalEdge) return; + + edgeUpdates.push({ + id: edgeId, + label: show ? (originalEdge.label || '') : '', + font: show ? 
{ size: 11, color: '#666' } : { size: 0 } + }); + }); + + if (edgeUpdates.length > 0) { + edges.update(edgeUpdates); + } + } + + // 加载图形数据(智能选择模式) + async function loadGraph() { + if (isLoading) { + console.log('已有加载任务在进行中'); + return; + } + + try { + isLoading = true; + document.getElementById('loading').style.display = 'block'; + updateLoadingProgress('获取数据摘要...'); + + // 首先获取摘要信息以决定加载策略 + const summaryResponse = await fetch('/visualizer/api/graph/summary'); + const summaryResult = await summaryResponse.json(); + + if (!summaryResult.success) { + throw new Error(summaryResult.error); + } + + const totalNodes = summaryResult.data.stats.total_nodes; + console.log(`图数据节点总数: ${totalNodes}`); + + // 根据模式和数据量选择加载策略(更保守的阈值) + let loadMode = currentLoadMode; + if (loadMode === 'auto') { + if (totalNodes <= 500) { + loadMode = 'full'; + } else if (totalNodes <= 2000) { + loadMode = 'cluster'; + } else { + loadMode = 'paginated'; + } + console.log(`自动选择加载模式: ${loadMode} (节点数: ${totalNodes})`); + } + + // 根据选择的模式加载数据 + let result; + if (loadMode === 'full') { + updateLoadingProgress('加载完整数据...'); + result = await loadFullGraph(); + } else if (loadMode === 'cluster') { + updateLoadingProgress('加载聚类数据...'); + result = await loadClusteredGraph(); + } else if (loadMode === 'paginated') { + updateLoadingProgress('加载分页数据 (第1页)...'); + result = await loadPaginatedGraph(1); + } + + if (result && result.success) { + updateLoadingProgress('渲染图形...'); originalData = result.data; updateGraph(result.data); updateStats(result.data.stats); + + // 构建缓存以优化后续操作 + buildAdjacencyCache(result.data.edges || []); + + // 显示/隐藏分页控制 + const paginationControls = document.getElementById('paginationControls'); + if (result.data.pagination) { + const p = result.data.pagination; + totalPages = p.total_pages; + currentPage = p.page; + paginationControls.style.display = 'flex'; + document.getElementById('pageInfo').textContent = `第 ${p.page}/${p.total_pages} 页`; + 
document.getElementById('prevPageBtn').disabled = !p.has_prev; + document.getElementById('nextPageBtn').disabled = !p.has_next; + } else { + paginationControls.style.display = 'none'; + } + + // 显示加载信息 + if (result.data.clustered) { + console.log(`✅ 聚类简化: ${result.data.stats.original_nodes} → ${result.data.stats.clustered_nodes} 节点`); + } else if (result.data.pagination) { + const p = result.data.pagination; + console.log(`✅ 分页加载: 第 ${p.page}/${p.total_pages} 页 (共 ${p.total_nodes} 节点)`); + } else { + console.log(`✅ 完整加载: ${result.data.stats.total_nodes} 节点`); + } } else { - alert('加载失败: ' + result.error); + alert('加载失败: ' + (result ? result.error : '未知错误')); } } catch (error) { console.error('加载图形失败:', error); alert('加载失败: ' + error.message); } finally { + isLoading = false; document.getElementById('loading').style.display = 'none'; } } - // 更新图形显示 + // 加载完整图数据 + async function loadFullGraph() { + const response = await fetch('/visualizer/api/graph/full'); + return await response.json(); + } + + // 加载聚类简化数据 + async function loadClusteredGraph(maxNodes = 300) { + const response = await fetch(`/visualizer/api/graph/clustered?max_nodes=${maxNodes}&cluster_threshold=10`); + return await response.json(); + } + + // 加载分页数据 + async function loadPaginatedGraph(page = 1, pageSize = 500) { + const response = await fetch(`/visualizer/api/graph/paginated?page=${page}&page_size=${pageSize}&min_importance=0.0`); + return await response.json(); + } + + // 更新图形显示(优化版本 - 使用节点限制和延迟渲染) function updateGraph(data) { graphData = data; + const nodeCount = data.nodes.length; + const edgeCount = data.edges.length; - // 处理节点数据 - const nodes = data.nodes.map(node => ({ - id: node.id, - label: node.label, - title: node.title, - group: node.group, - color: nodeColors[node.group] || '#999', - metadata: node.metadata - })); + console.log(`准备更新图形: ${nodeCount} 个节点, ${edgeCount} 条边`); - // 处理边数据 - const edges = data.edges.map(edge => ({ - id: edge.id, - from: edge.from, - to: edge.to, - label: 
edge.label, - title: edge.title, - width: edge.importance * 3 + 1 - })); + // 对于超大数据集,进一步限制 + const MAX_RENDERABLE_NODES = 5000; + const MAX_RENDERABLE_EDGES = 10000; + + let nodesToRender = data.nodes; + let edgesToRender = data.edges; + let isLimited = false; - // 更新网络 - network.setData({ - nodes: new vis.DataSet(nodes), - edges: new vis.DataSet(edges) + // 如果超过限制,只渲染最重要的节点 + if (nodeCount > MAX_RENDERABLE_NODES) { + console.warn(`节点数 ${nodeCount} 超过渲染限制 ${MAX_RENDERABLE_NODES},将只显示最重要的节点`); + + // 计算节点重要性 + const nodeDegrees = new Map(); + data.edges.forEach(edge => { + nodeDegrees.set(edge.from, (nodeDegrees.get(edge.from) || 0) + 1); + nodeDegrees.set(edge.to, (nodeDegrees.get(edge.to) || 0) + 1); + }); + + // 按连接度排序,保留前N个 + nodesToRender = data.nodes + .map(node => ({ + ...node, + degree: nodeDegrees.get(node.id) || 0 + })) + .sort((a, b) => b.degree - a.degree) + .slice(0, MAX_RENDERABLE_NODES); + + const renderableNodeIds = new Set(nodesToRender.map(n => n.id)); + edgesToRender = data.edges.filter(e => + renderableNodeIds.has(e.from) && renderableNodeIds.has(e.to) + ); + + isLimited = true; + alert(`⚠️ 数据量过大\n原始: ${nodeCount} 节点\n仅渲染: ${nodesToRender.length} 个最重要节点\n建议使用"聚类简化"或"分页加载"模式`); + } + + // 如果边数过多,也进行限制 + if (edgesToRender.length > MAX_RENDERABLE_EDGES) { + console.warn(`边数 ${edgesToRender.length} 超过渲染限制 ${MAX_RENDERABLE_EDGES}`); + edgesToRender = edgesToRender.slice(0, MAX_RENDERABLE_EDGES); + isLimited = true; + } + + // 根据节点数量动态调整网络配置 + if (network.updateOptions) { + network.updateOptions(nodesToRender.length); + } + + console.log(`实际渲染: ${nodesToRender.length} 个节点, ${edgesToRender.length} 条边`); + + // 极简化节点数据 - 移除所有不必要的属性 + const startTime = performance.now(); + const nodes = nodesToRender.map(node => { + const baseNode = { + id: node.id, + label: '', // 默认不显示标签,仅在悬停或高亮时显示 + group: node.group, + color: nodeColors[node.group] || '#999' + }; + + // 仅在小数据集中添加title(tooltip) + if (nodesToRender.length <= 1000) { + baseNode.title = node.title || 
node.label; + } + + // 如果是聚类节点,使用不同的样式并始终显示标签 + if (node.is_cluster) { + baseNode.shape = 'star'; + baseNode.size = 25 + Math.min(node.cluster_size / 10, 15); + baseNode.label = `${node.group} (${node.cluster_size})`; + baseNode.font = { size: 14, bold: true }; + } + + return baseNode; }); - // 注意:setData 会自动触发物理引擎重新布局 - // stabilizationIterationsDone 事件监听器会自动停止物理引擎 + // 极简化边数据 + const edges = edgesToRender.map(edge => { + const baseEdge = { + id: edge.id, + from: edge.from, + to: edge.to, + label: '' // 默认不显示标签,仅在高亮时显示 + }; + + // 在小数据集中保留边宽度 + if (edgesToRender.length <= 1000) { + baseEdge.width = (edge.importance || 0.5) * 2 + 0.5; + } + + return baseEdge; + }); + + // 批量更新网络(使用异步渲染以避免阻塞UI) + const prepTime = performance.now() - startTime; + console.log(`数据准备耗时: ${prepTime.toFixed(2)}ms`); + + // 使用 requestAnimationFrame 异步渲染,避免阻塞UI + requestAnimationFrame(() => { + const renderStart = performance.now(); + + // 先清空现有数据 + network.setData({ + nodes: new vis.DataSet([]), + edges: new vis.DataSet([]) + }); + + // 分批添加节点和边以提升响应性 + const BATCH_SIZE = 500; + let nodeIndex = 0; + let edgeIndex = 0; + + const nodeDataSet = new vis.DataSet(); + const edgeDataSet = new vis.DataSet(); + + function addNodeBatch() { + const batch = nodes.slice(nodeIndex, nodeIndex + BATCH_SIZE); + if (batch.length > 0) { + nodeDataSet.add(batch); + nodeIndex += BATCH_SIZE; + updateLoadingProgress(`加载节点: ${Math.min(nodeIndex, nodes.length)}/${nodes.length}`); + + if (nodeIndex < nodes.length) { + setTimeout(addNodeBatch, 0); + } else { + // 节点加载完成,开始加载边 + setTimeout(addEdgeBatch, 0); + } + } + } + + function addEdgeBatch() { + const batch = edges.slice(edgeIndex, edgeIndex + BATCH_SIZE); + if (batch.length > 0) { + edgeDataSet.add(batch); + edgeIndex += BATCH_SIZE; + updateLoadingProgress(`加载边: ${Math.min(edgeIndex, edges.length)}/${edges.length}`); + + if (edgeIndex < edges.length) { + setTimeout(addEdgeBatch, 0); + } else { + // 所有数据加载完成,更新网络 + finishRendering(); + } + } + } + + function 
finishRendering() { + network.setData({ + nodes: nodeDataSet, + edges: edgeDataSet + }); + + const renderTime = performance.now() - renderStart; + console.log(`图形渲染总耗时: ${renderTime.toFixed(2)}ms`); + updateLoadingProgress('渲染完成'); + } + + // 对于小数据集,直接一次性加载 + if (nodes.length <= 1000) { + nodeDataSet.add(nodes); + edgeDataSet.add(edges); + network.setData({ + nodes: nodeDataSet, + edges: edgeDataSet + }); + const renderTime = performance.now() - renderStart; + console.log(`图形渲染总耗时: ${renderTime.toFixed(2)}ms`); + } else { + // 大数据集使用分批加载 + addNodeBatch(); + } + }); } // 更新统计信息 @@ -748,7 +1241,7 @@ } try { - const response = await fetch(`/visualizer/api/search?q=${encodeURIComponent(query)}&limit=50`); + const response = await fetch(`api/search?q=${encodeURIComponent(query)}&limit=50`); const result = await response.json(); if (result.success) { @@ -788,31 +1281,32 @@ } } - // 高亮与选中节点连接的节点(最多3跳深度) + // 高亮与选中节点连接的节点(优化版本,使用缓存) function highlightConnectedNodes(nodeId) { - if (!network || !graphData) return; + if (!network || !graphData || !adjacencyCache) return; - // 使用 BFS 探索最多3跳深度的连接节点 - const MAX_DEPTH = 3; + const startTime = performance.now(); + + // 根据加载模式和数据规模决定探索深度 + // 完整加载模式:1跳 + // 聚类/分页模式:3跳(因为数据量已经减少) + const nodeCount = graphData.nodes.length; + let MAX_DEPTH = 3; + + if (currentLoadMode === 'full' || (currentLoadMode === 'auto' && nodeCount <= 500)) { + MAX_DEPTH = 1; // 完整加载模式限制为1跳 + console.log('完整加载模式:使用1跳探索深度'); + } else { + console.log('聚类/分页模式:使用3跳探索深度'); + } + + // 使用缓存的邻接表进行 BFS const connectedNodeIds = new Set(); const connectedEdgeIds = new Set(); const visited = new Set(); const queue = [{ nodeId: nodeId, depth: 0 }]; - // 构建邻接表以提高查询效率 - const adjacencyMap = new Map(); - graphData.edges.forEach(edge => { - if (!adjacencyMap.has(edge.from)) { - adjacencyMap.set(edge.from, []); - } - if (!adjacencyMap.has(edge.to)) { - adjacencyMap.set(edge.to, []); - } - adjacencyMap.get(edge.from).push({ nodeId: edge.to, edgeId: edge.id }); - 
adjacencyMap.get(edge.to).push({ nodeId: edge.from, edgeId: edge.id }); - }); - - // BFS 遍历,限制深度为3跳 + // BFS 遍历 while (queue.length > 0) { const { nodeId: currentNode, depth } = queue.shift(); @@ -820,11 +1314,10 @@ visited.add(currentNode); connectedNodeIds.add(currentNode); - // 如果已经达到最大深度,不再探索更深的节点 if (depth >= MAX_DEPTH) continue; - // 探索相邻节点 - const neighbors = adjacencyMap.get(currentNode) || []; + // 使用缓存的邻接表 + const neighbors = adjacencyCache.get(currentNode) || []; neighbors.forEach(({ nodeId: neighborId, edgeId }) => { connectedEdgeIds.add(edgeId); if (!visited.has(neighborId)) { @@ -833,22 +1326,30 @@ }); } - console.log(`选中节点: ${nodeId}, 连接的节点数: ${connectedNodeIds.size}, 连接的边数: ${connectedEdgeIds.size} (最大深度: ${MAX_DEPTH})`); + const searchTime = performance.now() - startTime; + console.log(`BFS搜索耗时: ${searchTime.toFixed(2)}ms, 找到 ${connectedNodeIds.size} 个连接节点 (深度${MAX_DEPTH}跳)`); - // 更新所有节点的透明度 + // 更新高亮节点集合 + highlightedNodeIds = connectedNodeIds; + + // 批量更新节点和边 const allNodes = network.body.data.nodes; const allEdges = network.body.data.edges; - const updates = []; + const nodeUpdates = []; + const edgeUpdates = []; + // 更新节点 allNodes.get().forEach(node => { + const originalNode = graphData.nodes.find(n => n.id === node.id); + if (!originalNode) return; + if (connectedNodeIds.has(node.id)) { - // 连接的节点保持正常,甚至可以加强显示 - // 被选中的节点特别突出 const isSelected = node.id === nodeId; - updates.push({ + nodeUpdates.push({ id: node.id, opacity: 1.0, borderWidth: isSelected ? 5 : 3, + label: originalNode.label || '', // 显示高亮节点的标签 font: { color: isSelected ? '#667eea' : '#333', size: isSelected ? 
16 : 14, @@ -856,46 +1357,52 @@ } }); } else { - // 无关节点变为高度透明 - const dimmedColor = hexToRgba(node.color, 0.08); - updates.push({ + const originalColor = nodeColors[node.group] || '#999'; + const dimmedColor = hexToRgba(originalColor, 0.08); + nodeUpdates.push({ id: node.id, color: { background: dimmedColor, border: dimmedColor, - highlight: { background: dimmedColor, border: dimmedColor } }, opacity: 0.08, - font: { color: 'rgba(51, 51, 51, 0.08)', size: 14 } + label: '', // 隐藏非高亮节点的标签 + font: { color: 'rgba(51, 51, 51, 0.08)', size: 0 } }); } }); - allNodes.update(updates); - // 更新所有边的透明度 - const edgeUpdates = []; + // 更新边 allEdges.get().forEach(edge => { + const originalEdge = graphData.edges.find(e => e.id === edge.id); + if (connectedEdgeIds.has(edge.id)) { - // 连接的边加强显示 edgeUpdates.push({ id: edge.id, color: { color: '#667eea', opacity: 1.0 }, width: 4, + label: originalEdge?.label || '', // 显示高亮边的标签 font: { color: '#667eea', size: 12 } }); } else { - // 无关边变为高度透明 edgeUpdates.push({ id: edge.id, color: { color: '#848484', opacity: 0.03 }, width: 1, - font: { color: 'rgba(102, 102, 102, 0.03)', size: 11 } + label: '', // 隐藏非高亮边的标签 + font: { color: 'rgba(102, 102, 102, 0.03)', size: 0 } }); } }); + + // 批量应用更新 + allNodes.update(nodeUpdates); allEdges.update(edgeUpdates); - // 将视图聚焦到连接的子图 + const totalTime = performance.now() - startTime; + console.log(`高亮操作总耗时: ${totalTime.toFixed(2)}ms`); + + // 聚焦视图 if (connectedNodeIds.size > 1 && connectedNodeIds.size < 100) { network.fit({ nodes: Array.from(connectedNodeIds), @@ -907,38 +1414,46 @@ } } - // 重置节点高亮状态 + // 重置节点高亮状态(优化版本) function resetNodeHighlight() { if (!network || !graphData) return; + const startTime = performance.now(); const allNodes = network.body.data.nodes; const allEdges = network.body.data.edges; - // 恢复所有节点 - 重新应用原始颜色 - const nodeUpdates = []; - allNodes.get().forEach(node => { + // 清空高亮节点集合 + highlightedNodeIds.clear(); + + // 批量恢复所有节点(不显示标签,除非是悬停节点) + const nodeUpdates = 
allNodes.get().map(node => { const originalColor = nodeColors[node.group] || '#999'; - nodeUpdates.push({ + const shouldShowLabel = node.id === hoveredNodeId; // 只显示悬停节点的标签 + const originalNode = shouldShowLabel ? graphData.nodes.find(n => n.id === node.id) : null; + + return { id: node.id, color: originalColor, opacity: 1.0, borderWidth: 2, - font: { color: '#333', size: 14, bold: false } - }); + label: shouldShowLabel && originalNode ? originalNode.label : '', + font: { color: '#333', size: shouldShowLabel ? 14 : 0, bold: false } + }; }); allNodes.update(nodeUpdates); - // 恢复所有边 - const edgeUpdates = []; - allEdges.get().forEach(edge => { - edgeUpdates.push({ - id: edge.id, - color: { color: '#848484', opacity: 1.0 }, - width: 2, - font: { color: '#666', size: 11 } - }); - }); + // 批量恢复所有边(不显示标签) + const edgeUpdates = allEdges.get().map(edge => ({ + id: edge.id, + color: { color: '#848484', opacity: 1.0 }, + width: 2, + label: '', + font: { color: '#666', size: 0 } + })); allEdges.update(edgeUpdates); + + const endTime = performance.now(); + console.log(`重置高亮耗时: ${(endTime - startTime).toFixed(2)}ms`); } // 辅助函数:将十六进制颜色转换为 rgba @@ -953,7 +1468,7 @@ return `rgba(${r}, ${g}, ${b}, ${alpha})`; } - // 应用过滤器 + // 应用过滤器(优化版本) function applyFilters() { if (!originalData) return; @@ -991,25 +1506,27 @@ // 重置高亮状态 resetNodeHighlight(); - // 重新启用物理引擎以重新布局,避免节点排版错乱 - if (network) { + // 重建缓存 + buildAdjacencyCache(filteredEdges); + + // 重新启用物理引擎以重新布局 + if (network && filteredNodes.length > 0) { network.setOptions({ physics: { enabled: true } }); - // 设置超时保护,确保物理引擎最终会停止 + // 超时保护 let stabilized = false; - const stabilizationTimeout = setTimeout(() => { + const timeout = setTimeout(() => { if (!stabilized) { console.log('物理引擎稳定超时,强制停止'); network.setOptions({ physics: { enabled: false } }); } - }, 5000); // 5秒超时 + }, 5000); - // 等待稳定后再禁用物理引擎 network.once('stabilizationIterationsDone', function() { stabilized = true; - clearTimeout(stabilizationTimeout); + clearTimeout(timeout); 
network.setOptions({ physics: { enabled: false } }); - console.log('物理引擎已稳定并停止'); + console.log('过滤后重新布局完成'); }); } } @@ -1026,6 +1543,66 @@ } } + // 切换物理引擎 + function togglePhysics() { + if (!network) return; + + const currentPhysics = network.physics.options.enabled; + network.setOptions({ physics: { enabled: !currentPhysics } }); + + const btn = document.getElementById('physicsToggle'); + btn.textContent = currentPhysics ? '⚙️ 启用物理' : '⏸️ 禁用物理'; + + console.log(`物理引擎: ${currentPhysics ? '已禁用' : '已启用'}`); + } + + // 加载下一页 + async function loadNextPage() { + if (currentPage < totalPages && !isLoading) { + currentPage++; + await loadPaginatedGraphAndUpdate(); + } + } + + // 加载上一页 + async function loadPreviousPage() { + if (currentPage > 1 && !isLoading) { + currentPage--; + await loadPaginatedGraphAndUpdate(); + } + } + + // 加载分页并更新UI + async function loadPaginatedGraphAndUpdate() { + try { + isLoading = true; + document.getElementById('loading').style.display = 'block'; + updateLoadingProgress(`加载第 ${currentPage} 页...`); + + const result = await loadPaginatedGraph(currentPage); + + if (result.success) { + originalData = result.data; + updateGraph(result.data); + updateStats(result.data.stats); + buildAdjacencyCache(result.data.edges || []); + + // 更新分页UI + const p = result.data.pagination; + totalPages = p.total_pages; + document.getElementById('pageInfo').textContent = `第 ${p.page}/${p.total_pages} 页`; + document.getElementById('prevPageBtn').disabled = !p.has_prev; + document.getElementById('nextPageBtn').disabled = !p.has_next; + } + } catch (error) { + console.error('加载分页失败:', error); + alert('加载失败: ' + error.message); + } finally { + isLoading = false; + document.getElementById('loading').style.display = 'none'; + } + } + // 导出图形数据 function exportGraph() { const dataStr = JSON.stringify(graphData, null, 2); diff --git a/src/memory_graph/core/builder.py b/src/memory_graph/core/builder.py index f8607e583..4b0d66218 100644 --- a/src/memory_graph/core/builder.py 
+++ b/src/memory_graph/core/builder.py @@ -185,12 +185,19 @@ class MemoryBuilder: logger.debug(f"复用已存在的主体节点: {existing.id}") return existing + # 为主体和值节点生成嵌入向量(用于人名/实体和重要描述检索) + embedding = None + if node_type in (NodeType.SUBJECT, NodeType.VALUE): + # 只为有足够内容的节点生成嵌入(避免浪费) + if len(content.strip()) >= 2: + embedding = await self._generate_embedding(content) + # 创建新节点 node = MemoryNode( id=self._generate_node_id(), content=content, node_type=node_type, - embedding=None, # 主体和属性不需要嵌入 + embedding=embedding, # 主体、值需要嵌入,属性不需要 metadata={"memory_ids": [memory_id]}, ) diff --git a/src/memory_graph/tools/memory_tools.py b/src/memory_graph/tools/memory_tools.py index ec53c3ea3..195d2d01b 100644 --- a/src/memory_graph/tools/memory_tools.py +++ b/src/memory_graph/tools/memory_tools.py @@ -516,6 +516,22 @@ class MemoryTools: # 记录最高分数 if mem_id not in memory_scores or similarity > memory_scores[mem_id]: memory_scores[mem_id] = similarity + + # 🔥 详细日志:检查初始召回情况 + logger.info( + f"初始向量搜索: 返回{len(similar_nodes)}个节点 → " + f"提取{len(initial_memory_ids)}条记忆" + ) + if len(initial_memory_ids) == 0: + logger.warning( + f"⚠️ 向量搜索未找到任何记忆!" + f"可能原因:1) 嵌入模型理解问题 2) 记忆节点未建立索引 3) 查询表达与存储内容差异过大" + ) + # 输出相似节点的详细信息用于调试 + if similar_nodes: + logger.debug(f"向量搜索返回的节点元数据样例: {similar_nodes[0][2] if len(similar_nodes) > 0 else 'None'}") + elif len(initial_memory_ids) < 3: + logger.warning(f"⚠️ 初始召回记忆数量较少({len(initial_memory_ids)}条),可能影响结果质量") # 3. 
图扩展(如果启用且有expand_depth) expanded_memory_scores = {} @@ -609,42 +625,37 @@ class MemoryTools: if dominant_node_type in ["ATTRIBUTE", "REFERENCE"] or memory_type == "FACT": # 事实性记忆(如文档地址、配置信息):语义相似度最重要 weights = { - "similarity": 0.65, # 语义相似度 65% ⬆️ - "importance": 0.20, # 重要性 20% - "recency": 0.05, # 时效性 5% ⬇️(事实不随时间失效) - "activation": 0.10 # 激活度 10% ⬇️(避免冷门信息被压制) + "similarity": 0.70, # 语义相似度 70% ⬆️ + "importance": 0.25, # 重要性 25% ⬆️ + "recency": 0.05, # 时效性 5%(事实不随时间失效) } elif memory_type in ["CONVERSATION", "EPISODIC"] or dominant_node_type == "EVENT": - # 对话/事件记忆:时效性和激活度更重要 + # 对话/事件记忆:时效性更重要 weights = { - "similarity": 0.45, # 语义相似度 45% - "importance": 0.15, # 重要性 15% - "recency": 0.20, # 时效性 20% ⬆️ - "activation": 0.20 # 激活度 20% + "similarity": 0.55, # 语义相似度 55% ⬆️ + "importance": 0.20, # 重要性 20% ⬆️ + "recency": 0.25, # 时效性 25% ⬆️ } elif dominant_node_type == "ENTITY" or memory_type == "SEMANTIC": # 实体/语义记忆:平衡各项 weights = { - "similarity": 0.50, # 语义相似度 50% - "importance": 0.25, # 重要性 25% + "similarity": 0.60, # 语义相似度 60% ⬆️ + "importance": 0.30, # 重要性 30% ⬆️ "recency": 0.10, # 时效性 10% - "activation": 0.15 # 激活度 15% } else: # 默认权重(保守策略,偏向语义) weights = { - "similarity": 0.55, # 语义相似度 55% - "importance": 0.20, # 重要性 20% + "similarity": 0.65, # 语义相似度 65% ⬆️ + "importance": 0.25, # 重要性 25% ⬆️ "recency": 0.10, # 时效性 10% - "activation": 0.15 # 激活度 15% } - # 综合分数计算 + # 综合分数计算(🔥 移除激活度影响) final_score = ( similarity_score * weights["similarity"] + importance_score * weights["importance"] + - recency_score * weights["recency"] + - activation_score * weights["activation"] + recency_score * weights["recency"] ) # 🆕 节点类型加权:对REFERENCE/ATTRIBUTE节点额外加分(促进事实性信息召回) @@ -943,11 +954,16 @@ class MemoryTools: logger.warning("嵌入生成失败,跳过节点搜索") return [] - # 向量搜索 + # 向量搜索(增加返回数量以提高召回率) similar_nodes = await self.vector_store.search_similar_nodes( query_embedding=query_embedding, - limit=top_k * 2, # 多取一些,后续过滤 + limit=top_k * 5, # 🔥 从2倍提升到5倍,提高初始召回率 + min_similarity=0.0, # 不在这里过滤,交给后续评分 
) + + logger.debug(f"单查询向量搜索: 查询='{query}', 返回节点数={len(similar_nodes)}") + if similar_nodes: + logger.debug(f"Top 3相似度: {[f'{sim:.3f}' for _, sim, _ in similar_nodes[:3]]}") return similar_nodes @@ -1003,11 +1019,13 @@ class MemoryTools: similar_nodes = await self.vector_store.search_with_multiple_queries( query_embeddings=query_embeddings, query_weights=query_weights, - limit=top_k * 2, # 多取一些,后续过滤 + limit=top_k * 5, # 🔥 从2倍提升到5倍,提高初始召回率 fusion_strategy="weighted_max", ) logger.info(f"多查询检索完成: {len(similar_nodes)} 个节点 (偏好类型: {prefer_node_types})") + if similar_nodes: + logger.debug(f"Top 5融合相似度: {[f'{sim:.3f}' for _, sim, _ in similar_nodes[:5]]}") return similar_nodes, prefer_node_types