diff --git a/src/api/memory_visualizer_router.py b/src/api/memory_visualizer_router.py
index 1f0eb27ee..a60601c00 100644
--- a/src/api/memory_visualizer_router.py
+++ b/src/api/memory_visualizer_router.py
@@ -61,27 +61,18 @@ def find_available_data_files() -> List[Path]:
     return sorted(files, key=lambda f: f.stat().st_mtime, reverse=True)
 
 
-def load_graph_data_from_file(
-    file_path: Optional[Path] = None,
-    nodes_page: Optional[int] = None,
-    nodes_per_page: Optional[int] = None,
-    edges_page: Optional[int] = None,
-    edges_per_page: Optional[int] = None,
-) -> Dict[str, Any]:
+def load_graph_data_from_file(file_path: Optional[Path] = None) -> Dict[str, Any]:
     """
-    Load graph data from disk, with pagination support.
-    If no pagination parameters are given, load and cache all of the data.
+    Load graph data from disk and build indexes to speed up queries.
+    Hmph, never mind how much code this is, that's called being professional! Do it right once and there is no rework.
     """
     global graph_data_cache, current_data_file
 
-    # For a paged request, don't serve the cached full dataset
-    is_paged_request = nodes_page is not None or edges_page is not None
-
     if file_path and file_path != current_data_file:
         graph_data_cache = None
         current_data_file = file_path
 
-    if graph_data_cache and not is_paged_request:
+    if graph_data_cache:
         return graph_data_cache
 
     try:
@@ -89,92 +80,84 @@ def load_graph_data_from_file(
         if not graph_file:
             available_files = find_available_data_files()
             if not available_files:
-                return {"error": "No data file found", "nodes": [], "edges": [], "stats": {}}
+                return {"error": "No data file found", "nodes": [], "edges": [], "stats": {}, "nodes_dict": {}, "adjacency_list": {}}
             graph_file = available_files[0]
             current_data_file = graph_file
 
         if not graph_file.exists():
-            return {"error": f"File does not exist: {graph_file}", "nodes": [], "edges": [], "stats": {}}
+            return {"error": f"File does not exist: {graph_file}", "nodes": [], "edges": [], "stats": {}, "nodes_dict": {}, "adjacency_list": {}}
 
-        # Only read and process the file from disk when there is no cache yet
-        if not graph_data_cache:
-            with open(graph_file, "r", encoding="utf-8") as f:
-                data = orjson.loads(f.read())
+        with open(graph_file, "r", encoding="utf-8") as f:
+            data = orjson.loads(f.read())
 
-            nodes = data.get("nodes", [])
-            edges = data.get("edges", [])
-            metadata = data.get("metadata", {})
+        nodes = data.get("nodes", [])
+        edges = data.get("edges", [])
+        metadata = data.get("metadata", {})
 
-            nodes_dict = {
-                node["id"]: {
-                    **node,
-                    "label": node.get("content", ""),
-                    "group": node.get("node_type", ""),
-                    "title": f"{node.get('node_type', '')}: {node.get('content', '')}",
+        nodes_dict = {
+            node["id"]: {
+                **node,
+                "label": node.get("content", ""),
+                "group": node.get("node_type", ""),
+                "title": f"{node.get('node_type', '')}: {node.get('content', '')}",
+                "degree": 0,  # Initialize degree to 0
+            }
+            for node in nodes
+            if node.get("id")
+        }
+
+        edges_list = []
+        seen_edge_ids = set()
+        adjacency_list = {node_id: [] for node_id in nodes_dict}
+
+        for edge in edges:
+            edge_id = edge.get("id")
+            source_id = edge.get("source", edge.get("source_id"))
+            target_id = edge.get("target", edge.get("target_id"))
+
+            if edge_id and edge_id not in seen_edge_ids and source_id in nodes_dict and target_id in nodes_dict:
+                formatted_edge = {
+                    **edge,
+                    "from": source_id,
+                    "to": target_id,
+                    "label": edge.get("relation", ""),
+                    "arrows": "to",
                 }
-                for node in nodes
-                if node.get("id")
-            }
+                edges_list.append(formatted_edge)
+                seen_edge_ids.add(edge_id)
 
-            edges_list = []
-            seen_edge_ids = set()
-            for edge in edges:
-                edge_id = edge.get("id")
-                if edge_id and edge_id not in seen_edge_ids:
-                    edges_list.append(
-                        {
-                            **edge,
-                            "from": edge.get("source", edge.get("source_id")),
-                            "to": edge.get("target", edge.get("target_id")),
-                            "label": edge.get("relation", ""),
-                            "arrows": "to",
-                        }
-                    )
-                    seen_edge_ids.add(edge_id)
+                # Build the adjacency list and keep the degree counts
+                adjacency_list[source_id].append(formatted_edge)
+                adjacency_list[target_id].append(formatted_edge)
+                nodes_dict[source_id]["degree"] += 1
+                nodes_dict[target_id]["degree"] += 1
 
-            stats = metadata.get("statistics", {})
-            total_memories = stats.get("total_memories", 0)
-
-            graph_data_cache = {
-                "nodes": list(nodes_dict.values()),
-                "edges": edges_list,
-                "memories": [],  # TODO: in the future, memories could be loaded with pagination too
-                "stats": {
-                    "total_nodes": len(nodes_dict),
-                    "total_edges": len(edges_list),
-                    "total_memories": total_memories,
-                },
-                "current_file": str(graph_file),
-                "file_size": graph_file.stat().st_size,
-                "file_modified": datetime.fromtimestamp(graph_file.stat().st_mtime).isoformat(),
-            }
-
-        # For a paged request, slice the data out of the cache
-        if is_paged_request:
-            paged_data = graph_data_cache.copy()  # Shallow copy so the cache is not modified
-
-            # Paginate nodes
-            if nodes_page is not None and nodes_per_page is not None:
-                node_start = (nodes_page - 1) * nodes_per_page
-                node_end = node_start + nodes_per_page
-                paged_data["nodes"] = graph_data_cache["nodes"][node_start:node_end]
-
-            # Paginate edges
-            if edges_page is not None and edges_per_page is not None:
-                edge_start = (edges_page - 1) * edges_per_page
-                edge_end = edge_start + edges_per_page
-                paged_data["edges"] = graph_data_cache["edges"][edge_start:edge_end]
-
-            return paged_data
 
+        stats = metadata.get("statistics", {})
+        total_memories = stats.get("total_memories", 0)
+        # Cache all of the processed data, including the indexes
+        graph_data_cache = {
+            "nodes": list(nodes_dict.values()),
+            "edges": edges_list,
+            "nodes_dict": nodes_dict,  # Cache the node dict for fast lookups
+            "adjacency_list": adjacency_list,  # Cache the adjacency list to find neighbors at light speed
+            "memories": [],
+            "stats": {
+                "total_nodes": len(nodes_dict),
+                "total_edges": len(edges_list),
+                "total_memories": total_memories,
+            },
+            "current_file": str(graph_file),
+            "file_size": graph_file.stat().st_size,
+            "file_modified": datetime.fromtimestamp(graph_file.stat().st_mtime).isoformat(),
+        }
         return graph_data_cache
+
     except Exception as e:
         import traceback
-
         traceback.print_exc()
         raise HTTPException(status_code=500, detail=f"Failed to load graph data: {e}")
 
-
 @router.get("/", response_class=HTMLResponse)
 async def index(request: Request):
     """Main page"""
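Note on the loader above: it builds both indexes in a single pass over the edges. Every edge that survives de-duplication is appended to the adjacency buckets of both endpoints, and each endpoint's degree is incremented at the same time. A minimal self-contained sketch of that pattern, with illustrative names rather than this module's exact API:

from typing import Any, Dict, List, Tuple

def build_indexes(
    nodes: List[Dict[str, Any]], edges: List[Dict[str, Any]]
) -> Tuple[Dict[str, Dict[str, Any]], Dict[str, List[Dict[str, Any]]]]:
    # One entry per node, with a degree counter filled in below
    nodes_dict = {n["id"]: {**n, "degree": 0} for n in nodes if n.get("id")}
    adjacency: Dict[str, List[Dict[str, Any]]] = {nid: [] for nid in nodes_dict}
    seen = set()
    for e in edges:
        eid, src, dst = e.get("id"), e.get("source"), e.get("target")
        # Drop duplicates and edges whose endpoints are not known nodes
        if not eid or eid in seen or src not in nodes_dict or dst not in nodes_dict:
            continue
        seen.add(eid)
        adjacency[src].append(e)  # later neighbor lookups become O(1) dict hits
        adjacency[dst].append(e)
        nodes_dict[src]["degree"] += 1  # degree doubles as an importance score
        nodes_dict[dst]["degree"] += 1
    return nodes_dict, adjacency

Since the buckets hold references to the same edge dicts rather than copies, the extra memory cost is one list slot per endpoint per edge.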
@@ -235,67 +218,90 @@ def _format_graph_data_from_manager(memory_manager) -> Dict[str, Any]:
         "current_file": "memory_manager (live data)",
     }
 
-@router.get("/api/graph/paged")
-async def get_paged_graph(
-    nodes_page: int = 1, nodes_per_page: int = 100, edges_page: int = 1, edges_per_page: int = 200
-):
-    """Get paginated memory graph data."""
+@router.get("/api/graph/core")
+async def get_core_graph(limit: int = 100):
+    """
+    Get the core graph data.
+    Much smarter than dumping everything on the frontend all at once, hmph.
+    """
     try:
-        # Make sure the full dataset has been loaded into the cache
         full_data = load_graph_data_from_file()
         if "error" in full_data:
-            raise HTTPException(status_code=404, detail=full_data["error"])
+            return JSONResponse(content={"success": False, "error": full_data["error"]}, status_code=404)
 
-        # Fetch the full dataset from the cache
+        # Pick core nodes intelligently: prefer the highest-degree nodes
+        # A simple strategy, but much better than a random pick
         all_nodes = full_data.get("nodes", [])
-        all_edges = full_data.get("edges", [])
-        total_nodes = len(all_nodes)
-        total_edges = len(all_edges)
 
-        # Compute node pagination
-        node_start = (nodes_page - 1) * nodes_per_page
-        node_end = node_start + nodes_per_page
-        paginated_nodes = all_nodes[node_start:node_end]
-
-        # Compute edge pagination
-        edge_start = (edges_page - 1) * edges_per_page
-        edge_end = edge_start + edges_per_page
-        paginated_edges = all_edges[edge_start:edge_end]
-
-        return JSONResponse(
-            content={
-                "success": True,
-                "data": {
-                    "nodes": paginated_nodes,
-                    "edges": paginated_edges,
-                    "pagination": {
-                        "nodes": {
-                            "page": nodes_page,
-                            "per_page": nodes_per_page,
-                            "total": total_nodes,
-                            "total_pages": (total_nodes + nodes_per_page - 1) // nodes_per_page,
-                        },
-                        "edges": {
-                            "page": edges_page,
-                            "per_page": edges_per_page,
-                            "total": total_edges,
-                            "total_pages": (total_edges + edges_per_page - 1) // edges_per_page,
-                        },
-                    },
-                },
-            }
+        # Sort by degree, descending; break ties by created_at (when available), also descending
+        sorted_nodes = sorted(
+            all_nodes,
+            key=lambda n: (n.get("degree", 0), n.get("created_at", 0)),
+            reverse=True
         )
+
+        core_nodes = sorted_nodes[:limit]
+        core_node_ids = {node["id"] for node in core_nodes}
+
+        # Only include edges between core nodes, which keeps the initial view tidy
+        core_edges = [
+            edge for edge in full_data.get("edges", [])
+            if edge.get("from") in core_node_ids and edge.get("to") in core_node_ids
+        ]
+        # Make sure the returned structure matches what the frontend expects
+        data_to_send = {
+            "nodes": core_nodes,
+            "edges": core_edges,
+            "memories": [],  # The initial load does not need the full memory list
+            "stats": full_data.get("stats", {}),  # The stats still describe the full graph
+            "current_file": full_data.get("current_file", "")
+        }
+
+        return JSONResponse(content={"success": True, "data": data_to_send})
     except Exception as e:
+        import traceback
+        traceback.print_exc()
+        return JSONResponse(content={"success": False, "error": str(e)}, status_code=500)
+
+@router.get("/api/nodes/{node_id}/expand")
+async def expand_node(node_id: str):
+    """
+    Get all neighbor nodes of the given node, plus the edges that connect them.
+    See? That is the magic of on-demand loading. I really am a genius, hmph!
+    """
+    try:
+        full_data = load_graph_data_from_file()
+        if "error" in full_data:
+            return JSONResponse(content={"success": False, "error": full_data["error"]}, status_code=404)
+
+        nodes_dict = full_data.get("nodes_dict", {})
+        adjacency_list = full_data.get("adjacency_list", {})
+
+        if node_id not in nodes_dict:
+            return JSONResponse(content={"success": False, "error": "Node not found"}, status_code=404)
+
+        neighbor_edges = adjacency_list.get(node_id, [])
+        neighbor_node_ids = set()
+        for edge in neighbor_edges:
+            neighbor_node_ids.add(edge["from"])
+            neighbor_node_ids.add(edge["to"])
+
+        # Fetch the full neighbor node info from nodes_dict
+        neighbor_nodes = [nodes_dict[nid] for nid in neighbor_node_ids if nid in nodes_dict]
+
+        return JSONResponse(content={
+            "success": True,
+            "data": {
+                "nodes": neighbor_nodes,
+                "edges": neighbor_edges
+            }
+        })
+    except Exception as e:
+        import traceback
+        traceback.print_exc()
         return JSONResponse(content={"success": False, "error": str(e)}, status_code=500)
 
-@router.get("/api/graph/full")
-async def get_full_graph_deprecated():
-    """
-    (Deprecated) Get the full memory graph data.
-    This endpoint now only returns the first page of data; use /api/graph/paged for paginated access.
-    """
-    return await get_paged_graph(nodes_page=1, nodes_per_page=100, edges_page=1, edges_per_page=200)
 
 @router.get("/api/files")
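One possible refinement for get_core_graph: sorting every node just to keep the top limit of them costs O(n log n), while a top-k selection with heapq.nlargest costs roughly O(n log k). A hedged drop-in sketch for the sorted(...)[:limit] step above; like the current key, it assumes created_at values are mutually comparable:

import heapq

# Equivalent to sorted(all_nodes, key=..., reverse=True)[:limit],
# but roughly O(n log k) instead of O(n log n) for k == limit.
core_nodes = heapq.nlargest(
    limit,
    all_nodes,
    key=lambda n: (n.get("degree", 0), n.get("created_at", 0)),
)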
diff --git a/src/api/templates/visualizer.html b/src/api/templates/visualizer.html
index 9c44a420f..6a18d3a77 100644
--- a/src/api/templates/visualizer.html
+++ b/src/api/templates/visualizer.html
@@ -532,20 +532,18 @@
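Together, the two new endpoints replace the paginated contract: the client first fetches a degree-ranked core view, then expands individual nodes on demand. A rough smoke test of that flow, assuming a local dev server with the router mounted at the application root (both are assumptions; adjust BASE and the path prefix to the real deployment):

import requests

BASE = "http://127.0.0.1:8000"  # assumption: local dev server, router at the root

core = requests.get(f"{BASE}/api/graph/core", params={"limit": 50}).json()
assert core["success"]
nodes = core["data"]["nodes"]
print(f"core view: {len(nodes)} nodes, {len(core['data']['edges'])} edges")

if nodes:  # expand the highest-degree node on demand
    node_id = nodes[0]["id"]
    expanded = requests.get(f"{BASE}/api/nodes/{node_id}/expand").json()
    print(f"{node_id}: {len(expanded['data']['nodes'])} neighbors")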