From e2236f5bc131fadcf477f410bf4b3af992fca75e Mon Sep 17 00:00:00 2001
From: Windpicker-owo <3431391539@qq.com>
Date: Tue, 11 Nov 2025 19:25:03 +0800
Subject: [PATCH] =?UTF-8?q?feat:=E5=A2=9E=E5=BC=BA=E8=AE=B0=E5=BF=86?=
=?UTF-8?q?=E8=8A=82=E7=82=B9=E7=9A=84=E5=B5=8C=E5=85=A5=E7=94=9F=E6=88=90?=
=?UTF-8?q?=E5=92=8C=E6=97=A5=E5=BF=97=E8=AE=B0=E5=BD=95-=20=E5=9C=A8=20Me?=
=?UTF-8?q?moryBuilder=20=E4=B8=AD=E4=B8=BA=20SUBJECT=20=E5=92=8C=20VALUE?=
=?UTF-8?q?=20=E8=8A=82=E7=82=B9=E7=B1=BB=E5=9E=8B=E6=B7=BB=E5=8A=A0?=
=?UTF-8?q?=E4=BA=86=E5=B5=8C=E5=85=A5=E7=94=9F=E6=88=90=EF=BC=8C=E7=A1=AE?=
=?UTF-8?q?=E4=BF=9D=E4=BB=85=E4=B8=BA=E5=86=85=E5=AE=B9=E8=B6=B3=E5=A4=9F?=
=?UTF-8?q?=E7=9A=84=E8=8A=82=E7=82=B9=E5=88=9B=E5=BB=BA=E5=B5=8C=E5=85=A5?=
=?UTF-8?q?=E3=80=82-=20=E6=94=B9=E8=BF=9B=E4=BA=86=20MemoryTools=20?=
=?UTF-8?q?=E7=9A=84=E6=97=A5=E5=BF=97=E8=AE=B0=E5=BD=95=EF=BC=8C=E5=9C=A8?=
=?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=90=91=E9=87=8F=E6=90=9C=E7=B4=A2=E6=9C=9F?=
=?UTF-8?q?=E9=97=B4=E6=8F=90=E4=BE=9B=E8=AF=A6=E7=BB=86=E8=A7=81=E8=A7=A3?=
=?UTF-8?q?=EF=BC=8C=E5=8C=85=E6=8B=AC=E4=BD=8E=E5=8F=AC=E5=9B=9E=E6=83=85?=
=?UTF-8?q?=E5=86=B5=E7=9A=84=E8=AD=A6=E5=91=8A=E3=80=82-=20=E8=B0=83?=
=?UTF-8?q?=E6=95=B4=E4=BA=86=E4=B8=8D=E5=90=8C=E8=AE=B0=E5=BF=86=E7=B1=BB?=
=?UTF-8?q?=E5=9E=8B=E7=9A=84=E8=AF=84=E5=88=86=E6=9D=83=E9=87=8D=EF=BC=8C?=
=?UTF-8?q?=E4=BB=A5=E5=BC=BA=E8=B0=83=E7=9B=B8=E4=BC=BC=E6=80=A7=E5=92=8C?=
=?UTF-8?q?=E9=87=8D=E8=A6=81=E6=80=A7=EF=BC=8C=E6=8F=90=E9=AB=98=E8=AE=B0?=
=?UTF-8?q?=E5=BF=86=E6=A3=80=E7=B4=A2=E7=9A=84=E8=B4=A8=E9=87=8F=E3=80=82?=
=?UTF-8?q?-=20=E5=B0=86=E5=90=91=E9=87=8F=E6=90=9C=E7=B4=A2=E9=99=90?=
=?UTF-8?q?=E5=88=B6=E4=BB=8E=202=20=E5=80=8D=E6=8F=90=E9=AB=98=E5=88=B0?=
=?UTF-8?q?=205=20=E5=80=8D=EF=BC=8C=E4=BB=A5=E6=94=B9=E5=96=84=E5=88=9D?=
=?UTF-8?q?=E5=A7=8B=E5=8F=AC=E5=9B=9E=E7=8E=87=E3=80=82-=20=E5=BC=95?=
=?UTF-8?q?=E5=85=A5=E4=BA=86=E4=B8=80=E4=B8=AA=E6=96=B0=E8=84=9A=E6=9C=AC?=
=?UTF-8?q?=EF=BC=8C=E7=94=A8=E4=BA=8E=E4=B8=BA=E7=8E=B0=E6=9C=89=E8=8A=82?=
=?UTF-8?q?=E7=82=B9=E7=94=9F=E6=88=90=E7=BC=BA=E5=A4=B1=E7=9A=84=E5=B5=8C?=
=?UTF-8?q?=E5=85=A5=EF=BC=8C=E6=94=AF=E6=8C=81=E6=89=B9=E9=87=8F=E5=A4=84?=
=?UTF-8?q?=E7=90=86=E5=B9=B6=E6=94=B9=E8=BF=9B=E7=B4=A2=E5=BC=95=E3=80=82?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
scripts/generate_missing_embeddings.py | 268 ++++++++
src/api/memory_visualizer_router.py | 239 ++++++-
src/api/templates/visualizer.html | 909 ++++++++++++++++++++-----
src/memory_graph/core/builder.py | 9 +-
src/memory_graph/tools/memory_tools.py | 60 +-
5 files changed, 1296 insertions(+), 189 deletions(-)
create mode 100644 scripts/generate_missing_embeddings.py
diff --git a/scripts/generate_missing_embeddings.py b/scripts/generate_missing_embeddings.py
new file mode 100644
index 000000000..a8957e50b
--- /dev/null
+++ b/scripts/generate_missing_embeddings.py
@@ -0,0 +1,268 @@
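+"""
+Generate embeddings for existing nodes.
+
+Batch-generates and indexes embedding vectors for graph-store nodes that
+lack an embedding vector.
+
+Use cases:
+1. Historical memory nodes have no embedding vector.
+2. The embedding generator was not configured earlier and the embeddings now
+   need to be back-filled.
+3. The vector index is damaged and needs to be rebuilt.
+
+Usage:
+    python scripts/generate_missing_embeddings.py [--node-types 主题,客体] [--batch-size 50]
+
+Arguments:
+    --node-types: Comma-separated node type values to embed (default: 主题,客体).
+    --batch-size: Number of nodes processed per batch (default: 50).
+"""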
+
+import asyncio
+import sys
+from pathlib import Path
+from typing import List, Optional
+
+# Put the directory above scripts/ (the project root) on sys.path so the
+# ``src.*`` imports used by this script resolve when it is run directly.
+sys.path.insert(0, str(Path(__file__).parent.parent))
+
+
+async def generate_missing_embeddings(
+    target_node_types: Optional[List[str]] = None,
+    batch_size: int = 50,
+):
+    """
+    Generate and index embeddings for graph nodes that lack one.
+
+    Scans every memory in the graph store, queues nodes of the requested
+    types that are neither listed in the vector index nor carry an embedding,
+    embeds their content batch by batch, adds them to the vector store, saves
+    the graph, and finally prints a summary plus a few sample searches.
+
+    Args:
+        target_node_types: Node type values to process; compared against
+            ``node.node_type.value``. ``None`` selects the TOPIC and OBJECT
+            node types.
+        batch_size: Number of nodes embedded per batch; must be >= 1.
+
+    Raises:
+        ValueError: If ``batch_size`` is smaller than 1.
+    """
+    # Reject a non-positive batch size up front: 0 would make range() raise
+    # later on and a negative value would silently process nothing.
+    if batch_size < 1:
+        raise ValueError(f"batch_size must be >= 1, got {batch_size}")
+
+    from src.common.logger import get_logger
+    from src.memory_graph.manager_singleton import get_memory_manager, initialize_memory_manager
+    from src.memory_graph.models import NodeType
+
+    logger = get_logger("generate_missing_embeddings")
+
+    if target_node_types is None:
+        target_node_types = [NodeType.TOPIC.value, NodeType.OBJECT.value]
+
+    print(f"\n{'='*80}")
+    print("🔧 为节点生成嵌入向量")
+    print(f"{'='*80}\n")
+    print(f"目标节点类型: {', '.join(target_node_types)}")
+    print(f"批处理大小: {batch_size}\n")
+
+    # 1. Initialise the memory manager.
+    print("🔧 正在初始化记忆管理器...")
+    await initialize_memory_manager()
+    manager = get_memory_manager()
+
+    if manager is None:
+        print("❌ 记忆管理器初始化失败")
+        return
+
+    print("✅ 记忆管理器已初始化\n")
+
+    # 2. Collect the IDs that are already present in the vector index.
+    print("🔍 检查现有向量索引...")
+    existing_node_ids = set()
+    try:
+        vector_count = manager.vector_store.collection.count()
+        # Page through the collection instead of fetching every ID in one call.
+        batch_size_check = 1000
+        for offset in range(0, vector_count, batch_size_check):
+            limit = min(batch_size_check, vector_count - offset)
+            result = manager.vector_store.collection.get(
+                limit=limit,
+                offset=offset,
+            )
+            if result and "ids" in result:
+                existing_node_ids.update(result["ids"])
+
+        print(f"✅ 发现 {len(existing_node_ids)} 个已索引节点\n")
+    except Exception as e:
+        # Best effort: without the ID list, duplicates are handled at insert time.
+        logger.warning(f"获取已索引节点ID失败: {e}")
+        print("⚠️ 无法获取已索引节点,将尝试跳过重复项\n")
+
+    # 3. Find the nodes that still need an embedding.
+    print("🔍 扫描需要生成嵌入的节点...")
+    all_memories = manager.graph_store.get_all_memories()
+
+    nodes_to_process = []
+    queued_node_ids = set()
+    total_target_nodes = 0
+    type_stats = {nt: {"total": 0, "need_emb": 0, "already_indexed": 0} for nt in target_node_types}
+
+    for memory in all_memories:
+        for node in memory.nodes:
+            node_type = node.node_type.value
+            if node_type not in target_node_types:
+                continue
+
+            total_target_nodes += 1
+            type_stats[node_type]["total"] += 1
+
+            # Already listed in the vector index: nothing to do.
+            if node.id in existing_node_ids:
+                type_stats[node_type]["already_indexed"] += 1
+                continue
+
+            # NOTE(review): a node that carries an embedding but is missing
+            # from the index is skipped here and never re-indexed - confirm
+            # this is intended for the "rebuild a damaged index" use case.
+            if node.has_embedding():
+                continue
+
+            # Queue each node ID only once so a batch never contains duplicate
+            # IDs (presumably a node can be reached from several memories -
+            # TODO confirm).
+            if node.id in queued_node_ids:
+                continue
+            queued_node_ids.add(node.id)
+
+            nodes_to_process.append({
+                "node": node,
+                "memory_id": memory.id,
+            })
+            type_stats[node_type]["need_emb"] += 1
+
+    print("\n📊 扫描结果:")
+    for node_type in target_node_types:
+        stats = type_stats[node_type]
+        already_ok = stats["already_indexed"]
+        coverage = (stats["total"] - stats["need_emb"]) / stats["total"] * 100 if stats["total"] > 0 else 0
+        print(f" - {node_type}: {stats['total']} 个节点, {stats['need_emb']} 个缺失嵌入, "
+              f"{already_ok} 个已索引 (覆盖率: {coverage:.1f}%)")
+
+    print(f"\n 总计: {total_target_nodes} 个目标节点, {len(nodes_to_process)} 个需要生成嵌入\n")
+
+    if not nodes_to_process:
+        print("✅ 所有节点已有嵌入向量,无需生成")
+        return
+
+    # 4. Embed and index the queued nodes batch by batch.
+    print("🚀 开始生成嵌入向量...\n")
+
+    total_batches = (len(nodes_to_process) + batch_size - 1) // batch_size
+    success_count = 0
+    failed_count = 0
+    indexed_count = 0
+
+    for i in range(0, len(nodes_to_process), batch_size):
+        batch = nodes_to_process[i : i + batch_size]
+        batch_num = i // batch_size + 1
+        # Per-batch tallies are merged into the totals only once the batch has
+        # settled, so a mid-batch exception cannot count a node twice.
+        batch_success = 0
+        batch_failed = 0
+
+        print(f"📦 批次 {batch_num}/{total_batches} ({len(batch)} 个节点)...")
+
+        try:
+            texts = [item["node"].content for item in batch]
+            embeddings = await manager.embedding_generator.generate_batch(texts)
+
+            batch_nodes_for_index = []
+            for item, embedding in zip(batch, embeddings):
+                node = item["node"]
+
+                if embedding is not None:
+                    node.embedding = embedding
+                    batch_nodes_for_index.append(node)
+                    batch_success += 1
+                else:
+                    batch_failed += 1
+                    logger.warning(f" ⚠️ 节点 {node.id[:8]}... '{node.content[:30]}' 嵌入生成失败")
+
+            # zip() stops at the shorter input; nodes for which the generator
+            # returned nothing at all are counted as failures.
+            batch_failed = len(batch) - batch_success
+
+            if batch_nodes_for_index:
+                try:
+                    await manager.vector_store.add_nodes_batch(batch_nodes_for_index)
+                    indexed_count += len(batch_nodes_for_index)
+                    print(f" ✅ 成功: {len(batch_nodes_for_index)}/{len(batch)} 个节点已生成并索引")
+                except Exception as e:
+                    # The batch insert failed: fall back to one-by-one inserts
+                    # so a single bad or duplicate node does not lose the rest.
+                    logger.warning(f" 批量索引失败,尝试逐个添加: {e}")
+                    individual_success = 0
+                    for node in batch_nodes_for_index:
+                        try:
+                            await manager.vector_store.add_node(node)
+                            individual_success += 1
+                            indexed_count += 1
+                        except Exception as e2:
+                            if "Expected IDs to be unique" in str(e2):
+                                logger.debug(f" 跳过已存在节点: {node.id}")
+                            else:
+                                logger.error(f" 节点 {node.id} 索引失败: {e2}")
+                    print(f" ⚠️ 逐个索引: {individual_success}/{len(batch_nodes_for_index)} 个成功")
+
+            success_count += batch_success
+            failed_count += batch_failed
+
+        except Exception as e:
+            # The whole batch is reported as failed; none of it was added to
+            # success_count above.
+            failed_count += len(batch)
+            logger.error(f"批次 {batch_num} 处理失败", exc_info=True)
+            print(f" ❌ 批次处理失败: {e}")
+
+        total_processed = min(i + batch_size, len(nodes_to_process))
+        progress = total_processed / len(nodes_to_process) * 100
+        print(f" 📊 总进度: {total_processed}/{len(nodes_to_process)} ({progress:.1f}%)\n")
+
+    # 5. Save the graph so the nodes' new embedding fields are persisted.
+    print("💾 保存图数据...")
+    try:
+        await manager.persistence.save_graph_store(manager.graph_store)
+        print("✅ 图数据已保存\n")
+    except Exception as e:
+        logger.error("保存图数据失败", exc_info=True)
+        print(f"❌ 保存失败: {e}\n")
+
+    # 6. Report the outcome.
+    print("🔍 验证向量索引...")
+    final_vector_count = manager.vector_store.collection.count()
+    stats = manager.graph_store.get_statistics()
+    total_nodes = stats["total_nodes"]
+    # Guard the ratio: the statistics may report zero nodes.
+    index_coverage = final_vector_count / total_nodes * 100 if total_nodes else 0.0
+
+    print(f"\n{'='*80}")
+    print("📊 生成完成")
+    print(f"{'='*80}")
+    print(f"处理节点数: {len(nodes_to_process)}")
+    print(f"成功生成: {success_count}")
+    print(f"失败数量: {failed_count}")
+    print(f"成功索引: {indexed_count}")
+    print(f"向量索引节点数: {final_vector_count}")
+    print(f"图存储节点数: {total_nodes}")
+    print(f"索引覆盖率: {index_coverage:.1f}%\n")
+
+    # 7. Smoke-test the search path with a few fixed sample queries.
+    print("🧪 测试搜索功能...")
+    test_queries = ["小红帽蕾克", "拾风", "杰瑞喵"]
+
+    for query in test_queries:
+        results = await manager.search_memories(query=query, top_k=3)
+        if results:
+            print(f"\n✅ 查询 '{query}' 找到 {len(results)} 条记忆:")
+            for rank, memory in enumerate(results[:2], 1):
+                subject_node = memory.get_subject_node()
+                # The topic is the first node of type TOPIC, if the memory has one.
+                topic_nodes = [n for n in memory.nodes if n.node_type == NodeType.TOPIC]
+                subject = subject_node.content if subject_node else "?"
+                topic = topic_nodes[0].content if topic_nodes else "?"
+                print(f" {rank}. {subject} - {topic} (重要性: {memory.importance:.2f})")
+        else:
+            print(f"\n⚠️ 查询 '{query}' 返回 0 条结果")
+
+async def main():
+    """Parse the command-line arguments and run the embedding back-fill."""
+    import argparse
+
+    parser = argparse.ArgumentParser(description="为节点生成嵌入向量")
+    parser.add_argument(
+        "--node-types",
+        type=str,
+        default="主题,客体",
+        help="需要生成嵌入的节点类型,逗号分隔(默认:主题,客体)",
+    )
+    parser.add_argument(
+        "--batch-size",
+        type=int,
+        default=50,
+        help="批处理大小(默认:50)",
+    )
+
+    args = parser.parse_args()
+
+    # Drop blank entries so input such as "主题,,客体" or a trailing comma does
+    # not turn into an empty node type that can never match.
+    target_types = [name.strip() for name in args.node_types.split(",") if name.strip()]
+    if not target_types:
+        parser.error("--node-types must contain at least one node type")
+    # A batch size below 1 cannot be used to slice the work list.
+    if args.batch_size < 1:
+        parser.error("--batch-size must be a positive integer")
+
+    await generate_missing_embeddings(
+        target_node_types=target_types,
+        batch_size=args.batch_size,
+    )
+
+
+# Script entry point: run the async CLI only when the file is executed directly.
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/src/api/memory_visualizer_router.py b/src/api/memory_visualizer_router.py
index e80e8ec0e..84971f78a 100644
--- a/src/api/memory_visualizer_router.py
+++ b/src/api/memory_visualizer_router.py
@@ -7,9 +7,10 @@
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional
+from collections import defaultdict
import orjson
-from fastapi import APIRouter, HTTPException, Request
+from fastapi import APIRouter, HTTPException, Request, Query
from fastapi.responses import HTMLResponse, JSONResponse
from fastapi.templating import Jinja2Templates
@@ -227,6 +228,242 @@ async def get_full_graph():
return JSONResponse(content={"success": False, "error": str(e)}, status_code=500)
+@router.get("/api/graph/summary")
+async def get_graph_summary():
+    """Return summary statistics for the graph (counts only, no nodes or edges)."""
+    try:
+        from src.memory_graph.manager_singleton import get_memory_manager
+
+        manager = get_memory_manager()
+
+        if not (manager and manager._initialized):
+            # No live manager: fall back to the data file on disk.
+            file_data = load_graph_data_from_file()
+            payload = {
+                "stats": file_data.get("stats", {}),
+                "current_file": file_data.get("current_file", ""),
+            }
+        else:
+            live_stats = manager.get_statistics()
+            payload = {
+                "stats": {
+                    key: live_stats.get(key, 0)
+                    for key in ("total_nodes", "total_edges", "total_memories")
+                },
+                "current_file": "memory_manager (实时数据)",
+            }
+
+        return JSONResponse(content={"success": True, "data": payload})
+    except Exception as e:
+        return JSONResponse(content={"success": False, "error": str(e)}, status_code=500)
+
+
+@router.get("/api/graph/paginated")
+async def get_paginated_graph(
+    page: int = Query(1, ge=1, description="页码"),
+    page_size: int = Query(500, ge=100, le=2000, description="每页节点数"),
+    min_importance: float = Query(0.0, ge=0.0, le=1.0, description="最小重要性阈值"),
+    node_types: Optional[str] = Query(None, description="节点类型过滤,逗号分隔"),
+):
+    """
+    Return one page of graph nodes ranked by connectivity.
+
+    A node's importance is the number of edges touching it divided by the
+    total number of edges. Nodes below ``min_importance`` are dropped, the
+    rest are sorted by importance (descending) and sliced into pages. Only
+    edges whose two endpoints are both on the returned page are included.
+
+    Args:
+        page: 1-based page number.
+        page_size: Number of nodes per page.
+        min_importance: Minimum importance a node needs to be listed.
+        node_types: Optional comma-separated list of node ``group`` values
+            to keep; all groups are kept when omitted.
+
+    Returns:
+        A JSON response with ``nodes``, ``edges``, ``pagination`` and
+        ``stats``, or ``success: False`` with status 500 on any error.
+    """
+    try:
+        from collections import Counter
+
+        from src.memory_graph.manager_singleton import get_memory_manager
+
+        memory_manager = get_memory_manager()
+
+        # Use the live manager when it is ready, otherwise the data file.
+        if memory_manager and memory_manager._initialized:
+            full_data = _format_graph_data_from_manager(memory_manager)
+        else:
+            full_data = load_graph_data_from_file()
+
+        nodes = full_data.get("nodes", [])
+        edges = full_data.get("edges", [])
+
+        # Optional filter on the node group; surrounding blanks and empty
+        # entries in the comma-separated list are ignored.
+        if node_types:
+            allowed_types = {t.strip() for t in node_types.split(",") if t.strip()}
+            nodes = [n for n in nodes if n.get("group") in allowed_types]
+
+        # Count the edges touching each node in one pass over the edge list
+        # instead of rescanning every edge for every node. A self-loop counts
+        # once for its node.
+        degree = Counter()
+        for edge in edges:
+            src, dst = edge.get("from"), edge.get("to")
+            degree[src] += 1
+            if dst != src:
+                degree[dst] += 1
+
+        edge_total = max(len(edges), 1)
+        nodes_with_importance = []
+        for node in nodes:
+            importance = degree[node["id"]] / edge_total
+            if importance >= min_importance:
+                # Annotate a copy so the source data is not modified.
+                nodes_with_importance.append({**node, "importance": importance})
+
+        # Highest importance first; the sort is stable, so ties keep their order.
+        nodes_with_importance.sort(key=lambda x: x.get("importance", 0), reverse=True)
+
+        # Pagination.
+        total_nodes = len(nodes_with_importance)
+        total_pages = (total_nodes + page_size - 1) // page_size
+        start_idx = (page - 1) * page_size
+        end_idx = min(start_idx + page_size, total_nodes)
+
+        paginated_nodes = nodes_with_importance[start_idx:end_idx]
+        node_ids = {n["id"] for n in paginated_nodes}
+
+        # Keep only the edges that connect two nodes of this page.
+        paginated_edges = [
+            e for e in edges
+            if e.get("from") in node_ids and e.get("to") in node_ids
+        ]
+
+        return JSONResponse(content={"success": True, "data": {
+            "nodes": paginated_nodes,
+            "edges": paginated_edges,
+            "pagination": {
+                "page": page,
+                "page_size": page_size,
+                "total_nodes": total_nodes,
+                "total_pages": total_pages,
+                "has_next": page < total_pages,
+                "has_prev": page > 1,
+            },
+            "stats": {
+                "total_nodes": total_nodes,
+                "total_edges": len(paginated_edges),
+                "total_memories": full_data.get("stats", {}).get("total_memories", 0),
+            },
+        }})
+    except Exception as e:
+        import traceback
+        traceback.print_exc()
+        return JSONResponse(content={"success": False, "error": str(e)}, status_code=500)
+
+
+@router.get("/api/graph/clustered")
+async def get_clustered_graph(
+    max_nodes: int = Query(300, ge=50, le=1000, description="最大节点数"),
+    cluster_threshold: int = Query(10, ge=2, le=50, description="聚类阈值")
+):
+    """Return the graph, simplified by clustering when it has more than ``max_nodes`` nodes."""
+    try:
+        from src.memory_graph.manager_singleton import get_memory_manager
+
+        manager = get_memory_manager()
+
+        # Live manager data when available, otherwise the data file.
+        full_data = (
+            _format_graph_data_from_manager(manager)
+            if manager and manager._initialized
+            else load_graph_data_from_file()
+        )
+        all_nodes = full_data.get("nodes", [])
+        all_edges = full_data.get("edges", [])
+
+        if len(all_nodes) > max_nodes:
+            # Too many nodes: collapse the graph before returning it.
+            simplified = _cluster_graph_data(all_nodes, all_edges, max_nodes, cluster_threshold)
+            payload = {
+                **simplified,
+                "stats": {
+                    "original_nodes": len(all_nodes),
+                    "original_edges": len(all_edges),
+                    "clustered_nodes": len(simplified["nodes"]),
+                    "clustered_edges": len(simplified["edges"]),
+                    "total_memories": full_data.get("stats", {}).get("total_memories", 0),
+                },
+                "clustered": True,
+            }
+        else:
+            # Small enough to be returned unchanged.
+            payload = {
+                "nodes": all_nodes,
+                "edges": all_edges,
+                "stats": full_data.get("stats", {}),
+                "clustered": False,
+            }
+
+        return JSONResponse(content={"success": True, "data": payload})
+    except Exception as e:
+        import traceback
+        traceback.print_exc()
+        return JSONResponse(content={"success": False, "error": str(e)}, status_code=500)
+
+
+def _cluster_graph_data(nodes: List[Dict], edges: List[Dict], max_nodes: int, cluster_threshold: int) -> Dict:
+    """
+    Simplify a graph by folding low-degree nodes of each type into one cluster node.
+
+    Nodes are grouped by their ``group`` value. A group with at most
+    ``cluster_threshold`` nodes is kept unchanged. In a larger group the
+    best-connected nodes are kept (each group gets an equal share of
+    ``max_nodes``) and the remaining nodes are replaced by a single cluster
+    node. Edges are then re-pointed at the surviving nodes; self-loops and
+    repeated connections between the same pair are dropped.
+
+    Args:
+        nodes: Node dicts with an ``id`` key and an optional ``group`` key.
+        edges: Edge dicts with ``from`` and ``to`` keys and an optional ``label``.
+        max_nodes: Node budget shared equally between the node groups.
+        cluster_threshold: Largest group size that is kept without clustering.
+
+    Returns:
+        A dict with the simplified ``nodes`` and ``edges`` lists.
+    """
+    # Neighbour sets, used to rank nodes by degree.
+    adjacency = defaultdict(set)
+    for edge in edges:
+        adjacency[edge["from"]].add(edge["to"])
+        adjacency[edge["to"]].add(edge["from"])
+
+    type_groups = defaultdict(list)
+    for node in nodes:
+        type_groups[node.get("group", "UNKNOWN")].append(node)
+
+    clustered_nodes = []
+    clustered_edges = []
+    node_mapping = {}  # original node ID -> ID of the node that represents it
+
+    # Equal share of the node budget for every group.
+    keep_per_type = max_nodes // len(type_groups) if type_groups else 0
+
+    for node_type, type_nodes in type_groups.items():
+        if len(type_nodes) <= cluster_threshold:
+            kept, folded = type_nodes, []
+        else:
+            # Rank by degree, best connected first. .get() avoids inserting
+            # empty entries into the defaultdict for isolated nodes.
+            ranked = sorted(
+                type_nodes,
+                key=lambda n: len(adjacency.get(n["id"], ())),
+                reverse=True,
+            )
+            kept, folded = ranked[:keep_per_type], ranked[keep_per_type:]
+
+        for node in kept:
+            clustered_nodes.append(node)
+            node_mapping[node["id"]] = node["id"]
+
+        if folded:
+            # Everything that was not kept is represented by one cluster node.
+            clustered_node_ids = [n["id"] for n in folded]
+            cluster_id = f"cluster_{node_type}_{len(clustered_nodes)}"
+            cluster_label = f"{node_type} 集群 ({len(clustered_node_ids)}个节点)"
+
+            clustered_nodes.append({
+                "id": cluster_id,
+                "label": cluster_label,
+                "group": node_type,
+                "title": f"包含 {len(clustered_node_ids)} 个{node_type}节点",
+                "is_cluster": True,
+                "cluster_size": len(clustered_node_ids),
+                "clustered_nodes": clustered_node_ids[:10],  # only the first 10, for display
+            })
+
+            for node_id in clustered_node_ids:
+                node_mapping[node_id] = cluster_id
+
+    # Rebuild the edges between surviving nodes, one per unordered pair.
+    seen_pairs = set()
+    for edge in edges:
+        from_id = node_mapping.get(edge["from"])
+        to_id = node_mapping.get(edge["to"])
+
+        # Compare with None rather than truthiness so valid falsy IDs such as
+        # 0 or "" are not mistaken for "endpoint missing".
+        if from_id is None or to_id is None or from_id == to_id:
+            continue
+
+        # A frozenset is order-insensitive and, unlike sorted(), also works
+        # when an integer node ID meets a string cluster ID.
+        edge_key = frozenset((from_id, to_id))
+        if edge_key in seen_pairs:
+            continue
+        seen_pairs.add(edge_key)
+        clustered_edges.append({
+            "id": f"{from_id}_{to_id}",
+            "from": from_id,
+            "to": to_id,
+            "label": edge.get("label", ""),
+            "arrows": "to",
+        })
+
+    return {
+        "nodes": clustered_nodes,
+        "edges": clustered_edges,
+    }
+
+
@router.get("/api/files")
async def list_files_api():
"""列出所有可用的数据文件"""
diff --git a/src/api/templates/visualizer.html b/src/api/templates/visualizer.html
index 47c105863..c21c5ffca 100644
--- a/src/api/templates/visualizer.html
+++ b/src/api/templates/visualizer.html
@@ -205,6 +205,59 @@
cursor: pointer;
}
+ .performance-mode {
+ display: flex;
+ flex-direction: column;
+ gap: 8px;
+ padding: 10px;
+ background: #fff3cd;
+ border-radius: 6px;
+ border: 1px solid #ffc107;
+ }
+
+ .performance-mode h3 {
+ font-size: 13px;
+ color: #856404;
+ margin: 0;
+ }
+
+ .performance-options {
+ display: flex;
+ flex-direction: column;
+ gap: 6px;
+ }
+
+ .performance-item {
+ display: flex;
+ align-items: center;
+ gap: 8px;
+ font-size: 12px;
+ }
+
+ .performance-item input[type="radio"] {
+ cursor: pointer;
+ }
+
+ .performance-item label {
+ cursor: pointer;
+ flex: 1;
+ }
+
+ .performance-tips {
+ margin-top: 8px;
+ padding: 8px;
+ background: #e8f5e9;
+ border-radius: 4px;
+ font-size: 11px;
+ color: #2e7d32;
+ line-height: 1.4;
+ }
+
+ .performance-tips strong {
+ display: block;
+ margin-bottom: 4px;
+ }
+
.loading {
position: absolute;
top: 50%;
@@ -214,6 +267,17 @@
color: #667eea;
font-size: 18px;
font-weight: 500;
+ z-index: 1000;
+ background: rgba(255, 255, 255, 0.95);
+ padding: 30px;
+ border-radius: 12px;
+ box-shadow: 0 4px 20px rgba(0, 0, 0, 0.2);
+ }
+
+ .loading-progress {
+ margin-top: 10px;
+ font-size: 14px;
+ color: #764ba2;
}
.loading-spinner {
@@ -482,6 +546,39 @@
+
+
+
ℹ️ 节点信息
@@ -499,6 +596,12 @@
+
+
+
@@ -538,6 +641,16 @@
memories: []
};
let originalData = null;
+ let currentLoadMode = 'auto';
+ let currentPage = 1;
+ let totalPages = 1;
+ let isLoading = false;
+ let hoveredNodeId = null; // 当前鼠标悬停的节点ID
+ let highlightedNodeIds = new Set(); // 当前高亮的节点ID集合
+
+ // 邻接表缓存,用于高亮优化
+ let adjacencyCache = null;
+ let nodeDegreeCache = null;
// 节点颜色配置
const nodeColors = {
@@ -548,75 +661,130 @@
'VALUE': '#98D8C8'
};
+        // Switch to the load mode of the checked "loadMode" radio button and reload the graph
+        function changeLoadMode() {
+            const checkedRadio = document.querySelector('input[name="loadMode"]:checked');
+            if (!checkedRadio) {
+                return; // nothing selected: keep the current mode
+            }
+
+            currentLoadMode = checkedRadio.value;
+            console.log('切换加载模式:', currentLoadMode);
+            loadGraph();
+        }
+
+        // Rebuild the adjacency-list and node-degree caches from an edge list
+        function buildAdjacencyCache(edges) {
+            adjacencyCache = new Map();
+            nodeDegreeCache = new Map();
+
+            // Register a node in both caches the first time it is seen
+            const ensureNode = (nodeId) => {
+                if (adjacencyCache.has(nodeId)) return;
+                adjacencyCache.set(nodeId, []);
+                nodeDegreeCache.set(nodeId, 0);
+            };
+
+            edges.forEach(({ from, to, id }) => {
+                ensureNode(from);
+                ensureNode(to);
+
+                // Every edge is recorded on both of its endpoints
+                adjacencyCache.get(from).push({ nodeId: to, edgeId: id });
+                adjacencyCache.get(to).push({ nodeId: from, edgeId: id });
+                nodeDegreeCache.set(from, nodeDegreeCache.get(from) + 1);
+                nodeDegreeCache.set(to, nodeDegreeCache.get(to) + 1);
+            });
+
+            console.log(`邻接表缓存构建完成: ${adjacencyCache.size} 个节点`);
+        }
+
// 初始化图形
function initNetwork() {
const container = document.getElementById('memory-graph');
- const options = {
- nodes: {
- shape: 'dot',
- size: 20,
- font: {
- size: 14,
- color: '#333',
- face: 'Microsoft YaHei'
- },
- borderWidth: 2,
- borderWidthSelected: 4,
- shadow: true
- },
- edges: {
- width: 2,
- color: {
- color: '#848484',
- highlight: '#667eea',
- hover: '#764ba2'
- },
- arrows: {
- to: {
- enabled: true,
- scaleFactor: 0.5
+ // 动态配置:大数据集使用极简配置
+ const getNetworkOptions = (nodeCount) => {
+ const isLargeDataset = nodeCount > 500;
+ const isVeryLargeDataset = nodeCount > 2000;
+
+ return {
+ nodes: {
+ shape: 'dot',
+ size: isVeryLargeDataset ? 10 : (isLargeDataset ? 15 : 20),
+ font: {
+ size: isVeryLargeDataset ? 0 : (isLargeDataset ? 10 : 14), // 超大数据集隐藏文字
+ color: '#333',
+ face: 'Microsoft YaHei'
+ },
+ borderWidth: isVeryLargeDataset ? 0 : 2,
+ borderWidthSelected: 3,
+ shadow: false, // 始终禁用阴影
+ scaling: {
+ min: isVeryLargeDataset ? 5 : 10,
+ max: isVeryLargeDataset ? 15 : 30
}
},
- smooth: {
- enabled: true,
- type: 'dynamic'
+ edges: {
+ width: isVeryLargeDataset ? 0.5 : (isLargeDataset ? 1 : 2),
+ color: {
+ color: '#848484',
+ highlight: '#667eea',
+ hover: '#764ba2',
+ opacity: isVeryLargeDataset ? 0.3 : 0.7 // 超大数据集边半透明
+ },
+ arrows: {
+ to: {
+ enabled: !isVeryLargeDataset, // 超大数据集禁用箭头
+ scaleFactor: 0.3
+ }
+ },
+ smooth: false, // 始终禁用平滑以提升性能
+ font: {
+ size: 0, // 始终隐藏边标签
+ strokeWidth: 0
+ },
+ shadow: false,
+ selectionWidth: 2
},
- font: {
- size: 11,
- color: '#666',
- face: 'Microsoft YaHei',
- align: 'middle'
+ physics: {
+ enabled: true,
+ barnesHut: {
+ gravitationalConstant: isVeryLargeDataset ? -800 : (isLargeDataset ? -2000 : -8000),
+ centralGravity: isVeryLargeDataset ? 0.1 : 0.3,
+ springLength: isVeryLargeDataset ? 50 : (isLargeDataset ? 95 : 150),
+ springConstant: isVeryLargeDataset ? 0.001 : (isLargeDataset ? 0.02 : 0.04),
+ damping: isVeryLargeDataset ? 0.3 : 0.09,
+ avoidOverlap: 0
+ },
+ stabilization: {
+ enabled: true,
+ iterations: isVeryLargeDataset ? 50 : (isLargeDataset ? 100 : 300),
+ updateInterval: isVeryLargeDataset ? 100 : (isLargeDataset ? 50 : 25),
+ onlyDynamicEdges: false,
+ fit: true
+ },
+ solver: 'barnesHut',
+ timestep: isVeryLargeDataset ? 1.0 : 0.5,
+ adaptiveTimestep: true,
+ maxVelocity: isVeryLargeDataset ? 100 : 50,
+ minVelocity: isVeryLargeDataset ? 5 : 0.75
+ },
+ interaction: {
+ hover: true, // 始终启用hover - 我们用自定义事件处理标签显示
+ tooltipDelay: 300,
+ zoomView: true,
+ dragView: true,
+ hideEdgesOnDrag: true, // 始终在拖拽时隐藏边
+ hideEdgesOnZoom: true, // 始终在缩放时隐藏边
+ hideNodesOnDrag: isVeryLargeDataset, // 超大数据集拖拽时也隐藏节点
+ navigationButtons: false,
+ keyboard: false
+ },
+ layout: {
+ improvedLayout: !isVeryLargeDataset, // 超大数据集禁用改进布局
+ randomSeed: 2 // 固定随机种子以获得一致的布局
}
- },
- physics: {
- enabled: true,
- barnesHut: {
- gravitationalConstant: -8000,
- centralGravity: 0.3,
- springLength: 150,
- springConstant: 0.04,
- damping: 0.09,
- avoidOverlap: 0.1
- },
- stabilization: {
- enabled: true,
- iterations: 300,
- updateInterval: 25,
- onlyDynamicEdges: false,
- fit: true
- },
- // 稳定后自动停止物理引擎
- solver: 'barnesHut',
- timestep: 0.5,
- adaptiveTimestep: true
- },
- interaction: {
- hover: true,
- tooltipDelay: 100,
- zoomView: true,
- dragView: true
- }
+ };
};
const data = {
@@ -624,7 +792,13 @@
edges: new vis.DataSet([])
};
- network = new vis.Network(container, data, options);
+ // 初始使用中等配置
+ network = new vis.Network(container, data, getNetworkOptions(500));
+
+ // 保存配置函数供后续使用
+ network.updateOptions = (nodeCount) => {
+ network.setOptions(getNetworkOptions(nodeCount));
+ };
// 添加事件监听
network.on('click', function(params) {
@@ -638,76 +812,395 @@
}
});
+ // 添加鼠标悬停事件(仅在小数据集启用)
+ network.on('hoverNode', function(params) {
+ hoveredNodeId = params.node;
+ updateNodeLabel(params.node, true); // 显示悬停节点的标签
+ });
+
+ network.on('blurNode', function(params) {
+ // 如果节点不在高亮集合中,隐藏标签
+ if (!highlightedNodeIds.has(params.node)) {
+ updateNodeLabel(params.node, false);
+ }
+ hoveredNodeId = null;
+ });
+
// 稳定化完成后停止物理引擎
network.on('stabilizationIterationsDone', function() {
- console.log('初始稳定化完成,停止物理引擎');
+ console.log('稳定化完成,停止物理引擎');
network.setOptions({ physics: { enabled: false } });
+ updateLoadingProgress('布局完成');
});
// 添加稳定化进度监听
network.on('stabilizationProgress', function(params) {
const progress = Math.round((params.iterations / params.total) * 100);
- if (progress % 10 === 0) { // 每10%打印一次
- console.log(`稳定化进度: ${progress}%`);
- }
+ updateLoadingProgress(`布局中: ${progress}%`);
});
}
- // 加载图形数据
- async function loadGraph() {
- try {
- document.getElementById('loading').style.display = 'block';
-
- const response = await fetch('/visualizer/api/graph/full');
- const result = await response.json();
+        // Show `message` in the loading overlay's progress line.
+        // The progress element is created inside #loading on first use; when
+        // neither it nor #loading exists the call does nothing.
+        function updateLoadingProgress(message) {
+            let progressDiv = document.querySelector('.loading-progress');
+
+            if (!progressDiv) {
+                const loadingDiv = document.getElementById('loading');
+                if (!loadingDiv) return;
+
+                // No second lookup inside #loading: the document-wide query
+                // above already proved that no .loading-progress element exists.
+                progressDiv = document.createElement('div');
+                progressDiv.className = 'loading-progress';
+                loadingDiv.appendChild(progressDiv);
+            }
+
+            progressDiv.textContent = message;
+        }
- if (result.success) {
+        // Show or hide the label of a single node in the rendered network
+        function updateNodeLabel(nodeId, show) {
+            if (!network || !graphData) return;
+
+            const nodeSet = network.body.data.nodes;
+            if (!nodeSet.get(nodeId)) return;
+
+            // The label text comes from the originally loaded node data
+            const sourceNode = graphData.nodes.find(candidate => candidate.id === nodeId);
+            if (!sourceNode) return;
+
+            const patch = show
+                ? { id: nodeId, label: sourceNode.label, font: { size: 14, color: '#333' } }
+                : { id: nodeId, label: '', font: { size: 0 } };
+            nodeSet.update(patch);
+        }
+
+        // Show or hide the labels of several edges with one batched update
+        function updateEdgeLabels(edgeIds, show) {
+            if (!network || !graphData || edgeIds.size === 0) return;
+
+            const edgeSet = network.body.data.edges;
+            const shownFont = { size: 11, color: '#666' };
+            const pending = [];
+
+            edgeIds.forEach(edgeId => {
+                // Skip edges that are not part of the rendered network
+                if (!edgeSet.get(edgeId)) return;
+
+                // The label text comes from the originally loaded edge data
+                const sourceEdge = graphData.edges.find(candidate => candidate.id === edgeId);
+                if (!sourceEdge) return;
+
+                if (show) {
+                    pending.push({ id: edgeId, label: sourceEdge.label || '', font: { ...shownFont } });
+                } else {
+                    pending.push({ id: edgeId, label: '', font: { size: 0 } });
+                }
+            });
+
+            if (pending.length > 0) {
+                edgeSet.update(pending);
+            }
+        }
+
+ // 加载图形数据(智能选择模式)
+ async function loadGraph() {
+ if (isLoading) {
+ console.log('已有加载任务在进行中');
+ return;
+ }
+
+ try {
+ isLoading = true;
+ document.getElementById('loading').style.display = 'block';
+ updateLoadingProgress('获取数据摘要...');
+
+ // 首先获取摘要信息以决定加载策略
+ const summaryResponse = await fetch('/visualizer/api/graph/summary');
+ const summaryResult = await summaryResponse.json();
+
+ if (!summaryResult.success) {
+ throw new Error(summaryResult.error);
+ }
+
+ const totalNodes = summaryResult.data.stats.total_nodes;
+ console.log(`图数据节点总数: ${totalNodes}`);
+
+ // 根据模式和数据量选择加载策略(更保守的阈值)
+ let loadMode = currentLoadMode;
+ if (loadMode === 'auto') {
+ if (totalNodes <= 500) {
+ loadMode = 'full';
+ } else if (totalNodes <= 2000) {
+ loadMode = 'cluster';
+ } else {
+ loadMode = 'paginated';
+ }
+ console.log(`自动选择加载模式: ${loadMode} (节点数: ${totalNodes})`);
+ }
+
+ // 根据选择的模式加载数据
+ let result;
+ if (loadMode === 'full') {
+ updateLoadingProgress('加载完整数据...');
+ result = await loadFullGraph();
+ } else if (loadMode === 'cluster') {
+ updateLoadingProgress('加载聚类数据...');
+ result = await loadClusteredGraph();
+ } else if (loadMode === 'paginated') {
+ updateLoadingProgress('加载分页数据 (第1页)...');
+ result = await loadPaginatedGraph(1);
+ }
+
+ if (result && result.success) {
+ updateLoadingProgress('渲染图形...');
originalData = result.data;
updateGraph(result.data);
updateStats(result.data.stats);
+
+ // 构建缓存以优化后续操作
+ buildAdjacencyCache(result.data.edges || []);
+
+ // 显示/隐藏分页控制
+ const paginationControls = document.getElementById('paginationControls');
+ if (result.data.pagination) {
+ const p = result.data.pagination;
+ totalPages = p.total_pages;
+ currentPage = p.page;
+ paginationControls.style.display = 'flex';
+ document.getElementById('pageInfo').textContent = `第 ${p.page}/${p.total_pages} 页`;
+ document.getElementById('prevPageBtn').disabled = !p.has_prev;
+ document.getElementById('nextPageBtn').disabled = !p.has_next;
+ } else {
+ paginationControls.style.display = 'none';
+ }
+
+ // 显示加载信息
+ if (result.data.clustered) {
+ console.log(`✅ 聚类简化: ${result.data.stats.original_nodes} → ${result.data.stats.clustered_nodes} 节点`);
+ } else if (result.data.pagination) {
+ const p = result.data.pagination;
+ console.log(`✅ 分页加载: 第 ${p.page}/${p.total_pages} 页 (共 ${p.total_nodes} 节点)`);
+ } else {
+ console.log(`✅ 完整加载: ${result.data.stats.total_nodes} 节点`);
+ }
} else {
- alert('加载失败: ' + result.error);
+ alert('加载失败: ' + (result ? result.error : '未知错误'));
}
} catch (error) {
console.error('加载图形失败:', error);
alert('加载失败: ' + error.message);
} finally {
+ isLoading = false;
document.getElementById('loading').style.display = 'none';
}
}
- // 更新图形显示
+        // Fetch the complete graph and return the parsed JSON payload
+        async function loadFullGraph() {
+            const fullGraphResponse = await fetch('/visualizer/api/graph/full');
+            const payload = await fullGraphResponse.json();
+            return payload;
+        }
+
+        // Fetch the cluster-simplified graph and return the parsed JSON payload
+        async function loadClusteredGraph(maxNodes = 300) {
+            const clusteredUrl = `/visualizer/api/graph/clustered?max_nodes=${maxNodes}&cluster_threshold=10`;
+            const clusteredResponse = await fetch(clusteredUrl);
+            const payload = await clusteredResponse.json();
+            return payload;
+        }
+
+        // Fetch one page of the graph and return the parsed JSON payload
+        async function loadPaginatedGraph(page = 1, pageSize = 500) {
+            const pageUrl = `/visualizer/api/graph/paginated?page=${page}&page_size=${pageSize}&min_importance=0.0`;
+            const pageResponse = await fetch(pageUrl);
+            const payload = await pageResponse.json();
+            return payload;
+        }
+
+ // 更新图形显示(优化版本 - 使用节点限制和延迟渲染)
function updateGraph(data) {
graphData = data;
+ const nodeCount = data.nodes.length;
+ const edgeCount = data.edges.length;
- // 处理节点数据
- const nodes = data.nodes.map(node => ({
- id: node.id,
- label: node.label,
- title: node.title,
- group: node.group,
- color: nodeColors[node.group] || '#999',
- metadata: node.metadata
- }));
+ console.log(`准备更新图形: ${nodeCount} 个节点, ${edgeCount} 条边`);
- // 处理边数据
- const edges = data.edges.map(edge => ({
- id: edge.id,
- from: edge.from,
- to: edge.to,
- label: edge.label,
- title: edge.title,
- width: edge.importance * 3 + 1
- }));
+ // 对于超大数据集,进一步限制
+ const MAX_RENDERABLE_NODES = 5000;
+ const MAX_RENDERABLE_EDGES = 10000;
+
+ let nodesToRender = data.nodes;
+ let edgesToRender = data.edges;
+ let isLimited = false;
- // 更新网络
- network.setData({
- nodes: new vis.DataSet(nodes),
- edges: new vis.DataSet(edges)
+ // 如果超过限制,只渲染最重要的节点
+ if (nodeCount > MAX_RENDERABLE_NODES) {
+ console.warn(`节点数 ${nodeCount} 超过渲染限制 ${MAX_RENDERABLE_NODES},将只显示最重要的节点`);
+
+ // 计算节点重要性
+ const nodeDegrees = new Map();
+ data.edges.forEach(edge => {
+ nodeDegrees.set(edge.from, (nodeDegrees.get(edge.from) || 0) + 1);
+ nodeDegrees.set(edge.to, (nodeDegrees.get(edge.to) || 0) + 1);
+ });
+
+ // 按连接度排序,保留前N个
+ nodesToRender = data.nodes
+ .map(node => ({
+ ...node,
+ degree: nodeDegrees.get(node.id) || 0
+ }))
+ .sort((a, b) => b.degree - a.degree)
+ .slice(0, MAX_RENDERABLE_NODES);
+
+ const renderableNodeIds = new Set(nodesToRender.map(n => n.id));
+ edgesToRender = data.edges.filter(e =>
+ renderableNodeIds.has(e.from) && renderableNodeIds.has(e.to)
+ );
+
+ isLimited = true;
+ alert(`⚠️ 数据量过大\n原始: ${nodeCount} 节点\n仅渲染: ${nodesToRender.length} 个最重要节点\n建议使用"聚类简化"或"分页加载"模式`);
+ }
+
+ // 如果边数过多,也进行限制
+ if (edgesToRender.length > MAX_RENDERABLE_EDGES) {
+ console.warn(`边数 ${edgesToRender.length} 超过渲染限制 ${MAX_RENDERABLE_EDGES}`);
+ edgesToRender = edgesToRender.slice(0, MAX_RENDERABLE_EDGES);
+ isLimited = true;
+ }
+
+ // 根据节点数量动态调整网络配置
+ if (network.updateOptions) {
+ network.updateOptions(nodesToRender.length);
+ }
+
+ console.log(`实际渲染: ${nodesToRender.length} 个节点, ${edgesToRender.length} 条边`);
+
+ // 极简化节点数据 - 移除所有不必要的属性
+ const startTime = performance.now();
+ const nodes = nodesToRender.map(node => {
+ const baseNode = {
+ id: node.id,
+ label: '', // 默认不显示标签,仅在悬停或高亮时显示
+ group: node.group,
+ color: nodeColors[node.group] || '#999'
+ };
+
+ // 仅在小数据集中添加title(tooltip)
+ if (nodesToRender.length <= 1000) {
+ baseNode.title = node.title || node.label;
+ }
+
+ // 如果是聚类节点,使用不同的样式并始终显示标签
+ if (node.is_cluster) {
+ baseNode.shape = 'star';
+ baseNode.size = 25 + Math.min(node.cluster_size / 10, 15);
+ baseNode.label = `${node.group} (${node.cluster_size})`;
+ baseNode.font = { size: 14, bold: true };
+ }
+
+ return baseNode;
});
- // 注意:setData 会自动触发物理引擎重新布局
- // stabilizationIterationsDone 事件监听器会自动停止物理引擎
+ // 极简化边数据
+ const edges = edgesToRender.map(edge => {
+ const baseEdge = {
+ id: edge.id,
+ from: edge.from,
+ to: edge.to,
+ label: '' // 默认不显示标签,仅在高亮时显示
+ };
+
+ // 在小数据集中保留边宽度
+ if (edgesToRender.length <= 1000) {
+ baseEdge.width = (edge.importance || 0.5) * 2 + 0.5;
+ }
+
+ return baseEdge;
+ });
+
+ // 批量更新网络(使用异步渲染以避免阻塞UI)
+ const prepTime = performance.now() - startTime;
+ console.log(`数据准备耗时: ${prepTime.toFixed(2)}ms`);
+
+ // 使用 requestAnimationFrame 异步渲染,避免阻塞UI
+ requestAnimationFrame(() => {
+ const renderStart = performance.now();
+
+ // 先清空现有数据
+ network.setData({
+ nodes: new vis.DataSet([]),
+ edges: new vis.DataSet([])
+ });
+
+ // 分批添加节点和边以提升响应性
+ const BATCH_SIZE = 500;
+ let nodeIndex = 0;
+ let edgeIndex = 0;
+
+ const nodeDataSet = new vis.DataSet();
+ const edgeDataSet = new vis.DataSet();
+
+ // Feed the next BATCH_SIZE nodes into the node data set, then hand control
+ // back to the event loop (setTimeout 0) before the following step runs.
+ function addNodeBatch() {
+     const chunk = nodes.slice(nodeIndex, nodeIndex + BATCH_SIZE);
+     if (chunk.length === 0) {
+         return;
+     }
+     nodeDataSet.add(chunk);
+     nodeIndex += BATCH_SIZE;
+     updateLoadingProgress(`加载节点: ${Math.min(nodeIndex, nodes.length)}/${nodes.length}`);
+     // Nodes still pending -> another node batch; otherwise move on to the edges.
+     const nextStep = nodeIndex < nodes.length ? addNodeBatch : addEdgeBatch;
+     setTimeout(nextStep, 0);
+ }
+
+ // Feed the next BATCH_SIZE edges into the edge data set. Once every edge has
+ // been added - or when there are no edges at all - publish the data sets
+ // through finishRendering().
+ function addEdgeBatch() {
+     const batch = edges.slice(edgeIndex, edgeIndex + BATCH_SIZE);
+     if (batch.length === 0) {
+         // Nothing (left) to add. The network was cleared before batching
+         // started, so skipping finishRendering() here would leave the graph
+         // blank even though all nodes were loaded.
+         finishRendering();
+         return;
+     }
+     edgeDataSet.add(batch);
+     edgeIndex += BATCH_SIZE;
+     updateLoadingProgress(`加载边: ${Math.min(edgeIndex, edges.length)}/${edges.length}`);
+     if (edgeIndex < edges.length) {
+         // Yield to the event loop between batches.
+         setTimeout(addEdgeBatch, 0);
+     } else {
+         // All data is in place: update the network.
+         finishRendering();
+     }
+ }
+
+ // Hand the populated data sets to the network, log the elapsed time since
+ // renderStart and mark the loading progress as complete.
+ function finishRendering() {
+     const populated = { nodes: nodeDataSet, edges: edgeDataSet };
+     network.setData(populated);
+     const elapsedMs = performance.now() - renderStart;
+     console.log(`图形渲染总耗时: ${elapsedMs.toFixed(2)}ms`);
+     updateLoadingProgress('渲染完成');
+ }
+
+ // 对于小数据集,直接一次性加载
+ if (nodes.length <= 1000) {
+ nodeDataSet.add(nodes);
+ edgeDataSet.add(edges);
+ network.setData({
+ nodes: nodeDataSet,
+ edges: edgeDataSet
+ });
+ const renderTime = performance.now() - renderStart;
+ console.log(`图形渲染总耗时: ${renderTime.toFixed(2)}ms`);
+ } else {
+ // 大数据集使用分批加载
+ addNodeBatch();
+ }
+ });
}
// 更新统计信息
@@ -748,7 +1241,7 @@
}
try {
- const response = await fetch(`/visualizer/api/search?q=${encodeURIComponent(query)}&limit=50`);
+ const response = await fetch(`api/search?q=${encodeURIComponent(query)}&limit=50`);
const result = await response.json();
if (result.success) {
@@ -788,31 +1281,32 @@
}
}
- // 高亮与选中节点连接的节点(最多3跳深度)
+ // 高亮与选中节点连接的节点(优化版本,使用缓存)
function highlightConnectedNodes(nodeId) {
- if (!network || !graphData) return;
+ if (!network || !graphData || !adjacencyCache) return;
- // 使用 BFS 探索最多3跳深度的连接节点
- const MAX_DEPTH = 3;
+ const startTime = performance.now();
+
+ // 根据加载模式和数据规模决定探索深度
+ // 完整加载模式:1跳
+ // 聚类/分页模式:3跳(因为数据量已经减少)
+ const nodeCount = graphData.nodes.length;
+ let MAX_DEPTH = 3;
+
+ if (currentLoadMode === 'full' || (currentLoadMode === 'auto' && nodeCount <= 500)) {
+ MAX_DEPTH = 1; // 完整加载模式限制为1跳
+ console.log('完整加载模式:使用1跳探索深度');
+ } else {
+ console.log('聚类/分页模式:使用3跳探索深度');
+ }
+
+ // 使用缓存的邻接表进行 BFS
const connectedNodeIds = new Set();
const connectedEdgeIds = new Set();
const visited = new Set();
const queue = [{ nodeId: nodeId, depth: 0 }];
- // 构建邻接表以提高查询效率
- const adjacencyMap = new Map();
- graphData.edges.forEach(edge => {
- if (!adjacencyMap.has(edge.from)) {
- adjacencyMap.set(edge.from, []);
- }
- if (!adjacencyMap.has(edge.to)) {
- adjacencyMap.set(edge.to, []);
- }
- adjacencyMap.get(edge.from).push({ nodeId: edge.to, edgeId: edge.id });
- adjacencyMap.get(edge.to).push({ nodeId: edge.from, edgeId: edge.id });
- });
-
- // BFS 遍历,限制深度为3跳
+ // BFS 遍历
while (queue.length > 0) {
const { nodeId: currentNode, depth } = queue.shift();
@@ -820,11 +1314,10 @@
visited.add(currentNode);
connectedNodeIds.add(currentNode);
- // 如果已经达到最大深度,不再探索更深的节点
if (depth >= MAX_DEPTH) continue;
- // 探索相邻节点
- const neighbors = adjacencyMap.get(currentNode) || [];
+ // 使用缓存的邻接表
+ const neighbors = adjacencyCache.get(currentNode) || [];
neighbors.forEach(({ nodeId: neighborId, edgeId }) => {
connectedEdgeIds.add(edgeId);
if (!visited.has(neighborId)) {
@@ -833,22 +1326,30 @@
});
}
- console.log(`选中节点: ${nodeId}, 连接的节点数: ${connectedNodeIds.size}, 连接的边数: ${connectedEdgeIds.size} (最大深度: ${MAX_DEPTH})`);
+ const searchTime = performance.now() - startTime;
+ console.log(`BFS搜索耗时: ${searchTime.toFixed(2)}ms, 找到 ${connectedNodeIds.size} 个连接节点 (深度${MAX_DEPTH}跳)`);
- // 更新所有节点的透明度
+ // 更新高亮节点集合
+ highlightedNodeIds = connectedNodeIds;
+
+ // 批量更新节点和边
const allNodes = network.body.data.nodes;
const allEdges = network.body.data.edges;
- const updates = [];
+ const nodeUpdates = [];
+ const edgeUpdates = [];
+ // 更新节点
allNodes.get().forEach(node => {
+ const originalNode = graphData.nodes.find(n => n.id === node.id);
+ if (!originalNode) return;
+
if (connectedNodeIds.has(node.id)) {
- // 连接的节点保持正常,甚至可以加强显示
- // 被选中的节点特别突出
const isSelected = node.id === nodeId;
- updates.push({
+ nodeUpdates.push({
id: node.id,
opacity: 1.0,
borderWidth: isSelected ? 5 : 3,
+ label: originalNode.label || '', // 显示高亮节点的标签
font: {
color: isSelected ? '#667eea' : '#333',
size: isSelected ? 16 : 14,
@@ -856,46 +1357,52 @@
}
});
} else {
- // 无关节点变为高度透明
- const dimmedColor = hexToRgba(node.color, 0.08);
- updates.push({
+ const originalColor = nodeColors[node.group] || '#999';
+ const dimmedColor = hexToRgba(originalColor, 0.08);
+ nodeUpdates.push({
id: node.id,
color: {
background: dimmedColor,
border: dimmedColor,
- highlight: { background: dimmedColor, border: dimmedColor }
},
opacity: 0.08,
- font: { color: 'rgba(51, 51, 51, 0.08)', size: 14 }
+ label: '', // 隐藏非高亮节点的标签
+ font: { color: 'rgba(51, 51, 51, 0.08)', size: 0 }
});
}
});
- allNodes.update(updates);
- // 更新所有边的透明度
- const edgeUpdates = [];
+ // 更新边
allEdges.get().forEach(edge => {
+ const originalEdge = graphData.edges.find(e => e.id === edge.id);
+
if (connectedEdgeIds.has(edge.id)) {
- // 连接的边加强显示
edgeUpdates.push({
id: edge.id,
color: { color: '#667eea', opacity: 1.0 },
width: 4,
+ label: originalEdge?.label || '', // 显示高亮边的标签
font: { color: '#667eea', size: 12 }
});
} else {
- // 无关边变为高度透明
edgeUpdates.push({
id: edge.id,
color: { color: '#848484', opacity: 0.03 },
width: 1,
- font: { color: 'rgba(102, 102, 102, 0.03)', size: 11 }
+ label: '', // 隐藏非高亮边的标签
+ font: { color: 'rgba(102, 102, 102, 0.03)', size: 0 }
});
}
});
+
+ // 批量应用更新
+ allNodes.update(nodeUpdates);
allEdges.update(edgeUpdates);
- // 将视图聚焦到连接的子图
+ const totalTime = performance.now() - startTime;
+ console.log(`高亮操作总耗时: ${totalTime.toFixed(2)}ms`);
+
+ // 聚焦视图
if (connectedNodeIds.size > 1 && connectedNodeIds.size < 100) {
network.fit({
nodes: Array.from(connectedNodeIds),
@@ -907,38 +1414,46 @@
}
}
- // 重置节点高亮状态
+ // 重置节点高亮状态(优化版本)
function resetNodeHighlight() {
if (!network || !graphData) return;
+ const startTime = performance.now();
const allNodes = network.body.data.nodes;
const allEdges = network.body.data.edges;
- // 恢复所有节点 - 重新应用原始颜色
- const nodeUpdates = [];
- allNodes.get().forEach(node => {
+ // 清空高亮节点集合
+ highlightedNodeIds.clear();
+
+ // 批量恢复所有节点(不显示标签,除非是悬停节点)
+ const nodeUpdates = allNodes.get().map(node => {
const originalColor = nodeColors[node.group] || '#999';
- nodeUpdates.push({
+ const shouldShowLabel = node.id === hoveredNodeId; // 只显示悬停节点的标签
+ const originalNode = shouldShowLabel ? graphData.nodes.find(n => n.id === node.id) : null;
+
+ return {
id: node.id,
color: originalColor,
opacity: 1.0,
borderWidth: 2,
- font: { color: '#333', size: 14, bold: false }
- });
+ label: shouldShowLabel && originalNode ? originalNode.label : '',
+ font: { color: '#333', size: shouldShowLabel ? 14 : 0, bold: false }
+ };
});
allNodes.update(nodeUpdates);
- // 恢复所有边
- const edgeUpdates = [];
- allEdges.get().forEach(edge => {
- edgeUpdates.push({
- id: edge.id,
- color: { color: '#848484', opacity: 1.0 },
- width: 2,
- font: { color: '#666', size: 11 }
- });
- });
+ // 批量恢复所有边(不显示标签)
+ const edgeUpdates = allEdges.get().map(edge => ({
+ id: edge.id,
+ color: { color: '#848484', opacity: 1.0 },
+ width: 2,
+ label: '',
+ font: { color: '#666', size: 0 }
+ }));
allEdges.update(edgeUpdates);
+
+ const endTime = performance.now();
+ console.log(`重置高亮耗时: ${(endTime - startTime).toFixed(2)}ms`);
}
// 辅助函数:将十六进制颜色转换为 rgba
@@ -953,7 +1468,7 @@
return `rgba(${r}, ${g}, ${b}, ${alpha})`;
}
- // 应用过滤器
+ // 应用过滤器(优化版本)
function applyFilters() {
if (!originalData) return;
@@ -991,25 +1506,27 @@
// 重置高亮状态
resetNodeHighlight();
- // 重新启用物理引擎以重新布局,避免节点排版错乱
- if (network) {
+ // 重建缓存
+ buildAdjacencyCache(filteredEdges);
+
+ // 重新启用物理引擎以重新布局
+ if (network && filteredNodes.length > 0) {
network.setOptions({ physics: { enabled: true } });
- // 设置超时保护,确保物理引擎最终会停止
+ // 超时保护
let stabilized = false;
- const stabilizationTimeout = setTimeout(() => {
+ const timeout = setTimeout(() => {
if (!stabilized) {
console.log('物理引擎稳定超时,强制停止');
network.setOptions({ physics: { enabled: false } });
}
- }, 5000); // 5秒超时
+ }, 5000);
- // 等待稳定后再禁用物理引擎
network.once('stabilizationIterationsDone', function() {
stabilized = true;
- clearTimeout(stabilizationTimeout);
+ clearTimeout(timeout);
network.setOptions({ physics: { enabled: false } });
- console.log('物理引擎已稳定并停止');
+ console.log('过滤后重新布局完成');
});
}
}
@@ -1026,6 +1543,66 @@
}
}
+ // Toggle the physics engine and update the toggle button caption to match.
+ function togglePhysics() {
+     if (!network) {
+         return;
+     }
+     const wasEnabled = network.physics.options.enabled;
+     network.setOptions({ physics: { enabled: !wasEnabled } });
+     // The caption names the action the next click will perform.
+     const caption = wasEnabled ? '⚙️ 启用物理' : '⏸️ 禁用物理';
+     document.getElementById('physicsToggle').textContent = caption;
+     const stateText = wasEnabled ? '已禁用' : '已启用';
+     console.log(`物理引擎: ${stateText}`);
+ }
+
+ // Advance to the next page unless a load is in flight or the current page
+ // is not below totalPages.
+ async function loadNextPage() {
+     const canAdvance = !isLoading && currentPage < totalPages;
+     if (!canAdvance) {
+         return;
+     }
+     currentPage++;
+     await loadPaginatedGraphAndUpdate();
+ }
+
+ // Step back one page unless a load is in flight or the current page
+ // is not above 1.
+ async function loadPreviousPage() {
+     const canGoBack = !isLoading && currentPage > 1;
+     if (!canGoBack) {
+         return;
+     }
+     currentPage--;
+     await loadPaginatedGraphAndUpdate();
+ }
+
+ // Load the page selected by `currentPage`, then refresh the graph, the stats,
+ // the adjacency cache and the pager controls.
+ //
+ // Failures (thrown errors as well as `success: false` responses) are reported
+ // with an alert. After a failure `currentPage` is reset to the page recorded in
+ // `originalData`, i.e. the data that is still displayed, so the counter that
+ // the callers already incremented/decremented does not drift from the screen.
+ async function loadPaginatedGraphAndUpdate() {
+     let loaded = false;
+     try {
+         isLoading = true;
+         document.getElementById('loading').style.display = 'block';
+         updateLoadingProgress(`加载第 ${currentPage} 页...`);
+
+         const result = await loadPaginatedGraph(currentPage);
+
+         if (result && result.success) {
+             originalData = result.data;
+             updateGraph(result.data);
+             updateStats(result.data.stats);
+             buildAdjacencyCache(result.data.edges || []);
+
+             // Refresh the pager from the pagination state the server reported.
+             const p = result.data.pagination;
+             totalPages = p.total_pages;
+             currentPage = p.page;
+             document.getElementById('pageInfo').textContent = `第 ${p.page}/${p.total_pages} 页`;
+             document.getElementById('prevPageBtn').disabled = !p.has_prev;
+             document.getElementById('nextPageBtn').disabled = !p.has_next;
+             loaded = true;
+         } else {
+             // Same user-facing report as the main graph loader uses.
+             alert('加载失败: ' + (result ? result.error : '未知错误'));
+         }
+     } catch (error) {
+         console.error('加载分页失败:', error);
+         alert('加载失败: ' + error.message);
+     } finally {
+         if (!loaded) {
+             // Fall back to the page of the data still on screen; keep the
+             // current value when that data carries no pagination info.
+             currentPage = originalData?.pagination?.page ?? currentPage;
+         }
+         isLoading = false;
+         document.getElementById('loading').style.display = 'none';
+     }
+ }
+
// 导出图形数据
function exportGraph() {
const dataStr = JSON.stringify(graphData, null, 2);
diff --git a/src/memory_graph/core/builder.py b/src/memory_graph/core/builder.py
index f8607e583..4b0d66218 100644
--- a/src/memory_graph/core/builder.py
+++ b/src/memory_graph/core/builder.py
@@ -185,12 +185,19 @@ class MemoryBuilder:
logger.debug(f"复用已存在的主体节点: {existing.id}")
return existing
+ # 为主体和值节点生成嵌入向量(用于人名/实体和重要描述检索)
+ embedding = None
+ if node_type in (NodeType.SUBJECT, NodeType.VALUE):
+ # 只为有足够内容的节点生成嵌入(避免浪费)
+ if len(content.strip()) >= 2:
+ embedding = await self._generate_embedding(content)
+
# 创建新节点
node = MemoryNode(
id=self._generate_node_id(),
content=content,
node_type=node_type,
- embedding=None, # 主体和属性不需要嵌入
+ embedding=embedding, # 主体、值需要嵌入,属性不需要
metadata={"memory_ids": [memory_id]},
)
diff --git a/src/memory_graph/tools/memory_tools.py b/src/memory_graph/tools/memory_tools.py
index ec53c3ea3..195d2d01b 100644
--- a/src/memory_graph/tools/memory_tools.py
+++ b/src/memory_graph/tools/memory_tools.py
@@ -516,6 +516,22 @@ class MemoryTools:
# 记录最高分数
if mem_id not in memory_scores or similarity > memory_scores[mem_id]:
memory_scores[mem_id] = similarity
+
+ # 🔥 详细日志:检查初始召回情况
+ logger.info(
+ f"初始向量搜索: 返回{len(similar_nodes)}个节点 → "
+ f"提取{len(initial_memory_ids)}条记忆"
+ )
+ if len(initial_memory_ids) == 0:
+ logger.warning(
+ f"⚠️ 向量搜索未找到任何记忆!"
+ f"可能原因:1) 嵌入模型理解问题 2) 记忆节点未建立索引 3) 查询表达与存储内容差异过大"
+ )
+ # 输出相似节点的详细信息用于调试
+ if similar_nodes:
+ logger.debug(f"向量搜索返回的节点元数据样例: {similar_nodes[0][2] if len(similar_nodes) > 0 else 'None'}")
+ elif len(initial_memory_ids) < 3:
+ logger.warning(f"⚠️ 初始召回记忆数量较少({len(initial_memory_ids)}条),可能影响结果质量")
# 3. 图扩展(如果启用且有expand_depth)
expanded_memory_scores = {}
@@ -609,42 +625,37 @@ class MemoryTools:
if dominant_node_type in ["ATTRIBUTE", "REFERENCE"] or memory_type == "FACT":
# 事实性记忆(如文档地址、配置信息):语义相似度最重要
weights = {
- "similarity": 0.65, # 语义相似度 65% ⬆️
- "importance": 0.20, # 重要性 20%
- "recency": 0.05, # 时效性 5% ⬇️(事实不随时间失效)
- "activation": 0.10 # 激活度 10% ⬇️(避免冷门信息被压制)
+ "similarity": 0.70, # 语义相似度 70% ⬆️
+ "importance": 0.25, # 重要性 25% ⬆️
+ "recency": 0.05, # 时效性 5%(事实不随时间失效)
}
elif memory_type in ["CONVERSATION", "EPISODIC"] or dominant_node_type == "EVENT":
- # 对话/事件记忆:时效性和激活度更重要
+ # 对话/事件记忆:时效性更重要
weights = {
- "similarity": 0.45, # 语义相似度 45%
- "importance": 0.15, # 重要性 15%
- "recency": 0.20, # 时效性 20% ⬆️
- "activation": 0.20 # 激活度 20%
+ "similarity": 0.55, # 语义相似度 55% ⬆️
+ "importance": 0.20, # 重要性 20% ⬆️
+ "recency": 0.25, # 时效性 25% ⬆️
}
elif dominant_node_type == "ENTITY" or memory_type == "SEMANTIC":
# 实体/语义记忆:平衡各项
weights = {
- "similarity": 0.50, # 语义相似度 50%
- "importance": 0.25, # 重要性 25%
+ "similarity": 0.60, # 语义相似度 60% ⬆️
+ "importance": 0.30, # 重要性 30% ⬆️
"recency": 0.10, # 时效性 10%
- "activation": 0.15 # 激活度 15%
}
else:
# 默认权重(保守策略,偏向语义)
weights = {
- "similarity": 0.55, # 语义相似度 55%
- "importance": 0.20, # 重要性 20%
+ "similarity": 0.65, # 语义相似度 65% ⬆️
+ "importance": 0.25, # 重要性 25% ⬆️
"recency": 0.10, # 时效性 10%
- "activation": 0.15 # 激活度 15%
}
- # 综合分数计算
+ # 综合分数计算(🔥 移除激活度影响)
final_score = (
similarity_score * weights["similarity"] +
importance_score * weights["importance"] +
- recency_score * weights["recency"] +
- activation_score * weights["activation"]
+ recency_score * weights["recency"]
)
# 🆕 节点类型加权:对REFERENCE/ATTRIBUTE节点额外加分(促进事实性信息召回)
@@ -943,11 +954,16 @@ class MemoryTools:
logger.warning("嵌入生成失败,跳过节点搜索")
return []
- # 向量搜索
+ # 向量搜索(增加返回数量以提高召回率)
similar_nodes = await self.vector_store.search_similar_nodes(
query_embedding=query_embedding,
- limit=top_k * 2, # 多取一些,后续过滤
+ limit=top_k * 5, # 🔥 从2倍提升到5倍,提高初始召回率
+ min_similarity=0.0, # 不在这里过滤,交给后续评分
)
+
+ logger.debug(f"单查询向量搜索: 查询='{query}', 返回节点数={len(similar_nodes)}")
+ if similar_nodes:
+ logger.debug(f"Top 3相似度: {[f'{sim:.3f}' for _, sim, _ in similar_nodes[:3]]}")
return similar_nodes
@@ -1003,11 +1019,13 @@ class MemoryTools:
similar_nodes = await self.vector_store.search_with_multiple_queries(
query_embeddings=query_embeddings,
query_weights=query_weights,
- limit=top_k * 2, # 多取一些,后续过滤
+ limit=top_k * 5, # 🔥 从2倍提升到5倍,提高初始召回率
fusion_strategy="weighted_max",
)
logger.info(f"多查询检索完成: {len(similar_nodes)} 个节点 (偏好类型: {prefer_node_types})")
+ if similar_nodes:
+ logger.debug(f"Top 5融合相似度: {[f'{sim:.3f}' for _, sim, _ in similar_nodes[:5]]}")
return similar_nodes, prefer_node_types