Merge branch 'feature/memory-graph-system' of https://github.com/MoFox-Studio/MoFox_Bot into feature/memory-graph-system

2025-11-06 16:52:19 +08:00
parent 155667603b d75476d41c
commit c8189d4a68
24 changed files with 1035 additions and 304 deletions
--- a/src/memory_graph/manager.py
+++ b/src/memory_graph/manager.py
@@ -137,6 +137,7 @@ class MemoryManager:
                graph_store=self.graph_store,
                persistence_manager=self.persistence,
                embedding_generator=self.embedding_generator,
+                max_expand_depth=getattr(self.config, 'max_expand_depth', 1),  # 从配置读取默认深度
            )
            
            self._initialized = True
@@ -362,18 +363,15 @@ class MemoryManager:
            
            # 构建上下文信息
            chat_history = context.get("chat_history", "") if context else ""
-            sender = context.get("sender", "") if context else ""
-            participants = context.get("participants", []) if context else []
-            participants_str = "、".join(participants) if participants else "无"
-            
+    
            prompt = f"""你是记忆检索助手。为提高检索准确率，请为查询生成3-5个不同角度的搜索语句。

 **核心原则（重要！）：**
-对于包含多个概念的复杂查询（如"杰瑞喵如何评价新的记忆系统"），应该生成：
+对于包含多个概念的复杂查询（如"小明如何评价小王"），应该生成：
 1. 完整查询（包含所有要素）- 权重1.0
-2. 每个关键概念的独立查询（如"新的记忆系统"）- 权重0.8，避免被主体淹没！
-3. 主体+动作组合（如"杰瑞喵 评价"）- 权重0.6
-4. 泛化查询（如"记忆系统"）- 权重0.7
+2. 每个关键概念的独立查询（如"小明"、"小王"）- 权重0.8，避免被主体淹没！
+3. 主体+动作组合（如"小明 评价"）- 权重0.6
+4. 泛化查询（如"评价"）- 权重0.7

 **要求：**
 - 第一个必须是原始查询或同义改写
@@ -381,9 +379,7 @@ class MemoryManager:
 - 查询简洁（5-20字）
 - 直接输出JSON，不要添加说明

-**已知参与者：** {participants_str}
 **对话上下文：** {chat_history[-300:] if chat_history else "无"}
-**当前查询：** {sender}: {query}

 **输出JSON格式：**
 ```json
@@ -436,7 +432,6 @@ class MemoryManager:
        time_range: Optional[Tuple[datetime, datetime]] = None,
        min_importance: float = 0.0,
        include_forgotten: bool = False,
-        optimize_query: bool = True,
        use_multi_query: bool = True,
        expand_depth: int = 1,
        context: Optional[Dict[str, Any]] = None,
@@ -457,7 +452,6 @@ class MemoryManager:
            time_range: 时间范围过滤 (start, end)
            min_importance: 最小重要性
            include_forgotten: 是否包含已遗忘的记忆
-            optimize_query: 是否使用小模型优化查询（已弃用，被 use_multi_query 替代）
            use_multi_query: 是否使用多查询策略（推荐，默认True）
            expand_depth: 图扩展深度（0=禁用, 1=推荐, 2-3=深度探索）
            context: 查询上下文（用于优化）
--- a/src/memory_graph/storage/vector_store.py
+++ b/src/memory_graph/storage/vector_store.py
@@ -102,8 +102,8 @@ class VectorStore:
            # 处理额外的元数据，将 list 转换为 JSON 字符串
            for key, value in node.metadata.items():
                if isinstance(value, (list, dict)):
-                    import json
-                    metadata[key] = json.dumps(value, ensure_ascii=False)
+                    import orjson
+                    metadata[key] = orjson.dumps(value, option=orjson.OPT_NON_STR_KEYS).decode('utf-8')
                elif isinstance(value, (str, int, float, bool)) or value is None:
                    metadata[key] = value
                else:
@@ -141,7 +141,7 @@ class VectorStore:

        try:
            # 准备元数据
-            import json
+            import orjson
            metadatas = []
            for n in valid_nodes:
                metadata = {
@@ -151,7 +151,7 @@ class VectorStore:
                }
                for key, value in n.metadata.items():
                    if isinstance(value, (list, dict)):
-                        metadata[key] = json.dumps(value, ensure_ascii=False)
+                        metadata[key] = orjson.dumps(value, option=orjson.OPT_NON_STR_KEYS).decode('utf-8')
                    elif isinstance(value, (str, int, float, bool)) or value is None:
                        metadata[key] = value  # type: ignore
                    else:
@@ -207,7 +207,7 @@ class VectorStore:
            )

            # 解析结果
-            import json
+            import orjson
            similar_nodes = []
            if results["ids"] and results["ids"][0]:
                for i, node_id in enumerate(results["ids"][0]):
@@ -223,7 +223,7 @@ class VectorStore:
                        for key, value in list(metadata.items()):
                            if isinstance(value, str) and (value.startswith('[') or value.startswith('{')):
                                try:
-                                    metadata[key] = json.loads(value)
+                                    metadata[key] = orjson.loads(value)
                                except:
                                    pass  # 保持原值
                        
--- a/src/memory_graph/tools/memory_tools.py
+++ b/src/memory_graph/tools/memory_tools.py
@@ -34,6 +34,7 @@ class MemoryTools:
        graph_store: GraphStore,
        persistence_manager: PersistenceManager,
        embedding_generator: Optional[EmbeddingGenerator] = None,
+        max_expand_depth: int = 1,
    ):
        """
        初始化工具集
@@ -43,11 +44,13 @@ class MemoryTools:
            graph_store: 图存储
            persistence_manager: 持久化管理器
            embedding_generator: 嵌入生成器（可选）
+            max_expand_depth: 图扩展深度的默认值（从配置读取）
        """
        self.vector_store = vector_store
        self.graph_store = graph_store
        self.persistence_manager = persistence_manager
        self._initialized = False
+        self.max_expand_depth = max_expand_depth  # 保存配置的默认值

        # 初始化组件
        self.extractor = MemoryExtractor()
@@ -448,11 +451,12 @@ class MemoryTools:
        try:
            query = params.get("query", "")
            top_k = params.get("top_k", 10)
-            expand_depth = params.get("expand_depth", 1)
+            # 使用配置中的默认值而不是硬编码的 1
+            expand_depth = params.get("expand_depth", self.max_expand_depth)
            use_multi_query = params.get("use_multi_query", True)
            context = params.get("context", None)

-            logger.info(f"搜索记忆: {query} (top_k={top_k}, multi_query={use_multi_query})")
+            logger.info(f"搜索记忆: {query} (top_k={top_k}, expand_depth={expand_depth}, multi_query={use_multi_query})")

            # 0. 确保初始化
            await self._ensure_initialized()
@@ -474,9 +478,9 @@ class MemoryTools:
                    ids = metadata["memory_ids"]
                    # 确保是列表
                    if isinstance(ids, str):
-                        import json
+                        import orjson
                        try:
-                            ids = json.loads(ids)
+                            ids = orjson.loads(ids)
                        except:
                            ids = [ids]
                    if isinstance(ids, list):
@@ -625,35 +629,63 @@ class MemoryTools:
        try:
            from src.llm_models.utils_model import LLMRequest
            from src.config.config import model_config
-            
+
            llm = LLMRequest(
                model_set=model_config.model_task_config.utils_small,
                request_type="memory.multi_query"
            )
-            
-            participants = context.get("participants", []) if context else []
-            prompt = f"""为查询生成3-5个不同角度的搜索语句（JSON格式）。

-**查询：** {query}
+            # 获取上下文信息
+            participants = context.get("participants", []) if context else []
+            chat_history = context.get("chat_history", "") if context else ""
+            sender = context.get("sender", "") if context else ""
+
+            # 处理聊天历史：按换行切分，只保留最后5行作为近期对话
+            recent_chat = ""
+            if chat_history:
+                lines = chat_history.strip().split('\n')
+                # 取最后5行（按行计数，多行消息可能被截断）
+                recent_lines = lines[-5:] if len(lines) > 5 else lines
+                recent_chat = '\n'.join(recent_lines)
+
+            prompt = f"""基于聊天上下文为查询生成3-5个不同角度的搜索语句（JSON格式）。
+
+**当前查询：** {query}
+**发送者：** {sender if sender else '未知'}
 **参与者：** {', '.join(participants) if participants else '无'}

-**原则：** 对复杂查询（如"杰瑞喵如何评价新的记忆系统"），应生成：
-1. 完整查询（权重1.0）
-2. 每个关键概念独立查询（权重0.8）- 重要！
-3. 主体+动作（权重0.6）
+**最近聊天记录（最近5条）：**
+{recent_chat if recent_chat else '无聊天历史'}

-**输出JSON：**
+**分析原则：**
+1. **上下文理解**：根据聊天历史理解查询的真实意图
+2. **指代消解**：识别并代换"他"、"她"、"它"、"那个"等指代词
+3. **话题关联**：结合最近讨论的话题生成更精准的查询
+4. **查询分解**：对复杂查询分解为多个子查询
+
+**生成策略：**
+1. **完整查询**（权重1.0）：结合上下文的完整查询，包含指代消解
+2. **关键概念查询**（权重0.8）：查询中的核心概念，特别是聊天中提到的实体
+3. **话题扩展查询**（权重0.7）：基于最近聊天话题的相关查询
+4. **动作/情感查询**（权重0.6）：如果涉及情感或动作，生成相关查询
+
+**输出JSON格式：**
 ```json
-{{"queries": [{{"text": "查询1", "weight": 1.0}}, {{"text": "查询2", "weight": 0.8}}]}}
-```"""
+{{"queries": [{{"text": "查询语句", "weight": 1.0}}, {{"text": "查询语句", "weight": 0.8}}]}}
+```
+
+**示例：**
+- 查询："他怎么样了？" + 聊天中提到"小明生病了" → "小明身体恢复情况"
+- 查询："那个项目" + 聊天中讨论"记忆系统开发" → "记忆系统项目进展"
+"""

            response, _ = await llm.generate_response_async(prompt, temperature=0.3, max_tokens=250)
            
-            import json, re
+            import orjson, re
            response = re.sub(r'```json\s*', '', response)
            response = re.sub(r'```\s*$', '', response).strip()
            
-            data = json.loads(response)
+            data = orjson.loads(response)
            queries = data.get("queries", [])
            
            result = [(item.get("text", "").strip(), float(item.get("weight", 0.5))) 
@@ -799,9 +831,9 @@ class MemoryTools:
        
        # 确保是列表
        if isinstance(ids, str):
-            import json
+            import orjson
            try:
-                ids = json.loads(ids)
+                ids = orjson.loads(ids)
            except Exception as e:
                logger.warning(f"JSON 解析失败: {e}")
                ids = [ids]
@@ -910,9 +942,9 @@ class MemoryTools:
                            # 提取记忆ID
                            neighbor_memory_ids = neighbor_node_data.get("memory_ids", [])
                            if isinstance(neighbor_memory_ids, str):
-                                import json
+                                import orjson
                                try:
-                                    neighbor_memory_ids = json.loads(neighbor_memory_ids)
+                                    neighbor_memory_ids = orjson.loads(neighbor_memory_ids)
                                except:
                                    neighbor_memory_ids = [neighbor_memory_ids]