Merge branch 'feature/memory-graph-system' of https://github.com/MoFox-Studio/MoFox_Bot into feature/memory-graph-system

This commit is contained in:
tt-P607
2025-11-06 16:52:19 +08:00
24 changed files with 1035 additions and 304 deletions

View File

@@ -137,6 +137,7 @@ class MemoryManager:
graph_store=self.graph_store,
persistence_manager=self.persistence,
embedding_generator=self.embedding_generator,
max_expand_depth=getattr(self.config, 'max_expand_depth', 1), # 从配置读取默认深度
)
self._initialized = True
@@ -362,18 +363,15 @@ class MemoryManager:
# 构建上下文信息
chat_history = context.get("chat_history", "") if context else ""
sender = context.get("sender", "") if context else ""
participants = context.get("participants", []) if context else []
participants_str = "".join(participants) if participants else ""
prompt = f"""你是记忆检索助手。为提高检索准确率请为查询生成3-5个不同角度的搜索语句。
**核心原则(重要!):**
对于包含多个概念的复杂查询(如"杰瑞喵如何评价新的记忆系统"),应该生成:
对于包含多个概念的复杂查询(如"小明如何评价小王"),应该生成:
1. 完整查询(包含所有要素)- 权重1.0
2. 每个关键概念的独立查询(如"新的记忆系统"- 权重0.8,避免被主体淹没!
3. 主体+动作组合(如"杰瑞喵 评价"- 权重0.6
4. 泛化查询(如"记忆系统"- 权重0.7
2. 每个关键概念的独立查询(如"小明""小王"- 权重0.8,避免被主体淹没!
3. 主体+动作组合(如"小明 评价"- 权重0.6
4. 泛化查询(如"评价"- 权重0.7
**要求:**
- 第一个必须是原始查询或同义改写
@@ -381,9 +379,7 @@ class MemoryManager:
- 查询简洁5-20字
- 直接输出JSON不要添加说明
**已知参与者:** {participants_str}
**对话上下文:** {chat_history[-300:] if chat_history else ""}
**当前查询:** {sender}: {query}
**输出JSON格式**
```json
@@ -436,7 +432,6 @@ class MemoryManager:
time_range: Optional[Tuple[datetime, datetime]] = None,
min_importance: float = 0.0,
include_forgotten: bool = False,
optimize_query: bool = True,
use_multi_query: bool = True,
expand_depth: int = 1,
context: Optional[Dict[str, Any]] = None,
@@ -457,7 +452,6 @@ class MemoryManager:
time_range: 时间范围过滤 (start, end)
min_importance: 最小重要性
include_forgotten: 是否包含已遗忘的记忆
optimize_query: 是否使用小模型优化查询(已弃用,被 use_multi_query 替代)
use_multi_query: 是否使用多查询策略推荐默认True
expand_depth: 图扩展深度0=禁用, 1=推荐, 2-3=深度探索)
context: 查询上下文(用于优化)

View File

@@ -102,8 +102,8 @@ class VectorStore:
# 处理额外的元数据,将 list 转换为 JSON 字符串
for key, value in node.metadata.items():
if isinstance(value, (list, dict)):
import json
metadata[key] = json.dumps(value, ensure_ascii=False)
import orjson
metadata[key] = orjson.dumps(value, option=orjson.OPT_NON_STR_KEYS).decode('utf-8')
elif isinstance(value, (str, int, float, bool)) or value is None:
metadata[key] = value
else:
@@ -141,7 +141,7 @@ class VectorStore:
try:
# 准备元数据
import json
import orjson
metadatas = []
for n in valid_nodes:
metadata = {
@@ -151,7 +151,7 @@ class VectorStore:
}
for key, value in n.metadata.items():
if isinstance(value, (list, dict)):
metadata[key] = json.dumps(value, ensure_ascii=False)
metadata[key] = orjson.dumps(value, option=orjson.OPT_NON_STR_KEYS).decode('utf-8')
elif isinstance(value, (str, int, float, bool)) or value is None:
metadata[key] = value # type: ignore
else:
@@ -207,7 +207,7 @@ class VectorStore:
)
# 解析结果
import json
import orjson
similar_nodes = []
if results["ids"] and results["ids"][0]:
for i, node_id in enumerate(results["ids"][0]):
@@ -223,7 +223,7 @@ class VectorStore:
for key, value in list(metadata.items()):
if isinstance(value, str) and (value.startswith('[') or value.startswith('{')):
try:
metadata[key] = json.loads(value)
metadata[key] = orjson.loads(value)
except:
pass # 保持原值

View File

@@ -34,6 +34,7 @@ class MemoryTools:
graph_store: GraphStore,
persistence_manager: PersistenceManager,
embedding_generator: Optional[EmbeddingGenerator] = None,
max_expand_depth: int = 1,
):
"""
初始化工具集
@@ -43,11 +44,13 @@ class MemoryTools:
graph_store: 图存储
persistence_manager: 持久化管理器
embedding_generator: 嵌入生成器(可选)
max_expand_depth: 图扩展深度的默认值(从配置读取)
"""
self.vector_store = vector_store
self.graph_store = graph_store
self.persistence_manager = persistence_manager
self._initialized = False
self.max_expand_depth = max_expand_depth # 保存配置的默认值
# 初始化组件
self.extractor = MemoryExtractor()
@@ -448,11 +451,12 @@ class MemoryTools:
try:
query = params.get("query", "")
top_k = params.get("top_k", 10)
expand_depth = params.get("expand_depth", 1)
# 使用配置中的默认值而不是硬编码的 1
expand_depth = params.get("expand_depth", self.max_expand_depth)
use_multi_query = params.get("use_multi_query", True)
context = params.get("context", None)
logger.info(f"搜索记忆: {query} (top_k={top_k}, multi_query={use_multi_query})")
logger.info(f"搜索记忆: {query} (top_k={top_k}, expand_depth={expand_depth}, multi_query={use_multi_query})")
# 0. 确保初始化
await self._ensure_initialized()
@@ -474,9 +478,9 @@ class MemoryTools:
ids = metadata["memory_ids"]
# 确保是列表
if isinstance(ids, str):
import json
import orjson
try:
ids = json.loads(ids)
ids = orjson.loads(ids)
except:
ids = [ids]
if isinstance(ids, list):
@@ -625,35 +629,63 @@ class MemoryTools:
try:
from src.llm_models.utils_model import LLMRequest
from src.config.config import model_config
llm = LLMRequest(
model_set=model_config.model_task_config.utils_small,
request_type="memory.multi_query"
)
participants = context.get("participants", []) if context else []
prompt = f"""为查询生成3-5个不同角度的搜索语句JSON格式
**查询:** {query}
# 获取上下文信息
participants = context.get("participants", []) if context else []
chat_history = context.get("chat_history", "") if context else ""
sender = context.get("sender", "") if context else ""
# 处理聊天历史提取最近5条左右的对话
recent_chat = ""
if chat_history:
lines = chat_history.strip().split('\n')
# 取最近5条消息
recent_lines = lines[-5:] if len(lines) > 5 else lines
recent_chat = '\n'.join(recent_lines)
prompt = f"""基于聊天上下文为查询生成3-5个不同角度的搜索语句JSON格式
**当前查询:** {query}
**发送者:** {sender if sender else '未知'}
**参与者:** {', '.join(participants) if participants else ''}
**原则:** 对复杂查询(如"杰瑞喵如何评价新的记忆系统"),应生成:
1. 完整查询权重1.0
2. 每个关键概念独立查询权重0.8- 重要!
3. 主体+动作权重0.6
**最近聊天记录最近5条**
{recent_chat if recent_chat else '无聊天历史'}
**输出JSON**
**分析原则**
1. **上下文理解**:根据聊天历史理解查询的真实意图
2. **指代消解**:识别并代换"""""""那个"等指代词
3. **话题关联**:结合最近讨论的话题生成更精准的查询
4. **查询分解**:对复杂查询分解为多个子查询
**生成策略:**
1. **完整查询**权重1.0):结合上下文的完整查询,包含指代消解
2. **关键概念查询**权重0.8):查询中的核心概念,特别是聊天中提到的实体
3. **话题扩展查询**权重0.7):基于最近聊天话题的相关查询
4. **动作/情感查询**权重0.6):如果涉及情感或动作,生成相关查询
**输出JSON格式**
```json
{{"queries": [{{"text": "查询1", "weight": 1.0}}, {{"text": "查询2", "weight": 0.8}}]}}
```"""
{{"queries": [{{"text": "查询语句", "weight": 1.0}}, {{"text": "查询语句", "weight": 0.8}}]}}
```
**示例:**
- 查询:"他怎么样了?" + 聊天中提到"小明生病了""小明身体恢复情况"
- 查询:"那个项目" + 聊天中讨论"记忆系统开发""记忆系统项目进展"
"""
response, _ = await llm.generate_response_async(prompt, temperature=0.3, max_tokens=250)
import json, re
import orjson, re
response = re.sub(r'```json\s*', '', response)
response = re.sub(r'```\s*$', '', response).strip()
data = json.loads(response)
data = orjson.loads(response)
queries = data.get("queries", [])
result = [(item.get("text", "").strip(), float(item.get("weight", 0.5)))
@@ -799,9 +831,9 @@ class MemoryTools:
# 确保是列表
if isinstance(ids, str):
import json
import orjson
try:
ids = json.loads(ids)
ids = orjson.loads(ids)
except Exception as e:
logger.warning(f"JSON 解析失败: {e}")
ids = [ids]
@@ -910,9 +942,9 @@ class MemoryTools:
# 提取记忆ID
neighbor_memory_ids = neighbor_node_data.get("memory_ids", [])
if isinstance(neighbor_memory_ids, str):
import json
import orjson
try:
neighbor_memory_ids = json.loads(neighbor_memory_ids)
neighbor_memory_ids = orjson.loads(neighbor_memory_ids)
except:
neighbor_memory_ids = [neighbor_memory_ids]