feat: 重构聊天系统的记忆处理与格式化功能

- 更新了 DefaultReplyer,采用新的记忆格式化工具以提供更优质的记忆描述。
- 已移除 Config 类中已弃用的记忆配置。
- 在主系统中增强记忆系统初始化检查,确保配置正确。
- 优化了 MemoryManager,使其可直接使用全局配置进行记忆设置。
- 新增了一个记忆格式化工具,用于将记忆对象转换为自然语言描述。
- 更新了记忆工具,提供了更清晰的记忆创建与管理指南。
- 精炼插件工具与使用提示,提升用户交互体验与记忆准确性。
- 根据记忆系统结构的变化调整了机器人配置模板。
This commit is contained in:
@@ -61,6 +61,34 @@ class ExpressorModel:
|
|||||||
if cid not in self.nb.token_counts:
|
if cid not in self.nb.token_counts:
|
||||||
self.nb.token_counts[cid] = defaultdict(float)
|
self.nb.token_counts[cid] = defaultdict(float)
|
||||||
|
|
||||||
|
def remove_candidate(self, cid: str) -> bool:
|
||||||
|
"""
|
||||||
|
删除候选文本
|
||||||
|
|
||||||
|
Args:
|
||||||
|
cid: 候选ID
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
是否删除成功
|
||||||
|
"""
|
||||||
|
removed = False
|
||||||
|
|
||||||
|
if cid in self._candidates:
|
||||||
|
del self._candidates[cid]
|
||||||
|
removed = True
|
||||||
|
|
||||||
|
if cid in self._situations:
|
||||||
|
del self._situations[cid]
|
||||||
|
|
||||||
|
# 从nb模型中删除
|
||||||
|
if cid in self.nb.cls_counts:
|
||||||
|
del self.nb.cls_counts[cid]
|
||||||
|
|
||||||
|
if cid in self.nb.token_counts:
|
||||||
|
del self.nb.token_counts[cid]
|
||||||
|
|
||||||
|
return removed
|
||||||
|
|
||||||
def predict(self, text: str, k: int | None = None) -> tuple[str | None, dict[str, float]]:
|
def predict(self, text: str, k: int | None = None) -> tuple[str | None, dict[str, float]]:
|
||||||
"""
|
"""
|
||||||
直接对所有候选进行朴素贝叶斯评分
|
直接对所有候选进行朴素贝叶斯评分
|
||||||
|
|||||||
@@ -36,6 +36,8 @@ class StyleLearner:
|
|||||||
|
|
||||||
# 动态风格管理
|
# 动态风格管理
|
||||||
self.max_styles = 2000 # 每个chat_id最多2000个风格
|
self.max_styles = 2000 # 每个chat_id最多2000个风格
|
||||||
|
self.cleanup_threshold = 0.9 # 达到90%容量时触发清理
|
||||||
|
self.cleanup_ratio = 0.2 # 每次清理20%的风格
|
||||||
self.style_to_id: dict[str, str] = {} # style文本 -> style_id
|
self.style_to_id: dict[str, str] = {} # style文本 -> style_id
|
||||||
self.id_to_style: dict[str, str] = {} # style_id -> style文本
|
self.id_to_style: dict[str, str] = {} # style_id -> style文本
|
||||||
self.id_to_situation: dict[str, str] = {} # style_id -> situation文本
|
self.id_to_situation: dict[str, str] = {} # style_id -> situation文本
|
||||||
@@ -45,6 +47,7 @@ class StyleLearner:
|
|||||||
self.learning_stats = {
|
self.learning_stats = {
|
||||||
"total_samples": 0,
|
"total_samples": 0,
|
||||||
"style_counts": {},
|
"style_counts": {},
|
||||||
|
"style_last_used": {}, # 记录每个风格最后使用时间
|
||||||
"last_update": time.time(),
|
"last_update": time.time(),
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -66,10 +69,19 @@ class StyleLearner:
|
|||||||
if style in self.style_to_id:
|
if style in self.style_to_id:
|
||||||
return True
|
return True
|
||||||
|
|
||||||
# 检查是否超过最大限制
|
# 检查是否需要清理
|
||||||
if len(self.style_to_id) >= self.max_styles:
|
current_count = len(self.style_to_id)
|
||||||
logger.warning(f"已达到最大风格数量限制 ({self.max_styles})")
|
cleanup_trigger = int(self.max_styles * self.cleanup_threshold)
|
||||||
return False
|
|
||||||
|
if current_count >= cleanup_trigger:
|
||||||
|
if current_count >= self.max_styles:
|
||||||
|
# 已经达到最大限制,必须清理
|
||||||
|
logger.warning(f"已达到最大风格数量限制 ({self.max_styles}),开始清理")
|
||||||
|
self._cleanup_styles()
|
||||||
|
elif current_count >= cleanup_trigger:
|
||||||
|
# 接近限制,提前清理
|
||||||
|
logger.info(f"风格数量达到 {current_count}/{self.max_styles},触发预防性清理")
|
||||||
|
self._cleanup_styles()
|
||||||
|
|
||||||
# 生成新的style_id
|
# 生成新的style_id
|
||||||
style_id = f"style_{self.next_style_id}"
|
style_id = f"style_{self.next_style_id}"
|
||||||
@@ -94,6 +106,80 @@ class StyleLearner:
|
|||||||
logger.error(f"添加风格失败: {e}")
|
logger.error(f"添加风格失败: {e}")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
def _cleanup_styles(self):
|
||||||
|
"""
|
||||||
|
清理低价值的风格,为新风格腾出空间
|
||||||
|
|
||||||
|
清理策略:
|
||||||
|
1. 综合考虑使用次数和最后使用时间
|
||||||
|
2. 删除得分最低的风格
|
||||||
|
3. 默认清理 cleanup_ratio (20%) 的风格
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
current_time = time.time()
|
||||||
|
cleanup_count = max(1, int(len(self.style_to_id) * self.cleanup_ratio))
|
||||||
|
|
||||||
|
# 计算每个风格的价值分数
|
||||||
|
style_scores = []
|
||||||
|
for style_id in self.style_to_id.values():
|
||||||
|
# 使用次数
|
||||||
|
usage_count = self.learning_stats["style_counts"].get(style_id, 0)
|
||||||
|
|
||||||
|
# 最后使用时间(越近越好)
|
||||||
|
last_used = self.learning_stats["style_last_used"].get(style_id, 0)
|
||||||
|
time_since_used = current_time - last_used if last_used > 0 else float('inf')
|
||||||
|
|
||||||
|
# 综合分数:使用次数越多越好,距离上次使用时间越短越好
|
||||||
|
# 使用对数来平滑使用次数的影响
|
||||||
|
import math
|
||||||
|
usage_score = math.log1p(usage_count) # log(1 + count)
|
||||||
|
|
||||||
|
# 时间分数:转换为天数,使用指数衰减
|
||||||
|
days_unused = time_since_used / 86400 # 转换为天
|
||||||
|
time_score = math.exp(-days_unused / 30) # 30天衰减因子
|
||||||
|
|
||||||
|
# 综合分数:80%使用频率 + 20%时间新鲜度
|
||||||
|
total_score = 0.8 * usage_score + 0.2 * time_score
|
||||||
|
|
||||||
|
style_scores.append((style_id, total_score, usage_count, days_unused))
|
||||||
|
|
||||||
|
# 按分数排序,分数低的先删除
|
||||||
|
style_scores.sort(key=lambda x: x[1])
|
||||||
|
|
||||||
|
# 删除分数最低的风格
|
||||||
|
deleted_styles = []
|
||||||
|
for style_id, score, usage, days in style_scores[:cleanup_count]:
|
||||||
|
style_text = self.id_to_style.get(style_id)
|
||||||
|
if style_text:
|
||||||
|
# 从映射中删除
|
||||||
|
del self.style_to_id[style_text]
|
||||||
|
del self.id_to_style[style_id]
|
||||||
|
if style_id in self.id_to_situation:
|
||||||
|
del self.id_to_situation[style_id]
|
||||||
|
|
||||||
|
# 从统计中删除
|
||||||
|
if style_id in self.learning_stats["style_counts"]:
|
||||||
|
del self.learning_stats["style_counts"][style_id]
|
||||||
|
if style_id in self.learning_stats["style_last_used"]:
|
||||||
|
del self.learning_stats["style_last_used"][style_id]
|
||||||
|
|
||||||
|
# 从expressor模型中删除
|
||||||
|
self.expressor.remove_candidate(style_id)
|
||||||
|
|
||||||
|
deleted_styles.append((style_text[:30], usage, f"{days:.1f}天"))
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
f"风格清理完成: 删除了 {len(deleted_styles)}/{len(style_scores)} 个风格,"
|
||||||
|
f"剩余 {len(self.style_to_id)} 个风格"
|
||||||
|
)
|
||||||
|
|
||||||
|
# 记录前5个被删除的风格(用于调试)
|
||||||
|
if deleted_styles:
|
||||||
|
logger.debug(f"被删除的风格样例(前5): {deleted_styles[:5]}")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"清理风格失败: {e}", exc_info=True)
|
||||||
|
|
||||||
def learn_mapping(self, up_content: str, style: str) -> bool:
|
def learn_mapping(self, up_content: str, style: str) -> bool:
|
||||||
"""
|
"""
|
||||||
学习一个up_content到style的映射
|
学习一个up_content到style的映射
|
||||||
@@ -118,9 +204,11 @@ class StyleLearner:
|
|||||||
self.expressor.update_positive(up_content, style_id)
|
self.expressor.update_positive(up_content, style_id)
|
||||||
|
|
||||||
# 更新统计
|
# 更新统计
|
||||||
|
current_time = time.time()
|
||||||
self.learning_stats["total_samples"] += 1
|
self.learning_stats["total_samples"] += 1
|
||||||
self.learning_stats["style_counts"][style_id] += 1
|
self.learning_stats["style_counts"][style_id] += 1
|
||||||
self.learning_stats["last_update"] = time.time()
|
self.learning_stats["style_last_used"][style_id] = current_time # 更新最后使用时间
|
||||||
|
self.learning_stats["last_update"] = current_time
|
||||||
|
|
||||||
logger.debug(f"学习映射成功: {up_content[:20]}... -> {style}")
|
logger.debug(f"学习映射成功: {up_content[:20]}... -> {style}")
|
||||||
return True
|
return True
|
||||||
@@ -171,6 +259,10 @@ class StyleLearner:
|
|||||||
else:
|
else:
|
||||||
logger.warning(f"跳过无法转换的style_id: {sid}")
|
logger.warning(f"跳过无法转换的style_id: {sid}")
|
||||||
|
|
||||||
|
# 更新最后使用时间(仅针对最佳风格)
|
||||||
|
if best_style_id:
|
||||||
|
self.learning_stats["style_last_used"][best_style_id] = time.time()
|
||||||
|
|
||||||
logger.debug(
|
logger.debug(
|
||||||
f"预测成功: up_content={up_content[:30]}..., "
|
f"预测成功: up_content={up_content[:30]}..., "
|
||||||
f"best_style={best_style}, top3_scores={list(style_scores.items())[:3]}"
|
f"best_style={best_style}, top3_scores={list(style_scores.items())[:3]}"
|
||||||
@@ -208,6 +300,30 @@ class StyleLearner:
|
|||||||
"""
|
"""
|
||||||
return list(self.style_to_id.keys())
|
return list(self.style_to_id.keys())
|
||||||
|
|
||||||
|
def cleanup_old_styles(self, ratio: float | None = None) -> int:
|
||||||
|
"""
|
||||||
|
手动清理旧风格
|
||||||
|
|
||||||
|
Args:
|
||||||
|
ratio: 清理比例,如果为None则使用默认的cleanup_ratio
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
清理的风格数量
|
||||||
|
"""
|
||||||
|
old_count = len(self.style_to_id)
|
||||||
|
if ratio is not None:
|
||||||
|
old_cleanup_ratio = self.cleanup_ratio
|
||||||
|
self.cleanup_ratio = ratio
|
||||||
|
self._cleanup_styles()
|
||||||
|
self.cleanup_ratio = old_cleanup_ratio
|
||||||
|
else:
|
||||||
|
self._cleanup_styles()
|
||||||
|
|
||||||
|
new_count = len(self.style_to_id)
|
||||||
|
cleaned = old_count - new_count
|
||||||
|
logger.info(f"手动清理完成: chat_id={self.chat_id}, 清理了 {cleaned} 个风格")
|
||||||
|
return cleaned
|
||||||
|
|
||||||
def apply_decay(self, factor: float | None = None):
|
def apply_decay(self, factor: float | None = None):
|
||||||
"""
|
"""
|
||||||
应用知识衰减
|
应用知识衰减
|
||||||
@@ -241,6 +357,11 @@ class StyleLearner:
|
|||||||
import pickle
|
import pickle
|
||||||
|
|
||||||
meta_path = os.path.join(save_dir, "meta.pkl")
|
meta_path = os.path.join(save_dir, "meta.pkl")
|
||||||
|
|
||||||
|
# 确保 learning_stats 包含所有必要字段
|
||||||
|
if "style_last_used" not in self.learning_stats:
|
||||||
|
self.learning_stats["style_last_used"] = {}
|
||||||
|
|
||||||
meta_data = {
|
meta_data = {
|
||||||
"style_to_id": self.style_to_id,
|
"style_to_id": self.style_to_id,
|
||||||
"id_to_style": self.id_to_style,
|
"id_to_style": self.id_to_style,
|
||||||
@@ -295,6 +416,10 @@ class StyleLearner:
|
|||||||
self.id_to_situation = meta_data["id_to_situation"]
|
self.id_to_situation = meta_data["id_to_situation"]
|
||||||
self.next_style_id = meta_data["next_style_id"]
|
self.next_style_id = meta_data["next_style_id"]
|
||||||
self.learning_stats = meta_data["learning_stats"]
|
self.learning_stats = meta_data["learning_stats"]
|
||||||
|
|
||||||
|
# 确保旧数据兼容:如果没有 style_last_used 字段,添加它
|
||||||
|
if "style_last_used" not in self.learning_stats:
|
||||||
|
self.learning_stats["style_last_used"] = {}
|
||||||
|
|
||||||
logger.info(f"StyleLearner加载成功: {save_dir}")
|
logger.info(f"StyleLearner加载成功: {save_dir}")
|
||||||
return True
|
return True
|
||||||
@@ -398,6 +523,26 @@ class StyleLearnerManager:
|
|||||||
logger.info(f"保存所有StyleLearner {'成功' if success else '部分失败'}")
|
logger.info(f"保存所有StyleLearner {'成功' if success else '部分失败'}")
|
||||||
return success
|
return success
|
||||||
|
|
||||||
|
def cleanup_all_old_styles(self, ratio: float | None = None) -> dict[str, int]:
|
||||||
|
"""
|
||||||
|
对所有学习器清理旧风格
|
||||||
|
|
||||||
|
Args:
|
||||||
|
ratio: 清理比例
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
{chat_id: 清理数量}
|
||||||
|
"""
|
||||||
|
cleanup_results = {}
|
||||||
|
for chat_id, learner in self.learners.items():
|
||||||
|
cleaned = learner.cleanup_old_styles(ratio)
|
||||||
|
if cleaned > 0:
|
||||||
|
cleanup_results[chat_id] = cleaned
|
||||||
|
|
||||||
|
total_cleaned = sum(cleanup_results.values())
|
||||||
|
logger.info(f"清理所有StyleLearner完成: 总共清理了 {total_cleaned} 个风格")
|
||||||
|
return cleanup_results
|
||||||
|
|
||||||
def apply_decay_all(self, factor: float | None = None):
|
def apply_decay_all(self, factor: float | None = None):
|
||||||
"""
|
"""
|
||||||
对所有学习器应用知识衰减
|
对所有学习器应用知识衰减
|
||||||
|
|||||||
@@ -585,17 +585,29 @@ class DefaultReplyer:
|
|||||||
|
|
||||||
if memories:
|
if memories:
|
||||||
logger.info(f"[记忆图] 检索到 {len(memories)} 条相关记忆")
|
logger.info(f"[记忆图] 检索到 {len(memories)} 条相关记忆")
|
||||||
|
|
||||||
|
# 使用新的格式化工具构建完整的记忆描述
|
||||||
|
from src.memory_graph.utils.memory_formatter import (
|
||||||
|
format_memory_for_prompt,
|
||||||
|
get_memory_type_label,
|
||||||
|
)
|
||||||
|
|
||||||
for memory in memories:
|
for memory in memories:
|
||||||
topic = memory.metadata.get("topic", "")
|
# 使用格式化工具生成完整的主谓宾描述
|
||||||
mem_type = memory.metadata.get("memory_type", "未知")
|
content = format_memory_for_prompt(memory, include_metadata=False)
|
||||||
if topic:
|
|
||||||
|
# 获取记忆类型
|
||||||
|
mem_type = memory.memory_type.value if memory.memory_type else "未知"
|
||||||
|
|
||||||
|
if content:
|
||||||
all_memories.append({
|
all_memories.append({
|
||||||
"content": topic,
|
"content": content,
|
||||||
"memory_type": mem_type,
|
"memory_type": mem_type,
|
||||||
"importance": memory.importance,
|
"importance": memory.importance,
|
||||||
"relevance": 0.7,
|
"relevance": 0.7,
|
||||||
"source": "memory_graph",
|
"source": "memory_graph",
|
||||||
})
|
})
|
||||||
|
logger.debug(f"[记忆构建] 格式化记忆: [{mem_type}] {content[:50]}...")
|
||||||
else:
|
else:
|
||||||
logger.debug("[记忆图] 未找到相关记忆")
|
logger.debug("[记忆图] 未找到相关记忆")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@@ -629,8 +641,13 @@ class DefaultReplyer:
|
|||||||
logger.debug(f"[记忆构建] 空记忆详情: {running_memory}")
|
logger.debug(f"[记忆构建] 空记忆详情: {running_memory}")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# 使用全局记忆类型映射表
|
# 使用记忆图的类型映射(优先)或全局映射
|
||||||
chinese_type = get_memory_type_chinese_label(memory_type)
|
try:
|
||||||
|
from src.memory_graph.utils.memory_formatter import get_memory_type_label
|
||||||
|
chinese_type = get_memory_type_label(memory_type)
|
||||||
|
except ImportError:
|
||||||
|
# 回退到全局映射
|
||||||
|
chinese_type = get_memory_type_chinese_label(memory_type)
|
||||||
|
|
||||||
# 提取纯净内容(如果包含旧格式的元数据)
|
# 提取纯净内容(如果包含旧格式的元数据)
|
||||||
clean_content = content
|
clean_content = content
|
||||||
|
|||||||
@@ -381,7 +381,7 @@ class Config(ValidatedConfigBase):
|
|||||||
notice: NoticeConfig = Field(..., description="Notice消息配置")
|
notice: NoticeConfig = Field(..., description="Notice消息配置")
|
||||||
emoji: EmojiConfig = Field(..., description="表情配置")
|
emoji: EmojiConfig = Field(..., description="表情配置")
|
||||||
expression: ExpressionConfig = Field(..., description="表达配置")
|
expression: ExpressionConfig = Field(..., description="表达配置")
|
||||||
memory: Optional[MemoryConfig] = Field(default=None, description="记忆配置(旧版,已废弃)")
|
memory: Optional[MemoryConfig] = Field(default=None, description="记忆配置")
|
||||||
mood: MoodConfig = Field(..., description="情绪配置")
|
mood: MoodConfig = Field(..., description="情绪配置")
|
||||||
reaction: ReactionConfig = Field(default_factory=ReactionConfig, description="反应规则配置")
|
reaction: ReactionConfig = Field(default_factory=ReactionConfig, description="反应规则配置")
|
||||||
chinese_typo: ChineseTypoConfig = Field(..., description="中文错别字配置")
|
chinese_typo: ChineseTypoConfig = Field(..., description="中文错别字配置")
|
||||||
|
|||||||
@@ -251,7 +251,7 @@ class MainSystem:
|
|||||||
|
|
||||||
# 停止增强记忆系统
|
# 停止增强记忆系统
|
||||||
try:
|
try:
|
||||||
if global_config.memory.enable_memory:
|
if global_config.memory and getattr(global_config.memory, 'enable', False):
|
||||||
cleanup_tasks.append(("增强记忆系统", self.memory_manager.shutdown()))
|
cleanup_tasks.append(("增强记忆系统", self.memory_manager.shutdown()))
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"准备停止增强记忆系统时出错: {e}")
|
logger.error(f"准备停止增强记忆系统时出错: {e}")
|
||||||
@@ -469,7 +469,7 @@ MoFox_Bot(第三方修改版)
|
|||||||
task.add_done_callback(_background_tasks.discard)
|
task.add_done_callback(_background_tasks.discard)
|
||||||
|
|
||||||
# 初始化增强记忆系统
|
# 初始化增强记忆系统
|
||||||
if global_config.memory.enable_memory:
|
if global_config.memory and getattr(global_config.memory, 'enable', False):
|
||||||
from src.chat.memory_system.memory_system import initialize_memory_system
|
from src.chat.memory_system.memory_system import initialize_memory_system
|
||||||
await self._safe_init("增强记忆系统", initialize_memory_system)()
|
await self._safe_init("增强记忆系统", initialize_memory_system)()
|
||||||
await self._safe_init("记忆管理器", self.memory_manager.initialize)()
|
await self._safe_init("记忆管理器", self.memory_manager.initialize)()
|
||||||
|
|||||||
@@ -133,8 +133,10 @@ class MemoryGraphConfig:
|
|||||||
def from_bot_config(cls, bot_config) -> MemoryGraphConfig:
|
def from_bot_config(cls, bot_config) -> MemoryGraphConfig:
|
||||||
"""从bot_config加载配置"""
|
"""从bot_config加载配置"""
|
||||||
try:
|
try:
|
||||||
# 尝试获取新配置
|
# 尝试获取配置(优先使用memory,兼容memory_graph)
|
||||||
if hasattr(bot_config, 'memory_graph'):
|
if hasattr(bot_config, 'memory') and bot_config.memory is not None:
|
||||||
|
mg_config = bot_config.memory
|
||||||
|
elif hasattr(bot_config, 'memory_graph'):
|
||||||
mg_config = bot_config.memory_graph
|
mg_config = bot_config.memory_graph
|
||||||
|
|
||||||
config = cls(
|
config = cls(
|
||||||
|
|||||||
@@ -14,7 +14,7 @@ from datetime import datetime, timedelta
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, Dict, List, Optional, Set, Tuple
|
from typing import Any, Dict, List, Optional, Set, Tuple
|
||||||
|
|
||||||
from src.memory_graph.config import MemoryGraphConfig
|
from src.config.config import global_config
|
||||||
from src.memory_graph.core.builder import MemoryBuilder
|
from src.memory_graph.core.builder import MemoryBuilder
|
||||||
from src.memory_graph.core.extractor import MemoryExtractor
|
from src.memory_graph.core.extractor import MemoryExtractor
|
||||||
from src.memory_graph.models import Memory, MemoryEdge, MemoryNode, MemoryType, NodeType, EdgeType
|
from src.memory_graph.models import Memory, MemoryEdge, MemoryNode, MemoryType, NodeType, EdgeType
|
||||||
@@ -41,18 +41,20 @@ class MemoryManager:
|
|||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
config: Optional[MemoryGraphConfig] = None,
|
|
||||||
data_dir: Optional[Path] = None,
|
data_dir: Optional[Path] = None,
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
初始化记忆管理器
|
初始化记忆管理器
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
config: 记忆图配置
|
data_dir: 数据目录(可选,默认从global_config读取)
|
||||||
data_dir: 数据目录
|
|
||||||
"""
|
"""
|
||||||
self.config = config or MemoryGraphConfig()
|
# 直接使用 global_config.memory
|
||||||
self.data_dir = data_dir or Path("data/memory_graph")
|
if not global_config.memory or not getattr(global_config.memory, 'enable', False):
|
||||||
|
raise ValueError("记忆系统未启用,请在配置文件中启用 [memory] enable = true")
|
||||||
|
|
||||||
|
self.config = global_config.memory
|
||||||
|
self.data_dir = data_dir or Path(getattr(self.config, 'data_dir', 'data/memory_graph'))
|
||||||
|
|
||||||
# 存储组件
|
# 存储组件
|
||||||
self.vector_store: Optional[VectorStore] = None
|
self.vector_store: Optional[VectorStore] = None
|
||||||
@@ -69,10 +71,10 @@ class MemoryManager:
|
|||||||
self._initialized = False
|
self._initialized = False
|
||||||
self._last_maintenance = datetime.now()
|
self._last_maintenance = datetime.now()
|
||||||
self._maintenance_task: Optional[asyncio.Task] = None
|
self._maintenance_task: Optional[asyncio.Task] = None
|
||||||
self._maintenance_interval_hours = self.config.consolidation_interval_hours # 从配置读取
|
self._maintenance_interval_hours = getattr(self.config, 'consolidation_interval_hours', 1.0)
|
||||||
self._maintenance_schedule_id: Optional[str] = None # 调度任务ID
|
self._maintenance_schedule_id: Optional[str] = None # 调度任务ID
|
||||||
|
|
||||||
logger.info(f"记忆管理器已创建 (data_dir={data_dir}, enable={self.config.enable})")
|
logger.info(f"记忆管理器已创建 (data_dir={self.data_dir}, enable={getattr(self.config, 'enable', False)})")
|
||||||
|
|
||||||
async def initialize(self) -> None:
|
async def initialize(self) -> None:
|
||||||
"""
|
"""
|
||||||
@@ -93,8 +95,12 @@ class MemoryManager:
|
|||||||
# 1. 初始化存储层
|
# 1. 初始化存储层
|
||||||
self.data_dir.mkdir(parents=True, exist_ok=True)
|
self.data_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
# 获取存储配置
|
||||||
|
storage_config = getattr(self.config, 'storage', None)
|
||||||
|
vector_collection_name = getattr(storage_config, 'vector_collection_name', 'memory_graph') if storage_config else 'memory_graph'
|
||||||
|
|
||||||
self.vector_store = VectorStore(
|
self.vector_store = VectorStore(
|
||||||
collection_name=self.config.storage.vector_collection_name,
|
collection_name=vector_collection_name,
|
||||||
data_dir=self.data_dir,
|
data_dir=self.data_dir,
|
||||||
)
|
)
|
||||||
await self.vector_store.initialize()
|
await self.vector_store.initialize()
|
||||||
@@ -557,7 +563,8 @@ class MemoryManager:
|
|||||||
# 计算时间衰减
|
# 计算时间衰减
|
||||||
last_access_dt = datetime.fromisoformat(last_access)
|
last_access_dt = datetime.fromisoformat(last_access)
|
||||||
hours_passed = (now - last_access_dt).total_seconds() / 3600
|
hours_passed = (now - last_access_dt).total_seconds() / 3600
|
||||||
decay_factor = self.config.activation_decay_rate ** (hours_passed / 24)
|
decay_rate = getattr(self.config, 'activation_decay_rate', 0.95)
|
||||||
|
decay_factor = decay_rate ** (hours_passed / 24)
|
||||||
current_activation = activation_info.get("level", 0.0) * decay_factor
|
current_activation = activation_info.get("level", 0.0) * decay_factor
|
||||||
else:
|
else:
|
||||||
current_activation = 0.0
|
current_activation = 0.0
|
||||||
@@ -576,13 +583,16 @@ class MemoryManager:
|
|||||||
|
|
||||||
# 激活传播:激活相关记忆
|
# 激活传播:激活相关记忆
|
||||||
if strength > 0.1: # 只有足够强的激活才传播
|
if strength > 0.1: # 只有足够强的激活才传播
|
||||||
|
propagation_depth = getattr(self.config, 'activation_propagation_depth', 2)
|
||||||
related_memories = self._get_related_memories(
|
related_memories = self._get_related_memories(
|
||||||
memory_id,
|
memory_id,
|
||||||
max_depth=self.config.activation_propagation_depth
|
max_depth=propagation_depth
|
||||||
)
|
)
|
||||||
propagation_strength = strength * self.config.activation_propagation_strength
|
propagation_strength_factor = getattr(self.config, 'activation_propagation_strength', 0.5)
|
||||||
|
propagation_strength = strength * propagation_strength_factor
|
||||||
|
|
||||||
for related_id in related_memories[:self.config.max_related_memories]:
|
max_related = getattr(self.config, 'max_related_memories', 5)
|
||||||
|
for related_id in related_memories[:max_related]:
|
||||||
await self.activate_memory(related_id, propagation_strength)
|
await self.activate_memory(related_id, propagation_strength)
|
||||||
|
|
||||||
# 保存更新
|
# 保存更新
|
||||||
@@ -681,7 +691,8 @@ class MemoryManager:
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
# 跳过高重要性记忆
|
# 跳过高重要性记忆
|
||||||
if memory.importance >= self.config.forgetting_min_importance:
|
min_importance = getattr(self.config, 'forgetting_min_importance', 7.0)
|
||||||
|
if memory.importance >= min_importance:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# 计算当前激活度
|
# 计算当前激活度
|
||||||
@@ -876,8 +887,8 @@ class MemoryManager:
|
|||||||
|
|
||||||
# 使用配置值或参数覆盖
|
# 使用配置值或参数覆盖
|
||||||
time_window_hours = time_window_hours if time_window_hours is not None else 24
|
time_window_hours = time_window_hours if time_window_hours is not None else 24
|
||||||
max_candidates = max_candidates if max_candidates is not None else self.config.auto_link_max_candidates
|
max_candidates = max_candidates if max_candidates is not None else getattr(self.config, 'auto_link_max_candidates', 10)
|
||||||
min_confidence = min_confidence if min_confidence is not None else self.config.auto_link_min_confidence
|
min_confidence = min_confidence if min_confidence is not None else getattr(self.config, 'auto_link_min_confidence', 0.7)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
logger.info(f"开始自动关联记忆 (时间窗口={time_window_hours}h)...")
|
logger.info(f"开始自动关联记忆 (时间窗口={time_window_hours}h)...")
|
||||||
@@ -1249,22 +1260,22 @@ class MemoryManager:
|
|||||||
}
|
}
|
||||||
|
|
||||||
# 1. 记忆整理(合并相似记忆)
|
# 1. 记忆整理(合并相似记忆)
|
||||||
if self.config.consolidation_enabled:
|
if getattr(self.config, 'consolidation_enabled', False):
|
||||||
consolidate_result = await self.consolidate_memories(
|
consolidate_result = await self.consolidate_memories(
|
||||||
similarity_threshold=self.config.consolidation_similarity_threshold,
|
similarity_threshold=getattr(self.config, 'consolidation_similarity_threshold', 0.9),
|
||||||
time_window_hours=self.config.consolidation_time_window_hours
|
time_window_hours=getattr(self.config, 'consolidation_time_window_hours', 24.0)
|
||||||
)
|
)
|
||||||
result["consolidated"] = consolidate_result.get("merged_count", 0)
|
result["consolidated"] = consolidate_result.get("merged_count", 0)
|
||||||
|
|
||||||
# 2. 自动关联记忆(发现和建立关系)
|
# 2. 自动关联记忆(发现和建立关系)
|
||||||
if self.config.auto_link_enabled:
|
if getattr(self.config, 'auto_link_enabled', True):
|
||||||
link_result = await self.auto_link_memories()
|
link_result = await self.auto_link_memories()
|
||||||
result["linked"] = link_result.get("linked_count", 0)
|
result["linked"] = link_result.get("linked_count", 0)
|
||||||
|
|
||||||
# 3. 自动遗忘
|
# 3. 自动遗忘
|
||||||
if self.config.forgetting_enabled:
|
if getattr(self.config, 'forgetting_enabled', True):
|
||||||
forgotten_count = await self.auto_forget_memories(
|
forgotten_count = await self.auto_forget_memories(
|
||||||
threshold=self.config.forgetting_activation_threshold
|
threshold=getattr(self.config, 'forgetting_activation_threshold', 0.1)
|
||||||
)
|
)
|
||||||
result["forgotten"] = forgotten_count
|
result["forgotten"] = forgotten_count
|
||||||
|
|
||||||
|
|||||||
@@ -21,17 +21,17 @@ _initialized: bool = False
|
|||||||
|
|
||||||
async def initialize_memory_manager(
|
async def initialize_memory_manager(
|
||||||
data_dir: Optional[Path | str] = None,
|
data_dir: Optional[Path | str] = None,
|
||||||
config = None,
|
|
||||||
) -> Optional[MemoryManager]:
|
) -> Optional[MemoryManager]:
|
||||||
"""
|
"""
|
||||||
初始化全局 MemoryManager
|
初始化全局 MemoryManager
|
||||||
|
|
||||||
|
直接从 global_config.memory 读取配置
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
data_dir: 数据目录,默认使用 data/memory_graph
|
data_dir: 数据目录(可选,默认从配置读取)
|
||||||
config: MemoryGraphConfig 或 bot_config 实例
|
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
MemoryManager 实例
|
MemoryManager 实例,如果禁用则返回 None
|
||||||
"""
|
"""
|
||||||
global _memory_manager, _initialized
|
global _memory_manager, _initialized
|
||||||
|
|
||||||
@@ -40,26 +40,10 @@ async def initialize_memory_manager(
|
|||||||
return _memory_manager
|
return _memory_manager
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from src.memory_graph.config import MemoryGraphConfig
|
from src.config.config import global_config
|
||||||
|
|
||||||
# 处理配置
|
|
||||||
if config is None:
|
|
||||||
# 尝试从全局配置加载
|
|
||||||
try:
|
|
||||||
from src.config.config import global_config
|
|
||||||
memory_config = MemoryGraphConfig.from_bot_config(global_config)
|
|
||||||
logger.info("从 bot_config 加载 memory_graph 配置")
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning(f"无法从 bot_config 加载配置,使用默认配置: {e}")
|
|
||||||
memory_config = MemoryGraphConfig()
|
|
||||||
elif isinstance(config, MemoryGraphConfig):
|
|
||||||
memory_config = config
|
|
||||||
else:
|
|
||||||
# 假设是 bot_config
|
|
||||||
memory_config = MemoryGraphConfig.from_bot_config(config)
|
|
||||||
|
|
||||||
# 检查是否启用
|
# 检查是否启用
|
||||||
if not memory_config.enable:
|
if not global_config.memory or not getattr(global_config.memory, 'enable', False):
|
||||||
logger.info("记忆图系统已在配置中禁用")
|
logger.info("记忆图系统已在配置中禁用")
|
||||||
_initialized = False
|
_initialized = False
|
||||||
_memory_manager = None
|
_memory_manager = None
|
||||||
@@ -67,13 +51,13 @@ async def initialize_memory_manager(
|
|||||||
|
|
||||||
# 处理数据目录
|
# 处理数据目录
|
||||||
if data_dir is None:
|
if data_dir is None:
|
||||||
data_dir = memory_config.data_dir
|
data_dir = getattr(global_config.memory, 'data_dir', 'data/memory_graph')
|
||||||
elif isinstance(data_dir, str):
|
if isinstance(data_dir, str):
|
||||||
data_dir = Path(data_dir)
|
data_dir = Path(data_dir)
|
||||||
|
|
||||||
logger.info(f"正在初始化全局 MemoryManager (data_dir={data_dir})...")
|
logger.info(f"正在初始化全局 MemoryManager (data_dir={data_dir})...")
|
||||||
|
|
||||||
_memory_manager = MemoryManager(config=memory_config, data_dir=data_dir)
|
_memory_manager = MemoryManager(data_dir=data_dir)
|
||||||
await _memory_manager.initialize()
|
await _memory_manager.initialize()
|
||||||
|
|
||||||
_initialized = True
|
_initialized = True
|
||||||
|
|||||||
@@ -19,15 +19,47 @@ class CreateMemoryTool(BaseTool):
|
|||||||
"""创建记忆工具"""
|
"""创建记忆工具"""
|
||||||
|
|
||||||
name = "create_memory"
|
name = "create_memory"
|
||||||
description = "创建一个新的记忆。记忆由主体、类型、主题、客体(可选)和属性组成。用于记录重要的信息、事件、想法等。"
|
description = """记录对话中有价值的信息,构建长期记忆。
|
||||||
|
|
||||||
|
## 应该记录的内容类型:
|
||||||
|
|
||||||
|
### 高优先级记录(importance 0.7-1.0)
|
||||||
|
- 个人核心信息:姓名、年龄、职业、学历、联系方式
|
||||||
|
- 重要关系:家人、亲密朋友、恋人关系
|
||||||
|
- 核心目标:人生规划、职业目标、重要决定
|
||||||
|
- 关键事件:毕业、入职、搬家、重要成就
|
||||||
|
|
||||||
|
### 中等优先级(importance 0.5-0.7)
|
||||||
|
- 生活状态:工作内容、学习情况、日常习惯
|
||||||
|
- 兴趣偏好:喜欢/不喜欢的事物、消费偏好
|
||||||
|
- 观点态度:价值观、对事物的看法
|
||||||
|
- 技能知识:掌握的技能、专业领域
|
||||||
|
- 一般事件:日常活动、例行任务
|
||||||
|
|
||||||
|
### 低优先级(importance 0.3-0.5)
|
||||||
|
- 临时状态:今天心情、当前活动
|
||||||
|
- 一般评价:对产品/服务的简单评价
|
||||||
|
- 琐碎事件:买东西、看电影等常规活动
|
||||||
|
|
||||||
|
### ❌ 不应记录
|
||||||
|
- 单纯招呼语:"你好"、"再见"、"谢谢"
|
||||||
|
- 无意义语气词:"哦"、"嗯"、"好的"
|
||||||
|
- 纯粹回复确认:没有信息量的回应
|
||||||
|
|
||||||
|
## 记忆拆分原则
|
||||||
|
一句话多个信息点 → 多次调用创建多条记忆
|
||||||
|
|
||||||
|
示例:"我最近在学Python,想找数据分析的工作"
|
||||||
|
→ 调用1:{{subject:"[从历史提取真实名字]", memory_type:"事实", topic:"学习", object:"Python", attributes:{{时间:"最近", 状态:"进行中"}}, importance:0.7}}
|
||||||
|
→ 调用2:{{subject:"[从历史提取真实名字]", memory_type:"目标", topic:"求职", object:"数据分析岗位", attributes:{{状态:"计划中"}}, importance:0.8}}"""
|
||||||
|
|
||||||
parameters: ClassVar[list[tuple[str, ToolParamType, str, bool, list[str] | None]]] = [
|
parameters: ClassVar[list[tuple[str, ToolParamType, str, bool, list[str] | None]]] = [
|
||||||
("subject", ToolParamType.STRING, "记忆的主体,通常是'我'、'用户'或具体的人名", True, None),
|
("subject", ToolParamType.STRING, "记忆主体(重要!)。从对话历史中提取真实发送人名字。示例:如果看到'Prou(12345678): 我喜欢...',subject应填'Prou';如果看到'张三: 我在...',subject应填'张三'。❌禁止使用'用户'这种泛指,必须用具体名字!", True, None),
|
||||||
("memory_type", ToolParamType.STRING, "记忆类型", True, ["事件", "事实", "关系", "观点"]),
|
("memory_type", ToolParamType.STRING, "记忆类型。【事件】=有明确时间点的动作(昨天吃饭、明天开会)【事实】=稳定状态(职业是程序员、住在北京)【观点】=主观看法(喜欢/讨厌/认为)【关系】=人际关系(朋友、同事)", True, ["事件", "事实", "关系", "观点"]),
|
||||||
("topic", ToolParamType.STRING, "记忆的主题,即发生的事情或状态", True, None),
|
("topic", ToolParamType.STRING, "记忆的核心内容(做什么/是什么状态/什么关系)。必须明确、具体,包含关键动词或状态词", True, None),
|
||||||
("object", ToolParamType.STRING, "记忆的客体,即主题作用的对象(可选)", False, None),
|
("object", ToolParamType.STRING, "记忆涉及的对象或目标。如果topic已经很完整可以不填,如果有明确对象建议填写", False, None),
|
||||||
("attributes", ToolParamType.STRING, "记忆的属性(JSON格式字符串),如 {\"时间\":\"今天\",\"地点\":\"家里\"}", False, None),
|
("attributes", ToolParamType.STRING, "详细属性,JSON格式字符串。强烈建议包含:时间(具体到日期)、地点、状态、原因等上下文信息。例:{\"时间\":\"2025-11-06\",\"地点\":\"公司\",\"状态\":\"进行中\",\"原因\":\"项目需要\"}", False, None),
|
||||||
("importance", ToolParamType.FLOAT, "记忆的重要性(0.0-1.0),默认0.5", False, None),
|
("importance", ToolParamType.FLOAT, "重要性评分 0.0-1.0。参考:日常琐事0.3-0.4,一般对话0.5-0.6,重要信息0.7-0.8,核心记忆0.9-1.0。不确定时用0.5", False, None),
|
||||||
]
|
]
|
||||||
|
|
||||||
available_for_llm = True
|
available_for_llm = True
|
||||||
|
|||||||
@@ -73,83 +73,92 @@ class MemoryTools:
|
|||||||
"""
|
"""
|
||||||
return {
|
return {
|
||||||
"name": "create_memory",
|
"name": "create_memory",
|
||||||
"description": """创建一个新的记忆节点。
|
"description": """创建一个新的记忆节点,记录对话中有价值的信息。
|
||||||
|
|
||||||
⚠️ 记忆创建原则(必须遵守):
|
🎯 **核心原则**:主动记录、积极构建、丰富细节
|
||||||
1. **价值判断**:只创建具有长期价值的关键信息,避免记录日常闲聊、礼貌用语、重复信息
|
|
||||||
2. **细粒度原则**:每条记忆只包含一个明确的事实/事件/观点,避免泛化
|
|
||||||
3. **原子性**:如果一句话包含多个重要信息点,拆分成多条独立记忆
|
|
||||||
4. **具体性**:记录具体的人、事、物、时间、地点,避免模糊描述
|
|
||||||
|
|
||||||
❌ 不应创建记忆的情况:
|
✅ **优先创建记忆的场景**(鼓励记录):
|
||||||
- 普通问候、感谢、确认等礼貌性对话
|
1. **个人信息**:姓名、昵称、年龄、职业、身份、所在地、联系方式等
|
||||||
- 已存在的重复信息
|
2. **兴趣爱好**:喜欢/不喜欢的事物、娱乐偏好、运动爱好、饮食口味等
|
||||||
- 临时性、一次性的琐碎信息
|
3. **生活状态**:工作学习状态、生活习惯、作息时间、日常安排等
|
||||||
- 纯粹的功能操作指令(如"帮我查一下")
|
4. **经历事件**:正在做的事、完成的任务、参与的活动、遇到的问题等
|
||||||
- 缺乏上下文的碎片化信息
|
5. **观点态度**:对事物的看法、价值观、情绪表达、评价意见等
|
||||||
|
6. **计划目标**:未来打算、学习计划、工作目标、待办事项等
|
||||||
|
7. **人际关系**:提到的朋友、家人、同事、认识的人等
|
||||||
|
8. **技能知识**:掌握的技能、学习的知识、专业领域、使用的工具等
|
||||||
|
9. **物品资源**:拥有的物品、使用的设备、喜欢的品牌等
|
||||||
|
10. **时间地点**:重要时间节点、常去的地点、活动场所等
|
||||||
|
|
||||||
✅ 应该创建记忆的情况:
|
⚠️ **暂不创建的情况**(仅限以下):
|
||||||
- 用户的个人信息(姓名、职业、兴趣、联系方式等)
|
- 纯粹的招呼语(单纯的"你好"、"再见")
|
||||||
- 重要事件(项目进展、重大决定、关键行动等)
|
- 完全无意义的语气词(单纯的"哦"、"嗯")
|
||||||
- 长期偏好/观点(喜好、价值观、习惯等)
|
- 明确的系统指令(如"切换模式"、"重启")
|
||||||
- 人际关系变化(新朋友、合作关系等)
|
|
||||||
- 具体计划/目标(明确的待办事项、长期目标等)
|
|
||||||
|
|
||||||
📝 拆分示例:
|
📝 **记忆拆分建议**:
|
||||||
- ❌ "用户喜欢编程,最近在学Python和机器学习" → 过于泛化
|
- 一句话包含多个信息点 → 拆成多条记忆(更利于后续检索)
|
||||||
- ✅ 拆分为3条:
|
- 例如:"我最近在学Python和机器学习,想找工作"
|
||||||
1. "用户喜欢编程"(观点)
|
→ 拆成3条:
|
||||||
2. "用户正在学习Python"(事件)
|
1. "用户正在学习Python"(事件)
|
||||||
3. "用户正在学习机器学习"(事件)
|
2. "用户正在学习机器学习"(事件)
|
||||||
|
3. "用户想找工作"(事件/目标)
|
||||||
|
|
||||||
记忆结构:主体 + 类型 + 主题 + 客体(可选)+ 属性""",
|
📌 **记忆质量建议**:
|
||||||
|
- 记录时尽量补充时间("今天"、"最近"、"昨天"等)
|
||||||
|
- 包含具体细节(越具体越好)
|
||||||
|
- 主体明确(优先使用"用户"或具体人名,避免"我")
|
||||||
|
|
||||||
|
记忆结构:主体 + 类型 + 主题 + 客体(可选)+ 属性(越详细越好)""",
|
||||||
"parameters": {
|
"parameters": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
"subject": {
|
"subject": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "记忆的主体,通常是'用户'或具体的人名(避免使用'我')",
|
"description": "记忆的主体(谁的信息):\n- 对话中的用户统一使用'用户'\n- 提到的具体人物使用其名字(如'小明'、'张三')\n- 避免使用'我'、'他'等代词",
|
||||||
},
|
},
|
||||||
"memory_type": {
|
"memory_type": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"enum": ["事件", "事实", "关系", "观点"],
|
"enum": ["事件", "事实", "关系", "观点"],
|
||||||
"description": "记忆类型:\n- 事件:时间绑定的具体动作(如'完成项目'、'学习课程')\n- 事实:稳定的客观状态(如'职业是工程师'、'住在北京')\n- 关系:人际关系(如'认识了朋友'、'同事关系')\n- 观点:主观评价/偏好(如'喜欢Python'、'认为AI很重要')",
|
"description": "选择最合适的记忆类型:\n\n【事件】时间相关的动作或发生的事(用'正在'、'完成了'、'参加'等动词)\n 例:正在学习Python、完成了项目、参加会议、去旅行\n\n【事实】相对稳定的客观信息(用'是'、'有'、'在'等描述状态)\n 例:职业是工程师、住在北京、有一只猫、会说英语\n\n【观点】主观看法、喜好、态度(用'喜欢'、'认为'、'觉得'等)\n 例:喜欢Python、认为AI很重要、觉得累、讨厌加班\n\n【关系】人与人之间的关系\n 例:认识了朋友、是同事、家人关系",
|
||||||
},
|
},
|
||||||
"topic": {
|
"topic": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "记忆的核心主题,必须具体且明确(如'学习PyTorch框架'而非'学习编程')",
|
"description": "记忆的核心内容(做什么/是什么/关于什么):\n- 尽量具体明确('学习Python编程' 优于 '学习')\n- 包含关键动词或核心概念\n- 可以包含时间状态('正在学习'、'已完成'、'计划做')",
|
||||||
},
|
},
|
||||||
"object": {
|
"object": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "记忆的客体/对象,作为主题的补充说明(如主题是'学习',客体可以是'PyTorch框架')",
|
"description": "可选:记忆涉及的对象或目标:\n- 事件的对象(学习的是什么、购买的是什么)\n- 观点的对象(喜欢的是什么、讨厌的是什么)\n- 可以留空(如果topic已经足够完整)",
|
||||||
},
|
},
|
||||||
"attributes": {
|
"attributes": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"description": "记忆的具体属性(尽量填写以增加记忆的信息密度)",
|
"description": "记忆的详细属性(建议尽量填写,越详细越好):",
|
||||||
"properties": {
|
"properties": {
|
||||||
"时间": {
|
"时间": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "具体时间表达式,如'2025-11-05'、'今天下午'、'最近一周'、'3天前'",
|
"description": "时间信息(强烈建议填写):\n- 具体日期:'2025-11-05'、'2025年11月'\n- 相对时间:'今天'、'昨天'、'上周'、'最近'、'3天前'\n- 时间段:'今天下午'、'上个月'、'这学期'",
|
||||||
},
|
},
|
||||||
"地点": {
|
"地点": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "具体地点(如果相关)"
|
"description": "地点信息(如涉及):\n- 具体地址、城市名、国家\n- 场所类型:'在家'、'公司'、'学校'、'咖啡店'"
|
||||||
},
|
},
|
||||||
"原因": {
|
"原因": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "事件发生的原因或动机(如果明确)"
|
"description": "为什么这样做/这样想(如明确提到)"
|
||||||
},
|
},
|
||||||
"方式": {
|
"方式": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "完成的方式或途径(如果相关)"
|
"description": "怎么做的/通过什么方式(如明确提到)"
|
||||||
},
|
},
|
||||||
"结果": {
|
"结果": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "事件的结果或影响(如果已知)"
|
"description": "结果如何/产生什么影响(如明确提到)"
|
||||||
},
|
},
|
||||||
"状态": {
|
"状态": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "当前状态(如'进行中'、'已完成'、'计划中')"
|
"description": "当前进展:'进行中'、'已完成'、'计划中'、'暂停'等"
|
||||||
|
},
|
||||||
|
"程度": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "程度描述(如'非常'、'比较'、'有点'、'不太')"
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
"additionalProperties": True,
|
"additionalProperties": True,
|
||||||
@@ -158,7 +167,7 @@ class MemoryTools:
|
|||||||
"type": "number",
|
"type": "number",
|
||||||
"minimum": 0.0,
|
"minimum": 0.0,
|
||||||
"maximum": 1.0,
|
"maximum": 1.0,
|
||||||
"description": "记忆的重要性评分(0.0-1.0):\n- 0.3-0.4: 次要信息\n- 0.5-0.6: 一般信息\n- 0.7-0.8: 重要信息(用户明确表达的偏好、重要事件)\n- 0.9-1.0: 关键信息(核心个人信息、重大决定、强烈偏好)\n默认0.5",
|
"description": "重要性评分(默认0.5,日常对话建议0.5-0.7):\n\n0.3-0.4: 次要细节(偶然提及的琐事)\n0.5-0.6: 日常信息(一般性的分享、普通爱好)← 推荐默认值\n0.7-0.8: 重要信息(明确的偏好、重要计划、核心爱好)\n0.9-1.0: 关键信息(身份信息、重大决定、强烈情感)\n\n💡 建议:日常对话中大部分记忆使用0.5-0.6,除非用户特别强调",
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
"required": ["subject", "memory_type", "topic"],
|
"required": ["subject", "memory_type", "topic"],
|
||||||
|
|||||||
323
src/memory_graph/utils/memory_formatter.py
Normal file
323
src/memory_graph/utils/memory_formatter.py
Normal file
@@ -0,0 +1,323 @@
|
|||||||
|
"""
|
||||||
|
记忆格式化工具
|
||||||
|
|
||||||
|
用于将记忆图系统的Memory对象转换为适合提示词的自然语言描述
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from typing import Optional, List, Dict, Any
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
from src.memory_graph.models import Memory, MemoryNode, NodeType, EdgeType, MemoryType
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def format_memory_for_prompt(memory: Memory, include_metadata: bool = False) -> str:
    """
    Render one memory as a natural-language description for a prompt.

    The description is assembled from the memory's graph structure:

    - subject: the node returned by ``memory.get_subject_node()``
    - topic: target of the first ``MEMORY_TYPE`` edge leaving the subject
    - object: target of the first ``CORE_RELATION`` edge leaving the topic
      (optional), joined through the edge's ``relation`` text when present
    - attributes: name/value pairs reached through ``ATTRIBUTE`` edges,
      rendered in parentheses with time and place first

    Args:
        memory: The memory object to render.
        include_metadata: When True, append a bracketed suffix with the
            memory type, a coarse importance label and (only if no time
            attribute exists) a relative creation time.

    Returns:
        The formatted description. Structural problems and unexpected
        errors are reported as a short parenthesised error string rather
        than raised, so that prompt building never fails on one memory.
    """
    try:
        # 1. Subject node
        subject_node = memory.get_subject_node()
        if not subject_node:
            logger.warning(f"记忆 {memory.id} 缺少主体节点")
            return "(记忆格式错误:缺少主体)"

        subject_text = subject_node.content

        # 2. Topic node: only the first MEMORY_TYPE edge from the subject counts.
        topic_node = None
        for edge in memory.edges:
            if edge.edge_type == EdgeType.MEMORY_TYPE and edge.source_id == memory.subject_id:
                topic_node = memory.get_node_by_id(edge.target_id)
                break

        if not topic_node:
            logger.warning(f"记忆 {memory.id} 缺少主题节点")
            return f"{subject_text}(记忆格式错误:缺少主题)"

        topic_text = topic_node.content

        # 3. Object node and the relation text connecting it to the topic.
        object_node = None
        core_relation = ""
        for edge in memory.edges:
            if edge.edge_type == EdgeType.CORE_RELATION and edge.source_id == topic_node.id:
                object_node = memory.get_node_by_id(edge.target_id)
                core_relation = edge.relation if edge.relation else ""
                break

        # 4. Attribute name -> value pairs
        attributes = _collect_memory_attributes(memory)

        # 5. Build the sentence: subject + topic (+ relation + object)
        if object_node is not None:
            sentence = f"{subject_text}{topic_text}{core_relation}{object_node.content}"
        else:
            sentence = f"{subject_text}{topic_text}"
        parts = [sentence]

        if attributes:
            attr_parts = []
            # Time and place read naturally as prefixes, so they go first.
            if "时间" in attributes:
                attr_parts.append(f"于{attributes['时间']}")
            if "地点" in attributes:
                attr_parts.append(f"在{attributes['地点']}")
            attr_parts.extend(
                f"{key}:{value}"
                for key, value in attributes.items()
                if key not in ("时间", "地点")
            )
            if attr_parts:
                parts.append(f"({' '.join(attr_parts)})")

        description = "".join(parts)

        # 6. Optional metadata suffix
        if include_metadata:
            metadata_parts = []

            if memory.memory_type:
                metadata_parts.append(f"类型:{memory.memory_type.value}")

            # Importance below 0.6 gets no label at all.
            if memory.importance >= 0.8:
                metadata_parts.append("重要")
            elif memory.importance >= 0.6:
                metadata_parts.append("一般")

            # Fall back to the creation time only when no explicit time
            # attribute was rendered above.
            if "时间" not in attributes:
                time_str = _format_relative_time(memory.created_at)
                if time_str:
                    metadata_parts.append(time_str)

            if metadata_parts:
                description += f" [{', '.join(metadata_parts)}]"

        return description

    except Exception as e:
        # Deliberate best-effort boundary: log the full traceback and hand
        # the prompt builder a short placeholder instead of raising.
        logger.error(f"格式化记忆失败: {e}", exc_info=True)
        return f"(记忆格式化错误: {str(e)[:50]})"


def _collect_memory_attributes(memory: Memory) -> Dict[str, str]:
    """Return attribute-name -> value pairs found via ATTRIBUTE edges.

    An attribute is an ATTRIBUTE edge whose target is an ATTRIBUTE node; its
    value is the target of the first ATTRIBUTE edge leaving that node which
    resolves to a VALUE node. Edges are indexed by source once so the lookup
    does not rescan the whole edge list for every attribute.
    """
    attribute_edges = [e for e in memory.edges if e.edge_type == EdgeType.ATTRIBUTE]

    edges_by_source: Dict[str, list] = {}
    for edge in attribute_edges:
        edges_by_source.setdefault(edge.source_id, []).append(edge)

    attributes: Dict[str, str] = {}
    for edge in attribute_edges:
        attr_node = memory.get_node_by_id(edge.target_id)
        if not attr_node or attr_node.node_type != NodeType.ATTRIBUTE:
            continue
        for value_edge in edges_by_source.get(attr_node.id, ()):
            value_node = memory.get_node_by_id(value_edge.target_id)
            if value_node and value_node.node_type == NodeType.VALUE:
                attributes[attr_node.content] = value_node.content
                break
    return attributes
|
||||||
|
|
||||||
|
|
||||||
|
def format_memories_for_prompt(
    memories: List[Memory],
    max_count: Optional[int] = None,
    include_metadata: bool = False,
    group_by_type: bool = False
) -> str:
    """
    Format a list of memories as a markdown section for a prompt.

    Args:
        memories: Memories to render, in display order.
        max_count: Keep only the first ``max_count`` memories. A falsy value
            (``None`` or ``0``) means no limit.
        include_metadata: Forwarded to ``format_memory_for_prompt``.
        group_by_type: When True, emit one ``####`` sub-section per memory
            type; otherwise emit a flat list where each item carries a
            ``[type]`` tag.

    Returns:
        The section text (title plus list), or an empty string when
        ``memories`` is empty.
    """
    if not memories:
        return ""

    if max_count:
        memories = memories[:max_count]

    parts = ["### 🧠 相关记忆 (Relevant Memories)", ""]

    if not group_by_type:
        for memory in memories:
            type_label = memory.memory_type.value if memory.memory_type else "未知"
            desc = format_memory_for_prompt(memory, include_metadata)
            parts.append(f"- **[{type_label}]** {desc}")
        return "\n".join(parts)

    # Bucket memories by type, preserving their relative order.
    type_groups: Dict[Optional[MemoryType], List[Memory]] = {}
    for memory in memories:
        type_groups.setdefault(memory.memory_type, []).append(memory)

    # Well-known types come first in a fixed order. Any other type (including
    # a missing one) follows in first-seen order, so that no memory is
    # silently dropped from the grouped output.
    preferred_order = [MemoryType.FACT, MemoryType.EVENT, MemoryType.RELATION, MemoryType.OPINION]
    ordered_types = [t for t in preferred_order if t in type_groups]
    ordered_types.extend(t for t in type_groups if t not in preferred_order)

    for mem_type in ordered_types:
        # Same fallback label the flat list uses for a missing type.
        heading = mem_type.value if mem_type else "未知"
        parts.append(f"#### {heading}")
        for memory in type_groups[mem_type]:
            desc = format_memory_for_prompt(memory, include_metadata)
            parts.append(f"- {desc}")
        parts.append("")

    return "\n".join(parts)
|
||||||
|
|
||||||
|
|
||||||
|
def get_memory_type_label(memory_type: str) -> str:
    """
    Return the Chinese display label for a memory type.

    Args:
        memory_type: The type name, either an English identifier (matched
            case-insensitively) or one of the Chinese labels themselves.
            Surrounding whitespace is ignored. An object exposing a string
            ``value`` attribute (such as an enum member) is also accepted.

    Returns:
        The Chinese label, or "未知" for empty, unrecognised or non-string
        input.
    """
    english_to_label = {
        "event": "事件",
        "fact": "事实",
        "relation": "关系",
        "opinion": "观点",
        "preference": "偏好",
        "emotion": "情绪",
        "knowledge": "知识",
        "skill": "技能",
        "goal": "目标",
        "experience": "经历",
        "contextual": "情境",
    }

    # Unwrap enum-like objects; plain strings have no ``value`` attribute.
    raw = getattr(memory_type, "value", memory_type)
    if not isinstance(raw, str):
        return "未知"

    key = raw.strip().lower()

    # A Chinese label passed in is returned unchanged.
    if key in english_to_label.values():
        return key
    return english_to_label.get(key, "未知")
|
||||||
|
|
||||||
|
|
||||||
|
def _format_relative_time(timestamp: datetime) -> Optional[str]:
    """
    Describe how long ago ``timestamp`` was, e.g. "刚才" or "2天前".

    Args:
        timestamp: The moment to describe. Both naive and timezone-aware
            datetimes are supported; the current time is taken in the same
            timezone as ``timestamp`` so the subtraction is always valid.

    Returns:
        A relative-time phrase, or ``None`` when the timestamp is a year or
        more old or is not a usable datetime.
    """
    try:
        # datetime.now(None) yields a naive local time, and datetime.now(tz)
        # an aware one, so this matches whichever kind the caller passed.
        # Mixing the two kinds would raise TypeError.
        now = datetime.now(timestamp.tzinfo)
        delta = now - timestamp
    except (AttributeError, TypeError, ValueError, OverflowError):
        return None

    seconds = delta.total_seconds()

    # Timestamps in the future give a negative delta and also land here.
    if seconds < 60:
        return "刚才"
    if seconds < 3600:
        return f"{int(seconds / 60)}分钟前"
    if seconds < 86400:
        return f"{int(seconds / 3600)}小时前"

    days = delta.days
    if days < 7:
        return f"{days}天前"
    if days < 30:
        return f"{days // 7}周前"
    if days < 365:
        return f"{days // 30}个月前"

    # A year or more: a relative phrase is no longer useful.
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def format_memory_summary(memory: Memory) -> str:
    """
    Build a short one-line summary of a memory for logs and debugging.

    Args:
        memory: The memory object to summarise.

    Returns:
        ``"<subject> - <type>: <topic>"``, with "?" standing in for any part
        that cannot be resolved. If building the summary fails, a fallback
        made of the first eight characters of the memory id is returned.
    """
    try:
        subject = memory.get_subject_node()
        subject_label = "?" if not subject else subject.content

        topic_label = "?"
        for link in memory.edges:
            # Only MEMORY_TYPE edges leaving the subject point at the topic.
            if link.edge_type != EdgeType.MEMORY_TYPE or link.source_id != memory.subject_id:
                continue
            candidate = memory.get_node_by_id(link.target_id)
            if candidate:
                topic_label = candidate.content
                break

        type_label = memory.memory_type.value if memory.memory_type else '?'
        return f"{subject_label} - {type_label}: {topic_label}"
    except Exception:
        return f"记忆 {memory.id[:8]}"
|
||||||
|
|
||||||
|
|
||||||
|
# 导出主要函数
|
||||||
|
__all__ = [
|
||||||
|
'format_memory_for_prompt',
|
||||||
|
'format_memories_for_prompt',
|
||||||
|
'get_memory_type_label',
|
||||||
|
'format_memory_summary',
|
||||||
|
]
|
||||||
@@ -18,20 +18,36 @@ logger = get_logger("tool_use")
|
|||||||
def init_tool_executor_prompt():
|
def init_tool_executor_prompt():
|
||||||
"""初始化工具执行器的提示词"""
|
"""初始化工具执行器的提示词"""
|
||||||
tool_executor_prompt = """
|
tool_executor_prompt = """
|
||||||
你是一个专门执行工具的助手。你的名字是{bot_name}。现在是{time_now}。
|
# 工具调用系统
|
||||||
群里正在进行的聊天内容:
|
|
||||||
|
## 📋 你的身份
|
||||||
|
- **名字**: {bot_name}
|
||||||
|
- **核心人设**: {personality_core}
|
||||||
|
- **人格特质**: {personality_side}
|
||||||
|
- **当前时间**: {time_now}
|
||||||
|
|
||||||
|
## 💬 上下文信息
|
||||||
|
|
||||||
|
### 对话历史
|
||||||
{chat_history}
|
{chat_history}
|
||||||
|
|
||||||
现在,{sender}发送了内容:{target_message},你想要回复ta。
|
### 当前消息
|
||||||
请仔细分析聊天内容,考虑以下几点:
|
**{sender}** 说: {target_message}
|
||||||
1. 内容中是否包含需要查询信息的问题
|
|
||||||
2. 是否有明确的工具使用指令
|
|
||||||
3. 之前的工具调用是否提供了有用的信息
|
|
||||||
4. 是否需要基于之前的工具结果进行进一步的查询
|
|
||||||
|
|
||||||
{tool_history}
|
{tool_history}
|
||||||
|
|
||||||
If you need to use a tool, please directly call the corresponding tool function. If you do not need to use any tool, simply output "No tool needed".
|
## 🔧 工具使用
|
||||||
|
|
||||||
|
根据上下文判断是否需要使用工具。每个工具都有详细的description说明其用途和参数,请根据工具定义决定是否调用。
|
||||||
|
|
||||||
|
**⚠️ 记忆创建特别提醒:**
|
||||||
|
创建记忆时,subject(主体)必须使用对话历史中显示的**真实发送人名字**!
|
||||||
|
- ✅ 正确:从"Prou(12345678): ..."中提取"Prou"作为subject
|
||||||
|
- ❌ 错误:使用"用户"、"对方"等泛指词
|
||||||
|
|
||||||
|
**执行指令:**
|
||||||
|
- 需要使用工具 → 直接调用相应的工具函数
|
||||||
|
- 不需要工具 → 输出 "No tool needed"
|
||||||
"""
|
"""
|
||||||
Prompt(tool_executor_prompt, "tool_executor_prompt")
|
Prompt(tool_executor_prompt, "tool_executor_prompt")
|
||||||
|
|
||||||
@@ -110,6 +126,10 @@ class ToolExecutor:
|
|||||||
|
|
||||||
# 构建工具调用历史文本
|
# 构建工具调用历史文本
|
||||||
tool_history = self._format_tool_history()
|
tool_history = self._format_tool_history()
|
||||||
|
|
||||||
|
# 获取人设信息
|
||||||
|
personality_core = global_config.personality.personality_core
|
||||||
|
personality_side = global_config.personality.personality_side
|
||||||
|
|
||||||
# 构建工具调用提示词
|
# 构建工具调用提示词
|
||||||
prompt = await global_prompt_manager.format_prompt(
|
prompt = await global_prompt_manager.format_prompt(
|
||||||
@@ -120,6 +140,8 @@ class ToolExecutor:
|
|||||||
bot_name=bot_name,
|
bot_name=bot_name,
|
||||||
time_now=time_now,
|
time_now=time_now,
|
||||||
tool_history=tool_history,
|
tool_history=tool_history,
|
||||||
|
personality_core=personality_core,
|
||||||
|
personality_side=personality_side,
|
||||||
)
|
)
|
||||||
|
|
||||||
logger.debug(f"{self.log_prefix}开始LLM工具调用分析")
|
logger.debug(f"{self.log_prefix}开始LLM工具调用分析")
|
||||||
|
|||||||
@@ -239,9 +239,9 @@ max_context_emojis = 30 # 每次随机传递给LLM的表情包详细描述的最
|
|||||||
# 新一代记忆系统:基于知识图谱 + 语义向量的混合记忆架构
|
# 新一代记忆系统:基于知识图谱 + 语义向量的混合记忆架构
|
||||||
# 替代旧的 enhanced memory 系统
|
# 替代旧的 enhanced memory 系统
|
||||||
|
|
||||||
[memory_graph]
|
[memory]
|
||||||
# === 基础配置 ===
|
# === 基础配置 ===
|
||||||
enable = true # 是否启用记忆图系统
|
enable = true # 是否启用记忆系统
|
||||||
data_dir = "data/memory_graph" # 记忆数据存储目录
|
data_dir = "data/memory_graph" # 记忆数据存储目录
|
||||||
|
|
||||||
# === 向量存储配置 ===
|
# === 向量存储配置 ===
|
||||||
|
|||||||
Reference in New Issue
Block a user