diff --git a/src/chat/memory_system/__init__.py b/src/chat/memory_system/__init__.py index 814017e41..75daf0fb2 100644 --- a/src/chat/memory_system/__init__.py +++ b/src/chat/memory_system/__init__.py @@ -51,14 +51,6 @@ from .enhanced_memory_activator import ( enhanced_memory_activator ) -# 格式化器 -from .memory_formatter import ( - MemoryFormatter, - FormatterConfig, - format_memories_for_llm, - format_memories_bracket_style -) - # 兼容性别名 from .memory_chunk import MemoryChunk as Memory @@ -98,12 +90,6 @@ __all__ = [ "MemoryActivator", "memory_activator", "enhanced_memory_activator", # 兼容性别名 - - # 格式化器 - "MemoryFormatter", - "FormatterConfig", - "format_memories_for_llm", - "format_memories_bracket_style", ] # 版本信息 diff --git a/src/chat/memory_system/memory_formatter.py b/src/chat/memory_system/memory_formatter.py deleted file mode 100644 index 87339c823..000000000 --- a/src/chat/memory_system/memory_formatter.py +++ /dev/null @@ -1,331 +0,0 @@ -# -*- coding: utf-8 -*- -""" -记忆格式化器 -将召回的记忆转化为LLM友好的Markdown格式 -""" - -from typing import List, Dict, Any, Optional -from datetime import datetime -from dataclasses import dataclass - -from src.common.logger import get_logger -from src.chat.memory_system.memory_chunk import MemoryChunk, MemoryType - -logger = get_logger(__name__) - - -@dataclass -class FormatterConfig: - """格式化器配置""" - include_timestamps: bool = True # 是否包含时间信息 - include_memory_types: bool = True # 是否包含记忆类型 - include_confidence: bool = False # 是否包含置信度信息 - max_display_length: int = 200 # 单条记忆最大显示长度 - datetime_format: str = "%Y年%m月%d日" # 时间格式 - use_emoji_icons: bool = True # 是否使用emoji图标 - group_by_type: bool = False # 是否按类型分组 - use_bracket_format: bool = False # 是否使用方括号格式 [类型] 内容 - compact_format: bool = False # 是否使用紧凑格式 - - -class MemoryFormatter: - """记忆格式化器 - 将记忆转化为提示词友好的格式""" - - # 记忆类型对应的emoji图标 - TYPE_EMOJI_MAP = { - MemoryType.PERSONAL_FACT: "👤", - MemoryType.EVENT: "📅", - MemoryType.PREFERENCE: "❤️", - MemoryType.OPINION: "💭", - MemoryType.RELATIONSHIP: 
"👥", - MemoryType.EMOTION: "😊", - MemoryType.KNOWLEDGE: "📚", - MemoryType.SKILL: "🛠️", - MemoryType.GOAL: "🎯", - MemoryType.EXPERIENCE: "🌟", - MemoryType.CONTEXTUAL: "💬" - } - - # 记忆类型的中文标签 - 优化格式 - TYPE_LABELS = { - MemoryType.PERSONAL_FACT: "个人事实", - MemoryType.EVENT: "事件", - MemoryType.PREFERENCE: "偏好", - MemoryType.OPINION: "观点", - MemoryType.RELATIONSHIP: "关系", - MemoryType.EMOTION: "情感", - MemoryType.KNOWLEDGE: "知识", - MemoryType.SKILL: "技能", - MemoryType.GOAL: "目标", - MemoryType.EXPERIENCE: "经验", - MemoryType.CONTEXTUAL: "上下文" - } - - def __init__(self, config: Optional[FormatterConfig] = None): - self.config = config or FormatterConfig() - - def format_memories_for_prompt( - self, - memories: List[MemoryChunk], - query_context: Optional[str] = None - ) -> str: - """ - 将记忆列表格式化为LLM提示词 - - Args: - memories: 记忆列表 - query_context: 查询上下文(可选) - - Returns: - 格式化的Markdown文本 - """ - if not memories: - return "" - - lines = ["## 🧠 相关记忆回顾", ""] - - if self.config.group_by_type: - lines.extend(self._format_memories_by_type(memories)) - else: - lines.extend(self._format_memories_chronologically(memories)) - - return "\n".join(lines) - - def _format_memories_by_type(self, memories: List[MemoryChunk]) -> List[str]: - """按类型分组格式化记忆""" - # 按类型分组 - grouped_memories = {} - for memory in memories: - memory_type = memory.memory_type - if memory_type not in grouped_memories: - grouped_memories[memory_type] = [] - grouped_memories[memory_type].append(memory) - - lines = [] - - # 为每个类型生成格式化文本 - for memory_type, type_memories in grouped_memories.items(): - emoji = self.TYPE_EMOJI_MAP.get(memory_type, "📝") - label = self.TYPE_LABELS.get(memory_type, memory_type.value) - - lines.extend([ - f"### {emoji} {label}", - "" - ]) - - for memory in type_memories: - formatted_item = self._format_single_memory(memory, include_type=False) - lines.append(formatted_item) - - lines.append("") # 类型间空行 - - return lines - - def _format_memories_chronologically(self, memories: List[MemoryChunk]) -> 
List[str]: - """按时间顺序格式化记忆""" - lines = [] - - for i, memory in enumerate(memories, 1): - formatted_item = self._format_single_memory(memory, include_type=True, index=i) - lines.append(formatted_item) - - return lines - - def _format_single_memory( - self, - memory: MemoryChunk, - include_type: bool = True, - index: Optional[int] = None - ) -> str: - """格式化单条记忆""" - # 如果启用方括号格式,使用新格式 - if self.config.use_bracket_format: - return self._format_single_memory_bracket(memory) - - # 获取显示文本 - display_text = memory.display or memory.text_content - if len(display_text) > self.config.max_display_length: - display_text = display_text[:self.config.max_display_length - 3] + "..." - - # 构建前缀 - prefix_parts = [] - - # 添加序号 - if index is not None: - prefix_parts.append(f"{index}.") - - # 添加类型标签 - if include_type and self.config.include_memory_types: - if self.config.use_emoji_icons: - emoji = self.TYPE_EMOJI_MAP.get(memory.memory_type, "📝") - prefix_parts.append(f"**{emoji}") - else: - label = self.TYPE_LABELS.get(memory.memory_type, memory.memory_type.value) - prefix_parts.append(f"**[{label}]") - - # 添加时间信息 - if self.config.include_timestamps: - timestamp = memory.metadata.created_at - if timestamp > 0: - dt = datetime.fromtimestamp(timestamp) - time_str = dt.strftime(self.config.datetime_format) - if self.config.use_emoji_icons: - prefix_parts.append(f"⏰ {time_str}") - else: - prefix_parts.append(f"({time_str})") - - # 添加置信度信息 - if self.config.include_confidence: - confidence = memory.metadata.confidence.value - confidence_stars = "★" * confidence + "☆" * (4 - confidence) - prefix_parts.append(f"信度:{confidence_stars}") - - # 构建完整格式 - if prefix_parts: - if self.config.include_memory_types and self.config.use_emoji_icons: - prefix = " ".join(prefix_parts) + "** " - else: - prefix = " ".join(prefix_parts) + " " - return f"- {prefix}{display_text}" - else: - return f"- {display_text}" - - def _format_single_memory_bracket(self, memory: MemoryChunk) -> str: - """格式化单条记忆 - 使用方括号格式 
[类型] 内容""" - # 获取显示文本 - display_text = memory.display or memory.text_content - - # 如果启用紧凑格式,只显示核心内容 - if self.config.compact_format: - if len(display_text) > self.config.max_display_length: - display_text = display_text[:self.config.max_display_length - 3] + "..." - else: - # 非紧凑格式可以包含时间信息 - if self.config.include_timestamps: - timestamp = memory.metadata.created_at - if timestamp > 0: - dt = datetime.fromtimestamp(timestamp) - time_str = dt.strftime("%Y年%m月%d日") - # 将时间信息自然地整合到内容中 - if "在" not in display_text and "当" not in display_text: - display_text = f"在{time_str},{display_text}" - - # 获取类型标签 - label = self.TYPE_LABELS.get(memory.memory_type, memory.memory_type.value) - - # 构建方括号格式: **[类型]** 内容 - return f"- **[{label}]** {display_text}" - - def format_memory_summary(self, memories: List[MemoryChunk]) -> str: - """生成记忆摘要统计""" - if not memories: - return "暂无相关记忆。" - - # 统计信息 - total_count = len(memories) - type_counts = {} - - for memory in memories: - memory_type = memory.memory_type - type_counts[memory_type] = type_counts.get(memory_type, 0) + 1 - - # 生成摘要 - lines = [f"**记忆摘要**: 共找到 {total_count} 条相关记忆"] - - if len(type_counts) > 1: - type_summaries = [] - for memory_type, count in type_counts.items(): - emoji = self.TYPE_EMOJI_MAP.get(memory_type, "📝") - label = self.TYPE_LABELS.get(memory_type, memory_type.value) - type_summaries.append(f"{emoji}{label} {count}条") - - lines.append(f"包括: {', '.join(type_summaries)}") - - return " | ".join(lines) - - def format_for_debug(self, memories: List[MemoryChunk]) -> str: - """生成调试格式的记忆列表""" - if not memories: - return "无记忆数据" - - lines = ["### 记忆调试信息", ""] - - for i, memory in enumerate(memories, 1): - lines.extend([ - f"**记忆 {i}** (ID: {memory.memory_id[:8]})", - f"- 类型: {memory.memory_type.value}", - f"- 内容: {memory.display[:100]}{'...' 
if len(memory.display) > 100 else ''}", - f"- 访问次数: {memory.metadata.access_count}", - f"- 置信度: {memory.metadata.confidence.value}/4", - f"- 重要性: {memory.metadata.importance.value}/4", - f"- 创建时间: {datetime.fromtimestamp(memory.metadata.created_at).strftime('%Y-%m-%d %H:%M')}", - "" - ]) - - return "\n".join(lines) - - -# 创建默认格式化器实例 -default_formatter = MemoryFormatter() - - -def format_memories_for_llm( - memories: List[MemoryChunk], - query_context: Optional[str] = None, - config: Optional[FormatterConfig] = None -) -> str: - """ - 便捷函数:将记忆格式化为LLM提示词 - """ - if config: - formatter = MemoryFormatter(config) - else: - formatter = default_formatter - - return formatter.format_memories_for_prompt(memories, query_context) - - -def format_memory_summary( - memories: List[MemoryChunk], - config: Optional[FormatterConfig] = None -) -> str: - """ - 便捷函数:生成记忆摘要 - """ - if config: - formatter = MemoryFormatter(config) - else: - formatter = default_formatter - - return formatter.format_memory_summary(memories) - - -def format_memories_bracket_style( - memories: List[MemoryChunk], - query_context: Optional[str] = None, - compact: bool = True, - include_timestamps: bool = True -) -> str: - """ - 便捷函数:使用方括号格式格式化记忆 - - Args: - memories: 记忆列表 - query_context: 查询上下文 - compact: 是否使用紧凑格式 - include_timestamps: 是否包含时间信息 - - Returns: - 格式化的Markdown文本 - """ - config = FormatterConfig( - use_bracket_format=True, - compact_format=compact, - include_timestamps=include_timestamps, - include_memory_types=True, - use_emoji_icons=False, - group_by_type=False - ) - - formatter = MemoryFormatter(config) - return formatter.format_memories_for_prompt(memories, query_context) \ No newline at end of file diff --git a/src/chat/memory_system/memory_metadata_index.py b/src/chat/memory_system/memory_metadata_index.py index 387f95bfb..32104ffab 100644 --- a/src/chat/memory_system/memory_metadata_index.py +++ b/src/chat/memory_system/memory_metadata_index.py @@ -200,7 +200,8 @@ class MemoryMetadataIndex: 
def _search_flexible(
    self,
    memory_types: Optional[List[str]] = None,
    subjects: Optional[List[str]] = None,
    created_after: Optional[float] = None,
    created_before: Optional[float] = None,
    user_id: Optional[str] = None,
    limit: Optional[int] = None,
    min_score: float = 2,
    **kwargs  # accepted so ``search`` can forward keywords/tags; not used here
) -> List[str]:
    """Score-based ("flexible") metadata search.

    Every index entry (optionally restricted to ``user_id``) is scored on
    four dimensions and kept when the total reaches ``min_score``:

    1. memory type - 1 for an exact match, 0.5 for a substring match
    2. subject     - 1 for an exact match, 0.6 for a substring match
    3. object      - 0.8 when any entry object overlaps a queried subject
    4. time range  - 1 when ``created_at`` lies inside the requested range

    For types and subjects the best match over all queried values is used,
    so an exact match is never hidden by an earlier partial one. Blank
    strings are ignored for substring matching, because an empty string is
    a substring of every string.

    Args:
        memory_types: Accepted memory type names.
        subjects: Queried subjects (compared stripped and lower-cased).
        created_after: Inclusive lower bound on ``created_at``.
        created_before: Inclusive upper bound on ``created_at``.
        user_id: When truthy, only this user's entries are considered.
        limit: Maximum number of ids returned; falsy means no limit.
        min_score: Minimum total score for an entry to be returned.
            NOTE(review): with the default of 2 a query that constrains
            fewer than two dimensions cannot match anything - confirm
            whether callers rely on the resulting empty list.
        **kwargs: Ignored.

    Returns:
        Matching memory ids, ordered by score and then by creation time,
        both descending.
    """
    if user_id:
        base_candidates = [
            mid for mid, entry in self.index.items()
            if entry.user_id == user_id
        ]
    else:
        base_candidates = list(self.index.keys())

    # Normalise the queried subjects once instead of once per entry.
    wanted_subjects = [
        norm
        for norm in (subject.strip().lower() for subject in (subjects or []))
        if norm
    ]
    has_time_filter = created_after is not None or created_before is not None

    scored_candidates = []

    for memory_id in base_candidates:
        entry = self.index[memory_id]
        score = 0

        # 1. Memory type: exact match wins, otherwise remember a partial hit.
        if memory_types:
            entry_type = entry.memory_type
            entry_type_norm = entry_type.lower()
            type_score = 0
            for mtype in memory_types:
                if mtype == entry_type:
                    type_score = 1
                    break
                mtype_norm = mtype.lower()
                if mtype_norm and entry_type_norm and (
                    mtype_norm in entry_type_norm or entry_type_norm in mtype_norm
                ):
                    # Keep scanning: a later type may still match exactly.
                    type_score = 0.5
            score += type_score

        # 2. Subject: same best-match rule as for the type.
        if wanted_subjects:
            subject_score = 0
            for entry_subject in entry.subjects:
                entry_subject_norm = entry_subject.strip().lower()
                if not entry_subject_norm:
                    continue
                for wanted in wanted_subjects:
                    if wanted == entry_subject_norm:
                        subject_score = 1
                        break
                    if wanted in entry_subject_norm or entry_subject_norm in wanted:
                        subject_score = 0.6
                if subject_score == 1:
                    break
            score += subject_score

        # 3. Object: an entry object that overlaps a queried subject.
        if wanted_subjects and entry.objects:
            for entry_object in entry.objects:
                entry_object_norm = str(entry_object).strip().lower()
                if entry_object_norm and any(
                    wanted in entry_object_norm or entry_object_norm in wanted
                    for wanted in wanted_subjects
                ):
                    score += 0.8
                    break

        # 4. Time range (both bounds inclusive).
        if has_time_filter:
            after_ok = created_after is None or entry.created_at >= created_after
            before_ok = created_before is None or entry.created_at <= created_before
            if after_ok and before_ok:
                score += 1

        if score >= min_score:
            scored_candidates.append((memory_id, score))

    # Best score first; newer entries first among equal scores.
    scored_candidates.sort(
        key=lambda item: (item[1], self.index[item[0]].created_at),
        reverse=True
    )

    if limit:
        result_ids = [mid for mid, _ in scored_candidates[:limit]]
    else:
        result_ids = [mid for mid, _ in scored_candidates]

    logger.debug(
        f"[灵活搜索] 过滤条件: types={memory_types}, subjects={subjects}, "
        f"time_range=[{created_after}, {created_before}], 返回={len(result_ids)}条"
    )

    if scored_candidates:
        avg_score = sum(score for _, score in scored_candidates) / len(scored_candidates)
        logger.debug(f"[灵活搜索] 平均匹配分数: {avg_score:.2f}, 最高分: {scored_candidates[0][1]:.2f}")

    return result_ids


def _search_strict(
    self,
    memory_types: Optional[List[str]] = None,
    subjects: Optional[List[str]] = None,
    keywords: Optional[List[str]] = None,
    tags: Optional[List[str]] = None,
    importance_min: Optional[int] = None,
    importance_max: Optional[int] = None,
    created_after: Optional[float] = None,
    created_before: Optional[float] = None,
    user_id: Optional[str] = None,
    limit: Optional[int] = None
) -> List[str]:
    """Strict search: every supplied filter must hold (filters are AND-ed).

    Within one filter the values are OR-ed. Subjects and keywords also
    match by substring in either direction; tags and types match exactly.

    Returns:
        Matching memory ids ordered by creation time, newest first,
        truncated to ``limit`` when ``limit`` is truthy.
    """
    # Start from the user's entries, or from everything without a user.
    if user_id:
        candidate_ids: Set[str] = {
            mid for mid, entry in self.index.items()
            if entry.user_id == user_id
        }
    else:
        candidate_ids = set(self.index.keys())

    # Type filter (OR across the requested types).
    if memory_types:
        type_ids = set()
        for mtype in memory_types:
            type_ids.update(self.type_index.get(mtype, set()))
        candidate_ids &= type_ids

    # Subject filter (OR, exact or substring match).
    if subjects:
        subject_ids = set()
        for subject in subjects:
            subject_norm = subject.strip().lower()
            if subject_norm in self.subject_index:
                subject_ids.update(self.subject_index[subject_norm])
            for indexed_subject, ids in self.subject_index.items():
                if subject_norm in indexed_subject or indexed_subject in subject_norm:
                    subject_ids.update(ids)
        candidate_ids &= subject_ids

    # Keyword filter (OR, exact or substring match).
    if keywords:
        keyword_ids = set()
        for keyword in keywords:
            keyword_norm = keyword.strip().lower()
            if keyword_norm in self.keyword_index:
                keyword_ids.update(self.keyword_index[keyword_norm])
            for indexed_keyword, ids in self.keyword_index.items():
                if keyword_norm in indexed_keyword or indexed_keyword in keyword_norm:
                    keyword_ids.update(ids)
        candidate_ids &= keyword_ids

    # Tag filter (OR, exact match on the normalised tag).
    if tags:
        tag_ids = set()
        for tag in tags:
            tag_norm = tag.strip().lower()
            tag_ids.update(self.tag_index.get(tag_norm, set()))
        candidate_ids &= tag_ids

    # Importance range (bounds inclusive).
    if importance_min is not None or importance_max is not None:
        importance_ids = {
            mid for mid in candidate_ids
            if (importance_min is None or self.index[mid].importance >= importance_min)
            and (importance_max is None or self.index[mid].importance <= importance_max)
        }
        candidate_ids &= importance_ids

    # Creation-time range (bounds inclusive).
    if created_after is not None or created_before is not None:
        time_ids = {
            mid for mid in candidate_ids
            if (created_after is None or self.index[mid].created_at >= created_after)
            and (created_before is None or self.index[mid].created_at <= created_before)
        }
        candidate_ids &= time_ids

    # Newest first.
    result_ids = sorted(
        candidate_ids,
        key=lambda mid: self.index[mid].created_at,
        reverse=True
    )

    if limit:
        result_ids = result_ids[:limit]

    logger.debug(
        f"[严格搜索] types={memory_types}, subjects={subjects}, "
        f"keywords={keywords}, 返回={len(result_ids)}条"
    )

    return result_ids
+24,63 @@ import numpy as np from src.common.logger import get_logger from src.common.vector_db import vector_db_service from src.chat.utils.utils import get_embedding -from src.chat.memory_system.memory_chunk import MemoryChunk +from src.chat.memory_system.memory_chunk import MemoryChunk, ConfidenceLevel, ImportanceLevel from src.chat.memory_system.memory_forgetting_engine import MemoryForgettingEngine from src.chat.memory_system.memory_metadata_index import MemoryMetadataIndex, MemoryMetadataIndexEntry logger = get_logger(__name__) +# 全局枚举映射表缓存 +_ENUM_MAPPINGS_CACHE = {} + +def _build_enum_mapping(enum_class: type) -> Dict[str, Any]: + """构建枚举类的完整映射表 + + Args: + enum_class: 枚举类 + + Returns: + Dict[str, Any]: 包含各种映射格式的字典 + """ + cache_key = f"{enum_class.__module__}.{enum_class.__name__}" + + # 如果已经缓存过,直接返回 + if cache_key in _ENUM_MAPPINGS_CACHE: + return _ENUM_MAPPINGS_CACHE[cache_key] + + mapping = { + "name_to_enum": {}, # 枚举名称 -> 枚举实例 (HIGH -> ImportanceLevel.HIGH) + "value_to_enum": {}, # 整数值 -> 枚举实例 (3 -> ImportanceLevel.HIGH) + "value_str_to_enum": {}, # 字符串value -> 枚举实例 ("3" -> ImportanceLevel.HIGH) + "enum_value_to_name": {}, # 枚举实例 -> 名称映射 (反向) + "all_possible_strings": set(), # 所有可能的字符串表示 + } + + for member in enum_class: + # 名称映射 (支持大小写) + mapping["name_to_enum"][member.name] = member + mapping["name_to_enum"][member.name.lower()] = member + mapping["name_to_enum"][member.name.upper()] = member + + # 值映射 + mapping["value_to_enum"][member.value] = member + mapping["value_str_to_enum"][str(member.value)] = member + + # 反向映射 + mapping["enum_value_to_name"][member] = member.name + + # 收集所有可能的字符串表示 + mapping["all_possible_strings"].add(member.name) + mapping["all_possible_strings"].add(member.name.lower()) + mapping["all_possible_strings"].add(member.name.upper()) + mapping["all_possible_strings"].add(str(member.value)) + + # 缓存结果 + _ENUM_MAPPINGS_CACHE[cache_key] = mapping + logger.debug(f"构建枚举映射表: {enum_class.__name__} -> {len(mapping['name_to_enum'])} 
个名称映射, {len(mapping['value_to_enum'])} 个值映射") + + return mapping + @dataclass class VectorStorageConfig: @@ -294,8 +345,8 @@ class VectorMemoryStorage: "last_modified": metadata.get("timestamp", time.time()), "access_count": metadata.get("access_count", 0), "relevance_score": 0.0, - "confidence": int(metadata.get("confidence", 2)), # MEDIUM - "importance": int(metadata.get("importance", 2)), # NORMAL + "confidence": self._parse_enum_value(metadata.get("confidence", 2), ConfidenceLevel, ConfidenceLevel.MEDIUM), + "importance": self._parse_enum_value(metadata.get("importance", 2), ImportanceLevel, ImportanceLevel.NORMAL), "source_context": None, }, "content": { @@ -313,12 +364,76 @@ class VectorMemoryStorage: "related_memories": [], "temporal_context": None } - + return MemoryChunk.from_dict(memory_dict) - + except Exception as e: logger.error(f"转换Vector结果到MemoryChunk失败: {e}", exc_info=True) return None + + def _parse_enum_value(self, value: Any, enum_class: type, default: Any) -> Any: + """解析枚举值,支持字符串、整数和枚举实例 + + Args: + value: 要解析的值(可能是字符串、整数或枚举实例) + enum_class: 目标枚举类 + default: 默认值 + + Returns: + 解析后的枚举实例 + """ + if value is None: + return default + + # 如果已经是枚举实例,直接返回 + if isinstance(value, enum_class): + return value + + # 如果是整数,尝试按value值匹配 + if isinstance(value, int): + try: + for member in enum_class: + if member.value == value: + return member + # 如果没找到匹配的,返回默认值 + logger.warning(f"无法找到{enum_class.__name__}中value={value}的枚举项,使用默认值") + return default + except Exception as e: + logger.warning(f"解析{enum_class.__name__}整数值{value}时出错: {e},使用默认值") + return default + + # 如果是字符串,尝试按名称或value值匹配 + if isinstance(value, str): + str_value = value.strip().upper() + + # 先尝试按枚举名称匹配 + try: + if hasattr(enum_class, str_value): + return getattr(enum_class, str_value) + except AttributeError: + pass + + # 再尝试按value值匹配(如果value是字符串形式的数字) + try: + int_value = int(str_value) + return self._parse_enum_value(int_value, enum_class, default) + except ValueError: + pass + + # 最后尝试按小写名称匹配 + 
try: + for member in enum_class: + if member.value.upper() == str_value: + return member + logger.warning(f"无法找到{enum_class.__name__}中名称或value为'{value}'的枚举项,使用默认值") + return default + except Exception as e: + logger.warning(f"解析{enum_class.__name__}字符串值'{value}'时出错: {e},使用默认值") + return default + + # 其他类型,返回默认值 + logger.warning(f"不支持的{enum_class.__name__}值类型: {type(value)},使用默认值") + return default def _get_from_cache(self, memory_id: str) -> Optional[MemoryChunk]: """从缓存获取记忆""" @@ -518,14 +633,19 @@ class VectorMemoryStorage: created_after=metadata_filters.get('created_after'), created_before=metadata_filters.get('created_before'), user_id=metadata_filters.get('user_id'), - limit=self.config.search_limit * 2 # 粗筛返回更多候选 + limit=self.config.search_limit * 2, # 粗筛返回更多候选 + flexible_mode=True # 使用灵活匹配模式 ) logger.info(f"[JSON元数据粗筛] 完成,筛选出 {len(candidate_ids)} 个候选ID") - - # 如果粗筛后没有结果,直接返回 + + # 如果粗筛后没有结果,回退到全部记忆搜索 if not candidate_ids: - logger.warning("JSON元数据粗筛后无候选,返回空结果") - return [] + total_memories = len(self.metadata_index.index) + logger.warning(f"JSON元数据粗筛后无候选,启用回退机制:在全部 {total_memories} 条记忆中进行向量搜索") + logger.info("💡 提示:这可能是因为查询条件过于严格,或相关记忆的元数据与查询条件不完全匹配") + candidate_ids = None # 设为None表示不限制候选ID + else: + logger.debug(f"[JSON元数据粗筛] 成功筛选出候选,进入向量精筛阶段") # === 阶段二:向量精筛 === # 生成查询向量 @@ -543,6 +663,8 @@ class VectorMemoryStorage: # ChromaDB的where条件需要使用$in操作符 where_conditions["memory_id"] = {"$in": candidate_ids} logger.debug(f"[向量精筛] 限制在 {len(candidate_ids)} 个候选ID内搜索") + else: + logger.info("[向量精筛] 在全部记忆中搜索(元数据筛选无结果回退)") # 查询Vector DB logger.debug(f"[向量精筛] 开始,limit={min(limit, self.config.search_limit)}") diff --git a/src/chat/replyer/default_generator.py b/src/chat/replyer/default_generator.py index de9e8f798..ecd57639b 100644 --- a/src/chat/replyer/default_generator.py +++ b/src/chat/replyer/default_generator.py @@ -18,6 +18,7 @@ from src.individuality.individuality import get_individuality from src.llm_models.utils_model import LLMRequest from 
# -*- coding: utf-8 -*-
"""
Mapping tables and helpers for the memory system.

Provides the Chinese display labels for memory types, confidence levels
and importance levels.
"""

# Label returned whenever a value cannot be resolved.
_UNKNOWN_LABEL = "未知"

# Memory type (lower-case string value) -> Chinese label.
MEMORY_TYPE_CHINESE_MAPPING = {
    "personal_fact": "个人事实",
    "preference": "偏好",
    "event": "事件",
    "opinion": "观点",
    "relationship": "人际关系",
    "emotion": "情感状态",
    "knowledge": "知识信息",
    "skill": "技能能力",
    "goal": "目标计划",
    "experience": "经验教训",
    "contextual": "上下文信息",
    "unknown": "未知"
}

# Confidence level (numeric value or upper-case name) -> Chinese label.
CONFIDENCE_LEVEL_CHINESE_MAPPING = {
    1: "低置信度",
    2: "中等置信度",
    3: "高置信度",
    4: "已验证",
    "LOW": "低置信度",
    "MEDIUM": "中等置信度",
    "HIGH": "高置信度",
    "VERIFIED": "已验证",
    "unknown": "未知"
}

# Importance level (numeric value or upper-case name) -> Chinese label.
IMPORTANCE_LEVEL_CHINESE_MAPPING = {
    1: "低重要性",
    2: "一般重要性",
    3: "高重要性",
    4: "关键重要性",
    "LOW": "低重要性",
    "NORMAL": "一般重要性",
    "HIGH": "高重要性",
    "CRITICAL": "关键重要性",
    "unknown": "未知"
}


def _lookup_level_label(level, mapping) -> str:
    """Resolve ``level`` against a level mapping table.

    Shared by the confidence and importance helpers so both accept the
    same inputs: enum members (via ``.value``), ints, integral floats,
    numeric strings such as ``"3"`` and level names in any casing.

    Args:
        level: The level in any of the forms listed above.
        mapping: One of the ``*_LEVEL_CHINESE_MAPPING`` tables.

    Returns:
        The Chinese label, or "未知" when the level cannot be resolved.
    """
    # Enum members carry the comparable payload in ``.value``.
    if hasattr(level, "value"):
        level = level.value

    # Levels may arrive as floats (3.0); only whole numbers are valid.
    if isinstance(level, float):
        if not level.is_integer():
            return _UNKNOWN_LABEL
        level = int(level)

    if isinstance(level, int):
        return mapping.get(level, _UNKNOWN_LABEL)

    if isinstance(level, str):
        text = level.strip()
        # ``isascii`` guards ``int()`` against digits such as "³", which
        # satisfy ``isdigit`` but cannot be parsed.
        if text.isascii() and text.isdigit():
            return mapping.get(int(text), _UNKNOWN_LABEL)
        return mapping.get(text.upper(), _UNKNOWN_LABEL)

    return _UNKNOWN_LABEL


def get_memory_type_chinese_label(memory_type) -> str:
    """Return the Chinese label for a memory type.

    Args:
        memory_type: Memory type as a string (any casing, surrounding
            whitespace ignored) or an enum member whose ``.value`` is
            that string.

    Returns:
        str: The matching label, or "未知" when the type is not known.
    """
    if hasattr(memory_type, "value"):
        memory_type = memory_type.value

    if not isinstance(memory_type, str):
        return _UNKNOWN_LABEL

    return MEMORY_TYPE_CHINESE_MAPPING.get(memory_type.strip().lower(), _UNKNOWN_LABEL)


def get_confidence_level_chinese_label(level) -> str:
    """Return the Chinese label for a confidence level.

    Args:
        level: Confidence level as a number, numeric string, level name
            or enum member.

    Returns:
        str: The matching label, or "未知" when the level is not known.
    """
    return _lookup_level_label(level, CONFIDENCE_LEVEL_CHINESE_MAPPING)


def get_importance_level_chinese_label(level) -> str:
    """Return the Chinese label for an importance level.

    Args:
        level: Importance level as a number, numeric string, level name
            or enum member.

    Returns:
        str: The matching label, or "未知" when the level is not known.
    """
    return _lookup_level_label(level, IMPORTANCE_LEVEL_CHINESE_MAPPING)