feat(memory): 实现灵活搜索模式并重构记忆格式化系统

- 新增灵活匹配模式(flexible_mode)，支持四项条件中满足两项即可命中的记忆检索策略
- 删除冗余的memory_formatter模块，简化记忆系统架构
- 增强枚举值解析机制，支持字符串、整数和枚举实例的自动转换
- 优化元数据索引搜索逻辑，分离严格模式和灵活模式的实现路径
- 改进向量存储的搜索回退机制，当元数据筛选无结果时自动回退到全量搜索
- 统一记忆类型映射管理，避免重复的格式化函数定义

这些变更提升了记忆检索的准确性和灵活性，同时简化了代码结构，提高了系统可维护性。
2025-10-02 11:27:06 +08:00
parent 82bb2df369
commit 1f2d7d9ee5
6 changed files with 508 additions and 463 deletions
--- a/src/chat/memory_system/init.py
+++ b/src/chat/memory_system/init.py
@@ -51,14 +51,6 @@ from .enhanced_memory_activator import (
    enhanced_memory_activator
 )
 # 格式化器
 from .memory_formatter import (
    MemoryFormatter,
    FormatterConfig,
    format_memories_for_llm,
    format_memories_bracket_style
 )
 # 兼容性别名
 from .memory_chunk import MemoryChunk as Memory
@@ -98,12 +90,6 @@ __all__ = [
    "MemoryActivator",
    "memory_activator",
    "enhanced_memory_activator",  # 兼容性别名
    # 格式化器
    "MemoryFormatter",
    "FormatterConfig",
    "format_memories_for_llm",
    "format_memories_bracket_style",
 ]
 # 版本信息
--- a/src/chat/memory_system/memory_formatter.py
+++ b/src/chat/memory_system/memory_formatter.py
@@ -1,331 +0,0 @@
 # -*- coding: utf-8 -*-
 """
 记忆格式化器
 将召回的记忆转化为LLM友好的Markdown格式
 """
 from typing import List, Dict, Any, Optional
 from datetime import datetime
 from dataclasses import dataclass
 from src.common.logger import get_logger
 from src.chat.memory_system.memory_chunk import MemoryChunk, MemoryType
 logger = get_logger(__name__)
@dataclass
 class FormatterConfig:
    """Options that control how MemoryFormatter renders memories."""
    include_timestamps: bool = True      # include each memory's creation date
    include_memory_types: bool = True    # include the memory-type marker (emoji or label)
    include_confidence: bool = False     # include a confidence star rating
    max_display_length: int = 200       # maximum characters shown per memory before truncation
    datetime_format: str = "%Y年%m月%d日" # strftime pattern used when rendering dates
    use_emoji_icons: bool = True         # use emoji icons rather than text labels
    group_by_type: bool = False          # group memories under one heading per type
    use_bracket_format: bool = False     # use the "[type] content" bracket layout
    compact_format: bool = False         # bracket layout only: truncate text and skip the date prefix
 class MemoryFormatter:
    """Render recalled memories as prompt-friendly Markdown.

    All layout decisions are driven by the ``FormatterConfig`` given at
    construction time.
    """

    # Emoji icon shown for each memory type.
    TYPE_EMOJI_MAP = {
        MemoryType.PERSONAL_FACT: "👤",
        MemoryType.EVENT: "📅",
        MemoryType.PREFERENCE: "❤️",
        MemoryType.OPINION: "💭",
        MemoryType.RELATIONSHIP: "👥",
        MemoryType.EMOTION: "😊",
        MemoryType.KNOWLEDGE: "📚",
        MemoryType.SKILL: "🛠️",
        MemoryType.GOAL: "🎯",
        MemoryType.EXPERIENCE: "🌟",
        MemoryType.CONTEXTUAL: "💬"
    }

    # Chinese display label for each memory type.
    TYPE_LABELS = {
        MemoryType.PERSONAL_FACT: "个人事实",
        MemoryType.EVENT: "事件",
        MemoryType.PREFERENCE: "偏好",
        MemoryType.OPINION: "观点",
        MemoryType.RELATIONSHIP: "关系",
        MemoryType.EMOTION: "情感",
        MemoryType.KNOWLEDGE: "知识",
        MemoryType.SKILL: "技能",
        MemoryType.GOAL: "目标",
        MemoryType.EXPERIENCE: "经验",
        MemoryType.CONTEXTUAL: "上下文"
    }

    def __init__(self, config: Optional[FormatterConfig] = None):
        """Create a formatter; a default ``FormatterConfig`` is used when none is given."""
        self.config = config or FormatterConfig()

    def format_memories_for_prompt(
        self,
        memories: List[MemoryChunk],
        query_context: Optional[str] = None
    ) -> str:
        """Format a list of memories as a Markdown section for an LLM prompt.

        Args:
            memories: Memories to render.
            query_context: Accepted for interface compatibility; not used.

        Returns:
            The Markdown text, or an empty string when ``memories`` is empty.
        """
        if not memories:
            return ""
        lines = ["## 🧠 相关记忆回顾", ""]
        if self.config.group_by_type:
            lines.extend(self._format_memories_by_type(memories))
        else:
            lines.extend(self._format_memories_chronologically(memories))
        return "\n".join(lines)

    def _display_text(self, memory: MemoryChunk) -> str:
        """Return the text to show for a memory: ``display`` if set, else ``text_content``."""
        return memory.display or memory.text_content

    def _truncate(self, text: str) -> str:
        """Cut ``text`` to ``max_display_length`` characters, ending with "..." when shortened."""
        limit = self.config.max_display_length
        if len(text) <= limit:
            return text
        # max(..., 0) avoids a negative slice when the limit is shorter than the ellipsis.
        return text[:max(limit - 3, 0)] + "..."

    def _format_date(self, memory: MemoryChunk) -> Optional[str]:
        """Return the creation date rendered with ``datetime_format``, or None if unset (<= 0)."""
        timestamp = memory.metadata.created_at
        if timestamp > 0:
            return datetime.fromtimestamp(timestamp).strftime(self.config.datetime_format)
        return None

    def _format_memories_by_type(self, memories: List[MemoryChunk]) -> List[str]:
        """Render memories grouped under one ``###`` heading per memory type."""
        # Group while keeping first-seen order of the types.
        grouped_memories: Dict[Any, List[MemoryChunk]] = {}
        for memory in memories:
            grouped_memories.setdefault(memory.memory_type, []).append(memory)

        lines = []
        for memory_type, type_memories in grouped_memories.items():
            emoji = self.TYPE_EMOJI_MAP.get(memory_type, "📝")
            label = self.TYPE_LABELS.get(memory_type, memory_type.value)
            lines.extend([
                f"### {emoji} {label}",
                ""
            ])
            # The heading already names the type, so items omit their own type marker.
            lines.extend(
                self._format_single_memory(memory, include_type=False)
                for memory in type_memories
            )
            lines.append("")  # blank line between type groups
        return lines

    def _format_memories_chronologically(self, memories: List[MemoryChunk]) -> List[str]:
        """Render memories as a numbered list in the order they were supplied."""
        return [
            self._format_single_memory(memory, include_type=True, index=i)
            for i, memory in enumerate(memories, 1)
        ]

    def _format_single_memory(
        self,
        memory: MemoryChunk,
        include_type: bool = True,
        index: Optional[int] = None
    ) -> str:
        """Render one memory as a Markdown list item.

        Args:
            memory: The memory to render.
            include_type: Whether to prepend the type marker (if enabled in config).
            index: Optional 1-based ordinal to prepend.

        Returns:
            A single ``- ...`` line.
        """
        cfg = self.config
        # The bracket layout has its own renderer.
        if cfg.use_bracket_format:
            return self._format_single_memory_bracket(memory)

        display_text = self._truncate(self._display_text(memory))

        prefix_parts = []
        # Tracks whether a "**" was emitted, so the closing marker is added
        # exactly when it is needed (previously it could be missing or stray).
        bold_open = False

        if index is not None:
            prefix_parts.append(f"{index}.")

        if include_type and cfg.include_memory_types:
            if cfg.use_emoji_icons:
                emoji = self.TYPE_EMOJI_MAP.get(memory.memory_type, "📝")
                prefix_parts.append(f"**{emoji}")
            else:
                label = self.TYPE_LABELS.get(memory.memory_type, memory.memory_type.value)
                prefix_parts.append(f"**[{label}]")
            bold_open = True

        if cfg.include_timestamps:
            time_str = self._format_date(memory)
            if time_str is not None:
                if cfg.use_emoji_icons:
                    prefix_parts.append(f"⏰ {time_str}")
                else:
                    prefix_parts.append(f"({time_str})")

        if cfg.include_confidence:
            # NOTE(review): assumes confidence.value is an int in 1..4 — confirm against ConfidenceLevel.
            confidence = memory.metadata.confidence.value
            confidence_stars = "★" * confidence + "☆" * (4 - confidence)
            prefix_parts.append(f"信度:{confidence_stars}")

        if not prefix_parts:
            return f"- {display_text}"
        closer = "** " if bold_open else " "
        return f"- {' '.join(prefix_parts)}{closer}{display_text}"

    def _format_single_memory_bracket(self, memory: MemoryChunk) -> str:
        """Render one memory in the bracket layout: ``- **[type]** content``.

        In compact mode the text is only truncated. Otherwise, when timestamps
        are enabled, the creation date is prepended as a natural-language
        clause unless the text already contains "在" or "当".
        """
        cfg = self.config
        display_text = self._display_text(memory)
        if cfg.compact_format:
            display_text = self._truncate(display_text)
        elif cfg.include_timestamps:
            # Uses the configured date pattern rather than a hard-coded copy of its default.
            time_str = self._format_date(memory)
            if time_str is not None and "在" not in display_text and "当" not in display_text:
                display_text = f"在{time_str}，{display_text}"

        label = self.TYPE_LABELS.get(memory.memory_type, memory.memory_type.value)
        return f"- **[{label}]** {display_text}"

    def format_memory_summary(self, memories: List[MemoryChunk]) -> str:
        """Return a one-line summary: total count plus a per-type breakdown when types differ."""
        if not memories:
            return "暂无相关记忆。"

        total_count = len(memories)
        type_counts = {}
        for memory in memories:
            memory_type = memory.memory_type
            type_counts[memory_type] = type_counts.get(memory_type, 0) + 1

        lines = [f"**记忆摘要**: 共找到 {total_count} 条相关记忆"]
        # The breakdown is only informative when more than one type is present.
        if len(type_counts) > 1:
            type_summaries = []
            for memory_type, count in type_counts.items():
                emoji = self.TYPE_EMOJI_MAP.get(memory_type, "📝")
                label = self.TYPE_LABELS.get(memory_type, memory_type.value)
                type_summaries.append(f"{emoji}{label} {count}条")
            lines.append(f"包括: {', '.join(type_summaries)}")
        return " | ".join(lines)

    def format_for_debug(self, memories: List[MemoryChunk]) -> str:
        """Return a verbose Markdown dump of each memory's key fields for debugging."""
        if not memories:
            return "无记忆数据"
        lines = ["### 记忆调试信息", ""]
        for i, memory in enumerate(memories, 1):
            # Same fallback as the prompt renderers, so an unset display does not break the dump.
            content = self._display_text(memory)
            lines.extend([
                f"**记忆 {i}** (ID: {memory.memory_id[:8]})",
                f"- 类型: {memory.memory_type.value}",
                f"- 内容: {content[:100]}{'...' if len(content) > 100 else ''}",
                f"- 访问次数: {memory.metadata.access_count}",
                f"- 置信度: {memory.metadata.confidence.value}/4",
                f"- 重要性: {memory.metadata.importance.value}/4",
                f"- 创建时间: {datetime.fromtimestamp(memory.metadata.created_at).strftime('%Y-%m-%d %H:%M')}",
                ""
            ])
        return "\n".join(lines)
 # 创建默认格式化器实例
 default_formatter = MemoryFormatter()
 def format_memories_for_llm(
    memories: List[MemoryChunk],
    query_context: Optional[str] = None,
    config: Optional[FormatterConfig] = None
 ) -> str:
    """Convenience wrapper: format memories as an LLM prompt section.

    A dedicated formatter is built when ``config`` is given; otherwise the
    shared module-level ``default_formatter`` is used.
    """
    chosen = MemoryFormatter(config) if config else default_formatter
    return chosen.format_memories_for_prompt(memories, query_context)
 def format_memory_summary(
    memories: List[MemoryChunk],
    config: Optional[FormatterConfig] = None
 ) -> str:
    """Convenience wrapper: produce the one-line memory summary.

    A dedicated formatter is built when ``config`` is given; otherwise the
    shared module-level ``default_formatter`` is used.
    """
    chosen = MemoryFormatter(config) if config else default_formatter
    return chosen.format_memory_summary(memories)
 def format_memories_bracket_style(
    memories: List[MemoryChunk],
    query_context: Optional[str] = None,
    compact: bool = True,
    include_timestamps: bool = True
 ) -> str:
    """Convenience wrapper: format memories in the bracket layout.

    Args:
        memories: Memories to render.
        query_context: Passed through to the formatter.
        compact: Whether to use the compact bracket form.
        include_timestamps: Whether dates may be included.

    Returns:
        The formatted Markdown text.
    """
    # Fixed bracket-layout settings; only compactness and timestamps are caller-controlled.
    bracket_settings = {
        "use_bracket_format": True,
        "compact_format": compact,
        "include_timestamps": include_timestamps,
        "include_memory_types": True,
        "use_emoji_icons": False,
        "group_by_type": False,
    }
    bracket_formatter = MemoryFormatter(FormatterConfig(**bracket_settings))
    return bracket_formatter.format_memories_for_prompt(memories, query_context)
--- a/src/chat/memory_system/memory_metadata_index.py
+++ b/src/chat/memory_system/memory_metadata_index.py
@@ -200,7 +200,8 @@ class MemoryMetadataIndex:
        created_after: Optional[float] = None,
        created_before: Optional[float] = None,
        user_id: Optional[str] = None,
-        limit: Optional[int] = None
+        limit: Optional[int] = None,
        flexible_mode: bool = True  # 新增：灵活匹配模式
    ) -> List[str]:
        """
        搜索符合条件的记忆ID列表（支持模糊匹配）
@@ -209,96 +210,275 @@ class MemoryMetadataIndex:
            List[str]: 符合条件的 memory_id 列表
        """
        with self.lock:
-            # 初始候选集（所有记忆）
+            if flexible_mode:
-            candidate_ids: Optional[Set[str]] = None
+                return self._search_flexible(
-            
+                    memory_types=memory_types,
-            # 用户过滤（必选）
+                    subjects=subjects,
-            if user_id:
+                    keywords=keywords,  # 保留用于兼容性
-                candidate_ids = {
+                    tags=tags,  # 保留用于兼容性
-                    mid for mid, entry in self.index.items()
+                    created_after=created_after,
-                    if entry.user_id == user_id
+                    created_before=created_before,
-                }
+                    user_id=user_id,
                    limit=limit
                )
            else:
-                candidate_ids = set(self.index.keys())
+                return self._search_strict(
-            
+                    memory_types=memory_types,
-            # 类型过滤（OR关系）
+                    subjects=subjects,
                    keywords=keywords,
                    tags=tags,
                    importance_min=importance_min,
                    importance_max=importance_max,
                    created_after=created_after,
                    created_before=created_before,
                    user_id=user_id,
                    limit=limit
                )
    def _search_flexible(
        self,
        memory_types: Optional[List[str]] = None,
        subjects: Optional[List[str]] = None,
        created_after: Optional[float] = None,
        created_before: Optional[float] = None,
        user_id: Optional[str] = None,
        limit: Optional[int] = None,
        **kwargs  # 接受但不使用的参数
    ) -> List[str]:
        """
        灵活搜索模式：2/4项匹配即可，支持部分匹配
        评分维度：
        1. 记忆类型匹配 (0-1分)
        2. 主语匹配 (0-1分)
        3. 宾语匹配 (0-1分)
        4. 时间范围匹配 (0-1分)
        总分 >= 2分即视为有效
        """
        # 用户过滤（必选）
        if user_id:
            base_candidates = {
                mid for mid, entry in self.index.items()
                if entry.user_id == user_id
            }
        else:
            base_candidates = set(self.index.keys())
        scored_candidates = []
        for memory_id in base_candidates:
            entry = self.index[memory_id]
            score = 0
            match_details = []
            # 1. 记忆类型匹配
            if memory_types:
-                type_ids = set()
+                type_score = 0
                for mtype in memory_types:
-                    type_ids.update(self.type_index.get(mtype, set()))
+                    if entry.memory_type == mtype:
-                candidate_ids &= type_ids
+                        type_score = 1
-            
+                        break
-            # 主语过滤（OR关系，支持模糊匹配）
+                    # 部分匹配：类型名称包含
                    if mtype.lower() in entry.memory_type.lower() or entry.memory_type.lower() in mtype.lower():
                        type_score = 0.5
                        break
                score += type_score
                if type_score > 0:
                    match_details.append(f"类型:{entry.memory_type}")
            else:
                match_details.append("类型:未指定")
            # 2. 主语匹配（支持部分匹配）
            if subjects:
-                subject_ids = set()
+                subject_score = 0
                for subject in subjects:
                    subject_norm = subject.strip().lower()
-                    # 精确匹配
+                    for entry_subject in entry.subjects:
-                    if subject_norm in self.subject_index:
+                        entry_subject_norm = entry_subject.strip().lower()
-                        subject_ids.update(self.subject_index[subject_norm])
+                        # 精确匹配
-                    # 模糊匹配（包含）
+                        if subject_norm == entry_subject_norm:
-                    for indexed_subject, ids in self.subject_index.items():
+                            subject_score = 1
-                        if subject_norm in indexed_subject or indexed_subject in subject_norm:
+                            break
-                            subject_ids.update(ids)
+                        # 部分匹配：包含关系
-                candidate_ids &= subject_ids
+                        if subject_norm in entry_subject_norm or entry_subject_norm in subject_norm:
-            
+                            subject_score = 0.6
-            # 关键词过滤（OR关系，支持模糊匹配）
+                            break
-            if keywords:
+                    if subject_score == 1:
-                keyword_ids = set()
+                        break
-                for keyword in keywords:
+                score += subject_score
-                    keyword_norm = keyword.strip().lower()
+                if subject_score > 0:
-                    # 精确匹配
+                    match_details.append("主语:匹配")
-                    if keyword_norm in self.keyword_index:
+            else:
-                        keyword_ids.update(self.keyword_index[keyword_norm])
+                match_details.append("主语:未指定")
-                    # 模糊匹配（包含）
+
-                    for indexed_keyword, ids in self.keyword_index.items():
+            # 3. 宾语匹配（支持部分匹配）
-                        if keyword_norm in indexed_keyword or indexed_keyword in keyword_norm:
+            object_score = 0
-                            keyword_ids.update(ids)
+            if entry.objects:
-                candidate_ids &= keyword_ids
+                for entry_object in entry.objects:
-            
+                    entry_object_norm = str(entry_object).strip().lower()
-            # 标签过滤（OR关系）
+                    # 检查是否与主语相关（主宾关联）
-            if tags:
+                    for subject in subjects or []:
-                tag_ids = set()
+                        subject_norm = subject.strip().lower()
-                for tag in tags:
+                        if subject_norm in entry_object_norm or entry_object_norm in subject_norm:
-                    tag_norm = tag.strip().lower()
+                            object_score = 0.8
-                    tag_ids.update(self.tag_index.get(tag_norm, set()))
+                            match_details.append("宾语:主宾关联")
-                candidate_ids &= tag_ids
+                            break
-            
+                    if object_score > 0:
-            # 重要性过滤
+                        break
-            if importance_min is not None or importance_max is not None:
+
-                importance_ids = {
+            score += object_score
-                    mid for mid in candidate_ids
+            if object_score > 0:
-                    if (importance_min is None or self.index[mid].importance >= importance_min)
+                match_details.append("宾语:匹配")
-                    and (importance_max is None or self.index[mid].importance <= importance_max)
+            elif not entry.objects:
-                }
+                match_details.append("宾语:无")
-                candidate_ids &= importance_ids
+
-            
+            # 4. 时间范围匹配
-            # 时间范围过滤
+            time_score = 0
            if created_after is not None or created_before is not None:
-                time_ids = {
+                time_match = True
-                    mid for mid in candidate_ids
+                if created_after is not None and entry.created_at < created_after:
-                    if (created_after is None or self.index[mid].created_at >= created_after)
+                    time_match = False
-                    and (created_before is None or self.index[mid].created_at <= created_before)
+                if created_before is not None and entry.created_at > created_before:
-                }
+                    time_match = False
-                candidate_ids &= time_ids
+                if time_match:
-            
+                    time_score = 1
-            # 转换为列表并排序（按创建时间倒序）
+                    match_details.append("时间:匹配")
-            result_ids = sorted(
+                else:
-                candidate_ids,
+                    match_details.append("时间:不匹配")
-                key=lambda mid: self.index[mid].created_at,
+            else:
-                reverse=True
+                match_details.append("时间:未指定")
-            )
+
-            
+            score += time_score
-            # 限制数量
+
-            if limit:
+            # 只有总分 >= 2 的记忆才会被返回
-                result_ids = result_ids[:limit]
+            if score >= 2:
-            
+                scored_candidates.append((memory_id, score, match_details))
-            logger.debug(
+
-                f"元数据索引搜索: types={memory_types}, subjects={subjects}, "
+        # 按分数和时间排序
-                f"keywords={keywords}, 返回={len(result_ids)}条"
+        scored_candidates.sort(key=lambda x: (x[1], self.index[x[0]].created_at), reverse=True)
-            )
+
-            
+        if limit:
-            return result_ids
+            result_ids = [mid for mid, _, _ in scored_candidates[:limit]]
        else:
            result_ids = [mid for mid, _, _ in scored_candidates]
        logger.debug(
            f"[灵活搜索] 过滤条件: types={memory_types}, subjects={subjects}, "
            f"time_range=[{created_after}, {created_before}], 返回={len(result_ids)}条"
        )
        # 记录匹配统计
        if scored_candidates and len(scored_candidates) > 0:
            avg_score = sum(score for _, score, _ in scored_candidates) / len(scored_candidates)
            logger.debug(f"[灵活搜索] 平均匹配分数: {avg_score:.2f}, 最高分: {scored_candidates[0][1]:.2f}")
        return result_ids
    def _search_strict(
        self,
        memory_types: Optional[List[str]] = None,
        subjects: Optional[List[str]] = None,
        keywords: Optional[List[str]] = None,
        tags: Optional[List[str]] = None,
        importance_min: Optional[int] = None,
        importance_max: Optional[int] = None,
        created_after: Optional[float] = None,
        created_before: Optional[float] = None,
        user_id: Optional[str] = None,
        limit: Optional[int] = None
    ) -> List[str]:
        """Strict search mode (the original logic): every supplied filter must match.

        Values inside one filter are OR-ed together; the filters themselves
        are AND-ed by intersecting candidate sets. Subjects and keywords
        match by substring containment in either direction; types and tags
        match by exact index key.

        Returns:
            Matching memory ids, newest first, cut to ``limit`` when it is truthy.
        """
        entries = self.index

        def exact_ids(table, keys):
            # Union of the id sets stored under each key (missing keys contribute nothing).
            found = set()
            for key in keys:
                found.update(table.get(key, set()))
            return found

        def fuzzy_ids(table, terms):
            # Union of id sets whose index key equals, contains, or is contained in a term.
            found = set()
            for term in terms:
                needle = term.strip().lower()
                if needle in table:
                    found.update(table[needle])
                for indexed_key, ids in table.items():
                    if needle in indexed_key or indexed_key in needle:
                        found.update(ids)
            return found

        def within(value, lower, upper):
            # Inclusive range test where a None bound is open.
            return (lower is None or value >= lower) and (upper is None or value <= upper)

        # User scoping comes first: start from that user's memories, or from everything.
        if user_id:
            survivors = {mid for mid, entry in entries.items() if entry.user_id == user_id}
        else:
            survivors = set(entries.keys())

        if memory_types:
            survivors &= exact_ids(self.type_index, memory_types)

        if subjects:
            survivors &= fuzzy_ids(self.subject_index, subjects)

        if keywords:
            survivors &= fuzzy_ids(self.keyword_index, keywords)

        if tags:
            survivors &= exact_ids(self.tag_index, [tag.strip().lower() for tag in tags])

        if importance_min is not None or importance_max is not None:
            survivors &= {
                mid for mid in survivors
                if within(entries[mid].importance, importance_min, importance_max)
            }

        if created_after is not None or created_before is not None:
            survivors &= {
                mid for mid in survivors
                if within(entries[mid].created_at, created_after, created_before)
            }

        def created_at_of(mid):
            return entries[mid].created_at

        # Newest first.
        result_ids = sorted(survivors, key=created_at_of, reverse=True)

        # A limit of None or 0 leaves the result uncut.
        if limit:
            result_ids = result_ids[:limit]

        logger.debug(
            f"[严格搜索] types={memory_types}, subjects={subjects}, "
            f"keywords={keywords}, 返回={len(result_ids)}条"
        )
        return result_ids
    def get_entry(self, memory_id: str) -> Optional[MemoryMetadataIndexEntry]:
        """获取单个索引条目"""
--- a/src/chat/memory_system/vector_memory_storage_v2.py
+++ b/src/chat/memory_system/vector_memory_storage_v2.py
@@ -24,12 +24,63 @@ import numpy as np
 from src.common.logger import get_logger
 from src.common.vector_db import vector_db_service
 from src.chat.utils.utils import get_embedding
-from src.chat.memory_system.memory_chunk import MemoryChunk
+from src.chat.memory_system.memory_chunk import MemoryChunk, ConfidenceLevel, ImportanceLevel
 from src.chat.memory_system.memory_forgetting_engine import MemoryForgettingEngine
 from src.chat.memory_system.memory_metadata_index import MemoryMetadataIndex, MemoryMetadataIndexEntry
 logger = get_logger(__name__)
 # 全局枚举映射表缓存
 _ENUM_MAPPINGS_CACHE = {}
 def _build_enum_mapping(enum_class: type) -> Dict[str, Any]:
    """Build, and cache per enum class, lookup tables for resolving its members.

    Args:
        enum_class: The enum class to index.

    Returns:
        Dict[str, Any]: A dict with the keys ``name_to_enum`` (name in original,
        lower and upper case -> member), ``value_to_enum`` (value -> member),
        ``value_str_to_enum`` (``str(value)`` -> member), ``enum_value_to_name``
        (member -> name) and ``all_possible_strings`` (set of every accepted
        spelling).
    """
    cache_key = f"{enum_class.__module__}.{enum_class.__name__}"
    # Reuse a previously built table for this class.
    try:
        return _ENUM_MAPPINGS_CACHE[cache_key]
    except KeyError:
        pass

    by_name = {}
    by_value = {}
    by_value_text = {}
    member_names = {}
    spellings = set()

    for member in enum_class:
        value_text = str(member.value)
        # Accept the declared name as well as its lower- and upper-case forms.
        name_variants = (member.name, member.name.lower(), member.name.upper())
        for variant in name_variants:
            by_name[variant] = member
        by_value[member.value] = member
        by_value_text[value_text] = member
        member_names[member] = member.name
        spellings.update(name_variants)
        spellings.add(value_text)

    mapping = {
        "name_to_enum": by_name,
        "value_to_enum": by_value,
        "value_str_to_enum": by_value_text,
        "enum_value_to_name": member_names,
        "all_possible_strings": spellings,
    }

    _ENUM_MAPPINGS_CACHE[cache_key] = mapping
    logger.debug(f"构建枚举映射表: {enum_class.__name__} -> {len(mapping['name_to_enum'])} 个名称映射, {len(mapping['value_to_enum'])} 个值映射")
    return mapping
@dataclass
 class VectorStorageConfig:
@@ -294,8 +345,8 @@ class VectorMemoryStorage:
                    "last_modified": metadata.get("timestamp", time.time()),
                    "access_count": metadata.get("access_count", 0),
                    "relevance_score": 0.0,
-                    "confidence": int(metadata.get("confidence", 2)),  # MEDIUM
+                    "confidence": self._parse_enum_value(metadata.get("confidence", 2), ConfidenceLevel, ConfidenceLevel.MEDIUM),
-                    "importance": int(metadata.get("importance", 2)),  # NORMAL
+                    "importance": self._parse_enum_value(metadata.get("importance", 2), ImportanceLevel, ImportanceLevel.NORMAL),
                    "source_context": None,
                },
                "content": {
@@ -313,12 +364,76 @@ class VectorMemoryStorage:
                "related_memories": [],
                "temporal_context": None
            }
-            
+
            return MemoryChunk.from_dict(memory_dict)
-            
+
        except Exception as e:
            logger.error(f"转换Vector结果到MemoryChunk失败: {e}", exc_info=True)
            return None
    def _parse_enum_value(self, value: Any, enum_class: type, default: Any) -> Any:
        """解析枚举值，支持字符串、整数和枚举实例
        Args:
            value: 要解析的值（可能是字符串、整数或枚举实例）
            enum_class: 目标枚举类
            default: 默认值
        Returns:
            解析后的枚举实例
        """
        if value is None:
            return default
        # 如果已经是枚举实例，直接返回
        if isinstance(value, enum_class):
            return value
        # 如果是整数，尝试按value值匹配
        if isinstance(value, int):
            try:
                for member in enum_class:
                    if member.value == value:
                        return member
                # 如果没找到匹配的，返回默认值
                logger.warning(f"无法找到{enum_class.__name__}中value={value}的枚举项，使用默认值")
                return default
            except Exception as e:
                logger.warning(f"解析{enum_class.__name__}整数值{value}时出错: {e}，使用默认值")
                return default
        # 如果是字符串，尝试按名称或value值匹配
        if isinstance(value, str):
            str_value = value.strip().upper()
            # 先尝试按枚举名称匹配
            try:
                if hasattr(enum_class, str_value):
                    return getattr(enum_class, str_value)
            except AttributeError:
                pass
            # 再尝试按value值匹配（如果value是字符串形式的数字）
            try:
                int_value = int(str_value)
                return self._parse_enum_value(int_value, enum_class, default)
            except ValueError:
                pass
            # 最后尝试按小写名称匹配
            try:
                for member in enum_class:
                    if member.value.upper() == str_value:
                        return member
                logger.warning(f"无法找到{enum_class.__name__}中名称或value为'{value}'的枚举项，使用默认值")
                return default
            except Exception as e:
                logger.warning(f"解析{enum_class.__name__}字符串值'{value}'时出错: {e}，使用默认值")
                return default
        # 其他类型，返回默认值
        logger.warning(f"不支持的{enum_class.__name__}值类型: {type(value)}，使用默认值")
        return default
    def _get_from_cache(self, memory_id: str) -> Optional[MemoryChunk]:
        """从缓存获取记忆"""
@@ -518,14 +633,19 @@ class VectorMemoryStorage:
                    created_after=metadata_filters.get('created_after'),
                    created_before=metadata_filters.get('created_before'),
                    user_id=metadata_filters.get('user_id'),
-                    limit=self.config.search_limit * 2  # 粗筛返回更多候选
+                    limit=self.config.search_limit * 2,  # 粗筛返回更多候选
                    flexible_mode=True  # 使用灵活匹配模式
                )
                logger.info(f"[JSON元数据粗筛] 完成，筛选出 {len(candidate_ids)} 个候选ID")
-                
+
-                # 如果粗筛后没有结果，直接返回
+                # 如果粗筛后没有结果，回退到全部记忆搜索
                if not candidate_ids:
-                    logger.warning("JSON元数据粗筛后无候选，返回空结果")
+                    total_memories = len(self.metadata_index.index)
-                    return []
+                    logger.warning(f"JSON元数据粗筛后无候选，启用回退机制：在全部 {total_memories} 条记忆中进行向量搜索")
                    logger.info("💡 提示：这可能是因为查询条件过于严格，或相关记忆的元数据与查询条件不完全匹配")
                    candidate_ids = None  # 设为None表示不限制候选ID
                else:
                    logger.debug(f"[JSON元数据粗筛] 成功筛选出候选，进入向量精筛阶段")
            # === 阶段二：向量精筛 ===
            # 生成查询向量
@@ -543,6 +663,8 @@ class VectorMemoryStorage:
                # ChromaDB的where条件需要使用$in操作符
                where_conditions["memory_id"] = {"$in": candidate_ids}
                logger.debug(f"[向量精筛] 限制在 {len(candidate_ids)} 个候选ID内搜索")
            else:
                logger.info("[向量精筛] 在全部记忆中搜索（元数据筛选无结果回退）")
            # 查询Vector DB
            logger.debug(f"[向量精筛] 开始，limit={min(limit, self.config.search_limit)}")
--- a/src/chat/replyer/default_generator.py
+++ b/src/chat/replyer/default_generator.py
@@ -18,6 +18,7 @@ from src.individuality.individuality import get_individuality
 from src.llm_models.utils_model import LLMRequest
 from src.chat.message_receive.message import UserInfo, Seg, MessageRecv, MessageSending
 from src.chat.message_receive.chat_stream import ChatStream
 from src.chat.utils.memory_mappings import get_memory_type_chinese_label
 from src.chat.message_receive.uni_message_sender import HeartFCSender
 from src.chat.utils.timer_calculator import Timer
 from src.chat.utils.utils import get_chat_type_and_target_info
@@ -621,20 +622,6 @@ class DefaultReplyer:
                running_memories = []
                instant_memory = ""
        def _format_confidence_label(value: Optional[float]) -> str:
            """Turn a numeric confidence score into a short Chinese label.

            ``None`` yields "未知". Otherwise the score is truncated with
            ``int()`` and looked up among the known levels 1-4; any other
            level is rendered as the raw score with two decimals.
            """
            if value is None:
                return "未知"
            labels = {4: "已验证", 3: "高", 2: "中等", 1: "较低"}
            bucket = int(value)
            if bucket in labels:
                return labels[bucket]
            # Unknown level: show the original number instead of a label.
            return f"{value:.2f}"
        def _format_importance_label(value: Optional[float]) -> str:
            """Turn a numeric importance score into a short Chinese label.

            ``None`` yields "未知". Otherwise the score is truncated with
            ``int()`` and looked up among the known levels 1-4; any other
            level is rendered as the raw score with two decimals.
            """
            if value is None:
                return "未知"
            labels = {4: "关键", 3: "高", 2: "一般", 1: "较低"}
            bucket = int(value)
            if bucket in labels:
                return labels[bucket]
            # Unknown level: show the original number instead of a label.
            return f"{value:.2f}"
        # 构建记忆字符串，使用方括号格式
        memory_str = ""
        has_any_memory = False
@@ -662,16 +649,8 @@ class DefaultReplyer:
                    logger.debug(f"[记忆构建] 空记忆详情: {running_memory}")
                    continue
-                # 映射记忆类型到中文标签
+                # 使用全局记忆类型映射表
-                type_mapping = {
+                chinese_type = get_memory_type_chinese_label(memory_type)
                    "personal_fact": "个人事实",
                    "preference": "偏好",
                    "event": "事件",
                    "opinion": "观点",
                    "relationship": "个人事实",
                    "unknown": "未知"
                }
                chinese_type = type_mapping.get(memory_type, "未知")
                # 提取纯净内容（如果包含旧格式的元数据）
                clean_content = content
--- a/src/chat/utils/memory_mappings.py
+++ b/src/chat/utils/memory_mappings.py
@@ -0,0 +1,109 @@
 # -*- coding: utf-8 -*-
 """
 记忆系统相关的映射表和工具函数
 提供记忆类型、置信度、重要性等的中文标签映射
 """
 # Complete lookup table from memory-type identifier to its Chinese display label.
 # Keys are lowercase snake_case strings (presumably the values of the MemoryType
 # enum — TODO confirm against memory_chunk.py). Identifiers missing from this table
 # are reported as "未知" by get_memory_type_chinese_label().
 MEMORY_TYPE_CHINESE_MAPPING = {
    "personal_fact": "个人事实",  # personal fact
    "preference": "偏好",  # preference
    "event": "事件",  # event
    "opinion": "观点",  # opinion
    "relationship": "人际关系",  # interpersonal relationship
    "emotion": "情感状态",  # emotional state
    "knowledge": "知识信息",  # knowledge / information
    "skill": "技能能力",  # skill / ability
    "goal": "目标计划",  # goal / plan
    "experience": "经验教训",  # experience / lessons learned
    "contextual": "上下文信息",  # contextual information
    "unknown": "未知"  # unknown
 }
 # Lookup table from confidence level to its Chinese display label.
 # Every level is reachable both by integer key (1-4) and by upper-case name key;
 # the two key styles map to the same label (1/"LOW", 2/"MEDIUM", 3/"HIGH", 4/"VERIFIED").
 # NOTE: get_confidence_level_chinese_label() upper-cases string input before the
 # lookup, so the lower-case "unknown" key is never matched by that function; such
 # input still yields "未知" through the function's default value.
 CONFIDENCE_LEVEL_CHINESE_MAPPING = {
    1: "低置信度",
    2: "中等置信度",
    3: "高置信度",
    4: "已验证",
    "LOW": "低置信度",
    "MEDIUM": "中等置信度",
    "HIGH": "高置信度",
    "VERIFIED": "已验证",
    "unknown": "未知"
 }
 # Lookup table from importance level to its Chinese display label.
 # Every level is reachable both by integer key (1-4) and by upper-case name key;
 # the two key styles map to the same label (1/"LOW", 2/"NORMAL", 3/"HIGH", 4/"CRITICAL").
 # NOTE: get_importance_level_chinese_label() upper-cases string input before the
 # lookup, so the lower-case "unknown" key is never matched by that function; such
 # input still yields "未知" through the function's default value.
 IMPORTANCE_LEVEL_CHINESE_MAPPING = {
    1: "低重要性",
    2: "一般重要性",
    3: "高重要性",
    4: "关键重要性",
    "LOW": "低重要性",
    "NORMAL": "一般重要性",
    "HIGH": "高重要性",
    "CRITICAL": "关键重要性",
    "unknown": "未知"
 }
 def get_memory_type_chinese_label(memory_type: str) -> str:
    """Return the Chinese display label for a memory type.

    Args:
        memory_type: Memory type identifier such as ``"event"``. Enum-like
            objects exposing a ``value`` attribute are unwrapped first, and
            string matching tolerates surrounding whitespace and letter case.

    Returns:
        str: The label found in ``MEMORY_TYPE_CHINESE_MAPPING``, or "未知"
        when the type is not recognised or is not a string.
    """
    # Unwrap enum instances, the same way the level-label helpers in this module do.
    if hasattr(memory_type, 'value'):
        memory_type = memory_type.value
    # Non-string input (None, numbers, unhashable containers) can never match a key;
    # answering with the default avoids a TypeError from hashing e.g. a list.
    if not isinstance(memory_type, str):
        return "未知"
    # Exact match first so existing callers get exactly the result they got before.
    label = MEMORY_TYPE_CHINESE_MAPPING.get(memory_type)
    if label is None:
        # All keys are lowercase, so normalise before giving up.
        label = MEMORY_TYPE_CHINESE_MAPPING.get(memory_type.strip().lower(), "未知")
    return label
 def get_confidence_level_chinese_label(level) -> str:
    """Return the Chinese display label for a confidence level.

    Args:
        level: Confidence level given as an integer (1-4), an integral float
            (``3.0``), a level name in any letter case (``"high"``), a numeric
            string (``"3"``), or an enum-like object exposing ``value``.

    Returns:
        str: The label found in ``CONFIDENCE_LEVEL_CHINESE_MAPPING``, or
        "未知" when the level cannot be resolved.
    """
    # Unwrap enum instances to their underlying value.
    if hasattr(level, 'value'):
        level = level.value
    # Levels that went through JSON/metadata storage may come back as floats.
    if isinstance(level, float) and level.is_integer():
        level = int(level)
    if isinstance(level, str):
        text = level.strip()
        # Name keys in the mapping are upper-case.
        label = CONFIDENCE_LEVEL_CHINESE_MAPPING.get(text.upper())
        if label is not None:
            return label
        # Not a known name: accept a numeric string such as "3".
        try:
            level = int(text)
        except ValueError:
            return "未知"
    if isinstance(level, int):
        return CONFIDENCE_LEVEL_CHINESE_MAPPING.get(level, "未知")
    return "未知"
 def get_importance_level_chinese_label(level) -> str:
    """Return the Chinese display label for an importance level.

    Args:
        level: Importance level given as an integer (1-4), an integral float
            (``3.0``), a level name in any letter case (``"critical"``), a
            numeric string (``"3"``), or an enum-like object exposing ``value``.

    Returns:
        str: The label found in ``IMPORTANCE_LEVEL_CHINESE_MAPPING``, or
        "未知" when the level cannot be resolved.
    """
    # Unwrap enum instances to their underlying value.
    if hasattr(level, 'value'):
        level = level.value
    # Levels that went through JSON/metadata storage may come back as floats.
    if isinstance(level, float) and level.is_integer():
        level = int(level)
    if isinstance(level, str):
        text = level.strip()
        # Name keys in the mapping are upper-case.
        label = IMPORTANCE_LEVEL_CHINESE_MAPPING.get(text.upper())
        if label is not None:
            return label
        # Not a known name: accept a numeric string such as "3".
        try:
            level = int(text)
        except ValueError:
            return "未知"
    if isinstance(level, int):
        return IMPORTANCE_LEVEL_CHINESE_MAPPING.get(level, "未知")
    return "未知"