feat(context): 为大语言模型提供过去网页搜索的上下文记忆

此更改使聊天机器人能够记住并引用过去网页搜索的相关信息，从而显著提高响应质量和连贯性。系统不再将每个查询视为孤立事件，而是在生成新响应之前，对之前的 `web_search` 结果缓存进行向量相似度搜索。如果发现相关的历史信息，会自动将其作为“相关的历史搜索结果”注入到大语言模型的提示中。这使模型能够立即访问相关背景信息，避免对已经讨论过的主题重复搜索。为了支持这一新功能： - 改写了 `web_search` 工具的提示：明确列出使用场景，并说明搜索结果会被缓存，以鼓励大语言模型更频繁地使用该工具。 - 增加了重要工具结果（如网页搜索）的预览长度。
2025-12-04 04:12:36 +08:00
parent f519f87884
commit 22767ce234
4 changed files with 186 additions and 13 deletions
--- a/src/plugins/built_in/kokoro_flow_chatter/context_builder.py
+++ b/src/plugins/built_in/kokoro_flow_chatter/context_builder.py
@@ -334,12 +334,46 @@ class KFCContextBuilder:
            
            tool_executor = ToolExecutor(chat_id=self.chat_id)
            
-            # 首先获取当前的历史记录（在执行新工具调用之前）
+            info_parts = []
+            
+            # ========== 1. 主动召回联网搜索缓存 ==========
+            try:
+                from src.common.cache_manager import tool_cache
+                
+                # 使用聊天历史作为语义查询
+                query_text = chat_history if chat_history else target_message
+                recalled_caches = await tool_cache.recall_relevant_cache(
+                    query_text=query_text,
+                    tool_name="web_search",  # 只召回联网搜索的缓存
+                    top_k=2,
+                    similarity_threshold=0.65,  # 相似度阈值
+                )
+                
+                if recalled_caches:
+                    recall_parts = ["### 🔍 相关的历史搜索结果"]
+                    for item in recalled_caches:
+                        original_query = item.get("query", "")
+                        content = item.get("content", "")
+                        similarity = item.get("similarity", 0)
+                        if content:
+                            # 截断过长的内容
+                            if len(content) > 500:
+                                content = content[:500] + "..."
+                            recall_parts.append(f"**搜索「{original_query}」** (相关度:{similarity:.0%})\n{content}")
+                    
+                    info_parts.append("\n\n".join(recall_parts))
+                    logger.info(f"[缓存召回] 召回了 {len(recalled_caches)} 条相关搜索缓存")
+            except Exception as e:
+                logger.debug(f"[缓存召回] 召回失败（非关键）: {e}")
+            
+            # ========== 2. 获取工具调用历史 ==========
            tool_history_str = tool_executor.history_manager.format_for_prompt(
                max_records=3, include_results=True
            )
+            if tool_history_str:
+                info_parts.append(tool_history_str)
            
-            # 然后执行工具调用
+            # ========== 3. 执行工具调用 ==========
            tool_results, _, _ = await tool_executor.execute_from_chat_message(
                sender=sender_name,
                target_message=target_message,
@@ -347,12 +381,6 @@ class KFCContextBuilder:
                return_details=False,
            )
            
-            info_parts = []
-            
-            # 显示之前的工具调用历史（不包括当前这次调用）
-            if tool_history_str:
-                info_parts.append(tool_history_str)
-            
            # 显示当前工具调用的结果（简要信息）
            if tool_results:
                current_results_parts = ["### 🔧 刚获取的工具信息"]
--- a/src/plugins/built_in/web_search_tool/tools/web_search.py
+++ b/src/plugins/built_in/web_search_tool/tools/web_search.py
@@ -28,7 +28,12 @@ class WebSurfingTool(BaseTool):

    name: str = "web_search"
    description: str = (
-        "用于执行网络搜索。当用户明确要求搜索，或者需要获取关于公司、产品、事件的最新信息、新闻或动态时，必须使用此工具"
+        "联网搜索工具。使用场景：\n"
+        "1. 用户问的问题你不确定答案、需要验证\n"
+        "2. 涉及最新信息（新闻、产品、事件、时效性内容）\n"
+        "3. 需要查找具体数据、事实、定义\n"
+        "4. 用户明确要求搜索\n"
+        "不要担心调用频率，搜索结果会被缓存。"
    )
    available_for_llm: bool = True
    parameters: ClassVar[list] = [