feat(tool_system): implement declarative caching for tools
This commit refactors the tool caching system to be more robust, configurable, and easier to use. The caching logic is centralized within the `wrap_tool_executor`, removing the need for boilerplate code within individual tool implementations. Key changes: - Adds `enable_cache`, `cache_ttl`, and `semantic_cache_query_key` attributes to `BaseTool` for declarative cache configuration. - Moves caching logic from a simple history-based lookup and individual tools into a unified handling process in `wrap_tool_executor`. - The new system leverages the central `tool_cache` manager for both exact and semantic caching based on tool configuration. - Refactors `WebSurfingTool` and `URLParserTool` to utilize the new declarative caching mechanism, simplifying their code.
This commit is contained in:
committed by
Windpicker-owo
parent
12bcde800e
commit
6b53560a7e
@@ -4,9 +4,11 @@ from datetime import datetime
|
||||
from typing import Any, Dict, List, Optional, Union
|
||||
import json
|
||||
from pathlib import Path
|
||||
import inspect
|
||||
|
||||
from .logger import get_logger
|
||||
from src.config.config import global_config
|
||||
from src.common.cache_manager import tool_cache
|
||||
|
||||
logger = get_logger("tool_history")
|
||||
|
||||
@@ -113,34 +115,6 @@ class ToolHistoryManager:
|
||||
except Exception as e:
|
||||
logger.error(f"记录工具调用时发生错误: {e}")
|
||||
|
||||
def find_cached_result(self, tool_name: str, args: Dict[str, Any]) -> Optional[Dict[str, Any]]:
|
||||
"""查找匹配的缓存记录
|
||||
|
||||
Args:
|
||||
tool_name: 工具名称
|
||||
args: 工具调用参数
|
||||
|
||||
Returns:
|
||||
Optional[Dict[str, Any]]: 如果找到匹配的缓存记录则返回结果,否则返回None
|
||||
"""
|
||||
# 检查是否启用历史记录
|
||||
if not global_config.tool.history.enable_history:
|
||||
return None
|
||||
|
||||
# 清理输入参数中的敏感信息以便比较
|
||||
sanitized_input_args = self._sanitize_args(args)
|
||||
|
||||
# 按时间倒序遍历历史记录
|
||||
for record in reversed(self._history):
|
||||
if (record["tool_name"] == tool_name and
|
||||
record["status"] == "completed" and
|
||||
record["ttl_count"] < record.get("ttl", 5)):
|
||||
# 比较参数是否匹配
|
||||
if self._sanitize_args(record["arguments"]) == sanitized_input_args:
|
||||
logger.info(f"工具 {tool_name} 命中缓存记录")
|
||||
return record["result"]
|
||||
return None
|
||||
|
||||
def _sanitize_args(self, args: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""清理参数中的敏感信息"""
|
||||
sensitive_keys = ['api_key', 'token', 'password', 'secret']
|
||||
@@ -327,27 +301,78 @@ class ToolHistoryManager:
|
||||
|
||||
def wrap_tool_executor():
|
||||
"""
|
||||
包装工具执行器以添加历史记录功能
|
||||
包装工具执行器以添加历史记录和缓存功能
|
||||
这个函数应该在系统启动时被调用一次
|
||||
"""
|
||||
from src.plugin_system.core.tool_use import ToolExecutor
|
||||
from src.plugin_system.apis.tool_api import get_tool_instance
|
||||
original_execute = ToolExecutor.execute_tool_call
|
||||
history_manager = ToolHistoryManager()
|
||||
|
||||
async def wrapped_execute_tool_call(self, tool_call, tool_instance=None):
|
||||
start_time = time.time()
|
||||
|
||||
# 确保我们有 tool_instance
|
||||
if not tool_instance:
|
||||
tool_instance = get_tool_instance(tool_call.func_name)
|
||||
|
||||
# 首先检查缓存
|
||||
if cached_result := history_manager.find_cached_result(tool_call.func_name, tool_call.args):
|
||||
logger.info(f"{self.log_prefix}使用缓存结果,跳过工具 {tool_call.func_name} 执行")
|
||||
return cached_result
|
||||
# 如果没有 tool_instance,就无法进行缓存检查,直接执行
|
||||
if not tool_instance:
|
||||
result = await original_execute(self, tool_call, None)
|
||||
execution_time = time.time() - start_time
|
||||
history_manager.record_tool_call(
|
||||
tool_name=tool_call.func_name,
|
||||
args=tool_call.args,
|
||||
result=result,
|
||||
execution_time=execution_time,
|
||||
status="completed",
|
||||
chat_id=getattr(self, 'chat_id', None),
|
||||
ttl=5 # Default TTL
|
||||
)
|
||||
return result
|
||||
|
||||
# 新的缓存逻辑
|
||||
if tool_instance.enable_cache:
|
||||
try:
|
||||
tool_file_path = inspect.getfile(tool_instance.__class__)
|
||||
semantic_query = None
|
||||
if tool_instance.semantic_cache_query_key:
|
||||
semantic_query = tool_call.args.get(tool_instance.semantic_cache_query_key)
|
||||
|
||||
cached_result = await tool_cache.get(
|
||||
tool_name=tool_call.func_name,
|
||||
function_args=tool_call.args,
|
||||
tool_file_path=tool_file_path,
|
||||
semantic_query=semantic_query
|
||||
)
|
||||
if cached_result:
|
||||
logger.info(f"{self.log_prefix}使用缓存结果,跳过工具 {tool_call.func_name} 执行")
|
||||
return cached_result
|
||||
except Exception as e:
|
||||
logger.error(f"{self.log_prefix}检查工具缓存时出错: {e}")
|
||||
|
||||
try:
|
||||
result = await original_execute(self, tool_call, tool_instance)
|
||||
execution_time = time.time() - start_time
|
||||
|
||||
# 获取工具的ttl值
|
||||
ttl = getattr(tool_instance, 'history_ttl', 5) if tool_instance else 5
|
||||
# 缓存结果
|
||||
if tool_instance.enable_cache:
|
||||
try:
|
||||
tool_file_path = inspect.getfile(tool_instance.__class__)
|
||||
semantic_query = None
|
||||
if tool_instance.semantic_cache_query_key:
|
||||
semantic_query = tool_call.args.get(tool_instance.semantic_cache_query_key)
|
||||
|
||||
await tool_cache.set(
|
||||
tool_name=tool_call.func_name,
|
||||
function_args=tool_call.args,
|
||||
tool_file_path=tool_file_path,
|
||||
data=result,
|
||||
ttl=tool_instance.cache_ttl,
|
||||
semantic_query=semantic_query
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"{self.log_prefix}设置工具缓存时出错: {e}")
|
||||
|
||||
# 记录成功的调用
|
||||
history_manager.record_tool_call(
|
||||
@@ -357,16 +382,13 @@ def wrap_tool_executor():
|
||||
execution_time=execution_time,
|
||||
status="completed",
|
||||
chat_id=getattr(self, 'chat_id', None),
|
||||
ttl=ttl
|
||||
ttl=tool_instance.history_ttl
|
||||
)
|
||||
|
||||
return result
|
||||
|
||||
except Exception as e:
|
||||
execution_time = time.time() - start_time
|
||||
# 获取工具的ttl值
|
||||
ttl = getattr(tool_instance, 'history_ttl', 5) if tool_instance else 5
|
||||
|
||||
# 记录失败的调用
|
||||
history_manager.record_tool_call(
|
||||
tool_name=tool_call.func_name,
|
||||
@@ -375,7 +397,7 @@ def wrap_tool_executor():
|
||||
execution_time=execution_time,
|
||||
status="error",
|
||||
chat_id=getattr(self, 'chat_id', None),
|
||||
ttl=ttl
|
||||
ttl=tool_instance.history_ttl
|
||||
)
|
||||
raise
|
||||
|
||||
|
||||
@@ -31,6 +31,13 @@ class BaseTool(ABC):
|
||||
history_ttl: int = 5
|
||||
"""工具调用历史记录的TTL值,默认为5。设为0表示不记录历史"""
|
||||
|
||||
enable_cache: bool = False
|
||||
"""是否为该工具启用缓存"""
|
||||
cache_ttl: int = 3600
|
||||
"""缓存的TTL值(秒),默认为3600秒(1小时)"""
|
||||
semantic_cache_query_key: Optional[str] = None
|
||||
"""用于语义缓存的查询参数键名。如果设置,将使用此参数的值进行语义相似度搜索"""
|
||||
|
||||
def __init__(self, plugin_config: Optional[dict] = None):
|
||||
self.plugin_config = plugin_config or {} # 直接存储插件配置字典
|
||||
|
||||
|
||||
@@ -30,6 +30,12 @@ class URLParserTool(BaseTool):
|
||||
parameters = [
|
||||
("urls", ToolParamType.STRING, "要理解的网站", True, None),
|
||||
]
|
||||
|
||||
# --- 新的缓存配置 ---
|
||||
enable_cache: bool = True
|
||||
cache_ttl: int = 86400 # 缓存24小时
|
||||
semantic_cache_query_key: str = "urls"
|
||||
# --------------------
|
||||
|
||||
def __init__(self, plugin_config=None):
|
||||
super().__init__(plugin_config)
|
||||
@@ -42,10 +48,11 @@ class URLParserTool(BaseTool):
|
||||
if exa_api_keys is None:
|
||||
# 从插件配置文件读取
|
||||
exa_api_keys = self.get_config("exa.api_keys", [])
|
||||
|
||||
|
||||
# 创建API密钥管理器
|
||||
from typing import cast, List
|
||||
self.api_manager = create_api_key_manager_from_config(
|
||||
exa_api_keys,
|
||||
cast(List[str], exa_api_keys),
|
||||
lambda key: Exa(api_key=key),
|
||||
"Exa URL Parser"
|
||||
)
|
||||
@@ -135,16 +142,6 @@ class URLParserTool(BaseTool):
|
||||
"""
|
||||
执行URL内容提取和总结。优先使用Exa,失败后尝试本地解析。
|
||||
"""
|
||||
# 获取当前文件路径用于缓存键
|
||||
import os
|
||||
current_file_path = os.path.abspath(__file__)
|
||||
|
||||
# 检查缓存
|
||||
cached_result = await tool_cache.get(self.name, function_args, current_file_path)
|
||||
if cached_result:
|
||||
logger.info(f"缓存命中: {self.name} -> {function_args}")
|
||||
return cached_result
|
||||
|
||||
urls_input = function_args.get("urls")
|
||||
if not urls_input:
|
||||
return {"error": "URL列表不能为空。"}
|
||||
@@ -235,8 +232,4 @@ class URLParserTool(BaseTool):
|
||||
"errors": error_messages
|
||||
}
|
||||
|
||||
# 保存到缓存
|
||||
if "error" not in result:
|
||||
await tool_cache.set(self.name, function_args, current_file_path, result)
|
||||
|
||||
return result
|
||||
|
||||
@@ -31,6 +31,12 @@ class WebSurfingTool(BaseTool):
|
||||
("time_range", ToolParamType.STRING, "指定搜索的时间范围,可以是 'any', 'week', 'month'。默认为 'any'。", False, ["any", "week", "month"])
|
||||
] # type: ignore
|
||||
|
||||
# --- 新的缓存配置 ---
|
||||
enable_cache: bool = True
|
||||
cache_ttl: int = 7200 # 缓存2小时
|
||||
semantic_cache_query_key: str = "query"
|
||||
# --------------------
|
||||
|
||||
def __init__(self, plugin_config=None):
|
||||
super().__init__(plugin_config)
|
||||
# 初始化搜索引擎
|
||||
@@ -46,16 +52,6 @@ class WebSurfingTool(BaseTool):
|
||||
if not query:
|
||||
return {"error": "搜索查询不能为空。"}
|
||||
|
||||
# 获取当前文件路径用于缓存键
|
||||
import os
|
||||
current_file_path = os.path.abspath(__file__)
|
||||
|
||||
# 检查缓存
|
||||
cached_result = await tool_cache.get(self.name, function_args, current_file_path, semantic_query=query)
|
||||
if cached_result:
|
||||
logger.info(f"缓存命中: {self.name} -> {function_args}")
|
||||
return cached_result
|
||||
|
||||
# 读取搜索配置
|
||||
enabled_engines = config_api.get_global_config("web_search.enabled_engines", ["ddg"])
|
||||
search_strategy = config_api.get_global_config("web_search.search_strategy", "single")
|
||||
@@ -69,10 +65,6 @@ class WebSurfingTool(BaseTool):
|
||||
result = await self._execute_fallback_search(function_args, enabled_engines)
|
||||
else: # single
|
||||
result = await self._execute_single_search(function_args, enabled_engines)
|
||||
|
||||
# 保存到缓存
|
||||
if "error" not in result:
|
||||
await tool_cache.set(self.name, function_args, current_file_path, result, semantic_query=query)
|
||||
|
||||
return result
|
||||
|
||||
|
||||
Reference in New Issue
Block a user