feat(cache): 用文件修改时间替换源码哈希生成缓存键
BREAKING CHANGE: CacheManager 的 _generate_key/get/set 方法签名变更,现在需要传入 tool_file_path 而非 tool_class 实例,所有调用方需跟进适配。
This commit is contained in:
@@ -2,10 +2,12 @@ import time
|
|||||||
import json
|
import json
|
||||||
import hashlib
|
import hashlib
|
||||||
import inspect
|
import inspect
|
||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import faiss
|
import faiss
|
||||||
import chromadb
|
import chromadb
|
||||||
from typing import Any, Dict, Optional
|
from typing import Any, Dict, Optional, Union
|
||||||
from src.common.logger import get_logger
|
from src.common.logger import get_logger
|
||||||
from src.llm_models.utils_model import LLMRequest
|
from src.llm_models.utils_model import LLMRequest
|
||||||
from src.config.config import global_config, model_config
|
from src.config.config import global_config, model_config
|
||||||
@@ -90,26 +92,33 @@ class CacheManager:
|
|||||||
logger.error(f"验证嵌入向量时发生错误: {e}")
|
logger.error(f"验证嵌入向量时发生错误: {e}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def _generate_key(self, tool_name: str, function_args: Dict[str, Any], tool_class: Any) -> str:
|
def _generate_key(self, tool_name: str, function_args: Dict[str, Any], tool_file_path: Union[str, Path]) -> str:
|
||||||
"""生成确定性的缓存键,包含代码哈希以实现自动失效。"""
|
"""生成确定性的缓存键,包含文件修改时间以实现自动失效。"""
|
||||||
try:
|
try:
|
||||||
source_code = inspect.getsource(tool_class)
|
tool_file_path = Path(tool_file_path)
|
||||||
code_hash = hashlib.md5(source_code.encode()).hexdigest()
|
if tool_file_path.exists():
|
||||||
except (TypeError, OSError) as e:
|
file_name = tool_file_path.name
|
||||||
code_hash = "unknown"
|
file_mtime = tool_file_path.stat().st_mtime
|
||||||
logger.warning(f"无法获取 {tool_class.__name__} 的源代码,代码哈希将为 'unknown'。错误: {e}")
|
file_hash = hashlib.md5(f"{file_name}:{file_mtime}".encode()).hexdigest()
|
||||||
|
else:
|
||||||
|
file_hash = "unknown"
|
||||||
|
logger.warning(f"工具文件不存在: {tool_file_path}")
|
||||||
|
except (OSError, TypeError) as e:
|
||||||
|
file_hash = "unknown"
|
||||||
|
logger.warning(f"无法获取文件信息: {tool_file_path},错误: {e}")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
sorted_args = json.dumps(function_args, sort_keys=True)
|
sorted_args = json.dumps(function_args, sort_keys=True)
|
||||||
except TypeError:
|
except TypeError:
|
||||||
sorted_args = repr(sorted(function_args.items()))
|
sorted_args = repr(sorted(function_args.items()))
|
||||||
return f"{tool_name}::{sorted_args}::{code_hash}"
|
return f"{tool_name}::{sorted_args}::{file_hash}"
|
||||||
|
|
||||||
async def get(self, tool_name: str, function_args: Dict[str, Any], tool_class: Any, semantic_query: Optional[str] = None) -> Optional[Any]:
|
async def get(self, tool_name: str, function_args: Dict[str, Any], tool_file_path: Union[str, Path], semantic_query: Optional[str] = None) -> Optional[Any]:
|
||||||
"""
|
"""
|
||||||
从缓存获取结果,查询顺序: L1-KV -> L1-Vector -> L2-KV -> L2-Vector。
|
从缓存获取结果,查询顺序: L1-KV -> L1-Vector -> L2-KV -> L2-Vector。
|
||||||
"""
|
"""
|
||||||
# 步骤 1: L1 精确缓存查询
|
# 步骤 1: L1 精确缓存查询
|
||||||
key = self._generate_key(tool_name, function_args, tool_class)
|
key = self._generate_key(tool_name, function_args, tool_file_path)
|
||||||
logger.debug(f"生成的缓存键: {key}")
|
logger.debug(f"生成的缓存键: {key}")
|
||||||
if semantic_query:
|
if semantic_query:
|
||||||
logger.debug(f"使用的语义查询: '{semantic_query}'")
|
logger.debug(f"使用的语义查询: '{semantic_query}'")
|
||||||
@@ -220,14 +229,14 @@ class CacheManager:
|
|||||||
logger.debug(f"缓存未命中: {key}")
|
logger.debug(f"缓存未命中: {key}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
async def set(self, tool_name: str, function_args: Dict[str, Any], tool_class: Any, data: Any, ttl: Optional[int] = None, semantic_query: Optional[str] = None):
|
async def set(self, tool_name: str, function_args: Dict[str, Any], tool_file_path: Union[str, Path], data: Any, ttl: Optional[int] = None, semantic_query: Optional[str] = None):
|
||||||
"""将结果存入所有缓存层。"""
|
"""将结果存入所有缓存层。"""
|
||||||
if ttl is None:
|
if ttl is None:
|
||||||
ttl = self.default_ttl
|
ttl = self.default_ttl
|
||||||
if ttl <= 0:
|
if ttl <= 0:
|
||||||
return
|
return
|
||||||
|
|
||||||
key = self._generate_key(tool_name, function_args, tool_class)
|
key = self._generate_key(tool_name, function_args, tool_file_path)
|
||||||
expires_at = time.time() + ttl
|
expires_at = time.time() + ttl
|
||||||
|
|
||||||
# 写入 L1
|
# 写入 L1
|
||||||
|
|||||||
@@ -87,9 +87,13 @@ class WebSurfingTool(BaseTool):
|
|||||||
if not query:
|
if not query:
|
||||||
return {"error": "搜索查询不能为空。"}
|
return {"error": "搜索查询不能为空。"}
|
||||||
|
|
||||||
|
# 获取当前文件路径用于缓存键
|
||||||
|
import os
|
||||||
|
current_file_path = os.path.abspath(__file__)
|
||||||
|
|
||||||
# 检查缓存
|
# 检查缓存
|
||||||
query = function_args.get("query")
|
query = function_args.get("query")
|
||||||
cached_result = await tool_cache.get(self.name, function_args, tool_class=self.__class__, semantic_query=query)
|
cached_result = await tool_cache.get(self.name, function_args, current_file_path, semantic_query=query)
|
||||||
if cached_result:
|
if cached_result:
|
||||||
logger.info(f"缓存命中: {self.name} -> {function_args}")
|
logger.info(f"缓存命中: {self.name} -> {function_args}")
|
||||||
return cached_result
|
return cached_result
|
||||||
@@ -111,7 +115,7 @@ class WebSurfingTool(BaseTool):
|
|||||||
# 保存到缓存
|
# 保存到缓存
|
||||||
if "error" not in result:
|
if "error" not in result:
|
||||||
query = function_args.get("query")
|
query = function_args.get("query")
|
||||||
await tool_cache.set(self.name, function_args, self.__class__, result, semantic_query=query)
|
await tool_cache.set(self.name, function_args, current_file_path, result, semantic_query=query)
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
@@ -464,8 +468,12 @@ class URLParserTool(BaseTool):
|
|||||||
"""
|
"""
|
||||||
执行URL内容提取和总结。优先使用Exa,失败后尝试本地解析。
|
执行URL内容提取和总结。优先使用Exa,失败后尝试本地解析。
|
||||||
"""
|
"""
|
||||||
|
# 获取当前文件路径用于缓存键
|
||||||
|
import os
|
||||||
|
current_file_path = os.path.abspath(__file__)
|
||||||
|
|
||||||
# 检查缓存
|
# 检查缓存
|
||||||
cached_result = await tool_cache.get(self.name, function_args, tool_class=self.__class__)
|
cached_result = await tool_cache.get(self.name, function_args, current_file_path)
|
||||||
if cached_result:
|
if cached_result:
|
||||||
logger.info(f"缓存命中: {self.name} -> {function_args}")
|
logger.info(f"缓存命中: {self.name} -> {function_args}")
|
||||||
return cached_result
|
return cached_result
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
[inner]
|
[inner]
|
||||||
version = "6.3.7"
|
version = "6.3.8"
|
||||||
|
|
||||||
#----以下是给开发人员阅读的,如果你只是部署了麦麦,不需要阅读----
|
#----以下是给开发人员阅读的,如果你只是部署了麦麦,不需要阅读----
|
||||||
#如果你想要修改配置文件,请递增version的值
|
#如果你想要修改配置文件,请递增version的值
|
||||||
@@ -162,8 +162,8 @@ ban_msgs_regex = [
|
|||||||
|
|
||||||
[anti_prompt_injection] # LLM反注入系统配置
|
[anti_prompt_injection] # LLM反注入系统配置
|
||||||
enabled = true # 是否启用反注入系统
|
enabled = true # 是否启用反注入系统
|
||||||
enabled_rules = false # 是否启用规则检测
|
enabled_rules = true # 是否启用规则检测
|
||||||
enabled_LLM = true # 是否启用LLM检测
|
enabled_LLM = false # 是否启用LLM检测
|
||||||
process_mode = "lenient" # 处理模式:strict(严格模式,直接丢弃), lenient(宽松模式,消息加盾)
|
process_mode = "lenient" # 处理模式:strict(严格模式,直接丢弃), lenient(宽松模式,消息加盾)
|
||||||
|
|
||||||
# 白名单配置
|
# 白名单配置
|
||||||
|
|||||||
Reference in New Issue
Block a user