From eb1feeeb0b1fdd4b0e325e6f60c307d80e83ad92 Mon Sep 17 00:00:00 2001 From: minecraft1024a Date: Thu, 28 Aug 2025 20:10:32 +0800 Subject: [PATCH] =?UTF-8?q?refactor(cache):=20=E9=87=8D=E6=9E=84=E5=B7=A5?= =?UTF-8?q?=E5=85=B7=E7=BC=93=E5=AD=98=E6=9C=BA=E5=88=B6=E5=B9=B6=E4=BC=98?= =?UTF-8?q?=E5=8C=96LLM=E8=AF=B7=E6=B1=82=E9=87=8D=E8=AF=95=E9=80=BB?= =?UTF-8?q?=E8=BE=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 将工具缓存的实现从`ToolExecutor`的装饰器模式重构为直接集成。缓存逻辑被移出`cache_manager.py`并整合进`ToolExecutor.execute_tool_call`方法中,简化了代码结构并使其更易于维护。 主要变更: - 从`cache_manager.py`中移除了`wrap_tool_executor`函数。 - 在`tool_use.py`中,`execute_tool_call`现在包含完整的缓存检查和设置逻辑。 - 调整了`llm_models/utils_model.py`中的LLM请求逻辑,为模型生成的空回复或截断响应增加了内部重试机制,增强了稳定性。 - 清理了项目中未使用的导入和过时的文档文件,以保持代码库的整洁。 --- docs/{ => plugins}/PLUS_COMMAND_GUIDE.md | 0 docs/plugins/command-components.md | 89 -------- docs/plugins/index.md | 2 +- docs/plugins/tool-components.md | 2 +- docs/schedule_enhancement (1).md | 121 ----------- src/chat/chat_loop/heartFC_chat.py | 1 - src/common/cache_manager.py | 196 +++++++----------- src/llm_models/utils_model.py | 120 ++++++----- src/plugin_system/apis/tool_api.py | 2 +- src/plugin_system/base/base_plugin.py | 2 +- src/plugin_system/base/plus_command.py | 2 +- src/plugin_system/core/tool_use.py | 67 +++++- .../services/content_service.py | 5 +- .../built_in/permission_management/plugin.py | 2 +- src/schedule/schedule_manager.py | 1 - 15 files changed, 199 insertions(+), 413 deletions(-) rename docs/{ => plugins}/PLUS_COMMAND_GUIDE.md (100%) delete mode 100644 docs/plugins/command-components.md delete mode 100644 docs/schedule_enhancement (1).md diff --git a/docs/PLUS_COMMAND_GUIDE.md b/docs/plugins/PLUS_COMMAND_GUIDE.md similarity index 100% rename from docs/PLUS_COMMAND_GUIDE.md rename to docs/plugins/PLUS_COMMAND_GUIDE.md diff --git a/docs/plugins/command-components.md b/docs/plugins/command-components.md deleted file mode 100644 index 77cc8accf..000000000 --- a/docs/plugins/command-components.md +++ /dev/null @@ -1,89 +0,0 @@ -# 💻 Command组件详解 - -## 📖 什么是Command - -Command是直接响应用户明确指令的组件,与Action不同,Command是**被动触发**的,当用户输入特定格式的命令时立即执行。 - -Command通过正则表达式匹配用户输入,提供确定性的功能服务。 - -### 🎯 Command的特点 - -- 🎯 **确定性执行**:匹配到命令立即执行,无随机性 -- ⚡ **即时响应**:用户主动触发,快速响应 -- 🔍 **正则匹配**:通过正则表达式精确匹配用户输入 -- 🛑 **拦截控制**:可以控制是否阻止消息继续处理 -- 📝 **参数解析**:支持从用户输入中提取参数 - ---- - -## 🛠️ Command组件的基本结构 - -首先,Command组件需要继承自`BaseCommand`类,并实现必要的方法。 - -```python -class ExampleCommand(BaseCommand): - command_name = "example" # 命令名称,作为唯一标识符 - command_description = "这是一个示例命令" # 命令描述 - command_pattern = r"" # 命令匹配的正则表达式 - - async def execute(self) -> Tuple[bool, Optional[str], bool]: - """ - 执行Command的主要逻辑 - - Returns: - Tuple[bool, str, bool]: - - 第一个bool表示是否成功执行 - - 第二个str是执行结果消息 - - 第三个bool表示是否需要阻止消息继续处理 - """ - # ---- 执行命令的逻辑 ---- - return True, "执行成功", False -``` -**`command_pattern`**: 该Command匹配的正则表达式,用于精确匹配用户输入。 - -请注意:如果希望能获取到命令中的参数,请在正则表达式中使用有命名的捕获组,例如`(?Ppattern)`。 - -这样在匹配时,内部实现可以使用`re.match.groupdict()`方法获取到所有捕获组的参数,并以字典的形式存储在`self.matched_groups`中。 - -### 匹配样例 -假设我们有一个命令`/example param1=value1 param2=value2`,对应的正则表达式可以是: - -```python -class ExampleCommand(BaseCommand): - command_name = "example" - command_description = "这是一个示例命令" - command_pattern = r"/example (?P\w+) (?P\w+)" - - async def execute(self) -> Tuple[bool, Optional[str], bool]: - # 获取匹配的参数 - param1 = self.matched_groups.get("param1") - param2 = self.matched_groups.get("param2") - - # 执行逻辑 - return True, f"参数1: {param1}, 参数2: {param2}", False -``` - ---- - -## Command 内置方法说明 -```python -class BaseCommand: - def get_config(self, key: str, default=None): - """获取插件配置值,使用嵌套键访问""" - - async def send_text(self, content: str, reply_to: str = "") -> bool: - """发送回复消息""" - - async def send_type(self, message_type: str, content: str, display_message: str = "", typing: bool = False, reply_to: str = "") -> bool: - """发送指定类型的回复消息到当前聊天环境""" - - async def send_command(self, command_name: str, args: Optional[dict] = None, display_message: str = "", storage_message: bool = True) -> bool: - """发送命令消息""" - - async def send_emoji(self, emoji_base64: str) -> bool: - """发送表情包""" - - async def send_image(self, image_base64: str) -> bool: - """发送图片""" -``` -具体参数与用法参见`BaseCommand`基类的定义。 \ No newline at end of file diff --git a/docs/plugins/index.md b/docs/plugins/index.md index fe999f393..c39efe72e 100644 --- a/docs/plugins/index.md +++ b/docs/plugins/index.md @@ -9,7 +9,7 @@ ## 组件功能详解 - [🧱 Action组件详解](action-components.md) - 掌握最核心的Action组件 -- [💻 Command组件详解](command-components.md) - 学习直接响应命令的组件 +- [💻 Command组件详解](PLUS_COMMAND_GUIDE.md) - 学习直接响应命令的组件 - [🔧 Tool组件详解](tool-components.md) - 了解如何扩展信息获取能力 - [⚙️ 配置文件系统指南](configuration-guide.md) - 学会使用自动生成的插件配置文件 - [📄 Manifest系统指南](manifest-guide.md) - 了解插件元数据管理和配置架构 diff --git a/docs/plugins/tool-components.md b/docs/plugins/tool-components.md index e27658af8..d2972c6a6 100644 --- a/docs/plugins/tool-components.md +++ b/docs/plugins/tool-components.md @@ -2,7 +2,7 @@ ## 📖 什么是工具 -工具是MoFox_Bot的信息获取能力扩展组件。如果说Action组件功能五花八门,可以拓展麦麦能做的事情,那么Tool就是在某个过程中拓宽了麦麦能够获得的信息量。 +工具是MoFox_Bot的信息获取能力扩展组件。如果说Action组件功能五花八门,可以拓展麦麦能做的事情,那么Tool就是在某个过程中拓宽了MoFox_Bot能够获得的信息量。 ### 🎯 工具的特点 diff --git a/docs/schedule_enhancement (1).md b/docs/schedule_enhancement (1).md deleted file mode 100644 index 1dc2a9b8d..000000000 --- a/docs/schedule_enhancement (1).md +++ /dev/null @@ -1,121 +0,0 @@ -# “月层计划”系统架构设计文档 - -## 1. 系统概述与目标 - -本系统旨在为MoFox_Bot引入一个动态的、由大型语言模型(LLM)驱动的“月层计划”机制。其核心目标是取代静态、预设的任务模板,转而利用LLM在程序启动时自动生成符合Bot人设的、具有时效性的月度计划。这些计划将被存储、管理,并在构建每日日程时被动态抽取和使用,从而极大地丰富日程内容的个性和多样性。 - ---- - -## 2. 核心设计原则 - -- **动态性与智能化:** 所有计划内容均由LLM实时生成,确保其独特性和创造性。 -- **人设一致性:** 计划的生成将严格围绕Bot的核心人设进行,强化角色形象。 -- **持久化与可管理:** 生成的计划将被存入专用数据库表,便于管理和追溯。 -- **消耗性与随机性:** 计划在使用后有一定几率被消耗(删除),模拟真实世界中计划的完成与迭代。 - ---- - -## 3. 系统核心流程规划 - -本系统包含两大核心流程:**启动时的计划生成流程**和**日程构建时的计划使用流程**。 - -### 3.1 流程一:启动时计划生成 - -此流程在每次程序启动时触发,负责填充当月的计划池。 - -```mermaid -graph TD - A[程序启动] --> B{检查当月计划池}; - B -- 计划数量低于阈值 --> C[构建LLM Prompt]; - C -- prompt包含Bot人设、月份等信息 --> D[调用LLM服务]; - D -- LLM返回多个计划文本 --> E[解析并格式化计划]; - E -- 逐条处理 --> F[存入`monthly_plans`数据库表]; - F --> G[完成启动任务]; - B -- 计划数量充足 --> G; -``` - -### 3.2 流程二:日程构建时计划使用 - -此流程在构建每日日程的提示词(Prompt)时触发。 - -```mermaid -graph TD - H[构建日程Prompt] --> I{查询数据库}; - I -- 读取当月未使用的计划 --> J[随机抽取N个计划]; - J --> K[将计划文本嵌入日程Prompt]; - K --> L{随机数判断}; - L -- 概率命中 --> M[将已抽取的计划标记为删除]; - M --> N[完成Prompt构建]; - L -- 概率未命中 --> N; -``` - ---- - -## 4. 数据库模型设计 - -为支撑本系统,需要新增一个数据库表。 - -**表名:** `monthly_plans` - -| 字段名 | 类型 | 描述 | -| :--- | :--- | :--- | -| `id` | Integer | 主键,自增。 | -| `plan_text` | Text | 由LLM生成的计划内容原文。 | -| `target_month` | String(7) | 计划所属的月份,格式为 "YYYY-MM"。 | -| `is_deleted` | Boolean | 软删除标记,默认为 `false`。 | -| `created_at` | DateTime | 记录创建时间。 | - ---- - -## 5. 详细模块规划 - -### 5.1 LLM Prompt生成模块 - -- **职责:** 构建高质量的Prompt以引导LLM生成符合要求的计划。 -- **输入:** Bot人设描述、当前月份、期望生成的计划数量。 -- **输出:** 一个结构化的Prompt字符串。 -- **Prompt示例:** - ``` - 你是一个[此处填入Bot人设描述,例如:活泼开朗、偶尔有些小迷糊的虚拟助手]。 - 请为即将到来的[YYYY年MM月]设计[N]个符合你身份的月度计划或目标。 - - 要求: - 1. 每个计划都是独立的、积极向上的。 - 2. 语言风格要自然、口语化,符合你的性格。 - 3. 每个计划用一句话或两句话简短描述。 - 4. 以JSON格式返回,格式为:{"plans": ["计划一", "计划二", ...]} - ``` - -### 5.2 数据库交互模块 - -- **职责:** 提供对 `monthly_plans` 表的增、删、改、查接口。 -- **规划函数列表:** - - `add_new_plans(plans: list[str], month: str)`: 批量添加新生成的计划。 - - `get_active_plans_for_month(month: str) -> list`: 获取指定月份所有未被删除的计划。 - - `soft_delete_plans(plan_ids: list[int])`: 将指定ID的计划标记为软删除。 - -### 5.3 配置项规划 - -需要在主配置文件 `config/bot_config.toml` 中添加以下配置项,以控制系统行为。 - -```toml -# ---------------------------------------------------------------- -# 月层计划系统设置 (Monthly Plan System Settings) -# ---------------------------------------------------------------- -[monthly_plan_system] - -# 是否启用本功能 -enable = true - -# 启动时,如果当月计划少于此数量,则触发LLM生成 -generation_threshold = 10 - -# 每次调用LLM期望生成的计划数量 -plans_per_generation = 5 - -# 计划被使用后,被删除的概率 (0.0 到 1.0) -deletion_probability_on_use = 0.5 -``` - ---- -**文档结束。** 本文档纯粹为架构规划,旨在提供清晰的设计思路和开发指引,不包含任何实现代码。 \ No newline at end of file diff --git a/src/chat/chat_loop/heartFC_chat.py b/src/chat/chat_loop/heartFC_chat.py index 6ca4dc916..ccb90da2d 100644 --- a/src/chat/chat_loop/heartFC_chat.py +++ b/src/chat/chat_loop/heartFC_chat.py @@ -10,7 +10,6 @@ from src.chat.express.expression_learner import expression_learner_manager from src.plugin_system.base.component_types import ChatMode from src.schedule.schedule_manager import schedule_manager from src.plugin_system.apis import message_api -from src.mood.mood_manager import mood_manager from .hfc_context import HfcContext from .energy_manager import EnergyManager diff --git a/src/common/cache_manager.py b/src/common/cache_manager.py index d4f872d30..a11ccaa7e 100644 --- a/src/common/cache_manager.py +++ b/src/common/cache_manager.py @@ -4,7 +4,7 @@ import hashlib from pathlib import Path import numpy as np import faiss -from typing import Any, Dict, Optional, Union, List +from typing import Any, Dict, Optional, Union from src.common.logger import get_logger from src.llm_models.utils_model import LLMRequest from src.config.config import global_config, model_config @@ -14,6 +14,7 @@ from src.common.vector_db import vector_db_service logger = get_logger("cache_manager") + class CacheManager: """ 一个支持分层和语义缓存的通用工具缓存管理器。 @@ -21,6 +22,7 @@ class CacheManager: L1缓存: 内存字典 (KV) + FAISS (Vector)。 L2缓存: 数据库 (KV) + ChromaDB (Vector)。 """ + _instance = None def __new__(cls, *args, **kwargs): @@ -32,7 +34,7 @@ class CacheManager: """ 初始化缓存管理器。 """ - if not hasattr(self, '_initialized'): + if not hasattr(self, "_initialized"): self.default_ttl = default_ttl self.semantic_cache_collection_name = "semantic_cache" @@ -41,7 +43,7 @@ class CacheManager: embedding_dim = global_config.lpmm_knowledge.embedding_dimension self.l1_vector_index = faiss.IndexFlatIP(embedding_dim) self.l1_vector_id_to_key: Dict[int, str] = {} - + # L2 向量缓存 (使用新的服务) vector_db_service.get_or_create_collection(self.semantic_cache_collection_name) @@ -58,32 +60,32 @@ class CacheManager: try: if embedding_result is None: return None - + # 确保embedding_result是一维数组或列表 if isinstance(embedding_result, (list, tuple, np.ndarray)): # 转换为numpy数组进行处理 embedding_array = np.array(embedding_result) - + # 如果是多维数组,展平它 if embedding_array.ndim > 1: embedding_array = embedding_array.flatten() - + # 检查维度是否符合预期 expected_dim = global_config.lpmm_knowledge.embedding_dimension if embedding_array.shape[0] != expected_dim: logger.warning(f"嵌入向量维度不匹配: 期望 {expected_dim}, 实际 {embedding_array.shape[0]}") return None - + # 检查是否包含有效的数值 if np.isnan(embedding_array).any() or np.isinf(embedding_array).any(): logger.warning("嵌入向量包含无效的数值 (NaN 或 Inf)") return None - - return embedding_array.astype('float32') + + return embedding_array.astype("float32") else: logger.warning(f"嵌入结果格式不支持: {type(embedding_result)}") return None - + except Exception as e: logger.error(f"验证嵌入向量时发生错误: {e}") return None @@ -102,14 +104,20 @@ class CacheManager: except (OSError, TypeError) as e: file_hash = "unknown" logger.warning(f"无法获取文件信息: {tool_file_path},错误: {e}") - + try: - sorted_args = orjson.dumps(function_args, option=orjson.OPT_SORT_KEYS).decode('utf-8') + sorted_args = orjson.dumps(function_args, option=orjson.OPT_SORT_KEYS).decode("utf-8") except TypeError: sorted_args = repr(sorted(function_args.items())) return f"{tool_name}::{sorted_args}::{file_hash}" - async def get(self, tool_name: str, function_args: Dict[str, Any], tool_file_path: Union[str, Path], semantic_query: Optional[str] = None) -> Optional[Any]: + async def get( + self, + tool_name: str, + function_args: Dict[str, Any], + tool_file_path: Union[str, Path], + semantic_query: Optional[str] = None, + ) -> Optional[Any]: """ 从缓存获取结果,查询顺序: L1-KV -> L1-Vector -> L2-KV -> L2-Vector。 """ @@ -136,13 +144,13 @@ class CacheManager: embedding_vector = embedding_result[0] if isinstance(embedding_result, tuple) else embedding_result validated_embedding = self._validate_embedding(embedding_vector) if validated_embedding is not None: - query_embedding = np.array([validated_embedding], dtype='float32') + query_embedding = np.array([validated_embedding], dtype="float32") # 步骤 2a: L1 语义缓存 (FAISS) if query_embedding is not None and self.l1_vector_index.ntotal > 0: faiss.normalize_L2(query_embedding) - distances, indices = self.l1_vector_index.search(query_embedding, 1) # type: ignore - if indices.size > 0 and distances[0][0] > 0.75: # IP 越大越相似 + distances, indices = self.l1_vector_index.search(query_embedding, 1) # type: ignore + if indices.size > 0 and distances[0][0] > 0.75: # IP 越大越相似 hit_index = indices[0][0] l1_hit_key = self.l1_vector_id_to_key.get(hit_index) if l1_hit_key and l1_hit_key in self.l1_kv_cache: @@ -151,12 +159,9 @@ class CacheManager: # 步骤 2b: L2 精确缓存 (数据库) cache_results_obj = await db_query( - model_class=CacheEntries, - query_type="get", - filters={"cache_key": key}, - single_result=True + model_class=CacheEntries, query_type="get", filters={"cache_key": key}, single_result=True ) - + if cache_results_obj: # 使用 getattr 安全访问属性,避免 Pylance 类型检查错误 expires_at = getattr(cache_results_obj, "expires_at", 0) @@ -164,7 +169,7 @@ class CacheManager: logger.info(f"命中L2键值缓存: {key}") cache_value = getattr(cache_results_obj, "cache_value", "{}") data = orjson.loads(cache_value) - + # 更新访问统计 await db_query( model_class=CacheEntries, @@ -172,20 +177,16 @@ class CacheManager: filters={"cache_key": key}, data={ "last_accessed": time.time(), - "access_count": getattr(cache_results_obj, "access_count", 0) + 1 - } + "access_count": getattr(cache_results_obj, "access_count", 0) + 1, + }, ) - + # 回填 L1 self.l1_kv_cache[key] = {"data": data, "expires_at": expires_at} return data else: # 删除过期的缓存条目 - await db_query( - model_class=CacheEntries, - query_type="delete", - filters={"cache_key": key} - ) + await db_query(model_class=CacheEntries, query_type="delete", filters={"cache_key": key}) # 步骤 2c: L2 语义缓存 (VectorDB Service) if query_embedding is not None: @@ -193,31 +194,33 @@ class CacheManager: results = vector_db_service.query( collection_name=self.semantic_cache_collection_name, query_embeddings=query_embedding.tolist(), - n_results=1 + n_results=1, ) - if results and results.get('ids') and results['ids'][0]: - distance = results['distances'][0][0] if results.get('distances') and results['distances'][0] else 'N/A' + if results and results.get("ids") and results["ids"][0]: + distance = ( + results["distances"][0][0] if results.get("distances") and results["distances"][0] else "N/A" + ) logger.debug(f"L2语义搜索找到最相似的结果: id={results['ids'][0]}, 距离={distance}") - - if distance != 'N/A' and distance < 0.75: - l2_hit_key = results['ids'][0][0] if isinstance(results['ids'][0], list) else results['ids'][0] + + if distance != "N/A" and distance < 0.75: + l2_hit_key = results["ids"][0][0] if isinstance(results["ids"][0], list) else results["ids"][0] logger.info(f"命中L2语义缓存: key='{l2_hit_key}', 距离={distance:.4f}") - + # 从数据库获取缓存数据 semantic_cache_results_obj = await db_query( model_class=CacheEntries, query_type="get", filters={"cache_key": l2_hit_key}, - single_result=True + single_result=True, ) - + if semantic_cache_results_obj: expires_at = getattr(semantic_cache_results_obj, "expires_at", 0) if time.time() < expires_at: cache_value = getattr(semantic_cache_results_obj, "cache_value", "{}") data = orjson.loads(cache_value) logger.debug(f"L2语义缓存返回的数据: {data}") - + # 回填 L1 self.l1_kv_cache[key] = {"data": data, "expires_at": expires_at} if query_embedding is not None: @@ -235,7 +238,15 @@ class CacheManager: logger.debug(f"缓存未命中: {key}") return None - async def set(self, tool_name: str, function_args: Dict[str, Any], tool_file_path: Union[str, Path], data: Any, ttl: Optional[int] = None, semantic_query: Optional[str] = None): + async def set( + self, + tool_name: str, + function_args: Dict[str, Any], + tool_file_path: Union[str, Path], + data: Any, + ttl: Optional[int] = None, + semantic_query: Optional[str] = None, + ): """将结果存入所有缓存层。""" if ttl is None: ttl = self.default_ttl @@ -244,27 +255,22 @@ class CacheManager: key = self._generate_key(tool_name, function_args, tool_file_path) expires_at = time.time() + ttl - + # 写入 L1 self.l1_kv_cache[key] = {"data": data, "expires_at": expires_at} # 写入 L2 (数据库) cache_data = { "cache_key": key, - "cache_value": orjson.dumps(data).decode('utf-8'), + "cache_value": orjson.dumps(data).decode("utf-8"), "expires_at": expires_at, "tool_name": tool_name, "created_at": time.time(), "last_accessed": time.time(), - "access_count": 1 + "access_count": 1, } - - await db_save( - model_class=CacheEntries, - data=cache_data, - key_field="cache_key", - key_value=key - ) + + await db_save(model_class=CacheEntries, data=cache_data, key_field="cache_key", key_value=key) # 写入语义缓存 if semantic_query and self.embedding_model: @@ -274,19 +280,19 @@ class CacheManager: embedding_vector = embedding_result[0] if isinstance(embedding_result, tuple) else embedding_result validated_embedding = self._validate_embedding(embedding_vector) if validated_embedding is not None: - embedding = np.array([validated_embedding], dtype='float32') - + embedding = np.array([validated_embedding], dtype="float32") + # 写入 L1 Vector new_id = self.l1_vector_index.ntotal faiss.normalize_L2(embedding) self.l1_vector_index.add(x=embedding) # type: ignore self.l1_vector_id_to_key[new_id] = key - + # 写入 L2 Vector (使用新的服务) vector_db_service.add( collection_name=self.semantic_cache_collection_name, embeddings=embedding.tolist(), - ids=[key] + ids=[key], ) except Exception as e: logger.warning(f"语义缓存写入失败: {e}") @@ -306,16 +312,16 @@ class CacheManager: await db_query( model_class=CacheEntries, query_type="delete", - filters={} # 删除所有记录 + filters={}, # 删除所有记录 ) - + # 清空 VectorDB try: vector_db_service.delete_collection(name=self.semantic_cache_collection_name) vector_db_service.get_or_create_collection(name=self.semantic_cache_collection_name) except Exception as e: logger.warning(f"清空 VectorDB 集合失败: {e}") - + logger.info("L2 (数据库 & VectorDB) 缓存已清空。") async def clear_all(self): @@ -327,85 +333,23 @@ class CacheManager: async def clean_expired(self): """清理过期的缓存条目""" current_time = time.time() - + # 清理L1过期条目 expired_keys = [] for key, entry in self.l1_kv_cache.items(): if current_time >= entry["expires_at"]: expired_keys.append(key) - + for key in expired_keys: del self.l1_kv_cache[key] - + # 清理L2过期条目 - await db_query( - model_class=CacheEntries, - query_type="delete", - filters={"expires_at": {"$lt": current_time}} - ) - + await db_query(model_class=CacheEntries, query_type="delete", filters={"expires_at": {"$lt": current_time}}) + if expired_keys: logger.info(f"清理了 {len(expired_keys)} 个过期的L1缓存条目") + # 全局实例 tool_cache = CacheManager() -import inspect -import time - -def wrap_tool_executor(): - """ - 包装工具执行器以添加缓存功能 - 这个函数应该在系统启动时被调用一次 - """ - from src.plugin_system.core.tool_use import ToolExecutor - from src.plugin_system.apis.tool_api import get_tool_instance - original_execute = ToolExecutor.execute_tool_call - - async def wrapped_execute_tool_call(self, tool_call, tool_instance=None): - if not tool_instance: - tool_instance = get_tool_instance(tool_call.func_name) - - if not tool_instance or not tool_instance.enable_cache: - return await original_execute(self, tool_call, tool_instance) - - try: - tool_file_path = inspect.getfile(tool_instance.__class__) - semantic_query = None - if tool_instance.semantic_cache_query_key: - semantic_query = tool_call.args.get(tool_instance.semantic_cache_query_key) - - cached_result = await tool_cache.get( - tool_name=tool_call.func_name, - function_args=tool_call.args, - tool_file_path=tool_file_path, - semantic_query=semantic_query - ) - if cached_result: - logger.info(f"{getattr(self, 'log_prefix', '')}使用缓存结果,跳过工具 {tool_call.func_name} 执行") - return cached_result - except Exception as e: - logger.error(f"{getattr(self, 'log_prefix', '')}检查工具缓存时出错: {e}") - - result = await original_execute(self, tool_call, tool_instance) - - try: - tool_file_path = inspect.getfile(tool_instance.__class__) - semantic_query = None - if tool_instance.semantic_cache_query_key: - semantic_query = tool_call.args.get(tool_instance.semantic_cache_query_key) - - await tool_cache.set( - tool_name=tool_call.func_name, - function_args=tool_call.args, - tool_file_path=tool_file_path, - data=result, - ttl=tool_instance.cache_ttl, - semantic_query=semantic_query - ) - except Exception as e: - logger.error(f"{getattr(self, 'log_prefix', '')}设置工具缓存时出错: {e}") - - return result - - ToolExecutor.execute_tool_call = wrapped_execute_tool_call \ No newline at end of file diff --git a/src/llm_models/utils_model.py b/src/llm_models/utils_model.py index f2b8ce002..01859d257 100644 --- a/src/llm_models/utils_model.py +++ b/src/llm_models/utils_model.py @@ -310,82 +310,94 @@ class LLMRequest: message_builder.add_text_content(processed_prompt) messages = [message_builder.build()] tool_built = self._build_tool_options(tools) - - response = await self._execute_request( - api_provider=api_provider, - client=client, - request_type=RequestType.RESPONSE, - model_info=model_info, - message_list=messages, - tool_options=tool_built, - temperature=temperature, - max_tokens=max_tokens, - ) - content = response.content or "" - reasoning_content = response.reasoning_content or "" - tool_calls = response.tool_calls + # 针对当前模型的空回复/截断重试逻辑 + empty_retry_count = 0 + max_empty_retry = api_provider.max_retry + empty_retry_interval = api_provider.retry_interval - if not reasoning_content and content: - content, extracted_reasoning = self._extract_reasoning(content) - reasoning_content = extracted_reasoning - - is_empty_reply = not tool_calls and (not content or content.strip() == "") - is_truncated = False - if use_anti_truncation: - if content.endswith("[done]"): - content = content[:-6].strip() - else: - is_truncated = True - - if is_empty_reply or is_truncated: - # 空回复或截断不进行模型切换,仅记录错误后抛出或返回 - reason = "空回复" if is_empty_reply else "截断" - msg = f"模型 '{model_name}' 生成了{reason}的回复" - logger.error(msg) - if raise_when_empty: - raise RuntimeError(msg) - return msg, (reasoning_content, model_name, tool_calls) - - - # 成功获取响应 - if usage := response.usage: - llm_usage_recorder.record_usage_to_database( - model_info=model_info, model_usage=usage, time_cost=time.time() - start_time, - user_id="system", request_type=self.request_type, endpoint="/chat/completions", + while empty_retry_count <= max_empty_retry: + response = await self._execute_request( + api_provider=api_provider, + client=client, + request_type=RequestType.RESPONSE, + model_info=model_info, + message_list=messages, + tool_options=tool_built, + temperature=temperature, + max_tokens=max_tokens, ) - if not content and not tool_calls: - if raise_when_empty: - raise RuntimeError("生成空回复") - content = "生成的响应为空" - - logger.info(f"模型 '{model_name}' 成功生成回复。") - return content, (reasoning_content, model_name, tool_calls) + content = response.content or "" + reasoning_content = response.reasoning_content or "" + tool_calls = response.tool_calls + + if not reasoning_content and content: + content, extracted_reasoning = self._extract_reasoning(content) + reasoning_content = extracted_reasoning + + is_empty_reply = not tool_calls and (not content or content.strip() == "") + is_truncated = False + if use_anti_truncation: + if content.endswith("[done]"): + content = content[:-6].strip() + else: + is_truncated = True + + if is_empty_reply or is_truncated: + empty_retry_count += 1 + if empty_retry_count <= max_empty_retry: + reason = "空回复" if is_empty_reply else "截断" + logger.warning(f"模型 '{model_name}' 检测到{reason},正在进行第 {empty_retry_count}/{max_empty_retry} 次重新生成...") + if empty_retry_interval > 0: + await asyncio.sleep(empty_retry_interval) + continue # 继续使用当前模型重试 + else: + # 当前模型重试次数用尽,跳出内层循环,触发外层循环切换模型 + reason = "空回复" if is_empty_reply else "截断" + logger.error(f"模型 '{model_name}' 经过 {max_empty_retry} 次重试后仍然是{reason}的回复。") + raise RuntimeError(f"模型 '{model_name}' 达到最大空回复/截断重试次数") + + # 成功获取响应 + if usage := response.usage: + llm_usage_recorder.record_usage_to_database( + model_info=model_info, model_usage=usage, time_cost=time.time() - start_time, + user_id="system", request_type=self.request_type, endpoint="/chat/completions", + ) + + if not content and not tool_calls: + if raise_when_empty: + raise RuntimeError("生成空回复") + content = "生成的响应为空" + + logger.info(f"模型 '{model_name}' 成功生成回复。") + return content, (reasoning_content, model_name, tool_calls) except RespNotOkException as e: if e.status_code in [401, 403]: logger.error(f"模型 '{model_name}' 遇到认证/权限错误 (Code: {e.status_code}),将尝试下一个模型。") failed_models.add(model_name) last_exception = e - continue + continue # 切换到下一个模型 else: - # 对于其他HTTP错误,不切换模型,直接抛出 logger.error(f"模型 '{model_name}' 请求失败,HTTP状态码: {e.status_code}") - last_exception = e if raise_when_empty: raise - break + # 对于其他HTTP错误,直接抛出,不再尝试其他模型 + return f"请求失败: {e}", ("", model_name, None) + except RuntimeError as e: + # 捕获所有重试失败(包括空回复和网络问题) logger.error(f"模型 '{model_name}' 在所有重试后仍然失败: {e},将尝试下一个模型。") failed_models.add(model_name) last_exception = e - continue + continue # 切换到下一个模型 + except Exception as e: logger.error(f"使用模型 '{model_name}' 时发生未知异常: {e}") failed_models.add(model_name) last_exception = e - continue + continue # 切换到下一个模型 # 所有模型都尝试失败 logger.error("所有可用模型都已尝试失败。") diff --git a/src/plugin_system/apis/tool_api.py b/src/plugin_system/apis/tool_api.py index da17f9305..60b9f17de 100644 --- a/src/plugin_system/apis/tool_api.py +++ b/src/plugin_system/apis/tool_api.py @@ -1,4 +1,4 @@ -from typing import Any, Dict, List, Optional, Type +from typing import Optional, Type from src.plugin_system.base.base_tool import BaseTool from src.plugin_system.base.component_types import ComponentType diff --git a/src/plugin_system/base/base_plugin.py b/src/plugin_system/base/base_plugin.py index 57f131ba1..8916fadfd 100644 --- a/src/plugin_system/base/base_plugin.py +++ b/src/plugin_system/base/base_plugin.py @@ -1,5 +1,5 @@ from abc import abstractmethod -from typing import List, Type, Tuple, Union, TYPE_CHECKING +from typing import List, Type, Tuple, Union from .plugin_base import PluginBase from src.common.logger import get_logger diff --git a/src/plugin_system/base/plus_command.py b/src/plugin_system/base/plus_command.py index 16af685a1..1e68a2276 100644 --- a/src/plugin_system/base/plus_command.py +++ b/src/plugin_system/base/plus_command.py @@ -4,7 +4,7 @@ """ from abc import ABC, abstractmethod -from typing import Dict, Tuple, Optional, List +from typing import Tuple, Optional, List import re from src.common.logger import get_logger diff --git a/src/plugin_system/core/tool_use.py b/src/plugin_system/core/tool_use.py index dee611c8c..098337ec2 100644 --- a/src/plugin_system/core/tool_use.py +++ b/src/plugin_system/core/tool_use.py @@ -7,8 +7,10 @@ from src.llm_models.utils_model import LLMRequest from src.llm_models.payload_content import ToolCall from src.config.config import global_config, model_config from src.chat.utils.prompt_builder import Prompt, global_prompt_manager +import inspect from src.chat.message_receive.chat_stream import get_chat_manager from src.common.logger import get_logger +from src.common.cache_manager import tool_cache logger = get_logger("tool_use") @@ -184,28 +186,71 @@ class ToolExecutor: return tool_results, used_tools async def execute_tool_call(self, tool_call: ToolCall, tool_instance: Optional[BaseTool] = None) -> Optional[Dict[str, Any]]: - # sourcery skip: use-assigned-variable - """执行单个工具调用 + """执行单个工具调用,并处理缓存""" + + function_args = tool_call.args or {} + tool_instance = tool_instance or get_tool_instance(tool_call.func_name) - Args: - tool_call: 工具调用对象 + # 如果工具不存在或未启用缓存,则直接执行 + if not tool_instance or not tool_instance.enable_cache: + return await self._original_execute_tool_call(tool_call, tool_instance) - Returns: - Optional[Dict]: 工具调用结果,如果失败则返回None - """ + # --- 缓存逻辑开始 --- + try: + tool_file_path = inspect.getfile(tool_instance.__class__) + semantic_query = None + if tool_instance.semantic_cache_query_key: + semantic_query = function_args.get(tool_instance.semantic_cache_query_key) + + cached_result = await tool_cache.get( + tool_name=tool_call.func_name, + function_args=function_args, + tool_file_path=tool_file_path, + semantic_query=semantic_query + ) + if cached_result: + logger.info(f"{self.log_prefix}使用缓存结果,跳过工具 {tool_call.func_name} 执行") + return cached_result + except Exception as e: + logger.error(f"{self.log_prefix}检查工具缓存时出错: {e}") + + # 缓存未命中,执行原始工具调用 + result = await self._original_execute_tool_call(tool_call, tool_instance) + + # 将结果存入缓存 + try: + tool_file_path = inspect.getfile(tool_instance.__class__) + semantic_query = None + if tool_instance.semantic_cache_query_key: + semantic_query = function_args.get(tool_instance.semantic_cache_query_key) + + await tool_cache.set( + tool_name=tool_call.func_name, + function_args=function_args, + tool_file_path=tool_file_path, + data=result, + ttl=tool_instance.cache_ttl, + semantic_query=semantic_query + ) + except Exception as e: + logger.error(f"{self.log_prefix}设置工具缓存时出错: {e}") + # --- 缓存逻辑结束 --- + + return result + + async def _original_execute_tool_call(self, tool_call: ToolCall, tool_instance: Optional[BaseTool] = None) -> Optional[Dict[str, Any]]: + """执行单个工具调用的原始逻辑""" try: function_name = tool_call.func_name function_args = tool_call.args or {} - logger.info(f"🤖 {self.log_prefix} 正在执行工具: [bold green]{function_name}[/bold green] | 参数: {function_args}") - function_args["llm_called"] = True # 标记为LLM调用 + logger.info(f"{self.log_prefix} 正在执行工具: [bold green]{function_name}[/bold green] | 参数: {function_args}") + function_args["llm_called"] = True - # 获取对应工具实例 tool_instance = tool_instance or get_tool_instance(function_name) if not tool_instance: logger.warning(f"未知工具名称: {function_name}") return None - # 执行工具并记录日志 logger.debug(f"{self.log_prefix}执行工具 {function_name},参数: {function_args}") result = await tool_instance.execute(function_args) if result: diff --git a/src/plugins/built_in/maizone_refactored/services/content_service.py b/src/plugins/built_in/maizone_refactored/services/content_service.py index 7a98a7cdc..cda1fa714 100644 --- a/src/plugins/built_in/maizone_refactored/services/content_service.py +++ b/src/plugins/built_in/maizone_refactored/services/content_service.py @@ -9,12 +9,9 @@ import datetime import base64 import aiohttp from src.common.logger import get_logger -import base64 -import aiohttp import imghdr import asyncio -from src.common.logger import get_logger -from src.plugin_system.apis import llm_api, config_api, generator_api, person_api +from src.plugin_system.apis import llm_api, config_api, generator_api from src.chat.message_receive.chat_stream import get_chat_manager from maim_message import UserInfo from src.llm_models.utils_model import LLMRequest diff --git a/src/plugins/built_in/permission_management/plugin.py b/src/plugins/built_in/permission_management/plugin.py index d8a39107a..bad227787 100644 --- a/src/plugins/built_in/permission_management/plugin.py +++ b/src/plugins/built_in/permission_management/plugin.py @@ -16,7 +16,7 @@ from src.plugin_system.apis.permission_api import permission_api from src.plugin_system.apis.logging_api import get_logger from src.plugin_system.base.component_types import PlusCommandInfo, ChatType from src.plugin_system.base.config_types import ConfigField -from src.plugin_system.utils.permission_decorators import require_permission, require_master, PermissionChecker +from src.plugin_system.utils.permission_decorators import require_permission logger = get_logger("Permission") diff --git a/src/schedule/schedule_manager.py b/src/schedule/schedule_manager.py index 82578046d..84b87c657 100644 --- a/src/schedule/schedule_manager.py +++ b/src/schedule/schedule_manager.py @@ -411,7 +411,6 @@ class ScheduleManager: 通过关键词匹配、唤醒度、睡眠压力等综合判断是否处于休眠时间。 新增弹性睡眠机制,允许在压力低时延迟入睡,并在入睡前发送通知。 """ - from src.chat.chat_loop.wakeup_manager import WakeUpManager # --- 基础检查 --- if not global_config.schedule.enable_is_sleep: return False