Merge branch 'master' of https://github.com/MoFox-Studio/MoFox_Bot

2025-08-28 21:04:05 +08:00
parent 3d958b9e05 78e78e71ba
commit ad3e6763e1
27 changed files with 436 additions and 609 deletions
--- a/docs/architecture/PERMISSION_SYSTEM.md
+++ b/docs/architecture/PERMISSION_SYSTEM.md
--- a/docs/architecture/memory_system_design_v3.md
+++ b/docs/architecture/memory_system_design_v3.md
--- a/docs/plugins/image/quick-start/1750326700269.png
+++ b/docs/plugins/image/quick-start/1750326700269.png
--- a/docs/plugins/image/quick-start/1750332508760.png
+++ b/docs/plugins/image/quick-start/1750332508760.png
--- a/docs/assets/image-1.png
+++ b/docs/assets/image-1.png
--- a/docs/assets/image.png
+++ b/docs/assets/image.png
--- a/docs/development/CONTRIBUTE.md
+++ b/docs/development/CONTRIBUTE.md
--- a/docs/guides/model_configuration_guide.md
+++ b/docs/guides/model_configuration_guide.md
@@ -43,12 +43,11 @@ retry_interval = 10                     # 重试间隔（秒）
 | `name` | ✅ | 服务商名称，需要在模型配置中引用 | - |
 | `base_url` | ✅ | API服务的基础URL | - |
 | `api_key` | ✅ | API密钥，请替换为实际密钥 | - |
-| `client_type` | ❌ | 客户端类型：`openai`（OpenAI格式）或 `gemini`（Gemini格式，现在支持不良好） | `openai` |
+| `client_type` | ❌ | 客户端类型：`openai`、`gemini` 或 `aiohttp_gemini` | `openai` |
 | `max_retry` | ❌ | API调用失败时的最大重试次数 | 2 |
 | `timeout` | ❌ | API请求超时时间（秒） | 30 |
 | `retry_interval` | ❌ | 重试间隔时间（秒） | 10 |

-**请注意，对于`client_type`为`gemini`的模型，`base_url`字段无效。**
 ### 2.3 支持的服务商示例

 #### DeepSeek
@@ -73,9 +72,9 @@ client_type = "openai"
 ```toml
 [[api_providers]]
 name = "Google"
-base_url = "https://api.google.com/v1"
+base_url = "https://generativelanguage.googleapis.com/v1beta" # 在MoFox-Bot中, 使用aiohttp_gemini客户端的提供商可以自定义base_url
 api_key = "your-google-api-key"
-client_type = "gemini"  # 注意：Gemini需要使用特殊客户端
+client_type = "aiohttp_gemini"  # 注意：Gemini需要使用特殊客户端
 ```

 ## 3. 模型配置
@@ -118,11 +117,11 @@ enable_thinking = false # 禁用思考

 比如上面就是参考SiliconFlow的文档配置配置的`Qwen3`禁用思考参数。

-![SiliconFlow文档截图](image-1.png)
+![SiliconFlow文档截图](../assets/image-1.png)

 以豆包文档为另一个例子

-![豆包文档截图](image.png)
+![豆包文档截图](../assets/image.png)

 得到豆包`"doubao-seed-1-6-250615"`的禁用思考配置方法为
 ```toml
@@ -133,7 +132,6 @@ thinking = {type = "disabled"} # 禁用思考
 ```
 请注意，`extra_params` 的配置应该构成一个合法的TOML字典结构，具体内容取决于API服务商的要求。

-**请注意，对于`client_type`为`gemini`的模型，此字段无效。**
 ### 3.3 配置参数说明

 | 参数 | 必填 | 说明 |
@@ -145,6 +143,7 @@ thinking = {type = "disabled"} # 禁用思考
 | `price_out` | ❌ | 输出价格（元/M token），用于成本统计 |
 | `force_stream_mode` | ❌ | 是否强制使用流式输出 |
 | `extra_params` | ❌ | 额外的模型参数配置 |
+| `anti_truncation` | ❌ | 是否启用反截断功能 |

 ## 4. 模型任务配置

@@ -184,7 +183,7 @@ max_tokens = 800
 ```

 ### planner - 决策模型
-负责决定MaiBot该做什么：
+负责决定MoFox_Bot该做什么：
 ```toml
 [model_task_config.planner]
 model_list = ["siliconflow-deepseek-v3"]
@@ -193,7 +192,7 @@ max_tokens = 800
 ```

 ### emotion - 情绪模型
-负责MaiBot的情绪变化：
+负责MoFox_Bot的情绪变化：
 ```toml
 [model_task_config.emotion]
 model_list = ["siliconflow-deepseek-v3"]
@@ -262,6 +261,44 @@ temperature = 0.7
 max_tokens = 800
 ```

+### schedule_generator - 日程生成模型
+```toml
+[model_task_config.schedule_generator]
+model_list = ["deepseek-v3"]
+temperature = 0.5
+max_tokens = 1024
+```
+
+### monthly_plan_generator - 月度计划生成模型
+```toml
+[model_task_config.monthly_plan_generator]
+model_list = ["deepseek-v3"]
+temperature = 0.7
+max_tokens = 1024
+```
+
+### emoji_vlm - 表情包VLM模型
+```toml
+[model_task_config.emoji_vlm]
+model_list = ["qwen-vl-max"]
+max_tokens = 800
+```
+
+### anti_injection - 反注入模型
+```toml
+[model_task_config.anti_injection]
+model_list = ["deepseek-v3"]
+temperature = 0.1
+max_tokens = 512
+```
+
+### utils_video - 视频分析模型
+```toml
+[model_task_config.utils_video]
+model_list = ["qwen-vl-max"]
+max_tokens = 800
+```
+
 ## 5. 配置建议

 ### 5.1 Temperature 参数选择
@@ -276,7 +313,7 @@ max_tokens = 800

 | 任务类型 | 推荐模型类型 | 示例 |
 |----------|--------------|------|
-| 高精度任务 | 大模型 | DeepSeek-V3, GPT-4 |
+| 高精度任务 | 大模型 | DeepSeek-V3, GPT-5,Gemini-2.5-Pro |
 | 高频率任务 | 小模型 | Qwen3-8B |
 | 多模态任务 | 专用模型 | Qwen2.5-VL, SenseVoice |
 | 工具调用 | 支持Function Call的模型 | Qwen3-14B |
@@ -285,7 +322,6 @@ max_tokens = 800

 1. **分层使用**：核心功能使用高质量模型，辅助功能使用经济模型
 2. **合理配置max_tokens**：根据实际需求设置，避免浪费
-3. **选择免费模型**：对于测试环境，优先使用price为0的模型

 ## 6. 配置验证

--- a/docs/guides/vector_db_usage_guide.md
+++ b/docs/guides/vector_db_usage_guide.md
--- a/docs/integrations/Bing.md
+++ b/docs/integrations/Bing.md
--- a/docs/plugins/PLUS_COMMAND_GUIDE.md
+++ b/docs/plugins/PLUS_COMMAND_GUIDE.md
--- a/docs/plugins/command-components.md
+++ b/docs/plugins/command-components.md
@@ -1,89 +0,0 @@
-# 💻 Command组件详解
-
-## 📖 什么是Command
-
-Command是直接响应用户明确指令的组件，与Action不同，Command是**被动触发**的，当用户输入特定格式的命令时立即执行。
-
-Command通过正则表达式匹配用户输入，提供确定性的功能服务。
-
-### 🎯 Command的特点
-
- 🎯 **确定性执行**：匹配到命令立即执行，无随机性
- ⚡ **即时响应**：用户主动触发，快速响应
- 🔍 **正则匹配**：通过正则表达式精确匹配用户输入
- 🛑 **拦截控制**：可以控制是否阻止消息继续处理
- 📝 **参数解析**：支持从用户输入中提取参数
-
---
-
-## 🛠️ Command组件的基本结构
-
-首先，Command组件需要继承自`BaseCommand`类，并实现必要的方法。
-
-```python
-class ExampleCommand(BaseCommand):
-    command_name = "example" # 命令名称，作为唯一标识符
-    command_description = "这是一个示例命令" # 命令描述
-    command_pattern = r"" # 命令匹配的正则表达式
-
-    async def execute(self) -> Tuple[bool, Optional[str], bool]:
-        """
-        执行Command的主要逻辑
-
-        Returns:
-            Tuple[bool, str, bool]: 
-                - 第一个bool表示是否成功执行
-                - 第二个str是执行结果消息
-                - 第三个bool表示是否需要阻止消息继续处理
-        """
-        # ---- 执行命令的逻辑 ----
-        return True, "执行成功", False
-```
-**`command_pattern`**: 该Command匹配的正则表达式，用于精确匹配用户输入。
-
-请注意：如果希望能获取到命令中的参数，请在正则表达式中使用有命名的捕获组，例如`(?P<param_name>pattern)`。
-
-这样在匹配时，内部实现可以使用`re.match.groupdict()`方法获取到所有捕获组的参数，并以字典的形式存储在`self.matched_groups`中。
-
-### 匹配样例
-假设我们有一个命令`/example param1=value1 param2=value2`，对应的正则表达式可以是：
-
-```python
-class ExampleCommand(BaseCommand):
-    command_name = "example"
-    command_description = "这是一个示例命令"
-    command_pattern = r"/example (?P<param1>\w+) (?P<param2>\w+)"
-
-    async def execute(self) -> Tuple[bool, Optional[str], bool]:
-        # 获取匹配的参数
-        param1 = self.matched_groups.get("param1")
-        param2 = self.matched_groups.get("param2")
-        
-        # 执行逻辑
-        return True, f"参数1: {param1}, 参数2: {param2}", False
-```
-
---
-
-## Command 内置方法说明
-```python
-class BaseCommand:
-    def get_config(self, key: str, default=None):
-        """获取插件配置值，使用嵌套键访问"""
-
-    async def send_text(self, content: str, reply_to: str = "") -> bool:
-        """发送回复消息"""
-
-    async def send_type(self, message_type: str, content: str, display_message: str = "", typing: bool = False, reply_to: str = "") -> bool:
-        """发送指定类型的回复消息到当前聊天环境"""
-
-    async def send_command(self, command_name: str, args: Optional[dict] = None, display_message: str = "", storage_message: bool = True) -> bool:
-        """发送命令消息"""
-
-    async def send_emoji(self, emoji_base64: str) -> bool:
-        """发送表情包"""
-
-    async def send_image(self, image_base64: str) -> bool:
-        """发送图片"""
-```
-具体参数与用法参见`BaseCommand`基类的定义。
--- a/docs/plugins/index.md
+++ b/docs/plugins/index.md
@@ -9,7 +9,7 @@
 ## 组件功能详解

 - [🧱 Action组件详解](action-components.md) - 掌握最核心的Action组件
- [💻 Command组件详解](command-components.md) - 学习直接响应命令的组件
+- [💻 Command组件详解](PLUS_COMMAND_GUIDE.md) - 学习直接响应命令的组件
 - [🔧 Tool组件详解](tool-components.md) - 了解如何扩展信息获取能力
 - [⚙️ 配置文件系统指南](configuration-guide.md) - 学会使用自动生成的插件配置文件
 - [📄 Manifest系统指南](manifest-guide.md) - 了解插件元数据管理和配置架构
--- a/docs/plugins/quick-start.md
+++ b/docs/plugins/quick-start.md
@@ -90,7 +90,7 @@ class HelloWorldPlugin(BasePlugin):

 在日志中你应该能看到插件被加载的信息。虽然插件还没有任何功能，但它已经成功运行了！

-![1750326700269](image/quick-start/1750326700269.png)
+![1750326700269](../assets/1750326700269.png)

 ### 5. 添加第一个功能：问候Action

@@ -180,7 +180,7 @@ MoFox_Bot可能会选择使用你的问候Action，发送回复：
 嗨！很开心见到你！😊
 ```

-![1750332508760](image/quick-start/1750332508760.png)
+![1750332508760](../assets/1750332508760.png)

 > **💡 小提示**：MoFox_Bot会智能地决定什么时候使用它。如果没有立即看到效果，多试几次不同的消息。

--- a/docs/plugins/tool_caching_guide.md
+++ b/docs/plugins/tool_caching_guide.md
@@ -1,124 +0,0 @@
-# 自动化工具缓存系统使用指南
-
-为了提升性能并减少不必要的重复计算或API调用，MMC内置了一套强大且易于使用的自动化工具缓存系统。该系统同时支持传统的**精确缓存**和先进的**语义缓存**。工具开发者无需编写任何手动缓存逻辑，只需在工具类中设置几个属性，即可轻松启用和配置缓存行为。
-
-## 核心概念
-
- **精确缓存 (KV Cache)**: 当一个工具被调用时，系统会根据工具名称和所有参数生成一个唯一的键。只有当**下一次调用的工具名和所有参数与之前完全一致**时，才会命中缓存。
- **语义缓存 (Vector Cache)**: 它不要求参数完全一致，而是理解参数的**语义和意图**。例如，`"查询深圳今天的天气"` 和 `"今天深圳天气怎么样"` 这两个不同的查询，在语义上是高度相似的。如果启用了语义缓存，第二个查询就能成功命中由第一个查询产生的缓存结果。
-
-## 如何为你的工具启用缓存
-
-为你的工具（必须继承自 `BaseTool`）启用缓存非常简单，只需在你的工具类定义中添加以下一个或多个属性即可：
-
-### 1. `enable_cache: bool`
-
-这是启用缓存的总开关。
-
- **类型**: `bool`
- **默认值**: `False`
- **作用**: 设置为 `True` 即可为该工具启用缓存功能。如果为 `False`，后续的所有缓存配置都将无效。
-
-**示例**:
-```python
-class MyAwesomeTool(BaseTool):
-    # ... 其他定义 ...
-    enable_cache: bool = True
-```
-
-### 2. `cache_ttl: int`
-
-设置缓存的生存时间（Time-To-Live）。
-
- **类型**: `int`
- **单位**: 秒
- **默认值**: `3600` (1小时)
- **作用**: 定义缓存条目在被视为过期之前可以存活多长时间。
-
-**示例**:
-```python
-class MyLongTermCacheTool(BaseTool):
-    # ... 其他定义 ...
-    enable_cache: bool = True
-    cache_ttl: int = 86400  # 缓存24小时
-```
-
-### 3. `semantic_cache_query_key: Optional[str]`
-
-启用语义缓存的关键。
-
- **类型**: `Optional[str]`
- **默认值**: `None`
- **作用**:
-    - 将此属性的值设置为你工具的某个**参数的名称**（字符串）。
-    - 自动化缓存系统在工作时，会提取该参数的值，将其转换为向量，并进行语义相似度搜索。
-    - 如果该值为 `None`，则此工具**仅使用精确缓存**。
-
-**示例**:
-```python
-class WebSurfingTool(BaseTool):
-    name: str = "web_search"
-    parameters = [
-        ("query", ToolParamType.STRING, "要搜索的关键词或问题。", True, None),
-        # ... 其他参数 ...
-    ]
-    
-    # --- 缓存配置 ---
-    enable_cache: bool = True
-    cache_ttl: int = 7200  # 缓存2小时
-    semantic_cache_query_key: str = "query" # <-- 关键！
-```
-在上面的例子中，`web_search` 工具的 `"query"` 参数值（例如，用户输入的搜索词）将被用于语义缓存搜索。
-
-## 完整示例
-
-假设我们有一个调用外部API来获取股票价格的工具。由于股价在短时间内相对稳定，且查询意图可能相似（如 "苹果股价" vs "AAPL股价"），因此非常适合使用缓存。
-
-```python
-# in your_plugin/tools/stock_checker.py
-
-from src.plugin_system import BaseTool, ToolParamType
-
-class StockCheckerTool(BaseTool):
-    """
-    一个用于查询股票价格的工具。
-    """
-    name: str = "get_stock_price"
-    description: str = "获取指定公司或股票代码的最新价格。"
-    available_for_llm: bool = True
-    parameters = [
-        ("symbol", ToolParamType.STRING, "公司名称或股票代码 (e.g., 'AAPL', '苹果')", True, None),
-    ]
-
-    # --- 缓存配置 ---
-    # 1. 开启缓存
-    enable_cache: bool = True
-    # 2. 股价信息缓存10分钟
-    cache_ttl: int = 600
-    # 3. 使用 "symbol" 参数进行语义搜索
-    semantic_cache_query_key: str = "symbol"
-    # --------------------
-
-    async def execute(self, function_args: dict[str, Any]) -> dict[str, Any]:
-        symbol = function_args.get("symbol")
-        
-        # ... 这里是你调用外部API获取股票价格的逻辑 ...
-        # price = await some_stock_api.get_price(symbol)
-        price = 123.45 # 示例价格
-        
-        return {
-            "type": "stock_price_result",
-            "content": f"{symbol} 的当前价格是 ${price}"
-        }
-
-```
-
-通过以上简单的三行配置，`StockCheckerTool` 现在就拥有了强大的自动化缓存能力：
-
- 当用户查询 `"苹果"` 时，工具会执行并缓存结果。
- 在接下来的10分钟内，如果再次查询 `"苹果"`，将直接从精确缓存返回结果。
- 更智能的是，如果另一个用户查询 `"AAPL"`，语义缓存系统会识别出 `"AAPL"` 和 `"苹果"` 在语义上高度相关，大概率也会直接返回缓存的结果，而无需再次调用API。
-
---
-
-现在，你可以专注于实现工具的核心逻辑，把缓存的复杂性交给MMC的自动化系统来处理。
--- a/docs/plugins/tool-components.md
+++ b/docs/plugins/tool-components.md
@@ -2,7 +2,7 @@

 ## 📖 什么是工具

-工具是MoFox_Bot的信息获取能力扩展组件。如果说Action组件功能五花八门，可以拓展麦麦能做的事情，那么Tool就是在某个过程中拓宽了麦麦能够获得的信息量。
+工具是MoFox_Bot的信息获取能力扩展组件。如果说Action组件功能五花八门，可以拓展麦麦能做的事情，那么Tool就是在某个过程中拓宽了MoFox_Bot能够获得的信息量。

 ### 🎯 工具的特点

@@ -191,8 +191,7 @@ class WeatherTool(BaseTool):
 name = "weather_query"        # 清晰表达功能
 name = "knowledge_search"     # 描述性强
 name = "stock_price_check"    # 功能明确
-```
-#### ❌ 避免的命名
+```#### ❌ 避免的命名
 ```python
 name = "tool1"               # 无意义
 name = "wq"                  # 过于简短
@@ -244,3 +243,130 @@ def _format_result(self, data):
 def _format_result(self, data):
    return str(data)  # 直接返回原始数据
 ```
+
+---
+
+# 自动化工具缓存系统使用指南
+
+为了提升性能并减少不必要的重复计算或API调用，MMC内置了一套强大且易于使用的自动化工具缓存系统。该系统同时支持传统的**精确缓存**和先进的**语义缓存**。工具开发者无需编写任何手动缓存逻辑，只需在工具类中设置几个属性，即可轻松启用和配置缓存行为。
+
+## 核心概念
+
+- **精确缓存 (KV Cache)**: 当一个工具被调用时，系统会根据工具名称和所有参数生成一个唯一的键。只有当**下一次调用的工具名和所有参数与之前完全一致**时，才会命中缓存。
+- **语义缓存 (Vector Cache)**: 它不要求参数完全一致，而是理解参数的**语义和意图**。例如，`"查询深圳今天的天气"` 和 `"今天深圳天气怎么样"` 这两个不同的查询，在语义上是高度相似的。如果启用了语义缓存，第二个查询就能成功命中由第一个查询产生的缓存结果。
+
+## 如何为你的工具启用缓存
+
+为你的工具（必须继承自 `BaseTool`）启用缓存非常简单，只需在你的工具类定义中添加以下一个或多个属性即可：
+
+### 1. `enable_cache: bool`
+
+这是启用缓存的总开关。
+
+- **类型**: `bool`
+- **默认值**: `False`
+- **作用**: 设置为 `True` 即可为该工具启用缓存功能。如果为 `False`，后续的所有缓存配置都将无效。
+
+**示例**:
+```python
+class MyAwesomeTool(BaseTool):
+    # ... 其他定义 ...
+    enable_cache: bool = True
+```
+
+### 2. `cache_ttl: int`
+
+设置缓存的生存时间（Time-To-Live）。
+
+- **类型**: `int`
+- **单位**: 秒
+- **默认值**: `3600` (1小时)
+- **作用**: 定义缓存条目在被视为过期之前可以存活多长时间。
+
+**示例**:
+```python
+class MyLongTermCacheTool(BaseTool):
+    # ... 其他定义 ...
+    enable_cache: bool = True
+    cache_ttl: int = 86400  # 缓存24小时
+```
+
+### 3. `semantic_cache_query_key: Optional[str]`
+
+启用语义缓存的关键。
+
+- **类型**: `Optional[str]`
+- **默认值**: `None`
+- **作用**:
+    - 将此属性的值设置为你工具的某个**参数的名称**（字符串）。
+    - 自动化缓存系统在工作时，会提取该参数的值，将其转换为向量，并进行语义相似度搜索。
+    - 如果该值为 `None`，则此工具**仅使用精确缓存**。
+
+**示例**:
+```python
+class WebSurfingTool(BaseTool):
+    name: str = "web_search"
+    parameters = [
+        ("query", ToolParamType.STRING, "要搜索的关键词或问题。", True, None),
+        # ... 其他参数 ...
+    ]
+    
+    # --- 缓存配置 ---
+    enable_cache: bool = True
+    cache_ttl: int = 7200  # 缓存2小时
+    semantic_cache_query_key: str = "query" # <-- 关键！
+```
+在上面的例子中，`web_search` 工具的 `"query"` 参数值（例如，用户输入的搜索词）将被用于语义缓存搜索。
+
+## 完整示例
+
+假设我们有一个调用外部API来获取股票价格的工具。由于股价在短时间内相对稳定，且查询意图可能相似（如 "苹果股价" vs "AAPL股价"），因此非常适合使用缓存。
+
+```python
+# in your_plugin/tools/stock_checker.py
+
+from src.plugin_system import BaseTool, ToolParamType
+
+class StockCheckerTool(BaseTool):
+    """
+    一个用于查询股票价格的工具。
+    """
+    name: str = "get_stock_price"
+    description: str = "获取指定公司或股票代码的最新价格。"
+    available_for_llm: bool = True
+    parameters = [
+        ("symbol", ToolParamType.STRING, "公司名称或股票代码 (e.g., 'AAPL', '苹果')", True, None),
+    ]
+
+    # --- 缓存配置 ---
+    # 1. 开启缓存
+    enable_cache: bool = True
+    # 2. 股价信息缓存10分钟
+    cache_ttl: int = 600
+    # 3. 使用 "symbol" 参数进行语义搜索
+    semantic_cache_query_key: str = "symbol"
+    # --------------------
+
+    async def execute(self, function_args: dict[str, Any]) -> dict[str, Any]:
+        symbol = function_args.get("symbol")
+        
+        # ... 这里是你调用外部API获取股票价格的逻辑 ...
+        # price = await some_stock_api.get_price(symbol)
+        price = 123.45 # 示例价格
+        
+        return {
+            "type": "stock_price_result",
+            "content": f"{symbol} 的当前价格是 ${price}"
+        }
+
+```
+
+通过以上简单的三行配置，`StockCheckerTool` 现在就拥有了强大的自动化缓存能力：
+
+- 当用户查询 `"苹果"` 时，工具会执行并缓存结果。
+- 在接下来的10分钟内，如果再次查询 `"苹果"`，将直接从精确缓存返回结果。
+- 更智能的是，如果另一个用户查询 `"AAPL"`，语义缓存系统会识别出 `"AAPL"` 和 `"苹果"` 在语义上高度相关，大概率也会直接返回缓存的结果，而无需再次调用API。
+
+---
+
+现在，你可以专注于实现工具的核心逻辑，把缓存的复杂性交给MMC的自动化系统来处理。
--- a/docs/schedule_enhancement
+++ b/docs/schedule_enhancement
@@ -1,121 +0,0 @@
-# “月层计划”系统架构设计文档
-
-## 1. 系统概述与目标
-
-本系统旨在为MoFox_Bot引入一个动态的、由大型语言模型（LLM）驱动的“月层计划”机制。其核心目标是取代静态、预设的任务模板，转而利用LLM在程序启动时自动生成符合Bot人设的、具有时效性的月度计划。这些计划将被存储、管理，并在构建每日日程时被动态抽取和使用，从而极大地丰富日程内容的个性和多样性。
-
---
-
-## 2. 核心设计原则
-
- **动态性与智能化:** 所有计划内容均由LLM实时生成，确保其独特性和创造性。
- **人设一致性:** 计划的生成将严格围绕Bot的核心人设进行，强化角色形象。
- **持久化与可管理:** 生成的计划将被存入专用数据库表，便于管理和追溯。
- **消耗性与随机性:** 计划在使用后有一定几率被消耗（删除），模拟真实世界中计划的完成与迭代。
-
---
-
-## 3. 系统核心流程规划
-
-本系统包含两大核心流程：**启动时的计划生成流程**和**日程构建时的计划使用流程**。
-
-### 3.1 流程一：启动时计划生成
-
-此流程在每次程序启动时触发，负责填充当月的计划池。
-
-```mermaid
-graph TD
-    A[程序启动] --> B{检查当月计划池};
-    B -- 计划数量低于阈值 --> C[构建LLM Prompt];
-    C -- prompt包含Bot人设、月份等信息 --> D[调用LLM服务];
-    D -- LLM返回多个计划文本 --> E[解析并格式化计划];
-    E -- 逐条处理 --> F[存入`monthly_plans`数据库表];
-    F --> G[完成启动任务];
-    B -- 计划数量充足 --> G;
-```
-
-### 3.2 流程二：日程构建时计划使用
-
-此流程在构建每日日程的提示词（Prompt）时触发。
-
-```mermaid
-graph TD
-    H[构建日程Prompt] --> I{查询数据库};
-    I -- 读取当月未使用的计划 --> J[随机抽取N个计划];
-    J --> K[将计划文本嵌入日程Prompt];
-    K --> L{随机数判断};
-    L -- 概率命中 --> M[将已抽取的计划标记为删除];
-    M --> N[完成Prompt构建];
-    L -- 概率未命中 --> N;
-```
-
---
-
-## 4. 数据库模型设计
-
-为支撑本系统，需要新增一个数据库表。
-
-**表名:** `monthly_plans`
-
-| 字段名 | 类型 | 描述 |
-| :--- | :--- | :--- |
-| `id` | Integer | 主键，自增。 |
-| `plan_text` | Text | 由LLM生成的计划内容原文。 |
-| `target_month` | String(7) | 计划所属的月份，格式为 "YYYY-MM"。 |
-| `is_deleted` | Boolean | 软删除标记，默认为 `false`。 |
-| `created_at` | DateTime | 记录创建时间。 |
-
---
-
-## 5. 详细模块规划
-
-### 5.1 LLM Prompt生成模块
-
- **职责:** 构建高质量的Prompt以引导LLM生成符合要求的计划。
- **输入:** Bot人设描述、当前月份、期望生成的计划数量。
- **输出:** 一个结构化的Prompt字符串。
- **Prompt示例:**
-  ```
-  你是一个[此处填入Bot人设描述，例如：活泼开朗、偶尔有些小迷糊的虚拟助手]。
-  请为即将到来的[YYYY年MM月]设计[N]个符合你身份的月度计划或目标。
-  
-  要求：
-  1. 每个计划都是独立的、积极向上的。
-  2. 语言风格要自然、口语化，符合你的性格。
-  3. 每个计划用一句话或两句话简短描述。
-  4. 以JSON格式返回，格式为：{"plans": ["计划一", "计划二", ...]}
-  ```
-
-### 5.2 数据库交互模块
-
- **职责:** 提供对 `monthly_plans` 表的增、删、改、查接口。
- **规划函数列表:**
-  - `add_new_plans(plans: list[str], month: str)`: 批量添加新生成的计划。
-  - `get_active_plans_for_month(month: str) -> list`: 获取指定月份所有未被删除的计划。
-  - `soft_delete_plans(plan_ids: list[int])`: 将指定ID的计划标记为软删除。
-
-### 5.3 配置项规划
-
-需要在主配置文件 `config/bot_config.toml` 中添加以下配置项，以控制系统行为。
-
-```toml
-# ----------------------------------------------------------------
-# 月层计划系统设置 (Monthly Plan System Settings)
-# ----------------------------------------------------------------
-[monthly_plan_system]
-
-# 是否启用本功能
-enable = true
-
-# 启动时，如果当月计划少于此数量，则触发LLM生成
-generation_threshold = 10
-
-# 每次调用LLM期望生成的计划数量
-plans_per_generation = 5
-
-# 计划被使用后，被删除的概率 (0.0 到 1.0)
-deletion_probability_on_use = 0.5
-```
-
---
-**文档结束。** 本文档纯粹为架构规划，旨在提供清晰的设计思路和开发指引，不包含任何实现代码。
--- a/src/chat/chat_loop/heartFC_chat.py
+++ b/src/chat/chat_loop/heartFC_chat.py
@@ -10,7 +10,6 @@ from src.chat.express.expression_learner import expression_learner_manager
 from src.plugin_system.base.component_types import ChatMode
 from src.schedule.schedule_manager import schedule_manager
 from src.plugin_system.apis import message_api
-from src.mood.mood_manager import mood_manager

 from .hfc_context import HfcContext
 from .energy_manager import EnergyManager
--- a/src/common/cache_manager.py
+++ b/src/common/cache_manager.py
@@ -4,7 +4,7 @@ import hashlib
 from pathlib import Path
 import numpy as np
 import faiss
-from typing import Any, Dict, Optional, Union, List
+from typing import Any, Dict, Optional, Union
 from src.common.logger import get_logger
 from src.llm_models.utils_model import LLMRequest
 from src.config.config import global_config, model_config
@@ -14,6 +14,7 @@ from src.common.vector_db import vector_db_service

 logger = get_logger("cache_manager")

+
 class CacheManager:
    """
    一个支持分层和语义缓存的通用工具缓存管理器。
@@ -21,6 +22,7 @@ class CacheManager:
    L1缓存: 内存字典 (KV) + FAISS (Vector)。
    L2缓存: 数据库 (KV) + ChromaDB (Vector)。
    """
+
    _instance = None

    def __new__(cls, *args, **kwargs):
@@ -32,7 +34,7 @@ class CacheManager:
        """
        初始化缓存管理器。
        """
-        if not hasattr(self, '_initialized'):
+        if not hasattr(self, "_initialized"):
            self.default_ttl = default_ttl
            self.semantic_cache_collection_name = "semantic_cache"

@@ -41,7 +43,7 @@ class CacheManager:
            embedding_dim = global_config.lpmm_knowledge.embedding_dimension
            self.l1_vector_index = faiss.IndexFlatIP(embedding_dim)
            self.l1_vector_id_to_key: Dict[int, str] = {}
-            
+
            # L2 向量缓存 (使用新的服务)
            vector_db_service.get_or_create_collection(self.semantic_cache_collection_name)

@@ -58,32 +60,32 @@ class CacheManager:
        try:
            if embedding_result is None:
                return None
-                
+
            # 确保embedding_result是一维数组或列表
            if isinstance(embedding_result, (list, tuple, np.ndarray)):
                # 转换为numpy数组进行处理
                embedding_array = np.array(embedding_result)
-                
+
                # 如果是多维数组，展平它
                if embedding_array.ndim > 1:
                    embedding_array = embedding_array.flatten()
-                
+
                # 检查维度是否符合预期
                expected_dim = global_config.lpmm_knowledge.embedding_dimension
                if embedding_array.shape[0] != expected_dim:
                    logger.warning(f"嵌入向量维度不匹配: 期望 {expected_dim}, 实际 {embedding_array.shape[0]}")
                    return None
-                
+
                # 检查是否包含有效的数值
                if np.isnan(embedding_array).any() or np.isinf(embedding_array).any():
                    logger.warning("嵌入向量包含无效的数值 (NaN 或 Inf)")
                    return None
-                
-                return embedding_array.astype('float32')
+
+                return embedding_array.astype("float32")
            else:
                logger.warning(f"嵌入结果格式不支持: {type(embedding_result)}")
                return None
-                
+
        except Exception as e:
            logger.error(f"验证嵌入向量时发生错误: {e}")
            return None
@@ -102,14 +104,20 @@ class CacheManager:
        except (OSError, TypeError) as e:
            file_hash = "unknown"
            logger.warning(f"无法获取文件信息: {tool_file_path}，错误: {e}")
-        
+
        try:
-            sorted_args = orjson.dumps(function_args, option=orjson.OPT_SORT_KEYS).decode('utf-8')
+            sorted_args = orjson.dumps(function_args, option=orjson.OPT_SORT_KEYS).decode("utf-8")
        except TypeError:
            sorted_args = repr(sorted(function_args.items()))
        return f"{tool_name}::{sorted_args}::{file_hash}"

-    async def get(self, tool_name: str, function_args: Dict[str, Any], tool_file_path: Union[str, Path], semantic_query: Optional[str] = None) -> Optional[Any]:
+    async def get(
+        self,
+        tool_name: str,
+        function_args: Dict[str, Any],
+        tool_file_path: Union[str, Path],
+        semantic_query: Optional[str] = None,
+    ) -> Optional[Any]:
        """
        从缓存获取结果，查询顺序: L1-KV -> L1-Vector -> L2-KV -> L2-Vector。
        """
@@ -136,13 +144,13 @@ class CacheManager:
                embedding_vector = embedding_result[0] if isinstance(embedding_result, tuple) else embedding_result
                validated_embedding = self._validate_embedding(embedding_vector)
                if validated_embedding is not None:
-                    query_embedding = np.array([validated_embedding], dtype='float32')
+                    query_embedding = np.array([validated_embedding], dtype="float32")

        # 步骤 2a: L1 语义缓存 (FAISS)
        if query_embedding is not None and self.l1_vector_index.ntotal > 0:
            faiss.normalize_L2(query_embedding)
-            distances, indices = self.l1_vector_index.search(query_embedding, 1) # type: ignore
-            if indices.size > 0 and distances[0][0] > 0.75: # IP 越大越相似
+            distances, indices = self.l1_vector_index.search(query_embedding, 1)  # type: ignore
+            if indices.size > 0 and distances[0][0] > 0.75:  # IP 越大越相似
                hit_index = indices[0][0]
                l1_hit_key = self.l1_vector_id_to_key.get(hit_index)
                if l1_hit_key and l1_hit_key in self.l1_kv_cache:
@@ -151,12 +159,9 @@ class CacheManager:

        # 步骤 2b: L2 精确缓存 (数据库)
        cache_results_obj = await db_query(
-            model_class=CacheEntries,
-            query_type="get",
-            filters={"cache_key": key},
-            single_result=True
+            model_class=CacheEntries, query_type="get", filters={"cache_key": key}, single_result=True
        )
-        
+
        if cache_results_obj:
            # 使用 getattr 安全访问属性，避免 Pylance 类型检查错误
            expires_at = getattr(cache_results_obj, "expires_at", 0)
@@ -164,7 +169,7 @@ class CacheManager:
                logger.info(f"命中L2键值缓存: {key}")
                cache_value = getattr(cache_results_obj, "cache_value", "{}")
                data = orjson.loads(cache_value)
-                
+
                # 更新访问统计
                await db_query(
                    model_class=CacheEntries,
@@ -172,20 +177,16 @@ class CacheManager:
                    filters={"cache_key": key},
                    data={
                        "last_accessed": time.time(),
-                        "access_count": getattr(cache_results_obj, "access_count", 0) + 1
-                    }
+                        "access_count": getattr(cache_results_obj, "access_count", 0) + 1,
+                    },
                )
-                
+
                # 回填 L1
                self.l1_kv_cache[key] = {"data": data, "expires_at": expires_at}
                return data
            else:
                # 删除过期的缓存条目
-                await db_query(
-                    model_class=CacheEntries,
-                    query_type="delete",
-                    filters={"cache_key": key}
-                )
+                await db_query(model_class=CacheEntries, query_type="delete", filters={"cache_key": key})

        # 步骤 2c: L2 语义缓存 (VectorDB Service)
        if query_embedding is not None:
@@ -193,31 +194,33 @@ class CacheManager:
                results = vector_db_service.query(
                    collection_name=self.semantic_cache_collection_name,
                    query_embeddings=query_embedding.tolist(),
-                    n_results=1
+                    n_results=1,
                )
-                if results and results.get('ids') and results['ids'][0]:
-                    distance = results['distances'][0][0] if results.get('distances') and results['distances'][0] else 'N/A'
+                if results and results.get("ids") and results["ids"][0]:
+                    distance = (
+                        results["distances"][0][0] if results.get("distances") and results["distances"][0] else "N/A"
+                    )
                    logger.debug(f"L2语义搜索找到最相似的结果: id={results['ids'][0]}, 距离={distance}")
-                    
-                    if distance != 'N/A' and distance < 0.75:
-                        l2_hit_key = results['ids'][0][0] if isinstance(results['ids'][0], list) else results['ids'][0]
+
+                    if distance != "N/A" and distance < 0.75:
+                        l2_hit_key = results["ids"][0][0] if isinstance(results["ids"][0], list) else results["ids"][0]
                        logger.info(f"命中L2语义缓存: key='{l2_hit_key}', 距离={distance:.4f}")
-                        
+
                        # 从数据库获取缓存数据
                        semantic_cache_results_obj = await db_query(
                            model_class=CacheEntries,
                            query_type="get",
                            filters={"cache_key": l2_hit_key},
-                            single_result=True
+                            single_result=True,
                        )
-                        
+
                        if semantic_cache_results_obj:
                            expires_at = getattr(semantic_cache_results_obj, "expires_at", 0)
                            if time.time() < expires_at:
                                cache_value = getattr(semantic_cache_results_obj, "cache_value", "{}")
                                data = orjson.loads(cache_value)
                                logger.debug(f"L2语义缓存返回的数据: {data}")
-                                
+
                                # 回填 L1
                                self.l1_kv_cache[key] = {"data": data, "expires_at": expires_at}
                                if query_embedding is not None:
@@ -235,7 +238,15 @@ class CacheManager:
        logger.debug(f"缓存未命中: {key}")
        return None

-    async def set(self, tool_name: str, function_args: Dict[str, Any], tool_file_path: Union[str, Path], data: Any, ttl: Optional[int] = None, semantic_query: Optional[str] = None):
+    async def set(
+        self,
+        tool_name: str,
+        function_args: Dict[str, Any],
+        tool_file_path: Union[str, Path],
+        data: Any,
+        ttl: Optional[int] = None,
+        semantic_query: Optional[str] = None,
+    ):
        """将结果存入所有缓存层。"""
        if ttl is None:
            ttl = self.default_ttl
@@ -244,27 +255,22 @@ class CacheManager:

        key = self._generate_key(tool_name, function_args, tool_file_path)
        expires_at = time.time() + ttl
-        
+
        # 写入 L1
        self.l1_kv_cache[key] = {"data": data, "expires_at": expires_at}

        # 写入 L2 (数据库)
        cache_data = {
            "cache_key": key,
-            "cache_value": orjson.dumps(data).decode('utf-8'),
+            "cache_value": orjson.dumps(data).decode("utf-8"),
            "expires_at": expires_at,
            "tool_name": tool_name,
            "created_at": time.time(),
            "last_accessed": time.time(),
-            "access_count": 1
+            "access_count": 1,
        }
-        
-        await db_save(
-            model_class=CacheEntries,
-            data=cache_data,
-            key_field="cache_key",
-            key_value=key
-        )
+
+        await db_save(model_class=CacheEntries, data=cache_data, key_field="cache_key", key_value=key)

        # 写入语义缓存
        if semantic_query and self.embedding_model:
@@ -274,19 +280,19 @@ class CacheManager:
                    embedding_vector = embedding_result[0] if isinstance(embedding_result, tuple) else embedding_result
                    validated_embedding = self._validate_embedding(embedding_vector)
                    if validated_embedding is not None:
-                        embedding = np.array([validated_embedding], dtype='float32')
-                        
+                        embedding = np.array([validated_embedding], dtype="float32")
+
                        # 写入 L1 Vector
                        new_id = self.l1_vector_index.ntotal
                        faiss.normalize_L2(embedding)
                        self.l1_vector_index.add(x=embedding)  # type: ignore
                        self.l1_vector_id_to_key[new_id] = key
-                        
+
                        # 写入 L2 Vector (使用新的服务)
                        vector_db_service.add(
                            collection_name=self.semantic_cache_collection_name,
                            embeddings=embedding.tolist(),
-                            ids=[key]
+                            ids=[key],
                        )
            except Exception as e:
                logger.warning(f"语义缓存写入失败: {e}")
@@ -306,16 +312,16 @@ class CacheManager:
        await db_query(
            model_class=CacheEntries,
            query_type="delete",
-            filters={}  # 删除所有记录
+            filters={},  # 删除所有记录
        )
-        
+
        # 清空 VectorDB
        try:
            vector_db_service.delete_collection(name=self.semantic_cache_collection_name)
            vector_db_service.get_or_create_collection(name=self.semantic_cache_collection_name)
        except Exception as e:
            logger.warning(f"清空 VectorDB 集合失败: {e}")
-        
+
        logger.info("L2 (数据库 & VectorDB) 缓存已清空。")

    async def clear_all(self):
@@ -327,85 +333,23 @@ class CacheManager:
    async def clean_expired(self):
        """清理过期的缓存条目"""
        current_time = time.time()
-        
+
        # 清理L1过期条目
        expired_keys = []
        for key, entry in self.l1_kv_cache.items():
            if current_time >= entry["expires_at"]:
                expired_keys.append(key)
-        
+
        for key in expired_keys:
            del self.l1_kv_cache[key]
-        
+
        # 清理L2过期条目
-        await db_query(
-            model_class=CacheEntries,
-            query_type="delete",
-            filters={"expires_at": {"$lt": current_time}}
-        )
-        
+        await db_query(model_class=CacheEntries, query_type="delete", filters={"expires_at": {"$lt": current_time}})
+
        if expired_keys:
            logger.info(f"清理了 {len(expired_keys)} 个过期的L1缓存条目")

+
 # 全局实例
 tool_cache = CacheManager()

-import inspect
-import time
-
-def wrap_tool_executor():
-    """
-    包装工具执行器以添加缓存功能
-    这个函数应该在系统启动时被调用一次
-    """
-    from src.plugin_system.core.tool_use import ToolExecutor
-    from src.plugin_system.apis.tool_api import get_tool_instance
-    original_execute = ToolExecutor.execute_tool_call
-
-    async def wrapped_execute_tool_call(self, tool_call, tool_instance=None):
-        if not tool_instance:
-            tool_instance = get_tool_instance(tool_call.func_name)
-
-        if not tool_instance or not tool_instance.enable_cache:
-            return await original_execute(self, tool_call, tool_instance)
-
-        try:
-            tool_file_path = inspect.getfile(tool_instance.__class__)
-            semantic_query = None
-            if tool_instance.semantic_cache_query_key:
-                semantic_query = tool_call.args.get(tool_instance.semantic_cache_query_key)
-
-            cached_result = await tool_cache.get(
-                tool_name=tool_call.func_name,
-                function_args=tool_call.args,
-                tool_file_path=tool_file_path,
-                semantic_query=semantic_query
-            )
-            if cached_result:
-                logger.info(f"{getattr(self, 'log_prefix', '')}使用缓存结果，跳过工具 {tool_call.func_name} 执行")
-                return cached_result
-        except Exception as e:
-            logger.error(f"{getattr(self, 'log_prefix', '')}检查工具缓存时出错: {e}")
-
-        result = await original_execute(self, tool_call, tool_instance)
-
-        try:
-            tool_file_path = inspect.getfile(tool_instance.__class__)
-            semantic_query = None
-            if tool_instance.semantic_cache_query_key:
-                semantic_query = tool_call.args.get(tool_instance.semantic_cache_query_key)
-            
-            await tool_cache.set(
-                tool_name=tool_call.func_name,
-                function_args=tool_call.args,
-                tool_file_path=tool_file_path,
-                data=result,
-                ttl=tool_instance.cache_ttl,
-                semantic_query=semantic_query
-            )
-        except Exception as e:
-            logger.error(f"{getattr(self, 'log_prefix', '')}设置工具缓存时出错: {e}")
-
-        return result
-
-    ToolExecutor.execute_tool_call = wrapped_execute_tool_call
--- a/src/llm_models/utils_model.py
+++ b/src/llm_models/utils_model.py
@@ -5,7 +5,7 @@ import random

 from enum import Enum
 from rich.traceback import install
-from typing import Tuple, List, Dict, Optional, Callable, Any, Coroutine
+from typing import Tuple, List, Dict, Optional, Callable, Any, Coroutine, Generator

 from src.common.logger import get_logger
 from src.config.config import model_config
@@ -283,131 +283,130 @@ class LLMRequest:
        tools: Optional[List[Dict[str, Any]]] = None,
        raise_when_empty: bool = True,
    ) -> Tuple[str, Tuple[str, str, Optional[List[ToolCall]]]]:
-        """执行单次请求"""
-        # 模型选择和请求准备
-        start_time = time.time()
-        model_info, api_provider, client = self._select_model()
-        model_name = model_info.name
-        
-        # 检查是否启用反截断
-        use_anti_truncation = getattr(api_provider, "anti_truncation", False)
-        
-        processed_prompt = prompt
-        if use_anti_truncation:
-            processed_prompt += self.anti_truncation_instruction
-            logger.info(f"{api_provider} '{self.task_name}' 已启用反截断功能")
-            
-        processed_prompt = self._apply_content_obfuscation(processed_prompt, api_provider)
-        
-        message_builder = MessageBuilder()
-        message_builder.add_text_content(processed_prompt)
-        messages = [message_builder.build()]
-        tool_built = self._build_tool_options(tools)
-        
-        # 空回复重试逻辑
-        empty_retry_count = 0
-        max_empty_retry = api_provider.max_retry
-        empty_retry_interval = api_provider.retry_interval
-        
-        while empty_retry_count <= max_empty_retry:
+        """
+        执行单次请求，并在模型失败时按顺序切换到下一个可用模型。
+        """
+        failed_models = set()
+        last_exception: Optional[Exception] = None
+
+        model_scheduler = self._model_scheduler(failed_models)
+
+        for model_info, api_provider, client in model_scheduler:
+            start_time = time.time()
+            model_name = model_info.name
+            logger.info(f"正在尝试使用模型: {model_name}")
+
            try:
-                response = await self._execute_request(
-                    api_provider=api_provider,
-                    client=client,
-                    request_type=RequestType.RESPONSE,
-                    model_info=model_info,
-                    message_list=messages,
-                    tool_options=tool_built,
-                    temperature=temperature,
-                    max_tokens=max_tokens,
-                )
-                content = response.content or ""
-                reasoning_content = response.reasoning_content or ""
-                tool_calls = response.tool_calls
-                # 从内容中提取<think>标签的推理内容（向后兼容）
-                if not reasoning_content and content:
-                    content, extracted_reasoning = self._extract_reasoning(content)
-                    reasoning_content = extracted_reasoning
-                
-                is_empty_reply = False
-                is_truncated = False
-                # 检测是否为空回复或截断
-                if not tool_calls:
-                    is_empty_reply = not content or content.strip() == ""
-                    is_truncated = False
-
+                # 检查是否启用反截断
+                use_anti_truncation = getattr(api_provider, "anti_truncation", False)
+                processed_prompt = prompt
                if use_anti_truncation:
-                    if content.endswith("[done]"):
-                        content = content[:-6].strip()
-                        logger.debug("检测到并已移除 [done] 标记")
-                    else:
-                        is_truncated = True
-                        logger.warning("未检测到 [done] 标记，判定为截断")
+                    processed_prompt += self.anti_truncation_instruction
+                    logger.info(f"'{model_name}' for task '{self.task_name}' 已启用反截断功能")
                
-                if is_empty_reply or is_truncated:
-                    if empty_retry_count < max_empty_retry:
-                        empty_retry_count += 1
-                        reason = "空回复" if is_empty_reply else "截断"
-                        logger.warning(f"检测到{reason}，正在进行第 {empty_retry_count}/{max_empty_retry} 次重新生成")
+                processed_prompt = self._apply_content_obfuscation(processed_prompt, api_provider)

-                        if empty_retry_interval > 0:
-                            await asyncio.sleep(empty_retry_interval)
+                message_builder = MessageBuilder()
+                message_builder.add_text_content(processed_prompt)
+                messages = [message_builder.build()]
+                tool_built = self._build_tool_options(tools)

-                        model_info, api_provider, client = self._select_model()
-                        continue
-                    else:
-                        # 已达到最大重试次数，但仍然是空回复或截断
-                        reason = "空回复" if is_empty_reply else "截断"
-                        # 抛出异常，由外层重试逻辑或最终的异常处理器捕获
-                        raise RuntimeError(f"经过 {max_empty_retry + 1} 次尝试后仍然是{reason}的回复")
+                # 针对当前模型的空回复/截断重试逻辑
+                empty_retry_count = 0
+                max_empty_retry = api_provider.max_retry
+                empty_retry_interval = api_provider.retry_interval

-                # 记录使用情况
-                if usage := response.usage:
-                    llm_usage_recorder.record_usage_to_database(
+                while empty_retry_count <= max_empty_retry:
+                    response = await self._execute_request(
+                        api_provider=api_provider,
+                        client=client,
+                        request_type=RequestType.RESPONSE,
                        model_info=model_info,
-                        model_usage=usage,
-                        time_cost=time.time() - start_time,
-                        user_id="system",
-                        request_type=self.request_type,
-                        endpoint="/chat/completions",
+                        message_list=messages,
+                        tool_options=tool_built,
+                        temperature=temperature,
+                        max_tokens=max_tokens,
                    )

-                # 处理空回复
-                if not content and not tool_calls:
-                    if raise_when_empty:
-                        raise RuntimeError(f"经过 {empty_retry_count} 次重试后仍然生成空回复")
-                    content = "生成的响应为空，请检查模型配置或输入内容是否正确"
-                elif empty_retry_count > 0:
-                    logger.info(f"经过 {empty_retry_count} 次重试后成功生成回复")
+                    content = response.content or ""
+                    reasoning_content = response.reasoning_content or ""
+                    tool_calls = response.tool_calls

-                return content, (reasoning_content, model_info.name, tool_calls)
+                    if not reasoning_content and content:
+                        content, extracted_reasoning = self._extract_reasoning(content)
+                        reasoning_content = extracted_reasoning
+
+                    is_empty_reply = not tool_calls and (not content or content.strip() == "")
+                    is_truncated = False
+                    if use_anti_truncation:
+                        if content.endswith("[done]"):
+                            content = content[:-6].strip()
+                        else:
+                            is_truncated = True
+
+                    if is_empty_reply or is_truncated:
+                        empty_retry_count += 1
+                        if empty_retry_count <= max_empty_retry:
+                            reason = "空回复" if is_empty_reply else "截断"
+                            logger.warning(f"模型 '{model_name}' 检测到{reason}，正在进行第 {empty_retry_count}/{max_empty_retry} 次重新生成...")
+                            if empty_retry_interval > 0:
+                                await asyncio.sleep(empty_retry_interval)
+                            continue  # 继续使用当前模型重试
+                        else:
+                            # 当前模型重试次数用尽，跳出内层循环，触发外层循环切换模型
+                            reason = "空回复" if is_empty_reply else "截断"
+                            logger.error(f"模型 '{model_name}' 经过 {max_empty_retry} 次重试后仍然是{reason}的回复。")
+                            raise RuntimeError(f"模型 '{model_name}' 达到最大空回复/截断重试次数")
+
+                    # 成功获取响应
+                    if usage := response.usage:
+                        llm_usage_recorder.record_usage_to_database(
+                            model_info=model_info, model_usage=usage, time_cost=time.time() - start_time,
+                            user_id="system", request_type=self.request_type, endpoint="/chat/completions",
+                        )
+
+                    if not content and not tool_calls:
+                        if raise_when_empty:
+                            raise RuntimeError("生成空回复")
+                        content = "生成的响应为空"
+                    
+                    logger.info(f"模型 '{model_name}' 成功生成回复。")
+                    return content, (reasoning_content, model_name, tool_calls)
+
+            except RespNotOkException as e:
+                if e.status_code in [401, 403]:
+                    logger.error(f"模型 '{model_name}' 遇到认证/权限错误 (Code: {e.status_code})，将尝试下一个模型。")
+                    failed_models.add(model_name)
+                    last_exception = e
+                    continue # 切换到下一个模型
+                else:
+                    logger.error(f"模型 '{model_name}' 请求失败，HTTP状态码: {e.status_code}")
+                    if raise_when_empty:
+                        raise
+                    # 对于其他HTTP错误，直接抛出，不再尝试其他模型
+                    return f"请求失败: {e}", ("", model_name, None)
+
+            except RuntimeError as e:
+                # 捕获所有重试失败（包括空回复和网络问题）
+                logger.error(f"模型 '{model_name}' 在所有重试后仍然失败: {e}，将尝试下一个模型。")
+                failed_models.add(model_name)
+                last_exception = e
+                continue # 切换到下一个模型

            except Exception as e:
-                logger.error(f"请求执行失败: {e}")
-                if raise_when_empty:
-                    # 在非并发模式下，如果第一次尝试就失败，则直接抛出异常
-                    if empty_retry_count == 0:
-                        raise
+                logger.error(f"使用模型 '{model_name}' 时发生未知异常: {e}")
+                failed_models.add(model_name)
+                last_exception = e
+                continue # 切换到下一个模型

-                    # 如果在重试过程中失败，则继续重试
-                    empty_retry_count += 1
-                    if empty_retry_count <= max_empty_retry:
-                        logger.warning(f"请求失败，将在 {empty_retry_interval} 秒后进行第 {empty_retry_count}/{max_empty_retry} 次重试...")
-                        if empty_retry_interval > 0:
-                            await asyncio.sleep(empty_retry_interval)
-                        continue
-                    else:
-                        logger.error(f"经过 {max_empty_retry} 次重试后仍然失败")
-                        raise RuntimeError(f"经过 {max_empty_retry} 次重试后仍然无法生成有效回复") from e
-                else:
-                    # 在并发模式下，单个请求的失败不应中断整个并发流程，
-                    # 而是将异常返回给调用者（即 execute_concurrently）进行统一处理
-                    raise  # 重新抛出异常，由 execute_concurrently 中的 gather 捕获
-        
-        # 重试失败
+        # 所有模型都尝试失败
+        logger.error("所有可用模型都已尝试失败。")
        if raise_when_empty:
-            raise RuntimeError(f"经过 {max_empty_retry} 次重试后仍然无法生成有效回复")
-        return "生成的响应为空，请检查模型配置或输入内容是否正确", ("", model_name, None)
+            if last_exception:
+                raise RuntimeError("所有模型都请求失败") from last_exception
+            raise RuntimeError("所有模型都请求失败，且没有具体的异常信息")
+        
+        return "所有模型都请求失败", ("", "unknown", None)

    async def get_embedding(self, embedding_input: str) -> Tuple[List[float], str]:
        """获取嵌入向量
@@ -446,9 +445,24 @@ class LLMRequest:

        return embedding, model_info.name

+    def _model_scheduler(self, failed_models: set) -> Generator[Tuple[ModelInfo, APIProvider, BaseClient], None, None]:
+        """
+        一个模型调度器，按顺序提供模型，并跳过已失败的模型。
+        """
+        for model_name in self.model_for_task.model_list:
+            if model_name in failed_models:
+                continue
+            
+            model_info = model_config.get_model_info(model_name)
+            api_provider = model_config.get_provider(model_info.api_provider)
+            force_new_client = (self.request_type == "embedding")
+            client = client_registry.get_client_class_instance(api_provider, force_new=force_new_client)
+            
+            yield model_info, api_provider, client
+
    def _select_model(self) -> Tuple[ModelInfo, APIProvider, BaseClient]:
        """
-        根据总tokens和惩罚值选择的模型
+        根据总tokens和惩罚值选择的模型 (负载均衡)
        """
        least_used_model_name = min(
            self.model_usage,
--- a/src/plugin_system/apis/tool_api.py
+++ b/src/plugin_system/apis/tool_api.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict, List, Optional, Type
+from typing import Optional, Type
 from src.plugin_system.base.base_tool import BaseTool
 from src.plugin_system.base.component_types import ComponentType

--- a/src/plugin_system/base/base_plugin.py
+++ b/src/plugin_system/base/base_plugin.py
@@ -1,5 +1,5 @@
 from abc import abstractmethod
-from typing import List, Type, Tuple, Union, TYPE_CHECKING
+from typing import List, Type, Tuple, Union
 from .plugin_base import PluginBase

 from src.common.logger import get_logger
--- a/src/plugin_system/base/plus_command.py
+++ b/src/plugin_system/base/plus_command.py
@@ -4,7 +4,7 @@
 """

 from abc import ABC, abstractmethod
-from typing import Dict, Tuple, Optional, List
+from typing import Tuple, Optional, List
 import re

 from src.common.logger import get_logger
--- a/src/plugin_system/core/tool_use.py
+++ b/src/plugin_system/core/tool_use.py
@@ -7,8 +7,10 @@ from src.llm_models.utils_model import LLMRequest
 from src.llm_models.payload_content import ToolCall
 from src.config.config import global_config, model_config
 from src.chat.utils.prompt_builder import Prompt, global_prompt_manager
+import inspect
 from src.chat.message_receive.chat_stream import get_chat_manager
 from src.common.logger import get_logger
+from src.common.cache_manager import tool_cache

 logger = get_logger("tool_use")

@@ -184,21 +186,65 @@ class ToolExecutor:
        return tool_results, used_tools

    async def execute_tool_call(self, tool_call: ToolCall, tool_instance: Optional[BaseTool] = None) -> Optional[Dict[str, Any]]:
-        # sourcery skip: use-assigned-variable
-        """执行单个工具调用
+        """执行单个工具调用，并处理缓存"""
+        
+        function_args = tool_call.args or {}
+        tool_instance = tool_instance or get_tool_instance(tool_call.func_name)

-        Args:
-            tool_call: 工具调用对象
+        # 如果工具不存在或未启用缓存，则直接执行
+        if not tool_instance or not tool_instance.enable_cache:
+            return await self._original_execute_tool_call(tool_call, tool_instance)

-        Returns:
-            Optional[Dict]: 工具调用结果，如果失败则返回None
-        """
+        # --- 缓存逻辑开始 ---
+        try:
+            tool_file_path = inspect.getfile(tool_instance.__class__)
+            semantic_query = None
+            if tool_instance.semantic_cache_query_key:
+                semantic_query = function_args.get(tool_instance.semantic_cache_query_key)
+
+            cached_result = await tool_cache.get(
+                tool_name=tool_call.func_name,
+                function_args=function_args,
+                tool_file_path=tool_file_path,
+                semantic_query=semantic_query
+            )
+            if cached_result:
+                logger.info(f"{self.log_prefix}使用缓存结果，跳过工具 {tool_call.func_name} 执行")
+                return cached_result
+        except Exception as e:
+            logger.error(f"{self.log_prefix}检查工具缓存时出错: {e}")
+
+        # 缓存未命中，执行原始工具调用
+        result = await self._original_execute_tool_call(tool_call, tool_instance)
+
+        # 将结果存入缓存
+        try:
+            tool_file_path = inspect.getfile(tool_instance.__class__)
+            semantic_query = None
+            if tool_instance.semantic_cache_query_key:
+                semantic_query = function_args.get(tool_instance.semantic_cache_query_key)
+            
+            await tool_cache.set(
+                tool_name=tool_call.func_name,
+                function_args=function_args,
+                tool_file_path=tool_file_path,
+                data=result,
+                ttl=tool_instance.cache_ttl,
+                semantic_query=semantic_query
+            )
+        except Exception as e:
+            logger.error(f"{self.log_prefix}设置工具缓存时出错: {e}")
+        # --- 缓存逻辑结束 ---
+
+        return result
+
+    async def _original_execute_tool_call(self, tool_call: ToolCall, tool_instance: Optional[BaseTool] = None) -> Optional[Dict[str, Any]]:
+        """执行单个工具调用的原始逻辑"""
        try:
            function_name = tool_call.func_name
            function_args = tool_call.args or {}
-            logger.info(f"🤖 {self.log_prefix} 正在执行工具: [bold green]{function_name}[/bold green] | 参数: {function_args}")
+            logger.info(f"{self.log_prefix} 正在执行工具: [bold green]{function_name}[/bold green] | 参数: {function_args}")
            function_args["llm_called"] = True  # 标记为LLM调用
-
            # 获取对应工具实例
            tool_instance = tool_instance or get_tool_instance(function_name)
            if not tool_instance:
--- a/src/plugins/built_in/maizone_refactored/services/content_service.py
+++ b/src/plugins/built_in/maizone_refactored/services/content_service.py
@@ -9,12 +9,9 @@ import datetime
 import base64
 import aiohttp
 from src.common.logger import get_logger
-import base64
-import aiohttp
 import imghdr
 import asyncio
-from src.common.logger import get_logger
-from src.plugin_system.apis import llm_api, config_api, generator_api, person_api
+from src.plugin_system.apis import llm_api, config_api, generator_api
 from src.chat.message_receive.chat_stream import get_chat_manager
 from maim_message import UserInfo
 from src.llm_models.utils_model import LLMRequest
--- a/src/plugins/built_in/permission_management/plugin.py
+++ b/src/plugins/built_in/permission_management/plugin.py
@@ -16,7 +16,7 @@ from src.plugin_system.apis.permission_api import permission_api
 from src.plugin_system.apis.logging_api import get_logger
 from src.plugin_system.base.component_types import PlusCommandInfo, ChatType
 from src.plugin_system.base.config_types import ConfigField
-from src.plugin_system.utils.permission_decorators import require_permission, require_master, PermissionChecker
+from src.plugin_system.utils.permission_decorators import require_permission


 logger = get_logger("Permission")
--- a/src/schedule/schedule_manager.py
+++ b/src/schedule/schedule_manager.py
@@ -411,7 +411,6 @@ class ScheduleManager:
        通过关键词匹配、唤醒度、睡眠压力等综合判断是否处于休眠时间。
        新增弹性睡眠机制，允许在压力低时延迟入睡，并在入睡前发送通知。
        """
-        from src.chat.chat_loop.wakeup_manager import WakeUpManager
        # --- 基础检查 ---
        if not global_config.schedule.enable_is_sleep:
            return False