bot.py (33 changed lines)
@@ -74,36 +74,6 @@ def easter_egg():
     print(rainbow_text)
-
-
-def scan_provider(env_config: dict):
-    provider = {}
-
-    # Deduplicate the new environment variables against the env_mask captured before env initialization,
-    # so variables such as GPG_KEY do not interfere with the check
-    env_config = dict(filter(lambda item: item[0] not in env_mask, env_config.items()))
-
-    # Iterate over every key in env_config
-    for key in env_config:
-        # Check whether the key matches the {provider}_BASE_URL or {provider}_KEY pattern
-        if key.endswith("_BASE_URL") or key.endswith("_KEY"):
-            # Extract the provider name
-            provider_name = key.split("_", 1)[0]  # split once from the left, take the first part
-
-            # Initialize this provider's dict (if not done yet)
-            if provider_name not in provider:
-                provider[provider_name] = {"url": None, "key": None}
-
-            # Fill in url or key depending on the key suffix
-            if key.endswith("_BASE_URL"):
-                provider[provider_name]["url"] = env_config[key]
-            elif key.endswith("_KEY"):
-                provider[provider_name]["key"] = env_config[key]
-
-    # Verify that every provider has both a url and a key
-    for provider_name, config in provider.items():
-        if config["url"] is None or config["key"] is None:
-            logger.error(f"provider 内容:{config}\nenv_config 内容:{env_config}")
-            raise ValueError(f"请检查 '{provider_name}' 提供商配置是否丢失 BASE_URL 或 KEY 环境变量")
-
-
 async def graceful_shutdown():
     try:
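For context, the convention the removed scan_provider enforced was one *_BASE_URL/*_KEY pair of environment variables per provider; a minimal sketch of the check it performed (the SILICONFLOW name is a placeholder, not taken from this diff):

    # Hypothetical inputs, for illustration only:
    # scan_provider({"SILICONFLOW_BASE_URL": "https://example.invalid/v1",
    #                "SILICONFLOW_KEY": "sk-xxx"})   # passes: url and key both present
    # scan_provider({"SILICONFLOW_BASE_URL": "https://example.invalid/v1"})
    #                                               # raises ValueError: the KEY half is missing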
@@ -229,9 +199,6 @@ def raw_main():
 
     easter_egg()
 
-    env_config = {key: os.getenv(key) for key in os.environ}
-    scan_provider(env_config)
-
     # Return a MainSystem instance
     return MainSystem()
 
@@ -981,8 +981,9 @@ class DefaultReplyer:
         with Timer("LLM生成", {}):  # internal timer, optional to keep
             # Weighted random pick of one model config
             selected_model_config = self._select_weighted_model_config()
+            model_display_name = selected_model_config.get('model_name') or selected_model_config.get('name', 'N/A')
             logger.info(
-                f"使用模型生成回复: {selected_model_config.get('name', 'N/A')} (选中概率: {selected_model_config.get('weight', 1.0)})"
+                f"使用模型生成回复: {model_display_name} (选中概率: {selected_model_config.get('weight', 1.0)})"
             )

             express_model = LLMRequest(
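`_select_weighted_model_config` itself is outside this hunk; a plausible minimal implementation of such a weighted pick, assuming each config dict carries an optional `weight` key (as the log line suggests), would be:

import random

def select_weighted_model_config(configs: list[dict]) -> dict:
    # Missing weights default to 1.0, matching the default shown in the log line above.
    weights = [c.get("weight", 1.0) for c in configs]
    return random.choices(configs, weights=weights, k=1)[0]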
src/config/api_ada_configs.py (new file, 180 lines)
@@ -0,0 +1,180 @@
+from dataclasses import dataclass, field
+from typing import List, Dict, Union
+import threading
+import time
+
+from packaging.version import Version
+
+NEWEST_VER = "0.1.1"  # newest config version currently supported
+
+
+@dataclass
+class APIProvider:
+    name: str = ""  # API provider name
+    base_url: str = ""  # API base URL
+    api_key: str = field(repr=False, default="")  # API key (backward compatible)
+    api_keys: List[str] = field(repr=False, default_factory=list)  # list of API keys (new format)
+    client_type: str = "openai"  # client type (e.g. openai/google; defaults to openai)
+
+    # State for multi-API-key management
+    _current_key_index: int = field(default=0, init=False, repr=False)  # index of the key currently in use
+    _key_failure_count: Dict[int, int] = field(default_factory=dict, init=False, repr=False)  # failure count per key
+    _key_last_failure_time: Dict[int, float] = field(default_factory=dict, init=False, repr=False)  # last failure time per key
+    _lock: threading.Lock = field(default_factory=threading.Lock, init=False, repr=False)  # thread lock
+
+    def __post_init__(self):
+        """Post-init hook: keep the API key fields consistent."""
+        # Backward compatibility: if only api_key is set, add it to api_keys
+        if self.api_key and not self.api_keys:
+            self.api_keys = [self.api_key]
+        # If api_keys is non-empty but api_key is empty, use the first key
+        elif self.api_keys and not self.api_key:
+            self.api_key = self.api_keys[0]
+
+        # Initialize the failure counters
+        for i in range(len(self.api_keys)):
+            self._key_failure_count[i] = 0
+            self._key_last_failure_time[i] = 0
+
+    def get_current_api_key(self) -> str:
+        """Return the API key that should currently be used."""
+        with self._lock:
+            if not self.api_keys:
+                return ""
+
+            # Keep the index within bounds
+            if self._current_key_index >= len(self.api_keys):
+                self._current_key_index = 0
+
+            return self.api_keys[self._current_key_index]
+
+    def get_next_api_key(self) -> Union[str, None]:
+        """Return the next available API key (load balancing)."""
+        with self._lock:
+            if not self.api_keys:
+                return None
+
+            # With a single key, return it directly
+            if len(self.api_keys) == 1:
+                return self.api_keys[0]
+
+            # Round-robin to the next key
+            self._current_key_index = (self._current_key_index + 1) % len(self.api_keys)
+            return self.api_keys[self._current_key_index]
+
+    def mark_key_failed(self, api_key: str) -> Union[str, None]:
+        """Mark an API key as failed and return the next usable key."""
+        with self._lock:
+            if not self.api_keys or api_key not in self.api_keys:
+                return None
+
+            key_index = self.api_keys.index(api_key)
+            self._key_failure_count[key_index] += 1
+            self._key_last_failure_time[key_index] = time.time()
+
+            # Look for the next usable key
+            current_time = time.time()
+            for _ in range(len(self.api_keys)):
+                self._current_key_index = (self._current_key_index + 1) % len(self.api_keys)
+                next_key_index = self._current_key_index
+
+                # Skip a key that failed recently (more than 3 failures within the last 5 minutes)
+                if (self._key_failure_count[next_key_index] <= 3 or
+                        current_time - self._key_last_failure_time[next_key_index] > 300):  # retry after 5 minutes
+                    return self.api_keys[next_key_index]
+
+            # If no key is usable, return the current key (let the caller handle it)
+            return api_key
+
+    def reset_key_failures(self, api_key: str | None = None):
+        """Reset the failure counters (call after a successful request)."""
+        with self._lock:
+            if api_key and api_key in self.api_keys:
+                key_index = self.api_keys.index(api_key)
+                self._key_failure_count[key_index] = 0
+                self._key_last_failure_time[key_index] = 0
+            else:
+                # Reset the failure counters of every key
+                for i in range(len(self.api_keys)):
+                    self._key_failure_count[i] = 0
+                    self._key_last_failure_time[i] = 0
+
+    def get_api_key_stats(self) -> Dict[str, Dict[str, Union[int, float]]]:
+        """Return usage statistics for the API keys."""
+        with self._lock:
+            stats = {}
+            for i, key in enumerate(self.api_keys):
+                # Show only the first 8 and last 4 characters of a key, masking the middle
+                masked_key = f"{key[:8]}***{key[-4:]}" if len(key) > 12 else "***"
+                stats[masked_key] = {
+                    "failure_count": self._key_failure_count.get(i, 0),
+                    "last_failure_time": self._key_last_failure_time.get(i, 0),
+                    "is_current": i == self._current_key_index
+                }
+            return stats
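A minimal sketch of how a caller is expected to drive this rotation API (the provider name and keys below are placeholders):

# Illustration only:
# provider = APIProvider(name="demo", base_url="https://example.invalid/v1",
#                        api_keys=["sk-aaa", "sk-bbb"])
# key = provider.get_current_api_key()   # -> "sk-aaa"
# key = provider.mark_key_failed(key)    # records the failure, rotates to "sk-bbb"
# provider.reset_key_failures(key)       # clear the counters after a successful call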
+
+
+@dataclass
+class ModelInfo:
+    model_identifier: str = ""  # model identifier (used in URL calls)
+    name: str = ""  # model name (used by modules)
+    api_provider: str = ""  # API provider (e.g. OpenAI, Azure)
+
+    # The fields below are used for model billing
+    price_in: float = 0.0  # price per M input tokens
+    price_out: float = 0.0  # price per M output tokens
+
+    force_stream_mode: bool = False  # whether to force streaming output mode
+
+    # New: task type and capability fields
+    task_type: str = ""  # task type: llm_normal, llm_reasoning, vision, embedding, speech
+    capabilities: List[str] = field(default_factory=list)  # model capabilities: text, vision, embedding, speech, tool_calling, reasoning
+
+
+@dataclass
+class RequestConfig:
+    max_retry: int = 2  # maximum number of retries after a single model API call fails
+    timeout: int = (
+        10  # API call timeout; past this the request counts as "timed out" (seconds)
+    )
+    retry_interval: int = 10  # interval between retries after a failed API call (seconds)
+    default_temperature: float = 0.7  # default temperature used when bot_config.toml sets no temperature
+    default_max_tokens: int = 1024  # default max output tokens used when bot_config.toml sets no max_tokens
+
+
+@dataclass
+class ModelUsageArgConfigItem:
+    """Model-usage configuration.
+
+    Loads and stores the model-usage configuration of a subtask.
+    """
+
+    name: str = ""  # model name
+    temperature: float | None = None  # temperature
+    max_tokens: int | None = None  # maximum token count
+    max_retry: int | None = None  # maximum retries on a failed call
+
+
+@dataclass
+class ModelUsageArgConfig:
+    """Per-subtask model configuration.
+
+    Loads and stores the model configuration used by a subtask.
+    """
+
+    name: str = ""  # task name
+    usage: List[ModelUsageArgConfigItem] = field(
+        default_factory=lambda: []
+    )  # models used by the task
+
+
+
+@dataclass
+class ModuleConfig:
+    INNER_VERSION: Version | None = None  # config file version
+
+    req_conf: RequestConfig = field(default_factory=lambda: RequestConfig())  # request configuration
+    api_providers: Dict[str, APIProvider] = field(
+        default_factory=lambda: {}
+    )  # API providers
+    models: Dict[str, ModelInfo] = field(default_factory=lambda: {})  # models
+    task_model_arg_map: Dict[str, ModelUsageArgConfig] = field(
+        default_factory=lambda: {}
+    )
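To make the relationships between these dataclasses concrete, a self-contained sketch wiring them together by hand (provider "demo", model "demo-chat", and task "replyer" are invented for illustration):

# Illustration only: wiring the dataclasses together by hand.
# Assumes: from src.config.api_ada_configs import (APIProvider, ModelInfo,
#     ModelUsageArgConfig, ModelUsageArgConfigItem, ModuleConfig)
config = ModuleConfig(
    api_providers={
        "demo": APIProvider(name="demo", base_url="https://example.invalid/v1", api_keys=["sk-xxx"]),
    },
    models={
        "demo-chat": ModelInfo(model_identifier="demo-chat-v1", name="demo-chat",
                               api_provider="demo", task_type="llm_normal"),
    },
    task_model_arg_map={
        "replyer": ModelUsageArgConfig(name="replyer",
                                       usage=[ModelUsageArgConfigItem(name="demo-chat")]),
    },
)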
@@ -1,162 +0,0 @@
-import shutil
-import tomlkit
-from tomlkit.items import Table, KeyType
-from pathlib import Path
-from datetime import datetime
-
-
-def get_key_comment(toml_table, key):
-    # Get the comment on a key (if any)
-    if hasattr(toml_table, "trivia") and hasattr(toml_table.trivia, "comment"):
-        return toml_table.trivia.comment
-    if hasattr(toml_table, "value") and isinstance(toml_table.value, dict):
-        item = toml_table.value.get(key)
-        if item is not None and hasattr(item, "trivia"):
-            return item.trivia.comment
-    if hasattr(toml_table, "keys"):
-        for k in toml_table.keys():
-            if isinstance(k, KeyType) and k.key == key:
-                return k.trivia.comment
-    return None
-
-
-def compare_dicts(new, old, path=None, new_comments=None, old_comments=None, logs=None):
-    # Recursively compare two dicts, find added and removed keys, and collect their comments
-    if path is None:
-        path = []
-    if logs is None:
-        logs = []
-    if new_comments is None:
-        new_comments = {}
-    if old_comments is None:
-        old_comments = {}
-    # Added keys
-    for key in new:
-        if key == "version":
-            continue
-        if key not in old:
-            comment = get_key_comment(new, key)
-            logs.append(f"新增: {'.'.join(path + [str(key)])} 注释: {comment or '无'}")
-        elif isinstance(new[key], (dict, Table)) and isinstance(old.get(key), (dict, Table)):
-            compare_dicts(new[key], old[key], path + [str(key)], new_comments, old_comments, logs)
-    # Removed keys
-    for key in old:
-        if key == "version":
-            continue
-        if key not in new:
-            comment = get_key_comment(old, key)
-            logs.append(f"删减: {'.'.join(path + [str(key)])} 注释: {comment or '无'}")
-    return logs
-
-
-def update_config():
-    print("开始更新配置文件...")
-    # Resolve the root directory path
-    root_dir = Path(__file__).parent.parent.parent.parent
-    template_dir = root_dir / "template"
-    config_dir = root_dir / "config"
-    old_config_dir = config_dir / "old"
-
-    # Create the old directory (if missing)
-    old_config_dir.mkdir(exist_ok=True)
-
-    # Define file paths
-    template_path = template_dir / "bot_config_template.toml"
-    old_config_path = config_dir / "bot_config.toml"
-    new_config_path = config_dir / "bot_config.toml"
-
-    # Read the old config file
-    old_config = {}
-    if old_config_path.exists():
-        print(f"发现旧配置文件: {old_config_path}")
-        with open(old_config_path, "r", encoding="utf-8") as f:
-            old_config = tomlkit.load(f)
-
-    # Build a timestamped backup file name
-    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-    old_backup_path = old_config_dir / f"bot_config_{timestamp}.toml"
-
-    # Move the old config file into the old directory
-    shutil.move(old_config_path, old_backup_path)
-    print(f"已备份旧配置文件到: {old_backup_path}")
-
-    # Copy the template into the config directory
-    print(f"从模板文件创建新配置: {template_path}")
-    shutil.copy2(template_path, new_config_path)
-
-    # Read the new config file
-    with open(new_config_path, "r", encoding="utf-8") as f:
-        new_config = tomlkit.load(f)
-
-    # Check whether the versions match
-    if old_config and "inner" in old_config and "inner" in new_config:
-        old_version = old_config["inner"].get("version")  # type: ignore
-        new_version = new_config["inner"].get("version")  # type: ignore
-        if old_version and new_version and old_version == new_version:
-            print(f"检测到版本号相同 (v{old_version}),跳过更新")
-            # Same version: restore the old config file and return
-            shutil.move(old_backup_path, old_config_path)  # type: ignore
-            return
-        else:
-            print(f"检测到版本号不同: 旧版本 v{old_version} -> 新版本 v{new_version}")
-
-    # Print added/removed keys with their comments
-    if old_config:
-        print("配置项变动如下:")
-        logs = compare_dicts(new_config, old_config)
-        if logs:
-            for log in logs:
-                print(log)
-        else:
-            print("无新增或删减项")
-
-    # Recursively update the config
-    def update_dict(target, source):
-        for key, value in source.items():
-            # Skip the version field
-            if key == "version":
-                continue
-            if key in target:
-                if isinstance(value, dict) and isinstance(target[key], (dict, Table)):
-                    update_dict(target[key], value)
-                else:
-                    try:
-                        # Special handling for arrays
-                        if isinstance(value, list):
-                            # Keep empty arrays empty
-                            if not value:
-                                target[key] = tomlkit.array()
-                            else:
-                                # Special-case regex arrays and structures containing regexes
-                                if key == "ban_msgs_regex":
-                                    # Use the raw value without further processing
-                                    target[key] = value
-                                elif key == "regex_rules":
-                                    # regex_rules needs its regex fields handled specially
-                                    target[key] = value
-                                else:
-                                    # Check for dict items that carry a regex
-                                    contains_regex = False
-                                    if value and isinstance(value[0], dict) and "regex" in value[0]:
-                                        contains_regex = True
-
-                                    target[key] = value if contains_regex else tomlkit.array(str(value))
-                        else:
-                            # Other types: build the new value via tomlkit.item
-                            target[key] = tomlkit.item(value)
-                    except (TypeError, ValueError):
-                        # Fall back to direct assignment if conversion fails
-                        target[key] = value
-
-    # Merge the old config values into the new config
-    print("开始合并新旧配置...")
-    update_dict(new_config, old_config)
-
-    # Save the updated config (preserving comments and formatting)
-    with open(new_config_path, "w", encoding="utf-8") as f:
-        f.write(tomlkit.dumps(new_config))
-    print("配置文件更新完成")
-
-
-if __name__ == "__main__":
-    update_config()
@@ -7,6 +7,10 @@ from tomlkit import TOMLDocument
 from tomlkit.items import Table, KeyType
 from dataclasses import field, dataclass
 from rich.traceback import install
+from packaging import version
+from packaging.specifiers import SpecifierSet
+from packaging.version import Version, InvalidVersion
+from typing import Any, Dict, List
 
 from src.common.logger import get_logger
 from src.config.config_base import ConfigBase
@@ -36,6 +40,17 @@ from src.config.official_configs import (
     CustomPromptConfig,
 )
+
+from .api_ada_configs import (
+    ModelUsageArgConfigItem,
+    ModelUsageArgConfig,
+    APIProvider,
+    ModelInfo,
+    NEWEST_VER,
+    ModuleConfig,
+)
+
 
 install(extra_lines=3)
 
@@ -52,6 +67,273 @@ TEMPLATE_DIR = os.path.join(PROJECT_ROOT, "template")
 MMC_VERSION = "0.9.1"
 
 
+def _get_config_version(toml: Dict) -> Version:
+    """Extract the version of the config file.
+
+    Args:
+        toml[dict]: parsed config-file dict
+    Returns:
+        Version
+    """
+
+    if "inner" in toml and "version" in toml["inner"]:
+        config_version: str = toml["inner"]["version"]
+    else:
+        config_version = "0.0.0"  # default version
+
+    try:
+        ver = version.parse(config_version)
+    except InvalidVersion as e:
+        logger.error(
+            "配置文件中 inner段 的 version 键是错误的版本描述\n"
+            f"请检查配置文件,当前 version 键: {config_version}\n"
+            f"错误信息: {e}"
+        )
+        raise InvalidVersion(
+            "配置文件中 inner段 的 version 键是错误的版本描述\n"
+        ) from e
+
+    return ver
+
+
+def _request_conf(parent: Dict, config: ModuleConfig):
+    request_conf_config = parent.get("request_conf")
+    config.req_conf.max_retry = request_conf_config.get(
+        "max_retry", config.req_conf.max_retry
+    )
+    config.req_conf.timeout = request_conf_config.get(
+        "timeout", config.req_conf.timeout
+    )
+    config.req_conf.retry_interval = request_conf_config.get(
+        "retry_interval", config.req_conf.retry_interval
+    )
+    config.req_conf.default_temperature = request_conf_config.get(
+        "default_temperature", config.req_conf.default_temperature
+    )
+    config.req_conf.default_max_tokens = request_conf_config.get(
+        "default_max_tokens", config.req_conf.default_max_tokens
+    )
+
+
+def _api_providers(parent: Dict, config: ModuleConfig):
+    api_providers_config = parent.get("api_providers")
+    for provider in api_providers_config:
+        name = provider.get("name", None)
+        base_url = provider.get("base_url", None)
+        api_key = provider.get("api_key", None)
+        api_keys = provider.get("api_keys", [])  # new: multiple API keys supported
+        client_type = provider.get("client_type", "openai")
+
+        if name in config.api_providers:  # duplicate check
+            logger.error(f"重复的API提供商名称: {name},请检查配置文件。")
+            raise KeyError(f"重复的API提供商名称: {name},请检查配置文件。")
+
+        if name and base_url:
+            # API key handling: either a single api_key or a list of api_keys
+            if api_keys:
+                # New format: api_keys list
+                logger.debug(f"API提供商 '{name}' 配置了 {len(api_keys)} 个API Key")
+            elif api_key:
+                # Backward compatibility: a single api_key
+                api_keys = [api_key]
+                logger.debug(f"API提供商 '{name}' 使用单个API Key(向后兼容模式)")
+            else:
+                logger.warning(f"API提供商 '{name}' 没有配置API Key,某些功能可能不可用")
+
+            config.api_providers[name] = APIProvider(
+                name=name,
+                base_url=base_url,
+                api_key=api_key,  # kept for backward compatibility
+                api_keys=api_keys,  # new format
+                client_type=client_type,
+            )
+        else:
+            logger.error(f"API提供商 '{name}' 的配置不完整,请检查配置文件。")
+            raise ValueError(f"API提供商 '{name}' 的配置不完整,请检查配置文件。")
+
+
+def _models(parent: Dict, config: ModuleConfig):
+    models_config = parent.get("models")
+    for model in models_config:
+        model_identifier = model.get("model_identifier", None)
+        name = model.get("name", model_identifier)
+        api_provider = model.get("api_provider", None)
+        price_in = model.get("price_in", 0.0)
+        price_out = model.get("price_out", 0.0)
+        force_stream_mode = model.get("force_stream_mode", False)
+        task_type = model.get("task_type", "")
+        capabilities = model.get("capabilities", [])
+
+        if name in config.models:  # duplicate check
+            logger.error(f"重复的模型名称: {name},请检查配置文件。")
+            raise KeyError(f"重复的模型名称: {name},请检查配置文件。")
+
+        if model_identifier and api_provider:
+            # Make sure the API provider was declared
+            if api_provider not in config.api_providers:
+                logger.error(f"未声明的API提供商 '{api_provider}' ,请检查配置文件。")
+                raise ValueError(
+                    f"未声明的API提供商 '{api_provider}' ,请检查配置文件。"
+                )
+            config.models[name] = ModelInfo(
+                name=name,
+                model_identifier=model_identifier,
+                api_provider=api_provider,
+                price_in=price_in,
+                price_out=price_out,
+                force_stream_mode=force_stream_mode,
+                task_type=task_type,
+                capabilities=capabilities,
+            )
+        else:
+            logger.error(f"模型 '{name}' 的配置不完整,请检查配置文件。")
+            raise ValueError(f"模型 '{name}' 的配置不完整,请检查配置文件。")
+
+
+def _task_model_usage(parent: Dict, config: ModuleConfig):
+    model_usage_configs = parent.get("task_model_usage")
+    config.task_model_arg_map = {}
+    for task_name, item in model_usage_configs.items():
+        if task_name in config.task_model_arg_map:
+            logger.error(f"子任务 {task_name} 已存在,请检查配置文件。")
+            raise KeyError(f"子任务 {task_name} 已存在,请检查配置文件。")
+
+        usage = []
+        if isinstance(item, Dict):
+            if "model" in item:
+                usage.append(
+                    ModelUsageArgConfigItem(
+                        name=item["model"],
+                        temperature=item.get("temperature", None),
+                        max_tokens=item.get("max_tokens", None),
+                        max_retry=item.get("max_retry", None),
+                    )
+                )
+            else:
+                logger.error(f"子任务 {task_name} 的模型配置不合法,请检查配置文件。")
+                raise ValueError(
+                    f"子任务 {task_name} 的模型配置不合法,请检查配置文件。"
+                )
+        elif isinstance(item, List):
+            for model in item:
+                if isinstance(model, Dict):
+                    usage.append(
+                        ModelUsageArgConfigItem(
+                            name=model["model"],
+                            temperature=model.get("temperature", None),
+                            max_tokens=model.get("max_tokens", None),
+                            max_retry=model.get("max_retry", None),
+                        )
+                    )
+                elif isinstance(model, str):
+                    usage.append(
+                        ModelUsageArgConfigItem(
+                            name=model,
+                            temperature=None,
+                            max_tokens=None,
+                            max_retry=None,
+                        )
+                    )
+                else:
+                    logger.error(
+                        f"子任务 {task_name} 的模型配置不合法,请检查配置文件。"
+                    )
+                    raise ValueError(
+                        f"子任务 {task_name} 的模型配置不合法,请检查配置文件。"
+                    )
+        elif isinstance(item, str):
+            usage.append(
+                ModelUsageArgConfigItem(
+                    name=item,
+                    temperature=None,
+                    max_tokens=None,
+                    max_retry=None,
+                )
+            )
+
+        config.task_model_arg_map[task_name] = ModelUsageArgConfig(
+            name=task_name,
+            usage=usage,
+        )
+
+
+def api_ada_load_config(config_path: str) -> ModuleConfig:
+    """Load the configuration from a TOML config file."""
+    config = ModuleConfig()
+
+    include_configs: Dict[str, Dict[str, Any]] = {
+        "request_conf": {
+            "func": _request_conf,
+            "support": ">=0.0.0",
+            "necessary": False,
+        },
+        "api_providers": {"func": _api_providers, "support": ">=0.0.0"},
+        "models": {"func": _models, "support": ">=0.0.0"},
+        "task_model_usage": {"func": _task_model_usage, "support": ">=0.0.0"},
+    }
+
+    if os.path.exists(config_path):
+        with open(config_path, "rb") as f:
+            try:
+                toml_dict = tomlkit.load(f)
+            except tomlkit.TOMLDecodeError as e:
+                logger.critical(
+                    f"配置文件model_list.toml填写有误,请检查第{e.lineno}行第{e.colno}处:{e.msg}"
+                )
+                exit(1)
+
+        # Read the config-file version
+        config.INNER_VERSION = _get_config_version(toml_dict)
+
+        # Version check
+        if config.INNER_VERSION > Version(NEWEST_VER):
+            logger.warning(
+                f"当前配置文件版本 {config.INNER_VERSION} 高于支持的最新版本 {NEWEST_VER},可能导致异常,建议更新依赖。"
+            )
+
+        # Parse the config file
+        # If a required section is present, dispatch to that section's handler
+        for key in include_configs:
+            if key in toml_dict:
+                group_specifier_set: SpecifierSet = SpecifierSet(
+                    include_configs[key]["support"]
+                )
+
+                # Check that the config-file version is within the supported range
+                if config.INNER_VERSION in group_specifier_set:
+                    # Version in range: emit the section's notice, if any
+                    if "notice" in include_configs[key]:
+                        logger.warning(include_configs[key]["notice"])
+                    # Dispatch to the section's handler
+                    (include_configs[key]["func"])(toml_dict, config)
+                else:
+                    # Version out of range: crash with a hint for the user
+                    logger.error(
+                        f"配置文件中的 '{key}' 字段的版本 ({config.INNER_VERSION}) 不在支持范围内。\n"
+                        f"当前程序仅支持以下版本范围: {group_specifier_set}"
+                    )
+                    raise InvalidVersion(
+                        f"当前程序仅支持以下版本范围: {group_specifier_set}"
+                    )
+
+            # If a "necessary" key exists and is explicitly False, handle it specially
+            elif (
+                "necessary" in include_configs[key]
+                and include_configs[key].get("necessary") is False
+            ):
+                # Sections handled with pass could simply be ignored, but they are
+                # spelled out explicitly here to keep the control flow easy to follow
+                if key == "keywords_reaction":
+                    pass
+            else:
+                # The user is missing a required section entirely
+                logger.error(f"配置文件中缺少必需的字段: '{key}'")
+                raise KeyError(f"配置文件中缺少必需的字段: '{key}'")
+
+    logger.info(f"成功加载配置文件: {config_path}")
+
+    return config
+
 def get_key_comment(toml_table, key):
     # Get the comment on a key (if any)
     if hasattr(toml_table, "trivia") and hasattr(toml_table.trivia, "comment"):
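For orientation, an illustrative model config TOML that this loader would accept; every name, key, and URL below is a placeholder and not part of the diff:

[inner]
version = "0.1.1"

[request_conf]          # optional ("necessary" is False)
max_retry = 2
timeout = 10

[[api_providers]]
name = "demo"
base_url = "https://example.invalid/v1"
api_keys = ["sk-aaa", "sk-bbb"]   # or the legacy single api_key = "sk-aaa"
client_type = "openai"

[[models]]
model_identifier = "demo-chat-v1"
name = "demo-chat"
api_provider = "demo"
task_type = "llm_normal"

[task_model_usage]
replyer = { model = "demo-chat", temperature = 0.7 }   # dict form; a bare string or a list also parses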
@@ -133,37 +415,74 @@ def compare_default_values(new, old, path=None, logs=None, changes=None):
     return logs, changes
 
 
-def update_config():
+def _get_version_from_toml(toml_path):
+    """Read the version number from a TOML file."""
+    if not os.path.exists(toml_path):
+        return None
+    with open(toml_path, "r", encoding="utf-8") as f:
+        doc = tomlkit.load(f)
+    if "inner" in doc and "version" in doc["inner"]:  # type: ignore
+        return doc["inner"]["version"]  # type: ignore
+    return None
+
+
+def _version_tuple(v):
+    """Convert a version string into a tuple for comparison."""
+    if v is None:
+        return (0,)
+    return tuple(int(x) if x.isdigit() else 0 for x in str(v).replace("v", "").split("-")[0].split("."))
+
+
+def _update_dict(target: TOMLDocument | dict | Table, source: TOMLDocument | dict):
+    """
+    Copy values from source into target (for keys that already exist in target).
+    """
+    for key, value in source.items():
+        # Skip the version field
+        if key == "version":
+            continue
+        if key in target:
+            target_value = target[key]
+            if isinstance(value, dict) and isinstance(target_value, (dict, Table)):
+                _update_dict(target_value, value)
+            else:
+                try:
+                    # Special handling for arrays
+                    if isinstance(value, list):
+                        # Keep empty arrays empty
+                        target[key] = tomlkit.array(str(value)) if value else tomlkit.array()
+                    else:
+                        # Other types: build the new value via tomlkit.item
+                        target[key] = tomlkit.item(value)
+                except (TypeError, ValueError):
+                    # Fall back to direct assignment if conversion fails
+                    target[key] = value
+
+
+def _update_config_generic(config_name: str, template_name: str, should_quit_on_new: bool = True):
+    """
+    Generic config-file update routine.
+
+    Args:
+        config_name: config file name without extension, e.g. 'bot_config' or 'model_config'
+        template_name: template file name without extension, e.g. 'bot_config_template' or 'model_config_template'
+        should_quit_on_new: whether to exit the program after creating a brand-new config file
+    """
     # Resolve the root directory paths
     old_config_dir = os.path.join(CONFIG_DIR, "old")
     compare_dir = os.path.join(TEMPLATE_DIR, "compare")
 
     # Define file paths
-    template_path = os.path.join(TEMPLATE_DIR, "bot_config_template.toml")
+    template_path = os.path.join(TEMPLATE_DIR, f"{template_name}.toml")
-    old_config_path = os.path.join(CONFIG_DIR, "bot_config.toml")
+    old_config_path = os.path.join(CONFIG_DIR, f"{config_name}.toml")
-    new_config_path = os.path.join(CONFIG_DIR, "bot_config.toml")
+    new_config_path = os.path.join(CONFIG_DIR, f"{config_name}.toml")
-    compare_path = os.path.join(compare_dir, "bot_config_template.toml")
+    compare_path = os.path.join(compare_dir, f"{template_name}.toml")
 
     # Create the compare directory (if missing)
     os.makedirs(compare_dir, exist_ok=True)
 
-    # Handle the template under compare
-    def get_version_from_toml(toml_path):
-        if not os.path.exists(toml_path):
-            return None
-        with open(toml_path, "r", encoding="utf-8") as f:
-            doc = tomlkit.load(f)
-        if "inner" in doc and "version" in doc["inner"]:  # type: ignore
-            return doc["inner"]["version"]  # type: ignore
-        return None
-
-    template_version = get_version_from_toml(template_path)
-    compare_version = get_version_from_toml(compare_path)
-
-    def version_tuple(v):
-        if v is None:
-            return (0,)
-        return tuple(int(x) if x.isdigit() else 0 for x in str(v).replace("v", "").split("-")[0].split("."))
-
+    template_version = _get_version_from_toml(template_path)
+    compare_version = _get_version_from_toml(compare_path)
+
     # Read the template under compare first (if present) to detect default-value changes
     if os.path.exists(compare_path):
@@ -183,7 +502,7 @@ def update_config():
             old_config = tomlkit.load(f)
         logs, changes = compare_default_values(new_config, compare_config)
         if logs:
-            logger.info("检测到模板默认值变动如下:")
+            logger.info(f"检测到{config_name}模板默认值变动如下:")
             for log in logs:
                 logger.info(log)
             # If the old config still equals the old default, move it to the new default
@@ -192,10 +511,10 @@ def update_config():
                 if old_value == old_default:
                     set_value_by_path(old_config, path, new_default)
                     logger.info(
-                        f"已自动将配置 {'.'.join(path)} 的值从旧默认值 {old_default} 更新为新默认值 {new_default}"
+                        f"已自动将{config_name}配置 {'.'.join(path)} 的值从旧默认值 {old_default} 更新为新默认值 {new_default}"
                     )
         else:
-            logger.info("未检测到模板默认值变动")
+            logger.info(f"未检测到{config_name}模板默认值变动")
         # Persist the old-config changes (the merge logic below still uses old_config)
     else:
         old_config = None
@@ -203,22 +522,25 @@ def update_config():
     # If compare has no template, or the new template's version is higher, copy it over
     if not os.path.exists(compare_path):
         shutil.copy2(template_path, compare_path)
-        logger.info(f"已将模板文件复制到: {compare_path}")
+        logger.info(f"已将{config_name}模板文件复制到: {compare_path}")
     else:
-        if version_tuple(template_version) > version_tuple(compare_version):
+        if _version_tuple(template_version) > _version_tuple(compare_version):
             shutil.copy2(template_path, compare_path)
-            logger.info(f"模板版本较新,已替换compare下的模板: {compare_path}")
+            logger.info(f"{config_name}模板版本较新,已替换compare下的模板: {compare_path}")
         else:
-            logger.debug(f"compare下的模板版本不低于当前模板,无需替换: {compare_path}")
+            logger.debug(f"compare下的{config_name}模板版本不低于当前模板,无需替换: {compare_path}")
 
     # Check whether the config file exists
     if not os.path.exists(old_config_path):
-        logger.info("配置文件不存在,从模板创建新配置")
+        logger.info(f"{config_name}.toml配置文件不存在,从模板创建新配置")
         os.makedirs(CONFIG_DIR, exist_ok=True)  # create the directory
         shutil.copy2(template_path, old_config_path)  # copy the template file
-        logger.info(f"已创建新配置文件,请填写后重新运行: {old_config_path}")
+        logger.info(f"已创建新{config_name}配置文件,请填写后重新运行: {old_config_path}")
-        # For a freshly created config file, return directly
-        quit()
+        # For a freshly created config file, the parameter decides whether to exit
+        if should_quit_on_new:
+            quit()
+        else:
+            return
 
     # Read the old config and the template (re-read old_config here if it was not read above)
     if old_config is None:
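A quick sanity check of `_version_tuple`'s comparison semantics, with values worked out from the code above:

# _version_tuple("0.1.1")        -> (0, 1, 1)
# _version_tuple("v0.2.0-beta")  -> (0, 2, 0)   # leading "v" and any "-suffix" are stripped
# _version_tuple(None)           -> (0,)
# so _version_tuple("0.2.0-beta") > _version_tuple("0.1.9"), as the template-copy logic expects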
@@ -226,38 +548,36 @@ def update_config():
         old_config = tomlkit.load(f)
     # new_config has already been read
 
-    # compare_config is read only for default-value change detection; the merge logic below no longer uses it
-
     # Check whether the versions match
     if old_config and "inner" in old_config and "inner" in new_config:
         old_version = old_config["inner"].get("version")  # type: ignore
         new_version = new_config["inner"].get("version")  # type: ignore
         if old_version and new_version and old_version == new_version:
-            logger.info(f"检测到配置文件版本号相同 (v{old_version}),跳过更新")
+            logger.info(f"检测到{config_name}配置文件版本号相同 (v{old_version}),跳过更新")
             return
         else:
             logger.info(
-                f"\n----------------------------------------\n检测到版本号不同: 旧版本 v{old_version} -> 新版本 v{new_version}\n----------------------------------------"
+                f"\n----------------------------------------\n检测到{config_name}版本号不同: 旧版本 v{old_version} -> 新版本 v{new_version}\n----------------------------------------"
             )
     else:
-        logger.info("已有配置文件未检测到版本号,可能是旧版本。将进行更新")
+        logger.info(f"已有{config_name}配置文件未检测到版本号,可能是旧版本。将进行更新")
 
     # Create the old directory (if missing)
     os.makedirs(old_config_dir, exist_ok=True)  # then build a timestamped backup name
     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-    old_backup_path = os.path.join(old_config_dir, f"bot_config_{timestamp}.toml")
+    old_backup_path = os.path.join(old_config_dir, f"{config_name}_{timestamp}.toml")
 
     # Move the old config file into the old directory
     shutil.move(old_config_path, old_backup_path)
-    logger.info(f"已备份旧配置文件到: {old_backup_path}")
+    logger.info(f"已备份旧{config_name}配置文件到: {old_backup_path}")
 
     # Copy the template into the config directory
     shutil.copy2(template_path, new_config_path)
-    logger.info(f"已创建新配置文件: {new_config_path}")
+    logger.info(f"已创建新{config_name}配置文件: {new_config_path}")
 
     # Print added/removed keys with their comments
     if old_config:
-        logger.info("配置项变动如下:\n----------------------------------------")
+        logger.info(f"{config_name}配置项变动如下:\n----------------------------------------")
         logs = compare_dicts(new_config, old_config)
         if logs:
             for log in logs:
@@ -265,40 +585,24 @@ def update_config():
         else:
             logger.info("无新增或删减项")
 
-    def update_dict(target: TOMLDocument | dict | Table, source: TOMLDocument | dict):
-        """
-        Copy values from source into target (for keys that already exist in target).
-        """
-        for key, value in source.items():
-            # Skip the version field
-            if key == "version":
-                continue
-            if key in target:
-                target_value = target[key]
-                if isinstance(value, dict) and isinstance(target_value, (dict, Table)):
-                    update_dict(target_value, value)
-                else:
-                    try:
-                        # Special handling for arrays
-                        if isinstance(value, list):
-                            # Keep empty arrays empty
-                            target[key] = tomlkit.array(str(value)) if value else tomlkit.array()
-                        else:
-                            # Other types: build the new value via tomlkit.item
-                            target[key] = tomlkit.item(value)
-                    except (TypeError, ValueError):
-                        # Fall back to direct assignment if conversion fails
-                        target[key] = value
-
     # Merge the old config values into the new config
-    logger.info("开始合并新旧配置...")
+    logger.info(f"开始合并{config_name}新旧配置...")
-    update_dict(new_config, old_config)
+    _update_dict(new_config, old_config)
 
     # Save the updated config (preserving comments and formatting)
     with open(new_config_path, "w", encoding="utf-8") as f:
         f.write(tomlkit.dumps(new_config))
-    logger.info("配置文件更新完成,建议检查新配置文件中的内容,以免丢失重要信息")
+    logger.info(f"{config_name}配置文件更新完成,建议检查新配置文件中的内容,以免丢失重要信息")
-    quit()
+
+
+def update_config():
+    """Update the bot_config.toml config file."""
+    _update_config_generic("bot_config", "bot_config_template", should_quit_on_new=True)
+
+
+def update_model_config():
+    """Update the model_config.toml config file."""
+    _update_config_generic("model_config", "model_config_template", should_quit_on_new=False)
 
 
 @dataclass
@@ -360,7 +664,9 @@ def get_config_dir() -> str:
 # Resolve the config file paths
 logger.info(f"MaiCore当前版本: {MMC_VERSION}")
 update_config()
+update_model_config()
+
 logger.info("正在品鉴配置文件...")
 global_config = load_config(config_path=os.path.join(CONFIG_DIR, "bot_config.toml"))
+model_config = api_ada_load_config(config_path=os.path.join(CONFIG_DIR, "model_config.toml"))
 logger.info("非常的新鲜,非常的美味!")
@@ -4,6 +4,7 @@ from dataclasses import dataclass, field
 from typing import Any, Literal, Optional
 
 from src.config.config_base import ConfigBase
+from packaging.version import Version
 
 """
 Notes:
@@ -598,7 +599,6 @@ class LPMMKnowledgeConfig(ConfigBase):
     embedding_dimension: int = 1024
     """Embedding vector dimension; it should match the model's output dimension"""
 
-
 @dataclass
 class ModelConfig(ConfigBase):
     """Model configuration class"""
src/llm_models/LICENSE (new file, 21 lines)
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2025 Mai.To.The.Gate
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

src/llm_models/__init__.py (new empty file)
src/llm_models/exceptions.py (new file, 69 lines)
@@ -0,0 +1,69 @@
+from typing import Any
+
+
+# Common error-code mapping (using the OpenAI API as the reference)
+error_code_mapping = {
+    400: "参数不正确",
+    401: "API-Key错误,认证失败,请检查/config/model_list.toml中的配置是否正确",
+    402: "账号余额不足",
+    403: "模型拒绝访问,可能需要实名或余额不足",
+    404: "Not Found",
+    413: "请求体过大,请尝试压缩图片或减少输入内容",
+    429: "请求过于频繁,请稍后再试",
+    500: "服务器内部故障",
+    503: "服务器负载过高",
+}
+
+
+class NetworkConnectionError(Exception):
+    """Connection failure, typically a network problem or an unavailable server"""
+
+    def __init__(self):
+        super().__init__()
+
+    def __str__(self):
+        return "连接异常,请检查网络连接状态或URL是否正确"
+
+
+class ReqAbortException(Exception):
+    """Request aborted abnormally, typically because it was interrupted or cancelled"""
+
+    def __init__(self, message: str | None = None):
+        super().__init__(message)
+        self.message = message
+
+    def __str__(self):
+        return self.message or "请求因未知原因异常终止"
+
+
+class RespNotOkException(Exception):
+    """Unsuccessful response (anything other than '200 OK')"""
+
+    def __init__(self, status_code: int, message: str | None = None):
+        super().__init__(message)
+        self.status_code = status_code
+        self.message = message
+
+    def __str__(self):
+        if self.status_code in error_code_mapping:
+            return error_code_mapping[self.status_code]
+        elif self.message:
+            return self.message
+        else:
+            return f"未知的异常响应代码:{self.status_code}"
+
+
+class RespParseException(Exception):
+    """Response parsing error, typically a malformed response or a mismatched parser"""
+
+    def __init__(self, ext_info: Any, message: str | None = None):
+        super().__init__(message)
+        self.ext_info = ext_info
+        self.message = message
+
+    def __str__(self):
+        return (
+            self.message
+            if self.message
+            else "解析响应内容时发生未知错误,请检查是否配置了正确的解析方法"
+        )
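A short sketch of how these exceptions are meant to be consumed by callers (the status code is arbitrary):

from src.llm_models.exceptions import RespNotOkException

try:
    raise RespNotOkException(429)  # arbitrary code for the demo
except RespNotOkException as e:
    # str(e) resolves known codes through error_code_mapping
    print(e.status_code, str(e))   # 429 请求过于频繁,请稍后再试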
380
src/llm_models/model_client/__init__.py
Normal file
380
src/llm_models/model_client/__init__.py
Normal file
@@ -0,0 +1,380 @@
|
|||||||
|
import asyncio
|
||||||
|
from typing import Callable, Any
|
||||||
|
|
||||||
|
from openai import AsyncStream
|
||||||
|
from openai.types.chat import ChatCompletionChunk, ChatCompletion
|
||||||
|
|
||||||
|
from .base_client import BaseClient, APIResponse
|
||||||
|
from src.config.api_ada_configs import (
|
||||||
|
ModelInfo,
|
||||||
|
ModelUsageArgConfigItem,
|
||||||
|
RequestConfig,
|
||||||
|
ModuleConfig,
|
||||||
|
)
|
||||||
|
from ..exceptions import (
|
||||||
|
NetworkConnectionError,
|
||||||
|
ReqAbortException,
|
||||||
|
RespNotOkException,
|
||||||
|
RespParseException,
|
||||||
|
)
|
||||||
|
from ..payload_content.message import Message
|
||||||
|
from ..payload_content.resp_format import RespFormat
|
||||||
|
from ..payload_content.tool_option import ToolOption
|
||||||
|
from ..utils import compress_messages
|
||||||
|
from src.common.logger import get_logger
|
||||||
|
|
||||||
|
logger = get_logger("模型客户端")
|
||||||
|
|
||||||
|
|
||||||
|
def _check_retry(
|
||||||
|
remain_try: int,
|
||||||
|
retry_interval: int,
|
||||||
|
can_retry_msg: str,
|
||||||
|
cannot_retry_msg: str,
|
||||||
|
can_retry_callable: Callable | None = None,
|
||||||
|
**kwargs,
|
||||||
|
) -> tuple[int, Any | None]:
|
||||||
|
"""
|
||||||
|
辅助函数:检查是否可以重试
|
||||||
|
:param remain_try: 剩余尝试次数
|
||||||
|
:param retry_interval: 重试间隔
|
||||||
|
:param can_retry_msg: 可以重试时的提示信息
|
||||||
|
:param cannot_retry_msg: 不可以重试时的提示信息
|
||||||
|
:return: (等待间隔(如果为0则不等待,为-1则不再请求该模型), 新的消息列表(适用于压缩消息))
|
||||||
|
"""
|
||||||
|
if remain_try > 0:
|
||||||
|
# 还有重试机会
|
||||||
|
logger.warning(f"{can_retry_msg}")
|
||||||
|
if can_retry_callable is not None:
|
||||||
|
return retry_interval, can_retry_callable(**kwargs)
|
||||||
|
else:
|
||||||
|
return retry_interval, None
|
||||||
|
else:
|
||||||
|
# 达到最大重试次数
|
||||||
|
logger.warning(f"{cannot_retry_msg}")
|
||||||
|
return -1, None # 不再重试请求该模型
|
||||||
|
|
||||||
|
|
||||||
|
def _handle_resp_not_ok(
|
||||||
|
e: RespNotOkException,
|
||||||
|
task_name: str,
|
||||||
|
model_name: str,
|
||||||
|
remain_try: int,
|
||||||
|
retry_interval: int = 10,
|
||||||
|
messages: tuple[list[Message], bool] | None = None,
|
||||||
|
):
|
||||||
|
"""
|
||||||
|
处理响应错误异常
|
||||||
|
:param e: 异常对象
|
||||||
|
:param task_name: 任务名称
|
||||||
|
:param model_name: 模型名称
|
||||||
|
:param remain_try: 剩余尝试次数
|
||||||
|
:param retry_interval: 重试间隔
|
||||||
|
:param messages: (消息列表, 是否已压缩过)
|
||||||
|
:return: (等待间隔(如果为0则不等待,为-1则不再请求该模型), 新的消息列表(适用于压缩消息))
|
||||||
|
"""
|
||||||
|
# 响应错误
|
||||||
|
if e.status_code in [401, 403]:
|
||||||
|
# API Key认证错误 - 让多API Key机制处理,给一次重试机会
|
||||||
|
if remain_try > 0:
|
||||||
|
logger.warning(
|
||||||
|
f"任务-'{task_name}' 模型-'{model_name}'\n"
|
||||||
|
f"API Key认证失败(错误代码-{e.status_code}),多API Key机制会自动切换"
|
||||||
|
)
|
||||||
|
return 0, None # 立即重试,让底层客户端切换API Key
|
||||||
|
else:
|
||||||
|
logger.warning(
|
||||||
|
f"任务-'{task_name}' 模型-'{model_name}'\n"
|
||||||
|
f"所有API Key都认证失败,错误代码-{e.status_code},错误信息-{e.message}"
|
||||||
|
)
|
||||||
|
return -1, None # 不再重试请求该模型
|
||||||
|
elif e.status_code in [400, 402, 404]:
|
||||||
|
# 其他客户端错误(不应该重试)
|
||||||
|
logger.warning(
|
||||||
|
f"任务-'{task_name}' 模型-'{model_name}'\n"
|
||||||
|
f"请求失败,错误代码-{e.status_code},错误信息-{e.message}"
|
||||||
|
)
|
||||||
|
return -1, None # 不再重试请求该模型
|
||||||
|
elif e.status_code == 413:
|
||||||
|
if messages and not messages[1]:
|
||||||
|
# 消息列表不为空且未压缩,尝试压缩消息
|
||||||
|
return _check_retry(
|
||||||
|
remain_try,
|
||||||
|
0,
|
||||||
|
can_retry_msg=(
|
||||||
|
f"任务-'{task_name}' 模型-'{model_name}'\n"
|
||||||
|
"请求体过大,尝试压缩消息后重试"
|
||||||
|
),
|
||||||
|
cannot_retry_msg=(
|
||||||
|
f"任务-'{task_name}' 模型-'{model_name}'\n"
|
||||||
|
"请求体过大,压缩消息后仍然过大,放弃请求"
|
||||||
|
),
|
||||||
|
can_retry_callable=compress_messages,
|
||||||
|
messages=messages[0],
|
||||||
|
)
|
||||||
|
# 没有消息可压缩
|
||||||
|
logger.warning(
|
||||||
|
f"任务-'{task_name}' 模型-'{model_name}'\n"
|
||||||
|
"请求体过大,无法压缩消息,放弃请求。"
|
||||||
|
)
|
||||||
|
return -1, None
|
||||||
|
elif e.status_code == 429:
|
||||||
|
# 请求过于频繁 - 让多API Key机制处理,适当延迟后重试
|
||||||
|
return _check_retry(
|
||||||
|
remain_try,
|
||||||
|
min(retry_interval, 5), # 限制最大延迟为5秒,让API Key切换更快生效
|
||||||
|
can_retry_msg=(
|
||||||
|
f"任务-'{task_name}' 模型-'{model_name}'\n"
|
||||||
|
f"请求过于频繁,多API Key机制会自动切换,{min(retry_interval, 5)}秒后重试"
|
||||||
|
),
|
||||||
|
cannot_retry_msg=(
|
||||||
|
f"任务-'{task_name}' 模型-'{model_name}'\n"
|
||||||
|
"请求过于频繁,所有API Key都被限制,放弃请求"
|
||||||
|
),
|
||||||
|
)
|
||||||
|
elif e.status_code >= 500:
|
||||||
|
# 服务器错误
|
||||||
|
return _check_retry(
|
||||||
|
remain_try,
|
||||||
|
retry_interval,
|
||||||
|
can_retry_msg=(
|
||||||
|
f"任务-'{task_name}' 模型-'{model_name}'\n"
|
||||||
|
f"服务器错误,将于{retry_interval}秒后重试"
|
||||||
|
),
|
||||||
|
cannot_retry_msg=(
|
||||||
|
f"任务-'{task_name}' 模型-'{model_name}'\n"
|
||||||
|
"服务器错误,超过最大重试次数,请稍后再试"
|
||||||
|
),
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
# 未知错误
|
||||||
|
logger.warning(
|
||||||
|
f"任务-'{task_name}' 模型-'{model_name}'\n"
|
||||||
|
f"未知错误,错误代码-{e.status_code},错误信息-{e.message}"
|
||||||
|
)
|
||||||
|
return -1, None
|
||||||
|
|
||||||
|
|
||||||
|
def default_exception_handler(
|
||||||
|
e: Exception,
|
||||||
|
task_name: str,
|
||||||
|
model_name: str,
|
||||||
|
remain_try: int,
|
||||||
|
retry_interval: int = 10,
|
||||||
|
messages: tuple[list[Message], bool] | None = None,
|
||||||
|
) -> tuple[int, list[Message] | None]:
|
||||||
|
"""
|
||||||
|
默认异常处理函数
|
||||||
|
:param e: 异常对象
|
||||||
|
:param task_name: 任务名称
|
||||||
|
:param model_name: 模型名称
|
||||||
|
:param remain_try: 剩余尝试次数
|
||||||
|
:param retry_interval: 重试间隔
|
||||||
|
:param messages: (消息列表, 是否已压缩过)
|
||||||
|
:return (等待间隔(如果为0则不等待,为-1则不再请求该模型), 新的消息列表(适用于压缩消息))
|
||||||
|
"""
|
||||||
|
|
||||||
|
if isinstance(e, NetworkConnectionError): # 网络连接错误
|
||||||
|
# 网络错误可能是某个API Key的端点问题,给多API Key机制一次快速重试机会
|
||||||
|
return _check_retry(
|
||||||
|
remain_try,
|
||||||
|
min(retry_interval, 3), # 网络错误时减少等待时间,让API Key切换更快
|
||||||
|
can_retry_msg=(
|
||||||
|
f"任务-'{task_name}' 模型-'{model_name}'\n"
|
||||||
|
f"连接异常,多API Key机制会尝试其他Key,{min(retry_interval, 3)}秒后重试"
|
||||||
|
),
|
||||||
|
cannot_retry_msg=(
|
||||||
|
f"任务-'{task_name}' 模型-'{model_name}'\n"
|
||||||
|
f"连接异常,超过最大重试次数,请检查网络连接状态或URL是否正确"
|
||||||
|
),
|
||||||
|
)
|
||||||
|
elif isinstance(e, ReqAbortException):
|
||||||
|
logger.warning(
|
||||||
|
f"任务-'{task_name}' 模型-'{model_name}'\n请求被中断,详细信息-{str(e.message)}"
|
||||||
|
)
|
||||||
|
return -1, None # 不再重试请求该模型
|
||||||
|
elif isinstance(e, RespNotOkException):
|
||||||
|
return _handle_resp_not_ok(
|
||||||
|
e,
|
||||||
|
task_name,
|
||||||
|
model_name,
|
||||||
|
remain_try,
|
||||||
|
retry_interval,
|
||||||
|
messages,
|
||||||
|
)
|
||||||
|
elif isinstance(e, RespParseException):
|
||||||
|
# 响应解析错误
|
||||||
|
logger.error(
|
||||||
|
f"任务-'{task_name}' 模型-'{model_name}'\n"
|
||||||
|
f"响应解析错误,错误信息-{e.message}\n"
|
||||||
|
)
|
||||||
|
logger.debug(f"附加内容:\n{str(e.ext_info)}")
|
||||||
|
return -1, None # 不再重试请求该模型
|
||||||
|
else:
|
||||||
|
logger.error(
|
||||||
|
f"任务-'{task_name}' 模型-'{model_name}'\n未知异常,错误信息-{str(e)}"
|
||||||
|
)
|
||||||
|
return -1, None # 不再重试请求该模型
|
||||||
|
|
||||||
|
|
||||||
|
class ModelRequestHandler:
|
||||||
|
"""
|
||||||
|
模型请求处理器
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
task_name: str,
|
||||||
|
config: ModuleConfig,
|
||||||
|
api_client_map: dict[str, BaseClient],
|
||||||
|
):
|
||||||
|
self.task_name: str = task_name
|
||||||
|
"""任务名称"""
|
||||||
|
|
||||||
|
self.client_map: dict[str, BaseClient] = {}
|
||||||
|
"""API客户端列表"""
|
||||||
|
|
||||||
|
self.configs: list[tuple[ModelInfo, ModelUsageArgConfigItem]] = []
|
||||||
|
"""模型参数配置"""
|
||||||
|
|
||||||
|
self.req_conf: RequestConfig = config.req_conf
|
||||||
|
"""请求配置"""
|
||||||
|
|
||||||
|
# 获取模型与使用配置
|
||||||
|
for model_usage in config.task_model_arg_map[task_name].usage:
|
||||||
|
if model_usage.name not in config.models:
|
||||||
|
logger.error(f"Model '{model_usage.name}' not found in ModelManager")
|
||||||
|
raise KeyError(f"Model '{model_usage.name}' not found in ModelManager")
|
||||||
|
model_info = config.models[model_usage.name]
|
||||||
|
|
||||||
|
if model_info.api_provider not in self.client_map:
|
||||||
|
# 缓存API客户端
|
||||||
|
self.client_map[model_info.api_provider] = api_client_map[
|
                model_info.api_provider
            ]

            self.configs.append((model_info, model_usage))  # register the (model, usage-config) pair

    async def get_response(
        self,
        messages: list[Message],
        tool_options: list[ToolOption] | None = None,
        response_format: RespFormat | None = None,  # not enabled yet
        stream_response_handler: Callable[
            [AsyncStream[ChatCompletionChunk], asyncio.Event | None], APIResponse
        ]
        | None = None,
        async_response_parser: Callable[[ChatCompletion], APIResponse] | None = None,
        interrupt_flag: asyncio.Event | None = None,
    ) -> APIResponse:
        """
        Get a chat completion response.
        :param messages: message list
        :param tool_options: tool option list
        :param response_format: response format
        :param stream_response_handler: stream response handler (optional)
        :param async_response_parser: response parser (optional)
        :param interrupt_flag: interrupt event (optional, defaults to None)
        :return: APIResponse
        """
        # Walk the available models in order; if one fails, fall back to the next.
        for config_item in self.configs:
            client = self.client_map[config_item[0].api_provider]
            model_info: ModelInfo = config_item[0]
            model_usage_config: ModelUsageArgConfigItem = config_item[1]

            remain_try = (
                model_usage_config.max_retry or self.req_conf.max_retry
            ) + 1  # remaining attempts = max retries + 1

            compressed_messages = None
            retry_interval = self.req_conf.retry_interval
            while remain_try > 0:
                try:
                    return await client.get_response(
                        model_info,
                        message_list=(compressed_messages or messages),
                        tool_options=tool_options,
                        max_tokens=model_usage_config.max_tokens
                        or self.req_conf.default_max_tokens,
                        temperature=model_usage_config.temperature
                        or self.req_conf.default_temperature,
                        response_format=response_format,
                        stream_response_handler=stream_response_handler,
                        async_response_parser=async_response_parser,
                        interrupt_flag=interrupt_flag,
                    )
                except Exception as e:
                    logger.debug(e)
                    remain_try -= 1  # one attempt used up

                    # Handle the exception; pass the local interval so the
                    # exponential backoff below actually takes effect.
                    handle_res = default_exception_handler(
                        e,
                        self.task_name,
                        model_info.name,
                        remain_try,
                        retry_interval=retry_interval,
                        messages=(messages, compressed_messages is not None),
                    )

                    if handle_res[0] == -1:
                        # A wait of -1 means: stop requesting this model.
                        remain_try = 0
                    elif handle_res[0] != 0:
                        # A non-zero wait means: sleep before the next attempt.
                        await asyncio.sleep(handle_res[0])
                        retry_interval *= 2

                    if handle_res[1] is not None:
                        # Compressed messages were returned; use them from now on.
                        compressed_messages = handle_res[1]

        logger.error(f"Task '{self.task_name}': request failed, no model is available")
        raise RuntimeError("Request failed: no model is available")  # every attempt failed

    async def get_embedding(
        self,
        embedding_input: str,
    ) -> APIResponse:
        """
        Get an embedding vector.
        :param embedding_input: embedding input
        :return: APIResponse
        """
        for config in self.configs:
            client = self.client_map[config[0].api_provider]
            model_info: ModelInfo = config[0]
            model_usage_config: ModelUsageArgConfigItem = config[1]
            remain_try = (
                model_usage_config.max_retry or self.req_conf.max_retry
            ) + 1  # remaining attempts = max retries + 1

            while remain_try > 0:
                try:
                    return await client.get_embedding(
                        model_info=model_info,
                        embedding_input=embedding_input,
                    )
                except Exception as e:
                    logger.debug(e)
                    remain_try -= 1  # one attempt used up

                    # Handle the exception.
                    handle_res = default_exception_handler(
                        e,
                        self.task_name,
                        model_info.name,
                        remain_try,
                        retry_interval=self.req_conf.retry_interval,
                    )

                    if handle_res[0] == -1:
                        # A wait of -1 means: stop requesting this model.
                        remain_try = 0
                    elif handle_res[0] != 0:
                        # A non-zero wait means: sleep before the next attempt.
                        await asyncio.sleep(handle_res[0])

        logger.error(f"Task '{self.task_name}': request failed, no model is available")
        raise RuntimeError("Request failed: no model is available")  # every attempt failed
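
A minimal sketch of driving the fallback loop above from calling code. Only get_response and the MessageBuilder API come from this PR; the handler object itself is assumed to be already configured (e.g. obtained from ModelManager):

    from src.llm_models.payload_content.message import MessageBuilder, RoleType

    async def ask(handler, question: str):
        messages = [
            MessageBuilder().set_role(RoleType.System).add_text_content("You are a helpful bot.").build(),
            MessageBuilder().add_text_content(question).build(),
        ]
        # get_response walks self.configs in order and raises RuntimeError only
        # after every configured model has exhausted its retries
        resp = await handler.get_response(messages)
        return resp.content, resp.usage
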
116
src/llm_models/model_client/base_client.py
Normal file
@@ -0,0 +1,116 @@
import asyncio
from dataclasses import dataclass
from typing import Callable, Any

from openai import AsyncStream
from openai.types.chat import ChatCompletionChunk, ChatCompletion

from src.config.api_ada_configs import ModelInfo, APIProvider
from ..payload_content.message import Message
from ..payload_content.resp_format import RespFormat
from ..payload_content.tool_option import ToolOption, ToolCall


@dataclass
class UsageRecord:
    """
    Usage record.
    """

    model_name: str
    """Model name"""

    provider_name: str
    """Provider name"""

    prompt_tokens: int
    """Prompt token count"""

    completion_tokens: int
    """Completion token count"""

    total_tokens: int
    """Total token count"""


@dataclass
class APIResponse:
    """
    API response.
    """

    content: str | None = None
    """Response content"""

    reasoning_content: str | None = None
    """Reasoning content"""

    tool_calls: list[ToolCall] | None = None
    """Tool calls [(tool name, tool arguments), ...]"""

    embedding: list[float] | None = None
    """Embedding vector"""

    usage: UsageRecord | None = None
    """Usage (prompt_tokens, completion_tokens, total_tokens)"""

    raw_data: Any = None
    """Raw response data"""


class BaseClient:
    """
    Base client.
    """

    api_provider: APIProvider

    def __init__(self, api_provider: APIProvider):
        self.api_provider = api_provider

    async def get_response(
        self,
        model_info: ModelInfo,
        message_list: list[Message],
        tool_options: list[ToolOption] | None = None,
        max_tokens: int = 1024,
        temperature: float = 0.7,
        response_format: RespFormat | None = None,
        stream_response_handler: Callable[
            [AsyncStream[ChatCompletionChunk], asyncio.Event | None],
            tuple[APIResponse, tuple[int, int, int]],
        ]
        | None = None,
        async_response_parser: Callable[
            [ChatCompletion], tuple[APIResponse, tuple[int, int, int]]
        ]
        | None = None,
        interrupt_flag: asyncio.Event | None = None,
    ) -> APIResponse:
        """
        Get a chat completion response.
        :param model_info: model info
        :param message_list: conversation body
        :param tool_options: tool options (optional, defaults to None)
        :param max_tokens: maximum token count (optional, defaults to 1024)
        :param temperature: temperature (optional, defaults to 0.7)
        :param response_format: response format (optional, defaults to NotGiven)
        :param stream_response_handler: stream response handler (optional)
        :param async_response_parser: response parser (optional)
        :param interrupt_flag: interrupt event (optional, defaults to None)
        :return: APIResponse
        """
        raise RuntimeError("This method should be overridden in subclasses")

    async def get_embedding(
        self,
        model_info: ModelInfo,
        embedding_input: str,
    ) -> APIResponse:
        """
        Get a text embedding.
        :param model_info: model info
        :param embedding_input: input text to embed
        :return: embedding response
        """
        raise RuntimeError("This method should be overridden in subclasses")
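
Since a concrete client only has to override the two coroutines above, the contract is easy to satisfy with a network-free test double. A sketch (EchoClient is illustrative, not part of this PR):

    from src.llm_models.model_client.base_client import APIResponse, BaseClient

    class EchoClient(BaseClient):
        async def get_response(self, model_info, message_list, **kwargs) -> APIResponse:
            # echo the last message back; a real client would call its SDK here
            last = message_list[-1].content
            return APIResponse(content=last if isinstance(last, str) else "<multimodal>")

        async def get_embedding(self, model_info, embedding_input: str) -> APIResponse:
            return APIResponse(embedding=[0.0] * 8)  # fixed-size dummy vector
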
573
src/llm_models/model_client/gemini_client.py
Normal file
@@ -0,0 +1,573 @@
import asyncio
import base64
import io
from collections.abc import Iterable
from typing import Callable, Iterator, TypeVar, AsyncIterator

from google import genai
from google.genai import types
from google.genai.types import FunctionDeclaration, GenerateContentResponse
from google.genai.errors import (
    ClientError,
    ServerError,
    UnknownFunctionCallArgumentError,
    UnsupportedFunctionError,
    FunctionInvocationError,
)

from .base_client import APIResponse, UsageRecord
from src.config.api_ada_configs import ModelInfo, APIProvider
from . import BaseClient
from src.common.logger import get_logger

from ..exceptions import (
    RespParseException,
    NetworkConnectionError,
    RespNotOkException,
    ReqAbortException,
)
from ..payload_content.message import Message, RoleType
from ..payload_content.resp_format import RespFormat, RespFormatType
from ..payload_content.tool_option import ToolOption, ToolParam, ToolCall

logger = get_logger("Gemini Client")
T = TypeVar("T")


def _convert_messages(
    messages: list[Message],
) -> tuple[list[types.Content], list[str] | None]:
    """
    Convert messages into the format required by the Gemini API.
    :param messages: message list
    :return: (converted messages, system instructions if present)
    """

    def _convert_message_item(message: Message) -> types.Content:
        """
        Convert a single message; system and tool messages are handled by the caller.
        :param message: message object
        :return: converted Content object
        """
        # Map OpenAI-style roles onto Gemini roles.
        if message.role == RoleType.Assistant:
            role = "model"
        elif message.role == RoleType.User:
            role = "user"
        else:
            raise RuntimeError("Unreachable code: build messages with MessageBuilder")

        # Build the content parts.
        parts: list[types.Part]
        if isinstance(message.content, str):
            parts = [types.Part.from_text(text=message.content)]
        elif isinstance(message.content, list):
            parts = []
            for item in message.content:
                if isinstance(item, tuple):
                    # (format, base64) pairs are decoded back to raw bytes for the API
                    parts.append(
                        types.Part.from_bytes(
                            data=base64.b64decode(item[1]),
                            mime_type=f"image/{item[0].lower()}",
                        )
                    )
                elif isinstance(item, str):
                    parts.append(types.Part.from_text(text=item))
                else:
                    raise RuntimeError("Unreachable code: build messages with MessageBuilder")
        else:
            raise RuntimeError("Unreachable code: build messages with MessageBuilder")

        return types.Content(role=role, parts=parts)

    temp_list: list[types.Content] = []
    system_instructions: list[str] = []
    for message in messages:
        if message.role == RoleType.System:
            if isinstance(message.content, str):
                system_instructions.append(message.content)
            else:
                raise RuntimeError("System messages must be plain text, not image data")
        elif message.role == RoleType.Tool:
            if not message.tool_call_id:
                raise ValueError("Unreachable code: build messages with MessageBuilder")
            # Tool responses are currently not forwarded to Gemini.
        else:
            temp_list.append(_convert_message_item(message))
    if system_instructions:
        # There are system messages; return them alongside the contents.
        ret: tuple = (temp_list, system_instructions)
    else:
        # No system messages.
        ret: tuple = (temp_list, None)

    return ret


def _convert_tool_options(tool_options: list[ToolOption]) -> list[FunctionDeclaration]:
    """
    Convert tool options into the format required by the Gemini API.
    :param tool_options: tool option list
    :return: converted tool objects
    """

    def _convert_tool_param(tool_option_param: ToolParam) -> dict:
        """
        Convert a single tool parameter.
        :param tool_option_param: tool parameter object
        :return: converted parameter dict
        """
        return {
            "type": tool_option_param.param_type.value,
            "description": tool_option_param.description,
        }

    def _convert_tool_option_item(tool_option: ToolOption) -> FunctionDeclaration:
        """
        Convert a single tool option.
        :param tool_option: tool option object
        :return: converted Gemini FunctionDeclaration
        """
        ret = {
            "name": tool_option.name,
            "description": tool_option.description,
        }
        if tool_option.params:
            ret["parameters"] = {
                "type": "object",
                "properties": {
                    param.name: _convert_tool_param(param)
                    for param in tool_option.params
                },
                "required": [
                    param.name for param in tool_option.params if param.required
                ],
            }
        return types.FunctionDeclaration(**ret)

    return [_convert_tool_option_item(tool_option) for tool_option in tool_options]


def _process_delta(
    delta: GenerateContentResponse,
    fc_delta_buffer: io.StringIO,
    tool_calls_buffer: list[tuple[str, str, dict]],
):
    if not hasattr(delta, "candidates") or len(delta.candidates) == 0:
        raise RespParseException(delta, "Parse failure: missing 'candidates' field")

    if delta.text:
        fc_delta_buffer.write(delta.text)

    if delta.function_calls:  # the attribute always exists, even when empty
        for call in delta.function_calls:
            try:
                if not isinstance(
                    call.args, dict
                ):  # Gemini already returns function-call args as a dict
                    raise RespParseException(
                        delta, "Parse failure: tool call arguments are not a dict"
                    )
                tool_calls_buffer.append(
                    (
                        call.id,
                        call.name,
                        call.args,
                    )
                )
            except Exception as e:
                raise RespParseException(delta, "Parse failure: cannot parse tool call arguments") from e


def _build_stream_api_resp(
    _fc_delta_buffer: io.StringIO,
    _tool_calls_buffer: list[tuple[str, str, dict]],
) -> APIResponse:
    resp = APIResponse()

    if _fc_delta_buffer.tell() > 0:
        # Flush the accumulated content into the APIResponse object.
        resp.content = _fc_delta_buffer.getvalue()
    _fc_delta_buffer.close()
    if len(_tool_calls_buffer) > 0:
        # Convert the buffered tool calls into ToolCall objects.
        resp.tool_calls = []
        for call_id, function_name, arguments_buffer in _tool_calls_buffer:
            if arguments_buffer is not None:
                arguments = arguments_buffer
                if not isinstance(arguments, dict):
                    raise RespParseException(
                        None,
                        "Parse failure: tool call arguments are not a dict. Raw arguments:\n"
                        f"{arguments_buffer}",
                    )
            else:
                arguments = None

            resp.tool_calls.append(ToolCall(call_id, function_name, arguments))

    return resp


async def _to_async_iterable(iterable: Iterable[T]) -> AsyncIterator[T]:
    """
    Wrap a synchronous iterable as an async iterator.
    :param iterable: iterable object
    :return: async iterator
    """
    for item in iterable:
        await asyncio.sleep(0)
        yield item


async def _default_stream_response_handler(
    resp_stream: Iterator[GenerateContentResponse],
    interrupt_flag: asyncio.Event | None,
) -> tuple[APIResponse, tuple[int, int, int]]:
    """
    Default stream handler for Gemini API streaming responses.
    :param resp_stream: streamed response iterator (single-pass: it is exhausted after one traversal)
    :return: (APIResponse, usage tuple)
    """
    _fc_delta_buffer = io.StringIO()  # buffer for the final content received so far
    _tool_calls_buffer: list[
        tuple[str, str, dict]
    ] = []  # buffer for the tool calls received so far
    _usage_record = None  # usage record

    def _ensure_buffer_closed():
        if _fc_delta_buffer and not _fc_delta_buffer.closed:
            _fc_delta_buffer.close()

    async for chunk in _to_async_iterable(resp_stream):
        # Check the interrupt flag.
        if interrupt_flag and interrupt_flag.is_set():
            # Interrupted from outside: close the buffer and abort.
            _ensure_buffer_closed()
            raise ReqAbortException("Request interrupted by external signal")

        _process_delta(
            chunk,
            _fc_delta_buffer,
            _tool_calls_buffer,
        )

        if chunk.usage_metadata:
            # Record the usage so it can be attached to the APIResponse.
            # thoughts_token_count may be None when no thinking happened.
            _usage_record = (
                chunk.usage_metadata.prompt_token_count,
                (chunk.usage_metadata.candidates_token_count or 0)
                + (chunk.usage_metadata.thoughts_token_count or 0),
                chunk.usage_metadata.total_token_count,
            )
    try:
        return _build_stream_api_resp(
            _fc_delta_buffer,
            _tool_calls_buffer,
        ), _usage_record
    except Exception:
        # Make sure the buffer is closed.
        _ensure_buffer_closed()
        raise


def _default_normal_response_parser(
    resp: GenerateContentResponse,
) -> tuple[APIResponse, tuple[int, int, int]]:
    """
    Parse a non-streaming Gemini API response into an APIResponse object.
    :param resp: response object
    :return: (APIResponse, usage tuple)
    """
    api_response = APIResponse()

    if not hasattr(resp, "candidates") or len(resp.candidates) == 0:
        raise RespParseException(resp, "Parse failure: missing 'candidates' field")

    if resp.text:
        api_response.content = resp.text

    if resp.function_calls:
        api_response.tool_calls = []
        for call in resp.function_calls:
            try:
                if not isinstance(call.args, dict):
                    raise RespParseException(
                        resp, "Parse failure: tool call arguments are not a dict"
                    )
                api_response.tool_calls.append(ToolCall(call.id, call.name, call.args))
            except Exception as e:
                raise RespParseException(
                    resp, "Parse failure: cannot parse tool call arguments"
                ) from e

    if resp.usage_metadata:
        _usage_record = (
            resp.usage_metadata.prompt_token_count,
            (resp.usage_metadata.candidates_token_count or 0)
            + (resp.usage_metadata.thoughts_token_count or 0),
            resp.usage_metadata.total_token_count,
        )
    else:
        _usage_record = None

    api_response.raw_data = resp

    return api_response, _usage_record


class GeminiClient(BaseClient):
    def __init__(self, api_provider: APIProvider):
        super().__init__(api_provider)
        # Clients are no longer created once at init time; they are created
        # lazily per API key when a request is made.
        self._clients_cache = {}  # API key -> genai.Client cache

    def _get_client(self, api_key: str | None = None) -> genai.Client:
        """Get or create the client for the given API key."""
        if api_key is None:
            api_key = self.api_provider.get_current_api_key()

        if not api_key:
            raise ValueError(f"API provider '{self.api_provider.name}' has no usable API key")

        # Cache clients to avoid recreating them.
        if api_key not in self._clients_cache:
            self._clients_cache[api_key] = genai.Client(api_key=api_key)

        return self._clients_cache[api_key]

    async def _execute_with_fallback(self, func, *args, **kwargs):
        """Run a request, rotating to the next API key on failure."""
        current_api_key = self.api_provider.get_current_api_key()
        max_attempts = len(self.api_provider.api_keys) if self.api_provider.api_keys else 1

        for attempt in range(max_attempts):
            try:
                client = self._get_client(current_api_key)
                result = await func(client, *args, **kwargs)
                # Reset the failure count on success.
                self.api_provider.reset_key_failures(current_api_key)
                return result

            except (ClientError, ServerError) as e:
                # Record the failure and try the next API key.
                logger.warning(f"API key failed (attempt {attempt + 1}/{max_attempts}): {str(e)}")

                if attempt < max_attempts - 1:  # retries left
                    next_api_key = self.api_provider.mark_key_failed(current_api_key)
                    if next_api_key and next_api_key != current_api_key:
                        current_api_key = next_api_key
                        logger.info(f"Switching to next API key: {current_api_key[:8]}***{current_api_key[-4:]}")
                        continue

                # Every API key failed; re-raise.
                raise RespNotOkException(e.status_code, e.message) from e

            except Exception as e:
                # Propagate all other exceptions unchanged.
                raise e

    async def get_response(
        self,
        model_info: ModelInfo,
        message_list: list[Message],
        tool_options: list[ToolOption] | None = None,
        max_tokens: int = 1024,
        temperature: float = 0.7,
        thinking_budget: int = 0,
        response_format: RespFormat | None = None,
        stream_response_handler: Callable[
            [Iterator[GenerateContentResponse], asyncio.Event | None], APIResponse
        ]
        | None = None,
        async_response_parser: Callable[[GenerateContentResponse], APIResponse]
        | None = None,
        interrupt_flag: asyncio.Event | None = None,
    ) -> APIResponse:
        """
        Get a chat completion response.
        :param model_info: model info
        :param message_list: conversation body
        :param tool_options: tool options (optional, defaults to None)
        :param max_tokens: maximum token count (optional, defaults to 1024)
        :param temperature: temperature (optional, defaults to 0.7)
        :param thinking_budget: thinking budget (optional, defaults to 0)
        :param response_format: response format (defaults to text/plain; a JSON Schema input must follow the OpenAPI 3.0 format, essentially the same as OpenAI's; other format inputs are not supported yet)
        :param stream_response_handler: stream response handler (optional, defaults to the module default)
        :param async_response_parser: response parser (optional, defaults to the module default)
        :param interrupt_flag: interrupt event (optional, defaults to None)
        :return: APIResponse
        """
        return await self._execute_with_fallback(
            self._get_response_internal,
            model_info,
            message_list,
            tool_options,
            max_tokens,
            temperature,
            thinking_budget,
            response_format,
            stream_response_handler,
            async_response_parser,
            interrupt_flag,
        )

    async def _get_response_internal(
        self,
        client: genai.Client,
        model_info: ModelInfo,
        message_list: list[Message],
        tool_options: list[ToolOption] | None = None,
        max_tokens: int = 1024,
        temperature: float = 0.7,
        thinking_budget: int = 0,
        response_format: RespFormat | None = None,
        stream_response_handler: Callable[
            [Iterator[GenerateContentResponse], asyncio.Event | None], APIResponse
        ]
        | None = None,
        async_response_parser: Callable[[GenerateContentResponse], APIResponse]
        | None = None,
        interrupt_flag: asyncio.Event | None = None,
    ) -> APIResponse:
        """Internal method: perform the actual API call."""
        if stream_response_handler is None:
            stream_response_handler = _default_stream_response_handler

        if async_response_parser is None:
            async_response_parser = _default_normal_response_parser

        # Convert messages into the Gemini API format.
        messages = _convert_messages(message_list)
        # Convert tool_options into the Gemini API format.
        tools = _convert_tool_options(tool_options) if tool_options else None
        # Convert response_format into the Gemini API format.
        generation_config_dict = {
            "max_output_tokens": max_tokens,
            "temperature": temperature,
            "response_modalities": ["TEXT"],  # only text output for now
        }
        if "2.5" in model_info.model_identifier.lower():
            # Only the 2.5 models currently support a thinking budget, and in
            # testing they did not return thought content, so thoughts are
            # excluded from the output altogether.
            generation_config_dict["thinking_config"] = types.ThinkingConfig(
                thinking_budget=thinking_budget, include_thoughts=False
            )
        if tools:
            # GenerateContentConfig expects a list of Tool objects wrapping the declarations.
            generation_config_dict["tools"] = [types.Tool(function_declarations=tools)]
        if messages[1]:
            # Attach the system messages, if any, to the config.
            generation_config_dict["system_instruction"] = messages[1]
        if response_format and response_format.format_type == RespFormatType.TEXT:
            generation_config_dict["response_mime_type"] = "text/plain"
        elif response_format and response_format.format_type in (RespFormatType.JSON_OBJ, RespFormatType.JSON_SCHEMA):
            generation_config_dict["response_mime_type"] = "application/json"
            generation_config_dict["response_schema"] = response_format.to_dict()

        generation_config = types.GenerateContentConfig(**generation_config_dict)

        try:
            if model_info.force_stream_mode:
                req_task = asyncio.create_task(
                    client.aio.models.generate_content_stream(
                        model=model_info.model_identifier,
                        contents=messages[0],
                        config=generation_config,
                    )
                )
                while not req_task.done():
                    if interrupt_flag and interrupt_flag.is_set():
                        # The interrupt flag is set: cancel the task and abort.
                        req_task.cancel()
                        raise ReqAbortException("Request interrupted by external signal")
                    await asyncio.sleep(0.1)  # re-check the task and the interrupt flag after 0.1s
                resp, usage_record = await stream_response_handler(
                    req_task.result(), interrupt_flag
                )
            else:
                req_task = asyncio.create_task(
                    client.aio.models.generate_content(
                        model=model_info.model_identifier,
                        contents=messages[0],
                        config=generation_config,
                    )
                )
                while not req_task.done():
                    if interrupt_flag and interrupt_flag.is_set():
                        # The interrupt flag is set: cancel the task and abort.
                        req_task.cancel()
                        raise ReqAbortException("Request interrupted by external signal")
                    await asyncio.sleep(0.5)  # re-check the task and the interrupt flag after 0.5s

                resp, usage_record = async_response_parser(req_task.result())
        except ReqAbortException:
            # Deliberate aborts must not be re-wrapped as network errors.
            raise
        except (ClientError, ServerError) as e:
            # Re-wrap ClientError and ServerError as RespNotOkException.
            raise RespNotOkException(e.status_code, e.message) from e
        except (
            UnknownFunctionCallArgumentError,
            UnsupportedFunctionError,
            FunctionInvocationError,
        ) as e:
            raise ValueError(f"Tool definition error: check tool options and parameters: {str(e)}") from e
        except Exception as e:
            raise NetworkConnectionError() from e

        if usage_record:
            resp.usage = UsageRecord(
                model_name=model_info.name,
                provider_name=model_info.api_provider,
                prompt_tokens=usage_record[0],
                completion_tokens=usage_record[1],
                total_tokens=usage_record[2],
            )

        return resp

    async def get_embedding(
        self,
        model_info: ModelInfo,
        embedding_input: str,
    ) -> APIResponse:
        """
        Get a text embedding.
        :param model_info: model info
        :param embedding_input: input text to embed
        :return: embedding response
        """
        return await self._execute_with_fallback(
            self._get_embedding_internal,
            model_info,
            embedding_input,
        )

    async def _get_embedding_internal(
        self,
        client: genai.Client,
        model_info: ModelInfo,
        embedding_input: str,
    ) -> APIResponse:
        """Internal method: perform the actual embedding API call."""
        try:
            raw_response: types.EmbedContentResponse = (
                await client.aio.models.embed_content(
                    model=model_info.model_identifier,
                    contents=embedding_input,
                    config=types.EmbedContentConfig(task_type="SEMANTIC_SIMILARITY"),
                )
            )
        except (ClientError, ServerError) as e:
            # Re-wrap ClientError and ServerError as RespNotOkException.
            raise RespNotOkException(e.status_code) from e
        except Exception as e:
            raise NetworkConnectionError() from e

        response = APIResponse()

        # Parse the embedding and usage information.
        if hasattr(raw_response, "embeddings"):
            response.embedding = raw_response.embeddings[0].values
        else:
            raise RespParseException(raw_response, "Parse failure: missing 'embeddings' field")

        response.usage = UsageRecord(
            model_name=model_info.name,
            provider_name=model_info.api_provider,
            # the embed endpoint reports no token usage; input length is a rough proxy
            prompt_tokens=len(embedding_input),
            completion_tokens=0,
            total_tokens=len(embedding_input),
        )

        return response
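
Both clients in this PR funnel every request through an _execute_with_fallback wrapper, keeping key rotation out of the request logic itself. The same idea reduced to a framework-free sketch (all names here are illustrative):

    async def with_key_fallback(keys, request, is_retryable):
        """Try request(key) for each key in order; return the first success."""
        for attempt, key in enumerate(keys, start=1):
            try:
                return await request(key)
            except Exception as exc:
                # give up on the last key, or on errors rotation cannot fix
                if attempt == len(keys) or not is_retryable(exc):
                    raise

Note the caveat carried over from the real implementation: only provider-level errors (ClientError/ServerError here, APIStatusError/APIConnectionError in the OpenAI client) trigger rotation; parse failures and deliberate aborts propagate immediately.
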
647
src/llm_models/model_client/openai_client.py
Normal file
@@ -0,0 +1,647 @@
import asyncio
import io
import json
import re
from collections.abc import Iterable
from typing import Callable, Any

from openai import (
    AsyncOpenAI,
    APIConnectionError,
    APIStatusError,
    NOT_GIVEN,
    AsyncStream,
)
from openai.types.chat import (
    ChatCompletion,
    ChatCompletionChunk,
    ChatCompletionMessageParam,
    ChatCompletionToolParam,
)
from openai.types.chat.chat_completion_chunk import ChoiceDelta

from .base_client import APIResponse, UsageRecord
from src.config.api_ada_configs import ModelInfo, APIProvider
from . import BaseClient
from src.common.logger import get_logger

from ..exceptions import (
    RespParseException,
    NetworkConnectionError,
    RespNotOkException,
    ReqAbortException,
)
from ..payload_content.message import Message, RoleType
from ..payload_content.resp_format import RespFormat
from ..payload_content.tool_option import ToolOption, ToolParam, ToolCall

logger = get_logger("OpenAI Client")


def _convert_messages(messages: list[Message]) -> list[ChatCompletionMessageParam]:
    """
    Convert messages into the format required by the OpenAI API.
    :param messages: message list
    :return: converted message list
    """

    def _convert_message_item(message: Message) -> ChatCompletionMessageParam:
        """
        Convert a single message.
        :param message: message object
        :return: converted message dict
        """
        # Build the content.
        content: str | list[dict[str, Any]]
        if isinstance(message.content, str):
            content = message.content
        elif isinstance(message.content, list):
            content = []
            for item in message.content:
                if isinstance(item, tuple):
                    content.append(
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/{item[0].lower()};base64,{item[1]}"
                            },
                        }
                    )
                elif isinstance(item, str):
                    content.append({"type": "text", "text": item})
                else:
                    raise RuntimeError("Unreachable code: build messages with MessageBuilder")
        else:
            raise RuntimeError("Unreachable code: build messages with MessageBuilder")

        ret = {
            "role": message.role.value,
            "content": content,
        }

        # Attach the tool call ID.
        if message.role == RoleType.Tool:
            if not message.tool_call_id:
                raise ValueError("Unreachable code: build messages with MessageBuilder")
            ret["tool_call_id"] = message.tool_call_id

        return ret

    return [_convert_message_item(message) for message in messages]


def _convert_tool_options(tool_options: list[ToolOption]) -> list[dict[str, Any]]:
    """
    Convert tool options into the format required by the OpenAI API.
    :param tool_options: tool option list
    :return: converted tool option list
    """

    def _convert_tool_param(tool_option_param: ToolParam) -> dict[str, str]:
        """
        Convert a single tool parameter.
        :param tool_option_param: tool parameter object
        :return: converted parameter dict
        """
        return {
            "type": tool_option_param.param_type.value,
            "description": tool_option_param.description,
        }

    def _convert_tool_option_item(tool_option: ToolOption) -> dict[str, Any]:
        """
        Convert a single tool option.
        :param tool_option: tool option object
        :return: converted tool option dict
        """
        ret: dict[str, Any] = {
            "name": tool_option.name,
            "description": tool_option.description,
        }
        if tool_option.params:
            ret["parameters"] = {
                "type": "object",
                "properties": {
                    param.name: _convert_tool_param(param)
                    for param in tool_option.params
                },
                "required": [
                    param.name for param in tool_option.params if param.required
                ],
            }
        return ret

    return [
        {
            "type": "function",
            "function": _convert_tool_option_item(tool_option),
        }
        for tool_option in tool_options
    ]


def _process_delta(
    delta: ChoiceDelta,
    has_rc_attr_flag: bool,
    in_rc_flag: bool,
    rc_delta_buffer: io.StringIO,
    fc_delta_buffer: io.StringIO,
    tool_calls_buffer: list[tuple[str, str, io.StringIO]],
) -> bool:
    # Receive content.
    if has_rc_attr_flag:
        # A dedicated reasoning_content block exists, so the content itself needs no inspection.
        if hasattr(delta, "reasoning_content") and delta.reasoning_content:
            # Reasoning content goes into the reasoning buffer.
            assert isinstance(delta.reasoning_content, str)
            rc_delta_buffer.write(delta.reasoning_content)
        elif delta.content:
            # Final content goes into the content buffer.
            fc_delta_buffer.write(delta.content)
    elif hasattr(delta, "content") and delta.content is not None:
        # No dedicated reasoning block, but there is content.
        if in_rc_flag:
            # Currently inside a reasoning block.
            if delta.content == "</think>":
                # "</think>" marks the end of the reasoning block; leave it.
                in_rc_flag = False
            else:
                # Everything else counts as reasoning content.
                rc_delta_buffer.write(delta.content)
        elif delta.content == "<think>" and not fc_delta_buffer.getvalue():
            # "<think>" with an empty content buffer means it is the very first
            # output token, so it opens a reasoning block.
            in_rc_flag = True
        else:
            # Everything else counts as final content.
            fc_delta_buffer.write(delta.content)
    # Receive tool_calls.
    if hasattr(delta, "tool_calls") and delta.tool_calls:
        tool_call_delta = delta.tool_calls[0]

        if tool_call_delta.index >= len(tool_calls_buffer):
            # An index beyond the buffer length means a new tool call.
            tool_calls_buffer.append(
                (
                    tool_call_delta.id,
                    tool_call_delta.function.name,
                    io.StringIO(),
                )
            )

        if tool_call_delta.function.arguments:
            # Append argument fragments to that call's argument buffer.
            tool_calls_buffer[tool_call_delta.index][2].write(
                tool_call_delta.function.arguments
            )

    return in_rc_flag


def _build_stream_api_resp(
    _fc_delta_buffer: io.StringIO,
    _rc_delta_buffer: io.StringIO,
    _tool_calls_buffer: list[tuple[str, str, io.StringIO]],
) -> APIResponse:
    resp = APIResponse()

    if _rc_delta_buffer.tell() > 0:
        # Flush the reasoning buffer into the APIResponse object.
        resp.reasoning_content = _rc_delta_buffer.getvalue()
    _rc_delta_buffer.close()
    if _fc_delta_buffer.tell() > 0:
        # Flush the content buffer into the APIResponse object.
        resp.content = _fc_delta_buffer.getvalue()
    _fc_delta_buffer.close()
    if _tool_calls_buffer:
        # Parse the buffered tool calls into ToolCall objects.
        resp.tool_calls = []
        for call_id, function_name, arguments_buffer in _tool_calls_buffer:
            if arguments_buffer.tell() > 0:
                # A non-empty argument buffer is parsed as JSON.
                raw_arg_data = arguments_buffer.getvalue()
                arguments_buffer.close()
                try:
                    arguments = json.loads(raw_arg_data)
                    if not isinstance(arguments, dict):
                        raise RespParseException(
                            None,
                            "Parse failure: tool call arguments are not a dict. Raw arguments:\n"
                            f"{raw_arg_data}",
                        )
                except json.JSONDecodeError as e:
                    raise RespParseException(
                        None,
                        "Parse failure: cannot parse tool call arguments. Raw arguments:"
                        f"{raw_arg_data}",
                    ) from e
            else:
                arguments_buffer.close()
                arguments = None

            resp.tool_calls.append(ToolCall(call_id, function_name, arguments))

    return resp


async def _default_stream_response_handler(
    resp_stream: AsyncStream[ChatCompletionChunk],
    interrupt_flag: asyncio.Event | None,
) -> tuple[APIResponse, tuple[int, int, int]]:
    """
    Default stream handler for OpenAI API streaming responses.
    :param resp_stream: stream response object
    :return: (APIResponse, usage tuple)
    """

    _has_rc_attr_flag = False  # whether a dedicated reasoning_content block exists
    _in_rc_flag = False  # whether we are currently inside a reasoning block
    _rc_delta_buffer = io.StringIO()  # buffer for the reasoning content received so far
    _fc_delta_buffer = io.StringIO()  # buffer for the final content received so far
    _tool_calls_buffer: list[
        tuple[str, str, io.StringIO]
    ] = []  # buffer for the tool calls received so far
    _usage_record = None  # usage record

    def _ensure_buffer_closed():
        # Make sure every buffer is closed.
        if _rc_delta_buffer and not _rc_delta_buffer.closed:
            _rc_delta_buffer.close()
        if _fc_delta_buffer and not _fc_delta_buffer.closed:
            _fc_delta_buffer.close()
        for _, _, buffer in _tool_calls_buffer:
            if buffer and not buffer.closed:
                buffer.close()

    async for event in resp_stream:
        if interrupt_flag and interrupt_flag.is_set():
            # The interrupt flag is set: clean up and abort.
            _ensure_buffer_closed()
            raise ReqAbortException("Request interrupted by external signal")

        delta = event.choices[0].delta  # delta payload of the current chunk

        if hasattr(delta, "reasoning_content") and delta.reasoning_content:
            # Mark: a dedicated reasoning_content block exists.
            _has_rc_attr_flag = True

        _in_rc_flag = _process_delta(
            delta,
            _has_rc_attr_flag,
            _in_rc_flag,
            _rc_delta_buffer,
            _fc_delta_buffer,
            _tool_calls_buffer,
        )

        if event.usage:
            # Record the usage so it can be attached to the APIResponse.
            _usage_record = (
                event.usage.prompt_tokens,
                event.usage.completion_tokens,
                event.usage.total_tokens,
            )

    try:
        return _build_stream_api_resp(
            _fc_delta_buffer,
            _rc_delta_buffer,
            _tool_calls_buffer,
        ), _usage_record
    except Exception:
        # Make sure the buffers are closed.
        _ensure_buffer_closed()
        raise


pattern = re.compile(
    r"<think>(?P<think>.*?)</think>(?P<content>.*)|<think>(?P<think_unclosed>.*)|(?P<content_only>.+)",
    re.DOTALL,
)
"""Regex used to split reasoning content from final content."""


def _default_normal_response_parser(
    resp: ChatCompletion,
) -> tuple[APIResponse, tuple[int, int, int]]:
    """
    Parse a non-streaming OpenAI API response into an APIResponse object.
    :param resp: response object
    :return: (APIResponse, usage tuple)
    """
    api_response = APIResponse()

    if not hasattr(resp, "choices") or len(resp.choices) == 0:
        raise RespParseException(resp, "Parse failure: missing 'choices' field")
    message_part = resp.choices[0].message

    if hasattr(message_part, "reasoning_content") and message_part.reasoning_content:
        # A usable reasoning field exists.
        api_response.content = message_part.content
        api_response.reasoning_content = message_part.reasoning_content
    elif message_part.content:
        # Extract reasoning and content from the text itself.
        match = pattern.match(message_part.content)
        if not match:
            raise RespParseException(resp, "Parse failure: cannot capture reasoning and output content")
        if match.group("think") is not None:
            result = match.group("think").strip(), match.group("content").strip()
        elif match.group("think_unclosed") is not None:
            result = match.group("think_unclosed").strip(), None
        else:
            result = None, match.group("content_only").strip()
        api_response.reasoning_content, api_response.content = result

    # Extract tool calls.
    if message_part.tool_calls:
        api_response.tool_calls = []
        for call in message_part.tool_calls:
            try:
                arguments = json.loads(call.function.arguments)
                if not isinstance(arguments, dict):
                    raise RespParseException(
                        resp, "Parse failure: tool call arguments are not a dict"
                    )
                api_response.tool_calls.append(
                    ToolCall(call.id, call.function.name, arguments)
                )
            except json.JSONDecodeError as e:
                raise RespParseException(
                    resp, "Parse failure: cannot parse tool call arguments"
                ) from e

    # Extract usage information.
    if resp.usage:
        _usage_record = (
            resp.usage.prompt_tokens,
            resp.usage.completion_tokens,
            resp.usage.total_tokens,
        )
    else:
        _usage_record = None

    # Keep the raw response.
    api_response.raw_data = resp

    return api_response, _usage_record


class OpenaiClient(BaseClient):
    def __init__(self, api_provider: APIProvider):
        super().__init__(api_provider)
        # Clients are no longer created once at init time; they are created
        # lazily per API key when a request is made.
        self._clients_cache = {}  # API key -> AsyncOpenAI client cache

    def _get_client(self, api_key: str | None = None) -> AsyncOpenAI:
        """Get or create the client for the given API key."""
        if api_key is None:
            api_key = self.api_provider.get_current_api_key()

        if not api_key:
            raise ValueError(f"API provider '{self.api_provider.name}' has no usable API key")

        # Cache clients to avoid recreating them.
        if api_key not in self._clients_cache:
            self._clients_cache[api_key] = AsyncOpenAI(
                base_url=self.api_provider.base_url,
                api_key=api_key,
                max_retries=0,
            )

        return self._clients_cache[api_key]

    async def _execute_with_fallback(self, func, *args, **kwargs):
        """Run a request, rotating to the next API key on failure."""
        current_api_key = self.api_provider.get_current_api_key()
        max_attempts = len(self.api_provider.api_keys) if self.api_provider.api_keys else 1

        for attempt in range(max_attempts):
            try:
                client = self._get_client(current_api_key)
                result = await func(client, *args, **kwargs)
                # Reset the failure count on success.
                self.api_provider.reset_key_failures(current_api_key)
                return result

            except (APIStatusError, APIConnectionError) as e:
                # Record the failure and try the next API key.
                logger.warning(f"API key failed (attempt {attempt + 1}/{max_attempts}): {str(e)}")

                if attempt < max_attempts - 1:  # retries left
                    next_api_key = self.api_provider.mark_key_failed(current_api_key)
                    if next_api_key and next_api_key != current_api_key:
                        current_api_key = next_api_key
                        logger.info(f"Switching to next API key: {current_api_key[:8]}***{current_api_key[-4:]}")
                        continue

                # Every API key failed; re-raise.
                if isinstance(e, APIStatusError):
                    raise RespNotOkException(e.status_code, e.message) from e
                elif isinstance(e, APIConnectionError):
                    raise NetworkConnectionError(str(e)) from e

            except Exception as e:
                # Propagate all other exceptions unchanged.
                raise e

    async def get_response(
        self,
        model_info: ModelInfo,
        message_list: list[Message],
        tool_options: list[ToolOption] | None = None,
        max_tokens: int = 1024,
        temperature: float = 0.7,
        response_format: RespFormat | None = None,
        stream_response_handler: Callable[
            [AsyncStream[ChatCompletionChunk], asyncio.Event | None],
            tuple[APIResponse, tuple[int, int, int]],
        ]
        | None = None,
        async_response_parser: Callable[
            [ChatCompletion], tuple[APIResponse, tuple[int, int, int]]
        ]
        | None = None,
        interrupt_flag: asyncio.Event | None = None,
    ) -> APIResponse:
        """
        Get a chat completion response.
        :param model_info: model info
        :param message_list: conversation body
        :param tool_options: tool options (optional, defaults to None)
        :param max_tokens: maximum token count (optional, defaults to 1024)
        :param temperature: temperature (optional, defaults to 0.7)
        :param response_format: response format (optional, defaults to NotGiven)
        :param stream_response_handler: stream response handler (optional, defaults to the module default)
        :param async_response_parser: response parser (optional, defaults to the module default)
        :param interrupt_flag: interrupt event (optional, defaults to None)
        :return: APIResponse
        """
        return await self._execute_with_fallback(
            self._get_response_internal,
            model_info,
            message_list,
            tool_options,
            max_tokens,
            temperature,
            response_format,
            stream_response_handler,
            async_response_parser,
            interrupt_flag,
        )

    async def _get_response_internal(
        self,
        client: AsyncOpenAI,
        model_info: ModelInfo,
        message_list: list[Message],
        tool_options: list[ToolOption] | None = None,
        max_tokens: int = 1024,
        temperature: float = 0.7,
        response_format: RespFormat | None = None,
        stream_response_handler: Callable[
            [AsyncStream[ChatCompletionChunk], asyncio.Event | None],
            tuple[APIResponse, tuple[int, int, int]],
        ]
        | None = None,
        async_response_parser: Callable[
            [ChatCompletion], tuple[APIResponse, tuple[int, int, int]]
        ]
        | None = None,
        interrupt_flag: asyncio.Event | None = None,
    ) -> APIResponse:
        """Internal method: perform the actual API call."""
        if stream_response_handler is None:
            stream_response_handler = _default_stream_response_handler

        if async_response_parser is None:
            async_response_parser = _default_normal_response_parser

        # Convert messages into the OpenAI API format.
        messages: Iterable[ChatCompletionMessageParam] = _convert_messages(message_list)
        # Convert tool_options into the OpenAI API format.
        tools: Iterable[ChatCompletionToolParam] = (
            _convert_tool_options(tool_options) if tool_options else NOT_GIVEN
        )

        try:
            if model_info.force_stream_mode:
                req_task = asyncio.create_task(
                    client.chat.completions.create(
                        model=model_info.model_identifier,
                        messages=messages,
                        tools=tools,
                        temperature=temperature,
                        max_tokens=max_tokens,
                        stream=True,
                        response_format=response_format.to_dict()
                        if response_format
                        else NOT_GIVEN,
                    )
                )
                while not req_task.done():
                    if interrupt_flag and interrupt_flag.is_set():
                        # The interrupt flag is set: cancel the task and abort.
                        req_task.cancel()
                        raise ReqAbortException("Request interrupted by external signal")
                    await asyncio.sleep(0.1)  # re-check the task and the interrupt flag after 0.1s

                resp, usage_record = await stream_response_handler(
                    req_task.result(), interrupt_flag
                )
            else:
                # Send the request and wait for the response.
                req_task = asyncio.create_task(
                    client.chat.completions.create(
                        model=model_info.model_identifier,
                        messages=messages,
                        tools=tools,
                        temperature=temperature,
                        max_tokens=max_tokens,
                        stream=False,
                        response_format=response_format.to_dict()
                        if response_format
                        else NOT_GIVEN,
                    )
                )
                while not req_task.done():
                    if interrupt_flag and interrupt_flag.is_set():
                        # The interrupt flag is set: cancel the task and abort.
                        req_task.cancel()
                        raise ReqAbortException("Request interrupted by external signal")
                    await asyncio.sleep(0.5)  # re-check the task and the interrupt flag after 0.5s

                resp, usage_record = async_response_parser(req_task.result())
        except APIConnectionError as e:
            # Re-wrap APIConnectionError as NetworkConnectionError.
            raise NetworkConnectionError() from e
        except APIStatusError as e:
            # Re-wrap APIStatusError as RespNotOkException.
            raise RespNotOkException(e.status_code, e.message) from e

        if usage_record:
            resp.usage = UsageRecord(
                model_name=model_info.name,
                provider_name=model_info.api_provider,
                prompt_tokens=usage_record[0],
                completion_tokens=usage_record[1],
                total_tokens=usage_record[2],
            )

        return resp

    async def get_embedding(
        self,
        model_info: ModelInfo,
        embedding_input: str,
    ) -> APIResponse:
        """
        Get a text embedding.
        :param model_info: model info
        :param embedding_input: input text to embed
        :return: embedding response
        """
        return await self._execute_with_fallback(
            self._get_embedding_internal,
            model_info,
            embedding_input,
        )

    async def _get_embedding_internal(
        self,
        client: AsyncOpenAI,
        model_info: ModelInfo,
        embedding_input: str,
    ) -> APIResponse:
        """Internal method: perform the actual embedding API call."""
        try:
            raw_response = await client.embeddings.create(
                model=model_info.model_identifier,
                input=embedding_input,
            )
        except APIConnectionError as e:
            raise NetworkConnectionError() from e
        except APIStatusError as e:
            # Re-wrap APIStatusError as RespNotOkException.
            raise RespNotOkException(e.status_code) from e

        response = APIResponse()

        # Parse the embedding.
        if len(raw_response.data) > 0:
            response.embedding = raw_response.data[0].embedding
        else:
            raise RespParseException(
                raw_response,
                "Parse failure: missing embedding data.",
            )

        # Parse the usage information.
        if hasattr(raw_response, "usage"):
            response.usage = UsageRecord(
                model_name=model_info.name,
                provider_name=model_info.api_provider,
                prompt_tokens=raw_response.usage.prompt_tokens,
                completion_tokens=0,  # the embeddings endpoint reports no completion tokens
                total_tokens=raw_response.usage.total_tokens,
            )

        return response
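
The module-level `pattern` above splits DeepSeek-style `<think>...</think>` output into reasoning and final content. A quick, runnable check of its three capture branches, using the exact regex from the file:

    import re

    pattern = re.compile(
        r"<think>(?P<think>.*?)</think>(?P<content>.*)|<think>(?P<think_unclosed>.*)|(?P<content_only>.+)",
        re.DOTALL,
    )

    for sample in ["<think>plan</think>answer", "<think>still thinking", "plain answer"]:
        print(pattern.match(sample).groupdict())
    # {'think': 'plan', 'content': 'answer', 'think_unclosed': None, 'content_only': None}
    # {'think': None, 'content': None, 'think_unclosed': 'still thinking', 'content_only': None}
    # {'think': None, 'content': None, 'think_unclosed': None, 'content_only': 'plain answer'}
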
92
src/llm_models/model_manager.py
Normal file
@@ -0,0 +1,92 @@
import importlib
from typing import Dict

from src.config.config import model_config
from src.config.api_ada_configs import ModuleConfig, ModelUsageArgConfig
from src.common.logger import get_logger

from .model_client import ModelRequestHandler, BaseClient

logger = get_logger("Model Manager")


class ModelManager:
    # TODO: add a read-write lock to prevent data races when the config is refreshed asynchronously

    def __init__(
        self,
        config: ModuleConfig,
    ):
        self.config: ModuleConfig = config
        """Configuration"""

        self.api_client_map: Dict[str, BaseClient] = {}
        """API client map"""

        self._request_handler_cache: Dict[str, ModelRequestHandler] = {}
        """ModelRequestHandler cache, avoids repeated construction"""

        for provider_name, api_provider in self.config.api_providers.items():
            # Initialize the API clients.
            try:
                # Load the implementation dynamically based on the config.
                client_module = importlib.import_module(
                    f".model_client.{api_provider.client_type}_client", __package__
                )
                client_class = getattr(
                    client_module, f"{api_provider.client_type.capitalize()}Client"
                )
                if not issubclass(client_class, BaseClient):
                    raise TypeError(
                        f"'{client_class.__name__}' is not a subclass of 'BaseClient'"
                    )
                self.api_client_map[api_provider.name] = client_class(
                    api_provider
                )  # instantiate and register in api_client_map
            except ImportError as e:
                logger.error(f"Failed to import client module: {e}")
                raise ImportError(
                    f"Failed to import client module for '{provider_name}': {e}"
                ) from e

    def __getitem__(self, task_name: str) -> ModelRequestHandler:
        """
        Get the (wrapped) model client for a task.
        Uses a cache to avoid recreating ModelRequestHandler instances.
        :param task_name: task name
        :return: model request handler
        """
        if task_name not in self.config.task_model_arg_map:
            raise KeyError(f"'{task_name}' not registered in ModelManager")

        # Return the cached handler if one exists.
        if task_name in self._request_handler_cache:
            logger.debug(f"🚀 [perf] returning cached ModelRequestHandler: {task_name}")
            return self._request_handler_cache[task_name]

        # Create a new ModelRequestHandler and cache it.
        logger.debug(f"🔧 [perf] creating and caching ModelRequestHandler: {task_name}")
        handler = ModelRequestHandler(
            task_name=task_name,
            config=self.config,
            api_client_map=self.api_client_map,
        )
        self._request_handler_cache[task_name] = handler
        return handler

    def __setitem__(self, task_name: str, value: ModelUsageArgConfig):
        """
        Register a task's model-usage configuration.
        :param task_name: task name
        :param value: model usage configuration
        """
        self.config.task_model_arg_map[task_name] = value

    def __contains__(self, task_name: str):
        """
        Check whether a task is registered.
        :param task_name: task name
        :return: whether the task is registered
        """
        return task_name in self.config.task_model_arg_map
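
A dict-style access sketch for ModelManager, mirroring the __getitem__ / __setitem__ / __contains__ hooks above. The task name "replyer" is illustrative; it must already exist in task_model_arg_map or __getitem__ raises KeyError:

    from src.config.api_ada_configs import ModuleConfig
    from src.llm_models.model_manager import ModelManager

    def get_replyer_handler(module_config: ModuleConfig):
        manager = ModelManager(config=module_config)
        if "replyer" in manager:          # __contains__ checks registration
            return manager["replyer"]     # same cached handler on repeat lookups
        raise KeyError("task 'replyer' is not registered")
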
104
src/llm_models/payload_content/message.py
Normal file
@@ -0,0 +1,104 @@
from enum import Enum


# This family of classes is designed with future extensions in mind.


class RoleType(Enum):
    System = "system"
    User = "user"
    Assistant = "assistant"
    Tool = "tool"


SUPPORTED_IMAGE_FORMATS = ["jpg", "jpeg", "png", "webp", "gif"]


class Message:
    def __init__(
        self,
        role: RoleType,
        content: str | list[tuple[str, str] | str],
        tool_call_id: str | None = None,
    ):
        """
        Initialize a message object.
        (Do not modify the Message class directly; use MessageBuilder to construct instances.)
        """
        self.role: RoleType = role
        self.content: str | list[tuple[str, str] | str] = content
        self.tool_call_id: str | None = tool_call_id


class MessageBuilder:
    def __init__(self):
        self.__role: RoleType = RoleType.User
        self.__content: list[tuple[str, str] | str] = []
        self.__tool_call_id: str | None = None

    def set_role(self, role: RoleType = RoleType.User) -> "MessageBuilder":
        """
        Set the role (defaults to User).
        :param role: role
        :return: MessageBuilder object
        """
        self.__role = role
        return self

    def add_text_content(self, text: str) -> "MessageBuilder":
        """
        Add text content.
        :param text: text content
        :return: MessageBuilder object
        """
        self.__content.append(text)
        return self

    def add_image_content(
        self, image_format: str, image_base64: str
    ) -> "MessageBuilder":
        """
        Add image content.
        :param image_format: image format
        :param image_base64: base64-encoded image
        :return: MessageBuilder object
        """
        if image_format.lower() not in SUPPORTED_IMAGE_FORMATS:
            raise ValueError("Unsupported image format")
        if not image_base64:
            raise ValueError("The base64-encoded image must not be empty")
        self.__content.append((image_format, image_base64))
        return self

    def add_tool_call(self, tool_call_id: str) -> "MessageBuilder":
        """
        Attach a tool call ID (make sure the role is set to Tool first).
        :param tool_call_id: ID of the tool call this message answers
        :return: MessageBuilder object
        """
        if self.__role != RoleType.Tool:
            raise ValueError("A tool call ID can only be attached when the role is Tool")
        if not tool_call_id:
            raise ValueError("The tool call ID must not be empty")
        self.__tool_call_id = tool_call_id
        return self

    def build(self) -> Message:
        """
        Build the message object.
        :return: Message object
        """
        if len(self.__content) == 0:
            raise ValueError("Content must not be empty")
        if self.__role == RoleType.Tool and self.__tool_call_id is None:
            raise ValueError("A Tool-role message requires a tool call ID")

        return Message(
            role=self.__role,
            content=(
                self.__content[0]
                if (len(self.__content) == 1 and isinstance(self.__content[0], str))
                else self.__content
            ),
            tool_call_id=self.__tool_call_id,
        )
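
The builder above normalizes a single text part down to a bare string (see build()). A few construction patterns, grounded in the file; the base64 payload is shortened for illustration:

    from src.llm_models.payload_content.message import MessageBuilder, RoleType

    # plain text: content collapses to a bare string
    user_msg = MessageBuilder().add_text_content("describe this image").build()

    # text plus an image: content stays a list of parts
    mixed_msg = (
        MessageBuilder()
        .add_text_content("what is in this picture?")
        .add_image_content("png", "iVBORw0KGgo...")
        .build()
    )

    # tool results must carry the id of the call they answer
    tool_msg = (
        MessageBuilder()
        .set_role(RoleType.Tool)
        .add_text_content('{"weather": "sunny"}')
        .add_tool_call("call_123")
        .build()
    )
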
223
src/llm_models/payload_content/resp_format.py
Normal file
223
src/llm_models/payload_content/resp_format.py
Normal file
@@ -0,0 +1,223 @@
from enum import Enum
from typing import Optional, Any

from pydantic import BaseModel
from typing_extensions import TypedDict, Required


class RespFormatType(Enum):
    TEXT = "text"  # plain text
    JSON_OBJ = "json_object"  # JSON
    JSON_SCHEMA = "json_schema"  # JSON Schema


class JsonSchema(TypedDict, total=False):
    name: Required[str]
    """
    The name of the response format.

    Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length
    of 64.
    """

    description: Optional[str]
    """
    A description of what the response format is for, used by the model to determine
    how to respond in the format.
    """

    schema: dict[str, object]
    """
    The schema for the response format, described as a JSON Schema object. Learn how
    to build JSON schemas [here](https://json-schema.org/).
    """

    strict: Optional[bool]
    """
    Whether to enable strict schema adherence when generating the output. If set to
    true, the model will always follow the exact schema defined in the `schema`
    field. Only a subset of JSON Schema is supported when `strict` is `true`. To
    learn more, read the
    [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
    """


def _json_schema_type_check(instance) -> str | None:
    if "name" not in instance:
        return "the schema must contain a 'name' field"
    elif not isinstance(instance["name"], str) or instance["name"].strip() == "":
        return "the schema's 'name' field must be a non-empty string"
    if "description" in instance and (
        not isinstance(instance["description"], str)
        or instance["description"].strip() == ""
    ):
        return "the schema's 'description' field must be a non-empty string"
    if "schema" not in instance:
        return "the schema must contain a 'schema' field"
    elif not isinstance(instance["schema"], dict):
        return "the schema's 'schema' field must be a dict, see https://json-schema.org/"
    if "strict" in instance and not isinstance(instance["strict"], bool):
        return "the schema's 'strict' field must be a boolean"

    return None


def _remove_title(schema: dict[str, Any] | list[Any]) -> dict[str, Any] | list[Any]:
    """
    Recursively remove 'title' fields from a JSON Schema.
    """
    if isinstance(schema, list):
        # A list: recurse into every dict/list element.
        for idx, item in enumerate(schema):
            if isinstance(item, (dict, list)):
                schema[idx] = _remove_title(item)
    elif isinstance(schema, dict):
        # A dict: drop the 'title' field, then recurse into dict/list values.
        if "title" in schema:
            del schema["title"]
        for key, value in schema.items():
            if isinstance(value, (dict, list)):
                schema[key] = _remove_title(value)

    return schema


def _link_definitions(schema: dict[str, Any]) -> dict[str, Any]:
    """
    Inline the definitions ($defs/$ref) of a JSON Schema.
    """

    def link_definitions_recursive(
        path: str, sub_schema: list[Any] | dict[str, Any], defs: dict[str, Any]
    ) -> dict[str, Any]:
        """
        Recursively inline definitions in a JSON Schema.
        :param path: current path
        :param sub_schema: sub-schema
        :param defs: collected schema definitions
        :return:
        """
        if isinstance(sub_schema, list):
            # A list: walk every element.
            for i in range(len(sub_schema)):
                if isinstance(sub_schema[i], dict):
                    sub_schema[i] = link_definitions_recursive(
                        f"{path}/{str(i)}", sub_schema[i], defs
                    )
        else:
            # Otherwise a dict.
            if "$defs" in sub_schema:
                # Collect the $defs entries into defs, then drop the field.
                key_prefix = f"{path}/$defs/"
                for key, value in sub_schema["$defs"].items():
                    def_key = key_prefix + key
                    if def_key not in defs:
                        defs[def_key] = value
                del sub_schema["$defs"]
            if "$ref" in sub_schema:
                # Replace a $ref with the corresponding definition from defs.
                def_key = sub_schema["$ref"]
                if def_key in defs:
                    sub_schema = defs[def_key]
                else:
                    raise ValueError(f"The definition '{def_key}' referenced by the schema does not exist")
            # Walk the remaining key-value pairs.
            for key, value in sub_schema.items():
                if isinstance(value, (dict, list)):
                    # Recurse into dict/list values.
                    sub_schema[key] = link_definitions_recursive(
                        f"{path}/{key}", value, defs
                    )

        return sub_schema

    return link_definitions_recursive("#", schema, {})


def _remove_defs(schema: dict[str, Any]) -> dict[str, Any]:
    """
    Recursively remove '$defs' fields from a JSON Schema.
    """
    if isinstance(schema, list):
        # A list: recurse into every dict/list element.
        for idx, item in enumerate(schema):
            if isinstance(item, (dict, list)):
                schema[idx] = _remove_defs(item)  # fixed: originally recursed into _remove_title by mistake
    elif isinstance(schema, dict):
        # A dict: drop the '$defs' field, then recurse into dict/list values.
        if "$defs" in schema:
            del schema["$defs"]
        for key, value in schema.items():
            if isinstance(value, (dict, list)):
                schema[key] = _remove_defs(value)  # fixed: originally recursed into _remove_title by mistake

    return schema


class RespFormat:
    """
    Response format
    """

    @staticmethod
    def _generate_schema_from_model(schema):
        json_schema = {
            "name": schema.__name__,
            "schema": _remove_defs(
                _link_definitions(_remove_title(schema.model_json_schema()))
            ),
            "strict": False,
        }
        if schema.__doc__:
            json_schema["description"] = schema.__doc__
        return json_schema

    def __init__(
        self,
        format_type: RespFormatType = RespFormatType.TEXT,
        schema: type | JsonSchema | None = None,
    ):
        """
        Response format
        :param format_type: response format type (defaults to text)
        :param schema: a model class or JsonSchema (only used when format_type is JSON Schema)
        """
        self.format_type: RespFormatType = format_type

        if format_type == RespFormatType.JSON_SCHEMA:
            if schema is None:
                raise ValueError("schema must not be None when format_type is 'JSON_SCHEMA'")
            if isinstance(schema, dict):
                if check_msg := _json_schema_type_check(schema):
                    raise ValueError(f"Invalid schema: {check_msg}")

                self.schema = schema
            elif isinstance(schema, type) and issubclass(schema, BaseModel):
                # isinstance guard added: issubclass() raises TypeError on non-class arguments
                try:
                    json_schema = self._generate_schema_from_model(schema)

                    self.schema = json_schema
                except Exception as e:
                    raise ValueError(
                        f"Failed to auto-generate a JSON Schema; please check the definition of model class {schema.__name__}"
                    ) from e
            else:
                raise ValueError("schema must be a BaseModel subclass or a JsonSchema")
        else:
            self.schema = None

    def to_dict(self):
        """
        Convert the response format to a dict.
        :return: dict
        """
        if self.schema:
            return {
                "format_type": self.format_type.value,
                "schema": self.schema,
            }
        else:
            return {
                "format_type": self.format_type.value,
            }
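A usage sketch for RespFormat, assuming the caller defines the pydantic models below; the nested model exercises the $defs/$ref inlining performed by _link_definitions and _remove_defs:

from pydantic import BaseModel

class Address(BaseModel):
    city: str

class Person(BaseModel):
    """A person record."""
    name: str
    address: Address  # nested model: pydantic emits $defs/$ref, which get inlined

resp_format = RespFormat(RespFormatType.JSON_SCHEMA, schema=Person)
payload = resp_format.to_dict()
# payload["schema"]["name"] == "Person"; the class docstring becomes the
# description, and the Address definition is inlined in place of its $ref.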
155
src/llm_models/payload_content/tool_option.py
Normal file
@@ -0,0 +1,155 @@
from enum import Enum


class ToolParamType(Enum):
    """
    Tool-call parameter type
    """

    String = "string"  # string
    Int = "integer"  # integer
    Float = "float"  # float
    Boolean = "bool"  # boolean


class ToolParam:
    """
    Tool-call parameter
    """

    def __init__(
        self, name: str, param_type: ToolParamType, description: str, required: bool
    ):
        """
        Initialize a tool-call parameter.
        (Do not modify the ToolParam class directly; use ToolOptionBuilder to construct objects.)
        :param name: parameter name
        :param param_type: parameter type
        :param description: parameter description
        :param required: whether the parameter is required
        """
        self.name: str = name
        self.param_type: ToolParamType = param_type
        self.description: str = description
        self.required: bool = required


class ToolOption:
    """
    Tool-call option
    """

    def __init__(
        self,
        name: str,
        description: str,
        params: list[ToolParam] | None = None,
    ):
        """
        Initialize a tool-call option.
        (Do not modify the ToolOption class directly; use ToolOptionBuilder to construct objects.)
        :param name: tool name
        :param description: tool description
        :param params: list of tool parameters
        """
        self.name: str = name
        self.description: str = description
        self.params: list[ToolParam] | None = params


class ToolOptionBuilder:
    """
    Tool-call option builder
    """

    def __init__(self):
        self.__name: str = ""
        self.__description: str = ""
        self.__params: list[ToolParam] = []

    def set_name(self, name: str) -> "ToolOptionBuilder":
        """
        Set the tool name.
        :param name: tool name
        :return: the ToolOptionBuilder instance
        """
        if not name:
            raise ValueError("The tool name must not be empty")
        self.__name = name
        return self

    def set_description(self, description: str) -> "ToolOptionBuilder":
        """
        Set the tool description.
        :param description: tool description
        :return: the ToolOptionBuilder instance
        """
        if not description:
            raise ValueError("The tool description must not be empty")
        self.__description = description
        return self

    def add_param(
        self,
        name: str,
        param_type: ToolParamType,
        description: str,
        required: bool = False,
    ) -> "ToolOptionBuilder":
        """
        Add a tool parameter.
        :param name: parameter name
        :param param_type: parameter type
        :param description: parameter description
        :param required: whether the parameter is required (defaults to False)
        :return: the ToolOptionBuilder instance
        """
        if not name or not description:
            raise ValueError("The parameter name/description must not be empty")

        self.__params.append(
            ToolParam(
                name=name,
                param_type=param_type,
                description=description,
                required=required,
            )
        )

        return self

    def build(self):
        """
        Build the tool-call option.
        :return: the tool-call option
        """
        if self.__name == "" or self.__description == "":
            raise ValueError("The tool name/description must not be empty")

        return ToolOption(
            name=self.__name,
            description=self.__description,
            params=None if len(self.__params) == 0 else self.__params,
        )


class ToolCall:
    """
    A tool call returned by the model
    """

    def __init__(
        self,
        call_id: str,
        func_name: str,
        args: dict | None = None,
    ):
        """
        Initialize a tool call.
        :param call_id: tool call ID
        :param func_name: name of the function to call
        :param args: tool call arguments
        """
        self.call_id: str = call_id
        self.func_name: str = func_name
        self.args: dict | None = args
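A minimal sketch of the intended builder flow; the tool name and parameters below are illustrative, not part of the project:

weather_tool = (
    ToolOptionBuilder()
    .set_name("get_weather")  # hypothetical tool name
    .set_description("Query the current weather for a city")
    .add_param("city", ToolParamType.String, "Name of the city", required=True)
    .add_param("unit", ToolParamType.String, "Temperature unit, e.g. celsius")
    .build()
)

# A ToolCall is what comes back from the model side, e.g.:
call = ToolCall(call_id="call_0", func_name="get_weather", args={"city": "Beijing"})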
172
src/llm_models/usage_statistic.py
Normal file
@@ -0,0 +1,172 @@
from datetime import datetime
from enum import Enum
from typing import Tuple

from src.common.logger import get_logger
from src.config.api_ada_configs import ModelInfo
from src.common.database.database_model import LLMUsage

logger = get_logger("模型使用统计")


class ReqType(Enum):
    """
    Request type
    """

    CHAT = "chat"  # chat request
    EMBEDDING = "embedding"  # embedding request


class UsageCallStatus(Enum):
    """
    Call status of a usage record
    """

    PROCESSING = "processing"
    SUCCESS = "success"
    FAILURE = "failure"
    CANCELED = "canceled"


class ModelUsageStatistic:
    """
    Model usage statistics, backed by SQLite + Peewee
    """

    def __init__(self):
        """
        Initialize the statistics class.
        Since the Peewee ORM is used, no database instance needs to be passed in.
        """
        # Make sure the table exists.
        try:
            from src.common.database.database import db

            db.create_tables([LLMUsage], safe=True)
        except Exception as e:
            logger.error(f"Failed to create the LLMUsage table: {e}")

    @staticmethod
    def _calculate_cost(
        prompt_tokens: int, completion_tokens: int, model_info: ModelInfo
    ) -> float:
        """Calculate the cost of an API call.

        Uses the model's price_in and price_out to price the input and output tokens.

        Args:
            prompt_tokens: number of input tokens
            completion_tokens: number of output tokens
            model_info: model information

        Returns:
            float: total cost (CNY)
        """
        # Prices are per million tokens.
        input_cost = (prompt_tokens / 1000000) * model_info.price_in
        output_cost = (completion_tokens / 1000000) * model_info.price_out
        return round(input_cost + output_cost, 6)

    def create_usage(
        self,
        model_name: str,
        task_name: str = "N/A",
        request_type: ReqType = ReqType.CHAT,
        user_id: str = "system",
        endpoint: str = "/chat/completions",
    ) -> int | None:
        """
        Create a model usage record.

        Args:
            model_name: model name
            task_name: task name
            request_type: request type, defaults to CHAT
            user_id: user ID, defaults to "system"
            endpoint: API endpoint

        Returns:
            int | None: the record ID, or None on failure
        """
        try:
            usage_record = LLMUsage.create(
                model_name=model_name,
                user_id=user_id,
                request_type=request_type.value,
                endpoint=endpoint,
                prompt_tokens=0,
                completion_tokens=0,
                total_tokens=0,
                cost=0.0,
                status=UsageCallStatus.PROCESSING.value,
                timestamp=datetime.now(),
            )

            logger.trace(
                f"Created a model usage record - model: {model_name}, "
                f"task: {task_name}, type: {request_type.value}, "
                f"user: {user_id}, record ID: {usage_record.id}"
            )

            return usage_record.id
        except Exception as e:
            logger.error(f"Failed to create a model usage record: {str(e)}")
            return None

    def update_usage(
        self,
        record_id: int | None,
        model_info: ModelInfo,
        usage_data: Tuple[int, int, int] | None = None,
        stat: UsageCallStatus = UsageCallStatus.SUCCESS,
        ext_msg: str | None = None,
    ):
        """
        Update a model usage record.

        Args:
            record_id: record ID
            model_info: model information
            usage_data: usage data (input tokens, output tokens, total tokens)
            stat: call status
            ext_msg: extra information
        """
        if not record_id:
            logger.error("Failed to update model usage: record_id must not be empty")
            return

        if usage_data and len(usage_data) != 3:
            logger.error("Failed to update model usage: usage_data must have exactly 3 elements")
            return

        # Unpack the usage data.
        prompt_tokens = usage_data[0] if usage_data else 0
        completion_tokens = usage_data[1] if usage_data else 0
        total_tokens = usage_data[2] if usage_data else 0

        try:
            # Update the record via Peewee.
            update_query = LLMUsage.update(
                status=stat.value,
                prompt_tokens=prompt_tokens,
                completion_tokens=completion_tokens,
                total_tokens=total_tokens,
                cost=self._calculate_cost(
                    prompt_tokens, completion_tokens, model_info
                ) if usage_data else 0.0,
            ).where(LLMUsage.id == record_id)

            updated_count = update_query.execute()

            if updated_count == 0:
                logger.warning(f"Record ID {record_id} does not exist; nothing to update")
                return

            logger.debug(
                f"Token usage - model: {model_info.name}, "
                f"record ID: {record_id}, "
                f"status: {stat.value}, extra: {ext_msg or 'N/A'}, "
                f"prompt: {prompt_tokens}, completion: {completion_tokens}, "
                f"total: {total_tokens}"
            )
        except Exception as e:
            logger.error(f"Failed to record token usage: {str(e)}")
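A sketch of the intended create-then-update call sequence; model_info stands for a ModelInfo loaded from the model config, and the token counts are illustrative:

stat = ModelUsageStatistic()
record_id = stat.create_usage(model_name="deepseek-v3", task_name="reply")

# ... perform the API call, then report the outcome:
stat.update_usage(
    record_id,
    model_info,                  # assumed: the ModelInfo for "deepseek-v3"
    usage_data=(350, 120, 470),  # (prompt, completion, total) tokens
    stat=UsageCallStatus.SUCCESS,
)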
152
src/llm_models/utils.py
Normal file
@@ -0,0 +1,152 @@
import base64
import io
import traceback

from PIL import Image

from src.common.logger import get_logger
from .payload_content.message import Message, MessageBuilder

logger = get_logger("消息压缩工具")


def compress_messages(
    messages: list[Message], img_target_size: int = 1 * 1024 * 1024
) -> list[Message]:
    """
    Compress the images contained in a message list.
    :param messages: the message list
    :param img_target_size: target image size, defaults to 1MB
    :return: the compressed message list
    """

    def reformat_static_image(image_data: bytes) -> bytes:
        """
        Convert a static image to JPEG.
        :param image_data: image data
        :return: converted image data
        """
        try:
            # Fixed: Image.open() takes a path or file-like object, not raw bytes.
            image = Image.open(io.BytesIO(image_data))

            if image.format and (
                image.format.upper() in ["JPEG", "JPG", "PNG", "WEBP"]
            ):
                # Static image: re-encode as JPEG.
                # JPEG has no alpha channel, so normalize the mode first (added fix).
                image = image.convert("RGB")
                reformated_image_data = io.BytesIO()
                image.save(
                    reformated_image_data, format="JPEG", quality=95, optimize=True
                )
                image_data = reformated_image_data.getvalue()

            return image_data
        except Exception as e:
            logger.error(f"Failed to convert image format: {str(e)}")
            return image_data

    def rescale_image(
        image_data: bytes, scale: float
    ) -> tuple[bytes, tuple[int, int] | None, tuple[int, int] | None]:
        """
        Rescale an image.
        :param image_data: image data
        :param scale: scale factor
        :return: rescaled image data, original size, new size
        """
        try:
            image = Image.open(io.BytesIO(image_data))  # fixed: wrap bytes in BytesIO

            # Original size.
            original_size = (image.width, image.height)

            # Compute the new size.
            new_size = (int(original_size[0] * scale), int(original_size[1] * scale))

            output_buffer = io.BytesIO()

            if getattr(image, "is_animated", False):
                # Animated image: process every frame.
                frames = []
                new_size = (new_size[0] // 2, new_size[1] // 2)  # animated: halve the scaled size again
                for frame_idx in range(getattr(image, "n_frames", 1)):
                    image.seek(frame_idx)
                    new_frame = image.copy()
                    new_frame = new_frame.resize(new_size, Image.Resampling.LANCZOS)
                    frames.append(new_frame)

                # Save to the buffer.
                frames[0].save(
                    output_buffer,
                    format="GIF",
                    save_all=True,
                    append_images=frames[1:],
                    optimize=True,
                    duration=image.info.get("duration", 100),
                    loop=image.info.get("loop", 0),
                )
            else:
                # Static image: resize and save directly.
                resized_image = image.resize(new_size, Image.Resampling.LANCZOS)
                resized_image.save(
                    output_buffer, format="JPEG", quality=95, optimize=True
                )

            return output_buffer.getvalue(), original_size, new_size

        except Exception as e:
            logger.error(f"Failed to rescale image: {str(e)}")
            logger.error(traceback.format_exc())
            return image_data, None, None

    def compress_base64_image(
        base64_data: str, target_size: int = 1 * 1024 * 1024
    ) -> str:
        original_b64_data_size = len(base64_data)  # size of the original base64 data

        image_data = base64.b64decode(base64_data)

        # First try converting the format to JPEG.
        image_data = reformat_static_image(image_data)
        base64_data = base64.b64encode(image_data).decode("utf-8")
        if len(base64_data) <= target_size:
            # Small enough after conversion: return directly.
            logger.info(
                f"Converted image to JPEG; encoded size: {len(base64_data) / 1024:.1f}KB"
            )
            return base64_data

        # Still larger than the target after conversion: shrink the dimensions.
        # Note: scaling both dimensions by the size ratio shrinks the pixel count
        # quadratically, so this is deliberately aggressive.
        scale = min(1.0, target_size / len(base64_data))
        image_data, original_size, new_size = rescale_image(image_data, scale)
        base64_data = base64.b64encode(image_data).decode("utf-8")

        if original_size and new_size:
            logger.info(
                f"Compressed image: {original_size[0]}x{original_size[1]} -> {new_size[0]}x{new_size[1]}\n"
                f"Size before: {original_b64_data_size / 1024:.1f}KB, after: {len(base64_data) / 1024:.1f}KB"
            )

        return base64_data

    compressed_messages = []
    for message in messages:
        if isinstance(message.content, list):
            # The content is mixed; compress any images it contains.
            message_builder = MessageBuilder()
            message_builder.set_role(message.role)  # fixed: preserve the original role
            for content_item in message.content:
                if isinstance(content_item, tuple):
                    # An image: compress it.
                    message_builder.add_image_content(
                        content_item[0],
                        compress_base64_image(
                            content_item[1], target_size=img_target_size
                        ),
                    )
                else:
                    message_builder.add_text_content(content_item)
            if message.tool_call_id:
                message_builder.add_tool_call(message.tool_call_id)  # fixed: preserve the tool call ID
            compressed_messages.append(message_builder.build())
        else:
            compressed_messages.append(message)

    return compressed_messages
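A usage sketch, assuming messages were built with the MessageBuilder from payload_content.message; big_png_base64 and the 2 MB target are illustrative:

big_png_base64 = "iVBORw0KGgo..."  # hypothetical oversized base64 payload

msgs = [
    MessageBuilder()
    .add_text_content("Describe this image")
    .add_image_content("png", big_png_base64)
    .build()
]
# Re-encode and, if needed, rescale any image above ~2MB of base64 before sending.
small_msgs = compress_messages(msgs, img_target_size=2 * 1024 * 1024)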
File diff suppressed because it is too large
@@ -1,5 +1,5 @@
 [inner]
-version = "4.5.0"
+version = "5.0.0"

 #---- The following is for developers; if you only deployed 麦麦, you don't need to read it ----
 # If you modify the config file, bump the version value after the change
@@ -227,120 +227,84 @@ show_prompt = false # whether to show the prompt

 [model]
-model_max_output_length = 1024 # maximum number of tokens the model may return per call
+model_max_output_length = 800 # maximum number of tokens the model may return per call

-#------------ Required: component models ------------
+#------------ Model task configuration ------------
+# All model names must match the model names configured in model_config.toml

 [model.utils] # model used by some of 麦麦's components, e.g. the sticker, naming, and relationship modules; required
-name = "Pro/deepseek-ai/DeepSeek-V3"
-provider = "SILICONFLOW"
-pri_in = 2 # input price of the model (optional, for cost tracking)
-pri_out = 8 # output price of the model (optional, for cost tracking)
-# default temp 0.2; if you use the old V3 or another model, adjust the temp parameter yourself
-temp = 0.2 # model temperature; 0.1-0.3 recommended for the new V3
+model_name = "siliconflow-deepseek-v3" # matches the model name in model_config.toml
+temperature = 0.2 # model temperature; 0.1-0.3 recommended for the new V3
+max_tokens = 800 # maximum number of output tokens

 [model.utils_small] # small model used by some components; usage volume is high, so prefer a fast small model
-# a free small model is strongly recommended
-name = "Qwen/Qwen3-8B"
-provider = "SILICONFLOW"
-pri_in = 0
-pri_out = 0
-temp = 0.7
+model_name = "qwen3-8b" # matches the model name in model_config.toml
+temperature = 0.7
+max_tokens = 800
 enable_thinking = false # whether to enable thinking

 [model.replyer_1] # primary reply model, also used for the expressor and expression-style learning
-name = "Pro/deepseek-ai/DeepSeek-V3"
-provider = "SILICONFLOW"
-pri_in = 2 # input price of the model (optional, for cost tracking)
-pri_out = 8 # output price of the model (optional, for cost tracking)
-# default temp 0.2; if you use the old V3 or another model, adjust the temp parameter yourself
-temp = 0.2 # model temperature; 0.1-0.3 recommended for the new V3
+model_name = "siliconflow-deepseek-v3" # matches the model name in model_config.toml
+temperature = 0.2 # model temperature; 0.1-0.3 recommended for the new V3
+max_tokens = 800

 [model.replyer_2] # secondary reply model
-name = "Pro/deepseek-ai/DeepSeek-V3"
-provider = "SILICONFLOW"
-pri_in = 2 # input price of the model (optional, for cost tracking)
-pri_out = 8 # output price of the model (optional, for cost tracking)
-# default temp 0.2; if you use the old V3 or another model, adjust the temp parameter yourself
-temp = 0.2 # model temperature; 0.1-0.3 recommended for the new V3
+model_name = "siliconflow-deepseek-r1" # matches the model name in model_config.toml
+temperature = 0.7 # model temperature
+max_tokens = 800

 [model.planner] # decision-making: the model that decides what 麦麦 should do
-name = "Pro/deepseek-ai/DeepSeek-V3"
-provider = "SILICONFLOW"
-pri_in = 2
-pri_out = 8
-temp = 0.3
+model_name = "siliconflow-deepseek-v3" # matches the model name in model_config.toml
+temperature = 0.3
+max_tokens = 800

 [model.emotion] # drives 麦麦's mood changes
-name = "Pro/deepseek-ai/DeepSeek-V3"
-provider = "SILICONFLOW"
-pri_in = 2
-pri_out = 8
-temp = 0.3
+model_name = "siliconflow-deepseek-v3" # matches the model name in model_config.toml
+temperature = 0.3
+max_tokens = 800

 [model.memory] # memory model
-name = "Qwen/Qwen3-30B-A3B"
-provider = "SILICONFLOW"
-pri_in = 0.7
-pri_out = 2.8
-temp = 0.7
+model_name = "qwen3-30b" # matches the model name in model_config.toml
+temperature = 0.7
+max_tokens = 800
 enable_thinking = false # whether to enable thinking

 [model.vlm] # image recognition model
-name = "Pro/Qwen/Qwen2.5-VL-7B-Instruct"
-provider = "SILICONFLOW"
-pri_in = 0.35
-pri_out = 0.35
+model_name = "qwen2.5-vl-72b" # matches the model name in model_config.toml
+max_tokens = 800

 [model.voice] # speech recognition model
-name = "FunAudioLLM/SenseVoiceSmall"
-provider = "SILICONFLOW"
-pri_in = 0
-pri_out = 0
+model_name = "sensevoice-small" # matches the model name in model_config.toml

 [model.tool_use] # tool-calling model; must support tool calls
-name = "Qwen/Qwen3-14B"
-provider = "SILICONFLOW"
-pri_in = 0.5
-pri_out = 2
-temp = 0.7
+model_name = "qwen3-14b" # matches the model name in model_config.toml
+temperature = 0.7
+max_tokens = 800
 enable_thinking = false # whether to enable thinking (qwen3 only)

 # embedding model
 [model.embedding]
-name = "BAAI/bge-m3"
-provider = "SILICONFLOW"
-pri_in = 0
-pri_out = 0
+model_name = "bge-m3" # matches the model name in model_config.toml

 #------------ LPMM knowledge-base models ------------

 [model.lpmm_entity_extract] # entity extraction model
-name = "Pro/deepseek-ai/DeepSeek-V3"
-provider = "SILICONFLOW"
-pri_in = 2
-pri_out = 8
-temp = 0.2
+model_name = "siliconflow-deepseek-v3" # matches the model name in model_config.toml
+temperature = 0.2
+max_tokens = 800

 [model.lpmm_rdf_build] # RDF construction model
-name = "Pro/deepseek-ai/DeepSeek-V3"
-provider = "SILICONFLOW"
-pri_in = 2
-pri_out = 8
-temp = 0.2
+model_name = "siliconflow-deepseek-v3" # matches the model name in model_config.toml
+temperature = 0.2
+max_tokens = 800

 [model.lpmm_qa] # question-answering model
-name = "Qwen/Qwen3-30B-A3B"
-provider = "SILICONFLOW"
-pri_in = 0.7
-pri_out = 2.8
-temp = 0.7
+model_name = "deepseek-r1-distill-qwen-32b" # matches the model name in model_config.toml
+temperature = 0.7
+max_tokens = 800
 enable_thinking = false # whether to enable thinking

 [maim_message]
 auth_token = [] # auth tokens used for API verification; empty disables verification
 # To use the items below, enable use_custom and configure a separate maim_message server
220
template/compare/model_config_template.toml
Normal file
@@ -0,0 +1,220 @@
[inner]
version = "0.2.1"

# The version numbering rules for this config file are the same as for bot_config.toml
#
# === Multiple API key support ===
# This config file supports configuring multiple API keys per provider, which enables:
# 1. Automatic failover: when one API key fails, the next available key is used
# 2. Load balancing: rotate across the available API keys to avoid per-key rate limits
# 3. Backward compatibility: the single `key` field is still supported
#
# How to configure:
# - Multiple keys: use the array form api_keys = ["key1", "key2", "key3"]
# - Single key: use the string form key = "your-key" (backward compatible)
#
# Error handling:
# - 401/403 auth errors: switch to the next API key immediately
# - 429 rate limits: wait and retry; switch keys if the failures persist
# - Network errors: wait briefly and retry; switch keys on failure
# - Other errors: handled by the normal retry mechanism
#
# === Task type and model capability configuration ===
# To make task assignment more accurate and maintainable, a model's task type and capabilities can now be configured explicitly:
#
# task_type (recommended):
# - Explicitly states which task the model is mainly used for
# - Allowed values: llm_normal, llm_reasoning, vision, embedding, speech
# - If omitted, the system infers it from capabilities or the model name (not recommended)
#
# capabilities (recommended):
# - Describes all capabilities the model supports
# - Allowed values: text, vision, embedding, speech, tool_calling, reasoning
# - Combinations are supported, e.g. ["text", "vision"]
#
# Configuration precedence:
# 1. task_type (highest priority; sets the task type directly)
# 2. capabilities (medium priority; the task type is inferred from the capabilities)
# 3. model-name keywords (lowest priority; relying on this is not recommended)
#
# Backward compatibility:
# - The model_flags field is still supported, but migrating to capabilities is recommended
# - When the new fields are absent, the system falls back to inference based on the model name

[request_conf] # request configuration (all values below are defaults; to change one, uncomment the entry)
#max_retry = 2 # maximum retries (how many times a failed API call to one model is retried)
#timeout = 10 # API call timeout (a call exceeding this is treated as "request timed out", in seconds)
#retry_interval = 10 # retry interval (wait time between retries after a failed API call, in seconds)
#default_temperature = 0.7 # default temperature (used when bot_config.toml does not set temperature)
#default_max_tokens = 1024 # default maximum output tokens (used when bot_config.toml does not set max_tokens)


[[api_providers]] # API service providers (more than one may be configured)
name = "DeepSeek" # provider name (arbitrary; the api_provider of a model must use this name)
base_url = "https://api.deepseek.cn/v1" # the provider's base URL
# Multiple API keys are supported, enabling automatic failover and load balancing
api_keys = [ # API key list (multiple keys enable automatic failover and load balancing)
    "sk-your-first-key-here",
    "sk-your-second-key-here",
    "sk-your-third-key-here"
]
# Backward compatible: with only one key, the single `key` field can be used instead
#key = "******" # API key (optional, defaults to None)
client_type = "openai" # request client (optional, defaults to "openai"; set to "gemini" for Google Gemini models)

[[api_providers]] # Special case: Google's Gemini uses its own API, incompatible with the OpenAI format; set client_type to "gemini"
name = "Google"
base_url = "https://api.google.com/v1"
# The Google API also supports multiple keys
api_keys = [
    "your-google-api-key-1",
    "your-google-api-key-2"
]
client_type = "gemini"

[[api_providers]]
name = "SiliconFlow"
base_url = "https://api.siliconflow.cn/v1"
# single-key example (backward compatible)
key = "******"
#
#[[api_providers]]
#name = "LocalHost"
#base_url = "https://localhost:8888"
#key = "lm-studio"


[[models]] # models (more than one may be configured)
# model identifier (the identifier assigned by the API provider)
model_identifier = "deepseek-chat"
# model name (arbitrary; bot_config.toml must use this name)
# (optional; if absent, model_identifier is used)
name = "deepseek-v3"
# provider name (matches a name configured in api_providers)
api_provider = "DeepSeek"
# task type (recommended; explicitly states the model's main task)
# allowed values: llm_normal, llm_reasoning, vision, embedding, speech
# if omitted, the system infers it from capabilities or the model name
task_type = "llm_normal"
# capability list (recommended; describes what the model supports)
# allowed values: text, vision, embedding, speech, tool_calling, reasoning
capabilities = ["text", "tool_calling"]
# input price (for API usage accounting, in CNY per million tokens) (optional, defaults to 0)
price_in = 2.0
# output price (for API usage accounting, in CNY per million tokens) (optional, defaults to 0)
price_out = 8.0
# forced streaming mode (uncomment if the model does not support non-streaming output)
# (optional, defaults to false)
#force_stream_mode = true

[[models]]
model_identifier = "deepseek-reasoner"
name = "deepseek-r1"
api_provider = "DeepSeek"
# configuration example for a reasoning model
task_type = "llm_reasoning"
capabilities = ["text", "tool_calling", "reasoning"]
# the backward-compatible model_flags field is kept (deprecated; prefer capabilities)
model_flags = [ "text", "tool_calling", "reasoning",]
price_in = 4.0
price_out = 16.0

[[models]]
model_identifier = "Pro/deepseek-ai/DeepSeek-V3"
name = "siliconflow-deepseek-v3"
api_provider = "SiliconFlow"
task_type = "llm_normal"
capabilities = ["text", "tool_calling"]
price_in = 2.0
price_out = 8.0

[[models]]
model_identifier = "Pro/deepseek-ai/DeepSeek-R1"
name = "siliconflow-deepseek-r1"
api_provider = "SiliconFlow"
task_type = "llm_reasoning"
capabilities = ["text", "tool_calling", "reasoning"]
price_in = 4.0
price_out = 16.0

[[models]]
model_identifier = "Pro/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"
name = "deepseek-r1-distill-qwen-32b"
api_provider = "SiliconFlow"
task_type = "llm_reasoning"
capabilities = ["text", "tool_calling", "reasoning"]
price_in = 4.0
price_out = 16.0

[[models]]
model_identifier = "Qwen/Qwen3-8B"
name = "qwen3-8b"
api_provider = "SiliconFlow"
task_type = "llm_normal"
capabilities = ["text"]
price_in = 0
price_out = 0

[[models]]
model_identifier = "Qwen/Qwen3-14B"
name = "qwen3-14b"
api_provider = "SiliconFlow"
task_type = "llm_normal"
capabilities = ["text", "tool_calling"]
price_in = 0.5
price_out = 2.0

[[models]]
model_identifier = "Qwen/Qwen3-30B-A3B"
name = "qwen3-30b"
api_provider = "SiliconFlow"
task_type = "llm_normal"
capabilities = ["text", "tool_calling"]
price_in = 0.7
price_out = 2.8

[[models]]
model_identifier = "Qwen/Qwen2.5-VL-72B-Instruct"
name = "qwen2.5-vl-72b"
api_provider = "SiliconFlow"
# configuration example for a vision model
task_type = "vision"
capabilities = ["vision", "text"]
# the backward-compatible model_flags field is kept (deprecated; prefer capabilities)
model_flags = [ "vision", "text",]
price_in = 4.13
price_out = 4.13

[[models]]
model_identifier = "FunAudioLLM/SenseVoiceSmall"
name = "sensevoice-small"
api_provider = "SiliconFlow"
# configuration example for a speech model
task_type = "speech"
capabilities = ["speech"]
# the backward-compatible model_flags field is kept (deprecated; prefer capabilities)
model_flags = [ "audio",]
price_in = 0
price_out = 0

[[models]]
model_identifier = "BAAI/bge-m3"
name = "bge-m3"
api_provider = "SiliconFlow"
# configuration example for an embedding model
task_type = "embedding"
capabilities = ["text", "embedding"]
# the backward-compatible model_flags field is kept (deprecated; prefer capabilities)
model_flags = [ "text", "embedding",]
price_in = 0
price_out = 0


[task_model_usage]
llm_reasoning = {model="deepseek-r1", temperature=0.8, max_tokens=1024, max_retry=0}
llm_normal = {model="deepseek-r1", max_tokens=1024, max_retry=0}
embedding = "bge-m3" # fixed: originally "siliconflow-bge-m3", which matches no model name defined above
#schedule = [
# "deepseek-v3",
# "deepseek-r1",
#]
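A sketch of how a task entry might be resolved against this file, assuming Python 3.11+ (tomllib) and the field names shown above; this loader is illustrative, not the project's actual config code:

import tomllib

with open("model_config.toml", "rb") as f:
    cfg = tomllib.load(f)

# Index models by name (falling back to model_identifier) and providers by name.
models = {m.get("name", m["model_identifier"]): m for m in cfg["models"]}
providers = {p["name"]: p for p in cfg["api_providers"]}

# Task entries are either a dict ({"model": ..., ...}) or a bare model name string.
usage = cfg["task_model_usage"]["llm_normal"]
model = models[usage["model"] if isinstance(usage, dict) else usage]
provider = providers[model["api_provider"]]
keys = provider.get("api_keys") or [provider.get("key")]  # multi-key or legacy single key
print(model["model_identifier"], provider["base_url"], len(keys))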
220
template/model_config_template.toml
Normal file
@@ -0,0 +1,220 @@
(Content identical to template/compare/model_config_template.toml above.)
@@ -1,23 +1,2 @@
 HOST=127.0.0.1
 PORT=8000
-
-# keys and URLs
-
-# SiliconFlow
-SILICONFLOW_BASE_URL=https://api.siliconflow.cn/v1/
-# DeepSeek official
-DEEP_SEEK_BASE_URL=https://api.deepseek.com/v1
-# Alibaba Bailian
-BAILIAN_BASE_URL = https://dashscope.aliyuncs.com/compatible-mode/v1
-# Volcano Engine
-HUOSHAN_BASE_URL =
-# xxxxx platform
-xxxxxxx_BASE_URL=https://xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
-
-# define the API keys you want to use (apply for them on the corresponding sites)
-DEEP_SEEK_KEY=
-CHAT_ANY_WHERE_KEY=
-SILICONFLOW_KEY=
-BAILIAN_KEY =
-HUOSHAN_KEY =
-xxxxxxx_KEY=