diff --git a/src/llm_models/utils_model.py b/src/llm_models/utils_model.py
index 7577f5f26..f90d38c88 100644
--- a/src/llm_models/utils_model.py
+++ b/src/llm_models/utils_model.py
@@ -216,24 +216,59 @@ class LLMRequest:
     def _determine_task_name(self, model: dict) -> str:
         """
         根据模型配置确定任务名称
+        优先使用配置文件中明确定义的任务类型,避免基于模型名称的脆弱推断
+
         Args:
             model: 模型配置字典
         Returns:
             任务名称
         """
-        # 兼容新旧格式的模型名称
-        model_name = model.get("model_name", model.get("name", ""))
+        # 方法1: 优先使用配置文件中明确定义的 task_type 字段
+        if "task_type" in model:
+            task_type = model["task_type"]
+            logger.debug(f"🎯 [任务确定] 使用配置中的 task_type: {task_type}")
+            return task_type
 
-        # 根据模型名称推断任务类型
+        # 方法2: 使用 capabilities 字段来推断主要任务类型
+        if "capabilities" in model:
+            capabilities = model["capabilities"]
+            if isinstance(capabilities, list):
+                # 按优先级顺序检查能力
+                if "vision" in capabilities:
+                    logger.debug(f"🎯 [任务确定] 从 capabilities {capabilities} 推断为: vision")
+                    return "vision"
+                elif "embedding" in capabilities:
+                    logger.debug(f"🎯 [任务确定] 从 capabilities {capabilities} 推断为: embedding")
+                    return "embedding"
+                elif "speech" in capabilities:
+                    logger.debug(f"🎯 [任务确定] 从 capabilities {capabilities} 推断为: speech")
+                    return "speech"
+                elif "text" in capabilities:
+                    # 如果只有文本能力,则根据request_type细分
+                    task = "llm_reasoning" if self.request_type == "reasoning" else "llm_normal"
+                    logger.debug(f"🎯 [任务确定] 从 capabilities {capabilities} 和 request_type {self.request_type} 推断为: {task}")
+                    return task
+
+        # 方法3: 向后兼容 - 基于模型名称的关键字推断(不推荐但保留兼容性)
+        model_name = model.get("model_name", model.get("name", ""))
+        logger.warning(f"⚠️ [任务确定] 配置中未找到 task_type 或 capabilities,回退到基于模型名称的推断: {model_name}")
+        logger.warning("⚠️ [建议] 请在 model_config.toml 中为模型添加明确的 task_type 或 capabilities 字段")
+
+        # 保留原有的关键字匹配逻辑作为fallback
         if any(keyword in model_name.lower() for keyword in ["vlm", "vision", "gpt-4o", "claude", "vl-"]):
+            logger.debug(f"🎯 [任务确定] 从模型名称 {model_name} 推断为: vision")
             return "vision"
         elif any(keyword in model_name.lower() for keyword in ["embed", "text-embedding", "bge-"]):
+            logger.debug(f"🎯 [任务确定] 从模型名称 {model_name} 推断为: embedding")
             return "embedding"
         elif any(keyword in model_name.lower() for keyword in ["whisper", "speech", "voice"]):
+            logger.debug(f"🎯 [任务确定] 从模型名称 {model_name} 推断为: speech")
             return "speech"
         else:
             # 根据request_type确定,映射到配置文件中定义的任务
-            return "llm_reasoning" if self.request_type == "reasoning" else "llm_normal"
+            task = "llm_reasoning" if self.request_type == "reasoning" else "llm_normal"
+            logger.debug(f"🎯 [任务确定] 从 request_type {self.request_type} 推断为: {task}")
+            return task
 
     @staticmethod
     def _init_database():
diff --git a/template/model_config_template.toml b/template/model_config_template.toml
index cc715d79e..8ab187626 100644
--- a/template/model_config_template.toml
+++ b/template/model_config_template.toml
@@ -1,5 +1,5 @@
 [inner]
-version = "0.2.0"
+version = "0.2.1"
 
 # 配置文件版本号迭代规则同bot_config.toml
 #
@@ -18,6 +18,28 @@ version = "0.2.0"
 # - 429频率限制:等待后重试,如果持续失败则切换Key
 # - 网络错误:短暂等待后重试,失败则切换Key
 # - 其他错误:按照正常重试机制处理
+#
+# === 任务类型和模型能力配置 ===
+# 为了提高任务分配的准确性和可维护性,现在支持明确配置模型的任务类型和能力:
+#
+# task_type(推荐配置):
+# - 明确指定模型主要用于什么任务
+# - 可选值:llm_normal, llm_reasoning, vision, embedding, speech
+# - 如果不配置,系统会根据capabilities或模型名称自动推断(不推荐)
+#
+# capabilities(推荐配置):
+# - 描述模型支持的所有能力
+# - 可选值:text, vision, embedding, speech, tool_calling, reasoning
+# - 支持多个能力的组合,如:["text", "vision"]
+#
+# 配置优先级:
+# 1. task_type(最高优先级,直接指定任务类型)
+# 2. capabilities(中等优先级,根据能力推断任务类型)
+# 3. 模型名称关键字(最低优先级,不推荐依赖)
+#
+# 向后兼容:
+# - 仍然支持 model_flags 字段,但建议迁移到 capabilities
+# - 未配置新字段时会自动回退到基于模型名称的推断
 [request_conf]
 # 请求配置(此配置项数值均为默认值,如想修改,请取消对应条目的注释)
 #max_retry = 2 # 最大重试次数(单个模型API调用失败,最多重试的次数)
@@ -70,6 +92,13 @@ model_identifier = "deepseek-chat"
 name = "deepseek-v3"
 # API服务商名称(对应在api_providers中配置的服务商名称)
 api_provider = "DeepSeek"
+# 任务类型(推荐配置,明确指定模型主要用于什么任务)
+# 可选值:llm_normal, llm_reasoning, vision, embedding, speech
+# 如果不配置,系统会根据capabilities或模型名称自动推断
+task_type = "llm_normal"
+# 模型能力列表(推荐配置,描述模型支持的能力)
+# 可选值:text, vision, embedding, speech, tool_calling, reasoning
+capabilities = ["text", "tool_calling"]
 # 输入价格(用于API调用统计,单位:元/兆token)(可选,若无该字段,默认值为0)
 price_in = 2.0
 # 输出价格(用于API调用统计,单位:元/兆token)(可选,若无该字段,默认值为0)
 price_out = 8.0
@@ -82,6 +111,10 @@ price_out = 8.0
 model_identifier = "deepseek-reasoner"
 name = "deepseek-r1"
 api_provider = "DeepSeek"
+# 推理模型的配置示例
+task_type = "llm_reasoning"
+capabilities = ["text", "tool_calling", "reasoning"]
+# 保留向后兼容的model_flags字段(已废弃,建议使用capabilities)
 model_flags = [ "text", "tool_calling", "reasoning",]
 price_in = 4.0
 price_out = 16.0
@@ -90,6 +123,8 @@ price_out = 16.0
 model_identifier = "Pro/deepseek-ai/DeepSeek-V3"
 name = "siliconflow-deepseek-v3"
 api_provider = "SiliconFlow"
+task_type = "llm_normal"
+capabilities = ["text", "tool_calling"]
 price_in = 2.0
 price_out = 8.0
@@ -97,6 +132,8 @@ price_out = 8.0
 model_identifier = "Pro/deepseek-ai/DeepSeek-R1"
 name = "siliconflow-deepseek-r1"
 api_provider = "SiliconFlow"
+task_type = "llm_reasoning"
+capabilities = ["text", "tool_calling", "reasoning"]
 price_in = 4.0
 price_out = 16.0
@@ -104,6 +141,8 @@ price_out = 16.0
 model_identifier = "Pro/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"
 name = "deepseek-r1-distill-qwen-32b"
 api_provider = "SiliconFlow"
+task_type = "llm_reasoning"
+capabilities = ["text", "tool_calling", "reasoning"]
 price_in = 4.0
 price_out = 16.0
@@ -111,6 +150,8 @@ price_out = 16.0
 model_identifier = "Qwen/Qwen3-8B"
 name = "qwen3-8b"
 api_provider = "SiliconFlow"
+task_type = "llm_normal"
+capabilities = ["text"]
 price_in = 0
 price_out = 0
@@ -118,6 +159,8 @@ price_out = 0
 model_identifier = "Qwen/Qwen3-14B"
 name = "qwen3-14b"
 api_provider = "SiliconFlow"
+task_type = "llm_normal"
+capabilities = ["text", "tool_calling"]
 price_in = 0.5
 price_out = 2.0
@@ -125,6 +168,8 @@ price_out = 2.0
 model_identifier = "Qwen/Qwen3-30B-A3B"
 name = "qwen3-30b"
 api_provider = "SiliconFlow"
+task_type = "llm_normal"
+capabilities = ["text", "tool_calling"]
 price_in = 0.7
 price_out = 2.8
@@ -132,6 +177,10 @@ price_out = 2.8
 model_identifier = "Qwen/Qwen2.5-VL-72B-Instruct"
 name = "qwen2.5-vl-72b"
 api_provider = "SiliconFlow"
+# 视觉模型的配置示例
+task_type = "vision"
+capabilities = ["vision", "text"]
+# 保留向后兼容的model_flags字段(已废弃,建议使用capabilities)
 model_flags = [ "vision", "text",]
 price_in = 4.13
 price_out = 4.13
@@ -140,6 +189,10 @@ price_out = 4.13
 model_identifier = "FunAudioLLM/SenseVoiceSmall"
 name = "sensevoice-small"
 api_provider = "SiliconFlow"
+# 语音模型的配置示例
+task_type = "speech"
+capabilities = ["speech"]
+# 保留向后兼容的model_flags字段(已废弃,建议使用capabilities)
 model_flags = [ "audio",]
 price_in = 0
 price_out = 0
@@ -148,6 +201,10 @@ price_out = 0
 model_identifier = "BAAI/bge-m3"
 name = "bge-m3"
 api_provider = "SiliconFlow"
+# 嵌入模型的配置示例
+task_type = "embedding"
+capabilities = ["text", "embedding"]
+# 保留向后兼容的model_flags字段(已废弃,建议使用capabilities)
 model_flags = [ "text", "embedding",]
 price_in = 0
 price_out = 0