refactor: 移除未使用的导入和冗余代码

2025-09-26 20:24:56 +08:00
parent 9c1a7ff123
commit f12cade772
7 changed files with 886 additions and 959 deletions
--- a/src/llm_models/llm_utils.py
+++ b/src/llm_models/llm_utils.py
@@ -1,65 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-@File    :   llm_utils.py
-@Time    :   2024/05/24 17:00:00
-@Author  :   墨墨
-@Version :   1.0
-@Desc    :   LLM相关通用工具函数
-"""
-from typing import List, Dict, Any, Tuple
-
-from src.common.logger import get_logger
-from .payload_content.tool_option import ToolOption, ToolOptionBuilder, ToolParamType
-
-logger = get_logger("model_utils")
-
-def normalize_image_format(image_format: str) -> str:
-    """
-    标准化图片格式名称，确保与各种API的兼容性
-    """
-    format_mapping = {
-        "jpg": "jpeg", "JPG": "jpeg", "JPEG": "jpeg", "jpeg": "jpeg",
-        "png": "png", "PNG": "png",
-        "webp": "webp", "WEBP": "webp",
-        "gif": "gif", "GIF": "gif",
-        "heic": "heic", "HEIC": "heic",
-        "heif": "heif", "HEIF": "heif",
-    }
-    normalized = format_mapping.get(image_format, image_format.lower())
-    logger.debug(f"图片格式标准化: {image_format} -> {normalized}")
-    return normalized
-
-def build_tool_options(tools: List[Dict[str, Any]] | None) -> List[ToolOption] | None:
-    """构建工具选项列表"""
-    if not tools:
-        return None
-    tool_options: List[ToolOption] = []
-    for tool in tools:
-        try:
-            tool_options_builder = ToolOptionBuilder()
-            tool_options_builder.set_name(tool.get("name", ""))
-            tool_options_builder.set_description(tool.get("description", ""))
-            parameters: List[Tuple[str, str, str, bool, List[str] | None]] = tool.get("parameters", [])
-            for param in parameters:
-                # 参数校验
-                assert isinstance(param, tuple) and len(param) == 5, "参数必须是包含5个元素的元组"
-                assert isinstance(param[0], str), "参数名称必须是字符串"
-                assert isinstance(param[1], ToolParamType), "参数类型必须是ToolParamType枚举"
-                assert isinstance(param[2], str), "参数描述必须是字符串"
-                assert isinstance(param[3], bool), "参数是否必填必须是布尔值"
-                assert isinstance(param[4], list) or param[4] is None, "参数枚举值必须是列表或None"
-                
-                tool_options_builder.add_param(
-                    name=param[0],
-                    param_type=param[1],
-                    description=param[2],
-                    required=param[3],
-                    enum_values=param[4],
-                )
-            tool_options.append(tool_options_builder.build())
-        except AssertionError as ae:
-            logger.error(f"工具 '{tool.get('name', 'unknown')}' 的参数定义错误: {str(ae)}")
-        except Exception as e:
-            logger.error(f"构建工具 '{tool.get('name', 'unknown')}' 失败: {str(e)}")
-            
-    return tool_options or None
--- a/src/llm_models/model_client/aiohttp_gemini_client.py
+++ b/src/llm_models/model_client/aiohttp_gemini_client.py
@@ -122,7 +122,7 @@ def _convert_tool_options(tool_options: list[ToolOption]) -> list[dict]:

    def _convert_tool_param(param: ToolParam) -> dict:
        """转换工具参数"""
-        result: dict[str, Any] = {
+        result = {
            "type": param.param_type.value,
            "description": param.description,
        }
@@ -132,7 +132,7 @@ def _convert_tool_options(tool_options: list[ToolOption]) -> list[dict]:

    def _convert_tool_option_item(tool_option: ToolOption) -> dict:
        """转换单个工具选项"""
-        function_declaration: dict[str, Any] = {
+        function_declaration = {
            "name": tool_option.name,
            "description": tool_option.description,
        }
@@ -500,7 +500,7 @@ class AiohttpGeminiClient(BaseClient):
            # 直接重抛项目定义的异常
            raise
        except Exception as e:
-            logger.debug(f"请求处理中发生未知异常: {e}")
+            logger.debug(e)
            # 其他异常转换为网络连接错误
            raise NetworkConnectionError() from e

--- a/src/llm_models/model_selector.py
+++ b/src/llm_models/model_selector.py
@@ -1,130 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-@File    :   model_selector.py
-@Time    :   2024/05/24 16:00:00
-@Author  :   墨墨
-@Version :   1.0
-@Desc    :   模型选择与负载均衡器
-"""
-from typing import Dict, Tuple, Set, Optional
-
-from src.common.logger import get_logger
-from src.config.config import model_config
-from src.config.api_ada_configs import ModelInfo, APIProvider, TaskConfig
-from .model_client.base_client import BaseClient, client_registry
-
-logger = get_logger("model_utils")
-
-
-class ModelSelector:
-    """模型选择与负载均衡器"""
-
-    def __init__(self, model_set: TaskConfig, request_type: str = ""):
-        """
-        初始化模型选择器
-
-        Args:
-            model_set (TaskConfig): 任务配置中定义的模型集合
-            request_type (str, optional): 请求类型 (例如 "embedding"). Defaults to "".
-        """
-        self.model_for_task = model_set
-        self.request_type = request_type
-        self.model_usage: Dict[str, Tuple[int, int, int]] = {
-            model: (0, 0, 0) for model in self.model_for_task.model_list
-        }
-        """模型使用量记录，用于进行负载均衡，对应为(total_tokens, penalty, usage_penalty)，惩罚值是为了能在某个模型请求不给力或正在被使用的时候进行调整"""
-
-    def select_best_available_model(
-        self, failed_models_in_this_request: Set[str]
-    ) -> Optional[Tuple[ModelInfo, APIProvider, BaseClient]]:
-        """
-        从可用模型中选择负载均衡评分最低的模型，并排除当前请求中已失败的模型。
-
-        Args:
-            failed_models_in_this_request (Set[str]): 当前请求中已失败的模型名称集合。
-
-        Returns:
-            Optional[Tuple[ModelInfo, APIProvider, BaseClient]]: 选定的模型详细信息，如果无可用模型则返回 None。
-        """
-        candidate_models_usage = {
-            model_name: usage_data
-            for model_name, usage_data in self.model_usage.items()
-            if model_name not in failed_models_in_this_request
-        }
-
-        if not candidate_models_usage:
-            logger.warning("没有可用的模型供当前请求选择。")
-            return None
-
-        # 根据现有公式查找分数最低的模型
-        # 公式: total_tokens + penalty * 300 + usage_penalty * 1000
-        # 较高的 usage_penalty (由于被选中的模型会被增加) 和 penalty (由于模型失败) 会使模型得分更高，从而降低被选中的几率。
-        least_used_model_name = min(
-            candidate_models_usage,
-            key=lambda k: candidate_models_usage[k][0]
-            + candidate_models_usage[k][1] * 300
-            + candidate_models_usage[k][2] * 1000,
-        )
-
-        # --- 动态故障转移的核心逻辑 ---
-        # RequestStrategy 中的循环会多次调用此函数。
-        # 如果当前选定的模型因异常而失败，下次循环会重新调用此函数，
-        # 此时由于失败模型已被标记，且其惩罚值可能已在 RequestExecutor 中增加，
-        # 此函数会自动选择一个得分更低（即更可用）的模型。
-        # 这种机制实现了动态的、基于当前系统状态的故障转移。
-        model_info = model_config.get_model_info(least_used_model_name)
-        api_provider = model_config.get_provider(model_info.api_provider)
-
-        force_new_client = self.request_type == "embedding"
-        client = client_registry.get_client_class_instance(api_provider, force_new=force_new_client)
-
-        logger.debug(f"为当前请求选择了最佳可用模型: {model_info.name}")
-
-        # 增加所选模型的请求使用惩罚值，以反映其当前使用情况/选择。
-        # 这有助于在同一请求的后续选择或未来请求中实现动态负载均衡。
-        total_tokens, penalty, usage_penalty = self.model_usage[model_info.name]
-        self.model_usage[model_info.name] = (total_tokens, penalty, usage_penalty + 1)
-
-        return model_info, api_provider, client
-
-    def select_model(self) -> Tuple[ModelInfo, APIProvider, BaseClient]:
-        """
-        根据总tokens和惩罚值选择的模型 (负载均衡)
-        """
-        least_used_model_name = min(
-            self.model_usage,
-            key=lambda k: self.model_usage[k][0] + self.model_usage[k][1] * 300 + self.model_usage[k][2] * 1000,
-        )
-        model_info = model_config.get_model_info(least_used_model_name)
-        api_provider = model_config.get_provider(model_info.api_provider)
-
-        force_new_client = self.request_type == "embedding"
-        client = client_registry.get_client_class_instance(api_provider, force_new=force_new_client)
-        logger.debug(f"选择请求模型: {model_info.name}")
-        total_tokens, penalty, usage_penalty = self.model_usage[model_info.name]
-        self.model_usage[model_info.name] = (total_tokens, penalty, usage_penalty + 1)
-        return model_info, api_provider, client
-
-    def update_model_penalty(self, model_name: str, penalty_increment: int):
-        """
-        更新指定模型的惩罚值
-
-        Args:
-            model_name (str): 模型名称
-            penalty_increment (int): 惩罚增量
-        """
-        if model_name in self.model_usage:
-            total_tokens, penalty, usage_penalty = self.model_usage[model_name]
-            self.model_usage[model_name] = (total_tokens, penalty + penalty_increment, usage_penalty)
-            logger.debug(f"模型 '{model_name}' 的惩罚值增加了 {penalty_increment}")
-
-    def decrease_usage_penalty(self, model_name: str):
-        """
-        请求结束后，减少使用惩罚值
-
-        Args:
-            model_name (str): 模型名称
-        """
-        if model_name in self.model_usage:
-            total_tokens, penalty, usage_penalty = self.model_usage[model_name]
-            self.model_usage[model_name] = (total_tokens, penalty, usage_penalty - 1)
--- a/src/llm_models/prompt_processor.py
+++ b/src/llm_models/prompt_processor.py
@@ -1,170 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-@File    :   prompt_processor.py
-@Time    :   2024/05/24 16:45:00
-@Author  :   墨墨
-@Version :   1.0
-@Desc    :   提示词预处理（反截断、内容混淆等）
-"""
-import random
-import string
-import re
-from typing import Tuple
-
-from src.common.logger import get_logger
-from src.config.api_ada_configs import APIProvider, ModelInfo
-
-logger = get_logger("prompt_processor")
-
-
-class PromptProcessor:
-    """
-    提示词处理器。
-    负责对发送给模型的原始prompt进行预处理，以增强模型性能或实现特定功能。
-    主要功能包括：
-    1.  **反截断**：在prompt末尾添加一个特殊的结束标记指令，帮助判断模型输出是否被截断。
-    2.  **内容混淆**：向prompt中注入随机的“噪音”字符串，并附带指令让模型忽略它们，
-        可能用于绕过某些平台的审查或内容策略。
-    3.  **思维链提取**：从模型的响应中分离出思考过程（被<think>标签包裹）和最终答案。
-    """
-
-    def __init__(self):
-        """初始化Prompt处理器，定义所需的指令文本。"""
-        # 指导模型忽略噪音字符串的指令
-        self.noise_instruction = """**【核心过滤规则】**
-在交互过程中,你会在用户的输入内容中看到一些无意义的、由字母和数字组成的随机字符串。这些字符串是API传输过程中的技术伪装信息,【不是】用户真实意图的一部分。
-
-你的任务是【完全并彻底地忽略】这些随机字符串。
-**【严禁】** 将这些字符串写入你的回复、对它们进行任何形式的解读、或将它们作为故事的一部分进行模拟。当看到这些字符串时,请将其视为空白或直接跳过,并根据用户其他有意义的输入(如有)或当前故事情节来继续你的叙事。此规则为最高优先级。
-"""
-        # 定义一个独特的结束标记，用于反截断检查
-        self.end_marker = "###MAI_RESPONSE_END###"
-        # 指导模型在回复末尾添加结束标记的指令
-        self.anti_truncation_instruction = f"""
-**【输出完成信令】**
-这是一个非常重要的指令,请务-务必遵守。在你的回复内容完全结束后,请务必在最后另起一行,只写 `{self.end_marker}` 作为结束标志。
-例如:
-<你的回复内容>
-{self.end_marker}
-
-这有助于我判断你的输出是否被截断。请不要在 `{self.end_marker}` 前后添加任何其他文字或标点。
-"""
-
-    def process_prompt(
-        self, prompt: str, model_info: ModelInfo, api_provider: APIProvider, task_name: str
-    ) -> str:
-        """
-        根据模型和API提供商的配置，对输入的prompt进行预处理。
-
-        Args:
-            prompt (str): 原始的用户输入prompt。
-            model_info (ModelInfo): 当前使用的模型信息。
-            api_provider (APIProvider): 当前API提供商的配置。
-            task_name (str): 当前任务的名称，用于日志记录。
-
-        Returns:
-            str: 经过处理后的、最终将发送给模型的prompt。
-        """
-        processed_prompt = prompt
-
-        # 步骤 1: 根据模型配置添加反截断指令
-        use_anti_truncation = getattr(model_info, "use_anti_truncation", False)
-        if use_anti_truncation:
-            processed_prompt += self.anti_truncation_instruction
-            logger.info(f"模型 '{model_info.name}' (任务: '{task_name}') 已启用反截断功能。")
-
-        # 步骤 2: 根据API提供商配置应用内容混淆
-        if getattr(api_provider, "enable_content_obfuscation", False):
-            intensity = getattr(api_provider, "obfuscation_intensity", 1)
-            logger.info(f"为API提供商 '{api_provider.name}' 启用内容混淆，强度级别: {intensity}")
-            processed_prompt = self._apply_content_obfuscation(processed_prompt, intensity)
-
-        return processed_prompt
-
-    def _apply_content_obfuscation(self, text: str, intensity: int) -> str:
-        """
-        对文本应用内容混淆处理。
-        首先添加过滤规则指令，然后注入随机噪音。
-        """
-        # 在文本开头加入指导模型忽略噪音的指令
-        processed_text = self.noise_instruction + "\n\n" + text
-        logger.debug(f"已添加过滤规则指令，文本长度: {len(text)} -> {len(processed_text)}")
-
-        # 在文本中注入随机乱码
-        final_text = self._inject_random_noise(processed_text, intensity)
-        logger.debug(f"乱码注入完成，最终文本长度: {len(final_text)}")
-
-        return final_text
-
-    @staticmethod
-    def _inject_random_noise(text: str, intensity: int) -> str:
-        """
-        根据指定的强度，在文本的词语之间随机注入噪音字符串。
-
-        Args:
-            text (str): 待注入噪音的文本。
-            intensity (int): 混淆强度 (1, 2, or 3)，决定噪音的注入概率和长度。
-
-        Returns:
-            str: 注入噪音后的文本。
-        """
-        def generate_noise(length: int) -> str:
-            """生成指定长度的随机噪音字符串。"""
-            chars = (
-                string.ascii_letters + string.digits + "!@#$%^&*()_+-=[]{}|;:,.<>?"
-                + "一二三四五六七八九零壹贰叁" + "αβγδεζηθικλμνξοπρστυφχψω" + "∀∃∈∉∪∩⊂⊃∧∨¬→↔∴∵"
-            )
-            return "".join(random.choice(chars) for _ in range(length))
-
-        # 根据强度级别定义注入参数
-        params = {
-            1: {"probability": 15, "length": (3, 6)},   # 低强度
-            2: {"probability": 25, "length": (5, 10)},  # 中强度
-            3: {"probability": 35, "length": (8, 15)},  # 高强度
-        }
-        config = params.get(intensity, params[1]) # 默认为低强度
-        logger.debug(f"乱码注入参数: 概率={config['probability']}%, 长度范围={config['length']}")
-
-        words = text.split()
-        result = []
-        noise_count = 0
-        for word in words:
-            result.append(word)
-            # 按概率决定是否注入噪音
-            if random.randint(1, 100) <= config["probability"]:
-                noise_length = random.randint(*config["length"])
-                noise = generate_noise(noise_length)
-                result.append(noise)
-                noise_count += 1
-
-        logger.debug(f"共注入 {noise_count} 个乱码片段，原词数: {len(words)}")
-        return " ".join(result)
-    
-    @staticmethod
-    def extract_reasoning(content: str) -> Tuple[str, str]:
-        """
-        从模型返回的完整内容中提取被<think>...</think>标签包裹的思考过程，
-        并返回清理后的内容和思考过程。
-
-        Args:
-            content (str): 模型返回的原始字符串。
-
-        Returns:
-            Tuple[str, str]:
-                - 清理后的内容（移除了<think>标签及其内容）。
-                - 提取出的思考过程文本（如果没有则为空字符串）。
-        """
-        # 使用正则表达式精确查找 <think>...</think> 标签及其内容
-        think_pattern = re.compile(r"<think>(.*?)</think>\s*", re.DOTALL)
-        match = think_pattern.search(content)
-
-        if match:
-            # 提取思考过程
-            reasoning = match.group(1).strip()
-            # 从原始内容中移除匹配到的整个部分（包括标签和后面的空白）
-            clean_content = think_pattern.sub("", content, count=1).strip()
-        else:
-            reasoning = ""
-            clean_content = content.strip()
-            
-        return clean_content, reasoning
--- a/src/llm_models/request_executor.py
+++ b/src/llm_models/request_executor.py
@@ -1,288 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-@File    :   request_executor.py
-@Time    :   2024/05/24 16:15:00
-@Author  :   墨墨
-@Version :   1.0
-@Desc    :   负责执行LLM请求、处理重试及异常
-"""
-import asyncio
-from typing import List, Callable, Optional, Tuple
-
-from src.common.logger import get_logger
-from src.config.api_ada_configs import APIProvider, ModelInfo, TaskConfig
-from .exceptions import (
-    NetworkConnectionError,
-    ReqAbortException,
-    RespNotOkException,
-    RespParseException,
-)
-from .model_client.base_client import APIResponse, BaseClient
-from .model_selector import ModelSelector
-from .payload_content.message import Message
-from .payload_content.resp_format import RespFormat
-from .payload_content.tool_option import ToolOption
-from .utils import compress_messages
-
-logger = get_logger("model_utils")
-
-
-class RequestExecutor:
-    """
-    请求执行器。
-    负责直接与模型客户端交互，执行API请求。
-    它包含了核心的请求重试、异常分类处理、模型惩罚机制和消息压缩等底层逻辑。
-    """
-
-    def __init__(
-        self,
-        task_name: str,
-        model_set: TaskConfig,
-        api_provider: APIProvider,
-        client: BaseClient,
-        model_info: ModelInfo,
-        model_selector: ModelSelector,
-    ):
-        """
-        初始化请求执行器。
-
-        Args:
-            task_name (str): 当前任务的名称。
-            model_set (TaskConfig): 任务相关的模型配置。
-            api_provider (APIProvider): API提供商配置。
-            client (BaseClient): 用于发送请求的客户端实例。
-            model_info (ModelInfo): 当前请求要使用的模型信息。
-            model_selector (ModelSelector): 模型选择器实例，用于更新模型状态（如惩罚值）。
-        """
-        self.task_name = task_name
-        self.model_set = model_set
-        self.api_provider = api_provider
-        self.client = client
-        self.model_info = model_info
-        self.model_selector = model_selector
-
-    async def execute_request(
-        self,
-        request_type: str,
-        message_list: List[Message] | None = None,
-        tool_options: list[ToolOption] | None = None,
-        response_format: RespFormat | None = None,
-        stream_response_handler: Optional[Callable] = None,
-        async_response_parser: Optional[Callable] = None,
-        temperature: Optional[float] = None,
-        max_tokens: Optional[int] = None,
-        embedding_input: str = "",
-        audio_base64: str = "",
-    ) -> APIResponse:
-        """
-        实际执行API请求，并包含完整的重试和异常处理逻辑。
-
-        Args:
-            request_type (str): 请求类型 ('response', 'embedding', 'audio')。
-            message_list (List[Message] | None, optional): 消息列表。 Defaults to None.
-            tool_options (list[ToolOption] | None, optional): 工具选项。 Defaults to None.
-            response_format (RespFormat | None, optional): 响应格式要求。 Defaults to None.
-            stream_response_handler (Optional[Callable], optional): 流式响应处理器。 Defaults to None.
-            async_response_parser (Optional[Callable], optional): 异步响应解析器。 Defaults to None.
-            temperature (Optional[float], optional): 温度参数。 Defaults to None.
-            max_tokens (Optional[int], optional): 最大token数。 Defaults to None.
-            embedding_input (str, optional): embedding输入文本。 Defaults to "".
-            audio_base64 (str, optional): 音频base64数据。 Defaults to "".
-
-        Returns:
-            APIResponse: 从模型客户端返回的API响应对象。
-            
-        Raises:
-            ValueError: 如果请求类型未知。
-            RuntimeError: 如果所有重试都失败。
-        """
-        retry_remain = self.api_provider.max_retry
-        compressed_messages: Optional[List[Message]] = None
-        
-        # 循环进行重试
-        while retry_remain > 0:
-            try:
-                # 根据请求类型调用不同的客户端方法
-                if request_type == "response":
-                    assert message_list is not None, "message_list cannot be None for response requests"
-                    return await self.client.get_response(
-                        model_info=self.model_info,
-                        message_list=(compressed_messages or message_list),
-                        tool_options=tool_options,
-                        max_tokens=self.model_set.max_tokens if max_tokens is None else max_tokens,
-                        temperature=self.model_set.temperature if temperature is None else temperature,
-                        response_format=response_format,
-                        stream_response_handler=stream_response_handler,
-                        async_response_parser=async_response_parser,
-                        extra_params=self.model_info.extra_params,
-                    )
-                elif request_type == "embedding":
-                    assert embedding_input, "embedding_input cannot be empty for embedding requests"
-                    return await self.client.get_embedding(
-                        model_info=self.model_info,
-                        embedding_input=embedding_input,
-                        extra_params=self.model_info.extra_params,
-                    )
-                elif request_type == "audio":
-                    assert audio_base64 is not None, "audio_base64 cannot be None for audio requests"
-                    return await self.client.get_audio_transcriptions(
-                        model_info=self.model_info,
-                        audio_base64=audio_base64,
-                        extra_params=self.model_info.extra_params,
-                    )
-                raise ValueError(f"未知的请求类型: {request_type}")
-            except Exception as e:
-                logger.debug(f"请求失败: {str(e)}")
-                # 对失败的模型应用惩罚
-                self._apply_penalty_on_failure(e)
-
-                # 使用默认异常处理器来决定下一步操作（等待、重试、压缩或终止）
-                wait_interval, compressed_messages = self._default_exception_handler(
-                    e,
-                    remain_try=retry_remain,
-                    retry_interval=self.api_provider.retry_interval,
-                    messages=(message_list, compressed_messages is not None) if message_list else None,
-                )
-
-                if wait_interval == -1:
-                    retry_remain = 0  # 处理器决定不再重试
-                elif wait_interval > 0:
-                    logger.info(f"等待 {wait_interval} 秒后重试...")
-                    await asyncio.sleep(wait_interval)
-            finally:
-                retry_remain -= 1
-
-        # 所有重试次数用尽后
-        self.model_selector.decrease_usage_penalty(self.model_info.name) # 减少因使用而增加的基础惩罚
-        logger.error(f"模型 '{self.model_info.name}' 请求失败，达到最大重试次数 {self.api_provider.max_retry} 次")
-        raise RuntimeError("请求失败，已达到最大重试次数")
-
-    def _apply_penalty_on_failure(self, e: Exception):
-        """
-        根据异常类型，动态调整失败模型的惩罚值。
-        关键错误（如网络问题、服务器5xx错误）会施加更重的惩罚。
-        """
-        CRITICAL_PENALTY_MULTIPLIER = 5
-        default_penalty_increment = 1
-        penalty_increment = default_penalty_increment
-
-        # 对严重错误施加更高的惩罚
-        if isinstance(e, (NetworkConnectionError, ReqAbortException)):
-            penalty_increment = CRITICAL_PENALTY_MULTIPLIER
-        elif isinstance(e, RespNotOkException):
-            if e.status_code >= 500: # 服务器内部错误
-                penalty_increment = CRITICAL_PENALTY_MULTIPLIER
-
-        # 记录日志
-        log_message = f"发生未知异常: {type(e).__name__}，增加基础惩罚值: {penalty_increment}"
-        if isinstance(e, (NetworkConnectionError, ReqAbortException)):
-            log_message = f"发生关键错误 ({type(e).__name__})，增加惩罚值: {penalty_increment}"
-        elif isinstance(e, RespNotOkException):
-            log_message = f"发生响应错误 (状态码: {e.status_code})，增加惩罚值: {penalty_increment}"
-        logger.warning(f"模型 '{self.model_info.name}' {log_message}")
-
-        # 更新模型的惩罚值
-        self.model_selector.update_model_penalty(self.model_info.name, penalty_increment)
-
-    def _default_exception_handler(
-        self,
-        e: Exception,
-        remain_try: int,
-        retry_interval: int = 10,
-        messages: Tuple[List[Message], bool] | None = None,
-    ) -> Tuple[int, List[Message] | None]:
-        """
-        默认的异常分类处理器。
-        根据异常类型决定是否重试、等待多久以及是否需要压缩消息。
-        
-        Returns:
-            Tuple[int, List[Message] | None]:
-                - 等待时间（秒）。-1表示不重试。
-                - 压缩后的消息列表（如果有）。
-        """
-        model_name = self.model_info.name
-
-        if isinstance(e, NetworkConnectionError):
-            return self._check_retry(
-                remain_try,
-                retry_interval,
-                can_retry_msg=f"任务-'{self.task_name}' 模型-'{model_name}': 连接异常，将于{retry_interval}秒后重试",
-                cannot_retry_msg=f"任务-'{self.task_name}' 模型-'{model_name}': 连接异常，超过最大重试次数",
-            )
-        elif isinstance(e, ReqAbortException):
-            logger.warning(f"任务-'{self.task_name}' 模型-'{model_name}': 请求被中断，详细信息-{str(e.message)}")
-            return -1, None # 请求被中断，不重试
-        elif isinstance(e, RespNotOkException):
-            return self._handle_resp_not_ok(e, remain_try, retry_interval, messages)
-        elif isinstance(e, RespParseException):
-            logger.error(f"任务-'{self.task_name}' 模型-'{model_name}': 响应解析错误，错误信息-{e.message}")
-            logger.debug(f"附加内容: {str(e.ext_info)}")
-            return -1, None # 解析错误通常不可重试
-        else:
-            logger.error(f"任务-'{self.task_name}' 模型-'{model_name}': 未知异常，错误信息-{str(e)}")
-            return -1, None # 未知异常，不重试
-
-    def _handle_resp_not_ok(
-        self,
-        e: RespNotOkException,
-        remain_try: int,
-        retry_interval: int = 10,
-        messages: tuple[list[Message], bool] | None = None,
-    ) -> Tuple[int, Optional[List[Message]]]:
-        """处理HTTP状态码非200的异常。"""
-        model_name = self.model_info.name
-        # 客户端错误 (4xx)，通常不可重试
-        if e.status_code in [400, 401, 402, 403, 404]:
-            logger.warning(f"任务-'{self.task_name}' 模型-'{model_name}': 请求失败，错误代码-{e.status_code}，错误信息-{e.message}")
-            return -1, None
-        # 请求体过大 (413)
-        elif e.status_code == 413:
-            # 如果消息存在且尚未被压缩，尝试压缩后重试一次
-            if messages and not messages[1]: # messages[1] is a flag indicating if it's already compressed
-                return self._check_retry(
-                    remain_try, 0, # 立即重试
-                    can_retry_msg=f"任务-'{self.task_name}' 模型-'{model_name}': 请求体过大，尝试压缩消息后重试",
-                    cannot_retry_msg=f"任务-'{self.task_name}' 模型-'{model_name}': 请求体过大，压缩后仍失败",
-                    can_retry_callable=compress_messages, messages=messages[0],
-                )
-            logger.warning(f"任务-'{self.task_name}' 模型-'{model_name}': 请求体过大，无法压缩，放弃请求。")
-            return -1, None
-        # 请求过于频繁 (429)
-        elif e.status_code == 429:
-            return self._check_retry(
-                remain_try, retry_interval,
-                can_retry_msg=f"任务-'{self.task_name}' 模型-'{model_name}': 请求过于频繁，将于{retry_interval}秒后重试",
-                cannot_retry_msg=f"任务-'{self.task_name}' 模型-'{model_name}': 请求过于频繁，超过最大重试次数",
-            )
-        # 服务器错误 (5xx)，可以重试
-        elif e.status_code >= 500:
-            return self._check_retry(
-                remain_try, retry_interval,
-                can_retry_msg=f"任务-'{self.task_name}' 模型-'{model_name}': 服务器错误，将于{retry_interval}秒后重试",
-                cannot_retry_msg=f"任务-'{self.task_name}' 模型-'{model_name}': 服务器错误，超过最大重试次数",
-            )
-        else:
-            logger.warning(f"任务-'{self.task_name}' 模型-'{model_name}': 未知错误，错误代码-{e.status_code}，错误信息-{e.message}")
-            return -1, None
-
-    @staticmethod
-    def _check_retry(
-        remain_try: int,
-        retry_interval: int,
-        can_retry_msg: str,
-        cannot_retry_msg: str,
-        can_retry_callable: Callable | None = None,
-        **kwargs,
-    ) -> Tuple[int, List[Message] | None]:
-        """
-        辅助函数：检查是否可以重试，并执行可选的回调函数（如消息压缩）。
-        """
-        if remain_try > 0:
-            logger.warning(f"{can_retry_msg}")
-            # 如果有可执行的回调（例如压缩函数），执行它并返回结果
-            if can_retry_callable is not None:
-                return retry_interval, can_retry_callable(**kwargs)
-            return retry_interval, None
-        else:
-            logger.warning(f"{cannot_retry_msg}")
-            return -1, None
--- a/src/llm_models/request_strategy.py
+++ b/src/llm_models/request_strategy.py
@@ -1,274 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-@File    :   request_strategy.py
-@Time    :   2024/05/24 16:30:00
-@Author  :   墨墨
-@Version :   1.0
-@Desc    :   高级请求策略（并发、故障转移）
-"""
-import asyncio
-import random
-from typing import Optional, Dict, Any, Callable, Coroutine
-
-from src.common.logger import get_logger
-from src.config.api_ada_configs import TaskConfig
-from .model_client.base_client import APIResponse
-from .model_selector import ModelSelector
-from .payload_content.message import MessageBuilder
-from .prompt_processor import PromptProcessor
-from .request_executor import RequestExecutor
-
-# Module-level logger obtained from get_logger under the name "model_utils".
-logger = get_logger("model_utils")
-
-
-class RequestStrategy:
-    """
-    高级请求策略模块。
-    负责实现复杂的请求逻辑，如模型的故障转移（fallback）和并发请求。
-    """
-
-    def __init__(self, model_set: TaskConfig, model_selector: ModelSelector, task_name: str):
-        """
-        Store the collaborators used by the request strategy.
-
-        Args:
-            model_set (TaskConfig): Model configuration for a specific task.
-            model_selector (ModelSelector): Model selector instance.
-            task_name (str): Name of the current task.
-        """
-        self.model_set = model_set
-        self.model_selector = model_selector
-        self.task_name = task_name
-
-    async def execute_with_fallback(
-        self,
-        base_payload: Dict[str, Any],
-        raise_when_empty: bool = True,
-    ) -> Dict[str, Any]:
-        """
-        Run a single request, picking a model dynamically and falling back to another on failure.
-
-        Up to ``len(self.model_set.model_list)`` attempts are made. Each attempt asks the
-        model selector for a model that has not already failed during this call; the loop
-        stops early when the selector returns ``None``. Any exception raised while
-        handling an attempt marks that model as failed and moves on to the next attempt.
-
-        Args:
-            base_payload (Dict[str, Any]): Base request payload. The keys read here are
-                ``"prompt_processor"``, ``"prompt"``, ``"tool_options"``,
-                ``"temperature"`` and ``"max_tokens"``.
-            raise_when_empty (bool, optional): Whether to raise when every model failed or
-                the reply is empty. Defaults to True.
-
-        Returns:
-            Dict[str, Any]: Result dictionary with the keys ``content``,
-            ``reasoning_content``, ``model_name``, ``tool_calls``, ``model_info``,
-            ``usage`` and ``success``. When no model succeeded and ``raise_when_empty``
-            is False, a placeholder result with ``success`` set to False is returned.
-
-        Raises:
-            RuntimeError: If no model produced a response and ``raise_when_empty`` is
-                True; the last captured exception, if any, is attached as the cause.
-        """
-        # Models that already failed during this call, so they are not selected again
-        failed_models_in_this_request = set()
-        max_attempts = len(self.model_set.model_list)
-        last_exception: Optional[Exception] = None
-
-        for attempt in range(max_attempts):
-            # Ask the selector for a model, passing the set of models that failed so far
-            model_selection_result = self.model_selector.select_best_available_model(failed_models_in_this_request)
-
-            if model_selection_result is None:
-                logger.error(f"尝试 {attempt + 1}/{max_attempts}: 没有可用的模型了。")
-                break  # No more models available; leave the loop
-
-            model_info, api_provider, client = model_selection_result
-            model_name = model_info.name
-            logger.debug(f"尝试 {attempt + 1}/{max_attempts}: 正在使用模型 '{model_name}'...")
-
-            try:
-                # Step 1: preprocess the prompt for the selected model
-                # NOTE(review): a missing payload key raises KeyError here, which the broad
-                # except below records as a failure of the selected model.
-                prompt_processor: PromptProcessor = base_payload["prompt_processor"]
-                raw_prompt = base_payload["prompt"]
-                processed_prompt = prompt_processor.process_prompt(
-                    raw_prompt, model_info, api_provider, self.task_name
-                )
-                
-                # Step 2: build the message list (a single text message)
-                message_builder = MessageBuilder().add_text_content(processed_prompt)
-                messages = [message_builder.build()]
-
-                # Step 3: assemble the payload for the executor
-                executor_payload = {
-                    "request_type": "response",  # The strategy currently only issues 'response' requests
-                    "message_list": messages,
-                    "tool_options": base_payload["tool_options"],
-                    "temperature": base_payload["temperature"],
-                    "max_tokens": base_payload["max_tokens"],
-                }
-                
-                # Create a request executor bound to the selected model
-                executor = RequestExecutor(
-                    task_name=self.task_name,
-                    model_set=self.model_set,
-                    api_provider=api_provider,
-                    client=client,
-                    model_info=model_info,
-                    model_selector=self.model_selector,
-                )
-                # Execute the request; empty/truncated replies are retried inside the helper
-                response = await self._execute_and_handle_empty_retry(executor, executor_payload, prompt_processor)
-
-                # Step 4: post-process the response
-                # Split the reasoning content from the final content of the response
-                final_content, reasoning_content = prompt_processor.extract_reasoning(response.content or "")
-                response.content = final_content  # Update the response object with the cleaned content
-                
-                tool_calls = response.tool_calls
-
-                # Check whether the final content is empty. The RuntimeError raised here is
-                # caught by the except below, so it triggers fallback to the next model.
-                if not final_content and not tool_calls:
-                    if raise_when_empty:
-                        raise RuntimeError("所选模型生成了空回复。")
-                    # NOTE(review): with raise_when_empty=False the empty reply still falls
-                    # through to the success result below.
-                    logger.warning(f"模型 '{model_name}' 生成了空回复，返回默认信息。")
-
-                logger.debug(f"模型 '{model_name}' 成功生成了回复。")
-                # Return the success result, including usage and model info
-                return {
-                    "content": response.content,
-                    "reasoning_content": reasoning_content,
-                    "model_name": model_name,
-                    "tool_calls": tool_calls,
-                    "model_info": model_info,
-                    "usage": response.usage,
-                    "success": True,
-                }
-
-            except Exception as e:
-                # Any exception during the attempt marks this model as failed for this call
-                logger.error(f"模型 '{model_info.name}' 失败，异常: {e}。将其添加到当前请求的失败模型列表中。")
-                failed_models_in_this_request.add(model_info.name)
-                last_exception = e
-
-        # The loop ended (exhausted or broke out) without a successful return
-        logger.error(f"当前请求已尝试 {max_attempts} 个模型，所有模型均已失败。")
-        if raise_when_empty:
-            if last_exception:
-                raise RuntimeError("所有模型均未能生成响应。") from last_exception
-            raise RuntimeError("所有模型均未能生成响应，且无具体异常信息。")
-        
-        # Return the failure placeholder result
-        return {
-            "content": "所有模型都请求失败",
-            "reasoning_content": "",
-            "model_name": "unknown",
-            "tool_calls": None,
-            "model_info": None,
-            "usage": None,
-            "success": False,
-        }
-
-    async def execute_concurrently(
-        self,
-        coro_callable: Callable[..., Coroutine[Any, Any, Any]],
-        concurrency_count: int,
-        *args,
-        **kwargs,
-    ) -> Any:
-        """
-        Run the same coroutine function several times concurrently and return one successful result.
-
-        Args:
-            coro_callable (Callable): Coroutine function to call for every concurrent task.
-            concurrency_count (int): Number of concurrent calls to start.
-            *args: Positional arguments passed to every call.
-            **kwargs: Keyword arguments passed to every call.
-
-        Returns:
-            Any: One result chosen at random among the results that are dictionaries
-            with a truthy ``"success"`` entry.
-
-        Raises:
-            Exception: The first exception (in task order) when no result succeeded and
-                at least one task raised.
-            RuntimeError: When no result succeeded and no task raised an exception.
-        """
-        logger.info(f"启用并发请求模式，并发数: {concurrency_count}")
-
-        # Start one awaitable per requested slot
-        pending = []
-        for _ in range(concurrency_count):
-            pending.append(coro_callable(*args, **kwargs))
-
-        # Wait for everything; exceptions come back as values instead of propagating
-        outcomes = await asyncio.gather(*pending, return_exceptions=True)
-
-        # Keep only dictionary results flagged as successful
-        winners = []
-        for outcome in outcomes:
-            if isinstance(outcome, dict) and outcome.get("success"):
-                winners.append(outcome)
-
-        if winners:
-            chosen = random.choice(winners)
-            logger.info(f"并发请求完成，从{len(winners)}个成功结果中选择了一个")
-            return chosen
-
-        # Nothing succeeded: log every exception in task order and remember them
-        failures = []
-        for position, outcome in enumerate(outcomes, start=1):
-            if isinstance(outcome, Exception):
-                logger.error(f"并发任务 {position}/{concurrency_count} 失败: {outcome}")
-                failures.append(outcome)
-
-        # Re-raise the earliest exception when there is one
-        first_failure = failures[0] if failures else None
-        if first_failure:
-            raise first_failure
-
-        raise RuntimeError(f"所有 {concurrency_count} 个并发请求都失败了，但没有具体的异常信息")
-
-    async def _execute_and_handle_empty_retry(
-        self, executor: RequestExecutor, payload: Dict[str, Any], prompt_processor: PromptProcessor
-    ) -> APIResponse:
-        """
-        Execute a request on one model, retrying internally on empty or truncated replies.
-
-        A reply counts as empty when it carries no tool calls and no non-whitespace text.
-        When the model has ``use_anti_truncation`` enabled and the prompt processor
-        exposes an end marker, a reply that does not end with that marker counts as
-        truncated, while a reply that does end with it has the marker stripped.
-        Emptiness is judged after the marker has been stripped, so a reply consisting
-        of nothing but the marker is retried instead of being returned with empty content.
-
-        Args:
-            executor (RequestExecutor): Request executor instance; its ``api_provider``
-                supplies ``max_retry`` and ``retry_interval``.
-            payload (Dict[str, Any]): Keyword arguments passed to ``execute_request``.
-            prompt_processor (PromptProcessor): Prompt processor providing ``end_marker``.
-
-        Returns:
-            APIResponse: A response that is neither empty nor truncated.
-
-        Raises:
-            RuntimeError: If the reply is still empty or truncated after the maximum
-                number of internal retries.
-        """
-        empty_retry_count = 0
-        max_empty_retry = executor.api_provider.max_retry
-        empty_retry_interval = executor.api_provider.retry_interval
-        model_name = executor.model_info.name
-        # Anti-truncation is opt-in per model; a missing attribute means disabled
-        use_anti_truncation = getattr(executor.model_info, "use_anti_truncation", False)
-        end_marker = prompt_processor.end_marker
-
-        while empty_retry_count <= max_empty_retry:
-            response = await executor.execute_request(**payload)
-
-            content = response.content or ""
-            tool_calls = response.tool_calls
-            is_truncated = False
-
-            # With anti-truncation enabled, the end marker proves the reply is complete
-            if use_anti_truncation and end_marker:
-                if content.endswith(end_marker):
-                    # Complete reply: drop the marker before handing the content back
-                    content = content[: -len(end_marker)].strip()
-                    response.content = content
-                else:
-                    # No marker at the end: treat the reply as truncated
-                    is_truncated = True
-
-            # Evaluate emptiness on the marker-free content so that a marker-only reply
-            # is not mistaken for a valid answer
-            is_empty_reply = not tool_calls and not content.strip()
-
-            if not (is_empty_reply or is_truncated):
-                # Valid, complete response
-                return response
-
-            reason = "空回复" if is_empty_reply else "截断"
-            empty_retry_count += 1
-            if empty_retry_count > max_empty_retry:
-                # Retry budget exhausted
-                raise RuntimeError(f"模型 '{model_name}' 经过 {max_empty_retry} 次内部重试后仍然生成{reason}的回复。")
-
-            logger.warning(
-                f"模型 '{model_name}' 检测到{reason}，正在进行内部重试 ({empty_retry_count}/{max_empty_retry})..."
-            )
-            if empty_retry_interval > 0:
-                await asyncio.sleep(empty_retry_interval)
-
-        # Reached only when the loop body never runs (max_retry below zero)
-        raise RuntimeError("空回复/截断重试逻辑出现未知错误")
--- a/src/llm_models/utils_model.py
+++ b/src/llm_models/utils_model.py