From 3207b778c371ec9c60ca2fe040b35c0dc6aa10b8 Mon Sep 17 00:00:00 2001
From: minecraft1024a
Date: Fri, 26 Sep 2025 20:26:19 +0800
Subject: [PATCH] refactor(llm): resolve merge conflicts and remove the request-logic refactor
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The recent refactor that decoupled the LLM request logic introduced severe merge conflicts.

This commit resolves those conflicts by removing the newly introduced components such as `RequestExecutor` and `RequestStrategy` and restoring the previous code structure. This stabilizes the development branch and leaves room to revisit the refactor later.
---
 src/llm_models/utils_model.py | 193 ----------------------------------
 1 file changed, 193 deletions(-)

diff --git a/src/llm_models/utils_model.py b/src/llm_models/utils_model.py
index ac318fbe4..c39ab8af9 100644
--- a/src/llm_models/utils_model.py
+++ b/src/llm_models/utils_model.py
@@ -8,16 +8,6 @@ from rich.traceback import install
 from typing import Tuple, List, Dict, Optional, Callable, Any, Coroutine, Generator
 from src.common.logger import get_logger
 
-<<<<<<< HEAD
-from src.config.api_ada_configs import TaskConfig, ModelInfo, UsageRecord
-from .llm_utils import build_tool_options, normalize_image_format
-from .model_selector import ModelSelector
-from .payload_content.message import MessageBuilder
-from .payload_content.tool_option import ToolCall
-from .prompt_processor import PromptProcessor
-from .request_strategy import RequestStrategy
-from .utils import llm_usage_recorder
-=======
 from src.config.config import model_config
 from src.config.api_ada_configs import APIProvider, ModelInfo, TaskConfig
 from .payload_content.message import MessageBuilder, Message
@@ -28,7 +18,6 @@ from .utils import compress_messages, llm_usage_recorder
 from .exceptions import NetworkConnectionError, ReqAbortException, RespNotOkException, RespParseException
 
 install(extra_lines=3)
->>>>>>> parent of 253946f (refactor(llm): 将LLM请求逻辑解耦到专门的组件中)
 
 logger = get_logger("model_utils")
 
@@ -185,34 +174,6 @@ class LLMRequest:
         max_tokens: Optional[int] = None,
     ) -> Tuple[str, Tuple[str, str, Optional[List[ToolCall]]]]:
         """
-<<<<<<< HEAD
-        为包含图像的多模态输入生成文本响应。
-
-        Args:
-            prompt (str): 文本提示。
-            image_base64 (str): Base64编码的图像数据。
-            image_format (str): 图像格式 (例如, "png", "jpeg")。
-            temperature (Optional[float], optional): 控制生成文本的随机性。 Defaults to None.
-            max_tokens (Optional[int], optional): 生成响应的最大长度。 Defaults to None.
-
-        Returns:
-            Tuple[str, Tuple[str, str, Optional[List[ToolCall]]]]:
-                - 清理后的响应内容。
-                - 一个元组,包含思考过程、模型名称和工具调用列表。
-        """
-        start_time = time.time()
-
-        # 步骤 1: 选择一个支持图像处理的模型
-        model_info, api_provider, client = self.model_selector.select_model()
-
-        # 步骤 2: 准备消息体
-        # 预处理文本提示
-        processed_prompt = self.prompt_processor.process_prompt(prompt, model_info, api_provider, self.task_name)
-        # 规范化图像格式
-        normalized_format = normalize_image_format(image_format)
-
-        # 使用MessageBuilder构建多模态消息
-=======
         为图像生成响应
         Args:
             prompt (str): 提示词
@@ -229,7 +190,6 @@ class LLMRequest:
         model_info, api_provider, client = self._select_model()
 
         # 请求体构建
->>>>>>> parent of 253946f (refactor(llm): 将LLM请求逻辑解耦到专门的组件中)
         message_builder = MessageBuilder()
         message_builder.add_text_content(prompt)
         message_builder.add_image_content(
@@ -239,16 +199,8 @@ class LLMRequest:
         )
         messages = [message_builder.build()]
 
-<<<<<<< HEAD
-        # 步骤 3: 执行请求 (图像请求通常不走复杂的故障转移策略,直接执行)
-        from .request_executor import RequestExecutor
-        executor = RequestExecutor(
-            task_name=self.task_name,
-            model_set=self.model_for_task,
-=======
         # 请求并处理返回值
         response = await self._execute_request(
->>>>>>> parent of 253946f (refactor(llm): 将LLM请求逻辑解耦到专门的组件中)
             api_provider=api_provider,
             client=client,
             request_type=RequestType.RESPONSE,
@@ -257,14 +209,6 @@ class LLMRequest:
             temperature=temperature,
             max_tokens=max_tokens,
         )
-<<<<<<< HEAD
-
-        # 步骤 4: 处理响应
-        content, reasoning_content = self.prompt_processor.extract_reasoning(response.content or "")
-        tool_calls = response.tool_calls
-
-        # 记录用量
-=======
         content = response.content or ""
         reasoning_content = response.reasoning_content or ""
         tool_calls = response.tool_calls
@@ -272,7 +216,6 @@ class LLMRequest:
         if not reasoning_content and content:
             content, extracted_reasoning = self._extract_reasoning(content)
             reasoning_content = extracted_reasoning
->>>>>>> parent of 253946f (refactor(llm): 将LLM请求逻辑解耦到专门的组件中)
         if usage := response.usage:
             await llm_usage_recorder.record_usage_to_database(
                 model_info=model_info,
@@ -286,24 +229,6 @@ class LLMRequest:
 
     async def generate_response_for_voice(self, voice_base64: str) -> Optional[str]:
         """
-<<<<<<< HEAD
-        将语音数据转换为文本(语音识别)。
-
-        Args:
-            voice_base64 (str): Base64编码的语音数据。
-
-        Returns:
-            Optional[str]: 识别出的文本内容,如果失败则返回None。
-        """
-        # 选择一个支持语音识别的模型
-        model_info, api_provider, client = self.model_selector.select_model()
-
-        from .request_executor import RequestExecutor
-        # 创建请求执行器
-        executor = RequestExecutor(
-            task_name=self.task_name,
-            model_set=self.model_for_task,
-=======
         为语音生成响应
         Args:
             voice_base64 (str): 语音的Base64编码字符串
@@ -315,19 +240,10 @@ class LLMRequest:
 
         # 请求并处理返回值
         response = await self._execute_request(
->>>>>>> parent of 253946f (refactor(llm): 将LLM请求逻辑解耦到专门的组件中)
             api_provider=api_provider,
             client=client,
             request_type=RequestType.AUDIO,
             model_info=model_info,
-<<<<<<< HEAD
-            model_selector=self.model_selector,
-        )
-        # 执行语音转文本请求
-        response = await executor.execute_request(
-            request_type="audio",
-=======
->>>>>>> parent of 253946f (refactor(llm): 将LLM请求逻辑解耦到专门的组件中)
             audio_base64=voice_base64,
         )
         return response.content or None
@@ -341,35 +257,6 @@ class LLMRequest:
         raise_when_empty: bool = True,
     ) -> Tuple[str, Tuple[str, str, Optional[List[ToolCall]]]]:
         """
-<<<<<<< HEAD
-        异步生成文本响应,支持并发和故障转移等高级策略。
-
-        Args:
-            prompt (str): 用户输入的提示。
-            temperature (Optional[float], optional): 控制生成文本的随机性。 Defaults to None.
-            max_tokens (Optional[int], optional): 生成响应的最大长度。 Defaults to None.
-            tools (Optional[List[Dict[str, Any]]], optional): 可供模型调用的工具列表。 Defaults to None.
-            raise_when_empty (bool, optional): 如果最终响应为空,是否抛出异常。 Defaults to True.
-
-        Returns:
-            Tuple[str, Tuple[str, str, Optional[List[ToolCall]]]]:
-                - 清理后的响应内容。
-                - 一个元组,包含思考过程、最终使用的模型名称和工具调用列表。
-        """
-        start_time = time.time()
-
-        # 步骤 1: 准备基础请求载荷
-        tool_built = build_tool_options(tools)
-        base_payload = {
-            "prompt": prompt,
-            "tool_options": tool_built,
-            "temperature": temperature,
-            "max_tokens": max_tokens,
-            "prompt_processor": self.prompt_processor,
-        }
-
-        # 步骤 2: 根据配置选择执行策略 (并发或单次带故障转移)
-=======
         异步生成响应,支持并发请求
         Args:
             prompt (str): 提示词
@@ -381,7 +268,6 @@ class LLMRequest:
             (Tuple[str, str, str, Optional[List[ToolCall]]]): 响应内容、推理内容、模型名称、工具调用列表
         """
         # 检查是否需要并发请求
->>>>>>> parent of 253946f (refactor(llm): 将LLM请求逻辑解耦到专门的组件中)
         concurrency_count = getattr(self.model_for_task, "concurrency_count", 1)
 
         if concurrency_count <= 1:
@@ -395,49 +281,6 @@ class LLMRequest:
             content, (reasoning_content, model_name, tool_calls) = await execute_concurrently(
                 self._execute_single_request,
                 concurrency_count,
-<<<<<<< HEAD
-                base_payload,
-                raise_when_empty=False,  # 在并发模式下,单个任务失败不应立即抛出异常
-            )
-
-            # 步骤 3: 处理最终结果
-            content = result.get("content", "")
-            reasoning_content = result.get("reasoning_content", "")
-            model_name = result.get("model_name", "unknown")
-            tool_calls = result.get("tool_calls")
-
-            # 步骤 4: 记录用量 (从策略返回的结果中获取最终使用的模型信息和用量)
-            final_model_info = result.get("model_info")
-            usage = result.get("usage")
-
-            if final_model_info and usage:
-                await self._record_usage(final_model_info, usage, time.time() - start_time)
-
-            return content, (reasoning_content, model_name, tool_calls)
-
-    async def get_embedding(self, embedding_input: str) -> Tuple[List[float], str]:
-        """
-        获取给定文本的嵌入向量 (Embedding)。
-
-        Args:
-            embedding_input (str): 需要进行嵌入的文本。
-
-        Returns:
-            Tuple[List[float], str]: 嵌入向量列表和所使用的模型名称。
-
-        Raises:
-            RuntimeError: 如果获取embedding失败。
-        """
-        start_time = time.time()
-        # 选择一个支持embedding的模型
-        model_info, api_provider, client = self.model_selector.select_model()
-
-        from .request_executor import RequestExecutor
-        # 创建请求执行器
-        executor = RequestExecutor(
-            task_name=self.task_name,
-            model_set=self.model_for_task,
-=======
                 prompt,
                 temperature,
                 max_tokens,
@@ -595,19 +438,10 @@ class LLMRequest:
 
         # 请求并处理返回值
         response = await self._execute_request(
->>>>>>> parent of 253946f (refactor(llm): 将LLM请求逻辑解耦到专门的组件中)
             api_provider=api_provider,
             client=client,
             request_type=RequestType.EMBEDDING,
             model_info=model_info,
-<<<<<<< HEAD
-            model_selector=self.model_selector,
-        )
-        # 执行embedding请求
-        response = await executor.execute_request(
-            request_type="embedding",
-=======
->>>>>>> parent of 253946f (refactor(llm): 将LLM请求逻辑解耦到专门的组件中)
             embedding_input=embedding_input,
         )
 
@@ -625,32 +459,6 @@ class LLMRequest:
 
         if not embedding:
             raise RuntimeError("获取embedding失败")
-<<<<<<< HEAD
-
-        # 记录用量
-        if usage := response.usage:
-            await self._record_usage(model_info, usage, time.time() - start_time, "/embeddings")
-
-        return embedding, model_info.name
-
-    async def _record_usage(self, model_info: ModelInfo, usage: UsageRecord, time_cost: float, endpoint: str = "/chat/completions"):
-        """
-        记录模型API的调用用量到数据库。
-
-        Args:
-            model_info (ModelInfo): 使用的模型信息。
-            usage (UsageRecord): 包含token用量信息的对象。
-            time_cost (float): 本次请求的总耗时(秒)。
-            endpoint (str, optional): 请求的API端点。 Defaults to "/chat/completions".
- """ - await llm_usage_recorder.record_usage_to_database( - model_info=model_info, - model_usage=usage, - user_id="system", # 当前所有请求都以系统用户身份记录 - time_cost=time_cost, - request_type=self.request_type, - endpoint=endpoint, -======= return embedding, model_info.name @@ -680,7 +488,6 @@ class LLMRequest: least_used_model_name = min( candidate_models_usage, key=lambda k: candidate_models_usage[k][0] + candidate_models_usage[k][1] * 300 + candidate_models_usage[k][2] * 1000, ->>>>>>> parent of 253946f (refactor(llm): 将LLM请求逻辑解耦到专门的组件中) ) # --- 动态故障转移的核心逻辑 ---