From 3207b778c371ec9c60ca2fe040b35c0dc6aa10b8 Mon Sep 17 00:00:00 2001
From: minecraft1024a
Date: Fri, 26 Sep 2025 20:26:19 +0800
Subject: [PATCH] refactor(llm): resolve merge conflicts and remove the request-logic refactor
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The recent refactor that decoupled the LLM request logic introduced severe merge conflicts.

This commit resolves those conflicts by removing the newly introduced components such as `RequestExecutor` and `RequestStrategy` and restoring the previous code structure. This stabilizes the development branch and leaves room to revisit the refactor later.
---
 src/llm_models/utils_model.py | 193 ----------------------------------
 1 file changed, 193 deletions(-)

diff --git a/src/llm_models/utils_model.py b/src/llm_models/utils_model.py
index ac318fbe4..c39ab8af9 100644
--- a/src/llm_models/utils_model.py
+++ b/src/llm_models/utils_model.py
@@ -8,16 +8,6 @@ from rich.traceback import install
 from typing import Tuple, List, Dict, Optional, Callable, Any, Coroutine, Generator
 from src.common.logger import get_logger
 
-<<<<<<< HEAD
-from src.config.api_ada_configs import TaskConfig, ModelInfo, UsageRecord
-from .llm_utils import build_tool_options, normalize_image_format
-from .model_selector import ModelSelector
-from .payload_content.message import MessageBuilder
-from .payload_content.tool_option import ToolCall
-from .prompt_processor import PromptProcessor
-from .request_strategy import RequestStrategy
-from .utils import llm_usage_recorder
-=======
 from src.config.config import model_config
 from src.config.api_ada_configs import APIProvider, ModelInfo, TaskConfig
 from .payload_content.message import MessageBuilder, Message
@@ -28,7 +18,6 @@ from .utils import compress_messages, llm_usage_recorder
 from .exceptions import NetworkConnectionError, ReqAbortException, RespNotOkException, RespParseException
 
 install(extra_lines=3)
->>>>>>> parent of 253946f (refactor(llm): 将LLM请求逻辑解耦到专门的组件中)
 
 logger = get_logger("model_utils")
 
@@ -185,34 +174,6 @@ class LLMRequest:
         max_tokens: Optional[int] = None,
     ) -> Tuple[str, Tuple[str, str, Optional[List[ToolCall]]]]:
         """
-<<<<<<< HEAD
-        为包含图像的多模态输入生成文本响应。
-
-        Args:
-            prompt (str): 文本提示。
-            image_base64 (str): Base64编码的图像数据。
-            image_format (str): 图像格式 (例如, "png", "jpeg")。
-            temperature (Optional[float], optional): 控制生成文本的随机性。 Defaults to None.
-            max_tokens (Optional[int], optional): 生成响应的最大长度。 Defaults to None.
-
-        Returns:
-            Tuple[str, Tuple[str, str, Optional[List[ToolCall]]]]:
-                - 清理后的响应内容。
-                - 一个元组,包含思考过程、模型名称和工具调用列表。
-        """
-        start_time = time.time()
-
-        # 步骤 1: 选择一个支持图像处理的模型
-        model_info, api_provider, client = self.model_selector.select_model()
-
-        # 步骤 2: 准备消息体
-        # 预处理文本提示
-        processed_prompt = self.prompt_processor.process_prompt(prompt, model_info, api_provider, self.task_name)
-        # 规范化图像格式
-        normalized_format = normalize_image_format(image_format)
-
-        # 使用MessageBuilder构建多模态消息
-=======
         为图像生成响应
         Args:
             prompt (str): 提示词
@@ -229,7 +190,6 @@ class LLMRequest:
         model_info, api_provider, client = self._select_model()
 
         # 请求体构建
->>>>>>> parent of 253946f (refactor(llm): 将LLM请求逻辑解耦到专门的组件中)
         message_builder = MessageBuilder()
         message_builder.add_text_content(prompt)
         message_builder.add_image_content(
@@ -239,16 +199,8 @@ class LLMRequest:
         )
         messages = [message_builder.build()]
 
-<<<<<<< HEAD
-        # 步骤 3: 执行请求 (图像请求通常不走复杂的故障转移策略,直接执行)
-        from .request_executor import RequestExecutor
-        executor = RequestExecutor(
-            task_name=self.task_name,
-            model_set=self.model_for_task,
-=======
         # 请求并处理返回值
         response = await self._execute_request(
->>>>>>> parent of 253946f (refactor(llm): 将LLM请求逻辑解耦到专门的组件中)
             api_provider=api_provider,
             client=client,
             request_type=RequestType.RESPONSE,
@@ -257,14 +209,6 @@ class LLMRequest:
             temperature=temperature,
             max_tokens=max_tokens,
         )
-<<<<<<< HEAD
-
-        # 步骤 4: 处理响应
-        content, reasoning_content = self.prompt_processor.extract_reasoning(response.content or "")
-        tool_calls = response.tool_calls
-
-        # 记录用量
-=======
         content = response.content or ""
         reasoning_content = response.reasoning_content or ""
         tool_calls = response.tool_calls
@@ -272,7 +216,6 @@ class LLMRequest:
         if not reasoning_content and content:
             content, extracted_reasoning = self._extract_reasoning(content)
             reasoning_content = extracted_reasoning
->>>>>>> parent of 253946f (refactor(llm): 将LLM请求逻辑解耦到专门的组件中)
         if usage := response.usage:
             await llm_usage_recorder.record_usage_to_database(
                 model_info=model_info,
@@ -286,24 +229,6 @@ class LLMRequest:
 
     async def generate_response_for_voice(self, voice_base64: str) -> Optional[str]:
         """
-<<<<<<< HEAD
-        将语音数据转换为文本(语音识别)。
-
-        Args:
-            voice_base64 (str): Base64编码的语音数据。
-
-        Returns:
-            Optional[str]: 识别出的文本内容,如果失败则返回None。
-        """
-        # 选择一个支持语音识别的模型
-        model_info, api_provider, client = self.model_selector.select_model()
-
-        from .request_executor import RequestExecutor
-        # 创建请求执行器
-        executor = RequestExecutor(
-            task_name=self.task_name,
-            model_set=self.model_for_task,
-=======
         为语音生成响应
         Args:
             voice_base64 (str): 语音的Base64编码字符串
@@ -315,19 +240,10 @@ class LLMRequest:
 
         # 请求并处理返回值
         response = await self._execute_request(
->>>>>>> parent of 253946f (refactor(llm): 将LLM请求逻辑解耦到专门的组件中)
             api_provider=api_provider,
             client=client,
             request_type=RequestType.AUDIO,
             model_info=model_info,
-<<<<<<< HEAD
-            model_selector=self.model_selector,
-        )
-        # 执行语音转文本请求
-        response = await executor.execute_request(
-            request_type="audio",
-=======
->>>>>>> parent of 253946f (refactor(llm): 将LLM请求逻辑解耦到专门的组件中)
             audio_base64=voice_base64,
         )
         return response.content or None
@@ -341,35 +257,6 @@ class LLMRequest:
         raise_when_empty: bool = True,
     ) -> Tuple[str, Tuple[str, str, Optional[List[ToolCall]]]]:
         """
-<<<<<<< HEAD
-        异步生成文本响应,支持并发和故障转移等高级策略。
-
-        Args:
-            prompt (str): 用户输入的提示。
-            temperature (Optional[float], optional): 控制生成文本的随机性。 Defaults to None.
-            max_tokens (Optional[int], optional): 生成响应的最大长度。 Defaults to None.
-            tools (Optional[List[Dict[str, Any]]], optional): 可供模型调用的工具列表。 Defaults to None.
-            raise_when_empty (bool, optional): 如果最终响应为空,是否抛出异常。 Defaults to True.
-
-        Returns:
-            Tuple[str, Tuple[str, str, Optional[List[ToolCall]]]]:
-                - 清理后的响应内容。
-                - 一个元组,包含思考过程、最终使用的模型名称和工具调用列表。
-        """
-        start_time = time.time()
-
-        # 步骤 1: 准备基础请求载荷
-        tool_built = build_tool_options(tools)
-        base_payload = {
-            "prompt": prompt,
-            "tool_options": tool_built,
-            "temperature": temperature,
-            "max_tokens": max_tokens,
-            "prompt_processor": self.prompt_processor,
-        }
-
-        # 步骤 2: 根据配置选择执行策略 (并发或单次带故障转移)
-=======
         异步生成响应,支持并发请求
         Args:
             prompt (str): 提示词
@@ -381,7 +268,6 @@ class LLMRequest:
             (Tuple[str, str, str, Optional[List[ToolCall]]]): 响应内容、推理内容、模型名称、工具调用列表
         """
         # 检查是否需要并发请求
->>>>>>> parent of 253946f (refactor(llm): 将LLM请求逻辑解耦到专门的组件中)
         concurrency_count = getattr(self.model_for_task, "concurrency_count", 1)
 
         if concurrency_count <= 1:
@@ -395,49 +281,6 @@ class LLMRequest:
             content, (reasoning_content, model_name, tool_calls) = await execute_concurrently(
                 self._execute_single_request,
                 concurrency_count,
-<<<<<<< HEAD
-                base_payload,
-                raise_when_empty=False,  # 在并发模式下,单个任务失败不应立即抛出异常
-            )
-
-            # 步骤 3: 处理最终结果
-            content = result.get("content", "")
-            reasoning_content = result.get("reasoning_content", "")
-            model_name = result.get("model_name", "unknown")
-            tool_calls = result.get("tool_calls")
-
-            # 步骤 4: 记录用量 (从策略返回的结果中获取最终使用的模型信息和用量)
-            final_model_info = result.get("model_info")
-            usage = result.get("usage")
-
-            if final_model_info and usage:
-                await self._record_usage(final_model_info, usage, time.time() - start_time)
-
-            return content, (reasoning_content, model_name, tool_calls)
-
-    async def get_embedding(self, embedding_input: str) -> Tuple[List[float], str]:
-        """
-        获取给定文本的嵌入向量 (Embedding)。
-
-        Args:
-            embedding_input (str): 需要进行嵌入的文本。
-
-        Returns:
-            Tuple[List[float], str]: 嵌入向量列表和所使用的模型名称。
-
-        Raises:
-            RuntimeError: 如果获取embedding失败。
-        """
-        start_time = time.time()
-        # 选择一个支持embedding的模型
-        model_info, api_provider, client = self.model_selector.select_model()
-
-        from .request_executor import RequestExecutor
-        # 创建请求执行器
-        executor = RequestExecutor(
-            task_name=self.task_name,
-            model_set=self.model_for_task,
-=======
                 prompt,
                 temperature,
                 max_tokens,
@@ -595,19 +438,10 @@ class LLMRequest:
 
         # 请求并处理返回值
         response = await self._execute_request(
->>>>>>> parent of 253946f (refactor(llm): 将LLM请求逻辑解耦到专门的组件中)
             api_provider=api_provider,
             client=client,
             request_type=RequestType.EMBEDDING,
             model_info=model_info,
-<<<<<<< HEAD
-            model_selector=self.model_selector,
-        )
-        # 执行embedding请求
-        response = await executor.execute_request(
-            request_type="embedding",
-=======
->>>>>>> parent of 253946f (refactor(llm): 将LLM请求逻辑解耦到专门的组件中)
             embedding_input=embedding_input,
         )
 
@@ -625,32 +459,6 @@ class LLMRequest:
 
         if not embedding:
             raise RuntimeError("获取embedding失败")
-<<<<<<< HEAD
-
-        # 记录用量
-        if usage := response.usage:
-            await self._record_usage(model_info, usage, time.time() - start_time, "/embeddings")
-
-        return embedding, model_info.name
-
-    async def _record_usage(self, model_info: ModelInfo, usage: UsageRecord, time_cost: float, endpoint: str = "/chat/completions"):
-        """
-        记录模型API的调用用量到数据库。
-
-        Args:
-            model_info (ModelInfo): 使用的模型信息。
-            usage (UsageRecord): 包含token用量信息的对象。
-            time_cost (float): 本次请求的总耗时(秒)。
-            endpoint (str, optional): 请求的API端点。 Defaults to "/chat/completions".
- """ - await llm_usage_recorder.record_usage_to_database( - model_info=model_info, - model_usage=usage, - user_id="system", # 当前所有请求都以系统用户身份记录 - time_cost=time_cost, - request_type=self.request_type, - endpoint=endpoint, -======= return embedding, model_info.name @@ -680,7 +488,6 @@ class LLMRequest: least_used_model_name = min( candidate_models_usage, key=lambda k: candidate_models_usage[k][0] + candidate_models_usage[k][1] * 300 + candidate_models_usage[k][2] * 1000, ->>>>>>> parent of 253946f (refactor(llm): 将LLM请求逻辑解耦到专门的组件中) ) # --- 动态故障转移的核心逻辑 ---