refactor(llm): resolve merge conflicts and remove the request-logic refactoring
The recent refactoring that decoupled the LLM request logic introduced severe merge conflicts. This commit resolves them by removing the newly introduced components such as `RequestExecutor` and `RequestStrategy` and restoring the previous code structure, which stabilizes the development branch and leaves room to revisit the refactoring later.
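For orientation before the diff: the change being reverted routed each request (response, audio, embedding) through a dedicated RequestExecutor built from the task's model set, whereas the restored code keeps execution inside LLMRequest via its own _execute_request helper. The sketch below is a minimal, self-contained illustration of that structural difference only; apart from the names RequestExecutor, LLMRequest and _execute_request taken from the diff, every class body, parameter and value here is a hypothetical stub, not the project's actual implementation.

import asyncio
from dataclasses import dataclass


@dataclass
class Response:
    content: str = ""


class RequestExecutor:
    # Stub with the call shape of the component this commit removes.
    def __init__(self, task_name: str, model_set: str) -> None:
        self.task_name = task_name
        self.model_set = model_set

    async def execute_request(self, request_type: str, **payload: object) -> Response:
        # The real component would dispatch to the selected provider/client here.
        return Response(content=f"{request_type} handled by RequestExecutor")


class LLMRequest:
    # Stub with the call shape of the restored, pre-refactor structure.
    def __init__(self, task_name: str) -> None:
        self.task_name = task_name

    async def _execute_request(self, request_type: str, **payload: object) -> Response:
        # After this commit, request execution lives inside the class again.
        return Response(content=f"{request_type} handled by LLMRequest")


async def main() -> None:
    # HEAD side (removed by this commit): build an executor for the task and delegate to it.
    executor = RequestExecutor(task_name="demo_task", model_set="demo_models")
    print((await executor.execute_request("embedding", embedding_input="hello")).content)

    # Parent side (restored by this commit): call the class's own helper directly.
    request = LLMRequest(task_name="demo_task")
    print((await request._execute_request("embedding", embedding_input="hello")).content)


asyncio.run(main())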
@@ -8,16 +8,6 @@ from rich.traceback import install
 from typing import Tuple, List, Dict, Optional, Callable, Any, Coroutine, Generator

 from src.common.logger import get_logger
-<<<<<<< HEAD
-from src.config.api_ada_configs import TaskConfig, ModelInfo, UsageRecord
-from .llm_utils import build_tool_options, normalize_image_format
-from .model_selector import ModelSelector
-from .payload_content.message import MessageBuilder
-from .payload_content.tool_option import ToolCall
-from .prompt_processor import PromptProcessor
-from .request_strategy import RequestStrategy
-from .utils import llm_usage_recorder
-=======
 from src.config.config import model_config
 from src.config.api_ada_configs import APIProvider, ModelInfo, TaskConfig
 from .payload_content.message import MessageBuilder, Message
@@ -28,7 +18,6 @@ from .utils import compress_messages, llm_usage_recorder
 from .exceptions import NetworkConnectionError, ReqAbortException, RespNotOkException, RespParseException

 install(extra_lines=3)
->>>>>>> parent of 253946f (refactor(llm): 将LLM请求逻辑解耦到专门的组件中)

 logger = get_logger("model_utils")

@@ -185,34 +174,6 @@ class LLMRequest:
         max_tokens: Optional[int] = None,
     ) -> Tuple[str, Tuple[str, str, Optional[List[ToolCall]]]]:
         """
-<<<<<<< HEAD
-        为包含图像的多模态输入生成文本响应。
-
-        Args:
-            prompt (str): 文本提示。
-            image_base64 (str): Base64编码的图像数据。
-            image_format (str): 图像格式 (例如, "png", "jpeg")。
-            temperature (Optional[float], optional): 控制生成文本的随机性。 Defaults to None.
-            max_tokens (Optional[int], optional): 生成响应的最大长度。 Defaults to None.
-
-        Returns:
-            Tuple[str, Tuple[str, str, Optional[List[ToolCall]]]]:
-                - 清理后的响应内容。
-                - 一个元组,包含思考过程、模型名称和工具调用列表。
-        """
-        start_time = time.time()
-
-        # 步骤 1: 选择一个支持图像处理的模型
-        model_info, api_provider, client = self.model_selector.select_model()
-
-        # 步骤 2: 准备消息体
-        # 预处理文本提示
-        processed_prompt = self.prompt_processor.process_prompt(prompt, model_info, api_provider, self.task_name)
-        # 规范化图像格式
-        normalized_format = normalize_image_format(image_format)
-
-        # 使用MessageBuilder构建多模态消息
-=======
         为图像生成响应
         Args:
             prompt (str): 提示词
@@ -229,7 +190,6 @@ class LLMRequest:
         model_info, api_provider, client = self._select_model()

         # 请求体构建
->>>>>>> parent of 253946f (refactor(llm): 将LLM请求逻辑解耦到专门的组件中)
         message_builder = MessageBuilder()
         message_builder.add_text_content(prompt)
         message_builder.add_image_content(
@@ -239,16 +199,8 @@ class LLMRequest:
         )
         messages = [message_builder.build()]

-<<<<<<< HEAD
-        # 步骤 3: 执行请求 (图像请求通常不走复杂的故障转移策略,直接执行)
-        from .request_executor import RequestExecutor
-        executor = RequestExecutor(
-            task_name=self.task_name,
-            model_set=self.model_for_task,
-=======
         # 请求并处理返回值
         response = await self._execute_request(
->>>>>>> parent of 253946f (refactor(llm): 将LLM请求逻辑解耦到专门的组件中)
             api_provider=api_provider,
             client=client,
             request_type=RequestType.RESPONSE,
@@ -257,14 +209,6 @@ class LLMRequest:
             temperature=temperature,
             max_tokens=max_tokens,
         )
-<<<<<<< HEAD
-
-        # 步骤 4: 处理响应
-        content, reasoning_content = self.prompt_processor.extract_reasoning(response.content or "")
-        tool_calls = response.tool_calls
-
-        # 记录用量
-=======
         content = response.content or ""
         reasoning_content = response.reasoning_content or ""
         tool_calls = response.tool_calls
@@ -272,7 +216,6 @@ class LLMRequest:
         if not reasoning_content and content:
             content, extracted_reasoning = self._extract_reasoning(content)
             reasoning_content = extracted_reasoning
->>>>>>> parent of 253946f (refactor(llm): 将LLM请求逻辑解耦到专门的组件中)
         if usage := response.usage:
             await llm_usage_recorder.record_usage_to_database(
                 model_info=model_info,
@@ -286,24 +229,6 @@ class LLMRequest:

     async def generate_response_for_voice(self, voice_base64: str) -> Optional[str]:
         """
-<<<<<<< HEAD
-        将语音数据转换为文本(语音识别)。
-
-        Args:
-            voice_base64 (str): Base64编码的语音数据。
-
-        Returns:
-            Optional[str]: 识别出的文本内容,如果失败则返回None。
-        """
-        # 选择一个支持语音识别的模型
-        model_info, api_provider, client = self.model_selector.select_model()
-
-        from .request_executor import RequestExecutor
-        # 创建请求执行器
-        executor = RequestExecutor(
-            task_name=self.task_name,
-            model_set=self.model_for_task,
-=======
         为语音生成响应
         Args:
             voice_base64 (str): 语音的Base64编码字符串
@@ -315,19 +240,10 @@ class LLMRequest:

         # 请求并处理返回值
         response = await self._execute_request(
->>>>>>> parent of 253946f (refactor(llm): 将LLM请求逻辑解耦到专门的组件中)
             api_provider=api_provider,
             client=client,
             request_type=RequestType.AUDIO,
             model_info=model_info,
-<<<<<<< HEAD
-            model_selector=self.model_selector,
-        )
-        # 执行语音转文本请求
-        response = await executor.execute_request(
-            request_type="audio",
-=======
->>>>>>> parent of 253946f (refactor(llm): 将LLM请求逻辑解耦到专门的组件中)
             audio_base64=voice_base64,
         )
         return response.content or None
@@ -341,35 +257,6 @@ class LLMRequest:
         raise_when_empty: bool = True,
     ) -> Tuple[str, Tuple[str, str, Optional[List[ToolCall]]]]:
         """
-<<<<<<< HEAD
-        异步生成文本响应,支持并发和故障转移等高级策略。
-
-        Args:
-            prompt (str): 用户输入的提示。
-            temperature (Optional[float], optional): 控制生成文本的随机性。 Defaults to None.
-            max_tokens (Optional[int], optional): 生成响应的最大长度。 Defaults to None.
-            tools (Optional[List[Dict[str, Any]]], optional): 可供模型调用的工具列表。 Defaults to None.
-            raise_when_empty (bool, optional): 如果最终响应为空,是否抛出异常。 Defaults to True.
-
-        Returns:
-            Tuple[str, Tuple[str, str, Optional[List[ToolCall]]]]:
-                - 清理后的响应内容。
-                - 一个元组,包含思考过程、最终使用的模型名称和工具调用列表。
-        """
-        start_time = time.time()
-
-        # 步骤 1: 准备基础请求载荷
-        tool_built = build_tool_options(tools)
-        base_payload = {
-            "prompt": prompt,
-            "tool_options": tool_built,
-            "temperature": temperature,
-            "max_tokens": max_tokens,
-            "prompt_processor": self.prompt_processor,
-        }
-
-        # 步骤 2: 根据配置选择执行策略 (并发或单次带故障转移)
-=======
         异步生成响应,支持并发请求
         Args:
             prompt (str): 提示词
@@ -381,7 +268,6 @@ class LLMRequest:
             (Tuple[str, str, str, Optional[List[ToolCall]]]): 响应内容、推理内容、模型名称、工具调用列表
         """
         # 检查是否需要并发请求
->>>>>>> parent of 253946f (refactor(llm): 将LLM请求逻辑解耦到专门的组件中)
         concurrency_count = getattr(self.model_for_task, "concurrency_count", 1)

         if concurrency_count <= 1:
@@ -395,49 +281,6 @@ class LLMRequest:
             content, (reasoning_content, model_name, tool_calls) = await execute_concurrently(
                 self._execute_single_request,
                 concurrency_count,
-<<<<<<< HEAD
-                base_payload,
-                raise_when_empty=False,  # 在并发模式下,单个任务失败不应立即抛出异常
-            )
-
-        # 步骤 3: 处理最终结果
-        content = result.get("content", "")
-        reasoning_content = result.get("reasoning_content", "")
-        model_name = result.get("model_name", "unknown")
-        tool_calls = result.get("tool_calls")
-
-        # 步骤 4: 记录用量 (从策略返回的结果中获取最终使用的模型信息和用量)
-        final_model_info = result.get("model_info")
-        usage = result.get("usage")
-
-        if final_model_info and usage:
-            await self._record_usage(final_model_info, usage, time.time() - start_time)
-
-        return content, (reasoning_content, model_name, tool_calls)
-
-    async def get_embedding(self, embedding_input: str) -> Tuple[List[float], str]:
-        """
-        获取给定文本的嵌入向量 (Embedding)。
-
-        Args:
-            embedding_input (str): 需要进行嵌入的文本。
-
-        Returns:
-            Tuple[List[float], str]: 嵌入向量列表和所使用的模型名称。
-
-        Raises:
-            RuntimeError: 如果获取embedding失败。
-        """
-        start_time = time.time()
-        # 选择一个支持embedding的模型
-        model_info, api_provider, client = self.model_selector.select_model()
-
-        from .request_executor import RequestExecutor
-        # 创建请求执行器
-        executor = RequestExecutor(
-            task_name=self.task_name,
-            model_set=self.model_for_task,
-=======
                 prompt,
                 temperature,
                 max_tokens,
@@ -595,19 +438,10 @@ class LLMRequest:

         # 请求并处理返回值
         response = await self._execute_request(
->>>>>>> parent of 253946f (refactor(llm): 将LLM请求逻辑解耦到专门的组件中)
             api_provider=api_provider,
             client=client,
             request_type=RequestType.EMBEDDING,
             model_info=model_info,
-<<<<<<< HEAD
-            model_selector=self.model_selector,
-        )
-        # 执行embedding请求
-        response = await executor.execute_request(
-            request_type="embedding",
-=======
->>>>>>> parent of 253946f (refactor(llm): 将LLM请求逻辑解耦到专门的组件中)
             embedding_input=embedding_input,
         )

@@ -625,32 +459,6 @@ class LLMRequest:

         if not embedding:
             raise RuntimeError("获取embedding失败")
-<<<<<<< HEAD
-
-        # 记录用量
-        if usage := response.usage:
-            await self._record_usage(model_info, usage, time.time() - start_time, "/embeddings")
-
-        return embedding, model_info.name
-
-    async def _record_usage(self, model_info: ModelInfo, usage: UsageRecord, time_cost: float, endpoint: str = "/chat/completions"):
-        """
-        记录模型API的调用用量到数据库。
-
-        Args:
-            model_info (ModelInfo): 使用的模型信息。
-            usage (UsageRecord): 包含token用量信息的对象。
-            time_cost (float): 本次请求的总耗时(秒)。
-            endpoint (str, optional): 请求的API端点。 Defaults to "/chat/completions".
-        """
-        await llm_usage_recorder.record_usage_to_database(
-            model_info=model_info,
-            model_usage=usage,
-            user_id="system",  # 当前所有请求都以系统用户身份记录
-            time_cost=time_cost,
-            request_type=self.request_type,
-            endpoint=endpoint,
-=======

         return embedding, model_info.name

@@ -680,7 +488,6 @@ class LLMRequest:
         least_used_model_name = min(
             candidate_models_usage,
             key=lambda k: candidate_models_usage[k][0] + candidate_models_usage[k][1] * 300 + candidate_models_usage[k][2] * 1000,
->>>>>>> parent of 253946f (refactor(llm): 将LLM请求逻辑解耦到专门的组件中)
         )

         # --- 动态故障转移的核心逻辑 ---
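As a footnote on the last hunk above: the restored model picker scores each candidate by weighting its three usage counters 1, 300 and 1000 and taking the minimum. Only those weights come from the diff; the example values below and the meaning of the three counters are hypothetical, since this excerpt does not show how candidate_models_usage is built.

# Hypothetical usage tuples per model; only the 1/300/1000 weighting is taken from the diff.
candidate_models_usage = {
    "model-a": (1200, 1, 0),
    "model-b": (800, 0, 1),
    "model-c": (900, 2, 0),
}

least_used_model_name = min(
    candidate_models_usage,
    key=lambda k: candidate_models_usage[k][0]
    + candidate_models_usage[k][1] * 300
    + candidate_models_usage[k][2] * 1000,
)

# Scores: model-a -> 1500, model-b -> 1800, model-c -> 1500.
# min() returns the first key with the lowest score, so "model-a" wins the tie here.
print(least_used_model_name)  # model-a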