Initialization
21 src/llm_models/LICENSE Normal file
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2025 Mai.To.The.Gate

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
0 src/llm_models/__init__.py Normal file
98 src/llm_models/exceptions.py Normal file
@@ -0,0 +1,98 @@
from typing import Any


# Common error-code mapping (using the OpenAI API as the reference)
error_code_mapping = {
    400: "Invalid request parameters",
    401: "API key rejected; authentication failed. Check the settings in /config/model_list.toml",
    402: "Insufficient account balance",
    403: "Access to the model denied; identity verification may be required or the balance may be insufficient",
    404: "Not Found",
    413: "Request body too large; try compressing images or reducing the input",
    429: "Too many requests; please retry later",
    500: "Internal server error",
    503: "Server overloaded",
}


class NetworkConnectionError(Exception):
    """Connection failure, typically a network problem or an unavailable server"""

    def __init__(self):
        super().__init__()

    def __str__(self):
        return "Connection failed; check the network status and verify the URL"


class ReqAbortException(Exception):
    """Request aborted, typically because it was interrupted or cancelled"""

    def __init__(self, message: str | None = None):
        super().__init__(message)
        self.message = message

    def __str__(self):
        return self.message or "Request terminated for an unknown reason"


class RespNotOkException(Exception):
    """Unsuccessful response, i.e. anything other than '200 OK'"""

    def __init__(self, status_code: int, message: str | None = None):
        super().__init__(message)
        self.status_code = status_code
        self.message = message

    def __str__(self):
        if self.status_code in error_code_mapping:
            return error_code_mapping[self.status_code]
        elif self.message:
            return self.message
        else:
            return f"Unexpected response status code: {self.status_code}"


class RespParseException(Exception):
    """Response parsing error, typically a malformed response or a mismatched parser"""

    def __init__(self, ext_info: Any, message: str | None = None):
        super().__init__(message)
        self.ext_info = ext_info
        self.message = message

    def __str__(self):
        return self.message or "Unknown error while parsing the response; check that the correct parser is configured"


class PayLoadTooLargeError(Exception):
    """Custom exception for oversized request bodies"""

    def __init__(self, message: str):
        super().__init__(message)
        self.message = message

    def __str__(self):
        return self.message or "Request body too large; try compressing images or reducing the input."


class RequestAbortException(Exception):
    """Custom exception for interrupted requests (note: overlaps with ReqAbortException above)"""

    def __init__(self, message: str):
        super().__init__(message)
        self.message = message

    def __str__(self):
        return self.message


class PermissionDeniedException(Exception):
    """Custom exception for access-denied errors"""

    def __init__(self, message: str):
        super().__init__(message)
        self.message = message

    def __str__(self):
        return self.message
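For orientation, here is a minimal sketch of how calling code is meant to consume these exceptions; the surrounding request function (do_request) is hypothetical and only illustrates the fallback from a raw status code to a readable message:

from src.llm_models.exceptions import RespNotOkException, NetworkConnectionError

async def call_model_or_report(do_request):
    # do_request is a placeholder awaitable that performs the actual API call
    try:
        return await do_request()
    except RespNotOkException as e:
        # __str__ falls back to error_code_mapping, e.g. 429 -> "Too many requests; please retry later"
        print(f"API error ({e.status_code}): {e}")
    except NetworkConnectionError as e:
        print(f"Network error: {e}")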
8 src/llm_models/model_client/__init__.py Normal file
@@ -0,0 +1,8 @@
from src.config.config import model_config

used_client_types = {provider.client_type for provider in model_config.api_providers}

# Import only the client modules that are actually configured;
# importing a module registers its client class with client_registry.
if "openai" in used_client_types:
    from . import openai_client  # noqa: F401
if "gemini" in used_client_types:
    from . import gemini_client  # noqa: F401
178 src/llm_models/model_client/base_client.py Normal file
@@ -0,0 +1,178 @@
import asyncio
from dataclasses import dataclass
from abc import ABC, abstractmethod
from typing import Callable, Any, Optional

from src.config.api_ada_configs import ModelInfo, APIProvider
from ..payload_content.message import Message
from ..payload_content.resp_format import RespFormat
from ..payload_content.tool_option import ToolOption, ToolCall


@dataclass
class UsageRecord:
    """
    Token usage record
    """

    model_name: str
    """Model name"""

    provider_name: str
    """Provider name"""

    prompt_tokens: int
    """Prompt token count"""

    completion_tokens: int
    """Completion token count"""

    total_tokens: int
    """Total token count"""


@dataclass
class APIResponse:
    """
    API response
    """

    content: str | None = None
    """Response content"""

    reasoning_content: str | None = None
    """Reasoning content"""

    tool_calls: list[ToolCall] | None = None
    """Tool calls [(tool name, tool arguments), ...]"""

    embedding: list[float] | None = None
    """Embedding vector"""

    usage: UsageRecord | None = None
    """Usage record (prompt_tokens, completion_tokens, total_tokens)"""

    raw_data: Any = None
    """Raw response data"""


class BaseClient(ABC):
    """
    Base client
    """

    api_provider: APIProvider

    def __init__(self, api_provider: APIProvider):
        self.api_provider = api_provider

    @abstractmethod
    async def get_response(
        self,
        model_info: ModelInfo,
        message_list: list[Message],
        tool_options: list[ToolOption] | None = None,
        max_tokens: int = 1024,
        temperature: float = 0.7,
        response_format: RespFormat | None = None,
        stream_response_handler: Optional[
            Callable[[Any, asyncio.Event | None], tuple[APIResponse, tuple[int, int, int]]]
        ] = None,
        async_response_parser: Callable[[Any], tuple[APIResponse, tuple[int, int, int]]] | None = None,
        interrupt_flag: asyncio.Event | None = None,
        extra_params: dict[str, Any] | None = None,
    ) -> APIResponse:
        """
        Get a chat-completion response
        :param model_info: model info
        :param message_list: conversation body
        :param tool_options: tool options (optional, defaults to None)
        :param max_tokens: maximum token count (optional, defaults to 1024)
        :param temperature: temperature (optional, defaults to 0.7)
        :param response_format: response format (optional, defaults to NotGiven)
        :param stream_response_handler: stream response handler (optional)
        :param async_response_parser: response parser (optional)
        :param interrupt_flag: interrupt event (optional, defaults to None)
        :return: APIResponse containing content, reasoning content, tool calls, and other data
        """
        raise NotImplementedError("'get_response' method should be overridden in subclasses")

    @abstractmethod
    async def get_embedding(
        self,
        model_info: ModelInfo,
        embedding_input: str,
        extra_params: dict[str, Any] | None = None,
    ) -> APIResponse:
        """
        Get a text embedding
        :param model_info: model info
        :param embedding_input: input text to embed
        :param extra_params: extra request parameters
        :return: embedding response
        """
        raise NotImplementedError("'get_embedding' method should be overridden in subclasses")

    @abstractmethod
    async def get_audio_transcriptions(
        self,
        model_info: ModelInfo,
        audio_base64: str,
        extra_params: dict[str, Any] | None = None,
    ) -> APIResponse:
        """
        Get an audio transcription
        :param model_info: model info
        :param audio_base64: base64-encoded audio data
        :param extra_params: extra request parameters
        :return: transcription response
        """
        raise NotImplementedError("'get_audio_transcriptions' method should be overridden in subclasses")

    @abstractmethod
    def get_support_image_formats(self) -> list[str]:
        """
        Get the supported image formats
        :return: list of supported image formats
        """
        raise NotImplementedError("'get_support_image_formats' method should be overridden in subclasses")


class ClientRegistry:
    def __init__(self) -> None:
        self.client_registry: dict[str, type[BaseClient]] = {}
        """Mapping from APIProvider.client_type to BaseClient subclass"""
        self.client_instance_cache: dict[str, BaseClient] = {}
        """Mapping from APIProvider.name to BaseClient instance"""

    def register_client_class(self, client_type: str):
        """
        Register an API client class
        Args:
            client_type: the client type key the class is registered under
        """

        def decorator(cls: type[BaseClient]) -> type[BaseClient]:
            if not issubclass(cls, BaseClient):
                raise TypeError(f"{cls.__name__} is not a subclass of BaseClient")
            self.client_registry[client_type] = cls
            return cls

        return decorator

    def get_client_class_instance(self, api_provider: APIProvider) -> BaseClient:
        """
        Get the registered API client instance for a provider
        Args:
            api_provider: APIProvider instance
        Returns:
            BaseClient: cached or newly created client instance
        """
        if api_provider.name not in self.client_instance_cache:
            if client_class := self.client_registry.get(api_provider.client_type):
                self.client_instance_cache[api_provider.name] = client_class(api_provider)
            else:
                raise KeyError(f"No client registered for type '{api_provider.client_type}'")
        return self.client_instance_cache[api_provider.name]


client_registry = ClientRegistry()
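As a sketch of how the registry is intended to be extended (the "echo" type string and EchoClient class are made up for illustration), a new backend only needs to subclass BaseClient and decorate itself; the conditional imports in model_client/__init__.py then pick it up by client_type:

from src.llm_models.model_client.base_client import BaseClient, APIResponse, client_registry

@client_registry.register_client_class("echo")  # "echo" is a hypothetical client_type
class EchoClient(BaseClient):
    async def get_response(self, model_info, message_list, **kwargs) -> APIResponse:
        # Echo the last message back instead of calling a real API
        resp = APIResponse()
        resp.content = str(message_list[-1].content)
        return resp

    async def get_embedding(self, model_info, embedding_input, extra_params=None) -> APIResponse:
        raise NotImplementedError

    async def get_audio_transcriptions(self, model_info, audio_base64, extra_params=None) -> APIResponse:
        raise NotImplementedError

    def get_support_image_formats(self) -> list[str]:
        return []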
560 src/llm_models/model_client/gemini_client.py Normal file
@@ -0,0 +1,560 @@
import asyncio
import io
import base64
from typing import Callable, AsyncIterator, Optional, Coroutine, Any, List

from google import genai
from google.genai.types import (
    Content,
    Part,
    FunctionDeclaration,
    GenerateContentResponse,
    ContentListUnion,
    ContentUnion,
    ThinkingConfig,
    Tool,
    GenerateContentConfig,
    EmbedContentResponse,
    EmbedContentConfig,
    SafetySetting,
    HarmCategory,
    HarmBlockThreshold,
)
from google.genai.errors import (
    ClientError,
    ServerError,
    UnknownFunctionCallArgumentError,
    UnsupportedFunctionError,
    FunctionInvocationError,
)

from src.config.api_ada_configs import ModelInfo, APIProvider
from src.common.logger import get_logger

from .base_client import APIResponse, UsageRecord, BaseClient, client_registry
from ..exceptions import (
    RespParseException,
    NetworkConnectionError,
    RespNotOkException,
    ReqAbortException,
)
from ..payload_content.message import Message, RoleType
from ..payload_content.resp_format import RespFormat, RespFormatType
from ..payload_content.tool_option import ToolOption, ToolParam, ToolCall

logger = get_logger("Gemini client")

gemini_safe_settings = [
    SafetySetting(category=HarmCategory.HARM_CATEGORY_HATE_SPEECH, threshold=HarmBlockThreshold.BLOCK_NONE),
    SafetySetting(category=HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT, threshold=HarmBlockThreshold.BLOCK_NONE),
    SafetySetting(category=HarmCategory.HARM_CATEGORY_HARASSMENT, threshold=HarmBlockThreshold.BLOCK_NONE),
    SafetySetting(category=HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT, threshold=HarmBlockThreshold.BLOCK_NONE),
    SafetySetting(category=HarmCategory.HARM_CATEGORY_CIVIC_INTEGRITY, threshold=HarmBlockThreshold.BLOCK_NONE),
]


def _convert_messages(
    messages: list[Message],
) -> tuple[ContentListUnion, list[str] | None]:
    """
    Convert messages into the format required by the Gemini API
    :param messages: message list
    :return: converted message list (plus any system instructions)
    """

    def _convert_message_item(message: Message) -> Content:
        """
        Convert a single message (system and tool messages are handled outside)
        :param message: message object
        :return: converted Content object
        """

        # Rename OpenAI-style roles to Gemini-style roles
        if message.role == RoleType.Assistant:
            role = "model"
        elif message.role == RoleType.User:
            role = "user"
        else:
            raise RuntimeError("Unreachable code: build messages with the MessageBuilder class")

        # Build the content parts
        if isinstance(message.content, str):
            content = [Part.from_text(text=message.content)]
        elif isinstance(message.content, list):
            content: List[Part] = []
            for item in message.content:
                if isinstance(item, tuple):
                    content.append(
                        Part.from_bytes(data=base64.b64decode(item[1]), mime_type=f"image/{item[0].lower()}")
                    )
                elif isinstance(item, str):
                    content.append(Part.from_text(text=item))
                else:
                    raise RuntimeError("Unreachable code: build messages with the MessageBuilder class")

        return Content(role=role, parts=content)

    temp_list: list[ContentUnion] = []
    system_instructions: list[str] = []
    for message in messages:
        if message.role == RoleType.System:
            if isinstance(message.content, str):
                system_instructions.append(message.content)
            else:
                raise ValueError("System messages must be plain text; image base64 content is not allowed here")
        elif message.role == RoleType.Tool:
            if not message.tool_call_id:
                raise ValueError("Unreachable code: build messages with the MessageBuilder class")
            # Note: tool responses are validated but not forwarded; the Gemini
            # function-response path is not implemented in this client yet.
        else:
            temp_list.append(_convert_message_item(message))

    # Attach the system instructions if any were collected, otherwise return None for them
    return (temp_list, system_instructions or None)


def _convert_tool_options(tool_options: list[ToolOption]) -> list[FunctionDeclaration]:
    """
    Convert tool options into the format required by the Gemini API
    :param tool_options: tool option list
    :return: converted tool declaration list
    """

    def _convert_tool_param(tool_option_param: ToolParam) -> dict:
        """
        Convert a single tool parameter
        :param tool_option_param: tool parameter object
        :return: converted parameter dict
        """
        return_dict: dict[str, Any] = {
            "type": tool_option_param.param_type.value,
            "description": tool_option_param.description,
        }
        if tool_option_param.enum_values:
            return_dict["enum"] = tool_option_param.enum_values
        return return_dict

    def _convert_tool_option_item(tool_option: ToolOption) -> FunctionDeclaration:
        """
        Convert a single tool option
        :param tool_option: tool option object
        :return: converted Gemini FunctionDeclaration
        """
        ret: dict[str, Any] = {
            "name": tool_option.name,
            "description": tool_option.description,
        }
        if tool_option.params:
            ret["parameters"] = {
                "type": "object",
                "properties": {param.name: _convert_tool_param(param) for param in tool_option.params},
                "required": [param.name for param in tool_option.params if param.required],
            }
        return FunctionDeclaration(**ret)

    return [_convert_tool_option_item(tool_option) for tool_option in tool_options]


def _process_delta(
    delta: GenerateContentResponse,
    fc_delta_buffer: io.StringIO,
    tool_calls_buffer: list[tuple[str, str, dict[str, Any]]],
):
    if not hasattr(delta, "candidates") or not delta.candidates:
        raise RespParseException(delta, "Failed to parse response: missing 'candidates' field")

    if delta.text:
        fc_delta_buffer.write(delta.text)

    if delta.function_calls:  # no hasattr check needed: the attribute always exists, even when empty
        for call in delta.function_calls:
            try:
                if not isinstance(call.args, dict):  # Gemini already returns function-call args as a dict
                    raise RespParseException(delta, "Failed to parse response: tool-call arguments are not a dict")
                if not call.id or not call.name:
                    raise RespParseException(delta, "Failed to parse response: tool call is missing its id or name")
                tool_calls_buffer.append(
                    (
                        call.id,
                        call.name,
                        call.args or {},  # replace None args with an empty dict
                    )
                )
            except Exception as e:
                raise RespParseException(delta, "Failed to parse response: cannot parse tool-call arguments") from e


def _build_stream_api_resp(
    _fc_delta_buffer: io.StringIO,
    _tool_calls_buffer: list[tuple[str, str, dict]],
) -> APIResponse:
    # sourcery skip: simplify-len-comparison, use-assigned-variable
    resp = APIResponse()

    if _fc_delta_buffer.tell() > 0:
        # If the content buffer is non-empty, copy it into the APIResponse
        resp.content = _fc_delta_buffer.getvalue()
    _fc_delta_buffer.close()
    if len(_tool_calls_buffer) > 0:
        # If the tool-call buffer is non-empty, convert it into a list of ToolCall objects
        resp.tool_calls = []
        for call_id, function_name, arguments_buffer in _tool_calls_buffer:
            if arguments_buffer is not None:
                arguments = arguments_buffer
                if not isinstance(arguments, dict):
                    raise RespParseException(
                        None,
                        f"Failed to parse response: tool-call arguments are not a dict. Raw arguments:\n{arguments_buffer}",
                    )
            else:
                arguments = None

            resp.tool_calls.append(ToolCall(call_id, function_name, arguments))

    return resp


async def _default_stream_response_handler(
    resp_stream: AsyncIterator[GenerateContentResponse],
    interrupt_flag: asyncio.Event | None,
) -> tuple[APIResponse, Optional[tuple[int, int, int]]]:
    """
    Stream response handler - processes a Gemini API streaming response
    :param resp_stream: streaming response object; an async iterator that is exhausted after a single pass,
        so swap in a different handler if this one misbehaves
    :return: APIResponse object plus an optional usage tuple
    """
    _fc_delta_buffer = io.StringIO()  # content buffer, accumulates the received text
    _tool_calls_buffer: list[tuple[str, str, dict]] = []  # tool-call buffer, accumulates received tool calls
    _usage_record = None  # usage record

    def _insure_buffer_closed():
        if _fc_delta_buffer and not _fc_delta_buffer.closed:
            _fc_delta_buffer.close()

    async for chunk in resp_stream:
        # Check the interrupt flag
        if interrupt_flag and interrupt_flag.is_set():
            # If the interrupt flag is set, raise ReqAbortException
            _insure_buffer_closed()
            raise ReqAbortException("Request interrupted by external signal")

        _process_delta(
            chunk,
            _fc_delta_buffer,
            _tool_calls_buffer,
        )

        if chunk.usage_metadata:
            # If usage metadata is present, keep the latest values
            _usage_record = (
                chunk.usage_metadata.prompt_token_count or 0,
                (chunk.usage_metadata.candidates_token_count or 0) + (chunk.usage_metadata.thoughts_token_count or 0),
                chunk.usage_metadata.total_token_count or 0,
            )
    try:
        return _build_stream_api_resp(
            _fc_delta_buffer,
            _tool_calls_buffer,
        ), _usage_record
    except Exception:
        # Make sure the buffer gets closed
        _insure_buffer_closed()
        raise


def _default_normal_response_parser(
    resp: GenerateContentResponse,
) -> tuple[APIResponse, Optional[tuple[int, int, int]]]:
    """
    Parse a chat-completion response - converts a Gemini API response into an APIResponse
    :param resp: response object
    :return: APIResponse object plus an optional usage tuple
    """
    api_response = APIResponse()

    if not hasattr(resp, "candidates") or not resp.candidates:
        raise RespParseException(resp, "Failed to parse response: missing 'candidates' field")
    try:
        if resp.candidates[0].content and resp.candidates[0].content.parts:
            for part in resp.candidates[0].content.parts:
                if not part.text:
                    continue
                if part.thought:
                    api_response.reasoning_content = (
                        api_response.reasoning_content + part.text if api_response.reasoning_content else part.text
                    )
    except Exception as e:
        logger.warning(f"Error while parsing thought content: {e}; skipping")

    if resp.text:
        api_response.content = resp.text

    if resp.function_calls:
        api_response.tool_calls = []
        for call in resp.function_calls:
            try:
                if not isinstance(call.args, dict):
                    raise RespParseException(resp, "Failed to parse response: tool-call arguments are not a dict")
                if not call.name:
                    raise RespParseException(resp, "Failed to parse response: tool call is missing its name")
                api_response.tool_calls.append(ToolCall(call.id or "gemini-tool_call", call.name, call.args or {}))
            except Exception as e:
                raise RespParseException(resp, "Failed to parse response: cannot parse tool-call arguments") from e

    if resp.usage_metadata:
        _usage_record = (
            resp.usage_metadata.prompt_token_count or 0,
            (resp.usage_metadata.candidates_token_count or 0) + (resp.usage_metadata.thoughts_token_count or 0),
            resp.usage_metadata.total_token_count or 0,
        )
    else:
        _usage_record = None

    api_response.raw_data = resp

    return api_response, _usage_record


@client_registry.register_client_class("gemini")
class GeminiClient(BaseClient):
    client: genai.Client

    def __init__(self, api_provider: APIProvider):
        super().__init__(api_provider)
        self.client = genai.Client(
            api_key=api_provider.api_key,
        )  # unlike the OpenAI client, Gemini decides on its own whether to retry

    async def get_response(
        self,
        model_info: ModelInfo,
        message_list: list[Message],
        tool_options: list[ToolOption] | None = None,
        max_tokens: int = 1024,
        temperature: float = 0.4,
        response_format: RespFormat | None = None,
        stream_response_handler: Optional[
            Callable[
                [AsyncIterator[GenerateContentResponse], asyncio.Event | None],
                Coroutine[Any, Any, tuple[APIResponse, Optional[tuple[int, int, int]]]],
            ]
        ] = None,
        async_response_parser: Optional[
            Callable[[GenerateContentResponse], tuple[APIResponse, Optional[tuple[int, int, int]]]]
        ] = None,
        interrupt_flag: asyncio.Event | None = None,
        extra_params: dict[str, Any] | None = None,
    ) -> APIResponse:
        """
        Get a chat-completion response
        Args:
            model_info: model info
            message_list: conversation body
            tool_options: tool options (optional, defaults to None)
            max_tokens: maximum token count (optional, defaults to 1024)
            temperature: temperature (optional, defaults to 0.4)
            response_format: response format (defaults to text/plain; a JSON Schema input must follow the
                OpenAPI 3.0 format, nominally the same as OpenAI's; other format inputs are not yet supported)
            stream_response_handler: stream response handler (optional, defaults to _default_stream_response_handler)
            async_response_parser: response parser (optional, defaults to _default_normal_response_parser)
            interrupt_flag: interrupt event (optional, defaults to None)
        Returns:
            APIResponse object containing content, reasoning content, tool calls, and other data
        """
        if stream_response_handler is None:
            stream_response_handler = _default_stream_response_handler

        if async_response_parser is None:
            async_response_parser = _default_normal_response_parser

        # Convert messages into the format required by the Gemini API
        messages = _convert_messages(message_list)
        # Convert tool_options into the format required by the Gemini API
        tools = _convert_tool_options(tool_options) if tool_options else None
        # Convert response_format into the format required by the Gemini API
        generation_config_dict = {
            "max_output_tokens": max_tokens,
            "temperature": temperature,
            "response_modalities": ["TEXT"],
            "thinking_config": ThinkingConfig(
                include_thoughts=True,
                thinking_budget=(
                    extra_params["thinking_budget"]
                    if extra_params and "thinking_budget" in extra_params
                    else int(max_tokens / 2)  # default thinking budget is half of max_tokens, to avoid empty replies
                ),
            ),
            "safety_settings": gemini_safe_settings,  # guards against empty replies
        }
        if tools:
            generation_config_dict["tools"] = [Tool(function_declarations=tools)]
        if messages[1]:
            # If there are system messages, add them to the config
            generation_config_dict["system_instruction"] = messages[1]
        if response_format and response_format.format_type == RespFormatType.TEXT:
            generation_config_dict["response_mime_type"] = "text/plain"
        elif response_format and response_format.format_type in (RespFormatType.JSON_OBJ, RespFormatType.JSON_SCHEMA):
            generation_config_dict["response_mime_type"] = "application/json"
            generation_config_dict["response_schema"] = response_format.to_dict()

        generation_config = GenerateContentConfig(**generation_config_dict)

        try:
            if model_info.force_stream_mode:
                req_task = asyncio.create_task(
                    self.client.aio.models.generate_content_stream(
                        model=model_info.model_identifier,
                        contents=messages[0],
                        config=generation_config,
                    )
                )
                while not req_task.done():
                    if interrupt_flag and interrupt_flag.is_set():
                        # If an interrupt flag exists and is set, cancel the task and raise
                        req_task.cancel()
                        raise ReqAbortException("Request interrupted by external signal")
                    await asyncio.sleep(0.1)  # wait 0.1s before re-checking the task and the interrupt flag
                resp, usage_record = await stream_response_handler(req_task.result(), interrupt_flag)
            else:
                req_task = asyncio.create_task(
                    self.client.aio.models.generate_content(
                        model=model_info.model_identifier,
                        contents=messages[0],
                        config=generation_config,
                    )
                )
                while not req_task.done():
                    if interrupt_flag and interrupt_flag.is_set():
                        # If an interrupt flag exists and is set, cancel the task and raise
                        req_task.cancel()
                        raise ReqAbortException("Request interrupted by external signal")
                    await asyncio.sleep(0.5)  # wait 0.5s before re-checking the task and the interrupt flag

                resp, usage_record = async_response_parser(req_task.result())
        except (ClientError, ServerError) as e:
            # Re-wrap ClientError and ServerError as RespNotOkException
            raise RespNotOkException(e.code, e.message) from None
        except (
            UnknownFunctionCallArgumentError,
            UnsupportedFunctionError,
            FunctionInvocationError,
        ) as e:
            raise ValueError(f"Tool type error: check the tool options and parameters: {str(e)}") from None
        except Exception as e:
            raise NetworkConnectionError() from e

        if usage_record:
            resp.usage = UsageRecord(
                model_name=model_info.name,
                provider_name=model_info.api_provider,
                prompt_tokens=usage_record[0],
                completion_tokens=usage_record[1],
                total_tokens=usage_record[2],
            )

        return resp

    async def get_embedding(
        self,
        model_info: ModelInfo,
        embedding_input: str,
        extra_params: dict[str, Any] | None = None,
    ) -> APIResponse:
        """
        Get a text embedding
        :param model_info: model info
        :param embedding_input: input text to embed
        :return: embedding response
        """
        try:
            raw_response: EmbedContentResponse = await self.client.aio.models.embed_content(
                model=model_info.model_identifier,
                contents=embedding_input,
                config=EmbedContentConfig(task_type="SEMANTIC_SIMILARITY"),
            )
        except (ClientError, ServerError) as e:
            # Re-wrap ClientError and ServerError as RespNotOkException
            raise RespNotOkException(e.code) from None
        except Exception as e:
            raise NetworkConnectionError() from e

        response = APIResponse()

        # Parse the embedding response and usage
        if hasattr(raw_response, "embeddings") and raw_response.embeddings:
            response.embedding = raw_response.embeddings[0].values
        else:
            raise RespParseException(raw_response, "Failed to parse response: missing 'embeddings' field")

        # The endpoint reports no token usage, so the input length is used as an approximation
        response.usage = UsageRecord(
            model_name=model_info.name,
            provider_name=model_info.api_provider,
            prompt_tokens=len(embedding_input),
            completion_tokens=0,
            total_tokens=len(embedding_input),
        )

        return response

    async def get_audio_transcriptions(
        self, model_info: ModelInfo, audio_base64: str, extra_params: dict[str, Any] | None = None
    ) -> APIResponse:
        """
        Get an audio transcription
        :param model_info: model info
        :param audio_base64: base64-encoded audio file
        :param extra_params: extra parameters (optional)
        :return: transcription response
        """
        generation_config_dict = {
            "max_output_tokens": 2048,
            "response_modalities": ["TEXT"],
            "thinking_config": ThinkingConfig(
                include_thoughts=True,
                thinking_budget=(
                    extra_params["thinking_budget"] if extra_params and "thinking_budget" in extra_params else 1024
                ),
            ),
            "safety_settings": gemini_safe_settings,
        }
        generate_content_config = GenerateContentConfig(**generation_config_dict)
        prompt = "Generate a transcript of the speech. The language of the transcript should **match the language of the speech**."
        try:
            raw_response: GenerateContentResponse = await self.client.aio.models.generate_content(
                model=model_info.model_identifier,
                contents=[
                    Content(
                        role="user",
                        parts=[
                            Part.from_text(text=prompt),
                            Part.from_bytes(data=base64.b64decode(audio_base64), mime_type="audio/wav"),
                        ],
                    )
                ],
                config=generate_content_config,
            )
            resp, usage_record = _default_normal_response_parser(raw_response)
        except (ClientError, ServerError) as e:
            # Re-wrap ClientError and ServerError as RespNotOkException
            raise RespNotOkException(e.code) from None
        except Exception as e:
            raise NetworkConnectionError() from e

        if usage_record:
            resp.usage = UsageRecord(
                model_name=model_info.name,
                provider_name=model_info.api_provider,
                prompt_tokens=usage_record[0],
                completion_tokens=usage_record[1],
                total_tokens=usage_record[2],
            )

        return resp

    def get_support_image_formats(self) -> list[str]:
        """
        Get the supported image formats
        :return: list of supported image formats
        """
        return ["png", "jpg", "jpeg", "webp", "heic", "heif"]
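A minimal sketch of the interrupt mechanism both clients implement (api_provider and model_info are placeholders for objects loaded from the project's config): get_response polls the event while the request task runs, and raises ReqAbortException once it is set.

import asyncio
from src.llm_models.model_client.base_client import client_registry
from src.llm_models.payload_content.message import MessageBuilder

async def ask_with_interrupt(api_provider, model_info, question: str, stop_event: asyncio.Event):
    client = client_registry.get_client_class_instance(api_provider)
    msg = MessageBuilder().add_text_content(question).build()
    # stop_event can be set from another task to abort the call mid-flight
    return await client.get_response(model_info, [msg], interrupt_flag=stop_event)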
583 src/llm_models/model_client/openai_client.py Normal file
@@ -0,0 +1,583 @@
import asyncio
import io
import json
import re
import base64
from collections.abc import Iterable
from typing import Callable, Any, Coroutine, Optional
from json_repair import repair_json

from openai import (
    AsyncOpenAI,
    APIConnectionError,
    APIStatusError,
    NOT_GIVEN,
    AsyncStream,
)
from openai.types.chat import (
    ChatCompletion,
    ChatCompletionChunk,
    ChatCompletionMessageParam,
    ChatCompletionToolParam,
)
from openai.types.chat.chat_completion_chunk import ChoiceDelta

from src.config.api_ada_configs import ModelInfo, APIProvider
from src.common.logger import get_logger
from .base_client import APIResponse, UsageRecord, BaseClient, client_registry
from ..exceptions import (
    RespParseException,
    NetworkConnectionError,
    RespNotOkException,
    ReqAbortException,
)
from ..payload_content.message import Message, RoleType
from ..payload_content.resp_format import RespFormat
from ..payload_content.tool_option import ToolOption, ToolParam, ToolCall

logger = get_logger("OpenAI client")


def _convert_messages(messages: list[Message]) -> list[ChatCompletionMessageParam]:
    """
    Convert messages into the format required by the OpenAI API
    :param messages: message list
    :return: converted message list
    """

    def _convert_message_item(message: Message) -> ChatCompletionMessageParam:
        """
        Convert a single message
        :param message: message object
        :return: converted message dict
        """

        # Build the content
        content: str | list[dict[str, Any]]
        if isinstance(message.content, str):
            content = message.content
        elif isinstance(message.content, list):
            content = []
            for item in message.content:
                if isinstance(item, tuple):
                    content.append(
                        {
                            "type": "image_url",
                            "image_url": {"url": f"data:image/{item[0].lower()};base64,{item[1]}"},
                        }
                    )
                elif isinstance(item, str):
                    content.append({"type": "text", "text": item})
                else:
                    raise RuntimeError("Unreachable code: build messages with the MessageBuilder class")

        ret = {
            "role": message.role.value,
            "content": content,
        }

        # Attach the tool-call ID
        if message.role == RoleType.Tool:
            if not message.tool_call_id:
                raise ValueError("Unreachable code: build messages with the MessageBuilder class")
            ret["tool_call_id"] = message.tool_call_id

        return ret  # type: ignore

    return [_convert_message_item(message) for message in messages]


def _convert_tool_options(tool_options: list[ToolOption]) -> list[dict[str, Any]]:
    """
    Convert tool options into the format required by the OpenAI API
    :param tool_options: tool option list
    :return: converted tool option list
    """

    def _convert_tool_param(tool_option_param: ToolParam) -> dict[str, Any]:
        """
        Convert a single tool parameter
        :param tool_option_param: tool parameter object
        :return: converted parameter dict
        """
        return_dict: dict[str, Any] = {
            "type": tool_option_param.param_type.value,
            "description": tool_option_param.description,
        }
        if tool_option_param.enum_values:
            return_dict["enum"] = tool_option_param.enum_values
        return return_dict

    def _convert_tool_option_item(tool_option: ToolOption) -> dict[str, Any]:
        """
        Convert a single tool option
        :param tool_option: tool option object
        :return: converted tool option dict
        """
        ret: dict[str, Any] = {
            "name": tool_option.name,
            "description": tool_option.description,
        }
        if tool_option.params:
            ret["parameters"] = {
                "type": "object",
                "properties": {param.name: _convert_tool_param(param) for param in tool_option.params},
                "required": [param.name for param in tool_option.params if param.required],
            }
        return ret

    return [
        {
            "type": "function",
            "function": _convert_tool_option_item(tool_option),
        }
        for tool_option in tool_options
    ]


def _process_delta(
    delta: ChoiceDelta,
    has_rc_attr_flag: bool,
    in_rc_flag: bool,
    rc_delta_buffer: io.StringIO,
    fc_delta_buffer: io.StringIO,
    tool_calls_buffer: list[tuple[str, str, io.StringIO]],
) -> bool:
    # Receive content
    if has_rc_attr_flag:
        # A dedicated reasoning-content field exists, so no content sniffing is needed
        if hasattr(delta, "reasoning_content") and delta.reasoning_content:  # type: ignore
            # If reasoning content is present, write it to the reasoning buffer
            assert isinstance(delta.reasoning_content, str)  # type: ignore
            rc_delta_buffer.write(delta.reasoning_content)  # type: ignore
        elif delta.content:
            # If regular content is present, write it to the content buffer
            fc_delta_buffer.write(delta.content)
    elif hasattr(delta, "content") and delta.content is not None:
        # No dedicated reasoning-content field, but regular content is present
        if in_rc_flag:
            # Currently inside a reasoning block
            if delta.content == "</think>":
                # '</think>' marks the end of the reasoning content; leave the reasoning block
                in_rc_flag = False
            else:
                # Anything else counts as reasoning content; append it to the reasoning buffer
                rc_delta_buffer.write(delta.content)
        elif delta.content == "<think>" and not fc_delta_buffer.getvalue():
            # If the content is '<think>' and the content buffer is empty, '<think>' is the first output token;
            # treat it as the start of the reasoning content and enter the reasoning block
            in_rc_flag = True
        else:
            # Anything else counts as regular content; append it to the content buffer
            fc_delta_buffer.write(delta.content)
    # Receive tool_calls
    if hasattr(delta, "tool_calls") and delta.tool_calls:
        tool_call_delta = delta.tool_calls[0]

        if tool_call_delta.index >= len(tool_calls_buffer):
            # The call index is at or past the buffer length, so this is a new tool call
            if tool_call_delta.id and tool_call_delta.function and tool_call_delta.function.name:
                tool_calls_buffer.append(
                    (
                        tool_call_delta.id,
                        tool_call_delta.function.name,
                        io.StringIO(),
                    )
                )
            else:
                logger.warning("Tool-call index is at or past the buffer length, but the ID or function info is missing.")

        if tool_call_delta.function and tool_call_delta.function.arguments:
            # If tool-call arguments are present, append them to the matching call's argument buffer
            tool_calls_buffer[tool_call_delta.index][2].write(tool_call_delta.function.arguments)

    return in_rc_flag


def _build_stream_api_resp(
    _fc_delta_buffer: io.StringIO,
    _rc_delta_buffer: io.StringIO,
    _tool_calls_buffer: list[tuple[str, str, io.StringIO]],
) -> APIResponse:
    resp = APIResponse()

    if _rc_delta_buffer.tell() > 0:
        # If the reasoning buffer is non-empty, copy it into the APIResponse
        resp.reasoning_content = _rc_delta_buffer.getvalue()
    _rc_delta_buffer.close()
    if _fc_delta_buffer.tell() > 0:
        # If the content buffer is non-empty, copy it into the APIResponse
        resp.content = _fc_delta_buffer.getvalue()
    _fc_delta_buffer.close()
    if _tool_calls_buffer:
        # If the tool-call buffer is non-empty, convert it into a list of ToolCall objects
        resp.tool_calls = []
        for call_id, function_name, arguments_buffer in _tool_calls_buffer:
            if arguments_buffer.tell() > 0:
                # If the argument buffer is non-empty, parse it as a JSON object
                raw_arg_data = arguments_buffer.getvalue()
                arguments_buffer.close()
                try:
                    arguments = json.loads(repair_json(raw_arg_data))
                    if not isinstance(arguments, dict):
                        raise RespParseException(
                            None,
                            f"Failed to parse response: tool-call arguments are not a dict. Raw arguments:\n{raw_arg_data}",
                        )
                except json.JSONDecodeError as e:
                    raise RespParseException(
                        None,
                        f"Failed to parse response: cannot parse tool-call arguments. Raw arguments: {raw_arg_data}",
                    ) from e
            else:
                arguments_buffer.close()
                arguments = None

            resp.tool_calls.append(ToolCall(call_id, function_name, arguments))

    return resp


async def _default_stream_response_handler(
    resp_stream: AsyncStream[ChatCompletionChunk],
    interrupt_flag: asyncio.Event | None,
) -> tuple[APIResponse, Optional[tuple[int, int, int]]]:
    """
    Stream response handler - processes an OpenAI API streaming response
    :param resp_stream: streaming response object
    :return: APIResponse object plus an optional usage tuple
    """

    _has_rc_attr_flag = False  # whether a dedicated reasoning-content field exists
    _in_rc_flag = False  # whether we are currently inside a reasoning block
    _rc_delta_buffer = io.StringIO()  # reasoning buffer, accumulates received reasoning content
    _fc_delta_buffer = io.StringIO()  # content buffer, accumulates received regular content
    _tool_calls_buffer: list[tuple[str, str, io.StringIO]] = []  # tool-call buffer, accumulates received tool calls
    _usage_record = None  # usage record

    def _insure_buffer_closed():
        # Make sure all buffers get closed
        if _rc_delta_buffer and not _rc_delta_buffer.closed:
            _rc_delta_buffer.close()
        if _fc_delta_buffer and not _fc_delta_buffer.closed:
            _fc_delta_buffer.close()
        for _, _, buffer in _tool_calls_buffer:
            if buffer and not buffer.closed:
                buffer.close()

    async for event in resp_stream:
        if interrupt_flag and interrupt_flag.is_set():
            # If the interrupt flag is set, raise ReqAbortException
            _insure_buffer_closed()
            raise ReqAbortException("Request interrupted by external signal")

        delta = event.choices[0].delta  # get this chunk's delta

        if hasattr(delta, "reasoning_content") and delta.reasoning_content:  # type: ignore
            # Mark that a dedicated reasoning-content field exists
            _has_rc_attr_flag = True

        _in_rc_flag = _process_delta(
            delta,
            _has_rc_attr_flag,
            _in_rc_flag,
            _rc_delta_buffer,
            _fc_delta_buffer,
            _tool_calls_buffer,
        )

        if event.usage:
            # If usage info is present, keep the latest values
            _usage_record = (
                event.usage.prompt_tokens or 0,
                event.usage.completion_tokens or 0,
                event.usage.total_tokens or 0,
            )

    try:
        return _build_stream_api_resp(
            _fc_delta_buffer,
            _rc_delta_buffer,
            _tool_calls_buffer,
        ), _usage_record
    except Exception:
        # Make sure the buffers get closed
        _insure_buffer_closed()
        raise


pattern = re.compile(
    r"<think>(?P<think>.*?)</think>(?P<content>.*)|<think>(?P<think_unclosed>.*)|(?P<content_only>.+)",
    re.DOTALL,
)
"""Regular expression used to extract reasoning content"""


def _default_normal_response_parser(
    resp: ChatCompletion,
) -> tuple[APIResponse, Optional[tuple[int, int, int]]]:
    """
    Parse a chat-completion response - converts an OpenAI API response into an APIResponse
    :param resp: response object
    :return: APIResponse object plus an optional usage tuple
    """
    api_response = APIResponse()

    if not hasattr(resp, "choices") or len(resp.choices) == 0:
        raise RespParseException(resp, "Failed to parse response: missing 'choices' field")
    message_part = resp.choices[0].message

    if hasattr(message_part, "reasoning_content") and message_part.reasoning_content:  # type: ignore
        # A valid reasoning field is present
        api_response.content = message_part.content
        api_response.reasoning_content = message_part.reasoning_content  # type: ignore
    elif message_part.content:
        # Extract the reasoning and the content
        match = pattern.match(message_part.content)
        if not match:
            raise RespParseException(resp, "Failed to parse response: cannot capture reasoning and output content")
        if match.group("think") is not None:
            result = match.group("think").strip(), match.group("content").strip()
        elif match.group("think_unclosed") is not None:
            result = match.group("think_unclosed").strip(), None
        else:
            result = None, match.group("content_only").strip()
        api_response.reasoning_content, api_response.content = result

    # Extract tool calls
    if message_part.tool_calls:
        api_response.tool_calls = []
        for call in message_part.tool_calls:
            try:
                arguments = json.loads(repair_json(call.function.arguments))
                if not isinstance(arguments, dict):
                    raise RespParseException(resp, "Failed to parse response: tool-call arguments are not a dict")
                api_response.tool_calls.append(ToolCall(call.id, call.function.name, arguments))
            except json.JSONDecodeError as e:
                raise RespParseException(resp, "Failed to parse response: cannot parse tool-call arguments") from e

    # Extract usage info
    if resp.usage:
        _usage_record = (
            resp.usage.prompt_tokens or 0,
            resp.usage.completion_tokens or 0,
            resp.usage.total_tokens or 0,
        )
    else:
        _usage_record = None

    # Store the raw response
    api_response.raw_data = resp

    return api_response, _usage_record


@client_registry.register_client_class("openai")
class OpenaiClient(BaseClient):
    def __init__(self, api_provider: APIProvider):
        super().__init__(api_provider)
        self.client: AsyncOpenAI = AsyncOpenAI(
            base_url=api_provider.base_url,
            api_key=api_provider.api_key,
            max_retries=0,
        )

    async def get_response(
        self,
        model_info: ModelInfo,
        message_list: list[Message],
        tool_options: list[ToolOption] | None = None,
        max_tokens: int = 1024,
        temperature: float = 0.7,
        response_format: RespFormat | None = None,
        stream_response_handler: Optional[
            Callable[
                [AsyncStream[ChatCompletionChunk], asyncio.Event | None],
                Coroutine[Any, Any, tuple[APIResponse, Optional[tuple[int, int, int]]]],
            ]
        ] = None,
        async_response_parser: Optional[
            Callable[[ChatCompletion], tuple[APIResponse, Optional[tuple[int, int, int]]]]
        ] = None,
        interrupt_flag: asyncio.Event | None = None,
        extra_params: dict[str, Any] | None = None,
    ) -> APIResponse:
        """
        Get a chat-completion response
        Args:
            model_info: model info
            message_list: conversation body
            tool_options: tool options (optional, defaults to None)
            max_tokens: maximum token count (optional, defaults to 1024)
            temperature: temperature (optional, defaults to 0.7)
            response_format: response format (optional, defaults to NotGiven)
            stream_response_handler: stream response handler (optional, defaults to _default_stream_response_handler)
            async_response_parser: response parser (optional, defaults to _default_normal_response_parser)
            interrupt_flag: interrupt event (optional, defaults to None)
        Returns:
            APIResponse object containing content, reasoning content, tool calls, and other data
        """
        if stream_response_handler is None:
            stream_response_handler = _default_stream_response_handler

        if async_response_parser is None:
            async_response_parser = _default_normal_response_parser

        # Convert messages into the format required by the OpenAI API
        messages: Iterable[ChatCompletionMessageParam] = _convert_messages(message_list)
        # Convert tool_options into the format required by the OpenAI API
        tools: Iterable[ChatCompletionToolParam] = _convert_tool_options(tool_options) if tool_options else NOT_GIVEN  # type: ignore

        try:
            if model_info.force_stream_mode:
                req_task = asyncio.create_task(
                    self.client.chat.completions.create(
                        model=model_info.model_identifier,
                        messages=messages,
                        tools=tools,
                        temperature=temperature,
                        max_tokens=max_tokens,
                        stream=True,
                        response_format=NOT_GIVEN,
                        extra_body=extra_params,
                    )
                )
                while not req_task.done():
                    if interrupt_flag and interrupt_flag.is_set():
                        # If an interrupt flag exists and is set, cancel the task and raise
                        req_task.cancel()
                        raise ReqAbortException("Request interrupted by external signal")
                    await asyncio.sleep(0.1)  # wait 0.1s before re-checking the task and the interrupt flag

                resp, usage_record = await stream_response_handler(req_task.result(), interrupt_flag)
            else:
                # Send the request and get the response
                # start_time = time.time()
                req_task = asyncio.create_task(
                    self.client.chat.completions.create(
                        model=model_info.model_identifier,
                        messages=messages,
                        tools=tools,
                        temperature=temperature,
                        max_tokens=max_tokens,
                        stream=False,
                        response_format=NOT_GIVEN,
                        extra_body=extra_params,
                    )
                )
                while not req_task.done():
                    if interrupt_flag and interrupt_flag.is_set():
                        # If an interrupt flag exists and is set, cancel the task and raise
                        req_task.cancel()
                        raise ReqAbortException("Request interrupted by external signal")
                    await asyncio.sleep(0.1)  # wait 0.1s before re-checking the task and the interrupt flag

                # logger.info(f"OpenAI request time: {model_info.model_identifier} {time.time() - start_time} \n{messages}")

                resp, usage_record = async_response_parser(req_task.result())
        except APIConnectionError as e:
            # Re-wrap APIConnectionError as NetworkConnectionError
            raise NetworkConnectionError() from e
        except APIStatusError as e:
            # Re-wrap APIStatusError as RespNotOkException
            raise RespNotOkException(e.status_code, e.message) from e

        if usage_record:
            resp.usage = UsageRecord(
                model_name=model_info.name,
                provider_name=model_info.api_provider,
                prompt_tokens=usage_record[0],
                completion_tokens=usage_record[1],
                total_tokens=usage_record[2],
            )

        return resp

    async def get_embedding(
        self,
        model_info: ModelInfo,
        embedding_input: str,
        extra_params: dict[str, Any] | None = None,
    ) -> APIResponse:
        """
        Get a text embedding
        :param model_info: model info
        :param embedding_input: input text to embed
        :return: embedding response
        """
        try:
            raw_response = await self.client.embeddings.create(
                model=model_info.model_identifier,
                input=embedding_input,
                extra_body=extra_params,
            )
        except APIConnectionError as e:
            raise NetworkConnectionError() from e
        except APIStatusError as e:
            # Re-wrap APIStatusError as RespNotOkException
            raise RespNotOkException(e.status_code) from e

        response = APIResponse()

        # Parse the embedding response
        if len(raw_response.data) > 0:
            response.embedding = raw_response.data[0].embedding
        else:
            raise RespParseException(
                raw_response,
                "Failed to parse response: missing embedding data.",
            )

        # Parse the usage info
        if hasattr(raw_response, "usage"):
            response.usage = UsageRecord(
                model_name=model_info.name,
                provider_name=model_info.api_provider,
                prompt_tokens=raw_response.usage.prompt_tokens or 0,
                completion_tokens=raw_response.usage.completion_tokens or 0,  # type: ignore
                total_tokens=raw_response.usage.total_tokens or 0,
            )

        return response

    async def get_audio_transcriptions(
        self,
        model_info: ModelInfo,
        audio_base64: str,
        extra_params: dict[str, Any] | None = None,
    ) -> APIResponse:
        """
        Get an audio transcription
        :param model_info: model info
        :param audio_base64: base64-encoded audio data
        :param extra_params: extra request parameters
        :return: transcription response
        """
        try:
            raw_response = await self.client.audio.transcriptions.create(
                model=model_info.model_identifier,
                file=("audio.wav", io.BytesIO(base64.b64decode(audio_base64))),
                extra_body=extra_params,
            )
        except APIConnectionError as e:
            raise NetworkConnectionError() from e
        except APIStatusError as e:
            # Re-wrap APIStatusError as RespNotOkException
            raise RespNotOkException(e.status_code) from e
        response = APIResponse()
        # Parse the transcription response
        if hasattr(raw_response, "text"):
            response.content = raw_response.text
        else:
            raise RespParseException(
                raw_response,
                "Failed to parse response: missing transcript text.",
            )
        return response

    def get_support_image_formats(self) -> list[str]:
        """
        Get the supported image formats
        :return: list of supported image formats
        """
        return ["jpg", "jpeg", "png", "webp", "gif"]
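A small standalone illustration of the `pattern` regular expression defined in openai_client.py above (not tied to a live API call): it splits a `<think>…</think>`-prefixed completion into reasoning and content, and passes plain answers through unchanged.

import re

pattern = re.compile(
    r"<think>(?P<think>.*?)</think>(?P<content>.*)|<think>(?P<think_unclosed>.*)|(?P<content_only>.+)",
    re.DOTALL,
)

m = pattern.match("<think>2+2 is 4</think>The answer is 4.")
assert m.group("think") == "2+2 is 4"
assert m.group("content") == "The answer is 4."

m = pattern.match("Just a plain answer.")
assert m.group("content_only") == "Just a plain answer."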
3 src/llm_models/payload_content/__init__.py Normal file
@@ -0,0 +1,3 @@
from .tool_option import ToolCall

__all__ = ["ToolCall"]
107 src/llm_models/payload_content/message.py Normal file
@@ -0,0 +1,107 @@
from enum import Enum


# This family of classes is designed with possible future extensions in mind


class RoleType(Enum):
    System = "system"
    User = "user"
    Assistant = "assistant"
    Tool = "tool"


SUPPORTED_IMAGE_FORMATS = ["jpg", "jpeg", "png", "webp", "gif"]  # image formats supported by OpenAI


class Message:
    def __init__(
        self,
        role: RoleType,
        content: str | list[tuple[str, str] | str],
        tool_call_id: str | None = None,
    ):
        """
        Initialize a message object
        (do not modify the Message class directly; use the MessageBuilder class to construct instances)
        """
        self.role: RoleType = role
        self.content: str | list[tuple[str, str] | str] = content
        self.tool_call_id: str | None = tool_call_id


class MessageBuilder:
    def __init__(self):
        self.__role: RoleType = RoleType.User
        self.__content: list[tuple[str, str] | str] = []
        self.__tool_call_id: str | None = None

    def set_role(self, role: RoleType = RoleType.User) -> "MessageBuilder":
        """
        Set the role (defaults to User)
        :param role: role
        :return: the MessageBuilder object
        """
        self.__role = role
        return self

    def add_text_content(self, text: str) -> "MessageBuilder":
        """
        Add text content
        :param text: text content
        :return: the MessageBuilder object
        """
        self.__content.append(text)
        return self

    def add_image_content(
        self,
        image_format: str,
        image_base64: str,
        support_formats: list[str] = SUPPORTED_IMAGE_FORMATS,  # default supported formats
    ) -> "MessageBuilder":
        """
        Add image content
        :param image_format: image format
        :param image_base64: base64-encoded image
        :return: the MessageBuilder object
        """
        if image_format.lower() not in support_formats:
            raise ValueError("Unsupported image format")
        if not image_base64:
            raise ValueError("The image's base64 encoding must not be empty")
        self.__content.append((image_format, image_base64))
        return self

    def add_tool_call(self, tool_call_id: str) -> "MessageBuilder":
        """
        Add a tool-call ID (make sure the role is set to Tool before calling this)
        :param tool_call_id: ID of the tool call
        :return: the MessageBuilder object
        """
        if self.__role != RoleType.Tool:
            raise ValueError("A tool-call ID can only be added when the role is Tool")
        if not tool_call_id:
            raise ValueError("The tool-call ID must not be empty")
        self.__tool_call_id = tool_call_id
        return self

    def build(self) -> Message:
        """
        Build the message object
        :return: a Message object
        """
        if len(self.__content) == 0:
            raise ValueError("The content must not be empty")
        if self.__role == RoleType.Tool and self.__tool_call_id is None:
            raise ValueError("A Tool-role message must carry a tool-call ID")

        return Message(
            role=self.__role,
            content=(
                self.__content[0]
                if (len(self.__content) == 1 and isinstance(self.__content[0], str))
                else self.__content
            ),
            tool_call_id=self.__tool_call_id,
        )
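A minimal sketch of the intended builder flow (FAKE_B64 is a placeholder, not real image data):

from src.llm_models.payload_content.message import MessageBuilder, RoleType

# A plain user message; a single text item collapses to a bare string in build()
question = MessageBuilder().add_text_content("What is in this picture?").build()

# A user message mixing text and an image
FAKE_B64 = "iVBORw0KGgo="
with_image = (
    MessageBuilder()
    .set_role(RoleType.User)
    .add_text_content("Describe this image.")
    .add_image_content("png", FAKE_B64)
    .build()
)

# A tool-result message must set the Tool role before attaching the call ID
tool_result = (
    MessageBuilder()
    .set_role(RoleType.Tool)
    .add_text_content('{"temperature": 21}')
    .add_tool_call("call_123")
    .build()
)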
223 src/llm_models/payload_content/resp_format.py Normal file
@@ -0,0 +1,223 @@
|
||||
from enum import Enum
|
||||
from typing import Optional, Any
|
||||
|
||||
from pydantic import BaseModel
|
||||
from typing_extensions import TypedDict, Required
|
||||
|
||||
|
||||
class RespFormatType(Enum):
|
||||
TEXT = "text" # 文本
|
||||
JSON_OBJ = "json_object" # JSON
|
||||
JSON_SCHEMA = "json_schema" # JSON Schema
|
||||
|
||||
|
||||
class JsonSchema(TypedDict, total=False):
|
||||
name: Required[str]
|
||||
"""
|
||||
The name of the response format.
|
||||
|
||||
Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length
|
||||
of 64.
|
||||
"""
|
||||
|
||||
description: Optional[str]
|
||||
"""
|
||||
A description of what the response format is for, used by the model to determine
|
||||
how to respond in the format.
|
||||
"""
|
||||
|
||||
schema: dict[str, object]
|
||||
"""
|
||||
The schema for the response format, described as a JSON Schema object. Learn how
|
||||
to build JSON schemas [here](https://json-schema.org/).
|
||||
"""
|
||||
|
||||
strict: Optional[bool]
|
||||
"""
|
||||
Whether to enable strict schema adherence when generating the output. If set to
|
||||
true, the model will always follow the exact schema defined in the `schema`
|
||||
field. Only a subset of JSON Schema is supported when `strict` is `true`. To
|
||||
learn more, read the
|
||||
[Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
|
||||
"""
|
||||
|
||||
|
||||
def _json_schema_type_check(instance) -> str | None:
|
||||
if "name" not in instance:
|
||||
return "schema必须包含'name'字段"
|
||||
elif not isinstance(instance["name"], str) or instance["name"].strip() == "":
|
||||
return "schema的'name'字段必须是非空字符串"
|
||||
if "description" in instance and (
|
||||
not isinstance(instance["description"], str)
|
||||
or instance["description"].strip() == ""
|
||||
):
|
||||
return "schema的'description'字段只能填入非空字符串"
|
||||
if "schema" not in instance:
|
||||
return "schema必须包含'schema'字段"
|
||||
elif not isinstance(instance["schema"], dict):
|
||||
return "schema的'schema'字段必须是字典,详见https://json-schema.org/"
|
||||
if "strict" in instance and not isinstance(instance["strict"], bool):
|
||||
return "schema的'strict'字段只能填入布尔值"
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def _remove_title(schema: dict[str, Any] | list[Any]) -> dict[str, Any] | list[Any]:
|
||||
"""
|
||||
递归移除JSON Schema中的title字段
|
||||
"""
|
||||
if isinstance(schema, list):
|
||||
# 如果当前Schema是列表,则对所有dict/list子元素递归调用
|
||||
for idx, item in enumerate(schema):
|
||||
if isinstance(item, (dict, list)):
|
||||
schema[idx] = _remove_title(item)
|
||||
elif isinstance(schema, dict):
|
||||
# 是字典,移除title字段,并对所有dict/list子元素递归调用
|
||||
if "title" in schema:
|
||||
del schema["title"]
|
||||
for key, value in schema.items():
|
||||
if isinstance(value, (dict, list)):
|
||||
schema[key] = _remove_title(value)
|
||||
|
||||
return schema
|
||||
|
||||
|
||||
def _link_definitions(schema: dict[str, Any]) -> dict[str, Any]:
    """
    Inline the definitions ($defs) referenced in a JSON Schema
    """

    def link_definitions_recursive(
        path: str, sub_schema: list[Any] | dict[str, Any], defs: dict[str, Any]
    ) -> dict[str, Any]:
        """
        Recursively inline the definitions in a JSON Schema
        :param path: current path
        :param sub_schema: sub-schema
        :param defs: collected schema definitions
        :return:
        """
        if isinstance(sub_schema, list):
            # If the current schema is a list, walk every element
            for i in range(len(sub_schema)):
                if isinstance(sub_schema[i], dict):
                    sub_schema[i] = link_definitions_recursive(
                        f"{path}/{str(i)}", sub_schema[i], defs
                    )
        else:
            # Otherwise it is a dict
            if "$defs" in sub_schema:
                # If the current schema has a "$defs" field, add its entries to defs
                key_prefix = f"{path}/$defs/"
                for key, value in sub_schema["$defs"].items():
                    def_key = key_prefix + key
                    if def_key not in defs:
                        defs[def_key] = value
                del sub_schema["$defs"]
            if "$ref" in sub_schema:
                # If the current schema has a "$ref" field, replace it with the definition from defs
                def_key = sub_schema["$ref"]
                if def_key in defs:
                    sub_schema = defs[def_key]
                else:
                    raise ValueError(f"Schema中引用的定义'{def_key}'不存在")
            # Walk the key-value pairs
            for key, value in sub_schema.items():
                if isinstance(value, (dict, list)):
                    # Recurse into dict or list values
                    sub_schema[key] = link_definitions_recursive(
                        f"{path}/{key}", value, defs
                    )

        return sub_schema

    return link_definitions_recursive("#", schema, {})

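A minimal sketch of the inlining above, assuming the "#/$defs/..." reference style that pydantic's model_json_schema() emits (hypothetical input):

schema = {
    "$defs": {"Point": {"type": "object"}},
    "type": "object",
    "properties": {"p": {"$ref": "#/$defs/Point"}},
}
linked = _link_definitions(schema)
# linked["properties"]["p"] == {"type": "object"}, and the "$defs" block is gone
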
def _remove_defs(schema: dict[str, Any]) -> dict[str, Any]:
    """
    Recursively remove "$defs" fields from a JSON Schema
    """
    if isinstance(schema, list):
        # If the current schema is a list, recurse into every dict/list element
        for idx, item in enumerate(schema):
            if isinstance(item, (dict, list)):
                schema[idx] = _remove_defs(item)
    elif isinstance(schema, dict):
        # It is a dict: drop the "$defs" field, then recurse into every dict/list value
        if "$defs" in schema:
            del schema["$defs"]
        for key, value in schema.items():
            if isinstance(value, (dict, list)):
                schema[key] = _remove_defs(value)

    return schema

class RespFormat:
    """
    Response format
    """

    @staticmethod
    def _generate_schema_from_model(schema):
        json_schema = {
            "name": schema.__name__,
            "schema": _remove_defs(
                _link_definitions(_remove_title(schema.model_json_schema()))
            ),
            "strict": False,
        }
        if schema.__doc__:
            json_schema["description"] = schema.__doc__
        return json_schema

    def __init__(
        self,
        format_type: RespFormatType = RespFormatType.TEXT,
        schema: type | JsonSchema | None = None,
    ):
        """
        Response format
        :param format_type: response format type (defaults to plain text)
        :param schema: template class or JsonSchema (only used when format_type is JSON Schema)
        """
        self.format_type: RespFormatType = format_type

        if format_type == RespFormatType.JSON_SCHEMA:
            if schema is None:
                raise ValueError("当format_type为'JSON_SCHEMA'时,schema不能为空")
            if isinstance(schema, dict):
                if check_msg := _json_schema_type_check(schema):
                    raise ValueError(f"schema格式不正确,{check_msg}")

                self.schema = schema
            elif issubclass(schema, BaseModel):
                try:
                    self.schema = self._generate_schema_from_model(schema)
                except Exception as e:
                    raise ValueError(
                        f"自动生成JSON Schema时发生异常,请检查模型类{schema.__name__}的定义"
                    ) from e
            else:
                raise ValueError("schema必须是BaseModel的子类或JsonSchema")
        else:
            self.schema = None

    def to_dict(self):
        """
        Convert the response format to a dict
        :return: dict
        """
        if self.schema:
            return {
                "format_type": self.format_type.value,
                "schema": self.schema,
            }
        else:
            return {
                "format_type": self.format_type.value,
            }
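A minimal usage sketch for the class above, relying on the RespFormatType enum and BaseModel import defined earlier in this file (the Answer model itself is hypothetical):

class Answer(BaseModel):
    """A short answer with a confidence score."""

    text: str
    confidence: float

resp_format = RespFormat(format_type=RespFormatType.JSON_SCHEMA, schema=Answer)
print(resp_format.to_dict()["schema"]["name"])  # -> "Answer"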
163
src/llm_models/payload_content/tool_option.py
Normal file
@@ -0,0 +1,163 @@
from enum import Enum


class ToolParamType(Enum):
    """
    Tool-call parameter types
    """

    STRING = "string"  # string
    INTEGER = "integer"  # integer
    FLOAT = "float"  # floating point
    BOOLEAN = "bool"  # boolean

class ToolParam:
    """
    Tool-call parameter
    """

    def __init__(
        self,
        name: str,
        param_type: ToolParamType,
        description: str,
        required: bool,
        enum_values: list[str] | None = None,
    ):
        """
        Initialize a tool-call parameter
        (do not instantiate ToolParam directly; build objects via ToolOptionBuilder)
        :param name: parameter name
        :param param_type: parameter type
        :param description: parameter description
        :param required: whether the parameter is required
        :param enum_values: allowed enum values, if any
        """
        self.name: str = name
        self.param_type: ToolParamType = param_type
        self.description: str = description
        self.required: bool = required
        self.enum_values: list[str] | None = enum_values

class ToolOption:
    """
    Tool-call option
    """

    def __init__(
        self,
        name: str,
        description: str,
        params: list[ToolParam] | None = None,
    ):
        """
        Initialize a tool-call option
        (do not instantiate ToolOption directly; build objects via ToolOptionBuilder)
        :param name: tool name
        :param description: tool description
        :param params: list of tool parameters
        """
        self.name: str = name
        self.description: str = description
        self.params: list[ToolParam] | None = params

class ToolOptionBuilder:
    """
    Builder for tool-call options
    """

    def __init__(self):
        self.__name: str = ""
        self.__description: str = ""
        self.__params: list[ToolParam] = []

    def set_name(self, name: str) -> "ToolOptionBuilder":
        """
        Set the tool name
        :param name: tool name
        :return: this ToolOptionBuilder instance
        """
        if not name:
            raise ValueError("工具名称不能为空")
        self.__name = name
        return self

    def set_description(self, description: str) -> "ToolOptionBuilder":
        """
        Set the tool description
        :param description: tool description
        :return: this ToolOptionBuilder instance
        """
        if not description:
            raise ValueError("工具描述不能为空")
        self.__description = description
        return self

    def add_param(
        self,
        name: str,
        param_type: ToolParamType,
        description: str,
        required: bool = False,
        enum_values: list[str] | None = None,
    ) -> "ToolOptionBuilder":
        """
        Add a tool parameter
        :param name: parameter name
        :param param_type: parameter type
        :param description: parameter description
        :param required: whether the parameter is required (defaults to False)
        :param enum_values: allowed enum values, if any
        :return: this ToolOptionBuilder instance
        """
        if not name or not description:
            raise ValueError("参数名称/描述不能为空")

        self.__params.append(
            ToolParam(
                name=name,
                param_type=param_type,
                description=description,
                required=required,
                enum_values=enum_values,
            )
        )

        return self

    def build(self):
        """
        Build the tool-call option
        :return: the tool-call option
        """
        if self.__name == "" or self.__description == "":
            raise ValueError("工具名称/描述不能为空")

        return ToolOption(
            name=self.__name,
            description=self.__description,
            params=None if len(self.__params) == 0 else self.__params,
        )

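A minimal sketch of the intended builder chaining (the tool name and parameter here are hypothetical):

weather_tool = (
    ToolOptionBuilder()
    .set_name("get_weather")
    .set_description("Look up the current weather for a city")
    .add_param(
        name="city",
        param_type=ToolParamType.STRING,
        description="Name of the city to query",
        required=True,
    )
    .build()
)
# weather_tool.params is a list holding a single required string parameter
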
class ToolCall:
    """
    A tool call returned by the model
    """

    def __init__(
        self,
        call_id: str,
        func_name: str,
        args: dict | None = None,
    ):
        """
        Initialize a tool call
        :param call_id: tool-call ID
        :param func_name: name of the function to call
        :param args: tool-call arguments
        """
        self.call_id: str = call_id
        self.func_name: str = func_name
        self.args: dict | None = args
191
src/llm_models/utils.py
Normal file
@@ -0,0 +1,191 @@
import base64
import io

from PIL import Image
from datetime import datetime

from src.common.logger import get_logger
from src.common.database.sqlalchemy_models import LLMUsage, get_session
from src.config.api_ada_configs import ModelInfo
from .payload_content.message import Message, MessageBuilder
from .model_client.base_client import UsageRecord


logger = get_logger("消息压缩工具")

def compress_messages(messages: list[Message], img_target_size: int = 1 * 1024 * 1024) -> list[Message]:
    """
    Compress the images in a message list
    :param messages: message list
    :param img_target_size: target image size, 1MB by default
    :return: compressed message list
    """

    def reformat_static_image(image_data: bytes) -> bytes:
        """
        Convert a static image to JPEG format
        :param image_data: image data
        :return: converted image data
        """
        try:
            image = Image.open(io.BytesIO(image_data))

            if image.format and (image.format.upper() in ["JPEG", "JPG", "PNG", "WEBP"]):
                # Static image: convert to JPEG format
                reformated_image_data = io.BytesIO()
                image.save(reformated_image_data, format="JPEG", quality=95, optimize=True)
                image_data = reformated_image_data.getvalue()

            return image_data
        except Exception as e:
            logger.error(f"图片转换格式失败: {str(e)}")
            return image_data

    def rescale_image(image_data: bytes, scale: float) -> tuple[bytes, tuple[int, int] | None, tuple[int, int] | None]:
        """
        Rescale an image
        :param image_data: image data
        :param scale: scale factor
        :return: (scaled image data, original size, new size)
        """
        try:
            image = Image.open(io.BytesIO(image_data))

            # Original dimensions
            original_size = (image.width, image.height)

            # Compute the new dimensions
            new_size = (int(original_size[0] * scale), int(original_size[1] * scale))

            output_buffer = io.BytesIO()

            if getattr(image, "is_animated", False):
                # Animated image: process every frame
                frames = []
                new_size = (new_size[0] // 2, new_size[1] // 2)  # animated, so halve the scaled size again
                for frame_idx in range(getattr(image, "n_frames", 1)):
                    image.seek(frame_idx)
                    new_frame = image.copy()
                    new_frame = new_frame.resize(new_size, Image.Resampling.LANCZOS)
                    frames.append(new_frame)

                # Save to the buffer
                frames[0].save(
                    output_buffer,
                    format="GIF",
                    save_all=True,
                    append_images=frames[1:],
                    optimize=True,
                    duration=image.info.get("duration", 100),
                    loop=image.info.get("loop", 0),
                )
            else:
                # Static image: resize and save directly
                resized_image = image.resize(new_size, Image.Resampling.LANCZOS)
                resized_image.save(output_buffer, format="JPEG", quality=95, optimize=True)

            return output_buffer.getvalue(), original_size, new_size

        except Exception as e:
            logger.error(f"图片缩放失败: {str(e)}")
            import traceback

            logger.error(traceback.format_exc())
            return image_data, None, None

    def compress_base64_image(base64_data: str, target_size: int = 1 * 1024 * 1024) -> str:
        original_b64_data_size = len(base64_data)  # size of the original data

        image_data = base64.b64decode(base64_data)

        # First try converting the format to JPEG
        image_data = reformat_static_image(image_data)
        base64_data = base64.b64encode(image_data).decode("utf-8")
        if len(base64_data) <= target_size:
            # Already below the target size after conversion: return directly
            logger.info(f"成功将图片转为JPEG格式,编码后大小: {len(base64_data) / 1024:.1f}KB")
            return base64_data

        # Still above the target size after conversion: shrink the dimensions
        scale = min(1.0, target_size / len(base64_data))
        image_data, original_size, new_size = rescale_image(image_data, scale)
        base64_data = base64.b64encode(image_data).decode("utf-8")

        if original_size and new_size:
            logger.info(
                f"压缩图片: {original_size[0]}x{original_size[1]} -> {new_size[0]}x{new_size[1]}\n"
                f"压缩前大小: {original_b64_data_size / 1024:.1f}KB, 压缩后大小: {len(base64_data) / 1024:.1f}KB"
            )

        return base64_data

    compressed_messages = []
    for message in messages:
        if isinstance(message.content, list):
            # Inspect the content and compress any images found
            message_builder = MessageBuilder()
            for content_item in message.content:
                if isinstance(content_item, tuple):
                    # An image: compress it
                    message_builder.add_image_content(
                        content_item[0],
                        compress_base64_image(content_item[1], target_size=img_target_size),
                    )
                else:
                    message_builder.add_text_content(content_item)
            compressed_messages.append(message_builder.build())
        else:
            compressed_messages.append(message)

    return compressed_messages

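A minimal usage sketch, assuming a message whose content carries an (image_format, base64) tuple as the loop above expects (big_image_base64 is a hypothetical oversized payload):

builder = MessageBuilder()
builder.add_text_content("Describe this image.")
builder.add_image_content("png", big_image_base64)
compressed = compress_messages([builder.build()], img_target_size=512 * 1024)  # 512KB target
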
class LLMUsageRecorder:
    """
    LLM usage recorder (SQLAlchemy version)
    """

    def record_usage_to_database(
        self, model_info: ModelInfo, model_usage: UsageRecord, user_id: str, request_type: str, endpoint: str
    ):
        input_cost = (model_usage.prompt_tokens / 1000000) * model_info.price_in
        output_cost = (model_usage.completion_tokens / 1000000) * model_info.price_out
        total_cost = round(input_cost + output_cost, 6)

        session = None
        try:
            # Create the record through a SQLAlchemy session
            session = get_session()

            usage_record = LLMUsage(
                model_name=model_info.model_identifier,
                user_id=user_id,
                request_type=request_type,
                endpoint=endpoint,
                prompt_tokens=model_usage.prompt_tokens or 0,
                completion_tokens=model_usage.completion_tokens or 0,
                total_tokens=model_usage.total_tokens or 0,
                cost=total_cost or 0.0,
                status="success",
                timestamp=datetime.now(),  # SQLAlchemy handles the DateTime field
            )

            session.add(usage_record)
            session.commit()

            logger.debug(
                f"Token使用情况 - 模型: {model_usage.model_name}, "
                f"用户: {user_id}, 类型: {request_type}, "
                f"提示词: {model_usage.prompt_tokens}, 完成: {model_usage.completion_tokens}, "
                f"总计: {model_usage.total_tokens}"
            )
        except Exception as e:
            if session:
                session.rollback()
            logger.error(f"记录token使用情况失败: {str(e)}")
        finally:
            if session:
                session.close()


llm_usage_recorder = LLMUsageRecorder()
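As a worked example of the cost computation above: with price_in = 2.0 and price_out = 8.0 (currency units per million tokens; the prices here are hypothetical), a call that used 1,500 prompt tokens and 500 completion tokens is recorded as (1500 / 1e6) * 2.0 + (500 / 1e6) * 8.0 = 0.003 + 0.004 = 0.007.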
525
src/llm_models/utils_model.py
Normal file
@@ -0,0 +1,525 @@
import re
import asyncio
import time

from enum import Enum
from rich.traceback import install
from typing import Tuple, List, Dict, Optional, Callable, Any

from src.common.logger import get_logger
from src.config.config import model_config
from src.config.api_ada_configs import APIProvider, ModelInfo, TaskConfig
from .payload_content.message import MessageBuilder, Message
from .payload_content.resp_format import RespFormat
from .payload_content.tool_option import ToolOption, ToolCall, ToolOptionBuilder, ToolParamType
from .model_client.base_client import BaseClient, APIResponse, client_registry
from .utils import compress_messages, llm_usage_recorder
from .exceptions import NetworkConnectionError, ReqAbortException, RespNotOkException, RespParseException

install(extra_lines=3)

logger = get_logger("model_utils")

# Common error-code mapping
error_code_mapping = {
    400: "参数不正确",
    401: "API key 错误,认证失败,请检查 config/model_config.toml 中的配置是否正确",
    402: "账号余额不足",
    403: "需要实名,或余额不足",
    404: "Not Found",
    429: "请求过于频繁,请稍后再试",
    500: "服务器内部故障",
    503: "服务器负载过高",
}

class RequestType(Enum):
    """Request type enum"""

    RESPONSE = "response"
    EMBEDDING = "embedding"
    AUDIO = "audio"


class LLMRequest:
    """LLM request class"""

    def __init__(self, model_set: TaskConfig, request_type: str = "") -> None:
        self.task_name = request_type
        self.model_for_task = model_set
        self.request_type = request_type
        self.model_usage: Dict[str, Tuple[int, int, int]] = {
            model: (0, 0, 0) for model in self.model_for_task.model_list
        }
        """Per-model usage stats for load balancing, stored as (total_tokens, penalty, usage_penalty); the penalty values let the scheduler back off a model that is failing or currently in use"""

    async def generate_response_for_image(
        self,
        prompt: str,
        image_base64: str,
        image_format: str,
        temperature: Optional[float] = None,
        max_tokens: Optional[int] = None,
    ) -> Tuple[str, Tuple[str, str, Optional[List[ToolCall]]]]:
        """
        Generate a response for an image
        Args:
            prompt (str): the prompt
            image_base64 (str): Base64-encoded image data
            image_format (str): image format (e.g. 'png', 'jpeg')
        Returns:
            (Tuple[str, Tuple[str, str, Optional[List[ToolCall]]]]): (content, (reasoning content, model name, tool calls))
        """
        # Model selection
        model_info, api_provider, client = self._select_model()

        # Build the request payload
        message_builder = MessageBuilder()
        message_builder.add_text_content(prompt)
        message_builder.add_image_content(
            image_base64=image_base64, image_format=image_format, support_formats=client.get_support_image_formats()
        )
        messages = [message_builder.build()]

        # Send the request and handle the result
        response = await self._execute_request(
            api_provider=api_provider,
            client=client,
            request_type=RequestType.RESPONSE,
            model_info=model_info,
            message_list=messages,
            temperature=temperature,
            max_tokens=max_tokens,
        )
        content = response.content or ""
        reasoning_content = response.reasoning_content or ""
        tool_calls = response.tool_calls
        # Extract <think>-tag reasoning from the content (backward compatibility)
        if not reasoning_content and content:
            content, extracted_reasoning = self._extract_reasoning(content)
            reasoning_content = extracted_reasoning
        if usage := response.usage:
            llm_usage_recorder.record_usage_to_database(
                model_info=model_info,
                model_usage=usage,
                user_id="system",
                request_type=self.request_type,
                endpoint="/chat/completions",
            )
        return content, (reasoning_content, model_info.name, tool_calls)

    async def generate_response_for_voice(self, voice_base64: str) -> Optional[str]:
        """
        Generate a response for an audio clip
        Args:
            voice_base64 (str): Base64-encoded audio data
        Returns:
            (Optional[str]): the generated transcription, or None
        """
        # Model selection
        model_info, api_provider, client = self._select_model()

        # Send the request and handle the result
        response = await self._execute_request(
            api_provider=api_provider,
            client=client,
            request_type=RequestType.AUDIO,
            model_info=model_info,
            audio_base64=voice_base64,
        )
        return response.content or None

    async def generate_response_async(
        self,
        prompt: str,
        temperature: Optional[float] = None,
        max_tokens: Optional[int] = None,
        tools: Optional[List[Dict[str, Any]]] = None,
        raise_when_empty: bool = True,
    ) -> Tuple[str, Tuple[str, str, Optional[List[ToolCall]]]]:
        """
        Generate a response asynchronously
        Args:
            prompt (str): the prompt
            temperature (float, optional): sampling temperature
            max_tokens (int, optional): maximum number of tokens
        Returns:
            (Tuple[str, Tuple[str, str, Optional[List[ToolCall]]]]): (content, (reasoning content, model name, tool calls))
        """
        # Build the request payload
        start_time = time.time()

        message_builder = MessageBuilder()
        message_builder.add_text_content(prompt)
        messages = [message_builder.build()]

        tool_built = self._build_tool_options(tools)

        # Model selection
        model_info, api_provider, client = self._select_model()

        # Send the request and handle the result
        logger.debug(f"LLM选择耗时: {model_info.name} {time.time() - start_time}")

        response = await self._execute_request(
            api_provider=api_provider,
            client=client,
            request_type=RequestType.RESPONSE,
            model_info=model_info,
            message_list=messages,
            temperature=temperature,
            max_tokens=max_tokens,
            tool_options=tool_built,
        )

        content = response.content
        reasoning_content = response.reasoning_content or ""
        tool_calls = response.tool_calls
        # Extract <think>-tag reasoning from the content (backward compatibility)
        if not reasoning_content and content:
            content, extracted_reasoning = self._extract_reasoning(content)
            reasoning_content = extracted_reasoning

        if usage := response.usage:
            llm_usage_recorder.record_usage_to_database(
                model_info=model_info,
                model_usage=usage,
                user_id="system",
                request_type=self.request_type,
                endpoint="/chat/completions",
            )

        if not content:
            if raise_when_empty:
                logger.warning("生成的响应为空")
                raise RuntimeError("生成的响应为空")
            content = "生成的响应为空,请检查模型配置或输入内容是否正确"

        return content, (reasoning_content, model_info.name, tool_calls)

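    # A minimal calling sketch for the method above, to be run from an async context.
    # How the TaskConfig is obtained depends on your configuration layout, so the
    # first line below is only a placeholder, not the project's actual accessor:
    #
    #     task_cfg: TaskConfig = ...  # a TaskConfig taken from model_config
    #     llm = LLMRequest(model_set=task_cfg, request_type="chat")
    #     content, (reasoning, model_name, tool_calls) = await llm.generate_response_async(
    #         prompt="Introduce yourself in one sentence.",
    #     )
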
    async def get_embedding(self, embedding_input: str) -> Tuple[List[float], str]:
        """Get an embedding vector
        Args:
            embedding_input (str): the text to embed
        Returns:
            (Tuple[List[float], str]): (embedding vector, name of the model used)
        """
        # No message payload needed; the input text is used directly
        model_info, api_provider, client = self._select_model()

        # Send the request and handle the result
        response = await self._execute_request(
            api_provider=api_provider,
            client=client,
            request_type=RequestType.EMBEDDING,
            model_info=model_info,
            embedding_input=embedding_input,
        )

        embedding = response.embedding

        if usage := response.usage:
            llm_usage_recorder.record_usage_to_database(
                model_info=model_info,
                model_usage=usage,
                user_id="system",
                request_type=self.request_type,
                endpoint="/embeddings",
            )

        if not embedding:
            raise RuntimeError("获取embedding失败")

        return embedding, model_info.name

    def _select_model(self) -> Tuple[ModelInfo, APIProvider, BaseClient]:
        """
        Select a model based on total tokens used and the penalty values
        """
        least_used_model_name = min(
            self.model_usage,
            key=lambda k: self.model_usage[k][0] + self.model_usage[k][1] * 300 + self.model_usage[k][2] * 1000,
        )
        model_info = model_config.get_model_info(least_used_model_name)
        api_provider = model_config.get_provider(model_info.api_provider)
        client = client_registry.get_client_class_instance(api_provider)
        logger.debug(f"选择请求模型: {model_info.name}")
        total_tokens, penalty, usage_penalty = self.model_usage[model_info.name]
        self.model_usage[model_info.name] = (total_tokens, penalty, usage_penalty + 1)  # bump the usage penalty to avoid picking the same model back-to-back
        return model_info, api_provider, client

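    # A worked example of the selection score above, using the weights of 300 per
    # failure penalty and 1000 per in-flight use (the usage numbers are hypothetical):
    #
    #     usage = {"A": (12_000, 0, 0), "B": (3_000, 2, 1)}
    #     score = lambda k: usage[k][0] + usage[k][1] * 300 + usage[k][2] * 1000
    #     assert min(usage, key=score) == "B"  # 3_000 + 600 + 1_000 = 4_600 beats 12_000
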
    async def _execute_request(
        self,
        api_provider: APIProvider,
        client: BaseClient,
        request_type: RequestType,
        model_info: ModelInfo,
        message_list: List[Message] | None = None,
        tool_options: list[ToolOption] | None = None,
        response_format: RespFormat | None = None,
        stream_response_handler: Optional[Callable] = None,
        async_response_parser: Optional[Callable] = None,
        temperature: Optional[float] = None,
        max_tokens: Optional[int] = None,
        embedding_input: str = "",
        audio_base64: str = "",
    ) -> APIResponse:
        """
        The method that actually performs the request

        Contains the retry and exception-handling logic
        """
        retry_remain = api_provider.max_retry
        compressed_messages: Optional[List[Message]] = None
        while retry_remain > 0:
            try:
                if request_type == RequestType.RESPONSE:
                    assert message_list is not None, "message_list cannot be None for response requests"
                    return await client.get_response(
                        model_info=model_info,
                        message_list=(compressed_messages or message_list),
                        tool_options=tool_options,
                        max_tokens=self.model_for_task.max_tokens if max_tokens is None else max_tokens,
                        temperature=self.model_for_task.temperature if temperature is None else temperature,
                        response_format=response_format,
                        stream_response_handler=stream_response_handler,
                        async_response_parser=async_response_parser,
                        extra_params=model_info.extra_params,
                    )
                elif request_type == RequestType.EMBEDDING:
                    assert embedding_input, "embedding_input cannot be empty for embedding requests"
                    return await client.get_embedding(
                        model_info=model_info,
                        embedding_input=embedding_input,
                        extra_params=model_info.extra_params,
                    )
                elif request_type == RequestType.AUDIO:
                    assert audio_base64 is not None, "audio_base64 cannot be None for audio requests"
                    return await client.get_audio_transcriptions(
                        model_info=model_info,
                        audio_base64=audio_base64,
                        extra_params=model_info.extra_params,
                    )
            except Exception as e:
                logger.debug(f"请求失败: {str(e)}")
                # Handle the exception
                total_tokens, penalty, usage_penalty = self.model_usage[model_info.name]
                self.model_usage[model_info.name] = (total_tokens, penalty + 1, usage_penalty)

                wait_interval, compressed_messages = self._default_exception_handler(
                    e,
                    self.task_name,
                    model_name=model_info.name,
                    remain_try=retry_remain,
                    retry_interval=api_provider.retry_interval,
                    messages=(message_list, compressed_messages is not None) if message_list else None,
                )

                if wait_interval == -1:
                    retry_remain = 0  # stop retrying
                elif wait_interval > 0:
                    logger.info(f"等待 {wait_interval} 秒后重试...")
                    await asyncio.sleep(wait_interval)
            finally:
                # Decrement in finally to avoid an infinite loop
                retry_remain -= 1
        total_tokens, penalty, usage_penalty = self.model_usage[model_info.name]
        self.model_usage[model_info.name] = (total_tokens, penalty, usage_penalty - 1)  # request finished, release the usage penalty
        logger.error(f"模型 '{model_info.name}' 请求失败,达到最大重试次数 {api_provider.max_retry} 次")
        raise RuntimeError("请求失败,已达到最大重试次数")

    def _default_exception_handler(
        self,
        e: Exception,
        task_name: str,
        model_name: str,
        remain_try: int,
        retry_interval: int = 10,
        messages: Tuple[List[Message], bool] | None = None,
    ) -> Tuple[int, List[Message] | None]:
        """
        Default exception handler
        Args:
            e (Exception): the exception object
            task_name (str): task name
            model_name (str): model name
            remain_try (int): remaining retry attempts
            retry_interval (int): retry interval
            messages (tuple[list[Message], bool] | None): (message list, whether it has already been compressed)
        Returns:
            (wait interval (0 means no wait, -1 means stop requesting this model), new message list (when messages were compressed))
        """

        if isinstance(e, NetworkConnectionError):  # network connection error
            return self._check_retry(
                remain_try,
                retry_interval,
                can_retry_msg=f"任务-'{task_name}' 模型-'{model_name}': 连接异常,将于{retry_interval}秒后重试",
                cannot_retry_msg=f"任务-'{task_name}' 模型-'{model_name}': 连接异常,超过最大重试次数,请检查网络连接状态或URL是否正确",
            )
        elif isinstance(e, ReqAbortException):
            logger.warning(f"任务-'{task_name}' 模型-'{model_name}': 请求被中断,详细信息-{str(e.message)}")
            return -1, None  # stop retrying this model
        elif isinstance(e, RespNotOkException):
            return self._handle_resp_not_ok(
                e,
                task_name,
                model_name,
                remain_try,
                retry_interval,
                messages,
            )
        elif isinstance(e, RespParseException):
            # Response parsing error
            logger.error(f"任务-'{task_name}' 模型-'{model_name}': 响应解析错误,错误信息-{e.message}")
            logger.debug(f"附加内容: {str(e.ext_info)}")
            return -1, None  # stop retrying this model
        else:
            logger.error(f"任务-'{task_name}' 模型-'{model_name}': 未知异常,错误信息-{str(e)}")
            return -1, None  # stop retrying this model

    def _check_retry(
        self,
        remain_try: int,
        retry_interval: int,
        can_retry_msg: str,
        cannot_retry_msg: str,
        can_retry_callable: Callable | None = None,
        **kwargs,
    ) -> Tuple[int, List[Message] | None]:
        """Helper: check whether another retry is allowed
        Args:
            remain_try (int): remaining retry attempts
            retry_interval (int): retry interval
            can_retry_msg (str): message to log when a retry is possible
            cannot_retry_msg (str): message to log when no retry is possible
            can_retry_callable (Callable | None): callable to invoke when a retry is possible (if any)
            **kwargs: extra arguments for the callable

        Returns:
            (Tuple[int, List[Message] | None]): (wait interval (0 means no wait, -1 means stop requesting this model), new message list (when messages were compressed))
        """
        if remain_try > 0:
            # Retries remain
            logger.warning(f"{can_retry_msg}")
            if can_retry_callable is not None:
                return retry_interval, can_retry_callable(**kwargs)
            else:
                return retry_interval, None
        else:
            # Maximum retries reached
            logger.warning(f"{cannot_retry_msg}")
            return -1, None  # stop retrying this model

    def _handle_resp_not_ok(
        self,
        e: RespNotOkException,
        task_name: str,
        model_name: str,
        remain_try: int,
        retry_interval: int = 10,
        messages: tuple[list[Message], bool] | None = None,
    ):
        """
        Handle a non-OK response exception
        Args:
            e (RespNotOkException): the response exception object
            task_name (str): task name
            model_name (str): model name
            remain_try (int): remaining retry attempts
            retry_interval (int): retry interval
            messages (tuple[list[Message], bool] | None): (message list, whether it has already been compressed)
        Returns:
            (wait interval (0 means no wait, -1 means stop requesting this model), new message list (when messages were compressed))
        """
        # Response error
        if e.status_code in [400, 401, 402, 403, 404]:
            # Client-side error
            logger.warning(
                f"任务-'{task_name}' 模型-'{model_name}': 请求失败,错误代码-{e.status_code},错误信息-{e.message}"
            )
            return -1, None  # stop retrying this model
        elif e.status_code == 413:
            if messages and not messages[1]:
                # Messages are present and not yet compressed: try compressing them
                return self._check_retry(
                    remain_try,
                    0,
                    can_retry_msg=f"任务-'{task_name}' 模型-'{model_name}': 请求体过大,尝试压缩消息后重试",
                    cannot_retry_msg=f"任务-'{task_name}' 模型-'{model_name}': 请求体过大,压缩消息后仍然过大,放弃请求",
                    can_retry_callable=compress_messages,
                    messages=messages[0],
                )
            # Nothing left to compress
            logger.warning(f"任务-'{task_name}' 模型-'{model_name}': 请求体过大,无法压缩消息,放弃请求。")
            return -1, None
        elif e.status_code == 429:
            # Rate limited
            return self._check_retry(
                remain_try,
                retry_interval,
                can_retry_msg=f"任务-'{task_name}' 模型-'{model_name}': 请求过于频繁,将于{retry_interval}秒后重试",
                cannot_retry_msg=f"任务-'{task_name}' 模型-'{model_name}': 请求过于频繁,超过最大重试次数,放弃请求",
            )
        elif e.status_code >= 500:
            # Server-side error
            return self._check_retry(
                remain_try,
                retry_interval,
                can_retry_msg=f"任务-'{task_name}' 模型-'{model_name}': 服务器错误,将于{retry_interval}秒后重试",
                cannot_retry_msg=f"任务-'{task_name}' 模型-'{model_name}': 服务器错误,超过最大重试次数,请稍后再试",
            )
        else:
            # Unknown error
            logger.warning(
                f"任务-'{task_name}' 模型-'{model_name}': 未知错误,错误代码-{e.status_code},错误信息-{e.message}"
            )
            return -1, None

    def _build_tool_options(self, tools: Optional[List[Dict[str, Any]]]) -> Optional[List[ToolOption]]:
        # sourcery skip: extract-method
        """Build the tool option list"""
        if not tools:
            return None
        tool_options: List[ToolOption] = []
        for tool in tools:
            tool_legal = True
            tool_options_builder = ToolOptionBuilder()
            tool_options_builder.set_name(tool.get("name", ""))
            tool_options_builder.set_description(tool.get("description", ""))
            parameters: List[Tuple[str, str, str, bool, List[str] | None]] = tool.get("parameters", [])
            for param in parameters:
                try:
                    assert isinstance(param, tuple) and len(param) == 5, "参数必须是包含5个元素的元组"
                    assert isinstance(param[0], str), "参数名称必须是字符串"
                    assert isinstance(param[1], ToolParamType), "参数类型必须是ToolParamType枚举"
                    assert isinstance(param[2], str), "参数描述必须是字符串"
                    assert isinstance(param[3], bool), "参数是否必填必须是布尔值"
                    assert isinstance(param[4], list) or param[4] is None, "参数枚举值必须是列表或None"
                    tool_options_builder.add_param(
                        name=param[0],
                        param_type=param[1],
                        description=param[2],
                        required=param[3],
                        enum_values=param[4],
                    )
                except AssertionError as ae:
                    tool_legal = False
                    logger.error(f"{param[0]} 参数定义错误: {str(ae)}")
                except Exception as e:
                    tool_legal = False
                    logger.error(f"构建工具参数失败: {str(e)}")
            if tool_legal:
                tool_options.append(tool_options_builder.build())
        return tool_options or None

    @staticmethod
    def _extract_reasoning(content: str) -> Tuple[str, str]:
        """Extract chain-of-thought reasoning from <think> tags (backward compatibility)"""
        match = re.search(r"(?:<think>)?(.*?)</think>", content, re.DOTALL)
        content = re.sub(r"(?:<think>)?.*?</think>", "", content, flags=re.DOTALL, count=1).strip()
        reasoning = match[1].strip() if match else ""
        return content, reasoning
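    # A quick sketch of the extraction above on a hypothetical model reply:
    #
    #     content, reasoning = LLMRequest._extract_reasoning(
    #         "<think>User wants a greeting.</think>Hello!"
    #     )
    #     # content == "Hello!", reasoning == "User wants a greeting."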