Audio transcription: pass the base64 audio string straight to the client.

Summary of the change set
  * BaseClient / OpenaiClient.get_audio_transcriptions take `audio_base64: str`
    instead of `message_list: list[Message]`; OpenaiClient decodes it and
    uploads it as an in-memory file named "audio.wav".
  * MessageBuilder.add_file_content and the BytesIO content variant are removed.
  * The voice request path in LLMRequest no longer builds a Message; it forwards
    the base64 string through `_execute_request(audio_base64=...)`.

Review corrections folded into this revision
  * The audio branch of `_execute_request` still asserted on `message_list`,
    which the voice caller no longer supplies; the guard now checks
    `audio_base64` (the argument that is actually forwarded).
  * Docstring field `:extra_params:` corrected to `:param extra_params:`, and
    the docstrings of the two rewritten methods are now in English.
  * Trailing comma added after the new `audio_base64` parameter.

NOTE(review): the patch this was rebuilt from had its line breaks and
indentation collapsed. Hunk line counts match the original headers, but the
indentation and the position of blank context lines are reconstructed --
confirm against the working tree before applying. The `index` lines are carried
over unchanged, so their post-image hashes no longer describe the edited
content.

diff --git a/src/llm_models/model_client/base_client.py b/src/llm_models/model_client/base_client.py
index 1bc653699..b06f846a4 100644
--- a/src/llm_models/model_client/base_client.py
+++ b/src/llm_models/model_client/base_client.py
@@ -117,13 +117,14 @@ class BaseClient:
     async def get_audio_transcriptions(
         self,
         model_info: ModelInfo,
-        message_list: list[Message],
+        audio_base64: str,
         extra_params: dict[str, Any] | None = None,
     ) -> APIResponse:
         """
-        获取音频转录
-        :param model_info: 模型信息
-        :param message_list: 消息列表,包含音频内容
-        :return: 音频转录响应
+        Get an audio transcription.
+        :param model_info: model information
+        :param audio_base64: base64-encoded audio data
+        :param extra_params: additional request parameters
+        :return: audio transcription response
         """
         raise RuntimeError("This method should be overridden in subclasses")
diff --git a/src/llm_models/model_client/openai_client.py b/src/llm_models/model_client/openai_client.py
index 1bcd54bf2..d7a923faf 100644
--- a/src/llm_models/model_client/openai_client.py
+++ b/src/llm_models/model_client/openai_client.py
@@ -2,6 +2,7 @@ import asyncio
 import io
 import json
 import re
+import base64
 from collections.abc import Iterable
 from typing import Callable, Any, Coroutine, Optional
 from json_repair import repair_json
@@ -536,19 +537,20 @@ class OpenaiClient(BaseClient):
     async def get_audio_transcriptions(
         self,
         model_info: ModelInfo,
-        message_list: list[Message],
+        audio_base64: str,
         extra_params: dict[str, Any] | None = None,
     ) -> APIResponse:
         """
-        获取音频转录
-        :param model_info: 模型信息
-        :param message_list: 消息列表,包含音频内容
-        :return: 转录响应
+        Get an audio transcription.
+        :param model_info: model information
+        :param audio_base64: base64-encoded audio data
+        :param extra_params: additional request parameters
+        :return: audio transcription response
         """
         try:
             raw_response = await self.client.audio.transcriptions.create(
                 model=model_info.model_identifier,
-                file=message_list[0].content[0],
+                file=("audio.wav", io.BytesIO(base64.b64decode(audio_base64))),
                 extra_body=extra_params
             )
         except APIConnectionError as e:
diff --git a/src/llm_models/payload_content/message.py b/src/llm_models/payload_content/message.py
index 71ab67389..e07f473b8 100644
--- a/src/llm_models/payload_content/message.py
+++ b/src/llm_models/payload_content/message.py
@@ -1,6 +1,5 @@
-import base64
 from enum import Enum
-from io import BytesIO
+

 # 设计这系列类的目的是为未来可能的扩展做准备

@@ -34,7 +33,7 @@ class Message:
 class MessageBuilder:
     def __init__(self):
         self.__role: RoleType = RoleType.User
-        self.__content: list[tuple[str, str] | str | tuple[str, BytesIO]] = []
+        self.__content: list[tuple[str, str] | str] = []
         self.__tool_call_id: str | None = None

     def set_role(self, role: RoleType = RoleType.User) -> "MessageBuilder":
@@ -54,20 +53,6 @@ class MessageBuilder:
         """
         self.__content.append(text)
         return self
-
-    def add_file_content(
-        self, file_name: str, file_base64: str
-    ) -> "MessageBuilder":
-        """
-        添加文件内容
-        :param file_name: 文件名(包含类型后缀)
-        :param file_base64: 文件的base64编码
-        :return: MessageBuilder对象
-        """
-        if not file_name or not file_base64:
-            raise ValueError("文件名和base64编码不能为空")
-        self.__content.append((file_name, BytesIO(base64.b64decode(file_base64))))
-        return self

     def add_image_content(
         self, image_format: str, image_base64: str
diff --git a/src/llm_models/utils_model.py b/src/llm_models/utils_model.py
index 8e9bafeb4..53cc7aaae 100644
--- a/src/llm_models/utils_model.py
+++ b/src/llm_models/utils_model.py
@@ -114,11 +114,6 @@ class LLMRequest:
         Returns:
             (Optional[str]): 生成的文本描述或None
         """
-        # 请求体构建
-        message_builder = MessageBuilder()
-        message_builder.add_file_content(file_name="audio.wav", file_base64=voice_base64)
-        messages = [message_builder.build()]
-
         # 模型选择
         model_info, api_provider, client = self._select_model()

@@ -128,7 +123,7 @@ class LLMRequest:
             client=client,
             request_type=RequestType.AUDIO,
             model_info=model_info,
-            message_list=messages,
+            audio_base64=voice_base64,
         )
         return response.content or None

@@ -249,6 +244,7 @@ class LLMRequest:
         temperature: Optional[float] = None,
         max_tokens: Optional[int] = None,
         embedding_input: str = "",
+        audio_base64: str = "",
     ) -> APIResponse:
         """
         实际执行请求的方法
@@ -283,7 +279,7 @@ class LLMRequest:
-                assert message_list is not None, "message_list cannot be None for audio requests"
+                assert audio_base64, "audio_base64 cannot be empty for audio requests"
                 return await client.get_audio_transcriptions(
                     model_info=model_info,
-                    message_list=message_list,
+                    audio_base64=audio_base64,
                     extra_params=model_info.extra_params,
                 )
         except Exception as e: