修复了语音识别功能

This commit is contained in:
Windpicker-owo
2025-08-01 03:32:00 +08:00
parent 17d6aeefab
commit 25cb8d41bb
5 changed files with 99 additions and 10 deletions

View File

@@ -15,13 +15,8 @@ async def get_voice_text(voice_base64: str) -> str:
logger.warning("语音识别未启用,无法处理语音消息")
return "[语音]"
try:
# 解码base64音频数据
# 确保base64字符串只包含ASCII字符
if isinstance(voice_base64, str):
voice_base64 = voice_base64.encode("ascii", errors="ignore").decode("ascii")
voice_bytes = base64.b64decode(voice_base64)
_llm = LLMRequest(model_set=model_config.model_task_config.voice, request_type="voice")
text = await _llm.generate_response_for_voice(voice_bytes)
text = await _llm.generate_response_for_voice(voice_base64)
if text is None:
logger.warning("未能生成语音文本")
return "[语音(文本生成失败)]"

View File

@@ -113,6 +113,20 @@ class BaseClient:
:return: 嵌入响应
"""
raise RuntimeError("This method should be overridden in subclasses")
async def get_audio_transcriptions(
    self,
    model_info: ModelInfo,
    message_list: list[Message],
    extra_params: dict[str, Any] | None = None,
) -> APIResponse:
    """
    Get an audio transcription (placeholder meant to be overridden).

    :param model_info: Model information
    :param message_list: Message list that carries the audio content
    :param extra_params: Optional extra parameters; not used by this placeholder
    :return: Audio transcription response
    :raises RuntimeError: Always, since this version provides no implementation
    """
    raise RuntimeError("This method should be overridden in subclasses")
class ClientRegistry:

View File

@@ -532,3 +532,37 @@ class OpenaiClient(BaseClient):
)
return response
async def get_audio_transcriptions(
    self,
    model_info: ModelInfo,
    message_list: list[Message],
    extra_params: dict[str, Any] | None = None,
) -> APIResponse:
    """
    Request an audio transcription and wrap the result in an APIResponse.

    :param model_info: Model information; its ``model_identifier`` is sent as the model
    :param message_list: Message list; the first content item of the first
        message is sent as the ``file`` argument
    :param extra_params: Optional extra parameters, forwarded as ``extra_body``
    :return: Response whose ``content`` is the ``text`` of the raw response
    :raises ValueError: If ``message_list`` is empty or its first message has no content
    :raises NetworkConnectionError: If the client raises APIConnectionError
    :raises RespNotOkException: If the client raises APIStatusError
    :raises RespParseException: If the raw response has no ``text`` attribute
    """
    # Fail early with a clear message instead of an opaque IndexError
    # from the indexing below.
    if not message_list or not message_list[0].content:
        raise ValueError(
            "message_list must contain a message with audio file content"
        )
    audio_file = message_list[0].content[0]

    try:
        raw_response = await self.client.audio.transcriptions.create(
            model=model_info.model_identifier,
            file=audio_file,
            extra_body=extra_params,
        )
    except APIConnectionError as e:
        raise NetworkConnectionError() from e
    except APIStatusError as e:
        # Re-wrap the API status error as RespNotOkException
        raise RespNotOkException(e.status_code) from e

    response = APIResponse()
    # Parse the transcription response
    if not hasattr(raw_response, "text"):
        raise RespParseException(
            raw_response,
            "响应解析失败,缺失转录文本。",
        )
    response.content = raw_response.text
    return response

View File

@@ -1,5 +1,6 @@
import base64
from enum import Enum
from io import BytesIO
# 设计这系列类的目的是为未来可能的扩展做准备
@@ -54,6 +55,20 @@ class MessageBuilder:
self.__content.append(text)
return self
def add_file_content(
    self, file_name: str, file_base64: str
) -> "MessageBuilder":
    """
    Append a file to the message content.

    The Base64 payload is decoded and stored together with the file name
    as a ``(file_name, BytesIO)`` tuple.

    :param file_name: File name (including the type suffix)
    :param file_base64: Base64 encoding of the file
    :return: This MessageBuilder, so calls can be chained
    :raises ValueError: If the file name or the Base64 string is empty
    """
    if not (file_name and file_base64):
        raise ValueError("文件名和base64编码不能为空")

    decoded = base64.b64decode(file_base64)
    file_entry = (file_name, BytesIO(decoded))
    self.__content.append(file_entry)
    return self
def add_image_content(
self, image_format: str, image_base64: str
) -> "MessageBuilder":

View File

@@ -38,7 +38,7 @@ class RequestType(Enum):
RESPONSE = "response"
EMBEDDING = "embedding"
AUDIO = "audio"
class LLMRequest:
"""LLM请求类"""
@@ -106,8 +106,32 @@ class LLMRequest:
)
return content, (reasoning_content, model_info.name, tool_calls)
async def generate_response_for_voice(self, voice_base64: str) -> Optional[str]:
    """
    Generate a text response for a voice clip.

    Args:
        voice_base64 (str): Base64-encoded voice data
    Returns:
        (Optional[str]): The response content, or None when it is empty
    """
    # Build the request body; the clip is attached under the file name "audio.wav"
    message_builder = MessageBuilder()
    message_builder.add_file_content(file_name="audio.wav", file_base64=voice_base64)
    messages = [message_builder.build()]

    # Select the model
    model_info, api_provider, client = self._select_model()

    # Send the request and process the return value
    response = await self._execute_request(
        api_provider=api_provider,
        client=client,
        request_type=RequestType.AUDIO,
        model_info=model_info,
        message_list=messages,
    )
    return response.content or None
async def generate_response_async(
self,
@@ -255,6 +279,13 @@ class LLMRequest:
embedding_input=embedding_input,
extra_params=model_info.extra_params,
)
elif request_type == RequestType.AUDIO:
assert message_list is not None, "message_list cannot be None for audio requests"
return await client.get_audio_transcriptions(
model_info=model_info,
message_list=message_list,
extra_params=model_info.extra_params,
)
except Exception as e:
logger.debug(f"请求失败: {str(e)}")
# 处理异常