From 3d9f1a1d5ac69f8f4de2dc235fd73493851d5a82 Mon Sep 17 00:00:00 2001 From: Windpicker-owo <3431391539@qq.com> Date: Fri, 18 Jul 2025 13:02:38 +0800 Subject: [PATCH] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E4=BA=86enable=5Fasr?= =?UTF-8?q?=E9=85=8D=E7=BD=AE=E9=80=89=E9=A1=B9=EF=BC=8C=E6=9B=B4=E6=94=B9?= =?UTF-8?q?=E4=B8=80=E5=A4=84=E6=BD=9C=E5=9C=A8=E7=9A=84bug?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/chat/utils/utils_voice.py | 3 +++ src/config/official_configs.py | 3 +++ src/llm_models/utils_model.py | 2 +- template/bot_config_template.toml | 1 + 4 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/chat/utils/utils_voice.py b/src/chat/utils/utils_voice.py index feab92cf0..1bc3e7dda 100644 --- a/src/chat/utils/utils_voice.py +++ b/src/chat/utils/utils_voice.py @@ -11,6 +11,9 @@ logger = get_logger("chat_voice") async def get_voice_text(voice_base64: str) -> str: """获取音频文件描述""" + if not global_config.chat.enable_asr: + logger.warning("语音识别未启用,无法处理语音消息") + return "[语音]" try: # 解码base64音频数据 # 确保base64字符串只包含ASCII字符 diff --git a/src/config/official_configs.py b/src/config/official_configs.py index 68d9468e1..be3ac1834 100644 --- a/src/config/official_configs.py +++ b/src/config/official_configs.py @@ -106,6 +106,9 @@ class ChatConfig(ConfigBase): focus_value: float = 1.0 """麦麦的专注思考能力,越低越容易专注,消耗token也越多""" + enable_asr: bool = False + """是否启用语音识别""" + def get_current_talk_frequency(self, chat_stream_id: Optional[str] = None) -> float: """ 根据当前时间和聊天流获取对应的 talk_frequency diff --git a/src/llm_models/utils_model.py b/src/llm_models/utils_model.py index 511835c83..215b0f739 100644 --- a/src/llm_models/utils_model.py +++ b/src/llm_models/utils_model.py @@ -684,7 +684,7 @@ class LLMRequest: data.add_field( "file",io.BytesIO(file_bytes), filename=f"file.{file_format}", - content_type=f'{content_type_list[file_format]}' # 根据实际文件类型设置 + content_type=f'{content_type}' # 根据实际文件类型设置 ) data.add_field( "model", self.model_name diff --git a/template/bot_config_template.toml b/template/bot_config_template.toml index 87110f329..3b21dae38 100644 --- a/template/bot_config_template.toml +++ b/template/bot_config_template.toml @@ -87,6 +87,7 @@ talk_frequency_adjust = [ # - 时间支持跨天,例如 "00:10,0.3" 表示从凌晨0:10开始使用频率0.3 # - 系统会自动将 "platform:id:type" 转换为内部的哈希chat_id进行匹配 +enable_asr = false # 是否启用语音识别,启用后麦麦可以通过语音输入进行对话,启用该功能需要配置语音识别模型[model.voice] [message_receive] # 以下是消息过滤,可以根据规则过滤特定消息,将不会读取这些消息