diff --git a/src/chat/utils/utils_voice.py b/src/chat/utils/utils_voice.py
index feab92cf0..1bc3e7dda 100644
--- a/src/chat/utils/utils_voice.py
+++ b/src/chat/utils/utils_voice.py
@@ -11,6 +11,9 @@ logger = get_logger("chat_voice")
 
 async def get_voice_text(voice_base64: str) -> str:
     """获取音频文件描述"""
+    if not global_config.chat.enable_asr:
+        logger.warning("语音识别未启用，无法处理语音消息")
+        return "[语音]"
     try:
         # 解码base64音频数据
         # 确保base64字符串只包含ASCII字符
diff --git a/src/config/official_configs.py b/src/config/official_configs.py
index 68d9468e1..be3ac1834 100644
--- a/src/config/official_configs.py
+++ b/src/config/official_configs.py
@@ -106,6 +106,9 @@ class ChatConfig(ConfigBase):
     focus_value: float = 1.0
     """麦麦的专注思考能力，越低越容易专注，消耗token也越多"""
 
+    enable_asr: bool = False
+    """是否启用语音识别"""
+
     def get_current_talk_frequency(self, chat_stream_id: Optional[str] = None) -> float:
         """
         根据当前时间和聊天流获取对应的 talk_frequency
diff --git a/src/llm_models/utils_model.py b/src/llm_models/utils_model.py
index 511835c83..215b0f739 100644
--- a/src/llm_models/utils_model.py
+++ b/src/llm_models/utils_model.py
@@ -684,7 +684,7 @@ class LLMRequest:
         data.add_field(
             "file",io.BytesIO(file_bytes),
             filename=f"file.{file_format}",
-            content_type=f'{content_type_list[file_format]}' # 根据实际文件类型设置
+            content_type=f'{content_type}' # 根据实际文件类型设置
         )
         data.add_field(
             "model", self.model_name
diff --git a/template/bot_config_template.toml b/template/bot_config_template.toml
index 87110f329..3b21dae38 100644
--- a/template/bot_config_template.toml
+++ b/template/bot_config_template.toml
@@ -87,6 +87,7 @@ talk_frequency_adjust = [
 # - 时间支持跨天，例如 "00:10,0.3" 表示从凌晨0:10开始使用频率0.3
 # - 系统会自动将 "platform:id:type" 转换为内部的哈希chat_id进行匹配
 
+enable_asr = false # 是否启用语音识别。启用后麦麦可以识别语音消息并据此进行对话；启用前需要先配置语音识别模型 [model.voice]。关闭时语音消息不会被识别，仅以 "[语音]" 占位
 
 [message_receive]
 # 以下是消息过滤，可以根据规则过滤特定消息，将不会读取这些消息