增加对voice类型消息的支持

This commit is contained in:
Windpicker-owo
2025-07-17 14:50:19 +08:00
parent 8768b5d31b
commit 587aca4d18
5 changed files with 157 additions and 27 deletions

View File

@@ -9,6 +9,7 @@ from maim_message import Seg, UserInfo, BaseMessageInfo, MessageBase
from src.common.logger import get_logger
from src.chat.utils.utils_image import get_image_manager
from src.chat.utils.utils_voice import get_voice_text
from .chat_stream import ChatStream
install(extra_lines=3)
@@ -106,6 +107,7 @@ class MessageRecv(Message):
self.has_emoji = False
self.is_picid = False
self.has_picid = False
self.is_voice = False
self.is_mentioned = None
self.is_command = False
@@ -156,6 +158,14 @@ class MessageRecv(Message):
if isinstance(segment.data, str):
return await get_image_manager().get_emoji_description(segment.data)
return "[发了一个表情包,网卡了加载不出来]"
elif segment.type == "voice":
self.has_picid = False
self.is_picid = False
self.is_emoji = False
self.is_voice == True
if isinstance(segment.data, str):
return await get_voice_text(segment.data)
return "[发了一段语音,网卡了加载不出来]"
elif segment.type == "mention_bot":
self.is_picid = False
self.is_emoji = False
@@ -233,6 +243,14 @@ class MessageRecvS4U(MessageRecv):
if isinstance(segment.data, str):
return await get_image_manager().get_emoji_description(segment.data)
return "[发了一个表情包,网卡了加载不出来]"
elif segment.type == "voice":
self.has_picid = False
self.is_picid = False
self.is_emoji = False
self.is_voice == True
if isinstance(segment.data, str):
return await get_voice_text(segment.data)
return "[发了一段语音,网卡了加载不出来]"
elif segment.type == "mention_bot":
self.is_picid = False
self.is_emoji = False
@@ -343,6 +361,10 @@ class MessageProcessBase(Message):
if isinstance(seg.data, str):
return await get_image_manager().get_emoji_description(seg.data)
return "[表情,网卡了加载不出来]"
elif seg.type == "voice":
if isinstance(seg.data, str):
return await get_voice_text(seg.data)
return "[发了一段语音,网卡了加载不出来]"
elif seg.type == "at":
return f"[@{seg.data}]"
elif seg.type == "reply":

View File

@@ -0,0 +1,46 @@
import base64
import os
import time
import hashlib
import uuid
from typing import Optional, Tuple
from PIL import Image
import io
import numpy as np
import asyncio
from src.common.database.database import db
from src.common.database.database_model import Images, ImageDescriptions
from src.config.config import global_config
from src.llm_models.utils_model import LLMRequest
from src.common.logger import get_logger
from rich.traceback import install
import traceback
# Install the traceback handler imported from rich.traceback, with extra_lines=3.
install(extra_lines=3)
# Module-level logger obtained from the project logger factory under the name "chat_voice".
logger = get_logger("chat_voice")
async def get_voice_text(voice_base64: str) -> str:
    """Turn a base64-encoded voice clip into a bracketed text tag.

    The payload is decoded to raw bytes and passed to
    ``LLMRequest.generate_response_for_voice`` using the model configured at
    ``global_config.model.voice`` (presumably a speech-to-text model; confirm
    against the LLMRequest implementation).

    Args:
        voice_base64: Base64 text of the audio data. Non-ASCII characters are
            dropped before decoding.

    Returns:
        ``"[语音:<text>]"`` when a transcription is produced,
        ``"[语音(文本生成失败)]"`` when the model yields no text, and
        ``"[语音]"`` when the payload is empty or any exception occurs.
        This function never raises; every failure maps to a placeholder.
    """
    try:
        # Keep only ASCII characters so stray non-ASCII bytes in the payload
        # cannot make the base64 decoder fail.
        if isinstance(voice_base64, str):
            voice_base64 = voice_base64.encode("ascii", errors="ignore").decode("ascii")
        voice_bytes = base64.b64decode(voice_base64)

        # Nothing to transcribe: skip the model call and use the same
        # placeholder as the generic failure path.
        if not voice_bytes:
            logger.warning("语音数据为空,跳过语音识别")
            return "[语音]"

        _llm = LLMRequest(model=global_config.model.voice, request_type="voice")
        text = await _llm.generate_response_for_voice(voice_bytes)

        # Treat an empty transcription like a missing one, so the caller
        # never receives a contentless "[语音:]" tag.
        if not text:
            logger.warning("未能生成语音文本")
            return "[语音(文本生成失败)]"

        logger.debug(f"描述是{text}")
        return f"[语音:{text}]"
    except Exception as e:
        # Top-level boundary for voice handling: report the failure and
        # degrade to a placeholder instead of breaking message processing.
        traceback.print_exc()
        logger.error(f"语音转文字失败: {e}")
        return "[语音]"