增加对voice类型消息的支持
This commit is contained in:
@@ -9,6 +9,7 @@ from maim_message import Seg, UserInfo, BaseMessageInfo, MessageBase
|
||||
|
||||
from src.common.logger import get_logger
|
||||
from src.chat.utils.utils_image import get_image_manager
|
||||
from src.chat.utils.utils_voice import get_voice_text
|
||||
from .chat_stream import ChatStream
|
||||
|
||||
install(extra_lines=3)
|
||||
@@ -106,6 +107,7 @@ class MessageRecv(Message):
|
||||
self.has_emoji = False
|
||||
self.is_picid = False
|
||||
self.has_picid = False
|
||||
self.is_voice = False
|
||||
self.is_mentioned = None
|
||||
|
||||
self.is_command = False
|
||||
@@ -156,6 +158,14 @@ class MessageRecv(Message):
|
||||
if isinstance(segment.data, str):
|
||||
return await get_image_manager().get_emoji_description(segment.data)
|
||||
return "[发了一个表情包,网卡了加载不出来]"
|
||||
elif segment.type == "voice":
|
||||
self.has_picid = False
|
||||
self.is_picid = False
|
||||
self.is_emoji = False
|
||||
self.is_voice == True
|
||||
if isinstance(segment.data, str):
|
||||
return await get_voice_text(segment.data)
|
||||
return "[发了一段语音,网卡了加载不出来]"
|
||||
elif segment.type == "mention_bot":
|
||||
self.is_picid = False
|
||||
self.is_emoji = False
|
||||
@@ -233,6 +243,14 @@ class MessageRecvS4U(MessageRecv):
|
||||
if isinstance(segment.data, str):
|
||||
return await get_image_manager().get_emoji_description(segment.data)
|
||||
return "[发了一个表情包,网卡了加载不出来]"
|
||||
elif segment.type == "voice":
|
||||
self.has_picid = False
|
||||
self.is_picid = False
|
||||
self.is_emoji = False
|
||||
self.is_voice == True
|
||||
if isinstance(segment.data, str):
|
||||
return await get_voice_text(segment.data)
|
||||
return "[发了一段语音,网卡了加载不出来]"
|
||||
elif segment.type == "mention_bot":
|
||||
self.is_picid = False
|
||||
self.is_emoji = False
|
||||
@@ -343,6 +361,10 @@ class MessageProcessBase(Message):
|
||||
if isinstance(seg.data, str):
|
||||
return await get_image_manager().get_emoji_description(seg.data)
|
||||
return "[表情,网卡了加载不出来]"
|
||||
elif seg.type == "voice":
|
||||
if isinstance(seg.data, str):
|
||||
return await get_voice_text(seg.data)
|
||||
return "[发了一段语音,网卡了加载不出来]"
|
||||
elif seg.type == "at":
|
||||
return f"[@{seg.data}]"
|
||||
elif seg.type == "reply":
|
||||
|
||||
46
src/chat/utils/utils_voice.py
Normal file
46
src/chat/utils/utils_voice.py
Normal file
@@ -0,0 +1,46 @@
|
||||
import base64
|
||||
import os
|
||||
import time
|
||||
import hashlib
|
||||
import uuid
|
||||
from typing import Optional, Tuple
|
||||
from PIL import Image
|
||||
import io
|
||||
import numpy as np
|
||||
import asyncio
|
||||
|
||||
|
||||
from src.common.database.database import db
|
||||
from src.common.database.database_model import Images, ImageDescriptions
|
||||
from src.config.config import global_config
|
||||
from src.llm_models.utils_model import LLMRequest
|
||||
|
||||
from src.common.logger import get_logger
|
||||
from rich.traceback import install
|
||||
import traceback
|
||||
install(extra_lines=3)
|
||||
|
||||
logger = get_logger("chat_voice")
|
||||
|
||||
async def get_voice_text(voice_base64: str) -> str:
|
||||
"""获取音频文件描述"""
|
||||
try:
|
||||
# 计算图片哈希
|
||||
# 确保base64字符串只包含ASCII字符
|
||||
if isinstance(voice_base64, str):
|
||||
voice_base64 = voice_base64.encode("ascii", errors="ignore").decode("ascii")
|
||||
voice_bytes = base64.b64decode(voice_base64)
|
||||
_llm = LLMRequest(model=global_config.model.voice, request_type="voice")
|
||||
text = await _llm.generate_response_for_voice(voice_bytes)
|
||||
if text is None:
|
||||
logger.warning("未能生成语音文本")
|
||||
return "[语音(文本生成失败)]"
|
||||
|
||||
logger.debug(f"描述是{text}")
|
||||
|
||||
return f"[语音:{text}]"
|
||||
except Exception as e:
|
||||
traceback.print_exc()
|
||||
logger.error(f"语音转文字失败: {str(e)}")
|
||||
return "[语音]"
|
||||
|
||||
Reference in New Issue
Block a user