@@ -9,6 +9,7 @@ from maim_message import Seg, UserInfo, BaseMessageInfo, MessageBase
|
|||||||
|
|
||||||
from src.common.logger import get_logger
|
from src.common.logger import get_logger
|
||||||
from src.chat.utils.utils_image import get_image_manager
|
from src.chat.utils.utils_image import get_image_manager
|
||||||
|
from src.chat.utils.utils_voice import get_voice_text
|
||||||
from .chat_stream import ChatStream
|
from .chat_stream import ChatStream
|
||||||
|
|
||||||
install(extra_lines=3)
|
install(extra_lines=3)
|
||||||
@@ -106,6 +107,7 @@ class MessageRecv(Message):
|
|||||||
self.has_emoji = False
|
self.has_emoji = False
|
||||||
self.is_picid = False
|
self.is_picid = False
|
||||||
self.has_picid = False
|
self.has_picid = False
|
||||||
|
self.is_voice = False
|
||||||
self.is_mentioned = None
|
self.is_mentioned = None
|
||||||
|
|
||||||
self.is_command = False
|
self.is_command = False
|
||||||
@@ -153,17 +155,27 @@ class MessageRecv(Message):
|
|||||||
self.has_emoji = True
|
self.has_emoji = True
|
||||||
self.is_emoji = True
|
self.is_emoji = True
|
||||||
self.is_picid = False
|
self.is_picid = False
|
||||||
|
self.is_voice = False
|
||||||
if isinstance(segment.data, str):
|
if isinstance(segment.data, str):
|
||||||
return await get_image_manager().get_emoji_description(segment.data)
|
return await get_image_manager().get_emoji_description(segment.data)
|
||||||
return "[发了一个表情包,网卡了加载不出来]"
|
return "[发了一个表情包,网卡了加载不出来]"
|
||||||
|
elif segment.type == "voice":
|
||||||
|
self.is_picid = False
|
||||||
|
self.is_emoji = False
|
||||||
|
self.is_voice = True
|
||||||
|
if isinstance(segment.data, str):
|
||||||
|
return await get_voice_text(segment.data)
|
||||||
|
return "[发了一段语音,网卡了加载不出来]"
|
||||||
elif segment.type == "mention_bot":
|
elif segment.type == "mention_bot":
|
||||||
self.is_picid = False
|
self.is_picid = False
|
||||||
self.is_emoji = False
|
self.is_emoji = False
|
||||||
|
self.is_voice = False
|
||||||
self.is_mentioned = float(segment.data) # type: ignore
|
self.is_mentioned = float(segment.data) # type: ignore
|
||||||
return ""
|
return ""
|
||||||
elif segment.type == "priority_info":
|
elif segment.type == "priority_info":
|
||||||
self.is_picid = False
|
self.is_picid = False
|
||||||
self.is_emoji = False
|
self.is_emoji = False
|
||||||
|
self.is_voice = False
|
||||||
if isinstance(segment.data, dict):
|
if isinstance(segment.data, dict):
|
||||||
# 处理优先级信息
|
# 处理优先级信息
|
||||||
self.priority_mode = "priority"
|
self.priority_mode = "priority"
|
||||||
@@ -212,10 +224,12 @@ class MessageRecvS4U(MessageRecv):
|
|||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
if segment.type == "text":
|
if segment.type == "text":
|
||||||
|
self.is_voice = False
|
||||||
self.is_picid = False
|
self.is_picid = False
|
||||||
self.is_emoji = False
|
self.is_emoji = False
|
||||||
return segment.data # type: ignore
|
return segment.data # type: ignore
|
||||||
elif segment.type == "image":
|
elif segment.type == "image":
|
||||||
|
self.is_voice = False
|
||||||
# 如果是base64图片数据
|
# 如果是base64图片数据
|
||||||
if isinstance(segment.data, str):
|
if isinstance(segment.data, str):
|
||||||
self.has_picid = True
|
self.has_picid = True
|
||||||
@@ -233,12 +247,22 @@ class MessageRecvS4U(MessageRecv):
|
|||||||
if isinstance(segment.data, str):
|
if isinstance(segment.data, str):
|
||||||
return await get_image_manager().get_emoji_description(segment.data)
|
return await get_image_manager().get_emoji_description(segment.data)
|
||||||
return "[发了一个表情包,网卡了加载不出来]"
|
return "[发了一个表情包,网卡了加载不出来]"
|
||||||
|
elif segment.type == "voice":
|
||||||
|
self.has_picid = False
|
||||||
|
self.is_picid = False
|
||||||
|
self.is_emoji = False
|
||||||
|
self.is_voice = True
|
||||||
|
if isinstance(segment.data, str):
|
||||||
|
return await get_voice_text(segment.data)
|
||||||
|
return "[发了一段语音,网卡了加载不出来]"
|
||||||
elif segment.type == "mention_bot":
|
elif segment.type == "mention_bot":
|
||||||
|
self.is_voice = False
|
||||||
self.is_picid = False
|
self.is_picid = False
|
||||||
self.is_emoji = False
|
self.is_emoji = False
|
||||||
self.is_mentioned = float(segment.data) # type: ignore
|
self.is_mentioned = float(segment.data) # type: ignore
|
||||||
return ""
|
return ""
|
||||||
elif segment.type == "priority_info":
|
elif segment.type == "priority_info":
|
||||||
|
self.is_voice = False
|
||||||
self.is_picid = False
|
self.is_picid = False
|
||||||
self.is_emoji = False
|
self.is_emoji = False
|
||||||
if isinstance(segment.data, dict):
|
if isinstance(segment.data, dict):
|
||||||
@@ -253,6 +277,7 @@ class MessageRecvS4U(MessageRecv):
|
|||||||
"""
|
"""
|
||||||
return ""
|
return ""
|
||||||
elif segment.type == "gift":
|
elif segment.type == "gift":
|
||||||
|
self.is_voice = False
|
||||||
self.is_gift = True
|
self.is_gift = True
|
||||||
# 解析gift_info,格式为"名称:数量"
|
# 解析gift_info,格式为"名称:数量"
|
||||||
name, count = segment.data.split(":", 1) # type: ignore
|
name, count = segment.data.split(":", 1) # type: ignore
|
||||||
@@ -343,6 +368,10 @@ class MessageProcessBase(Message):
|
|||||||
if isinstance(seg.data, str):
|
if isinstance(seg.data, str):
|
||||||
return await get_image_manager().get_emoji_description(seg.data)
|
return await get_image_manager().get_emoji_description(seg.data)
|
||||||
return "[表情,网卡了加载不出来]"
|
return "[表情,网卡了加载不出来]"
|
||||||
|
elif seg.type == "voice":
|
||||||
|
if isinstance(seg.data, str):
|
||||||
|
return await get_voice_text(seg.data)
|
||||||
|
return "[发了一段语音,网卡了加载不出来]"
|
||||||
elif seg.type == "at":
|
elif seg.type == "at":
|
||||||
return f"[@{seg.data}]"
|
return f"[@{seg.data}]"
|
||||||
elif seg.type == "reply":
|
elif seg.type == "reply":
|
||||||
|
|||||||
35
src/chat/utils/utils_voice.py
Normal file
35
src/chat/utils/utils_voice.py
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
import base64
|
||||||
|
|
||||||
|
from src.config.config import global_config
|
||||||
|
from src.llm_models.utils_model import LLMRequest
|
||||||
|
|
||||||
|
from src.common.logger import get_logger
|
||||||
|
from rich.traceback import install
|
||||||
|
install(extra_lines=3)
|
||||||
|
|
||||||
|
logger = get_logger("chat_voice")
|
||||||
|
|
||||||
|
async def get_voice_text(voice_base64: str) -> str:
    """Transcribe a base64-encoded voice message into a bracketed text placeholder.

    Args:
        voice_base64: Base64 text of the audio payload.

    Returns:
        ``"[语音:<transcript>]"`` on success, ``"[语音(文本生成失败)]"`` when the
        model produced no usable transcript, and ``"[语音]"`` when speech
        recognition is disabled, the audio is empty, or any step raised.
        Exceptions are logged and never propagated to the caller.
    """
    if not global_config.chat.enable_asr:
        logger.warning("语音识别未启用,无法处理语音消息")
        return "[语音]"
    try:
        # Drop any non-ASCII characters so the base64 decoder only sees
        # characters it can handle.
        if isinstance(voice_base64, str):
            voice_base64 = voice_base64.encode("ascii", errors="ignore").decode("ascii")
        voice_bytes = base64.b64decode(voice_base64)

        # The request builder picks the form-data upload path with a truthiness
        # test on the bytes, so an empty payload would be sent as an ordinary
        # JSON request to the transcription endpoint. Stop here instead.
        if not voice_bytes:
            logger.warning("语音数据为空,无法识别")
            return "[语音]"

        _llm = LLMRequest(model=global_config.model.voice, request_type="voice")
        text = await _llm.generate_response_for_voice(voice_bytes)

        # A successful transcription is a plain string. The response handler's
        # fallback is a tuple, which must not be formatted into the message.
        if not isinstance(text, str) or not text.strip():
            logger.warning("未能生成语音文本")
            return "[语音(文本生成失败)]"

        logger.debug(f"描述是{text}")
        return f"[语音:{text}]"
    except Exception as e:
        # Deliberate best-effort: a failed transcription must not break
        # message processing, so degrade to the generic placeholder.
        logger.error(f"语音转文字失败: {str(e)}")
        return "[语音]"
|
||||||
|
|
||||||
@@ -106,6 +106,9 @@ class ChatConfig(ConfigBase):
|
|||||||
focus_value: float = 1.0
|
focus_value: float = 1.0
|
||||||
"""麦麦的专注思考能力,越低越容易专注,消耗token也越多"""
|
"""麦麦的专注思考能力,越低越容易专注,消耗token也越多"""
|
||||||
|
|
||||||
|
enable_asr: bool = False
|
||||||
|
"""是否启用语音识别"""
|
||||||
|
|
||||||
def get_current_talk_frequency(self, chat_stream_id: Optional[str] = None) -> float:
|
def get_current_talk_frequency(self, chat_stream_id: Optional[str] = None) -> float:
|
||||||
"""
|
"""
|
||||||
根据当前时间和聊天流获取对应的 talk_frequency
|
根据当前时间和聊天流获取对应的 talk_frequency
|
||||||
@@ -630,6 +633,9 @@ class ModelConfig(ConfigBase):
|
|||||||
vlm: dict[str, Any] = field(default_factory=lambda: {})
|
vlm: dict[str, Any] = field(default_factory=lambda: {})
|
||||||
"""视觉语言模型配置"""
|
"""视觉语言模型配置"""
|
||||||
|
|
||||||
|
voice: dict[str, Any] = field(default_factory=lambda: {})
|
||||||
|
"""语音识别模型配置"""
|
||||||
|
|
||||||
tool_use: dict[str, Any] = field(default_factory=lambda: {})
|
tool_use: dict[str, Any] = field(default_factory=lambda: {})
|
||||||
"""专注工具使用模型配置"""
|
"""专注工具使用模型配置"""
|
||||||
|
|
||||||
|
|||||||
@@ -216,6 +216,8 @@ class LLMRequest:
|
|||||||
prompt: str = None,
|
prompt: str = None,
|
||||||
image_base64: str = None,
|
image_base64: str = None,
|
||||||
image_format: str = None,
|
image_format: str = None,
|
||||||
|
file_bytes: bytes = None,
|
||||||
|
file_format: str = None,
|
||||||
payload: dict = None,
|
payload: dict = None,
|
||||||
retry_policy: dict = None,
|
retry_policy: dict = None,
|
||||||
) -> Dict[str, Any]:
|
) -> Dict[str, Any]:
|
||||||
@@ -225,6 +227,8 @@ class LLMRequest:
|
|||||||
prompt: prompt文本
|
prompt: prompt文本
|
||||||
image_base64: 图片的base64编码
|
image_base64: 图片的base64编码
|
||||||
image_format: 图片格式
|
image_format: 图片格式
|
||||||
|
file_bytes: 文件的二进制数据
|
||||||
|
file_format: 文件格式
|
||||||
payload: 请求体数据
|
payload: 请求体数据
|
||||||
retry_policy: 自定义重试策略
|
retry_policy: 自定义重试策略
|
||||||
request_type: 请求类型
|
request_type: 请求类型
|
||||||
@@ -246,9 +250,12 @@ class LLMRequest:
|
|||||||
# 构建请求体
|
# 构建请求体
|
||||||
if image_base64:
|
if image_base64:
|
||||||
payload = await self._build_payload(prompt, image_base64, image_format)
|
payload = await self._build_payload(prompt, image_base64, image_format)
|
||||||
|
elif file_bytes:
|
||||||
|
payload = await self._build_formdata_payload(file_bytes, file_format)
|
||||||
elif payload is None:
|
elif payload is None:
|
||||||
payload = await self._build_payload(prompt)
|
payload = await self._build_payload(prompt)
|
||||||
|
|
||||||
|
if not file_bytes:
|
||||||
if stream_mode:
|
if stream_mode:
|
||||||
payload["stream"] = stream_mode
|
payload["stream"] = stream_mode
|
||||||
|
|
||||||
@@ -278,6 +285,8 @@ class LLMRequest:
|
|||||||
"stream_mode": stream_mode,
|
"stream_mode": stream_mode,
|
||||||
"image_base64": image_base64, # 保留必要的exception处理所需的原始数据
|
"image_base64": image_base64, # 保留必要的exception处理所需的原始数据
|
||||||
"image_format": image_format,
|
"image_format": image_format,
|
||||||
|
"file_bytes": file_bytes,
|
||||||
|
"file_format": file_format,
|
||||||
"prompt": prompt,
|
"prompt": prompt,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -287,6 +296,8 @@ class LLMRequest:
|
|||||||
prompt: str = None,
|
prompt: str = None,
|
||||||
image_base64: str = None,
|
image_base64: str = None,
|
||||||
image_format: str = None,
|
image_format: str = None,
|
||||||
|
file_bytes: bytes = None,
|
||||||
|
file_format: str = None,
|
||||||
payload: dict = None,
|
payload: dict = None,
|
||||||
retry_policy: dict = None,
|
retry_policy: dict = None,
|
||||||
response_handler: callable = None,
|
response_handler: callable = None,
|
||||||
@@ -299,6 +310,8 @@ class LLMRequest:
|
|||||||
prompt: prompt文本
|
prompt: prompt文本
|
||||||
image_base64: 图片的base64编码
|
image_base64: 图片的base64编码
|
||||||
image_format: 图片格式
|
image_format: 图片格式
|
||||||
|
file_bytes: 文件的二进制数据
|
||||||
|
file_format: 文件格式
|
||||||
payload: 请求体数据
|
payload: 请求体数据
|
||||||
retry_policy: 自定义重试策略
|
retry_policy: 自定义重试策略
|
||||||
response_handler: 自定义响应处理器
|
response_handler: 自定义响应处理器
|
||||||
@@ -307,25 +320,36 @@ class LLMRequest:
|
|||||||
"""
|
"""
|
||||||
# 获取请求配置
|
# 获取请求配置
|
||||||
request_content = await self._prepare_request(
|
request_content = await self._prepare_request(
|
||||||
endpoint, prompt, image_base64, image_format, payload, retry_policy
|
endpoint, prompt, image_base64, image_format, file_bytes, file_format, payload, retry_policy
|
||||||
)
|
)
|
||||||
if request_type is None:
|
if request_type is None:
|
||||||
request_type = self.request_type
|
request_type = self.request_type
|
||||||
for retry in range(request_content["policy"]["max_retries"]):
|
for retry in range(request_content["policy"]["max_retries"]):
|
||||||
try:
|
try:
|
||||||
# 使用上下文管理器处理会话
|
# 使用上下文管理器处理会话
|
||||||
headers = await self._build_headers()
|
if file_bytes:
|
||||||
|
headers = await self._build_headers(is_formdata=True)
|
||||||
|
else:
|
||||||
|
headers = await self._build_headers(is_formdata=False)
|
||||||
# 似乎是openai流式必须要的东西,不过阿里云的qwq-plus加了这个没有影响
|
# 似乎是openai流式必须要的东西,不过阿里云的qwq-plus加了这个没有影响
|
||||||
if request_content["stream_mode"]:
|
if request_content["stream_mode"]:
|
||||||
headers["Accept"] = "text/event-stream"
|
headers["Accept"] = "text/event-stream"
|
||||||
async with aiohttp.ClientSession(connector=await get_tcp_connector()) as session:
|
async with aiohttp.ClientSession(connector=await get_tcp_connector()) as session:
|
||||||
|
post_kwargs = {"headers": headers}
|
||||||
|
#form-data数据上传方式不同
|
||||||
|
if file_bytes:
|
||||||
|
post_kwargs["data"] = request_content["payload"]
|
||||||
|
else:
|
||||||
|
post_kwargs["json"] = request_content["payload"]
|
||||||
|
|
||||||
async with session.post(
|
async with session.post(
|
||||||
request_content["api_url"], headers=headers, json=request_content["payload"]
|
request_content["api_url"], **post_kwargs
|
||||||
) as response:
|
) as response:
|
||||||
handled_result = await self._handle_response(
|
handled_result = await self._handle_response(
|
||||||
response, request_content, retry, response_handler, user_id, request_type, endpoint
|
response, request_content, retry, response_handler, user_id, request_type, endpoint
|
||||||
)
|
)
|
||||||
return handled_result
|
return handled_result
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
handled_payload, count_delta = await self._handle_exception(e, retry, request_content)
|
handled_payload, count_delta = await self._handle_exception(e, retry, request_content)
|
||||||
retry += count_delta # 降级不计入重试次数
|
retry += count_delta # 降级不计入重试次数
|
||||||
@@ -640,6 +664,33 @@ class LLMRequest:
|
|||||||
new_params["max_completion_tokens"] = new_params.pop("max_tokens")
|
new_params["max_completion_tokens"] = new_params.pop("max_tokens")
|
||||||
return new_params
|
return new_params
|
||||||
|
|
||||||
|
async def _build_formdata_payload(self, file_bytes: bytes, file_format: str) -> aiohttp.FormData:
    """Build the multipart/form-data request body for a file upload.

    Only audio files are mapped to a specific MIME type at the moment; extend
    ``content_type_list`` to support further file kinds.

    Args:
        file_bytes: Raw content of the file to upload.
        file_format: File extension such as ``"wav"`` or ``"mp3"``. Matching is
            case-insensitive and a leading dot is ignored.

    Returns:
        A form with two fields: ``file`` (the upload) and ``model``
        (``self.model_name``).
    """
    # Local import keeps this function self-contained if the module does not
    # already import io at the top.
    import io

    content_type_list = {
        "wav": "audio/wav",
        "mp3": "audio/mpeg",
        "ogg": "audio/ogg",
        "flac": "audio/flac",
        "aac": "audio/aac",
    }

    normalized_format = (file_format or "").strip().lstrip(".").lower()
    content_type = content_type_list.get(normalized_format)
    if not content_type:
        logger.warning(f"暂不支持的文件类型: {file_format}")
        # Previously the literal string "None" was sent as the MIME type.
        # Fall back to the generic binary type instead.
        content_type = "application/octet-stream"

    # Avoid producing "file.None" when no format was supplied.
    filename = f"file.{normalized_format}" if normalized_format else "file"

    data = aiohttp.FormData()
    data.add_field(
        "file",
        io.BytesIO(file_bytes),
        filename=filename,
        content_type=content_type,
    )
    data.add_field("model", self.model_name)
    return data
|
||||||
|
|
||||||
async def _build_payload(self, prompt: str, image_base64: str = None, image_format: str = None) -> dict:
|
async def _build_payload(self, prompt: str, image_base64: str = None, image_format: str = None) -> dict:
|
||||||
"""构建请求体"""
|
"""构建请求体"""
|
||||||
# 复制一份参数,避免直接修改 self.params
|
# 复制一份参数,避免直接修改 self.params
|
||||||
@@ -725,7 +776,8 @@ class LLMRequest:
|
|||||||
return content, reasoning_content, tool_calls
|
return content, reasoning_content, tool_calls
|
||||||
else:
|
else:
|
||||||
return content, reasoning_content
|
return content, reasoning_content
|
||||||
|
elif "text" in result and result["text"]:
|
||||||
|
return result["text"]
|
||||||
return "没有返回结果", ""
|
return "没有返回结果", ""
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@@ -739,11 +791,15 @@ class LLMRequest:
|
|||||||
reasoning = ""
|
reasoning = ""
|
||||||
return content, reasoning
|
return content, reasoning
|
||||||
|
|
||||||
async def _build_headers(self, no_key: bool = False) -> dict:
|
async def _build_headers(self, no_key: bool = False, is_formdata: bool = False) -> dict:
|
||||||
"""构建请求头"""
|
"""构建请求头"""
|
||||||
if no_key:
|
if no_key:
|
||||||
|
if is_formdata:
|
||||||
|
return {"Authorization": "Bearer **********"}
|
||||||
return {"Authorization": "Bearer **********", "Content-Type": "application/json"}
|
return {"Authorization": "Bearer **********", "Content-Type": "application/json"}
|
||||||
else:
|
else:
|
||||||
|
if is_formdata:
|
||||||
|
return {"Authorization": f"Bearer {self.api_key}"}
|
||||||
return {"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json"}
|
return {"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json"}
|
||||||
# 防止小朋友们截图自己的key
|
# 防止小朋友们截图自己的key
|
||||||
|
|
||||||
@@ -761,6 +817,11 @@ class LLMRequest:
|
|||||||
content, reasoning_content = response
|
content, reasoning_content = response
|
||||||
return content, reasoning_content
|
return content, reasoning_content
|
||||||
|
|
||||||
|
async def generate_response_for_voice(self, voice_bytes: bytes, file_format: str = "wav") -> Union[str, Tuple]:
    """Send an audio file to the transcription endpoint and return the result.

    Args:
        voice_bytes: Raw audio content to transcribe.
        file_format: Extension describing the audio container. Defaults to
            ``"wav"``, the value that used to be hard-coded.

    Returns:
        Whatever ``_execute_request`` returns for the ``/audio/transcriptions``
        endpoint: the transcript string on success, or the response handler's
        fallback value otherwise.
    """
    return await self._execute_request(
        endpoint="/audio/transcriptions",
        file_bytes=voice_bytes,
        file_format=file_format,
    )
|
||||||
|
|
||||||
async def generate_response_async(self, prompt: str, **kwargs) -> Union[str, Tuple]:
|
async def generate_response_async(self, prompt: str, **kwargs) -> Union[str, Tuple]:
|
||||||
"""异步方式根据输入的提示生成模型的响应"""
|
"""异步方式根据输入的提示生成模型的响应"""
|
||||||
# 构建请求体,不硬编码max_tokens
|
# 构建请求体,不硬编码max_tokens
|
||||||
|
|||||||
@@ -87,6 +87,7 @@ talk_frequency_adjust = [
|
|||||||
# - 时间支持跨天,例如 "00:10,0.3" 表示从凌晨0:10开始使用频率0.3
|
# - 时间支持跨天,例如 "00:10,0.3" 表示从凌晨0:10开始使用频率0.3
|
||||||
# - 系统会自动将 "platform:id:type" 转换为内部的哈希chat_id进行匹配
|
# - 系统会自动将 "platform:id:type" 转换为内部的哈希chat_id进行匹配
|
||||||
|
|
||||||
|
enable_asr = false # 是否启用语音识别,启用后麦麦可以通过语音输入进行对话,启用该功能需要配置语音识别模型[model.voice]
|
||||||
|
|
||||||
[message_receive]
|
[message_receive]
|
||||||
# 以下是消息过滤,可以根据规则过滤特定消息,将不会读取这些消息
|
# 以下是消息过滤,可以根据规则过滤特定消息,将不会读取这些消息
|
||||||
@@ -294,6 +295,12 @@ provider = "SILICONFLOW"
|
|||||||
pri_in = 0.35
|
pri_in = 0.35
|
||||||
pri_out = 0.35
|
pri_out = 0.35
|
||||||
|
|
||||||
|
[model.voice] # 语音识别模型
|
||||||
|
name = "FunAudioLLM/SenseVoiceSmall"
|
||||||
|
provider = "SILICONFLOW"
|
||||||
|
pri_in = 0
|
||||||
|
pri_out = 0
|
||||||
|
|
||||||
[model.tool_use] #工具调用模型,需要使用支持工具调用的模型
|
[model.tool_use] #工具调用模型,需要使用支持工具调用的模型
|
||||||
name = "Qwen/Qwen3-14B"
|
name = "Qwen/Qwen3-14B"
|
||||||
provider = "SILICONFLOW"
|
provider = "SILICONFLOW"
|
||||||
|
|||||||
Reference in New Issue
Block a user