Merge branch 'MaiM-with-u:dev' into dev
@@ -2,7 +2,7 @@ import asyncio
import time
import traceback
import random  # <-- added import
from typing import List, Optional, Dict, Any, Deque
from typing import List, Optional, Dict, Any, Deque, Callable, Coroutine
from collections import deque
from src.plugins.chat.message import MessageRecv, BaseMessageInfo, MessageThinking, MessageSending
from src.plugins.chat.message import Seg  # Local import needed after move
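A note on the new typing imports: Callable[[], Coroutine[None, None, None]] (the type of the callback parameter added further down) is satisfied by any zero-argument async function. A minimal sketch, with a hypothetical callback name:

import asyncio
from typing import Callable, Coroutine

async def demote_from_focused() -> None:  # hypothetical example callback
    print("leaving FOCUSED state")

callback: Callable[[], Coroutine[None, None, None]] = demote_from_focused
asyncio.run(callback())  # the callee simply awaits it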
@@ -25,7 +25,6 @@ import contextlib
from src.plugins.utils.chat_message_builder import num_new_messages_since
from src.plugins.heartFC_chat.heartFC_Cycleinfo import CycleInfo
from .heartFC_sender import HeartFCSender
# --- End import ---


INITIAL_DURATION = 60.0
@@ -155,18 +154,30 @@ class HeartFChatting:
    Its lifecycle is now controlled by the FOCUSED state of its associated SubHeartflow.
    """

    def __init__(self, chat_id: str, sub_mind: SubMind, observations: Observation):
    CONSECUTIVE_NO_REPLY_THRESHOLD = 4  # threshold for consecutive no-reply cycles

    def __init__(
        self,
        chat_id: str,
        sub_mind: SubMind,
        observations: Observation,
        on_consecutive_no_reply_callback: Callable[[], Coroutine[None, None, None]],
    ):
        """
        HeartFChatting initializer

        Args:
            chat_id: unique identifier of the chat stream (e.g. stream_id)
            sub_mind: the associated sub-mind
            observations: the associated list of observations
            on_consecutive_no_reply_callback: async callback invoked when the consecutive no-reply count reaches the threshold
        """
        # Basic attributes
        self.stream_id: str = chat_id  # chat stream ID
        self.chat_stream: Optional[ChatStream] = None  # associated chat stream
        self.sub_mind: SubMind = sub_mind  # associated sub-mind
        self.observations: List[Observation] = observations  # associated observations, used to monitor chat stream state
        self.on_consecutive_no_reply_callback = on_consecutive_no_reply_callback

        # Log prefix
        self.log_prefix: str = f"[{chat_manager.get_stream_name(chat_id) or chat_id}]"
@@ -198,6 +209,8 @@ class HeartFChatting:
        self._cycle_counter = 0
        self._cycle_history: Deque[CycleInfo] = deque(maxlen=10)  # keep the 10 most recent cycles
        self._current_cycle: Optional[CycleInfo] = None
        self._lian_xu_bu_hui_fu_ci_shu: int = 0  # <--- new: consecutive no-reply counter
        self._shutting_down: bool = False  # <--- new: shutdown flag

    async def _initialize(self) -> bool:
        """
@@ -276,6 +289,12 @@ class HeartFChatting:
        """Main loop: keeps planning, and possibly replying to messages, until cancelled externally."""
        try:
            while True:  # main loop
                # --- Check the shutdown flag at the top of the loop ---
                if self._shutting_down:
                    logger.info(f"{self.log_prefix} Shutdown flag detected, exiting the HFC loop.")
                    break
                # --------------------------------

                # Create new cycle info
                self._cycle_counter += 1
                self._current_cycle = CycleInfo(self._cycle_counter)
@@ -287,6 +306,12 @@ class HeartFChatting:
                # Run the planning and handling phase
                async with self._get_cycle_context() as acquired_lock:
                    if not acquired_lock:
                        # If the lock could not be acquired (unlikely in theory, unless it was
                        # released during shutdown and then grabbed again?)
                        # Alternatively, self._shutting_down could be re-checked here
                        if self._shutting_down:
                            break  # check again to make sure we exit
                        logger.warning(f"{self.log_prefix} Failed to acquire the cycle-processing lock, skipping this cycle.")
                        await asyncio.sleep(0.1)  # brief wait to avoid busy-spinning
                        continue

                    # Record the planning start time
@@ -320,7 +345,11 @@ class HeartFChatting:
                )

        except asyncio.CancelledError:
            logger.info(f"{self.log_prefix} HeartFChatting: MaiMai's focused group chatting (HFC) was cancelled")
            # Being cancelled after the shutdown flag has been set is the normal flow
            if not self._shutting_down:
                logger.warning(f"{self.log_prefix} HeartFChatting: MaiMai's focused group chatting (HFC) loop was cancelled unexpectedly")
            else:
                logger.info(f"{self.log_prefix} HeartFChatting: MaiMai's focused group chatting (HFC) loop cancelled (normal shutdown)")
        except Exception as e:
            logger.error(f"{self.log_prefix} HeartFChatting: unexpected error: {e}")
            logger.error(traceback.format_exc())
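The cancellation handling above relies on a simple contract: shutdown() sets _shutting_down before cancelling the task, so a CancelledError with the flag set is a normal exit and anything else is unexpected. A self-contained sketch of that pattern (class and method names are illustrative stand-ins, not the commit's code):

import asyncio
import contextlib

class LoopOwnerSketch:
    def __init__(self):
        self._shutting_down = False
        self._loop_task = None

    async def _loop(self):
        try:
            while True:
                if self._shutting_down:  # cooperative exit point
                    break
                await asyncio.sleep(0.1)  # stand-in for one planning cycle
        except asyncio.CancelledError:
            if not self._shutting_down:
                print("loop cancelled unexpectedly")  # nobody set the flag first
            raise

    async def shutdown(self):
        self._shutting_down = True  # mark the coming cancellation as intentional
        if self._loop_task and not self._loop_task.done():
            self._loop_task.cancel()
            with contextlib.suppress(asyncio.CancelledError):
                await self._loop_task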
@@ -451,6 +480,8 @@ class HeartFChatting:
                return await handler(reasoning, planner_start_db_time, cycle_timers), ""
            except HeartFCError as e:
                logger.error(f"{self.log_prefix} Error while handling {action}: {e}")
                # Reset the counter on errors too
                self._lian_xu_bu_hui_fu_ci_shu = 0
                return False, ""

    async def _handle_text_reply(self, reasoning: str, emoji_query: str, cycle_timers: dict) -> tuple[bool, str]:
@@ -471,6 +502,8 @@ class HeartFChatting:
        Returns:
            tuple[bool, str]: (whether the reply succeeded, thinking-message ID)
        """
        # Reset the consecutive no-reply counter
        self._lian_xu_bu_hui_fu_ci_shu = 0

        # Get the anchor message
        anchor_message = await self._get_anchor_message()
@@ -544,8 +577,9 @@ class HeartFChatting:
        Handle the no-reply case

        Workflow:
        1. Wait for a new message
        2. Return on timeout or when a new message arrives
        1. Wait for a new message, a timeout, or a shutdown signal
        2. Update the consecutive no-reply count based on the wait result
        3. If the threshold is reached, trigger the callback

        Args:
            reasoning: the reason for not replying
@@ -561,14 +595,39 @@ class HeartFChatting:

        try:
            with Timer("Waiting for new message", cycle_timers):
                return await self._wait_for_new_message(observation, planner_start_db_time, self.log_prefix)
            # Wait for a new message, a timeout, or a shutdown signal, and collect the result
            await self._wait_for_new_message(observation, planner_start_db_time, self.log_prefix)

            if not self._shutting_down:
                self._lian_xu_bu_hui_fu_ci_shu += 1
                logger.debug(
                    f"{self.log_prefix} Consecutive no-reply count increased: {self._lian_xu_bu_hui_fu_ci_shu}/{self.CONSECUTIVE_NO_REPLY_THRESHOLD}"
                )

                # Check whether the threshold has been reached
                if self._lian_xu_bu_hui_fu_ci_shu >= self.CONSECUTIVE_NO_REPLY_THRESHOLD:
                    logger.info(
                        f"{self.log_prefix} Consecutive no-reply count reached the threshold ({self._lian_xu_bu_hui_fu_ci_shu} times), invoking the callback to request a state transition"
                    )
                    # Invoke the callback. Note: the counter is not reset here; we rely on the
                    # callback successfully changing state to implicitly reset the context.
                    await self.on_consecutive_no_reply_callback()

            return True

        except asyncio.CancelledError:
            logger.info(f"{self.log_prefix} Wait interrupted")
            # The task was cancelled while waiting (possibly because of shutdown)
            logger.info(f"{self.log_prefix} Wait interrupted while handling 'no_reply' (CancelledError)")
            # Let the exception propagate; the exception handling in _hfc_loop takes over
            raise
        except Exception as e:  # catch errors that may occur in the invocation manager or elsewhere
            logger.error(f"{self.log_prefix} Error while handling 'no_reply': {e}")
            logger.error(traceback.format_exc())
            # On an unexpected error the counter could be reset; here we choose not to
            return False  # indicates the action did not succeed
    async def _wait_for_new_message(self, observation, planner_start_db_time: float, log_prefix: str) -> bool:
        """
        Wait for a new message
        Wait for a new message, or until a shutdown signal is detected

        Args:
            observation: the observation instance
@@ -576,19 +635,36 @@ class HeartFChatting:
            log_prefix: log prefix

        Returns:
            bool: whether a new message was detected
            bool: whether a new message was detected (False when exiting due to a shutdown signal)
        """
        wait_start_time = time.monotonic()
        while True:
            # --- Check the shutdown flag at the start of every iteration ---
            if self._shutting_down:
                logger.info(f"{log_prefix} Shutdown signal detected while waiting for new messages, aborting the wait.")
                return False  # indicates exit due to shutdown
            # -----------------------------------

            # Check for new messages
            if await observation.has_new_messages_since(planner_start_db_time):
                logger.info(f"{log_prefix} New message detected")
                return True

            # Check for timeout (after the new-message and shutdown checks)
            if time.monotonic() - wait_start_time > 120:
                logger.warning(f"{log_prefix} Wait timed out (120 seconds)")
                logger.warning(f"{log_prefix} Timed out waiting for a new message (20 seconds)")
                return False

            await asyncio.sleep(1.5)
            try:
                # Sleep briefly so other tasks get a chance to run, and cancellation or shutdown is noticed sooner
                await asyncio.sleep(0.5)  # shortened sleep interval
            except asyncio.CancelledError:
                # If cancelled during the sleep, check the shutdown flag again
                # No warning is needed for a normal shutdown
                if not self._shutting_down:
                    logger.warning(f"{log_prefix} Sleep in _wait_for_new_message was cancelled unexpectedly")
                # Either way, re-raise the exception and let the caller handle it
                raise

    async def _log_cycle_timers(self, cycle_timers: dict, log_prefix: str):
        """Log the timer results for a cycle"""
@@ -599,7 +675,9 @@ class HeartFChatting:
            timer_strings.append(f"{name}: {formatted_time}")

        if timer_strings:
            logger.debug(f"{log_prefix} Decision timings for this cycle: {'; '.join(timer_strings)}")
            # Check the shutdown flag before logging
            if not self._shutting_down:
                logger.debug(f"{log_prefix} Decision timings for this cycle: {'; '.join(timer_strings)}")

    async def _handle_cycle_delay(self, action_taken_this_cycle: bool, cycle_start_time: float, log_prefix: str):
        """Handle the loop delay"""
@@ -835,6 +913,7 @@ class HeartFChatting:
    async def shutdown(self):
        """Gracefully shut down this HeartFChatting instance and cancel the active loop task"""
        logger.info(f"{self.log_prefix} Shutting down HeartFChatting...")
        self._shutting_down = True  # <-- set the flag as shutdown begins

        # Cancel the loop task
        if self._loop_task and not self._loop_task.done():
@@ -865,6 +944,25 @@ class HeartFChatting:
            action=action,
            reasoning=reasoning,
        )

        # Check the shutdown flag before recording the cycle log
        if not self._shutting_down:
            self._current_cycle.complete_cycle()
            self._cycle_history.append(self._current_cycle)

            # Record cycle info and timer results
            timer_strings = []
            for name, elapsed in self._current_cycle.timers.items():
                formatted_time = f"{elapsed * 1000:.2f}ms" if elapsed < 1 else f"{elapsed:.2f}s"
                timer_strings.append(f"{name}: {formatted_time}")

            logger.debug(
                f"{self.log_prefix} Thought cycle #{self._current_cycle.cycle_id} complete, "
                f"took: {self._current_cycle.end_time - self._current_cycle.start_time:.2f}s, "
                f"action: {self._current_cycle.action_type}"
                + (f"\nTimer details: {'; '.join(timer_strings)}" if timer_strings else "")
            )

        return prompt

    async def _build_planner_prompt(
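The per-cycle log added above formats sub-second timers in milliseconds and everything else in seconds. A standalone sketch of that formatting rule, with sample values:

def format_elapsed(elapsed: float) -> str:
    # same convention as the diff: < 1 s shown in ms, otherwise in seconds
    return f"{elapsed * 1000:.2f}ms" if elapsed < 1 else f"{elapsed:.2f}s"

timers = {"planning": 0.042, "reply generation": 3.5}  # sample values
print("; ".join(f"{name}: {format_elapsed(t)}" for name, t in timers.items()))
# -> planning: 42.00ms; reply generation: 3.50s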

@@ -178,395 +178,6 @@ class LLMRequest:
        output_cost = (completion_tokens / 1000000) * self.pri_out
        return round(input_cost + output_cost, 6)

    '''
    async def _execute_request(
        self,
        endpoint: str,
        prompt: str = None,
        image_base64: str = None,
        image_format: str = None,
        payload: dict = None,
        retry_policy: dict = None,
        response_handler: callable = None,
        user_id: str = "system",
        request_type: str = None,
    ):
        """Unified request-execution entry point

        Args:
            endpoint: API endpoint path (e.g. "chat/completions")
            prompt: the prompt text
            image_base64: base64-encoded image
            image_format: image format
            payload: request body data
            retry_policy: custom retry policy
            response_handler: custom response handler
            user_id: user ID
            request_type: request type
        """

        if request_type is None:
            request_type = self.request_type

        # Merge retry policies
        default_retry = {
            "max_retries": 3,
            "base_wait": 10,
            "retry_codes": [429, 413, 500, 503],
            "abort_codes": [400, 401, 402, 403],
        }
        policy = {**default_retry, **(retry_policy or {})}

        # Common error-code mapping
        error_code_mapping = {
            400: "invalid request parameters",
            401: "API key error, authentication failed; please check that the settings in /config/bot_config.toml and .env are correct~",
            402: "insufficient account balance",
            403: "real-name verification required, or insufficient balance",
            404: "Not Found",
            429: "too many requests, please try again later",
            500: "internal server error",
            503: "server overloaded",
        }

        api_url = f"{self.base_url.rstrip('/')}/{endpoint.lstrip('/')}"
        # Determine whether streaming is enabled
        stream_mode = self.stream
        # logger_msg = "entering streaming output mode, " if stream_mode else ""
        # logger.debug(f"{logger_msg}sending request to URL: {api_url}")
        # logger.info(f"using model: {self.model_name}")

        # Build the request body
        if image_base64:
            payload = await self._build_payload(prompt, image_base64, image_format)
        elif payload is None:
            payload = await self._build_payload(prompt)

        # Streaming output flag
        # Build the payload first, then add the streaming flag
        if stream_mode:
            payload["stream"] = stream_mode

        for retry in range(policy["max_retries"]):
            try:
                # Use a context manager to handle the session
                headers = await self._build_headers()
                # Apparently required for OpenAI streaming; adding it had no effect on Aliyun's qwq-plus
                if stream_mode:
                    headers["Accept"] = "text/event-stream"

                async with aiohttp.ClientSession() as session:
                    try:
                        async with session.post(api_url, headers=headers, json=payload) as response:
                            # Handle status codes that warrant a retry
                            if response.status in policy["retry_codes"]:
                                wait_time = policy["base_wait"] * (2**retry)
                                logger.warning(
                                    f"Model {self.model_name} error code: {response.status}, waiting {wait_time}s before retrying"
                                )
                                if response.status == 413:
                                    logger.warning("Request body too large, trying to compress...")
                                    image_base64 = compress_base64_image_by_scale(image_base64)
                                    payload = await self._build_payload(prompt, image_base64, image_format)
                                elif response.status in [500, 503]:
                                    logger.error(
                                        f"Model {self.model_name} error code: {response.status} - {error_code_mapping.get(response.status)}"
                                    )
                                    raise RuntimeError("Server overloaded, the model failed to recover QAQ")
                                else:
                                    logger.warning(f"Model {self.model_name} rate limited (429), waiting {wait_time}s before retrying...")

                                await asyncio.sleep(wait_time)
                                continue
                            elif response.status in policy["abort_codes"]:
                                logger.error(
                                    f"Model {self.model_name} error code: {response.status} - {error_code_mapping.get(response.status)}"
                                )
                                # Try to fetch and log the detailed error info returned by the server
                                try:
                                    error_json = await response.json()
                                    if error_json and isinstance(error_json, list) and len(error_json) > 0:
                                        for error_item in error_json:
                                            if "error" in error_item and isinstance(error_item["error"], dict):
                                                error_obj = error_item["error"]
                                                error_code = error_obj.get("code")
                                                error_message = error_obj.get("message")
                                                error_status = error_obj.get("status")
                                                logger.error(
                                                    f"Server error details: code={error_code}, status={error_status}, "
                                                    f"message={error_message}"
                                                )
                                    elif isinstance(error_json, dict) and "error" in error_json:
                                        # Handle the single-error-object case
                                        error_obj = error_json.get("error", {})
                                        error_code = error_obj.get("code")
                                        error_message = error_obj.get("message")
                                        error_status = error_obj.get("status")
                                        logger.error(
                                            f"Server error details: code={error_code}, status={error_status}, message={error_message}"
                                        )
                                    else:
                                        # Log the raw error response
                                        logger.error(f"Server error response: {error_json}")
                                except Exception as e:
                                    logger.warning(f"Unable to parse the server error response: {str(e)}")

                                if response.status == 403:
                                    # Downgrade handling only for SiliconFlow's V3 and R1
                                    if (
                                        self.model_name.startswith("Pro/deepseek-ai")
                                        and self.base_url == "https://api.siliconflow.cn/v1/"
                                    ):
                                        old_model_name = self.model_name
                                        self.model_name = self.model_name[4:]  # strip the "Pro/" prefix
                                        logger.warning(
                                            f"403 error detected, model downgraded from {old_model_name} to {self.model_name}"
                                        )

                                        # Update the global config
                                        if global_config.llm_normal.get("name") == old_model_name:
                                            global_config.llm_normal["name"] = self.model_name
                                            logger.warning(f"Temporarily downgraded the llm_normal model in the global config to {self.model_name}")

                                        if global_config.llm_reasoning.get("name") == old_model_name:
                                            global_config.llm_reasoning["name"] = self.model_name
                                            logger.warning(
                                                f"Temporarily downgraded the llm_reasoning model in the global config to {self.model_name}"
                                            )

                                        # Update the model name in the payload
                                        if payload and "model" in payload:
                                            payload["model"] = self.model_name

                                        # Retry the request
                                        retry -= 1  # does not count toward the retry limit
                                        continue

                                raise RuntimeError(f"Request rejected: {error_code_mapping.get(response.status)}")

                            response.raise_for_status()
                            reasoning_content = ""

                            # Convert streaming output into a non-streaming result
                            if stream_mode:
                                flag_delta_content_finished = False
                                accumulated_content = ""
                                usage = None  # initialize usage to avoid an undefined-variable error

                                async for line_bytes in response.content:
                                    try:
                                        line = line_bytes.decode("utf-8").strip()
                                        if not line:
                                            continue
                                        if line.startswith("data:"):
                                            data_str = line[5:].strip()
                                            if data_str == "[DONE]":
                                                break
                                            try:
                                                chunk = json.loads(data_str)
                                                if flag_delta_content_finished:
                                                    chunk_usage = chunk.get("usage", None)
                                                    if chunk_usage:
                                                        usage = chunk_usage  # capture the token usage
                                                else:
                                                    delta = chunk["choices"][0]["delta"]
                                                    delta_content = delta.get("content")
                                                    if delta_content is None:
                                                        delta_content = ""
                                                    accumulated_content += delta_content
                                                    # Detect whether the streamed text has finished
                                                    finish_reason = chunk["choices"][0].get("finish_reason")
                                                    if delta.get("reasoning_content", None):
                                                        reasoning_content += delta["reasoning_content"]
                                                    if finish_reason == "stop":
                                                        chunk_usage = chunk.get("usage", None)
                                                        if chunk_usage:
                                                            usage = chunk_usage
                                                            break
                                                        # Some platforms do not return token usage before the text
                                                        # output ends; in that case fetch one more chunk
                                                        flag_delta_content_finished = True

                                            except Exception as e:
                                                logger.exception(f"Model {self.model_name} error while parsing streaming output: {str(e)}")
                                    except GeneratorExit:
                                        logger.warning(f"Model {self.model_name} streaming output interrupted, cleaning up resources...")
                                        # Make sure resources are cleaned up properly
                                        await response.release()
                                        # Return the content accumulated so far
                                        result = {
                                            "choices": [
                                                {
                                                    "message": {
                                                        "content": accumulated_content,
                                                        "reasoning_content": reasoning_content,
                                                        # Streaming output may carry no tool calls; no tool_calls field is needed here
                                                    }
                                                }
                                            ],
                                            "usage": usage,
                                        }
                                        return (
                                            response_handler(result)
                                            if response_handler
                                            else self._default_response_handler(result, user_id, request_type, endpoint)
                                        )
                                    except Exception as e:
                                        logger.error(f"Model {self.model_name} error while processing streaming output: {str(e)}")
                                        # Make sure resources are cleaned up even when an error occurs
                                        try:
                                            await response.release()
                                        except Exception as cleanup_error:
                                            logger.error(f"Error while cleaning up resources: {cleanup_error}")
                                        # Return the content accumulated so far
                                        result = {
                                            "choices": [
                                                {
                                                    "message": {
                                                        "content": accumulated_content,
                                                        "reasoning_content": reasoning_content,
                                                        # Streaming output may carry no tool calls; no tool_calls field is needed here
                                                    }
                                                }
                                            ],
                                            "usage": usage,
                                        }
                                        return (
                                            response_handler(result)
                                            if response_handler
                                            else self._default_response_handler(result, user_id, request_type, endpoint)
                                        )

                                content = accumulated_content
                                think_match = re.search(r"<think>(.*?)</think>", content, re.DOTALL)
                                if think_match:
                                    reasoning_content = think_match.group(1).strip()
                                content = re.sub(r"<think>.*?</think>", "", content, flags=re.DOTALL).strip()
                                # Build a pseudo-result so the custom or default response handler can be invoked
                                result = {
                                    "choices": [
                                        {
                                            "message": {
                                                "content": content,
                                                "reasoning_content": reasoning_content,
                                                # Streaming output may carry no tool calls; no tool_calls field is needed here
                                            }
                                        }
                                    ],
                                    "usage": usage,
                                }
                                return (
                                    response_handler(result)
                                    if response_handler
                                    else self._default_response_handler(result, user_id, request_type, endpoint)
                                )
                            else:
                                result = await response.json()
                                # Use the custom handler, or fall back to the default handling
                                return (
                                    response_handler(result)
                                    if response_handler
                                    else self._default_response_handler(result, user_id, request_type, endpoint)
                                )

                    except (aiohttp.ClientError, asyncio.TimeoutError) as e:
                        if retry < policy["max_retries"] - 1:
                            wait_time = policy["base_wait"] * (2**retry)
                            logger.error(f"Model {self.model_name} network error, waiting {wait_time}s before retrying... error: {str(e)}")
                            await asyncio.sleep(wait_time)
                            continue
                        else:
                            logger.critical(f"Model {self.model_name} network error reached the maximum retry count: {str(e)}")
                            raise RuntimeError(f"Network request failed: {str(e)}") from e
                    except Exception as e:
                        logger.critical(f"Model {self.model_name} unexpected error: {str(e)}")
                        raise RuntimeError(f"An error occurred during the request: {str(e)}") from e

            except aiohttp.ClientResponseError as e:
                # Handle response errors raised by aiohttp
                if retry < policy["max_retries"] - 1:
                    wait_time = policy["base_wait"] * (2**retry)
                    logger.error(
                        f"Model {self.model_name} HTTP response error, waiting {wait_time}s before retrying... status: {e.status}, error: {e.message}"
                    )
                    try:
                        if hasattr(e, "response") and e.response and hasattr(e.response, "text"):
                            error_text = await e.response.text()
                            try:
                                error_json = json.loads(error_text)
                                if isinstance(error_json, list) and len(error_json) > 0:
                                    for error_item in error_json:
                                        if "error" in error_item and isinstance(error_item["error"], dict):
                                            error_obj = error_item["error"]
                                            logger.error(
                                                f"Model {self.model_name} server error details: code={error_obj.get('code')}, "
                                                f"status={error_obj.get('status')}, "
                                                f"message={error_obj.get('message')}"
                                            )
                                elif isinstance(error_json, dict) and "error" in error_json:
                                    error_obj = error_json.get("error", {})
                                    logger.error(
                                        f"Model {self.model_name} server error details: code={error_obj.get('code')}, "
                                        f"status={error_obj.get('status')}, "
                                        f"message={error_obj.get('message')}"
                                    )
                                else:
                                    logger.error(f"Model {self.model_name} server error response: {error_json}")
                            except (json.JSONDecodeError, TypeError) as json_err:
                                logger.warning(
                                    f"Model {self.model_name} response is not valid JSON: {str(json_err)}, raw content: {error_text[:200]}"
                                )
                    except (AttributeError, TypeError, ValueError) as parse_err:
                        logger.warning(f"Model {self.model_name} could not parse the error response content: {str(parse_err)}")

                    await asyncio.sleep(wait_time)
                else:
                    logger.critical(
                        f"Model {self.model_name} HTTP response error reached the maximum retry count: status: {e.status}, error: {e.message}"
                    )
                    # Safely inspect and log the request details
                    if (
                        image_base64
                        and payload
                        and isinstance(payload, dict)
                        and "messages" in payload
                        and len(payload["messages"]) > 0
                    ):
                        if isinstance(payload["messages"][0], dict) and "content" in payload["messages"][0]:
                            content = payload["messages"][0]["content"]
                            if isinstance(content, list) and len(content) > 1 and "image_url" in content[1]:
                                payload["messages"][0]["content"][1]["image_url"]["url"] = (
                                    f"data:image/{image_format.lower() if image_format else 'jpeg'};base64,"
                                    f"{image_base64[:10]}...{image_base64[-10:]}"
                                )
                    logger.critical(f"Request headers: {await self._build_headers(no_key=True)} request body: {payload}")
                    raise RuntimeError(f"Model {self.model_name} API request failed: status {e.status}, {e.message}") from e
            except Exception as e:
                if retry < policy["max_retries"] - 1:
                    wait_time = policy["base_wait"] * (2**retry)
                    logger.error(f"Model {self.model_name} request failed, waiting {wait_time}s before retrying... error: {str(e)}")
                    await asyncio.sleep(wait_time)
                else:
                    logger.critical(f"Model {self.model_name} request failed: {str(e)}")
                    # Safely inspect and log the request details
                    if (
                        image_base64
                        and payload
                        and isinstance(payload, dict)
                        and "messages" in payload
                        and len(payload["messages"]) > 0
                    ):
                        if isinstance(payload["messages"][0], dict) and "content" in payload["messages"][0]:
                            content = payload["messages"][0]["content"]
                            if isinstance(content, list) and len(content) > 1 and "image_url" in content[1]:
                                payload["messages"][0]["content"][1]["image_url"]["url"] = (
                                    f"data:image/{image_format.lower() if image_format else 'jpeg'};base64,"
                                    f"{image_base64[:10]}...{image_base64[-10:]}"
                                )
                    logger.critical(f"Request headers: {await self._build_headers(no_key=True)} request body: {payload}")
                    raise RuntimeError(f"Model {self.model_name} API request failed: {str(e)}") from e

        logger.error(f"Model {self.model_name} reached the maximum retry count and the request still failed")
        raise RuntimeError(f"Model {self.model_name} reached the maximum retry count; the API request still failed")
    '''
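The commented-out block removed above folded a streaming response into a non-streaming result by accumulating OpenAI-style SSE "data:" lines. A minimal standalone sketch of that accumulation logic (independent of aiohttp; the chunk shape follows the format the removed code assumed):

import json

def accumulate_sse(lines):
    # Fold "data:" lines into (content, usage), as the removed code did
    content, usage = "", None
    for raw in lines:
        line = raw.strip()
        if not line.startswith("data:"):
            continue
        data_str = line[5:].strip()
        if data_str == "[DONE]":
            break
        chunk = json.loads(data_str)
        choice = chunk["choices"][0]
        content += choice["delta"].get("content") or ""
        if choice.get("finish_reason") == "stop":
            usage = chunk.get("usage")  # some platforms send usage only at the end
    return content, usage

demo = [
    'data: {"choices":[{"delta":{"content":"Hi"},"finish_reason":null}]}',
    'data: {"choices":[{"delta":{"content":" there"},"finish_reason":"stop"}],"usage":{"total_tokens":5}}',
    "data: [DONE]",
]
print(accumulate_sse(demo))  # ('Hi there', {'total_tokens': 5})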

    async def _prepare_request(
        self,
        endpoint: str,
@@ -820,6 +431,7 @@ class LLMRequest:
        policy = request_content["policy"]
        payload = request_content["payload"]
        wait_time = policy["base_wait"] * (2**retry_count)
        keep_request = False
        if retry_count < policy["max_retries"] - 1:
            keep_request = True
        if isinstance(exception, RequestAbortException):
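For reference, the retry wait here (and in the removed block above) grows exponentially as base_wait * 2**retry_count. Under the default policy (max_retries=3, base_wait=10) the schedule works out as below; the print loop is only an illustration:

default_retry = {"max_retries": 3, "base_wait": 10}  # defaults from the removed block

for retry_count in range(default_retry["max_retries"]):
    wait_time = default_retry["base_wait"] * (2**retry_count)
    keep_request = retry_count < default_retry["max_retries"] - 1
    print(f"attempt {retry_count}: wait {wait_time}s, retry again: {keep_request}")
# attempt 0: wait 10s, retry again: True
# attempt 1: wait 20s, retry again: True
# attempt 2: wait 40s, retry again: False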