优化API调用超时提示信息,增加空回复重试逻辑(应该能用)
This commit is contained in:
@@ -23,7 +23,7 @@ class APIProvider(ConfigBase):
|
|||||||
"""最大重试次数(单个模型API调用失败,最多重试的次数)"""
|
"""最大重试次数(单个模型API调用失败,最多重试的次数)"""
|
||||||
|
|
||||||
timeout: int = 10
|
timeout: int = 10
|
||||||
"""API调用的超时时长(超过这个时长,本次请求将被视为“请求超时”,单位:秒)"""
|
"""API调用的超时时长(超过这个时长,本次请求将被视为"请求超时",单位:秒)"""
|
||||||
|
|
||||||
retry_interval: int = 10
|
retry_interval: int = 10
|
||||||
"""重试间隔(如果API调用失败,重试的间隔时间,单位:秒)"""
|
"""重试间隔(如果API调用失败,重试的间隔时间,单位:秒)"""
|
||||||
|
|||||||
@@ -159,6 +159,8 @@ def _build_generation_config(
|
|||||||
config = {
|
config = {
|
||||||
"maxOutputTokens": max_tokens,
|
"maxOutputTokens": max_tokens,
|
||||||
"temperature": temperature,
|
"temperature": temperature,
|
||||||
|
"topK": 1,
|
||||||
|
"topP": 1,
|
||||||
}
|
}
|
||||||
|
|
||||||
# 处理响应格式
|
# 处理响应格式
|
||||||
|
|||||||
@@ -207,42 +207,87 @@ class LLMRequest:
|
|||||||
# 请求并处理返回值
|
# 请求并处理返回值
|
||||||
logger.debug(f"LLM选择耗时: {model_info.name} {time.time() - start_time}")
|
logger.debug(f"LLM选择耗时: {model_info.name} {time.time() - start_time}")
|
||||||
|
|
||||||
response = await self._execute_request(
|
# 空回复重试逻辑
|
||||||
api_provider=api_provider,
|
empty_retry_count = 0
|
||||||
client=client,
|
max_empty_retry = api_provider.max_retry
|
||||||
request_type=RequestType.RESPONSE,
|
empty_retry_interval = api_provider.retry_interval
|
||||||
model_info=model_info,
|
|
||||||
message_list=messages,
|
|
||||||
temperature=temperature,
|
|
||||||
max_tokens=max_tokens,
|
|
||||||
tool_options=tool_built,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
while empty_retry_count <= max_empty_retry:
|
||||||
content = response.content
|
try:
|
||||||
reasoning_content = response.reasoning_content or ""
|
response = await self._execute_request(
|
||||||
tool_calls = response.tool_calls
|
api_provider=api_provider,
|
||||||
# 从内容中提取<think>标签的推理内容(向后兼容)
|
client=client,
|
||||||
if not reasoning_content and content:
|
request_type=RequestType.RESPONSE,
|
||||||
content, extracted_reasoning = self._extract_reasoning(content)
|
model_info=model_info,
|
||||||
reasoning_content = extracted_reasoning
|
message_list=messages,
|
||||||
|
temperature=temperature,
|
||||||
if usage := response.usage:
|
max_tokens=max_tokens,
|
||||||
llm_usage_recorder.record_usage_to_database(
|
tool_options=tool_built,
|
||||||
model_info=model_info,
|
)
|
||||||
model_usage=usage,
|
|
||||||
user_id="system",
|
content = response.content
|
||||||
request_type=self.request_type,
|
reasoning_content = response.reasoning_content or ""
|
||||||
endpoint="/chat/completions",
|
tool_calls = response.tool_calls
|
||||||
)
|
|
||||||
|
# 从内容中提取<think>标签的推理内容(向后兼容)
|
||||||
if not content:
|
if not reasoning_content and content:
|
||||||
if raise_when_empty:
|
content, extracted_reasoning = self._extract_reasoning(content)
|
||||||
logger.warning("生成的响应为空")
|
reasoning_content = extracted_reasoning
|
||||||
raise RuntimeError("生成的响应为空")
|
|
||||||
content = "生成的响应为空,请检查模型配置或输入内容是否正确"
|
# 检测是否为空回复
|
||||||
|
is_empty_reply = not content or content.strip() == ""
|
||||||
|
|
||||||
|
if is_empty_reply and empty_retry_count < max_empty_retry:
|
||||||
|
empty_retry_count += 1
|
||||||
|
logger.warning(f"检测到空回复,正在进行第 {empty_retry_count}/{max_empty_retry} 次重新生成")
|
||||||
|
|
||||||
|
# 等待一定时间后重试
|
||||||
|
if empty_retry_interval > 0:
|
||||||
|
await asyncio.sleep(empty_retry_interval)
|
||||||
|
|
||||||
|
# 重新选择模型(可能选择不同的模型)
|
||||||
|
model_info, api_provider, client = self._select_model()
|
||||||
|
continue
|
||||||
|
|
||||||
|
# 记录使用情况
|
||||||
|
if usage := response.usage:
|
||||||
|
llm_usage_recorder.record_usage_to_database(
|
||||||
|
model_info=model_info,
|
||||||
|
model_usage=usage,
|
||||||
|
user_id="system",
|
||||||
|
request_type=self.request_type,
|
||||||
|
endpoint="/chat/completions",
|
||||||
|
)
|
||||||
|
|
||||||
|
# 如果内容仍然为空
|
||||||
|
if not content:
|
||||||
|
if raise_when_empty:
|
||||||
|
logger.warning(f"经过 {empty_retry_count} 次重试后仍然生成空回复")
|
||||||
|
raise RuntimeError(f"经过 {empty_retry_count} 次重试后仍然生成空回复")
|
||||||
|
content = "生成的响应为空,请检查模型配置或输入内容是否正确"
|
||||||
|
else:
|
||||||
|
# 成功生成非空回复
|
||||||
|
if empty_retry_count > 0:
|
||||||
|
logger.info(f"经过 {empty_retry_count} 次重试后成功生成回复")
|
||||||
|
|
||||||
return content, (reasoning_content, model_info.name, tool_calls)
|
return content, (reasoning_content, model_info.name, tool_calls)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
# 如果是网络错误等其他异常,不进行空回复重试
|
||||||
|
if empty_retry_count == 0: # 只在第一次出错时抛出异常
|
||||||
|
raise e
|
||||||
|
else:
|
||||||
|
# 如果已经在重试过程中出错,记录日志并继续
|
||||||
|
logger.error(f"重试过程中出错: {e}")
|
||||||
|
empty_retry_count += 1
|
||||||
|
if empty_retry_count <= max_empty_retry and empty_retry_interval > 0:
|
||||||
|
await asyncio.sleep(empty_retry_interval)
|
||||||
|
continue
|
||||||
|
|
||||||
|
# 如果所有重试都失败了
|
||||||
|
if raise_when_empty:
|
||||||
|
raise RuntimeError(f"经过 {max_empty_retry} 次重试后仍然无法生成有效回复")
|
||||||
|
return "生成的响应为空,请检查模型配置或输入内容是否正确", ("", model_info.name, None)
|
||||||
|
|
||||||
async def get_embedding(self, embedding_input: str) -> Tuple[List[float], str]:
|
async def get_embedding(self, embedding_input: str) -> Tuple[List[float], str]:
|
||||||
"""获取嵌入向量
|
"""获取嵌入向量
|
||||||
|
|||||||
Reference in New Issue
Block a user