From 602653c8c03752ce0ca95a132aa75ed778338658 Mon Sep 17 00:00:00 2001 From: minecraft1024a Date: Wed, 13 Aug 2025 17:56:32 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BC=98=E5=8C=96API=E8=B0=83=E7=94=A8?= =?UTF-8?q?=E8=B6=85=E6=97=B6=E6=8F=90=E7=A4=BA=E4=BF=A1=E6=81=AF=EF=BC=8C?= =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E7=A9=BA=E5=9B=9E=E5=A4=8D=E9=87=8D=E8=AF=95?= =?UTF-8?q?=E9=80=BB=E8=BE=91(=E5=BA=94=E8=AF=A5=E8=83=BD=E7=94=A8)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/config/api_ada_configs.py | 2 +- .../model_client/aiohttp_gemini_client.py | 2 + src/llm_models/utils_model.py | 113 ++++++++++++------ 3 files changed, 82 insertions(+), 35 deletions(-) diff --git a/src/config/api_ada_configs.py b/src/config/api_ada_configs.py index 5037d6a97..d4d6719e5 100644 --- a/src/config/api_ada_configs.py +++ b/src/config/api_ada_configs.py @@ -23,7 +23,7 @@ class APIProvider(ConfigBase): """最大重试次数(单个模型API调用失败,最多重试的次数)""" timeout: int = 10 - """API调用的超时时长(超过这个时长,本次请求将被视为“请求超时”,单位:秒)""" + """API调用的超时时长(超过这个时长,本次请求将被视为"请求超时",单位:秒)""" retry_interval: int = 10 """重试间隔(如果API调用失败,重试的间隔时间,单位:秒)""" diff --git a/src/llm_models/model_client/aiohttp_gemini_client.py b/src/llm_models/model_client/aiohttp_gemini_client.py index dbdd4efaf..d35f54618 100644 --- a/src/llm_models/model_client/aiohttp_gemini_client.py +++ b/src/llm_models/model_client/aiohttp_gemini_client.py @@ -159,6 +159,8 @@ def _build_generation_config( config = { "maxOutputTokens": max_tokens, "temperature": temperature, + "topK": 1, + "topP": 1, } # 处理响应格式 diff --git a/src/llm_models/utils_model.py b/src/llm_models/utils_model.py index d7eb506a9..3c6ca8ff3 100644 --- a/src/llm_models/utils_model.py +++ b/src/llm_models/utils_model.py @@ -207,42 +207,87 @@ class LLMRequest: # 请求并处理返回值 logger.debug(f"LLM选择耗时: {model_info.name} {time.time() - start_time}") - response = await self._execute_request( - api_provider=api_provider, - client=client, - request_type=RequestType.RESPONSE, - model_info=model_info, - message_list=messages, - temperature=temperature, - max_tokens=max_tokens, - tool_options=tool_built, - ) + # 空回复重试逻辑 + empty_retry_count = 0 + max_empty_retry = api_provider.max_retry + empty_retry_interval = api_provider.retry_interval - - content = response.content - reasoning_content = response.reasoning_content or "" - tool_calls = response.tool_calls - # 从内容中提取标签的推理内容(向后兼容) - if not reasoning_content and content: - content, extracted_reasoning = self._extract_reasoning(content) - reasoning_content = extracted_reasoning - - if usage := response.usage: - llm_usage_recorder.record_usage_to_database( - model_info=model_info, - model_usage=usage, - user_id="system", - request_type=self.request_type, - endpoint="/chat/completions", - ) - - if not content: - if raise_when_empty: - logger.warning("生成的响应为空") - raise RuntimeError("生成的响应为空") - content = "生成的响应为空,请检查模型配置或输入内容是否正确" + while empty_retry_count <= max_empty_retry: + try: + response = await self._execute_request( + api_provider=api_provider, + client=client, + request_type=RequestType.RESPONSE, + model_info=model_info, + message_list=messages, + temperature=temperature, + max_tokens=max_tokens, + tool_options=tool_built, + ) + + content = response.content + reasoning_content = response.reasoning_content or "" + tool_calls = response.tool_calls + + # 从内容中提取标签的推理内容(向后兼容) + if not reasoning_content and content: + content, extracted_reasoning = self._extract_reasoning(content) + reasoning_content = extracted_reasoning + + # 检测是否为空回复 + is_empty_reply = not content or content.strip() == "" + + if is_empty_reply and empty_retry_count < max_empty_retry: + empty_retry_count += 1 + logger.warning(f"检测到空回复,正在进行第 {empty_retry_count}/{max_empty_retry} 次重新生成") + + # 等待一定时间后重试 + if empty_retry_interval > 0: + await asyncio.sleep(empty_retry_interval) + + # 重新选择模型(可能选择不同的模型) + model_info, api_provider, client = self._select_model() + continue + + # 记录使用情况 + if usage := response.usage: + llm_usage_recorder.record_usage_to_database( + model_info=model_info, + model_usage=usage, + user_id="system", + request_type=self.request_type, + endpoint="/chat/completions", + ) + + # 如果内容仍然为空 + if not content: + if raise_when_empty: + logger.warning(f"经过 {empty_retry_count} 次重试后仍然生成空回复") + raise RuntimeError(f"经过 {empty_retry_count} 次重试后仍然生成空回复") + content = "生成的响应为空,请检查模型配置或输入内容是否正确" + else: + # 成功生成非空回复 + if empty_retry_count > 0: + logger.info(f"经过 {empty_retry_count} 次重试后成功生成回复") - return content, (reasoning_content, model_info.name, tool_calls) + return content, (reasoning_content, model_info.name, tool_calls) + + except Exception as e: + # 如果是网络错误等其他异常,不进行空回复重试 + if empty_retry_count == 0: # 只在第一次出错时抛出异常 + raise e + else: + # 如果已经在重试过程中出错,记录日志并继续 + logger.error(f"重试过程中出错: {e}") + empty_retry_count += 1 + if empty_retry_count <= max_empty_retry and empty_retry_interval > 0: + await asyncio.sleep(empty_retry_interval) + continue + + # 如果所有重试都失败了 + if raise_when_empty: + raise RuntimeError(f"经过 {max_empty_retry} 次重试后仍然无法生成有效回复") + return "生成的响应为空,请检查模型配置或输入内容是否正确", ("", model_info.name, None) async def get_embedding(self, embedding_input: str) -> Tuple[List[float], str]: """获取嵌入向量