From 602653c8c03752ce0ca95a132aa75ed778338658 Mon Sep 17 00:00:00 2001
From: minecraft1024a <wwwww95915@qq.com>
Date: Wed, 13 Aug 2025 17:56:32 +0800
Subject: [PATCH] =?UTF-8?q?=E4=BC=98=E5=8C=96API=E8=B0=83=E7=94=A8?=
 =?UTF-8?q?=E8=B6=85=E6=97=B6=E6=8F=90=E7=A4=BA=E4=BF=A1=E6=81=AF=EF=BC=8C?=
 =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E7=A9=BA=E5=9B=9E=E5=A4=8D=E9=87=8D=E8=AF=95?=
 =?UTF-8?q?=E9=80=BB=E8=BE=91(=E5=BA=94=E8=AF=A5=E8=83=BD=E7=94=A8)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/config/api_ada_configs.py                 |   2 +-
 .../model_client/aiohttp_gemini_client.py     |   2 +
 src/llm_models/utils_model.py                 | 113 ++++++++++++------
 3 files changed, 82 insertions(+), 35 deletions(-)
diff --git a/src/config/api_ada_configs.py b/src/config/api_ada_configs.py
index 5037d6a97..d4d6719e5 100644
--- a/src/config/api_ada_configs.py
+++ b/src/config/api_ada_configs.py
@@ -23,7 +23,7 @@ class APIProvider(ConfigBase):
     """最大重试次数（单个模型API调用失败，最多重试的次数）"""
 
     timeout: int = 10
-    """API调用的超时时长（超过这个时长，本次请求将被视为“请求超时”，单位：秒）"""
+    """API调用的超时时长（超过这个时长，本次请求将被视为"请求超时"，单位：秒）"""
 
     retry_interval: int = 10
     """重试间隔（如果API调用失败，重试的间隔时间，单位：秒）"""
diff --git a/src/llm_models/model_client/aiohttp_gemini_client.py b/src/llm_models/model_client/aiohttp_gemini_client.py
index dbdd4efaf..d35f54618 100644
--- a/src/llm_models/model_client/aiohttp_gemini_client.py
+++ b/src/llm_models/model_client/aiohttp_gemini_client.py
@@ -159,6 +159,8 @@ def _build_generation_config(
     config = {
         "maxOutputTokens": max_tokens,
         "temperature": temperature,
+        "topK": 1,  # NOTE(review): hard-coded; topK=1 presumably restricts sampling to the single most likely token, which would make `temperature` moot - confirm this is intended
+        "topP": 1,
     }
     
     # 处理响应格式
diff --git a/src/llm_models/utils_model.py b/src/llm_models/utils_model.py
index d7eb506a9..3c6ca8ff3 100644
--- a/src/llm_models/utils_model.py
+++ b/src/llm_models/utils_model.py
@@ -207,42 +207,87 @@ class LLMRequest:
         # 请求并处理返回值
         logger.debug(f"LLM选择耗时: {model_info.name} {time.time() - start_time}")
         
-        response = await self._execute_request(
-            api_provider=api_provider,
-            client=client,
-            request_type=RequestType.RESPONSE,
-            model_info=model_info,
-            message_list=messages,
-            temperature=temperature,
-            max_tokens=max_tokens,
-            tool_options=tool_built,
-        )
+        # Empty-reply retry state; the budget and wait interval are read once from the provider selected before the loop.
+        empty_retry_count = 0
+        max_empty_retry = api_provider.max_retry
+        empty_retry_interval = api_provider.retry_interval
         
-        
-        content = response.content
-        reasoning_content = response.reasoning_content or ""
-        tool_calls = response.tool_calls
-        # 从内容中提取<think>标签的推理内容（向后兼容）
-        if not reasoning_content and content:
-            content, extracted_reasoning = self._extract_reasoning(content)
-            reasoning_content = extracted_reasoning
-            
-        if usage := response.usage:
-            llm_usage_recorder.record_usage_to_database(
-                model_info=model_info,
-                model_usage=usage,
-                user_id="system",
-                request_type=self.request_type,
-                endpoint="/chat/completions",
-            )
-        
-        if not content:
-            if raise_when_empty:
-                logger.warning("生成的响应为空")
-                raise RuntimeError("生成的响应为空")
-            content = "生成的响应为空，请检查模型配置或输入内容是否正确"
+        while empty_retry_count <= max_empty_retry:
+            try:
+                response = await self._execute_request(
+                    api_provider=api_provider,
+                    client=client,
+                    request_type=RequestType.RESPONSE,
+                    model_info=model_info,
+                    message_list=messages,
+                    temperature=temperature,
+                    max_tokens=max_tokens,
+                    tool_options=tool_built,
+                )
+
+                content = response.content
+                reasoning_content = response.reasoning_content or ""
+                tool_calls = response.tool_calls
+
+                # Backward compatibility: pull reasoning out of inline <think> tags.
+                if not reasoning_content and content:
+                    content, extracted_reasoning = self._extract_reasoning(content)
+                    reasoning_content = extracted_reasoning
+
+                # Record usage for every response, including empty ones that are discarded
+                # below; this must run before model_info is re-selected for the retry.
+                if usage := response.usage:
+                    llm_usage_recorder.record_usage_to_database(
+                        model_info=model_info,
+                        model_usage=usage,
+                        user_id="system",
+                        request_type=self.request_type,
+                        endpoint="/chat/completions",
+                    )
+
+                # Missing or whitespace-only text counts as an empty reply.
+                is_empty_reply = not content or not content.strip()
+
+                # A reply that carries tool calls is usable without text, so it is not retried.
+                if is_empty_reply and not tool_calls and empty_retry_count < max_empty_retry:
+                    empty_retry_count += 1
+                    logger.warning(f"检测到空回复，正在进行第 {empty_retry_count}/{max_empty_retry} 次重新生成")
+
+                    # Back off before asking again.
+                    if empty_retry_interval > 0:
+                        await asyncio.sleep(empty_retry_interval)
+
+                    # Re-select the model; a different one may be chosen.
+                    model_info, api_provider, client = self._select_model()
+                    continue
+
+                if is_empty_reply:
+                    if raise_when_empty:
+                        logger.warning(f"经过 {empty_retry_count} 次重试后仍然生成空回复")
+                        # Leave the loop so the RuntimeError after it is raised outside this try.
+                        break
+                    content = "生成的响应为空，请检查模型配置或输入内容是否正确"
+                elif empty_retry_count > 0:
+                    logger.info(f"经过 {empty_retry_count} 次重试后成功生成回复")
 
-        return content, (reasoning_content, model_info.name, tool_calls)
+                return content, (reasoning_content, model_info.name, tool_calls)
+                
+            except Exception as e:
+                # Any exception raised in the try body above lands here, not only request failures.
+                if empty_retry_count == 0:  # no empty-reply retry has happened yet: propagate as-is
+                    raise e
+                else:
+                    # Already retrying: log the error, use up one retry, wait if retries remain, then loop again.
+                    logger.error(f"重试过程中出错: {e}")
+                    empty_retry_count += 1
+                    if empty_retry_count <= max_empty_retry and empty_retry_interval > 0:
+                        await asyncio.sleep(empty_retry_interval)
+                    continue
+        
+        # Reached when the loop ends without returning a reply.
+        if raise_when_empty:
+            raise RuntimeError(f"经过 {max_empty_retry} 次重试后仍然无法生成有效回复")
+        return "生成的响应为空，请检查模型配置或输入内容是否正确", ("", model_info.name, None)
 
     async def get_embedding(self, embedding_input: str) -> Tuple[List[float], str]:
         """获取嵌入向量