feat: support multiple API Keys, enhance error handling and load balancing

Author: 墨梓柒
Date: 2025-07-27 13:55:18 +08:00
parent e240fb92ca
commit 16931ef7b4
6 changed files with 391 additions and 44 deletions
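
Note: the diff below calls three new APIProvider methods - get_current_api_key(), mark_key_failed(), and reset_key_failures() - whose implementation lives in the config module (presumably src/config/api_ada_configs.py, one of the changed files not shown here). A minimal sketch of what such a rotation interface could look like, assuming simple round-robin switching with per-key failure counts (everything beyond the three method names is hypothetical):

class APIProvider:
    """Hypothetical sketch; the real class is defined in the config module."""

    def __init__(self, name: str, base_url: str, api_keys: list[str]):
        self.name = name
        self.base_url = base_url
        self.api_keys = api_keys
        self._current = 0  # index of the key currently in use
        self._failures = {key: 0 for key in api_keys}  # consecutive failures per key

    def get_current_api_key(self) -> str | None:
        """Return the key currently in use, or None if none are configured."""
        return self.api_keys[self._current] if self.api_keys else None

    def mark_key_failed(self, api_key: str) -> str | None:
        """Record a failure for api_key and rotate to the next key (round-robin)."""
        if not self.api_keys:
            return None
        self._failures[api_key] = self._failures.get(api_key, 0) + 1
        self._current = (self._current + 1) % len(self.api_keys)
        return self.get_current_api_key()

    def reset_key_failures(self, api_key: str) -> None:
        """Clear the failure count for api_key after a successful request."""
        self._failures[api_key] = 0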

View File

@@ -74,8 +74,22 @@ def _handle_resp_not_ok(
    :return: (wait interval: 0 means no wait, -1 means stop requesting this model; new message list (used for message compression))
"""
    # Response error
-    if e.status_code in [400, 401, 402, 403, 404]:
-        # Client error
+    if e.status_code in [401, 403]:
+        # API Key authentication error - give the multi-API-Key mechanism one chance to retry
+        if remain_try > 0:
+            logger.warning(
+                f"Task-'{task_name}' Model-'{model_name}'\n"
+                f"API Key authentication failed, error code {e.status_code}; the multi-API-Key mechanism will switch keys automatically"
+            )
+            return 0, None  # Retry immediately so the underlying client can switch API Keys
+        else:
+            logger.warning(
+                f"Task-'{task_name}' Model-'{model_name}'\n"
+                f"All API Keys failed authentication, error code {e.status_code}, message: {e.message}"
+            )
+            return -1, None  # Stop requesting this model
+    elif e.status_code in [400, 402, 404]:
+        # Other client errors (should not be retried)
        logger.warning(
            f"Task-'{task_name}' Model-'{model_name}'\n"
            f"Request failed, error code {e.status_code}, message: {e.message}"
@@ -105,17 +119,17 @@ def _handle_resp_not_ok(
        )
        return -1, None
    elif e.status_code == 429:
-        # Requests too frequent
+        # Requests too frequent - let the multi-API-Key mechanism handle it; retry after a short delay
        return _check_retry(
            remain_try,
-            retry_interval,
+            min(retry_interval, 5),  # Cap the delay at 5 seconds so API Key switching takes effect sooner
            can_retry_msg=(
                f"Task-'{task_name}' Model-'{model_name}'\n"
-                f"Requests too frequent, retrying in {retry_interval} seconds"
+                f"Requests too frequent; the multi-API-Key mechanism will switch automatically, retrying in {min(retry_interval, 5)} seconds"
            ),
            cannot_retry_msg=(
                f"Task-'{task_name}' Model-'{model_name}'\n"
-                "Requests too frequent, maximum retries exceeded, giving up"
+                "Requests too frequent and all API Keys are rate limited, giving up"
            ),
        )
    elif e.status_code >= 500:
@@ -161,12 +175,13 @@ def default_exception_handler(
"""
if isinstance(e, NetworkConnectionError): # 网络连接错误
# 网络错误可能是某个API Key的端点问题给多API Key机制一次快速重试机会
return _check_retry(
remain_try,
retry_interval,
min(retry_interval, 3), # 网络错误时减少等待时间让API Key切换更快
can_retry_msg=(
f"任务-'{task_name}' 模型-'{model_name}'\n"
f"连接异常,将于{retry_interval}秒后重试"
f"连接异常,多API Key机制会尝试其他Key{min(retry_interval, 3)}秒后重试"
),
cannot_retry_msg=(
f"任务-'{task_name}' 模型-'{model_name}'\n"

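_check_retry itself is unchanged by this commit and not shown; from the call sites above it takes the remaining retry budget, a wait interval, and the two log messages, and returns the same (wait interval, new message list) tuple that _handle_resp_not_ok documents. A sketch consistent with those call sites (the exact signature and body are assumptions):

def _check_retry(
    remain_try: int,
    retry_interval: int,
    can_retry_msg: str,
    cannot_retry_msg: str,
) -> tuple[int, None]:
    """Hypothetical sketch: decide whether another retry is allowed."""
    if remain_try > 0:
        # Retries remain: tell the caller to wait retry_interval seconds and try again
        logger.warning(can_retry_msg)
        return retry_interval, None
    # Retry budget exhausted: -1 tells the caller to stop requesting this model
    logger.warning(cannot_retry_msg)
    return -1, None
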
View File

@@ -17,6 +17,7 @@ from google.genai.errors import (
from .base_client import APIResponse, UsageRecord
from src.config.api_ada_configs import ModelInfo, APIProvider
from . import BaseClient
+from src.common.logger import get_logger
from ..exceptions import (
RespParseException,
@@ -28,6 +29,7 @@ from ..payload_content.message import Message, RoleType
from ..payload_content.resp_format import RespFormat, RespFormatType
from ..payload_content.tool_option import ToolOption, ToolParam, ToolCall
+logger = get_logger("Gemini Client")
T = TypeVar("T")
@@ -309,13 +311,55 @@ def _default_normal_response_parser(
class GeminiClient(BaseClient):
client: genai.Client
    def __init__(self, api_provider: APIProvider):
        super().__init__(api_provider)
-        self.client = genai.Client(
-            api_key=api_provider.api_key,
-        )  # Unlike openai, gemini decides on its own whether it needs to retry
+        # No longer create a fixed client at init time; create clients dynamically per request
+        self._clients_cache = {}  # cache of API Key -> genai.Client

+    def _get_client(self, api_key: str | None = None) -> genai.Client:
+        """Get or create the client for the given API Key"""
+        if api_key is None:
+            api_key = self.api_provider.get_current_api_key()
+        if not api_key:
+            raise ValueError(f"API Provider '{self.api_provider.name}' has no available API Key")
+        # Use a cache to avoid recreating clients
+        if api_key not in self._clients_cache:
+            self._clients_cache[api_key] = genai.Client(api_key=api_key)
+        return self._clients_cache[api_key]

+    async def _execute_with_fallback(self, func, *args, **kwargs):
+        """Execute a request, switching API Keys on failure"""
+        current_api_key = self.api_provider.get_current_api_key()
+        max_attempts = len(self.api_provider.api_keys) if self.api_provider.api_keys else 1
+        for attempt in range(max_attempts):
+            try:
+                client = self._get_client(current_api_key)
+                result = await func(client, *args, **kwargs)
+                # Reset the failure count on success
+                self.api_provider.reset_key_failures(current_api_key)
+                return result
+            except (ClientError, ServerError) as e:
+                # Record the failure and try the next API Key
+                logger.warning(f"API Key failed (attempt {attempt + 1}/{max_attempts}): {str(e)}")
+                if attempt < max_attempts - 1:  # retries remain
+                    next_api_key = self.api_provider.mark_key_failed(current_api_key)
+                    if next_api_key and next_api_key != current_api_key:
+                        current_api_key = next_api_key
+                        logger.info(f"Switching to next API Key: {current_api_key[:8]}***{current_api_key[-4:]}")
+                        continue
+                # All API Keys failed; re-raise the exception
+                raise RespNotOkException(e.status_code, e.message) from e
+            except Exception as e:
+                # Other exceptions propagate directly
+                raise e
async def get_response(
self,
@@ -348,6 +392,39 @@ class GeminiClient(BaseClient):
        :param interrupt_flag: interrupt event, optional, defaults to None
        :return: (response text, reasoning text, tool calls, other data)
        """
+        return await self._execute_with_fallback(
+            self._get_response_internal,
+            model_info,
+            message_list,
+            tool_options,
+            max_tokens,
+            temperature,
+            thinking_budget,
+            response_format,
+            stream_response_handler,
+            async_response_parser,
+            interrupt_flag,
+        )
+    async def _get_response_internal(
+        self,
+        client: genai.Client,
+        model_info: ModelInfo,
+        message_list: list[Message],
+        tool_options: list[ToolOption] | None = None,
+        max_tokens: int = 1024,
+        temperature: float = 0.7,
+        thinking_budget: int = 0,
+        response_format: RespFormat | None = None,
+        stream_response_handler: Callable[
+            [Iterator[GenerateContentResponse], asyncio.Event | None], APIResponse
+        ]
+        | None = None,
+        async_response_parser: Callable[[GenerateContentResponse], APIResponse]
+        | None = None,
+        interrupt_flag: asyncio.Event | None = None,
+    ) -> APIResponse:
+        """Internal method: perform the actual API call"""
        if stream_response_handler is None:
            stream_response_handler = _default_stream_response_handler
@@ -385,7 +462,7 @@ class GeminiClient(BaseClient):
try:
if model_info.force_stream_mode:
req_task = asyncio.create_task(
-                    self.client.aio.models.generate_content_stream(
+                    client.aio.models.generate_content_stream(
model=model_info.model_identifier,
contents=messages[0],
config=generation_config,
@@ -402,7 +479,7 @@ class GeminiClient(BaseClient):
)
else:
req_task = asyncio.create_task(
-                    self.client.aio.models.generate_content(
+                    client.aio.models.generate_content(
model=model_info.model_identifier,
contents=messages[0],
config=generation_config,
@@ -418,13 +495,13 @@ class GeminiClient(BaseClient):
resp, usage_record = async_response_parser(req_task.result())
except (ClientError, ServerError) as e:
            # Re-wrap ClientError and ServerError as RespNotOkException
-            raise RespNotOkException(e.status_code, e.message)
+            raise RespNotOkException(e.status_code, e.message) from e
except (
UnknownFunctionCallArgumentError,
UnsupportedFunctionError,
FunctionInvocationError,
) as e:
raise ValueError("工具类型错误:请检查工具选项和参数:" + str(e))
raise ValueError(f"工具类型错误:请检查工具选项和参数:{str(e)}") from e
except Exception as e:
raise NetworkConnectionError() from e
@@ -437,6 +514,8 @@ class GeminiClient(BaseClient):
total_tokens=usage_record[2],
)
return resp
async def get_embedding(
self,
model_info: ModelInfo,
@@ -448,9 +527,22 @@ class GeminiClient(BaseClient):
        :param embedding_input: input text to embed
        :return: embedding response
        """
+        return await self._execute_with_fallback(
+            self._get_embedding_internal,
+            model_info,
+            embedding_input,
+        )
+    async def _get_embedding_internal(
+        self,
+        client: genai.Client,
+        model_info: ModelInfo,
+        embedding_input: str,
+    ) -> APIResponse:
+        """Internal method: perform the actual embedding API call"""
        try:
            raw_response: types.EmbedContentResponse = (
-                await self.client.aio.models.embed_content(
+                await client.aio.models.embed_content(
                    model=model_info.model_identifier,
                    contents=embedding_input,
                    config=types.EmbedContentConfig(task_type="SEMANTIC_SIMILARITY"),
@@ -458,7 +550,7 @@ class GeminiClient(BaseClient):
)
except (ClientError, ServerError) as e:
            # Re-wrap ClientError and ServerError as RespNotOkException
-            raise RespNotOkException(e.status_code)
+            raise RespNotOkException(e.status_code) from e
except Exception as e:
raise NetworkConnectionError() from e

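A side effect of _clients_cache worth noting: when the rotation comes back around to a key that failed earlier, _get_client reuses the cached genai.Client instead of constructing a new one. A small illustration, assuming the hypothetical APIProvider sketched above (keys are placeholders):

provider = APIProvider(
    name="google",
    base_url="",  # unused by GeminiClient, which only needs the key
    api_keys=["AIza-key-1", "AIza-key-2"],
)
gemini = GeminiClient(provider)

# Same key -> same cached client object; a different key -> a second client.
c1 = gemini._get_client("AIza-key-1")
c2 = gemini._get_client("AIza-key-1")
c3 = gemini._get_client("AIza-key-2")
assert c1 is c2
assert c1 is not c3
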
View File

@@ -23,6 +23,7 @@ from openai.types.chat.chat_completion_chunk import ChoiceDelta
from .base_client import APIResponse, UsageRecord
from src.config.api_ada_configs import ModelInfo, APIProvider
from . import BaseClient
+from src.common.logger import get_logger
from ..exceptions import (
RespParseException,
@@ -34,6 +35,8 @@ from ..payload_content.message import Message, RoleType
from ..payload_content.resp_format import RespFormat
from ..payload_content.tool_option import ToolOption, ToolParam, ToolCall
+logger = get_logger("OpenAI Client")
def _convert_messages(messages: list[Message]) -> list[ChatCompletionMessageParam]:
"""
@@ -385,11 +388,60 @@ def _default_normal_response_parser(
class OpenaiClient(BaseClient):
    def __init__(self, api_provider: APIProvider):
        super().__init__(api_provider)
-        self.client: AsyncOpenAI = AsyncOpenAI(
-            base_url=api_provider.base_url,
-            api_key=api_provider.api_key,
-            max_retries=0,
-        )
+        # No longer create a fixed client at init time; create clients dynamically per request
+        self._clients_cache = {}  # cache of API Key -> AsyncOpenAI client

+    def _get_client(self, api_key: str | None = None) -> AsyncOpenAI:
+        """Get or create the client for the given API Key"""
+        if api_key is None:
+            api_key = self.api_provider.get_current_api_key()
+        if not api_key:
+            raise ValueError(f"API Provider '{self.api_provider.name}' has no available API Key")
+        # Use a cache to avoid recreating clients
+        if api_key not in self._clients_cache:
+            self._clients_cache[api_key] = AsyncOpenAI(
+                base_url=self.api_provider.base_url,
+                api_key=api_key,
+                max_retries=0,
+            )
+        return self._clients_cache[api_key]

+    async def _execute_with_fallback(self, func, *args, **kwargs):
+        """Execute a request, switching API Keys on failure"""
+        current_api_key = self.api_provider.get_current_api_key()
+        max_attempts = len(self.api_provider.api_keys) if self.api_provider.api_keys else 1
+        for attempt in range(max_attempts):
+            try:
+                client = self._get_client(current_api_key)
+                result = await func(client, *args, **kwargs)
+                # Reset the failure count on success
+                self.api_provider.reset_key_failures(current_api_key)
+                return result
+            except (APIStatusError, APIConnectionError) as e:
+                # Record the failure and try the next API Key
+                logger.warning(f"API Key failed (attempt {attempt + 1}/{max_attempts}): {str(e)}")
+                if attempt < max_attempts - 1:  # retries remain
+                    next_api_key = self.api_provider.mark_key_failed(current_api_key)
+                    if next_api_key and next_api_key != current_api_key:
+                        current_api_key = next_api_key
+                        logger.info(f"Switching to next API Key: {current_api_key[:8]}***{current_api_key[-4:]}")
+                        continue
+                # All API Keys failed; re-raise the exception
+                if isinstance(e, APIStatusError):
+                    raise RespNotOkException(e.status_code, e.message) from e
+                elif isinstance(e, APIConnectionError):
+                    raise NetworkConnectionError(str(e)) from e
+            except Exception as e:
+                # Other exceptions propagate directly
+                raise e
async def get_response(
self,
@@ -423,6 +475,40 @@ class OpenaiClient(BaseClient):
        :param interrupt_flag: interrupt event, optional, defaults to None
        :return: (response text, reasoning text, tool calls, other data)
        """
+        return await self._execute_with_fallback(
+            self._get_response_internal,
+            model_info,
+            message_list,
+            tool_options,
+            max_tokens,
+            temperature,
+            response_format,
+            stream_response_handler,
+            async_response_parser,
+            interrupt_flag,
+        )
+    async def _get_response_internal(
+        self,
+        client: AsyncOpenAI,
+        model_info: ModelInfo,
+        message_list: list[Message],
+        tool_options: list[ToolOption] | None = None,
+        max_tokens: int = 1024,
+        temperature: float = 0.7,
+        response_format: RespFormat | None = None,
+        stream_response_handler: Callable[
+            [AsyncStream[ChatCompletionChunk], asyncio.Event | None],
+            tuple[APIResponse, tuple[int, int, int]],
+        ]
+        | None = None,
+        async_response_parser: Callable[
+            [ChatCompletion], tuple[APIResponse, tuple[int, int, int]]
+        ]
+        | None = None,
+        interrupt_flag: asyncio.Event | None = None,
+    ) -> APIResponse:
+        """Internal method: perform the actual API call"""
        if stream_response_handler is None:
            stream_response_handler = _default_stream_response_handler
@@ -439,7 +525,7 @@ class OpenaiClient(BaseClient):
try:
if model_info.force_stream_mode:
req_task = asyncio.create_task(
-                    self.client.chat.completions.create(
+                    client.chat.completions.create(
model=model_info.model_identifier,
messages=messages,
tools=tools,
@@ -464,7 +550,7 @@ class OpenaiClient(BaseClient):
else:
# 发送请求并获取响应
req_task = asyncio.create_task(
-                    self.client.chat.completions.create(
+                    client.chat.completions.create(
model=model_info.model_identifier,
messages=messages,
tools=tools,
@@ -513,8 +599,21 @@ class OpenaiClient(BaseClient):
        :param embedding_input: input text to embed
        :return: embedding response
        """
+        return await self._execute_with_fallback(
+            self._get_embedding_internal,
+            model_info,
+            embedding_input,
+        )

+    async def _get_embedding_internal(
+        self,
+        client: AsyncOpenAI,
+        model_info: ModelInfo,
+        embedding_input: str,
+    ) -> APIResponse:
+        """Internal method: perform the actual embedding API call"""
try:
-            raw_response = await self.client.embeddings.create(
+            raw_response = await client.embeddings.create(
model=model_info.model_identifier,
input=embedding_input,
)
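
As with the Gemini client, callers never see raw OpenAI SDK exceptions: once every key has been tried, APIStatusError surfaces as RespNotOkException and APIConnectionError as NetworkConnectionError. A caller-side sketch of handling both, assuming the provider and model_info objects from the earlier sketches:

async def embed_with_fallback(client: OpenaiClient, model_info: ModelInfo, text: str):
    """Hypothetical helper: request an embedding and report key-exhaustion errors."""
    try:
        return await client.get_embedding(model_info, text)
    except RespNotOkException as e:
        # Every configured key returned an HTTP error (e.g. 401/403/429 on all keys)
        logger.warning(f"All API Keys rejected the request: {e}")
    except NetworkConnectionError:
        # Every configured key hit a connection-level failure
        logger.warning("Network error on all API Keys")
    return None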