feat: 支持多个API Key,增强错误处理和负载均衡机制
This commit is contained in:
@@ -74,8 +74,22 @@ def _handle_resp_not_ok(
|
||||
:return: (等待间隔(如果为0则不等待,为-1则不再请求该模型), 新的消息列表(适用于压缩消息))
|
||||
"""
|
||||
# 响应错误
|
||||
if e.status_code in [400, 401, 402, 403, 404]:
|
||||
# 客户端错误
|
||||
if e.status_code in [401, 403]:
|
||||
# API Key认证错误 - 让多API Key机制处理,给一次重试机会
|
||||
if remain_try > 0:
|
||||
logger.warning(
|
||||
f"任务-'{task_name}' 模型-'{model_name}'\n"
|
||||
f"API Key认证失败(错误代码-{e.status_code}),多API Key机制会自动切换"
|
||||
)
|
||||
return 0, None # 立即重试,让底层客户端切换API Key
|
||||
else:
|
||||
logger.warning(
|
||||
f"任务-'{task_name}' 模型-'{model_name}'\n"
|
||||
f"所有API Key都认证失败,错误代码-{e.status_code},错误信息-{e.message}"
|
||||
)
|
||||
return -1, None # 不再重试请求该模型
|
||||
elif e.status_code in [400, 402, 404]:
|
||||
# 其他客户端错误(不应该重试)
|
||||
logger.warning(
|
||||
f"任务-'{task_name}' 模型-'{model_name}'\n"
|
||||
f"请求失败,错误代码-{e.status_code},错误信息-{e.message}"
|
||||
@@ -105,17 +119,17 @@ def _handle_resp_not_ok(
|
||||
)
|
||||
return -1, None
|
||||
elif e.status_code == 429:
|
||||
# 请求过于频繁
|
||||
# 请求过于频繁 - 让多API Key机制处理,适当延迟后重试
|
||||
return _check_retry(
|
||||
remain_try,
|
||||
retry_interval,
|
||||
min(retry_interval, 5), # 限制最大延迟为5秒,让API Key切换更快生效
|
||||
can_retry_msg=(
|
||||
f"任务-'{task_name}' 模型-'{model_name}'\n"
|
||||
f"请求过于频繁,将于{retry_interval}秒后重试"
|
||||
f"请求过于频繁,多API Key机制会自动切换,{min(retry_interval, 5)}秒后重试"
|
||||
),
|
||||
cannot_retry_msg=(
|
||||
f"任务-'{task_name}' 模型-'{model_name}'\n"
|
||||
"请求过于频繁,超过最大重试次数,放弃请求"
|
||||
"请求过于频繁,所有API Key都被限制,放弃请求"
|
||||
),
|
||||
)
|
||||
elif e.status_code >= 500:
|
||||
@@ -161,12 +175,13 @@ def default_exception_handler(
|
||||
"""
|
||||
|
||||
if isinstance(e, NetworkConnectionError): # 网络连接错误
|
||||
# 网络错误可能是某个API Key的端点问题,给多API Key机制一次快速重试机会
|
||||
return _check_retry(
|
||||
remain_try,
|
||||
retry_interval,
|
||||
min(retry_interval, 3), # 网络错误时减少等待时间,让API Key切换更快
|
||||
can_retry_msg=(
|
||||
f"任务-'{task_name}' 模型-'{model_name}'\n"
|
||||
f"连接异常,将于{retry_interval}秒后重试"
|
||||
f"连接异常,多API Key机制会尝试其他Key,{min(retry_interval, 3)}秒后重试"
|
||||
),
|
||||
cannot_retry_msg=(
|
||||
f"任务-'{task_name}' 模型-'{model_name}'\n"
|
||||
|
||||
@@ -17,6 +17,7 @@ from google.genai.errors import (
|
||||
from .base_client import APIResponse, UsageRecord
|
||||
from src.config.api_ada_configs import ModelInfo, APIProvider
|
||||
from . import BaseClient
|
||||
from src.common.logger import get_logger
|
||||
|
||||
from ..exceptions import (
|
||||
RespParseException,
|
||||
@@ -28,6 +29,7 @@ from ..payload_content.message import Message, RoleType
|
||||
from ..payload_content.resp_format import RespFormat, RespFormatType
|
||||
from ..payload_content.tool_option import ToolOption, ToolParam, ToolCall
|
||||
|
||||
logger = get_logger("Gemini客户端")
|
||||
T = TypeVar("T")
|
||||
|
||||
|
||||
@@ -309,13 +311,55 @@ def _default_normal_response_parser(
|
||||
|
||||
|
||||
class GeminiClient(BaseClient):
|
||||
client: genai.Client
|
||||
|
||||
def __init__(self, api_provider: APIProvider):
|
||||
super().__init__(api_provider)
|
||||
self.client = genai.Client(
|
||||
api_key=api_provider.api_key,
|
||||
) # 这里和openai不一样,gemini会自己决定自己是否需要retry
|
||||
# 不再在初始化时创建固定的client,而是在请求时动态创建
|
||||
self._clients_cache = {} # API Key -> genai.Client 的缓存
|
||||
|
||||
def _get_client(self, api_key: str = None) -> genai.Client:
|
||||
"""获取或创建对应API Key的客户端"""
|
||||
if api_key is None:
|
||||
api_key = self.api_provider.get_current_api_key()
|
||||
|
||||
if not api_key:
|
||||
raise ValueError(f"API Provider '{self.api_provider.name}' 没有可用的API Key")
|
||||
|
||||
# 使用缓存避免重复创建客户端
|
||||
if api_key not in self._clients_cache:
|
||||
self._clients_cache[api_key] = genai.Client(api_key=api_key)
|
||||
|
||||
return self._clients_cache[api_key]
|
||||
|
||||
async def _execute_with_fallback(self, func, *args, **kwargs):
|
||||
"""执行请求并在失败时切换API Key"""
|
||||
current_api_key = self.api_provider.get_current_api_key()
|
||||
max_attempts = len(self.api_provider.api_keys) if self.api_provider.api_keys else 1
|
||||
|
||||
for attempt in range(max_attempts):
|
||||
try:
|
||||
client = self._get_client(current_api_key)
|
||||
result = await func(client, *args, **kwargs)
|
||||
# 成功时重置失败计数
|
||||
self.api_provider.reset_key_failures(current_api_key)
|
||||
return result
|
||||
|
||||
except (ClientError, ServerError) as e:
|
||||
# 记录失败并尝试下一个API Key
|
||||
logger.warning(f"API Key失败 (尝试 {attempt + 1}/{max_attempts}): {str(e)}")
|
||||
|
||||
if attempt < max_attempts - 1: # 还有重试机会
|
||||
next_api_key = self.api_provider.mark_key_failed(current_api_key)
|
||||
if next_api_key and next_api_key != current_api_key:
|
||||
current_api_key = next_api_key
|
||||
logger.info(f"切换到下一个API Key: {current_api_key[:8]}***{current_api_key[-4:]}")
|
||||
continue
|
||||
|
||||
# 所有API Key都失败了,重新抛出异常
|
||||
raise RespNotOkException(e.status_code, e.message) from e
|
||||
|
||||
except Exception as e:
|
||||
# 其他异常直接抛出
|
||||
raise e
|
||||
|
||||
async def get_response(
|
||||
self,
|
||||
@@ -348,6 +392,39 @@ class GeminiClient(BaseClient):
|
||||
:param interrupt_flag: 中断信号量(可选,默认为None)
|
||||
:return: (响应文本, 推理文本, 工具调用, 其他数据)
|
||||
"""
|
||||
return await self._execute_with_fallback(
|
||||
self._get_response_internal,
|
||||
model_info,
|
||||
message_list,
|
||||
tool_options,
|
||||
max_tokens,
|
||||
temperature,
|
||||
thinking_budget,
|
||||
response_format,
|
||||
stream_response_handler,
|
||||
async_response_parser,
|
||||
interrupt_flag,
|
||||
)
|
||||
|
||||
async def _get_response_internal(
|
||||
self,
|
||||
client: genai.Client,
|
||||
model_info: ModelInfo,
|
||||
message_list: list[Message],
|
||||
tool_options: list[ToolOption] | None = None,
|
||||
max_tokens: int = 1024,
|
||||
temperature: float = 0.7,
|
||||
thinking_budget: int = 0,
|
||||
response_format: RespFormat | None = None,
|
||||
stream_response_handler: Callable[
|
||||
[Iterator[GenerateContentResponse], asyncio.Event | None], APIResponse
|
||||
]
|
||||
| None = None,
|
||||
async_response_parser: Callable[[GenerateContentResponse], APIResponse]
|
||||
| None = None,
|
||||
interrupt_flag: asyncio.Event | None = None,
|
||||
) -> APIResponse:
|
||||
"""内部方法:执行实际的API调用"""
|
||||
if stream_response_handler is None:
|
||||
stream_response_handler = _default_stream_response_handler
|
||||
|
||||
@@ -385,7 +462,7 @@ class GeminiClient(BaseClient):
|
||||
try:
|
||||
if model_info.force_stream_mode:
|
||||
req_task = asyncio.create_task(
|
||||
self.client.aio.models.generate_content_stream(
|
||||
client.aio.models.generate_content_stream(
|
||||
model=model_info.model_identifier,
|
||||
contents=messages[0],
|
||||
config=generation_config,
|
||||
@@ -402,7 +479,7 @@ class GeminiClient(BaseClient):
|
||||
)
|
||||
else:
|
||||
req_task = asyncio.create_task(
|
||||
self.client.aio.models.generate_content(
|
||||
client.aio.models.generate_content(
|
||||
model=model_info.model_identifier,
|
||||
contents=messages[0],
|
||||
config=generation_config,
|
||||
@@ -418,13 +495,13 @@ class GeminiClient(BaseClient):
|
||||
resp, usage_record = async_response_parser(req_task.result())
|
||||
except (ClientError, ServerError) as e:
|
||||
# 重封装ClientError和ServerError为RespNotOkException
|
||||
raise RespNotOkException(e.status_code, e.message)
|
||||
raise RespNotOkException(e.status_code, e.message) from e
|
||||
except (
|
||||
UnknownFunctionCallArgumentError,
|
||||
UnsupportedFunctionError,
|
||||
FunctionInvocationError,
|
||||
) as e:
|
||||
raise ValueError("工具类型错误:请检查工具选项和参数:" + str(e))
|
||||
raise ValueError(f"工具类型错误:请检查工具选项和参数:{str(e)}") from e
|
||||
except Exception as e:
|
||||
raise NetworkConnectionError() from e
|
||||
|
||||
@@ -437,6 +514,8 @@ class GeminiClient(BaseClient):
|
||||
total_tokens=usage_record[2],
|
||||
)
|
||||
|
||||
return resp
|
||||
|
||||
async def get_embedding(
|
||||
self,
|
||||
model_info: ModelInfo,
|
||||
@@ -448,9 +527,22 @@ class GeminiClient(BaseClient):
|
||||
:param embedding_input: 嵌入输入文本
|
||||
:return: 嵌入响应
|
||||
"""
|
||||
return await self._execute_with_fallback(
|
||||
self._get_embedding_internal,
|
||||
model_info,
|
||||
embedding_input,
|
||||
)
|
||||
|
||||
async def _get_embedding_internal(
|
||||
self,
|
||||
client: genai.Client,
|
||||
model_info: ModelInfo,
|
||||
embedding_input: str,
|
||||
) -> APIResponse:
|
||||
"""内部方法:执行实际的嵌入API调用"""
|
||||
try:
|
||||
raw_response: types.EmbedContentResponse = (
|
||||
await self.client.aio.models.embed_content(
|
||||
await client.aio.models.embed_content(
|
||||
model=model_info.model_identifier,
|
||||
contents=embedding_input,
|
||||
config=types.EmbedContentConfig(task_type="SEMANTIC_SIMILARITY"),
|
||||
@@ -458,7 +550,7 @@ class GeminiClient(BaseClient):
|
||||
)
|
||||
except (ClientError, ServerError) as e:
|
||||
# 重封装ClientError和ServerError为RespNotOkException
|
||||
raise RespNotOkException(e.status_code)
|
||||
raise RespNotOkException(e.status_code) from e
|
||||
except Exception as e:
|
||||
raise NetworkConnectionError() from e
|
||||
|
||||
|
||||
@@ -23,6 +23,7 @@ from openai.types.chat.chat_completion_chunk import ChoiceDelta
|
||||
from .base_client import APIResponse, UsageRecord
|
||||
from src.config.api_ada_configs import ModelInfo, APIProvider
|
||||
from . import BaseClient
|
||||
from src.common.logger import get_logger
|
||||
|
||||
from ..exceptions import (
|
||||
RespParseException,
|
||||
@@ -34,6 +35,8 @@ from ..payload_content.message import Message, RoleType
|
||||
from ..payload_content.resp_format import RespFormat
|
||||
from ..payload_content.tool_option import ToolOption, ToolParam, ToolCall
|
||||
|
||||
logger = get_logger("OpenAI客户端")
|
||||
|
||||
|
||||
def _convert_messages(messages: list[Message]) -> list[ChatCompletionMessageParam]:
|
||||
"""
|
||||
@@ -385,11 +388,60 @@ def _default_normal_response_parser(
|
||||
class OpenaiClient(BaseClient):
|
||||
def __init__(self, api_provider: APIProvider):
|
||||
super().__init__(api_provider)
|
||||
self.client: AsyncOpenAI = AsyncOpenAI(
|
||||
base_url=api_provider.base_url,
|
||||
api_key=api_provider.api_key,
|
||||
max_retries=0,
|
||||
)
|
||||
# 不再在初始化时创建固定的client,而是在请求时动态创建
|
||||
self._clients_cache = {} # API Key -> AsyncOpenAI client 的缓存
|
||||
|
||||
def _get_client(self, api_key: str = None) -> AsyncOpenAI:
|
||||
"""获取或创建对应API Key的客户端"""
|
||||
if api_key is None:
|
||||
api_key = self.api_provider.get_current_api_key()
|
||||
|
||||
if not api_key:
|
||||
raise ValueError(f"API Provider '{self.api_provider.name}' 没有可用的API Key")
|
||||
|
||||
# 使用缓存避免重复创建客户端
|
||||
if api_key not in self._clients_cache:
|
||||
self._clients_cache[api_key] = AsyncOpenAI(
|
||||
base_url=self.api_provider.base_url,
|
||||
api_key=api_key,
|
||||
max_retries=0,
|
||||
)
|
||||
|
||||
return self._clients_cache[api_key]
|
||||
|
||||
async def _execute_with_fallback(self, func, *args, **kwargs):
|
||||
"""执行请求并在失败时切换API Key"""
|
||||
current_api_key = self.api_provider.get_current_api_key()
|
||||
max_attempts = len(self.api_provider.api_keys) if self.api_provider.api_keys else 1
|
||||
|
||||
for attempt in range(max_attempts):
|
||||
try:
|
||||
client = self._get_client(current_api_key)
|
||||
result = await func(client, *args, **kwargs)
|
||||
# 成功时重置失败计数
|
||||
self.api_provider.reset_key_failures(current_api_key)
|
||||
return result
|
||||
|
||||
except (APIStatusError, APIConnectionError) as e:
|
||||
# 记录失败并尝试下一个API Key
|
||||
logger.warning(f"API Key失败 (尝试 {attempt + 1}/{max_attempts}): {str(e)}")
|
||||
|
||||
if attempt < max_attempts - 1: # 还有重试机会
|
||||
next_api_key = self.api_provider.mark_key_failed(current_api_key)
|
||||
if next_api_key and next_api_key != current_api_key:
|
||||
current_api_key = next_api_key
|
||||
logger.info(f"切换到下一个API Key: {current_api_key[:8]}***{current_api_key[-4:]}")
|
||||
continue
|
||||
|
||||
# 所有API Key都失败了,重新抛出异常
|
||||
if isinstance(e, APIStatusError):
|
||||
raise RespNotOkException(e.status_code, e.message) from e
|
||||
elif isinstance(e, APIConnectionError):
|
||||
raise NetworkConnectionError(str(e)) from e
|
||||
|
||||
except Exception as e:
|
||||
# 其他异常直接抛出
|
||||
raise e
|
||||
|
||||
async def get_response(
|
||||
self,
|
||||
@@ -423,6 +475,40 @@ class OpenaiClient(BaseClient):
|
||||
:param interrupt_flag: 中断信号量(可选,默认为None)
|
||||
:return: (响应文本, 推理文本, 工具调用, 其他数据)
|
||||
"""
|
||||
return await self._execute_with_fallback(
|
||||
self._get_response_internal,
|
||||
model_info,
|
||||
message_list,
|
||||
tool_options,
|
||||
max_tokens,
|
||||
temperature,
|
||||
response_format,
|
||||
stream_response_handler,
|
||||
async_response_parser,
|
||||
interrupt_flag,
|
||||
)
|
||||
|
||||
async def _get_response_internal(
|
||||
self,
|
||||
client: AsyncOpenAI,
|
||||
model_info: ModelInfo,
|
||||
message_list: list[Message],
|
||||
tool_options: list[ToolOption] | None = None,
|
||||
max_tokens: int = 1024,
|
||||
temperature: float = 0.7,
|
||||
response_format: RespFormat | None = None,
|
||||
stream_response_handler: Callable[
|
||||
[AsyncStream[ChatCompletionChunk], asyncio.Event | None],
|
||||
tuple[APIResponse, tuple[int, int, int]],
|
||||
]
|
||||
| None = None,
|
||||
async_response_parser: Callable[
|
||||
[ChatCompletion], tuple[APIResponse, tuple[int, int, int]]
|
||||
]
|
||||
| None = None,
|
||||
interrupt_flag: asyncio.Event | None = None,
|
||||
) -> APIResponse:
|
||||
"""内部方法:执行实际的API调用"""
|
||||
if stream_response_handler is None:
|
||||
stream_response_handler = _default_stream_response_handler
|
||||
|
||||
@@ -439,7 +525,7 @@ class OpenaiClient(BaseClient):
|
||||
try:
|
||||
if model_info.force_stream_mode:
|
||||
req_task = asyncio.create_task(
|
||||
self.client.chat.completions.create(
|
||||
client.chat.completions.create(
|
||||
model=model_info.model_identifier,
|
||||
messages=messages,
|
||||
tools=tools,
|
||||
@@ -464,7 +550,7 @@ class OpenaiClient(BaseClient):
|
||||
else:
|
||||
# 发送请求并获取响应
|
||||
req_task = asyncio.create_task(
|
||||
self.client.chat.completions.create(
|
||||
client.chat.completions.create(
|
||||
model=model_info.model_identifier,
|
||||
messages=messages,
|
||||
tools=tools,
|
||||
@@ -513,8 +599,21 @@ class OpenaiClient(BaseClient):
|
||||
:param embedding_input: 嵌入输入文本
|
||||
:return: 嵌入响应
|
||||
"""
|
||||
return await self._execute_with_fallback(
|
||||
self._get_embedding_internal,
|
||||
model_info,
|
||||
embedding_input,
|
||||
)
|
||||
|
||||
async def _get_embedding_internal(
|
||||
self,
|
||||
client: AsyncOpenAI,
|
||||
model_info: ModelInfo,
|
||||
embedding_input: str,
|
||||
) -> APIResponse:
|
||||
"""内部方法:执行实际的嵌入API调用"""
|
||||
try:
|
||||
raw_response = await self.client.embeddings.create(
|
||||
raw_response = await client.embeddings.create(
|
||||
model=model_info.model_identifier,
|
||||
input=embedding_input,
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user