diff --git a/src/llm_models/model_client/base_client.py b/src/llm_models/model_client/base_client.py
index 5089666f1..0ca092447 100644
--- a/src/llm_models/model_client/base_client.py
+++ b/src/llm_models/model_client/base_client.py
@@ -83,6 +83,7 @@ class BaseClient:
         | None = None,
         async_response_parser: Callable[[ChatCompletion], tuple[APIResponse, tuple[int, int, int]]] | None = None,
         interrupt_flag: asyncio.Event | None = None,
+        extra_params: dict[str, Any] | None = None,
     ) -> APIResponse:
         """
         Get a chat response
@@ -103,6 +104,7 @@ class BaseClient:
         self,
         model_info: ModelInfo,
         embedding_input: str,
+        extra_params: dict[str, Any] | None = None,
     ) -> APIResponse:
         """
         Get text embeddings
diff --git a/src/llm_models/model_client/gemini_client.py b/src/llm_models/model_client/gemini_client.py
index af144dde2..0377fb118 100644
--- a/src/llm_models/model_client/gemini_client.py
+++ b/src/llm_models/model_client/gemini_client.py
@@ -1,3 +1,4 @@
+raise DeprecationWarning("Gemini Client is not fully available yet.")
 import asyncio
 import io
 from collections.abc import Iterable
diff --git a/src/llm_models/model_client/openai_client.py b/src/llm_models/model_client/openai_client.py
index 8fc234297..c8483eba9 100644
--- a/src/llm_models/model_client/openai_client.py
+++ b/src/llm_models/model_client/openai_client.py
@@ -396,6 +396,7 @@ class OpenaiClient(BaseClient):
             Callable[[ChatCompletion], tuple[APIResponse, Optional[tuple[int, int, int]]]]
         ] = None,
         interrupt_flag: asyncio.Event | None = None,
+        extra_params: dict[str, Any] | None = None,
     ) -> APIResponse:
         """
         Get a chat response
@@ -434,6 +435,7 @@ class OpenaiClient(BaseClient):
                     max_tokens=max_tokens,
                     stream=True,
                     response_format=NOT_GIVEN,
+                    extra_body=extra_params,
                 )
             )
             while not req_task.done():
@@ -455,6 +457,7 @@ class OpenaiClient(BaseClient):
                     max_tokens=max_tokens,
                     stream=False,
                     response_format=NOT_GIVEN,
+                    extra_body=extra_params,
                 )
             )
             while not req_task.done():
@@ -487,6 +490,7 @@ class OpenaiClient(BaseClient):
         self,
         model_info: ModelInfo,
         embedding_input: str,
+        extra_params: dict[str, Any] | None = None,
     ) -> APIResponse:
         """
         Get text embeddings
@@ -498,6 +502,7 @@ class OpenaiClient(BaseClient):
             raw_response = await self.client.embeddings.create(
                 model=model_info.model_identifier,
                 input=embedding_input,
+                extra_body=extra_params,
             )
         except APIConnectionError as e:
             raise NetworkConnectionError() from e
diff --git a/template/model_config_template.toml b/template/model_config_template.toml
index e99f039d3..3dcff6f84 100644
--- a/template/model_config_template.toml
+++ b/template/model_config_template.toml
@@ -1,5 +1,5 @@
 [inner]
-version = "1.1.0"
+version = "1.1.1"
 
 # Version numbering for this config file follows the same rules as bot_config.toml
 
@@ -59,6 +59,8 @@ name = "qwen3-8b"
 api_provider = "SiliconFlow"
 price_in = 0
 price_out = 0
+[models.extra_params] # Optional extra parameter configuration
+enable_thinking = false # Disable thinking mode
 
 [[models]]
 model_identifier = "Qwen/Qwen3-14B"
@@ -66,6 +68,8 @@ name = "qwen3-14b"
 api_provider = "SiliconFlow"
 price_in = 0.5
 price_out = 2.0
+[models.extra_params] # Optional extra parameter configuration
+enable_thinking = false # Disable thinking mode
 
 [[models]]
 model_identifier = "Qwen/Qwen3-30B-A3B"
@@ -73,6 +77,8 @@ name = "qwen3-30b"
 api_provider = "SiliconFlow"
 price_in = 0.7
 price_out = 2.8
+[models.extra_params] # Optional extra parameter configuration
+enable_thinking = false # Disable thinking mode
 
 [[models]]
 model_identifier = "Qwen/Qwen2.5-VL-72B-Instruct"
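
For context, a minimal sketch of how the new `extra_params` plumbing is meant to flow end to end. The `load_extra_params` helper and the config path are hypothetical (this diff does not show the repo's actual config loader or `ModelInfo` wiring); what is real is the OpenAI Python SDK's `extra_body` argument, which merges extra keys into the JSON request body, and that is how a provider-specific flag like SiliconFlow's `enable_thinking` reaches the endpoint.

```python
# Sketch only: names outside the diff (load_extra_params, the config path)
# are illustrative, not the project's actual API.
import tomllib  # stdlib, Python 3.11+
from typing import Any

from openai import AsyncOpenAI


def load_extra_params(config_path: str, model_name: str) -> dict[str, Any] | None:
    """Return the [models.extra_params] table for one model, if present."""
    with open(config_path, "rb") as f:
        config = tomllib.load(f)
    # [[models]] parses to a list of dicts; [models.extra_params] nests under each.
    for model in config.get("models", []):
        if model.get("name") == model_name:
            return model.get("extra_params")  # e.g. {"enable_thinking": False}
    return None


async def chat(
    client: AsyncOpenAI,
    model_id: str,
    prompt: str,
    extra_params: dict[str, Any] | None,
) -> str:
    # extra_body merges these keys into the serialized JSON payload, which is
    # how provider-specific switches such as enable_thinking reach the API.
    response = await client.chat.completions.create(
        model=model_id,
        messages=[{"role": "user", "content": prompt}],
        extra_body=extra_params,
    )
    return response.choices[0].message.content or ""
```

With the template above, looking up `"qwen3-8b"` would return `{"enable_thinking": False}`, so the request body carries `"enable_thinking": false` and the Qwen3 endpoint skips its reasoning phase. Models without an `[models.extra_params]` table yield `None`, and `extra_body=None` leaves the request untouched, so existing configs keep working. The embeddings path in the diff threads the same dict through `client.embeddings.create(..., extra_body=...)`.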