From c7a804e28631a5dd06fc71e0f848cffb3f1413a1 Mon Sep 17 00:00:00 2001
From: Todysheep
Date: Mon, 23 Jun 2025 16:09:14 +0800
Subject: [PATCH 1/2] feat: update the LLMRequest class to support custom parameters, update the payload key-addition logic, and stay compatible with APIs that do not support certain keys
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/llm_models/utils_model.py | 38 ++++++++---------------------
 1 file changed, 8 insertions(+), 30 deletions(-)

diff --git a/src/llm_models/utils_model.py b/src/llm_models/utils_model.py
index 377fd3813..12f396758 100644
--- a/src/llm_models/utils_model.py
+++ b/src/llm_models/utils_model.py
@@ -122,14 +122,16 @@ class LLMRequest:
         self.model_name: str = model["name"]
         self.params = kwargs
 
-        self.enable_thinking = model.get("enable_thinking", False)
+        self.enable_thinking = model.get("enable_thinking", None)
         self.temp = model.get("temp", 0.7)
-        self.thinking_budget = model.get("thinking_budget", 4096)
+        self.thinking_budget = model.get("thinking_budget", None)
         self.stream = model.get("stream", False)
         self.pri_in = model.get("pri_in", 0)
         self.pri_out = model.get("pri_out", 0)
         self.max_tokens = model.get("max_tokens", global_config.model.model_max_output_length)
         # print(f"max_tokens: {self.max_tokens}")
+        self.custom_params = model.get("custom_params", "{}")
+        self.custom_params = json.loads(self.custom_params)
 
         # Get the database instance
         self._init_database()
@@ -247,28 +249,6 @@
         elif payload is None:
             payload = await self._build_payload(prompt)
 
-        if stream_mode:
-            payload["stream"] = stream_mode
-
-        if self.temp != 0.7:
-            payload["temperature"] = self.temp
-
-        # Add the enable_thinking parameter (if it is not the default value False)
-        if not self.enable_thinking:
-            payload["enable_thinking"] = False
-
-        if self.thinking_budget != 4096:
-            payload["thinking_budget"] = self.thinking_budget
-
-        if self.max_tokens:
-            payload["max_tokens"] = self.max_tokens
-
-        # if "max_tokens" not in payload and "max_completion_tokens" not in payload:
-        #     payload["max_tokens"] = global_config.model.model_max_output_length
-        # If max_tokens is still present in the payload and needs conversion, check it again here
-        if self.model_name.lower() in self.MODELS_NEEDING_TRANSFORMATION and "max_tokens" in payload:
-            payload["max_completion_tokens"] = payload.pop("max_tokens")
-
         return {
             "policy": policy,
             "payload": payload,
@@ -668,18 +648,16 @@
         if self.temp != 0.7:
             payload["temperature"] = self.temp
 
-        # Add the enable_thinking parameter (if it is not the default value False)
-        if not self.enable_thinking:
-            payload["enable_thinking"] = False
+        # Only add a parameter when it is present in the config file
+        if self.enable_thinking is not None:
+            payload["enable_thinking"] = self.enable_thinking
 
-        if self.thinking_budget != 4096:
+        if self.thinking_budget is not None:
             payload["thinking_budget"] = self.thinking_budget
 
         if self.max_tokens:
             payload["max_tokens"] = self.max_tokens
 
-        # if "max_tokens" not in payload and "max_completion_tokens" not in payload:
-        #     payload["max_tokens"] = global_config.model.model_max_output_length
         # If max_tokens is still present in the payload and needs conversion, check it again here
         if self.model_name.lower() in self.MODELS_NEEDING_TRANSFORMATION and "max_tokens" in payload:
            payload["max_completion_tokens"] = payload.pop("max_tokens")
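
Note for reviewers: a minimal runnable sketch of the behavior change in PATCH 1/2, assuming a model config dict like the one read in __init__. The standalone helper build_payload_sketch and the OpenAI-style payload shape (model/messages) are illustrative assumptions, not code from this repository. With None as the "not configured" default, optional keys are added to the payload only when the config file actually sets them, so providers that reject unknown keys never receive them:

# Illustrative sketch only; build_payload_sketch is a hypothetical stand-in
# for the patched payload-building logic in utils_model.py.
def build_payload_sketch(model: dict, prompt: str) -> dict:
    enable_thinking = model.get("enable_thinking", None)  # None means "not configured"
    thinking_budget = model.get("thinking_budget", None)

    payload = {
        "model": model["name"],
        "messages": [{"role": "user", "content": prompt}],
    }
    # A key is emitted only when the config file sets it, so APIs that
    # reject unknown keys such as enable_thinking never see it.
    if enable_thinking is not None:
        payload["enable_thinking"] = enable_thinking
    if thinking_budget is not None:
        payload["thinking_budget"] = thinking_budget
    return payload

print(build_payload_sketch({"name": "some-model"}, "hi"))
# {'model': 'some-model', 'messages': [{'role': 'user', 'content': 'hi'}]}
print(build_payload_sketch({"name": "some-model", "enable_thinking": False}, "hi"))
# same as above plus 'enable_thinking': False

Before this patch, enable_thinking=False and thinking_budget=4096 acted as sentinel defaults, so an explicit False or 4096 in the config was indistinguishable from "unset"; the None default removes that ambiguity.
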
From 7961a1f04c08553befcb95fe4e97d97c6c0fb50d Mon Sep 17 00:00:00 2001
From: Todysheep <97968466+Todysheep@users.noreply.github.com>
Date: Mon, 23 Jun 2025 16:30:25 +0800
Subject: [PATCH 2/2] Update src/llm_models/utils_model.py

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
---
 src/llm_models/utils_model.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/llm_models/utils_model.py b/src/llm_models/utils_model.py
index 12f396758..52f5f2139 100644
--- a/src/llm_models/utils_model.py
+++ b/src/llm_models/utils_model.py
@@ -130,8 +130,13 @@ class LLMRequest:
         self.pri_out = model.get("pri_out", 0)
         self.max_tokens = model.get("max_tokens", global_config.model.model_max_output_length)
         # print(f"max_tokens: {self.max_tokens}")
-        self.custom_params = model.get("custom_params", "{}")
-        self.custom_params = json.loads(self.custom_params)
+        custom_params_str = model.get("custom_params", "{}")
+        try:
+            self.custom_params = json.loads(custom_params_str)
+        except json.JSONDecodeError as e:
+            logger.error(f"Invalid JSON in custom_params for model '{self.model_name}': {custom_params_str} ({e})")
+            self.custom_params = {}
+
 
         # Get the database instance
         self._init_database()
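
Note for reviewers: a minimal sketch of the failure mode the try/except in PATCH 2/2 guards against, assuming custom_params arrives as a JSON string from the model config. parse_custom_params and the module-level logger setup here are hypothetical; the real code lives inside LLMRequest.__init__ and uses the project's own logger:

import json
import logging

logger = logging.getLogger(__name__)

# Hypothetical standalone version of the parsing added in PATCH 2/2.
def parse_custom_params(model: dict) -> dict:
    custom_params_str = model.get("custom_params", "{}")
    try:
        return json.loads(custom_params_str)
    except json.JSONDecodeError as e:
        # A malformed string degrades to {} with an error log instead of
        # raising out of LLMRequest.__init__ and aborting construction.
        logger.error(f"Invalid JSON in custom_params for model '{model.get('name')}': {custom_params_str} ({e})")
        return {}

print(parse_custom_params({"name": "m", "custom_params": '{"top_k": 40}'}))  # {'top_k': 40}
print(parse_custom_params({"name": "m", "custom_params": "{top_k: 40}"}))    # {} plus an error log

How self.custom_params is later merged into the request payload is outside this diff.
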