diff --git a/src/llm_models/utils_model.py b/src/llm_models/utils_model.py index 377fd3813..12f396758 100644 --- a/src/llm_models/utils_model.py +++ b/src/llm_models/utils_model.py @@ -122,14 +122,16 @@ class LLMRequest: self.model_name: str = model["name"] self.params = kwargs - self.enable_thinking = model.get("enable_thinking", False) + self.enable_thinking = model.get("enable_thinking", None) self.temp = model.get("temp", 0.7) - self.thinking_budget = model.get("thinking_budget", 4096) + self.thinking_budget = model.get("thinking_budget", None) self.stream = model.get("stream", False) self.pri_in = model.get("pri_in", 0) self.pri_out = model.get("pri_out", 0) self.max_tokens = model.get("max_tokens", global_config.model.model_max_output_length) # print(f"max_tokens: {self.max_tokens}") + self.custom_params = model.get("custom_params", "{}") + self.custom_params = json.loads(self.custom_params) # 获取数据库实例 self._init_database() @@ -247,28 +249,6 @@ class LLMRequest: elif payload is None: payload = await self._build_payload(prompt) - if stream_mode: - payload["stream"] = stream_mode - - if self.temp != 0.7: - payload["temperature"] = self.temp - - # 添加enable_thinking参数(如果不是默认值False) - if not self.enable_thinking: - payload["enable_thinking"] = False - - if self.thinking_budget != 4096: - payload["thinking_budget"] = self.thinking_budget - - if self.max_tokens: - payload["max_tokens"] = self.max_tokens - - # if "max_tokens" not in payload and "max_completion_tokens" not in payload: - # payload["max_tokens"] = global_config.model.model_max_output_length - # 如果 payload 中依然存在 max_tokens 且需要转换,在这里进行再次检查 - if self.model_name.lower() in self.MODELS_NEEDING_TRANSFORMATION and "max_tokens" in payload: - payload["max_completion_tokens"] = payload.pop("max_tokens") - return { "policy": policy, "payload": payload, @@ -668,18 +648,16 @@ class LLMRequest: if self.temp != 0.7: payload["temperature"] = self.temp - # 添加enable_thinking参数(如果不是默认值False) - if not self.enable_thinking: - payload["enable_thinking"] = False + # 仅当配置文件中存在参数时,添加对应参数 + if self.enable_thinking is not None: + payload["enable_thinking"] = self.enable_thinking - if self.thinking_budget != 4096: + if self.thinking_budget is not None: payload["thinking_budget"] = self.thinking_budget if self.max_tokens: payload["max_tokens"] = self.max_tokens - # if "max_tokens" not in payload and "max_completion_tokens" not in payload: - # payload["max_tokens"] = global_config.model.model_max_output_length # 如果 payload 中依然存在 max_tokens 且需要转换,在这里进行再次检查 if self.model_name.lower() in self.MODELS_NEEDING_TRANSFORMATION and "max_tokens" in payload: payload["max_completion_tokens"] = payload.pop("max_tokens")