diff --git a/src/chat/focus_chat/planners/planner_simple.py b/src/chat/focus_chat/planners/planner_simple.py
index cddd53e2d..94ac085d7 100644
--- a/src/chat/focus_chat/planners/planner_simple.py
+++ b/src/chat/focus_chat/planners/planner_simple.py
@@ -78,13 +78,11 @@ class ActionPlanner(BasePlanner):
         # LLM规划器配置
         self.planner_llm = LLMRequest(
             model=global_config.model.planner,
-            max_tokens=1000,
             request_type="focus.planner",  # 用于动作规划
         )
 
         self.utils_llm = LLMRequest(
             model=global_config.model.utils_small,
-            max_tokens=1000,
             request_type="focus.planner",  # 用于动作规划
         )
 
@@ -188,6 +186,12 @@ class ActionPlanner(BasePlanner):
             llm_content, (reasoning_content, _) = await self.planner_llm.generate_response_async(prompt=prompt)
 
             logger.debug(f"{self.log_prefix}LLM 原始理由响应: {reasoning_content}")
+
+            logger.info(f"{self.log_prefix}规划器原始提示词: {prompt}")
+            logger.info(f"{self.log_prefix}规划器原始响应: {llm_content}")
+            logger.info(f"{self.log_prefix}规划器推理: {reasoning_content}")
+
+
         except Exception as req_e:
             logger.error(f"{self.log_prefix}LLM 请求执行失败: {req_e}")
             reasoning = f"LLM 请求失败,你的模型出现问题: {req_e}"
diff --git a/src/chat/normal_chat/normal_chat_planner.py b/src/chat/normal_chat/normal_chat_planner.py
index 114636be1..f6cf89ab4 100644
--- a/src/chat/normal_chat/normal_chat_planner.py
+++ b/src/chat/normal_chat/normal_chat_planner.py
@@ -43,8 +43,7 @@ def init_prompt():
 基于以上聊天上下文和用户的最新消息,选择最合适的action。
 
-请以动作的输出要求,以严格的 JSON 格式输出,且仅包含 JSON 内容。
-请输出你提取的JSON,不要有任何其他文字或解释:
+请以动作的输出要求,以严格的 JSON 格式输出,且仅包含 JSON 内容。不要有任何其他文字或解释:
 """,
         "normal_chat_planner_prompt",
     )
@@ -70,7 +69,6 @@ class NormalChatPlanner:
         # LLM规划器配置
         self.planner_llm = LLMRequest(
             model=global_config.model.planner,
-            max_tokens=1000,
             request_type="normal_chat.planner",  # 用于normal_chat动作规划
         )
 
diff --git a/src/config/official_configs.py b/src/config/official_configs.py
index 423357f5c..9e2700748 100644
--- a/src/config/official_configs.py
+++ b/src/config/official_configs.py
@@ -439,7 +439,6 @@ class ModelConfig(ConfigBase):
     focus_working_memory: dict[str, Any] = field(default_factory=lambda: {})
     """专注工作记忆模型配置"""
-
     focus_tool_use: dict[str, Any] = field(default_factory=lambda: {})
     """专注工具使用模型配置"""
 
diff --git a/src/llm_models/utils_model.py b/src/llm_models/utils_model.py
index 4f5cd3b0f..24cc9731a 100644
--- a/src/llm_models/utils_model.py
+++ b/src/llm_models/utils_model.py
@@ -123,6 +123,8 @@ class LLMRequest:
         self.stream = model.get("stream", False)
         self.pri_in = model.get("pri_in", 0)
         self.pri_out = model.get("pri_out", 0)
+        self.max_tokens = model.get("max_tokens", global_config.model.model_max_output_length)
+        # print(f"max_tokens: {self.max_tokens}")
 
         # 获取数据库实例
         self._init_database()
@@ -242,6 +244,26 @@
         if stream_mode:
             payload["stream"] = stream_mode
+
+        if self.temp != 0.7:
+            payload["temperature"] = self.temp
+
+        # 添加enable_thinking参数(如果不是默认值False)
+        if not self.enable_thinking:
+            payload["enable_thinking"] = False
+
+        if self.thinking_budget != 4096:
+            payload["thinking_budget"] = self.thinking_budget
+
+        if self.max_tokens:
+            payload["max_tokens"] = self.max_tokens
+
+        # if "max_tokens" not in payload and "max_completion_tokens" not in payload:
+        #     payload["max_tokens"] = global_config.model.model_max_output_length
+        # 如果 payload 中依然存在 max_tokens 且需要转换,在这里进行再次检查
+        if self.model_name.lower() in self.MODELS_NEEDING_TRANSFORMATION and "max_tokens" in payload:
+            payload["max_completion_tokens"] = payload.pop("max_tokens")
+
         return {
             "policy": policy,
@@ -631,6 +653,7 @@
             ]
         else:
             messages = [{"role": "user", "content": prompt}]
+
         payload = {
             "model": self.model_name,
             "messages": messages,
@@ -648,8 +671,11 @@
         if self.thinking_budget != 4096:
             payload["thinking_budget"] = self.thinking_budget
 
-        if "max_tokens" not in payload and "max_completion_tokens" not in payload:
-            payload["max_tokens"] = global_config.model.model_max_output_length
+        if self.max_tokens:
+            payload["max_tokens"] = self.max_tokens
+
+        # if "max_tokens" not in payload and "max_completion_tokens" not in payload:
+        #     payload["max_tokens"] = global_config.model.model_max_output_length
         # 如果 payload 中依然存在 max_tokens 且需要转换,在这里进行再次检查
         if self.model_name.lower() in self.MODELS_NEEDING_TRANSFORMATION and "max_tokens" in payload:
            payload["max_completion_tokens"] = payload.pop("max_tokens")
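
Below is a minimal standalone sketch of the max_tokens flow this patch introduces: the limit is read from the per-model config with global_config.model.model_max_output_length as the fallback, and renamed to max_completion_tokens for models in MODELS_NEEDING_TRANSFORMATION. The function build_payload_sketch, the constant MODEL_MAX_OUTPUT_LENGTH, and the example model names are illustrative stand-ins, not repo code.

# sketch only; assumes a model config dict that may carry an optional "max_tokens" key
MODELS_NEEDING_TRANSFORMATION = {"o1-mini", "o1-preview"}  # assumed example entries
MODEL_MAX_OUTPUT_LENGTH = 800  # stand-in for global_config.model.model_max_output_length


def build_payload_sketch(model: dict, prompt: str) -> dict:
    # mirrors: self.max_tokens = model.get("max_tokens", global_config.model.model_max_output_length)
    max_tokens = model.get("max_tokens", MODEL_MAX_OUTPUT_LENGTH)

    payload = {
        "model": model["name"],
        "messages": [{"role": "user", "content": prompt}],
    }
    if max_tokens:
        payload["max_tokens"] = max_tokens

    # mirrors the transformation check: some providers expect max_completion_tokens instead
    if model["name"].lower() in MODELS_NEEDING_TRANSFORMATION and "max_tokens" in payload:
        payload["max_completion_tokens"] = payload.pop("max_tokens")
    return payload


if __name__ == "__main__":
    # call sites such as ActionPlanner no longer pass max_tokens=1000; the limit lives in the model config
    planner_model = {"name": "o1-mini", "max_tokens": 1000}
    print(build_payload_sketch(planner_model, "hi"))
    # -> {'model': 'o1-mini', 'messages': [...], 'max_completion_tokens': 1000}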