fix: allow a more customizable max_tokens configuration
Dumb API providers emit the </think> block as part of the output, so the response was getting cut off halfway by max_tokens.
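In short: instead of every planner hard-coding max_tokens=1000, each LLMRequest now reads max_tokens from its per-model config, falls back to global_config.model.model_max_output_length, and only writes the value into the request payload when it is set. A minimal sketch of that flow, assuming a simplified model dict with a hypothetical "name" key; build_payload and the default value are illustrative, not the project's real API:

    # Sketch only: mirrors the intent of this commit, not the project's actual module layout.
    DEFAULT_MAX_OUTPUT_LENGTH = 4096  # stand-in for global_config.model.model_max_output_length

    def build_payload(model: dict, prompt: str) -> dict:
        # Per-model max_tokens wins; otherwise fall back to the global default.
        max_tokens = model.get("max_tokens", DEFAULT_MAX_OUTPUT_LENGTH)
        payload = {"model": model["name"], "messages": [{"role": "user", "content": prompt}]}
        if max_tokens:
            payload["max_tokens"] = max_tokens
        return payload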
@@ -78,13 +78,11 @@ class ActionPlanner(BasePlanner):
         # LLM规划器配置
         self.planner_llm = LLMRequest(
             model=global_config.model.planner,
-            max_tokens=1000,
             request_type="focus.planner", # 用于动作规划
         )

         self.utils_llm = LLMRequest(
             model=global_config.model.utils_small,
-            max_tokens=1000,
             request_type="focus.planner", # 用于动作规划
         )

@@ -188,6 +186,12 @@ class ActionPlanner(BasePlanner):
             llm_content, (reasoning_content, _) = await self.planner_llm.generate_response_async(prompt=prompt)

             logger.debug(f"{self.log_prefix}LLM 原始理由响应: {reasoning_content}")

+            logger.info(f"{self.log_prefix}规划器原始提示词: {prompt}")
+            logger.info(f"{self.log_prefix}规划器原始响应: {llm_content}")
+            logger.info(f"{self.log_prefix}规划器推理: {reasoning_content}")


         except Exception as req_e:
             logger.error(f"{self.log_prefix}LLM 请求执行失败: {req_e}")
             reasoning = f"LLM 请求失败,你的模型出现问题: {req_e}"
@@ -43,8 +43,7 @@ def init_prompt():

 基于以上聊天上下文和用户的最新消息,选择最合适的action。

-请以动作的输出要求,以严格的 JSON 格式输出,且仅包含 JSON 内容。
-请输出你提取的JSON,不要有任何其他文字或解释:
+请以动作的输出要求,以严格的 JSON 格式输出,且仅包含 JSON 内容。不要有任何其他文字或解释:
 """,
         "normal_chat_planner_prompt",
     )
@@ -70,7 +69,6 @@ class NormalChatPlanner:
         # LLM规划器配置
         self.planner_llm = LLMRequest(
             model=global_config.model.planner,
-            max_tokens=1000,
             request_type="normal_chat.planner", # 用于normal_chat动作规划
         )

@@ -439,7 +439,6 @@ class ModelConfig(ConfigBase):
     focus_working_memory: dict[str, Any] = field(default_factory=lambda: {})
     """专注工作记忆模型配置"""

-
     focus_tool_use: dict[str, Any] = field(default_factory=lambda: {})
     """专注工具使用模型配置"""

@@ -123,6 +123,8 @@ class LLMRequest:
         self.stream = model.get("stream", False)
         self.pri_in = model.get("pri_in", 0)
         self.pri_out = model.get("pri_out", 0)
+        self.max_tokens = model.get("max_tokens", global_config.model.model_max_output_length)
+        # print(f"max_tokens: {self.max_tokens}")

         # 获取数据库实例
         self._init_database()
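With the fallback above, a model entry can now carry its own max_tokens. A hypothetical entry might look like the following; apart from the keys read in the hunk above (stream, pri_in, pri_out, max_tokens), the field names and values are made up:

    # Hypothetical model entry; only the keys shown in the diff above are assumed to exist.
    planner_model = {
        "name": "example-planner-model",  # placeholder model name
        "stream": False,
        "pri_in": 0,
        "pri_out": 0,
        "max_tokens": 2000,  # per-model override; omit it to fall back to model_max_output_length
    }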
@@ -243,6 +245,26 @@ class LLMRequest:
         if stream_mode:
             payload["stream"] = stream_mode

+        if self.temp != 0.7:
+            payload["temperature"] = self.temp
+
+        # 添加enable_thinking参数(如果不是默认值False)
+        if not self.enable_thinking:
+            payload["enable_thinking"] = False
+
+        if self.thinking_budget != 4096:
+            payload["thinking_budget"] = self.thinking_budget
+
+        if self.max_tokens:
+            payload["max_tokens"] = self.max_tokens
+
+        # if "max_tokens" not in payload and "max_completion_tokens" not in payload:
+        #     payload["max_tokens"] = global_config.model.model_max_output_length
+        # 如果 payload 中依然存在 max_tokens 且需要转换,在这里进行再次检查
+        if self.model_name.lower() in self.MODELS_NEEDING_TRANSFORMATION and "max_tokens" in payload:
+            payload["max_completion_tokens"] = payload.pop("max_tokens")
+
+
         return {
             "policy": policy,
             "payload": payload,
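The hunk above only emits request parameters that differ from their defaults and renames max_tokens to max_completion_tokens for models that require it. A compact, self-contained sketch of that pattern; the set members and the finalize_payload helper are illustrative, not the project's actual constants:

    # Sketch of the conditional-parameter pattern used above, including the max_tokens rename.
    MODELS_NEEDING_TRANSFORMATION = {"o1-mini", "o1-preview"}  # illustrative members only

    def finalize_payload(payload: dict, model_name: str, max_tokens: int | None) -> dict:
        if max_tokens:
            payload["max_tokens"] = max_tokens
        # Some providers reject max_tokens and expect max_completion_tokens instead.
        if model_name.lower() in MODELS_NEEDING_TRANSFORMATION and "max_tokens" in payload:
            payload["max_completion_tokens"] = payload.pop("max_tokens")
        return payload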
@@ -631,6 +653,7 @@ class LLMRequest:
             ]
         else:
             messages = [{"role": "user", "content": prompt}]

         payload = {
             "model": self.model_name,
             "messages": messages,
@@ -648,8 +671,11 @@ class LLMRequest:
         if self.thinking_budget != 4096:
             payload["thinking_budget"] = self.thinking_budget

-        if "max_tokens" not in payload and "max_completion_tokens" not in payload:
-            payload["max_tokens"] = global_config.model.model_max_output_length
+        if self.max_tokens:
+            payload["max_tokens"] = self.max_tokens

+        # if "max_tokens" not in payload and "max_completion_tokens" not in payload:
+        # payload["max_tokens"] = global_config.model.model_max_output_length
         # 如果 payload 中依然存在 max_tokens 且需要转换,在这里进行再次检查
         if self.model_name.lower() in self.MODELS_NEEDING_TRANSFORMATION and "max_tokens" in payload:
             payload["max_completion_tokens"] = payload.pop("max_tokens")