feat: update LLMRequest to support custom parameters and rework the payload key insertion logic, for compatibility with APIs that do not support certain keys
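The idea behind the change: enable_thinking and thinking_budget now default to None rather than False/4096, so their keys are added to the request payload only when the user actually sets them in the model config, and a new custom_params field accepts provider-specific extras as a JSON string (the hunks below only show it being parsed, not where it is applied). A minimal sketch of the intended configuration, assuming a plain dict config; the model names and the top_k key are hypothetical examples:

    # Config that never sends thinking-related keys: APIs that reject
    # "enable_thinking" / "thinking_budget" simply never see them.
    plain_model = {"name": "gpt-4o-mini", "temp": 0.7}

    # Config that opts in: the keys are forwarded because they are present.
    thinking_model = {
        "name": "qwen3-8b",
        "enable_thinking": True,
        "thinking_budget": 2048,
        "custom_params": '{"top_k": 40}',  # extra provider-specific keys as JSON
    }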
@@ -122,14 +122,16 @@ class LLMRequest:
         self.model_name: str = model["name"]
         self.params = kwargs

-        self.enable_thinking = model.get("enable_thinking", False)
+        self.enable_thinking = model.get("enable_thinking", None)
         self.temp = model.get("temp", 0.7)
-        self.thinking_budget = model.get("thinking_budget", 4096)
+        self.thinking_budget = model.get("thinking_budget", None)
         self.stream = model.get("stream", False)
         self.pri_in = model.get("pri_in", 0)
         self.pri_out = model.get("pri_out", 0)
         self.max_tokens = model.get("max_tokens", global_config.model.model_max_output_length)
         # print(f"max_tokens: {self.max_tokens}")
+        self.custom_params = model.get("custom_params", "{}")
+        self.custom_params = json.loads(self.custom_params)

         # Get the database instance
         self._init_database()
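The two added lines parse custom_params from a JSON string into a dict via json.loads (the json import is assumed to exist elsewhere in the module; this hunk does not show it). A standalone sketch of the parsing step, with repetition_penalty as a hypothetical provider-specific key:

    import json

    model = {"custom_params": '{"repetition_penalty": 1.1}'}

    # A missing key falls back to "{}", so parsing always yields a dict.
    custom_params = json.loads(model.get("custom_params", "{}"))
    assert custom_params == {"repetition_penalty": 1.1}

The two assignments could also be collapsed into one (self.custom_params = json.loads(model.get("custom_params", "{}"))); the behavior is the same.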
@@ -247,28 +249,6 @@ class LLMRequest:
         elif payload is None:
             payload = await self._build_payload(prompt)

-        if stream_mode:
-            payload["stream"] = stream_mode
-
-        if self.temp != 0.7:
-            payload["temperature"] = self.temp
-
-        # Add the enable_thinking parameter (if it is not the default value False)
-        if not self.enable_thinking:
-            payload["enable_thinking"] = False
-
-        if self.thinking_budget != 4096:
-            payload["thinking_budget"] = self.thinking_budget
-
-        if self.max_tokens:
-            payload["max_tokens"] = self.max_tokens
-
-        # if "max_tokens" not in payload and "max_completion_tokens" not in payload:
-        #     payload["max_tokens"] = global_config.model.model_max_output_length
-        # If max_tokens is still in the payload and needs conversion, check again here
-        if self.model_name.lower() in self.MODELS_NEEDING_TRANSFORMATION and "max_tokens" in payload:
-            payload["max_completion_tokens"] = payload.pop("max_tokens")
-
         return {
             "policy": policy,
             "payload": payload,
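This hunk deletes the per-request payload mutation entirely; _build_payload (updated in the next hunk) is now the single place where these optional keys are applied, so the logic no longer has to be kept in sync in two places. The deleted guard also had a bug worth noting: with the old default of False, "if not self.enable_thinking" fired even when nothing was configured, so every request carried the key. A sketch of the misfire, using plain values rather than the class:

    enable_thinking = False  # old default, indistinguishable from "not configured"
    payload = {}
    if not enable_thinking:
        payload["enable_thinking"] = False
    assert payload == {"enable_thinking": False}  # key sent despite no config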
@@ -668,18 +648,16 @@ class LLMRequest:
         if self.temp != 0.7:
             payload["temperature"] = self.temp

-        # Add the enable_thinking parameter (if it is not the default value False)
-        if not self.enable_thinking:
-            payload["enable_thinking"] = False
+        # Only add a parameter when it is present in the config file
+        if self.enable_thinking is not None:
+            payload["enable_thinking"] = self.enable_thinking

-        if self.thinking_budget != 4096:
+        if self.thinking_budget is not None:
             payload["thinking_budget"] = self.thinking_budget

         if self.max_tokens:
             payload["max_tokens"] = self.max_tokens

-        # if "max_tokens" not in payload and "max_completion_tokens" not in payload:
-        #     payload["max_tokens"] = global_config.model.model_max_output_length
         # If max_tokens is still in the payload and needs conversion, check again here
         if self.model_name.lower() in self.MODELS_NEEDING_TRANSFORMATION and "max_tokens" in payload:
             payload["max_completion_tokens"] = payload.pop("max_tokens")
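Taken together, the new guards use None as a "not configured" sentinel and only then insert the key, while the existing max_tokens -> max_completion_tokens rename is kept for models that require the newer field name. A self-contained sketch of the resulting behavior; the set contents and model names are hypothetical, and the real method also handles temperature and streaming:

    MODELS_NEEDING_TRANSFORMATION = {"o1-mini"}

    def build_payload(model_name, enable_thinking=None, thinking_budget=None,
                      max_tokens=800):
        payload = {"model": model_name}
        if enable_thinking is not None:  # only when configured
            payload["enable_thinking"] = enable_thinking
        if thinking_budget is not None:
            payload["thinking_budget"] = thinking_budget
        if max_tokens:
            payload["max_tokens"] = max_tokens
        # Rename for APIs that reject max_tokens in favor of max_completion_tokens
        if model_name.lower() in MODELS_NEEDING_TRANSFORMATION and "max_tokens" in payload:
            payload["max_completion_tokens"] = payload.pop("max_tokens")
        return payload

    assert "enable_thinking" not in build_payload("gpt-4o-mini")
    assert build_payload("o1-mini")["max_completion_tokens"] == 800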