fix:统一llm请求函数,改动模型名称

This commit is contained in:
SengokuCola
2025-06-04 23:27:24 +08:00
parent 077b67fa60
commit 4ebfca096c
17 changed files with 116 additions and 187 deletions

View File

@@ -500,11 +500,11 @@ class LLMRequest:
logger.warning(f"检测到403错误模型从 {old_model_name} 降级为 {self.model_name}")
# 对全局配置进行更新
if global_config.model.normal_chat_2.get("name") == old_model_name:
global_config.model.normal_chat_2["name"] = self.model_name
if global_config.model.replyer_2.get("name") == old_model_name:
global_config.model.replyer_2["name"] = self.model_name
logger.warning(f"将全局配置中的 llm_normal 模型临时降级至{self.model_name}")
if global_config.model.normal_chat_1.get("name") == old_model_name:
global_config.model.normal_chat_1["name"] = self.model_name
if global_config.model.replyer_1.get("name") == old_model_name:
global_config.model.replyer_1["name"] = self.model_name
logger.warning(f"将全局配置中的 llm_reasoning 模型临时降级至{self.model_name}")
if payload and "model" in payload:
@@ -715,18 +715,6 @@ class LLMRequest:
return {"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json"}
# 防止小朋友们截图自己的key
async def generate_response(self, prompt: str) -> Tuple:
"""根据输入的提示生成模型的异步响应"""
response = await self._execute_request(endpoint="/chat/completions", prompt=prompt)
# 根据返回值的长度决定怎么处理
if len(response) == 3:
content, reasoning_content, tool_calls = response
return content, reasoning_content, self.model_name, tool_calls
else:
content, reasoning_content = response
return content, reasoning_content, self.model_name
async def generate_response_for_image(self, prompt: str, image_base64: str, image_format: str) -> Tuple:
"""根据输入的提示和图片生成模型的异步响应"""
@@ -761,29 +749,6 @@ class LLMRequest:
content, reasoning_content = response
return content, (reasoning_content, self.model_name)
async def generate_response_tool_async(self, prompt: str, tools: list, **kwargs) -> tuple[str, str, list]:
"""异步方式根据输入的提示生成模型的响应"""
# 构建请求体不硬编码max_tokens
data = {
"model": self.model_name,
"messages": [{"role": "user", "content": prompt}],
**self.params,
**kwargs,
"tools": tools,
}
response = await self._execute_request(endpoint="/chat/completions", payload=data, prompt=prompt)
logger.debug(f"向模型 {self.model_name} 发送工具调用请求,包含 {len(tools)} 个工具,返回结果: {response}")
# 检查响应是否包含工具调用
if len(response) == 3:
content, reasoning_content, tool_calls = response
logger.debug(f"收到工具调用响应,包含 {len(tool_calls) if tool_calls else 0} 个工具调用")
return content, reasoning_content, tool_calls
else:
content, reasoning_content = response
logger.debug("收到普通响应,无工具调用")
return content, reasoning_content, None
async def get_embedding(self, text: str) -> Union[list, None]:
"""异步方法获取文本的embedding向量