fix: add timing to pinpoint abnormal LLM latency; remove the memory model
@@ -36,11 +36,7 @@ def init_prompt():
 请以JSON格式输出,只需要输出选中的情境编号:
 例如:
 {{
-    "selected_situations": [2, 3, 5, 7, 19, 22, 25, 38, 39, 45, 48 , 64]
-}}
-例如:
-{{
-    "selected_situations": [1, 4, 7, 9, 23, 38, 44]
+    "selected_situations": [2, 3, 5, 7, 19, 22, 25, 38, 39, 45, 48, 64]
 }}
 
 请严格按照JSON格式输出,不要包含其他内容:
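Note on the hunk above: the `{{`/`}}` pairs are `str.format` escapes inside `init_prompt()`, so the model sees single braces; the edit removes the duplicated second example and fixes the stray space in `48 , 64`. Parsing the reply this prompt asks for takes only a few lines; a minimal sketch (hypothetical helper, not part of this commit) that tolerates models ignoring the "JSON only" instruction:

```python
import json
import re


def parse_selected_situations(reply: str) -> list[int]:
    """Extract situation ids from a reply that should be bare JSON.

    Falls back to the first {...} block in case the model wrapped the
    JSON in prose or code fences despite the instruction.
    """
    candidate = reply.strip()
    if not candidate.startswith("{"):
        match = re.search(r"\{.*\}", candidate, re.DOTALL)
        if match is None:
            return []
        candidate = match.group(0)
    try:
        data = json.loads(candidate)
        return [int(i) for i in data.get("selected_situations", [])]
    except (json.JSONDecodeError, AttributeError, TypeError, ValueError):
        return []
```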
@@ -214,7 +210,7 @@ class ExpressionSelector:
         """使用LLM选择适合的表达方式"""
 
         # 1. 获取35个随机表达方式(现在按权重抽取)
-        style_exprs, grammar_exprs = self.get_random_expressions(chat_id, 50, 0.5, 0.5)
+        style_exprs, grammar_exprs = self.get_random_expressions(chat_id, 30, 0.5, 0.5)
 
         # 2. 构建所有表达方式的索引和情境列表
         all_expressions = []
@@ -264,7 +260,10 @@ class ExpressionSelector:
 
         # 4. 调用LLM
         try:
+
+            start_time = time.time()
             content, (reasoning_content, model_name, _) = await self.llm_model.generate_response_async(prompt=prompt)
+            logger.info(f"LLM请求时间: {model_name} {time.time() - start_time} \n{prompt}")
 
             # logger.info(f"模型名称: {model_name}")
             # logger.info(f"LLM返回结果: {content}")
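The added lines above are the timing probe the commit message refers to: snapshot `time.time()` before the awaited call, then log the delta (and the prompt) after it returns. If these probes stay, the pattern factors into a reusable helper; a minimal sketch assuming a standard `logging`-style logger (not part of this commit):

```python
import time
from contextlib import asynccontextmanager


@asynccontextmanager
async def log_duration(logger, label: str):
    """Log the wall-clock time spent inside the `async with` block."""
    start = time.time()
    try:
        yield
    finally:
        logger.info(f"{label}: {time.time() - start:.3f}s")
```

The call site would then read `async with log_duration(logger, "LLM请求"): content, ... = await self.llm_model.generate_response_async(prompt=prompt)`.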
@@ -190,7 +190,7 @@ class MemoryGraph:
 class Hippocampus:
     def __init__(self):
         self.memory_graph = MemoryGraph()
-        self.model_summary: LLMRequest = None # type: ignore
+        self.model_small: LLMRequest = None # type: ignore
         self.entorhinal_cortex: EntorhinalCortex = None # type: ignore
         self.parahippocampal_gyrus: ParahippocampalGyrus = None # type: ignore
 
@@ -200,7 +200,7 @@ class Hippocampus:
         self.parahippocampal_gyrus = ParahippocampalGyrus(self)
         # 从数据库加载记忆图
         self.entorhinal_cortex.sync_memory_from_db()
-        self.model_summary = LLMRequest(model_set=model_config.model_task_config.memory, request_type="memory.builder")
+        self.model_small = LLMRequest(model_set=model_config.model_task_config.utils_small, request_type="memory.small")
 
     def get_all_node_names(self) -> list:
         """获取记忆图中所有节点的名字列表"""
@@ -340,7 +340,7 @@ class Hippocampus:
         else:
             topic_num = 5 # 51+字符: 5个关键词 (其余长文本)
 
-        topics_response, _ = await self.model_summary.generate_response_async(self.find_topic_llm(text, topic_num))
+        topics_response, _ = await self.model_small.generate_response_async(self.find_topic_llm(text, topic_num))
 
         # 提取关键词
         keywords = re.findall(r"<([^>]+)>", topics_response)
@@ -38,7 +38,7 @@ class InstantMemory:
         self.chat_id = chat_id
         self.last_view_time = time.time()
         self.summary_model = LLMRequest(
-            model_set=model_config.model_task_config.memory,
+            model_set=model_config.model_task_config.utils,
             request_type="memory.summary",
         )
 
@@ -105,9 +105,6 @@ class ModelTaskConfig(ConfigBase):
     replyer_2: TaskConfig
     """normal_chat次要回复模型配置"""
 
-    memory: TaskConfig
-    """记忆模型配置"""
-
     emotion: TaskConfig
     """情绪模型配置"""
 
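Dropping the `memory: TaskConfig` field is one of three coordinated edits: the Hippocampus/InstantMemory call sites above are retargeted to `utils_small`/`utils`, and the `[model_task_config.memory]` section disappears from the config template in the last hunk. They have to move together because an annotated field without a default is required at load time, and a stale TOML section is equally fatal under strict parsing. A toy illustration of that failure mode (hypothetical names, Python 3.11+ `tomllib`, not this project's `ConfigBase`):

```python
import tomllib  # Python 3.11+
from dataclasses import dataclass, fields


@dataclass
class TaskCfg:
    model_list: list[str]
    temperature: float = 0.7
    max_tokens: int = 800


@dataclass
class ModelTasks:
    replyer_2: TaskCfg
    emotion: TaskCfg
    # `memory: TaskCfg` removed -- as a required field, every config
    # without a [model_task_config.memory] section would fail to load.


def load_model_tasks(path: str) -> ModelTasks:
    with open(path, "rb") as f:
        raw = tomllib.load(f)["model_task_config"]
    known = {f.name for f in fields(ModelTasks)}
    if unknown := set(raw) - known:
        # Strict schema: a stale [model_task_config.memory] section
        # left in the TOML surfaces immediately instead of silently.
        raise KeyError(f"unknown model task sections: {unknown}")
    return ModelTasks(**{k: TaskCfg(**raw[k]) for k in known})
```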
@@ -1,6 +1,7 @@
 import asyncio
 import io
 import json
+import time
 import re
 import base64
 from collections.abc import Iterable
@@ -452,6 +453,7 @@ class OpenaiClient(BaseClient):
                 resp, usage_record = await stream_response_handler(req_task.result(), interrupt_flag)
             else:
                 # 发送请求并获取响应
+                # start_time = time.time()
                 req_task = asyncio.create_task(
                     self.client.chat.completions.create(
                         model=model_info.model_identifier,
@@ -469,7 +471,9 @@ class OpenaiClient(BaseClient):
                         # 如果中断量存在且被设置,则取消任务并抛出异常
                         req_task.cancel()
                         raise ReqAbortException("请求被外部信号中断")
-                    await asyncio.sleep(0.5) # 等待0.5秒后再次检查任务&中断信号量状态
+                    await asyncio.sleep(0.1) # 等待0.5秒后再次检查任务&中断信号量状态
+
+                # logger.info(f"OpenAI请求时间: {model_info.model_identifier} {time.time() - start_time} \n{messages}")
 
                 resp, usage_record = async_response_parser(req_task.result())
         except APIConnectionError as e:
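Besides the commented-out timing probe, the functional change above is the poll interval: the completion task is awaited by polling, so a finished request is only noticed on the next tick. Dropping `asyncio.sleep(0.5)` to `0.1` cuts that worst-case pickup delay from 500 ms to 100 ms (roughly 250 ms to 50 ms on average), which matters when hunting latency; the trailing comment still says 0.5秒 and is a leftover worth cleaning up. The loop's shape, reduced to a sketch (hypothetical names, not the client's actual code):

```python
import asyncio


class ReqAbort(Exception):
    """Stand-in for the client's ReqAbortException."""


async def wait_with_interrupt(task: asyncio.Task, interrupt: asyncio.Event,
                              poll_s: float = 0.1):
    """Await `task`, checking an interrupt flag every `poll_s` seconds.

    The poll interval bounds how long a finished task can sit unnoticed,
    so 0.1 s caps the added pickup latency at ~100 ms instead of ~500 ms.
    """
    while not task.done():
        if interrupt.is_set():
            task.cancel()
            raise ReqAbort("request aborted by external signal")
        await asyncio.sleep(poll_s)
    return task.result()
```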
@@ -1,6 +1,7 @@
 import re
 import copy
 import asyncio
+import time
 
 from enum import Enum
 from rich.traceback import install
@@ -150,14 +151,22 @@ class LLMRequest:
             (Tuple[str, str, str, Optional[List[ToolCall]]]): 响应内容、推理内容、模型名称、工具调用列表
         """
         # 请求体构建
+        start_time = time.time()
+
+
+
         message_builder = MessageBuilder()
         message_builder.add_text_content(prompt)
         messages = [message_builder.build()]
+
         tool_built = self._build_tool_options(tools)
+
         # 模型选择
         model_info, api_provider, client = self._select_model()
 
         # 请求并处理返回值
+        logger.info(f"LLM选择耗时: {model_info.name} {time.time() - start_time}")
+
         response = await self._execute_request(
             api_provider=api_provider,
             client=client,
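The probe added above brackets only message building, tool setup, and `_select_model()`, so it isolates pre-request overhead from network time (the per-request probe lives in `ExpressionSelector`, and a commented-out one in `OpenaiClient`). Two small suggestions if these logs stay, shown as a sketch rather than as part of the commit: format the delta with fixed precision instead of interpolating a raw float, and use `time.perf_counter()`, the monotonic clock intended for interval measurement:

```python
import logging
import time

logger = logging.getLogger("llm_request")  # stand-in for the project's logger

start = time.perf_counter()  # monotonic; preferred over time.time() for durations
...  # message building, tool setup, _select_model() would run here
elapsed = time.perf_counter() - start
logger.info("LLM选择耗时: %.3fs", elapsed)
```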
@@ -168,6 +177,8 @@ class LLMRequest:
             max_tokens=max_tokens,
             tool_options=tool_built,
         )
+
+
         content = response.content
         reasoning_content = response.reasoning_content or ""
         tool_calls = response.tool_calls
@@ -175,6 +186,7 @@ class LLMRequest:
         if not reasoning_content and content:
             content, extracted_reasoning = self._extract_reasoning(content)
             reasoning_content = extracted_reasoning
+
         if usage := response.usage:
             llm_usage_recorder.record_usage_to_database(
                 model_info=model_info,
@@ -183,6 +195,7 @@ class LLMRequest:
                 request_type=self.request_type,
                 endpoint="/chat/completions",
             )
+
         if not content:
             if raise_when_empty:
                 logger.warning("生成的响应为空")
@@ -1,5 +1,5 @@
 [inner]
-version = "1.1.1"
+version = "1.2.0"
 
 # 配置文件版本号迭代规则同bot_config.toml
 
@@ -132,11 +132,6 @@ model_list = ["siliconflow-deepseek-v3"]
 temperature = 0.3
 max_tokens = 800
 
-[model_task_config.memory] # 记忆模型
-model_list = ["qwen3-30b"]
-temperature = 0.7
-max_tokens = 800
-
 [model_task_config.vlm] # 图像识别模型
 model_list = ["qwen2.5-vl-72b"]
 max_tokens = 800