diff --git a/src/chat/express/expression_selector.py b/src/chat/express/expression_selector.py index 83fdc128f..3f848e43f 100644 --- a/src/chat/express/expression_selector.py +++ b/src/chat/express/expression_selector.py @@ -36,11 +36,7 @@ def init_prompt(): 请以JSON格式输出,只需要输出选中的情境编号: 例如: {{ - "selected_situations": [2, 3, 5, 7, 19, 22, 25, 38, 39, 45, 48 , 64] -}} -例如: -{{ - "selected_situations": [1, 4, 7, 9, 23, 38, 44] + "selected_situations": [2, 3, 5, 7, 19, 22, 25, 38, 39, 45, 48, 64] }} 请严格按照JSON格式输出,不要包含其他内容: @@ -214,7 +210,7 @@ class ExpressionSelector: """使用LLM选择适合的表达方式""" # 1. 获取35个随机表达方式(现在按权重抽取) - style_exprs, grammar_exprs = self.get_random_expressions(chat_id, 50, 0.5, 0.5) + style_exprs, grammar_exprs = self.get_random_expressions(chat_id, 30, 0.5, 0.5) # 2. 构建所有表达方式的索引和情境列表 all_expressions = [] @@ -264,7 +260,10 @@ class ExpressionSelector: # 4. 调用LLM try: + + start_time = time.time() content, (reasoning_content, model_name, _) = await self.llm_model.generate_response_async(prompt=prompt) + logger.info(f"LLM请求时间: {model_name} {time.time() - start_time} \n{prompt}") # logger.info(f"模型名称: {model_name}") # logger.info(f"LLM返回结果: {content}") diff --git a/src/chat/memory_system/Hippocampus.py b/src/chat/memory_system/Hippocampus.py index fe3c25625..9e4005b97 100644 --- a/src/chat/memory_system/Hippocampus.py +++ b/src/chat/memory_system/Hippocampus.py @@ -190,7 +190,7 @@ class MemoryGraph: class Hippocampus: def __init__(self): self.memory_graph = MemoryGraph() - self.model_summary: LLMRequest = None # type: ignore + self.model_small: LLMRequest = None # type: ignore self.entorhinal_cortex: EntorhinalCortex = None # type: ignore self.parahippocampal_gyrus: ParahippocampalGyrus = None # type: ignore @@ -200,7 +200,7 @@ class Hippocampus: self.parahippocampal_gyrus = ParahippocampalGyrus(self) # 从数据库加载记忆图 self.entorhinal_cortex.sync_memory_from_db() - self.model_summary = LLMRequest(model_set=model_config.model_task_config.memory, 
request_type="memory.builder") + self.model_small = LLMRequest(model_set=model_config.model_task_config.utils_small, request_type="memory.small") def get_all_node_names(self) -> list: """获取记忆图中所有节点的名字列表""" @@ -340,7 +340,7 @@ class Hippocampus: else: topic_num = 5 # 51+字符: 5个关键词 (其余长文本) - topics_response, _ = await self.model_summary.generate_response_async(self.find_topic_llm(text, topic_num)) + topics_response, _ = await self.model_small.generate_response_async(self.find_topic_llm(text, topic_num)) # 提取关键词 keywords = re.findall(r"<([^>]+)>", topics_response) diff --git a/src/chat/memory_system/instant_memory.py b/src/chat/memory_system/instant_memory.py index a702a87ed..a6be80ef1 100644 --- a/src/chat/memory_system/instant_memory.py +++ b/src/chat/memory_system/instant_memory.py @@ -38,7 +38,7 @@ class InstantMemory: self.chat_id = chat_id self.last_view_time = time.time() self.summary_model = LLMRequest( - model_set=model_config.model_task_config.memory, + model_set=model_config.model_task_config.utils, request_type="memory.summary", ) diff --git a/src/config/api_ada_configs.py b/src/config/api_ada_configs.py index 9692aced3..0292f7238 100644 --- a/src/config/api_ada_configs.py +++ b/src/config/api_ada_configs.py @@ -105,9 +105,6 @@ class ModelTaskConfig(ConfigBase): replyer_2: TaskConfig """normal_chat次要回复模型配置""" - memory: TaskConfig - """记忆模型配置""" - emotion: TaskConfig """情绪模型配置""" diff --git a/src/llm_models/model_client/openai_client.py b/src/llm_models/model_client/openai_client.py index ad9cbf177..6fbf02467 100644 --- a/src/llm_models/model_client/openai_client.py +++ b/src/llm_models/model_client/openai_client.py @@ -1,6 +1,7 @@ import asyncio import io import json +import time import re import base64 from collections.abc import Iterable @@ -452,6 +453,7 @@ class OpenaiClient(BaseClient): resp, usage_record = await stream_response_handler(req_task.result(), interrupt_flag) else: # 发送请求并获取响应 + # start_time = time.time() req_task = asyncio.create_task( 
self.client.chat.completions.create( model=model_info.model_identifier, @@ -469,7 +471,9 @@ # 如果中断量存在且被设置,则取消任务并抛出异常 req_task.cancel() raise ReqAbortException("请求被外部信号中断") - await asyncio.sleep(0.5) # 等待0.5秒后再次检查任务&中断信号量状态 + await asyncio.sleep(0.1) # 等待0.1秒后再次检查任务&中断信号量状态 + + # logger.info(f"OpenAI请求时间: {model_info.model_identifier} {time.time() - start_time} \n{messages}") resp, usage_record = async_response_parser(req_task.result()) except APIConnectionError as e: diff --git a/src/llm_models/utils_model.py b/src/llm_models/utils_model.py index b7aa0a8b8..f3668eef3 100644 --- a/src/llm_models/utils_model.py +++ b/src/llm_models/utils_model.py @@ -1,6 +1,7 @@ import re import copy import asyncio +import time from enum import Enum from rich.traceback import install @@ -150,14 +151,22 @@ class LLMRequest: (Tuple[str, str, str, Optional[List[ToolCall]]]): 响应内容、推理内容、模型名称、工具调用列表 """ # 请求体构建 + start_time = time.time() + + + message_builder = MessageBuilder() message_builder.add_text_content(prompt) messages = [message_builder.build()] + tool_built = self._build_tool_options(tools) + # 模型选择 model_info, api_provider, client = self._select_model() - + # 请求并处理返回值 + logger.info(f"LLM选择耗时: {model_info.name} {time.time() - start_time}") + response = await self._execute_request( api_provider=api_provider, client=client, @@ -168,6 +177,8 @@ max_tokens=max_tokens, tool_options=tool_built, ) + + content = response.content reasoning_content = response.reasoning_content or "" tool_calls = response.tool_calls @@ -175,6 +186,7 @@ if not reasoning_content and content: content, extracted_reasoning = self._extract_reasoning(content) reasoning_content = extracted_reasoning + if usage := response.usage: llm_usage_recorder.record_usage_to_database( model_info=model_info, @@ -183,6 +195,7 @@ request_type=self.request_type, endpoint="/chat/completions", ) + if not content: if raise_when_empty: 
logger.warning("生成的响应为空") diff --git a/template/model_config_template.toml b/template/model_config_template.toml index 3dcff6f84..77993954a 100644 --- a/template/model_config_template.toml +++ b/template/model_config_template.toml @@ -1,5 +1,5 @@ [inner] -version = "1.1.1" +version = "1.2.0" # 配置文件版本号迭代规则同bot_config.toml @@ -132,11 +132,6 @@ model_list = ["siliconflow-deepseek-v3"] temperature = 0.3 max_tokens = 800 -[model_task_config.memory] # 记忆模型 -model_list = ["qwen3-30b"] -temperature = 0.7 -max_tokens = 800 - [model_task_config.vlm] # 图像识别模型 model_list = ["qwen2.5-vl-72b"] max_tokens = 800