From f8450f705a21bbdc1d5991e56ff5e78b159a91df Mon Sep 17 00:00:00 2001 From: SengokuCola <1026294844@qq.com> Date: Thu, 24 Apr 2025 14:18:41 +0800 Subject: [PATCH] =?UTF-8?q?feat:=E5=90=88=E5=B9=B6=E5=B7=A5=E5=85=B7?= =?UTF-8?q?=E8=B0=83=E7=94=A8=E6=A8=A1=E5=9E=8B=E5=92=8C=E5=BF=83=E6=B5=81?= =?UTF-8?q?=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- llm_tool_benchmark_results.json | 71 +++++ src/config/config.py | 1 + src/do_tool/tool_can_use/base_tool.py | 3 +- .../tool_can_use/compare_numbers_tool.py | 2 +- src/do_tool/tool_can_use/get_knowledge.py | 4 +- src/do_tool/tool_can_use/get_memory.py | 4 +- src/do_tool/tool_can_use/get_time_date.py | 2 +- .../tool_can_use/lpmm_get_knowledge.py | 4 +- src/do_tool/tool_use.py | 12 +- src/heart_flow/mai_state_manager.py | 3 +- src/heart_flow/observation.py | 20 +- src/heart_flow/sub_heartflow.py | 194 +++++++++--- src/plugins/chat/bot.py | 8 +- src/plugins/chat/utils.py | 4 +- src/plugins/heartFC_chat/heartFC_chat.py | 214 +++++-------- src/plugins/heartFC_chat/heartFC_generator.py | 106 +------ .../heartFC_chat/heartflow_prompt_builder.py | 9 +- .../heartFC_chat/normal_chat_generator.py | 1 + src/plugins/models/utils_model.py | 54 +++- src/plugins/utils/chat_message_builder.py | 2 +- src/plugins/utils/json_utils.py | 297 ++++++++++++++++++ tool_call_benchmark.py | 289 +++++++++++++++++ 22 files changed, 973 insertions(+), 331 deletions(-) create mode 100644 llm_tool_benchmark_results.json create mode 100644 src/plugins/utils/json_utils.py create mode 100644 tool_call_benchmark.py diff --git a/llm_tool_benchmark_results.json b/llm_tool_benchmark_results.json new file mode 100644 index 000000000..e6be2a7dc --- /dev/null +++ b/llm_tool_benchmark_results.json @@ -0,0 +1,71 @@ +{ + "测试时间": "2025-04-24 13:22:36", + "测试迭代次数": 3, + "不使用工具调用": { + "平均耗时": 3.1020479996999106, + "最短耗时": 2.980656862258911, + "最长耗时": 3.2487313747406006, + "标准差": 0.13581516492157006, + "所有耗时": [ + 2.98, + 3.08, + 3.25 + ] + }, + "不使用工具调用_详细响应": [ + { + "内容摘要": "那个猫猫头表情包真的太可爱了,墨墨发的表情包也好萌,感觉可以分享一下我收藏的猫猫头系列", + "推理内容摘要": "" + }, + { + "内容摘要": "那个猫猫头表情包确实很魔性,我存了好多张,每次看到都觉得特别治愈。墨墨好像也喜欢这种可爱的表情包,可以分享一下我收藏的。", + "推理内容摘要": "" + }, + { + "内容摘要": "那个猫猫头表情包真的超可爱,我存了好多张,每次看到都会忍不住笑出来。墨墨发的表情包也好萌,感觉可以和大家分享一下我收藏的猫猫头。\n\n工具:无", + "推理内容摘要": "" + } + ], + "使用工具调用": { + "平均耗时": 7.927528937657674, + "最短耗时": 5.714647531509399, + "最长耗时": 11.046205997467041, + "标准差": 2.778799784731646, + "所有耗时": [ + 7.02, + 11.05, + 5.71 + ] + }, + "使用工具调用_详细响应": [ + { + "内容摘要": "这个猫猫头表情包确实挺有意思的,不过他们好像还在讨论版本问题。小千石在问3.8和3.11谁大,这挺简单的。", + "推理内容摘要": "", + "工具调用数量": 1, + "工具调用详情": [ + { + "工具名称": "compare_numbers", + "参数": "{\"num1\":3.8,\"num2\":3.11}" + } + ] + }, + { + "内容摘要": "3.8和3.11谁大这个问题有点突然,不过可以简单比较一下。可能小千石在测试我或者真的想知道答案。现在群里的话题有点分散,既有技术讨论又有表情包的话题,我还是先回答数字比较的问题好了,毕竟比较直接。", + "推理内容摘要": "", + "工具调用数量": 1, + "工具调用详情": [ + { + "工具名称": "compare_numbers", + "参数": "{\"num1\":3.8,\"num2\":3.11}" + } + ] + }, + { + "内容摘要": "他们还在纠结调试消息的事儿,不过好像讨论得差不多了。猫猫头表情包确实挺有意思的,但感觉聊得有点散了哦。小千石问3.8和3.11谁大,这个问题可以回答一下。", + "推理内容摘要": "", + "工具调用数量": 0, + "工具调用详情": [] + } + ], + "差异百分比": 155.56 +} \ No newline at end of file diff --git a/src/config/config.py b/src/config/config.py index ba9416d51..db2fd89d1 100644 --- a/src/config/config.py +++ b/src/config/config.py @@ -286,6 +286,7 @@ class BotConfig: llm_observation: Dict[str, str] = field(default_factory=lambda: {}) llm_sub_heartflow: Dict[str, str] = field(default_factory=lambda: {}) llm_heartflow: Dict[str, str] = field(default_factory=lambda: {}) + llm_tool_use: Dict[str, str] = field(default_factory=lambda: {}) api_urls: Dict[str, str] = field(default_factory=lambda: {}) diff --git a/src/do_tool/tool_can_use/base_tool.py b/src/do_tool/tool_can_use/base_tool.py index 7a89369fe..af12adf28 100644 --- a/src/do_tool/tool_can_use/base_tool.py +++ b/src/do_tool/tool_can_use/base_tool.py @@ -41,12 +41,11 @@ class BaseTool: "function": {"name": cls.name, "description": cls.description, "parameters": cls.parameters}, } - async def execute(self, function_args: Dict[str, Any], message_txt: str = "") -> Dict[str, Any]: + async def execute(self, function_args: Dict[str, Any]) -> Dict[str, Any]: """执行工具函数 Args: function_args: 工具调用参数 - message_txt: 原始消息文本 Returns: Dict: 工具执行结果 diff --git a/src/do_tool/tool_can_use/compare_numbers_tool.py b/src/do_tool/tool_can_use/compare_numbers_tool.py index 48cee5157..1fbd812a0 100644 --- a/src/do_tool/tool_can_use/compare_numbers_tool.py +++ b/src/do_tool/tool_can_use/compare_numbers_tool.py @@ -19,7 +19,7 @@ class CompareNumbersTool(BaseTool): "required": ["num1", "num2"], } - async def execute(self, function_args: Dict[str, Any], message_txt: str = "") -> Dict[str, Any]: + async def execute(self, function_args: Dict[str, Any]) -> Dict[str, Any]: """执行比较两个数的大小 Args: diff --git a/src/do_tool/tool_can_use/get_knowledge.py b/src/do_tool/tool_can_use/get_knowledge.py index 0ccac52c4..600afd36d 100644 --- a/src/do_tool/tool_can_use/get_knowledge.py +++ b/src/do_tool/tool_can_use/get_knowledge.py @@ -21,7 +21,7 @@ class SearchKnowledgeTool(BaseTool): "required": ["query"], } - async def execute(self, function_args: Dict[str, Any], message_txt: str = "") -> Dict[str, Any]: + async def execute(self, function_args: Dict[str, Any]) -> Dict[str, Any]: """执行知识库搜索 Args: @@ -32,7 +32,7 @@ class SearchKnowledgeTool(BaseTool): Dict: 工具执行结果 """ try: - query = function_args.get("query", message_txt) + query = function_args.get("query") threshold = function_args.get("threshold", 0.4) # 调用知识库搜索 diff --git a/src/do_tool/tool_can_use/get_memory.py b/src/do_tool/tool_can_use/get_memory.py index 28346d46c..98a4e85e2 100644 --- a/src/do_tool/tool_can_use/get_memory.py +++ b/src/do_tool/tool_can_use/get_memory.py @@ -20,7 +20,7 @@ class GetMemoryTool(BaseTool): "required": ["topic"], } - async def execute(self, function_args: Dict[str, Any], message_txt: str = "") -> Dict[str, Any]: + async def execute(self, function_args: Dict[str, Any]) -> Dict[str, Any]: """执行记忆获取 Args: @@ -31,7 +31,7 @@ class GetMemoryTool(BaseTool): Dict: 工具执行结果 """ try: - topic = function_args.get("topic", message_txt) + topic = function_args.get("topic") max_memory_num = function_args.get("max_memory_num", 2) # 将主题字符串转换为列表 diff --git a/src/do_tool/tool_can_use/get_time_date.py b/src/do_tool/tool_can_use/get_time_date.py index c3c9c8376..df6067bfb 100644 --- a/src/do_tool/tool_can_use/get_time_date.py +++ b/src/do_tool/tool_can_use/get_time_date.py @@ -17,7 +17,7 @@ class GetCurrentDateTimeTool(BaseTool): "required": [], } - async def execute(self, function_args: Dict[str, Any], message_txt: str = "") -> Dict[str, Any]: + async def execute(self, function_args: Dict[str, Any]) -> Dict[str, Any]: """执行获取当前时间、日期、年份和星期 Args: diff --git a/src/do_tool/tool_can_use/lpmm_get_knowledge.py b/src/do_tool/tool_can_use/lpmm_get_knowledge.py index 601d6083b..7541d48a9 100644 --- a/src/do_tool/tool_can_use/lpmm_get_knowledge.py +++ b/src/do_tool/tool_can_use/lpmm_get_knowledge.py @@ -24,7 +24,7 @@ class SearchKnowledgeFromLPMMTool(BaseTool): "required": ["query"], } - async def execute(self, function_args: Dict[str, Any], message_txt: str = "") -> Dict[str, Any]: + async def execute(self, function_args: Dict[str, Any]) -> Dict[str, Any]: """执行知识库搜索 Args: @@ -35,7 +35,7 @@ class SearchKnowledgeFromLPMMTool(BaseTool): Dict: 工具执行结果 """ try: - query = function_args.get("query", message_txt) + query = function_args.get("query") # threshold = function_args.get("threshold", 0.4) # 调用知识库搜索 diff --git a/src/do_tool/tool_use.py b/src/do_tool/tool_use.py index 019294ec5..1f625a586 100644 --- a/src/do_tool/tool_use.py +++ b/src/do_tool/tool_use.py @@ -50,8 +50,8 @@ class ToolUser: prompt += message_txt # prompt += f"你注意到{sender_name}刚刚说:{message_txt}\n" prompt += f"注意你就是{bot_name},{bot_name}是你的名字。根据之前的聊天记录补充问题信息,搜索时避开你的名字。\n" - prompt += "必须调用 'lpmm_get_knowledge' 工具来获取知识。\n" - prompt += "你现在需要对群里的聊天内容进行回复,现在选择工具来对消息和你的回复进行处理,你是否需要额外的信息,比如回忆或者搜寻已有的知识,改变关系和情感,或者了解你现在正在做什么。" + # prompt += "必须调用 'lpmm_get_knowledge' 工具来获取知识。\n" + prompt += "你现在需要对群里的聊天内容进行回复,请你思考应该使用什么工具,然后选择工具来对消息和你的回复进行处理,你是否需要额外的信息,比如回忆或者搜寻已有的知识,改变关系和情感,或者了解你现在正在做什么。" prompt = await relationship_manager.convert_all_person_sign_to_person_name(prompt) prompt = parse_text_timestamps(prompt, mode="lite") @@ -68,7 +68,7 @@ class ToolUser: return get_all_tool_definitions() @staticmethod - async def _execute_tool_call(tool_call, message_txt: str): + async def _execute_tool_call(tool_call): """执行特定的工具调用 Args: @@ -89,7 +89,7 @@ class ToolUser: return None # 执行工具 - result = await tool_instance.execute(function_args, message_txt) + result = await tool_instance.execute(function_args) if result: # 直接使用 function_name 作为 tool_type tool_type = function_name @@ -159,13 +159,13 @@ class ToolUser: tool_calls_str = "" for tool_call in tool_calls: tool_calls_str += f"{tool_call['function']['name']}\n" - logger.info(f"根据:\n{prompt}\n模型请求调用{len(tool_calls)}个工具: {tool_calls_str}") + logger.info(f"根据:\n{prompt}\n\n内容:{content}\n\n模型请求调用{len(tool_calls)}个工具: {tool_calls_str}") tool_results = [] structured_info = {} # 动态生成键 # 执行所有工具调用 for tool_call in tool_calls: - result = await self._execute_tool_call(tool_call, message_txt) + result = await self._execute_tool_call(tool_call) if result: tool_results.append(result) # 使用工具名称作为键 diff --git a/src/heart_flow/mai_state_manager.py b/src/heart_flow/mai_state_manager.py index 740b715fd..9a39b5fe5 100644 --- a/src/heart_flow/mai_state_manager.py +++ b/src/heart_flow/mai_state_manager.py @@ -13,7 +13,8 @@ mai_state_config = LogConfig( logger = get_module_logger("mai_state_manager", config=mai_state_config) -enable_unlimited_hfc_chat = False +enable_unlimited_hfc_chat = True +# enable_unlimited_hfc_chat = False class MaiState(enum.Enum): diff --git a/src/heart_flow/observation.py b/src/heart_flow/observation.py index ba4d23de9..0f61f6082 100644 --- a/src/heart_flow/observation.py +++ b/src/heart_flow/observation.py @@ -78,29 +78,33 @@ class ChattingObservation(Observation): return self.talking_message_str async def observe(self): + # 自上一次观察的新消息 new_messages_list = get_raw_msg_by_timestamp_with_chat( chat_id=self.chat_id, timestamp_start=self.last_observe_time, - timestamp_end=datetime.now().timestamp(), # 使用当前时间作为结束时间戳 + timestamp_end=datetime.now().timestamp(), limit=self.max_now_obs_len, limit_mode="latest", ) - if new_messages_list: # 检查列表是否为空 - last_obs_time_mark = self.last_observe_time + + last_obs_time_mark = self.last_observe_time + if new_messages_list: self.last_observe_time = new_messages_list[-1]["time"] self.talking_message.extend(new_messages_list) + if len(self.talking_message) > self.max_now_obs_len: # 计算需要移除的消息数量,保留最新的 max_now_obs_len 条 messages_to_remove_count = len(self.talking_message) - self.max_now_obs_len oldest_messages = self.talking_message[:messages_to_remove_count] self.talking_message = self.talking_message[messages_to_remove_count:] # 保留后半部分,即最新的 - + oldest_messages_str = await build_readable_messages( messages=oldest_messages, timestamp_mode="normal", - read_mark=last_obs_time_mark, + read_mark=0 ) + # 调用 LLM 总结主题 prompt = ( @@ -137,7 +141,11 @@ class ChattingObservation(Observation): ) self.mid_memory_info = mid_memory_str - self.talking_message_str = await build_readable_messages(messages=self.talking_message, timestamp_mode="normal") + self.talking_message_str = await build_readable_messages( + messages=self.talking_message, + timestamp_mode="normal", + read_mark=last_obs_time_mark, + ) logger.trace( f"Chat {self.chat_id} - 压缩早期记忆:{self.mid_memory_info}\n现在聊天内容:{self.talking_message_str}" diff --git a/src/heart_flow/sub_heartflow.py b/src/heart_flow/sub_heartflow.py index 76d60b14e..f0a448866 100644 --- a/src/heart_flow/sub_heartflow.py +++ b/src/heart_flow/sub_heartflow.py @@ -18,10 +18,9 @@ from src.plugins.chat.chat_stream import chat_manager import math from src.plugins.heartFC_chat.heartFC_chat import HeartFChatting from src.plugins.heartFC_chat.normal_chat import NormalChat - -# from src.do_tool.tool_use import ToolUser +from src.do_tool.tool_use import ToolUser from src.heart_flow.mai_state_manager import MaiStateInfo - +from src.plugins.utils.json_utils import safe_json_dumps, process_llm_tool_response, normalize_llm_response, process_llm_tool_calls # 定义常量 (从 interest.py 移动过来) MAX_INTEREST = 15.0 @@ -54,8 +53,9 @@ def init_prompt(): # prompt += "你注意到{sender_name}刚刚说:{message_txt}\n" prompt += "现在请你根据刚刚的想法继续思考,思考时可以想想如何对群聊内容进行回复,要不要对群里的话题进行回复,关注新话题,可以适当转换话题,大家正在说的话才是聊天的主题。\n" prompt += "回复的要求是:平淡一些,简短一些,说中文,如果你要回复,最好只回复一个人的一个话题\n" - prompt += "请注意不要输出多余内容(包括前后缀,冒号和引号,括号, 表情,等),不要带有括号和动作描写。不要回复自己的发言,尽量不要说你说过的话。" - prompt += "现在请你{hf_do_next},不要分点输出,生成内心想法,文字不要浮夸" + prompt += "请注意不要输出多余内容(包括前后缀,冒号和引号,括号, 表情,等),不要带有括号和动作描写。不要回复自己的发言,尽量不要说你说过的话。\n" + prompt += "现在请你先{hf_do_next},不要分点输出,生成内心想法,文字不要浮夸" + prompt += "在输出完想法后,请你思考应该使用什么工具。如果你需要做某件事,来对消息和你的回复进行处理,请使用工具。\n" Prompt(prompt, "sub_heartflow_prompt_before") @@ -114,6 +114,8 @@ class InterestChatting: self.above_threshold = False self.start_hfc_probability = 0.0 + + def add_interest_dict(self, message: MessageRecv, interest_value: float, is_mentioned: bool): self.interest_dict[message.message_info.message_id] = (message, interest_value, is_mentioned) @@ -291,6 +293,8 @@ class SubHeartflow: ) self.log_prefix = chat_manager.get_stream_name(self.subheartflow_id) or self.subheartflow_id + + self.structured_info = {} async def add_time_current_state(self, add_time: float): self.current_state_time += add_time @@ -477,58 +481,63 @@ class SubHeartflow: logger.info(f"{self.log_prefix} 子心流后台任务已停止。") - async def do_thinking_before_reply( - self, - extra_info: str, - obs_id: list[str] = None, - ): + async def do_thinking_before_reply(self): + """ + 在回复前进行思考,生成内心想法并收集工具调用结果 + + 返回: + tuple: (current_mind, past_mind) 当前想法和过去的想法列表 + """ + # 更新活跃时间 self.last_active_time = time.time() - + + # ---------- 1. 准备基础数据 ---------- + # 获取现有想法和情绪状态 current_thinking_info = self.current_mind mood_info = self.chat_state.mood + + # 获取观察对象 observation = self._get_primary_observation() - - chat_observe_info = "" - if obs_id: - try: - chat_observe_info = observation.get_observe_info(obs_id) - logger.debug(f"[{self.subheartflow_id}] Using specific observation IDs: {obs_id}") - except Exception as e: - logger.error( - f"[{self.subheartflow_id}] Error getting observe info with IDs {obs_id}: {e}. Falling back." - ) - chat_observe_info = observation.get_observe_info() - else: - chat_observe_info = observation.get_observe_info() - # logger.debug(f"[{self.subheartflow_id}] Using default observation info.") - - extra_info_prompt = "" - if extra_info: - for tool_name, tool_data in extra_info.items(): - extra_info_prompt += f"{tool_name} 相关信息:\n" - for item in tool_data: - extra_info_prompt += f"- {item['name']}: {item['content']}\n" - else: - extra_info_prompt = "无工具信息。\n" - + if not observation: + logger.error(f"[{self.subheartflow_id}] 无法获取观察对象") + self.update_current_mind("(我没看到任何聊天内容...)") + return self.current_mind, self.past_mind + + # 获取观察内容 + chat_observe_info = observation.get_observe_info() + + # ---------- 2. 准备工具和个性化数据 ---------- + # 初始化工具 + tool_instance = ToolUser() + tools = tool_instance._define_tools() + + # 获取个性化信息 individuality = Individuality.get_instance() + + # 构建个性部分 prompt_personality = f"你的名字是{individuality.personality.bot_nickname},你" prompt_personality += individuality.personality.personality_core + # 随机添加个性侧面 if individuality.personality.personality_sides: random_side = random.choice(individuality.personality.personality_sides) prompt_personality += f",{random_side}" + # 随机添加身份细节 if individuality.identity.identity_detail: random_detail = random.choice(individuality.identity.identity_detail) prompt_personality += f",{random_detail}" + # 获取当前时间 time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + # ---------- 3. 构建思考指导部分 ---------- + # 创建本地随机数生成器,基于分钟数作为种子 local_random = random.Random() current_minute = int(time.strftime("%M")) local_random.seed(current_minute) + # 思考指导选项和权重 hf_options = [ ("继续生成你在这个聊天中的想法,在原来想法的基础上继续思考", 0.7), ("生成你在这个聊天中的想法,在原来的想法上尝试新的话题", 0.1), @@ -536,12 +545,17 @@ class SubHeartflow: ("继续生成你在这个聊天中的想法,进行深入思考", 0.1), ] + # 加权随机选择思考指导 hf_do_next = local_random.choices( - [option[0] for option in hf_options], weights=[option[1] for option in hf_options], k=1 + [option[0] for option in hf_options], + weights=[option[1] for option in hf_options], + k=1 )[0] + # ---------- 4. 构建最终提示词 ---------- + # 获取提示词模板并填充数据 prompt = (await global_prompt_manager.get_prompt_async("sub_heartflow_prompt_before")).format( - extra_info=extra_info_prompt, + extra_info="", # 可以在这里添加额外信息 prompt_personality=prompt_personality, bot_name=individuality.personality.bot_nickname, current_thinking_info=current_thinking_info, @@ -551,26 +565,104 @@ class SubHeartflow: hf_do_next=hf_do_next, ) - prompt = await relationship_manager.convert_all_person_sign_to_person_name(prompt) - prompt = parse_text_timestamps(prompt, mode="lite") - - logger.debug(f"[{self.subheartflow_id}] 心流思考prompt:\n{prompt}\n") + logger.debug(f"[{self.subheartflow_id}] 心流思考提示词构建完成") + # ---------- 5. 执行LLM请求并处理响应 ---------- + content = "" # 初始化内容变量 + reasoning_content = "" # 初始化推理内容变量 + try: - response, reasoning_content = await self.llm_model.generate_response_async(prompt) - - logger.debug(f"[{self.subheartflow_id}] 心流思考结果:\n{response}\n") - - if not response: - response = "(不知道该想些什么...)" - logger.warning(f"[{self.subheartflow_id}] LLM 返回空结果,思考失败。") + # 调用LLM生成响应 + response = await self.llm_model.generate_response_tool_async(prompt=prompt, tools=tools) + + # 标准化响应格式 + success, normalized_response, error_msg = normalize_llm_response( + response, log_prefix=f"[{self.subheartflow_id}] " + ) + + if not success: + # 处理标准化失败情况 + logger.warning(f"[{self.subheartflow_id}] {error_msg}") + content = "LLM响应格式无法处理" + else: + # 从标准化响应中提取内容 + if len(normalized_response) >= 2: + content = normalized_response[0] + reasoning_content = normalized_response[1] if len(normalized_response) > 1 else "" + + # 处理可能的工具调用 + if len(normalized_response) == 3: + # 提取并验证工具调用 + success, valid_tool_calls, error_msg = process_llm_tool_calls( + normalized_response, log_prefix=f"[{self.subheartflow_id}] " + ) + + if success and valid_tool_calls: + # 记录工具调用信息 + tool_calls_str = ", ".join([ + call.get("function", {}).get("name", "未知工具") + for call in valid_tool_calls + ]) + logger.info(f"[{self.subheartflow_id}] 模型请求调用{len(valid_tool_calls)}个工具: {tool_calls_str}") + + # 收集工具执行结果 + await self._execute_tool_calls(valid_tool_calls, tool_instance) + elif not success: + logger.warning(f"[{self.subheartflow_id}] {error_msg}") except Exception as e: - logger.error(f"[{self.subheartflow_id}] 内心独白获取失败: {e}") - response = "(思考时发生错误...)" + # 处理总体异常 + logger.error(f"[{self.subheartflow_id}] 执行LLM请求或处理响应时出错: {e}") + logger.error(traceback.format_exc()) + content = "思考过程中出现错误" - self.update_current_mind(response) + # 记录最终思考结果 + logger.debug(f"[{self.subheartflow_id}] 心流思考结果:\n{content}\n") + + # 处理空响应情况 + if not content: + content = "(不知道该想些什么...)" + logger.warning(f"[{self.subheartflow_id}] LLM返回空结果,思考失败。") + + # ---------- 6. 更新思考状态并返回结果 ---------- + # 更新当前思考内容 + self.update_current_mind(content) return self.current_mind, self.past_mind + + async def _execute_tool_calls(self, tool_calls, tool_instance): + """ + 执行一组工具调用并收集结果 + + 参数: + tool_calls: 工具调用列表 + tool_instance: 工具使用器实例 + """ + tool_results = [] + structured_info = {} # 动态生成键 + + # 执行所有工具调用 + for tool_call in tool_calls: + try: + result = await tool_instance._execute_tool_call(tool_call) + if result: + tool_results.append(result) + + # 使用工具名称作为键 + tool_name = result["name"] + if tool_name not in structured_info: + structured_info[tool_name] = [] + + structured_info[tool_name].append({ + "name": result["name"], + "content": result["content"] + }) + except Exception as tool_e: + logger.error(f"[{self.subheartflow_id}] 工具执行失败: {tool_e}") + + # 如果有工具结果,记录并更新结构化信息 + if structured_info: + logger.debug(f"工具调用收集到结构化信息: {safe_json_dumps(structured_info, ensure_ascii=False)}") + self.structured_info = structured_info def update_current_mind(self, response): self.past_mind.append(self.current_mind) diff --git a/src/plugins/chat/bot.py b/src/plugins/chat/bot.py index fdb2576a2..5c1ce6f81 100644 --- a/src/plugins/chat/bot.py +++ b/src/plugins/chat/bot.py @@ -78,13 +78,15 @@ class ChatBot: groupinfo = message.message_info.group_info userinfo = message.message_info.user_info + if userinfo.user_id in global_config.ban_user_id: logger.debug(f"用户{userinfo.user_id}被禁止回复") return - if groupinfo.group_id not in global_config.talk_allowed_groups: - logger.debug(f"群{groupinfo.group_id}被禁止回复") - return + if groupinfo: + if groupinfo.group_id not in global_config.talk_allowed_groups: + logger.trace(f"群{groupinfo.group_id}被禁止回复") + return if message.message_info.template_info and not message.message_info.template_info.template_default: template_group_name = message.message_info.template_info.template_name diff --git a/src/plugins/chat/utils.py b/src/plugins/chat/utils.py index 271386ff5..386d6ac7a 100644 --- a/src/plugins/chat/utils.py +++ b/src/plugins/chat/utils.py @@ -327,8 +327,8 @@ def split_into_sentences_w_remove_punctuation(text: str) -> List[str]: # 提取最终的句子内容 final_sentences = [content for content, sep in merged_segments if content] # 只保留有内容的段 - # 清理可能引入的空字符串 - final_sentences = [s for s in final_sentences if s] + # 清理可能引入的空字符串和仅包含空白的字符串 + final_sentences = [s for s in final_sentences if s.strip()] # 过滤掉空字符串以及仅包含空白(如换行符、空格)的字符串 logger.debug(f"分割并合并后的句子: {final_sentences}") return final_sentences diff --git a/src/plugins/heartFC_chat/heartFC_chat.py b/src/plugins/heartFC_chat/heartFC_chat.py index ac8030f00..494ddeb09 100644 --- a/src/plugins/heartFC_chat/heartFC_chat.py +++ b/src/plugins/heartFC_chat/heartFC_chat.py @@ -2,7 +2,7 @@ import asyncio import time import traceback from typing import List, Optional, Dict, Any, TYPE_CHECKING -import json +# import json # 移除,因为使用了json_utils from src.plugins.chat.message import MessageRecv, BaseMessageInfo, MessageThinking, MessageSending from src.plugins.chat.message import MessageSet, Seg # Local import needed after move from src.plugins.chat.chat_stream import ChatStream @@ -17,6 +17,7 @@ from src.plugins.heartFC_chat.heartFC_generator import HeartFCGenerator from src.do_tool.tool_use import ToolUser from ..chat.message_sender import message_manager # <-- Import the global manager from src.plugins.chat.emoji_manager import emoji_manager +from src.plugins.utils.json_utils import extract_tool_call_arguments, safe_json_dumps, process_llm_tool_response # 导入新的JSON工具 # --- End import --- @@ -245,9 +246,6 @@ class HeartFChatting: action = planner_result.get("action", "error") reasoning = planner_result.get("reasoning", "Planner did not provide reasoning.") emoji_query = planner_result.get("emoji_query", "") - # current_mind = planner_result.get("current_mind", "[Mind unavailable]") - # send_emoji_from_tools = planner_result.get("send_emoji_from_tools", "") # Emoji from tools - observed_messages = planner_result.get("observed_messages", []) llm_error = planner_result.get("llm_error", False) if llm_error: @@ -259,7 +257,7 @@ class HeartFChatting: elif action == "text_reply": logger.debug(f"{log_prefix} HeartFChatting: 麦麦决定回复文本. 理由: {reasoning}") action_taken_this_cycle = True - anchor_message = await self._get_anchor_message(observed_messages) + anchor_message = await self._get_anchor_message() if not anchor_message: logger.error(f"{log_prefix} 循环: 无法获取锚点消息用于回复. 跳过周期.") else: @@ -304,7 +302,7 @@ class HeartFChatting: f"{log_prefix} HeartFChatting: 麦麦决定回复表情 ('{emoji_query}'). 理由: {reasoning}" ) action_taken_this_cycle = True - anchor = await self._get_anchor_message(observed_messages) + anchor = await self._get_anchor_message() if anchor: try: # --- Handle Emoji (Moved) --- # @@ -329,11 +327,6 @@ class HeartFChatting: with Timer("Wait New Msg", cycle_timers): # <--- Start Wait timer wait_start_time = time.monotonic() while True: - # Removed timer check within wait loop - # async with self._timer_lock: - # if self._loop_timer <= 0: - # logger.info(f"{log_prefix} HeartFChatting: 等待新消息时计时器耗尽。") - # break # 计时器耗尽,退出等待 # 检查是否有新消息 has_new = await observation.has_new_messages_since(planner_start_db_time) @@ -395,14 +388,6 @@ class HeartFChatting: self._processing_lock.release() # logger.trace(f"{log_prefix} 循环释放了处理锁.") # Reduce noise - # --- Timer Decrement Logging Removed --- - # async with self._timer_lock: - # self._loop_timer -= cycle_duration - # # Log timer decrement less aggressively - # if cycle_duration > 0.1 or not action_taken_this_cycle: - # logger.debug( - # f"{log_prefix} HeartFChatting: 周期耗时 {cycle_duration:.2f}s. 剩余时间: {self._loop_timer:.1f}s." - # ) if cycle_duration > 0.1: logger.debug(f"{log_prefix} HeartFChatting: 周期耗时 {cycle_duration:.2f}s.") @@ -437,77 +422,34 @@ class HeartFChatting: """ log_prefix = self._get_log_prefix() observed_messages: List[dict] = [] - tool_result_info = {} - get_mid_memory_id = [] - # send_emoji_from_tools = "" # Emoji suggested by tools - current_mind: Optional[str] = None - llm_error = False # Flag for LLM failure - # --- Ensure SubHeartflow is available --- - if not self.sub_hf: - # Attempt to re-fetch if missing (might happen if initialization order changes) - self.sub_hf = heartflow.get_subheartflow(self.stream_id) - if not self.sub_hf: - logger.error(f"{log_prefix}[Planner] SubHeartflow is not available. Cannot proceed.") - return { - "action": "error", - "reasoning": "SubHeartflow unavailable", - "llm_error": True, - "observed_messages": [], - } + current_mind: Optional[str] = None + llm_error = False try: - # Access observation via self.sub_hf observation = self.sub_hf._get_primary_observation() await observation.observe() observed_messages = observation.talking_message observed_messages_str = observation.talking_message_str except Exception as e: logger.error(f"{log_prefix}[Planner] 获取观察信息时出错: {e}") - # Handle error gracefully, maybe return an error state - observed_messages_str = "[Error getting observation]" - # Consider returning error here if observation is critical - # --- 结束获取观察信息 --- # - # --- (Moved from _replier_work) 1. 思考前使用工具 --- # + try: - # Access tool_user directly - tool_result = await self.tool_user.use_tool( - message_txt=observed_messages_str, - chat_stream=self.chat_stream, - observation=self.sub_hf._get_primary_observation(), - ) - if tool_result.get("used_tools", False): - tool_result_info = tool_result.get("structured_info", {}) - logger.debug(f"{log_prefix}[Planner] 规划前工具结果: {tool_result_info}") - - get_mid_memory_id = [ - mem["content"] for mem in tool_result_info.get("mid_chat_mem", []) if "content" in mem - ] - - except Exception as e_tool: - logger.error(f"{log_prefix}[Planner] 规划前工具使用失败: {e_tool}") - # --- 结束工具使用 --- # - - # --- (Moved from _replier_work) 2. SubHeartflow 思考 --- # - try: - current_mind, _past_mind = await self.sub_hf.do_thinking_before_reply( - extra_info=tool_result_info, - obs_id=get_mid_memory_id, - ) - # logger.debug(f"{log_prefix}[Planner] SubHF Mind: {current_mind}") + current_mind, _past_mind = await self.sub_hf.do_thinking_before_reply() except Exception as e_subhf: logger.error(f"{log_prefix}[Planner] SubHeartflow 思考失败: {e_subhf}") current_mind = "[思考时出错]" - # --- 结束 SubHeartflow 思考 --- # + # --- 使用 LLM 进行决策 --- # - action = "no_reply" # Default action - emoji_query = "" # Default emoji query (used if action is emoji_reply or text_reply with emoji) - reasoning = "默认决策或获取决策失败" + action = "no_reply" # 默认动作 + emoji_query = "" # 默认表情查询 + reasoning = "默认决策或获取决策失败" + llm_error = False # LLM错误标志 try: - prompt = await self._build_planner_prompt(observed_messages_str, current_mind) + prompt = await self._build_planner_prompt(observed_messages_str, current_mind, self.sub_hf.structured_info) payload = { "model": self.planner_llm.model_name, "messages": [{"role": "user", "content": prompt}], @@ -515,83 +457,70 @@ class HeartFChatting: "tool_choice": {"type": "function", "function": {"name": "decide_reply_action"}}, } - response = await self.planner_llm._execute_request( - endpoint="/chat/completions", payload=payload, prompt=prompt - ) + # 执行LLM请求 + try: + response = await self.planner_llm._execute_request( + endpoint="/chat/completions", payload=payload, prompt=prompt + ) + except Exception as req_e: + logger.error(f"{log_prefix}[Planner] LLM请求执行失败: {req_e}") + return { + "action": "error", + "reasoning": f"LLM请求执行失败: {req_e}", + "emoji_query": "", + "current_mind": current_mind, + "observed_messages": observed_messages, + "llm_error": True, + } - if len(response) == 3: - _, _, tool_calls = response - if tool_calls and isinstance(tool_calls, list) and len(tool_calls) > 0: - tool_call = tool_calls[0] - if ( - tool_call.get("type") == "function" - and tool_call.get("function", {}).get("name") == "decide_reply_action" - ): - try: - arguments = json.loads(tool_call["function"]["arguments"]) - action = arguments.get("action", "no_reply") - reasoning = arguments.get("reasoning", "未提供理由") - # Planner explicitly provides emoji query if action is emoji_reply or text_reply wants emoji - emoji_query = arguments.get("emoji_query", "") - logger.debug( - f"{log_prefix}[Planner] LLM Prompt: {prompt}\n决策: {action}, 理由: {reasoning}, EmojiQuery: '{emoji_query}'" - ) - except json.JSONDecodeError as json_e: - logger.error( - f"{log_prefix}[Planner] 解析工具参数失败: {json_e}. Args: {tool_call['function'].get('arguments')}" - ) - action = "error" - reasoning = "工具参数解析失败" - llm_error = True - except Exception as parse_e: - logger.error(f"{log_prefix}[Planner] 处理工具参数时出错: {parse_e}") - action = "error" - reasoning = "处理工具参数时出错" - llm_error = True - else: - logger.warning( - f"{log_prefix}[Planner] LLM 未按预期调用 'decide_reply_action' 工具。Tool calls: {tool_calls}" - ) - action = "error" - reasoning = "LLM未调用预期工具" - llm_error = True - else: - logger.warning(f"{log_prefix}[Planner] LLM 响应中未包含有效的工具调用。Tool calls: {tool_calls}") - action = "error" - reasoning = "LLM响应无工具调用" - llm_error = True + # 使用辅助函数处理工具调用响应 + success, arguments, error_msg = process_llm_tool_response( + response, + expected_tool_name="decide_reply_action", + log_prefix=f"{log_prefix}[Planner] " + ) + + if success: + # 提取决策参数 + action = arguments.get("action", "no_reply") + reasoning = arguments.get("reasoning", "未提供理由") + emoji_query = arguments.get("emoji_query", "") + + # 记录决策结果 + logger.debug( + f"{log_prefix}[Planner] 决策结果: {action}, 理由: {reasoning}, 表情查询: '{emoji_query}'" + ) else: - logger.warning(f"{log_prefix}[Planner] LLM 未返回预期的工具调用响应。Response parts: {len(response)}") + # 处理工具调用失败 + logger.warning(f"{log_prefix}[Planner] {error_msg}") action = "error" - reasoning = "LLM响应格式错误" + reasoning = error_msg llm_error = True except Exception as llm_e: - logger.error(f"{log_prefix}[Planner] Planner LLM 调用失败: {llm_e}") - # logger.error(traceback.format_exc()) # Maybe too verbose for loop? + logger.error(f"{log_prefix}[Planner] Planner LLM处理过程中出错: {llm_e}") + logger.error(traceback.format_exc()) # 记录完整堆栈以便调试 action = "error" - reasoning = f"LLM 调用失败: {llm_e}" + reasoning = f"LLM处理失败: {llm_e}" llm_error = True # --- 结束 LLM 决策 --- # return { "action": action, "reasoning": reasoning, - "emoji_query": emoji_query, # Explicit query from Planner/LLM + "emoji_query": emoji_query, "current_mind": current_mind, - # "send_emoji_from_tools": send_emoji_from_tools, # Emoji suggested by tools (used as fallback) "observed_messages": observed_messages, "llm_error": llm_error, } - async def _get_anchor_message(self, observed_messages: List[dict]) -> Optional[MessageRecv]: + async def _get_anchor_message(self) -> Optional[MessageRecv]: """ 重构观察到的最后一条消息作为回复的锚点, 如果重构失败或观察为空,则创建一个占位符。 """ try: - # --- Create Placeholder --- # placeholder_id = f"mid_pf_{int(time.time() * 1000)}" placeholder_user = UserInfo( user_id="system_trigger", user_nickname="System Trigger", platform=self.chat_stream.platform @@ -652,37 +581,41 @@ class HeartFChatting: raise RuntimeError("发送回复失败,_send_response_messages返回None") async def shutdown(self): - """ - Gracefully shuts down the HeartFChatting instance by cancelling the active loop task. - """ + """优雅关闭HeartFChatting实例,取消活动循环任务""" log_prefix = self._get_log_prefix() - logger.info(f"{log_prefix} Shutting down HeartFChatting...") + logger.info(f"{log_prefix} 正在关闭HeartFChatting...") + + # 取消循环任务 if self._loop_task and not self._loop_task.done(): - logger.info(f"{log_prefix} Cancelling active PF loop task.") + logger.info(f"{log_prefix} 正在取消HeartFChatting循环任务") self._loop_task.cancel() try: - await asyncio.wait_for(self._loop_task, timeout=1.0) # Shorter timeout? - except asyncio.CancelledError: - logger.info(f"{log_prefix} PF loop task cancelled successfully.") - except asyncio.TimeoutError: - logger.warning(f"{log_prefix} Timeout waiting for PF loop task cancellation.") + await asyncio.wait_for(self._loop_task, timeout=1.0) + logger.info(f"{log_prefix} HeartFChatting循环任务已取消") + except (asyncio.CancelledError, asyncio.TimeoutError): + pass except Exception as e: - logger.error(f"{log_prefix} Error during loop task cancellation: {e}") + logger.error(f"{log_prefix} 取消循环任务出错: {e}") else: - logger.info(f"{log_prefix} No active PF loop task found to cancel.") + logger.info(f"{log_prefix} 没有活动的HeartFChatting循环任务") + # 清理状态 self._loop_active = False self._loop_task = None if self._processing_lock.locked(): - logger.warning(f"{log_prefix} Releasing processing lock during shutdown.") self._processing_lock.release() - logger.info(f"{log_prefix} HeartFChatting shutdown complete.") + logger.warning(f"{log_prefix} 已释放处理锁") + + logger.info(f"{log_prefix} HeartFChatting关闭完成") - async def _build_planner_prompt(self, observed_messages_str: str, current_mind: Optional[str]) -> str: + async def _build_planner_prompt(self, observed_messages_str: str, current_mind: Optional[str], structured_info: Dict[str, Any]) -> str: """构建 Planner LLM 的提示词""" prompt = f"你的名字是 {global_config.BOT_NICKNAME}。是一个女大二学生,正在QQ聊天,正在决定是否以及如何回应当前的聊天。\n" + if structured_info: + prompt += f"以下是一些额外的信息:\n{structured_info}\n" + if observed_messages_str: prompt += "观察到的最新聊天内容如下 (最近的消息在最后):\n---\n" prompt += observed_messages_str @@ -726,6 +659,7 @@ class HeartFChatting: response_set: Optional[List[str]] = None try: response_set = await self.gpt_instance.generate_response( + structured_info=self.sub_hf.structured_info, current_mind_info=self.sub_hf.current_mind, reason=reason, message=anchor_message, # Pass anchor_message positionally (matches 'message' parameter) diff --git a/src/plugins/heartFC_chat/heartFC_generator.py b/src/plugins/heartFC_chat/heartFC_generator.py index 28329b896..0ed6229e6 100644 --- a/src/plugins/heartFC_chat/heartFC_generator.py +++ b/src/plugins/heartFC_chat/heartFC_generator.py @@ -39,6 +39,7 @@ class HeartFCGenerator: async def generate_response( self, + structured_info: str, current_mind_info: str, reason: str, message: MessageRecv, @@ -56,7 +57,7 @@ class HeartFCGenerator: current_model = self.model_normal current_model.temperature = global_config.llm_normal["temp"] * arousal_multiplier # 激活度越高,温度越高 model_response = await self._generate_response_with_model( - current_mind_info, reason, message, current_model, thinking_id + structured_info, current_mind_info, reason, message, current_model, thinking_id ) if model_response: @@ -71,7 +72,7 @@ class HeartFCGenerator: return None async def _generate_response_with_model( - self, current_mind_info: str, reason: str, message: MessageRecv, model: LLMRequest, thinking_id: str + self, structured_info: str, current_mind_info: str, reason: str, message: MessageRecv, model: LLMRequest, thinking_id: str ) -> str: sender_name = "" @@ -84,6 +85,7 @@ class HeartFCGenerator: build_mode="focus", reason=reason, current_mind_info=current_mind_info, + structured_info=structured_info, message_txt=message.processed_plain_text, sender_name=sender_name, chat_stream=message.chat_stream, @@ -103,106 +105,6 @@ class HeartFCGenerator: return content - async def _get_emotion_tags(self, content: str, processed_plain_text: str): - """提取情感标签,结合立场和情绪""" - try: - # 构建提示词,结合回复内容、被回复的内容以及立场分析 - prompt = f""" - 请严格根据以下对话内容,完成以下任务: - 1. 判断回复者对被回复者观点的直接立场: - - "支持":明确同意或强化被回复者观点 - - "反对":明确反驳或否定被回复者观点 - - "中立":不表达明确立场或无关回应 - 2. 从"开心,愤怒,悲伤,惊讶,平静,害羞,恐惧,厌恶,困惑"中选出最匹配的1个情感标签 - 3. 按照"立场-情绪"的格式直接输出结果,例如:"反对-愤怒" - 4. 考虑回复者的人格设定为{global_config.personality_core} - - 对话示例: - 被回复:「A就是笨」 - 回复:「A明明很聪明」 → 反对-愤怒 - - 当前对话: - 被回复:「{processed_plain_text}」 - 回复:「{content}」 - - 输出要求: - - 只需输出"立场-情绪"结果,不要解释 - - 严格基于文字直接表达的对立关系判断 - """ - - # 调用模型生成结果 - result, _, _ = await self.model_sum.generate_response(prompt) - result = result.strip() - - # 解析模型输出的结果 - if "-" in result: - stance, emotion = result.split("-", 1) - valid_stances = ["支持", "反对", "中立"] - valid_emotions = ["开心", "愤怒", "悲伤", "惊讶", "害羞", "平静", "恐惧", "厌恶", "困惑"] - if stance in valid_stances and emotion in valid_emotions: - return stance, emotion # 返回有效的立场-情绪组合 - else: - logger.debug(f"无效立场-情感组合:{result}") - return "中立", "平静" # 默认返回中立-平静 - else: - logger.debug(f"立场-情感格式错误:{result}") - return "中立", "平静" # 格式错误时返回默认值 - - except Exception as e: - logger.debug(f"获取情感标签时出错: {e}") - return "中立", "平静" # 出错时返回默认值 - - async def _get_emotion_tags_with_reason(self, content: str, processed_plain_text: str, reason: str): - """提取情感标签,结合立场和情绪""" - try: - # 构建提示词,结合回复内容、被回复的内容以及立场分析 - prompt = f""" - 请严格根据以下对话内容,完成以下任务: - 1. 判断回复者对被回复者观点的直接立场: - - "支持":明确同意或强化被回复者观点 - - "反对":明确反驳或否定被回复者观点 - - "中立":不表达明确立场或无关回应 - 2. 从"开心,愤怒,悲伤,惊讶,平静,害羞,恐惧,厌恶,困惑"中选出最匹配的1个情感标签 - 3. 按照"立场-情绪"的格式直接输出结果,例如:"反对-愤怒" - 4. 考虑回复者的人格设定为{global_config.personality_core} - - 对话示例: - 被回复:「A就是笨」 - 回复:「A明明很聪明」 → 反对-愤怒 - - 当前对话: - 被回复:「{processed_plain_text}」 - 回复:「{content}」 - - 原因:「{reason}」 - - 输出要求: - - 只需输出"立场-情绪"结果,不要解释 - - 严格基于文字直接表达的对立关系判断 - """ - - # 调用模型生成结果 - result, _, _ = await self.model_sum.generate_response(prompt) - result = result.strip() - - # 解析模型输出的结果 - if "-" in result: - stance, emotion = result.split("-", 1) - valid_stances = ["支持", "反对", "中立"] - valid_emotions = ["开心", "愤怒", "悲伤", "惊讶", "害羞", "平静", "恐惧", "厌恶", "困惑"] - if stance in valid_stances and emotion in valid_emotions: - return stance, emotion # 返回有效的立场-情绪组合 - else: - logger.debug(f"无效立场-情感组合:{result}") - return "中立", "平静" # 默认返回中立-平静 - else: - logger.debug(f"立场-情感格式错误:{result}") - return "中立", "平静" # 格式错误时返回默认值 - - except Exception as e: - logger.debug(f"获取情感标签时出错: {e}") - return "中立", "平静" # 出错时返回默认值 - async def _process_response(self, content: str) -> List[str]: """处理响应内容,返回处理后的内容和情感标签""" if not content: diff --git a/src/plugins/heartFC_chat/heartflow_prompt_builder.py b/src/plugins/heartFC_chat/heartflow_prompt_builder.py index 1d19d1ca9..33baad371 100644 --- a/src/plugins/heartFC_chat/heartflow_prompt_builder.py +++ b/src/plugins/heartFC_chat/heartflow_prompt_builder.py @@ -21,6 +21,8 @@ logger = get_module_logger("prompt") def init_prompt(): Prompt( """ +你有以下信息可供参考: +{structured_info} {chat_target} {chat_talking_prompt} 现在"{sender_name}"说的:{message_txt}。引起了你的注意,你想要在群里发言发言或者回复这条消息。\n @@ -79,17 +81,17 @@ class PromptBuilder: self.activate_messages = "" async def build_prompt( - self, build_mode, reason, current_mind_info, message_txt: str, sender_name: str = "某人", chat_stream=None + self, build_mode, reason, current_mind_info, structured_info, message_txt: str, sender_name: str = "某人", chat_stream=None ) -> Optional[tuple[str, str]]: if build_mode == "normal": return await self._build_prompt_normal(chat_stream, message_txt, sender_name) elif build_mode == "focus": - return await self._build_prompt_focus(reason, current_mind_info, chat_stream, message_txt, sender_name) + return await self._build_prompt_focus(reason, current_mind_info, structured_info, chat_stream, message_txt, sender_name) return None async def _build_prompt_focus( - self, reason, current_mind_info, chat_stream, message_txt: str, sender_name: str = "某人" + self, reason, current_mind_info, structured_info, chat_stream, message_txt: str, sender_name: str = "某人" ) -> tuple[str, str]: individuality = Individuality.get_instance() prompt_personality = individuality.get_prompt(type="personality", x_person=2, level=1) @@ -148,6 +150,7 @@ class PromptBuilder: prompt = await global_prompt_manager.format_prompt( "heart_flow_prompt", + structured_info=structured_info, chat_target=await global_prompt_manager.get_prompt_async("chat_target_group1") if chat_in_group else await global_prompt_manager.get_prompt_async("chat_target_private1"), diff --git a/src/plugins/heartFC_chat/normal_chat_generator.py b/src/plugins/heartFC_chat/normal_chat_generator.py index 07635baf6..cd9208b3a 100644 --- a/src/plugins/heartFC_chat/normal_chat_generator.py +++ b/src/plugins/heartFC_chat/normal_chat_generator.py @@ -83,6 +83,7 @@ class NormalChatGenerator: build_mode="normal", reason="", current_mind_info="", + structured_info="", message_txt=message.processed_plain_text, sender_name=sender_name, chat_stream=message.chat_stream, diff --git a/src/plugins/models/utils_model.py b/src/plugins/models/utils_model.py index e2ec7ac3d..bdc408aba 100644 --- a/src/plugins/models/utils_model.py +++ b/src/plugins/models/utils_model.py @@ -710,6 +710,8 @@ class LLMRequest: usage = None # 初始化usage变量,避免未定义错误 reasoning_content = "" content = "" + tool_calls = None # 初始化工具调用变量 + async for line_bytes in response.content: try: line = line_bytes.decode("utf-8").strip() @@ -731,11 +733,20 @@ class LLMRequest: if delta_content is None: delta_content = "" accumulated_content += delta_content + + # 提取工具调用信息 + if "tool_calls" in delta: + if tool_calls is None: + tool_calls = delta["tool_calls"] + else: + # 合并工具调用信息 + tool_calls.extend(delta["tool_calls"]) + # 检测流式输出文本是否结束 finish_reason = chunk["choices"][0].get("finish_reason") if delta.get("reasoning_content", None): reasoning_content += delta["reasoning_content"] - if finish_reason == "stop": + if finish_reason == "stop" or finish_reason == "tool_calls": chunk_usage = chunk.get("usage", None) if chunk_usage: usage = chunk_usage @@ -763,14 +774,21 @@ class LLMRequest: if think_match: reasoning_content = think_match.group(1).strip() content = re.sub(r".*?", "", content, flags=re.DOTALL).strip() + + # 构建消息对象 + message = { + "content": content, + "reasoning_content": reasoning_content, + } + + # 如果有工具调用,添加到消息中 + if tool_calls: + message["tool_calls"] = tool_calls + result = { "choices": [ { - "message": { - "content": content, - "reasoning_content": reasoning_content, - # 流式输出可能没有工具调用,此处不需要添加tool_calls字段 - } + "message": message } ], "usage": usage, @@ -1046,6 +1064,7 @@ class LLMRequest: # 只有当tool_calls存在且不为空时才返回 if tool_calls: + logger.debug(f"检测到工具调用: {tool_calls}") return content, reasoning_content, tool_calls else: return content, reasoning_content @@ -1109,7 +1128,30 @@ class LLMRequest: response = await self._execute_request(endpoint="/chat/completions", payload=data, prompt=prompt) # 原样返回响应,不做处理 + return response + + async def generate_response_tool_async(self, prompt: str, tools: list, **kwargs) -> Union[str, Tuple]: + """异步方式根据输入的提示生成模型的响应""" + # 构建请求体,不硬编码max_tokens + data = { + "model": self.model_name, + "messages": [{"role": "user", "content": prompt}], + **self.params, + **kwargs, + "tools": tools + } + + logger.debug(f"向模型 {self.model_name} 发送工具调用请求,包含 {len(tools)} 个工具") + response = await self._execute_request(endpoint="/chat/completions", payload=data, prompt=prompt) + # 检查响应是否包含工具调用 + if isinstance(response, tuple) and len(response) == 3: + content, reasoning_content, tool_calls = response + logger.debug(f"收到工具调用响应,包含 {len(tool_calls) if tool_calls else 0} 个工具调用") + return content, reasoning_content, tool_calls + else: + logger.debug(f"收到普通响应,无工具调用") + return response async def get_embedding(self, text: str) -> Union[list, None]: """异步方法:获取文本的embedding向量 diff --git a/src/plugins/utils/chat_message_builder.py b/src/plugins/utils/chat_message_builder.py index d822263db..6a5e4e8e1 100644 --- a/src/plugins/utils/chat_message_builder.py +++ b/src/plugins/utils/chat_message_builder.py @@ -303,7 +303,7 @@ async def build_readable_messages( ) readable_read_mark = translate_timestamp_to_human_readable(read_mark, mode=timestamp_mode) - read_mark_line = f"\n--- 以上消息已读 (标记时间: {readable_read_mark}) ---\n" + read_mark_line = f"\n\n--- 以上消息已读 (标记时间: {readable_read_mark}) ---\n--- 请关注你上次思考之后以下的新消息---\n" # 组合结果,确保空部分不引入多余的标记或换行 if formatted_before and formatted_after: diff --git a/src/plugins/utils/json_utils.py b/src/plugins/utils/json_utils.py new file mode 100644 index 000000000..962901b55 --- /dev/null +++ b/src/plugins/utils/json_utils.py @@ -0,0 +1,297 @@ +import json +import logging +from typing import Any, Dict, Optional, TypeVar, Generic, List, Union, Callable, Tuple + +# 定义类型变量用于泛型类型提示 +T = TypeVar('T') + +# 获取logger +logger = logging.getLogger("json_utils") + +def safe_json_loads(json_str: str, default_value: T = None) -> Union[Any, T]: + """ + 安全地解析JSON字符串,出错时返回默认值 + + 参数: + json_str: 要解析的JSON字符串 + default_value: 解析失败时返回的默认值 + + 返回: + 解析后的Python对象,或在解析失败时返回default_value + """ + if not json_str: + return default_value + + try: + return json.loads(json_str) + except json.JSONDecodeError as e: + logger.error(f"JSON解析失败: {e}, JSON字符串: {json_str[:100]}...") + return default_value + except Exception as e: + logger.error(f"JSON解析过程中发生意外错误: {e}") + return default_value + +def extract_tool_call_arguments(tool_call: Dict[str, Any], + default_value: Dict[str, Any] = None) -> Dict[str, Any]: + """ + 从LLM工具调用对象中提取参数 + + 参数: + tool_call: 工具调用对象字典 + default_value: 解析失败时返回的默认值 + + 返回: + 解析后的参数字典,或在解析失败时返回default_value + """ + default_result = default_value or {} + + if not tool_call or not isinstance(tool_call, dict): + logger.error(f"无效的工具调用对象: {tool_call}") + return default_result + + try: + # 提取function参数 + function_data = tool_call.get("function", {}) + if not function_data or not isinstance(function_data, dict): + logger.error(f"工具调用缺少function字段或格式不正确: {tool_call}") + return default_result + + # 提取arguments + arguments_str = function_data.get("arguments", "{}") + if not arguments_str: + return default_result + + # 解析JSON + return safe_json_loads(arguments_str, default_result) + + except Exception as e: + logger.error(f"提取工具调用参数时出错: {e}") + return default_result + +def get_json_value(json_obj: Dict[str, Any], key_path: str, + default_value: T = None, + transform_func: Callable[[Any], T] = None) -> Union[Any, T]: + """ + 从JSON对象中按照路径提取值,支持点表示法路径,如"data.items.0.name" + + 参数: + json_obj: JSON对象(已解析的字典) + key_path: 键路径,使用点表示法,如"data.items.0.name" + default_value: 获取失败时返回的默认值 + transform_func: 可选的转换函数,用于对获取的值进行转换 + + 返回: + 路径指向的值,或在获取失败时返回default_value + """ + if not json_obj or not key_path: + return default_value + + try: + # 分割路径 + keys = key_path.split(".") + current = json_obj + + # 遍历路径 + for key in keys: + # 处理数组索引 + if key.isdigit() and isinstance(current, list): + index = int(key) + if 0 <= index < len(current): + current = current[index] + else: + return default_value + # 处理字典键 + elif isinstance(current, dict): + if key in current: + current = current[key] + else: + return default_value + else: + return default_value + + # 应用转换函数(如果提供) + if transform_func and current is not None: + return transform_func(current) + return current + except Exception as e: + logger.error(f"从JSON获取值时出错: {e}, 路径: {key_path}") + return default_value + +def safe_json_dumps(obj: Any, default_value: str = "{}", ensure_ascii: bool = False, + pretty: bool = False) -> str: + """ + 安全地将Python对象序列化为JSON字符串 + + 参数: + obj: 要序列化的Python对象 + default_value: 序列化失败时返回的默认值 + ensure_ascii: 是否确保ASCII编码(默认False,允许中文等非ASCII字符) + pretty: 是否美化输出JSON + + 返回: + 序列化后的JSON字符串,或在序列化失败时返回default_value + """ + try: + indent = 2 if pretty else None + return json.dumps(obj, ensure_ascii=ensure_ascii, indent=indent) + except TypeError as e: + logger.error(f"JSON序列化失败(类型错误): {e}") + return default_value + except Exception as e: + logger.error(f"JSON序列化过程中发生意外错误: {e}") + return default_value + +def merge_json_objects(*objects: Dict[str, Any]) -> Dict[str, Any]: + """ + 合并多个JSON对象(字典) + + 参数: + *objects: 要合并的JSON对象(字典) + + 返回: + 合并后的字典,后面的对象会覆盖前面对象的相同键 + """ + result = {} + for obj in objects: + if obj and isinstance(obj, dict): + result.update(obj) + return result + +def normalize_llm_response(response: Any, log_prefix: str = "") -> Tuple[bool, List[Any], str]: + """ + 标准化LLM响应格式,将各种格式(如元组)转换为统一的列表格式 + + 参数: + response: 原始LLM响应 + log_prefix: 日志前缀 + + 返回: + 元组 (成功标志, 标准化后的响应列表, 错误消息) + """ + # 检查是否为None + if response is None: + return False, [], "LLM响应为None" + + # 记录原始类型 + logger.debug(f"{log_prefix}LLM响应原始类型: {type(response).__name__}") + + # 将元组转换为列表 + if isinstance(response, tuple): + logger.debug(f"{log_prefix}将元组响应转换为列表") + response = list(response) + + # 确保是列表类型 + if not isinstance(response, list): + return False, [], f"无法处理的LLM响应类型: {type(response).__name__}" + + # 处理工具调用部分(如果存在) + if len(response) == 3: + content, reasoning, tool_calls = response + + # 将工具调用部分转换为列表(如果是元组) + if isinstance(tool_calls, tuple): + logger.debug(f"{log_prefix}将工具调用元组转换为列表") + tool_calls = list(tool_calls) + response[2] = tool_calls + + return True, response, "" + +def process_llm_tool_calls(response: List[Any], log_prefix: str = "") -> Tuple[bool, List[Dict[str, Any]], str]: + """ + 处理并提取LLM响应中的工具调用列表 + + 参数: + response: 标准化后的LLM响应列表 + log_prefix: 日志前缀 + + 返回: + 元组 (成功标志, 工具调用列表, 错误消息) + """ + # 确保响应格式正确 + if len(response) != 3: + return False, [], f"LLM响应元素数量不正确: 预期3个元素,实际{len(response)}个" + + # 提取工具调用部分 + tool_calls = response[2] + + # 检查工具调用是否有效 + if tool_calls is None: + return False, [], "工具调用部分为None" + + if not isinstance(tool_calls, list): + return False, [], f"工具调用部分不是列表: {type(tool_calls).__name__}" + + if len(tool_calls) == 0: + return False, [], "工具调用列表为空" + + # 检查工具调用是否格式正确 + valid_tool_calls = [] + for i, tool_call in enumerate(tool_calls): + if not isinstance(tool_call, dict): + logger.warning(f"{log_prefix}工具调用[{i}]不是字典: {type(tool_call).__name__}") + continue + + if tool_call.get("type") != "function": + logger.warning(f"{log_prefix}工具调用[{i}]不是函数类型: {tool_call.get('type', '未知')}") + continue + + if "function" not in tool_call or not isinstance(tool_call["function"], dict): + logger.warning(f"{log_prefix}工具调用[{i}]缺少function字段或格式不正确") + continue + + valid_tool_calls.append(tool_call) + + # 检查是否有有效的工具调用 + if not valid_tool_calls: + return False, [], "没有找到有效的工具调用" + + return True, valid_tool_calls, "" + +def process_llm_tool_response( + response: Any, + expected_tool_name: str = None, + log_prefix: str = "" +) -> Tuple[bool, Dict[str, Any], str]: + """ + 处理LLM返回的工具调用响应,进行常见错误检查并提取参数 + + 参数: + response: LLM的响应,预期是[content, reasoning, tool_calls]格式的列表或元组 + expected_tool_name: 预期的工具名称,如不指定则不检查 + log_prefix: 日志前缀,用于标识日志来源 + + 返回: + 三元组(成功标志, 参数字典, 错误描述) + - 如果成功解析,返回(True, 参数字典, "") + - 如果解析失败,返回(False, {}, 错误描述) + """ + # 使用新的标准化函数 + success, normalized_response, error_msg = normalize_llm_response(response, log_prefix) + if not success: + return False, {}, error_msg + + # 使用新的工具调用处理函数 + success, valid_tool_calls, error_msg = process_llm_tool_calls(normalized_response, log_prefix) + if not success: + return False, {}, error_msg + + # 检查是否有工具调用 + if not valid_tool_calls: + return False, {}, "没有有效的工具调用" + + # 获取第一个工具调用 + tool_call = valid_tool_calls[0] + + # 检查工具名称(如果提供了预期名称) + if expected_tool_name: + actual_name = tool_call.get("function", {}).get("name") + if actual_name != expected_tool_name: + return False, {}, f"工具名称不匹配: 预期'{expected_tool_name}',实际'{actual_name}'" + + # 提取并解析参数 + try: + arguments = extract_tool_call_arguments(tool_call, {}) + return True, arguments, "" + except Exception as e: + logger.error(f"{log_prefix}解析工具参数时出错: {e}") + return False, {}, f"解析参数失败: {str(e)}" \ No newline at end of file diff --git a/tool_call_benchmark.py b/tool_call_benchmark.py new file mode 100644 index 000000000..691aeb7c5 --- /dev/null +++ b/tool_call_benchmark.py @@ -0,0 +1,289 @@ +import asyncio +import time +from src.plugins.models.utils_model import LLMRequest +from src.config.config import global_config +from src.do_tool.tool_use import ToolUser +import statistics +import json + +async def run_test(test_name, test_function, iterations=5): + """ + 运行指定次数的测试并计算平均响应时间 + + 参数: + test_name: 测试名称 + test_function: 要执行的测试函数 + iterations: 测试迭代次数 + + 返回: + 测试结果统计 + """ + print(f"开始 {test_name} 测试({iterations}次迭代)...") + times = [] + responses = [] + + for i in range(iterations): + print(f" 运行第 {i+1}/{iterations} 次测试...") + start_time = time.time() + response = await test_function() + end_time = time.time() + elapsed = end_time - start_time + times.append(elapsed) + responses.append(response) + print(f" - 耗时: {elapsed:.2f}秒") + + results = { + "平均耗时": statistics.mean(times), + "最短耗时": min(times), + "最长耗时": max(times), + "标准差": statistics.stdev(times) if len(times) > 1 else 0, + "所有耗时": times, + "响应结果": responses + } + + return results + +async def test_with_tool_calls(): + """使用工具调用的LLM请求测试""" + # 创建LLM模型实例 + llm_model = LLMRequest( + model=global_config.llm_sub_heartflow, + # model = global_config.llm_tool_use, + # temperature=global_config.llm_sub_heartflow["temp"], + max_tokens=800, + request_type="benchmark_test", + ) + + # 创建工具实例 + tool_instance = ToolUser() + tools = tool_instance._define_tools() + + # 简单的测试提示词 + prompt = "请分析当前天气情况,并查询今日历史上的重要事件。并且3.9和3.11谁比较大?请使用适当的工具来获取这些信息。" + prompt = ''' + 你的名字是麦麦,你包容开放,情绪敏感,有时候有些搞怪幽默, 是一个学习心理学和脑科学的女大学生,现在在读大二,你会刷贴吧,有时候会想瑟瑟,喜欢刷小红书 +----------------------------------- +现在是2025-04-24 12:37:00,你正在上网,和qq群里的网友们聊天,群里正在聊的话题是: +2025-04-24 12:33:00既文横 说:这条调试消息是napcat控制台输出的,还是麦麦log输出的; +2025-04-24 12:33:23麦麦(你) 说:应该是napcat吧; +2025-04-24 12:33:24麦麦(你) 说:[表达了:害羞、害羞。]; +2025-04-24 12:33:25兔伽兔伽 说:就打开麦麦的那个终端发的呀; +2025-04-24 12:33:45既文横 说:那应该不是napcat输出的,是麦麦输出的消息,怀疑版本问题; +2025-04-24 12:34:02兔伽兔伽 说:版本05.15; +2025-04-24 12:34:07麦麦(你) 说:话说你们最近刷贴吧看到那个猫猫头表情包了吗; +2025-04-24 12:34:07麦麦(你) 说:笑死; +2025-04-24 12:34:08麦麦(你) 说:[表达了:惊讶、搞笑。]; +2025-04-24 12:34:14兔伽兔伽 说:只开一个终端; +2025-04-24 12:35:45兔伽兔伽 说:回复既文横的消息(怀疑版本问题),说:因为之前你连模型的那个我用的了; +2025-04-24 12:35:56麦麦(你) 说:那个猫猫头真的魔性; +2025-04-24 12:35:56麦麦(你) 说:我存了一堆; +2025-04-24 12:35:56麦麦(你) 说:[表达了:温馨、宠爱]; +2025-04-24 12:36:03小千石 说:麦麦3.8和3.11谁大; + +--- 以上消息已读 (标记时间: 2025-04-24 12:36:43) --- +--- 请关注你上次思考之后以下的新消息--- +2025-04-24 12:36:53墨墨 说:[表情包:开心、满足。]; + +你现在当前心情:平静。 +现在请你根据刚刚的想法继续思考,思考时可以想想如何对群聊内容进行回复,要不要对群里的话题进行回复,关注新话题,可以适当转换话题,大家正在说的话才是聊天的主题。 +回复的要求是:平淡一些,简短一些,说中文,如果你要回复,最好只回复一个人的一个话题 +请注意不要输出多余内容(包括前后缀,冒号和引号,括号, 表情,等),不要带有括号和动作描写。不要回复自己的发言,尽量不要说你说过的话。 +现在请你继续生成你在这个聊天中的想法,在原来想法的基础上继续思考,不要分点输出,生成内心想法,文字不要浮夸 +在输出完想法后,请你思考应该使用什么工具,如果你需要做某件事,来对消息和你的回复进行处理,请使用工具。''' + + # 发送带有工具调用的请求 + response = await llm_model.generate_response_tool_async(prompt=prompt, tools=tools) + + result_info = {} + + # 简单处理工具调用结果 + if len(response) == 3: + content, reasoning_content, tool_calls = response + tool_calls_count = len(tool_calls) if tool_calls else 0 + print(f" 工具调用请求生成了 {tool_calls_count} 个工具调用") + + # 输出内容和工具调用详情 + print("\n 生成的内容:") + print(f" {content[:200]}..." if len(content) > 200 else f" {content}") + + if tool_calls: + print("\n 工具调用详情:") + for i, tool_call in enumerate(tool_calls): + tool_name = tool_call['function']['name'] + tool_params = tool_call['function'].get('arguments', {}) + print(f" - 工具 {i+1}: {tool_name}") + print(f" 参数: {json.dumps(tool_params, ensure_ascii=False)[:100]}..." + if len(json.dumps(tool_params, ensure_ascii=False)) > 100 + else f" 参数: {json.dumps(tool_params, ensure_ascii=False)}") + + result_info = { + "内容": content, + "推理内容": reasoning_content, + "工具调用": tool_calls + } + else: + content, reasoning_content = response + print(" 工具调用请求未生成任何工具调用") + print("\n 生成的内容:") + print(f" {content[:200]}..." if len(content) > 200 else f" {content}") + + result_info = { + "内容": content, + "推理内容": reasoning_content, + "工具调用": [] + } + + return result_info + +async def test_without_tool_calls(): + """不使用工具调用的LLM请求测试""" + # 创建LLM模型实例 + llm_model = LLMRequest( + model=global_config.llm_sub_heartflow, + temperature=global_config.llm_sub_heartflow["temp"], + max_tokens=800, + request_type="benchmark_test", + ) + + # 简单的测试提示词(与工具调用相同,以便公平比较) + prompt = ''' + 你的名字是麦麦,你包容开放,情绪敏感,有时候有些搞怪幽默, 是一个学习心理学和脑科学的女大学生,现在在读大二,你会刷贴吧,有时候会想瑟瑟,喜欢刷小红书 +刚刚你的想法是: +我是麦麦,我想,('小千石问3.8和3.11谁大,已经简单回答了3.11大,现在可以继续聊猫猫头表情包,毕竟大家好像对版本问题兴趣不大,而且猫猫头的话题更轻松有趣。', '') +----------------------------------- +现在是2025-04-24 12:37:00,你正在上网,和qq群里的网友们聊天,群里正在聊的话题是: +2025-04-24 12:33:00既文横 说:这条调试消息是napcat控制台输出的,还是麦麦log输出的; +2025-04-24 12:33:23麦麦(你) 说:应该是napcat吧; +2025-04-24 12:33:24麦麦(你) 说:[表达了:害羞、害羞。]; +2025-04-24 12:33:25兔伽兔伽 说:就打开麦麦的那个终端发的呀; +2025-04-24 12:33:45既文横 说:那应该不是napcat输出的,是麦麦输出的消息,怀疑版本问题; +2025-04-24 12:34:02兔伽兔伽 说:版本05.15; +2025-04-24 12:34:07麦麦(你) 说:话说你们最近刷贴吧看到那个猫猫头表情包了吗; +2025-04-24 12:34:07麦麦(你) 说:笑死; +2025-04-24 12:34:08麦麦(你) 说:[表达了:惊讶、搞笑。]; +2025-04-24 12:34:14兔伽兔伽 说:只开一个终端; +2025-04-24 12:35:45兔伽兔伽 说:回复既文横的消息(怀疑版本问题),说:因为之前你连模型的那个我用的了; +2025-04-24 12:35:56麦麦(你) 说:那个猫猫头真的魔性; +2025-04-24 12:35:56麦麦(你) 说:我存了一堆; +2025-04-24 12:35:56麦麦(你) 说:[表达了:温馨、宠爱]; +2025-04-24 12:36:03小千石 说:麦麦3.8和3.11谁大; +2025-04-24 12:36:22麦麦(你) 说:真的魔性那个猫猫头; +2025-04-24 12:36:22麦麦(你) 说:[表达了:害羞、可爱]; +2025-04-24 12:36:43麦麦(你) 说:3.11大啦; +2025-04-24 12:36:43麦麦(你) 说:[表达了:害羞、可爱]; + +--- 以上消息已读 (标记时间: 2025-04-24 12:36:43) --- +--- 请关注你上次思考之后以下的新消息--- +2025-04-24 12:36:53墨墨 说:[表情包:开心、满足。]; + +你现在当前心情:平静。 +现在请你根据刚刚的想法继续思考,思考时可以想想如何对群聊内容进行回复,要不要对群里的话题进行回复,关注新话题,可以适当转换话题,大家正在说的话才是聊天的主题。 +回复的要求是:平淡一些,简短一些,说中文,如果你要回复,最好只回复一个人的一个话题 +请注意不要输出多余内容(包括前后缀,冒号和引号,括号, 表情,等),不要带有括号和动作描写。不要回复自己的发言,尽量不要说你说过的话。 +现在请你继续生成你在这个聊天中的想法,在原来想法的基础上继续思考,不要分点输出,生成内心想法,文字不要浮夸 +在输出完想法后,请你思考应该使用什么工具,如果你需要做某件事,来对消息和你的回复进行处理,请使用工具。''' + + # 发送不带工具调用的请求 + response, reasoning_content = await llm_model.generate_response_async(prompt) + + # 输出生成的内容 + print("\n 生成的内容:") + print(f" {response[:200]}..." if len(response) > 200 else f" {response}") + + result_info = { + "内容": response, + "推理内容": reasoning_content, + "工具调用": [] + } + + return result_info + +async def main(): + """主测试函数""" + print("=" * 50) + print("LLM工具调用与普通请求性能比较测试") + print("=" * 50) + + # 设置测试迭代次数 + iterations = 3 + + # 测试不使用工具调用 + results_without_tools = await run_test("不使用工具调用", test_without_tool_calls, iterations) + + print("\n" + "-" * 50 + "\n") + + # 测试使用工具调用 + results_with_tools = await run_test("使用工具调用", test_with_tool_calls, iterations) + + # 显示结果比较 + print("\n" + "=" * 50) + print("测试结果比较") + print("=" * 50) + + print("\n不使用工具调用:") + for key, value in results_without_tools.items(): + if key == "所有耗时": + print(f" {key}: {[f'{t:.2f}秒' for t in value]}") + elif key == "响应结果": + print(f" {key}: [内容已省略,详见结果文件]") + else: + print(f" {key}: {value:.2f}秒") + + print("\n使用工具调用:") + for key, value in results_with_tools.items(): + if key == "所有耗时": + print(f" {key}: {[f'{t:.2f}秒' for t in value]}") + elif key == "响应结果": + tool_calls_counts = [len(res.get("工具调用", [])) for res in value] + print(f" {key}: [内容已省略,详见结果文件]") + print(f" 工具调用数量: {tool_calls_counts}") + else: + print(f" {key}: {value:.2f}秒") + + # 计算差异百分比 + diff_percent = ((results_with_tools["平均耗时"] / results_without_tools["平均耗时"]) - 1) * 100 + print(f"\n工具调用比普通请求平均耗时相差: {diff_percent:.2f}%") + + # 保存结果到JSON文件 + results = { + "测试时间": time.strftime("%Y-%m-%d %H:%M:%S"), + "测试迭代次数": iterations, + "不使用工具调用": { + k: (v if k != "所有耗时" else [float(f"{t:.2f}") for t in v]) + for k, v in results_without_tools.items() + if k != "响应结果" + }, + "不使用工具调用_详细响应": [ + { + "内容摘要": resp["内容"][:200] + "..." if len(resp["内容"]) > 200 else resp["内容"], + "推理内容摘要": resp["推理内容"][:200] + "..." if len(resp["推理内容"]) > 200 else resp["推理内容"] + } for resp in results_without_tools["响应结果"] + ], + "使用工具调用": { + k: (v if k != "所有耗时" else [float(f"{t:.2f}") for t in v]) + for k, v in results_with_tools.items() + if k != "响应结果" + }, + "使用工具调用_详细响应": [ + { + "内容摘要": resp["内容"][:200] + "..." if len(resp["内容"]) > 200 else resp["内容"], + "推理内容摘要": resp["推理内容"][:200] + "..." if len(resp["推理内容"]) > 200 else resp["推理内容"], + "工具调用数量": len(resp["工具调用"]), + "工具调用详情": [ + { + "工具名称": tool["function"]["name"], + "参数": tool["function"].get("arguments", {}) + } for tool in resp["工具调用"] + ] + } for resp in results_with_tools["响应结果"] + ], + "差异百分比": float(f"{diff_percent:.2f}") + } + + with open("llm_tool_benchmark_results.json", "w", encoding="utf-8") as f: + json.dump(results, f, ensure_ascii=False, indent=2) + + print(f"\n测试结果已保存到 llm_tool_benchmark_results.json") + +if __name__ == "__main__": + asyncio.run(main()) \ No newline at end of file