better：优化 prompt，修改 buffer 行为（采用更严格的判定以降低延迟，不再丢弃图片之前的文本）

2025-04-25 23:05:46 +08:00
parent 5ed676e404
commit e17e47bcaf
6 changed files with 185 additions and 145 deletions
--- a/src/heart_flow/sub_mind.py
+++ b/src/heart_flow/sub_mind.py
@@ -25,18 +25,20 @@ def init_prompt():
    prompt += "{extra_info}\n"
    prompt += "{prompt_personality}\n"
    prompt += "{last_loop_prompt}\n"
    prompt += "-----------------------------------\n"
    prompt += "现在是{time_now}，你正在上网，和qq群里的网友们聊天，以下是正在进行的聊天内容：\n{chat_observe_info}\n"
    prompt += "\n你现在{mood_info}\n"
-    prompt += "现在请你，阅读群里正在进行的聊天内容，思考群里的正在进行的话题，分析群里成员与你的关系。"
+    prompt += "请仔细阅读当前群聊内容，分析讨论话题和群成员关系，思考你要不要回复。"
-    prompt += "请你思考，生成你的内心想法，包括你的思考，要不要对群里的话题进行回复，以及如何对群聊内容进行回复\n"
+    prompt += "思考并输出你的内心想法\n"
-    prompt += "回复的要求是：不要总是重复自己提到过的话题，如果你要回复，最好只回复一个人的一个话题\n"
+    prompt += "输出要求：\n"
-    prompt += "如果最后一条消息是你自己发的，观察到的内容只有你自己的发言，并且之后没有人回复你，不要回复。"
+    prompt += "1. 根据聊天内容生成你的想法，{hf_do_next}\n"
-    prompt += "如果聊天记录中最新的消息是你自己发送的，并且你还想继续回复，你应该紧紧衔接你发送的消息，进行话题的深入，补充，或追问等等。"
+    prompt += "2. 不要分点、不要使用表情符号\n"
-    prompt += "请注意不要输出多余内容(包括前后缀，冒号和引号，括号， 表情，等)，不要回复自己的发言\n"
+    prompt += "3. 避免多余符号(冒号、引号、括号等)\n"
-    prompt += "现在请你先输出想法，{hf_do_next}，不要分点输出,文字不要浮夸"
+    prompt += "4. 语言简洁自然，不要浮夸\n"
-    prompt += "在输出完想法后，请你思考应该使用什么工具。工具可以帮你取得一些你不知道的信息，或者进行一些操作。"
+    prompt += "5. 如果你刚发言，并且没有人回复你，不要回复\n"
-    prompt += "如果你需要做某件事，来对消息和你的回复进行处理，请使用工具。\n"
+    prompt += "工具使用说明：\n"
    prompt += "1. 输出想法后考虑是否需要使用工具\n"
    prompt += "2. 工具可获取信息或执行操作\n"
    prompt += "3. 如需处理消息或回复，请使用工具\n"
    Prompt(prompt, "sub_heartflow_prompt_before")
@@ -65,7 +67,7 @@ class SubMind:
        self.past_mind = []
        self.structured_info = {}
-    async def do_thinking_before_reply(self, last_cycle: CycleInfo):
+    async def do_thinking_before_reply(self, last_cycle: CycleInfo = None):
        """
        在回复前进行思考，生成内心想法并收集工具调用结果
@@ -123,14 +125,14 @@ class SubMind:
        # 思考指导选项和权重
        hf_options = [
-            ("继续生成你在这个聊天中的想法，在原来想法的基础上继续思考，但是不要纠结于同一个话题", 0.6),
+            ("可以参考之前的想法，在原来想法的基础上继续思考", 0.2),
-            ("生成你在这个聊天中的想法，在原来的想法上尝试新的话题", 0.1),
+            ("可以参考之前的想法，在原来的想法上尝试新的话题", 0.4),
-            ("生成你在这个聊天中的想法，不要太深入", 0.2),
+            ("不要太深入", 0.2),
-            ("继续生成你在这个聊天中的想法，进行深入思考", 0.1),
+            ("进行深入思考", 0.2),
        ]
        #上一次决策信息
-        if last_cycle.action_type:
+        if last_cycle != None:
            last_action = last_cycle.action_type
            last_reasoning = last_cycle.reasoning
            is_replan = last_cycle.replanned
@@ -143,11 +145,13 @@ class SubMind:
            last_reasoning = ""
            is_replan = False
            if_replan_prompt = ""
-
+        if current_thinking_info:
            last_loop_prompt = (await global_prompt_manager.get_prompt_async("last_loop")).format(
                current_thinking_info=current_thinking_info,
                if_replan_prompt=if_replan_prompt
            )
        else:
            last_loop_prompt = ""
        # 加权随机选择思考指导
        hf_do_next = local_random.choices(
--- a/src/plugins/chat/message_buffer.py
+++ b/src/plugins/chat/message_buffer.py
@@ -128,58 +128,55 @@ class MessageBuffer:
            if result:
                async with self.lock:  # 再次加锁
                    # 清理所有早于当前消息的已处理消息， 收集所有早于当前消息的F消息的processed_plain_text
-                    keep_msgs = OrderedDict()
+                    keep_msgs = OrderedDict() # 用于存放 T 消息之后的消息
-                    combined_text = []
+                    collected_texts = []      # 用于收集 T 消息及之前 F 消息的文本
-                    found = False
+                    process_target_found = False
-                    type = "seglist"
+
-                    is_update = True
+                    # 遍历当前用户的所有缓冲消息
-                    for msg_id, msg in self.buffer_pool[person_id_].items():
+                    for msg_id, cache_msg in self.buffer_pool[person_id_].items():
                        # 如果找到了目标处理消息 (T 状态)
                        if msg_id == message.message_info.message_id:
-                            found = True
+                            process_target_found = True
-                            if msg.message.message_segment.type != "seglist":
+                            # 收集这条 T 消息的文本 (如果有)
-                                type = msg.message.message_segment.type
+                            if hasattr(cache_msg.message, "processed_plain_text") and cache_msg.message.processed_plain_text:
-                            else:
+                                collected_texts.append(cache_msg.message.processed_plain_text)
-                                if (
+                            # 不立即放入 keep_msgs，因为它之前的 F 消息也处理完了
                                    isinstance(msg.message.message_segment.data, list)
                                    and all(isinstance(x, Seg) for x in msg.message.message_segment.data)
                                    and len(msg.message.message_segment.data) == 1
                                ):
                                    type = msg.message.message_segment.data[0].type
                            combined_text.append(msg.message.processed_plain_text)
                            continue
                        if found:
                            keep_msgs[msg_id] = msg
                        elif msg.result == "F":
                            # 收集F消息的文本内容
                            f_type = "seglist"
                            if msg.message.message_segment.type != "seglist":
                                f_type = msg.message.message_segment.type
                            else:
                                if (
                                    isinstance(msg.message.message_segment.data, list)
                                    and all(isinstance(x, Seg) for x in msg.message.message_segment.data)
                                    and len(msg.message.message_segment.data) == 1
                                ):
                                    f_type = msg.message.message_segment.data[0].type
                            if hasattr(msg.message, "processed_plain_text") and msg.message.processed_plain_text:
                                if f_type == "text":
                                    combined_text.append(msg.message.processed_plain_text)
                                elif f_type != "text":
                                    is_update = False
                        elif msg.result == "U":
                            logger.debug(f"异常未处理信息id： {msg.message.message_info.message_id}")
-                    # 更新当前消息的processed_plain_text
+                        # 如果已经找到了目标 T 消息，之后的消息需要保留
-                    if combined_text and combined_text[0] != message.processed_plain_text and is_update:
+                        elif process_target_found:
-                        if type == "text":
+                            keep_msgs[msg_id] = cache_msg
-                            message.processed_plain_text = "，".join(combined_text)
+
-                            logger.debug(f"整合了{len(combined_text) - 1}条F消息的内容到当前消息")
+                        # 如果还没找到目标 T 消息，说明是之前的消息 (F 或 U)
-                        elif type == "emoji":
+                        else:
-                            combined_text.pop()
+                            if cache_msg.result == "F":
-                            message.processed_plain_text = "，".join(combined_text)
+                                # 收集这条 F 消息的文本 (如果有)
                                if hasattr(cache_msg.message, "processed_plain_text") and cache_msg.message.processed_plain_text:
                                    collected_texts.append(cache_msg.message.processed_plain_text)
                            elif cache_msg.result == "U":
                                # 理论上不应该在 T 消息之前还有 U 消息，记录日志
                                logger.warning(f"异常状态：在目标 T 消息 {message.message_info.message_id} 之前发现未处理的 U 消息 {cache_msg.message.message_info.message_id}")
                                # 也可以选择收集其文本
                                if hasattr(cache_msg.message, "processed_plain_text") and cache_msg.message.processed_plain_text:
                                    collected_texts.append(cache_msg.message.processed_plain_text)
                    # 更新当前消息 (message) 的 processed_plain_text
                    # 只有在收集到的文本多于一条，或者只有一条但与原始文本不同时才合并
                    if collected_texts:
                        # 使用 OrderedDict 去重，同时保留原始顺序
                        unique_texts = list(OrderedDict.fromkeys(collected_texts))
                        merged_text = "，".join(unique_texts)
                        # 只有在合并后的文本与原始文本不同时才更新
                        # 并且确保不是空合并
                        if merged_text and merged_text != message.processed_plain_text:
                            message.processed_plain_text = merged_text
                            # 如果合并了文本，原消息不再视为纯 emoji
                            if hasattr(message, 'is_emoji'):
                                message.is_emoji = False
-                            logger.debug(f"整合了{len(combined_text) - 1}条F消息的内容，覆盖当前emoji消息")
+                            logger.debug(f"合并了 {len(unique_texts)} 条消息的文本内容到当前消息 {message.message_info.message_id}")
                    # 更新缓冲池，只保留 T 消息之后的消息
                    self.buffer_pool[person_id_] = keep_msgs
            return result
        except asyncio.TimeoutError:
--- a/src/plugins/heartFC_chat/heartFC_chat.py
+++ b/src/plugins/heartFC_chat/heartFC_chat.py
@@ -405,7 +405,7 @@ class HeartFChatting:
                return False, ""
            # execute:执行
-            with Timer("执行", cycle_timers):
+            with Timer("执行动作", cycle_timers):
                return await self._handle_action(action, reasoning, planner_result.get("emoji_query", ""), cycle_timers, planner_start_db_time)
        except PlannerError as e:
@@ -490,7 +490,7 @@ class HeartFChatting:
        try:
            # 生成回复
-            with Timer("Replier", cycle_timers):
+            with Timer("生成回复", cycle_timers):
                reply = await self._replier_work(
                    anchor_message=anchor_message,
                    thinking_id=thinking_id,
@@ -501,7 +501,7 @@ class HeartFChatting:
                raise ReplierError("回复生成失败")
            # 发送消息
-            with Timer("Sender", cycle_timers):
+
            await self._sender(
                thinking_id=thinking_id,
                anchor_message=anchor_message,
@@ -675,7 +675,7 @@ class HeartFChatting:
        # 获取观察信息
        observation = self.observations[0]
        if is_re_planned:
-            observation.observe()
+            await observation.observe()
        observed_messages = observation.talking_message
        observed_messages_str = observation.talking_message_str
@@ -687,10 +687,10 @@ class HeartFChatting:
        try:
            # 构建提示词
-            with Timer("构建提示词", cycle_timers):
+
            if is_re_planned:
                replan_prompt = await self._build_replan_prompt(
-                        self._current_cycle.action, self._current_cycle.reasoning
+                    self._current_cycle.action_type, self._current_cycle.reasoning
                )
                prompt = replan_prompt
            else:
@@ -706,7 +706,7 @@ class HeartFChatting:
            }
            # 执行LLM请求
-            with Timer("LLM回复", cycle_timers):
+
            try:
                response = await self.planner_llm._execute_request(
                    endpoint="/chat/completions", payload=payload, prompt=prompt
@@ -883,7 +883,7 @@ class HeartFChatting:
        # 准备聊天内容块
        chat_content_block = ""
        if observed_messages_str:
-            chat_content_block = "观察到的最新聊天内容如下 (最近的消息在最后)：\n---\n"
+            chat_content_block = "观察到的最新聊天内容如下：\n---\n"
            chat_content_block += observed_messages_str
            chat_content_block += "\n---"
        else:
@@ -892,9 +892,9 @@ class HeartFChatting:
        # 准备当前思维块
        current_mind_block = ""
        if current_mind:
-            current_mind_block = f"\n---\n{current_mind}\n---\n\n"
+            current_mind_block = f"{current_mind}"
        else:
-            current_mind_block = " [没有特别的想法] \n\n"
+            current_mind_block = "[没有特别的想法]"
        # 获取提示词模板并填充数据
        prompt = (await global_prompt_manager.get_prompt_async("planner_prompt")).format(
--- a/src/plugins/heartFC_chat/heartflow_prompt_builder.py
+++ b/src/plugins/heartFC_chat/heartflow_prompt_builder.py
@@ -44,28 +44,49 @@ def init_prompt():
 以上的消息是你获取到的消息，或许可以帮助你更好地回复。
 """, "info_from_tools")
-    # Planner提示词
+    # Planner提示词 - 优化版
    Prompt(
-        """你的名字是 {bot_name}。是一个女大二学生，正在QQ聊天，正在决定是否以及如何回应当前的聊天。
+        """你的名字是{bot_name}，一名大二女生，正在QQ群聊中。需要基于以下信息决定如何参与对话：
 {structured_info_block}
 {chat_content_block}
-看了以上内容，你产生的内心想法是：
+你的内心想法：
 {current_mind_block}
 {replan}
-请结合你的内心想法和观察到的聊天内容，分析情况并使用 'decide_reply_action' 工具来决定你的最终行动。
+
-注意你必须参考以下决策依据来选择工具：
+请综合分析聊天内容和你看到的新消息，参考内心想法，使用'decide_reply_action'工具做出决策。决策时请注意：
-1. 如果聊天内容无聊、与你无关、或者你的内心想法认为不适合回复（例如在讨论你不懂或不感兴趣的话题），选择 'no_reply'。
+
-2. 如果聊天内容值得回应，且适合用文字表达（参考你的内心想法），选择 'text_reply'。如果你有情绪想表达，想在文字后追加一个表达情绪的表情，请同时提供 'emoji_query' (每个标签用一个词组表示，格式例如：幽默的讽刺,单纯的开心,愤怒的抗议)。
+【回复原则】
-3. 如果聊天内容或你的内心想法适合用一个表情来回应，选择 'emoji_reply' 并提供表情主题 'emoji_query'。
+1. 不回复(no_reply)适用：
-4. 如果最后一条消息是你自己发的，观察到的内容只有你自己的发言，并且之后没有人回复你，通常选择 'no_reply'，除非有特殊原因需要追问。
+- 话题无关/无聊/不感兴趣
-5. 如果聊天记录中最新的消息是你自己发送的，并且你还想继续回复，你应该紧紧衔接你发送的消息，进行话题的深入，补充，或追问等等；。
+- 最后一条消息是你自己发的且无人回应你
-6. 表情包是用来表达情绪的，不要直接回复或评价别人的表情包，而是根据对话内容和情绪选择是否用表情回应。
+- 讨论你不懂的专业话题
-7. 不要回复你自己的话，不要把自己的话当做别人说的。
+- 讨论你不想参与的话题
-必须调用 'decide_reply_action' 工具并提供 'action' 和 'reasoning'。如果选择了 'emoji_reply' 或者选择了 'text_reply' 并想追加表情，则必须提供 'emoji_query'。""",
+- 你发送了太多消息
 2. 文字回复(text_reply)适用：
 - 有实质性内容需要表达
 - 可以追加emoji_query表达情绪(格式：情绪描述,如"俏皮的调侃")
 - 不要追加太多表情
 3. 纯表情回复(emoji_reply)适用：
 - 适合用表情回应的场景
 - 需提供明确的emoji_query
 4. 自我对话处理：
 - 如果是自己发的消息想继续，需自然衔接
 - 避免重复或评价自己的发言
 - 不要和自己聊天
 【必须遵守】
 - 必须调用工具并包含action和reasoning
 - 你可以选择文字回复(text_reply)，纯表情回复(emoji_reply)，不回复(no_reply)
 - 选择text_reply或emoji_reply时必须提供emoji_query
 - 保持回复自然，符合日常聊天习惯""",
        "planner_prompt",
    )
-    Prompt("你原本打算{action}，因为：{reasoning}，但是你看到了新的消息，你决定重新决定行动。", "replan_prompt")
+    Prompt('''你原本打算{action}，因为：{reasoning}
 但是你看到了新的消息，你决定重新决定行动。''', "replan_prompt")
    Prompt("你正在qq群里聊天，下面是群里在聊的内容：", "chat_target_group1")
    Prompt("和群里聊天", "chat_target_group2")
--- a/src/plugins/person_info/person_info.py
+++ b/src/plugins/person_info/person_info.py
@@ -53,7 +53,7 @@ person_info_default = {
    # "impression" : None,
    # "gender" : Unkown,
    "konw_time": 0,
-    "msg_interval": 3000,
+    "msg_interval": 2000,
    "msg_interval_list": [],
 }  # 个人信息的各项与默认值在此定义，以下处理会自动创建/补全每一项
@@ -384,18 +384,21 @@ class PersonInfoManager:
                            if delta > 0:
                                time_interval.append(delta)
-                        time_interval = [t for t in time_interval if 500 <= t <= 8000]
+                        time_interval = [t for t in time_interval if 200 <= t <= 8000]
-                        if len(time_interval) >= 30:
+                        # --- 修改后的逻辑 ---
                        # 数据量检查 (至少需要 30 条有效间隔，并且足够进行头尾截断)
                        if len(time_interval) >= 30 + 10: # 至少30条有效+头尾各5条
                            time_interval.sort()
-                            # 画图(log)
+                            # 画图(log) - 这部分保留
                            msg_interval_map = True
                            log_dir = Path("logs/person_info")
                            log_dir.mkdir(parents=True, exist_ok=True)
                            plt.figure(figsize=(10, 6))
-                            time_series = pd.Series(time_interval)
+                            # 使用截断前的数据画图，更能反映原始分布
-                            plt.hist(time_series, bins=50, density=True, alpha=0.4, color="pink", label="Histogram")
+                            time_series_original = pd.Series(time_interval)
-                            time_series.plot(kind="kde", color="mediumpurple", linewidth=1, label="Density")
+                            plt.hist(time_series_original, bins=50, density=True, alpha=0.4, color="pink", label="Histogram (Original Filtered)")
                            time_series_original.plot(kind="kde", color="mediumpurple", linewidth=1, label="Density (Original Filtered)")
                            plt.grid(True, alpha=0.2)
                            plt.xlim(0, 8000)
                            plt.title(f"Message Interval Distribution (User: {person_id[:8]}...)")
@@ -405,15 +408,22 @@ class PersonInfoManager:
                            img_path = log_dir / f"interval_distribution_{person_id[:8]}.png"
                            plt.savefig(img_path)
                            plt.close()
-                            # 画图
+                            # 画图结束
-                            q25, q75 = np.percentile(time_interval, [25, 75])
+                            # 去掉头尾各 5 个数据点
-                            iqr = q75 - q25
+                            trimmed_interval = time_interval[5:-5]
                            filtered = [x for x in time_interval if (q25 - 1.5 * iqr) <= x <= (q75 + 1.5 * iqr)]
-                            msg_interval = int(round(np.percentile(filtered, 80)))
+                            # 计算截断后数据的 37% 分位数
                            if trimmed_interval: # 确保截断后列表不为空
                                msg_interval = int(round(np.percentile(trimmed_interval, 37)))
                                # 更新数据库
                                await self.update_one_field(person_id, "msg_interval", msg_interval)
-                            logger.trace(f"用户{person_id}的msg_interval已经被更新为{msg_interval}")
+                                logger.trace(f"用户{person_id}的msg_interval通过头尾截断和37分位数更新为{msg_interval}")
                            else:
                                logger.trace(f"用户{person_id}截断后数据为空，无法计算msg_interval")
                        else:
                            logger.trace(f"用户{person_id}有效消息间隔数量 ({len(time_interval)}) 不足进行推断 (需要至少 {30+10} 条)")
                        # --- 修改结束 ---
                    except Exception as e:
                        logger.trace(f"用户{person_id}消息间隔计算失败: {type(e).__name__}: {str(e)}")
                        continue
--- a/src/plugins/utils/chat_message_builder.py
+++ b/src/plugins/utils/chat_message_builder.py
@@ -168,7 +168,10 @@ async def _build_readable_messages_internal(
        user_info = msg.get("user_info", {})
        platform = user_info.get("platform")
        user_id = user_info.get("user_id")
-        user_nickname = user_info.get("nickname")
+        
        user_nickname = user_info.get("user_nickname")
        user_cardname = user_info.get("user_cardname")
        timestamp = msg.get("time")
        content = msg.get("processed_plain_text", "")  # 默认空字符串
@@ -186,7 +189,12 @@ async def _build_readable_messages_internal(
        # 如果 person_name 未设置，则使用消息中的 nickname 或默认名称
        if not person_name:
-            person_name = user_nickname
+            if user_cardname:
                person_name = f"昵称：{user_cardname}"
            elif user_nickname:
                person_name = f"{user_nickname}"
            else:
                person_name = "某人"
        message_details.append((timestamp, person_name, content))
@@ -304,7 +312,7 @@ async def build_readable_messages(
        readable_read_mark = translate_timestamp_to_human_readable(read_mark, mode=timestamp_mode)
        read_mark_line = (
-            f"\n\n--- 以上消息已读 (标记时间: {readable_read_mark}) ---\n--- 以下新消息未读---\n"
+            f"\n--- 以上消息已读 (标记时间: {readable_read_mark}) ---\n--- 以下新消息未读---\n"
        )
        # 组合结果，确保空部分不引入多余的标记或换行