feat：更好的回复信息收集器

2025-04-09 19:27:23 +08:00
parent 84c79c43a8
commit a889d9d222
9 changed files with 349 additions and 82 deletions
--- a/src/heart_flow/heartflow.py
+++ b/src/heart_flow/heartflow.py
@@ -200,7 +200,7 @@ class Heartflow:
            logger.error(f"创建 subheartflow 失败: {e}")
            return None

-    def get_subheartflow(self, observe_chat_id):
+    def get_subheartflow(self, observe_chat_id) -> SubHeartflow:
        """获取指定ID的SubHeartflow实例"""
        return self._subheartflows.get(observe_chat_id)

--- a/src/heart_flow/sub_heartflow.py
+++ b/src/heart_flow/sub_heartflow.py
@@ -42,7 +42,7 @@ class SubHeartflow:
        self.past_mind = []
        self.current_state: CuttentState = CuttentState()
        self.llm_model = LLM_request(
-            model=global_config.llm_sub_heartflow, temperature=0.7, max_tokens=600, request_type="sub_heart_flow"
+            model=global_config.llm_sub_heartflow, temperature=0.5, max_tokens=600, request_type="sub_heart_flow"
        )

        self.main_heartflow_info = ""
@@ -221,9 +221,9 @@ class SubHeartflow:

        self.update_current_mind(reponse)

-        self.current_mind = reponse
        logger.debug(f"prompt:\n{prompt}\n")
        logger.info(f"麦麦的思考前脑内状态：{self.current_mind}")
+        return self.current_mind ,self.past_mind

    async def do_thinking_after_reply(self, reply_content, chat_talking_prompt):
        # print("麦麦回复之后脑袋转起来了")
--- a/src/plugins/chat/bot.py
+++ b/src/plugins/chat/bot.py
@@ -84,7 +84,7 @@ class ChatBot:
            message = MessageRecv(message_data)
            groupinfo = message.message_info.group_info
            userinfo = message.message_info.user_info
-            logger.debug(f"处理消息:{str(message_data)[:80]}...")
+            logger.debug(f"处理消息:{str(message_data)[:120]}...")

            if userinfo.user_id in global_config.ban_user_id:
                logger.debug(f"用户{userinfo.user_id}被禁止回复")
@@ -106,11 +106,11 @@ class ChatBot:
                        await self._create_PFC_chat(message)
                    else:
                        if groupinfo.group_id in global_config.talk_allowed_groups:
-                            logger.debug(f"开始群聊模式{str(message_data)[:50]}...")
+                            # logger.debug(f"开始群聊模式{str(message_data)[:50]}...")
                            if global_config.response_mode == "heart_flow":
                                await self.think_flow_chat.process_message(message_data)
                            elif global_config.response_mode == "reasoning":
-                                logger.debug(f"开始推理模式{str(message_data)[:50]}...")
+                                # logger.debug(f"开始推理模式{str(message_data)[:50]}...")
                                await self.reasoning_chat.process_message(message_data)
                            else:
                                logger.error(f"未知的回复模式，请检查配置文件！！: {global_config.response_mode}")
--- a/src/plugins/chat/message.py
+++ b/src/plugins/chat/message.py
@@ -365,7 +365,7 @@ class MessageSet:
        self.chat_stream = chat_stream
        self.message_id = message_id
        self.messages: List[MessageSending] = []
-        self.time = round(time.time(), 2)
+        self.time = round(time.time(), 3)  # 保留3位小数

    def add_message(self, message: MessageSending) -> None:
        """添加消息到集合"""
--- a/src/plugins/chat_module/reasoning_chat/reasoning_generator.py
+++ b/src/plugins/chat_module/reasoning_chat/reasoning_generator.py
@@ -96,40 +96,39 @@ class ResponseGenerator:
            return None

        # 保存到数据库
-        self._save_to_db(
-            message=message,
-            sender_name=sender_name,
-            prompt=prompt,
-            content=content,
-            reasoning_content=reasoning_content,
-            # reasoning_content_check=reasoning_content_check if global_config.enable_kuuki_read else ""
-        )
+        # self._save_to_db(
+        #     message=message,
+        #     sender_name=sender_name,
+        #     prompt=prompt,
+        #     content=content,
+        #     reasoning_content=reasoning_content,
+        #     # reasoning_content_check=reasoning_content_check if global_config.enable_kuuki_read else ""
+        # )

        return content

-    # def _save_to_db(self, message: Message, sender_name: str, prompt: str, prompt_check: str,
-    #                 content: str, content_check: str, reasoning_content: str, reasoning_content_check: str):
-    def _save_to_db(
-        self,
-        message: MessageRecv,
-        sender_name: str,
-        prompt: str,
-        content: str,
-        reasoning_content: str,
-    ):
-        """保存对话记录到数据库"""
-        db.reasoning_logs.insert_one(
-            {
-                "time": time.time(),
-                "chat_id": message.chat_stream.stream_id,
-                "user": sender_name,
-                "message": message.processed_plain_text,
-                "model": self.current_model_name,
-                "reasoning": reasoning_content,
-                "response": content,
-                "prompt": prompt,
-            }
-        )
+
+    # def _save_to_db(
+    #     self,
+    #     message: MessageRecv,
+    #     sender_name: str,
+    #     prompt: str,
+    #     content: str,
+    #     reasoning_content: str,
+    # ):
+    #     """保存对话记录到数据库"""
+    #     db.reasoning_logs.insert_one(
+    #         {
+    #             "time": time.time(),
+    #             "chat_id": message.chat_stream.stream_id,
+    #             "user": sender_name,
+    #             "message": message.processed_plain_text,
+    #             "model": self.current_model_name,
+    #             "reasoning": reasoning_content,
+    #             "response": content,
+    #             "prompt": prompt,
+    #         }
+    #     )

    async def _get_emotion_tags(self, content: str, processed_plain_text: str):
        """提取情感标签，结合立场和情绪"""
--- a/src/plugins/chat_module/think_flow_chat/think_flow_chat.py
+++ b/src/plugins/chat_module/think_flow_chat/think_flow_chat.py
@@ -1,7 +1,8 @@
 import time
 from random import random
 import re
-
+import traceback
+from typing import List
 from ...memory_system.Hippocampus import HippocampusManager
 from ...moods.moods import MoodManager
 from ...config.config import global_config
@@ -19,6 +20,7 @@ from src.common.logger import get_module_logger, CHAT_STYLE_CONFIG, LogConfig
 from ...chat.chat_stream import chat_manager
 from ...person_info.relationship_manager import relationship_manager
 from ...chat.message_buffer import message_buffer
+from src.plugins.respon_info_catcher.info_catcher import info_catcher_manager

 # 定义日志配置
 chat_config = LogConfig(
@@ -59,7 +61,11 @@ class ThinkFlowChat:

        return thinking_id

-    async def _send_response_messages(self, message, chat, response_set, thinking_id):
+    async def _send_response_messages(self, 
+                                      message, 
+                                      chat, 
+                                      response_set:List[str], 
+                                      thinking_id) -> MessageSending:
        """发送回复消息"""
        container = message_manager.get_container(chat.stream_id)
        thinking_message = None
@@ -72,12 +78,13 @@ class ThinkFlowChat:

        if not thinking_message:
            logger.warning("未找到对应的思考消息，可能已超时被移除")
-            return
+            return None

        thinking_start_time = thinking_message.thinking_start_time
        message_set = MessageSet(chat, thinking_id)

        mark_head = False
+        first_bot_msg = None
        for msg in response_set:
            message_segment = Seg(type="text", data=msg)
            bot_message = MessageSending(
@@ -97,10 +104,12 @@ class ThinkFlowChat:
            )
            if not mark_head:
                mark_head = True
+                first_bot_msg = bot_message

            # print(f"thinking_start_time:{bot_message.thinking_start_time}")
            message_set.add_message(bot_message)
        message_manager.add_message(message_set)
+        return first_bot_msg

    async def _handle_emoji(self, message, chat, response):
        """处理表情包"""
@@ -257,6 +266,8 @@ class ThinkFlowChat:
        if random() < reply_probability:
            try:
                do_reply = True
+                
+                

                # 创建思考消息
                try:
@@ -266,6 +277,11 @@ class ThinkFlowChat:
                    timing_results["创建思考消息"] = timer2 - timer1
                except Exception as e:
                    logger.error(f"心流创建思考消息失败: {e}")
+                    
+                logger.debug(f"创建捕捉器，thinking_id:{thinking_id}")
+                
+                info_catcher = info_catcher_manager.get_info_catcher(thinking_id)
+                info_catcher.catch_decide_to_response(message)

                try:
                    # 观察
@@ -275,36 +291,48 @@ class ThinkFlowChat:
                    timing_results["观察"] = timer2 - timer1
                except Exception as e:
                    logger.error(f"心流观察失败: {e}")
+                    
+                info_catcher.catch_after_observe(timing_results["观察"])

                # 思考前脑内状态
                try:
                    timer1 = time.time()
-                    await heartflow.get_subheartflow(chat.stream_id).do_thinking_before_reply(
+                    current_mind,past_mind = await heartflow.get_subheartflow(chat.stream_id).do_thinking_before_reply(
                        message.processed_plain_text
                    )
                    timer2 = time.time()
                    timing_results["思考前脑内状态"] = timer2 - timer1
                except Exception as e:
                    logger.error(f"心流思考前脑内状态失败: {e}")
+                    
+                info_catcher.catch_afer_shf_step(timing_results["思考前脑内状态"],past_mind,current_mind)

                # 生成回复
                timer1 = time.time()
-                response_set = await self.gpt.generate_response(message)
+                response_set = await self.gpt.generate_response(message,thinking_id)
                timer2 = time.time()
                timing_results["生成回复"] = timer2 - timer1

+                info_catcher.catch_after_generate_response(timing_results["生成回复"])
+                
                if not response_set:
-                    logger.info("为什么生成回复失败？")
+                    logger.info("回复生成失败，返回为空")
                    return

                # 发送消息
                try:
                    timer1 = time.time()
-                    await self._send_response_messages(message, chat, response_set, thinking_id)
+                    first_bot_msg = await self._send_response_messages(message, chat, response_set, thinking_id)
                    timer2 = time.time()
                    timing_results["发送消息"] = timer2 - timer1
                except Exception as e:
                    logger.error(f"心流发送消息失败: {e}")
+                
+                
+                info_catcher.catch_after_response(timing_results["发送消息"],response_set,first_bot_msg)
+                
+                
+                info_catcher.done_catch()

                # 处理表情包
                try:
@@ -335,6 +363,7 @@ class ThinkFlowChat:

            except Exception as e:
                logger.error(f"心流处理消息失败: {e}")
+                logger.error(traceback.format_exc())

        # 输出性能计时结果
        if do_reply:
--- a/src/plugins/chat_module/think_flow_chat/think_flow_generator.py
+++ b/src/plugins/chat_module/think_flow_chat/think_flow_generator.py
@@ -9,6 +9,7 @@ from ...chat.message import MessageRecv, MessageThinking
 from .think_flow_prompt_builder import prompt_builder
 from ...chat.utils import process_llm_response
 from src.common.logger import get_module_logger, LogConfig, LLM_STYLE_CONFIG
+from src.plugins.respon_info_catcher.info_catcher import info_catcher_manager

 # 定义日志配置
 llm_config = LogConfig(
@@ -32,15 +33,16 @@ class ResponseGenerator:
        self.current_model_type = "r1"  # 默认使用 R1
        self.current_model_name = "unknown model"

-    async def generate_response(self, message: MessageThinking) -> Optional[Union[str, List[str]]]:
+    async def generate_response(self, message: MessageRecv,thinking_id:str) -> Optional[List[str]]:
        """根据当前模型类型选择对应的生成函数"""
+        

        logger.info(
            f"思考:{message.processed_plain_text[:30] + '...' if len(message.processed_plain_text) > 30 else message.processed_plain_text}"
        )

        current_model = self.model_normal
-        model_response = await self._generate_response_with_model(message, current_model)
+        model_response = await self._generate_response_with_model(message, current_model,thinking_id)

        # print(f"raw_content: {model_response}")

@@ -53,8 +55,11 @@ class ResponseGenerator:
            logger.info(f"{self.current_model_type}思考，失败")
            return None

-    async def _generate_response_with_model(self, message: MessageThinking, model: LLM_request):
+    async def _generate_response_with_model(self, message: MessageRecv, model: LLM_request,thinking_id:str):
        sender_name = ""
+        
+        info_catcher = info_catcher_manager.get_info_catcher(thinking_id)
+        
        if message.chat_stream.user_info.user_cardname and message.chat_stream.user_info.user_nickname:
            sender_name = (
                f"[({message.chat_stream.user_info.user_id}){message.chat_stream.user_info.user_nickname}]"
@@ -79,45 +84,51 @@ class ResponseGenerator:

        try:
            content, reasoning_content, self.current_model_name = await model.generate_response(prompt)
+            
+            info_catcher.catch_after_llm_generated(
+                prompt=prompt,
+                response=content,
+                reasoning_content=reasoning_content,
+                model_name=self.current_model_name)
+            
        except Exception:
            logger.exception("生成回复时出错")
            return None

        # 保存到数据库
-        self._save_to_db(
-            message=message,
-            sender_name=sender_name,
-            prompt=prompt,
-            content=content,
-            reasoning_content=reasoning_content,
-            # reasoning_content_check=reasoning_content_check if global_config.enable_kuuki_read else ""
-        )
+        # self._save_to_db(
+        #     message=message,
+        #     sender_name=sender_name,
+        #     prompt=prompt,
+        #     content=content,
+        #     reasoning_content=reasoning_content,
+        #     # reasoning_content_check=reasoning_content_check if global_config.enable_kuuki_read else ""
+        # )

        return content

-    # def _save_to_db(self, message: Message, sender_name: str, prompt: str, prompt_check: str,
-    #                 content: str, content_check: str, reasoning_content: str, reasoning_content_check: str):
-    def _save_to_db(
-        self,
-        message: MessageRecv,
-        sender_name: str,
-        prompt: str,
-        content: str,
-        reasoning_content: str,
-    ):
-        """保存对话记录到数据库"""
-        db.reasoning_logs.insert_one(
-            {
-                "time": time.time(),
-                "chat_id": message.chat_stream.stream_id,
-                "user": sender_name,
-                "message": message.processed_plain_text,
-                "model": self.current_model_name,
-                "reasoning": reasoning_content,
-                "response": content,
-                "prompt": prompt,
-            }
-        )
+
+    # def _save_to_db(
+    #     self,
+    #     message: MessageRecv,
+    #     sender_name: str,
+    #     prompt: str,
+    #     content: str,
+    #     reasoning_content: str,
+    # ):
+    #     """保存对话记录到数据库"""
+    #     db.reasoning_logs.insert_one(
+    #         {
+    #             "time": time.time(),
+    #             "chat_id": message.chat_stream.stream_id,
+    #             "user": sender_name,
+    #             "message": message.processed_plain_text,
+    #             "model": self.current_model_name,
+    #             "reasoning": reasoning_content,
+    #             "response": content,
+    #             "prompt": prompt,
+    #         }
+    #     )

    async def _get_emotion_tags(self, content: str, processed_plain_text: str):
        """提取情感标签，结合立场和情绪"""
@@ -167,10 +178,10 @@ class ResponseGenerator:
            logger.debug(f"获取情感标签时出错: {e}")
            return "中立", "平静"  # 出错时返回默认值

-    async def _process_response(self, content: str) -> Tuple[List[str], List[str]]:
+    async def _process_response(self, content: str) -> List[str]:
        """处理响应内容，返回处理后的内容和情感标签"""
        if not content:
-            return None, []
+            return None

        processed_response = process_llm_response(content)

--- a/src/plugins/respon_info_catcher/info_catcher.py
+++ b/src/plugins/respon_info_catcher/info_catcher.py
@@ -0,0 +1,228 @@
+from src.plugins.config.config import global_config
+from src.plugins.chat.message import MessageRecv,MessageSending,Message
+from src.common.database import db
+import time
+import traceback
+from typing import List
+
+class InfoCatcher:
+    def __init__(self):
+        self.chat_history = [] # 聊天历史，长度为三倍使用的上下文
+        self.context_length = global_config.MAX_CONTEXT_SIZE
+        self.chat_history_in_thinking = [] # 思考期间的聊天内容
+        self.chat_history_after_response = [] # 回复后的聊天内容，长度为一倍上下文
+        
+        self.chat_id = ""
+        self.response_mode = global_config.response_mode
+        self.trigger_response_text = ""
+        self.response_text = ""
+        
+        self.trigger_response_time = 0
+        self.trigger_response_message = None
+        
+        self.response_time = 0
+        self.response_messages = []
+        
+        # 使用字典来存储 heartflow 模式的数据
+        self.heartflow_data = {
+            "heart_flow_prompt": "",
+            "sub_heartflow_before": "",
+            "sub_heartflow_now": "",
+            "sub_heartflow_after": "",
+            "sub_heartflow_model": "",
+            "prompt": "",
+            "response": "",
+            "model": ""
+        }
+        
+        # 使用字典来存储 reasoning 模式的数据
+        self.reasoning_data = {
+            "thinking_log": "",
+            "prompt": "",
+            "response": "",
+            "model": ""
+        }
+        
+        # 耗时
+        self.timing_results = {
+            "interested_rate_time": 0,
+            "sub_heartflow_observe_time": 0,
+            "sub_heartflow_step_time": 0,
+            "make_response_time": 0,
+        }
+    
+    def catch_decide_to_response(self,message:MessageRecv):
+        # 搜集决定回复时的信息
+        self.trigger_response_message = message
+        self.trigger_response_text = message.detailed_plain_text
+        
+        self.trigger_response_time = time.time()
+        
+        self.chat_id = message.chat_stream.stream_id
+        
+        self.chat_history = self.get_message_from_db_before_msg(message)
+        
+    def catch_after_observe(self,obs_duration:float):#这里可以有更多信息
+        self.timing_results["sub_heartflow_observe_time"] = obs_duration
+
+    # def catch_shf
+    
+    def catch_afer_shf_step(self,step_duration:float,past_mind:str,current_mind:str):
+        self.timing_results["sub_heartflow_step_time"] = step_duration
+        if len(past_mind) > 1:
+            self.heartflow_data["sub_heartflow_before"] = past_mind[-1]
+            self.heartflow_data["sub_heartflow_now"] = current_mind
+        else:
+            self.heartflow_data["sub_heartflow_before"] = past_mind[-1]
+            self.heartflow_data["sub_heartflow_now"] = current_mind
+    
+    def catch_after_llm_generated(self,prompt:str,
+                                  response:str,
+                                  reasoning_content:str = "",
+                                  model_name:str = ""):
+        if self.response_mode == "heart_flow":
+            self.heartflow_data["prompt"] = prompt
+            self.heartflow_data["response"] = response
+            self.heartflow_data["model"] = model_name
+        elif self.response_mode == "reasoning":
+            self.reasoning_data["thinking_log"] = reasoning_content
+            self.reasoning_data["prompt"] = prompt
+            self.reasoning_data["response"] = response
+            self.reasoning_data["model"] = model_name
+            
+        self.response_text = response
+    
+    def catch_after_generate_response(self,response_duration:float):
+        self.timing_results["make_response_time"] = response_duration
+        
+        
+        
+    def catch_after_response(self,response_duration:float,
+                             response_message:List[str],
+                             first_bot_msg:MessageSending):
+        self.timing_results["make_response_time"] = response_duration
+        self.response_time = time.time()
+        for msg in response_message:
+            self.response_messages.append(msg)
+        
+        self.chat_history_in_thinking = self.get_message_from_db_between_msgs(self.trigger_response_message,first_bot_msg)
+   
+    def get_message_from_db_between_msgs(self, message_start: Message, message_end: Message):
+        try:
+            # 从数据库中获取消息的时间戳
+            time_start = message_start.message_info.time
+            time_end = message_end.message_info.time
+            chat_id = message_start.chat_stream.stream_id
+            
+            print(f"查询参数: time_start={time_start}, time_end={time_end}, chat_id={chat_id}")
+            
+            # 查询数据库，获取 chat_id 相同且时间在 start 和 end 之间的数据
+            messages_between = db.messages.find(
+                {
+                    "chat_id": chat_id,
+                    "time": {"$gt": time_start, "$lt": time_end}
+                }
+            ).sort("time", -1)
+            
+            result = list(messages_between)
+            print(f"查询结果数量: {len(result)}")
+            if result:
+                print(f"第一条消息时间: {result[0]['time']}")
+                print(f"最后一条消息时间: {result[-1]['time']}")
+            return result
+        except Exception as e:
+            print(f"获取消息时出错: {str(e)}")
+            return []
+     
+    def get_message_from_db_before_msg(self, message: MessageRecv):
+        # 从数据库中获取消息
+        message_id = message.message_info.message_id
+        chat_id = message.chat_stream.stream_id
+        
+        # 查询数据库，获取 chat_id 相同且 message_id 小于当前消息的 30 条数据
+        messages_before = db.messages.find(
+            {"chat_id": chat_id, "message_id": {"$lt": message_id}}
+        ).sort("time", -1).limit(self.context_length*3) #获取更多历史信息
+        
+        return list(messages_before)
+    
+    def message_list_to_dict(self, message_list):
+        #存储简化的聊天记录
+        result = []
+        for message in message_list:
+            if not isinstance(message, dict):
+                message = self.message_to_dict(message)
+            # print(message)
+
+            lite_message = {
+                "time": message["time"],
+                "user_nickname": message["user_info"]["user_nickname"],
+                "processed_plain_text": message["processed_plain_text"],
+            }
+            result.append(lite_message)
+            
+        return result
+
+    def message_to_dict(self, message):
+        if not message:
+            return None
+        if isinstance(message, dict):
+            return message
+        return {
+            # "message_id": message.message_info.message_id,
+            "time": message.message_info.time,
+            "user_id": message.message_info.user_info.user_id,
+            "user_nickname": message.message_info.user_info.user_nickname,
+            "processed_plain_text": message.processed_plain_text,
+            # "detailed_plain_text": message.detailed_plain_text
+        }
+                
+    def done_catch(self):
+        """将收集到的信息存储到数据库的 thinking_log 集合中"""
+        try:
+            # 将消息对象转换为可序列化的字典
+            
+            thinking_log_data = {
+                "chat_id": self.chat_id,
+                "response_mode": self.response_mode,
+                "trigger_text": self.trigger_response_text,
+                "response_text": self.response_text,
+                "trigger_info": {
+                    "time": self.trigger_response_time,
+                    "message": self.message_to_dict(self.trigger_response_message),
+                },
+                "response_info": {
+                    "time": self.response_time,
+                    "message": self.response_messages,
+                },
+                "timing_results": self.timing_results,
+                "chat_history": self.message_list_to_dict(self.chat_history),
+                "chat_history_in_thinking": self.message_list_to_dict(self.chat_history_in_thinking),
+                "chat_history_after_response": self.message_list_to_dict(self.chat_history_after_response)
+            }
+
+            # 根据不同的响应模式添加相应的数据
+            if self.response_mode == "heart_flow":
+                thinking_log_data["mode_specific_data"] = self.heartflow_data
+            elif self.response_mode == "reasoning":
+                thinking_log_data["mode_specific_data"] = self.reasoning_data
+
+            # 将数据插入到 thinking_log 集合中
+            db.thinking_log.insert_one(thinking_log_data)
+            
+            return True
+        except Exception as e:
+            print(f"存储思考日志时出错: {str(e)}")
+            print(traceback.format_exc())
+            return False
+
+class InfoCatcherManager:
+    def __init__(self):
+        self.info_catchers = {}
+
+    def get_info_catcher(self,thinking_id:str) -> InfoCatcher:
+        if thinking_id not in self.info_catchers:
+            self.info_catchers[thinking_id] = InfoCatcher()
+        return self.info_catchers[thinking_id]
+
+info_catcher_manager = InfoCatcherManager()
--- a/template/bot_config_template.toml
+++ b/template/bot_config_template.toml
@@ -164,7 +164,7 @@ response_max_sentence_num = 4 # 回复允许的最大句子数
 [remote] #发送统计信息，主要是看全球有多少只麦麦
 enable = true

-[experimental]
+[experimental] #实验性功能，不一定完善或者根本不能用
 enable_friend_chat = false # 是否启用好友聊天
 pfc_chatting = false # 是否启用PFC聊天，该功能仅作用于私聊，与回复模式独立