Merge branch 'dev' of https://github.com/MaiM-with-u/MaiBot into dev

2025-04-16 14:12:36 +08:00
parent 5421e62539 1aad7f4f6e
commit abc4c812c5
59 changed files with 1789 additions and 2221 deletions
--- a/src/plugins/chat/init.py
+++ b/src/plugins/chat/init.py
@@ -3,7 +3,6 @@ from ..person_info.relationship_manager import relationship_manager
 from .chat_stream import chat_manager
 from .message_sender import message_manager
 from ..storage.storage import MessageStorage
-from .auto_speak import auto_speak_manager


 __all__ = [
@@ -12,5 +11,4 @@ __all__ = [
    "chat_manager",
    "message_manager",
    "MessageStorage",
-    "auto_speak_manager",
 ]
--- a/src/plugins/chat/auto_speak.py
+++ b/src/plugins/chat/auto_speak.py
@@ -1,184 +0,0 @@
-import time
-import asyncio
-import random
-from random import random as random_float
-from typing import Dict
-from ..config.config import global_config
-from .message import MessageSending, MessageThinking, MessageSet, MessageRecv
-from ..message.message_base import UserInfo, Seg
-from .message_sender import message_manager
-from ..moods.moods import MoodManager
-from ..chat_module.reasoning_chat.reasoning_generator import ResponseGenerator
-from src.common.logger import get_module_logger
-from src.heart_flow.heartflow import heartflow
-from ...common.database import db
-
-logger = get_module_logger("auto_speak")
-
-
-class AutoSpeakManager:
-    def __init__(self):
-        self._last_auto_speak_time: Dict[str, float] = {}  # 记录每个聊天流上次自主发言的时间
-        self.mood_manager = MoodManager.get_instance()
-        self.gpt = ResponseGenerator()  # 添加gpt实例
-        self._started = False
-        self._check_task = None
-        self.db = db
-
-    async def get_chat_info(self, chat_id: str) -> dict:
-        """从数据库获取聊天流信息"""
-        chat_info = await self.db.chat_streams.find_one({"stream_id": chat_id})
-        return chat_info
-
-    async def start_auto_speak_check(self):
-        """启动自动发言检查任务"""
-        if not self._started:
-            self._check_task = asyncio.create_task(self._periodic_check())
-            self._started = True
-            logger.success("自动发言检查任务已启动")
-
-    async def _periodic_check(self):
-        """定期检查是否需要自主发言"""
-        while True and global_config.enable_think_flow:
-            # 获取所有活跃的子心流
-            active_subheartflows = []
-            for chat_id, subheartflow in heartflow._subheartflows.items():
-                if (
-                    subheartflow.is_active and subheartflow.current_state.willing > 0
-                ):  # 只考虑活跃且意愿值大于0.5的子心流
-                    active_subheartflows.append((chat_id, subheartflow))
-                    logger.debug(
-                        f"发现活跃子心流 - 聊天ID: {chat_id}, 意愿值: {subheartflow.current_state.willing:.2f}"
-                    )
-
-            if not active_subheartflows:
-                logger.debug("当前没有活跃的子心流")
-                await asyncio.sleep(20)  # 添加异步等待
-                continue
-
-            # 随机选择一个活跃的子心流
-            chat_id, subheartflow = random.choice(active_subheartflows)
-            logger.info(f"随机选择子心流 - 聊天ID: {chat_id}, 意愿值: {subheartflow.current_state.willing:.2f}")
-
-            # 检查是否应该自主发言
-            if await self.check_auto_speak(subheartflow):
-                logger.info(f"准备自主发言 - 聊天ID: {chat_id}")
-                # 生成自主发言
-                bot_user_info = UserInfo(
-                    user_id=global_config.BOT_QQ,
-                    user_nickname=global_config.BOT_NICKNAME,
-                    platform="qq",  # 默认使用qq平台
-                )
-
-                # 创建一个空的MessageRecv对象作为上下文
-                message = MessageRecv(
-                    {
-                        "message_info": {
-                            "user_info": {"user_id": chat_id, "user_nickname": "", "platform": "qq"},
-                            "group_info": None,
-                            "platform": "qq",
-                            "time": time.time(),
-                        },
-                        "processed_plain_text": "",
-                        "raw_message": "",
-                        "is_emoji": False,
-                    }
-                )
-
-                await self.generate_auto_speak(
-                    subheartflow, message, bot_user_info, message.message_info["user_info"], message.message_info
-                )
-            else:
-                logger.debug(f"不满足自主发言条件 - 聊天ID: {chat_id}")
-
-            # 每分钟检查一次
-            await asyncio.sleep(20)
-
-            # await asyncio.sleep(5)  # 发生错误时等待5秒再继续
-
-    async def check_auto_speak(self, subheartflow) -> bool:
-        """检查是否应该自主发言"""
-        if not subheartflow:
-            return False
-
-        current_time = time.time()
-        chat_id = subheartflow.observe_chat_id
-
-        # 获取上次自主发言时间
-        if chat_id not in self._last_auto_speak_time:
-            self._last_auto_speak_time[chat_id] = 0
-        last_speak_time = self._last_auto_speak_time.get(chat_id, 0)
-
-        # 如果距离上次自主发言不到5分钟，不发言
-        if current_time - last_speak_time < 30:
-            logger.debug(
-                f"距离上次发言时间太短 - 聊天ID: {chat_id}, 剩余时间: {30 - (current_time - last_speak_time):.1f}秒"
-            )
-            return False
-
-        # 获取当前意愿值
-        current_willing = subheartflow.current_state.willing
-
-        if current_willing > 0.1 and random_float() < 0.5:
-            self._last_auto_speak_time[chat_id] = current_time
-            logger.info(f"满足自主发言条件 - 聊天ID: {chat_id}, 意愿值: {current_willing:.2f}")
-            return True
-
-        logger.debug(f"不满足自主发言条件 - 聊天ID: {chat_id}, 意愿值: {current_willing:.2f}")
-        return False
-
-    async def generate_auto_speak(self, subheartflow, message, bot_user_info: UserInfo, userinfo, messageinfo):
-        """生成自主发言内容"""
-        thinking_time_point = round(time.time(), 2)
-        think_id = "mt" + str(thinking_time_point)
-        thinking_message = MessageThinking(
-            message_id=think_id,
-            chat_stream=None,  # 不需要chat_stream
-            bot_user_info=bot_user_info,
-            reply=message,
-            thinking_start_time=thinking_time_point,
-        )
-
-        message_manager.add_message(thinking_message)
-
-        # 生成自主发言内容
-        try:
-            response, raw_content = await self.gpt.generate_response(message)
-        except Exception as e:
-            logger.error(f"生成自主发言内容时发生错误: {e}")
-            return False
-
-        if response:
-            message_set = MessageSet(None, think_id)  # 不需要chat_stream
-            mark_head = False
-
-            for msg in response:
-                message_segment = Seg(type="text", data=msg)
-                bot_message = MessageSending(
-                    message_id=think_id,
-                    chat_stream=None,  # 不需要chat_stream
-                    bot_user_info=bot_user_info,
-                    sender_info=userinfo,
-                    message_segment=message_segment,
-                    reply=message,
-                    is_head=not mark_head,
-                    is_emoji=False,
-                    thinking_start_time=thinking_time_point,
-                )
-                if not mark_head:
-                    mark_head = True
-                message_set.add_message(bot_message)
-
-            message_manager.add_message(message_set)
-
-            # 更新情绪和关系
-            stance, emotion = await self.gpt._get_emotion_tags(raw_content, message.processed_plain_text)
-            self.mood_manager.update_mood_from_emotion(emotion, global_config.mood_intensity_factor)
-
-            return True
-
-        return False
-
-
-# 创建全局AutoSpeakManager实例
-auto_speak_manager = AutoSpeakManager()
--- a/src/plugins/chat/bot.py
+++ b/src/plugins/chat/bot.py
@@ -38,7 +38,7 @@ class ChatBot:
    async def _ensure_started(self):
        """确保所有任务已启动"""
        if not self._started:
-            logger.info("确保ChatBot所有任务已启动")
+            logger.trace("确保ChatBot所有任务已启动")

            self._started = True

@@ -65,10 +65,6 @@ class ChatBot:
           - 没有思维流相关的状态管理
           - 更简单直接的回复逻辑

-        3. pfc_chatting模式：仅进行消息处理
-           - 不进行任何回复
-           - 只处理和存储消息
-
        所有模式都包含：
        - 消息过滤
        - 记忆激活
@@ -84,7 +80,7 @@ class ChatBot:
            message = MessageRecv(message_data)
            groupinfo = message.message_info.group_info
            userinfo = message.message_info.user_info
-            logger.debug(f"处理消息:{str(message_data)[:120]}...")
+            logger.trace(f"处理消息:{str(message_data)[:120]}...")

            if userinfo.user_id in global_config.ban_user_id:
                logger.debug(f"用户{userinfo.user_id}被禁止回复")
--- a/src/plugins/chat/message.py
+++ b/src/plugins/chat/message.py
@@ -142,14 +142,18 @@ class MessageRecv(Message):

    def _generate_detailed_text(self) -> str:
        """生成详细文本，包含时间和用户信息"""
-        time_str = time.strftime("%m-%d %H:%M:%S", time.localtime(self.message_info.time))
+        # Raw value of message_info.time; named `timestamp` so the `time` module is not shadowed.
+        timestamp = self.message_info.time
        user_info = self.message_info.user_info
+        # The sender is written as one colon-separated tag:
+        #     <platform:user_id:user_nickname:user_cardname>
+        # NOTE(review): user_cardname is interpolated unconditionally, so a value of
+        # None appears as the text "None" in the tag (the removed code special-cased
+        # it) - confirm that readers of this text expect that.
        name = (
-            f"{user_info.user_nickname}(ta的昵称:{user_info.user_cardname},ta的id:{user_info.user_id})"
-            if user_info.user_cardname != None
-            else f"{user_info.user_nickname}(ta的id:{user_info.user_id})"
+            f"<{self.message_info.platform}:{user_info.user_id}:{user_info.user_nickname}:{user_info.user_cardname}>"
        )
-        return f"[{time_str}] {name}: {self.processed_plain_text}\n"
+        return f"[{timestamp}] {name}: {self.processed_plain_text}\n"


@dataclass
@@ -239,14 +243,18 @@ class MessageProcessBase(Message):

    def _generate_detailed_text(self) -> str:
        """生成详细文本，包含时间和用户信息"""
-        time_str = time.strftime("%m-%d %H:%M:%S", time.localtime(self.message_info.time))
+        # Raw value of message_info.time; named `timestamp` so the `time` module is not shadowed.
+        timestamp = self.message_info.time
        user_info = self.message_info.user_info
+        # The sender is written as one colon-separated tag:
+        #     <platform:user_id:user_nickname:user_cardname>
+        # NOTE(review): user_cardname is interpolated unconditionally, so a value of
+        # None appears as the text "None" in the tag (the removed code special-cased
+        # it) - confirm that readers of this text expect that.
        name = (
-            f"{user_info.user_nickname}(ta的昵称:{user_info.user_cardname},ta的id:{user_info.user_id})"
-            if user_info.user_cardname != None
-            else f"{user_info.user_nickname}(ta的id:{user_info.user_id})"
+            f"<{self.message_info.platform}:{user_info.user_id}:{user_info.user_nickname}:{user_info.user_cardname}>"
        )
-        return f"[{time_str}] {name}: {self.processed_plain_text}\n"
+        return f"[{timestamp}] {name}: {self.processed_plain_text}\n"


@dataclass
--- a/src/plugins/chat/message_buffer.py
+++ b/src/plugins/chat/message_buffer.py
@@ -153,11 +153,11 @@ class MessageBuffer:
                    # 更新当前消息的processed_plain_text
                    if combined_text and combined_text[0] != message.processed_plain_text and is_update:
                        if type == "text":
-                            message.processed_plain_text = "".join(combined_text)
+                            message.processed_plain_text = "，".join(combined_text)
                            logger.debug(f"整合了{len(combined_text) - 1}条F消息的内容到当前消息")
                        elif type == "emoji":
                            combined_text.pop()
-                            message.processed_plain_text = "".join(combined_text)
+                            message.processed_plain_text = "，".join(combined_text)
                            message.is_emoji = False
                            logger.debug(f"整合了{len(combined_text) - 1}条F消息的内容，覆盖当前emoji消息")

--- a/src/plugins/chat/message_sender.py
+++ b/src/plugins/chat/message_sender.py
@@ -70,9 +70,9 @@ class Message_Sender:
                    thinking_start_time=message.thinking_start_time,
                    is_emoji=message.is_emoji,
                )
-                logger.debug(f"{message.processed_plain_text},{typing_time},计算输入时间结束")
+                logger.trace(f"{message.processed_plain_text},{typing_time},计算输入时间结束")
                await asyncio.sleep(typing_time)
-                logger.debug(f"{message.processed_plain_text},{typing_time},等待输入时间结束")
+                logger.trace(f"{message.processed_plain_text},{typing_time},等待输入时间结束")

                message_json = message.to_dict()

--- a/src/plugins/chat/utils.py
+++ b/src/plugins/chat/utils.py
@@ -334,27 +334,35 @@ def random_remove_punctuation(text: str) -> str:


 def process_llm_response(text: str) -> List[str]:
-    # processed_response = process_text_with_typos(content)
-    # 对西文字符段落的回复长度设置为汉字字符的两倍
-    max_length = global_config.response_max_length
+    # 提取被 () 或 [] 包裹的内容
+    pattern = re.compile(r"[\(\[].*?[\)\]]")
+    _extracted_contents = pattern.findall(text)
+    # 去除 () 和 [] 及其包裹的内容
+    cleaned_text = pattern.sub("", text)
+    logger.debug(f"{text}去除括号处理后的文本: {cleaned_text}")
+
+    # 对清理后的文本进行进一步处理
+    max_length = global_config.response_max_length * 2
    max_sentence_num = global_config.response_max_sentence_num
-    if len(text) > max_length and not is_western_paragraph(text):
-        logger.warning(f"回复过长 ({len(text)} 字符)，返回默认回复")
+    if len(cleaned_text) > max_length and not is_western_paragraph(cleaned_text):
+        logger.warning(f"回复过长 ({len(cleaned_text)} 字符)，返回默认回复")
        return ["懒得说"]
-    elif len(text) > 200:
-        logger.warning(f"回复过长 ({len(text)} 字符)，返回默认回复")
+    elif len(cleaned_text) > 200:
+        logger.warning(f"回复过长 ({len(cleaned_text)} 字符)，返回默认回复")
        return ["懒得说"]
-    # 处理长消息
+
    typo_generator = ChineseTypoGenerator(
        error_rate=global_config.chinese_typo_error_rate,
        min_freq=global_config.chinese_typo_min_freq,
        tone_error_rate=global_config.chinese_typo_tone_error_rate,
        word_replace_rate=global_config.chinese_typo_word_replace_rate,
    )
-    if global_config.enable_response_spliter:
-        split_sentences = split_into_sentences_w_remove_punctuation(text)
+
+    if global_config.enable_response_splitter:
+        split_sentences = split_into_sentences_w_remove_punctuation(cleaned_text)
    else:
-        split_sentences = [text]
+        split_sentences = [cleaned_text]
+
    sentences = []
    for sentence in split_sentences:
        if global_config.chinese_typo_enable:
@@ -364,12 +372,13 @@ def process_llm_response(text: str) -> List[str]:
                sentences.append(typo_corrections)
        else:
            sentences.append(sentence)
-    # 检查分割后的消息数量是否过多（超过3条）

    if len(sentences) > max_sentence_num:
        logger.warning(f"分割后消息数量过多 ({len(sentences)} 条)，返回默认回复")
        return [f"{global_config.BOT_NICKNAME}不知道哦"]

+    # sentences.extend(extracted_contents)
+
    return sentences


@@ -630,3 +639,102 @@ def count_messages_between(start_time: float, end_time: float, stream_id: str) -
     except Exception as e:
         logger.error(f"计算消息数量时出错: {str(e)}")
         return 0, 0
+
+
+def translate_timestamp_to_human_readable(timestamp: float, mode: str = "normal") -> str:
+    """Format a Unix timestamp as text; every ``mode`` value yields a string.
+
+    Args:
+        timestamp: Seconds since the epoch.
+        mode: "relative" describes the age of the timestamp measured from now;
+            "normal" and any other value give local "%Y-%m-%d %H:%M:%S".
+
+    Returns:
+        The formatted time. In relative mode the result ends with ":"; ages from
+        30 minutes up to 7 days additionally end with a line break.
+    """
+    suffix = ""
+    if mode == "relative":
+        age = time.time() - timestamp
+        if age < 20:  # negative ages (future timestamps) land here too
+            return "刚刚:"
+        if age < 60:
+            return f"{int(age)}秒前:"
+        if age < 1800:
+            return f"{int(age / 60)}分钟前:"
+        if age < 3600:
+            return f"{int(age / 60)}分钟前:\n"
+        if age < 86400:
+            return f"{int(age / 3600)}小时前:\n"
+        if age < 604800:
+            return f"{int(age / 86400)}天前:\n"
+        suffix = ":"  # a week or older: absolute date, still colon-terminated
+    # Any mode other than "relative" is formatted as "normal", so the result is always a str.
+    return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(timestamp)) + suffix
+
+
+def parse_text_timestamps(text: str, mode: str = "normal") -> str:
+    """Replace bracketed Unix timestamps in ``text`` with readable times.
+
+    A timestamp is a run of digits with an optional decimal part wrapped in
+    square brackets, for example "[1744784000.12]".
+
+    Args:
+        text: Text that may contain bracketed timestamps.
+        mode: "normal" converts every timestamp to the absolute format. Any other
+            value selects the "lite" behaviour: only some timestamps are
+            converted (to the relative format) and the rest are removed.
+
+    Returns:
+        The rewritten text, or ``text`` itself when it contains no timestamp.
+
+    Lite selection rules:
+        - Timestamps are grouped into 15-second buckets (``ts // 15``); the
+          earliest timestamp of each bucket is converted.
+        - The latest timestamp is also converted unless an already selected
+          timestamp has the same value.
+        - Every other timestamp is replaced by the empty string.
+    """
+    pattern = re.compile(r"\[(\d+(?:\.\d+)?)\]")
+    matches = list(pattern.finditer(text))
+    if not matches:
+        return text
+
+    lite = mode != "normal"
+    kept_offsets = _select_lite_offsets(matches) if lite else None
+    time_format = "relative" if lite else "normal"
+
+    def _replacement(match):
+        # Decide by match position, not by matched text: identical timestamp
+        # strings can occur more than once and only the selected occurrence is
+        # converted, the others are dropped.
+        if lite and match.start() not in kept_offsets:
+            return ""
+        return translate_timestamp_to_human_readable(float(match.group(1)), time_format)
+
+    # One pass with a callable replacement; its return value is inserted literally.
+    return pattern.sub(_replacement, text)
+
+
+def _select_lite_offsets(matches, bucket_seconds=15):
+    """Return the start offsets of the timestamp matches that lite mode converts.
+
+    Args:
+        matches: Non-empty list of match objects whose group 1 is the timestamp.
+        bucket_seconds: Width of the grouping window in seconds.
+    """
+    # sorted() is stable, so equal timestamps keep their order of appearance.
+    by_time = sorted(matches, key=lambda m: float(m.group(1)))
+    first_per_bucket = {}
+    for match in by_time:
+        bucket = int(float(match.group(1)) // bucket_seconds)
+        first_per_bucket.setdefault(bucket, match)
+    selected = list(first_per_bucket.values())
+
+    # The earliest timestamp always heads its bucket, so only the latest one may
+    # still be missing from the selection.
+    latest = by_time[-1]
+    latest_value = float(latest.group(1))
+    if all(float(match.group(1)) != latest_value for match in selected):
+        selected.append(latest)
+    return {match.start() for match in selected}