From d5e6746a219aed8987867515f61fabf4d6885a82 Mon Sep 17 00:00:00 2001 From: Windpicker-owo <3431391539@qq.com> Date: Tue, 2 Dec 2025 23:11:29 +0800 Subject: [PATCH 1/9] =?UTF-8?q?fix(stream):=20=E4=BF=AE=E5=A4=8D=20Chatter?= =?UTF-8?q?=20=E5=A4=84=E7=90=86=E6=A0=87=E5=BF=97=E7=9A=84=E5=81=87?= =?UTF-8?q?=E6=AD=BB=E7=8A=B6=E6=80=81=E5=B9=B6=E5=A2=9E=E5=BC=BA=E5=B9=B6?= =?UTF-8?q?=E5=8F=91=E4=BF=9D=E6=8A=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../message_manager/distribution_manager.py | 58 +++++++++++++++++-- 1 file changed, 53 insertions(+), 5 deletions(-) diff --git a/src/chat/message_manager/distribution_manager.py b/src/chat/message_manager/distribution_manager.py index e3b3c4070..6689d84ac 100644 --- a/src/chat/message_manager/distribution_manager.py +++ b/src/chat/message_manager/distribution_manager.py @@ -321,11 +321,14 @@ class StreamLoopManager: # 🔒 并发保护:如果 Chatter 正在处理中,跳过本轮 # 这可能发生在:1) 打断后重启循环 2) 处理时间超过轮询间隔 if context.is_chatter_processing: - logger.debug(f"🔒 [流工作器] stream={stream_id[:8]}, Chatter正在处理中,跳过本轮") - # 不打印"开始处理"日志,直接进入下一轮等待 - # 使用较短的等待时间,等待当前处理完成 - await asyncio.sleep(1.0) - continue + if self._recover_stale_chatter_state(stream_id, context): + logger.warning(f"🔄 [流工作器] stream={stream_id[:8]}, 处理标志疑似残留,已尝试自动修复") + else: + logger.debug(f"🔒 [流工作器] stream={stream_id[:8]}, Chatter正在处理中,跳过本轮") + # 不打印"开始处理"日志,直接进入下一轮等待 + # 使用较短的等待时间,等待当前处理完成 + await asyncio.sleep(1.0) + continue if force_dispatch: logger.info(f"⚡ [流工作器] stream={stream_id[:8]}, 任务ID={task_id}, 未读消息 {unread_count} 条,触发强制分发") @@ -529,6 +532,21 @@ class StreamLoopManager: name=f"chatter_process_{stream_id}" ) + # 记录任务句柄,便于后续检测/自愈 + context.processing_task = chatter_task + + def _cleanup_processing_flag(task: asyncio.Task) -> None: + try: + context.processing_task = None + if context.is_chatter_processing: + context.is_chatter_processing = False + self._set_stream_processing_status(stream_id, False) + logger.debug(f"🔄 [并发保护] stream={stream_id[:8]}, chatter任务结束自动清理处理标志") + except Exception as callback_error: + logger.debug(f"清理chatter处理标志失败: {callback_error}") + + chatter_task.add_done_callback(_cleanup_processing_flag) + # 等待 chatter 任务完成 results = await chatter_task success = results.get("success", False) @@ -550,6 +568,7 @@ class StreamLoopManager: finally: # 清除 Chatter 处理标志 context.is_chatter_processing = False + context.processing_task = None logger.debug(f"清除 Chatter 处理标志: {stream_id}") # 无论成功或失败,都要设置处理状态为未处理 @@ -759,6 +778,35 @@ class StreamLoopManager: logger.debug(f"流 {stream_id} 使用默认间隔: {base_interval:.2f}s ({e})") return base_interval + def _recover_stale_chatter_state(self, stream_id: str, context: "StreamContext") -> bool: + """ + 检测并修复 Chatter 处理标志的假死状态。 + + 返回 True 表示已发现并修复了异常状态;False 表示未发现异常。 + """ + try: + processing_task = getattr(context, "processing_task", None) + + # 标志为 True 但没有任务句柄,直接修复 + if processing_task is None: + context.is_chatter_processing = False + self._set_stream_processing_status(stream_id, False) + logger.warning(f"🛠️ [自愈] stream={stream_id[:8]}, 发现无任务但标志为真,已重置") + return True + + # 标志为 True 但任务已经结束/被取消 + if processing_task.done(): + context.is_chatter_processing = False + context.processing_task = None + self._set_stream_processing_status(stream_id, False) + logger.warning(f"🛠️ [自愈] stream={stream_id[:8]}, 任务已结束但标志未清,已重置") + return True + + return False + except Exception as e: + logger.debug(f"检测 Chatter 状态异常失败: stream={stream_id}, error={e}") + return False + def get_queue_status(self) -> dict[str, Any]: """获取队列状态 From 1acead1f9d7bc0746e95e0c4be70cb60b665ccd0 Mon Sep 17 00:00:00 2001 From: Windpicker-owo <3431391539@qq.com> Date: Wed, 3 Dec 2025 11:42:38 +0800 Subject: [PATCH 2/9] =?UTF-8?q?feat(cache):=20=E6=B7=BB=E5=8A=A0=20LRU=20?= =?UTF-8?q?=E6=B7=98=E6=B1=B0=E6=9C=BA=E5=88=B6=E5=92=8C=E7=BC=93=E5=AD=98?= =?UTF-8?q?=E5=A4=A7=E5=B0=8F=E9=99=90=E5=88=B6=E4=BB=A5=E4=BC=98=E5=8C=96?= =?UTF-8?q?=E5=86=85=E5=AD=98=E4=BD=BF=E7=94=A8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/chat/express/style_learner.py | 39 ++++++++++++++- src/chat/utils/utils_image.py | 23 ++++++++- src/plugin_system/core/stream_tool_history.py | 41 +++++++++++++++- .../services/relationship_service.py | 47 ++++++++++++++++++- 4 files changed, 145 insertions(+), 5 deletions(-) diff --git a/src/chat/express/style_learner.py b/src/chat/express/style_learner.py index 1b9975bdb..722c5b0c2 100644 --- a/src/chat/express/style_learner.py +++ b/src/chat/express/style_learner.py @@ -437,7 +437,13 @@ class StyleLearner: class StyleLearnerManager: - """多聊天室表达风格学习管理器""" + """多聊天室表达风格学习管理器 + + 添加 LRU 淘汰机制,限制最大活跃 learner 数量 + """ + + # 🔧 最大活跃 learner 数量 + MAX_ACTIVE_LEARNERS = 50 def __init__(self, model_save_path: str = "data/expression/style_models"): """ @@ -445,6 +451,7 @@ class StyleLearnerManager: model_save_path: 模型保存路径 """ self.learners: dict[str, StyleLearner] = {} + self.learner_last_used: dict[str, float] = {} # 🔧 记录最后使用时间 self.model_save_path = model_save_path # 确保保存目录存在 @@ -452,6 +459,30 @@ class StyleLearnerManager: logger.debug(f"StyleLearnerManager初始化成功, 模型保存路径: {model_save_path}") + def _evict_if_needed(self) -> None: + """🔧 内存优化:如果超过最大数量,淘汰最久未使用的 learner""" + if len(self.learners) < self.MAX_ACTIVE_LEARNERS: + return + + # 按最后使用时间排序,淘汰最旧的 20% + evict_count = max(1, len(self.learners) // 5) + sorted_by_time = sorted( + self.learner_last_used.items(), + key=lambda x: x[1] + ) + + evicted = [] + for chat_id, last_used in sorted_by_time[:evict_count]: + if chat_id in self.learners: + # 先保存再淘汰 + self.learners[chat_id].save(self.model_save_path) + del self.learners[chat_id] + del self.learner_last_used[chat_id] + evicted.append(chat_id) + + if evicted: + logger.info(f"StyleLearner LRU淘汰: 释放了 {len(evicted)} 个不活跃的学习器") + def get_learner(self, chat_id: str, model_config: dict | None = None) -> StyleLearner: """ 获取或创建指定chat_id的学习器 @@ -463,7 +494,13 @@ class StyleLearnerManager: Returns: StyleLearner实例 """ + # 🔧 更新最后使用时间 + self.learner_last_used[chat_id] = time.time() + if chat_id not in self.learners: + # 🔧 检查是否需要淘汰 + self._evict_if_needed() + # 创建新的学习器 learner = StyleLearner(chat_id, model_config) diff --git a/src/chat/utils/utils_image.py b/src/chat/utils/utils_image.py index dd2033122..f51f18b29 100644 --- a/src/chat/utils/utils_image.py +++ b/src/chat/utils/utils_image.py @@ -168,15 +168,22 @@ class ImageManager: image_bytes = base64.b64decode(image_base64) image_hash = hashlib.md5(image_bytes).hexdigest() + # 如果缓存命中,可以提前释放 image_bytes + # 但如果需要保存表情包,则需要保留 image_bytes + # 2. 优先查询已注册表情的缓存(Emoji表) if full_description := await emoji_manager.get_emoji_description_by_hash(image_hash): logger.info("[缓存命中] 使用已注册表情包(Emoji表)的完整描述") + del image_bytes # 缓存命中,不再需要 + del image_base64 refined_part = full_description.split(" Keywords:")[0] return f"[表情包:{refined_part}]" # 3. 查询通用图片描述缓存(ImageDescriptions表) if cached_description := await self._get_description_from_db(image_hash, "emoji"): logger.info("[缓存命中] 使用通用图片缓存(ImageDescriptions表)中的描述") + del image_bytes # 缓存命中,不再需要 + del image_base64 refined_part = cached_description.split(" Keywords:")[0] return f"[表情包:{refined_part}]" @@ -209,7 +216,11 @@ class ImageManager: await self._save_description_to_db(image_hash, full_description, "emoji") logger.info(f"新生成的表情包描述已存入通用缓存 (Hash: {image_hash[:8]}...)") - # 6. 返回新生成的描述中用于显示的“精炼描述”部分 + # 内存优化:处理完成后主动释放大型二进制数据 + del image_bytes + del image_base64 + + # 6. 返回新生成的描述中用于显示的"精炼描述"部分 refined_part = full_description.split(" Keywords:")[0] return f"[表情包:{refined_part}]" @@ -248,11 +259,17 @@ class ImageManager: existing_image = result.scalar() if existing_image and existing_image.description: logger.debug(f"[缓存命中] 使用Images表中的图片描述: {existing_image.description[:50]}...") + # 缓存命中,释放 base64 和 image_bytes + del image_bytes + del image_base64 return f"[图片:{existing_image.description}]" # 3. 其次查询 ImageDescriptions 表缓存 if cached_description := await self._get_description_from_db(image_hash, "image"): logger.debug(f"[缓存命中] 使用ImageDescriptions表中的描述: {cached_description[:50]}...") + # 缓存命中,释放 base64 和 image_bytes + del image_bytes + del image_base64 return f"[图片:{cached_description}]" # 4. 如果都未命中,则同步调用VLM生成新描述 @@ -301,6 +318,10 @@ class ImageManager: logger.info(f"新生成的图片描述已存入缓存 (Hash: {image_hash[:8]}...)") + # 内存优化:处理完成后主动释放大型二进制数据 + del image_bytes + del image_base64 + return f"[图片:{description}]" except Exception as e: diff --git a/src/plugin_system/core/stream_tool_history.py b/src/plugin_system/core/stream_tool_history.py index e589e6fe7..8e498395b 100644 --- a/src/plugin_system/core/stream_tool_history.py +++ b/src/plugin_system/core/stream_tool_history.py @@ -387,8 +387,36 @@ class StreamToolHistoryManager: return result -# 全局管理器字典,按chat_id索引 +# 内存优化:全局管理器字典,按chat_id索引,添加 LRU 淘汰 _stream_managers: dict[str, StreamToolHistoryManager] = {} +_stream_managers_last_used: dict[str, float] = {} # 记录最后使用时间 +_STREAM_MANAGERS_MAX_SIZE = 100 # 最大保留数量 + + +def _evict_old_stream_managers() -> None: + """内存优化:淘汰最久未使用的 stream manager""" + import time + + if len(_stream_managers) < _STREAM_MANAGERS_MAX_SIZE: + return + + # 按最后使用时间排序,淘汰最旧的 20% + evict_count = max(1, len(_stream_managers) // 5) + sorted_by_time = sorted( + _stream_managers_last_used.items(), + key=lambda x: x[1] + ) + + evicted = [] + for chat_id, _ in sorted_by_time[:evict_count]: + if chat_id in _stream_managers: + del _stream_managers[chat_id] + if chat_id in _stream_managers_last_used: + del _stream_managers_last_used[chat_id] + evicted.append(chat_id) + + if evicted: + logger.info(f"🔧 StreamToolHistoryManager LRU淘汰: 释放了 {len(evicted)} 个不活跃的管理器") def get_stream_tool_history_manager(chat_id: str) -> StreamToolHistoryManager: @@ -400,7 +428,14 @@ def get_stream_tool_history_manager(chat_id: str) -> StreamToolHistoryManager: Returns: 工具历史记录管理器实例 """ + import time + + # 🔧 更新最后使用时间 + _stream_managers_last_used[chat_id] = time.time() + if chat_id not in _stream_managers: + # 🔧 检查是否需要淘汰 + _evict_old_stream_managers() _stream_managers[chat_id] = StreamToolHistoryManager(chat_id) return _stream_managers[chat_id] @@ -413,4 +448,6 @@ def cleanup_stream_manager(chat_id: str) -> None: """ if chat_id in _stream_managers: del _stream_managers[chat_id] - logger.info(f"已清理聊天 {chat_id} 的工具历史记录管理器") + if chat_id in _stream_managers_last_used: + del _stream_managers_last_used[chat_id] + logger.info(f"已清理聊天 {chat_id} 的工具历史记录管理器") diff --git a/src/plugin_system/services/relationship_service.py b/src/plugin_system/services/relationship_service.py index 1bb995209..424832c68 100644 --- a/src/plugin_system/services/relationship_service.py +++ b/src/plugin_system/services/relationship_service.py @@ -14,11 +14,19 @@ logger = get_logger("relationship_service") class RelationshipService: - """用户关系分服务 - 独立于插件的数据库直接访问层""" + """用户关系分服务 - 独立于插件的数据库直接访问层 + + 内存优化:添加缓存大小限制和自动过期清理 + """ + + # 🔧 缓存配置 + CACHE_MAX_SIZE = 1000 # 最大缓存用户数 def __init__(self): self._cache: dict[str, dict] = {} # user_id -> {score, text, last_updated} self._cache_ttl = 300 # 缓存5分钟 + self._last_cleanup = time.time() # 上次清理时间 + self._cleanup_interval = 60 # 每60秒清理一次过期条目 async def get_user_relationship_score(self, user_id: str) -> float: """ @@ -162,6 +170,9 @@ class RelationshipService: def _get_from_cache(self, user_id: str) -> dict | None: """从缓存获取数据""" + # 🔧 触发定期清理 + self._maybe_cleanup_expired() + if user_id in self._cache: cached_data = self._cache[user_id] if time.time() - cached_data["last_updated"] < self._cache_ttl: @@ -173,12 +184,46 @@ class RelationshipService: def _update_cache(self, user_id: str, score: float, text: str): """更新缓存""" + # 🔧 内存优化:检查缓存大小限制 + if len(self._cache) >= self.CACHE_MAX_SIZE and user_id not in self._cache: + # 淘汰最旧的 10% 条目 + self._evict_oldest_entries() + self._cache[user_id] = { "score": score, "text": text, "last_updated": time.time() } + def _maybe_cleanup_expired(self): + """🔧 内存优化:定期清理过期条目""" + now = time.time() + if now - self._last_cleanup < self._cleanup_interval: + return + + self._last_cleanup = now + expired_keys = [] + for user_id, data in self._cache.items(): + if now - data["last_updated"] >= self._cache_ttl: + expired_keys.append(user_id) + + for key in expired_keys: + del self._cache[key] + + if expired_keys: + logger.debug(f"🔧 relationship_service 清理了 {len(expired_keys)} 个过期缓存条目") + + def _evict_oldest_entries(self): + """🔧 内存优化:淘汰最旧的条目""" + evict_count = max(1, len(self._cache) // 10) + sorted_entries = sorted( + self._cache.items(), + key=lambda x: x[1]["last_updated"] + ) + for user_id, _ in sorted_entries[:evict_count]: + del self._cache[user_id] + logger.debug(f"🔧 relationship_service LRU淘汰了 {evict_count} 个缓存条目") + async def _fetch_from_database(self, user_id: str) -> UserRelationships | None: """从数据库获取关系数据""" try: From a9fc842287f5523fa775e9cb1371bcbfabb8c8d2 Mon Sep 17 00:00:00 2001 From: Windpicker-owo <3431391539@qq.com> Date: Wed, 3 Dec 2025 12:48:31 +0800 Subject: [PATCH 3/9] =?UTF-8?q?feat(expression):=20=E6=B7=BB=E5=8A=A0?= =?UTF-8?q?=E8=A7=A3=E6=9E=90=E8=81=8A=E5=A4=A9=E6=B5=81=E9=85=8D=E7=BD=AE?= =?UTF-8?q?=E5=92=8C=E8=8E=B7=E5=8F=96=E7=9B=B8=E5=85=B3=E8=81=8A=E5=A4=A9?= =?UTF-8?q?ID=E7=9A=84=E5=8A=9F=E8=83=BD=E4=BB=A5=E6=94=AF=E6=8C=81?= =?UTF-8?q?=E5=85=B1=E4=BA=AB=E7=BB=84=E8=AE=AD=E7=BB=83?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/chat/express/expression_learner.py | 120 +++++++++++++++++++------ 1 file changed, 93 insertions(+), 27 deletions(-) diff --git a/src/chat/express/expression_learner.py b/src/chat/express/expression_learner.py index 8ddf296d9..c1c7dd1e7 100644 --- a/src/chat/express/expression_learner.py +++ b/src/chat/express/expression_learner.py @@ -1,3 +1,4 @@ +import hashlib import os import time from datetime import datetime @@ -126,6 +127,55 @@ class ExpressionLearner: self.min_learning_interval = 300 # 最短学习时间间隔(秒) self._chat_name_initialized = False + @staticmethod + def _parse_stream_config_to_chat_id(stream_config_str: str) -> str | None: + """解析'platform:id:type'为chat_id(与get_stream_id一致)""" + try: + parts = stream_config_str.split(":") + if len(parts) != 3: + return None + platform = parts[0] + id_str = parts[1] + stream_type = parts[2] + is_group = stream_type == "group" + if is_group: + components = [platform, str(id_str)] + else: + components = [platform, str(id_str), "private"] + key = "_".join(components) + return hashlib.md5(key.encode()).hexdigest() + except Exception: + return None + + def get_related_chat_ids(self) -> list[str]: + """根据expression.rules配置,获取与当前chat_id相关的所有chat_id(包括自身) + + 用于共享组功能:同一共享组内的聊天流可以共享学习到的表达方式 + """ + if global_config is None: + return [self.chat_id] + rules = global_config.expression.rules + current_group = None + + # 找到当前chat_id所在的组 + for rule in rules: + if rule.chat_stream_id and self._parse_stream_config_to_chat_id(rule.chat_stream_id) == self.chat_id: + current_group = rule.group + break + + # 始终包含当前 chat_id(确保至少能查到自己的数据) + related_chat_ids = [self.chat_id] + + if current_group: + # 找出同一组的所有chat_id + for rule in rules: + if rule.group == current_group and rule.chat_stream_id: + if chat_id_candidate := self._parse_stream_config_to_chat_id(rule.chat_stream_id): + if chat_id_candidate not in related_chat_ids: + related_chat_ids.append(chat_id_candidate) + + return related_chat_ids + async def _initialize_chat_name(self): """异步初始化chat_name""" if not self._chat_name_initialized: @@ -540,46 +590,62 @@ class ExpressionLearner: # 提交后清除相关缓存 await session.commit() - # 清除该chat_id的表达方式缓存 + # 🔥 清除共享组内所有 chat_id 的表达方式缓存 from src.common.database.optimization.cache_manager import get_cache from src.common.database.utils.decorators import generate_cache_key cache = await get_cache() - await cache.delete(generate_cache_key("chat_expressions", chat_id)) + + # 获取共享组内所有 chat_id 并清除其缓存 + related_chat_ids = self.get_related_chat_ids() + for related_id in related_chat_ids: + await cache.delete(generate_cache_key("chat_expressions", related_id)) + if len(related_chat_ids) > 1: + logger.debug(f"已清除共享组内 {len(related_chat_ids)} 个 chat_id 的表达方式缓存") - # 🔥 训练 StyleLearner + # 🔥 训练 StyleLearner(支持共享组) # 只对 style 类型的表达方式进行训练(grammar 不需要训练到模型) if type == "style": try: - # 获取 StyleLearner 实例 - learner = style_learner_manager.get_learner(chat_id) + logger.debug(f"开始训练 StyleLearner: 源chat_id={chat_id}, 共享组包含 {len(related_chat_ids)} 个chat_id, 样本数={len(expr_list)}") - logger.debug(f"开始训练 StyleLearner: chat_id={chat_id}, 样本数={len(expr_list)}") + # 为每个共享组内的 chat_id 训练其 StyleLearner + for target_chat_id in related_chat_ids: + learner = style_learner_manager.get_learner(target_chat_id) + + # 为每个学习到的表达方式训练模型 + # 使用 situation 作为输入,style 作为目标 + # 这是最符合语义的方式:场景 -> 表达方式 + success_count = 0 + for expr in expr_list: + situation = expr["situation"] + style = expr["style"] - # 为每个学习到的表达方式训练模型 - # 使用 situation 作为输入,style 作为目标 - # 这是最符合语义的方式:场景 -> 表达方式 - success_count = 0 - for expr in expr_list: - situation = expr["situation"] - style = expr["style"] + # 训练映射关系: situation -> style + if learner.learn_mapping(situation, style): + success_count += 1 + else: + logger.warning(f"训练失败 (target={target_chat_id}): {situation} -> {style}") - # 训练映射关系: situation -> style - if learner.learn_mapping(situation, style): - success_count += 1 + # 保存模型 + if learner.save(style_learner_manager.model_save_path): + logger.debug(f"StyleLearner 模型保存成功: {target_chat_id}") else: - logger.warning(f"训练失败: {situation} -> {style}") + logger.error(f"StyleLearner 模型保存失败: {target_chat_id}") - logger.info( - f"StyleLearner 训练完成: {success_count}/{len(expr_list)} 成功, " - f"当前风格总数={len(learner.get_all_styles())}, " - f"总样本数={learner.learning_stats['total_samples']}" - ) + if target_chat_id == chat_id: + # 只为源 chat_id 记录详细日志 + logger.info( + f"StyleLearner 训练完成 (源): {success_count}/{len(expr_list)} 成功, " + f"当前风格总数={len(learner.get_all_styles())}, " + f"总样本数={learner.learning_stats['total_samples']}" + ) + else: + logger.debug( + f"StyleLearner 训练完成 (共享组成员 {target_chat_id}): {success_count}/{len(expr_list)} 成功" + ) - # 保存模型 - if learner.save(style_learner_manager.model_save_path): - logger.debug(f"StyleLearner 模型保存成功: {chat_id}") - else: - logger.error(f"StyleLearner 模型保存失败: {chat_id}") + if len(related_chat_ids) > 1: + logger.info(f"共享组内共 {len(related_chat_ids)} 个 StyleLearner 已同步训练") except Exception as e: logger.error(f"训练 StyleLearner 失败: {e}") From 16afd8f6ff374dc6ed329937317867d4b3ffff81 Mon Sep 17 00:00:00 2001 From: Windpicker-owo <3431391539@qq.com> Date: Wed, 3 Dec 2025 13:13:41 +0800 Subject: [PATCH 4/9] =?UTF-8?q?feat(filter):=20=E6=B7=BB=E5=8A=A0=E6=97=A0?= =?UTF-8?q?=E6=84=8F=E4=B9=89=E6=B6=88=E6=81=AF=E8=BF=87=E6=BB=A4=E5=8A=9F?= =?UTF-8?q?=E8=83=BD=E4=BB=A5=E4=BC=98=E5=8C=96=E8=A1=A8=E8=BE=BE=E5=AD=A6?= =?UTF-8?q?=E4=B9=A0=E6=95=88=E6=9E=9C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/chat/express/expression_learner.py | 11 ++++- src/chat/utils/chat_message_builder.py | 65 +++++++++++++++++++++++++- src/common/message_repository.py | 10 ++++ 3 files changed, 82 insertions(+), 4 deletions(-) diff --git a/src/chat/express/expression_learner.py b/src/chat/express/expression_learner.py index c1c7dd1e7..6343badb5 100644 --- a/src/chat/express/expression_learner.py +++ b/src/chat/express/expression_learner.py @@ -670,13 +670,14 @@ class ExpressionLearner: current_time = time.time() - # 获取上次学习时间,过滤掉机器人自己的消息 + # 获取上次学习时间,过滤掉机器人自己的消息和无意义消息 random_msg: list[dict[str, Any]] | None = await get_raw_msg_by_timestamp_with_chat_inclusive( chat_id=self.chat_id, timestamp_start=self.last_learning_time, timestamp_end=current_time, limit=num, filter_bot=True, # 过滤掉机器人自己的消息,防止学习自己的表达方式 + filter_meaningless=True, # 🔥 过滤掉表情包、通知等无意义消息 ) # print(random_msg) @@ -685,8 +686,14 @@ class ExpressionLearner: # 转化成str chat_id: str = random_msg[0]["chat_id"] # random_msg_str: str = build_readable_messages(random_msg, timestamp_mode="normal") - random_msg_str: str = await build_anonymous_messages(random_msg) + # 🔥 启用表达学习场景的过滤,过滤掉纯回复、纯@、纯图片等无意义内容 + random_msg_str: str = await build_anonymous_messages(random_msg, filter_for_learning=True) # print(f"random_msg_str:{random_msg_str}") + + # 🔥 检查过滤后是否还有足够的内容 + if not random_msg_str or len(random_msg_str.strip()) < 20: + logger.debug(f"过滤后消息内容不足,跳过本次{type_str}学习") + return None prompt: str = await global_prompt_manager.format_prompt( prompt, diff --git a/src/chat/utils/chat_message_builder.py b/src/chat/utils/chat_message_builder.py index 7cd4e0596..79883fe7b 100644 --- a/src/chat/utils/chat_message_builder.py +++ b/src/chat/utils/chat_message_builder.py @@ -224,10 +224,12 @@ async def get_raw_msg_by_timestamp_with_chat_inclusive( limit: int = 0, limit_mode: str = "latest", filter_bot=False, + filter_meaningless=False, ) -> list[dict[str, Any]]: """获取在特定聊天从指定时间戳到指定时间戳的消息(包含边界),按时间升序排序,返回消息列表 limit: 限制返回的消息数量,0为不限制 limit_mode: 当 limit > 0 时生效。 'earliest' 表示获取最早的记录, 'latest' 表示获取最新的记录。默认为 'latest'。 + filter_meaningless: 是否过滤无意义消息(表情包、通知等)。用于表达学习等场景。 """ filter_query = {"chat_id": chat_id, "time": {"$gte": timestamp_start, "$lte": timestamp_end}} # 只有当 limit 为 0 时才应用外部 sort @@ -235,7 +237,12 @@ async def get_raw_msg_by_timestamp_with_chat_inclusive( # 直接将 limit_mode 传递给 find_messages return await find_messages( - message_filter=filter_query, sort=sort_order, limit=limit, limit_mode=limit_mode, filter_bot=filter_bot + message_filter=filter_query, + sort=sort_order, + limit=limit, + limit_mode=limit_mode, + filter_bot=filter_bot, + filter_meaningless=filter_meaningless, ) @@ -1114,10 +1121,14 @@ async def build_readable_messages( return "".join(result_parts) -async def build_anonymous_messages(messages: list[dict[str, Any]]) -> str: +async def build_anonymous_messages(messages: list[dict[str, Any]], filter_for_learning: bool = False) -> str: """ 构建匿名可读消息,将不同人的名称转为唯一占位符(A、B、C...),bot自己用SELF。 处理 回复 和 @ 字段,将bbb映射为匿名占位符。 + + Args: + messages: 消息列表 + filter_for_learning: 是否为表达学习场景进行额外过滤(过滤掉纯回复、纯@、纯图片等无意义内容) """ assert global_config is not None if not messages: @@ -1151,6 +1162,52 @@ async def build_anonymous_messages(messages: list[dict[str, Any]]) -> str: person_map[person_id] = chr(current_char) current_char += 1 return person_map[person_id] + + def is_meaningless_content(content: str, msg: dict) -> bool: + """ + 判断消息内容是否无意义(用于表达学习过滤) + """ + if not content or not content.strip(): + return True + + stripped = content.strip() + + # 检查消息标记字段 + if msg.get("is_emoji", False): + return True + if msg.get("is_notify", False): + return True + if msg.get("is_public_notice", False): + return True + if msg.get("is_command", False): + return True + + # 🔥 检查纯回复消息(只有[回复]没有其他内容) + reply_pattern = r"^\s*\[回复[^\]]*\]\s*$" + if re.match(reply_pattern, stripped): + return True + + # 🔥 检查纯@消息(只有@xxx没有其他内容) + at_pattern = r"^\s*(@[^\s]+\s*)+$" + if re.match(at_pattern, stripped): + return True + + # 🔥 检查纯图片消息 + image_pattern = r"^\s*(\[图片\]|\[动画表情\]|\[表情\]|\[picid:[^\]]+\])\s*$" + if re.match(image_pattern, stripped): + return True + + # 🔥 移除回复标记、@标记、图片标记后检查是否还有实质内容 + clean_content = re.sub(r"\[回复[^\]]*\]", "", stripped) + clean_content = re.sub(r"@[^\s]+", "", clean_content) + clean_content = re.sub(r"\[图片\]|\[动画表情\]|\[表情\]|\[picid:[^\]]+\]", "", clean_content) + clean_content = clean_content.strip() + + # 如果移除后内容太短(少于2个字符),认为无意义 + if len(clean_content) < 2: + return True + + return False for msg in messages: try: @@ -1170,6 +1227,10 @@ async def build_anonymous_messages(messages: list[dict[str, Any]]) -> str: # For anonymous messages, we just replace with a placeholder. content = re.sub(r"\[picid:([^\]]+)\]", "[图片]", content) + + # 🔥 表达学习场景:过滤无意义消息 + if filter_for_learning and is_meaningless_content(content, msg): + continue # if not all([platform, user_id, timestamp is not None]): # continue diff --git a/src/common/message_repository.py b/src/common/message_repository.py index b74b76f20..392fd001d 100644 --- a/src/common/message_repository.py +++ b/src/common/message_repository.py @@ -38,6 +38,7 @@ async def find_messages( limit_mode: str = "latest", filter_bot=False, filter_command=False, + filter_meaningless=False, ) -> list[dict[str, Any]]: """ 根据提供的过滤器、排序和限制条件查找消息。 @@ -47,6 +48,7 @@ async def find_messages( sort: 排序条件列表,例如 [('time', 1)] (1 for asc, -1 for desc)。仅在 limit 为 0 时生效。 limit: 返回的最大文档数,0表示不限制。 limit_mode: 当 limit > 0 时生效。 'earliest' 表示获取最早的记录, 'latest' 表示获取最新的记录(结果仍按时间正序排列)。默认为 'latest'。 + filter_meaningless: 是否过滤无意义消息(表情包、通知、纯回复等)。用于表达学习等场景。 Returns: 消息字典列表,如果出错则返回空列表。 @@ -95,6 +97,14 @@ async def find_messages( if filter_command: query = query.where(not_(Messages.is_command)) + # 🔥 过滤无意义消息(用于表达学习等场景) + if filter_meaningless: + # 排除:纯表情包、通知消息、公告消息、命令消息 + query = query.where(not_(Messages.is_emoji)) + query = query.where(not_(Messages.is_notify)) + query = query.where(not_(Messages.is_public_notice)) + query = query.where(not_(Messages.is_command)) + if limit > 0: # 确保limit是正整数 limit = max(1, int(limit)) From c1da7452d68126c5ff06539fc6c026deafdd2c9e Mon Sep 17 00:00:00 2001 From: Windpicker-owo <3431391539@qq.com> Date: Wed, 3 Dec 2025 13:22:07 +0800 Subject: [PATCH 5/9] =?UTF-8?q?feat(interest):=20=E5=A2=9E=E5=8A=A0LLM?= =?UTF-8?q?=E5=85=B4=E8=B6=A3=E6=A0=87=E7=AD=BE=E7=94=9F=E6=88=90=E6=97=B6?= =?UTF-8?q?=E7=9A=84=E8=B6=85=E6=97=B6=E8=AE=BE=E7=BD=AE=EF=BC=8C=E7=A1=AE?= =?UTF-8?q?=E4=BF=9D=E5=88=9D=E5=A7=8B=E5=8C=96=E9=98=B6=E6=AE=B5=E4=B8=8D?= =?UTF-8?q?=E5=9B=A0=E8=B6=85=E6=97=B6=E5=A4=B1=E8=B4=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../interest_system/bot_interest_manager.py | 50 +++++++++++++++---- 1 file changed, 41 insertions(+), 9 deletions(-) diff --git a/src/chat/interest_system/bot_interest_manager.py b/src/chat/interest_system/bot_interest_manager.py index 7843588be..52e71ed84 100644 --- a/src/chat/interest_system/bot_interest_manager.py +++ b/src/chat/interest_system/bot_interest_manager.py @@ -246,7 +246,11 @@ class BotInterestManager: raise async def _call_llm_for_interest_generation(self, prompt: str) -> str | None: - """调用LLM生成兴趣标签""" + """调用LLM生成兴趣标签 + + 注意:此方法会临时增加 API 超时时间,以确保初始化阶段的人设标签生成 + 不会因用户配置的较短超时而失败。 + """ try: logger.debug("配置LLM客户端...") @@ -267,14 +271,42 @@ class BotInterestManager: # 使用replyer模型配置 replyer_config = model_config.model_task_config.replyer - # 调用LLM API - success, response, reasoning_content, model_name = await llm_api.generate_with_model( - prompt=full_prompt, - model_config=replyer_config, - request_type="interest_generation", - temperature=0.7, - max_tokens=2000, - ) + # 🔧 临时增加超时时间,避免初始化阶段因超时失败 + # 人设标签生成需要较长时间(15-25个标签的JSON),使用更长的超时 + INIT_TIMEOUT = 180 # 初始化阶段使用 180 秒超时 + original_timeouts: dict[str, int] = {} + + try: + # 保存并修改所有相关模型的 API provider 超时设置 + for model_name in replyer_config.model_list: + try: + model_info = model_config.get_model_info(model_name) + provider = model_config.get_provider(model_info.api_provider) + original_timeouts[provider.name] = provider.timeout + if provider.timeout < INIT_TIMEOUT: + logger.debug(f"⏱️ 临时增加 API provider '{provider.name}' 超时: {provider.timeout}s → {INIT_TIMEOUT}s") + provider.timeout = INIT_TIMEOUT + except Exception as e: + logger.warning(f"⚠️ 无法修改模型 '{model_name}' 的超时设置: {e}") + + # 调用LLM API + success, response, reasoning_content, model_name = await llm_api.generate_with_model( + prompt=full_prompt, + model_config=replyer_config, + request_type="interest_generation", + temperature=0.7, + max_tokens=2000, + ) + finally: + # 🔧 恢复原始超时设置 + for provider_name, original_timeout in original_timeouts.items(): + try: + provider = model_config.get_provider(provider_name) + if provider.timeout != original_timeout: + logger.debug(f"⏱️ 恢复 API provider '{provider_name}' 超时: {provider.timeout}s → {original_timeout}s") + provider.timeout = original_timeout + except Exception as e: + logger.warning(f"⚠️ 无法恢复 provider '{provider_name}' 的超时设置: {e}") if success and response: # 直接返回原始响应,后续使用统一的 JSON 解析工具 From fe48b8cc71512c224712401062653a0b5dda4302 Mon Sep 17 00:00:00 2001 From: Windpicker-owo <3431391539@qq.com> Date: Wed, 3 Dec 2025 13:33:43 +0800 Subject: [PATCH 6/9] =?UTF-8?q?feat(prompt):=20=E6=B7=BB=E5=8A=A0=E5=AE=89?= =?UTF-8?q?=E5=85=A8=E4=BA=92=E5=8A=A8=E5=87=86=E5=88=99=E5=9D=97=E4=BB=A5?= =?UTF-8?q?=E5=A2=9E=E5=BC=BA=E7=94=A8=E6=88=B7=E4=BA=A4=E4=BA=92=E5=AE=89?= =?UTF-8?q?=E5=85=A8=E6=80=A7?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/chat/message_receive/message_handler.py | 4 +-- .../kokoro_flow_chatter/prompt/builder.py | 30 +++++++++++++++++++ .../kokoro_flow_chatter/prompt/prompts.py | 6 ++++ 3 files changed, 38 insertions(+), 2 deletions(-) diff --git a/src/chat/message_receive/message_handler.py b/src/chat/message_receive/message_handler.py index 46c86bcee..85ebdbf17 100644 --- a/src/chat/message_receive/message_handler.py +++ b/src/chat/message_receive/message_handler.py @@ -470,8 +470,8 @@ class MessageHandler: # 过滤检查 raw_text = message.display_message or message.processed_plain_text or "" - if _check_ban_words(processed_text, chat, user_info) or _check_ban_regex( - raw_text, chat, user_info + if _check_ban_words(processed_text, chat, message.user_info) or _check_ban_regex( + raw_text, chat, message.user_info ): return None diff --git a/src/plugins/built_in/kokoro_flow_chatter/prompt/builder.py b/src/plugins/built_in/kokoro_flow_chatter/prompt/builder.py index 6a7bb4fd1..ce0e10480 100644 --- a/src/plugins/built_in/kokoro_flow_chatter/prompt/builder.py +++ b/src/plugins/built_in/kokoro_flow_chatter/prompt/builder.py @@ -69,6 +69,9 @@ class PromptBuilder: # 1. 构建人设块 persona_block = self._build_persona_block() + # 1.5. 构建安全互动准则块 + safety_guidelines_block = self._build_safety_guidelines_block() + # 2. 使用 context_builder 获取关系、记忆、表达习惯等 context_data = await self._build_context_data(user_name, chat_stream, user_id) relation_block = context_data.get("relation_info", f"你与 {user_name} 还不太熟悉,这是早期的交流阶段。") @@ -97,6 +100,7 @@ class PromptBuilder: PROMPT_NAMES["main"], user_name=user_name, persona_block=persona_block, + safety_guidelines_block=safety_guidelines_block, relation_block=relation_block, memory_block=memory_block or "(暂无相关记忆)", expression_habits=expression_habits or "(根据自然对话风格回复即可)", @@ -140,6 +144,9 @@ class PromptBuilder: # 1. 构建人设块 persona_block = self._build_persona_block() + # 1.5. 构建安全互动准则块 + safety_guidelines_block = self._build_safety_guidelines_block() + # 2. 使用 context_builder 获取关系、记忆、表达习惯等 context_data = await self._build_context_data(user_name, chat_stream, user_id) relation_block = context_data.get("relation_info", f"你与 {user_name} 还不太熟悉,这是早期的交流阶段。") @@ -167,6 +174,7 @@ class PromptBuilder: PROMPT_NAMES["replyer"], user_name=user_name, persona_block=persona_block, + safety_guidelines_block=safety_guidelines_block, relation_block=relation_block, memory_block=memory_block or "(暂无相关记忆)", activity_stream=activity_stream or "(这是你们第一次聊天)", @@ -198,6 +206,24 @@ class PromptBuilder: return "\n\n".join(parts) if parts else "你是一个温暖、真诚的人。" + def _build_safety_guidelines_block(self) -> str: + """ + 构建安全互动准则块 + + 从配置中读取 safety_guidelines,构建成提示词格式 + """ + if global_config is None: + return "" + + safety_guidelines = global_config.personality.safety_guidelines + if not safety_guidelines: + return "" + + guidelines_text = "\n".join(f"{i + 1}. {line}" for i, line in enumerate(safety_guidelines)) + return f"""在任何情况下,你都必须遵守以下由你的设定者为你定义的原则: +{guidelines_text} +如果遇到违反上述原则的请求,请在保持你核心人设的同时,以合适的方式进行回应。""" + def _build_combined_expression_block(self, learned_habits: str) -> str: """ 构建合并后的表达习惯块 @@ -881,6 +907,9 @@ class PromptBuilder: # 1. 构建人设块 persona_block = self._build_persona_block() + # 1.5. 构建安全互动准则块 + safety_guidelines_block = self._build_safety_guidelines_block() + # 2. 使用 context_builder 获取关系、记忆、表达习惯等 context_data = await self._build_context_data(user_name, chat_stream, user_id) relation_block = context_data.get("relation_info", f"你与 {user_name} 还不太熟悉,这是早期的交流阶段。") @@ -909,6 +938,7 @@ class PromptBuilder: PROMPT_NAMES["main"], user_name=user_name, persona_block=persona_block, + safety_guidelines_block=safety_guidelines_block, relation_block=relation_block, memory_block=memory_block or "(暂无相关记忆)", expression_habits=expression_habits or "(根据自然对话风格回复即可)", diff --git a/src/plugins/built_in/kokoro_flow_chatter/prompt/prompts.py b/src/plugins/built_in/kokoro_flow_chatter/prompt/prompts.py index 9f5956415..6ac97718c 100644 --- a/src/plugins/built_in/kokoro_flow_chatter/prompt/prompts.py +++ b/src/plugins/built_in/kokoro_flow_chatter/prompt/prompts.py @@ -17,6 +17,9 @@ kfc_MAIN_PROMPT = Prompt( ## 人设 {persona_block} +## 安全互动准则 +{safety_guidelines_block} + ## 你与 {user_name} 的关系 {relation_block} @@ -272,6 +275,9 @@ kfc_REPLYER_PROMPT = Prompt( ## 人设 {persona_block} +## 安全互动准则 +{safety_guidelines_block} + ## 你与 {user_name} 的关系 {relation_block} From 6233e27d4637bee47564b2d0a156387b8abbb1e0 Mon Sep 17 00:00:00 2001 From: Windpicker-owo <3431391539@qq.com> Date: Wed, 3 Dec 2025 13:48:15 +0800 Subject: [PATCH 7/9] =?UTF-8?q?fix(message=5Fhandler):=20=E4=BF=AE?= =?UTF-8?q?=E5=A4=8D=E5=85=A8=E5=B1=80=E5=B0=81=E7=A6=81=E7=94=A8=E6=88=B7?= =?UTF-8?q?=E5=88=97=E8=A1=A8=E7=9A=84=E7=94=A8=E6=88=B7ID=E5=A4=84?= =?UTF-8?q?=E7=90=86=E6=96=B9=E5=BC=8F=EF=BC=8C=E7=A1=AE=E4=BF=9DID?= =?UTF-8?q?=E4=B8=BA=E5=AD=97=E7=AC=A6=E4=B8=B2=E6=A0=BC=E5=BC=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../napcat_adapter/src/handlers/to_core/message_handler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plugins/built_in/napcat_adapter/src/handlers/to_core/message_handler.py b/src/plugins/built_in/napcat_adapter/src/handlers/to_core/message_handler.py index b0188a8c3..3babf85e6 100644 --- a/src/plugins/built_in/napcat_adapter/src/handlers/to_core/message_handler.py +++ b/src/plugins/built_in/napcat_adapter/src/handlers/to_core/message_handler.py @@ -60,7 +60,7 @@ class MessageHandler: user_id = str(sender_info.get("user_id", "")) # 检查全局封禁用户列表 - ban_user_ids = features_config.get("ban_user_id", []) + ban_user_ids = [str(item) for item in features_config.get("ban_user_id", [])] if user_id in ban_user_ids: logger.debug(f"用户 {user_id} 在全局封禁列表中,消息被过滤") return False From a36e2fdf92caabec37aae8efdb0ab25cc306590b Mon Sep 17 00:00:00 2001 From: Windpicker-owo <3431391539@qq.com> Date: Wed, 3 Dec 2025 13:55:37 +0800 Subject: [PATCH 8/9] =?UTF-8?q?feat(set=5Femoji=5Flike):=20=E6=B7=BB?= =?UTF-8?q?=E5=8A=A0=E7=BE=A4=E8=81=8A=E6=A3=80=E6=9F=A5=EF=BC=8C=E7=A1=AE?= =?UTF-8?q?=E4=BF=9D=E8=A1=A8=E6=83=85=E5=9B=9E=E5=BA=94=E5=8A=A8=E4=BD=9C?= =?UTF-8?q?=E4=BB=85=E5=9C=A8=E7=BE=A4=E8=81=8A=E4=B8=AD=E6=9C=89=E6=95=88?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/plugins/built_in/napcat_adapter/plugin.py | 1 + src/plugins/built_in/social_toolkit_plugin/plugin.py | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/src/plugins/built_in/napcat_adapter/plugin.py b/src/plugins/built_in/napcat_adapter/plugin.py index 874d7a6d1..191437abf 100644 --- a/src/plugins/built_in/napcat_adapter/plugin.py +++ b/src/plugins/built_in/napcat_adapter/plugin.py @@ -176,6 +176,7 @@ class NapcatAdapter(BaseAdapter): # 消息事件 if post_type == "message": return await self.message_handler.handle_raw_message(raw) # type: ignore[return-value] + # 通知事件 elif post_type == "notice": return await self.notice_handler.handle_notice(raw) # type: ignore[return-value] diff --git a/src/plugins/built_in/social_toolkit_plugin/plugin.py b/src/plugins/built_in/social_toolkit_plugin/plugin.py index 31fe45caa..8796189a2 100644 --- a/src/plugins/built_in/social_toolkit_plugin/plugin.py +++ b/src/plugins/built_in/social_toolkit_plugin/plugin.py @@ -244,6 +244,14 @@ class SetEmojiLikeAction(BaseAction): async def execute(self) -> tuple[bool, str]: """执行设置表情回应的动作""" + # 检查是否在群聊中,该动作仅在群聊中有效 + if not self.is_group: + logger.warning("set_emoji_like 动作仅在群聊中有效,当前为私聊场景") + await self.store_action_info( + action_prompt_display="贴表情失败: 该功能仅在群聊中可用", action_done=False + ) + return False, "该功能仅在群聊中可用" + message_id = None set_like = self.action_data.get("set", True) From 55919f90db9c45400f4017f5367adfc888ea2520 Mon Sep 17 00:00:00 2001 From: Windpicker-owo <3431391539@qq.com> Date: Wed, 3 Dec 2025 14:30:40 +0800 Subject: [PATCH 9/9] =?UTF-8?q?feat(context=5Fbuilder):=20=E6=9B=B4?= =?UTF-8?q?=E6=96=B0=E8=AE=B0=E5=BF=86=E5=9D=97=E6=9E=84=E5=BB=BA=E9=80=BB?= =?UTF-8?q?=E8=BE=91=EF=BC=8C=E6=B7=BB=E5=8A=A0=E6=9F=A5=E8=AF=A2=E6=96=87?= =?UTF-8?q?=E6=9C=AC=E8=8E=B7=E5=8F=96=E7=AD=96=E7=95=A5=E4=BB=A5=E6=8F=90?= =?UTF-8?q?=E5=8D=87=E8=AE=B0=E5=BF=86=E6=A3=80=E7=B4=A2=E6=95=88=E6=9E=9C?= =?UTF-8?q?=20feat(prompt=5Fbuilder):=20=E6=89=A9=E5=B1=95=E4=B8=8A?= =?UTF-8?q?=E4=B8=8B=E6=96=87=E6=95=B0=E6=8D=AE=E6=9E=84=E5=BB=BA=EF=BC=8C?= =?UTF-8?q?=E6=94=AF=E6=8C=81=E4=BC=9A=E8=AF=9D=E5=92=8C=E6=83=85=E5=86=B5?= =?UTF-8?q?=E7=B1=BB=E5=9E=8B=E5=8F=82=E6=95=B0=E4=BB=A5=E4=BC=98=E5=8C=96?= =?UTF-8?q?=E6=8F=90=E7=A4=BA=E8=AF=8D=E7=94=9F=E6=88=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../built_in/kokoro_flow_chatter/chatter.py | 4 + .../kokoro_flow_chatter/context_builder.py | 19 ++- .../kokoro_flow_chatter/prompt/builder.py | 151 ++++++++++++++++-- .../kokoro_flow_chatter/prompt/prompts.py | 8 +- 4 files changed, 168 insertions(+), 14 deletions(-) diff --git a/src/plugins/built_in/kokoro_flow_chatter/chatter.py b/src/plugins/built_in/kokoro_flow_chatter/chatter.py index 08b494133..5b558c75c 100644 --- a/src/plugins/built_in/kokoro_flow_chatter/chatter.py +++ b/src/plugins/built_in/kokoro_flow_chatter/chatter.py @@ -325,6 +325,10 @@ class KokoroFlowChatter(BaseChatter): """ if session.status == SessionStatus.WAITING: # 之前在等待 + # 如果 max_wait_seconds <= 0,说明不是有效的等待状态,视为新消息 + if session.waiting_config.max_wait_seconds <= 0: + return "new_message" + if session.waiting_config.is_timeout(): # 超时了才收到回复 return "reply_late" diff --git a/src/plugins/built_in/kokoro_flow_chatter/context_builder.py b/src/plugins/built_in/kokoro_flow_chatter/context_builder.py index a83a2bf22..c9adf318a 100644 --- a/src/plugins/built_in/kokoro_flow_chatter/context_builder.py +++ b/src/plugins/built_in/kokoro_flow_chatter/context_builder.py @@ -177,7 +177,12 @@ class KFCContextBuilder: return f"你与{sender_name}是普通朋友关系。" async def _build_memory_block(self, chat_history: str, target_message: str) -> str: - """构建记忆块(使用三层记忆系统)""" + """构建记忆块(使用三层记忆系统) + + Args: + chat_history: 聊天历史文本 + target_message: 目标消息/查询文本。如果为空,将使用 chat_history 的前 200 字符作为查询 + """ config = _get_config() if not (config.memory and config.memory.enable): @@ -192,8 +197,18 @@ class KFCContextBuilder: logger.debug("[三层记忆] 管理器未初始化") return "" + # 如果 target_message 为空,使用 chat_history 的前 200 字符作为查询 + query_text = target_message.strip() if target_message else "" + if not query_text and chat_history: + query_text = chat_history[:200].strip() + logger.debug(f"[三层记忆] target_message 为空,使用 chat_history 前 200 字符作为查询") + + if not query_text: + logger.debug("[三层记忆] 没有可用的查询文本,跳过记忆搜索") + return "" + search_result = await unified_manager.search_memories( - query_text=target_message, + query_text=query_text, use_judge=True, recent_chat_history=chat_history, ) diff --git a/src/plugins/built_in/kokoro_flow_chatter/prompt/builder.py b/src/plugins/built_in/kokoro_flow_chatter/prompt/builder.py index ce0e10480..cdd371e63 100644 --- a/src/plugins/built_in/kokoro_flow_chatter/prompt/builder.py +++ b/src/plugins/built_in/kokoro_flow_chatter/prompt/builder.py @@ -73,7 +73,10 @@ class PromptBuilder: safety_guidelines_block = self._build_safety_guidelines_block() # 2. 使用 context_builder 获取关系、记忆、表达习惯等 - context_data = await self._build_context_data(user_name, chat_stream, user_id) + context_data = await self._build_context_data( + user_name, chat_stream, user_id, + session=session, situation_type=situation_type + ) relation_block = context_data.get("relation_info", f"你与 {user_name} 还不太熟悉,这是早期的交流阶段。") memory_block = context_data.get("memory_block", "") expression_habits = self._build_combined_expression_block(context_data.get("expression_habits", "")) @@ -148,7 +151,10 @@ class PromptBuilder: safety_guidelines_block = self._build_safety_guidelines_block() # 2. 使用 context_builder 获取关系、记忆、表达习惯等 - context_data = await self._build_context_data(user_name, chat_stream, user_id) + context_data = await self._build_context_data( + user_name, chat_stream, user_id, + session=session, situation_type=situation_type + ) relation_block = context_data.get("relation_info", f"你与 {user_name} 还不太熟悉,这是早期的交流阶段。") memory_block = context_data.get("memory_block", "") expression_habits = self._build_combined_expression_block(context_data.get("expression_habits", "")) @@ -259,11 +265,20 @@ class PromptBuilder: user_name: str, chat_stream: Optional["ChatStream"], user_id: Optional[str] = None, + session: Optional[KokoroSession] = None, + situation_type: str = "new_message", ) -> dict[str, str]: """ 使用 KFCContextBuilder 构建完整的上下文数据 包括:关系信息、记忆、表达习惯等 + + Args: + user_name: 用户名称 + chat_stream: 聊天流对象 + user_id: 用户ID + session: KokoroSession 会话对象(用于超时/主动思考场景) + situation_type: 情况类型(用于判断记忆搜索策略) """ if not chat_stream: return { @@ -280,12 +295,13 @@ class PromptBuilder: builder = self._context_builder(chat_stream) - # 获取最近的消息作为 target_message(用于记忆检索) - target_message = "" - if chat_stream.context: - unread = chat_stream.context.get_unread_messages() - if unread: - target_message = unread[-1].processed_plain_text or unread[-1].display_message or "" + # 获取用于记忆检索的查询文本 + target_message = await self._get_memory_search_query( + chat_stream=chat_stream, + session=session, + situation_type=situation_type, + user_name=user_name, + ) context_data = await builder.build_all_context( sender_name=user_name, @@ -304,6 +320,113 @@ class PromptBuilder: "expression_habits": "", } + async def _get_memory_search_query( + self, + chat_stream: Optional["ChatStream"], + session: Optional[KokoroSession], + situation_type: str, + user_name: str, + ) -> str: + """ + 根据场景类型获取合适的记忆搜索查询文本 + + 策略: + 1. 优先使用未读消息(new_message/reply_in_time/reply_late) + 2. 如果没有未读消息(timeout/proactive),使用最近的历史消息 + 3. 如果历史消息也为空,从 session 的 mental_log 中提取 + 4. 最后回退到用户名作为查询 + + Args: + chat_stream: 聊天流对象 + session: KokoroSession 会话对象 + situation_type: 情况类型 + user_name: 用户名称 + + Returns: + 用于记忆搜索的查询文本 + """ + target_message = "" + + # 策略1: 优先从未读消息获取(适用于 new_message/reply_in_time/reply_late) + if chat_stream and chat_stream.context: + unread = chat_stream.context.get_unread_messages() + if unread: + target_message = unread[-1].processed_plain_text or unread[-1].display_message or "" + if target_message: + logger.debug(f"[记忆搜索] 使用未读消息作为查询: {target_message[:50]}...") + return target_message + + # 策略2: 从最近的历史消息获取(适用于 timeout/proactive) + if chat_stream and chat_stream.context: + history_messages = chat_stream.context.history_messages + if history_messages: + # 获取最近的几条非通知消息,组合成查询 + recent_texts = [] + for msg in reversed(history_messages[-5:]): + content = getattr(msg, "processed_plain_text", "") or getattr(msg, "display_message", "") + if content and not getattr(msg, "is_notify", False): + recent_texts.append(content) + if len(recent_texts) >= 3: + break + + if recent_texts: + target_message = " ".join(reversed(recent_texts)) + logger.debug(f"[记忆搜索] 使用历史消息作为查询 (situation={situation_type}): {target_message[:80]}...") + return target_message + + # 策略3: 从 session 的 mental_log 中提取(超时/主动思考场景的最后手段) + if session and situation_type in ("timeout", "proactive"): + entries = session.get_recent_entries(limit=10) + recent_texts = [] + + for entry in reversed(entries): + # 从用户消息中提取 + if entry.event_type == EventType.USER_MESSAGE and entry.content: + recent_texts.append(entry.content) + # 从 bot 的预期反应中提取(可能包含相关话题) + elif entry.event_type == EventType.BOT_PLANNING and entry.expected_reaction: + recent_texts.append(entry.expected_reaction) + + if len(recent_texts) >= 3: + break + + if recent_texts: + target_message = " ".join(reversed(recent_texts)) + logger.debug(f"[记忆搜索] 使用 mental_log 作为查询 (situation={situation_type}): {target_message[:80]}...") + return target_message + + # 策略4: 最后回退 - 使用用户名 + 场景描述 + if situation_type == "timeout": + target_message = f"与 {user_name} 的对话 等待回复" + elif situation_type == "proactive": + target_message = f"与 {user_name} 的对话 主动发起聊天" + else: + target_message = f"与 {user_name} 的对话" + + logger.debug(f"[记忆搜索] 使用回退查询 (situation={situation_type}): {target_message}") + return target_message + + def _get_latest_user_message(self, session: Optional[KokoroSession]) -> str: + """ + 获取最新的用户消息内容 + + Args: + session: KokoroSession 会话对象 + + Returns: + 最新用户消息的内容,如果没有则返回提示文本 + """ + if not session: + return "(未知消息)" + + # 从 mental_log 中获取最新的用户消息 + entries = session.get_recent_entries(limit=10) + for entry in reversed(entries): + if entry.event_type == EventType.USER_MESSAGE and entry.content: + return entry.content + + return "(消息内容不可用)" + async def _build_chat_history_block( self, chat_stream: Optional["ChatStream"], @@ -525,32 +648,39 @@ class PromptBuilder: situation_type = "new_message" if situation_type == "new_message": + # 获取最新消息内容 + latest_message = self._get_latest_user_message(session) return await global_prompt_manager.format_prompt( PROMPT_NAMES["situation_new_message"], current_time=current_time, user_name=user_name, + latest_message=latest_message, ) elif situation_type == "reply_in_time": elapsed = session.waiting_config.get_elapsed_seconds() max_wait = session.waiting_config.max_wait_seconds + latest_message = self._get_latest_user_message(session) return await global_prompt_manager.format_prompt( PROMPT_NAMES["situation_reply_in_time"], current_time=current_time, user_name=user_name, elapsed_minutes=elapsed / 60, max_wait_minutes=max_wait / 60, + latest_message=latest_message, ) elif situation_type == "reply_late": elapsed = session.waiting_config.get_elapsed_seconds() max_wait = session.waiting_config.max_wait_seconds + latest_message = self._get_latest_user_message(session) return await global_prompt_manager.format_prompt( PROMPT_NAMES["situation_reply_late"], current_time=current_time, user_name=user_name, elapsed_minutes=elapsed / 60, max_wait_minutes=max_wait / 60, + latest_message=latest_message, ) elif situation_type == "timeout": @@ -911,7 +1041,10 @@ class PromptBuilder: safety_guidelines_block = self._build_safety_guidelines_block() # 2. 使用 context_builder 获取关系、记忆、表达习惯等 - context_data = await self._build_context_data(user_name, chat_stream, user_id) + context_data = await self._build_context_data( + user_name, chat_stream, user_id, + session=session, situation_type=situation_type + ) relation_block = context_data.get("relation_info", f"你与 {user_name} 还不太熟悉,这是早期的交流阶段。") memory_block = context_data.get("memory_block", "") expression_habits = self._build_combined_expression_block(context_data.get("expression_habits", "")) diff --git a/src/plugins/built_in/kokoro_flow_chatter/prompt/prompts.py b/src/plugins/built_in/kokoro_flow_chatter/prompt/prompts.py index 6ac97718c..d5c8c176d 100644 --- a/src/plugins/built_in/kokoro_flow_chatter/prompt/prompts.py +++ b/src/plugins/built_in/kokoro_flow_chatter/prompt/prompts.py @@ -88,7 +88,9 @@ kfc_SITUATION_NEW_MESSAGE = Prompt( name="kfc_situation_new_message", template="""现在是 {current_time}。 -{user_name} 刚刚给你发了消息。这是一次新的对话发起(不是对你之前消息的回复)。 +{user_name} 刚刚给你发了消息:「{latest_message}」 + +这是一次新的对话发起(不是对你之前消息的回复)。 请决定你要怎么回应。你可以: - 发送文字消息回复 @@ -103,7 +105,7 @@ kfc_SITUATION_REPLY_IN_TIME = Prompt( 你之前发了消息后一直在等 {user_name} 的回复。 等了大约 {elapsed_minutes:.1f} 分钟(你原本打算最多等 {max_wait_minutes:.1f} 分钟)。 -现在 {user_name} 回复了! +现在 {user_name} 回复了:「{latest_message}」 请决定你接下来要怎么回应。""", ) @@ -114,7 +116,7 @@ kfc_SITUATION_REPLY_LATE = Prompt( 你之前发了消息后在等 {user_name} 的回复。 你原本打算最多等 {max_wait_minutes:.1f} 分钟,但实际等了 {elapsed_minutes:.1f} 分钟才收到回复。 -虽然有点迟,但 {user_name} 终于回复了。 +虽然有点迟,但 {user_name} 终于回复了:「{latest_message}」 请决定你接下来要怎么回应。(可以选择轻轻抱怨一下迟到,也可以装作没在意)""", )