ruff

@@ -680,9 +680,9 @@ class EmojiManager:
try:
# 🔧 使用 QueryBuilder 以启用数据库缓存
from src.common.database.api.query import QueryBuilder
logger.debug("[数据库] 开始加载所有表情包记录 ...")
emoji_instances = await QueryBuilder(Emoji).all()
emoji_objects, load_errors = _to_emoji_objects(emoji_instances)

@@ -802,7 +802,7 @@ class EmojiManager:
# 如果内存中没有,从数据库查找(使用 QueryBuilder 启用数据库缓存)
try:
from src.common.database.api.query import QueryBuilder
emoji_record = await QueryBuilder(Emoji).filter(emoji_hash=emoji_hash).first()
if emoji_record and emoji_record.description:
logger.info(f"[缓存命中] 从数据库获取表情包描述: {emoji_record.description[:50]}...")

@@ -966,7 +966,7 @@ class EmojiManager:
existing_description = None
try:
from src.common.database.api.query import QueryBuilder
existing_image = await QueryBuilder(Images).filter(emoji_hash=image_hash, type="emoji").first()
if existing_image and existing_image.description:
existing_description = existing_image.description

@@ -1,5 +1,4 @@
import os
import random
import time
from datetime import datetime
from typing import Any

@@ -135,20 +134,20 @@ class ExpressionLearner:
async def cleanup_expired_expressions(self, expiration_days: int | None = None) -> int:
"""
清理过期的表达方式
Args:
expiration_days: 过期天数,超过此天数未激活的表达方式将被删除(不指定则从配置读取)
Returns:
int: 删除的表达方式数量
"""
# 从配置读取过期天数
if expiration_days is None:
expiration_days = global_config.expression.expiration_days
current_time = time.time()
expiration_threshold = current_time - (expiration_days * 24 * 3600)
try:
deleted_count = 0
async with get_db_session() as session:

@@ -160,15 +159,15 @@ class ExpressionLearner:
)
)
expired_expressions = list(query.scalars())
if expired_expressions:
for expr in expired_expressions:
await session.delete(expr)
deleted_count += 1
await session.commit()
logger.info(f"清理了 {deleted_count} 个过期表达方式(超过 {expiration_days} 天未使用)")
# 清除缓存
from src.common.database.optimization.cache_manager import get_cache
from src.common.database.utils.decorators import generate_cache_key

@@ -176,7 +175,7 @@ class ExpressionLearner:
await cache.delete(generate_cache_key("chat_expressions", self.chat_id))
else:
logger.debug(f"没有发现过期的表达方式(阈值:{expiration_days} 天)")
return deleted_count
except Exception as e:
logger.error(f"清理过期表达方式失败: {e}")

@@ -460,7 +459,7 @@ class ExpressionLearner:
)
)
same_situation_expr = query_same_situation.scalar()
# 情况2:相同 chat_id + type + style(相同表达,不同情景)
query_same_style = await session.execute(
select(Expression).where(

@@ -470,7 +469,7 @@ class ExpressionLearner:
)
)
same_style_expr = query_same_style.scalar()
# 情况3:完全相同(相同情景+相同表达)
query_exact_match = await session.execute(
select(Expression).where(

@@ -481,7 +480,7 @@ class ExpressionLearner:
)
)
exact_match_expr = query_exact_match.scalar()
# 优先处理完全匹配的情况
if exact_match_expr:
# 完全相同:增加count,更新时间

@@ -72,21 +72,21 @@ class ExpressorModel:
是否删除成功
"""
removed = False
if cid in self._candidates:
del self._candidates[cid]
removed = True
if cid in self._situations:
del self._situations[cid]
# 从nb模型中删除
if cid in self.nb.cls_counts:
del self.nb.cls_counts[cid]
if cid in self.nb.token_counts:
del self.nb.token_counts[cid]
return removed
def predict(self, text: str, k: int | None = None) -> tuple[str | None, dict[str, float]]:

@@ -72,7 +72,7 @@ class StyleLearner:
# 检查是否需要清理
current_count = len(self.style_to_id)
cleanup_trigger = int(self.max_styles * self.cleanup_threshold)
if current_count >= cleanup_trigger:
if current_count >= self.max_styles:
# 已经达到最大限制,必须清理

@@ -109,7 +109,7 @@ class StyleLearner:
def _cleanup_styles(self):
"""
清理低价值的风格,为新风格腾出空间
清理策略:
1. 综合考虑使用次数和最后使用时间
2. 删除得分最低的风格

@@ -118,34 +118,34 @@ class StyleLearner:
try:
current_time = time.time()
cleanup_count = max(1, int(len(self.style_to_id) * self.cleanup_ratio))
# 计算每个风格的价值分数
style_scores = []
for style_id in self.style_to_id.values():
# 使用次数
usage_count = self.learning_stats["style_counts"].get(style_id, 0)
# 最后使用时间(越近越好)
last_used = self.learning_stats["style_last_used"].get(style_id, 0)
time_since_used = current_time - last_used if last_used > 0 else float('inf')
time_since_used = current_time - last_used if last_used > 0 else float("inf")
# 综合分数:使用次数越多越好,距离上次使用时间越短越好
# 使用对数来平滑使用次数的影响
import math
usage_score = math.log1p(usage_count) # log(1 + count)
# 时间分数:转换为天数,使用指数衰减
days_unused = time_since_used / 86400 # 转换为天
time_score = math.exp(-days_unused / 30) # 30天衰减因子
# 综合分数:80%使用频率 + 20%时间新鲜度
total_score = 0.8 * usage_score + 0.2 * time_score
style_scores.append((style_id, total_score, usage_count, days_unused))
# 按分数排序,分数低的先删除
style_scores.sort(key=lambda x: x[1])
# 删除分数最低的风格
deleted_styles = []
for style_id, score, usage, days in style_scores[:cleanup_count]:

@@ -156,27 +156,27 @@ class StyleLearner:
del self.id_to_style[style_id]
if style_id in self.id_to_situation:
del self.id_to_situation[style_id]
# 从统计中删除
if style_id in self.learning_stats["style_counts"]:
del self.learning_stats["style_counts"][style_id]
if style_id in self.learning_stats["style_last_used"]:
del self.learning_stats["style_last_used"][style_id]
# 从expressor模型中删除
self.expressor.remove_candidate(style_id)
deleted_styles.append((style_text[:30], usage, f"{days:.1f}天"))
logger.info(
f"风格清理完成: 删除了 {len(deleted_styles)}/{len(style_scores)} 个风格,"
f"剩余 {len(self.style_to_id)} 个风格"
)
# 记录前5个被删除的风格(用于调试)
if deleted_styles:
logger.debug(f"被删除的风格样例(前5): {deleted_styles[:5]}")
except Exception as e:
logger.error(f"清理风格失败: {e}", exc_info=True)
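
Editor's sketch of the value score computed in _cleanup_styles above (a minimal, self-contained restatement; the helper name and argument layout are illustrative, only the log1p/exp formula and the 0.8/0.2 weights come from the diff):

import math
import time

def style_value_score(usage_count: int, last_used_ts: float, now: float | None = None) -> float:
    """80% log-smoothed usage frequency + 20% recency with a 30-day exponential decay."""
    now = time.time() if now is None else now
    usage_score = math.log1p(usage_count)  # log(1 + count) keeps very popular styles from dominating
    days_unused = (now - last_used_ts) / 86400 if last_used_ts > 0 else float("inf")
    time_score = math.exp(-days_unused / 30)  # a never-used style gets a recency score of 0
    return 0.8 * usage_score + 0.2 * time_score

# Styles are then sorted ascending by this score and the lowest-scoring ones are removed first.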

@@ -303,10 +303,10 @@ class StyleLearner:
def cleanup_old_styles(self, ratio: float | None = None) -> int:
"""
手动清理旧风格
Args:
ratio: 清理比例,如果为None则使用默认的cleanup_ratio
Returns:
清理的风格数量
"""

@@ -318,7 +318,7 @@ class StyleLearner:
self.cleanup_ratio = old_cleanup_ratio
else:
self._cleanup_styles()
new_count = len(self.style_to_id)
cleaned = old_count - new_count
logger.info(f"手动清理完成: chat_id={self.chat_id}, 清理了 {cleaned} 个风格")

@@ -357,11 +357,11 @@ class StyleLearner:
import pickle
meta_path = os.path.join(save_dir, "meta.pkl")
# 确保 learning_stats 包含所有必要字段
if "style_last_used" not in self.learning_stats:
self.learning_stats["style_last_used"] = {}
meta_data = {
"style_to_id": self.style_to_id,
"id_to_style": self.id_to_style,

@@ -416,7 +416,7 @@ class StyleLearner:
self.id_to_situation = meta_data["id_to_situation"]
self.next_style_id = meta_data["next_style_id"]
self.learning_stats = meta_data["learning_stats"]
# 确保旧数据兼容:如果没有 style_last_used 字段,添加它
if "style_last_used" not in self.learning_stats:
self.learning_stats["style_last_used"] = {}

@@ -526,10 +526,10 @@ class StyleLearnerManager:
def cleanup_all_old_styles(self, ratio: float | None = None) -> dict[str, int]:
"""
对所有学习器清理旧风格
Args:
ratio: 清理比例
Returns:
{chat_id: 清理数量}
"""

@@ -538,7 +538,7 @@ class StyleLearnerManager:
cleaned = learner.cleanup_old_styles(ratio)
if cleaned > 0:
cleanup_results[chat_id] = cleaned
total_cleaned = sum(cleanup_results.values())
logger.info(f"清理所有StyleLearner完成: 总共清理了 {total_cleaned} 个风格")
return cleanup_results

@@ -8,7 +8,6 @@ from datetime import datetime
from typing import Any
import numpy as np
import orjson
from sqlalchemy import select
from src.common.config_helpers import resolve_embedding_dimension

@@ -124,7 +123,7 @@ class BotInterestManager:
tags_info = [f" - '{tag.tag_name}' (权重: {tag.weight:.2f})" for tag in loaded_interests.get_active_tags()]
tags_str = "\n".join(tags_info)
logger.info(f"当前兴趣标签:\n{tags_str}")
# 为加载的标签生成embedding(数据库不存储embedding,启动时动态生成)
logger.info("🧠 为加载的标签生成embedding向量...")
await self._generate_embeddings_for_tags(loaded_interests)

@@ -326,13 +325,13 @@ class BotInterestManager:
raise RuntimeError("❌ Embedding客户端未初始化,无法生成embedding")
total_tags = len(interests.interest_tags)
# 尝试从文件加载缓存
file_cache = await self._load_embedding_cache_from_file(interests.personality_id)
if file_cache:
logger.info(f"📂 从文件加载 {len(file_cache)} 个embedding缓存")
self.embedding_cache.update(file_cache)
logger.info(f"🧠 开始为 {total_tags} 个兴趣标签生成embedding向量...")
memory_cached_count = 0

@@ -477,14 +476,14 @@ class BotInterestManager:
self, message_text: str, keywords: list[str] | None = None
) -> InterestMatchResult:
"""计算消息与机器人兴趣的匹配度(优化版 - 标签扩展策略)
核心优化:将短标签扩展为完整的描述性句子,解决语义粒度不匹配问题
原问题:
- 消息: "今天天气不错" (完整句子)
- 标签: "蹭人治愈" (2-4字短语)
- 标签: "蹭人治愈" (2-4字短语)
- 结果: 误匹配,因为短标签的 embedding 过于抽象
解决方案:
- 标签扩展: "蹭人治愈" -> "表达亲近、寻求安慰、撒娇的内容"
- 现在是: 句子 vs 句子,匹配更准确
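
Editor's sketch of the similarity math this docstring describes: cosine similarity against the expanded-tag embedding, blended 70/30 with the original short tag. Function names here are illustrative; only the 0.7/0.3 weights appear in the diff below.

import numpy as np

def cosine_similarity(a: list[float], b: list[float]) -> float:
    va, vb = np.asarray(a, dtype=float), np.asarray(b, dtype=float)
    denom = float(np.linalg.norm(va) * np.linalg.norm(vb))
    return float(va @ vb) / denom if denom else 0.0

def blended_similarity(message_emb, expanded_tag_emb, original_tag_emb) -> float:
    # The expanded, sentence-like description carries most of the weight (70%);
    # the raw short tag stays in as a 30% supplement for flexibility.
    return 0.7 * cosine_similarity(message_emb, expanded_tag_emb) + 0.3 * cosine_similarity(message_emb, original_tag_emb)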

@@ -527,18 +526,18 @@ class BotInterestManager:
if tag.embedding:
# 🔧 优化:获取扩展标签的 embedding(带缓存)
expanded_embedding = await self._get_expanded_tag_embedding(tag.tag_name)
if expanded_embedding:
# 使用扩展标签的 embedding 进行匹配
similarity = self._calculate_cosine_similarity(message_embedding, expanded_embedding)
# 同时计算原始标签的相似度作为参考
original_similarity = self._calculate_cosine_similarity(message_embedding, tag.embedding)
# 混合策略:扩展标签权重更高(70%),原始标签作为补充(30%)
# 这样可以兼顾准确性(扩展)和灵活性(原始)
final_similarity = similarity * 0.7 + original_similarity * 0.3
logger.debug(f"标签'{tag.tag_name}': 原始={original_similarity:.3f}, 扩展={similarity:.3f}, 最终={final_similarity:.3f}")
else:
# 如果扩展 embedding 获取失败,使用原始 embedding

@@ -603,27 +602,27 @@ class BotInterestManager:
logger.debug(
f"最终结果: 总分={result.overall_score:.3f}, 置信度={result.confidence:.3f}, 匹配标签数={len(result.matched_tags)}"
)
# 如果有新生成的扩展embedding,保存到缓存文件
if hasattr(self, '_new_expanded_embeddings_generated') and self._new_expanded_embeddings_generated:
if hasattr(self, "_new_expanded_embeddings_generated") and self._new_expanded_embeddings_generated:
await self._save_embedding_cache_to_file(self.current_interests.personality_id)
self._new_expanded_embeddings_generated = False
logger.debug("💾 已保存新生成的扩展embedding到缓存文件")
return result
async def _get_expanded_tag_embedding(self, tag_name: str) -> list[float] | None:
"""获取扩展标签的 embedding(带缓存)
优先使用缓存,如果没有则生成并缓存
"""
# 检查缓存
if tag_name in self.expanded_embedding_cache:
return self.expanded_embedding_cache[tag_name]
# 扩展标签
expanded_tag = self._expand_tag_for_matching(tag_name)
# 生成 embedding
try:
embedding = await self._get_embedding(expanded_tag)

@@ -636,19 +635,19 @@ class BotInterestManager:
return embedding
except Exception as e:
logger.warning(f"为标签'{tag_name}'生成扩展embedding失败: {e}")
return None
def _expand_tag_for_matching(self, tag_name: str) -> str:
"""将短标签扩展为完整的描述性句子
这是解决"标签太短导致误匹配"的核心方法
策略:
1. 优先使用 LLM 生成的 expanded 字段(最准确)
2. 如果没有,使用基于规则的回退方案
3. 最后使用通用模板
示例:
- "Python" + expanded -> "讨论Python编程语言、写Python代码、Python脚本开发、Python技术问题"
- "蹭人治愈" + expanded -> "想要获得安慰、寻求温暖关怀、撒娇卖萌、表达亲昵、求抱抱求陪伴的对话"

@@ -656,7 +655,7 @@ class BotInterestManager:
# 使用缓存
if tag_name in self.expanded_tag_cache:
return self.expanded_tag_cache[tag_name]
# 🎯 优先策略:使用 LLM 生成的 expanded 字段
if self.current_interests:
for tag in self.current_interests.interest_tags:

@@ -664,66 +663,66 @@ class BotInterestManager:
logger.debug(f"✅ 使用LLM生成的扩展描述: {tag_name} -> {tag.expanded[:50]}...")
self.expanded_tag_cache[tag_name] = tag.expanded
return tag.expanded
# 🔧 回退策略:基于规则的扩展(用于兼容旧数据或LLM未生成扩展的情况)
logger.debug(f"⚠️ 标签'{tag_name}'没有LLM扩展描述,使用规则回退方案")
tag_lower = tag_name.lower()
# 技术编程类标签(具体化描述)
if any(word in tag_lower for word in ['python', 'java', 'code', '代码', '编程', '脚本', '算法', '开发']):
if 'python' in tag_lower:
return f"讨论Python编程语言、写Python代码、Python脚本开发、Python技术问题"
elif '算法' in tag_lower:
return f"讨论算法题目、数据结构、编程竞赛、刷LeetCode题目、代码优化"
elif '代码' in tag_lower or '被窝' in tag_lower:
return f"讨论写代码、编程开发、代码实现、技术方案、编程技巧"
if any(word in tag_lower for word in ["python", "java", "code", "代码", "编程", "脚本", "算法", "开发"]):
if "python" in tag_lower:
return "讨论Python编程语言、写Python代码、Python脚本开发、Python技术问题"
elif "算法" in tag_lower:
return "讨论算法题目、数据结构、编程竞赛、刷LeetCode题目、代码优化"
elif "代码" in tag_lower or "被窝" in tag_lower:
return "讨论写代码、编程开发、代码实现、技术方案、编程技巧"
else:
return f"讨论编程开发、软件技术、代码编写、技术实现"
return "讨论编程开发、软件技术、代码编写、技术实现"
# 情感表达类标签(具体化为真实对话场景)
elif any(word in tag_lower for word in ['治愈', '撒娇', '安慰', '呼噜', '蹭', '卖萌']):
return f"想要获得安慰、寻求温暖关怀、撒娇卖萌、表达亲昵、求抱抱求陪伴的对话"
elif any(word in tag_lower for word in ["治愈", "撒娇", "安慰", "呼噜", "蹭", "卖萌"]):
return "想要获得安慰、寻求温暖关怀、撒娇卖萌、表达亲昵、求抱抱求陪伴的对话"
# 游戏娱乐类标签(具体游戏场景)
elif any(word in tag_lower for word in ['游戏', '网游', 'mmo', '游', '玩']):
return f"讨论网络游戏、MMO游戏、游戏玩法、组队打副本、游戏攻略心得"
elif any(word in tag_lower for word in ["游戏", "网游", "mmo", "游", "玩"]):
return "讨论网络游戏、MMO游戏、游戏玩法、组队打副本、游戏攻略心得"
# 动漫影视类标签(具体观看行为)
elif any(word in tag_lower for word in ['番', '动漫', '视频', 'b站', '弹幕', '追番', '云新番']):
elif any(word in tag_lower for word in ["番", "动漫", "视频", "b站", "弹幕", "追番", "云新番"]):
# 特别处理"云新番" - 它的意思是在网上看新动漫,不是泛泛的"新东西"
if '云' in tag_lower or '新番' in tag_lower:
return f"讨论正在播出的新动漫、新番剧集、动漫剧情、追番心得、动漫角色"
if "云" in tag_lower or "新番" in tag_lower:
return "讨论正在播出的新动漫、新番剧集、动漫剧情、追番心得、动漫角色"
else:
return f"讨论动漫番剧内容、B站视频、弹幕文化、追番体验"
return "讨论动漫番剧内容、B站视频、弹幕文化、追番体验"
# 社交平台类标签(具体平台行为)
elif any(word in tag_lower for word in ['小红书', '贴吧', '论坛', '社区', '吃瓜', '八卦']):
if '吃瓜' in tag_lower:
return f"聊八卦爆料、吃瓜看热闹、网络热点事件、社交平台热议话题"
elif any(word in tag_lower for word in ["小红书", "贴吧", "论坛", "社区", "吃瓜", "八卦"]):
if "吃瓜" in tag_lower:
return "聊八卦爆料、吃瓜看热闹、网络热点事件、社交平台热议话题"
else:
return f"讨论社交平台内容、网络社区话题、论坛讨论、分享生活"
return "讨论社交平台内容、网络社区话题、论坛讨论、分享生活"
# 生活日常类标签(具体萌宠场景)
elif any(word in tag_lower for word in ['猫', '宠物', '尾巴', '耳朵', '毛绒']):
return f"讨论猫咪宠物、晒猫分享、萌宠日常、可爱猫猫、养猫心得"
elif any(word in tag_lower for word in ["猫", "宠物", "尾巴", "耳朵", "毛绒"]):
return "讨论猫咪宠物、晒猫分享、萌宠日常、可爱猫猫、养猫心得"
# 状态心情类标签(具体情绪状态)
elif any(word in tag_lower for word in ['社恐', '隐身', '流浪', '深夜', '被窝']):
if '社恐' in tag_lower:
return f"表达社交焦虑、不想见人、想躲起来、害怕社交的心情"
elif '深夜' in tag_lower:
return f"深夜睡不着、熬夜、夜猫子、深夜思考人生的对话"
elif any(word in tag_lower for word in ["社恐", "隐身", "流浪", "深夜", "被窝"]):
if "社恐" in tag_lower:
return "表达社交焦虑、不想见人、想躲起来、害怕社交的心情"
elif "深夜" in tag_lower:
return "深夜睡不着、熬夜、夜猫子、深夜思考人生的对话"
else:
return f"表达当前心情状态、个人感受、生活状态"
return "表达当前心情状态、个人感受、生活状态"
# 物品装备类标签(具体使用场景)
elif any(word in tag_lower for word in ['键盘', '耳机', '装备', '设备']):
return f"讨论键盘耳机装备、数码产品、使用体验、装备推荐评测"
elif any(word in tag_lower for word in ["键盘", "耳机", "装备", "设备"]):
return "讨论键盘耳机装备、数码产品、使用体验、装备推荐评测"
# 互动关系类标签
elif any(word in tag_lower for word in ['拾风', '互怼', '互动']):
return f"聊天互动、开玩笑、友好互怼、日常对话交流"
elif any(word in tag_lower for word in ["拾风", "互怼", "互动"]):
return "聊天互动、开玩笑、友好互怼、日常对话交流"
# 默认:尽量具体化
else:
return f"明确讨论{tag_name}这个特定主题的具体内容和相关话题"

@@ -1011,56 +1010,58 @@ class BotInterestManager:
async def _load_embedding_cache_from_file(self, personality_id: str) -> dict[str, list[float]] | None:
"""从文件加载embedding缓存"""
try:
import orjson
from pathlib import Path
import orjson
cache_dir = Path("data/embedding")
cache_dir.mkdir(parents=True, exist_ok=True)
cache_file = cache_dir / f"{personality_id}_embeddings.json"
if not cache_file.exists():
logger.debug(f"📂 Embedding缓存文件不存在: {cache_file}")
return None
# 读取缓存文件
with open(cache_file, "rb") as f:
cache_data = orjson.loads(f.read())
# 验证缓存版本和embedding模型
cache_version = cache_data.get("version", 1)
cache_embedding_model = cache_data.get("embedding_model", "")
current_embedding_model = self.embedding_config.model_list[0] if hasattr(self.embedding_config, "model_list") else ""
if cache_embedding_model != current_embedding_model:
logger.warning(f"⚠️ Embedding模型已变更 ({cache_embedding_model} → {current_embedding_model}),忽略旧缓存")
return None
embeddings = cache_data.get("embeddings", {})
# 同时加载扩展标签的embedding缓存
expanded_embeddings = cache_data.get("expanded_embeddings", {})
if expanded_embeddings:
self.expanded_embedding_cache.update(expanded_embeddings)
logger.info(f"📂 加载 {len(expanded_embeddings)} 个扩展标签embedding缓存")
logger.info(f"✅ 成功从文件加载 {len(embeddings)} 个标签embedding缓存 (版本: {cache_version}, 模型: {cache_embedding_model})")
return embeddings
except Exception as e:
logger.warning(f"⚠️ 加载embedding缓存文件失败: {e}")
return None
async def _save_embedding_cache_to_file(self, personality_id: str):
"""保存embedding缓存到文件(包括扩展标签的embedding)"""
try:
import orjson
from pathlib import Path
from datetime import datetime
from pathlib import Path
import orjson
cache_dir = Path("data/embedding")
cache_dir.mkdir(parents=True, exist_ok=True)
cache_file = cache_dir / f"{personality_id}_embeddings.json"
# 准备缓存数据
current_embedding_model = self.embedding_config.model_list[0] if hasattr(self.embedding_config, "model_list") and self.embedding_config.model_list else ""
cache_data = {

@@ -1071,13 +1072,13 @@ class BotInterestManager:
"embeddings": self.embedding_cache,
"expanded_embeddings": self.expanded_embedding_cache, # 同时保存扩展标签的embedding
}
# 写入文件
with open(cache_file, "wb") as f:
f.write(orjson.dumps(cache_data, option=orjson.OPT_INDENT_2))
logger.debug(f"💾 已保存 {len(self.embedding_cache)} 个标签embedding和 {len(self.expanded_embedding_cache)} 个扩展embedding到缓存文件: {cache_file}")
except Exception as e:
logger.warning(f"⚠️ 保存embedding缓存文件失败: {e}")
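
For reference, a minimal editor's sketch of the cache-file round trip these two methods perform (field names are taken from the diff; the personality id and model string below are placeholders):

import orjson
from pathlib import Path

cache_file = Path("data/embedding") / "example-personality_embeddings.json"  # placeholder personality_id
cache_data = {
    "version": 1,
    "embedding_model": "example-embedding-model",  # on load, the cache is ignored if this no longer matches
    "embeddings": {"Python": [0.1, 0.2, 0.3]},  # tag_name -> embedding vector
    "expanded_embeddings": {"Python": [0.1, 0.2, 0.3]},  # expanded-tag embeddings saved alongside
}
cache_file.parent.mkdir(parents=True, exist_ok=True)
cache_file.write_bytes(orjson.dumps(cache_data, option=orjson.OPT_INDENT_2))
loaded = orjson.loads(cache_file.read_bytes())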

@@ -9,8 +9,8 @@ from .scheduler_dispatcher import SchedulerDispatcher, scheduler_dispatcher
__all__ = [
"MessageManager",
"SingleStreamContextManager",
"SchedulerDispatcher",
"SingleStreamContextManager",
"message_manager",
"scheduler_dispatcher",
]

@@ -73,7 +73,7 @@ class SingleStreamContextManager:
cache_enabled = global_config.chat.enable_message_cache
use_cache_system = message_manager.is_running and cache_enabled
if not cache_enabled:
logger.debug(f"消息缓存系统已在配置中禁用")
logger.debug("消息缓存系统已在配置中禁用")
except Exception as e:
logger.debug(f"MessageManager不可用,使用直接添加: {e}")
use_cache_system = False

@@ -129,13 +129,13 @@ class SingleStreamContextManager:
await self._calculate_message_interest(message)
self.total_messages += 1
self.last_access_time = time.time()
logger.debug(f"添加消息{message.processed_plain_text}到单流上下文: {self.stream_id}")
return True
# 不应该到达这里,但为了类型检查添加返回值
return True
except Exception as e:
logger.error(f"添加消息到单流上下文失败 {self.stream_id}: {e}", exc_info=True)
return False

@@ -4,13 +4,11 @@
"""
import asyncio
import random
import time
from collections import defaultdict, deque
from typing import TYPE_CHECKING, Any
from src.chat.chatter_manager import ChatterManager
from src.chat.message_receive.chat_stream import ChatStream
from src.chat.planner_actions.action_manager import ChatterActionManager
from src.common.data_models.database_data_model import DatabaseMessages
from src.common.data_models.message_manager_data_model import MessageManagerStats, StreamStats

@@ -77,7 +75,7 @@ class MessageManager:
# 启动基于 scheduler 的消息分发器
await scheduler_dispatcher.start()
scheduler_dispatcher.set_chatter_manager(self.chatter_manager)
# 保留旧的流循环管理器(暂时)以便平滑过渡
# TODO: 在确认新机制稳定后移除
# await stream_loop_manager.start()

@@ -108,7 +106,7 @@ class MessageManager:
# 停止基于 scheduler 的消息分发器
await scheduler_dispatcher.stop()
# 停止旧的流循环管理器(如果启用)
# await stream_loop_manager.stop()

@@ -116,7 +114,7 @@ class MessageManager:
async def add_message(self, stream_id: str, message: DatabaseMessages):
"""添加消息到指定聊天流
新的流程:
1. 检查 notice 消息
2. 将消息添加到上下文(缓存)

@@ -149,10 +147,10 @@ class MessageManager:
if not chat_stream:
logger.warning(f"MessageManager.add_message: 聊天流 {stream_id} 不存在")
return
# 将消息添加到上下文
await chat_stream.context_manager.add_message(message)
# 通知 scheduler_dispatcher 处理消息接收事件
# dispatcher 会检查是否需要打断、创建或更新 schedule
await scheduler_dispatcher.on_message_received(stream_id)

@@ -20,7 +20,7 @@ logger = get_logger("scheduler_dispatcher")
class SchedulerDispatcher:
"""基于 scheduler 的消息分发器
工作流程:
1. 接收消息时,将消息添加到聊天流上下文
2. 检查是否有活跃的 schedule,如果没有则创建

@@ -32,13 +32,13 @@ class SchedulerDispatcher:
def __init__(self):
# 追踪每个流的 schedule_id
self.stream_schedules: dict[str, str] = {} # stream_id -> schedule_id
# 用于保护 schedule 创建/删除的锁,避免竞态条件
self.schedule_locks: dict[str, asyncio.Lock] = {} # stream_id -> Lock
# Chatter 管理器
self.chatter_manager: ChatterManager | None = None
# 统计信息
self.stats = {
"total_schedules_created": 0,

@@ -48,9 +48,9 @@ class SchedulerDispatcher:
"total_failures": 0,
"start_time": time.time(),
}
self.is_running = False
logger.info("基于 Scheduler 的消息分发器初始化完成")
async def start(self) -> None:

@@ -58,7 +58,7 @@ class SchedulerDispatcher:
if self.is_running:
logger.warning("分发器已在运行")
return
self.is_running = True
logger.info("基于 Scheduler 的消息分发器已启动")

@@ -66,9 +66,9 @@ class SchedulerDispatcher:
"""停止分发器"""
if not self.is_running:
return
self.is_running = False
# 取消所有活跃的 schedule
schedule_ids = list(self.stream_schedules.values())
for schedule_id in schedule_ids:

@@ -76,7 +76,7 @@ class SchedulerDispatcher:
await unified_scheduler.remove_schedule(schedule_id)
except Exception as e:
logger.error(f"移除 schedule {schedule_id} 失败: {e}")
self.stream_schedules.clear()
logger.info("基于 Scheduler 的消息分发器已停止")

@@ -84,7 +84,7 @@ class SchedulerDispatcher:
"""设置 Chatter 管理器"""
self.chatter_manager = chatter_manager
logger.debug(f"设置 Chatter 管理器: {chatter_manager.__class__.__name__}")
def _get_schedule_lock(self, stream_id: str) -> asyncio.Lock:
"""获取流的 schedule 锁"""
if stream_id not in self.schedule_locks:

@@ -93,40 +93,40 @@ class SchedulerDispatcher:
async def on_message_received(self, stream_id: str) -> None:
"""消息接收时的处理逻辑
Args:
stream_id: 聊天流ID
"""
if not self.is_running:
logger.warning("分发器未运行,忽略消息")
return
try:
# 1. 获取流上下文
context = await self._get_stream_context(stream_id)
if not context:
logger.warning(f"无法获取流上下文: {stream_id}")
return
# 2. 检查是否有活跃的 schedule
has_active_schedule = stream_id in self.stream_schedules
if not has_active_schedule:
# 4. 创建新的 schedule(在锁内,避免重复创建)
await self._create_schedule(stream_id, context)
return
# 3. 检查打断判定
if has_active_schedule:
should_interrupt = await self._check_interruption(stream_id, context)
if should_interrupt:
# 移除旧 schedule 并创建新的(内部有锁保护)
await self._cancel_and_recreate_schedule(stream_id, context)
logger.debug(f"⚡ 打断成功: 流={stream_id[:8]}..., 已重新创建 schedule")
else:
logger.debug(f"打断判定失败,保持原有 schedule: 流={stream_id[:8]}...")
except Exception as e:
logger.error(f"处理消息接收事件失败 {stream_id}: {e}", exc_info=True)

@@ -144,18 +144,18 @@ class SchedulerDispatcher:
async def _check_interruption(self, stream_id: str, context: StreamContext) -> bool:
"""检查是否应该打断当前处理
Args:
stream_id: 流ID
context: 流上下文
Returns:
bool: 是否应该打断
"""
# 检查是否启用打断
if not global_config.chat.interruption_enabled:
return False
# 检查是否正在回复,以及是否允许在回复时打断
if context.is_replying:
if not global_config.chat.allow_reply_interruption:

@@ -163,49 +163,49 @@ class SchedulerDispatcher:
return False
else:
logger.debug(f"聊天流 {stream_id} 正在回复中,但配置允许回复时打断")
# 只有当 Chatter 真正在处理时才检查打断
if not context.is_chatter_processing:
logger.debug(f"聊天流 {stream_id} Chatter 未在处理,无需打断")
return False
# 检查最后一条消息
last_message = context.get_last_message()
if not last_message:
return False
# 检查是否为表情包消息
if last_message.is_picid or last_message.is_emoji:
logger.info(f"消息 {last_message.message_id} 是表情包或Emoji,跳过打断检查")
return False
# 检查触发用户ID
triggering_user_id = context.triggering_user_id
if triggering_user_id and last_message.user_info.user_id != triggering_user_id:
logger.info(f"消息来自非触发用户 {last_message.user_info.user_id},实际触发用户为 {triggering_user_id},跳过打断检查")
return False
# 检查是否已达到最大打断次数
if context.interruption_count >= global_config.chat.interruption_max_limit:
logger.debug(
f"聊天流 {stream_id} 已达到最大打断次数 {context.interruption_count}/{global_config.chat.interruption_max_limit}"
)
return False
# 计算打断概率
interruption_probability = context.calculate_interruption_probability(
global_config.chat.interruption_max_limit
)
# 根据概率决定是否打断
import random
if random.random() < interruption_probability:
logger.debug(f"聊天流 {stream_id} 触发消息打断,打断概率: {interruption_probability:.2f}")
# 增加打断计数
await context.increment_interruption_count()
self.stats["total_interruptions"] += 1
# 检查是否已达到最大次数
if context.interruption_count >= global_config.chat.interruption_max_limit:
logger.warning(

@@ -215,7 +215,7 @@ class SchedulerDispatcher:
logger.info(
f"聊天流 {stream_id} 已打断,当前打断次数: {context.interruption_count}/{global_config.chat.interruption_max_limit}"
)
return True
else:
logger.debug(f"聊天流 {stream_id} 未触发打断,打断概率: {interruption_probability:.2f}")

@@ -223,7 +223,7 @@ class SchedulerDispatcher:
async def _cancel_and_recreate_schedule(self, stream_id: str, context: StreamContext) -> None:
"""取消旧的 schedule 并创建新的(打断模式,使用极短延迟)
Args:
stream_id: 流ID
context: 流上下文

@@ -244,13 +244,13 @@ class SchedulerDispatcher:
)
# 移除失败,不创建新 schedule,避免重复
return
# 创建新的 schedule,使用即时处理模式(极短延迟)
await self._create_schedule(stream_id, context, immediate_mode=True)
async def _create_schedule(self, stream_id: str, context: StreamContext, immediate_mode: bool = False) -> None:
"""为聊天流创建新的 schedule
Args:
stream_id: 流ID
context: 流上下文

@@ -266,7 +266,7 @@ class SchedulerDispatcher:
)
await unified_scheduler.remove_schedule(old_schedule_id)
del self.stream_schedules[stream_id]
# 如果是即时处理模式(打断时),使用固定的1秒延迟立即重新处理
if immediate_mode:
delay = 1.0 # 硬编码1秒延迟,确保打断后能快速重新处理

@@ -277,10 +277,10 @@ class SchedulerDispatcher:
else:
# 常规模式:计算初始延迟
delay = await self._calculate_initial_delay(stream_id, context)
# 获取未读消息数量用于日志
unread_count = len(context.unread_messages) if context.unread_messages else 0
# 创建 schedule
schedule_id = await unified_scheduler.create_schedule(
callback=self._on_schedule_triggered,

@@ -290,41 +290,41 @@ class SchedulerDispatcher:
task_name=f"dispatch_{stream_id[:8]}",
callback_args=(stream_id,),
)
# 追踪 schedule
self.stream_schedules[stream_id] = schedule_id
self.stats["total_schedules_created"] += 1
mode_indicator = "⚡打断" if immediate_mode else "📅常规"
logger.info(
f"{mode_indicator} 创建 schedule: 流={stream_id[:8]}..., "
f"延迟={delay:.3f}s, 未读={unread_count}, "
f"ID={schedule_id[:8]}..."
)
except Exception as e:
logger.error(f"创建 schedule 失败 {stream_id}: {e}", exc_info=True)
async def _calculate_initial_delay(self, stream_id: str, context: StreamContext) -> float:
"""计算初始延迟时间
Args:
stream_id: 流ID
context: 流上下文
Returns:
float: 延迟时间(秒)
"""
# 基础间隔
base_interval = getattr(global_config.chat, "distribution_interval", 5.0)
# 检查是否有未读消息
unread_count = len(context.unread_messages) if context.unread_messages else 0
# 强制分发阈值
force_dispatch_threshold = getattr(global_config.chat, "force_dispatch_unread_threshold", 20)
# 如果未读消息过多,使用最小间隔
if force_dispatch_threshold and unread_count > force_dispatch_threshold:
min_interval = getattr(global_config.chat, "force_dispatch_min_interval", 0.1)

@@ -334,24 +334,24 @@ class SchedulerDispatcher:
f"使用最小间隔={min_interval}s"
)
return min_interval
# 尝试使用能量管理器计算间隔
try:
# 更新能量值
await self._update_stream_energy(stream_id, context)
# 获取当前 focus_energy
focus_energy = energy_manager.energy_cache.get(stream_id, (0.5, 0))[0]
# 使用能量管理器计算间隔
interval = energy_manager.get_distribution_interval(focus_energy)
logger.info(
f"📊 动态间隔计算: 流={stream_id[:8]}..., "
f"能量={focus_energy:.3f}, 间隔={interval:.2f}s"
)
return interval
except Exception as e:
logger.info(
f"📊 使用默认间隔: 流={stream_id[:8]}..., "

@@ -361,96 +361,96 @@ class SchedulerDispatcher:
async def _update_stream_energy(self, stream_id: str, context: StreamContext) -> None:
"""更新流的能量值
Args:
stream_id: 流ID
context: 流上下文
"""
try:
from src.chat.message_receive.chat_stream import get_chat_manager
# 获取聊天流
chat_manager = get_chat_manager()
chat_stream = await chat_manager.get_stream(stream_id)
if not chat_stream:
logger.debug(f"无法找到聊天流 {stream_id},跳过能量更新")
return
# 合并未读消息和历史消息
all_messages = []
# 添加历史消息
history_messages = context.get_history_messages(limit=global_config.chat.max_context_size)
all_messages.extend(history_messages)
# 添加未读消息
unread_messages = context.get_unread_messages()
all_messages.extend(unread_messages)
# 按时间排序并限制数量
all_messages.sort(key=lambda m: m.time)
messages = all_messages[-global_config.chat.max_context_size:]
# 获取用户ID
user_id = context.triggering_user_id
# 使用能量管理器计算并缓存能量值
energy = await energy_manager.calculate_focus_energy(
stream_id=stream_id,
messages=messages,
user_id=user_id
)
# 同步更新到 ChatStream
chat_stream._focus_energy = energy
logger.debug(f"已更新流 {stream_id} 的能量值: {energy:.3f}")
except Exception as e:
logger.warning(f"更新流能量失败 {stream_id}: {e}", exc_info=False)
async def _on_schedule_triggered(self, stream_id: str) -> None:
"""schedule 触发时的回调
Args:
stream_id: 流ID
"""
try:
old_schedule_id = self.stream_schedules.get(stream_id)
logger.info(
f"⏰ Schedule 触发: 流={stream_id[:8]}..., "
f"ID={old_schedule_id[:8] if old_schedule_id else 'None'}..., "
f"开始处理消息"
)
# 获取流上下文
context = await self._get_stream_context(stream_id)
if not context:
logger.warning(f"Schedule 触发时无法获取流上下文: {stream_id}")
return
# 检查是否有未读消息
if not context.unread_messages:
logger.debug(f"流 {stream_id} 没有未读消息,跳过处理")
return
# 激活 chatter 处理(不需要锁,允许并发处理)
success = await self._process_stream(stream_id, context)
# 更新统计
self.stats["total_process_cycles"] += 1
if not success:
self.stats["total_failures"] += 1
self.stream_schedules.pop(stream_id, None)
# 检查缓存中是否有待处理的消息
from src.chat.message_manager.message_manager import message_manager
has_cached = message_manager.has_cached_messages(stream_id)
if has_cached:
# 有缓存消息,立即创建新 schedule 继续处理
logger.info(

@@ -464,60 +464,60 @@ class SchedulerDispatcher:
f"✅ 处理完成且无缓存消息: 流={stream_id[:8]}..., "
f"等待新消息到达"
)
except Exception as e:
logger.error(f"Schedule 回调执行失败 {stream_id}: {e}", exc_info=True)
async def _process_stream(self, stream_id: str, context: StreamContext) -> bool:
"""处理流消息
Args:
stream_id: 流ID
context: 流上下文
Returns:
bool: 是否处理成功
"""
if not self.chatter_manager:
logger.warning(f"Chatter 管理器未设置: {stream_id}")
return False
# 设置处理状态
self._set_stream_processing_status(stream_id, True)
try:
start_time = time.time()
# 设置触发用户ID
last_message = context.get_last_message()
if last_message:
context.triggering_user_id = last_message.user_info.user_id
# 创建异步任务刷新能量(不阻塞主流程)
energy_task = asyncio.create_task(self._refresh_focus_energy(stream_id))
# 设置 Chatter 正在处理的标志
context.is_chatter_processing = True
logger.debug(f"设置 Chatter 处理标志: {stream_id}")
try:
# 调用 chatter_manager 处理流上下文
results = await self.chatter_manager.process_stream_context(stream_id, context)
success = results.get("success", False)
if success:
process_time = time.time() - start_time
logger.debug(f"流处理成功: {stream_id} (耗时: {process_time:.2f}s)")
else:
logger.warning(f"流处理失败: {stream_id} - {results.get('error_message', '未知错误')}")
return success
finally:
# 清除 Chatter 处理标志
context.is_chatter_processing = False
logger.debug(f"清除 Chatter 处理标志: {stream_id}")
# 等待能量刷新任务完成
try:
await asyncio.wait_for(energy_task, timeout=5.0)

@@ -525,11 +525,11 @@ class SchedulerDispatcher:
logger.warning(f"等待能量刷新超时: {stream_id}")
except Exception as e:
logger.debug(f"能量刷新任务异常: {e}")
except Exception as e:
logger.error(f"流处理异常: {stream_id} - {e}", exc_info=True)
return False
finally:
# 设置处理状态为未处理
self._set_stream_processing_status(stream_id, False)

@@ -538,11 +538,11 @@ class SchedulerDispatcher:
"""设置流的处理状态"""
try:
from src.chat.message_manager.message_manager import message_manager
if message_manager.is_running:
message_manager.set_stream_processing_status(stream_id, is_processing)
logger.debug(f"设置流处理状态: stream={stream_id}, processing={is_processing}")
except ImportError:
logger.debug("MessageManager 不可用,跳过状态设置")
except Exception as e:

@@ -556,7 +556,7 @@ class SchedulerDispatcher:
if not chat_stream:
logger.debug(f"刷新能量时未找到聊天流: {stream_id}")
return
await chat_stream.context_manager.refresh_focus_energy_from_history()
logger.debug(f"已刷新聊天流 {stream_id} 的聚焦能量")
except Exception as e:

@@ -367,7 +367,7 @@ class ChatBot:
message_segment = message_data.get("message_segment")
if message_segment and isinstance(message_segment, dict):
if message_segment.get("type") == "adapter_response":
logger.info(f"[DEBUG bot.py message_process] 检测到adapter_response,立即处理")
logger.info("[DEBUG bot.py message_process] 检测到adapter_response,立即处理")
await self._handle_adapter_response_from_dict(message_segment.get("data"))
return

@@ -205,7 +205,7 @@ async def _process_single_segment(segment: Seg, state: dict, message_info: BaseM
return result
else:
logger.warning(f"[at处理] 无法解析格式: '{segment.data}'")
return f"@{segment.data}"
return f"@{segment.data}"
logger.warning(f"[at处理] 数据类型异常: {type(segment.data)}")
return f"@{segment.data}" if isinstance(segment.data, str) else "@未知用户"

@@ -542,7 +542,7 @@ class DefaultReplyer:
all_memories = []
try:
from src.memory_graph.manager_singleton import get_memory_manager, is_initialized
if is_initialized():
manager = get_memory_manager()
if manager:

@@ -552,12 +552,12 @@ class DefaultReplyer:
sender_name = ""
if user_info_obj:
sender_name = getattr(user_info_obj, "user_nickname", "") or getattr(user_info_obj, "user_cardname", "")
# 获取参与者信息
participants = []
try:
# 尝试从聊天流中获取参与者信息
if hasattr(stream, 'chat_history_manager'):
if hasattr(stream, "chat_history_manager"):
history_manager = stream.chat_history_manager
# 获取最近的参与者列表
recent_records = history_manager.get_memory_chat_history(

@@ -586,16 +586,16 @@ class DefaultReplyer:
formatted_history = ""
if chat_history:
# 移除过长的历史记录,只保留最近部分
lines = chat_history.strip().split('\n')
lines = chat_history.strip().split("\n")
recent_lines = lines[-10:] if len(lines) > 10 else lines
formatted_history = '\n'.join(recent_lines)
formatted_history = "\n".join(recent_lines)
query_context = {
"chat_history": formatted_history,
"sender": sender_name,
"participants": participants,
}
# 使用记忆管理器的智能检索(多查询策略)
memories = await manager.search_memories(
query=target,

@@ -605,23 +605,23 @@ class DefaultReplyer:
use_multi_query=True,
context=query_context,
)
if memories:
logger.info(f"[记忆图] 检索到 {len(memories)} 条相关记忆")
# 使用新的格式化工具构建完整的记忆描述
from src.memory_graph.utils.memory_formatter import (
format_memory_for_prompt,
get_memory_type_label,
)
for memory in memories:
# 使用格式化工具生成完整的主谓宾描述
content = format_memory_for_prompt(memory, include_metadata=False)
# 获取记忆类型
mem_type = memory.memory_type.value if memory.memory_type else "未知"
if content:
all_memories.append({
"content": content,

@@ -636,7 +636,7 @@ class DefaultReplyer:
except Exception as e:
logger.debug(f"[记忆图] 检索失败: {e}")
all_memories = []
# 构建记忆字符串,使用方括号格式
memory_str = ""
has_any_memory = False

@@ -725,7 +725,7 @@ class DefaultReplyer:
for tool_result in tool_results:
tool_name = tool_result.get("tool_name", "unknown")
content = tool_result.get("content", "")
result_type = tool_result.get("type", "tool_result")
tool_result.get("type", "tool_result")
# 不进行截断,让工具自己处理结果长度
current_results_parts.append(f"- **{tool_name}**: {content}")

@@ -744,7 +744,7 @@ class DefaultReplyer:
logger.error(f"工具信息获取失败: {e}")
return ""
def _parse_reply_target(self, target_message: str) -> tuple[str, str]:
"""解析回复目标消息 - 使用共享工具"""
from src.chat.utils.prompt import Prompt

@@ -1897,7 +1897,7 @@ class DefaultReplyer:
async def _store_chat_memory_async(self, reply_to: str, reply_message: DatabaseMessages | dict[str, Any] | None = None):
"""
[已废弃] 异步存储聊天记忆(从build_memory_block迁移而来)
此函数已被记忆图系统的工具调用方式替代。
记忆现在由LLM在对话过程中通过CreateMemoryTool主动创建。

@@ -1906,14 +1906,13 @@ class DefaultReplyer:
reply_message: 回复的原始消息
"""
return # 已禁用,保留函数签名以防其他地方有引用
# 以下代码已废弃,不再执行
try:
if not global_config.memory.enable_memory:
return
# 使用统一记忆系统存储记忆
from src.chat.memory_system import get_memory_system
stream = self.chat_stream
user_info_obj = getattr(stream, "user_info", None)

@@ -2036,7 +2035,7 @@ class DefaultReplyer:
timestamp=time.time(),
limit=int(global_config.chat.max_context_size),
)
chat_history = await build_readable_messages(
await build_readable_messages(
message_list_before_short,
replace_bot_name=True,
merge_messages=False,

@@ -400,7 +400,7 @@ class Prompt:
# 初始化预构建参数字典
pre_built_params = {}
try:
# --- 步骤 1: 准备构建任务 ---
tasks = []

@@ -87,20 +87,18 @@ def is_mentioned_bot_in_message(message) -> tuple[bool, float]:
)
processed_text = message.processed_plain_text or ""
# 1. 判断是否为私聊(强提及)
group_info = getattr(message, "group_info", None)
if not group_info or not getattr(group_info, "group_id", None):
is_private = True
mention_type = 2
logger.debug("检测到私聊消息 - 强提及")
# 2. 判断是否被@(强提及)
if re.search(rf"@<(.+?):{global_config.bot.qq_account}>", processed_text):
is_at = True
mention_type = 2
logger.debug("检测到@提及 - 强提及")
# 3. 判断是否被回复(强提及)
if re.match(
rf"\[回复 (.+?)\({global_config.bot.qq_account!s}\):(.+?)\],说:", processed_text

@@ -108,10 +106,9 @@ def is_mentioned_bot_in_message(message) -> tuple[bool, float]:
rf"\[回复<(.+?)(?=:{global_config.bot.qq_account!s}>)\:{global_config.bot.qq_account!s}>:(.+?)\],说:",
processed_text,
):
is_replied = True
mention_type = 2
logger.debug("检测到回复消息 - 强提及")
# 4. 判断文本中是否提及bot名字或别名(弱提及)
if mention_type == 0: # 只有在没有强提及时才检查弱提及
# 移除@和回复标记后再检查

@@ -119,21 +116,19 @@ def is_mentioned_bot_in_message(message) -> tuple[bool, float]:
message_content = re.sub(r"@<(.+?)(?=:(\d+))\:(\d+)>", "", message_content)
message_content = re.sub(r"\[回复 (.+?)\(((\d+)|未知id)\):(.+?)\],说:", "", message_content)
message_content = re.sub(r"\[回复<(.+?)(?=:(\d+))\:(\d+)>:(.+?)\],说:", "", message_content)
# 检查bot主名字
if global_config.bot.nickname in message_content:
is_text_mentioned = True
mention_type = 1
logger.debug(f"检测到文本提及bot主名字 '{global_config.bot.nickname}' - 弱提及")
# 如果主名字没匹配,再检查别名
elif nicknames:
for alias_name in nicknames:
if alias_name in message_content:
is_text_mentioned = True
mention_type = 1
logger.debug(f"检测到文本提及bot别名 '{alias_name}' - 弱提及")
break
# 返回结果
is_mentioned = mention_type > 0
return is_mentioned, float(mention_type)