fix:优化关系构建频率
This commit is contained in:
@@ -17,7 +17,9 @@ from json_repair import repair_json
|
|||||||
from src.person_info.person_info import get_person_info_manager
|
from src.person_info.person_info import get_person_info_manager
|
||||||
import json
|
import json
|
||||||
import asyncio
|
import asyncio
|
||||||
from src.chat.utils.chat_message_builder import get_raw_msg_by_timestamp_with_chat
|
from src.chat.utils.chat_message_builder import get_raw_msg_by_timestamp_with_chat, get_raw_msg_by_timestamp_with_chat_inclusive, get_raw_msg_before_timestamp_with_chat, num_new_messages_since
|
||||||
|
import os
|
||||||
|
import pickle
|
||||||
|
|
||||||
|
|
||||||
# 配置常量:是否启用小模型即时信息提取
|
# 配置常量:是否启用小模型即时信息提取
|
||||||
@@ -25,6 +27,14 @@ from src.chat.utils.chat_message_builder import get_raw_msg_by_timestamp_with_ch
|
|||||||
# 关闭时:使用原来的异步模式,精度更高但速度较慢
|
# 关闭时:使用原来的异步模式,精度更高但速度较慢
|
||||||
ENABLE_INSTANT_INFO_EXTRACTION = True
|
ENABLE_INSTANT_INFO_EXTRACTION = True
|
||||||
|
|
||||||
|
# Settings for pruning cached message segments.
SEGMENT_CLEANUP_CONFIG = {
    # Master switch: when False the cleanup pass returns immediately.
    "enable_cleanup": True,
    # A segment whose end time is older than this many days is dropped.
    "max_segment_age_days": 7,
    # Upper bound on the number of segments kept for a single user.
    "max_segments_per_user": 10,
    # Minimum number of hours between two cleanup passes.
    "cleanup_interval_hours": 1,
}
|
||||||
|
|
||||||
logger = get_logger("processor")
|
logger = get_logger("processor")
|
||||||
|
|
||||||
|
|
||||||
@@ -95,8 +105,19 @@ class RelationshipProcessor(BaseProcessor):
|
|||||||
self.info_fetched_cache: Dict[
|
self.info_fetched_cache: Dict[
|
||||||
str, Dict[str, any]
|
str, Dict[str, any]
|
||||||
] = {} # {person_id: {"info": str, "ttl": int, "start_time": float}}
|
] = {} # {person_id: {"info": str, "ttl": int, "start_time": float}}
|
||||||
self.person_engaged_cache: List[Dict[str, any]] = [] # [{person_id: str, start_time: float, rounds: int}]
|
|
||||||
self.grace_period_rounds = 5
|
# 新的消息段缓存结构:
|
||||||
|
# {person_id: [{"start_time": float, "end_time": float, "last_msg_time": float, "message_count": int}, ...]}
|
||||||
|
self.person_engaged_cache: Dict[str, List[Dict[str, any]]] = {}
|
||||||
|
|
||||||
|
# 持久化存储文件路径
|
||||||
|
self.cache_file_path = os.path.join("data", f"relationship_cache_{self.subheartflow_id}.pkl")
|
||||||
|
|
||||||
|
# 最后处理的消息时间,避免重复处理相同消息
|
||||||
|
self.last_processed_message_time = 0.0
|
||||||
|
|
||||||
|
# 最后清理时间,用于定期清理老消息段
|
||||||
|
self.last_cleanup_time = 0.0
|
||||||
|
|
||||||
self.llm_model = LLMRequest(
|
self.llm_model = LLMRequest(
|
||||||
model=global_config.model.relation,
|
model=global_config.model.relation,
|
||||||
@@ -112,7 +133,293 @@ class RelationshipProcessor(BaseProcessor):
|
|||||||
|
|
||||||
name = get_chat_manager().get_stream_name(self.subheartflow_id)
|
name = get_chat_manager().get_stream_name(self.subheartflow_id)
|
||||||
self.log_prefix = f"[{name}] "
|
self.log_prefix = f"[{name}] "
|
||||||
|
|
||||||
|
# 加载持久化的缓存
|
||||||
|
self._load_cache()
|
||||||
|
|
||||||
|
# ================================
|
||||||
|
# 缓存管理模块
|
||||||
|
# 负责持久化存储、状态管理、缓存读写
|
||||||
|
# ================================
|
||||||
|
|
||||||
|
def _load_cache(self):
    """Load the persisted relationship cache from ``self.cache_file_path``.

    Two on-disk layouts are accepted:

    * current: a dict with the keys ``person_engaged_cache``,
      ``last_processed_message_time`` and ``last_cleanup_time``;
    * legacy: the pickled object is the ``person_engaged_cache`` mapping
      itself, in which case both timestamps are reset to ``0.0``.

    When the file does not exist the in-memory state is left untouched.
    When the file cannot be read, or the payload is not a dict, the failure
    is logged and all three cached values are reset.
    """
    if not os.path.exists(self.cache_file_path):
        logger.info(f"{self.log_prefix} 关系缓存文件不存在,使用空缓存")
        return

    try:
        # NOTE(security): unpickling can execute arbitrary code. This is only
        # acceptable while the file is written exclusively by _save_cache.
        with open(self.cache_file_path, "rb") as f:
            cache_data = pickle.load(f)

        if isinstance(cache_data, dict) and "person_engaged_cache" in cache_data:
            # Current layout: cache plus bookkeeping timestamps.
            engaged_cache = cache_data.get("person_engaged_cache", {})
            last_processed = cache_data.get("last_processed_message_time", 0.0)
            last_cleanup = cache_data.get("last_cleanup_time", 0.0)
        else:
            # Legacy layout: the payload is the cache itself.
            engaged_cache = cache_data
            last_processed = 0.0
            last_cleanup = 0.0

        # Other methods iterate the cache with .items() and index it by
        # person_id, so anything that is not a dict must be rejected here
        # instead of failing later at an unrelated call site.
        if not isinstance(engaged_cache, dict):
            raise TypeError(f"unexpected cache payload type: {type(engaged_cache).__name__}")

        self.person_engaged_cache = engaged_cache
        self.last_processed_message_time = last_processed
        self.last_cleanup_time = last_cleanup

        if self.last_processed_message_time > 0:
            last_processed_str = time.strftime(
                "%Y-%m-%d %H:%M:%S", time.localtime(self.last_processed_message_time)
            )
        else:
            last_processed_str = "未设置"
        logger.info(
            f"{self.log_prefix} 成功加载关系缓存,包含 {len(self.person_engaged_cache)} 个用户,最后处理时间:{last_processed_str}"
        )
    except Exception as e:
        # Best effort: a broken cache file must not prevent start-up.
        logger.error(f"{self.log_prefix} 加载关系缓存失败: {e}")
        self.person_engaged_cache = {}
        self.last_processed_message_time = 0.0
        self.last_cleanup_time = 0.0
|
||||||
|
|
||||||
|
def _save_cache(self):
    """Persist the relationship cache to ``self.cache_file_path``.

    The pickled payload is a dict with ``person_engaged_cache``,
    ``last_processed_message_time`` and ``last_cleanup_time``.  The data is
    first written to a sibling ``.tmp`` file and then moved over the target
    with ``os.replace``, so an interrupted write cannot leave a truncated
    cache file behind.  Any failure is logged and swallowed.
    """
    try:
        cache_dir = os.path.dirname(self.cache_file_path)
        # dirname is "" for a bare file name, and os.makedirs("") raises.
        if cache_dir:
            os.makedirs(cache_dir, exist_ok=True)

        cache_data = {
            "person_engaged_cache": self.person_engaged_cache,
            "last_processed_message_time": self.last_processed_message_time,
            "last_cleanup_time": self.last_cleanup_time,
        }

        tmp_path = f"{self.cache_file_path}.tmp"
        with open(tmp_path, "wb") as f:
            pickle.dump(cache_data, f)
        # Swap the finished file into place in a single step.
        os.replace(tmp_path, self.cache_file_path)

        logger.debug(f"{self.log_prefix} 成功保存关系缓存")
    except Exception as e:
        # Persistence is best effort; the in-memory cache stays authoritative.
        logger.error(f"{self.log_prefix} 保存关系缓存失败: {e}")
|
||||||
|
|
||||||
|
# ================================
|
||||||
|
# 消息段管理模块
|
||||||
|
# 负责跟踪用户消息活动、管理消息段、清理过期数据
|
||||||
|
# ================================
|
||||||
|
|
||||||
|
def _update_message_segments(self, person_id: str, message_time: float):
    """Fold one message of ``person_id`` into that user's segment list.

    A segment is a dict with ``start_time``, ``end_time``, ``last_msg_time``
    and ``message_count``.  The message either opens the first segment,
    extends the most recent one (at most 10 messages lie between the
    segment's last message and this one), or closes the most recent segment
    and opens a new one.  The cache is saved after every update.

    Args:
        person_id: Identifier of the user who sent the message.
        message_time: Timestamp of the message.
    """
    segments = self.person_engaged_cache.setdefault(person_id, [])
    current_time = time.time()

    # A new segment starts at the first of up to five messages fetched before
    # this one, or at the message itself when nothing precedes it.
    # NOTE(review): assumes the helper returns messages oldest-first — confirm.
    before_messages = get_raw_msg_before_timestamp_with_chat(self.subheartflow_id, message_time, limit=5)
    potential_start_time = before_messages[0]["time"] if before_messages else message_time

    def build_segment():
        # Fresh segment spanning potential_start_time .. message_time.
        return {
            "start_time": potential_start_time,
            "end_time": message_time,
            "last_msg_time": message_time,
            "message_count": self._count_messages_in_timerange(potential_start_time, message_time),
        }

    # Case 1: the user has no segment yet.
    if not segments:
        new_segment = build_segment()
        segments.append(new_segment)
        start_clock = time.strftime('%H:%M:%S', time.localtime(potential_start_time))
        end_clock = time.strftime('%H:%M:%S', time.localtime(message_time))
        logger.info(
            f"{self.log_prefix} 为用户 {person_id} 创建新消息段: 时间范围 {start_clock} - {end_clock}, 消息数: {new_segment['message_count']}"
        )
        self._save_cache()
        return

    last_segment = segments[-1]
    # Number of messages between the segment's last message and this one.
    messages_between = self._count_messages_between(last_segment["last_msg_time"], message_time)

    if messages_between > 10:
        # Case 3: too far apart. Close the previous segment, if possible at
        # the fifth message that followed its last message.
        after_messages = get_raw_msg_by_timestamp_with_chat(
            self.subheartflow_id, last_segment["last_msg_time"], current_time, limit=5, limit_mode="earliest"
        )
        if after_messages and len(after_messages) >= 5:
            last_segment["end_time"] = after_messages[4]["time"]
        # Otherwise the previous end time is kept as it is.

        last_segment["message_count"] = self._count_messages_in_timerange(
            last_segment["start_time"], last_segment["end_time"]
        )

        new_segment = build_segment()
        segments.append(new_segment)
        logger.info(f"{self.log_prefix} 为用户 {person_id} 创建新消息段(超过10条消息间隔): {new_segment}")
    else:
        # Case 2: close enough. Stretch the current segment to this message
        # and recount everything it now covers.
        last_segment["end_time"] = message_time
        last_segment["last_msg_time"] = message_time
        last_segment["message_count"] = self._count_messages_in_timerange(
            last_segment["start_time"], last_segment["end_time"]
        )
        logger.info(f"{self.log_prefix} 延伸用户 {person_id} 的消息段: {last_segment}")

    self._save_cache()
|
||||||
|
|
||||||
|
def _count_messages_in_timerange(self, start_time: float, end_time: float) -> int:
    """Return how many messages of this chat fall within ``[start_time, end_time]``.

    The count is the length of the list returned by the inclusive-range
    message query for ``self.subheartflow_id``.
    """
    return len(get_raw_msg_by_timestamp_with_chat_inclusive(self.subheartflow_id, start_time, end_time))
|
||||||
|
|
||||||
|
def _count_messages_between(self, start_time: float, end_time: float) -> int:
    """Return the number of messages between two timestamps, used for the gap check.

    Delegates to ``num_new_messages_since`` for ``self.subheartflow_id``.
    NOTE(review): the boundaries are presumably excluded — confirm against
    that helper.
    """
    gap_count = num_new_messages_since(self.subheartflow_id, start_time, end_time)
    return gap_count
|
||||||
|
|
||||||
|
def _get_total_message_count(self, person_id: str) -> int:
|
||||||
|
"""获取用户所有消息段的总消息数量"""
|
||||||
|
if person_id not in self.person_engaged_cache:
|
||||||
|
return 0
|
||||||
|
|
||||||
|
total_count = 0
|
||||||
|
for segment in self.person_engaged_cache[person_id]:
|
||||||
|
total_count += segment["message_count"]
|
||||||
|
|
||||||
|
return total_count
|
||||||
|
|
||||||
|
def _cleanup_old_segments(self) -> bool:
    """Prune expired and surplus message segments from the cache.

    The pass is skipped when cleanup is disabled in ``SEGMENT_CLEANUP_CONFIG``
    or when less than ``cleanup_interval_hours`` have elapsed since
    ``self.last_cleanup_time``.  Otherwise, for every user:

    1. segments whose ``end_time`` is older than ``max_segment_age_days``
       are removed;
    2. if more than ``max_segments_per_user`` remain, only the newest ones
       (by ``end_time``) are kept.

    Users left without segments are removed from the cache.  The cache is
    saved only when something was actually removed.

    Returns:
        bool: True when at least one segment or user was removed.
    """
    if not SEGMENT_CLEANUP_CONFIG["enable_cleanup"]:
        return False

    current_time = time.time()

    # Throttle: run at most once per configured interval.
    cleanup_interval_seconds = SEGMENT_CLEANUP_CONFIG["cleanup_interval_hours"] * 3600
    if current_time - self.last_cleanup_time < cleanup_interval_seconds:
        return False

    logger.info(f"{self.log_prefix} 开始执行老消息段清理...")

    cleanup_stats = {
        "users_cleaned": 0,
        "segments_removed": 0,
        "total_segments_before": 0,
        "total_segments_after": 0,
    }

    max_age_seconds = SEGMENT_CLEANUP_CONFIG["max_segment_age_days"] * 24 * 3600
    max_segments_per_user = SEGMENT_CLEANUP_CONFIG["max_segments_per_user"]

    users_to_remove = []

    # Iterate over a snapshot because entries are reassigned inside the loop.
    for person_id, segments in list(self.person_engaged_cache.items()):
        original_segment_count = len(segments)
        cleanup_stats["total_segments_before"] += original_segment_count

        # 1. Age-based cleanup: keep only segments that ended recently enough.
        kept_segments = []
        for segment in segments:
            if current_time - segment["end_time"] <= max_age_seconds:
                kept_segments.append(segment)
                continue
            cleanup_stats["segments_removed"] += 1
            logger.debug(
                f"{self.log_prefix} 移除用户 {person_id} 的过期消息段: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(segment['start_time']))} - {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(segment['end_time']))}"
            )

        # 2. Count-based cleanup: keep only the newest segments.
        if len(kept_segments) > max_segments_per_user:
            # Sort oldest-first and drop the head so the surviving list stays
            # in chronological order: _update_message_segments treats
            # segments[-1] as the most recent segment.
            kept_segments.sort(key=lambda x: x["end_time"])
            segments_removed_count = len(kept_segments) - max_segments_per_user
            cleanup_stats["segments_removed"] += segments_removed_count
            kept_segments = kept_segments[segments_removed_count:]
            logger.debug(
                f"{self.log_prefix} 用户 {person_id} 消息段数量过多,移除 {segments_removed_count} 个最老的消息段"
            )

        if not kept_segments:
            # Nothing left for this user; delete the entry after the loop.
            users_to_remove.append(person_id)
        else:
            self.person_engaged_cache[person_id] = kept_segments
            cleanup_stats["total_segments_after"] += len(kept_segments)

        if original_segment_count != len(kept_segments):
            cleanup_stats["users_cleaned"] += 1

    # Remove users that no longer have any segment.
    for person_id in users_to_remove:
        del self.person_engaged_cache[person_id]
        logger.debug(f"{self.log_prefix} 移除用户 {person_id}:没有剩余消息段")

    self.last_cleanup_time = current_time

    something_removed = cleanup_stats["segments_removed"] > 0 or len(users_to_remove) > 0
    if something_removed:
        self._save_cache()
        logger.info(
            f"{self.log_prefix} 清理完成 - 影响用户: {cleanup_stats['users_cleaned']}, 移除消息段: {cleanup_stats['segments_removed']}, 移除用户: {len(users_to_remove)}"
        )
        logger.info(
            f"{self.log_prefix} 消息段统计 - 清理前: {cleanup_stats['total_segments_before']}, 清理后: {cleanup_stats['total_segments_after']}"
        )
    else:
        logger.debug(f"{self.log_prefix} 清理完成 - 无需清理任何内容")

    return something_removed
|
||||||
|
|
||||||
|
def force_cleanup_user_segments(self, person_id: str) -> bool:
    """Remove every cached message segment of one user and save the cache.

    Args:
        person_id: Identifier of the user whose segments are discarded.

    Returns:
        bool: True when the user had a cache entry that was removed,
        False when there was nothing to remove.
    """
    if person_id not in self.person_engaged_cache:
        return False

    removed_segments = self.person_engaged_cache.pop(person_id)
    segments_count = len(removed_segments)
    self._save_cache()
    logger.info(f"{self.log_prefix} 强制清理用户 {person_id} 的 {segments_count} 个消息段")
    return True
|
||||||
|
|
||||||
|
def get_cache_status(self) -> str:
    """Build a multi-line, human-readable report of the cache for debugging and monitoring.

    The report lists the last processed message time, the last cleanup time,
    the number of cached users, the cleanup settings and, for every user,
    the total message count and each segment's time range.  An empty cache
    yields a single-line message.
    """
    if not self.person_engaged_cache:
        return f"{self.log_prefix} 关系缓存为空"

    def as_datetime(timestamp):
        # Local-time rendering shared by every timestamp in the report.
        return time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(timestamp))

    if self.last_processed_message_time > 0:
        processed_str = as_datetime(self.last_processed_message_time)
    else:
        processed_str = '未设置'

    if self.last_cleanup_time > 0:
        cleaned_str = as_datetime(self.last_cleanup_time)
    else:
        cleaned_str = '未执行'

    cleanup_state = '启用' if SEGMENT_CLEANUP_CONFIG['enable_cleanup'] else '禁用'

    # Header section.
    status_lines = [
        f"{self.log_prefix} 关系缓存状态:",
        f"最后处理消息时间:{processed_str}",
        f"最后清理时间:{cleaned_str}",
        f"总用户数:{len(self.person_engaged_cache)}",
        f"清理配置:{cleanup_state} (最大保存{SEGMENT_CLEANUP_CONFIG['max_segment_age_days']}天, 每用户最多{SEGMENT_CLEANUP_CONFIG['max_segments_per_user']}段)",
        "",
    ]

    # One section per user, followed by a blank separator line.
    for person_id, segments in self.person_engaged_cache.items():
        total_count = self._get_total_message_count(person_id)
        status_lines.extend(
            [
                f"用户 {person_id}:",
                f" 总消息数:{total_count} ({total_count}/45)",
                f" 消息段数:{len(segments)}",
            ]
        )

        for position, segment in enumerate(segments, start=1):
            start_str = as_datetime(segment['start_time'])
            end_str = as_datetime(segment['end_time'])
            last_str = as_datetime(segment['last_msg_time'])
            status_lines.append(
                f" 段{position}: {start_str} -> {end_str} (最后消息: {last_str}, 消息数: {segment['message_count']})"
            )
        status_lines.append("")

    return "\n".join(status_lines)
|
||||||
|
|
||||||
|
# ================================
|
||||||
|
# 主要处理流程
|
||||||
|
# 统筹各模块协作、对外提供服务接口
|
||||||
|
# ================================
|
||||||
|
|
||||||
async def process_info(self, observations: List[Observation] = None, *infos) -> List[InfoBase]:
|
async def process_info(self, observations: List[Observation] = None, *infos) -> List[InfoBase]:
|
||||||
"""处理信息对象
|
"""处理信息对象
|
||||||
|
|
||||||
@@ -140,7 +447,10 @@ class RelationshipProcessor(BaseProcessor):
|
|||||||
"""
|
"""
|
||||||
在回复前进行思考,生成内心想法并收集工具调用结果
|
在回复前进行思考,生成内心想法并收集工具调用结果
|
||||||
"""
|
"""
|
||||||
# 0. 从观察信息中提取所需数据
|
# 0. 执行定期清理
|
||||||
|
self._cleanup_old_segments()
|
||||||
|
|
||||||
|
# 1. 从观察信息中提取所需数据
|
||||||
# 需要兼容私聊
|
# 需要兼容私聊
|
||||||
|
|
||||||
chat_observe_info = ""
|
chat_observe_info = ""
|
||||||
@@ -149,36 +459,52 @@ class RelationshipProcessor(BaseProcessor):
|
|||||||
for observation in observations:
|
for observation in observations:
|
||||||
if isinstance(observation, ChattingObservation):
|
if isinstance(observation, ChattingObservation):
|
||||||
chat_observe_info = observation.get_observe_info()
|
chat_observe_info = observation.get_observe_info()
|
||||||
|
|
||||||
|
# 从聊天观察中提取用户信息并更新消息段
|
||||||
|
# 获取最新的非bot消息来更新消息段
|
||||||
|
latest_messages = get_raw_msg_by_timestamp_with_chat(
|
||||||
|
self.subheartflow_id, self.last_processed_message_time, current_time, limit=50 # 获取自上次处理后的消息
|
||||||
|
)
|
||||||
|
if latest_messages:
|
||||||
|
# 处理所有新的非bot消息
|
||||||
|
for latest_msg in latest_messages:
|
||||||
|
user_id = latest_msg.get('user_id')
|
||||||
|
platform = latest_msg.get('user_platform') or latest_msg.get('chat_info_platform')
|
||||||
|
msg_time = latest_msg.get('time', 0)
|
||||||
|
|
||||||
|
if user_id and platform and user_id != global_config.bot.qq_account and msg_time > self.last_processed_message_time:
|
||||||
|
from src.person_info.person_info import PersonInfoManager
|
||||||
|
person_id = PersonInfoManager.get_person_id(platform, user_id)
|
||||||
|
self._update_message_segments(person_id, msg_time)
|
||||||
|
logger.debug(f"{self.log_prefix} 更新用户 {person_id} 的消息段,消息时间:{time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(msg_time))}")
|
||||||
|
self.last_processed_message_time = max(self.last_processed_message_time, msg_time)
|
||||||
break
|
break
|
||||||
|
|
||||||
# 1. 处理person_engaged_cache
|
# 1. 检查是否有用户达到关系构建条件(总消息数达到45条)
|
||||||
for record in list(self.person_engaged_cache):
|
users_to_build_relationship = []
|
||||||
record["rounds"] += 1
|
for person_id, segments in self.person_engaged_cache.items():
|
||||||
time_elapsed = current_time - record["start_time"]
|
total_message_count = self._get_total_message_count(person_id)
|
||||||
message_count = len(
|
if total_message_count >= 45:
|
||||||
get_raw_msg_by_timestamp_with_chat(self.subheartflow_id, record["start_time"], current_time)
|
users_to_build_relationship.append(person_id)
|
||||||
)
|
|
||||||
|
|
||||||
print(record)
|
|
||||||
|
|
||||||
# 根据消息数量和时间设置不同的触发条件
|
|
||||||
should_trigger = (
|
|
||||||
message_count >= 50 # 50条消息必定满足
|
|
||||||
or (message_count >= 35 and time_elapsed >= 300) # 35条且10分钟
|
|
||||||
or (message_count >= 25 and time_elapsed >= 900) # 25条且30分钟
|
|
||||||
or (message_count >= 10 and time_elapsed >= 2000) # 10条且1小时
|
|
||||||
)
|
|
||||||
|
|
||||||
if should_trigger:
|
|
||||||
logger.info(
|
logger.info(
|
||||||
f"{self.log_prefix} 用户 {record['person_id']} 满足关系构建条件,开始构建关系。消息数:{message_count},时长:{time_elapsed:.0f}秒"
|
f"{self.log_prefix} 用户 {person_id} 满足关系构建条件,总消息数:{total_message_count},消息段数:{len(segments)}"
|
||||||
)
|
)
|
||||||
asyncio.create_task(
|
elif total_message_count > 0:
|
||||||
self.update_impression_on_cache_expiry(
|
# 记录进度信息
|
||||||
record["person_id"], self.subheartflow_id, record["start_time"], current_time
|
logger.debug(
|
||||||
)
|
f"{self.log_prefix} 用户 {person_id} 进度:{total_message_count}/45 条消息,{len(segments)} 个消息段"
|
||||||
)
|
)
|
||||||
self.person_engaged_cache.remove(record)
|
|
||||||
|
# 2. 为满足条件的用户构建关系
|
||||||
|
for person_id in users_to_build_relationship:
|
||||||
|
segments = self.person_engaged_cache[person_id]
|
||||||
|
# 异步执行关系构建
|
||||||
|
asyncio.create_task(
|
||||||
|
self.update_impression_on_segments(person_id, self.subheartflow_id, segments)
|
||||||
|
)
|
||||||
|
# 移除已处理的用户缓存
|
||||||
|
del self.person_engaged_cache[person_id]
|
||||||
|
self._save_cache()
|
||||||
|
|
||||||
# 2. 减少info_fetched_cache中所有信息的TTL
|
# 2. 减少info_fetched_cache中所有信息的TTL
|
||||||
for person_id in list(self.info_fetched_cache.keys()):
|
for person_id in list(self.info_fetched_cache.keys()):
|
||||||
@@ -260,12 +586,8 @@ class RelationshipProcessor(BaseProcessor):
|
|||||||
|
|
||||||
logger.info(f"{self.log_prefix} 调取用户 {person_name} 的 {info_type} 信息。")
|
logger.info(f"{self.log_prefix} 调取用户 {person_name} 的 {info_type} 信息。")
|
||||||
|
|
||||||
# 检查person_engaged_cache中是否已存在该person_id
|
# 这里不需要检查person_engaged_cache,因为消息段的管理由_update_message_segments处理
|
||||||
person_exists = any(record["person_id"] == person_id for record in self.person_engaged_cache)
|
# 信息提取和消息段管理是独立的流程
|
||||||
if not person_exists:
|
|
||||||
self.person_engaged_cache.append(
|
|
||||||
{"person_id": person_id, "start_time": time.time(), "rounds": 0}
|
|
||||||
)
|
|
||||||
|
|
||||||
if ENABLE_INSTANT_INFO_EXTRACTION:
|
if ENABLE_INSTANT_INFO_EXTRACTION:
|
||||||
# 收集即时提取任务
|
# 收集即时提取任务
|
||||||
@@ -336,6 +658,79 @@ class RelationshipProcessor(BaseProcessor):
|
|||||||
|
|
||||||
return persons_infos_str
|
return persons_infos_str
|
||||||
|
|
||||||
|
# ================================
|
||||||
|
# 关系构建模块
|
||||||
|
# 负责触发关系构建、整合消息段、更新用户印象
|
||||||
|
# ================================
|
||||||
|
|
||||||
|
async def update_impression_on_segments(self, person_id: str, chat_id: str, segments: List[Dict[str, any]]):
    """Update a user's impression from the messages covered by their segments.

    For each segment, the messages of ``chat_id`` between the segment's
    ``start_time`` and ``end_time`` are fetched.  Before every non-empty
    segment except the first, a synthetic "gap" message is inserted to mark
    that messages were skipped.  The combined list is sorted by time and
    handed to the relationship manager.  Errors are logged, never raised.

    Args:
        person_id: Identifier of the user whose impression is updated.
        chat_id: Identifier of the chat the messages are read from.
        segments: Segment dicts providing ``start_time`` and ``end_time``.
    """
    logger.info(f"开始为 {person_id} 基于 {len(segments)} 个消息段更新印象")
    try:
        processed_messages = []

        for i, segment in enumerate(segments):
            start_time = segment["start_time"]
            end_time = segment["end_time"]
            start_date = time.strftime('%Y-%m-%d %H:%M', time.localtime(start_time))

            # Read from the chat the caller asked for, so the fetched
            # messages and the chat_id stamped on the gap marker agree.
            segment_messages = get_raw_msg_by_timestamp_with_chat_inclusive(chat_id, start_time, end_time)
            logger.info(
                f"消息段 {i+1}: {start_date} - {time.strftime('%Y-%m-%d %H:%M', time.localtime(end_time))}, 消息数: {len(segment_messages)}"
            )

            if not segment_messages:
                continue

            if i > 0:
                # Marker telling the reader that messages were omitted
                # between the previous segment and this one.
                gap_message = {
                    "time": start_time - 0.1,  # slightly before the segment so it sorts first
                    "user_id": "system",
                    "user_platform": "system",
                    "user_nickname": "系统",
                    "user_cardname": "",
                    "display_message": f"...(中间省略一些消息){start_date} 之后的消息如下...",
                    "is_action_record": True,
                    "chat_info_platform": segment_messages[0].get("chat_info_platform", ""),
                    "chat_id": chat_id,
                }
                processed_messages.append(gap_message)

            processed_messages.extend(segment_messages)

        if processed_messages:
            # Order everything, gap markers included, by timestamp.
            processed_messages.sort(key=lambda x: x['time'])

            logger.info(f"为 {person_id} 获取到总共 {len(processed_messages)} 条消息(包含间隔标识)用于印象更新")
            relationship_manager = get_relationship_manager()

            await relationship_manager.update_person_impression(
                person_id=person_id,
                timestamp=time.time(),
                bot_engaged_messages=processed_messages,
            )
        else:
            logger.info(f"没有找到 {person_id} 的消息段对应的消息,不更新印象")

    except Exception as e:
        # This runs as a background task; log the failure with its traceback.
        logger.error(f"为 {person_id} 更新印象时发生错误: {e}")
        logger.error(traceback.format_exc())
|
||||||
|
|
||||||
|
# ================================
|
||||||
|
# 信息调取模块
|
||||||
|
# 负责实时分析对话需求、提取用户信息、管理信息缓存
|
||||||
|
# ================================
|
||||||
|
|
||||||
async def _execute_instant_extraction_batch(self, instant_tasks: list):
|
async def _execute_instant_extraction_batch(self, instant_tasks: list):
|
||||||
"""
|
"""
|
||||||
批量执行即时提取任务
|
批量执行即时提取任务
|
||||||
@@ -526,25 +921,7 @@ class RelationshipProcessor(BaseProcessor):
|
|||||||
logger.error(f"{self.log_prefix} 执行LLM请求获取用户信息时出错: {e}")
|
logger.error(f"{self.log_prefix} 执行LLM请求获取用户信息时出错: {e}")
|
||||||
logger.error(traceback.format_exc())
|
logger.error(traceback.format_exc())
|
||||||
|
|
||||||
async def update_impression_on_cache_expiry(self, person_id: str, chat_id: str, start_time: float, end_time: float):
|
|
||||||
"""
|
|
||||||
在缓存过期时,获取聊天记录并更新用户印象
|
|
||||||
"""
|
|
||||||
logger.info(f"缓存过期,开始为 {person_id} 更新印象。时间范围:{start_time} -> {end_time}")
|
|
||||||
try:
|
|
||||||
impression_messages = get_raw_msg_by_timestamp_with_chat(chat_id, start_time, end_time)
|
|
||||||
if impression_messages:
|
|
||||||
logger.info(f"为 {person_id} 获取到 {len(impression_messages)} 条消息用于印象更新。")
|
|
||||||
relationship_manager = get_relationship_manager()
|
|
||||||
await relationship_manager.update_person_impression(
|
|
||||||
person_id=person_id, timestamp=end_time, bot_engaged_messages=impression_messages
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
logger.info(f"在指定时间范围内没有找到 {person_id} 的消息,不更新印象。")
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"为 {person_id} 更新印象时发生错误: {e}")
|
|
||||||
logger.error(traceback.format_exc())
|
|
||||||
|
|
||||||
|
|
||||||
init_prompt()
|
init_prompt()
|
||||||
|
|||||||
@@ -851,7 +851,7 @@ class NormalChat:
|
|||||||
reply_ratio = reply_count / total_messages if total_messages > 0 else 0
|
reply_ratio = reply_count / total_messages if total_messages > 0 else 0
|
||||||
# 使用对数函数让低比率时概率上升更快:log(1 + ratio * k) / log(1 + k) + base
|
# 使用对数函数让低比率时概率上升更快:log(1 + ratio * k) / log(1 + k) + base
|
||||||
# k=7时,0.05比率对应约0.4概率,0.1比率对应约0.6概率,0.2比率对应约0.8概率
|
# k=7时,0.05比率对应约0.4概率,0.1比率对应约0.6概率,0.2比率对应约0.8概率
|
||||||
k_reply = 7 * global_config.relationship.relation_frequency
|
k_reply = 10 * global_config.relationship.relation_frequency
|
||||||
base_reply_prob = 0.1 # 基础概率10%
|
base_reply_prob = 0.1 # 基础概率10%
|
||||||
reply_build_probability = (
|
reply_build_probability = (
|
||||||
(math.log(1 + reply_ratio * k_reply) / math.log(1 + k_reply)) * 0.9 + base_reply_prob
|
(math.log(1 + reply_ratio * k_reply) / math.log(1 + k_reply)) * 0.9 + base_reply_prob
|
||||||
@@ -862,7 +862,7 @@ class NormalChat:
|
|||||||
# 计算接收概率(receive_count的影响)
|
# 计算接收概率(receive_count的影响)
|
||||||
receive_ratio = receive_count / total_messages if total_messages > 0 else 0
|
receive_ratio = receive_count / total_messages if total_messages > 0 else 0
|
||||||
# 接收概率使用更温和的对数曲线,最大0.5,基础0.08
|
# 接收概率使用更温和的对数曲线,最大0.5,基础0.08
|
||||||
k_receive = 6 * global_config.relationship.relation_frequency
|
k_receive = 10 * global_config.relationship.relation_frequency
|
||||||
base_receive_prob = 0.08 # 基础概率8%
|
base_receive_prob = 0.08 # 基础概率8%
|
||||||
receive_build_probability = (
|
receive_build_probability = (
|
||||||
(math.log(1 + receive_ratio * k_receive) / math.log(1 + k_receive)) * 0.42 + base_receive_prob
|
(math.log(1 + receive_ratio * k_receive) / math.log(1 + k_receive)) * 0.42 + base_receive_prob
|
||||||
|
|||||||
@@ -41,6 +41,20 @@ def get_raw_msg_by_timestamp_with_chat(
|
|||||||
return find_messages(message_filter=filter_query, sort=sort_order, limit=limit, limit_mode=limit_mode)
|
return find_messages(message_filter=filter_query, sort=sort_order, limit=limit, limit_mode=limit_mode)
|
||||||
|
|
||||||
|
|
||||||
|
def get_raw_msg_by_timestamp_with_chat_inclusive(
    chat_id: str, timestamp_start: float, timestamp_end: float, limit: int = 0, limit_mode: str = "latest"
) -> List[Dict[str, Any]]:
    """Return the messages of one chat whose time lies in ``[timestamp_start, timestamp_end]``.

    Both boundaries are included (``$gte`` / ``$lte``).

    Args:
        chat_id: Chat whose messages are queried.
        timestamp_start: Lower bound of the time window (inclusive).
        timestamp_end: Upper bound of the time window (inclusive).
        limit: Maximum number of messages to return; 0 means no limit.
        limit_mode: Only relevant when ``limit > 0``; ``'earliest'`` selects
            the oldest records, ``'latest'`` (the default) the newest ones.

    Returns:
        The list produced by ``find_messages``.  An ascending time sort is
        requested explicitly only when ``limit`` is 0; with a limit, ordering
        is left to ``find_messages`` (NOTE(review): presumably also ascending
        — confirm).
    """
    time_window = {"$gte": timestamp_start, "$lte": timestamp_end}
    filter_query = {"chat_id": chat_id, "time": time_window}

    # An explicit sort is only supplied for unlimited queries.
    if limit == 0:
        sort_order = [("time", 1)]
    else:
        sort_order = None

    # limit_mode is forwarded unchanged.
    return find_messages(message_filter=filter_query, sort=sort_order, limit=limit, limit_mode=limit_mode)
|
||||||
|
|
||||||
|
|
||||||
def get_raw_msg_by_timestamp_with_chat_users(
|
def get_raw_msg_by_timestamp_with_chat_users(
|
||||||
chat_id: str,
|
chat_id: str,
|
||||||
timestamp_start: float,
|
timestamp_start: float,
|
||||||
|
|||||||
@@ -208,6 +208,9 @@ class RelationshipManager:
|
|||||||
|
|
||||||
readable_messages = self.build_focus_readable_messages(messages=user_messages, target_person_id=person_id)
|
readable_messages = self.build_focus_readable_messages(messages=user_messages, target_person_id=person_id)
|
||||||
|
|
||||||
|
if not readable_messages:
|
||||||
|
return
|
||||||
|
|
||||||
for original_name, mapped_name in name_mapping.items():
|
for original_name, mapped_name in name_mapping.items():
|
||||||
# print(f"original_name: {original_name}, mapped_name: {mapped_name}")
|
# print(f"original_name: {original_name}, mapped_name: {mapped_name}")
|
||||||
readable_messages = readable_messages.replace(f"{original_name}", f"{mapped_name}")
|
readable_messages = readable_messages.replace(f"{original_name}", f"{mapped_name}")
|
||||||
@@ -470,60 +473,14 @@ class RelationshipManager:
|
|||||||
logger.info(f"印象更新完成 for {person_name}")
|
logger.info(f"印象更新完成 for {person_name}")
|
||||||
|
|
||||||
def build_focus_readable_messages(self, messages: list, target_person_id: str = None) -> str:
|
def build_focus_readable_messages(self, messages: list, target_person_id: str = None) -> str:
|
||||||
"""格式化消息,只保留目标用户和bot消息附近的内容"""
|
"""格式化消息,处理所有消息内容"""
|
||||||
# 找到目标用户和bot的消息索引
|
if not messages:
|
||||||
target_indices = []
|
|
||||||
for i, msg in enumerate(messages):
|
|
||||||
user_id = msg.get("user_id")
|
|
||||||
platform = msg.get("chat_info_platform")
|
|
||||||
person_id = PersonInfoManager.get_person_id(platform, user_id)
|
|
||||||
if person_id == target_person_id:
|
|
||||||
target_indices.append(i)
|
|
||||||
|
|
||||||
if not target_indices:
|
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
# 获取需要保留的消息索引
|
# 直接处理所有消息,不进行过滤
|
||||||
keep_indices = set()
|
return build_readable_messages(
|
||||||
for idx in target_indices:
|
messages=messages, replace_bot_name=True, timestamp_mode="normal_no_YMD", truncate=False
|
||||||
# 获取前后5条消息的索引
|
)
|
||||||
start_idx = max(0, idx - 5)
|
|
||||||
end_idx = min(len(messages), idx + 6)
|
|
||||||
keep_indices.update(range(start_idx, end_idx))
|
|
||||||
|
|
||||||
print(keep_indices)
|
|
||||||
|
|
||||||
# 将索引排序
|
|
||||||
keep_indices = sorted(list(keep_indices))
|
|
||||||
|
|
||||||
# 按顺序构建消息组
|
|
||||||
message_groups = []
|
|
||||||
current_group = []
|
|
||||||
|
|
||||||
for i in range(len(messages)):
|
|
||||||
if i in keep_indices:
|
|
||||||
current_group.append(messages[i])
|
|
||||||
elif current_group:
|
|
||||||
# 如果当前组不为空,且遇到不保留的消息,则结束当前组
|
|
||||||
if current_group:
|
|
||||||
message_groups.append(current_group)
|
|
||||||
current_group = []
|
|
||||||
|
|
||||||
# 添加最后一组
|
|
||||||
if current_group:
|
|
||||||
message_groups.append(current_group)
|
|
||||||
|
|
||||||
# 构建最终的消息文本
|
|
||||||
result = []
|
|
||||||
for i, group in enumerate(message_groups):
|
|
||||||
if i > 0:
|
|
||||||
result.append("...")
|
|
||||||
group_text = build_readable_messages(
|
|
||||||
messages=group, replace_bot_name=True, timestamp_mode="normal_no_YMD", truncate=False
|
|
||||||
)
|
|
||||||
result.append(group_text)
|
|
||||||
|
|
||||||
return "\n".join(result)
|
|
||||||
|
|
||||||
def calculate_time_weight(self, point_time: str, current_time: str) -> float:
|
def calculate_time_weight(self, point_time: str, current_time: str) -> float:
|
||||||
"""计算基于时间的权重系数"""
|
"""计算基于时间的权重系数"""
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ description = "群聊禁言管理插件,提供智能禁言功能"
|
|||||||
|
|
||||||
# 组件启用控制
|
# 组件启用控制
|
||||||
[components]
|
[components]
|
||||||
enable_smart_mute = true # 启用智能禁言Action
|
enable_smart_mute = true # 启用智能禁言Action
|
||||||
enable_mute_command = false # 启用禁言命令Command
|
enable_mute_command = false # 启用禁言命令Command
|
||||||
|
|
||||||
# 禁言配置
|
# 禁言配置
|
||||||
|
|||||||
Reference in New Issue
Block a user