From ddf0d08fac3c2dc920f1a3d2f6bdbb14592a5568 Mon Sep 17 00:00:00 2001 From: Windpicker-owo <3431391539@qq.com> Date: Wed, 17 Sep 2025 20:50:03 +0800 Subject: [PATCH] =?UTF-8?q?feat(affinity-flow):=20=E4=BC=98=E5=8C=96?= =?UTF-8?q?=E5=85=B4=E8=B6=A3=E5=BA=A6=E8=AF=84=E5=88=86=E5=92=8C=E5=9B=9E?= =?UTF-8?q?=E5=A4=8D=E5=86=B3=E7=AD=96=E9=80=BB=E8=BE=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 降低回复阈值从0.6到0.55以增加回复可能性 - 在最终分数计算中加入标签数量奖励机制,每多匹配一个标签加0.05分,最高加0.3分 - 引入分级相似度匹配系统(高/中/低)并应用不同加成系数 - 增加关键词直接匹配奖励机制,支持完全匹配、包含匹配和部分匹配 - 在计划过滤器中处理回复动作不可用时的自动转换逻辑 - 增加兴趣度阈值80%检查,低于该阈值直接返回no_action - 优化日志输出和统计信息,提供更详细的匹配分析 --- src/chat/affinity_flow/interest_scoring.py | 15 ++- .../interest_system/bot_interest_manager.py | 127 ++++++++++++++++-- src/chat/planner_actions/plan_filter.py | 12 +- src/chat/planner_actions/planner.py | 36 ++++- src/chat/planner_actions/planner_prompts.py | 1 - 5 files changed, 167 insertions(+), 24 deletions(-) diff --git a/src/chat/affinity_flow/interest_scoring.py b/src/chat/affinity_flow/interest_scoring.py index bc404c70b..fd05bb22a 100644 --- a/src/chat/affinity_flow/interest_scoring.py +++ b/src/chat/affinity_flow/interest_scoring.py @@ -30,7 +30,7 @@ class InterestScoringSystem: } # 评分阈值 - self.reply_threshold = 0.6 # 默认回复阈值 + self.reply_threshold = 0.55 # 默认回复阈值 self.mention_threshold = 0.3 # 提及阈值 # 连续不回复概率提升 @@ -147,9 +147,10 @@ class InterestScoringSystem: logger.debug(f" 📈 置信度: {match_result.confidence:.3f}") logger.debug(f" 🔢 匹配详情: {match_result.match_scores}") - # 返回匹配分数,考虑置信度 - final_score = match_result.overall_score * 1.15 * match_result.confidence - logger.debug(f"⚖️ 最终分数(总分×置信度): {final_score:.3f}") + # 返回匹配分数,考虑置信度和匹配标签数量 + match_count_bonus = min(len(match_result.matched_tags) * 0.05, 0.3) # 每多匹配一个标签+0.05,最高+0.3 + final_score = match_result.overall_score * 1.3 * match_result.confidence + match_count_bonus + logger.debug(f"⚖️ 最终分数计算: 总分({match_result.overall_score:.3f}) × 1.3 × 置信度({match_result.confidence:.3f}) + 标签数量奖励({match_count_bonus:.3f}) = {final_score:.3f}") return final_score else: logger.warning("⚠️ 智能兴趣匹配未返回结果") @@ -265,7 +266,7 @@ class InterestScoringSystem: logger.info(f"🎯 回复决策: {decision}") logger.info(f"📊 决策依据: {score.total_score:.3f} {'>=' if should_reply else '<'} {effective_threshold:.3f}") - return should_reply + return should_reply, score.total_score def record_reply_action(self, did_reply: bool): """记录回复动作""" @@ -273,10 +274,10 @@ class InterestScoringSystem: if did_reply: self.no_reply_count = max(0, self.no_reply_count - 1) - action = "✅ 回复了消息" + action = "✅ reply动作可用" else: self.no_reply_count += 1 - action = "❌ 选择不回复" + action = "❌ reply动作不可用" # 限制最大计数 self.no_reply_count = min(self.no_reply_count, self.max_no_reply_count) diff --git a/src/chat/interest_system/bot_interest_manager.py b/src/chat/interest_system/bot_interest_manager.py index 2c98c8bd2..4b2ea8a70 100644 --- a/src/chat/interest_system/bot_interest_manager.py +++ b/src/chat/interest_system/bot_interest_manager.py @@ -429,26 +429,64 @@ class BotInterestManager: # 计算与每个兴趣标签的相似度 match_count = 0 high_similarity_count = 0 - similarity_threshold = 0.3 + medium_similarity_count = 0 + low_similarity_count = 0 - logger.debug(f"🔍 使用相似度阈值: {similarity_threshold}") + # 分级相似度阈值 + high_threshold = 0.5 + medium_threshold = 0.3 + low_threshold = 0.15 + + logger.debug(f"🔍 使用分级相似度阈值: 高={high_threshold}, 中={medium_threshold}, 低={low_threshold}") for tag in active_tags: if tag.embedding: similarity = self._calculate_cosine_similarity(message_embedding, tag.embedding) + + # 基础加权分数 weighted_score = similarity * tag.weight - if similarity > similarity_threshold: + # 根据相似度等级应用不同的加成 + if similarity > high_threshold: + # 高相似度:强加成 + enhanced_score = weighted_score * 1.5 match_count += 1 - result.add_match(tag.tag_name, weighted_score, [tag.tag_name]) + high_similarity_count += 1 + result.add_match(tag.tag_name, enhanced_score, [tag.tag_name]) + logger.debug(f" 🏷️ '{tag.tag_name}': 相似度={similarity:.3f}, 权重={tag.weight:.2f}, 基础分数={weighted_score:.3f}, 增强分数={enhanced_score:.3f} [高匹配]") - if similarity > 0.7: - high_similarity_count += 1 + elif similarity > medium_threshold: + # 中相似度:中等加成 + enhanced_score = weighted_score * 1.2 + match_count += 1 + medium_similarity_count += 1 + result.add_match(tag.tag_name, enhanced_score, [tag.tag_name]) + logger.debug(f" 🏷️ '{tag.tag_name}': 相似度={similarity:.3f}, 权重={tag.weight:.2f}, 基础分数={weighted_score:.3f}, 增强分数={enhanced_score:.3f} [中匹配]") - logger.debug(f" 🏷️ '{tag.tag_name}': 相似度={similarity:.3f}, 权重={tag.weight:.2f}, 加权分数={weighted_score:.3f}") + elif similarity > low_threshold: + # 低相似度:轻微加成 + enhanced_score = weighted_score * 1.05 + match_count += 1 + low_similarity_count += 1 + result.add_match(tag.tag_name, enhanced_score, [tag.tag_name]) + logger.debug(f" 🏷️ '{tag.tag_name}': 相似度={similarity:.3f}, 权重={tag.weight:.2f}, 基础分数={weighted_score:.3f}, 增强分数={enhanced_score:.3f} [低匹配]") logger.info(f"📈 匹配统计: {match_count}/{len(active_tags)} 个标签超过阈值") - logger.info(f"🔥 高相似度匹配(>0.7): {high_similarity_count} 个") + logger.info(f"🔥 高相似度匹配(>{high_threshold}): {high_similarity_count} 个") + logger.info(f"⚡ 中相似度匹配(>{medium_threshold}): {medium_similarity_count} 个") + logger.info(f"🌊 低相似度匹配(>{low_threshold}): {low_similarity_count} 个") + + # 添加直接关键词匹配奖励 + keyword_bonus = self._calculate_keyword_match_bonus(keywords, result.matched_tags) + logger.debug(f"🎯 关键词直接匹配奖励: {keyword_bonus}") + + # 应用关键词奖励到匹配分数 + for tag_name in result.matched_tags: + if tag_name in keyword_bonus: + original_score = result.match_scores[tag_name] + bonus = keyword_bonus[tag_name] + result.match_scores[tag_name] = original_score + bonus + logger.debug(f" 🏷️ '{tag_name}': 原始分数={original_score:.3f}, 奖励={bonus:.3f}, 最终分数={result.match_scores[tag_name]:.3f}") # 计算总体分数 result.calculate_overall_score() @@ -463,6 +501,79 @@ class BotInterestManager: return result + def _calculate_keyword_match_bonus(self, keywords: List[str], matched_tags: List[str]) -> Dict[str, float]: + """计算关键词直接匹配奖励""" + if not keywords or not matched_tags: + return {} + + bonus_dict = {} + + for tag_name in matched_tags: + bonus = 0.0 + + # 检查关键词与标签的直接匹配 + for keyword in keywords: + keyword_lower = keyword.lower().strip() + tag_name_lower = tag_name.lower() + + # 完全匹配 + if keyword_lower == tag_name_lower: + bonus += 0.3 + logger.debug(f" 🎯 关键词完全匹配: '{keyword}' == '{tag_name}' (+0.3)") + + # 包含匹配 + elif keyword_lower in tag_name_lower or tag_name_lower in keyword_lower: + bonus += 0.15 + logger.debug(f" 🎯 关键词包含匹配: '{keyword}' ⊃ '{tag_name}' (+0.15)") + + # 部分匹配(编辑距离) + elif self._calculate_partial_match(keyword_lower, tag_name_lower): + bonus += 0.08 + logger.debug(f" 🎯 关键词部分匹配: '{keyword}' ≈ '{tag_name}' (+0.08)") + + if bonus > 0: + bonus_dict[tag_name] = min(bonus, 0.5) # 最大奖励限制为0.5 + + return bonus_dict + + def _calculate_partial_match(self, text1: str, text2: str) -> bool: + """计算部分匹配(基于编辑距离)""" + try: + # 简单的编辑距离计算 + max_len = max(len(text1), len(text2)) + if max_len == 0: + return False + + # 计算编辑距离 + distance = self._levenshtein_distance(text1, text2) + + # 如果编辑距离小于较短字符串长度的一半,认为是部分匹配 + min_len = min(len(text1), len(text2)) + return distance <= min_len // 2 + + except Exception: + return False + + def _levenshtein_distance(self, s1: str, s2: str) -> int: + """计算莱文斯坦距离""" + if len(s1) < len(s2): + return self._levenshtein_distance(s2, s1) + + if len(s2) == 0: + return len(s1) + + previous_row = range(len(s2) + 1) + for i, c1 in enumerate(s1): + current_row = [i + 1] + for j, c2 in enumerate(s2): + insertions = previous_row[j + 1] + 1 + deletions = current_row[j] + 1 + substitutions = previous_row[j] + (c1 != c2) + current_row.append(min(insertions, deletions, substitutions)) + previous_row = current_row + + return previous_row[-1] + def _calculate_cosine_similarity(self, vec1: List[float], vec2: List[float]) -> float: """计算余弦相似度""" try: diff --git a/src/chat/planner_actions/plan_filter.py b/src/chat/planner_actions/plan_filter.py index de3a47ec6..ee1f8e843 100644 --- a/src/chat/planner_actions/plan_filter.py +++ b/src/chat/planner_actions/plan_filter.py @@ -38,7 +38,7 @@ class PlanFilter: ) self.last_obs_time_mark = 0.0 - async def filter(self, plan: Plan) -> Plan: + async def filter(self, reply_not_available: bool, plan: Plan) -> Plan: """ 执行筛选逻辑,并填充 Plan 对象的 decided_actions 字段。 """ @@ -58,6 +58,16 @@ class PlanFilter: prased_json = {"action": "no_action", "reason": "返回内容无法解析为JSON"} logger.debug(f"墨墨在这里加了日志 -> 解析后的 JSON: {parsed_json}") + if "reply" in plan.available_actions and reply_not_available: + # 如果reply动作不可用,但llm返回的仍然有reply,则改为no_reply + if isinstance(parsed_json, dict) and parsed_json.get("action") == "reply": + parsed_json["action"] = "no_reply" + elif isinstance(parsed_json, list): + for item in parsed_json: + if isinstance(item, dict) and item.get("action") == "reply": + item["action"] = "no_reply" + item["reason"] += " (但由于兴趣度不足,reply动作不可用,已改为no_reply)" + if isinstance(parsed_json, dict): parsed_json = [parsed_json] diff --git a/src/chat/planner_actions/planner.py b/src/chat/planner_actions/planner.py index 85269a756..1ae92b8c2 100644 --- a/src/chat/planner_actions/planner.py +++ b/src/chat/planner_actions/planner.py @@ -104,16 +104,38 @@ class ActionPlanner: # 3. 根据兴趣度调整可用动作 if interest_scores: latest_score = max(interest_scores, key=lambda s: s.total_score) - should_reply = self.interest_scoring.should_reply(latest_score) + should_reply, score = self.interest_scoring.should_reply(latest_score) + reply_not_available = False if not should_reply and "reply" in initial_plan.available_actions: logger.info(f"消息兴趣度不足({latest_score.total_score:.2f}),移除reply动作") - del initial_plan.available_actions["reply"] - self.interest_scoring.record_reply_action(False) - else: - self.interest_scoring.record_reply_action(True) - # 4. 筛选 Plan - filtered_plan = await self.filter.filter(initial_plan) + reply_not_available = True + + base_threshold = self.interest_scoring.reply_threshold + # 检查兴趣度是否达到阈值的0.8 + threshold_requirement = base_threshold * 0.8 + if score < threshold_requirement: + logger.info(f"❌ 兴趣度不足阈值的80%: {score:.3f} < {threshold_requirement:.3f},直接返回no_action") + logger.info(f"📊 最低要求: 阈值({base_threshold:.3f}) × 0.8 = {threshold_requirement:.3f}") + # 直接返回 no_action + no_action = { + "action_type": "no_action", + "reason": f"兴趣度评分 {score:.3f} 未达阈值80% {threshold_requirement:.3f}", + "action_data": {}, + "action_message": None, + } + filtered_plan = initial_plan + filtered_plan.decided_actions = [no_action] + else: + # 4. 筛选 Plan + filtered_plan = await self.filter.filter(reply_not_available,initial_plan) + + # 检查filtered_plan是否有reply动作,以便记录reply action + has_reply_action = False + for decision in filtered_plan.decided_actions: + if decision.action_type == "reply": + has_reply_action = True + self.interest_scoring.record_reply_action(has_reply_action) # 5. 使用 PlanExecutor 执行 Plan execution_result = await self.executor.execute(filtered_plan) diff --git a/src/chat/planner_actions/planner_prompts.py b/src/chat/planner_actions/planner_prompts.py index ac674fcce..29ef4b916 100644 --- a/src/chat/planner_actions/planner_prompts.py +++ b/src/chat/planner_actions/planner_prompts.py @@ -41,7 +41,6 @@ def init_prompts(): 4. 如果用户明确要求了某个动作,请务必优先满足。 **如果可选动作中没有reply,请不要使用** -**反之如果可选动作中有reply,应尽量考虑使用,不过也要考虑当前情景** **可用动作:** {actions_before_now_block}