From ddf0d08fac3c2dc920f1a3d2f6bdbb14592a5568 Mon Sep 17 00:00:00 2001
From: Windpicker-owo <3431391539@qq.com>
Date: Wed, 17 Sep 2025 20:50:03 +0800
Subject: [PATCH] =?UTF-8?q?feat(affinity-flow):=20=E4=BC=98=E5=8C=96?=
 =?UTF-8?q?=E5=85=B4=E8=B6=A3=E5=BA=A6=E8=AF=84=E5=88=86=E5=92=8C=E5=9B=9E?=
 =?UTF-8?q?=E5=A4=8D=E5=86=B3=E7=AD=96=E9=80=BB=E8=BE=91?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- 降低回复阈值从0.6到0.55以增加回复可能性
- 在最终分数计算中加入标签数量奖励机制，每多匹配一个标签加0.05分，最高加0.3分
- 引入分级相似度匹配系统（高/中/低）并应用不同加成系数
- 增加关键词直接匹配奖励机制，支持完全匹配、包含匹配和部分匹配
- 在计划过滤器中处理回复动作不可用时的自动转换逻辑
- 增加兴趣度阈值80%检查，低于该阈值直接返回no_action
- 优化日志输出和统计信息，提供更详细的匹配分析
---
 src/chat/affinity_flow/interest_scoring.py    |  15 ++-
 .../interest_system/bot_interest_manager.py   | 127 ++++++++++++++++--
 src/chat/planner_actions/plan_filter.py       |  12 +-
 src/chat/planner_actions/planner.py           |  36 ++++-
 src/chat/planner_actions/planner_prompts.py   |   1 -
 5 files changed, 167 insertions(+), 24 deletions(-)

diff --git a/src/chat/affinity_flow/interest_scoring.py b/src/chat/affinity_flow/interest_scoring.py
index bc404c70b..fd05bb22a 100644
--- a/src/chat/affinity_flow/interest_scoring.py
+++ b/src/chat/affinity_flow/interest_scoring.py
@@ -30,7 +30,7 @@ class InterestScoringSystem:
         }
 
         # 评分阈值
-        self.reply_threshold = 0.6    # 默认回复阈值
+        self.reply_threshold = 0.55    # 默认回复阈值
         self.mention_threshold = 0.3   # 提及阈值
 
         # 连续不回复概率提升
@@ -147,9 +147,10 @@ class InterestScoringSystem:
                 logger.debug(f"   📈 置信度: {match_result.confidence:.3f}")
                 logger.debug(f"   🔢 匹配详情: {match_result.match_scores}")
 
-                # 返回匹配分数，考虑置信度
-                final_score = match_result.overall_score * 1.15 * match_result.confidence
-                logger.debug(f"⚖️  最终分数(总分×置信度): {final_score:.3f}")
+                # 返回匹配分数，考虑置信度和匹配标签数量
+                match_count_bonus = min(len(match_result.matched_tags) * 0.05, 0.3)  # 每多匹配一个标签+0.05，最高+0.3
+                final_score = match_result.overall_score * 1.3 * match_result.confidence + match_count_bonus
+                logger.debug(f"⚖️  最终分数计算: 总分({match_result.overall_score:.3f}) × 1.3 × 置信度({match_result.confidence:.3f}) + 标签数量奖励({match_count_bonus:.3f}) = {final_score:.3f}")
                 return final_score
             else:
                 logger.warning("⚠️ 智能兴趣匹配未返回结果")
@@ -265,7 +266,7 @@ class InterestScoringSystem:
         logger.info(f"🎯 回复决策: {decision}")
         logger.info(f"📊 决策依据: {score.total_score:.3f} {'>=' if should_reply else '<'} {effective_threshold:.3f}")
 
-        return should_reply
+        return should_reply, score.total_score
 
     def record_reply_action(self, did_reply: bool):
         """记录回复动作"""
@@ -273,10 +274,10 @@ class InterestScoringSystem:
 
         if did_reply:
             self.no_reply_count = max(0, self.no_reply_count - 1)
-            action = "✅ 回复了消息"
+            action = "✅ reply动作可用"
         else:
             self.no_reply_count += 1
-            action = "❌ 选择不回复"
+            action = "❌ reply动作不可用"
 
         # 限制最大计数
         self.no_reply_count = min(self.no_reply_count, self.max_no_reply_count)
diff --git a/src/chat/interest_system/bot_interest_manager.py b/src/chat/interest_system/bot_interest_manager.py
index 2c98c8bd2..4b2ea8a70 100644
--- a/src/chat/interest_system/bot_interest_manager.py
+++ b/src/chat/interest_system/bot_interest_manager.py
@@ -429,26 +429,64 @@ class BotInterestManager:
         # 计算与每个兴趣标签的相似度
         match_count = 0
         high_similarity_count = 0
-        similarity_threshold = 0.3
+        medium_similarity_count = 0
+        low_similarity_count = 0
 
-        logger.debug(f"🔍 使用相似度阈值: {similarity_threshold}")
+        # 分级相似度阈值
+        high_threshold = 0.5
+        medium_threshold = 0.3
+        low_threshold = 0.15
+
+        logger.debug(f"🔍 使用分级相似度阈值: 高={high_threshold}, 中={medium_threshold}, 低={low_threshold}")
 
         for tag in active_tags:
             if tag.embedding:
                 similarity = self._calculate_cosine_similarity(message_embedding, tag.embedding)
+
+                # 基础加权分数
                 weighted_score = similarity * tag.weight
 
-                if similarity > similarity_threshold:
+                # 根据相似度等级应用不同的加成
+                if similarity > high_threshold:
+                    # 高相似度：强加成
+                    enhanced_score = weighted_score * 1.5
                     match_count += 1
-                    result.add_match(tag.tag_name, weighted_score, [tag.tag_name])
+                    high_similarity_count += 1
+                    result.add_match(tag.tag_name, enhanced_score, [tag.tag_name])
+                    logger.debug(f"   🏷️  '{tag.tag_name}': 相似度={similarity:.3f}, 权重={tag.weight:.2f}, 基础分数={weighted_score:.3f}, 增强分数={enhanced_score:.3f} [高匹配]")
 
-                    if similarity > 0.7:
-                        high_similarity_count += 1
+                elif similarity > medium_threshold:
+                    # 中相似度：中等加成
+                    enhanced_score = weighted_score * 1.2
+                    match_count += 1
+                    medium_similarity_count += 1
+                    result.add_match(tag.tag_name, enhanced_score, [tag.tag_name])
+                    logger.debug(f"   🏷️  '{tag.tag_name}': 相似度={similarity:.3f}, 权重={tag.weight:.2f}, 基础分数={weighted_score:.3f}, 增强分数={enhanced_score:.3f} [中匹配]")
 
-                    logger.debug(f"   🏷️  '{tag.tag_name}': 相似度={similarity:.3f}, 权重={tag.weight:.2f}, 加权分数={weighted_score:.3f}")
+                elif similarity > low_threshold:
+                    # 低相似度：轻微加成
+                    enhanced_score = weighted_score * 1.05
+                    match_count += 1
+                    low_similarity_count += 1
+                    result.add_match(tag.tag_name, enhanced_score, [tag.tag_name])
+                    logger.debug(f"   🏷️  '{tag.tag_name}': 相似度={similarity:.3f}, 权重={tag.weight:.2f}, 基础分数={weighted_score:.3f}, 增强分数={enhanced_score:.3f} [低匹配]")
 
         logger.info(f"📈 匹配统计: {match_count}/{len(active_tags)} 个标签超过阈值")
-        logger.info(f"🔥 高相似度匹配(>0.7): {high_similarity_count} 个")
+        logger.info(f"🔥 高相似度匹配(>{high_threshold}): {high_similarity_count} 个")
+        logger.info(f"⚡ 中相似度匹配(>{medium_threshold}): {medium_similarity_count} 个")
+        logger.info(f"🌊 低相似度匹配(>{low_threshold}): {low_similarity_count} 个")
+
+        # 添加直接关键词匹配奖励
+        keyword_bonus = self._calculate_keyword_match_bonus(keywords, result.matched_tags)
+        logger.debug(f"🎯 关键词直接匹配奖励: {keyword_bonus}")
+
+        # 应用关键词奖励到匹配分数
+        for tag_name in result.matched_tags:
+            if tag_name in keyword_bonus:
+                original_score = result.match_scores[tag_name]
+                bonus = keyword_bonus[tag_name]
+                result.match_scores[tag_name] = original_score + bonus
+                logger.debug(f"   🏷️  '{tag_name}': 原始分数={original_score:.3f}, 奖励={bonus:.3f}, 最终分数={result.match_scores[tag_name]:.3f}")
 
         # 计算总体分数
         result.calculate_overall_score()
@@ -463,6 +501,79 @@ class BotInterestManager:
         return result
 
     
+    def _calculate_keyword_match_bonus(self, keywords: List[str], matched_tags: List[str]) -> Dict[str, float]:
+        """Return a per-tag bonus for keywords that textually match an already matched tag.
+
+        Each keyword adds 0.3 (exact match), 0.15 (one string contains the other) or
+        0.08 (``_calculate_partial_match``) to a tag; the per-tag total is capped at 0.5.
+        Comparison is case-insensitive and tags that earn no bonus are omitted.
+        """
+        if not keywords or not matched_tags:
+            return {}
+
+        # Normalise once. Blank keywords are dropped: "" is a substring of every
+        # tag name and would otherwise earn a bogus containment bonus on each tag.
+        normalized = [(kw, kw.lower().strip()) for kw in keywords if kw.strip()]
+
+        bonus_dict = {}
+
+        for tag_name in matched_tags:
+            bonus = 0.0
+            tag_name_lower = tag_name.lower()
+            for keyword, keyword_lower in normalized:
+                if keyword_lower == tag_name_lower:
+                    bonus += 0.3
+                    logger.debug(f"   🎯 关键词完全匹配: '{keyword}' == '{tag_name}' (+0.3)")
+                elif keyword_lower in tag_name_lower or tag_name_lower in keyword_lower:
+                    bonus += 0.15
+                    logger.debug(f"   🎯 关键词包含匹配: '{keyword}' ⊃ '{tag_name}' (+0.15)")
+                elif self._calculate_partial_match(keyword_lower, tag_name_lower):
+                    bonus += 0.08
+                    logger.debug(f"   🎯 关键词部分匹配: '{keyword}' ≈ '{tag_name}' (+0.08)")
+
+            if bonus > 0:
+                bonus_dict[tag_name] = min(bonus, 0.5)  # cap the accumulated bonus per tag
+
+        return bonus_dict
+
+    def _calculate_partial_match(self, text1: str, text2: str) -> bool:
+        """Report whether two strings are close enough to count as a partial match.
+
+        True when their Levenshtein distance is at most half (floored) the length
+        of the shorter string; any exception raised while comparing yields False.
+        """
+        try:
+            shorter, longer = sorted((len(text1), len(text2)))
+            if longer == 0:
+                # Two empty strings are never treated as a match.
+                return False
+
+            allowed_edits = shorter // 2
+            return self._levenshtein_distance(text1, text2) <= allowed_edits
+
+        except Exception:
+            return False
+
+    def _levenshtein_distance(self, s1: str, s2: str) -> int:
+        """Return the Levenshtein (edit) distance between ``s1`` and ``s2``.
+
+        Row-by-row dynamic programming that keeps only the previous row; the
+        shorter of the two strings is used as the row dimension.
+        """
+        if len(s1) < len(s2):
+            s1, s2 = s2, s1  # the distance is symmetric; keep s2 the shorter one
+        if not s2:
+            return len(s1)
+
+        prev = list(range(len(s2) + 1))
+        for row, ch1 in enumerate(s1, start=1):
+            cur = [row]
+            for col, ch2 in enumerate(s2, start=1):
+                # insertion, deletion, substitution (free when the characters agree)
+                cur.append(min(prev[col] + 1, cur[col - 1] + 1, prev[col - 1] + (ch1 != ch2)))
+            prev = cur
+        return prev[-1]
+
     def _calculate_cosine_similarity(self, vec1: List[float], vec2: List[float]) -> float:
         """计算余弦相似度"""
         try:
diff --git a/src/chat/planner_actions/plan_filter.py b/src/chat/planner_actions/plan_filter.py
index de3a47ec6..ee1f8e843 100644
--- a/src/chat/planner_actions/plan_filter.py
+++ b/src/chat/planner_actions/plan_filter.py
@@ -38,7 +38,7 @@ class PlanFilter:
         )
         self.last_obs_time_mark = 0.0
 
-    async def filter(self, plan: Plan) -> Plan:
+    async def filter(self, reply_not_available: bool, plan: Plan) -> Plan:
         """
         执行筛选逻辑，并填充 Plan 对象的 decided_actions 字段。
         """
@@ -58,6 +58,16 @@ class PlanFilter:
                     prased_json = {"action": "no_action", "reason": "返回内容无法解析为JSON"}
                 logger.debug(f"墨墨在这里加了日志 -> 解析后的 JSON: {parsed_json}")
                 
+                if "reply" in plan.available_actions and reply_not_available:
+                    # 如果reply动作不可用，但llm返回的仍然有reply，则改为no_reply
+                    if isinstance(parsed_json, dict) and parsed_json.get("action") == "reply":
+                        parsed_json["action"] = "no_reply"
+                    elif isinstance(parsed_json, list):
+                        for item in parsed_json:
+                            if isinstance(item, dict) and item.get("action") == "reply":
+                                item["action"] = "no_reply"
+                                item["reason"] += " (但由于兴趣度不足，reply动作不可用，已改为no_reply)"
+
                 if isinstance(parsed_json, dict):
                     parsed_json = [parsed_json]
 
diff --git a/src/chat/planner_actions/planner.py b/src/chat/planner_actions/planner.py
index 85269a756..1ae92b8c2 100644
--- a/src/chat/planner_actions/planner.py
+++ b/src/chat/planner_actions/planner.py
@@ -104,16 +104,38 @@ class ActionPlanner:
                 # 3. 根据兴趣度调整可用动作
                 if interest_scores:
                     latest_score = max(interest_scores, key=lambda s: s.total_score)
-                    should_reply = self.interest_scoring.should_reply(latest_score)
+                    should_reply, score = self.interest_scoring.should_reply(latest_score)
 
+                    reply_not_available = False
                     if not should_reply and "reply" in initial_plan.available_actions:
                         logger.info(f"消息兴趣度不足({latest_score.total_score:.2f})，移除reply动作")
-                        del initial_plan.available_actions["reply"]
-                        self.interest_scoring.record_reply_action(False)
-                    else:
-                        self.interest_scoring.record_reply_action(True)
-            # 4. 筛选 Plan
-            filtered_plan = await self.filter.filter(initial_plan)
+                        reply_not_available = True
+
+            base_threshold = self.interest_scoring.reply_threshold
+            # 检查兴趣度是否达到阈值的0.8
+            threshold_requirement = base_threshold * 0.8
+            if score < threshold_requirement:
+                logger.info(f"❌ 兴趣度不足阈值的80%: {score:.3f} < {threshold_requirement:.3f}，直接返回no_action")
+                logger.info(f"📊 最低要求: 阈值({base_threshold:.3f}) × 0.8 = {threshold_requirement:.3f}")
+                # 直接返回 no_action
+                no_action = {
+                    "action_type": "no_action",
+                    "reason": f"兴趣度评分 {score:.3f} 未达阈值80% {threshold_requirement:.3f}",
+                    "action_data": {},
+                    "action_message": None,
+                }
+                filtered_plan = initial_plan
+                filtered_plan.decided_actions = [no_action]
+            else:
+                # 4. 筛选 Plan
+                filtered_plan = await self.filter.filter(reply_not_available,initial_plan)
+
+            # 检查filtered_plan是否有reply动作，以便记录reply action
+            has_reply_action = False
+            for decision in filtered_plan.decided_actions:
+                if decision.action_type == "reply":
+                    has_reply_action = True
+            self.interest_scoring.record_reply_action(has_reply_action)
 
             # 5. 使用 PlanExecutor 执行 Plan
             execution_result = await self.executor.execute(filtered_plan)
diff --git a/src/chat/planner_actions/planner_prompts.py b/src/chat/planner_actions/planner_prompts.py
index ac674fcce..29ef4b916 100644
--- a/src/chat/planner_actions/planner_prompts.py
+++ b/src/chat/planner_actions/planner_prompts.py
@@ -41,7 +41,6 @@ def init_prompts():
 4.  如果用户明确要求了某个动作，请务必优先满足。
 
 **如果可选动作中没有reply，请不要使用**
-**反之如果可选动作中有reply，应尽量考虑使用，不过也要考虑当前情景**
 
 **可用动作:**
 {actions_before_now_block}