refactor(chat): 重构消息管理器以使用集中式上下文管理和能量系统

- 将流上下文管理从MessageManager迁移到专门的ContextManager - 使用统一的能量系统计算focus_energy和分发间隔 - 重构ChatStream的消息数据转换逻辑，支持更完整的数据字段 - 更新数据库模型，移除interest_degree字段，统一使用interest_value - 集成新的兴趣度管理系统替代原有的评分系统 - 添加消息存储的interest_value修复功能
2025-09-27 14:23:48 +08:00
parent 5962b44294
commit 1177b81285
15 changed files with 3520 additions and 506 deletions
--- a/src/chat/interest_system/interest_manager.py
+++ b/src/chat/interest_system/interest_manager.py
@@ -0,0 +1,430 @@
+"""
+重构后的消息兴趣值计算系统
+提供稳定、可靠的消息兴趣度计算和管理功能
+"""
+
+import time
+from typing import Dict, List, Optional, Tuple, Any, Union, TypedDict
+from dataclasses import dataclass, field
+from enum import Enum
+from abc import ABC, abstractmethod
+
+from src.common.logger import get_logger
+
+logger = get_logger("interest_system")
+
+
+class InterestSourceType(Enum):
+    """兴趣度来源类型"""
+    MESSAGE_CONTENT = "message_content"  # 消息内容
+    USER_INTERACTION = "user_interaction"  # 用户交互
+    TOPIC_RELEVANCE = "topic_relevance"  # 话题相关性
+    RELATIONSHIP_SCORE = "relationship_score"  # 关系分数
+    HISTORICAL_PATTERN = "historical_pattern"  # 历史模式
+
+
+@dataclass
+class InterestFactor:
+    """兴趣度因子"""
+    source_type: InterestSourceType
+    value: float
+    weight: float = 1.0
+    decay_rate: float = 0.1  # 衰减率
+    last_updated: float = field(default_factory=time.time)
+
+    def get_current_value(self) -> float:
+        """获取当前值（考虑时间衰减）"""
+        age = time.time() - self.last_updated
+        decay_factor = max(0.1, 1.0 - (age * self.decay_rate / (24 * 3600)))  # 按天衰减
+        return self.value * decay_factor
+
+    def update_value(self, new_value: float) -> None:
+        """更新值"""
+        self.value = max(0.0, min(1.0, new_value))
+        self.last_updated = time.time()
+
+
+class InterestCalculator(ABC):
+    """兴趣度计算器抽象基类"""
+
+    @abstractmethod
+    def calculate(self, context: Dict[str, Any]) -> float:
+        """计算兴趣度"""
+        pass
+
+    @abstractmethod
+    def get_confidence(self) -> float:
+        """获取计算置信度"""
+        pass
+
+
+class MessageData(TypedDict):
+    """消息数据类型定义"""
+    message_id: str
+    processed_plain_text: str
+    is_emoji: bool
+    is_picid: bool
+    is_mentioned: bool
+    is_command: bool
+    key_words: str
+    user_id: str
+    time: float
+
+
+class InterestContext(TypedDict):
+    """兴趣度计算上下文"""
+    stream_id: str
+    user_id: Optional[str]
+    message: MessageData
+
+
+class InterestResult(TypedDict):
+    """兴趣度计算结果"""
+    value: float
+    confidence: float
+    source_scores: Dict[InterestSourceType, float]
+    cached: bool
+
+
+class MessageContentInterestCalculator(InterestCalculator):
+    """消息内容兴趣度计算器"""
+
+    def calculate(self, context: Dict[str, Any]) -> float:
+        """基于消息内容计算兴趣度"""
+        message = context.get("message", {})
+        if not message:
+            return 0.3  # 默认值
+
+        # 提取消息特征
+        text_length = len(message.get("processed_plain_text", ""))
+        has_emoji = message.get("is_emoji", False)
+        has_image = message.get("is_picid", False)
+        is_mentioned = message.get("is_mentioned", False)
+        is_command = message.get("is_command", False)
+
+        # 基础分数
+        base_score = 0.3
+
+        # 文本长度加权
+        if text_length > 0:
+            text_score = min(0.3, text_length / 200)  # 200字符为满分
+            base_score += text_score * 0.3
+
+        # 多媒体内容加权
+        if has_emoji:
+            base_score += 0.1
+        if has_image:
+            base_score += 0.2
+
+        # 交互特征加权
+        if is_mentioned:
+            base_score += 0.2
+        if is_command:
+            base_score += 0.1
+
+        return min(1.0, base_score)
+
+    def get_confidence(self) -> float:
+        return 0.8
+
+
+class TopicInterestCalculator(InterestCalculator):
+    """话题兴趣度计算器"""
+
+    def __init__(self):
+        self.topic_interests: Dict[str, float] = {}
+        self.topic_decay_rate = 0.05  # 话题兴趣度衰减率
+
+    def update_topic_interest(self, topic: str, interest_value: float):
+        """更新话题兴趣度"""
+        current_interest = self.topic_interests.get(topic, 0.3)
+        # 平滑更新
+        new_interest = current_interest * 0.7 + interest_value * 0.3
+        self.topic_interests[topic] = max(0.0, min(1.0, new_interest))
+
+        logger.debug(f"更新话题 '{topic}' 兴趣度: {current_interest:.3f} -> {new_interest:.3f}")
+
+    def calculate(self, context: Dict[str, Any]) -> float:
+        """基于话题相关性计算兴趣度"""
+        message = context.get("message", {})
+        keywords = message.get("key_words", "[]")
+
+        try:
+            import json
+            keyword_list = json.loads(keywords) if keywords else []
+        except (json.JSONDecodeError, TypeError):
+            keyword_list = []
+
+        if not keyword_list:
+            return 0.4  # 无关键词时的默认值
+
+        # 计算相关话题的平均兴趣度
+        total_interest = 0.0
+        relevant_topics = 0
+
+        for keyword in keyword_list[:5]:  # 最多取前5个关键词
+            # 查找相关话题
+            for topic, interest in self.topic_interests.items():
+                if keyword.lower() in topic.lower() or topic.lower() in keyword.lower():
+                    total_interest += interest
+                    relevant_topics += 1
+                    break
+
+        if relevant_topics > 0:
+            return min(1.0, total_interest / relevant_topics)
+        else:
+            # 新话题，给予基础兴趣度
+            for keyword in keyword_list[:3]:
+                self.topic_interests[keyword] = 0.5
+            return 0.5
+
+    def get_confidence(self) -> float:
+        return 0.7
+
+
+class UserInteractionInterestCalculator(InterestCalculator):
+    """用户交互兴趣度计算器"""
+
+    def __init__(self):
+        self.interaction_history: List[Dict] = []
+        self.max_history_size = 100
+
+    def add_interaction(self, user_id: str, interaction_type: str, value: float):
+        """添加交互记录"""
+        self.interaction_history.append({
+            "user_id": user_id,
+            "type": interaction_type,
+            "value": value,
+            "timestamp": time.time()
+        })
+
+        # 保持历史记录大小
+        if len(self.interaction_history) > self.max_history_size:
+            self.interaction_history = self.interaction_history[-self.max_history_size:]
+
+    def calculate(self, context: Dict[str, Any]) -> float:
+        """基于用户交互历史计算兴趣度"""
+        user_id = context.get("user_id")
+        if not user_id:
+            return 0.3
+
+        # 获取该用户的最近交互记录
+        user_interactions = [
+            interaction for interaction in self.interaction_history
+            if interaction["user_id"] == user_id
+        ]
+
+        if not user_interactions:
+            return 0.3
+
+        # 计算加权平均（最近的交互权重更高）
+        total_weight = 0.0
+        weighted_sum = 0.0
+
+        for interaction in user_interactions[-20:]:  # 最近20次交互
+            age = time.time() - interaction["timestamp"]
+            weight = max(0.1, 1.0 - age / (7 * 24 * 3600))  # 7天内衰减
+
+            weighted_sum += interaction["value"] * weight
+            total_weight += weight
+
+        if total_weight > 0:
+            return min(1.0, weighted_sum / total_weight)
+        else:
+            return 0.3
+
+    def get_confidence(self) -> float:
+        return 0.6
+
+
+class InterestManager:
+    """兴趣度管理器 - 统一管理所有兴趣度计算"""
+
+    def __init__(self) -> None:
+        self.calculators: Dict[InterestSourceType, InterestCalculator] = {
+            InterestSourceType.MESSAGE_CONTENT: MessageContentInterestCalculator(),
+            InterestSourceType.TOPIC_RELEVANCE: TopicInterestCalculator(),
+            InterestSourceType.USER_INTERACTION: UserInteractionInterestCalculator(),
+        }
+
+        # 权重配置
+        self.source_weights: Dict[InterestSourceType, float] = {
+            InterestSourceType.MESSAGE_CONTENT: 0.4,
+            InterestSourceType.TOPIC_RELEVANCE: 0.3,
+            InterestSourceType.USER_INTERACTION: 0.3,
+        }
+
+        # 兴趣度缓存
+        self.interest_cache: Dict[str, Tuple[float, float]] = {}  # message_id -> (value, timestamp)
+        self.cache_ttl: int = 300  # 5分钟缓存
+
+        # 统计信息
+        self.stats: Dict[str, Union[int, float, List[str]]] = {
+            "total_calculations": 0,
+            "cache_hits": 0,
+            "cache_misses": 0,
+            "average_calculation_time": 0.0,
+            "calculator_usage": {calc_type.value: 0 for calc_type in InterestSourceType}
+        }
+
+        logger.info("兴趣度管理器初始化完成")
+
+    def calculate_message_interest(self, message: Dict[str, Any], context: Dict[str, Any]) -> float:
+        """计算消息兴趣度"""
+        start_time = time.time()
+        message_id = message.get("message_id", "")
+
+        # 更新统计
+        self.stats["total_calculations"] += 1
+
+        # 检查缓存
+        if message_id in self.interest_cache:
+            cached_value, cached_time = self.interest_cache[message_id]
+            if time.time() - cached_time < self.cache_ttl:
+                self.stats["cache_hits"] += 1
+                logger.debug(f"使用缓存兴趣度: {message_id} = {cached_value:.3f}")
+                return cached_value
+        else:
+            self.stats["cache_misses"] += 1
+
+        # 构建计算上下文
+        calc_context: Dict[str, Any] = {
+            "message": message,
+            "user_id": message.get("user_id"),
+            **context
+        }
+
+        # 计算各来源的兴趣度
+        source_scores: Dict[InterestSourceType, float] = {}
+        total_confidence = 0.0
+
+        for source_type, calculator in self.calculators.items():
+            try:
+                score = calculator.calculate(calc_context)
+                confidence = calculator.get_confidence()
+
+                source_scores[source_type] = score
+                total_confidence += confidence
+
+                # 更新计算器使用统计
+                self.stats["calculator_usage"][source_type.value] += 1
+
+                logger.debug(f"{source_type.value} 兴趣度: {score:.3f} (置信度: {confidence:.3f})")
+
+            except Exception as e:
+                logger.warning(f"计算 {source_type.value} 兴趣度失败: {e}")
+                source_scores[source_type] = 0.3
+
+        # 加权计算最终兴趣度
+        final_interest = 0.0
+        total_weight = 0.0
+
+        for source_type, score in source_scores.items():
+            weight = self.source_weights.get(source_type, 0.0)
+            final_interest += score * weight
+            total_weight += weight
+
+        if total_weight > 0:
+            final_interest /= total_weight
+
+        # 确保在合理范围内
+        final_interest = max(0.0, min(1.0, final_interest))
+
+        # 缓存结果
+        self.interest_cache[message_id] = (final_interest, time.time())
+
+        # 清理过期缓存
+        self._cleanup_cache()
+
+        # 更新平均计算时间
+        calculation_time = time.time() - start_time
+        total_calculations = self.stats["total_calculations"]
+        self.stats["average_calculation_time"] = (
+            (self.stats["average_calculation_time"] * (total_calculations - 1) + calculation_time)
+            / total_calculations
+        )
+
+        logger.info(f"消息 {message_id} 最终兴趣度: {final_interest:.3f} (耗时: {calculation_time:.3f}s)")
+        return final_interest
+
+    def update_topic_interest(self, message: Dict[str, Any], interest_value: float) -> None:
+        """更新话题兴趣度"""
+        topic_calc = self.calculators.get(InterestSourceType.TOPIC_RELEVANCE)
+        if isinstance(topic_calc, TopicInterestCalculator):
+            # 提取关键词作为话题
+            keywords = message.get("key_words", "[]")
+            try:
+                import json
+                keyword_list: List[str] = json.loads(keywords) if keywords else []
+                for keyword in keyword_list[:3]:  # 更新前3个关键词
+                    topic_calc.update_topic_interest(keyword, interest_value)
+            except (json.JSONDecodeError, TypeError):
+                pass
+
+    def add_user_interaction(self, user_id: str, interaction_type: str, value: float) -> None:
+        """添加用户交互记录"""
+        interaction_calc = self.calculators.get(InterestSourceType.USER_INTERACTION)
+        if isinstance(interaction_calc, UserInteractionInterestCalculator):
+            interaction_calc.add_interaction(user_id, interaction_type, value)
+
+    def get_topic_interests(self) -> Dict[str, float]:
+        """获取所有话题兴趣度"""
+        topic_calc = self.calculators.get(InterestSourceType.TOPIC_RELEVANCE)
+        if isinstance(topic_calc, TopicInterestCalculator):
+            return topic_calc.topic_interests.copy()
+        return {}
+
+    def _cleanup_cache(self) -> None:
+        """清理过期缓存"""
+        current_time = time.time()
+        expired_keys = [
+            message_id for message_id, (_, timestamp) in self.interest_cache.items()
+            if current_time - timestamp > self.cache_ttl
+        ]
+
+        for key in expired_keys:
+            del self.interest_cache[key]
+
+        if expired_keys:
+            logger.debug(f"清理了 {len(expired_keys)} 个过期兴趣度缓存")
+
+    def get_statistics(self) -> Dict[str, Any]:
+        """获取统计信息"""
+        return {
+            "cache_size": len(self.interest_cache),
+            "topic_count": len(self.get_topic_interests()),
+            "calculators": list(self.calculators.keys()),
+            "performance_stats": self.stats.copy(),
+        }
+
+    def add_calculator(self, source_type: InterestSourceType, calculator: InterestCalculator) -> None:
+        """添加自定义计算器"""
+        self.calculators[source_type] = calculator
+        logger.info(f"添加计算器: {source_type.value}")
+
+    def remove_calculator(self, source_type: InterestSourceType) -> None:
+        """移除计算器"""
+        if source_type in self.calculators:
+            del self.calculators[source_type]
+            logger.info(f"移除计算器: {source_type.value}")
+
+    def set_source_weight(self, source_type: InterestSourceType, weight: float) -> None:
+        """设置来源权重"""
+        self.source_weights[source_type] = max(0.0, min(1.0, weight))
+        logger.info(f"设置 {source_type.value} 权重: {weight}")
+
+    def clear_cache(self) -> None:
+        """清空缓存"""
+        self.interest_cache.clear()
+        logger.info("清空兴趣度缓存")
+
+    def get_cache_hit_rate(self) -> float:
+        """获取缓存命中率"""
+        total_requests = self.stats.get("cache_hits", 0) + self.stats.get("cache_misses", 0)
+        if total_requests == 0:
+            return 0.0
+        return self.stats["cache_hits"] / total_requests
+
+
+# 全局兴趣度管理器实例
+interest_manager = InterestManager()