ruff

2025-11-07 21:01:45 +08:00
parent 80b040da2f
commit c8d7c09625
49 changed files with 854 additions and 872 deletions
--- a/src/chat/express/expression_learner.py
+++ b/src/chat/express/expression_learner.py
@@ -1,5 +1,4 @@
 import os
-import random
 import time
 from datetime import datetime
 from typing import Any
@@ -135,20 +134,20 @@ class ExpressionLearner:
    async def cleanup_expired_expressions(self, expiration_days: int | None = None) -> int:
        """
        清理过期的表达方式
-        
+
        Args:
            expiration_days: 过期天数，超过此天数未激活的表达方式将被删除（不指定则从配置读取）
-        
+
        Returns:
            int: 删除的表达方式数量
        """
        # 从配置读取过期天数
        if expiration_days is None:
            expiration_days = global_config.expression.expiration_days
-        
+
        current_time = time.time()
        expiration_threshold = current_time - (expiration_days * 24 * 3600)
-        
+
        try:
            deleted_count = 0
            async with get_db_session() as session:
@@ -160,15 +159,15 @@ class ExpressionLearner:
                    )
                )
                expired_expressions = list(query.scalars())
-                
+
                if expired_expressions:
                    for expr in expired_expressions:
                        await session.delete(expr)
                        deleted_count += 1
-                    
+
                    await session.commit()
                    logger.info(f"清理了 {deleted_count} 个过期表达方式（超过 {expiration_days} 天未使用）")
-                    
+
                    # 清除缓存
                    from src.common.database.optimization.cache_manager import get_cache
                    from src.common.database.utils.decorators import generate_cache_key
@@ -176,7 +175,7 @@ class ExpressionLearner:
                    await cache.delete(generate_cache_key("chat_expressions", self.chat_id))
                else:
                    logger.debug(f"没有发现过期的表达方式（阈值：{expiration_days} 天）")
-            
+
            return deleted_count
        except Exception as e:
            logger.error(f"清理过期表达方式失败: {e}")
@@ -460,7 +459,7 @@ class ExpressionLearner:
                        )
                    )
                    same_situation_expr = query_same_situation.scalar()
-                    
+
                    # 情况2：相同 chat_id + type + style（相同表达，不同情景）
                    query_same_style = await session.execute(
                        select(Expression).where(
@@ -470,7 +469,7 @@ class ExpressionLearner:
                        )
                    )
                    same_style_expr = query_same_style.scalar()
-                    
+
                    # 情况3：完全相同（相同情景+相同表达）
                    query_exact_match = await session.execute(
                        select(Expression).where(
@@ -481,7 +480,7 @@ class ExpressionLearner:
                        )
                    )
                    exact_match_expr = query_exact_match.scalar()
-                    
+
                    # 优先处理完全匹配的情况
                    if exact_match_expr:
                        # 完全相同：增加count，更新时间
--- a/src/chat/express/expressor_model/model.py
+++ b/src/chat/express/expressor_model/model.py
@@ -72,21 +72,21 @@ class ExpressorModel:
            是否删除成功
        """
        removed = False
-        
+
        if cid in self._candidates:
            del self._candidates[cid]
            removed = True
-        
+
        if cid in self._situations:
            del self._situations[cid]
-        
+
        # 从nb模型中删除
        if cid in self.nb.cls_counts:
            del self.nb.cls_counts[cid]
-        
+
        if cid in self.nb.token_counts:
            del self.nb.token_counts[cid]
-        
+
        return removed

    def predict(self, text: str, k: int | None = None) -> tuple[str | None, dict[str, float]]:
--- a/src/chat/express/style_learner.py
+++ b/src/chat/express/style_learner.py
@@ -72,7 +72,7 @@ class StyleLearner:
            # 检查是否需要清理
            current_count = len(self.style_to_id)
            cleanup_trigger = int(self.max_styles * self.cleanup_threshold)
-            
+
            if current_count >= cleanup_trigger:
                if current_count >= self.max_styles:
                    # 已经达到最大限制，必须清理
@@ -109,7 +109,7 @@ class StyleLearner:
    def _cleanup_styles(self):
        """
        清理低价值的风格，为新风格腾出空间
-        
+
        清理策略：
        1. 综合考虑使用次数和最后使用时间
        2. 删除得分最低的风格
@@ -118,34 +118,34 @@ class StyleLearner:
        try:
            current_time = time.time()
            cleanup_count = max(1, int(len(self.style_to_id) * self.cleanup_ratio))
-            
+
            # 计算每个风格的价值分数
            style_scores = []
            for style_id in self.style_to_id.values():
                # 使用次数
                usage_count = self.learning_stats["style_counts"].get(style_id, 0)
-                
+
                # 最后使用时间（越近越好）
                last_used = self.learning_stats["style_last_used"].get(style_id, 0)
-                time_since_used = current_time - last_used if last_used > 0 else float('inf')
-                
+                time_since_used = current_time - last_used if last_used > 0 else float("inf")
+
                # 综合分数：使用次数越多越好，距离上次使用时间越短越好
                # 使用对数来平滑使用次数的影响
                import math
                usage_score = math.log1p(usage_count)  # log(1 + count)
-                
+
                # 时间分数：转换为天数，使用指数衰减
                days_unused = time_since_used / 86400  # 转换为天
                time_score = math.exp(-days_unused / 30)  # 30天衰减因子
-                
+
                # 综合分数：80%使用频率 + 20%时间新鲜度
                total_score = 0.8 * usage_score + 0.2 * time_score
-                
+
                style_scores.append((style_id, total_score, usage_count, days_unused))
-            
+
            # 按分数排序，分数低的先删除
            style_scores.sort(key=lambda x: x[1])
-            
+
            # 删除分数最低的风格
            deleted_styles = []
            for style_id, score, usage, days in style_scores[:cleanup_count]:
@@ -156,27 +156,27 @@ class StyleLearner:
                    del self.id_to_style[style_id]
                    if style_id in self.id_to_situation:
                        del self.id_to_situation[style_id]
-                    
+
                    # 从统计中删除
                    if style_id in self.learning_stats["style_counts"]:
                        del self.learning_stats["style_counts"][style_id]
                    if style_id in self.learning_stats["style_last_used"]:
                        del self.learning_stats["style_last_used"][style_id]
-                    
+
                    # 从expressor模型中删除
                    self.expressor.remove_candidate(style_id)
-                    
+
                    deleted_styles.append((style_text[:30], usage, f"{days:.1f}天"))
-            
+
            logger.info(
                f"风格清理完成: 删除了 {len(deleted_styles)}/{len(style_scores)} 个风格，"
                f"剩余 {len(self.style_to_id)} 个风格"
            )
-            
+
            # 记录前5个被删除的风格（用于调试）
            if deleted_styles:
                logger.debug(f"被删除的风格样例(前5): {deleted_styles[:5]}")
-            
+
        except Exception as e:
            logger.error(f"清理风格失败: {e}", exc_info=True)

@@ -303,10 +303,10 @@ class StyleLearner:
    def cleanup_old_styles(self, ratio: float | None = None) -> int:
        """
        手动清理旧风格
-        
+
        Args:
            ratio: 清理比例，如果为None则使用默认的cleanup_ratio
-            
+
        Returns:
            清理的风格数量
        """
@@ -318,7 +318,7 @@ class StyleLearner:
            self.cleanup_ratio = old_cleanup_ratio
        else:
            self._cleanup_styles()
-        
+
        new_count = len(self.style_to_id)
        cleaned = old_count - new_count
        logger.info(f"手动清理完成: chat_id={self.chat_id}, 清理了 {cleaned} 个风格")
@@ -357,11 +357,11 @@ class StyleLearner:
            import pickle

            meta_path = os.path.join(save_dir, "meta.pkl")
-            
+
            # 确保 learning_stats 包含所有必要字段
            if "style_last_used" not in self.learning_stats:
                self.learning_stats["style_last_used"] = {}
-            
+
            meta_data = {
                "style_to_id": self.style_to_id,
                "id_to_style": self.id_to_style,
@@ -416,7 +416,7 @@ class StyleLearner:
                self.id_to_situation = meta_data["id_to_situation"]
                self.next_style_id = meta_data["next_style_id"]
                self.learning_stats = meta_data["learning_stats"]
-                
+
                # 确保旧数据兼容：如果没有 style_last_used 字段，添加它
                if "style_last_used" not in self.learning_stats:
                    self.learning_stats["style_last_used"] = {}
@@ -526,10 +526,10 @@ class StyleLearnerManager:
    def cleanup_all_old_styles(self, ratio: float | None = None) -> dict[str, int]:
        """
        对所有学习器清理旧风格
-        
+
        Args:
            ratio: 清理比例
-            
+
        Returns:
            {chat_id: 清理数量}
        """
@@ -538,7 +538,7 @@ class StyleLearnerManager:
            cleaned = learner.cleanup_old_styles(ratio)
            if cleaned > 0:
                cleanup_results[chat_id] = cleaned
-        
+
        total_cleaned = sum(cleanup_results.values())
        logger.info(f"清理所有StyleLearner完成: 总共清理了 {total_cleaned} 个风格")
        return cleanup_results