ruff
This commit is contained in:
@@ -1,5 +1,4 @@
|
||||
import os
|
||||
import random
|
||||
import time
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
@@ -135,20 +134,20 @@ class ExpressionLearner:
|
||||
async def cleanup_expired_expressions(self, expiration_days: int | None = None) -> int:
|
||||
"""
|
||||
清理过期的表达方式
|
||||
|
||||
|
||||
Args:
|
||||
expiration_days: 过期天数,超过此天数未激活的表达方式将被删除(不指定则从配置读取)
|
||||
|
||||
|
||||
Returns:
|
||||
int: 删除的表达方式数量
|
||||
"""
|
||||
# 从配置读取过期天数
|
||||
if expiration_days is None:
|
||||
expiration_days = global_config.expression.expiration_days
|
||||
|
||||
|
||||
current_time = time.time()
|
||||
expiration_threshold = current_time - (expiration_days * 24 * 3600)
|
||||
|
||||
|
||||
try:
|
||||
deleted_count = 0
|
||||
async with get_db_session() as session:
|
||||
@@ -160,15 +159,15 @@ class ExpressionLearner:
|
||||
)
|
||||
)
|
||||
expired_expressions = list(query.scalars())
|
||||
|
||||
|
||||
if expired_expressions:
|
||||
for expr in expired_expressions:
|
||||
await session.delete(expr)
|
||||
deleted_count += 1
|
||||
|
||||
|
||||
await session.commit()
|
||||
logger.info(f"清理了 {deleted_count} 个过期表达方式(超过 {expiration_days} 天未使用)")
|
||||
|
||||
|
||||
# 清除缓存
|
||||
from src.common.database.optimization.cache_manager import get_cache
|
||||
from src.common.database.utils.decorators import generate_cache_key
|
||||
@@ -176,7 +175,7 @@ class ExpressionLearner:
|
||||
await cache.delete(generate_cache_key("chat_expressions", self.chat_id))
|
||||
else:
|
||||
logger.debug(f"没有发现过期的表达方式(阈值:{expiration_days} 天)")
|
||||
|
||||
|
||||
return deleted_count
|
||||
except Exception as e:
|
||||
logger.error(f"清理过期表达方式失败: {e}")
|
||||
@@ -460,7 +459,7 @@ class ExpressionLearner:
|
||||
)
|
||||
)
|
||||
same_situation_expr = query_same_situation.scalar()
|
||||
|
||||
|
||||
# 情况2:相同 chat_id + type + style(相同表达,不同情景)
|
||||
query_same_style = await session.execute(
|
||||
select(Expression).where(
|
||||
@@ -470,7 +469,7 @@ class ExpressionLearner:
|
||||
)
|
||||
)
|
||||
same_style_expr = query_same_style.scalar()
|
||||
|
||||
|
||||
# 情况3:完全相同(相同情景+相同表达)
|
||||
query_exact_match = await session.execute(
|
||||
select(Expression).where(
|
||||
@@ -481,7 +480,7 @@ class ExpressionLearner:
|
||||
)
|
||||
)
|
||||
exact_match_expr = query_exact_match.scalar()
|
||||
|
||||
|
||||
# 优先处理完全匹配的情况
|
||||
if exact_match_expr:
|
||||
# 完全相同:增加count,更新时间
|
||||
|
||||
@@ -72,21 +72,21 @@ class ExpressorModel:
|
||||
是否删除成功
|
||||
"""
|
||||
removed = False
|
||||
|
||||
|
||||
if cid in self._candidates:
|
||||
del self._candidates[cid]
|
||||
removed = True
|
||||
|
||||
|
||||
if cid in self._situations:
|
||||
del self._situations[cid]
|
||||
|
||||
|
||||
# 从nb模型中删除
|
||||
if cid in self.nb.cls_counts:
|
||||
del self.nb.cls_counts[cid]
|
||||
|
||||
|
||||
if cid in self.nb.token_counts:
|
||||
del self.nb.token_counts[cid]
|
||||
|
||||
|
||||
return removed
|
||||
|
||||
def predict(self, text: str, k: int | None = None) -> tuple[str | None, dict[str, float]]:
|
||||
|
||||
@@ -72,7 +72,7 @@ class StyleLearner:
|
||||
# 检查是否需要清理
|
||||
current_count = len(self.style_to_id)
|
||||
cleanup_trigger = int(self.max_styles * self.cleanup_threshold)
|
||||
|
||||
|
||||
if current_count >= cleanup_trigger:
|
||||
if current_count >= self.max_styles:
|
||||
# 已经达到最大限制,必须清理
|
||||
@@ -109,7 +109,7 @@ class StyleLearner:
|
||||
def _cleanup_styles(self):
|
||||
"""
|
||||
清理低价值的风格,为新风格腾出空间
|
||||
|
||||
|
||||
清理策略:
|
||||
1. 综合考虑使用次数和最后使用时间
|
||||
2. 删除得分最低的风格
|
||||
@@ -118,34 +118,34 @@ class StyleLearner:
|
||||
try:
|
||||
current_time = time.time()
|
||||
cleanup_count = max(1, int(len(self.style_to_id) * self.cleanup_ratio))
|
||||
|
||||
|
||||
# 计算每个风格的价值分数
|
||||
style_scores = []
|
||||
for style_id in self.style_to_id.values():
|
||||
# 使用次数
|
||||
usage_count = self.learning_stats["style_counts"].get(style_id, 0)
|
||||
|
||||
|
||||
# 最后使用时间(越近越好)
|
||||
last_used = self.learning_stats["style_last_used"].get(style_id, 0)
|
||||
time_since_used = current_time - last_used if last_used > 0 else float('inf')
|
||||
|
||||
time_since_used = current_time - last_used if last_used > 0 else float("inf")
|
||||
|
||||
# 综合分数:使用次数越多越好,距离上次使用时间越短越好
|
||||
# 使用对数来平滑使用次数的影响
|
||||
import math
|
||||
usage_score = math.log1p(usage_count) # log(1 + count)
|
||||
|
||||
|
||||
# 时间分数:转换为天数,使用指数衰减
|
||||
days_unused = time_since_used / 86400 # 转换为天
|
||||
time_score = math.exp(-days_unused / 30) # 30天衰减因子
|
||||
|
||||
|
||||
# 综合分数:80%使用频率 + 20%时间新鲜度
|
||||
total_score = 0.8 * usage_score + 0.2 * time_score
|
||||
|
||||
|
||||
style_scores.append((style_id, total_score, usage_count, days_unused))
|
||||
|
||||
|
||||
# 按分数排序,分数低的先删除
|
||||
style_scores.sort(key=lambda x: x[1])
|
||||
|
||||
|
||||
# 删除分数最低的风格
|
||||
deleted_styles = []
|
||||
for style_id, score, usage, days in style_scores[:cleanup_count]:
|
||||
@@ -156,27 +156,27 @@ class StyleLearner:
|
||||
del self.id_to_style[style_id]
|
||||
if style_id in self.id_to_situation:
|
||||
del self.id_to_situation[style_id]
|
||||
|
||||
|
||||
# 从统计中删除
|
||||
if style_id in self.learning_stats["style_counts"]:
|
||||
del self.learning_stats["style_counts"][style_id]
|
||||
if style_id in self.learning_stats["style_last_used"]:
|
||||
del self.learning_stats["style_last_used"][style_id]
|
||||
|
||||
|
||||
# 从expressor模型中删除
|
||||
self.expressor.remove_candidate(style_id)
|
||||
|
||||
|
||||
deleted_styles.append((style_text[:30], usage, f"{days:.1f}天"))
|
||||
|
||||
|
||||
logger.info(
|
||||
f"风格清理完成: 删除了 {len(deleted_styles)}/{len(style_scores)} 个风格,"
|
||||
f"剩余 {len(self.style_to_id)} 个风格"
|
||||
)
|
||||
|
||||
|
||||
# 记录前5个被删除的风格(用于调试)
|
||||
if deleted_styles:
|
||||
logger.debug(f"被删除的风格样例(前5): {deleted_styles[:5]}")
|
||||
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"清理风格失败: {e}", exc_info=True)
|
||||
|
||||
@@ -303,10 +303,10 @@ class StyleLearner:
|
||||
def cleanup_old_styles(self, ratio: float | None = None) -> int:
|
||||
"""
|
||||
手动清理旧风格
|
||||
|
||||
|
||||
Args:
|
||||
ratio: 清理比例,如果为None则使用默认的cleanup_ratio
|
||||
|
||||
|
||||
Returns:
|
||||
清理的风格数量
|
||||
"""
|
||||
@@ -318,7 +318,7 @@ class StyleLearner:
|
||||
self.cleanup_ratio = old_cleanup_ratio
|
||||
else:
|
||||
self._cleanup_styles()
|
||||
|
||||
|
||||
new_count = len(self.style_to_id)
|
||||
cleaned = old_count - new_count
|
||||
logger.info(f"手动清理完成: chat_id={self.chat_id}, 清理了 {cleaned} 个风格")
|
||||
@@ -357,11 +357,11 @@ class StyleLearner:
|
||||
import pickle
|
||||
|
||||
meta_path = os.path.join(save_dir, "meta.pkl")
|
||||
|
||||
|
||||
# 确保 learning_stats 包含所有必要字段
|
||||
if "style_last_used" not in self.learning_stats:
|
||||
self.learning_stats["style_last_used"] = {}
|
||||
|
||||
|
||||
meta_data = {
|
||||
"style_to_id": self.style_to_id,
|
||||
"id_to_style": self.id_to_style,
|
||||
@@ -416,7 +416,7 @@ class StyleLearner:
|
||||
self.id_to_situation = meta_data["id_to_situation"]
|
||||
self.next_style_id = meta_data["next_style_id"]
|
||||
self.learning_stats = meta_data["learning_stats"]
|
||||
|
||||
|
||||
# 确保旧数据兼容:如果没有 style_last_used 字段,添加它
|
||||
if "style_last_used" not in self.learning_stats:
|
||||
self.learning_stats["style_last_used"] = {}
|
||||
@@ -526,10 +526,10 @@ class StyleLearnerManager:
|
||||
def cleanup_all_old_styles(self, ratio: float | None = None) -> dict[str, int]:
|
||||
"""
|
||||
对所有学习器清理旧风格
|
||||
|
||||
|
||||
Args:
|
||||
ratio: 清理比例
|
||||
|
||||
|
||||
Returns:
|
||||
{chat_id: 清理数量}
|
||||
"""
|
||||
@@ -538,7 +538,7 @@ class StyleLearnerManager:
|
||||
cleaned = learner.cleanup_old_styles(ratio)
|
||||
if cleaned > 0:
|
||||
cleanup_results[chat_id] = cleaned
|
||||
|
||||
|
||||
total_cleaned = sum(cleanup_results.values())
|
||||
logger.info(f"清理所有StyleLearner完成: 总共清理了 {total_cleaned} 个风格")
|
||||
return cleanup_results
|
||||
|
||||
Reference in New Issue
Block a user