feat: 提升语义兴趣评分与拼写错误生成

- 为中文拼写生成器实现了后台预热功能，以提升首次使用时的性能。
- 更新了 MessageStorageBatcher，以支持可配置的提交批次大小和间隔，优化数据库写入性能。
- 增强了数据集生成器，对样本规模设置硬性限制并提升采样效率。
- 将 AutoTrainer 中的最大样本数增加至 1000，以优化训练数据利用率。
- 对亲和兴趣计算器进行了重构，以避免并发初始化并优化模型加载逻辑。
- 引入批量处理机制用于语义兴趣评分，以应对高频聊天场景。
- 更新了配置模板以反映新的评分参数，并移除了已弃用的兴趣阈值。
2025-12-12 14:11:36 +08:00
parent 9d01b81cef
commit e6a4f855a2
17 changed files with 433 additions and 554 deletions
--- a/template/bot_config_template.toml
+++ b/template/bot_config_template.toml
@@ -539,14 +539,11 @@ enable_normal_mode = true # 是否启用 Normal 聊天模式。启用后，在
 # 兴趣评分系统参数
 reply_action_interest_threshold = 0.75 # 回复动作兴趣阈值
 non_reply_action_interest_threshold = 0.65 # 非回复动作兴趣阈值
-high_match_interest_threshold = 0.6 # 高匹配兴趣阈值
-medium_match_interest_threshold = 0.4 # 中匹配兴趣阈值
-low_match_interest_threshold = 0.2 # 低匹配兴趣阈值
-high_match_keyword_multiplier = 4 # 高匹配关键词兴趣倍率
-medium_match_keyword_multiplier = 2.5 # 中匹配关键词兴趣倍率
-low_match_keyword_multiplier = 1.15 # 低匹配关键词兴趣倍率
-match_count_bonus = 0.01 # 匹配数关键词加成值
-max_match_bonus = 0.1 # 最大匹配数加成值
+
+# 语义兴趣度评分优化参数
+use_batch_scoring = true # 是否启用批处理评分模式，适合高频群聊场景
+batch_size = 3 # 批处理大小，达到后立即处理
+batch_flush_interval_ms = 30.0 # 批处理刷新间隔（毫秒），超过后强制处理

 # 回复决策系统参数
 no_reply_threshold_adjustment = 0.02 # 不回复兴趣阈值调整值