feat(expression): 增强表达学习与选择系统的健壮性和智能匹配

- 改进表达学习器的提示词格式规范，增强LLM输出解析的容错性
- 优化表达选择器的模型预测模式，添加情境提取和模糊匹配机制
- 增强StyleLearner的错误处理和日志记录，提高训练和预测的稳定性
- 改进流循环管理器的日志输出，避免重复信息刷屏
- 扩展SendAPI的消息查找功能，支持DatabaseMessages对象兼容
- 添加智能回退机制，当模型预测失败时自动切换到经典模式
- 优化数据库查询逻辑，支持跨聊天流的表达方式共享

BREAKING CHANGE: 表达选择器的模型预测模式现在需要情境提取器配合使用，旧版本配置可能需要更新依赖关系
2025-10-30 11:16:30 +08:00
parent f6349f278d
commit cfa642cf0a
9 changed files with 795 additions and 83 deletions
--- a/scripts/debug_style_learner.py
+++ b/scripts/debug_style_learner.py
@@ -0,0 +1,88 @@
+"""
+检查 StyleLearner 模型状态的诊断脚本
+"""
+import sys
+from pathlib import Path
+
+# Add the project root to sys.path so the `src.*` imports below can resolve
+project_root = Path(__file__).parent.parent
+sys.path.insert(0, str(project_root))
+
+from src.chat.express.style_learner import style_learner_manager
+from src.common.logger import get_logger
+
+logger = get_logger("debug_style_learner")
+
+
+def check_style_learner_status(chat_id: str):
+    """检查指定 chat_id 的 StyleLearner 状态"""
+    
+    print("=" * 60)
+    print(f"StyleLearner 状态诊断 - Chat ID: {chat_id}")
+    print("=" * 60)
+    
+    # 获取 learner
+    learner = style_learner_manager.get_learner(chat_id)
+    
+    # 1. 基本信息
+    print(f"\n📊 基本信息:")
+    print(f"  Chat ID: {learner.chat_id}")
+    print(f"  风格数量: {len(learner.style_to_id)}")
+    print(f"  下一个ID: {learner.next_style_id}")
+    print(f"  最大风格数: {learner.max_styles}")
+    
+    # 2. 学习统计
+    print(f"\n📈 学习统计:")
+    print(f"  总样本数: {learner.learning_stats['total_samples']}")
+    print(f"  最后更新: {learner.learning_stats.get('last_update', 'N/A')}")
+    
+    # 3. 风格列表（前20个）
+    print(f"\n📋 已学习的风格 (前20个):")
+    all_styles = learner.get_all_styles()
+    if not all_styles:
+        print("  ⚠️  没有任何风格！模型尚未训练")
+    else:
+        for i, style in enumerate(all_styles[:20], 1):
+            style_id = learner.style_to_id.get(style)
+            situation = learner.id_to_situation.get(style_id, "N/A")
+            print(f"  [{i}] {style}")
+            print(f"      (ID: {style_id}, Situation: {situation})")
+    
+    # 4. 测试预测
+    print(f"\n🔮 测试预测功能:")
+    if not all_styles:
+        print("  ⚠️  无法测试，模型没有训练数据")
+    else:
+        test_situations = [
+            "表示惊讶",
+            "讨论游戏",
+            "表达赞同"
+        ]
+        
+        for test_sit in test_situations:
+            print(f"\n  测试输入: '{test_sit}'")
+            best_style, scores = learner.predict_style(test_sit, top_k=3)
+            
+            if best_style:
+                print(f"  ✓ 最佳匹配: {best_style}")
+                print(f"  Top 3:")
+                for style, score in list(scores.items())[:3]:
+                    print(f"    - {style}: {score:.4f}")
+            else:
+                print(f"  ✗ 预测失败")
+    
+    print("\n" + "=" * 60)
+    print("诊断完成")
+    print("=" * 60)
+
+
+if __name__ == "__main__":
+    # 从诊断报告中看到的 chat_id
+    test_chat_ids = [
+        "52fb94af9f500a01e023ea780e43606e",  # 有78个表达方式
+        "46c8714c8a9b7ee169941fe99fcde07d",  # 有22个表达方式
+    ]
+    
+    for chat_id in test_chat_ids:
+        check_style_learner_status(chat_id)
+        print("\n")