Mofox-Core/benchmark_semantic_interest.py
Windpicker-owo 9d01b81cef feat: enhance the semantic interest calculator with FastScorer and batch processing
- Integrated FastScorer for optimized scoring, bypassing sklearn to improve performance.
- Added batch processing to handle high-frequency chat scenarios.
- Implemented a global thread pool to avoid repeatedly creating executors.
- Reduced the scoring timeout to 2 seconds.
- Refactored ChatterActionPlanner to use the new interest calculator.
- Introduced a benchmark script comparing the original sklearn path against FastScorer.
Developed an optimized scorer with weight pruning, asynchronous scoring, and related features.
2025-12-12 12:14:21 +08:00
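
The notes above mention a process-wide thread pool that avoids recreating executors on every scoring call. A minimal sketch of that pattern follows; `shutdown_global_executor` matches the name the benchmark below imports from optimized_scorer, while `get_global_executor` and the worker count are illustrative assumptions, not the repo's actual API:

import atexit
from concurrent.futures import ThreadPoolExecutor
from typing import Optional

_global_executor: Optional[ThreadPoolExecutor] = None  # created lazily on first use

def get_global_executor() -> ThreadPoolExecutor:
    """Return the shared executor, creating it exactly once."""
    global _global_executor
    if _global_executor is None:
        _global_executor = ThreadPoolExecutor(
            max_workers=4, thread_name_prefix="semantic-score"
        )
        atexit.register(shutdown_global_executor)  # clean up at interpreter exit
    return _global_executor

def shutdown_global_executor() -> None:
    """Tear down the shared executor (idempotent)."""
    global _global_executor
    if _global_executor is not None:
        _global_executor.shutdown(wait=False)
        _global_executor = None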

283 lines · 8.7 KiB · Python

"""语义兴趣度评分器性能测试
对比测试:
1. 原始 sklearn 路径 vs FastScorer
2. 单条评分 vs 批处理
3. 同步 vs 异步
"""
import asyncio
import time
from pathlib import Path
# 测试样本
SAMPLE_TEXTS = [
    "今天天气真好",
    "这个游戏太好玩了!",
    "无聊死了",
    "我对这个话题很感兴趣",
    "能不能聊点别的",
    "哇这个真的很厉害",
    "你好",
    "有人在吗",
    "这个问题很有深度",
    "随便说说",
    "真是太棒了,我非常喜欢",
    "算了算了不想说了",
    "来聊聊最近的新闻吧",
    "emmmm",
    "哈哈哈哈",
    "666",
]
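# 16 samples per pass; every timing below is reported as a per-text average
# over iterations * len(SAMPLE_TEXTS) scoring calls.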

def benchmark_sklearn_scorer(model_path: str, iterations: int = 100):
    """Benchmark the original sklearn scorer."""
    from src.chat.semantic_interest.runtime_scorer import SemanticInterestScorer

    scorer = SemanticInterestScorer(model_path, use_fast_scorer=False)
    scorer.load()
    # Warm-up
    for text in SAMPLE_TEXTS[:3]:
        scorer.score(text)
    # Single-item scoring
    start = time.perf_counter()
    for _ in range(iterations):
        for text in SAMPLE_TEXTS:
            scorer.score(text)
    single_time = time.perf_counter() - start
    total_single = iterations * len(SAMPLE_TEXTS)
    # Batch scoring
    start = time.perf_counter()
    for _ in range(iterations):
        scorer.score_batch(SAMPLE_TEXTS)
    batch_time = time.perf_counter() - start
    total_batch = iterations * len(SAMPLE_TEXTS)
    return {
        "mode": "sklearn",
        "single_total_time": single_time,
        "single_avg_ms": single_time / total_single * 1000,
        "single_qps": total_single / single_time,
        "batch_total_time": batch_time,
        "batch_avg_ms": batch_time / total_batch * 1000,
        "batch_qps": total_batch / batch_time,
    }

def benchmark_fast_scorer(model_path: str, iterations: int = 100):
    """Benchmark FastScorer."""
    from src.chat.semantic_interest.runtime_scorer import SemanticInterestScorer

    scorer = SemanticInterestScorer(model_path, use_fast_scorer=True)
    scorer.load()
    # Warm-up
    for text in SAMPLE_TEXTS[:3]:
        scorer.score(text)
    # Single-item scoring
    start = time.perf_counter()
    for _ in range(iterations):
        for text in SAMPLE_TEXTS:
            scorer.score(text)
    single_time = time.perf_counter() - start
    total_single = iterations * len(SAMPLE_TEXTS)
    # Batch scoring
    start = time.perf_counter()
    for _ in range(iterations):
        scorer.score_batch(SAMPLE_TEXTS)
    batch_time = time.perf_counter() - start
    total_batch = iterations * len(SAMPLE_TEXTS)
    return {
        "mode": "fast_scorer",
        "single_total_time": single_time,
        "single_avg_ms": single_time / total_single * 1000,
        "single_qps": total_single / single_time,
        "batch_total_time": batch_time,
        "batch_avg_ms": batch_time / total_batch * 1000,
        "batch_qps": total_batch / batch_time,
    }
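# Note: benchmark_fast_scorer mirrors benchmark_sklearn_scorer line for line;
# only the use_fast_scorer flag and the "mode" label differ, so both paths are
# measured under an identical workload.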

async def benchmark_async_scoring(model_path: str, iterations: int = 100):
    """Benchmark asynchronous scoring."""
    from src.chat.semantic_interest.runtime_scorer import get_semantic_scorer

    scorer = await get_semantic_scorer(model_path, use_async=True)
    # Warm-up
    for text in SAMPLE_TEXTS[:3]:
        await scorer.score_async(text)
    # Sequential async scoring
    start = time.perf_counter()
    for _ in range(iterations):
        for text in SAMPLE_TEXTS:
            await scorer.score_async(text)
    single_time = time.perf_counter() - start
    total_single = iterations * len(SAMPLE_TEXTS)
    # Concurrent scoring (simulates a high-concurrency chat scenario)
    start = time.perf_counter()
    for _ in range(iterations):
        tasks = [scorer.score_async(text) for text in SAMPLE_TEXTS]
        await asyncio.gather(*tasks)
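    # Note: gather only yields real concurrency if score_async releases the
    # event loop (e.g. by offloading inference to an executor); if it is a
    # plain coroutine wrapper around synchronous scoring, these numbers will
    # closely track the sequential ones.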
    concurrent_time = time.perf_counter() - start
    total_concurrent = iterations * len(SAMPLE_TEXTS)
    return {
        "mode": "async",
        "single_total_time": single_time,
        "single_avg_ms": single_time / total_single * 1000,
        "single_qps": total_single / single_time,
        "concurrent_total_time": concurrent_time,
        "concurrent_avg_ms": concurrent_time / total_concurrent * 1000,
        "concurrent_qps": total_concurrent / concurrent_time,
    }

async def benchmark_batch_queue(model_path: str, iterations: int = 100):
    """Benchmark the batch-processing queue."""
    from src.chat.semantic_interest.optimized_scorer import get_fast_scorer

    queue = await get_fast_scorer(
        model_path,
        use_batch_queue=True,
        batch_size=8,
        flush_interval_ms=20.0,
    )
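    # Presumably, with use_batch_queue=True individual score() awaits are
    # buffered and flushed together once batch_size requests accumulate or
    # flush_interval_ms elapses, whichever comes first; with 16 concurrent
    # requests and batch_size=8, each wave should then coalesce into about
    # two batches (exact semantics depend on optimized_scorer).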
    # Warm-up
    for text in SAMPLE_TEXTS[:3]:
        await queue.score(text)
    # Submit scoring requests concurrently
    start = time.perf_counter()
    for _ in range(iterations):
        tasks = [queue.score(text) for text in SAMPLE_TEXTS]
        await asyncio.gather(*tasks)
    total_time = time.perf_counter() - start
    total_requests = iterations * len(SAMPLE_TEXTS)
    stats = queue.get_statistics()
    await queue.stop()
    return {
        "mode": "batch_queue",
        "total_time": total_time,
        "avg_ms": total_time / total_requests * 1000,
        "qps": total_requests / total_time,
        "total_batches": stats["total_batches"],
        "avg_batch_size": stats["avg_batch_size"],
    }

def print_results(results: dict):
    """Print benchmark results."""
    print(f"\n{'='*60}")
    print(f"Mode: {results['mode']}")
    print(f"{'='*60}")
    if "single_avg_ms" in results:
        print(f"Single:     {results['single_avg_ms']:.3f} ms/text, QPS: {results['single_qps']:.1f}")
    if "batch_avg_ms" in results:
        print(f"Batch:      {results['batch_avg_ms']:.3f} ms/text, QPS: {results['batch_qps']:.1f}")
    if "concurrent_avg_ms" in results:
        print(f"Concurrent: {results['concurrent_avg_ms']:.3f} ms/text, QPS: {results['concurrent_qps']:.1f}")
    if "total_batches" in results:
        print(f"Batch queue: {results['avg_ms']:.3f} ms/text, QPS: {results['qps']:.1f}")
        print(f"  Total batches: {results['total_batches']}, avg batch size: {results['avg_batch_size']:.1f}")

async def main():
    """Run the benchmarks."""
    # Locate a trained model
    model_dir = Path("data/semantic_interest/models")
    model_files = list(model_dir.glob("semantic_interest_*.pkl"))
    if not model_files:
        print("Error: no model file found; train a model first")
        print(f"Model directory: {model_dir}")
        sys.exit(1)
    # Use the most recently modified model
    model_path = str(max(model_files, key=lambda p: p.stat().st_mtime))
    print(f"Using model: {model_path}")
    iterations = 50  # benchmark iterations
    print(f"\nConfiguration: {iterations} iterations, {len(SAMPLE_TEXTS)} samples/iteration")
    print(f"Total scoring calls: {iterations * len(SAMPLE_TEXTS)}")
    # 1. Original sklearn path
    print("\n[1/4] Benchmarking the original sklearn path...")
    try:
        sklearn_results = benchmark_sklearn_scorer(model_path, iterations)
        print_results(sklearn_results)
    except Exception as e:
        print(f"sklearn benchmark failed: {e}")
    # 2. FastScorer
    print("\n[2/4] Benchmarking FastScorer...")
    try:
        fast_results = benchmark_fast_scorer(model_path, iterations)
        print_results(fast_results)
    except Exception as e:
        print(f"FastScorer benchmark failed: {e}")
    # 3. Asynchronous scoring
    print("\n[3/4] Benchmarking asynchronous scoring...")
    try:
        async_results = await benchmark_async_scoring(model_path, iterations)
        print_results(async_results)
    except Exception as e:
        print(f"Async benchmark failed: {e}")
    # 4. Batch-processing queue
    print("\n[4/4] Benchmarking the batch queue...")
    try:
        queue_results = await benchmark_batch_queue(model_path, iterations)
        print_results(queue_results)
    except Exception as e:
        print(f"Batch queue benchmark failed: {e}")
    # Performance summary
    print(f"\n{'='*60}")
    print("Performance summary")
    print(f"{'='*60}")
    try:
        speedup = sklearn_results["single_avg_ms"] / fast_results["single_avg_ms"]
        print(f"FastScorer vs sklearn, single: {speedup:.2f}x speedup")
        speedup = sklearn_results["batch_avg_ms"] / fast_results["batch_avg_ms"]
        print(f"FastScorer vs sklearn, batch:  {speedup:.2f}x speedup")
    except (NameError, KeyError):
        # One of the sklearn/FastScorer benchmarks failed, so its results are unavailable
        print("(comparison skipped: an earlier benchmark failed)")
    print("\nCleaning up...")
    from src.chat.semantic_interest.optimized_scorer import shutdown_global_executor, clear_fast_scorer_instances
    from src.chat.semantic_interest.runtime_scorer import clear_scorer_instances

    shutdown_global_executor()
    clear_fast_scorer_instances()
    clear_scorer_instances()
    print("Done!")

if __name__ == "__main__":
    asyncio.run(main())
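
To reproduce the comparison, train a model first (the script aborts if data/semantic_interest/models/ contains no semantic_interest_*.pkl), then run the file directly, e.g. python benchmark_semantic_interest.py from a working directory where the relative data/ path resolves; the exact directory depends on the repo layout.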