修复代码格式和文件名大小写问题
This commit is contained in:
@@ -14,18 +14,18 @@ logger = get_logger("message_chunker")
|
||||
|
||||
class MessageReassembler:
|
||||
"""消息重组器,用于重组来自 Ada 的切片消息"""
|
||||
|
||||
|
||||
def __init__(self, timeout: int = 30):
|
||||
self.timeout = timeout
|
||||
self.chunk_buffers: Dict[str, Dict[str, Any]] = {}
|
||||
self._cleanup_task = None
|
||||
|
||||
|
||||
async def start_cleanup_task(self):
|
||||
"""启动清理任务"""
|
||||
if self._cleanup_task is None:
|
||||
self._cleanup_task = asyncio.create_task(self._cleanup_expired_chunks())
|
||||
logger.info("消息重组器清理任务已启动")
|
||||
|
||||
|
||||
async def stop_cleanup_task(self):
|
||||
"""停止清理任务"""
|
||||
if self._cleanup_task:
|
||||
@@ -36,84 +36,84 @@ class MessageReassembler:
|
||||
pass
|
||||
self._cleanup_task = None
|
||||
logger.info("消息重组器清理任务已停止")
|
||||
|
||||
|
||||
async def _cleanup_expired_chunks(self):
    """Periodically drop chunk buffers whose timestamp is older than ``self.timeout``.

    The loop wakes every 10 seconds and runs until the task is cancelled.
    Any other exception is logged and the loop keeps running.
    """
    while True:
        try:
            await asyncio.sleep(10)  # check once every 10 seconds
            now = time.time()

            # Collect the ids first so the dict is not mutated while it is
            # being iterated.
            expired_chunks = [
                chunk_id
                for chunk_id, buffer_info in self.chunk_buffers.items()
                if now - buffer_info["timestamp"] > self.timeout
            ]

            for chunk_id in expired_chunks:
                logger.warning(f"清理过期的切片缓冲区: {chunk_id}")
                del self.chunk_buffers[chunk_id]

        except asyncio.CancelledError:
            # Cancellation is the normal way to stop this loop.
            break
        except Exception as e:
            logger.error(f"清理过期切片时出错: {e}")
|
||||
|
||||
|
||||
def is_chunk_message(self, message: Dict[str, Any]) -> bool:
    """Return True when ``message`` is a dict carrying all three chunk marker keys.

    Args:
        message: Candidate message; any non-dict value yields False.

    Returns:
        True only if ``__mmc_chunk_info__``, ``__mmc_chunk_data__`` and
        ``__mmc_is_chunked__`` are all present as keys.
    """
    if not isinstance(message, dict):
        return False
    required_keys = ("__mmc_chunk_info__", "__mmc_chunk_data__", "__mmc_is_chunked__")
    return all(key in message for key in required_keys)
|
||||
|
||||
|
||||
async def process_chunk(self, message: Dict[str, Any]) -> Optional[Dict[str, Any]]:
|
||||
"""
|
||||
处理切片消息,如果切片完整则返回重组后的消息
|
||||
|
||||
|
||||
Args:
|
||||
message: 可能的切片消息
|
||||
|
||||
|
||||
Returns:
|
||||
如果切片完整则返回重组后的原始消息,否则返回None
|
||||
"""
|
||||
# 如果不是切片消息,直接返回
|
||||
if not self.is_chunk_message(message):
|
||||
return message
|
||||
|
||||
|
||||
try:
|
||||
chunk_info = message["__mmc_chunk_info__"]
|
||||
chunk_content = message["__mmc_chunk_data__"]
|
||||
|
||||
|
||||
chunk_id = chunk_info["chunk_id"]
|
||||
chunk_index = chunk_info["chunk_index"]
|
||||
total_chunks = chunk_info["total_chunks"]
|
||||
chunk_timestamp = chunk_info.get("timestamp", time.time())
|
||||
|
||||
|
||||
# 初始化缓冲区
|
||||
if chunk_id not in self.chunk_buffers:
|
||||
self.chunk_buffers[chunk_id] = {
|
||||
"chunks": {},
|
||||
"total_chunks": total_chunks,
|
||||
"received_chunks": 0,
|
||||
"timestamp": chunk_timestamp
|
||||
"timestamp": chunk_timestamp,
|
||||
}
|
||||
logger.debug(f"初始化切片缓冲区: {chunk_id} (总计 {total_chunks} 个切片)")
|
||||
|
||||
|
||||
buffer = self.chunk_buffers[chunk_id]
|
||||
|
||||
|
||||
# 检查切片是否已经接收过
|
||||
if chunk_index in buffer["chunks"]:
|
||||
logger.warning(f"重复接收切片: {chunk_id}#{chunk_index}")
|
||||
return None
|
||||
|
||||
|
||||
# 添加切片
|
||||
buffer["chunks"][chunk_index] = chunk_content
|
||||
buffer["received_chunks"] += 1
|
||||
buffer["timestamp"] = time.time() # 更新时间戳
|
||||
|
||||
|
||||
logger.debug(f"接收切片: {chunk_id}#{chunk_index} ({buffer['received_chunks']}/{total_chunks})")
|
||||
|
||||
|
||||
# 检查是否接收完整
|
||||
if buffer["received_chunks"] == total_chunks:
|
||||
# 重组消息
|
||||
@@ -123,26 +123,26 @@ class MessageReassembler:
|
||||
logger.error(f"切片 {chunk_id}#{i} 缺失,无法重组")
|
||||
return None
|
||||
reassembled_message += buffer["chunks"][i]
|
||||
|
||||
|
||||
# 清理缓冲区
|
||||
del self.chunk_buffers[chunk_id]
|
||||
|
||||
|
||||
logger.info(f"消息重组完成: {chunk_id} ({len(reassembled_message)} chars)")
|
||||
|
||||
|
||||
# 尝试反序列化重组后的消息
|
||||
try:
|
||||
return orjson.loads(reassembled_message)
|
||||
except orjson.JSONDecodeError as e:
|
||||
logger.error(f"重组消息反序列化失败: {e}")
|
||||
return None
|
||||
|
||||
|
||||
# 还没收集完所有切片,返回None表示继续等待
|
||||
return None
|
||||
|
||||
|
||||
except (KeyError, TypeError, ValueError) as e:
|
||||
logger.error(f"处理切片消息时出错: {e}")
|
||||
return None
|
||||
|
||||
|
||||
def get_pending_chunks_info(self) -> Dict[str, Any]:
|
||||
"""获取待处理切片信息"""
|
||||
info = {}
|
||||
@@ -151,7 +151,7 @@ class MessageReassembler:
|
||||
"received": buffer["received_chunks"],
|
||||
"total": buffer["total_chunks"],
|
||||
"progress": f"{buffer['received_chunks']}/{buffer['total_chunks']}",
|
||||
"age_seconds": time.time() - buffer["timestamp"]
|
||||
"age_seconds": time.time() - buffer["timestamp"],
|
||||
}
|
||||
return info
|
||||
|
||||
|
||||
@@ -19,39 +19,39 @@ from functools import lru_cache
|
||||
def _calculate_sigma_bounds(base_interval: int, sigma_percentage: float, use_3sigma_rule: bool) -> tuple:
|
||||
"""
|
||||
缓存sigma边界计算,避免重复计算相同参数
|
||||
|
||||
|
||||
🚀 性能优化:LRU缓存常用配置,避免重复数学计算
|
||||
"""
|
||||
sigma = base_interval * sigma_percentage
|
||||
|
||||
|
||||
if use_3sigma_rule:
|
||||
three_sigma_min = max(1, base_interval - 3 * sigma)
|
||||
three_sigma_max = base_interval + 3 * sigma
|
||||
return three_sigma_min, three_sigma_max
|
||||
|
||||
|
||||
return 1, base_interval * 50 # 更宽松的边界
|
||||
|
||||
|
||||
def get_normal_distributed_interval(
    base_interval: int,
    sigma_percentage: float = 0.1,
    min_interval: Optional[int] = None,
    max_interval: Optional[int] = None,
    use_3sigma_rule: bool = True,
) -> int:
    """Draw a normally distributed interval around ``base_interval``.

    Negative ``base_interval`` / ``sigma_percentage`` values are replaced by
    their absolute value. A zero ``base_interval`` switches to the pure
    random mode (or returns 1 when ``sigma_percentage`` is also zero). A zero
    ``sigma_percentage`` returns ``base_interval`` unchanged, without applying
    ``min_interval`` / ``max_interval``.

    Args:
        base_interval: Base interval in seconds, used as the mean μ.
        sigma_percentage: Standard deviation as a fraction of the base
            interval (default 10%).
        min_interval: Optional lower bound in seconds.
        max_interval: Optional upper bound in seconds.
        use_3sigma_rule: Restrict the draw to μ ± 3σ (default True). When
            False, the looser range ``[max(1, min_interval), max_interval or
            base_interval * 50]`` is used.

    Returns:
        int: The generated interval in seconds. If the requested bounds leave
        an empty range, the lower bound is returned.

    Example:
        >>> # base interval 1500 s (25 min), standard deviation 150 s (10%)
        >>> interval = get_normal_distributed_interval(1500, 0.1)
    """
    # Basic input protection: fold negative inputs to their magnitude.
    if base_interval < 0:
        base_interval = abs(base_interval)

    if sigma_percentage < 0:
        sigma_percentage = abs(sigma_percentage)

    # Special case: a zero base interval means pure random mode.
    if base_interval == 0:
        if sigma_percentage == 0:
            return 1  # both zero: fall back to one second
        return _generate_pure_random_interval(sigma_percentage, min_interval, max_interval, use_3sigma_rule)

    # Special case: zero sigma means a fixed interval.
    if sigma_percentage == 0:
        return base_interval

    sigma = base_interval * sigma_percentage

    if use_3sigma_rule:
        three_sigma_min, three_sigma_max = _calculate_sigma_bounds(base_interval, sigma_percentage, True)

        # Apply the caller's bounds when they are stricter.
        if min_interval is not None:
            three_sigma_min = max(three_sigma_min, min_interval)
        if max_interval is not None:
            three_sigma_max = min(three_sigma_max, max_interval)

        effective_min = int(three_sigma_min)
        # Caller bounds can invert the range (e.g. min_interval above the 3σ
        # maximum). Let the lower bound win, as the non-3-sigma branch does,
        # so the randint fallback below never sees low >= high.
        effective_max = max(effective_min, int(three_sigma_max))
    else:
        # Without the 3-sigma rule, use looser bounds.
        effective_min = max(1, min_interval or 1)
        effective_max = max(effective_min + 1, max_interval or int(base_interval * 50))

    # Generate a small batch of candidates at once instead of retrying in a loop.
    batch_size = 10 if use_3sigma_rule else 5
    candidates = np.random.normal(loc=base_interval, scale=sigma, size=batch_size)

    # Reflect negative draws to positive values, then round to whole seconds.
    candidates = np.round(np.abs(candidates)).astype(int)

    # Keep the first candidate that falls inside the effective range.
    valid_mask = (candidates >= effective_min) & (candidates <= effective_max)
    valid_candidates = candidates[valid_mask]

    if len(valid_candidates) > 0:
        return int(valid_candidates[0])

    # No candidate fit: fall back to a uniform draw over the range.
    return int(np.random.randint(effective_min, effective_max + 1))
|
||||
|
||||
|
||||
def _generate_pure_random_interval(
|
||||
sigma_percentage: float,
|
||||
min_interval: Optional[int] = None,
|
||||
sigma_percentage: float,
|
||||
min_interval: Optional[int] = None,
|
||||
max_interval: Optional[int] = None,
|
||||
use_3sigma_rule: bool = True
|
||||
use_3sigma_rule: bool = True,
|
||||
) -> int:
|
||||
"""
|
||||
当base_interval=0时的纯随机模式,基于3-sigma规则
|
||||
|
||||
|
||||
Args:
|
||||
sigma_percentage: 标准差百分比,将被转换为实际时间值
|
||||
min_interval: 最小间隔
|
||||
max_interval: 最大间隔
|
||||
use_3sigma_rule: 是否使用3-sigma规则
|
||||
|
||||
|
||||
Returns:
|
||||
int: 随机生成的时间间隔(秒)
|
||||
"""
|
||||
@@ -140,47 +140,47 @@ def _generate_pure_random_interval(
|
||||
# sigma_percentage=0.3 -> sigma=300秒
|
||||
base_reference = 1000 # 基准时间
|
||||
sigma = abs(sigma_percentage) * base_reference
|
||||
|
||||
|
||||
# 使用sigma作为均值,sigma/3作为标准差
|
||||
# 这样3σ范围约为[0, 2*sigma]
|
||||
mean = sigma
|
||||
std = sigma / 3
|
||||
|
||||
std = sigma / 3
|
||||
|
||||
if use_3sigma_rule:
|
||||
# 3-sigma边界:μ±3σ = sigma±3*(sigma/3) = sigma±sigma = [0, 2*sigma]
|
||||
three_sigma_min = max(1, mean - 3 * std) # 理论上约为0,但最小1秒
|
||||
three_sigma_max = mean + 3 * std # 约为2*sigma
|
||||
|
||||
|
||||
# 应用用户边界
|
||||
if min_interval is not None:
|
||||
three_sigma_min = max(three_sigma_min, min_interval)
|
||||
if max_interval is not None:
|
||||
three_sigma_max = min(three_sigma_max, max_interval)
|
||||
|
||||
|
||||
effective_min = int(three_sigma_min)
|
||||
effective_max = int(three_sigma_max)
|
||||
else:
|
||||
# 不使用3-sigma规则
|
||||
effective_min = max(1, min_interval or 1)
|
||||
effective_max = max(effective_min + 1, max_interval or int(mean * 10))
|
||||
|
||||
|
||||
# 向量化生成随机值
|
||||
batch_size = 8 # 小批量生成提高效率
|
||||
candidates = np.random.normal(loc=mean, scale=std, size=batch_size)
|
||||
|
||||
|
||||
# 向量化处理负数
|
||||
candidates = np.abs(candidates)
|
||||
|
||||
|
||||
# 转换为整数
|
||||
candidates = np.round(candidates).astype(int)
|
||||
|
||||
|
||||
# 向量化筛选
|
||||
valid_mask = (candidates >= effective_min) & (candidates <= effective_max)
|
||||
valid_candidates = candidates[valid_mask]
|
||||
|
||||
|
||||
if len(valid_candidates) > 0:
|
||||
return int(valid_candidates[0])
|
||||
|
||||
|
||||
# 备用方案:直接随机整数
|
||||
return int(np.random.randint(effective_min, effective_max + 1))
|
||||
|
||||
@@ -188,28 +188,28 @@ def _generate_pure_random_interval(
|
||||
def format_time_duration(seconds: int) -> str:
|
||||
"""
|
||||
将秒数格式化为易读的时间格式
|
||||
|
||||
|
||||
Args:
|
||||
seconds: 秒数
|
||||
|
||||
|
||||
Returns:
|
||||
str: 格式化的时间字符串,如"2小时30分15秒"
|
||||
"""
|
||||
if seconds < 60:
|
||||
return f"{seconds}秒"
|
||||
|
||||
|
||||
minutes = seconds // 60
|
||||
remaining_seconds = seconds % 60
|
||||
|
||||
|
||||
if minutes < 60:
|
||||
if remaining_seconds > 0:
|
||||
return f"{minutes}分{remaining_seconds}秒"
|
||||
else:
|
||||
return f"{minutes}分"
|
||||
|
||||
|
||||
hours = minutes // 60
|
||||
remaining_minutes = minutes % 60
|
||||
|
||||
|
||||
if hours < 24:
|
||||
if remaining_minutes > 0 and remaining_seconds > 0:
|
||||
return f"{hours}小时{remaining_minutes}分{remaining_seconds}秒"
|
||||
@@ -217,10 +217,10 @@ def format_time_duration(seconds: int) -> str:
|
||||
return f"{hours}小时{remaining_minutes}分"
|
||||
else:
|
||||
return f"{hours}小时"
|
||||
|
||||
|
||||
days = hours // 24
|
||||
remaining_hours = hours % 24
|
||||
|
||||
|
||||
if remaining_hours > 0:
|
||||
return f"{days}天{remaining_hours}小时"
|
||||
else:
|
||||
@@ -230,47 +230,47 @@ def format_time_duration(seconds: int) -> str:
|
||||
def benchmark_timing_performance(iterations: int = 1000) -> dict:
    """Benchmark ``get_normal_distributed_interval`` across fixed scenarios.

    Args:
        iterations: Number of calls timed per scenario. Values <= 0 are
            tolerated and report zero averages instead of raising.

    Returns:
        dict: One entry per scenario with ``total_ms``, ``avg_ms`` and
        ``ops_per_sec``, plus a ``cache_info`` entry with the hit/miss
        statistics of ``_calculate_sigma_bounds``.
    """
    import time

    # Each tuple is (base_interval, sigma_percentage, min_interval,
    # max_interval, use_3sigma_rule).
    scenarios = {
        "standard": (600, 0.25, 1, 86400, True),
        "pure_random": (0, 0.3, 1, 86400, True),
        "fixed": (300, 0, 1, 86400, True),
        "extreme": (60, 5.0, 1, 86400, True),
    }

    results = {}

    for name, params in scenarios.items():
        start = time.perf_counter()

        for _ in range(iterations):
            get_normal_distributed_interval(*params)

        duration = (time.perf_counter() - start) * 1000  # convert to milliseconds

        results[name] = {
            "total_ms": round(duration, 2),
            # Guard both divisions so iterations=0 or a zero-length
            # measurement cannot raise ZeroDivisionError.
            "avg_ms": round(duration / iterations, 6) if iterations > 0 else 0.0,
            "ops_per_sec": round(iterations / (duration / 1000)) if duration > 0 else 0,
        }

    # Take a single snapshot of the cache statistics so hits, misses and the
    # hit rate are mutually consistent.
    cache_stats = _calculate_sigma_bounds.cache_info()
    results["cache_info"] = {
        "hits": cache_stats.hits,
        "misses": cache_stats.misses,
        "hit_rate": cache_stats.hits / max(1, cache_stats.hits + cache_stats.misses),
    }

    return results
|
||||
|
||||
Reference in New Issue
Block a user