修复代码格式和文件名大小写问题

2025-08-31 20:50:17 +08:00
parent df29014e41
commit 8149731925
218 changed files with 6913 additions and 8257 deletions
--- a/src/utils/message_chunker.py
+++ b/src/utils/message_chunker.py
@@ -14,18 +14,18 @@ logger = get_logger("message_chunker")

 class MessageReassembler:
    """消息重组器，用于重组来自 Ada 的切片消息"""
-    
+
    def __init__(self, timeout: int = 30):
        self.timeout = timeout
        self.chunk_buffers: Dict[str, Dict[str, Any]] = {}
        self._cleanup_task = None
-        
+
    async def start_cleanup_task(self):
        """启动清理任务"""
        if self._cleanup_task is None:
            self._cleanup_task = asyncio.create_task(self._cleanup_expired_chunks())
            logger.info("消息重组器清理任务已启动")
-    
+
    async def stop_cleanup_task(self):
        """停止清理任务"""
        if self._cleanup_task:
@@ -36,84 +36,84 @@ class MessageReassembler:
                pass
            self._cleanup_task = None
            logger.info("消息重组器清理任务已停止")
-    
+
    async def _cleanup_expired_chunks(self):
        """清理过期的切片缓冲区"""
        while True:
            try:
                await asyncio.sleep(10)  # 每10秒检查一次
                current_time = time.time()
-                
+
                expired_chunks = []
                for chunk_id, buffer_info in self.chunk_buffers.items():
-                    if current_time - buffer_info['timestamp'] > self.timeout:
+                    if current_time - buffer_info["timestamp"] > self.timeout:
                        expired_chunks.append(chunk_id)
-                
+
                for chunk_id in expired_chunks:
                    logger.warning(f"清理过期的切片缓冲区: {chunk_id}")
                    del self.chunk_buffers[chunk_id]
-                    
+
            except asyncio.CancelledError:
                break
            except Exception as e:
                logger.error(f"清理过期切片时出错: {e}")
-    
+
    def is_chunk_message(self, message: Dict[str, Any]) -> bool:
        """检查是否是来自 Ada 的切片消息"""
        return (
-            isinstance(message, dict) and
-            "__mmc_chunk_info__" in message and
-            "__mmc_chunk_data__" in message and
-            "__mmc_is_chunked__" in message
+            isinstance(message, dict)
+            and "__mmc_chunk_info__" in message
+            and "__mmc_chunk_data__" in message
+            and "__mmc_is_chunked__" in message
        )
-    
+
    async def process_chunk(self, message: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        """
        处理切片消息，如果切片完整则返回重组后的消息
-        
+
        Args:
            message: 可能的切片消息
-            
+
        Returns:
            如果切片完整则返回重组后的原始消息，否则返回None
        """
        # 如果不是切片消息，直接返回
        if not self.is_chunk_message(message):
            return message
-        
+
        try:
            chunk_info = message["__mmc_chunk_info__"]
            chunk_content = message["__mmc_chunk_data__"]
-            
+
            chunk_id = chunk_info["chunk_id"]
            chunk_index = chunk_info["chunk_index"]
            total_chunks = chunk_info["total_chunks"]
            chunk_timestamp = chunk_info.get("timestamp", time.time())
-            
+
            # 初始化缓冲区
            if chunk_id not in self.chunk_buffers:
                self.chunk_buffers[chunk_id] = {
                    "chunks": {},
                    "total_chunks": total_chunks,
                    "received_chunks": 0,
-                    "timestamp": chunk_timestamp
+                    "timestamp": chunk_timestamp,
                }
                logger.debug(f"初始化切片缓冲区: {chunk_id} (总计 {total_chunks} 个切片)")
-            
+
            buffer = self.chunk_buffers[chunk_id]
-            
+
            # 检查切片是否已经接收过
            if chunk_index in buffer["chunks"]:
                logger.warning(f"重复接收切片: {chunk_id}#{chunk_index}")
                return None
-            
+
            # 添加切片
            buffer["chunks"][chunk_index] = chunk_content
            buffer["received_chunks"] += 1
            buffer["timestamp"] = time.time()  # 更新时间戳
-            
+
            logger.debug(f"接收切片: {chunk_id}#{chunk_index} ({buffer['received_chunks']}/{total_chunks})")
-            
+
            # 检查是否接收完整
            if buffer["received_chunks"] == total_chunks:
                # 重组消息
@@ -123,26 +123,26 @@ class MessageReassembler:
                        logger.error(f"切片 {chunk_id}#{i} 缺失，无法重组")
                        return None
                    reassembled_message += buffer["chunks"][i]
-                
+
                # 清理缓冲区
                del self.chunk_buffers[chunk_id]
-                
+
                logger.info(f"消息重组完成: {chunk_id} ({len(reassembled_message)} chars)")
-                
+
                # 尝试反序列化重组后的消息
                try:
                    return orjson.loads(reassembled_message)
                except orjson.JSONDecodeError as e:
                    logger.error(f"重组消息反序列化失败: {e}")
                    return None
-                
+
            # 还没收集完所有切片，返回None表示继续等待
            return None
-            
+
        except (KeyError, TypeError, ValueError) as e:
            logger.error(f"处理切片消息时出错: {e}")
            return None
-    
+
    def get_pending_chunks_info(self) -> Dict[str, Any]:
        """获取待处理切片信息"""
        info = {}
@@ -151,7 +151,7 @@ class MessageReassembler:
                "received": buffer["received_chunks"],
                "total": buffer["total_chunks"],
                "progress": f"{buffer['received_chunks']}/{buffer['total_chunks']}",
-                "age_seconds": time.time() - buffer["timestamp"]
+                "age_seconds": time.time() - buffer["timestamp"],
            }
        return info

--- a/src/utils/timing_utils.py
+++ b/src/utils/timing_utils.py
@@ -19,39 +19,39 @@ from functools import lru_cache
 def _calculate_sigma_bounds(base_interval: int, sigma_percentage: float, use_3sigma_rule: bool) -> tuple:
    """
    缓存sigma边界计算，避免重复计算相同参数
-    
+
    🚀 性能优化：LRU缓存常用配置，避免重复数学计算
    """
    sigma = base_interval * sigma_percentage
-    
+
    if use_3sigma_rule:
        three_sigma_min = max(1, base_interval - 3 * sigma)
        three_sigma_max = base_interval + 3 * sigma
        return three_sigma_min, three_sigma_max
-    
+
    return 1, base_interval * 50  # 更宽松的边界


 def get_normal_distributed_interval(
-    base_interval: int, 
+    base_interval: int,
    sigma_percentage: float = 0.1,
    min_interval: Optional[int] = None,
    max_interval: Optional[int] = None,
-    use_3sigma_rule: bool = True
+    use_3sigma_rule: bool = True,
 ) -> int:
    """
    获取符合正态分布的时间间隔，基于3-sigma规则
-    
+
    Args:
        base_interval: 基础时间间隔（秒），作为正态分布的均值μ
        sigma_percentage: 标准差占基础间隔的百分比，默认10%
        min_interval: 最小间隔时间（秒），防止间隔过短
        max_interval: 最大间隔时间（秒），防止间隔过长
        use_3sigma_rule: 是否使用3-sigma规则限制分布范围，默认True
-        
+
    Returns:
        int: 符合正态分布的时间间隔（秒）
-        
+
    Example:
        >>> # 基础间隔1500秒（25分钟），标准差为150秒（10%）
        >>> interval = get_normal_distributed_interval(1500, 0.1)
@@ -60,79 +60,79 @@ def get_normal_distributed_interval(
    # 🚨 基本输入保护：处理负数
    if base_interval < 0:
        base_interval = abs(base_interval)
-    
+
    if sigma_percentage < 0:
        sigma_percentage = abs(sigma_percentage)
-    
+
    # 特殊情况：基础间隔为0，使用纯随机模式
    if base_interval == 0:
        if sigma_percentage == 0:
            return 1  # 都为0时返回1秒
        return _generate_pure_random_interval(sigma_percentage, min_interval, max_interval, use_3sigma_rule)
-    
+
    # 特殊情况：sigma为0，返回固定间隔
    if sigma_percentage == 0:
        return base_interval
-    
+
    # 计算标准差
    sigma = base_interval * sigma_percentage
-    
+
    # 📊 使用缓存的边界计算（性能优化）
    if use_3sigma_rule:
        three_sigma_min, three_sigma_max = _calculate_sigma_bounds(base_interval, sigma_percentage, True)
-        
+
        # 应用用户设定的边界（如果更严格的话）
        if min_interval is not None:
            three_sigma_min = max(three_sigma_min, min_interval)
        if max_interval is not None:
            three_sigma_max = min(three_sigma_max, max_interval)
-        
+
        effective_min = int(three_sigma_min)
        effective_max = int(three_sigma_max)
    else:
        # 不使用3-sigma规则，使用更宽松的边界
        effective_min = max(1, min_interval or 1)
        effective_max = max(effective_min + 1, max_interval or int(base_interval * 50))
-    
+
    # 向量化生成：一次性生成多个候选值，避免循环
    # 对于3-sigma规则，理论成功率99.7%，生成10个候选值基本确保成功
    batch_size = 10 if use_3sigma_rule else 5
-    
+
    # 一次性生成多个正态分布值
    candidates = np.random.normal(loc=base_interval, scale=sigma, size=batch_size)
-    
+
    # 向量化处理负数：对负数取绝对值
    candidates = np.abs(candidates)
-    
+
    # 转换为整数数组
    candidates = np.round(candidates).astype(int)
-    
+
    # 向量化筛选：找到第一个满足条件的值
    valid_mask = (candidates >= effective_min) & (candidates <= effective_max)
    valid_candidates = candidates[valid_mask]
-    
+
    if len(valid_candidates) > 0:
        return int(valid_candidates[0])  # 返回第一个有效值
-    
+
    # 如果向量化生成失败（极低概率），使用均匀分布作为备用
    return int(np.random.randint(effective_min, effective_max + 1))


 def _generate_pure_random_interval(
-    sigma_percentage: float, 
-    min_interval: Optional[int] = None, 
+    sigma_percentage: float,
+    min_interval: Optional[int] = None,
    max_interval: Optional[int] = None,
-    use_3sigma_rule: bool = True
+    use_3sigma_rule: bool = True,
 ) -> int:
    """
    当base_interval=0时的纯随机模式，基于3-sigma规则
-    
+
    Args:
        sigma_percentage: 标准差百分比，将被转换为实际时间值
        min_interval: 最小间隔
        max_interval: 最大间隔
        use_3sigma_rule: 是否使用3-sigma规则
-        
+
    Returns:
        int: 随机生成的时间间隔（秒）
    """
@@ -140,47 +140,47 @@ def _generate_pure_random_interval(
    # sigma_percentage=0.3 -> sigma=300秒
    base_reference = 1000  # 基准时间
    sigma = abs(sigma_percentage) * base_reference
-    
+
    # 使用sigma作为均值，sigma/3作为标准差
    # 这样3σ范围约为[0, 2*sigma]
    mean = sigma
-    std = sigma / 3  
-    
+    std = sigma / 3
+
    if use_3sigma_rule:
        # 3-sigma边界：μ±3σ = sigma±3*(sigma/3) = sigma±sigma = [0, 2*sigma]
        three_sigma_min = max(1, mean - 3 * std)  # 理论上约为0，但最小1秒
        three_sigma_max = mean + 3 * std  # 约为2*sigma
-        
+
        # 应用用户边界
        if min_interval is not None:
            three_sigma_min = max(three_sigma_min, min_interval)
        if max_interval is not None:
            three_sigma_max = min(three_sigma_max, max_interval)
-        
+
        effective_min = int(three_sigma_min)
        effective_max = int(three_sigma_max)
    else:
        # 不使用3-sigma规则
        effective_min = max(1, min_interval or 1)
        effective_max = max(effective_min + 1, max_interval or int(mean * 10))
-    
+
    # 向量化生成随机值
    batch_size = 8  # 小批量生成提高效率
    candidates = np.random.normal(loc=mean, scale=std, size=batch_size)
-    
+
    # 向量化处理负数
    candidates = np.abs(candidates)
-    
+
    # 转换为整数
    candidates = np.round(candidates).astype(int)
-    
+
    # 向量化筛选
    valid_mask = (candidates >= effective_min) & (candidates <= effective_max)
    valid_candidates = candidates[valid_mask]
-    
+
    if len(valid_candidates) > 0:
        return int(valid_candidates[0])
-    
+
    # 备用方案：直接随机整数
    return int(np.random.randint(effective_min, effective_max + 1))

@@ -188,28 +188,28 @@ def _generate_pure_random_interval(
 def format_time_duration(seconds: int) -> str:
    """
    将秒数格式化为易读的时间格式
-    
+
    Args:
        seconds: 秒数
-        
+
    Returns:
        str: 格式化的时间字符串，如"2小时30分15秒"
    """
    if seconds < 60:
        return f"{seconds}秒"
-    
+
    minutes = seconds // 60
    remaining_seconds = seconds % 60
-    
+
    if minutes < 60:
        if remaining_seconds > 0:
            return f"{minutes}分{remaining_seconds}秒"
        else:
            return f"{minutes}分"
-    
+
    hours = minutes // 60
    remaining_minutes = minutes % 60
-    
+
    if hours < 24:
        if remaining_minutes > 0 and remaining_seconds > 0:
            return f"{hours}小时{remaining_minutes}分{remaining_seconds}秒"
@@ -217,10 +217,10 @@ def format_time_duration(seconds: int) -> str:
            return f"{hours}小时{remaining_minutes}分"
        else:
            return f"{hours}小时"
-    
+
    days = hours // 24
    remaining_hours = hours % 24
-    
+
    if remaining_hours > 0:
        return f"{days}天{remaining_hours}小时"
    else:
@@ -230,47 +230,47 @@ def format_time_duration(seconds: int) -> str:
 def benchmark_timing_performance(iterations: int = 1000) -> dict:
    """
    性能基准测试函数，用于评估当前环境下的计算性能
-    
+
    🚀 用于系统性能监控和优化验证
-    
+
    Args:
        iterations: 测试迭代次数
-        
+
    Returns:
        dict: 包含各种场景的性能指标
    """
    import time
-    
+
    scenarios = {
-        'standard': (600, 0.25, 1, 86400, True),
-        'pure_random': (0, 0.3, 1, 86400, True),
-        'fixed': (300, 0, 1, 86400, True),
-        'extreme': (60, 5.0, 1, 86400, True)
+        "standard": (600, 0.25, 1, 86400, True),
+        "pure_random": (0, 0.3, 1, 86400, True),
+        "fixed": (300, 0, 1, 86400, True),
+        "extreme": (60, 5.0, 1, 86400, True),
    }
-    
+
    results = {}
-    
+
    for name, params in scenarios.items():
        start = time.perf_counter()
-        
+
        for _ in range(iterations):
            get_normal_distributed_interval(*params)
-            
+
        end = time.perf_counter()
        duration = (end - start) * 1000  # 转换为毫秒
-        
+
        results[name] = {
-            'total_ms': round(duration, 2),
-            'avg_ms': round(duration / iterations, 6),
-            'ops_per_sec': round(iterations / (duration / 1000))
+            "total_ms": round(duration, 2),
+            "avg_ms": round(duration / iterations, 6),
+            "ops_per_sec": round(iterations / (duration / 1000)),
        }
-    
+
    # 计算缓存效果
-    results['cache_info'] = {
-        'hits': _calculate_sigma_bounds.cache_info().hits,
-        'misses': _calculate_sigma_bounds.cache_info().misses,
-        'hit_rate': _calculate_sigma_bounds.cache_info().hits / 
-                   max(1, _calculate_sigma_bounds.cache_info().hits + _calculate_sigma_bounds.cache_info().misses)
+    results["cache_info"] = {
+        "hits": _calculate_sigma_bounds.cache_info().hits,
+        "misses": _calculate_sigma_bounds.cache_info().misses,
+        "hit_rate": _calculate_sigma_bounds.cache_info().hits
+        / max(1, _calculate_sigma_bounds.cache_info().hits + _calculate_sigma_bounds.cache_info().misses),
    }
-    
-    return results
+
+    return results