feat: 添加视频处理多线程优化和消息切片重组功能

- 新增视频帧提取的线程池支持，提升大视频文件处理性能 - 集成消息切片重组器，支持长消息的自动重组处理 - 优化视频帧提取算法，使用numpy进行数值计算优化 - 重构权限管理插件，修复属性访问和方法签名问题 - 清理未使用的导入和代码，提升代码质量 - 默认启用插件管理功能
2025-08-25 16:16:33 +08:00
parent 38b64eb339
commit 0bcc0ba8b5
9 changed files with 513 additions and 65 deletions
--- a/src/chat/utils/utils_video.py
+++ b/src/chat/utils/utils_video.py
@@ -12,10 +12,14 @@ import asyncio
 import base64
 import hashlib
 import time
+import numpy as np
 from PIL import Image
 from pathlib import Path
 from typing import List, Tuple, Optional, Dict
 import io
+from concurrent.futures import ThreadPoolExecutor
+from functools import partial
+import numpy as np

 from src.llm_models.utils_model import LLMRequest
 from src.config.config import global_config, model_config
@@ -33,6 +37,68 @@ video_events = {}
 video_lock_manager = asyncio.Lock()


+def _extract_frames_worker(video_path: str, max_frames: int, frame_quality: int, max_image_size: int) -> List[Tuple[str, float]]:
+    """线程池中提取视频帧的工作函数"""
+    frames = []
+    try:
+        cap = cv2.VideoCapture(video_path)
+        fps = cap.get(cv2.CAP_PROP_FPS)
+        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+        duration = total_frames / fps if fps > 0 else 0
+        
+        # 使用numpy优化帧间隔计算
+        if duration > 0:
+            frame_interval = max(1, int(duration / max_frames * fps))
+        else:
+            frame_interval = 30  # 默认间隔
+            
+        # 使用numpy计算目标帧位置
+        target_frames = np.arange(0, min(max_frames, total_frames // frame_interval + 1)) * frame_interval
+        target_frames = target_frames[target_frames < total_frames].astype(int)
+        
+        for target_frame in target_frames:
+            # 跳转到目标帧
+            cap.set(cv2.CAP_PROP_POS_FRAMES, target_frame)
+            ret, frame = cap.read()
+            if not ret:
+                continue
+                
+            # 使用numpy优化图像处理
+            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+            
+            # 转换为PIL图像并使用numpy进行尺寸计算
+            height, width = frame_rgb.shape[:2]
+            max_dim = max(height, width)
+            
+            if max_dim > max_image_size:
+                # 使用numpy计算缩放比例
+                ratio = max_image_size / max_dim
+                new_width = int(width * ratio)
+                new_height = int(height * ratio)
+                
+                # 使用opencv进行高效缩放
+                frame_resized = cv2.resize(frame_rgb, (new_width, new_height), interpolation=cv2.INTER_LANCZOS4)
+                pil_image = Image.fromarray(frame_resized)
+            else:
+                pil_image = Image.fromarray(frame_rgb)
+            
+            # 转换为base64
+            buffer = io.BytesIO()
+            pil_image.save(buffer, format='JPEG', quality=frame_quality)
+            frame_base64 = base64.b64encode(buffer.getvalue()).decode('utf-8')
+            
+            # 计算时间戳
+            timestamp = target_frame / fps if fps > 0 else 0
+            frames.append((frame_base64, timestamp))
+        
+        cap.release()
+        return frames
+        
+    except Exception as e:
+        # 返回错误信息
+        return [("ERROR", str(e))]
+
+
 class VideoAnalyzer:
    """优化的视频分析器类"""

@@ -61,6 +127,9 @@ class VideoAnalyzer:
            self.max_image_size = config.max_image_size
            self.enable_frame_timing = config.enable_frame_timing
            self.batch_analysis_prompt = config.batch_analysis_prompt
+            # 新增的线程池配置
+            self.use_multiprocessing = getattr(config, 'use_multiprocessing', True)
+            self.max_workers = getattr(config, 'max_workers', 2)
            
            # 将配置文件中的模式映射到内部使用的模式名称
            config_mode = config.analysis_mode
@@ -92,6 +161,8 @@ class VideoAnalyzer:
            self.batch_size = 3  # 批处理时每批处理的帧数
            self.timeout = 60.0  # 分析超时时间（秒）
            self.enable_frame_timing = True
+            self.use_multiprocessing = True  # 默认启用线程池
+            self.max_workers = 2  # 默认最大2个线程
            self.batch_analysis_prompt = """请分析这个视频的内容。这些图片是从视频中按时间顺序提取的关键帧。

 请提供详细的分析，包括：
@@ -107,7 +178,7 @@ class VideoAnalyzer:
        # 系统提示词
        self.system_prompt = "你是一个专业的视频内容分析助手。请仔细观察用户提供的视频关键帧，详细描述视频内容。"
        
-        logger.info(f"✅ 视频分析器初始化完成，分析模式: {self.analysis_mode}")
+        logger.info(f"✅ 视频分析器初始化完成，分析模式: {self.analysis_mode}, 线程池: {self.use_multiprocessing}")

    def _calculate_video_hash(self, video_data: bytes) -> str:
        """计算视频文件的hash值"""
@@ -182,7 +253,66 @@ class VideoAnalyzer:
            logger.warning(f"无效的分析模式: {mode}")

    async def extract_frames(self, video_path: str) -> List[Tuple[str, float]]:
-        """提取视频帧"""
+        """提取视频帧 - 支持多进程和单线程模式"""
+        # 先获取视频信息
+        cap = cv2.VideoCapture(video_path)
+        fps = cap.get(cv2.CAP_PROP_FPS)
+        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+        duration = total_frames / fps if fps > 0 else 0
+        cap.release()
+        
+        logger.info(f"视频信息: {total_frames}帧, {fps:.2f}FPS, {duration:.2f}秒")
+        
+        # 估算提取帧数
+        if duration > 0:
+            frame_interval = max(1, int(duration / self.max_frames * fps))
+            estimated_frames = min(self.max_frames, total_frames // frame_interval + 1)
+        else:
+            estimated_frames = self.max_frames
+            
+        logger.info(f"计算得出帧间隔: {frame_interval} (将提取约{estimated_frames}帧)")
+        
+        # 根据配置选择处理方式
+        if self.use_multiprocessing:
+            return await self._extract_frames_multiprocess(video_path)
+        else:
+            return await self._extract_frames_fallback(video_path)
+
+    async def _extract_frames_multiprocess(self, video_path: str) -> List[Tuple[str, float]]:
+        """线程池版本的帧提取"""
+        loop = asyncio.get_event_loop()
+        
+        try:
+            logger.info("🔄 启动线程池帧提取...")
+            # 使用线程池，避免进程间的导入问题
+            with ThreadPoolExecutor(max_workers=1) as executor:
+                frames = await loop.run_in_executor(
+                    executor,
+                    _extract_frames_worker,
+                    video_path,
+                    self.max_frames,
+                    self.frame_quality,
+                    self.max_image_size
+                )
+            
+            # 检查是否有错误
+            if frames and frames[0][0] == "ERROR":
+                logger.error(f"线程池帧提取失败: {frames[0][1]}")
+                # 降级到单线程模式
+                logger.info("🔄 降级到单线程模式...")
+                return await self._extract_frames_fallback(video_path)
+            
+            logger.info(f"✅ 成功提取{len(frames)}帧 (线程池模式)")
+            return frames
+            
+        except Exception as e:
+            logger.error(f"线程池帧提取失败: {e}")
+            # 降级到原始方法
+            logger.info("🔄 降级到单线程模式...")
+            return await self._extract_frames_fallback(video_path)
+
+    async def _extract_frames_fallback(self, video_path: str) -> List[Tuple[str, float]]:
+        """帧提取的降级方法 - 原始异步版本"""
        frames = []
        cap = cv2.VideoCapture(video_path)
        fps = cap.get(cv2.CAP_PROP_FPS)
@@ -191,45 +321,61 @@ class VideoAnalyzer:
        
        logger.info(f"视频信息: {total_frames}帧, {fps:.2f}FPS, {duration:.2f}秒")
        
-        # 动态计算帧间隔
+        # 使用numpy优化帧间隔计算
        if duration > 0:
            frame_interval = max(1, int(duration / self.max_frames * fps))
        else:
            frame_interval = 30  # 默认间隔
+            
+        logger.info(f"计算得出帧间隔: {frame_interval} (将提取约{min(self.max_frames, total_frames // frame_interval + 1)}帧)")
+
+        # 使用numpy计算目标帧位置
+        target_frames = np.arange(0, min(self.max_frames, total_frames // frame_interval + 1)) * frame_interval
+        target_frames = target_frames[target_frames < total_frames].astype(int)
        
-        frame_count = 0
        extracted_count = 0
        
-        while cap.isOpened() and extracted_count < self.max_frames:
+        for target_frame in target_frames:
+            # 跳转到目标帧
+            cap.set(cv2.CAP_PROP_POS_FRAMES, target_frame)
            ret, frame = cap.read()
            if not ret:
-                break
+                continue
                
-            if frame_count % frame_interval == 0:
-                # 转换为PIL图像并压缩
-                frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-                pil_image = Image.fromarray(frame_rgb)
-                
-                # 调整图像大小
-                if max(pil_image.size) > self.max_image_size:
-                    ratio = self.max_image_size / max(pil_image.size)
-                    new_size = tuple(int(dim * ratio) for dim in pil_image.size)
-                    pil_image = pil_image.resize(new_size, Image.Resampling.LANCZOS)
-                
-                # 转换为base64
-                buffer = io.BytesIO()
-                pil_image.save(buffer, format='JPEG', quality=self.frame_quality)
-                frame_base64 = base64.b64encode(buffer.getvalue()).decode('utf-8')
-                
-                # 计算时间戳
-                timestamp = frame_count / fps if fps > 0 else 0
-                frames.append((frame_base64, timestamp))
-                extracted_count += 1
-                
-                logger.debug(f"提取第{extracted_count}帧 (时间: {timestamp:.2f}s)")
+            # 使用numpy优化图像处理
+            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+            
+            # 转换为PIL图像并使用numpy进行尺寸计算
+            height, width = frame_rgb.shape[:2]
+            max_dim = max(height, width)
+            
+            if max_dim > self.max_image_size:
+                # 使用numpy计算缩放比例
+                ratio = self.max_image_size / max_dim
+                new_width = int(width * ratio)
+                new_height = int(height * ratio)
+                
+                # 使用opencv进行高效缩放
+                frame_resized = cv2.resize(frame_rgb, (new_width, new_height), interpolation=cv2.INTER_LANCZOS4)
+                pil_image = Image.fromarray(frame_resized)
+            else:
+                pil_image = Image.fromarray(frame_rgb)
+            
+            # 转换为base64
+            buffer = io.BytesIO()
+            pil_image.save(buffer, format='JPEG', quality=self.frame_quality)
+            frame_base64 = base64.b64encode(buffer.getvalue()).decode('utf-8')
+            
+            # 计算时间戳
+            timestamp = target_frame / fps if fps > 0 else 0
+            frames.append((frame_base64, timestamp))
+            extracted_count += 1
+            
+            logger.debug(f"提取第{extracted_count}帧 (时间: {timestamp:.2f}s, 帧号: {target_frame})")
+            
+            # 每提取一帧让步一次
+            await asyncio.sleep(0.001)
            
-            frame_count += 1
-        
        cap.release()
        logger.info(f"✅ 成功提取{len(frames)}帧")
        return frames