diff --git a/rust_video/Cargo.toml b/rust_video/Cargo.toml
new file mode 100644
index 000000000..beb03b188
--- /dev/null
+++ b/rust_video/Cargo.toml
@@ -0,0 +1,31 @@
+[package]
+name = "rust_video"
+version = "0.1.0"
+edition = "2021"
+authors = ["VideoAnalysis Team"]
+description = "Ultra-fast video keyframe extraction tool in Rust"
+license = "MIT"
+
+[dependencies]
+anyhow = "1.0"
+clap = { version = "4.0", features = ["derive"] }
+rayon = "1.11"
+
+serde = { version = "1.0", features = ["derive"] }
+serde_json = "1.0"
+
+chrono = { version = "0.4", features = ["serde"] }
+
+# PyO3 dependencies
+pyo3 = { version = "0.22", features = ["extension-module"] }
+
+[lib]
+name = "rust_video"
+crate-type = ["cdylib"]
+
+[profile.release]
+opt-level = 3
+lto = true
+codegen-units = 1
+panic = "abort"
+strip = true
diff --git a/rust_video/pyproject.toml b/rust_video/pyproject.toml
new file mode 100644
index 000000000..3cbb6fadd
--- /dev/null
+++ b/rust_video/pyproject.toml
@@ -0,0 +1,15 @@
+[build-system]
+requires = ["maturin>=1.9,<2.0"]
+build-backend = "maturin"
+
+[project]
+name = "rust_video"
+requires-python = ">=3.8"
+classifiers = [
+    "Programming Language :: Rust",
+    "Programming Language :: Python :: Implementation :: CPython",
+    "Programming Language :: Python :: Implementation :: PyPy",
+]
+dynamic = ["version"]
+[tool.maturin]
+features = ["pyo3/extension-module"]
diff --git a/rust_video/rust_video.pyi b/rust_video/rust_video.pyi
new file mode 100644
index 000000000..83bccf591
--- /dev/null
+++ b/rust_video/rust_video.pyi
@@ -0,0 +1,391 @@
+"""
+Rust Video Keyframe Extractor - Python Type Hints
+
+Ultra-fast video keyframe extraction tool with SIMD optimization.
+"""
+
+from typing import Dict, List, Optional, Tuple, Union, Any
+from pathlib import Path
+
+class PyVideoFrame:
+    """
+    Video frame structure exposed through the Python bindings.
+
+    Represents a single video frame, including its frame number, dimensions, and pixel data.
+    """
+
+    frame_number: int
+    """Frame number"""
+
+    width: int
+    """Frame width in pixels"""
+
+    height: int
+    """Frame height in pixels"""
+
+    def __init__(self, frame_number: int, width: int, height: int, data: List[int]) -> None:
+        """
+        Create a new video frame.
+
+        Args:
+            frame_number: Frame number
+            width: Frame width
+            height: Frame height
+            data: Pixel data (list of grayscale values)
+        """
+        ...
+
+    def get_data(self) -> List[int]:
+        """
+        Get the frame's pixel data.
+
+        Returns:
+            List of pixel values (grayscale)
+        """
+        ...
+
+    def calculate_difference(self, other: 'PyVideoFrame') -> float:
+        """
+        Calculate the difference from another frame.
+
+        Args:
+            other: The frame to compare against
+
+        Returns:
+            Frame difference value (range 0-255)
+        """
+        ...
+
+    def calculate_difference_simd(self, other: 'PyVideoFrame', block_size: Optional[int] = None) -> float:
+        """
+        Calculate the frame difference using SIMD optimization.
+
+        Args:
+            other: The frame to compare against
+            block_size: Processing block size, defaults to 8192
+
+        Returns:
+            Frame difference value (range 0-255)
+        """
+        ...
+
+class PyPerformanceResult:
+    """
+    Performance test result.
+
+    Contains detailed performance statistics.
+    """
+
+    test_name: str
+    """Test name"""
+
+    video_file: str
+    """Video file name"""
+
+    total_time_ms: float
+    """Total processing time (milliseconds)"""
+
+    frame_extraction_time_ms: float
+    """Frame extraction time (milliseconds)"""
+
+    keyframe_analysis_time_ms: float
+    """Keyframe analysis time (milliseconds)"""
+
+    total_frames: int
+    """Total number of frames"""
+
+    keyframes_extracted: int
+    """Number of keyframes extracted"""
+
+    keyframe_ratio: float
+    """Keyframe ratio (percent)"""
+
+    processing_fps: float
+    """Processing speed (frames per second)"""
+
+    threshold: float
+    """Detection threshold"""
+
+    optimization_type: str
+    """Optimization type"""
+
+    simd_enabled: bool
+    """Whether SIMD was enabled"""
+
+    threads_used: int
+    """Number of threads used"""
+
+    timestamp: str
+    """Timestamp"""
+
+    def to_dict(self) -> Dict[str, Any]:
+        """
+        Convert to a Python dictionary.
+
+        Returns:
+            Dictionary containing all result fields
+        """
+        ...
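The stubs above only describe the per-frame API, so here is a minimal usage sketch. It assumes the extension has already been built and installed as `rust_video` (e.g. with maturin); the tiny 4x4 frames and their pixel values are made up for illustration.

```python
# Sketch only: construct two small grayscale frames and compare them.
from rust_video import PyVideoFrame

a = PyVideoFrame(0, 4, 4, [0] * 16)                 # frame 0: all black
b = PyVideoFrame(1, 4, 4, [0] * 8 + [255] * 8)      # frame 1: half white

print(a.calculate_difference(b))        # scalar per-pixel mean absolute difference
print(a.calculate_difference_simd(b))   # SIMD path, default block size 8192
```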
+ +class VideoKeyframeExtractor: + """ + 主要的视频关键帧提取器类 + + 提供完整的视频关键帧提取功能,包括SIMD优化和多线程处理。 + """ + + def __init__( + self, + ffmpeg_path: str = "ffmpeg", + threads: int = 0, + verbose: bool = False + ) -> None: + """ + 创建关键帧提取器 + + Args: + ffmpeg_path: FFmpeg可执行文件路径,默认"ffmpeg" + threads: 线程数,0表示自动检测 + verbose: 是否启用详细输出 + """ + ... + + def extract_frames( + self, + video_path: str, + max_frames: Optional[int] = None + ) -> Tuple[List[PyVideoFrame], int, int]: + """ + 从视频中提取帧 + + Args: + video_path: 视频文件路径 + max_frames: 最大提取帧数,None表示提取所有帧 + + Returns: + (帧列表, 宽度, 高度) + """ + ... + + def extract_keyframes( + self, + frames: List[PyVideoFrame], + threshold: float, + use_simd: Optional[bool] = None, + block_size: Optional[int] = None + ) -> List[int]: + """ + 提取关键帧索引 + + Args: + frames: 视频帧列表 + threshold: 检测阈值 + use_simd: 是否使用SIMD优化,默认True + block_size: 处理块大小,默认8192 + + Returns: + 关键帧索引列表 + """ + ... + + def save_keyframes( + self, + video_path: str, + keyframe_indices: List[int], + output_dir: str, + max_save: Optional[int] = None + ) -> int: + """ + 保存关键帧为图片 + + Args: + video_path: 原视频文件路径 + keyframe_indices: 关键帧索引列表 + output_dir: 输出目录 + max_save: 最大保存数量,默认50 + + Returns: + 实际保存的关键帧数量 + """ + ... + + def benchmark( + self, + video_path: str, + threshold: float, + test_name: str, + max_frames: Optional[int] = None, + use_simd: Optional[bool] = None, + block_size: Optional[int] = None + ) -> PyPerformanceResult: + """ + 运行性能测试 + + Args: + video_path: 视频文件路径 + threshold: 检测阈值 + test_name: 测试名称 + max_frames: 最大处理帧数,默认1000 + use_simd: 是否使用SIMD优化,默认True + block_size: 处理块大小,默认8192 + + Returns: + 性能测试结果 + """ + ... + + def process_video( + self, + video_path: str, + output_dir: str, + threshold: Optional[float] = None, + max_frames: Optional[int] = None, + max_save: Optional[int] = None, + use_simd: Optional[bool] = None, + block_size: Optional[int] = None + ) -> PyPerformanceResult: + """ + 完整的处理流程 + + 执行完整的视频关键帧提取和保存流程。 + + Args: + video_path: 视频文件路径 + output_dir: 输出目录 + threshold: 检测阈值,默认2.0 + max_frames: 最大处理帧数,0表示处理所有帧 + max_save: 最大保存数量,默认50 + use_simd: 是否使用SIMD优化,默认True + block_size: 处理块大小,默认8192 + + Returns: + 处理结果 + """ + ... + + def get_cpu_features(self) -> Dict[str, bool]: + """ + 获取CPU特性信息 + + Returns: + CPU特性字典,包含AVX2、SSE2等支持信息 + """ + ... + + def get_thread_count(self) -> int: + """ + 获取当前配置的线程数 + + Returns: + 配置的线程数 + """ + ... + + def get_configured_threads(self) -> int: + """ + 获取配置的线程数 + + Returns: + 配置的线程数 + """ + ... + + def get_actual_thread_count(self) -> int: + """ + 获取实际运行的线程数 + + Returns: + 实际运行的线程数 + """ + ... + +def extract_keyframes_from_video( + video_path: str, + output_dir: str, + threshold: Optional[float] = None, + max_frames: Optional[int] = None, + max_save: Optional[int] = None, + ffmpeg_path: Optional[str] = None, + use_simd: Optional[bool] = None, + threads: Optional[int] = None, + verbose: Optional[bool] = None +) -> PyPerformanceResult: + """ + 便捷函数:从视频提取关键帧 + + 这是一个便捷函数,封装了完整的关键帧提取流程。 + + Args: + video_path: 视频文件路径 + output_dir: 输出目录 + threshold: 检测阈值,默认2.0 + max_frames: 最大处理帧数,0表示处理所有帧 + max_save: 最大保存数量,默认50 + ffmpeg_path: FFmpeg路径,默认"ffmpeg" + use_simd: 是否使用SIMD优化,默认True + threads: 线程数,0表示自动检测 + verbose: 是否启用详细输出,默认False + + Returns: + 处理结果 + + Example: + >>> result = extract_keyframes_from_video( + ... "video.mp4", + ... "./output", + ... threshold=2.5, + ... max_save=30, + ... verbose=True + ... ) + >>> print(f"提取了 {result.keyframes_extracted} 个关键帧") + """ + ... 
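For the class-based API stubbed above, a step-by-step sketch looks like the following. It is not part of the diff; `"video.mp4"` and `"./keyframes"` are placeholder paths, and `ffmpeg` is assumed to be on `PATH`.

```python
# Sketch of the three-stage flow: extract frames, detect keyframes, save images.
from rust_video import VideoKeyframeExtractor

extractor = VideoKeyframeExtractor(ffmpeg_path="ffmpeg", threads=0, verbose=True)

frames, width, height = extractor.extract_frames("video.mp4", max_frames=500)
keyframe_indices = extractor.extract_keyframes(frames, threshold=2.0)
saved = extractor.save_keyframes("video.mp4", keyframe_indices, "./keyframes", max_save=30)

print(f"{width}x{height}: {len(frames)} frames, "
      f"{len(keyframe_indices)} keyframes, {saved} saved")
```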
+ +def get_system_info() -> Dict[str, Any]: + """ + 获取系统信息 + + Returns: + 系统信息字典,包含: + - threads: 可用线程数 + - avx2_supported: 是否支持AVX2(x86_64) + - sse2_supported: 是否支持SSE2(x86_64) + - simd_supported: 是否支持SIMD(非x86_64) + - version: 库版本 + + Example: + >>> info = get_system_info() + >>> print(f"线程数: {info['threads']}") + >>> print(f"AVX2支持: {info.get('avx2_supported', False)}") + """ + ... + +# 类型别名 +VideoPath = Union[str, Path] +"""视频文件路径类型""" + +OutputPath = Union[str, Path] +"""输出路径类型""" + +FrameData = List[int] +"""帧数据类型(像素值列表)""" + +KeyframeIndices = List[int] +"""关键帧索引类型""" + +# 常量 +DEFAULT_THRESHOLD: float = 2.0 +"""默认检测阈值""" + +DEFAULT_BLOCK_SIZE: int = 8192 +"""默认处理块大小""" + +DEFAULT_MAX_SAVE: int = 50 +"""默认最大保存数量""" + +MAX_FRAME_DIFFERENCE: float = 255.0 +"""最大帧差异值""" + +# 版本信息 +__version__: str = "0.1.0" +"""库版本""" diff --git a/rust_video/src/lib.rs b/rust_video/src/lib.rs new file mode 100644 index 000000000..69382f298 --- /dev/null +++ b/rust_video/src/lib.rs @@ -0,0 +1,831 @@ +use pyo3::prelude::*; +use anyhow::{Context, Result}; +use chrono::prelude::*; +use rayon::prelude::*; +use serde::{Deserialize, Serialize}; +use std::collections::HashMap; +use std::fs; +use std::io::{BufReader, Read}; +use std::path::PathBuf; +use std::process::{Command, Stdio}; +use std::time::Instant; + +#[cfg(target_arch = "x86_64")] +use std::arch::x86_64::*; + +/// Python绑定的视频帧结构 +#[pyclass] +#[derive(Debug, Clone)] +pub struct PyVideoFrame { + #[pyo3(get)] + pub frame_number: usize, + #[pyo3(get)] + pub width: usize, + #[pyo3(get)] + pub height: usize, + pub data: Vec, +} + +#[pymethods] +impl PyVideoFrame { + #[new] + fn new(frame_number: usize, width: usize, height: usize, data: Vec) -> Self { + // 确保数据长度是32的倍数以支持AVX2处理 + let mut aligned_data = data; + let remainder = aligned_data.len() % 32; + if remainder != 0 { + aligned_data.resize(aligned_data.len() + (32 - remainder), 0); + } + + Self { + frame_number, + width, + height, + data: aligned_data, + } + } + + /// 获取帧数据 + fn get_data(&self) -> &[u8] { + let pixel_count = self.width * self.height; + &self.data[..pixel_count] + } + + /// 计算与另一帧的差异 + fn calculate_difference(&self, other: &PyVideoFrame) -> PyResult { + if self.width != other.width || self.height != other.height { + return Ok(f64::MAX); + } + + let total_pixels = self.width * self.height; + let total_diff: u64 = self.data[..total_pixels] + .iter() + .zip(other.data[..total_pixels].iter()) + .map(|(a, b)| (*a as i32 - *b as i32).abs() as u64) + .sum(); + + Ok(total_diff as f64 / total_pixels as f64) + } + + /// 使用SIMD优化计算帧差异 + #[pyo3(signature = (other, block_size=None))] + fn calculate_difference_simd(&self, other: &PyVideoFrame, block_size: Option) -> PyResult { + let block_size = block_size.unwrap_or(8192); + Ok(self.calculate_difference_parallel_simd(other, block_size, true)) + } +} + +impl PyVideoFrame { + /// 使用并行SIMD处理计算帧差异 + fn calculate_difference_parallel_simd(&self, other: &PyVideoFrame, block_size: usize, use_simd: bool) -> f64 { + if self.width != other.width || self.height != other.height { + return f64::MAX; + } + + let total_pixels = self.width * self.height; + let num_blocks = (total_pixels + block_size - 1) / block_size; + + let total_diff: u64 = (0..num_blocks) + .into_par_iter() + .map(|block_idx| { + let start = block_idx * block_size; + let end = ((block_idx + 1) * block_size).min(total_pixels); + let block_len = end - start; + + if use_simd { + #[cfg(target_arch = "x86_64")] + { + unsafe { + if std::arch::is_x86_feature_detected!("avx2") { + return 
self.calculate_difference_avx2_block(&other.data, start, block_len); + } else if std::arch::is_x86_feature_detected!("sse2") { + return self.calculate_difference_sse2_block(&other.data, start, block_len); + } + } + } + } + + // 标量实现回退 + self.data[start..end] + .iter() + .zip(other.data[start..end].iter()) + .map(|(a, b)| (*a as i32 - *b as i32).abs() as u64) + .sum() + }) + .sum(); + + total_diff as f64 / total_pixels as f64 + } + + /// AVX2 优化的块处理 + #[cfg(target_arch = "x86_64")] + #[target_feature(enable = "avx2")] + unsafe fn calculate_difference_avx2_block(&self, other_data: &[u8], start: usize, len: usize) -> u64 { + let mut total_diff = 0u64; + let chunks = len / 32; + + for i in 0..chunks { + let offset = start + i * 32; + + let a = _mm256_loadu_si256(self.data.as_ptr().add(offset) as *const __m256i); + let b = _mm256_loadu_si256(other_data.as_ptr().add(offset) as *const __m256i); + + let diff = _mm256_sad_epu8(a, b); + let result = _mm256_extract_epi64(diff, 0) as u64 + + _mm256_extract_epi64(diff, 1) as u64 + + _mm256_extract_epi64(diff, 2) as u64 + + _mm256_extract_epi64(diff, 3) as u64; + + total_diff += result; + } + + // 处理剩余字节 + for i in (start + chunks * 32)..(start + len) { + total_diff += (self.data[i] as i32 - other_data[i] as i32).abs() as u64; + } + + total_diff + } + + /// SSE2 优化的块处理 + #[cfg(target_arch = "x86_64")] + #[target_feature(enable = "sse2")] + unsafe fn calculate_difference_sse2_block(&self, other_data: &[u8], start: usize, len: usize) -> u64 { + let mut total_diff = 0u64; + let chunks = len / 16; + + for i in 0..chunks { + let offset = start + i * 16; + + let a = _mm_loadu_si128(self.data.as_ptr().add(offset) as *const __m128i); + let b = _mm_loadu_si128(other_data.as_ptr().add(offset) as *const __m128i); + + let diff = _mm_sad_epu8(a, b); + let result = _mm_extract_epi64(diff, 0) as u64 + _mm_extract_epi64(diff, 1) as u64; + + total_diff += result; + } + + // 处理剩余字节 + for i in (start + chunks * 16)..(start + len) { + total_diff += (self.data[i] as i32 - other_data[i] as i32).abs() as u64; + } + + total_diff + } +} + +/// 性能测试结果 +#[pyclass] +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct PyPerformanceResult { + #[pyo3(get)] + pub test_name: String, + #[pyo3(get)] + pub video_file: String, + #[pyo3(get)] + pub total_time_ms: f64, + #[pyo3(get)] + pub frame_extraction_time_ms: f64, + #[pyo3(get)] + pub keyframe_analysis_time_ms: f64, + #[pyo3(get)] + pub total_frames: usize, + #[pyo3(get)] + pub keyframes_extracted: usize, + #[pyo3(get)] + pub keyframe_ratio: f64, + #[pyo3(get)] + pub processing_fps: f64, + #[pyo3(get)] + pub threshold: f64, + #[pyo3(get)] + pub optimization_type: String, + #[pyo3(get)] + pub simd_enabled: bool, + #[pyo3(get)] + pub threads_used: usize, + #[pyo3(get)] + pub timestamp: String, +} + +#[pymethods] +impl PyPerformanceResult { + /// 转换为Python字典 + fn to_dict(&self) -> PyResult> { + Python::with_gil(|py| { + let mut dict = HashMap::new(); + dict.insert("test_name".to_string(), self.test_name.to_object(py)); + dict.insert("video_file".to_string(), self.video_file.to_object(py)); + dict.insert("total_time_ms".to_string(), self.total_time_ms.to_object(py)); + dict.insert("frame_extraction_time_ms".to_string(), self.frame_extraction_time_ms.to_object(py)); + dict.insert("keyframe_analysis_time_ms".to_string(), self.keyframe_analysis_time_ms.to_object(py)); + dict.insert("total_frames".to_string(), self.total_frames.to_object(py)); + dict.insert("keyframes_extracted".to_string(), self.keyframes_extracted.to_object(py)); + 
dict.insert("keyframe_ratio".to_string(), self.keyframe_ratio.to_object(py)); + dict.insert("processing_fps".to_string(), self.processing_fps.to_object(py)); + dict.insert("threshold".to_string(), self.threshold.to_object(py)); + dict.insert("optimization_type".to_string(), self.optimization_type.to_object(py)); + dict.insert("simd_enabled".to_string(), self.simd_enabled.to_object(py)); + dict.insert("threads_used".to_string(), self.threads_used.to_object(py)); + dict.insert("timestamp".to_string(), self.timestamp.to_object(py)); + Ok(dict) + }) + } +} + +/// 主要的视频关键帧提取器类 +#[pyclass] +pub struct VideoKeyframeExtractor { + ffmpeg_path: String, + threads: usize, + verbose: bool, +} + +#[pymethods] +impl VideoKeyframeExtractor { + #[new] + #[pyo3(signature = (ffmpeg_path = "ffmpeg".to_string(), threads = 0, verbose = false))] + fn new(ffmpeg_path: String, threads: usize, verbose: bool) -> PyResult { + // 设置线程池(如果还没有初始化) + if threads > 0 { + // 尝试设置线程池,如果已经初始化则忽略错误 + let _ = rayon::ThreadPoolBuilder::new() + .num_threads(threads) + .build_global(); + } + + Ok(Self { + ffmpeg_path, + threads: if threads == 0 { rayon::current_num_threads() } else { threads }, + verbose, + }) + } + + /// 从视频中提取帧 + #[pyo3(signature = (video_path, max_frames=None))] + fn extract_frames(&self, video_path: &str, max_frames: Option) -> PyResult<(Vec, usize, usize)> { + let video_path = PathBuf::from(video_path); + let max_frames = max_frames.unwrap_or(0); + + extract_frames_memory_stream(&video_path, &PathBuf::from(&self.ffmpeg_path), max_frames, self.verbose) + .map_err(|e| PyErr::new::(format!("Frame extraction failed: {}", e))) + } + + /// 提取关键帧索引 + #[pyo3(signature = (frames, threshold, use_simd=None, block_size=None))] + fn extract_keyframes( + &self, + frames: Vec, + threshold: f64, + use_simd: Option, + block_size: Option + ) -> PyResult> { + let use_simd = use_simd.unwrap_or(true); + let block_size = block_size.unwrap_or(8192); + + extract_keyframes_optimized(&frames, threshold, use_simd, block_size, self.verbose) + .map_err(|e| PyErr::new::(format!("Keyframe extraction failed: {}", e))) + } + + /// 保存关键帧为图片 + #[pyo3(signature = (video_path, keyframe_indices, output_dir, max_save=None))] + fn save_keyframes( + &self, + video_path: &str, + keyframe_indices: Vec, + output_dir: &str, + max_save: Option + ) -> PyResult { + let video_path = PathBuf::from(video_path); + let output_dir = PathBuf::from(output_dir); + let max_save = max_save.unwrap_or(50); + + save_keyframes_optimized( + &video_path, + &keyframe_indices, + &output_dir, + &PathBuf::from(&self.ffmpeg_path), + max_save, + self.verbose + ).map_err(|e| PyErr::new::(format!("Save keyframes failed: {}", e))) + } + + /// 运行性能测试 + #[pyo3(signature = (video_path, threshold, test_name, max_frames=None, use_simd=None, block_size=None))] + fn benchmark( + &self, + video_path: &str, + threshold: f64, + test_name: &str, + max_frames: Option, + use_simd: Option, + block_size: Option + ) -> PyResult { + let video_path = PathBuf::from(video_path); + let max_frames = max_frames.unwrap_or(1000); + let use_simd = use_simd.unwrap_or(true); + let block_size = block_size.unwrap_or(8192); + + let result = run_performance_test( + &video_path, + threshold, + test_name, + &PathBuf::from(&self.ffmpeg_path), + max_frames, + use_simd, + block_size, + self.verbose + ).map_err(|e| PyErr::new::(format!("Benchmark failed: {}", e)))?; + + Ok(PyPerformanceResult { + test_name: result.test_name, + video_file: result.video_file, + total_time_ms: result.total_time_ms, + 
frame_extraction_time_ms: result.frame_extraction_time_ms, + keyframe_analysis_time_ms: result.keyframe_analysis_time_ms, + total_frames: result.total_frames, + keyframes_extracted: result.keyframes_extracted, + keyframe_ratio: result.keyframe_ratio, + processing_fps: result.processing_fps, + threshold: result.threshold, + optimization_type: result.optimization_type, + simd_enabled: result.simd_enabled, + threads_used: result.threads_used, + timestamp: result.timestamp, + }) + } + + /// 完整的处理流程 + #[pyo3(signature = (video_path, output_dir, threshold=None, max_frames=None, max_save=None, use_simd=None, block_size=None))] + fn process_video( + &self, + video_path: &str, + output_dir: &str, + threshold: Option, + max_frames: Option, + max_save: Option, + use_simd: Option, + block_size: Option + ) -> PyResult { + let threshold = threshold.unwrap_or(2.0); + let max_frames = max_frames.unwrap_or(0); + let max_save = max_save.unwrap_or(50); + let use_simd = use_simd.unwrap_or(true); + let block_size = block_size.unwrap_or(8192); + + let video_path_buf = PathBuf::from(video_path); + let output_dir_buf = PathBuf::from(output_dir); + + // 运行性能测试 + let result = run_performance_test( + &video_path_buf, + threshold, + "Python Processing", + &PathBuf::from(&self.ffmpeg_path), + max_frames, + use_simd, + block_size, + self.verbose + ).map_err(|e| PyErr::new::(format!("Processing failed: {}", e)))?; + + // 提取并保存关键帧 + let (frames, _, _) = extract_frames_memory_stream(&video_path_buf, &PathBuf::from(&self.ffmpeg_path), max_frames, self.verbose) + .map_err(|e| PyErr::new::(format!("Frame extraction failed: {}", e)))?; + + let frames: Vec = frames.into_iter().map(|f| PyVideoFrame { + frame_number: f.frame_number, + width: f.width, + height: f.height, + data: f.data, + }).collect(); + + let keyframe_indices = extract_keyframes_optimized(&frames, threshold, use_simd, block_size, self.verbose) + .map_err(|e| PyErr::new::(format!("Keyframe extraction failed: {}", e)))?; + + save_keyframes_optimized(&video_path_buf, &keyframe_indices, &output_dir_buf, &PathBuf::from(&self.ffmpeg_path), max_save, self.verbose) + .map_err(|e| PyErr::new::(format!("Save keyframes failed: {}", e)))?; + + Ok(PyPerformanceResult { + test_name: result.test_name, + video_file: result.video_file, + total_time_ms: result.total_time_ms, + frame_extraction_time_ms: result.frame_extraction_time_ms, + keyframe_analysis_time_ms: result.keyframe_analysis_time_ms, + total_frames: result.total_frames, + keyframes_extracted: result.keyframes_extracted, + keyframe_ratio: result.keyframe_ratio, + processing_fps: result.processing_fps, + threshold: result.threshold, + optimization_type: result.optimization_type, + simd_enabled: result.simd_enabled, + threads_used: result.threads_used, + timestamp: result.timestamp, + }) + } + + /// 获取CPU特性信息 + fn get_cpu_features(&self) -> PyResult> { + let mut features = HashMap::new(); + + #[cfg(target_arch = "x86_64")] + { + features.insert("avx2".to_string(), std::arch::is_x86_feature_detected!("avx2")); + features.insert("sse2".to_string(), std::arch::is_x86_feature_detected!("sse2")); + features.insert("sse4_1".to_string(), std::arch::is_x86_feature_detected!("sse4.1")); + features.insert("sse4_2".to_string(), std::arch::is_x86_feature_detected!("sse4.2")); + features.insert("fma".to_string(), std::arch::is_x86_feature_detected!("fma")); + } + + #[cfg(not(target_arch = "x86_64"))] + { + features.insert("simd_supported".to_string(), false); + } + + Ok(features) + } + + /// 获取当前使用的线程数 + fn get_thread_count(&self) 
-> usize { + self.threads + } + + /// 获取配置的线程数 + fn get_configured_threads(&self) -> usize { + self.threads + } + + /// 获取实际运行的线程数 + fn get_actual_thread_count(&self) -> usize { + rayon::current_num_threads() + } +} + +// 从main.rs中复制的核心函数 + +struct PerformanceResult { + test_name: String, + video_file: String, + total_time_ms: f64, + frame_extraction_time_ms: f64, + keyframe_analysis_time_ms: f64, + total_frames: usize, + keyframes_extracted: usize, + keyframe_ratio: f64, + processing_fps: f64, + threshold: f64, + optimization_type: String, + simd_enabled: bool, + threads_used: usize, + timestamp: String, +} + +fn extract_frames_memory_stream( + video_path: &PathBuf, + ffmpeg_path: &PathBuf, + max_frames: usize, + verbose: bool, +) -> Result<(Vec, usize, usize)> { + if verbose { + println!("🎬 Extracting frames using FFmpeg memory streaming..."); + println!("📁 Video: {}", video_path.display()); + } + + // 获取视频信息 + let probe_output = Command::new(ffmpeg_path) + .args(["-i", video_path.to_str().unwrap(), "-hide_banner"]) + .output() + .context("Failed to probe video with FFmpeg")?; + + let probe_info = String::from_utf8_lossy(&probe_output.stderr); + let (width, height) = parse_video_dimensions(&probe_info) + .ok_or_else(|| anyhow::anyhow!("Cannot parse video dimensions"))?; + + if verbose { + println!("📐 Video dimensions: {}x{}", width, height); + } + + // 构建优化的FFmpeg命令 + let mut cmd = Command::new(ffmpeg_path); + cmd.args([ + "-i", video_path.to_str().unwrap(), + "-f", "rawvideo", + "-pix_fmt", "gray", + "-an", + "-threads", "0", + "-preset", "ultrafast", + ]); + + if max_frames > 0 { + cmd.args(["-frames:v", &max_frames.to_string()]); + } + + cmd.args(["-"]).stdout(Stdio::piped()).stderr(Stdio::null()); + + let start_time = Instant::now(); + let mut child = cmd.spawn().context("Failed to spawn FFmpeg process")?; + let stdout = child.stdout.take().unwrap(); + let mut reader = BufReader::with_capacity(1024 * 1024, stdout); + + let frame_size = width * height; + let mut frames = Vec::new(); + let mut frame_count = 0; + let mut frame_buffer = vec![0u8; frame_size]; + + if verbose { + println!("📦 Frame size: {} bytes", frame_size); + } + + // 直接流式读取帧数据到内存 + loop { + match reader.read_exact(&mut frame_buffer) { + Ok(()) => { + frames.push(PyVideoFrame::new( + frame_count, + width, + height, + frame_buffer.clone(), + )); + frame_count += 1; + + if verbose && frame_count % 200 == 0 { + print!("\r⚡ Frames processed: {}", frame_count); + } + + if max_frames > 0 && frame_count >= max_frames { + break; + } + } + Err(_) => break, + } + } + + let _ = child.wait(); + + if verbose { + println!("\r✅ Frame extraction complete: {} frames in {:.2}s", + frame_count, start_time.elapsed().as_secs_f64()); + } + + Ok((frames, width, height)) +} + +fn parse_video_dimensions(probe_info: &str) -> Option<(usize, usize)> { + for line in probe_info.lines() { + if line.contains("Video:") && line.contains("x") { + for part in line.split_whitespace() { + if let Some(x_pos) = part.find('x') { + let width_str = &part[..x_pos]; + let height_part = &part[x_pos + 1..]; + let height_str = height_part.split(',').next().unwrap_or(height_part); + + if let (Ok(width), Ok(height)) = (width_str.parse::(), height_str.parse::()) { + return Some((width, height)); + } + } + } + } + } + None +} + +fn extract_keyframes_optimized( + frames: &[PyVideoFrame], + threshold: f64, + use_simd: bool, + block_size: usize, + verbose: bool, +) -> Result> { + if frames.len() < 2 { + return Ok(Vec::new()); + } + + let optimization_name = if use_simd { 
"SIMD+Parallel" } else { "Standard Parallel" }; + if verbose { + println!("🚀 Keyframe analysis (threshold: {}, optimization: {})", threshold, optimization_name); + } + + let start_time = Instant::now(); + + // 并行计算帧差异 + let differences: Vec = frames + .par_windows(2) + .map(|pair| { + if use_simd { + pair[0].calculate_difference_parallel_simd(&pair[1], block_size, true) + } else { + pair[0].calculate_difference(&pair[1]).unwrap_or(f64::MAX) + } + }) + .collect(); + + // 基于阈值查找关键帧 + let keyframe_indices: Vec = differences + .par_iter() + .enumerate() + .filter_map(|(i, &diff)| { + if diff > threshold { + Some(i + 1) + } else { + None + } + }) + .collect(); + + if verbose { + println!("⚡ Analysis complete in {:.2}s", start_time.elapsed().as_secs_f64()); + println!("🎯 Found {} keyframes", keyframe_indices.len()); + } + + Ok(keyframe_indices) +} + +fn save_keyframes_optimized( + video_path: &PathBuf, + keyframe_indices: &[usize], + output_dir: &PathBuf, + ffmpeg_path: &PathBuf, + max_save: usize, + verbose: bool, +) -> Result { + if keyframe_indices.is_empty() { + if verbose { + println!("⚠️ No keyframes to save"); + } + return Ok(0); + } + + if verbose { + println!("💾 Saving keyframes..."); + } + + fs::create_dir_all(output_dir).context("Failed to create output directory")?; + + let save_count = keyframe_indices.len().min(max_save); + let mut saved = 0; + + for (i, &frame_idx) in keyframe_indices.iter().take(save_count).enumerate() { + let output_path = output_dir.join(format!("keyframe_{:03}.jpg", i + 1)); + let timestamp = frame_idx as f64 / 30.0; // 假设30 FPS + + let output = Command::new(ffmpeg_path) + .args([ + "-i", video_path.to_str().unwrap(), + "-ss", ×tamp.to_string(), + "-vframes", "1", + "-q:v", "2", + "-y", + output_path.to_str().unwrap(), + ]) + .output() + .context("Failed to extract keyframe with FFmpeg")?; + + if output.status.success() { + saved += 1; + if verbose && (saved % 10 == 0 || saved == save_count) { + print!("\r💾 Saved: {}/{} keyframes", saved, save_count); + } + } else if verbose { + eprintln!("⚠️ Failed to save keyframe {}", frame_idx); + } + } + + if verbose { + println!("\r✅ Keyframe saving complete: {}/{}", saved, save_count); + } + + Ok(saved) +} + +fn run_performance_test( + video_path: &PathBuf, + threshold: f64, + test_name: &str, + ffmpeg_path: &PathBuf, + max_frames: usize, + use_simd: bool, + block_size: usize, + verbose: bool, +) -> Result { + if verbose { + println!("\n{}", "=".repeat(60)); + println!("⚡ Running test: {}", test_name); + println!("{}", "=".repeat(60)); + } + + let total_start = Instant::now(); + + // 帧提取 + let extraction_start = Instant::now(); + let (frames, _width, _height) = extract_frames_memory_stream(video_path, ffmpeg_path, max_frames, verbose)?; + let extraction_time = extraction_start.elapsed().as_secs_f64() * 1000.0; + + // 关键帧分析 + let analysis_start = Instant::now(); + let keyframe_indices = extract_keyframes_optimized(&frames, threshold, use_simd, block_size, verbose)?; + let analysis_time = analysis_start.elapsed().as_secs_f64() * 1000.0; + + let total_time = total_start.elapsed().as_secs_f64() * 1000.0; + + let optimization_type = if use_simd { + format!("SIMD+Parallel(block:{})", block_size) + } else { + "Standard Parallel".to_string() + }; + + let result = PerformanceResult { + test_name: test_name.to_string(), + video_file: video_path.file_name().unwrap().to_string_lossy().to_string(), + total_time_ms: total_time, + frame_extraction_time_ms: extraction_time, + keyframe_analysis_time_ms: analysis_time, + total_frames: 
frames.len(), + keyframes_extracted: keyframe_indices.len(), + keyframe_ratio: keyframe_indices.len() as f64 / frames.len() as f64 * 100.0, + processing_fps: frames.len() as f64 / (total_time / 1000.0), + threshold, + optimization_type, + simd_enabled: use_simd, + threads_used: rayon::current_num_threads(), + timestamp: Local::now().format("%Y-%m-%d %H:%M:%S").to_string(), + }; + + if verbose { + println!("\n⚡ Test Results:"); + println!(" 🕐 Total time: {:.2}ms ({:.2}s)", result.total_time_ms, result.total_time_ms / 1000.0); + println!(" 📥 Extraction: {:.2}ms ({:.1}%)", result.frame_extraction_time_ms, + result.frame_extraction_time_ms / result.total_time_ms * 100.0); + println!(" 🧮 Analysis: {:.2}ms ({:.1}%)", result.keyframe_analysis_time_ms, + result.keyframe_analysis_time_ms / result.total_time_ms * 100.0); + println!(" 📊 Frames: {}", result.total_frames); + println!(" 🎯 Keyframes: {}", result.keyframes_extracted); + println!(" 🚀 Speed: {:.1} FPS", result.processing_fps); + println!(" ⚙️ Optimization: {}", result.optimization_type); + } + + Ok(result) +} + +/// Python模块定义 +#[pymodule] +fn rust_video(m: &Bound<'_, PyModule>) -> PyResult<()> { + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + + // 便捷函数 + #[pyfn(m)] + #[pyo3(signature = (video_path, output_dir, threshold=None, max_frames=None, max_save=None, ffmpeg_path=None, use_simd=None, threads=None, verbose=None))] + fn extract_keyframes_from_video( + video_path: &str, + output_dir: &str, + threshold: Option, + max_frames: Option, + max_save: Option, + ffmpeg_path: Option, + use_simd: Option, + threads: Option, + verbose: Option + ) -> PyResult { + let extractor = VideoKeyframeExtractor::new( + ffmpeg_path.unwrap_or_else(|| "ffmpeg".to_string()), + threads.unwrap_or(0), + verbose.unwrap_or(false) + )?; + + extractor.process_video( + video_path, + output_dir, + threshold, + max_frames, + max_save, + use_simd, + None + ) + } + + #[pyfn(m)] + fn get_system_info() -> PyResult> { + Python::with_gil(|py| { + let mut info = HashMap::new(); + info.insert("threads".to_string(), rayon::current_num_threads().to_object(py)); + + #[cfg(target_arch = "x86_64")] + { + info.insert("avx2_supported".to_string(), std::arch::is_x86_feature_detected!("avx2").to_object(py)); + info.insert("sse2_supported".to_string(), std::arch::is_x86_feature_detected!("sse2").to_object(py)); + } + + #[cfg(not(target_arch = "x86_64"))] + { + info.insert("simd_supported".to_string(), false.to_object(py)); + } + + info.insert("version".to_string(), "0.1.0".to_object(py)); + + Ok(info) + }) + } + + Ok(()) +}
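To tie the module together, here is an end-to-end sketch of the convenience API defined in the `#[pymodule]` above. It assumes the extension has been installed into the current environment (for example via `maturin develop --release`); the video and output paths are placeholders.

```python
# Sketch: query system capabilities, then run the full extract-and-save pipeline.
import rust_video

info = rust_video.get_system_info()
print(f"threads: {info['threads']}, version: {info['version']}")

result = rust_video.extract_keyframes_from_video(
    "video.mp4",       # placeholder input path
    "./keyframes",     # placeholder output directory
    threshold=2.0,
    max_frames=1000,
    max_save=30,
    verbose=True,
)
print(result.to_dict())  # full PyPerformanceResult as a plain dict
```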