Mofox-Core/src/chat/utils/utils_video.py

#!/usr/bin/env python3
"""纯 inkfox 视频关键帧分析工具

仅依赖 `inkfox.video` 提供的 Rust 扩展能力：
    - extract_keyframes_from_video
    - get_system_info

功能：
    - 关键帧提取 (base64, timestamp)
    - 批量 / 逐帧 LLM 描述
    - 自动模式 (<=3 帧批量，否则逐帧)
"""

from __future__ import annotations

import asyncio
import base64
import tempfile
from pathlib import Path
from typing import List, Tuple, Optional, Dict, Any
import hashlib
import io
import os
import tempfile
import time
from pathlib import Path
from typing import Any

from PIL import Image
from sqlalchemy import exc as sa_exc  # type: ignore
from sqlalchemy import insert, select, update  # type: ignore

from src.common.database.sqlalchemy_models import Videos, get_db_session  # type: ignore
from src.common.logger import get_logger
from src.config.config import global_config, model_config
from src.llm_models.utils_model import LLMRequest

# 简易并发控制：同一 hash 只处理一次
_video_locks: dict[str, asyncio.Lock] = {}
_locks_guard = asyncio.Lock()

logger = get_logger("utils_video")

from inkfox import video


class VideoAnalyzer:
    """基于 inkfox 的视频关键帧 + LLM 描述分析器"""

    def __init__(self) -> None:
        cfg = getattr(global_config, "video_analysis", object())
        self.max_frames: int = getattr(cfg, "max_frames", 20)
        self.frame_quality: int = getattr(cfg, "frame_quality", 85)
        self.max_image_size: int = getattr(cfg, "max_image_size", 600)
        self.enable_frame_timing: bool = getattr(cfg, "enable_frame_timing", True)
        self.use_simd: bool = getattr(cfg, "rust_use_simd", True)
        self.threads: int = getattr(cfg, "rust_threads", 0)
        self.ffmpeg_path: str = getattr(cfg, "ffmpeg_path", "ffmpeg")
        self.analysis_mode: str = getattr(cfg, "analysis_mode", "auto")
        self.frame_analysis_delay: float = 0.3

        # 人格与提示模板
        try:
            persona = global_config.personality
            self.personality_core = getattr(persona, "personality_core", "是一个积极向上的女大学生")
            self.personality_side = getattr(persona, "personality_side", "用一句话或几句话描述人格的侧面特点")
        except Exception:  # pragma: no cover
            self.personality_core = "是一个积极向上的女大学生"
            self.personality_side = "用一句话或几句话描述人格的侧面特点"

        self.batch_analysis_prompt = getattr(
            cfg,
            "batch_analysis_prompt",
            """请以第一人称视角阅读这些按时间顺序提取的关键帧。\n核心：{personality_core}\n人格：{personality_side}\n请详细描述视频(主题/人物与场景/动作与时间线/视觉风格/情绪氛围/特殊元素)。""",
        )

        try:
            self.video_llm = LLMRequest(
                model_set=model_config.model_task_config.video_analysis, request_type="video_analysis"
            )
        except Exception:
            self.video_llm = LLMRequest(model_set=model_config.model_task_config.vlm, request_type="vlm")

        self._log_system()

    # ---- 系统信息 ----
    def _log_system(self) -> None:
        try:
            info = video.get_system_info()  # type: ignore[attr-defined]
            logger.info(
                f"inkfox: threads={info.get('threads')} version={info.get('version')} simd={info.get('simd_supported')}"
            )
        except Exception as e:  # pragma: no cover
            logger.debug(f"获取系统信息失败: {e}")

    # ---- 关键帧提取 ----
    async def extract_keyframes(self, video_path: str) -> list[tuple[str, float]]:
        """提取关键帧并返回 (base64, timestamp_seconds) 列表"""
        with tempfile.TemporaryDirectory() as tmp:
            result = video.extract_keyframes_from_video(  # type: ignore[attr-defined]
                video_path=video_path,
                output_dir=tmp,
                max_keyframes=self.max_frames * 2,  # 先多抓一点再截断
                max_save=self.max_frames,
                ffmpeg_path=self.ffmpeg_path,
                use_simd=self.use_simd,
                threads=self.threads,
                verbose=False,
            )
            files = sorted(Path(tmp).glob("keyframe_*.jpg"))[: self.max_frames]
            total_ms = getattr(result, "total_time_ms", 0)
            frames: list[tuple[str, float]] = []
            for i, f in enumerate(files):
                img = Image.open(f).convert("RGB")
                if max(img.size) > self.max_image_size:
                    scale = self.max_image_size / max(img.size)
                    img = img.resize((int(img.width * scale), int(img.height * scale)), Image.Resampling.LANCZOS)
                buf = io.BytesIO()
                img.save(buf, format="JPEG", quality=self.frame_quality)
                b64 = base64.b64encode(buf.getvalue()).decode()
                ts = (i / max(1, len(files) - 1)) * (total_ms / 1000.0) if total_ms else float(i)
                frames.append((b64, ts))
            return frames

    # ---- 批量分析 ----
    async def _analyze_batch(self, frames: list[tuple[str, float]], question: str | None) -> str:
        from src.llm_models.payload_content.message import MessageBuilder
        from src.llm_models.utils_model import RequestType

        prompt = self.batch_analysis_prompt.format(
            personality_core=self.personality_core, personality_side=self.personality_side
        )
        if question:
            prompt += f"\n用户关注: {question}"

        desc = [
            (f"第{i+1}帧 (时间: {ts:.2f}s)" if self.enable_frame_timing else f"第{i+1}帧")
            for i, (_b, ts) in enumerate(frames)
        ]
        prompt += "\n帧列表: " + ", ".join(desc)

        message_builder = MessageBuilder().add_text_content(prompt)
        for b64, _ in frames:
            message_builder.add_image_content(image_format="jpeg", image_base64=b64)
        messages = [message_builder.build()]

        # 使用封装好的高级策略执行请求，而不是直接调用内部方法
        response, _ = await self.video_llm._strategy.execute_with_failover(
            RequestType.RESPONSE,
            raise_when_empty=False,  # 即使失败也返回默认值，避免程序崩溃
            message_list=messages,
            temperature=self.video_llm.model_for_task.temperature,
            max_tokens=self.video_llm.model_for_task.max_tokens,
        )

        return response.content or "❌ 未获得响应"

    # ---- 逐帧分析 ----
    async def _analyze_sequential(self, frames: list[tuple[str, float]], question: str | None) -> str:
        results: list[str] = []
        for i, (b64, ts) in enumerate(frames):
            prompt = f"分析第{i+1}帧" + (f" (时间: {ts:.2f}s)" if self.enable_frame_timing else "")
            if question:
                prompt += f"\n关注: {question}"
            try:
                text, _ = await self.video_llm.generate_response_for_image(
                    prompt=prompt, image_base64=b64, image_format="jpeg"
                )
                results.append(f"第{i+1}帧: {text}")
            except Exception as e:  # pragma: no cover
                results.append(f"第{i+1}帧: 失败 {e}")
            if i < len(frames) - 1:
                await asyncio.sleep(self.frame_analysis_delay)
        summary_prompt = "基于以下逐帧结果给出完整总结:\n\n" + "\n".join(results)
        try:
            final, _ = await self.video_llm.generate_response_for_image(
                prompt=summary_prompt, image_base64=frames[-1][0], image_format="jpeg"
            )
            return final
        except Exception:  # pragma: no cover
            return "\n".join(results)

    # ---- 主入口 ----
    async def analyze_video(self, video_path: str, question: str | None = None) -> tuple[bool, str]:
        if not os.path.exists(video_path):
            return False, "❌ 文件不存在"
        frames = await self.extract_keyframes(video_path)
        if not frames:
            return False, "❌ 未提取到关键帧"
        mode = self.analysis_mode
        if mode == "auto":
            mode = "batch" if len(frames) <= 20 else "sequential"
        text = await (self._analyze_batch(frames, question) if mode == "batch" else self._analyze_sequential(frames, question))
        return True, text

    async def analyze_video_from_bytes(
        self,
        video_bytes: bytes,
        filename: str | None = None,
        prompt: str | None = None,
        question: str | None = None,
    ) -> dict[str, str]:
        """从内存字节分析视频，兼容旧调用 (prompt / question 二选一) 返回 {"summary": str}."""
        if not video_bytes:
            return {"summary": "❌ 空视频数据"}
        # 兼容参数：prompt 优先，其次 question
        q = prompt if prompt is not None else question
        video_hash = hashlib.sha256(video_bytes).hexdigest()

        # 查缓存（第一次，未加锁）
        cached = await self._get_cached(video_hash)
        if cached:
            logger.info(f"视频缓存命中(预检查) hash={video_hash[:16]}")
            return {"summary": cached}

        # 获取锁避免重复处理
        async with _locks_guard:
            lock = _video_locks.get(video_hash)
            if lock is None:
                lock = asyncio.Lock()
                _video_locks[video_hash] = lock
        async with lock:
            # 双检缓存
            cached2 = await self._get_cached(video_hash)
            if cached2:
                logger.info(f"视频缓存命中(锁后) hash={video_hash[:16]}")
                return {"summary": cached2}

            try:
                with tempfile.NamedTemporaryFile(delete=False) as fp:
                    fp.write(video_bytes)
                    temp_path = fp.name
                try:
                    ok, summary = await self.analyze_video(temp_path, q)
                    # 写入缓存（仅成功）
                    if ok:
                        await self._save_cache(video_hash, summary, len(video_bytes))
                    return {"summary": summary}
                finally:
                    if os.path.exists(temp_path):
                        try:
                            os.remove(temp_path)
                        except Exception:  # pragma: no cover
                            pass
            except Exception as e:  # pragma: no cover
                return {"summary": f"❌ 处理失败: {e}"}

    # ---- 缓存辅助 ----
    async def _get_cached(self, video_hash: str) -> str | None:
        try:
            async with get_db_session() as session:  # type: ignore
                result = await session.execute(select(Videos).where(Videos.video_hash == video_hash))  # type: ignore
                obj: Videos | None = result.scalar_one_or_none()  # type: ignore
                if obj and obj.vlm_processed and obj.description:
                    # 更新使用次数
                    try:
                        await session.execute(
                            update(Videos)
                            .where(Videos.id == obj.id)  # type: ignore
                            .values(count=obj.count + 1 if obj.count is not None else 1)
                        )
                        await session.commit()
                    except Exception:  # pragma: no cover
                        await session.rollback()
                    return obj.description
        except Exception:  # pragma: no cover
            pass
        return None

    async def _save_cache(self, video_hash: str, summary: str, file_size: int) -> None:
        try:
            async with get_db_session() as session:  # type: ignore
                stmt = insert(Videos).values(  # type: ignore
                    video_id="",
                    video_hash=video_hash,
                    description=summary,
                    count=1,
                    timestamp=time.time(),
                    vlm_processed=True,
                    duration=None,
                    frame_count=None,
                    fps=None,
                    resolution=None,
                    file_size=file_size,
                )
                try:
                    await session.execute(stmt)
                    await session.commit()
                    logger.debug(f"视频缓存写入 success hash={video_hash}")
                except sa_exc.IntegrityError:  # 可能并发已写入
                    await session.rollback()
                    logger.debug(f"视频缓存已存在 hash={video_hash}")
        except Exception:  # pragma: no cover
                logger.debug("视频缓存写入失败")


# ---- 外部接口 ----
_INSTANCE: VideoAnalyzer | None = None


def get_video_analyzer() -> VideoAnalyzer:
    global _INSTANCE
    if _INSTANCE is None:
        _INSTANCE = VideoAnalyzer()
    return _INSTANCE


def is_video_analysis_available() -> bool:
    return True


def get_video_analysis_status() -> dict[str, Any]:
    try:
        info = video.get_system_info()  # type: ignore[attr-defined]
    except Exception as e:  # pragma: no cover
        return {"available": False, "error": str(e)}
    inst = get_video_analyzer()
    return {
        "available": True,
        "system": info,
        "modes": ["auto", "batch", "sequential"],
        "max_frames_default": inst.max_frames,
        "implementation": "inkfox",
    }