增加videoid修复同一个视频重复识别的问题

ps：通过数据库检查的去重：视频hash计算：使用SHA256算法计算视频文件的唯一标识；数据库去重检查：在分析前先检查数据库中是否已存在相同hash的视频；结果缓存：将分析结果存储到Videos表中，避免重复分析。
2025-08-17 14:00:00 +08:00
parent 6b3d916fd0
commit 8902f6b7cb
2 changed files with 92 additions and 0 deletions
--- a/src/common/database/sqlalchemy_models.py
+++ b/src/common/database/sqlalchemy_models.py
@@ -217,6 +217,33 @@ class ImageDescriptions(Base):
    )
 class Videos(Base):
    """Video information model.

    One row per stored video, mapped to the ``videos`` table. ``video_hash``
    and ``path`` are both declared unique, so inserting a second row with
    either value already present is rejected by the database.
    """
    __tablename__ = 'videos'
    id = Column(Integer, primary_key=True, autoincrement=True)
    # Free-form video identifier; defaults to an empty string.
    video_id = Column(Text, nullable=False, default="")
    # Hash of the video content, unique per row.
    # NOTE(review): index=True here plus the explicit Index in __table_args__
    # looks like two indexes on the same column — confirm this is intended.
    video_hash = Column(get_string_field(64), nullable=False, index=True, unique=True)
    # Stored description text for the video; may be NULL.
    description = Column(Text, nullable=True)
    # NOTE(review): unique=True means two different videos recorded under the
    # same path value cannot both be stored — verify against callers.
    path = Column(get_string_field(500), nullable=False, unique=True)
    # Defaults to 1; presumably a usage/occurrence counter — TODO confirm.
    count = Column(Integer, nullable=False, default=1)
    # Required float timestamp; presumably Unix epoch seconds — TODO confirm.
    timestamp = Column(Float, nullable=False)
    # Defaults to False; presumably set once a VLM has processed the video — TODO confirm.
    vlm_processed = Column(Boolean, nullable=False, default=False)
    # Video-specific attributes (all optional)
    duration = Column(Float, nullable=True)  # Video duration (seconds)
    frame_count = Column(Integer, nullable=True)  # Total number of frames
    fps = Column(Float, nullable=True)  # Frame rate
    resolution = Column(Text, nullable=True)  # Resolution
    file_size = Column(Integer, nullable=True)  # File size (bytes)
    # Explicit secondary indexes on the hash, path and timestamp columns.
    __table_args__ = (
        Index('idx_videos_video_hash', 'video_hash'),
        Index('idx_videos_path', 'path'),
        Index('idx_videos_timestamp', 'timestamp'),
    )
 class OnlineTime(Base):
    """在线时长记录模型"""
    __tablename__ = 'online_time'
--- a/src/multimodal/video_analyzer.py
+++ b/src/multimodal/video_analyzer.py
@@ -10,6 +10,8 @@ import cv2
 import tempfile
 import asyncio
 import base64
 import hashlib
 import time
 from PIL import Image
 from pathlib import Path
 from typing import List, Tuple, Optional, Dict
@@ -18,6 +20,7 @@ import io
 from src.llm_models.utils_model import LLMRequest
 from src.config.config import global_config, model_config
 from src.common.logger import get_logger
 from src.common.database.sqlalchemy_models import get_db_session, Videos
 logger = get_logger("src.multimodal.video_analyzer")
@@ -98,6 +101,44 @@ class VideoAnalyzer:
        logger.info(f"✅ 视频分析器初始化完成，分析模式: {self.analysis_mode}")
    def _calculate_video_hash(self, video_data: bytes) -> str:
        """计算视频文件的hash值"""
        hash_obj = hashlib.sha256()
        hash_obj.update(video_data)
        return hash_obj.hexdigest()
    def _check_video_exists(self, video_hash: str) -> Optional[Videos]:
        """Look up an already-stored video record by its content hash.

        Args:
            video_hash: Hash string to match against ``Videos.video_hash``.

        Returns:
            The first matching ``Videos`` row, or ``None`` when no row matches
            or when the lookup fails. Failures are logged and swallowed so the
            caller can fall back to a fresh analysis.
        """
        try:
            with get_db_session() as session:
                # NOTE(review): the row is returned after the session context
                # exits; presumably its loaded attributes stay readable —
                # confirm against get_db_session's commit/expire behaviour.
                return session.query(Videos).filter(Videos.video_hash == video_hash).first()
        except Exception as e:
            # Use the module-level logger like the rest of this class; logging
            # through an instance attribute here could itself raise inside the
            # handler and defeat the best-effort fallback.
            logger.warning(f"检查视频是否存在时出错: {e}")
            return None
    def _store_video_result(self, video_hash: str, description: str, path: str = "", metadata: Optional[Dict] = None) -> Optional[Videos]:
        """Persist a video analysis result in the ``Videos`` table.

        Args:
            video_hash: Content hash identifying the video.
            description: Analysis text to store for the video.
            path: Path/filename to record. When empty, a placeholder derived
                from the first 16 characters of the hash is used instead.
            metadata: Optional extra information. Entries whose key matches a
                ``Videos`` column (``duration``, ``frame_count``, ``fps``,
                ``resolution``, ``file_size``) and whose value is not ``None``
                are stored; all other keys are ignored.

        Returns:
            The stored ``Videos`` record, or ``None`` if saving failed. Errors
            are logged and swallowed so a storage failure never discards the
            analysis result the caller already holds.
        """
        try:
            with get_db_session() as session:
                # Fall back to a hash-derived placeholder when no path is given.
                if not path:
                    path = f"video_{video_hash[:16]}.unknown"
                # Forward only metadata entries that map onto model columns.
                extra_columns = {}
                if metadata:
                    for key in ("duration", "frame_count", "fps", "resolution", "file_size"):
                        value = metadata.get(key)
                        if value is not None:
                            extra_columns[key] = value
                video_record = Videos(
                    video_hash=video_hash,
                    description=description,
                    path=path,
                    timestamp=time.time(),
                    **extra_columns,
                )
                session.add(video_record)
                session.commit()
                # Reload so database-generated values (e.g. id) are populated.
                session.refresh(video_record)
                # Module-level logger, consistent with the rest of this class.
                logger.info(f"✅ 视频分析结果已保存到数据库，hash: {video_hash[:16]}...")
                return video_record
        except Exception as e:
            # NOTE(review): a unique-constraint violation on video_hash or path
            # (e.g. a different video stored under the same filename) also
            # lands here — confirm that is acceptable.
            logger.error(f"存储视频分析结果时出错: {e}")
            return None
    def set_analysis_mode(self, mode: str):
        """设置分析模式"""
        if mode in ["batch", "sequential", "auto"]:
@@ -309,6 +350,16 @@ class VideoAnalyzer:
            if not video_bytes:
                return {"summary": "❌ 视频数据为空"}
            # 计算视频hash值
            video_hash = self._calculate_video_hash(video_bytes)
            logger.info(f"视频hash: {video_hash[:16]}...")
            # 检查数据库中是否已存在该视频的分析结果
            existing_video = self._check_video_exists(video_hash)
            if existing_video:
                logger.info(f"✅ 找到已存在的视频分析结果，直接返回 (id: {existing_video.id})")
                return {"summary": existing_video.description}
            # 创建临时文件保存视频数据
            with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as temp_file:
                temp_file.write(video_bytes)
@@ -321,6 +372,20 @@ class VideoAnalyzer:
                # 使用临时文件进行分析
                result = await self.analyze_video(temp_path, question)
                # 保存分析结果到数据库
                metadata = {
                    "filename": filename,
                    "file_size": len(video_bytes),
                    "analysis_timestamp": time.time()
                }
                self._store_video_result(
                    video_hash=video_hash,
                    description=result,
                    path=filename or "",
                    metadata=metadata
                )
                return {"summary": result}
            finally:
                # 清理临时文件