feat(maizone): 实现对说说中图片的识别与理解

重构了原有的评论生成逻辑，使其能够处理和理解说说中的图片内容。现在，插件可以调用视觉模型来描述图片，并将图片描述作为上下文信息，从而生成更贴切、更具情景感的评论。

主要变更：
- 新增了对说说中图片的识别能力，在生成评论时会考虑图片内容。
- 将原有的直接调用LLM API的评论生成方式，重构为使用更高级的 `generator_api`，以更好地融入框架的对话管理和人格系统。
- 为评论和回复生成的逻辑增加了重试机制，提高了在网络波动或API不稳定情况下的成功率。
- 修复了监控自己说说时，因QQ号类型不匹配（int vs str）导致无法正确识别自己身份的bug。
- 优化了对自己说说的评论回复逻辑，确保不会回复自己发表的评论。
- 增强了HTML解析能力，现在可以从网页版QQ空间中提取图片和更完整的评论结构。
2025-08-28 16:35:52 +08:00
parent 829ff4cd4f
commit 2a67ad1e86
3 changed files with 224 additions and 94 deletions
--- a/src/plugins/built_in/maizone_refactored/plugin.py
+++ b/src/plugins/built_in/maizone_refactored/plugin.py
@@ -42,6 +42,7 @@ class MaiZoneRefactoredPlugin(BasePlugin):
        "plugin": {"enable": ConfigField(type=bool, default=True, description="是否启用插件")},
        "models": {
            "text_model": ConfigField(type=str, default="maizone", description="生成文本的模型名称"),
+            "vision_model": ConfigField(type=str, default="YISHAN-gemini-2.5-flash", description="识别图片的模型名称"),
            "siliconflow_apikey": ConfigField(type=str, default="", description="硅基流动AI生图API密钥"),
        },
        "send": {
--- a/src/plugins/built_in/maizone_refactored/services/content_service.py
+++ b/src/plugins/built_in/maizone_refactored/services/content_service.py
@@ -6,8 +6,19 @@
 from typing import Callable, Optional
 import datetime

+import base64
+import aiohttp
 from src.common.logger import get_logger
-from src.plugin_system.apis import llm_api, config_api
+import base64
+import aiohttp
+import imghdr
+import asyncio
+from src.common.logger import get_logger
+from src.plugin_system.apis import llm_api, config_api, generator_api, person_api
+from src.chat.message_receive.chat_stream import get_chat_manager
+from maim_message import UserInfo
+from src.llm_models.utils_model import LLMRequest
+from src.config.api_ada_configs import TaskConfig

 # 导入旧的工具函数，我们稍后会考虑是否也需要重构它
 from ..utils.history_utils import get_send_history
@@ -97,110 +108,181 @@ class ContentService:
            logger.error(f"生成说说内容时发生异常: {e}")
            return ""

-    async def generate_comment(self, content: str, target_name: str, rt_con: str = "") -> str:
+    async def generate_comment(self, content: str, target_name: str, rt_con: str = "", images: Optional[list] = None) -> str:
        """
        针对一条具体的说说内容生成评论。
-
-        :param content: 好友的说说内容。
-        :param target_name: 好友的昵称。
-        :param rt_con: 如果是转发的说说，这里是原说说内容。
-        :return: 生成的评论内容，如果失败则返回空字符串。
        """
-        try:
-            # 获取模型配置
-            models = llm_api.get_available_models()
-            text_model = str(self.get_config("models.text_model", "replyer_1"))
-            model_config = models.get(text_model)
+        # `images` holds the picture URLs attached to the feed; None or an empty list means "no pictures".
+        # Describe them once, before the retry loop, so a failed generation attempt does not re-run the vision model.
+        image_descriptions = []
+        if images:
+            for image_url in images:
+                description = await self._describe_image(image_url)
+                if description:
+                    image_descriptions.append(description)
+        for i in range(3):  # up to 3 attempts
+            try:
+                chat_manager = get_chat_manager()
+                bot_platform = config_api.get_global_config('bot.platform')
+                bot_qq = str(config_api.get_global_config('bot.qq_account'))
+                bot_nickname = config_api.get_global_config('bot.nickname')
+                bot_user_info = UserInfo(
+                    platform=bot_platform,
+                    user_id=bot_qq,
+                    user_nickname=bot_nickname
+                )

-            if not model_config:
-                logger.error("未配置LLM模型")
-                return ""
+                chat_stream = await chat_manager.get_or_create_stream(
+                    platform=bot_platform,
+                    user_info=bot_user_info
+                )

-            # 获取机器人信息
-            bot_personality = config_api.get_global_config("personality.personality_core", "一个机器人")
-            bot_expression = config_api.get_global_config("expression.expression_style", "内容积极向上")
+                if not chat_stream:
+                    logger.error(f"无法为QQ号 {bot_qq} 创建聊天流")
+                    return ""

-            # 构建提示词
-            if not rt_con:
-                prompt = f"""
-                你是'{bot_personality}'，你正在浏览你好友'{target_name}'的QQ空间，
-                你看到了你的好友'{target_name}'qq空间上内容是'{content}'的说说，你想要发表你的一条评论，
-                {bot_expression}，回复的平淡一些，简短一些，说中文，
-                不要刻意突出自身学科背景，不要浮夸，不要夸张修辞，不要输出多余内容(包括前后缀，冒号和引号，括号()，表情包，at或 @等 )。只输出回复内容
-                """
-            else:
-                prompt = f"""
-                你是'{bot_personality}'，你正在浏览你好友'{target_name}'的QQ空间，
-                你看到了你的好友'{target_name}'在qq空间上转发了一条内容为'{rt_con}'的说说，你的好友的评论为'{content}'
-                你想要发表你的一条评论，{bot_expression}，回复的平淡一些，简短一些，说中文，
-                不要刻意突出自身学科背景，不要浮夸，不要夸张修辞，不要输出多余内容(包括前后缀，冒号和引号，括号()，表情包，at或 @等 )。只输出回复内容
-                """
+                extra_info = "正在评论QQ空间的好友说说。"
+                if image_descriptions:
+                    extra_info += "说说中包含的图片内容如下：\n" + "\n".join(image_descriptions)

-            logger.info(f"正在为'{target_name}'的说说生成评论: {content[:20]}...")
+                reply_to = f"{target_name}:{content}"
+                if rt_con:
+                    reply_to += f"\n[转发内容]: {rt_con}"

-            # 调用LLM生成评论
-            success, comment, _, _ = await llm_api.generate_with_model(
-                prompt=prompt,
-                model_config=model_config,
-                request_type="comment.generate",
-                temperature=0.3,
-                max_tokens=100
-            )
+                success, reply_set, _ = await generator_api.generate_reply(
+                    chat_stream=chat_stream,
+                    reply_to=reply_to,
+                    extra_info=extra_info,
+                    request_type="maizone.comment"
+                )

-            if success:
-                logger.info(f"成功生成评论内容：'{comment}'")
-                return comment
-            else:
-                logger.error("生成评论内容失败")
-                return ""
-
-        except Exception as e:
-            logger.error(f"生成评论内容时发生异常: {e}")
-            return ""
+                if success and reply_set:
+                    comment = "".join(text for kind, text in reply_set if kind == 'text')  # keep only the text segments
+                    logger.info(f"成功生成评论内容：'{comment}'")
+                    return comment
+                else:
+                    # Generation failed: wait and retry unless this was the last attempt.
+                    if i < 2:
+                        logger.warning(f"生成评论失败，将在5秒后重试 (尝试 {i+1}/3)")
+                        await asyncio.sleep(5)
+                        continue
+                    else:
+                        logger.error("使用 generator_api 生成评论失败")
+                        return ""
+            except Exception as e:
+                if i < 2:
+                    logger.warning(f"生成评论时发生异常，将在5秒后重试 (尝试 {i+1}/3): {e}")
+                    await asyncio.sleep(5)
+                    continue
+                else:
+                    logger.error(f"生成评论时发生异常: {e}")
+                    return ""
+        return ""

    async def generate_comment_reply(self, story_content: str, comment_content: str, commenter_name: str) -> str:
        """
        针对自己说说的评论，生成回复。
-
-        :param story_content: 原始说说内容。
-        :param comment_content: 好友的评论内容。
-        :param commenter_name: 评论者的昵称。
-        :return: 生成的回复内容。
        """
-        try:
-            models = llm_api.get_available_models()
-            text_model = str(self.get_config("models.text_model", "replyer_1"))
-            model_config = models.get(text_model)
-            if not model_config:
-                return ""
+        for i in range(3):  # up to 3 attempts
+            try:
+                chat_manager = get_chat_manager()
+                bot_platform = config_api.get_global_config('bot.platform')
+                bot_qq = str(config_api.get_global_config('bot.qq_account'))
+                bot_nickname = config_api.get_global_config('bot.nickname')

-            bot_personality = config_api.get_global_config("personality.personality_core", "一个机器人")
-            bot_expression = config_api.get_global_config("expression.expression_style", "内容积极向上")
+                bot_user_info = UserInfo(
+                    platform=bot_platform,
+                    user_id=bot_qq,
+                    user_nickname=bot_nickname
+                )

-            prompt = f"""
-            你是'{bot_personality}'，你的好友'{commenter_name}'评论了你QQ空间上的一条内容为“{story_content}”说说，
-            你的好友对该说说的评论为:“{comment_content}”，你想要对此评论进行回复
-            {bot_expression}，回复的平淡一些，简短一些，说中文，
-            不要刻意突出自身学科背景，不要浮夸，不要夸张修辞，不要输出多余内容(包括前后缀，冒号和引号，括号()，表情包，at或 @等 )。只输出回复内容
-            """
-            
-            success, reply, _, _ = await llm_api.generate_with_model(
-                prompt=prompt,
-                model_config=model_config,
-                request_type="comment.reply.generate",
-                temperature=0.3,
-                max_tokens=100
-            )
+                chat_stream = await chat_manager.get_or_create_stream(
+                    platform=bot_platform,
+                    user_info=bot_user_info
+                )

-            if success:
-                logger.info(f"成功为'{commenter_name}'的评论生成回复: '{reply}'")
-                return reply
-            else:
-                logger.error("生成评论回复失败")
-                return ""
-        except Exception as e:
-            logger.error(f"生成评论回复时发生异常: {e}")
-            return ""
+                if not chat_stream:
+                    logger.error(f"无法为QQ号 {bot_qq} 创建聊天流")
+                    return ""
+                # reply_to carries "<commenter>:<comment>"; the story text itself is passed through extra_info.
+                reply_to = f"{commenter_name}:{comment_content}"
+                extra_info = f"正在回复我的QQ空间说说“{story_content}”下的评论。"
+
+                success, reply_set, _ = await generator_api.generate_reply(
+                    chat_stream=chat_stream,
+                    reply_to=reply_to,
+                    extra_info=extra_info,
+                    request_type="maizone.comment_reply"
+                )
+
+                if success and reply_set:
+                    reply = "".join(text for kind, text in reply_set if kind == 'text')  # keep only the text segments
+                    logger.info(f"成功为'{commenter_name}'的评论生成回复: '{reply}'")
+                    return reply
+                else:
+                    if i < 2:
+                        logger.warning(f"生成评论回复失败，将在5秒后重试 (尝试 {i+1}/3)")
+                        await asyncio.sleep(5)
+                        continue
+                    else:
+                        logger.error("使用 generator_api 生成评论回复失败")
+                        return ""
+            except Exception as e:
+                if i < 2:
+                    logger.warning(f"生成评论回复时发生异常，将在5秒后重试 (尝试 {i+1}/3): {e}")
+                    await asyncio.sleep(5)
+                    continue
+                else:
+                    logger.error(f"生成评论回复时发生异常: {e}")
+                    return ""
+        return ""
+
+    async def _describe_image(self, image_url: str) -> Optional[str]:
+        """
+        Download the image at ``image_url`` and ask the configured vision model to describe it.
+
+        :param image_url: URL of the picture to fetch.
+        :return: The model's description, or None when no vision model is configured, the image
+                 format cannot be identified, or all 3 attempts fail.
+        """
+        for i in range(3):  # up to 3 attempts
+            try:
+                # Check the configuration first so a missing model does not cost a download.
+                vision_model_name = self.get_config("models.vision_model", "vision")
+                if not vision_model_name:
+                    logger.error("未在插件配置中指定视觉模型")
+                    return None
+
+                async with aiohttp.ClientSession() as session:
+                    # aiohttp takes a ClientTimeout here; passing a bare number is deprecated.
+                    async with session.get(image_url, timeout=aiohttp.ClientTimeout(total=30)) as resp:
+                        if resp.status != 200:
+                            logger.error(f"下载图片失败: {image_url}, status: {resp.status}")
+                            if i < 2:  # no point sleeping after the final attempt
+                                await asyncio.sleep(2)
+                            continue
+                        image_bytes = await resp.read()
+
+                # NOTE(review): imghdr is deprecated (PEP 594) and removed in Python 3.13.
+                image_format = imghdr.what(None, image_bytes)
+                if not image_format:
+                    logger.error(f"无法识别图片格式: {image_url}")
+                    return None
+
+                vision_model_config = TaskConfig(model_list=[vision_model_name], temperature=0.3, max_tokens=1500)
+                llm_request = LLMRequest(model_set=vision_model_config, request_type="maizone.image_describe")
+                prompt = config_api.get_global_config("custom_prompt.image_prompt", "请描述这张图片")
+                description, _ = await llm_request.generate_response_for_image(
+                    prompt=prompt,
+                    image_base64=base64.b64encode(image_bytes).decode("utf-8"),
+                    image_format=image_format,
+                )
+                return description
+            except Exception as e:
+                logger.error(f"识别图片时发生异常 (尝试 {i+1}/3): {e}")
+                if i < 2:
+                    await asyncio.sleep(2)
+        return None

    async def generate_story_from_activity(self, activity: str) -> str:
        """
--- a/src/plugins/built_in/maizone_refactored/services/qzone_service.py
+++ b/src/plugins/built_in/maizone_refactored/services/qzone_service.py
@@ -163,7 +163,7 @@ class QZoneService:
                    continue

                # 区分是自己的说说还是他人的说说
-                if target_qq == qq_account:
+                if str(target_qq) == str(qq_account):
                    if self.get_config("monitor.enable_auto_reply", False):
                        await self._reply_to_own_feed_comments(feed, api_client)
                else:
@@ -244,12 +244,20 @@ class QZoneService:
        if not comments:
            return

-        # 筛选出未被自己回复过的主评论
-        my_comment_tids = {
-            c["parent_tid"] for c in comments if c.get("parent_tid") and c.get("qq_account") == qq_account
+        # 筛选出未被自己回复过的评论
+        if not comments:
+            return
+
+        # 找到所有我已经回复过的评论的ID
+        replied_to_tids = {
+            c['parent_tid'] for c in comments
+            if c.get('parent_tid') and str(c.get('qq_account')) == str(qq_account)
        }
+
+        # 找出所有非我发出且我未回复过的评论
        comments_to_reply = [
-            c for c in comments if not c.get("parent_tid") and c.get("comment_tid") not in my_comment_tids
+            c for c in comments
+            if str(c.get('qq_account')) != str(qq_account) and c.get('comment_tid') not in replied_to_tids
        ]

        if not comments_to_reply:
@@ -275,9 +283,10 @@ class QZoneService:
        content = feed.get("content", "")
        fid = feed.get("tid", "")
        rt_con = feed.get("rt_con", "")
+        images = feed.get("images", [])

        if random.random() <= self.get_config("read.comment_possibility", 0.3):
-            comment_text = await self.content_service.generate_comment(content, target_name, rt_con)
+            comment_text = await self.content_service.generate_comment(content, target_name, rt_con, images)
            if comment_text:
                await api_client["comment"](target_qq, fid, comment_text)

@@ -655,6 +664,8 @@ class QZoneService:
                        c.get("name") == my_name for c in msg.get("commentlist", []) if isinstance(c, dict)
                    )
                    if not is_commented:
+                        images = [pic['url1'] for pic in msg.get('pictotal', []) if 'url1' in pic]
+
                        feeds_list.append(
                            {
                                "tid": msg.get("tid", ""),
@@ -665,6 +676,7 @@ class QZoneService:
                                "rt_con": msg.get("rt_con", {}).get("content", "")
                                if isinstance(msg.get("rt_con"), dict)
                                else "",
+                                "images": images
                            }
                        )
                return feeds_list
@@ -815,10 +827,45 @@ class QZoneService:
                    text_div = soup.find('div', class_='f-info')
                    text = text_div.get_text(strip=True) if text_div else ""
                    
+                    images = [img['src'] for img in soup.find_all('img') if 'src' in img.attrs and 'user-avatar' not in img.get('class', [])]
+                    
+                    comments = []
+                    comment_divs = soup.find_all('div', class_='f-single-comment')
+                    for comment_div in comment_divs:
+                        # --- 处理主评论 ---
+                        author_a = comment_div.find('a', class_='f-nick')
+                        content_span = comment_div.find('span', class_='f-re-con')
+                        
+                        if author_a and content_span:
+                            comments.append({
+                                'qq_account': str(comment_div.get('data-uin', '')),
+                                'nickname': author_a.get_text(strip=True),
+                                'content': content_span.get_text(strip=True),
+                                'comment_tid': comment_div.get('data-tid', ''),
+                                'parent_tid': None  # 主评论没有父ID
+                            })
+
+                        # --- 处理这条主评论下的所有回复 ---
+                        reply_divs = comment_div.find_all('div', class_='f-single-re')
+                        for reply_div in reply_divs:
+                            reply_author_a = reply_div.find('a', class_='f-nick')
+                            reply_content_span = reply_div.find('span', class_='f-re-con')
+                            
+                            if reply_author_a and reply_content_span:
+                                comments.append({
+                                    'qq_account': str(reply_div.get('data-uin', '')),
+                                    'nickname': reply_author_a.get_text(strip=True),
+                                    'content': reply_content_span.get_text(strip=True).lstrip(': '), # 移除回复内容前多余的冒号和空格
+                                    'comment_tid': reply_div.get('data-tid', ''),
+                                    'parent_tid': reply_div.get('data-parent-tid', comment_div.get('data-tid', '')) # 如果没有父ID，则将父ID设为主评论ID
+                                })
+
                    feeds_list.append({
                        'target_qq': target_qq,
                        'tid': tid,
                        'content': text,
+                        'images': images,
+                        'comments': comments
                    })
                logger.info(f"监控任务发现 {len(feeds_list)} 条未处理的新说说。")
                return feeds_list