feat(maizone): 实现对说说中图片的识别与理解

重构了原有的评论生成逻辑，使其能够处理和理解说说中的图片内容。现在，插件可以调用视觉模型来描述图片，并将图片描述作为上下文信息，从而生成更贴切、更具情景感的评论。

主要变更：
- 新增了对说说中图片的识别能力，在生成评论时会考虑图片内容。
- 将原有的直接调用LLM API的评论生成方式，重构为使用更高级的 `generator_api`，以更好地融入框架的对话管理和人格系统。
- 为评论和回复生成的逻辑增加了重试机制，提高了在网络波动或API不稳定情况下的成功率。
- 修复了监控自己说说时，因QQ号类型不匹配（int vs str）导致无法正确识别自己身份的bug。
- 优化了对自己说说的评论回复逻辑，确保不会回复自己发表的评论。
- 增强了HTML解析能力，现在可以从网页版QQ空间中提取图片和更完整的评论结构。
2025-08-28 16:35:52 +08:00
parent 829ff4cd4f
commit 2a67ad1e86
3 changed files with 224 additions and 94 deletions
--- a/src/plugins/built_in/maizone_refactored/plugin.py
+++ b/src/plugins/built_in/maizone_refactored/plugin.py
@@ -42,6 +42,7 @@ class MaiZoneRefactoredPlugin(BasePlugin):
        "plugin": {"enable": ConfigField(type=bool, default=True, description="是否启用插件")},
        "models": {
            "text_model": ConfigField(type=str, default="maizone", description="生成文本的模型名称"),
            "vision_model": ConfigField(type=str, default="YISHAN-gemini-2.5-flash", description="识别图片的模型名称"),
            "siliconflow_apikey": ConfigField(type=str, default="", description="硅基流动AI生图API密钥"),
        },
        "send": {
--- a/src/plugins/built_in/maizone_refactored/services/content_service.py
+++ b/src/plugins/built_in/maizone_refactored/services/content_service.py
@@ -6,8 +6,19 @@
 from typing import Callable, Optional
 import datetime
 import base64
 import aiohttp
 from src.common.logger import get_logger
-from src.plugin_system.apis import llm_api, config_api
+import base64
 import aiohttp
 import imghdr
 import asyncio
 from src.common.logger import get_logger
 from src.plugin_system.apis import llm_api, config_api, generator_api, person_api
 from src.chat.message_receive.chat_stream import get_chat_manager
 from maim_message import UserInfo
 from src.llm_models.utils_model import LLMRequest
 from src.config.api_ada_configs import TaskConfig
 # 导入旧的工具函数，我们稍后会考虑是否也需要重构它
 from ..utils.history_utils import get_send_history
@@ -97,110 +108,181 @@ class ContentService:
            logger.error(f"生成说说内容时发生异常: {e}")
            return ""
-    async def generate_comment(self, content: str, target_name: str, rt_con: str = "") -> str:
+    async def generate_comment(self, content: str, target_name: str, rt_con: str = "", images: list = []) -> str:
        """
        针对一条具体的说说内容生成评论。
        :param content: 好友的说说内容。
        :param target_name: 好友的昵称。
        :param rt_con: 如果是转发的说说，这里是原说说内容。
        :return: 生成的评论内容，如果失败则返回空字符串。
        """
        for i in range(3): # 重试3次
            try:
-            # 获取模型配置
+                chat_manager = get_chat_manager()
-            models = llm_api.get_available_models()
+                bot_platform = config_api.get_global_config('bot.platform')
-            text_model = str(self.get_config("models.text_model", "replyer_1"))
+                bot_qq = str(config_api.get_global_config('bot.qq_account'))
-            model_config = models.get(text_model)
+                bot_nickname = config_api.get_global_config('bot.nickname')
-            if not model_config:
+                bot_user_info = UserInfo(
-                logger.error("未配置LLM模型")
+                    platform=bot_platform,
-                return ""
+                    user_id=bot_qq,
-
+                    user_nickname=bot_nickname
            # 获取机器人信息
            bot_personality = config_api.get_global_config("personality.personality_core", "一个机器人")
            bot_expression = config_api.get_global_config("expression.expression_style", "内容积极向上")
            # 构建提示词
            if not rt_con:
                prompt = f"""
                你是'{bot_personality}'，你正在浏览你好友'{target_name}'的QQ空间，
                你看到了你的好友'{target_name}'qq空间上内容是'{content}'的说说，你想要发表你的一条评论，
                {bot_expression}，回复的平淡一些，简短一些，说中文，
                不要刻意突出自身学科背景，不要浮夸，不要夸张修辞，不要输出多余内容(包括前后缀，冒号和引号，括号()，表情包，at或 @等 )。只输出回复内容
                """
            else:
                prompt = f"""
                你是'{bot_personality}'，你正在浏览你好友'{target_name}'的QQ空间，
                你看到了你的好友'{target_name}'在qq空间上转发了一条内容为'{rt_con}'的说说，你的好友的评论为'{content}'
                你想要发表你的一条评论，{bot_expression}，回复的平淡一些，简短一些，说中文，
                不要刻意突出自身学科背景，不要浮夸，不要夸张修辞，不要输出多余内容(包括前后缀，冒号和引号，括号()，表情包，at或 @等 )。只输出回复内容
                """
            logger.info(f"正在为'{target_name}'的说说生成评论: {content[:20]}...")
            # 调用LLM生成评论
            success, comment, _, _ = await llm_api.generate_with_model(
                prompt=prompt,
                model_config=model_config,
                request_type="comment.generate",
                temperature=0.3,
                max_tokens=100
                )
-            if success:
+                chat_stream = await chat_manager.get_or_create_stream(
                    platform=bot_platform,
                    user_info=bot_user_info
                )
                if not chat_stream:
                    logger.error(f"无法为QQ号 {bot_qq} 创建聊天流")
                    return ""
                image_descriptions = []
                if images:
                    for image_url in images:
                        description = await self._describe_image(image_url)
                        if description:
                            image_descriptions.append(description)
                extra_info = "正在评论QQ空间的好友说说。"
                if image_descriptions:
                    extra_info += "说说中包含的图片内容如下：\n" + "\n".join(image_descriptions)
                reply_to = f"{target_name}:{content}"
                if rt_con:
                    reply_to += f"\n[转发内容]: {rt_con}"
                success, reply_set, _ = await generator_api.generate_reply(
                    chat_stream=chat_stream,
                    reply_to=reply_to,
                    extra_info=extra_info,
                    request_type="maizone.comment"
                )
                if success and reply_set:
                    comment = "".join([content for type, content in reply_set if type == 'text'])
                    logger.info(f"成功生成评论内容：'{comment}'")
                    return comment
                else:
-                logger.error("生成评论内容失败")
+                    # 如果生成失败，则进行重试
                    if i < 2:
                        logger.warning(f"生成评论失败，将在5秒后重试 (尝试 {i+1}/3)")
                        await asyncio.sleep(5)
                        continue
                    else:
                        logger.error("使用 generator_api 生成评论失败")
                        return ""
            except Exception as e:
-            logger.error(f"生成评论内容时发生异常: {e}")
+                if i < 2:
                    logger.warning(f"生成评论时发生异常，将在5秒后重试 (尝试 {i+1}/3): {e}")
                    await asyncio.sleep(5)
                    continue
                else:
                    logger.error(f"生成评论时发生异常: {e}")
                    return ""
        return ""
    async def generate_comment_reply(self, story_content: str, comment_content: str, commenter_name: str) -> str:
        """
        Generate a reply to a comment left on one of the bot's own posts.

        The bot's own identity (platform, QQ account, nickname) is read from the
        global config and used to obtain a chat stream, which is then handed to
        ``generator_api.generate_reply``. Up to three attempts are made, with a
        5-second pause between attempts.

        :param story_content: Text of the original post.
        :param comment_content: Text of the friend's comment.
        :param commenter_name: Nickname of the commenter.
        :return: The generated reply, or an empty string on failure.
        """
        for i in range(3):  # up to 3 attempts
            try:
                chat_manager = get_chat_manager()
                bot_platform = config_api.get_global_config('bot.platform')
                bot_qq = str(config_api.get_global_config('bot.qq_account'))
                bot_nickname = config_api.get_global_config('bot.nickname')

                bot_user_info = UserInfo(
                    platform=bot_platform,
                    user_id=bot_qq,
                    user_nickname=bot_nickname
                )

                chat_stream = await chat_manager.get_or_create_stream(
                    platform=bot_platform,
                    user_info=bot_user_info
                )

                if not chat_stream:
                    logger.error(f"无法为QQ号 {bot_qq} 创建聊天流")
                    return ""

                reply_to = f"{commenter_name}:{comment_content}"
                extra_info = f"正在回复我的QQ空间说说“{story_content}”下的评论。"

                success, reply_set, _ = await generator_api.generate_reply(
                    chat_stream=chat_stream,
                    reply_to=reply_to,
                    extra_info=extra_info,
                    request_type="maizone.comment_reply"
                )

                if success and reply_set:
                    # Keep only the text segments of the reply set.
                    reply = "".join(text for seg_type, text in reply_set if seg_type == 'text')
                    logger.info(f"成功为'{commenter_name}'的评论生成回复: '{reply}'")
                    return reply

                # Generation failed: retry unless this was the last attempt.
                if i < 2:
                    logger.warning(f"生成评论回复失败，将在5秒后重试 (尝试 {i+1}/3)")
                    await asyncio.sleep(5)
                    continue
                logger.error("使用 generator_api 生成评论回复失败")
                return ""
            except Exception as e:
                if i < 2:
                    logger.warning(f"生成评论回复时发生异常，将在5秒后重试 (尝试 {i+1}/3): {e}")
                    await asyncio.sleep(5)
                    continue
                logger.error(f"生成评论回复时发生异常: {e}")
                return ""
        return ""
    async def _describe_image(self, image_url: str) -> Optional[str]:
        """
        Download an image and ask the configured vision model to describe it.

        Up to three attempts are made. A non-200 download status or any
        exception triggers a retry after a 2-second pause; an unrecognised image
        format or a missing vision-model setting aborts immediately.

        :param image_url: URL of the image to describe.
        :return: The description returned by the model, or ``None`` when the
            image could not be downloaded, recognised or described.
        """
        max_attempts = 3
        for i in range(max_attempts):
            # No point in pausing after the final attempt: nothing follows it.
            is_last_attempt = i == max_attempts - 1
            try:
                # aiohttp expects a ClientTimeout object; bare numeric timeouts
                # are deprecated.
                download_timeout = aiohttp.ClientTimeout(total=30)
                async with aiohttp.ClientSession() as session:
                    async with session.get(image_url, timeout=download_timeout) as resp:
                        if resp.status != 200:
                            logger.error(f"下载图片失败: {image_url}, status: {resp.status}")
                            if not is_last_attempt:
                                await asyncio.sleep(2)
                            continue
                        image_bytes = await resp.read()

                # NOTE(review): imghdr is deprecated since Python 3.11 and removed
                # in 3.13 (PEP 594); this call needs a replacement before the
                # project moves to that interpreter version.
                image_format = imghdr.what(None, image_bytes)
                if not image_format:
                    logger.error(f"无法识别图片格式: {image_url}")
                    return None

                image_base64 = base64.b64encode(image_bytes).decode("utf-8")

                vision_model_name = self.get_config("models.vision_model", "vision")
                if not vision_model_name:
                    logger.error("未在插件配置中指定视觉模型")
                    return None

                vision_model_config = TaskConfig(
                    model_list=[vision_model_name],
                    temperature=0.3,
                    max_tokens=1500
                )

                llm_request = LLMRequest(model_set=vision_model_config, request_type="maizone.image_describe")

                prompt = config_api.get_global_config("custom_prompt.image_prompt", "请描述这张图片")

                description, _ = await llm_request.generate_response_for_image(
                    prompt=prompt,
                    image_base64=image_base64,
                    image_format=image_format,
                )
                return description
            except Exception as e:
                logger.error(f"识别图片时发生异常 (尝试 {i+1}/3): {e}")
                if not is_last_attempt:
                    await asyncio.sleep(2)
        return None
    async def generate_story_from_activity(self, activity: str) -> str:
        """
--- a/src/plugins/built_in/maizone_refactored/services/qzone_service.py
+++ b/src/plugins/built_in/maizone_refactored/services/qzone_service.py
@@ -163,7 +163,7 @@ class QZoneService:
                    continue
                # 区分是自己的说说还是他人的说说
-                if target_qq == qq_account:
+                if str(target_qq) == str(qq_account):
                    if self.get_config("monitor.enable_auto_reply", False):
                        await self._reply_to_own_feed_comments(feed, api_client)
                else:
@@ -244,12 +244,20 @@ class QZoneService:
        if not comments:
            return
-        # 筛选出未被自己回复过的主评论
+        # 筛选出未被自己回复过的评论
-        my_comment_tids = {
+        if not comments:
-            c["parent_tid"] for c in comments if c.get("parent_tid") and c.get("qq_account") == qq_account
+            return
        # 找到所有我已经回复过的评论的ID
        replied_to_tids = {
            c['parent_tid'] for c in comments
            if c.get('parent_tid') and str(c.get('qq_account')) == str(qq_account)
        }
        # 找出所有非我发出且我未回复过的评论
        comments_to_reply = [
-            c for c in comments if not c.get("parent_tid") and c.get("comment_tid") not in my_comment_tids
+            c for c in comments
            if str(c.get('qq_account')) != str(qq_account) and c.get('comment_tid') not in replied_to_tids
        ]
        if not comments_to_reply:
@@ -275,9 +283,10 @@ class QZoneService:
        content = feed.get("content", "")
        fid = feed.get("tid", "")
        rt_con = feed.get("rt_con", "")
        images = feed.get("images", [])
        if random.random() <= self.get_config("read.comment_possibility", 0.3):
-            comment_text = await self.content_service.generate_comment(content, target_name, rt_con)
+            comment_text = await self.content_service.generate_comment(content, target_name, rt_con, images)
            if comment_text:
                await api_client["comment"](target_qq, fid, comment_text)
@@ -655,6 +664,8 @@ class QZoneService:
                        c.get("name") == my_name for c in msg.get("commentlist", []) if isinstance(c, dict)
                    )
                    if not is_commented:
                        images = [pic['url1'] for pic in msg.get('pictotal', []) if 'url1' in pic]
                        feeds_list.append(
                            {
                                "tid": msg.get("tid", ""),
@@ -665,6 +676,7 @@ class QZoneService:
                                "rt_con": msg.get("rt_con", {}).get("content", "")
                                if isinstance(msg.get("rt_con"), dict)
                                else "",
                                "images": images
                            }
                        )
                return feeds_list
@@ -815,10 +827,45 @@ class QZoneService:
                    text_div = soup.find('div', class_='f-info')
                    text = text_div.get_text(strip=True) if text_div else ""
                    images = [img['src'] for img in soup.find_all('img') if 'src' in img.attrs and 'user-avatar' not in img.get('class', [])]
                    comments = []
                    comment_divs = soup.find_all('div', class_='f-single-comment')
                    for comment_div in comment_divs:
                        # --- 处理主评论 ---
                        author_a = comment_div.find('a', class_='f-nick')
                        content_span = comment_div.find('span', class_='f-re-con')
                        if author_a and content_span:
                            comments.append({
                                'qq_account': str(comment_div.get('data-uin', '')),
                                'nickname': author_a.get_text(strip=True),
                                'content': content_span.get_text(strip=True),
                                'comment_tid': comment_div.get('data-tid', ''),
                                'parent_tid': None  # 主评论没有父ID
                            })
                        # --- 处理这条主评论下的所有回复 ---
                        reply_divs = comment_div.find_all('div', class_='f-single-re')
                        for reply_div in reply_divs:
                            reply_author_a = reply_div.find('a', class_='f-nick')
                            reply_content_span = reply_div.find('span', class_='f-re-con')
                            if reply_author_a and reply_content_span:
                                comments.append({
                                    'qq_account': str(reply_div.get('data-uin', '')),
                                    'nickname': reply_author_a.get_text(strip=True),
                                    'content': reply_content_span.get_text(strip=True).lstrip(': '), # 移除回复内容前多余的冒号和空格
                                    'comment_tid': reply_div.get('data-tid', ''),
                                    'parent_tid': reply_div.get('data-parent-tid', comment_div.get('data-tid', '')) # 如果没有父ID，则将父ID设为主评论ID
                                })
                    feeds_list.append({
                        'target_qq': target_qq,
                        'tid': tid,
                        'content': text,
                        'images': images,
                        'comments': comments
                    })
                logger.info(f"监控任务发现 {len(feeds_list)} 条未处理的新说说。")
                return feeds_list