From 04f957627e5aedbc259d335c1051b67675c1fc1b Mon Sep 17 00:00:00 2001 From: tt-P607 <68868379+tt-P607@users.noreply.github.com> Date: Mon, 15 Sep 2025 16:47:33 +0800 Subject: [PATCH] =?UTF-8?q?feat(knowledge):=20=E4=BC=98=E5=8C=96=E7=9F=A5?= =?UTF-8?q?=E8=AF=86=E5=BA=93=E8=BF=94=E5=9B=9E=E7=BB=93=E6=9E=84=E5=92=8C?= =?UTF-8?q?=E9=9B=86=E6=88=90=E6=B5=81=E7=A8=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 重构QAManager.get_knowledge()返回结构化字典包含知识项和摘要 - 在bot.py中添加initialize_lpmm_knowledge()初始化调用 - 更新Prompt类知识库集成逻辑,改进错误处理 - 优化lpmm知识工具显示格式,增强用户体验 - 调整异步任务基础超时时间从10秒增至20秒 --- bot.py | 2 + src/chat/knowledge/qa_manager.py | 68 ++++++++++++------- src/chat/utils/prompt.py | 24 +++---- .../built_in/knowledge/lpmm_get_knowledge.py | 12 +++- 4 files changed, 63 insertions(+), 43 deletions(-) diff --git a/bot.py b/bot.py index 2490e6a97..b4c448670 100644 --- a/bot.py +++ b/bot.py @@ -21,6 +21,7 @@ initialize_logging() from src.main import MainSystem # noqa from src import BaseMain # noqa from src.manager.async_task_manager import async_task_manager # noqa +from src.chat.knowledge.knowledge_lib import initialize_lpmm_knowledge # noqa from src.config.config import global_config # noqa from src.common.database.database import initialize_sql_database # noqa from src.common.database.sqlalchemy_models import initialize_database as init_db # noqa @@ -228,6 +229,7 @@ if __name__ == "__main__": try: # 执行初始化和任务调度 loop.run_until_complete(main_system.initialize()) + initialize_lpmm_knowledge() # Schedule tasks returns a future that runs forever. # We can run console_input_loop concurrently. main_tasks = loop.create_task(main_system.schedule_tasks()) diff --git a/src/chat/knowledge/qa_manager.py b/src/chat/knowledge/qa_manager.py index b8b31efb4..f539659fb 100644 --- a/src/chat/knowledge/qa_manager.py +++ b/src/chat/knowledge/qa_manager.py @@ -1,5 +1,5 @@ import time -from typing import Tuple, List, Dict, Optional +from typing import Tuple, List, Dict, Optional, Any from .global_logger import logger from .embedding_store import EmbeddingManager @@ -98,30 +98,46 @@ class QAManager: return result, ppr_node_weights - async def get_knowledge(self, question: str) -> Optional[str]: - """获取知识""" - # 处理查询 - processed_result = await self.process_query(question) - if processed_result is not None: - query_res = processed_result[0] - # 检查查询结果是否为空 - if not query_res: - logger.debug("知识库查询结果为空,可能是知识库中没有相关内容") - return None + async def get_knowledge(self, question: str) -> Optional[Dict[str, Any]]: + """ + 获取知识,返回结构化字典 + + Args: + question: 用户提出的问题 - knowledge = [ - ( - self.embed_manager.paragraphs_embedding_store.store[res[0]].str, - res[1], - ) - for res in query_res - ] - found_knowledge = "\n".join( - [f"第{i + 1}条知识:{k[0]}\n 该条知识对于问题的相关性:{k[1]}" for i, k in enumerate(knowledge)] - ) - if len(found_knowledge) > MAX_KNOWLEDGE_LENGTH: - found_knowledge = found_knowledge[:MAX_KNOWLEDGE_LENGTH] + "\n" - return found_knowledge - else: - logger.debug("LPMM知识库并未初始化,可能是从未导入过知识...") + Returns: + 一个包含 'knowledge_items' 和 'summary' 的字典,或者在没有结果时返回 None + """ + processed_result = await self.process_query(question) + if not processed_result or not processed_result[0]: + logger.debug("知识库查询结果为空。") return None + + query_res = processed_result[0] + + knowledge_items = [] + for res_hash, relevance, *_ in query_res: + if store_item := self.embed_manager.paragraphs_embedding_store.store.get(res_hash): + knowledge_items.append({ + "content": store_item.str, + "source": "内部知识库", + "relevance": f"{relevance:.4f}" + }) + + if not knowledge_items: + return None + + # 使用LLM生成总结 + knowledge_text_for_summary = "\n\n".join([item['content'] for item in knowledge_items[:5]]) # 最多总结前5条 + summary_prompt = f"根据以下信息,为问题 '{question}' 生成一个简洁的、不超过50字的摘要:\n\n{knowledge_text_for_summary}" + + try: + summary, (_, _, _) = await self.qa_model.generate_response_async(summary_prompt) + except Exception as e: + logger.error(f"生成知识摘要失败: {e}") + summary = "无法生成摘要。" + + return { + "knowledge_items": knowledge_items, + "summary": summary.strip() if summary else "没有可用的摘要。" + } diff --git a/src/chat/utils/prompt.py b/src/chat/utils/prompt.py index 217a2071b..ec5446e64 100644 --- a/src/chat/utils/prompt.py +++ b/src/chat/utils/prompt.py @@ -369,7 +369,7 @@ class Prompt: task_names.append("cross_context") # 性能优化 - base_timeout = 10.0 + base_timeout = 20.0 task_timeout = 2.0 timeout_seconds = min( max(base_timeout, len(tasks) * task_timeout), @@ -676,22 +676,21 @@ class Prompt: return {"knowledge_prompt": ""} try: - from src.chat.knowledge.knowledge_lib import QAManager + from src.chat.knowledge.knowledge_lib import qa_manager # 获取问题文本(当前消息) question = self.parameters.target or "" if not question: return {"knowledge_prompt": ""} - # 创建QA管理器 - qa_manager = QAManager() + # 检查QA管理器是否已成功初始化 + if not qa_manager: + logger.warning("QA管理器未初始化 (可能lpmm_knowledge被禁用),跳过知识库搜索。") + return {"knowledge_prompt": ""} # 搜索相关知识 knowledge_results = await qa_manager.get_knowledge( - question=question, - chat_id=self.parameters.chat_id, - max_results=5, - min_similarity=0.5 + question=question ) # 构建知识块 @@ -704,13 +703,10 @@ class Prompt: relevance = item.get("relevance", 0.0) if content: - if source: - knowledge_parts.append(f"- [{relevance:.2f}] {content} (来源: {source})") - else: - knowledge_parts.append(f"- [{relevance:.2f}] {content}") + knowledge_parts.append(f"- [相关度: {relevance}] {content}") - if knowledge_results.get("summary"): - knowledge_parts.append(f"\n知识总结: {knowledge_results['summary']}") + if summary := knowledge_results.get("summary"): + knowledge_parts.append(f"\n知识总结: {summary}") knowledge_prompt = "\n".join(knowledge_parts) else: diff --git a/src/plugins/built_in/knowledge/lpmm_get_knowledge.py b/src/plugins/built_in/knowledge/lpmm_get_knowledge.py index fd3d811b2..91dea3105 100644 --- a/src/plugins/built_in/knowledge/lpmm_get_knowledge.py +++ b/src/plugins/built_in/knowledge/lpmm_get_knowledge.py @@ -43,10 +43,16 @@ class SearchKnowledgeFromLPMMTool(BaseTool): logger.debug(f"知识库查询结果: {knowledge_info}") - if knowledge_info: - content = f"你知道这些知识: {knowledge_info}" + if knowledge_info and knowledge_info.get("knowledge_items"): + knowledge_parts = [] + for i, item in enumerate(knowledge_info["knowledge_items"]): + knowledge_parts.append(f"- {item.get('content', 'N/A')}") + + knowledge_text = "\n".join(knowledge_parts) + summary = knowledge_info.get('summary', '无总结') + content = f"关于 '{query}', 你知道以下信息:\n{knowledge_text}\n\n总结: {summary}" else: - content = f"你不太了解有关{query}的知识" + content = f"关于 '{query}',你的知识库里好像没有相关的信息呢" return {"type": "lpmm_knowledge", "id": query, "content": content} except Exception as e: # 捕获异常并记录错误