feat(knowledge): 优化知识库返回结构和集成流程

- 重构QAManager.get_knowledge()返回结构化字典包含知识项和摘要
- 在bot.py中添加initialize_lpmm_knowledge()初始化调用
- 更新Prompt类知识库集成逻辑,改进错误处理
- 优化lpmm知识工具显示格式,增强用户体验
- 调整异步任务基础超时时间从10秒增至20秒
This commit is contained in:
tt-P607
2025-09-15 16:47:33 +08:00
parent 5d0e0de8b6
commit 04f957627e
4 changed files with 63 additions and 43 deletions

2
bot.py
View File

@@ -21,6 +21,7 @@ initialize_logging()
from src.main import MainSystem # noqa from src.main import MainSystem # noqa
from src import BaseMain # noqa from src import BaseMain # noqa
from src.manager.async_task_manager import async_task_manager # noqa from src.manager.async_task_manager import async_task_manager # noqa
from src.chat.knowledge.knowledge_lib import initialize_lpmm_knowledge # noqa
from src.config.config import global_config # noqa from src.config.config import global_config # noqa
from src.common.database.database import initialize_sql_database # noqa from src.common.database.database import initialize_sql_database # noqa
from src.common.database.sqlalchemy_models import initialize_database as init_db # noqa from src.common.database.sqlalchemy_models import initialize_database as init_db # noqa
@@ -228,6 +229,7 @@ if __name__ == "__main__":
try: try:
# 执行初始化和任务调度 # 执行初始化和任务调度
loop.run_until_complete(main_system.initialize()) loop.run_until_complete(main_system.initialize())
initialize_lpmm_knowledge()
# Schedule tasks returns a future that runs forever. # Schedule tasks returns a future that runs forever.
# We can run console_input_loop concurrently. # We can run console_input_loop concurrently.
main_tasks = loop.create_task(main_system.schedule_tasks()) main_tasks = loop.create_task(main_system.schedule_tasks())

View File

@@ -1,5 +1,5 @@
import time import time
from typing import Tuple, List, Dict, Optional from typing import Tuple, List, Dict, Optional, Any
from .global_logger import logger from .global_logger import logger
from .embedding_store import EmbeddingManager from .embedding_store import EmbeddingManager
@@ -98,30 +98,46 @@ class QAManager:
return result, ppr_node_weights return result, ppr_node_weights
async def get_knowledge(self, question: str) -> Optional[str]: async def get_knowledge(self, question: str) -> Optional[Dict[str, Any]]:
"""获取知识""" """
# 处理查询 获取知识,返回结构化字典
processed_result = await self.process_query(question)
if processed_result is not None:
query_res = processed_result[0]
# 检查查询结果是否为空
if not query_res:
logger.debug("知识库查询结果为空,可能是知识库中没有相关内容")
return None
knowledge = [ Args:
( question: 用户提出的问题
self.embed_manager.paragraphs_embedding_store.store[res[0]].str,
res[1], Returns:
) 一个包含 'knowledge_items' 和 'summary' 的字典,或者在没有结果时返回 None
for res in query_res """
] processed_result = await self.process_query(question)
found_knowledge = "\n".join( if not processed_result or not processed_result[0]:
[f"{i + 1}条知识:{k[0]}\n 该条知识对于问题的相关性:{k[1]}" for i, k in enumerate(knowledge)] logger.debug("知识库查询结果为空。")
)
if len(found_knowledge) > MAX_KNOWLEDGE_LENGTH:
found_knowledge = found_knowledge[:MAX_KNOWLEDGE_LENGTH] + "\n"
return found_knowledge
else:
logger.debug("LPMM知识库并未初始化可能是从未导入过知识...")
return None return None
query_res = processed_result[0]
knowledge_items = []
for res_hash, relevance, *_ in query_res:
if store_item := self.embed_manager.paragraphs_embedding_store.store.get(res_hash):
knowledge_items.append({
"content": store_item.str,
"source": "内部知识库",
"relevance": f"{relevance:.4f}"
})
if not knowledge_items:
return None
# 使用LLM生成总结
knowledge_text_for_summary = "\n\n".join([item['content'] for item in knowledge_items[:5]]) # 最多总结前5条
summary_prompt = f"根据以下信息,为问题 '{question}' 生成一个简洁的、不超过50字的摘要\n\n{knowledge_text_for_summary}"
try:
summary, (_, _, _) = await self.qa_model.generate_response_async(summary_prompt)
except Exception as e:
logger.error(f"生成知识摘要失败: {e}")
summary = "无法生成摘要。"
return {
"knowledge_items": knowledge_items,
"summary": summary.strip() if summary else "没有可用的摘要。"
}

View File

@@ -369,7 +369,7 @@ class Prompt:
task_names.append("cross_context") task_names.append("cross_context")
# 性能优化 # 性能优化
base_timeout = 10.0 base_timeout = 20.0
task_timeout = 2.0 task_timeout = 2.0
timeout_seconds = min( timeout_seconds = min(
max(base_timeout, len(tasks) * task_timeout), max(base_timeout, len(tasks) * task_timeout),
@@ -676,22 +676,21 @@ class Prompt:
return {"knowledge_prompt": ""} return {"knowledge_prompt": ""}
try: try:
from src.chat.knowledge.knowledge_lib import QAManager from src.chat.knowledge.knowledge_lib import qa_manager
# 获取问题文本(当前消息) # 获取问题文本(当前消息)
question = self.parameters.target or "" question = self.parameters.target or ""
if not question: if not question:
return {"knowledge_prompt": ""} return {"knowledge_prompt": ""}
# 创建QA管理器 # 检查QA管理器是否已成功初始化
qa_manager = QAManager() if not qa_manager:
logger.warning("QA管理器未初始化 (可能lpmm_knowledge被禁用),跳过知识库搜索。")
return {"knowledge_prompt": ""}
# 搜索相关知识 # 搜索相关知识
knowledge_results = await qa_manager.get_knowledge( knowledge_results = await qa_manager.get_knowledge(
question=question, question=question
chat_id=self.parameters.chat_id,
max_results=5,
min_similarity=0.5
) )
# 构建知识块 # 构建知识块
@@ -704,13 +703,10 @@ class Prompt:
relevance = item.get("relevance", 0.0) relevance = item.get("relevance", 0.0)
if content: if content:
if source: knowledge_parts.append(f"- [相关度: {relevance}] {content}")
knowledge_parts.append(f"- [{relevance:.2f}] {content} (来源: {source})")
else:
knowledge_parts.append(f"- [{relevance:.2f}] {content}")
if knowledge_results.get("summary"): if summary := knowledge_results.get("summary"):
knowledge_parts.append(f"\n知识总结: {knowledge_results['summary']}") knowledge_parts.append(f"\n知识总结: {summary}")
knowledge_prompt = "\n".join(knowledge_parts) knowledge_prompt = "\n".join(knowledge_parts)
else: else:

View File

@@ -43,10 +43,16 @@ class SearchKnowledgeFromLPMMTool(BaseTool):
logger.debug(f"知识库查询结果: {knowledge_info}") logger.debug(f"知识库查询结果: {knowledge_info}")
if knowledge_info: if knowledge_info and knowledge_info.get("knowledge_items"):
content = f"你知道这些知识: {knowledge_info}" knowledge_parts = []
for i, item in enumerate(knowledge_info["knowledge_items"]):
knowledge_parts.append(f"- {item.get('content', 'N/A')}")
knowledge_text = "\n".join(knowledge_parts)
summary = knowledge_info.get('summary', '无总结')
content = f"关于 '{query}', 你知道以下信息:\n{knowledge_text}\n\n总结: {summary}"
else: else:
content = f"你不太了解有{query}的知识" content = f"'{query}',你的知识库里好像没有相关的信息呢"
return {"type": "lpmm_knowledge", "id": query, "content": content} return {"type": "lpmm_knowledge", "id": query, "content": content}
except Exception as e: except Exception as e:
# 捕获异常并记录错误 # 捕获异常并记录错误