feat(knowledge): 优化知识库返回结构和集成流程
- 重构 QAManager.get_knowledge(),使其返回包含知识项和摘要的结构化字典
- 在 bot.py 中添加 initialize_lpmm_knowledge() 初始化调用
- 更新 Prompt 类知识库集成逻辑,改进错误处理
- 优化 lpmm 知识工具显示格式,增强用户体验
- 将异步任务基础超时时间从 10 秒增至 20 秒
This commit is contained in:
2
bot.py
2
bot.py
@@ -21,6 +21,7 @@ initialize_logging()
|
||||
from src.main import MainSystem # noqa
|
||||
from src import BaseMain # noqa
|
||||
from src.manager.async_task_manager import async_task_manager # noqa
|
||||
from src.chat.knowledge.knowledge_lib import initialize_lpmm_knowledge # noqa
|
||||
from src.config.config import global_config # noqa
|
||||
from src.common.database.database import initialize_sql_database # noqa
|
||||
from src.common.database.sqlalchemy_models import initialize_database as init_db # noqa
|
||||
@@ -228,6 +229,7 @@ if __name__ == "__main__":
|
||||
try:
|
||||
# 执行初始化和任务调度
|
||||
loop.run_until_complete(main_system.initialize())
|
||||
initialize_lpmm_knowledge()
|
||||
# Schedule tasks returns a future that runs forever.
|
||||
# We can run console_input_loop concurrently.
|
||||
main_tasks = loop.create_task(main_system.schedule_tasks())
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import time
|
||||
from typing import Tuple, List, Dict, Optional
|
||||
from typing import Tuple, List, Dict, Optional, Any
|
||||
|
||||
from .global_logger import logger
|
||||
from .embedding_store import EmbeddingManager
|
||||
@@ -98,30 +98,46 @@ class QAManager:
|
||||
|
||||
return result, ppr_node_weights
|
||||
|
||||
async def get_knowledge(self, question: str) -> Optional[str]:
|
||||
"""获取知识"""
|
||||
# 处理查询
|
||||
async def get_knowledge(self, question: str) -> Optional[Dict[str, Any]]:
|
||||
"""
|
||||
获取知识,返回结构化字典
|
||||
|
||||
Args:
|
||||
question: 用户提出的问题
|
||||
|
||||
Returns:
|
||||
一个包含 'knowledge_items' 和 'summary' 的字典,或者在没有结果时返回 None
|
||||
"""
|
||||
processed_result = await self.process_query(question)
|
||||
if processed_result is not None:
|
||||
query_res = processed_result[0]
|
||||
# 检查查询结果是否为空
|
||||
if not query_res:
|
||||
logger.debug("知识库查询结果为空,可能是知识库中没有相关内容")
|
||||
if not processed_result or not processed_result[0]:
|
||||
logger.debug("知识库查询结果为空。")
|
||||
return None
|
||||
|
||||
knowledge = [
|
||||
(
|
||||
self.embed_manager.paragraphs_embedding_store.store[res[0]].str,
|
||||
res[1],
|
||||
)
|
||||
for res in query_res
|
||||
]
|
||||
found_knowledge = "\n".join(
|
||||
[f"第{i + 1}条知识:{k[0]}\n 该条知识对于问题的相关性:{k[1]}" for i, k in enumerate(knowledge)]
|
||||
)
|
||||
if len(found_knowledge) > MAX_KNOWLEDGE_LENGTH:
|
||||
found_knowledge = found_knowledge[:MAX_KNOWLEDGE_LENGTH] + "\n"
|
||||
return found_knowledge
|
||||
else:
|
||||
logger.debug("LPMM知识库并未初始化,可能是从未导入过知识...")
|
||||
query_res = processed_result[0]
|
||||
|
||||
knowledge_items = []
|
||||
for res_hash, relevance, *_ in query_res:
|
||||
if store_item := self.embed_manager.paragraphs_embedding_store.store.get(res_hash):
|
||||
knowledge_items.append({
|
||||
"content": store_item.str,
|
||||
"source": "内部知识库",
|
||||
"relevance": f"{relevance:.4f}"
|
||||
})
|
||||
|
||||
if not knowledge_items:
|
||||
return None
|
||||
|
||||
# 使用LLM生成总结
|
||||
knowledge_text_for_summary = "\n\n".join([item['content'] for item in knowledge_items[:5]]) # 最多总结前5条
|
||||
summary_prompt = f"根据以下信息,为问题 '{question}' 生成一个简洁的、不超过50字的摘要:\n\n{knowledge_text_for_summary}"
|
||||
|
||||
try:
|
||||
summary, (_, _, _) = await self.qa_model.generate_response_async(summary_prompt)
|
||||
except Exception as e:
|
||||
logger.error(f"生成知识摘要失败: {e}")
|
||||
summary = "无法生成摘要。"
|
||||
|
||||
return {
|
||||
"knowledge_items": knowledge_items,
|
||||
"summary": summary.strip() if summary else "没有可用的摘要。"
|
||||
}
|
||||
|
||||
@@ -369,7 +369,7 @@ class Prompt:
|
||||
task_names.append("cross_context")
|
||||
|
||||
# 性能优化
|
||||
base_timeout = 10.0
|
||||
base_timeout = 20.0
|
||||
task_timeout = 2.0
|
||||
timeout_seconds = min(
|
||||
max(base_timeout, len(tasks) * task_timeout),
|
||||
@@ -676,22 +676,21 @@ class Prompt:
|
||||
return {"knowledge_prompt": ""}
|
||||
|
||||
try:
|
||||
from src.chat.knowledge.knowledge_lib import QAManager
|
||||
from src.chat.knowledge.knowledge_lib import qa_manager
|
||||
|
||||
# 获取问题文本(当前消息)
|
||||
question = self.parameters.target or ""
|
||||
if not question:
|
||||
return {"knowledge_prompt": ""}
|
||||
|
||||
# 创建QA管理器
|
||||
qa_manager = QAManager()
|
||||
# 检查QA管理器是否已成功初始化
|
||||
if not qa_manager:
|
||||
logger.warning("QA管理器未初始化 (可能lpmm_knowledge被禁用),跳过知识库搜索。")
|
||||
return {"knowledge_prompt": ""}
|
||||
|
||||
# 搜索相关知识
|
||||
knowledge_results = await qa_manager.get_knowledge(
|
||||
question=question,
|
||||
chat_id=self.parameters.chat_id,
|
||||
max_results=5,
|
||||
min_similarity=0.5
|
||||
question=question
|
||||
)
|
||||
|
||||
# 构建知识块
|
||||
@@ -704,13 +703,10 @@ class Prompt:
|
||||
relevance = item.get("relevance", 0.0)
|
||||
|
||||
if content:
|
||||
if source:
|
||||
knowledge_parts.append(f"- [{relevance:.2f}] {content} (来源: {source})")
|
||||
else:
|
||||
knowledge_parts.append(f"- [{relevance:.2f}] {content}")
|
||||
knowledge_parts.append(f"- [相关度: {relevance}] {content}")
|
||||
|
||||
if knowledge_results.get("summary"):
|
||||
knowledge_parts.append(f"\n知识总结: {knowledge_results['summary']}")
|
||||
if summary := knowledge_results.get("summary"):
|
||||
knowledge_parts.append(f"\n知识总结: {summary}")
|
||||
|
||||
knowledge_prompt = "\n".join(knowledge_parts)
|
||||
else:
|
||||
|
||||
@@ -43,10 +43,16 @@ class SearchKnowledgeFromLPMMTool(BaseTool):
|
||||
|
||||
logger.debug(f"知识库查询结果: {knowledge_info}")
|
||||
|
||||
if knowledge_info:
|
||||
content = f"你知道这些知识: {knowledge_info}"
|
||||
if knowledge_info and knowledge_info.get("knowledge_items"):
|
||||
knowledge_parts = []
|
||||
for i, item in enumerate(knowledge_info["knowledge_items"]):
|
||||
knowledge_parts.append(f"- {item.get('content', 'N/A')}")
|
||||
|
||||
knowledge_text = "\n".join(knowledge_parts)
|
||||
summary = knowledge_info.get('summary', '无总结')
|
||||
content = f"关于 '{query}', 你知道以下信息:\n{knowledge_text}\n\n总结: {summary}"
|
||||
else:
|
||||
content = f"你不太了解有关{query}的知识"
|
||||
content = f"关于 '{query}',你的知识库里好像没有相关的信息呢"
|
||||
return {"type": "lpmm_knowledge", "id": query, "content": content}
|
||||
except Exception as e:
|
||||
# 捕获异常并记录错误
|
||||
|
||||
Reference in New Issue
Block a user