feat(knowledge): 优化知识库返回结构和集成流程
- 重构QAManager.get_knowledge()，使其返回包含知识项和摘要的结构化字典
- 在bot.py中添加initialize_lpmm_knowledge()初始化调用
- 更新Prompt类知识库集成逻辑，改进错误处理
- 优化lpmm知识工具显示格式，增强用户体验
- 将异步任务基础超时时间从10秒增至20秒
This commit is contained in:
2
bot.py
2
bot.py
@@ -21,6 +21,7 @@ initialize_logging()
|
|||||||
from src.main import MainSystem # noqa
|
from src.main import MainSystem # noqa
|
||||||
from src import BaseMain # noqa
|
from src import BaseMain # noqa
|
||||||
from src.manager.async_task_manager import async_task_manager # noqa
|
from src.manager.async_task_manager import async_task_manager # noqa
|
||||||
|
from src.chat.knowledge.knowledge_lib import initialize_lpmm_knowledge # noqa
|
||||||
from src.config.config import global_config # noqa
|
from src.config.config import global_config # noqa
|
||||||
from src.common.database.database import initialize_sql_database # noqa
|
from src.common.database.database import initialize_sql_database # noqa
|
||||||
from src.common.database.sqlalchemy_models import initialize_database as init_db # noqa
|
from src.common.database.sqlalchemy_models import initialize_database as init_db # noqa
|
||||||
@@ -228,6 +229,7 @@ if __name__ == "__main__":
|
|||||||
try:
|
try:
|
||||||
# 执行初始化和任务调度
|
# 执行初始化和任务调度
|
||||||
loop.run_until_complete(main_system.initialize())
|
loop.run_until_complete(main_system.initialize())
|
||||||
|
initialize_lpmm_knowledge()
|
||||||
# Schedule tasks returns a future that runs forever.
|
# Schedule tasks returns a future that runs forever.
|
||||||
# We can run console_input_loop concurrently.
|
# We can run console_input_loop concurrently.
|
||||||
main_tasks = loop.create_task(main_system.schedule_tasks())
|
main_tasks = loop.create_task(main_system.schedule_tasks())
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
import time
|
import time
|
||||||
from typing import Tuple, List, Dict, Optional
|
from typing import Tuple, List, Dict, Optional, Any
|
||||||
|
|
||||||
from .global_logger import logger
|
from .global_logger import logger
|
||||||
from .embedding_store import EmbeddingManager
|
from .embedding_store import EmbeddingManager
|
||||||
@@ -98,30 +98,46 @@ class QAManager:
|
|||||||
|
|
||||||
return result, ppr_node_weights
|
return result, ppr_node_weights
|
||||||
|
|
||||||
async def get_knowledge(self, question: str) -> Optional[str]:
|
async def get_knowledge(self, question: str) -> Optional[Dict[str, Any]]:
|
||||||
"""获取知识"""
|
"""
|
||||||
# 处理查询
|
获取知识,返回结构化字典
|
||||||
processed_result = await self.process_query(question)
|
|
||||||
if processed_result is not None:
|
|
||||||
query_res = processed_result[0]
|
|
||||||
# 检查查询结果是否为空
|
|
||||||
if not query_res:
|
|
||||||
logger.debug("知识库查询结果为空,可能是知识库中没有相关内容")
|
|
||||||
return None
|
|
||||||
|
|
||||||
knowledge = [
|
Args:
|
||||||
(
|
question: 用户提出的问题
|
||||||
self.embed_manager.paragraphs_embedding_store.store[res[0]].str,
|
|
||||||
res[1],
|
Returns:
|
||||||
)
|
一个包含 'knowledge_items' 和 'summary' 的字典,或者在没有结果时返回 None
|
||||||
for res in query_res
|
"""
|
||||||
]
|
processed_result = await self.process_query(question)
|
||||||
found_knowledge = "\n".join(
|
if not processed_result or not processed_result[0]:
|
||||||
[f"第{i + 1}条知识:{k[0]}\n 该条知识对于问题的相关性:{k[1]}" for i, k in enumerate(knowledge)]
|
logger.debug("知识库查询结果为空。")
|
||||||
)
|
|
||||||
if len(found_knowledge) > MAX_KNOWLEDGE_LENGTH:
|
|
||||||
found_knowledge = found_knowledge[:MAX_KNOWLEDGE_LENGTH] + "\n"
|
|
||||||
return found_knowledge
|
|
||||||
else:
|
|
||||||
logger.debug("LPMM知识库并未初始化,可能是从未导入过知识...")
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
query_res = processed_result[0]
|
||||||
|
|
||||||
|
knowledge_items = []
|
||||||
|
for res_hash, relevance, *_ in query_res:
|
||||||
|
if store_item := self.embed_manager.paragraphs_embedding_store.store.get(res_hash):
|
||||||
|
knowledge_items.append({
|
||||||
|
"content": store_item.str,
|
||||||
|
"source": "内部知识库",
|
||||||
|
"relevance": f"{relevance:.4f}"
|
||||||
|
})
|
||||||
|
|
||||||
|
if not knowledge_items:
|
||||||
|
return None
|
||||||
|
|
||||||
|
# 使用LLM生成总结
|
||||||
|
knowledge_text_for_summary = "\n\n".join([item['content'] for item in knowledge_items[:5]]) # 最多总结前5条
|
||||||
|
summary_prompt = f"根据以下信息,为问题 '{question}' 生成一个简洁的、不超过50字的摘要:\n\n{knowledge_text_for_summary}"
|
||||||
|
|
||||||
|
try:
|
||||||
|
summary, (_, _, _) = await self.qa_model.generate_response_async(summary_prompt)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"生成知识摘要失败: {e}")
|
||||||
|
summary = "无法生成摘要。"
|
||||||
|
|
||||||
|
return {
|
||||||
|
"knowledge_items": knowledge_items,
|
||||||
|
"summary": summary.strip() if summary else "没有可用的摘要。"
|
||||||
|
}
|
||||||
|
|||||||
@@ -369,7 +369,7 @@ class Prompt:
|
|||||||
task_names.append("cross_context")
|
task_names.append("cross_context")
|
||||||
|
|
||||||
# 性能优化
|
# 性能优化
|
||||||
base_timeout = 10.0
|
base_timeout = 20.0
|
||||||
task_timeout = 2.0
|
task_timeout = 2.0
|
||||||
timeout_seconds = min(
|
timeout_seconds = min(
|
||||||
max(base_timeout, len(tasks) * task_timeout),
|
max(base_timeout, len(tasks) * task_timeout),
|
||||||
@@ -676,22 +676,21 @@ class Prompt:
|
|||||||
return {"knowledge_prompt": ""}
|
return {"knowledge_prompt": ""}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from src.chat.knowledge.knowledge_lib import QAManager
|
from src.chat.knowledge.knowledge_lib import qa_manager
|
||||||
|
|
||||||
# 获取问题文本(当前消息)
|
# 获取问题文本(当前消息)
|
||||||
question = self.parameters.target or ""
|
question = self.parameters.target or ""
|
||||||
if not question:
|
if not question:
|
||||||
return {"knowledge_prompt": ""}
|
return {"knowledge_prompt": ""}
|
||||||
|
|
||||||
# 创建QA管理器
|
# 检查QA管理器是否已成功初始化
|
||||||
qa_manager = QAManager()
|
if not qa_manager:
|
||||||
|
logger.warning("QA管理器未初始化 (可能lpmm_knowledge被禁用),跳过知识库搜索。")
|
||||||
|
return {"knowledge_prompt": ""}
|
||||||
|
|
||||||
# 搜索相关知识
|
# 搜索相关知识
|
||||||
knowledge_results = await qa_manager.get_knowledge(
|
knowledge_results = await qa_manager.get_knowledge(
|
||||||
question=question,
|
question=question
|
||||||
chat_id=self.parameters.chat_id,
|
|
||||||
max_results=5,
|
|
||||||
min_similarity=0.5
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# 构建知识块
|
# 构建知识块
|
||||||
@@ -704,13 +703,10 @@ class Prompt:
|
|||||||
relevance = item.get("relevance", 0.0)
|
relevance = item.get("relevance", 0.0)
|
||||||
|
|
||||||
if content:
|
if content:
|
||||||
if source:
|
knowledge_parts.append(f"- [相关度: {relevance}] {content}")
|
||||||
knowledge_parts.append(f"- [{relevance:.2f}] {content} (来源: {source})")
|
|
||||||
else:
|
|
||||||
knowledge_parts.append(f"- [{relevance:.2f}] {content}")
|
|
||||||
|
|
||||||
if knowledge_results.get("summary"):
|
if summary := knowledge_results.get("summary"):
|
||||||
knowledge_parts.append(f"\n知识总结: {knowledge_results['summary']}")
|
knowledge_parts.append(f"\n知识总结: {summary}")
|
||||||
|
|
||||||
knowledge_prompt = "\n".join(knowledge_parts)
|
knowledge_prompt = "\n".join(knowledge_parts)
|
||||||
else:
|
else:
|
||||||
|
|||||||
@@ -43,10 +43,16 @@ class SearchKnowledgeFromLPMMTool(BaseTool):
|
|||||||
|
|
||||||
logger.debug(f"知识库查询结果: {knowledge_info}")
|
logger.debug(f"知识库查询结果: {knowledge_info}")
|
||||||
|
|
||||||
if knowledge_info:
|
if knowledge_info and knowledge_info.get("knowledge_items"):
|
||||||
content = f"你知道这些知识: {knowledge_info}"
|
knowledge_parts = []
|
||||||
|
for i, item in enumerate(knowledge_info["knowledge_items"]):
|
||||||
|
knowledge_parts.append(f"- {item.get('content', 'N/A')}")
|
||||||
|
|
||||||
|
knowledge_text = "\n".join(knowledge_parts)
|
||||||
|
summary = knowledge_info.get('summary', '无总结')
|
||||||
|
content = f"关于 '{query}', 你知道以下信息:\n{knowledge_text}\n\n总结: {summary}"
|
||||||
else:
|
else:
|
||||||
content = f"你不太了解有关{query}的知识"
|
content = f"关于 '{query}',你的知识库里好像没有相关的信息呢"
|
||||||
return {"type": "lpmm_knowledge", "id": query, "content": content}
|
return {"type": "lpmm_knowledge", "id": query, "content": content}
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
# 捕获异常并记录错误
|
# 捕获异常并记录错误
|
||||||
|
|||||||
Reference in New Issue
Block a user