139 lines
4.7 KiB
Python
139 lines
4.7 KiB
Python
from src.chat.knowledge.lpmmconfig import global_config
|
||
from src.chat.knowledge.embedding_store import EmbeddingManager
|
||
from src.chat.knowledge.llm_client import LLMClient
|
||
from src.chat.knowledge.mem_active_manager import MemoryActiveManager
|
||
from src.chat.knowledge.qa_manager import QAManager
|
||
from src.chat.knowledge.kg_manager import KGManager
|
||
from src.chat.knowledge.global_logger import logger
|
||
from src.config.config import global_config as bot_global_config
|
||
from src.manager.local_store_manager import local_storage
|
||
import os
|
||
|
||
INVALID_ENTITY = [
|
||
"",
|
||
"你",
|
||
"他",
|
||
"她",
|
||
"它",
|
||
"我们",
|
||
"你们",
|
||
"他们",
|
||
"她们",
|
||
"它们",
|
||
]
|
||
PG_NAMESPACE = "paragraph"
|
||
ENT_NAMESPACE = "entity"
|
||
REL_NAMESPACE = "relation"
|
||
|
||
RAG_GRAPH_NAMESPACE = "rag-graph"
|
||
RAG_ENT_CNT_NAMESPACE = "rag-ent-cnt"
|
||
RAG_PG_HASH_NAMESPACE = "rag-pg-hash"
|
||
|
||
|
||
ROOT_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", ".."))
|
||
DATA_PATH = os.path.join(ROOT_PATH, "data")
|
||
|
||
def _initialize_knowledge_local_storage():
|
||
"""
|
||
初始化知识库相关的本地存储配置
|
||
使用字典批量设置,避免重复的if判断
|
||
"""
|
||
# 定义所有需要初始化的配置项
|
||
default_configs = {
|
||
# 路径配置
|
||
'root_path': ROOT_PATH,
|
||
'data_path': f"{ROOT_PATH}/data",
|
||
|
||
# 实体和命名空间配置
|
||
'lpmm_invalid_entity': INVALID_ENTITY,
|
||
'pg_namespace': PG_NAMESPACE,
|
||
'ent_namespace': ENT_NAMESPACE,
|
||
'rel_namespace': REL_NAMESPACE,
|
||
|
||
# RAG相关命名空间配置
|
||
'rag_graph_namespace': RAG_GRAPH_NAMESPACE,
|
||
'rag_ent_cnt_namespace': RAG_ENT_CNT_NAMESPACE,
|
||
'rag_pg_hash_namespace': RAG_PG_HASH_NAMESPACE
|
||
}
|
||
|
||
# 日志级别映射:重要配置用info,其他用debug
|
||
important_configs = {'root_path', 'data_path'}
|
||
|
||
# 批量设置配置项
|
||
initialized_count = 0
|
||
for key, default_value in default_configs.items():
|
||
if local_storage[key] is None:
|
||
local_storage[key] = default_value
|
||
|
||
# 根据重要性选择日志级别
|
||
if key in important_configs:
|
||
logger.info(f"设置{key}: {default_value}")
|
||
else:
|
||
logger.debug(f"设置{key}: {default_value}")
|
||
|
||
initialized_count += 1
|
||
|
||
if initialized_count > 0:
|
||
logger.info(f"知识库本地存储初始化完成,共设置 {initialized_count} 项配置")
|
||
else:
|
||
logger.debug("知识库本地存储配置已存在,跳过初始化")
|
||
|
||
# 初始化本地存储路径
|
||
_initialize_knowledge_local_storage()
|
||
|
||
# 检查LPMM知识库是否启用
|
||
if bot_global_config.lpmm_knowledge.enable:
|
||
logger.info("正在初始化Mai-LPMM\n")
|
||
logger.info("创建LLM客户端")
|
||
llm_client_list = dict()
|
||
for key in global_config["llm_providers"]:
|
||
llm_client_list[key] = LLMClient(
|
||
global_config["llm_providers"][key]["base_url"],
|
||
global_config["llm_providers"][key]["api_key"],
|
||
)
|
||
|
||
# 初始化Embedding库
|
||
embed_manager = EmbeddingManager()
|
||
logger.info("正在从文件加载Embedding库")
|
||
try:
|
||
embed_manager.load_from_file()
|
||
except Exception as e:
|
||
logger.warning("此消息不会影响正常使用:从文件加载Embedding库时,{}".format(e))
|
||
# logger.warning("如果你是第一次导入知识,或者还未导入知识,请忽略此错误")
|
||
logger.info("Embedding库加载完成")
|
||
# 初始化KG
|
||
kg_manager = KGManager()
|
||
logger.info("正在从文件加载KG")
|
||
try:
|
||
kg_manager.load_from_file()
|
||
except Exception as e:
|
||
logger.warning("此消息不会影响正常使用:从文件加载KG时,{}".format(e))
|
||
# logger.warning("如果你是第一次导入知识,或者还未导入知识,请忽略此错误")
|
||
logger.info("KG加载完成")
|
||
|
||
logger.info(f"KG节点数量:{len(kg_manager.graph.get_node_list())}")
|
||
logger.info(f"KG边数量:{len(kg_manager.graph.get_edge_list())}")
|
||
|
||
# 数据比对:Embedding库与KG的段落hash集合
|
||
for pg_hash in kg_manager.stored_paragraph_hashes:
|
||
key = PG_NAMESPACE + "-" + pg_hash
|
||
if key not in embed_manager.stored_pg_hashes:
|
||
logger.warning(f"KG中存在Embedding库中不存在的段落:{key}")
|
||
|
||
# 问答系统(用于知识库)
|
||
qa_manager = QAManager(
|
||
embed_manager,
|
||
kg_manager,
|
||
)
|
||
|
||
# 记忆激活(用于记忆库)
|
||
inspire_manager = MemoryActiveManager(
|
||
embed_manager,
|
||
llm_client_list[global_config["embedding"]["provider"]],
|
||
)
|
||
else:
|
||
logger.info("LPMM知识库已禁用,跳过初始化")
|
||
# 创建空的占位符对象,避免导入错误
|
||
qa_manager = None
|
||
inspire_manager = None
|