优化多线程处理,调整嵌入获取和存储逻辑,增强模型一致性校验
This commit is contained in:
@@ -6,7 +6,6 @@ from src.chat.knowledge.qa_manager import QAManager
|
||||
from src.chat.knowledge.kg_manager import KGManager
|
||||
from src.chat.knowledge.global_logger import logger
|
||||
from src.config.config import global_config as bot_global_config
|
||||
from src.manager.local_store_manager import local_storage
|
||||
import os
|
||||
|
||||
INVALID_ENTITY = [
|
||||
@@ -21,9 +20,6 @@ INVALID_ENTITY = [
|
||||
"她们",
|
||||
"它们",
|
||||
]
|
||||
PG_NAMESPACE = "paragraph"
|
||||
ENT_NAMESPACE = "entity"
|
||||
REL_NAMESPACE = "relation"
|
||||
|
||||
RAG_GRAPH_NAMESPACE = "rag-graph"
|
||||
RAG_ENT_CNT_NAMESPACE = "rag-ent-cnt"
|
||||
@@ -34,54 +30,6 @@ ROOT_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..",
|
||||
DATA_PATH = os.path.join(ROOT_PATH, "data")
|
||||
|
||||
|
||||
def _initialize_knowledge_local_storage():
    """Seed ``local_storage`` with the knowledge-base default settings.

    Each key in the defaults table is looked up in ``local_storage``; when the
    stored value is ``None`` the default is written, otherwise the existing
    value is left untouched. Path settings are logged at INFO level, all other
    settings at DEBUG level, and one summary line is logged at the end.

    NOTE(review): this relies on ``local_storage[key]`` yielding ``None`` for a
    key that has never been set -- confirm against the storage manager.
    """
    # Every setting that must exist, mapped to its default value.
    defaults = {
        # Path settings. DATA_PATH is reused instead of rebuilding the path
        # with a hard-coded "/", so the stored value always equals the
        # module-level constant produced by os.path.join.
        "root_path": ROOT_PATH,
        "data_path": DATA_PATH,
        # Entity filter and embedding namespaces.
        "lpmm_invalid_entity": INVALID_ENTITY,
        "pg_namespace": PG_NAMESPACE,
        "ent_namespace": ENT_NAMESPACE,
        "rel_namespace": REL_NAMESPACE,
        # RAG-related namespaces.
        "rag_graph_namespace": RAG_GRAPH_NAMESPACE,
        "rag_ent_cnt_namespace": RAG_ENT_CNT_NAMESPACE,
        "rag_pg_hash_namespace": RAG_PG_HASH_NAMESPACE,
    }

    # Keys whose initialization is reported at INFO level; the rest use DEBUG.
    info_level_keys = {"root_path", "data_path"}

    initialized_count = 0
    for key, default_value in defaults.items():
        if local_storage[key] is not None:
            # Already configured: keep the stored value.
            continue

        local_storage[key] = default_value

        log = logger.info if key in info_level_keys else logger.debug
        log(f"设置{key}: {default_value}")

        initialized_count += 1

    if initialized_count > 0:
        logger.info(f"知识库本地存储初始化完成,共设置 {initialized_count} 项配置")
    else:
        logger.debug("知识库本地存储配置已存在,跳过初始化")
|
||||
|
||||
|
||||
# 初始化本地存储路径
|
||||
# sourcery skip: dict-comprehension
|
||||
_initialize_knowledge_local_storage()
|
||||
|
||||
qa_manager = None
|
||||
inspire_manager = None
|
||||
|
||||
@@ -120,7 +68,7 @@ if bot_global_config.lpmm_knowledge.enable:
|
||||
|
||||
# 数据比对:Embedding库与KG的段落hash集合
|
||||
for pg_hash in kg_manager.stored_paragraph_hashes:
|
||||
key = f"{PG_NAMESPACE}-{pg_hash}"
|
||||
key = f"paragraph-{pg_hash}"
|
||||
if key not in embed_manager.stored_pg_hashes:
|
||||
logger.warning(f"KG中存在Embedding库中不存在的段落:{key}")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user