feat: 移除不必要的命名空间导入,优化本地存储初始化

This commit is contained in:
墨梓柒
2025-07-08 00:18:19 +08:00
parent 3c46d996fe
commit e339f0b228
4 changed files with 111 additions and 40 deletions

View File

@@ -10,13 +10,14 @@ from time import sleep
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from src.chat.knowledge.lpmmconfig import PG_NAMESPACE, global_config
from src.chat.knowledge.lpmmconfig import global_config
from src.chat.knowledge.embedding_store import EmbeddingManager
from src.chat.knowledge.llm_client import LLMClient
from src.chat.knowledge.open_ie import OpenIE
from src.chat.knowledge.kg_manager import KGManager
from src.common.logger import get_logger
from src.chat.knowledge.utils.hash import get_sha256
from src.manager.local_store_manager import local_storage
# 添加项目根目录到 sys.path
@@ -61,7 +62,7 @@ def hash_deduplicate(
for _, (raw_paragraph, triple_list) in enumerate(zip(raw_paragraphs.values(), triple_list_data.values())):
# 段落hash
paragraph_hash = get_sha256(raw_paragraph)
if f"{PG_NAMESPACE}-{paragraph_hash}" in stored_pg_hashes and paragraph_hash in stored_paragraph_hashes:
if f"{local_storage['pg_namespace']}-{paragraph_hash}" in stored_pg_hashes and paragraph_hash in stored_paragraph_hashes:
continue
new_raw_paragraphs[paragraph_hash] = raw_paragraph
new_triple_list_data[paragraph_hash] = triple_list
@@ -228,7 +229,7 @@ def main(): # sourcery skip: dict-comprehension
# 数据比对Embedding库与KG的段落hash集合
for pg_hash in kg_manager.stored_paragraph_hashes:
key = f"{PG_NAMESPACE}-{pg_hash}"
key = f"{local_storage['pg_namespace']}-{pg_hash}"
if key not in embed_manager.stored_pg_hashes:
logger.warning(f"KG中存在Embedding库中不存在的段落{key}")