From 22a625ce4605bfef9c9a47fd87c0856167447235 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=A2=A8=E6=A2=93=E6=9F=92?= <1787882683@qq.com> Date: Sun, 10 Aug 2025 20:43:53 +0800 Subject: [PATCH] =?UTF-8?q?fix=EF=BC=9A=E7=BB=9F=E4=B8=80=E6=AE=B5?= =?UTF-8?q?=E8=90=BDhash=E5=91=BD=E5=90=8D=E7=A9=BA=E9=97=B4=E6=A0=BC?= =?UTF-8?q?=E5=BC=8F=EF=BC=8C=E7=A1=AE=E4=BF=9D=E4=B8=8EEmbeddingStore?= =?UTF-8?q?=E4=B8=80=E8=87=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scripts/import_openie.py | 8 +++++--- src/chat/knowledge/knowledge_lib.py | 1 + 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/scripts/import_openie.py b/scripts/import_openie.py index eabeb9965..fe9f5269a 100644 --- a/scripts/import_openie.py +++ b/scripts/import_openie.py @@ -14,7 +14,6 @@ from src.chat.knowledge.open_ie import OpenIE from src.chat.knowledge.kg_manager import KGManager from src.common.logger import get_logger from src.chat.knowledge.utils.hash import get_sha256 -from src.manager.local_store_manager import local_storage # 添加项目根目录到 sys.path @@ -60,7 +59,9 @@ def hash_deduplicate( ): # 段落hash paragraph_hash = get_sha256(raw_paragraph) - if f"{local_storage['pg_namespace']}-{paragraph_hash}" in stored_pg_hashes and paragraph_hash in stored_paragraph_hashes: + # 使用与EmbeddingStore中一致的命名空间格式:namespace-hash + paragraph_key = f"paragraph-{paragraph_hash}" + if paragraph_key in stored_pg_hashes and paragraph_hash in stored_paragraph_hashes: continue new_raw_paragraphs[paragraph_hash] = raw_paragraph new_triple_list_data[paragraph_hash] = triple_list @@ -221,7 +222,8 @@ def main(): # sourcery skip: dict-comprehension # 数据比对:Embedding库与KG的段落hash集合 for pg_hash in kg_manager.stored_paragraph_hashes: - key = f"{local_storage['pg_namespace']}-{pg_hash}" + # 使用与EmbeddingStore中一致的命名空间格式:namespace-hash + key = f"paragraph-{pg_hash}" if key not in embed_manager.stored_pg_hashes: logger.warning(f"KG中存在Embedding库中不存在的段落:{key}") diff --git a/src/chat/knowledge/knowledge_lib.py b/src/chat/knowledge/knowledge_lib.py index 13629f18b..f3e6eca6d 100644 --- a/src/chat/knowledge/knowledge_lib.py +++ b/src/chat/knowledge/knowledge_lib.py @@ -59,6 +59,7 @@ if global_config.lpmm_knowledge.enable: # 数据比对:Embedding库与KG的段落hash集合 for pg_hash in kg_manager.stored_paragraph_hashes: + # 使用与EmbeddingStore中一致的命名空间格式 key = f"paragraph-{pg_hash}" if key not in embed_manager.stored_pg_hashes: logger.warning(f"KG中存在Embedding库中不存在的段落:{key}")