From 110b5b1841ab8e222b5f224555b4fc65df3e722e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=A2=A8=E6=A2=93=E6=9F=92?= <1787882683@qq.com>
Date: Sun, 15 Jun 2025 16:27:22 +0800
Subject: [PATCH] =?UTF-8?q?fix=EF=BC=9A=E7=A1=AE=E4=BF=9DOpenIE=E5=92=8C?=
 =?UTF-8?q?=E4=B8=B4=E6=97=B6=E7=9B=AE=E5=BD=95=E5=AD=98=E5=9C=A8=E5=B9=B6?=
 =?UTF-8?q?=E4=BF=AE=E6=AD=A3=E8=B7=AF=E5=BE=84=E5=88=86=E9=9A=94=E7=AC=A6?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Review note: this patch was re-flowed from a whitespace-collapsed copy, so the
indentation of context lines is reconstructed and should be checked against
the tree. The two directory helpers were revised to create directories with
os.makedirs(..., exist_ok=True) instead of a check-then-create sequence; hunk
headers and the diffstat were updated to match, but the post-image blob ids on
the "index" lines are stale. Regenerate with `git format-patch` after applying.

 scripts/import_openie.py   | 19 +++++++++++++++++--
 scripts/info_extraction.py | 26 +++++++++++++++++++++++---
 2 files changed, 40 insertions(+), 5 deletions(-)

diff --git a/scripts/import_openie.py b/scripts/import_openie.py
index 2dbaf6eb4..0d33cb43e 100644
--- a/scripts/import_openie.py
+++ b/scripts/import_openie.py
@@ -21,10 +21,25 @@ from src.chat.knowledge.utils.hash import get_sha256
 
 # 添加项目根目录到 sys.path
 ROOT_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
-OPENIE_DIR = global_config["persistence"]["openie_data_path"] or os.path.join(ROOT_PATH, "data/openie")
+OPENIE_DIR = global_config["persistence"]["openie_data_path"] or os.path.join(ROOT_PATH, "data", "openie")
 
 logger = get_logger("OpenIE导入")
 
+
+def ensure_openie_dir():
+    """Ensure the OpenIE data directory exists, creating it if missing.
+
+    Raises:
+        FileExistsError: If OPENIE_DIR exists but is not a directory.
+    """
+    if os.path.isdir(OPENIE_DIR):
+        logger.info(f"OpenIE数据目录已存在:{OPENIE_DIR}")
+        return
+    # exist_ok=True tolerates another process creating the directory
+    # between the check above and this call.
+    os.makedirs(OPENIE_DIR, exist_ok=True)
+    logger.info(f"创建OpenIE数据目录:{OPENIE_DIR}")
+
 
 def hash_deduplicate(
     raw_paragraphs: dict[str, str],
@@ -178,7 +193,7 @@ def main():  # sourcery skip: dict-comprehension
         print("操作已取消")
         sys.exit(1)
     print("\n" + "=" * 40 + "\n")
-
+    ensure_openie_dir()  # make sure the OpenIE directory exists
     logger.info("----开始导入openie数据----\n")
 
     logger.info("创建LLM客户端")
diff --git a/scripts/info_extraction.py b/scripts/info_extraction.py
index b9815d2f6..cf0f55c95 100644
--- a/scripts/info_extraction.py
+++ b/scripts/info_extraction.py
@@ -34,9 +34,9 @@ logger = get_logger("LPMM知识库-信息提取")
 ROOT_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
 TEMP_DIR = os.path.join(ROOT_PATH, "temp")
 IMPORTED_DATA_PATH = global_config["persistence"]["imported_data_path"] or os.path.join(
-    ROOT_PATH, "data/imported_lpmm_data"
+    ROOT_PATH, "data", "imported_lpmm_data"
 )
-OPENIE_OUTPUT_DIR = global_config["persistence"]["openie_data_path"] or os.path.join(ROOT_PATH, "data/openie")
+OPENIE_OUTPUT_DIR = global_config["persistence"]["openie_data_path"] or os.path.join(ROOT_PATH, "data", "openie")
 
 # 创建一个线程安全的锁,用于保护文件操作和共享数据
 file_lock = Lock()
@@ -45,6 +45,26 @@ open_ie_doc_lock = Lock()
 # 创建一个事件标志,用于控制程序终止
 shutdown_event = Event()
 
+
+def ensure_dirs():
+    """Ensure the temp, OpenIE output and imported-data directories exist.
+
+    Raises:
+        FileExistsError: If one of the paths exists but is not a directory.
+    """
+    targets = (
+        ("临时目录", TEMP_DIR),
+        ("输出目录", OPENIE_OUTPUT_DIR),
+        ("导入数据目录", IMPORTED_DATA_PATH),
+    )
+    for label, path in targets:
+        if os.path.isdir(path):
+            continue
+        # exist_ok=True tolerates another process creating the directory
+        # between the check above and this call.
+        os.makedirs(path, exist_ok=True)
+        logger.info(f"已创建{label}: {path}")
+
 
 def process_single_text(pg_hash, raw_data, llm_client_list):
     """处理单个文本的函数,用于线程池"""
@@ -114,7 +134,7 @@ def main():  # sourcery skip: comprehension-to-generator, extract-method
         print("操作已取消")
         sys.exit(1)
     print("\n" + "=" * 40 + "\n")
-
+    ensure_dirs()  # make sure the working directories exist
     logger.info("--------进行信息提取--------\n")
 
     logger.info("创建LLM客户端")