diff --git a/scripts/import_openie.py b/scripts/import_openie.py
index 2dbaf6eb4..0d33cb43e 100644
--- a/scripts/import_openie.py
+++ b/scripts/import_openie.py
@@ -21,10 +21,21 @@ from src.chat.knowledge.utils.hash import get_sha256
 
 # 添加项目根目录到 sys.path
 ROOT_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
-OPENIE_DIR = global_config["persistence"]["openie_data_path"] or os.path.join(ROOT_PATH, "data/openie")
+OPENIE_DIR = global_config["persistence"]["openie_data_path"] or os.path.join(ROOT_PATH, "data", "openie")
 
 logger = get_logger("OpenIE导入")
 
+
+def ensure_openie_dir():
+    """Ensure the OpenIE data directory exists; raise FileExistsError if the path is not a directory."""
+    already_present = os.path.isdir(OPENIE_DIR)
+    # exist_ok=True avoids the race between a separate existence check and the creation call.
+    os.makedirs(OPENIE_DIR, exist_ok=True)
+    if already_present:
+        logger.info(f"OpenIE数据目录已存在:{OPENIE_DIR}")
+    else:
+        logger.info(f"创建OpenIE数据目录:{OPENIE_DIR}")
+
 
 def hash_deduplicate(
     raw_paragraphs: dict[str, str],
@@ -178,7 +189,7 @@ def main(): # sourcery skip: dict-comprehension
         print("操作已取消")
         sys.exit(1)
     print("\n" + "=" * 40 + "\n")
-
+    ensure_openie_dir()  # make sure the OpenIE data directory exists before importing
     logger.info("----开始导入openie数据----\n")
 
     logger.info("创建LLM客户端")
diff --git a/scripts/info_extraction.py b/scripts/info_extraction.py
index b9815d2f6..cf0f55c95 100644
--- a/scripts/info_extraction.py
+++ b/scripts/info_extraction.py
@@ -34,9 +34,9 @@ logger = get_logger("LPMM知识库-信息提取")
 ROOT_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
 TEMP_DIR = os.path.join(ROOT_PATH, "temp")
 IMPORTED_DATA_PATH = global_config["persistence"]["imported_data_path"] or os.path.join(
-    ROOT_PATH, "data/imported_lpmm_data"
+    ROOT_PATH, "data", "imported_lpmm_data"
 )
-OPENIE_OUTPUT_DIR = global_config["persistence"]["openie_data_path"] or os.path.join(ROOT_PATH, "data/openie")
+OPENIE_OUTPUT_DIR = global_config["persistence"]["openie_data_path"] or os.path.join(ROOT_PATH, "data", "openie")
 
 # 创建一个线程安全的锁,用于保护文件操作和共享数据
 file_lock = Lock()
@@ -45,6 +45,21 @@ open_ie_doc_lock = Lock()
 # 创建一个事件标志,用于控制程序终止
 shutdown_event = Event()
 
+
+def ensure_dirs():
+    """Ensure the temp, OpenIE output and imported-data directories exist, logging each one created."""
+    targets = (
+        (TEMP_DIR, "已创建临时目录"),
+        (OPENIE_OUTPUT_DIR, "已创建输出目录"),
+        (IMPORTED_DATA_PATH, "已创建导入数据目录"),
+    )
+    for path, message in targets:
+        if os.path.isdir(path):
+            continue
+        # exist_ok=True tolerates another process creating the directory after the check above.
+        os.makedirs(path, exist_ok=True)
+        logger.info(f"{message}: {path}")
+
 
 def process_single_text(pg_hash, raw_data, llm_client_list):
     """处理单个文本的函数,用于线程池"""
@@ -114,7 +129,7 @@ def main(): # sourcery skip: comprehension-to-generator, extract-method
         print("操作已取消")
         sys.exit(1)
     print("\n" + "=" * 40 + "\n")
-
+    ensure_dirs()  # make sure the working directories exist before extraction
     logger.info("--------进行信息提取--------\n")
 
     logger.info("创建LLM客户端")