diff --git a/scripts/import_openie.py b/scripts/import_openie.py index 0d33cb43e..fc677877f 100644 --- a/scripts/import_openie.py +++ b/scripts/import_openie.py @@ -25,6 +25,7 @@ OPENIE_DIR = global_config["persistence"]["openie_data_path"] or os.path.join(RO logger = get_logger("OpenIE导入") + def ensure_openie_dir(): """确保OpenIE数据目录存在""" if not os.path.exists(OPENIE_DIR): @@ -33,6 +34,7 @@ def ensure_openie_dir(): else: logger.info(f"OpenIE数据目录已存在:{OPENIE_DIR}") + def hash_deduplicate( raw_paragraphs: dict[str, str], triple_list_data: dict[str, list[list[str]]], diff --git a/scripts/info_extraction.py b/scripts/info_extraction.py index cf0f55c95..b9f278325 100644 --- a/scripts/info_extraction.py +++ b/scripts/info_extraction.py @@ -45,6 +45,7 @@ open_ie_doc_lock = Lock() # 创建一个事件标志,用于控制程序终止 shutdown_event = Event() + def ensure_dirs(): """确保临时目录和输出目录存在""" if not os.path.exists(TEMP_DIR): @@ -57,6 +58,7 @@ def ensure_dirs(): os.makedirs(IMPORTED_DATA_PATH) logger.info(f"已创建导入数据目录: {IMPORTED_DATA_PATH}") + def process_single_text(pg_hash, raw_data, llm_client_list): """处理单个文本的函数,用于线程池""" temp_file_path = f"{TEMP_DIR}/{pg_hash}.json"