fix:确保OpenIE和临时目录存在并修正路径分隔符

This commit is contained in:
墨梓柒
2025-06-15 16:27:22 +08:00
parent c96963ce6a
commit 110b5b1841
2 changed files with 23 additions and 5 deletions

View File

@@ -21,10 +21,17 @@ from src.chat.knowledge.utils.hash import get_sha256
# 添加项目根目录到 sys.path # 添加项目根目录到 sys.path
ROOT_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) ROOT_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
OPENIE_DIR = global_config["persistence"]["openie_data_path"] or os.path.join(ROOT_PATH, "data/openie") OPENIE_DIR = global_config["persistence"]["openie_data_path"] or os.path.join(ROOT_PATH, "data", "openie")
logger = get_logger("OpenIE导入") logger = get_logger("OpenIE导入")
def ensure_openie_dir():
    """Ensure the OpenIE data directory (``OPENIE_DIR``) exists.

    Creates the directory, including any missing parent directories, when
    it is not already present, and logs whether it was created or was
    already there.

    Raises:
        FileExistsError: If ``OPENIE_DIR`` exists but is not a directory
            (for example, a regular file occupies that path).
        OSError: If the directory cannot be created for any other reason.
    """
    # Record the state up front only to choose the log message; the actual
    # creation below does not depend on this check.
    already_present = os.path.isdir(OPENIE_DIR)

    # exist_ok=True avoids the check-then-create race: if something else
    # creates the directory between the check above and this call, the call
    # still succeeds instead of raising.
    os.makedirs(OPENIE_DIR, exist_ok=True)

    if already_present:
        logger.info(f"OpenIE数据目录已存在{OPENIE_DIR}")
    else:
        logger.info(f"创建OpenIE数据目录{OPENIE_DIR}")
def hash_deduplicate( def hash_deduplicate(
raw_paragraphs: dict[str, str], raw_paragraphs: dict[str, str],
@@ -178,7 +185,7 @@ def main(): # sourcery skip: dict-comprehension
print("操作已取消") print("操作已取消")
sys.exit(1) sys.exit(1)
print("\n" + "=" * 40 + "\n") print("\n" + "=" * 40 + "\n")
ensure_openie_dir() # 确保OpenIE目录存在
logger.info("----开始导入openie数据----\n") logger.info("----开始导入openie数据----\n")
logger.info("创建LLM客户端") logger.info("创建LLM客户端")

View File

@@ -34,9 +34,9 @@ logger = get_logger("LPMM知识库-信息提取")
ROOT_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) ROOT_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
TEMP_DIR = os.path.join(ROOT_PATH, "temp") TEMP_DIR = os.path.join(ROOT_PATH, "temp")
IMPORTED_DATA_PATH = global_config["persistence"]["imported_data_path"] or os.path.join( IMPORTED_DATA_PATH = global_config["persistence"]["imported_data_path"] or os.path.join(
ROOT_PATH, "data/imported_lpmm_data" ROOT_PATH, "data", "imported_lpmm_data"
) )
OPENIE_OUTPUT_DIR = global_config["persistence"]["openie_data_path"] or os.path.join(ROOT_PATH, "data/openie") OPENIE_OUTPUT_DIR = global_config["persistence"]["openie_data_path"] or os.path.join(ROOT_PATH, "data", "openie")
# 创建一个线程安全的锁,用于保护文件操作和共享数据 # 创建一个线程安全的锁,用于保护文件操作和共享数据
file_lock = Lock() file_lock = Lock()
@@ -45,6 +45,17 @@ open_ie_doc_lock = Lock()
# 创建一个事件标志,用于控制程序终止 # 创建一个事件标志,用于控制程序终止
shutdown_event = Event() shutdown_event = Event()
def ensure_dirs():
    """Ensure the temp, OpenIE output, and imported-data directories exist.

    Checks ``TEMP_DIR``, ``OPENIE_OUTPUT_DIR`` and ``IMPORTED_DATA_PATH`` in
    that order. Each missing directory is created (including missing parent
    directories) and its creation is logged; directories that already exist
    are left alone and nothing is logged for them.

    Raises:
        FileExistsError: If one of the paths exists but is not a directory.
        OSError: If a directory cannot be created for any other reason.
    """
    # (path, label) pairs; the label is interpolated into the log message so
    # each directory keeps its own wording.
    required_dirs = (
        (TEMP_DIR, "临时目录"),
        (OPENIE_OUTPUT_DIR, "输出目录"),
        (IMPORTED_DATA_PATH, "导入数据目录"),
    )
    for dir_path, label in required_dirs:
        if os.path.isdir(dir_path):
            continue
        # exist_ok=True keeps this safe if the directory appears between the
        # isdir() check and the creation call.
        os.makedirs(dir_path, exist_ok=True)
        logger.info(f"已创建{label}: {dir_path}")
def process_single_text(pg_hash, raw_data, llm_client_list): def process_single_text(pg_hash, raw_data, llm_client_list):
"""处理单个文本的函数,用于线程池""" """处理单个文本的函数,用于线程池"""
@@ -114,7 +125,7 @@ def main(): # sourcery skip: comprehension-to-generator, extract-method
print("操作已取消") print("操作已取消")
sys.exit(1) sys.exit(1)
print("\n" + "=" * 40 + "\n") print("\n" + "=" * 40 + "\n")
ensure_dirs() # 确保目录存在
logger.info("--------进行信息提取--------\n") logger.info("--------进行信息提取--------\n")
logger.info("创建LLM客户端") logger.info("创建LLM客户端")