fix:确保OpenIE和临时目录存在并修正路径分隔符
This commit is contained in:
@@ -21,10 +21,17 @@ from src.chat.knowledge.utils.hash import get_sha256
|
|||||||
|
|
||||||
# 添加项目根目录到 sys.path
|
# 添加项目根目录到 sys.path
|
||||||
ROOT_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
|
ROOT_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
|
||||||
OPENIE_DIR = global_config["persistence"]["openie_data_path"] or os.path.join(ROOT_PATH, "data/openie")
|
OPENIE_DIR = global_config["persistence"]["openie_data_path"] or os.path.join(ROOT_PATH, "data", "openie")
|
||||||
|
|
||||||
logger = get_logger("OpenIE导入")
|
logger = get_logger("OpenIE导入")
|
||||||
|
|
||||||
|
def ensure_openie_dir():
    """Ensure the OpenIE data directory (``OPENIE_DIR``) exists.

    Logs whether the directory was already present or had to be created.

    Raises:
        FileExistsError: if ``OPENIE_DIR`` exists but is not a directory.
        OSError: if the directory cannot be created for any other reason.
    """
    # isdir (not exists): a regular file at this path must not be reported
    # as an existing data directory.
    if os.path.isdir(OPENIE_DIR):
        logger.info(f"OpenIE数据目录已存在:{OPENIE_DIR}")
        return

    # exist_ok=True tolerates another process creating the directory between
    # the check above and this call; makedirs still raises if the path is
    # occupied by a non-directory.
    os.makedirs(OPENIE_DIR, exist_ok=True)
    logger.info(f"创建OpenIE数据目录:{OPENIE_DIR}")
|
||||||
|
|
||||||
def hash_deduplicate(
|
def hash_deduplicate(
|
||||||
raw_paragraphs: dict[str, str],
|
raw_paragraphs: dict[str, str],
|
||||||
@@ -178,7 +185,7 @@ def main(): # sourcery skip: dict-comprehension
|
|||||||
print("操作已取消")
|
print("操作已取消")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
print("\n" + "=" * 40 + "\n")
|
print("\n" + "=" * 40 + "\n")
|
||||||
|
ensure_openie_dir() # 确保OpenIE目录存在
|
||||||
logger.info("----开始导入openie数据----\n")
|
logger.info("----开始导入openie数据----\n")
|
||||||
|
|
||||||
logger.info("创建LLM客户端")
|
logger.info("创建LLM客户端")
|
||||||
|
|||||||
@@ -34,9 +34,9 @@ logger = get_logger("LPMM知识库-信息提取")
|
|||||||
ROOT_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
|
ROOT_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
|
||||||
TEMP_DIR = os.path.join(ROOT_PATH, "temp")
|
TEMP_DIR = os.path.join(ROOT_PATH, "temp")
|
||||||
IMPORTED_DATA_PATH = global_config["persistence"]["imported_data_path"] or os.path.join(
|
IMPORTED_DATA_PATH = global_config["persistence"]["imported_data_path"] or os.path.join(
|
||||||
ROOT_PATH, "data/imported_lpmm_data"
|
ROOT_PATH, "data", "imported_lpmm_data"
|
||||||
)
|
)
|
||||||
OPENIE_OUTPUT_DIR = global_config["persistence"]["openie_data_path"] or os.path.join(ROOT_PATH, "data/openie")
|
OPENIE_OUTPUT_DIR = global_config["persistence"]["openie_data_path"] or os.path.join(ROOT_PATH, "data", "openie")
|
||||||
|
|
||||||
# 创建一个线程安全的锁,用于保护文件操作和共享数据
|
# 创建一个线程安全的锁,用于保护文件操作和共享数据
|
||||||
file_lock = Lock()
|
file_lock = Lock()
|
||||||
@@ -45,6 +45,17 @@ open_ie_doc_lock = Lock()
|
|||||||
# 创建一个事件标志,用于控制程序终止
|
# 创建一个事件标志,用于控制程序终止
|
||||||
shutdown_event = Event()
|
shutdown_event = Event()
|
||||||
|
|
||||||
|
def ensure_dirs():
    """Ensure the temp, OpenIE output and imported-data directories exist.

    Checks ``TEMP_DIR``, ``OPENIE_OUTPUT_DIR`` and ``IMPORTED_DATA_PATH`` in
    that order, creates any that are missing, and logs each creation.
    Directories that already exist are left untouched and produce no log line.

    Raises:
        FileExistsError: if one of the paths exists but is not a directory.
        OSError: if a directory cannot be created for any other reason.
    """
    # (label used in the log message, path) for every directory required.
    required_dirs = (
        ("临时目录", TEMP_DIR),
        ("输出目录", OPENIE_OUTPUT_DIR),
        ("导入数据目录", IMPORTED_DATA_PATH),
    )
    for label, path in required_dirs:
        # isdir (not exists): a regular file at this path must not be
        # mistaken for an existing directory.
        if os.path.isdir(path):
            continue
        # exist_ok=True tolerates a concurrent creator winning the race
        # between the check above and this call.
        os.makedirs(path, exist_ok=True)
        logger.info(f"已创建{label}: {path}")
|
||||||
|
|
||||||
def process_single_text(pg_hash, raw_data, llm_client_list):
|
def process_single_text(pg_hash, raw_data, llm_client_list):
|
||||||
"""处理单个文本的函数,用于线程池"""
|
"""处理单个文本的函数,用于线程池"""
|
||||||
@@ -114,7 +125,7 @@ def main(): # sourcery skip: comprehension-to-generator, extract-method
|
|||||||
print("操作已取消")
|
print("操作已取消")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
print("\n" + "=" * 40 + "\n")
|
print("\n" + "=" * 40 + "\n")
|
||||||
|
ensure_dirs() # 确保目录存在
|
||||||
logger.info("--------进行信息提取--------\n")
|
logger.info("--------进行信息提取--------\n")
|
||||||
|
|
||||||
logger.info("创建LLM客户端")
|
logger.info("创建LLM客户端")
|
||||||
|
|||||||
Reference in New Issue
Block a user