import os import toml import sys # import argparse from .global_logger import logger PG_NAMESPACE = "paragraph" ENT_NAMESPACE = "entity" REL_NAMESPACE = "relation" RAG_GRAPH_NAMESPACE = "rag-graph" RAG_ENT_CNT_NAMESPACE = "rag-ent-cnt" RAG_PG_HASH_NAMESPACE = "rag-pg-hash" # 无效实体 INVALID_ENTITY = [ "", "你", "他", "她", "它", "我们", "你们", "他们", "她们", "它们", ] def _load_config(config, config_file_path): """读取TOML格式的配置文件""" if not os.path.exists(config_file_path): return with open(config_file_path, "r", encoding="utf-8") as f: file_config = toml.load(f) # Check if all top-level keys from default config exist in the file config for key in config.keys(): if key not in file_config: logger.critical(f"警告: 配置文件 '{config_file_path}' 缺少必需的顶级键: '{key}'。请检查配置文件。") logger.critical("请通过template/lpmm_config_template.toml文件进行更新") sys.exit(1) if "llm_providers" in file_config: for provider in file_config["llm_providers"]: if provider["name"] not in config["llm_providers"]: config["llm_providers"][provider["name"]] = dict() config["llm_providers"][provider["name"]]["base_url"] = provider["base_url"] config["llm_providers"][provider["name"]]["api_key"] = provider["api_key"] if "entity_extract" in file_config: config["entity_extract"] = file_config["entity_extract"] if "rdf_build" in file_config: config["rdf_build"] = file_config["rdf_build"] if "embedding" in file_config: config["embedding"] = file_config["embedding"] if "rag" in file_config: config["rag"] = file_config["rag"] if "qa" in file_config: config["qa"] = file_config["qa"] if "persistence" in file_config: config["persistence"] = file_config["persistence"] # print(config) logger.info(f"从文件中读取配置: {config_file_path}") global_config = dict( { "lpmm": { "version": "0.1.0", }, "llm_providers": { "localhost": { "base_url": "https://api.siliconflow.cn/v1", "api_key": "sk-ospynxadyorf", } }, "entity_extract": { "llm": { "provider": "localhost", "model": "Pro/deepseek-ai/DeepSeek-V3", } }, "rdf_build": { "llm": { "provider": "localhost", "model": "Pro/deepseek-ai/DeepSeek-V3", } }, "embedding": { "provider": "localhost", "model": "Pro/BAAI/bge-m3", "dimension": 1024, }, "rag": { "params": { "synonym_search_top_k": 10, "synonym_threshold": 0.75, } }, "qa": { "params": { "relation_search_top_k": 10, "relation_threshold": 0.75, "paragraph_search_top_k": 10, "paragraph_node_weight": 0.05, "ent_filter_top_k": 10, "ppr_damping": 0.8, "res_top_k": 10, }, "llm": { "provider": "localhost", "model": "qa", }, }, "persistence": { "data_root_path": "data", "raw_data_path": "data/raw.json", "openie_data_path": "data/openie.json", "embedding_data_dir": "data/embedding", "rag_data_dir": "data/rag", }, "info_extraction": { "workers": 10, }, } ) # _load_config(global_config, parser.parse_args().config_path) # file_path = os.path.abspath(__file__) # dir_path = os.path.dirname(file_path) ROOT_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "..", "..")) config_path = os.path.join(ROOT_PATH, "config", "lpmm_config.toml") _load_config(global_config, config_path)