Files
Mofox-Core/src/chat/knowledge/lpmmconfig.py
2025-06-07 13:46:04 +08:00

141 lines
4.1 KiB
Python

import os
import toml
import sys
# import argparse
from .global_logger import logger
PG_NAMESPACE = "paragraph"
ENT_NAMESPACE = "entity"
REL_NAMESPACE = "relation"
RAG_GRAPH_NAMESPACE = "rag-graph"
RAG_ENT_CNT_NAMESPACE = "rag-ent-cnt"
RAG_PG_HASH_NAMESPACE = "rag-pg-hash"
# 无效实体
INVALID_ENTITY = [
"",
"",
"",
"",
"",
"我们",
"你们",
"他们",
"她们",
"它们",
]
def _load_config(config, config_file_path):
"""读取TOML格式的配置文件"""
if not os.path.exists(config_file_path):
return
with open(config_file_path, "r", encoding="utf-8") as f:
file_config = toml.load(f)
# Check if all top-level keys from default config exist in the file config
for key in config.keys():
if key not in file_config:
logger.critical(f"警告: 配置文件 '{config_file_path}' 缺少必需的顶级键: '{key}'。请检查配置文件。")
logger.critical("请通过template/lpmm_config_template.toml文件进行更新")
sys.exit(1)
if "llm_providers" in file_config:
for provider in file_config["llm_providers"]:
if provider["name"] not in config["llm_providers"]:
config["llm_providers"][provider["name"]] = dict()
config["llm_providers"][provider["name"]]["base_url"] = provider["base_url"]
config["llm_providers"][provider["name"]]["api_key"] = provider["api_key"]
if "entity_extract" in file_config:
config["entity_extract"] = file_config["entity_extract"]
if "rdf_build" in file_config:
config["rdf_build"] = file_config["rdf_build"]
if "embedding" in file_config:
config["embedding"] = file_config["embedding"]
if "rag" in file_config:
config["rag"] = file_config["rag"]
if "qa" in file_config:
config["qa"] = file_config["qa"]
if "persistence" in file_config:
config["persistence"] = file_config["persistence"]
# print(config)
logger.info(f"从文件中读取配置: {config_file_path}")
global_config = dict(
{
"lpmm": {
"version": "0.1.0",
},
"llm_providers": {
"localhost": {
"base_url": "https://api.siliconflow.cn/v1",
"api_key": "sk-ospynxadyorf",
}
},
"entity_extract": {
"llm": {
"provider": "localhost",
"model": "Pro/deepseek-ai/DeepSeek-V3",
}
},
"rdf_build": {
"llm": {
"provider": "localhost",
"model": "Pro/deepseek-ai/DeepSeek-V3",
}
},
"embedding": {
"provider": "localhost",
"model": "Pro/BAAI/bge-m3",
"dimension": 1024,
},
"rag": {
"params": {
"synonym_search_top_k": 10,
"synonym_threshold": 0.75,
}
},
"qa": {
"params": {
"relation_search_top_k": 10,
"relation_threshold": 0.75,
"paragraph_search_top_k": 10,
"paragraph_node_weight": 0.05,
"ent_filter_top_k": 10,
"ppr_damping": 0.8,
"res_top_k": 10,
},
"llm": {
"provider": "localhost",
"model": "qa",
},
},
"persistence": {
"data_root_path": "data",
"raw_data_path": "data/raw.json",
"openie_data_path": "data/openie.json",
"embedding_data_dir": "data/embedding",
"rag_data_dir": "data/rag",
},
"info_extraction": {
"workers": 10,
},
}
)
# _load_config(global_config, parser.parse_args().config_path)
# file_path = os.path.abspath(__file__)
# dir_path = os.path.dirname(file_path)
ROOT_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "..", ".."))
config_path = os.path.join(ROOT_PATH, "config", "lpmm_config.toml")
_load_config(global_config, config_path)