feat: 知识库小重构
This commit is contained in:
140
src/chat/knowledge/lpmmconfig.py
Normal file
140
src/chat/knowledge/lpmmconfig.py
Normal file
@@ -0,0 +1,140 @@
|
||||
import os
|
||||
import toml
|
||||
import sys
|
||||
|
||||
# import argparse
|
||||
from .global_logger import logger
|
||||
|
||||
PG_NAMESPACE = "paragraph"
|
||||
ENT_NAMESPACE = "entity"
|
||||
REL_NAMESPACE = "relation"
|
||||
|
||||
RAG_GRAPH_NAMESPACE = "rag-graph"
|
||||
RAG_ENT_CNT_NAMESPACE = "rag-ent-cnt"
|
||||
RAG_PG_HASH_NAMESPACE = "rag-pg-hash"
|
||||
|
||||
# 无效实体
|
||||
INVALID_ENTITY = [
|
||||
"",
|
||||
"你",
|
||||
"他",
|
||||
"她",
|
||||
"它",
|
||||
"我们",
|
||||
"你们",
|
||||
"他们",
|
||||
"她们",
|
||||
"它们",
|
||||
]
|
||||
|
||||
|
||||
def _load_config(config, config_file_path):
|
||||
"""读取TOML格式的配置文件"""
|
||||
if not os.path.exists(config_file_path):
|
||||
return
|
||||
with open(config_file_path, "r", encoding="utf-8") as f:
|
||||
file_config = toml.load(f)
|
||||
|
||||
# Check if all top-level keys from default config exist in the file config
|
||||
for key in config.keys():
|
||||
if key not in file_config:
|
||||
logger.critical(f"警告: 配置文件 '{config_file_path}' 缺少必需的顶级键: '{key}'。请检查配置文件。")
|
||||
logger.critical("请通过template/lpmm_config_template.toml文件进行更新")
|
||||
sys.exit(1)
|
||||
|
||||
if "llm_providers" in file_config:
|
||||
for provider in file_config["llm_providers"]:
|
||||
if provider["name"] not in config["llm_providers"]:
|
||||
config["llm_providers"][provider["name"]] = dict()
|
||||
config["llm_providers"][provider["name"]]["base_url"] = provider["base_url"]
|
||||
config["llm_providers"][provider["name"]]["api_key"] = provider["api_key"]
|
||||
|
||||
if "entity_extract" in file_config:
|
||||
config["entity_extract"] = file_config["entity_extract"]
|
||||
|
||||
if "rdf_build" in file_config:
|
||||
config["rdf_build"] = file_config["rdf_build"]
|
||||
|
||||
if "embedding" in file_config:
|
||||
config["embedding"] = file_config["embedding"]
|
||||
|
||||
if "rag" in file_config:
|
||||
config["rag"] = file_config["rag"]
|
||||
|
||||
if "qa" in file_config:
|
||||
config["qa"] = file_config["qa"]
|
||||
|
||||
if "persistence" in file_config:
|
||||
config["persistence"] = file_config["persistence"]
|
||||
# print(config)
|
||||
logger.info(f"从文件中读取配置: {config_file_path}")
|
||||
|
||||
|
||||
global_config = dict(
|
||||
{
|
||||
"lpmm": {
|
||||
"version": "0.1.0",
|
||||
},
|
||||
"llm_providers": {
|
||||
"localhost": {
|
||||
"base_url": "https://api.siliconflow.cn/v1",
|
||||
"api_key": "sk-ospynxadyorf",
|
||||
}
|
||||
},
|
||||
"entity_extract": {
|
||||
"llm": {
|
||||
"provider": "localhost",
|
||||
"model": "Pro/deepseek-ai/DeepSeek-V3",
|
||||
}
|
||||
},
|
||||
"rdf_build": {
|
||||
"llm": {
|
||||
"provider": "localhost",
|
||||
"model": "Pro/deepseek-ai/DeepSeek-V3",
|
||||
}
|
||||
},
|
||||
"embedding": {
|
||||
"provider": "localhost",
|
||||
"model": "Pro/BAAI/bge-m3",
|
||||
"dimension": 1024,
|
||||
},
|
||||
"rag": {
|
||||
"params": {
|
||||
"synonym_search_top_k": 10,
|
||||
"synonym_threshold": 0.75,
|
||||
}
|
||||
},
|
||||
"qa": {
|
||||
"params": {
|
||||
"relation_search_top_k": 10,
|
||||
"relation_threshold": 0.75,
|
||||
"paragraph_search_top_k": 10,
|
||||
"paragraph_node_weight": 0.05,
|
||||
"ent_filter_top_k": 10,
|
||||
"ppr_damping": 0.8,
|
||||
"res_top_k": 10,
|
||||
},
|
||||
"llm": {
|
||||
"provider": "localhost",
|
||||
"model": "qa",
|
||||
},
|
||||
},
|
||||
"persistence": {
|
||||
"data_root_path": "data",
|
||||
"raw_data_path": "data/raw.json",
|
||||
"openie_data_path": "data/openie.json",
|
||||
"embedding_data_dir": "data/embedding",
|
||||
"rag_data_dir": "data/rag",
|
||||
},
|
||||
"info_extraction": {
|
||||
"workers": 10,
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
# _load_config(global_config, parser.parse_args().config_path)
|
||||
# file_path = os.path.abspath(__file__)
|
||||
# dir_path = os.path.dirname(file_path)
|
||||
ROOT_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "..", ".."))
|
||||
config_path = os.path.join(ROOT_PATH, "config", "lpmm_config.toml")
|
||||
_load_config(global_config, config_path)
|
||||
Reference in New Issue
Block a user