61 lines
2.2 KiB
TOML
61 lines
2.2 KiB
TOML
[lpmm]
|
||
version = "0.1.0"
|
||
|
||
# LLM API 服务提供商,可配置多个
|
||
[[llm_providers]]
|
||
name = "localhost"
|
||
base_url = "http://127.0.0.1:8888/v1/"
|
||
api_key = "lm_studio"
|
||
|
||
[[llm_providers]]
|
||
name = "siliconflow"
|
||
base_url = "https://api.siliconflow.cn/v1/"
|
||
api_key = ""
|
||
|
||
[entity_extract.llm]
|
||
# 设置用于实体提取的LLM模型
|
||
provider = "siliconflow" # 服务提供商
|
||
model = "deepseek-ai/DeepSeek-V3" # 模型名称
|
||
|
||
[rdf_build.llm]
|
||
# 设置用于RDF构建的LLM模型
|
||
provider = "siliconflow" # 服务提供商
|
||
model = "deepseek-ai/DeepSeek-V3" # 模型名称
|
||
|
||
[embedding]
|
||
# 设置用于文本嵌入的Embedding模型
|
||
provider = "siliconflow" # 服务提供商
|
||
model = "Pro/BAAI/bge-m3" # 模型名称
|
||
dimension = 1024 # 嵌入维度
|
||
|
||
[rag.params]
|
||
# RAG参数配置
|
||
synonym_search_top_k = 10 # 同义词搜索TopK
|
||
synonym_threshold = 0.8 # 同义词阈值(相似度高于此阈值的词语会被认为是同义词)
|
||
|
||
[qa.llm]
|
||
# 设置用于QA的LLM模型
|
||
provider = "siliconflow" # 服务提供商
|
||
model = "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" # 模型名称
|
||
|
||
[info_extraction]
|
||
workers = 3 # 实体提取同时执行线程数,非Pro模型不要设置超过5
|
||
|
||
[qa.params]
|
||
# QA参数配置
|
||
relation_search_top_k = 10 # 关系搜索TopK
|
||
relation_threshold = 0.5 # 关系阈值(相似度高于此阈值的关系会被认为是相关的关系)
|
||
paragraph_search_top_k = 1000 # 段落搜索TopK(不能过小,可能影响搜索结果)
|
||
paragraph_node_weight = 0.05 # 段落节点权重(在图搜索&PPR计算中的权重,当搜索仅使用DPR时,此参数不起作用)
|
||
ent_filter_top_k = 10 # 实体过滤TopK
|
||
ppr_damping = 0.8 # PPR阻尼系数
|
||
res_top_k = 3 # 最终提供的文段TopK
|
||
|
||
[persistence]
|
||
# 持久化配置(存储中间数据,防止重复计算)
|
||
data_root_path = "data" # 数据根目录
|
||
raw_data_path = "data/imported_lpmm_data" # 原始数据路径
|
||
openie_data_path = "data/openie" # OpenIE数据路径
|
||
embedding_data_dir = "data/embedding" # 嵌入数据目录
|
||
rag_data_dir = "data/rag" # RAG数据目录
|