Files
Mofox-Core/template/lpmm_config_template.toml
2025-05-13 22:14:26 +08:00

61 lines
2.2 KiB
TOML
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

[lpmm]
version = "0.1.0"
# LLM API 服务提供商,可配置多个
[[llm_providers]]
name = "localhost"
base_url = "http://127.0.0.1:8888/v1/"
api_key = "lm_studio"
[[llm_providers]]
name = "siliconflow"
base_url = "https://api.siliconflow.cn/v1/"
api_key = ""
[entity_extract.llm]
# 设置用于实体提取的LLM模型
provider = "siliconflow" # 服务提供商
model = "deepseek-ai/DeepSeek-V3" # 模型名称
[rdf_build.llm]
# 设置用于RDF构建的LLM模型
provider = "siliconflow" # 服务提供商
model = "deepseek-ai/DeepSeek-V3" # 模型名称
[embedding]
# 设置用于文本嵌入的Embedding模型
provider = "siliconflow" # 服务提供商
model = "Pro/BAAI/bge-m3" # 模型名称
dimension = 1024 # 嵌入维度
[rag.params]
# RAG参数配置
synonym_search_top_k = 10 # 同义词搜索TopK
synonym_threshold = 0.8 # 同义词阈值(相似度高于此阈值的词语会被认为是同义词)
[qa.llm]
# 设置用于QA的LLM模型
provider = "siliconflow" # 服务提供商
model = "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" # 模型名称
[info_extraction]
workers = 3 # 实体提取同时执行线程数非Pro模型不要设置超过5
[qa.params]
# QA参数配置
relation_search_top_k = 10 # 关系搜索TopK
relation_threshold = 0.5 # 关系阈值(相似度高于此阈值的关系会被认为是相关的关系)
paragraph_search_top_k = 1000 # 段落搜索TopK不能过小可能影响搜索结果
paragraph_node_weight = 0.05 # 段落节点权重(在图搜索&PPR计算中的权重当搜索仅使用DPR时此参数不起作用
ent_filter_top_k = 10 # 实体过滤TopK
ppr_damping = 0.8 # PPR阻尼系数
res_top_k = 3 # 最终提供的文段TopK
[persistence]
# 持久化配置(存储中间数据,防止重复计算)
data_root_path = "data" # 数据根目录
imported_data_path = "data/imported_lpmm_data" # 转换为json的raw文件数据路径
openie_data_path = "data/openie" # OpenIE数据路径
embedding_data_dir = "data/embedding" # 嵌入数据目录
rag_data_dir = "data/rag" # RAG数据目录