From 2bb7e512520ab7760870ccefc94b55eac3f0054d Mon Sep 17 00:00:00 2001 From: tt-P607 <68868379+tt-P607@users.noreply.github.com> Date: Sat, 18 Oct 2025 23:17:24 +0800 Subject: [PATCH] =?UTF-8?q?feat(knowledge):=20=E6=96=B0=E5=A2=9E=E9=97=AE?= =?UTF-8?q?=E7=AD=94=E6=AE=B5=E8=90=BD=E7=9B=B8=E4=BC=BC=E5=BA=A6=E9=98=88?= =?UTF-8?q?=E5=80=BC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 为知识库问答中的段落检索增加相似度阈值过滤功能。 此前,即使相似度很低的段落也会被返回,可能导致问答结果不准确。通过新增 `qa_paragraph_threshold` 配置项,可以过滤掉低于设定阈值的段落,从而提高问答的精准度和相关性。 --- src/chat/knowledge/qa_manager.py | 2 ++ src/config/official_configs.py | 1 + template/bot_config_template.toml | 3 ++- 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/chat/knowledge/qa_manager.py b/src/chat/knowledge/qa_manager.py index b08fb24e0..56fa6275f 100644 --- a/src/chat/knowledge/qa_manager.py +++ b/src/chat/knowledge/qa_manager.py @@ -88,6 +88,8 @@ class QAManager: else: logger.info("未找到相关关系,将使用文段检索结果") result = paragraph_search_res + if result and result[0][1] < global_config.lpmm_knowledge.qa_paragraph_threshold: + result = [] ppr_node_weights = None # 过滤阈值 diff --git a/src/config/official_configs.py b/src/config/official_configs.py index c5968822f..3a7e5cac1 100644 --- a/src/config/official_configs.py +++ b/src/config/official_configs.py @@ -481,6 +481,7 @@ class LPMMKnowledgeConfig(ValidatedConfigBase): info_extraction_workers: int = Field(default=3, description="信息提取工作线程数") qa_relation_search_top_k: int = Field(default=10, description="QA关系搜索Top K") qa_relation_threshold: float = Field(default=0.75, description="QA关系阈值") + qa_paragraph_threshold: float = Field(default=0.3, description="QA段落阈值") qa_paragraph_search_top_k: int = Field(default=1000, description="QA段落搜索Top K") qa_paragraph_node_weight: float = Field(default=0.05, description="QA段落节点权重") qa_ent_filter_top_k: int = Field(default=10, description="QA实体过滤Top K") diff --git a/template/bot_config_template.toml b/template/bot_config_template.toml index 0d4581d07..75b9a6963 100644 --- a/template/bot_config_template.toml +++ b/template/bot_config_template.toml @@ -1,5 +1,5 @@ [inner] -version = "7.3.2" +version = "7.3.3" #----以下是给开发人员阅读的,如果你只是部署了MoFox-Bot,不需要阅读---- #如果你想要修改配置文件,请递增version的值 @@ -318,6 +318,7 @@ info_extraction_workers = 3 # 实体提取同时执行线程数,非Pro模型 qa_relation_search_top_k = 10 # 关系搜索TopK qa_relation_threshold = 0.5 # 关系阈值(相似度高于此阈值的关系会被认为是相关的关系) qa_paragraph_search_top_k = 1000 # 段落搜索TopK(不能过小,可能影响搜索结果) +qa_paragraph_threshold = 0.4 # 段落阈值(相似度高于此阈值的段落才会被认为是相关的) qa_paragraph_node_weight = 0.05 # 段落节点权重(在图搜索&PPR计算中的权重,当搜索仅使用DPR时,此参数不起作用) qa_ent_filter_top_k = 10 # 实体过滤TopK qa_ppr_damping = 0.8 # PPR阻尼系数