From 4f2cb56740f6ee214b38806a5ce275388e355622 Mon Sep 17 00:00:00 2001
From: tt-P607 <68868379+tt-P607@users.noreply.github.com>
Date: Sat, 18 Oct 2025 23:38:03 +0800
Subject: [PATCH] =?UTF-8?q?refactor(learning):=20=E5=9C=A8=E7=BA=BF?=
 =?UTF-8?q?=E7=A8=8B=E6=B1=A0=E5=B7=A5=E4=BD=9C=E5=87=BD=E6=95=B0=E4=B8=AD?=
 =?UTF-8?q?=E5=AE=9E=E4=BE=8B=E5=8C=96LLMRequest?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

将 `LLMRequest` 对象的实例化从 `extract_information` 函数移动到线程池执行的 `extract_info_sync` 函数内部。

此更改确保每个工作线程都有自己独立的 `LLMRequest` 实例,避免了在多线程环境下共享同一实例可能引发的潜在并发问题。
---
 scripts/lpmm_learning_tool.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/scripts/lpmm_learning_tool.py b/scripts/lpmm_learning_tool.py
index 19b2baf7c..dc9bda57b 100644
--- a/scripts/lpmm_learning_tool.py
+++ b/scripts/lpmm_learning_tool.py
@@ -192,7 +192,8 @@ async def extract_info_async(pg_hash, paragraph, llm_api):
         return None, pg_hash
 
 
-def extract_info_sync(pg_hash, paragraph, llm_api):
+def extract_info_sync(pg_hash, paragraph, model_set):
+    llm_api = LLMRequest(model_set=model_set)
     return asyncio.run(extract_info_async(pg_hash, paragraph, llm_api))
 
 
@@ -201,12 +202,12 @@ def extract_information(paragraphs_dict, model_set):
     os.makedirs(OPENIE_OUTPUT_DIR, exist_ok=True)
     os.makedirs(TEMP_DIR, exist_ok=True)
 
-    llm_api = LLMRequest(model_set=model_set)
     failed_hashes, open_ie_docs = [], []
 
     with ThreadPoolExecutor(max_workers=5) as executor:
         f_to_hash = {
-            executor.submit(extract_info_sync, p_hash, p, llm_api): p_hash for p_hash, p in paragraphs_dict.items()
+            executor.submit(extract_info_sync, p_hash, p, model_set): p_hash
+            for p_hash, p in paragraphs_dict.items()
         }
         with Progress(
             SpinnerColumn(),