From 4f2cb56740f6ee214b38806a5ce275388e355622 Mon Sep 17 00:00:00 2001
From: tt-P607 <68868379+tt-P607@users.noreply.github.com>
Date: Sat, 18 Oct 2025 23:38:03 +0800
Subject: [PATCH] =?UTF-8?q?refactor(learning):=20=E5=9C=A8=E7=BA=BF?=
 =?UTF-8?q?=E7=A8=8B=E6=B1=A0=E5=B7=A5=E4=BD=9C=E5=87=BD=E6=95=B0=E4=B8=AD?=
 =?UTF-8?q?=E5=AE=9E=E4=BE=8B=E5=8C=96LLMRequest?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

将 `LLMRequest` 对象的实例化从 `extract_information` 函数移动到线程池执行的 `extract_info_sync` 函数内部。

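此更改确保每个提交到线程池的任务（即每个段落对应的一次 `extract_info_sync` 调用）都会创建自己独立的 `LLMRequest` 实例，并且该实例只在这次调用通过 `asyncio.run` 启动的事件循环中使用；实例的粒度是“每个任务”而不是“每个工作线程”，从而避免了在多线程环境下共享同一实例可能引发的潜在并发问题。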
---
 scripts/lpmm_learning_tool.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/scripts/lpmm_learning_tool.py b/scripts/lpmm_learning_tool.py
index 19b2baf7c..dc9bda57b 100644
--- a/scripts/lpmm_learning_tool.py
+++ b/scripts/lpmm_learning_tool.py
@@ -192,7 +192,8 @@ async def extract_info_async(pg_hash, paragraph, llm_api):
         return None, pg_hash
 
 
-def extract_info_sync(pg_hash, paragraph, llm_api):
+def extract_info_sync(pg_hash, paragraph, model_set):
+    llm_api = LLMRequest(model_set=model_set)
     return asyncio.run(extract_info_async(pg_hash, paragraph, llm_api))
 
 
@@ -201,12 +202,12 @@ def extract_information(paragraphs_dict, model_set):
     os.makedirs(OPENIE_OUTPUT_DIR, exist_ok=True)
     os.makedirs(TEMP_DIR, exist_ok=True)
 
-    llm_api = LLMRequest(model_set=model_set)
     failed_hashes, open_ie_docs = [], []
 
     with ThreadPoolExecutor(max_workers=5) as executor:
         f_to_hash = {
-            executor.submit(extract_info_sync, p_hash, p, llm_api): p_hash for p_hash, p in paragraphs_dict.items()
+            executor.submit(extract_info_sync, p_hash, p, model_set): p_hash
+            for p_hash, p in paragraphs_dict.items()
         }
         with Progress(
             SpinnerColumn(),