refactor(learning): 在线程池工作函数中实例化LLMRequest

将 `LLMRequest` 对象的实例化从 `extract_information` 函数移动到线程池执行的 `extract_info_sync` 函数内部。

此更改确保每个工作线程都有自己独立的 `LLMRequest` 实例,避免了在多线程环境下共享同一实例可能引发的潜在并发问题。
This commit is contained in:
tt-P607
2025-10-18 23:38:03 +08:00
parent 2bb7e51252
commit 4f2cb56740

View File

@@ -192,7 +192,8 @@ async def extract_info_async(pg_hash, paragraph, llm_api):
         return None, pg_hash


-def extract_info_sync(pg_hash, paragraph, llm_api):
+def extract_info_sync(pg_hash, paragraph, model_set):
+    llm_api = LLMRequest(model_set=model_set)
     return asyncio.run(extract_info_async(pg_hash, paragraph, llm_api))
@@ -201,12 +202,12 @@ def extract_information(paragraphs_dict, model_set):
     os.makedirs(OPENIE_OUTPUT_DIR, exist_ok=True)
     os.makedirs(TEMP_DIR, exist_ok=True)
-    llm_api = LLMRequest(model_set=model_set)
     failed_hashes, open_ie_docs = [], []
     with ThreadPoolExecutor(max_workers=5) as executor:
         f_to_hash = {
-            executor.submit(extract_info_sync, p_hash, p, llm_api): p_hash for p_hash, p in paragraphs_dict.items()
+            executor.submit(extract_info_sync, p_hash, p, model_set): p_hash
+            for p_hash, p in paragraphs_dict.items()
         }
         with Progress(
             SpinnerColumn(),