fix: resolve code quality and hidden issues - remove unused imports, fix asyncio task references, fix type annotations

Co-authored-by: Windpicker-owo <221029311+Windpicker-owo@users.noreply.github.com>
copilot-swe-agent[bot]
2025-11-26 14:43:44 +00:00
parent 1c5028e719
commit 1ca3aa6a07
5 changed files with 114 additions and 118 deletions
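The "asyncio task references" fix named in the title is not visible in the hunks below, but the usual pattern behind it is well known: a bare asyncio.create_task() result can be garbage-collected before the task finishes, so the caller should hold a strong reference until it completes. A minimal sketch of that pattern, assuming fire-and-forget tasks (the helper and set names are illustrative, not from this repository):

```python
import asyncio

# Hypothetical sketch: keep strong references to background tasks so they
# cannot be garbage-collected mid-execution.
_background_tasks: set[asyncio.Task] = set()

def spawn(coro) -> asyncio.Task:
    """Create a task and hold a reference to it until it finishes."""
    task = asyncio.create_task(coro)
    _background_tasks.add(task)
    # Drop the reference once the task is done.
    task.add_done_callback(_background_tasks.discard)
    return task
```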

View File

@@ -10,7 +10,7 @@
Usage:
    python scripts/generate_missing_embeddings.py [--node-types TOPIC,OBJECT] [--batch-size 50]

Argument description:
    --node-types: node types that need embeddings generated; defaults to TOPIC,OBJECT
    --batch-size: batch size for processing; defaults to 50
@@ -25,7 +25,7 @@ sys.path.insert(0, str(Path(__file__).parent.parent))
async def generate_missing_embeddings(
-    target_node_types: list[str] = None,
+    target_node_types: list[str] | None = None,
    batch_size: int = 50,
):
    """

View File

@@ -174,12 +174,12 @@ def get_extraction_prompt(paragraph: str) -> str:
async def extract_info_async(pg_hash, paragraph, llm_api):
    """
    Asynchronously extract information from a single paragraph (with cache support)

    Args:
        pg_hash: hash of the paragraph
        paragraph: paragraph text
        llm_api: LLM request instance

    Returns:
        tuple: (doc_item or None, failed_hash or None)
    """
@@ -231,15 +231,15 @@ async def extract_info_async(pg_hash, paragraph, llm_api):
async def extract_information(paragraphs_dict, model_set):
    """
    🔧 Optimization: use true asyncio concurrency instead of multithreading

    This:
    1. Avoids "event loop closed" errors
    2. Uses I/O resources more efficiently
    3. Integrates seamlessly with our optimized LLM request layer

    Concurrency control:
    - A semaphore caps the maximum concurrency at 5 to avoid triggering API rate limits

    Args:
        paragraphs_dict: {hash: paragraph} dict
        model_set: model configuration
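The semaphore-based cap described in this docstring is a standard asyncio pattern. The sketch below shows one way to implement it, assuming a per-paragraph coroutine such as extract_info_async is passed in (the wrapper name run_limited is hypothetical):

```python
import asyncio

async def run_limited(paragraphs_dict, extract_one, limit: int = 5):
    """Run extract_one(pg_hash, paragraph) for every paragraph,
    with at most `limit` calls in flight at any moment."""
    semaphore = asyncio.Semaphore(limit)

    async def guarded(pg_hash, paragraph):
        async with semaphore:  # waits while `limit` calls are already active
            return await extract_one(pg_hash, paragraph)

    tasks = [guarded(h, p) for h, p in paragraphs_dict.items()]
    return await asyncio.gather(*tasks)
```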
@@ -307,8 +307,8 @@ async def extract_information(paragraphs_dict, model_set):
    now = datetime.datetime.now()
    filename = now.strftime("%Y-%m-%d-%H-%M-%S-openie.json")
    output_path = os.path.join(OPENIE_OUTPUT_DIR, filename)
-    with open(output_path, "wb") as f:
-        f.write(orjson.dumps(openie_obj._to_dict()))
+    async with aiofiles.open(output_path, "wb") as f:
+        await f.write(orjson.dumps(openie_obj._to_dict()))
    logger.info(f"Information extraction results saved to: {output_path}")
    logger.info(f"Successfully extracted information from {len(open_ie_docs)} paragraphs")