From 7d547b7b801b3bc50b6c3e4d6820a577453e2410 Mon Sep 17 00:00:00 2001 From: Windpicker-owo <3431391539@qq.com> Date: Fri, 12 Dec 2025 15:09:00 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E4=BF=AE=E5=A4=8DJSON=E8=A7=A3?= =?UTF-8?q?=E6=9E=90=E9=97=AE=E9=A2=98=E5=B9=B6=E5=A2=9E=E5=8A=A0=E6=89=B9?= =?UTF-8?q?=E9=87=8F=E6=A0=87=E6=B3=A8=E5=A4=A7=E5=B0=8F=E8=87=B350?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/chat/semantic_interest/dataset.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/chat/semantic_interest/dataset.py b/src/chat/semantic_interest/dataset.py index f2ff61a20..181788254 100644 --- a/src/chat/semantic_interest/dataset.py +++ b/src/chat/semantic_interest/dataset.py @@ -346,6 +346,8 @@ class DatasetGenerator: response = response.split("```")[1].split("```")[0].strip() # 解析JSON + import json_repair + response = json_repair.repair_json(response) data = json.loads(response) # 验证格式 @@ -416,7 +418,7 @@ class DatasetGenerator: messages: list[dict[str, Any]], persona_info: dict[str, Any], save_path: Path | None = None, - batch_size: int = 20, + batch_size: int = 50, ) -> list[dict[str, Any]]: """批量标注消息(真正的批量模式)