refactor(json_parser): 统一 LLM 响应的 JSON 解析逻辑,简化代码并提高解析成功率

This commit is contained in:
Windpicker-owo
2025-11-02 12:18:53 +08:00
parent 7235c681d8
commit 0e024d30c2
8 changed files with 511 additions and 179 deletions

View File

@@ -13,6 +13,7 @@ from src.common.logger import get_logger
from src.config.config import model_config
from src.llm_models.utils_model import LLMRequest
from src.plugin_system import BaseTool, ToolParamType
from src.utils.json_parser import extract_and_parse_json
logger = get_logger("chat_stream_impression_tool")
@@ -290,9 +291,11 @@ class ChatStreamImpressionTool(BaseTool):
logger.warning("LLM未返回有效响应")
return None
# 清理并解析响应
cleaned_response = self._clean_llm_json_response(llm_response)
response_data = json.loads(cleaned_response)
# 使用统一的 JSON 解析工具
response_data = extract_and_parse_json(llm_response, strict=False)
if not response_data or not isinstance(response_data, dict):
logger.warning("解析LLM响应失败")
return None
# 提取最终决定的数据
final_impression = {
@@ -373,35 +376,18 @@ class ChatStreamImpressionTool(BaseTool):
logger.error(f"更新聊天流印象到数据库失败: {e}", exc_info=True)
raise
def _clean_llm_json_response(self, response: str) -> str:
"""清理LLM响应移除可能的JSON格式标记
Args:
response: LLM原始响应
Returns:
str: 清理后的JSON字符串
# 已移除自定义的 _clean_llm_json_response 方法,统一使用 src.utils.json_parser.extract_and_parse_json
def _clean_llm_json_response_deprecated(self, response: str) -> str:
"""已废弃,保留仅用于兼容性
请使用 src.utils.json_parser.extract_and_parse_json 替代
"""
from src.utils.json_parser import extract_and_parse_json
try:
import re
cleaned = response.strip()
# 移除 ```json 或 ``` 等标记
cleaned = re.sub(r"^```(?:json)?\s*", "", cleaned, flags=re.MULTILINE | re.IGNORECASE)
cleaned = re.sub(r"\s*```$", "", cleaned, flags=re.MULTILINE)
# 尝试找到JSON对象的开始和结束
json_start = cleaned.find("{")
json_end = cleaned.rfind("}")
if json_start != -1 and json_end != -1 and json_end > json_start:
cleaned = cleaned[json_start : json_end + 1]
cleaned = cleaned.strip()
return cleaned
import json
result = extract_and_parse_json(response, strict=False)
return json.dumps(result) if result else response
except Exception as e:
logger.warning(f"清理LLM响应失败: {e}")
return response

View File

@@ -17,6 +17,7 @@ from src.config.config import global_config, model_config
from src.individuality.individuality import Individuality
from src.llm_models.utils_model import LLMRequest
from src.plugin_system.apis import message_api, send_api
from src.utils.json_parser import extract_and_parse_json
logger = get_logger("proactive_thinking_executor")
@@ -339,19 +340,17 @@ class ProactiveThinkingPlanner:
logger.warning("LLM未返回有效响应")
return None
# 清理并解析JSON响应
cleaned_response = self._clean_json_response(response)
decision = json.loads(cleaned_response)
# 使用统一的 JSON 解析工具
decision = extract_and_parse_json(response, strict=False)
if not decision or not isinstance(decision, dict):
logger.error("解析决策JSON失败")
if response:
logger.debug(f"原始响应: {response[:500]}")
return None
logger.info(f"决策结果: {decision.get('action', 'unknown')} - {decision.get('reasoning', '无理由')}")
return decision
except json.JSONDecodeError as e:
logger.error(f"解析决策JSON失败: {e}")
if response:
logger.debug(f"原始响应: {response}")
return None
except Exception as e:
logger.error(f"决策过程失败: {e}", exc_info=True)
return None
@@ -539,21 +538,7 @@ class ProactiveThinkingPlanner:
logger.warning(f"获取表达方式失败: {e}")
return ""
def _clean_json_response(self, response: str) -> str:
"""清理LLM响应中的JSON格式标记"""
import re
cleaned = response.strip()
cleaned = re.sub(r"^```(?:json)?\s*", "", cleaned, flags=re.MULTILINE | re.IGNORECASE)
cleaned = re.sub(r"\s*```$", "", cleaned, flags=re.MULTILINE)
json_start = cleaned.find("{")
json_end = cleaned.rfind("}")
if json_start != -1 and json_end != -1 and json_end > json_start:
cleaned = cleaned[json_start : json_end + 1]
return cleaned.strip()
# 已移除自定义的 _clean_json_response 方法,统一使用 src.utils.json_parser.extract_and_parse_json
# 全局规划器实例

View File

@@ -16,6 +16,7 @@ from src.common.logger import get_logger
from src.config.config import global_config, model_config
from src.llm_models.utils_model import LLMRequest
from src.plugin_system import BaseTool, ToolParamType
from src.utils.json_parser import extract_and_parse_json
logger = get_logger("user_profile_tool")
@@ -269,9 +270,12 @@ class UserProfileTool(BaseTool):
logger.warning("LLM未返回有效响应")
return None
# 清理并解析响应
cleaned_response = self._clean_llm_json_response(llm_response)
response_data = orjson.loads(cleaned_response)
# 使用统一的 JSON 解析工具
response_data = extract_and_parse_json(llm_response, strict=False)
if not response_data or not isinstance(response_data, dict):
logger.error("LLM响应JSON解析失败")
logger.debug(f"LLM原始响应: {llm_response[:500] if llm_response else 'N/A'}")
return None
# 提取最终决定的数据
final_profile = {
@@ -285,11 +289,6 @@ class UserProfileTool(BaseTool):
logger.debug(f"决策理由: {response_data.get('reasoning', '')}")
return final_profile
except orjson.JSONDecodeError as e:
logger.error(f"LLM响应JSON解析失败: {e}")
logger.debug(f"LLM原始响应: {llm_response if 'llm_response' in locals() else 'N/A'}")
return None
except Exception as e:
logger.error(f"LLM决策失败: {e}", exc_info=True)
return None
@@ -336,35 +335,4 @@ class UserProfileTool(BaseTool):
logger.error(f"更新用户画像到数据库失败: {e}", exc_info=True)
raise
def _clean_llm_json_response(self, response: str) -> str:
"""清理LLM响应移除可能的JSON格式标记
Args:
response: LLM原始响应
Returns:
str: 清理后的JSON字符串
"""
try:
import re
cleaned = response.strip()
# 移除 ```json 或 ``` 等标记
cleaned = re.sub(r"^```(?:json)?\s*", "", cleaned, flags=re.MULTILINE | re.IGNORECASE)
cleaned = re.sub(r"\s*```$", "", cleaned, flags=re.MULTILINE)
# 尝试找到JSON对象的开始和结束
json_start = cleaned.find("{")
json_end = cleaned.rfind("}")
if json_start != -1 and json_end != -1 and json_end > json_start:
cleaned = cleaned[json_start:json_end + 1]
cleaned = cleaned.strip()
return cleaned
except Exception as e:
logger.warning(f"清理LLM响应失败: {e}")
return response
# 已移除自定义的 _clean_llm_json_response 方法,统一使用 src.utils.json_parser.extract_and_parse_json