refactor(json_parser): 统一 LLM 响应的 JSON 解析逻辑,简化代码并提高解析成功率

This commit is contained in:
Windpicker-owo
2025-11-02 12:18:53 +08:00
parent bd1624a018
commit d1c3d2196a
8 changed files with 511 additions and 179 deletions

View File

@@ -15,6 +15,7 @@ from src.common.config_helpers import resolve_embedding_dimension
from src.common.data_models.bot_interest_data_model import BotInterestTag, BotPersonalityInterests, InterestMatchResult
from src.common.logger import get_logger
from src.config.config import global_config
from src.utils.json_parser import extract_and_parse_json
logger = get_logger("bot_interest_manager")
@@ -194,7 +195,10 @@ class BotInterestManager:
raise RuntimeError("❌ LLM未返回有效响应")
logger.info("✅ LLM响应成功开始解析兴趣标签...")
interests_data = orjson.loads(response)
# 使用统一的 JSON 解析工具
interests_data = extract_and_parse_json(response, strict=False)
if not interests_data or not isinstance(interests_data, dict):
raise RuntimeError("❌ 解析LLM响应失败未获取到有效的JSON数据")
bot_interests = BotPersonalityInterests(
personality_id=personality_id, personality_description=personality_description
@@ -225,9 +229,6 @@ class BotInterestManager:
logger.info("✅ 兴趣标签生成完成")
return bot_interests
except orjson.JSONDecodeError as e:
logger.error(f"❌ 解析LLM响应JSON失败: {e}")
raise
except Exception as e:
logger.error(f"❌ 根据人设生成兴趣标签失败: {e}")
traceback.print_exc()
@@ -270,9 +271,8 @@ class BotInterestManager:
if reasoning_content:
logger.debug(f"🧠 推理内容: {reasoning_content[:100]}...")
# 清理响应内容,移除可能的代码块标记
cleaned_response = self._clean_llm_response(response)
return cleaned_response
# 直接返回原始响应,后续使用统一的 JSON 解析工具
return response
else:
logger.warning("⚠️ LLM返回空响应或调用失败")
return None
@@ -283,25 +283,6 @@ class BotInterestManager:
traceback.print_exc()
return None
def _clean_llm_response(self, response: str) -> str:
    """Strip Markdown code-fence markers from an LLM reply and isolate the JSON object.

    Args:
        response: Raw text returned by the model.

    Returns:
        The text spanning the first "{" through the last "}" once the fence
        markers are removed; when no such span exists, the fence-free,
        whitespace-trimmed text is returned unchanged.
    """
    import re

    # Remove opening ```json fences first, then every remaining ``` fence
    # together with the whitespace that precedes it.
    without_fences = re.sub(r"\s*```", "", re.sub(r"```json\s*", "", response)).strip()

    # The greedy match with DOTALL runs from the first "{" to the last "}",
    # discarding any explanatory text the model placed around the object.
    found = re.search(r"\{.*\}", without_fences, re.DOTALL)
    result = found.group(0) if found else without_fences

    # Long payloads are truncated to 200 characters in the debug log.
    if len(result) > 200:
        logger.debug(f"🧹 清理后的响应: {result[:200]}...")
    else:
        logger.debug(f"🧹 清理后的响应: {result}")
    return result
async def _generate_embeddings_for_tags(self, interests: BotPersonalityInterests):
"""为所有兴趣标签生成embedding"""
if not hasattr(self, "embedding_request"):

View File

@@ -11,6 +11,7 @@ import orjson
from src.chat.memory_system.memory_chunk import MemoryType
from src.common.logger import get_logger
from src.llm_models.utils_model import LLMRequest
from src.utils.json_parser import extract_and_parse_json
logger = get_logger(__name__)
@@ -58,16 +59,10 @@ class MemoryQueryPlanner:
try:
response, _ = await self.model.generate_response_async(prompt, temperature=0.2)
payload = self._extract_json_payload(response)
if not payload:
logger.debug("查询规划模型未返回结构化结果,使用默认规划")
return self._default_plan(query_text)
try:
data = orjson.loads(payload)
except orjson.JSONDecodeError as exc:
preview = payload[:200]
logger.warning("解析查询规划JSON失败: %s,片段: %s", exc, preview)
# 使用统一的 JSON 解析工具
data = extract_and_parse_json(response, strict=False)
if not data or not isinstance(data, dict):
logger.debug("查询规划模型未返回有效的结构化结果,使用默认规划")
return self._default_plan(query_text)
plan = self._parse_plan_dict(data, query_text)
@@ -205,24 +200,6 @@ class MemoryQueryPlanner:
请直接输出符合要求的 JSON 对象,禁止添加额外文本或 Markdown 代码块。
"""
def _extract_json_payload(self, response: str) -> str | None:
if not response:
return None
stripped = response.strip()
code_block_match = re.search(r"```(?:json)?\s*(.*?)```", stripped, re.IGNORECASE | re.DOTALL)
if code_block_match:
candidate = code_block_match.group(1).strip()
if candidate:
return candidate
start = stripped.find("{")
end = stripped.rfind("}")
if start != -1 and end != -1 and end > start:
return stripped[start : end + 1]
return stripped if stripped.startswith("{") and stripped.endswith("}") else None
@staticmethod
def _safe_str(value: Any) -> str:
if isinstance(value, str):

View File

@@ -19,6 +19,7 @@ from src.chat.memory_system.memory_builder import MemoryBuilder, MemoryExtractio
from src.chat.memory_system.memory_chunk import MemoryChunk
from src.chat.memory_system.memory_fusion import MemoryFusionEngine
from src.chat.memory_system.memory_query_planner import MemoryQueryPlanner
from src.utils.json_parser import extract_and_parse_json
# 全局背景任务集合
_background_tasks = set()
@@ -987,28 +988,7 @@ class MemorySystem:
return [chunk]
@staticmethod
def _extract_json_payload(response: str) -> str | None:
"""从模型响应中提取JSON部分兼容Markdown代码块等格式"""
if not response:
return None
stripped = response.strip()
# 优先处理Markdown代码块格式 ```json ... ```
code_block_match = re.search(r"```(?:json)?\s*(.*?)```", stripped, re.IGNORECASE | re.DOTALL)
if code_block_match:
candidate = code_block_match.group(1).strip()
if candidate:
return candidate
# 回退到查找第一个 JSON 对象的大括号范围
start = stripped.find("{")
end = stripped.rfind("}")
if start != -1 and end != -1 and end > start:
return stripped[start : end + 1].strip()
return stripped if stripped.startswith("{") and stripped.endswith("}") else None
# 已移除自定义的 _extract_json_payload 方法,统一使用 src.utils.json_parser.extract_and_parse_json
def _normalize_context(
self, raw_context: dict[str, Any] | None, user_id: str | None, timestamp: float | None
@@ -1414,13 +1394,13 @@ class MemorySystem:
return 0.5
response, _ = await self.value_assessment_model.generate_response_async(prompt, temperature=0.3)
# 解析响应
try:
payload = self._extract_json_payload(response)
if not payload:
raise ValueError("未在响应中找到有效的JSON负载")
# 解析响应 - 使用统一的 JSON 解析工具
result = extract_and_parse_json(response, strict=False)
if not result or not isinstance(result, dict):
logger.warning(f"解析价值评估响应失败,响应片段: {response[:200]}")
return 0.5 # 默认中等价值
result = orjson.loads(payload)
try:
value_score = float(result.get("value_score", 0.0))
reasoning = result.get("reasoning", "")
key_factors = result.get("key_factors", [])
@@ -1431,9 +1411,8 @@ class MemorySystem:
return max(0.0, min(1.0, value_score))
except (orjson.JSONDecodeError, ValueError) as e:
preview = response[:200].replace("\n", " ")
logger.warning(f"解析价值评估响应失败: {e}, 响应片段: {preview}")
except (ValueError, TypeError) as e:
logger.warning(f"解析价值评估数值失败: {e}")
return 0.5 # 默认中等价值
except Exception as e: