refactor(json_parser): 统一 LLM 响应的 JSON 解析逻辑,简化代码并提高解析成功率
This commit is contained in:
@@ -15,6 +15,7 @@ from src.common.config_helpers import resolve_embedding_dimension
|
||||
from src.common.data_models.bot_interest_data_model import BotInterestTag, BotPersonalityInterests, InterestMatchResult
|
||||
from src.common.logger import get_logger
|
||||
from src.config.config import global_config
|
||||
from src.utils.json_parser import extract_and_parse_json
|
||||
|
||||
logger = get_logger("bot_interest_manager")
|
||||
|
||||
@@ -194,7 +195,10 @@ class BotInterestManager:
|
||||
raise RuntimeError("❌ LLM未返回有效响应")
|
||||
|
||||
logger.info("✅ LLM响应成功,开始解析兴趣标签...")
|
||||
interests_data = orjson.loads(response)
|
||||
# 使用统一的 JSON 解析工具
|
||||
interests_data = extract_and_parse_json(response, strict=False)
|
||||
if not interests_data or not isinstance(interests_data, dict):
|
||||
raise RuntimeError("❌ 解析LLM响应失败,未获取到有效的JSON数据")
|
||||
|
||||
bot_interests = BotPersonalityInterests(
|
||||
personality_id=personality_id, personality_description=personality_description
|
||||
@@ -225,9 +229,6 @@ class BotInterestManager:
|
||||
logger.info("✅ 兴趣标签生成完成")
|
||||
return bot_interests
|
||||
|
||||
except orjson.JSONDecodeError as e:
|
||||
logger.error(f"❌ 解析LLM响应JSON失败: {e}")
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"❌ 根据人设生成兴趣标签失败: {e}")
|
||||
traceback.print_exc()
|
||||
@@ -270,9 +271,8 @@ class BotInterestManager:
|
||||
if reasoning_content:
|
||||
logger.debug(f"🧠 推理内容: {reasoning_content[:100]}...")
|
||||
|
||||
# 清理响应内容,移除可能的代码块标记
|
||||
cleaned_response = self._clean_llm_response(response)
|
||||
return cleaned_response
|
||||
# 直接返回原始响应,后续使用统一的 JSON 解析工具
|
||||
return response
|
||||
else:
|
||||
logger.warning("⚠️ LLM返回空响应或调用失败")
|
||||
return None
|
||||
@@ -283,25 +283,6 @@ class BotInterestManager:
|
||||
traceback.print_exc()
|
||||
return None
|
||||
|
||||
def _clean_llm_response(self, response: str) -> str:
|
||||
"""清理LLM响应,移除代码块标记和其他非JSON内容"""
|
||||
import re
|
||||
|
||||
# 移除 ```json 和 ``` 标记
|
||||
cleaned = re.sub(r"```json\s*", "", response)
|
||||
cleaned = re.sub(r"\s*```", "", cleaned)
|
||||
|
||||
# 移除可能的多余空格和换行
|
||||
cleaned = cleaned.strip()
|
||||
|
||||
# 尝试提取JSON对象(如果响应中有其他文本)
|
||||
json_match = re.search(r"\{.*\}", cleaned, re.DOTALL)
|
||||
if json_match:
|
||||
cleaned = json_match.group(0)
|
||||
|
||||
logger.debug(f"🧹 清理后的响应: {cleaned[:200]}..." if len(cleaned) > 200 else f"🧹 清理后的响应: {cleaned}")
|
||||
return cleaned
|
||||
|
||||
async def _generate_embeddings_for_tags(self, interests: BotPersonalityInterests):
|
||||
"""为所有兴趣标签生成embedding"""
|
||||
if not hasattr(self, "embedding_request"):
|
||||
|
||||
@@ -11,6 +11,7 @@ import orjson
|
||||
from src.chat.memory_system.memory_chunk import MemoryType
|
||||
from src.common.logger import get_logger
|
||||
from src.llm_models.utils_model import LLMRequest
|
||||
from src.utils.json_parser import extract_and_parse_json
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
@@ -58,16 +59,10 @@ class MemoryQueryPlanner:
|
||||
|
||||
try:
|
||||
response, _ = await self.model.generate_response_async(prompt, temperature=0.2)
|
||||
payload = self._extract_json_payload(response)
|
||||
if not payload:
|
||||
logger.debug("查询规划模型未返回结构化结果,使用默认规划")
|
||||
return self._default_plan(query_text)
|
||||
|
||||
try:
|
||||
data = orjson.loads(payload)
|
||||
except orjson.JSONDecodeError as exc:
|
||||
preview = payload[:200]
|
||||
logger.warning("解析查询规划JSON失败: %s,片段: %s", exc, preview)
|
||||
# 使用统一的 JSON 解析工具
|
||||
data = extract_and_parse_json(response, strict=False)
|
||||
if not data or not isinstance(data, dict):
|
||||
logger.debug("查询规划模型未返回有效的结构化结果,使用默认规划")
|
||||
return self._default_plan(query_text)
|
||||
|
||||
plan = self._parse_plan_dict(data, query_text)
|
||||
@@ -205,24 +200,6 @@ class MemoryQueryPlanner:
|
||||
请直接输出符合要求的 JSON 对象,禁止添加额外文本或 Markdown 代码块。
|
||||
"""
|
||||
|
||||
def _extract_json_payload(self, response: str) -> str | None:
|
||||
if not response:
|
||||
return None
|
||||
|
||||
stripped = response.strip()
|
||||
code_block_match = re.search(r"```(?:json)?\s*(.*?)```", stripped, re.IGNORECASE | re.DOTALL)
|
||||
if code_block_match:
|
||||
candidate = code_block_match.group(1).strip()
|
||||
if candidate:
|
||||
return candidate
|
||||
|
||||
start = stripped.find("{")
|
||||
end = stripped.rfind("}")
|
||||
if start != -1 and end != -1 and end > start:
|
||||
return stripped[start : end + 1]
|
||||
|
||||
return stripped if stripped.startswith("{") and stripped.endswith("}") else None
|
||||
|
||||
@staticmethod
|
||||
def _safe_str(value: Any) -> str:
|
||||
if isinstance(value, str):
|
||||
|
||||
@@ -19,6 +19,7 @@ from src.chat.memory_system.memory_builder import MemoryBuilder, MemoryExtractio
|
||||
from src.chat.memory_system.memory_chunk import MemoryChunk
|
||||
from src.chat.memory_system.memory_fusion import MemoryFusionEngine
|
||||
from src.chat.memory_system.memory_query_planner import MemoryQueryPlanner
|
||||
from src.utils.json_parser import extract_and_parse_json
|
||||
|
||||
# 全局背景任务集合
|
||||
_background_tasks = set()
|
||||
@@ -987,28 +988,7 @@ class MemorySystem:
|
||||
|
||||
return [chunk]
|
||||
|
||||
@staticmethod
|
||||
def _extract_json_payload(response: str) -> str | None:
|
||||
"""从模型响应中提取JSON部分,兼容Markdown代码块等格式"""
|
||||
if not response:
|
||||
return None
|
||||
|
||||
stripped = response.strip()
|
||||
|
||||
# 优先处理Markdown代码块格式 ```json ... ```
|
||||
code_block_match = re.search(r"```(?:json)?\s*(.*?)```", stripped, re.IGNORECASE | re.DOTALL)
|
||||
if code_block_match:
|
||||
candidate = code_block_match.group(1).strip()
|
||||
if candidate:
|
||||
return candidate
|
||||
|
||||
# 回退到查找第一个 JSON 对象的大括号范围
|
||||
start = stripped.find("{")
|
||||
end = stripped.rfind("}")
|
||||
if start != -1 and end != -1 and end > start:
|
||||
return stripped[start : end + 1].strip()
|
||||
|
||||
return stripped if stripped.startswith("{") and stripped.endswith("}") else None
|
||||
# 已移除自定义的 _extract_json_payload 方法,统一使用 src.utils.json_parser.extract_and_parse_json
|
||||
|
||||
def _normalize_context(
|
||||
self, raw_context: dict[str, Any] | None, user_id: str | None, timestamp: float | None
|
||||
@@ -1414,13 +1394,13 @@ class MemorySystem:
|
||||
return 0.5
|
||||
response, _ = await self.value_assessment_model.generate_response_async(prompt, temperature=0.3)
|
||||
|
||||
# 解析响应
|
||||
try:
|
||||
payload = self._extract_json_payload(response)
|
||||
if not payload:
|
||||
raise ValueError("未在响应中找到有效的JSON负载")
|
||||
# 解析响应 - 使用统一的 JSON 解析工具
|
||||
result = extract_and_parse_json(response, strict=False)
|
||||
if not result or not isinstance(result, dict):
|
||||
logger.warning(f"解析价值评估响应失败,响应片段: {response[:200]}")
|
||||
return 0.5 # 默认中等价值
|
||||
|
||||
result = orjson.loads(payload)
|
||||
try:
|
||||
value_score = float(result.get("value_score", 0.0))
|
||||
reasoning = result.get("reasoning", "")
|
||||
key_factors = result.get("key_factors", [])
|
||||
@@ -1431,9 +1411,8 @@ class MemorySystem:
|
||||
|
||||
return max(0.0, min(1.0, value_score))
|
||||
|
||||
except (orjson.JSONDecodeError, ValueError) as e:
|
||||
preview = response[:200].replace("\n", " ")
|
||||
logger.warning(f"解析价值评估响应失败: {e}, 响应片段: {preview}")
|
||||
except (ValueError, TypeError) as e:
|
||||
logger.warning(f"解析价值评估数值失败: {e}")
|
||||
return 0.5 # 默认中等价值
|
||||
|
||||
except Exception as e:
|
||||
|
||||
Reference in New Issue
Block a user