feat: 尝试使用json_repair库代替HRAG的JSON修复
This commit is contained in:
@@ -1,76 +1,24 @@
|
||||
import json
|
||||
from json_repair import repair_json
|
||||
|
||||
# 以下代码用于修复损坏的 JSON 字符串。
|
||||
|
||||
def _find_unclosed(json_str):
|
||||
"""
|
||||
Identifies the unclosed braces and brackets in the JSON string.
|
||||
|
||||
Args:
|
||||
json_str (str): The JSON string to analyze.
|
||||
|
||||
Returns:
|
||||
list: A list of unclosed elements in the order they were opened.
|
||||
"""
|
||||
unclosed = []
|
||||
inside_string = False
|
||||
escape_next = False
|
||||
|
||||
for char in json_str:
|
||||
if inside_string:
|
||||
if escape_next:
|
||||
escape_next = False
|
||||
elif char == "\\":
|
||||
escape_next = True
|
||||
elif char == '"':
|
||||
inside_string = False
|
||||
else:
|
||||
if char == '"':
|
||||
inside_string = True
|
||||
elif char in "{[":
|
||||
unclosed.append(char)
|
||||
elif char in "}]":
|
||||
if unclosed and ((char == "}" and unclosed[-1] == "{") or (char == "]" and unclosed[-1] == "[")):
|
||||
unclosed.pop()
|
||||
|
||||
return unclosed
|
||||
|
||||
|
||||
# The following code is used to fix a broken JSON string.
|
||||
# From HippoRAG2 (GitHub: OSU-NLP-Group/HippoRAG)
|
||||
def fix_broken_generated_json(json_str: str) -> str:
|
||||
"""
|
||||
Fixes a malformed JSON string by:
|
||||
- Removing the last comma and any trailing content.
|
||||
- Iterating over the JSON string once to determine and fix unclosed braces or brackets.
|
||||
- Ensuring braces and brackets inside string literals are not considered.
|
||||
使用 json-repair 库修复格式错误的 JSON 字符串。
|
||||
|
||||
If the original json_str string can be successfully loaded by json.loads(), will directly return it without any modification.
|
||||
如果原始 json_str 字符串可以被 json.loads() 成功加载,则直接返回而不进行任何修改。
|
||||
|
||||
Args:
|
||||
json_str (str): The malformed JSON string to be fixed.
|
||||
参数:
|
||||
json_str (str): 需要修复的格式错误的 JSON 字符串。
|
||||
|
||||
Returns:
|
||||
str: The corrected JSON string.
|
||||
返回:
|
||||
str: 修复后的 JSON 字符串。
|
||||
"""
|
||||
|
||||
try:
|
||||
# Try to load the JSON to see if it is valid
|
||||
# 尝试加载 JSON 以查看其是否有效
|
||||
json.loads(json_str)
|
||||
return json_str # Return as-is if valid
|
||||
return json_str # 如果有效则按原样返回
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
|
||||
# Step 1: Remove trailing content after the last comma.
|
||||
last_comma_index = json_str.rfind(",")
|
||||
if last_comma_index != -1:
|
||||
json_str = json_str[:last_comma_index]
|
||||
|
||||
# Step 2: Identify unclosed braces and brackets.
|
||||
unclosed_elements = _find_unclosed(json_str)
|
||||
|
||||
# Step 3: Append the necessary closing elements in reverse order of opening.
|
||||
closing_map = {"{": "}", "[": "]"}
|
||||
for open_char in reversed(unclosed_elements):
|
||||
json_str += closing_map[open_char]
|
||||
|
||||
return json_str
|
||||
# 如果无效,则尝试修复它
|
||||
return repair_json(json_str)
|
||||
|
||||
Reference in New Issue
Block a user