import orjson
from json_repair import repair_json

# Maps each opening bracket to the character that closes it.
_CLOSING_FOR = {"{": "}", "[": "]"}


def _structural_chars(json_str):
    """
    Yield ``(index, char)`` for every character outside a string literal.

    A string literal starts and ends with an unescaped double quote; inside
    one, a backslash escapes the character that follows it. Neither the
    delimiting quotes nor anything between them is yielded.

    Args:
        json_str (str): The JSON text to scan.

    Yields:
        tuple[int, str]: Position and value of each non-string character.
    """
    in_string = False
    escaped = False
    for index, char in enumerate(json_str):
        if in_string:
            if escaped:
                # The previous backslash consumed this character.
                escaped = False
            elif char == "\\":
                escaped = True
            elif char == '"':
                in_string = False
        elif char == '"':
            in_string = True
        else:
            yield index, char


def _find_unclosed(json_str):
    """
    Identifies the unclosed braces and brackets in the JSON string.

    Braces and brackets that appear inside string literals are ignored. A
    closing character that does not match the most recently opened element
    is skipped rather than treated as an error.

    Args:
        json_str (str): The JSON string to analyze.

    Returns:
        list: The still-open ``"{"`` / ``"["`` characters, in the order they
        were opened.
    """
    unclosed = []
    for _, char in _structural_chars(json_str):
        if char in _CLOSING_FOR:
            unclosed.append(char)
        elif char in "}]" and unclosed and _CLOSING_FOR[unclosed[-1]] == char:
            unclosed.pop()
    return unclosed


# The following code is used to fix a broken JSON string.
# Adapted from HippoRAG2 (GitHub: OSU-NLP-Group/HippoRAG).
def fix_broken_generated_json(json_str: str) -> str:
    """
    Fixes a malformed (typically truncated) JSON string by:

    - Removing the last structural comma and any trailing content.
    - Appending the closing braces/brackets for every element still open.
    - Ignoring commas, braces and brackets that sit inside string literals.

    If the original string can be successfully loaded by ``orjson.loads()``,
    it is returned without any modification.

    The repair is best-effort: the result is not re-validated, and input that
    ends inside an unterminated string literal with no structural comma
    before it is not turned into valid JSON.

    Args:
        json_str (str): The malformed JSON string to be fixed.

    Returns:
        str: The corrected JSON string.
    """
    try:
        # Only parsed to check validity; the parsed value is discarded.
        orjson.loads(json_str)
    except orjson.JSONDecodeError:
        pass
    else:
        return json_str  # Return as-is if valid

    # Step 1: Remove trailing content after the last comma. Only commas
    # outside string literals count: cutting at a comma inside a string would
    # leave that string unterminated and the result could never parse.
    last_comma_index = -1
    for index, char in _structural_chars(json_str):
        if char == ",":
            last_comma_index = index
    if last_comma_index != -1:
        json_str = json_str[:last_comma_index]

    # Step 2: Identify unclosed braces and brackets.
    unclosed_elements = _find_unclosed(json_str)

    # Step 3: Append the necessary closing elements in reverse order of
    # opening, so the innermost element is closed first.
    closing_suffix = "".join(
        _CLOSING_FOR[open_char] for open_char in reversed(unclosed_elements)
    )
    return json_str + closing_suffix


def new_fix_broken_generated_json(json_str: str) -> str:
    """
    Repairs a malformed JSON string using the json-repair library.

    If the original string can be successfully loaded by ``orjson.loads()``,
    it is returned without any modification.

    Args:
        json_str (str): The malformed JSON string to be repaired.

    Returns:
        str: The input unchanged when it is valid, otherwise the output of
        ``repair_json(json_str)``.
    """
    try:
        # Try to load the JSON to see whether it is already valid.
        orjson.loads(json_str)
    except orjson.JSONDecodeError:
        # Invalid: hand it to json-repair.
        return repair_json(json_str)
    return json_str  # Return as-is if valid