全面更换orjson
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
import time
|
||||
import random
|
||||
import json
|
||||
import orjson
|
||||
import os
|
||||
from datetime import datetime
|
||||
|
||||
@@ -558,7 +558,7 @@ class ExpressionLearnerManager:
|
||||
continue
|
||||
try:
|
||||
with open(expr_file, "r", encoding="utf-8") as f:
|
||||
expressions = json.load(f)
|
||||
expressions = orjson.loads(f.read())
|
||||
|
||||
if not isinstance(expressions, list):
|
||||
logger.warning(f"表达方式文件格式错误,跳过: {expr_file}")
|
||||
@@ -604,7 +604,7 @@ class ExpressionLearnerManager:
|
||||
|
||||
migrated_count += 1
|
||||
logger.info(f"已迁移 {expr_file} 到数据库,包含 {len(expressions)} 个表达方式")
|
||||
except json.JSONDecodeError as e:
|
||||
except orjson.JSONDecodeError as e:
|
||||
logger.error(f"JSON解析失败 {expr_file}: {e}")
|
||||
except Exception as e:
|
||||
logger.error(f"迁移表达方式 {expr_file} 失败: {e}")
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import json
|
||||
import orjson
|
||||
import time
|
||||
import random
|
||||
import hashlib
|
||||
@@ -304,7 +304,7 @@ class ExpressionSelector:
|
||||
# 5. 解析结果
|
||||
result = repair_json(content)
|
||||
if isinstance(result, str):
|
||||
result = json.loads(result)
|
||||
result = orjson.loads(result)
|
||||
|
||||
if not isinstance(result, dict) or "selected_situations" not in result:
|
||||
logger.error("LLM返回格式错误")
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
from dataclasses import dataclass
|
||||
import json
|
||||
import orjson
|
||||
import os
|
||||
import math
|
||||
import asyncio
|
||||
@@ -277,8 +277,11 @@ class EmbeddingStore:
|
||||
test_vectors[str(idx)] = self._get_embedding(s)
|
||||
|
||||
with open(self.get_test_file_path(), "w", encoding="utf-8") as f:
|
||||
json.dump(test_vectors, f, ensure_ascii=False, indent=2)
|
||||
|
||||
f.write(orjson.dumps(
|
||||
test_vectors,
|
||||
option=orjson.OPT_INDENT_2
|
||||
).decode('utf-8'))
|
||||
|
||||
logger.info("测试字符串嵌入向量保存完成")
|
||||
|
||||
def load_embedding_test_vectors(self):
|
||||
@@ -287,7 +290,7 @@ class EmbeddingStore:
|
||||
if not os.path.exists(path):
|
||||
return None
|
||||
with open(path, "r", encoding="utf-8") as f:
|
||||
return json.load(f)
|
||||
return orjson.loads(f.read())
|
||||
|
||||
def check_embedding_model_consistency(self):
|
||||
"""校验当前模型与本地嵌入模型是否一致(使用多线程优化)"""
|
||||
@@ -416,7 +419,9 @@ class EmbeddingStore:
|
||||
logger.info(f"{self.namespace}嵌入库的FaissIndex保存成功")
|
||||
logger.info(f"正在保存{self.namespace}嵌入库的idx2hash映射到文件{self.idx2hash_file_path}")
|
||||
with open(self.idx2hash_file_path, "w", encoding="utf-8") as f:
|
||||
f.write(json.dumps(self.idx2hash, ensure_ascii=False, indent=4))
|
||||
f.write(orjson.dumps(
|
||||
self.idx2hash, option=orjson.OPT_INDENT_2
|
||||
).decode('utf-8'))
|
||||
logger.info(f"{self.namespace}嵌入库的idx2hash映射保存成功")
|
||||
|
||||
def load_from_file(self) -> None:
|
||||
@@ -457,7 +462,7 @@ class EmbeddingStore:
|
||||
logger.info(f"正在加载{self.namespace}嵌入库的idx2hash映射...")
|
||||
logger.debug(f"正在从文件{self.idx2hash_file_path}中加载{self.namespace}嵌入库的idx2hash映射")
|
||||
with open(self.idx2hash_file_path, "r") as f:
|
||||
self.idx2hash = json.load(f)
|
||||
self.idx2hash = orjson.loads(f.read())
|
||||
logger.info(f"{self.namespace}嵌入库的idx2hash映射加载成功")
|
||||
else:
|
||||
raise Exception(f"文件{self.idx2hash_file_path}不存在")
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import asyncio
|
||||
import json
|
||||
import orjson
|
||||
import time
|
||||
from typing import List, Union
|
||||
|
||||
@@ -20,7 +20,7 @@ def _extract_json_from_text(text: str):
|
||||
try:
|
||||
fixed_json = repair_json(text)
|
||||
if isinstance(fixed_json, str):
|
||||
parsed_json = json.loads(fixed_json)
|
||||
parsed_json = orjson.loads(fixed_json)
|
||||
else:
|
||||
parsed_json = fixed_json
|
||||
|
||||
@@ -95,9 +95,10 @@ def _entity_extract(llm_req: LLMRequest, paragraph: str) -> List[str]:
|
||||
def _rdf_triple_extract(llm_req: LLMRequest, paragraph: str, entities: list) -> List[List[str]]:
|
||||
"""对段落进行实体提取,返回提取出的实体列表(JSON格式)"""
|
||||
rdf_extract_context = prompt_template.build_rdf_triple_extract_context(
|
||||
paragraph, entities=json.dumps(entities, ensure_ascii=False)
|
||||
paragraph, entities=orjson.dumps(entities).decode('utf-8')
|
||||
)
|
||||
|
||||
|
||||
# 使用 asyncio.run 来运行异步方法
|
||||
try:
|
||||
# 如果当前已有事件循环在运行,使用它
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import json
|
||||
import orjson
|
||||
import os
|
||||
import time
|
||||
from typing import Dict, List, Tuple
|
||||
@@ -74,7 +74,7 @@ class KGManager:
|
||||
# 保存段落hash到文件
|
||||
with open(self.pg_hash_file_path, "w", encoding="utf-8") as f:
|
||||
data = {"stored_paragraph_hashes": list(self.stored_paragraph_hashes)}
|
||||
f.write(json.dumps(data, ensure_ascii=False, indent=4))
|
||||
f.write(orjson.dumps(data, option=orjson.OPT_INDENT_2).decode('utf-8'))
|
||||
|
||||
def load_from_file(self):
|
||||
"""从文件加载KG数据"""
|
||||
@@ -88,7 +88,7 @@ class KGManager:
|
||||
|
||||
# 加载段落hash
|
||||
with open(self.pg_hash_file_path, "r", encoding="utf-8") as f:
|
||||
data = json.load(f)
|
||||
data = orjson.loads(f.read())
|
||||
self.stored_paragraph_hashes = set(data["stored_paragraph_hashes"])
|
||||
|
||||
# 加载实体计数
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import json
|
||||
import orjson
|
||||
import os
|
||||
import glob
|
||||
from typing import Any, Dict, List
|
||||
@@ -113,7 +113,7 @@ class OpenIE:
|
||||
data_list = []
|
||||
for file in json_files:
|
||||
with open(file, "r", encoding="utf-8") as f:
|
||||
data = json.load(f)
|
||||
data = orjson.loads(f.read())
|
||||
data_list.append(data)
|
||||
if not data_list:
|
||||
# print(f"111111111111111111111Root Path : \n{ROOT_PATH}")
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import json
|
||||
import orjson
|
||||
from json_repair import repair_json
|
||||
|
||||
|
||||
@@ -45,7 +45,7 @@ def fix_broken_generated_json(json_str: str) -> str:
|
||||
- Iterating over the JSON string once to determine and fix unclosed braces or brackets.
|
||||
- Ensuring braces and brackets inside string literals are not considered.
|
||||
|
||||
If the original json_str string can be successfully loaded by json.loads(), will directly return it without any modification.
|
||||
If the original json_str string can be successfully loaded by orjson.loads(), will directly return it without any modification.
|
||||
|
||||
Args:
|
||||
json_str (str): The malformed JSON string to be fixed.
|
||||
@@ -56,9 +56,9 @@ def fix_broken_generated_json(json_str: str) -> str:
|
||||
|
||||
try:
|
||||
# Try to load the JSON to see if it is valid
|
||||
json.loads(json_str)
|
||||
orjson.loads(json_str)
|
||||
return json_str # Return as-is if valid
|
||||
except json.JSONDecodeError: ...
|
||||
except orjson.JSONDecodeError: ...
|
||||
|
||||
# Step 1: Remove trailing content after the last comma.
|
||||
last_comma_index = json_str.rfind(",")
|
||||
@@ -80,7 +80,7 @@ def new_fix_broken_generated_json(json_str: str) -> str:
|
||||
"""
|
||||
使用 json-repair 库修复格式错误的 JSON 字符串。
|
||||
|
||||
如果原始 json_str 字符串可以被 json.loads() 成功加载,则直接返回而不进行任何修改。
|
||||
如果原始 json_str 字符串可以被 orjson.loads() 成功加载,则直接返回而不进行任何修改。
|
||||
|
||||
参数:
|
||||
json_str (str): 需要修复的格式错误的 JSON 字符串。
|
||||
@@ -90,8 +90,8 @@ def new_fix_broken_generated_json(json_str: str) -> str:
|
||||
"""
|
||||
try:
|
||||
# 尝试加载 JSON 以查看其是否有效
|
||||
json.loads(json_str)
|
||||
orjson.loads(json_str)
|
||||
return json_str # 如果有效则按原样返回
|
||||
except json.JSONDecodeError:
|
||||
except orjson.JSONDecodeError:
|
||||
# 如果无效,则尝试修复它
|
||||
return repair_json(json_str)
|
||||
|
||||
@@ -4,7 +4,7 @@ import math
|
||||
import random
|
||||
import time
|
||||
import re
|
||||
import json
|
||||
import orjson
|
||||
import jieba
|
||||
import networkx as nx
|
||||
import numpy as np
|
||||
@@ -912,7 +912,7 @@ class EntorhinalCortex:
|
||||
# 将memory_items转换为JSON字符串
|
||||
try:
|
||||
memory_items = [str(item) for item in memory_items]
|
||||
memory_items_json = json.dumps(memory_items, ensure_ascii=False)
|
||||
memory_items_json = orjson.dumps(memory_items).decode("utf-8")
|
||||
if not memory_items_json:
|
||||
continue
|
||||
except Exception:
|
||||
@@ -1082,7 +1082,7 @@ class EntorhinalCortex:
|
||||
|
||||
try:
|
||||
memory_items = [str(item) for item in memory_items]
|
||||
if memory_items_json := json.dumps(memory_items, ensure_ascii=False):
|
||||
if memory_items_json := orjson.dumps(memory_items).decode("utf-8"):
|
||||
nodes_data.append(
|
||||
{
|
||||
"concept": concept,
|
||||
@@ -1156,7 +1156,7 @@ class EntorhinalCortex:
|
||||
for node in nodes:
|
||||
concept = node.concept
|
||||
try:
|
||||
memory_items = json.loads(node.memory_items)
|
||||
memory_items = orjson.loads(node.memory_items)
|
||||
if not isinstance(memory_items, list):
|
||||
memory_items = [memory_items] if memory_items else []
|
||||
|
||||
|
||||
@@ -356,10 +356,12 @@ def main():
|
||||
result = diagnostics.run_full_diagnosis()
|
||||
|
||||
# 保存诊断结果
|
||||
import json
|
||||
import orjson
|
||||
with open("action_diagnosis_results.json", "w", encoding="utf-8") as f:
|
||||
json.dump(result, f, indent=2, ensure_ascii=False, default=str)
|
||||
|
||||
f.write(orjson.dumps(
|
||||
result, option=orjson.OPT_INDENT_2).decode('utf-8')
|
||||
)
|
||||
|
||||
logger.info("📄 诊断结果已保存到: action_diagnosis_results.json")
|
||||
|
||||
# 根据诊断结果返回适当的退出代码
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
import time
|
||||
import re
|
||||
import json
|
||||
import orjson
|
||||
import ast
|
||||
import traceback
|
||||
|
||||
@@ -69,7 +69,7 @@ class InstantMemory:
|
||||
return None
|
||||
try:
|
||||
repaired = repair_json(response)
|
||||
result = json.loads(repaired)
|
||||
result = orjson.loads(repaired)
|
||||
memory_text = result.get("memory_text", "")
|
||||
keywords = result.get("keywords", "")
|
||||
if isinstance(keywords, str):
|
||||
@@ -142,7 +142,7 @@ class InstantMemory:
|
||||
return None
|
||||
try:
|
||||
repaired = repair_json(response)
|
||||
result = json.loads(repaired)
|
||||
result = orjson.loads(repaired)
|
||||
# 解析keywords
|
||||
keywords = result.get("keywords", "")
|
||||
if isinstance(keywords, str):
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import difflib
|
||||
import json
|
||||
import orjson
|
||||
|
||||
from json_repair import repair_json
|
||||
from typing import List, Dict
|
||||
@@ -30,7 +30,7 @@ def get_keywords_from_json(json_str) -> List:
|
||||
fixed_json = repair_json(json_str)
|
||||
|
||||
# 如果repair_json返回的是字符串,需要解析为Python对象
|
||||
result = json.loads(fixed_json) if isinstance(fixed_json, str) else fixed_json
|
||||
result = orjson.loads(fixed_json) if isinstance(fixed_json, str) else fixed_json
|
||||
return result.get("keywords", [])
|
||||
except Exception as e:
|
||||
logger.error(f"解析关键词JSON失败: {e}")
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import re
|
||||
import traceback
|
||||
import json
|
||||
import orjson
|
||||
from typing import Union
|
||||
|
||||
from src.common.database.sqlalchemy_models import Messages, Images
|
||||
@@ -67,7 +67,7 @@ class MessageStorage:
|
||||
user_info_from_chat = chat_info_dict.get("user_info") or {}
|
||||
|
||||
# 将priority_info字典序列化为JSON字符串,以便存储到数据库的Text字段
|
||||
priority_info_json = json.dumps(priority_info) if priority_info else None
|
||||
priority_info_json = orjson.dumps(priority_info).decode('utf-8') if priority_info else None
|
||||
|
||||
# 获取数据库会话
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import json
|
||||
import orjson
|
||||
import time
|
||||
import traceback
|
||||
from typing import Dict, Any, Optional, Tuple
|
||||
@@ -264,7 +264,7 @@ class ActionPlanner:
|
||||
|
||||
if llm_content:
|
||||
try:
|
||||
parsed_json = json.loads(repair_json(llm_content))
|
||||
parsed_json = orjson.loads(repair_json(llm_content))
|
||||
|
||||
if isinstance(parsed_json, list):
|
||||
if parsed_json:
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
错别字生成器 - 基于拼音和字频的中文错别字生成工具
|
||||
"""
|
||||
|
||||
import json
|
||||
import orjson
|
||||
import math
|
||||
import os
|
||||
import random
|
||||
@@ -52,7 +52,7 @@ class ChineseTypoGenerator:
|
||||
# 如果缓存文件存在,直接加载
|
||||
if cache_file.exists():
|
||||
with open(cache_file, "r", encoding="utf-8") as f:
|
||||
return json.load(f)
|
||||
return orjson.loads(f.read())
|
||||
|
||||
# 使用内置的词频文件
|
||||
char_freq = defaultdict(int)
|
||||
@@ -73,7 +73,9 @@ class ChineseTypoGenerator:
|
||||
|
||||
# 保存到缓存文件
|
||||
with open(cache_file, "w", encoding="utf-8") as f:
|
||||
json.dump(normalized_freq, f, ensure_ascii=False, indent=2)
|
||||
f.write(orjson.dumps(
|
||||
normalized_freq, option=orjson.OPT_INDENT_2).decode('utf-8')
|
||||
)
|
||||
|
||||
return normalized_freq
|
||||
|
||||
|
||||
Reference in New Issue
Block a user