style: 格式化代码
This commit is contained in:
@@ -40,6 +40,7 @@ file_lock = Lock()
|
||||
|
||||
# --- 缓存清理 ---
|
||||
|
||||
|
||||
def clear_cache():
|
||||
"""清理 lpmm_learning_tool.py 生成的缓存文件"""
|
||||
logger.info("--- 开始清理缓存 ---")
|
||||
@@ -53,6 +54,7 @@ def clear_cache():
|
||||
logger.info("缓存目录不存在,无需清理。")
|
||||
logger.info("--- 缓存清理完成 ---")
|
||||
|
||||
|
||||
# --- 模块一:数据预处理 ---
|
||||
|
||||
|
||||
@@ -108,7 +110,7 @@ def _parse_and_repair_json(json_string: str) -> Optional[dict]:
|
||||
cleaned_string = cleaned_string[7:].strip()
|
||||
elif cleaned_string.startswith("```"):
|
||||
cleaned_string = cleaned_string[3:].strip()
|
||||
|
||||
|
||||
if cleaned_string.endswith("```"):
|
||||
cleaned_string = cleaned_string[:-3].strip()
|
||||
|
||||
@@ -117,7 +119,7 @@ def _parse_and_repair_json(json_string: str) -> Optional[dict]:
|
||||
return orjson.loads(cleaned_string)
|
||||
except orjson.JSONDecodeError:
|
||||
logger.warning("直接解析JSON失败,将尝试修复...")
|
||||
|
||||
|
||||
# 3. 修复与最终解析
|
||||
repaired_json_str = ""
|
||||
try:
|
||||
@@ -164,10 +166,10 @@ async def extract_info_async(pg_hash, paragraph, llm_api):
|
||||
content = None
|
||||
try:
|
||||
content, (_, _, _) = await llm_api.generate_response_async(prompt)
|
||||
|
||||
|
||||
# 改进点:调用封装好的函数处理JSON解析和修复
|
||||
extracted_data = _parse_and_repair_json(content)
|
||||
|
||||
|
||||
if extracted_data is None:
|
||||
# 如果解析失败,抛出异常以触发统一的错误处理逻辑
|
||||
raise ValueError("无法从LLM输出中解析有效的JSON数据")
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
"""
|
||||
从现有ChromaDB数据重建JSON元数据索引
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import sys
|
||||
import os
|
||||
@@ -15,53 +16,53 @@ from src.common.logger import get_logger
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
async def rebuild_metadata_index():
|
||||
"""从ChromaDB重建元数据索引"""
|
||||
print("="*80)
|
||||
print("=" * 80)
|
||||
print("重建JSON元数据索引")
|
||||
print("="*80)
|
||||
|
||||
print("=" * 80)
|
||||
|
||||
# 初始化记忆系统
|
||||
print("\n🔧 初始化记忆系统...")
|
||||
ms = MemorySystem()
|
||||
await ms.initialize()
|
||||
print("✅ 记忆系统已初始化")
|
||||
|
||||
if not hasattr(ms.unified_storage, 'metadata_index'):
|
||||
|
||||
if not hasattr(ms.unified_storage, "metadata_index"):
|
||||
print("❌ 元数据索引管理器未初始化")
|
||||
return
|
||||
|
||||
|
||||
# 获取所有记忆
|
||||
print("\n📥 从ChromaDB获取所有记忆...")
|
||||
from src.common.vector_db import vector_db_service
|
||||
|
||||
|
||||
try:
|
||||
# 获取集合中的所有记忆ID
|
||||
collection_name = ms.unified_storage.config.memory_collection
|
||||
result = vector_db_service.get(
|
||||
collection_name=collection_name,
|
||||
include=["documents", "metadatas", "embeddings"]
|
||||
collection_name=collection_name, include=["documents", "metadatas", "embeddings"]
|
||||
)
|
||||
|
||||
|
||||
if not result or not result.get("ids"):
|
||||
print("❌ ChromaDB中没有找到记忆数据")
|
||||
return
|
||||
|
||||
|
||||
ids = result["ids"]
|
||||
metadatas = result.get("metadatas", [])
|
||||
|
||||
|
||||
print(f"✅ 找到 {len(ids)} 条记忆")
|
||||
|
||||
|
||||
# 重建元数据索引
|
||||
print("\n🔨 开始重建元数据索引...")
|
||||
entries = []
|
||||
success_count = 0
|
||||
|
||||
for i, (memory_id, metadata) in enumerate(zip(ids, metadatas), 1):
|
||||
|
||||
for i, (memory_id, metadata) in enumerate(zip(ids, metadatas, strict=False), 1):
|
||||
try:
|
||||
# 从ChromaDB元数据重建索引条目
|
||||
import orjson
|
||||
|
||||
|
||||
entry = MemoryMetadataIndexEntry(
|
||||
memory_id=memory_id,
|
||||
user_id=metadata.get("user_id", "unknown"),
|
||||
@@ -75,9 +76,9 @@ async def rebuild_metadata_index():
|
||||
created_at=metadata.get("created_at", 0.0),
|
||||
access_count=metadata.get("access_count", 0),
|
||||
chat_id=metadata.get("chat_id"),
|
||||
content_preview=None
|
||||
content_preview=None,
|
||||
)
|
||||
|
||||
|
||||
# 尝试解析importance和confidence的枚举名称
|
||||
if "importance" in metadata:
|
||||
imp_str = metadata["importance"]
|
||||
@@ -89,7 +90,7 @@ async def rebuild_metadata_index():
|
||||
entry.importance = 3
|
||||
elif imp_str == "CRITICAL":
|
||||
entry.importance = 4
|
||||
|
||||
|
||||
if "confidence" in metadata:
|
||||
conf_str = metadata["confidence"]
|
||||
if conf_str == "LOW":
|
||||
@@ -100,40 +101,41 @@ async def rebuild_metadata_index():
|
||||
entry.confidence = 3
|
||||
elif conf_str == "VERIFIED":
|
||||
entry.confidence = 4
|
||||
|
||||
|
||||
entries.append(entry)
|
||||
success_count += 1
|
||||
|
||||
|
||||
if i % 100 == 0:
|
||||
print(f" 处理进度: {i}/{len(ids)} ({success_count} 成功)")
|
||||
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"处理记忆 {memory_id} 失败: {e}")
|
||||
continue
|
||||
|
||||
|
||||
print(f"\n✅ 成功解析 {success_count}/{len(ids)} 条记忆元数据")
|
||||
|
||||
|
||||
# 批量更新索引
|
||||
print("\n💾 保存元数据索引...")
|
||||
ms.unified_storage.metadata_index.batch_add_or_update(entries)
|
||||
ms.unified_storage.metadata_index.save()
|
||||
|
||||
|
||||
# 显示统计信息
|
||||
stats = ms.unified_storage.metadata_index.get_stats()
|
||||
print(f"\n📊 重建后的索引统计:")
|
||||
print("\n📊 重建后的索引统计:")
|
||||
print(f" - 总记忆数: {stats['total_memories']}")
|
||||
print(f" - 主语数量: {stats['subjects_count']}")
|
||||
print(f" - 关键词数量: {stats['keywords_count']}")
|
||||
print(f" - 标签数量: {stats['tags_count']}")
|
||||
print(f" - 类型分布:")
|
||||
for mtype, count in stats['types'].items():
|
||||
print(" - 类型分布:")
|
||||
for mtype, count in stats["types"].items():
|
||||
print(f" - {mtype}: {count}")
|
||||
|
||||
|
||||
print("\n✅ 元数据索引重建完成!")
|
||||
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"重建索引失败: {e}", exc_info=True)
|
||||
print(f"❌ 重建索引失败: {e}")
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(rebuild_metadata_index())
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
"""
|
||||
轻量烟雾测试:初始化 MemorySystem 并运行一次检索,验证 MemoryMetadata.source 访问不再报错
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import sys
|
||||
import os
|
||||
@@ -11,6 +12,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from src.chat.memory_system.memory_system import MemorySystem
|
||||
|
||||
|
||||
async def main():
|
||||
ms = MemorySystem()
|
||||
await ms.initialize()
|
||||
@@ -19,5 +21,6 @@ async def main():
|
||||
for i, m in enumerate(results, 1):
|
||||
print(f"{i}. id={m.metadata.memory_id} source={getattr(m.metadata, 'source', None)}")
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
|
||||
Reference in New Issue
Block a user