🤖 自动格式化代码 [skip ci]

This commit is contained in:
github-actions[bot]
2025-05-28 12:51:51 +00:00
parent 460c7fb75a
commit 847bd23a62

View File

@@ -1,6 +1,7 @@
import os import os
import json import json
import sys # 新增系统模块导入 import sys # 新增系统模块导入
# import time # import time
import pickle import pickle
from pathlib import Path from pathlib import Path
@@ -22,7 +23,7 @@ from rich.progress import (
TaskProgressColumn, TaskProgressColumn,
TimeRemainingColumn, TimeRemainingColumn,
TimeElapsedColumn, TimeElapsedColumn,
SpinnerColumn SpinnerColumn,
) )
from rich.table import Table from rich.table import Table
from rich.panel import Panel from rich.panel import Panel
@@ -30,17 +31,29 @@ from rich.panel import Panel
from src.common.database.database import db from src.common.database.database import db
from src.common.database.database_model import ( from src.common.database.database_model import (
ChatStreams, LLMUsage, Emoji, Messages, Images, ImageDescriptions, ChatStreams,
PersonInfo, Knowledges, ThinkingLog, GraphNodes, GraphEdges LLMUsage,
Emoji,
Messages,
Images,
ImageDescriptions,
PersonInfo,
Knowledges,
ThinkingLog,
GraphNodes,
GraphEdges,
) )
from src.common.logger_manager import get_logger from src.common.logger_manager import get_logger
logger = get_logger("mongodb_to_sqlite") logger = get_logger("mongodb_to_sqlite")
ROOT_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) ROOT_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
@dataclass @dataclass
class MigrationConfig: class MigrationConfig:
"""迁移配置类""" """迁移配置类"""
mongo_collection: str mongo_collection: str
target_model: Type[Model] target_model: Type[Model]
field_mapping: Dict[str, str] field_mapping: Dict[str, str]
@@ -56,6 +69,7 @@ class MigrationConfig:
@dataclass @dataclass
class MigrationCheckpoint: class MigrationCheckpoint:
"""迁移断点数据""" """迁移断点数据"""
collection_name: str collection_name: str
processed_count: int processed_count: int
last_processed_id: Any last_processed_id: Any
@@ -66,6 +80,7 @@ class MigrationCheckpoint:
@dataclass @dataclass
class MigrationStats: class MigrationStats:
"""迁移统计信息""" """迁移统计信息"""
total_documents: int = 0 total_documents: int = 0
processed_count: int = 0 processed_count: int = 0
success_count: int = 0 success_count: int = 0
@@ -80,12 +95,9 @@ class MigrationStats:
def add_error(self, doc_id: Any, error: str, doc_data: Optional[Dict] = None): def add_error(self, doc_id: Any, error: str, doc_data: Optional[Dict] = None):
"""添加错误记录""" """添加错误记录"""
self.errors.append({ self.errors.append(
'doc_id': str(doc_id), {"doc_id": str(doc_id), "error": error, "timestamp": datetime.now().isoformat(), "doc_data": doc_data}
'error': error, )
'timestamp': datetime.now().isoformat(),
'doc_data': doc_data
})
self.error_count += 1 self.error_count += 1
def add_validation_error(self, doc_id: Any, field: str, error: str): def add_validation_error(self, doc_id: Any, field: str, error: str):
@@ -120,11 +132,11 @@ class MongoToSQLiteMigrator:
if mongo_uri := os.getenv("MONGODB_URI"): if mongo_uri := os.getenv("MONGODB_URI"):
return mongo_uri return mongo_uri
user = os.getenv('MONGODB_USER') user = os.getenv("MONGODB_USER")
password = os.getenv('MONGODB_PASS') password = os.getenv("MONGODB_PASS")
host = os.getenv('MONGODB_HOST', 'localhost') host = os.getenv("MONGODB_HOST", "localhost")
port = os.getenv('MONGODB_PORT', '27017') port = os.getenv("MONGODB_PORT", "27017")
auth_source = os.getenv('MONGODB_AUTH_SOURCE', 'admin') auth_source = os.getenv("MONGODB_AUTH_SOURCE", "admin")
if user and password: if user and password:
return f"mongodb://{user}:{password}@{host}:{port}/{self.database_name}?authSource={auth_source}" return f"mongodb://{user}:{password}@{host}:{port}/{self.database_name}?authSource={auth_source}"
@@ -144,11 +156,11 @@ class MongoToSQLiteMigrator:
"description": "description", "description": "description",
"emotion": "emotion", "emotion": "emotion",
"usage_count": "usage_count", "usage_count": "usage_count",
"last_used_time": "last_used_time" "last_used_time": "last_used_time",
# record_time字段将在转换时自动设置为当前时间 # record_time字段将在转换时自动设置为当前时间
}, },
enable_validation=False, # 禁用数据验证 enable_validation=False, # 禁用数据验证
unique_fields=["full_path", "emoji_hash"] unique_fields=["full_path", "emoji_hash"],
), ),
# 聊天流迁移配置 # 聊天流迁移配置
MigrationConfig( MigrationConfig(
@@ -165,10 +177,10 @@ class MongoToSQLiteMigrator:
"user_info.platform": "user_platform", "user_info.platform": "user_platform",
"user_info.user_id": "user_id", "user_info.user_id": "user_id",
"user_info.user_nickname": "user_nickname", "user_info.user_nickname": "user_nickname",
"user_info.user_cardname": "user_cardname" "user_info.user_cardname": "user_cardname",
}, },
enable_validation=False, # 禁用数据验证 enable_validation=False, # 禁用数据验证
unique_fields=["stream_id"] unique_fields=["stream_id"],
), ),
# LLM使用记录迁移配置 # LLM使用记录迁移配置
MigrationConfig( MigrationConfig(
@@ -184,10 +196,10 @@ class MongoToSQLiteMigrator:
"total_tokens": "total_tokens", "total_tokens": "total_tokens",
"cost": "cost", "cost": "cost",
"status": "status", "status": "status",
"timestamp": "timestamp" "timestamp": "timestamp",
}, },
enable_validation=False, # 禁用数据验证 enable_validation=False, # 禁用数据验证
unique_fields=["user_id", "timestamp"] # 组合唯一性 unique_fields=["user_id", "timestamp"], # 组合唯一性
), ),
# 消息迁移配置 # 消息迁移配置
MigrationConfig( MigrationConfig(
@@ -214,12 +226,11 @@ class MongoToSQLiteMigrator:
"user_info.user_cardname": "user_cardname", "user_info.user_cardname": "user_cardname",
"processed_plain_text": "processed_plain_text", "processed_plain_text": "processed_plain_text",
"detailed_plain_text": "detailed_plain_text", "detailed_plain_text": "detailed_plain_text",
"memorized_times": "memorized_times" "memorized_times": "memorized_times",
}, },
enable_validation=False, # 禁用数据验证 enable_validation=False, # 禁用数据验证
unique_fields=["message_id"] unique_fields=["message_id"],
), ),
# 图片迁移配置 # 图片迁移配置
MigrationConfig( MigrationConfig(
mongo_collection="images", mongo_collection="images",
@@ -229,11 +240,10 @@ class MongoToSQLiteMigrator:
"description": "description", "description": "description",
"path": "path", "path": "path",
"timestamp": "timestamp", "timestamp": "timestamp",
"type": "type" "type": "type",
}, },
unique_fields=["path"] unique_fields=["path"],
), ),
# 图片描述迁移配置 # 图片描述迁移配置
MigrationConfig( MigrationConfig(
mongo_collection="image_descriptions", mongo_collection="image_descriptions",
@@ -242,10 +252,10 @@ class MongoToSQLiteMigrator:
"type": "type", "type": "type",
"hash": "image_description_hash", "hash": "image_description_hash",
"description": "description", "description": "description",
"timestamp": "timestamp" "timestamp": "timestamp",
}, unique_fields=["image_description_hash", "type"] },
unique_fields=["image_description_hash", "type"],
), ),
# 个人信息迁移配置 # 个人信息迁移配置
MigrationConfig( MigrationConfig(
mongo_collection="person_info", mongo_collection="person_info",
@@ -260,22 +270,17 @@ class MongoToSQLiteMigrator:
"relationship_value": "relationship_value", "relationship_value": "relationship_value",
"konw_time": "know_time", "konw_time": "know_time",
"msg_interval": "msg_interval", "msg_interval": "msg_interval",
"msg_interval_list": "msg_interval_list" "msg_interval_list": "msg_interval_list",
}, },
unique_fields=["person_id"] unique_fields=["person_id"],
), ),
# 知识库迁移配置 # 知识库迁移配置
MigrationConfig( MigrationConfig(
mongo_collection="knowledges", mongo_collection="knowledges",
target_model=Knowledges, target_model=Knowledges,
field_mapping={ field_mapping={"content": "content", "embedding": "embedding"},
"content": "content", unique_fields=["content"], # 假设内容唯一
"embedding": "embedding"
},
unique_fields=["content"] # 假设内容唯一
), ),
# 思考日志迁移配置 # 思考日志迁移配置
MigrationConfig( MigrationConfig(
mongo_collection="thinking_log", mongo_collection="thinking_log",
@@ -293,9 +298,8 @@ class MongoToSQLiteMigrator:
"heartflow_data": "heartflow_data_json", "heartflow_data": "heartflow_data_json",
"reasoning_data": "reasoning_data_json", "reasoning_data": "reasoning_data_json",
}, },
unique_fields=["chat_id", "trigger_text"] unique_fields=["chat_id", "trigger_text"],
), ),
# 图节点迁移配置 # 图节点迁移配置
MigrationConfig( MigrationConfig(
mongo_collection="graph_data.nodes", mongo_collection="graph_data.nodes",
@@ -305,11 +309,10 @@ class MongoToSQLiteMigrator:
"memory_items": "memory_items", "memory_items": "memory_items",
"hash": "hash", "hash": "hash",
"created_time": "created_time", "created_time": "created_time",
"last_modified": "last_modified" "last_modified": "last_modified",
}, },
unique_fields=["concept"] unique_fields=["concept"],
), ),
# 图边迁移配置 # 图边迁移配置
MigrationConfig( MigrationConfig(
mongo_collection="graph_data.edges", mongo_collection="graph_data.edges",
@@ -320,11 +323,12 @@ class MongoToSQLiteMigrator:
"strength": "strength", "strength": "strength",
"hash": "hash", "hash": "hash",
"created_time": "created_time", "created_time": "created_time",
"last_modified": "last_modified" "last_modified": "last_modified",
}, },
unique_fields=["source", "target"] # 组合唯一性 unique_fields=["source", "target"], # 组合唯一性
) ),
] ]
def _initialize_validation_rules(self) -> Dict[str, Any]: def _initialize_validation_rules(self) -> Dict[str, Any]:
"""数据验证已禁用 - 返回空字典""" """数据验证已禁用 - 返回空字典"""
return {} return {}
@@ -333,14 +337,11 @@ class MongoToSQLiteMigrator:
"""连接到MongoDB""" """连接到MongoDB"""
try: try:
self.mongo_client = MongoClient( self.mongo_client = MongoClient(
self.mongo_uri, self.mongo_uri, serverSelectionTimeoutMS=5000, connectTimeoutMS=10000, maxPoolSize=10
serverSelectionTimeoutMS=5000,
connectTimeoutMS=10000,
maxPoolSize=10
) )
# 测试连接 # 测试连接
self.mongo_client.admin.command('ping') self.mongo_client.admin.command("ping")
self.mongo_db = self.mongo_client[self.database_name] self.mongo_db = self.mongo_client[self.database_name]
logger.info(f"成功连接到MongoDB: {self.database_name}") logger.info(f"成功连接到MongoDB: {self.database_name}")
@@ -398,7 +399,7 @@ class MongoToSQLiteMigrator:
if isinstance(value, str): if isinstance(value, str):
# 处理字符串数字 # 处理字符串数字
clean_value = value.strip() clean_value = value.strip()
if clean_value.replace('.', '').replace('-', '').isdigit(): if clean_value.replace(".", "").replace("-", "").isdigit():
return int(float(clean_value)) return int(float(clean_value))
return 0 return 0
return int(value) if value is not None else 0 return int(value) if value is not None else 0
@@ -408,7 +409,7 @@ class MongoToSQLiteMigrator:
elif field_type == "BooleanField": elif field_type == "BooleanField":
if isinstance(value, str): if isinstance(value, str):
return value.lower() in ('true', '1', 'yes', 'on') return value.lower() in ("true", "1", "yes", "on")
return bool(value) return bool(value)
elif field_type == "DateTimeField": elif field_type == "DateTimeField":
@@ -417,7 +418,7 @@ class MongoToSQLiteMigrator:
elif isinstance(value, str): elif isinstance(value, str):
try: try:
# 尝试解析ISO格式日期 # 尝试解析ISO格式日期
return datetime.fromisoformat(value.replace('Z', '+00:00')) return datetime.fromisoformat(value.replace("Z", "+00:00"))
except ValueError: except ValueError:
try: try:
# 尝试解析时间戳字符串 # 尝试解析时间戳字符串
@@ -436,7 +437,7 @@ class MongoToSQLiteMigrator:
"""获取字段的默认值""" """获取字段的默认值"""
field_type = field.__class__.__name__ field_type = field.__class__.__name__
if hasattr(field, 'default') and field.default is not None: if hasattr(field, "default") and field.default is not None:
return field.default return field.default
if field.null: if field.null:
@@ -455,6 +456,7 @@ class MongoToSQLiteMigrator:
return datetime.now() return datetime.now()
return None return None
def _validate_data(self, collection_name: str, data: Dict[str, Any], doc_id: Any, stats: MigrationStats) -> bool: def _validate_data(self, collection_name: str, data: Dict[str, Any], doc_id: Any, stats: MigrationStats) -> bool:
"""数据验证已禁用 - 始终返回True""" """数据验证已禁用 - 始终返回True"""
return True return True
@@ -465,12 +467,12 @@ class MongoToSQLiteMigrator:
collection_name=collection_name, collection_name=collection_name,
processed_count=processed_count, processed_count=processed_count,
last_processed_id=last_id, last_processed_id=last_id,
timestamp=datetime.now() timestamp=datetime.now(),
) )
checkpoint_file = self.checkpoint_dir / f"{collection_name}_checkpoint.pkl" checkpoint_file = self.checkpoint_dir / f"{collection_name}_checkpoint.pkl"
try: try:
with open(checkpoint_file, 'wb') as f: with open(checkpoint_file, "wb") as f:
pickle.dump(checkpoint, f) pickle.dump(checkpoint, f)
except Exception as e: except Exception as e:
logger.warning(f"保存断点失败: {e}") logger.warning(f"保存断点失败: {e}")
@@ -482,7 +484,7 @@ class MongoToSQLiteMigrator:
return None return None
try: try:
with open(checkpoint_file, 'rb') as f: with open(checkpoint_file, "rb") as f:
return pickle.load(f) return pickle.load(f)
except Exception as e: except Exception as e:
logger.warning(f"加载断点失败: {e}") logger.warning(f"加载断点失败: {e}")
@@ -514,8 +516,9 @@ class MongoToSQLiteMigrator:
return success_count return success_count
def _check_duplicate_by_unique_fields(self, model: Type[Model], data: Dict[str, Any], def _check_duplicate_by_unique_fields(
unique_fields: List[str]) -> bool: self, model: Type[Model], data: Dict[str, Any], unique_fields: List[str]
) -> bool:
"""根据唯一字段检查重复""" """根据唯一字段检查重复"""
if not unique_fields: if not unique_fields:
return False return False
@@ -554,6 +557,7 @@ class MongoToSQLiteMigrator:
except Exception as e: except Exception as e:
logger.error(f"创建模型实例失败: {e}") logger.error(f"创建模型实例失败: {e}")
return None return None
def migrate_collection(self, config: MigrationConfig) -> MigrationStats: def migrate_collection(self, config: MigrationConfig) -> MigrationStats:
"""迁移单个集合 - 使用优化的批量插入和进度条""" """迁移单个集合 - 使用优化的批量插入和进度条"""
stats = MigrationStats() stats = MigrationStats()
@@ -594,13 +598,9 @@ class MongoToSQLiteMigrator:
TimeElapsedColumn(), TimeElapsedColumn(),
TimeRemainingColumn(), TimeRemainingColumn(),
console=self.console, console=self.console,
refresh_per_second=10 refresh_per_second=10,
) as progress: ) as progress:
task = progress.add_task(f"迁移 {config.mongo_collection}", total=stats.total_documents)
task = progress.add_task(
f"迁移 {config.mongo_collection}",
total=stats.total_documents
)
# 批量处理数据 # 批量处理数据
batch_data = [] batch_data = []
batch_count = 0 batch_count = 0
@@ -608,7 +608,7 @@ class MongoToSQLiteMigrator:
for mongo_doc in mongo_collection.find(query).batch_size(config.batch_size): for mongo_doc in mongo_collection.find(query).batch_size(config.batch_size):
try: try:
doc_id = mongo_doc.get('_id', 'unknown') doc_id = mongo_doc.get("_id", "unknown")
last_processed_id = doc_id last_processed_id = doc_id
# 构建目标数据 # 构建目标数据
@@ -657,7 +657,7 @@ class MongoToSQLiteMigrator:
progress.update(task, advance=config.batch_size) progress.update(task, advance=config.batch_size)
except Exception as e: except Exception as e:
doc_id = mongo_doc.get('_id', 'unknown') doc_id = mongo_doc.get("_id", "unknown")
stats.add_error(doc_id, f"处理文档异常: {e}", mongo_doc) stats.add_error(doc_id, f"处理文档异常: {e}", mongo_doc)
logger.error(f"处理文档失败 (ID: {doc_id}): {e}") logger.error(f"处理文档失败 (ID: {doc_id}): {e}")
@@ -691,6 +691,7 @@ class MongoToSQLiteMigrator:
stats.add_error("collection_error", str(e)) stats.add_error("collection_error", str(e))
return stats return stats
def migrate_all(self) -> Dict[str, MigrationStats]: def migrate_all(self) -> Dict[str, MigrationStats]:
"""执行所有迁移任务""" """执行所有迁移任务"""
logger.info("开始执行数据库迁移...") logger.info("开始执行数据库迁移...")
@@ -704,20 +705,24 @@ class MongoToSQLiteMigrator:
try: try:
# 创建总体进度表格 # 创建总体进度表格
total_collections = len(self.migration_configs) total_collections = len(self.migration_configs)
self.console.print(Panel( self.console.print(
Panel(
f"[bold blue]MongoDB 到 SQLite 数据迁移[/bold blue]\n" f"[bold blue]MongoDB 到 SQLite 数据迁移[/bold blue]\n"
f"[yellow]总集合数: {total_collections}[/yellow]", f"[yellow]总集合数: {total_collections}[/yellow]",
title="迁移开始", title="迁移开始",
expand=False expand=False,
)) )
)
for idx, config in enumerate(self.migration_configs, 1): for idx, config in enumerate(self.migration_configs, 1):
self.console.print(f"\n[bold green]正在处理集合 {idx}/{total_collections}: {config.mongo_collection}[/bold green]") self.console.print(
f"\n[bold green]正在处理集合 {idx}/{total_collections}: {config.mongo_collection}[/bold green]"
)
stats = self.migrate_collection(config) stats = self.migrate_collection(config)
all_stats[config.mongo_collection] = stats all_stats[config.mongo_collection] = stats
# 显示单个集合的快速统计 # 显示单个集合的快速统计
if stats.processed_count > 0: if stats.processed_count > 0:
success_rate = (stats.success_count / stats.processed_count * 100) success_rate = stats.success_count / stats.processed_count * 100
if success_rate >= 95: if success_rate >= 95:
status_emoji = "" status_emoji = ""
status_color = "bright_green" status_color = "bright_green"
@@ -805,7 +810,7 @@ class MongoToSQLiteMigrator:
f"[red]{stats.validation_errors}[/red]" if stats.validation_errors > 0 else "0", f"[red]{stats.validation_errors}[/red]" if stats.validation_errors > 0 else "0",
str(stats.batch_insert_count), str(stats.batch_insert_count),
f"{success_rate_style}{success_rate:.1f}%[/{success_rate_style[1:]}", f"{success_rate_style}{success_rate:.1f}%[/{success_rate_style[1:]}",
f"{duration:.2f}" f"{duration:.2f}",
) )
# 添加总计行 # 添加总计行
@@ -825,11 +830,13 @@ class MongoToSQLiteMigrator:
f"[bold]{total_success}[/bold]", f"[bold]{total_success}[/bold]",
f"[bold red]{total_errors}[/bold red]" if total_errors > 0 else "[bold]0[/bold]", f"[bold red]{total_errors}[/bold red]" if total_errors > 0 else "[bold]0[/bold]",
f"[bold yellow]{total_skipped}[/bold yellow]" if total_skipped > 0 else "[bold]0[/bold]", f"[bold yellow]{total_skipped}[/bold yellow]" if total_skipped > 0 else "[bold]0[/bold]",
f"[bold bright_yellow]{total_duplicates}[/bold bright_yellow]" if total_duplicates > 0 else "[bold]0[/bold]", f"[bold bright_yellow]{total_duplicates}[/bold bright_yellow]"
if total_duplicates > 0
else "[bold]0[/bold]",
f"[bold red]{total_validation_errors}[/bold red]" if total_validation_errors > 0 else "[bold]0[/bold]", f"[bold red]{total_validation_errors}[/bold red]" if total_validation_errors > 0 else "[bold]0[/bold]",
f"[bold]{total_batch_inserts}[/bold]", f"[bold]{total_batch_inserts}[/bold]",
f"[bold]{total_rate_style}{total_success_rate:.1f}%[/{total_rate_style[1:]}[/bold]", f"[bold]{total_rate_style}{total_success_rate:.1f}%[/{total_rate_style[1:]}[/bold]",
f"[bold]{total_duration_seconds:.2f}[/bold]" f"[bold]{total_duration_seconds:.2f}[/bold]",
) )
self.console.print(table) self.console.print(table)
@@ -854,9 +861,7 @@ class MongoToSQLiteMigrator:
if status_items: if status_items:
status_panel = Panel( status_panel = Panel(
"\n".join(status_items), "\n".join(status_items), title="[bold yellow]迁移状态总结[/bold yellow]", border_style="yellow"
title="[bold yellow]迁移状态总结[/bold yellow]",
border_style="yellow"
) )
self.console.print(status_panel) self.console.print(status_panel)
@@ -868,11 +873,7 @@ class MongoToSQLiteMigrator:
f"[cyan]批量插入优化:[/cyan] 执行了 {total_batch_inserts} 次批量操作" f"[cyan]批量插入优化:[/cyan] 执行了 {total_batch_inserts} 次批量操作"
) )
performance_panel = Panel( performance_panel = Panel(performance_info, title="[bold green]性能统计[/bold green]", border_style="green")
performance_info,
title="[bold green]性能统计[/bold green]",
border_style="green"
)
self.console.print(performance_panel) self.console.print(performance_panel)
def add_migration_config(self, config: MigrationConfig): def add_migration_config(self, config: MigrationConfig):
@@ -900,27 +901,23 @@ class MongoToSQLiteMigrator:
def export_error_report(self, all_stats: Dict[str, MigrationStats], filepath: str): def export_error_report(self, all_stats: Dict[str, MigrationStats], filepath: str):
"""导出错误报告""" """导出错误报告"""
error_report = { error_report = {
'timestamp': datetime.now().isoformat(), "timestamp": datetime.now().isoformat(),
'summary': { "summary": {
collection: { collection: {
'total': stats.total_documents, "total": stats.total_documents,
'processed': stats.processed_count, "processed": stats.processed_count,
'success': stats.success_count, "success": stats.success_count,
'errors': stats.error_count, "errors": stats.error_count,
'skipped': stats.skipped_count, "skipped": stats.skipped_count,
'duplicates': stats.duplicate_count "duplicates": stats.duplicate_count,
} }
for collection, stats in all_stats.items() for collection, stats in all_stats.items()
}, },
'errors': { "errors": {collection: stats.errors for collection, stats in all_stats.items() if stats.errors},
collection: stats.errors
for collection, stats in all_stats.items()
if stats.errors
}
} }
try: try:
with open(filepath, 'w', encoding='utf-8') as f: with open(filepath, "w", encoding="utf-8") as f:
json.dump(error_report, f, ensure_ascii=False, indent=2) json.dump(error_report, f, ensure_ascii=False, indent=2)
logger.info(f"错误报告已导出到: {filepath}") logger.info(f"错误报告已导出到: {filepath}")
except Exception as e: except Exception as e: