Database refactoring

雅诺狐
2025-08-16 23:43:45 +08:00
committed by Windpicker-owo
parent 6bd4170c90
commit 875e02d42f
21 changed files with 841 additions and 1034 deletions

View File

@@ -14,7 +14,7 @@ from PIL import Image
 from rich.traceback import install
 from sqlalchemy import select
 from src.common.database.database import db
-from src.common.database.sqlalchemy_database_api import get_session
+from src.common.database.sqlalchemy_database_api import get_db_session
 from src.common.database.sqlalchemy_models import Emoji, Images
 from src.common.logger import get_logger
 from src.config.config import global_config, model_config
@@ -30,8 +30,6 @@ EMOJI_DIR = os.path.join(BASE_DIR, "emoji") # 表情包存储目录
 EMOJI_REGISTERED_DIR = os.path.join(BASE_DIR, "emoji_registed") # 已注册的表情包注册目录
 MAX_EMOJI_FOR_PROMPT = 20 # 最大允许的表情包描述数量于图片替换的 prompt 中

-session = get_session()
-
 """
 还没经过测试,有些地方数据库和内存数据同步可能不完全
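The shape of this commit: every module-level `session = get_session()` singleton (like the one removed above) is replaced by a short-lived session per operation, opened with `with get_db_session() as session:`. The diff never shows `get_db_session` itself; a minimal sketch of what such a context manager usually looks like — the engine URL and factory names here are assumptions, not the project's actual code:

from contextlib import contextmanager
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

engine = create_engine("sqlite:///bot.db")  # hypothetical engine setup
SessionLocal = sessionmaker(bind=engine)

@contextmanager
def get_db_session():
    """Yield a fresh session; roll back on error, always close."""
    session = SessionLocal()
    try:
        yield session
        session.commit()  # commit whatever the block left pending
    except Exception:
        session.rollback()
        raise
    finally:
        session.close()

Many hunks below still call session.commit() explicitly inside the with block; comments added elsewhere in this commit ("会在上下文管理器中自动调用" — committed automatically by the context manager) suggest the real implementation also commits on exit, so the explicit calls are harmless duplication.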
@@ -152,28 +150,29 @@ class MaiEmoji:
         # --- 数据库操作 ---
         try:
             # 准备数据库记录 for emoji collection
-            emotion_str = ",".join(self.emotion) if self.emotion else ""
-            emoji = Emoji(
-                emoji_hash=self.hash,
-                full_path=self.full_path,
-                format=self.format,
-                description=self.description,
-                emotion=emotion_str, # Store as comma-separated string
-                query_count=0, # Default value
-                is_registered=True,
-                is_banned=False, # Default value
-                record_time=self.register_time, # Use MaiEmoji's register_time for DB record_time
-                register_time=self.register_time,
-                usage_count=self.usage_count,
-                last_used_time=self.last_used_time,
-            )
-            session.add(emoji)
-            session.commit()
-
-            logger.info(f"[注册] 表情包信息保存到数据库: {self.filename} ({self.emotion})")
-
-            return True
+            with get_db_session() as session:
+                emotion_str = ",".join(self.emotion) if self.emotion else ""
+                emoji = Emoji(
+                    emoji_hash=self.hash,
+                    full_path=self.full_path,
+                    format=self.format,
+                    description=self.description,
+                    emotion=emotion_str, # Store as comma-separated string
+                    query_count=0, # Default value
+                    is_registered=True,
+                    is_banned=False, # Default value
+                    record_time=self.register_time, # Use MaiEmoji's register_time for DB record_time
+                    register_time=self.register_time,
+                    usage_count=self.usage_count,
+                    last_used_time=self.last_used_time,
+                )
+                session.add(emoji)
+                session.commit()
+
+                logger.info(f"[注册] 表情包信息保存到数据库: {self.filename} ({self.emotion})")
+
+                return True
         except Exception as db_error:
             logger.error(f"[错误] 保存数据库失败 ({self.filename}): {str(db_error)}")
@@ -205,14 +204,15 @@ class MaiEmoji:
         # 2. 删除数据库记录
         try:
-            will_delete_emoji = session.execute(select(Emoji).where(Emoji.emoji_hash == self.hash)).scalar_one_or_none()
-            if will_delete_emoji is None:
-                logger.warning(f"[删除] 数据库中未找到哈希值为 {self.hash} 的表情包记录。")
-                result = 0 # Indicate no DB record was deleted
-            else:
-                session.delete(will_delete_emoji)
-                session.commit()
-                result = 1 # Successfully deleted one record
+            with get_db_session() as session:
+                will_delete_emoji = session.execute(select(Emoji).where(Emoji.emoji_hash == self.hash)).scalar_one_or_none()
+                if will_delete_emoji is None:
+                    logger.warning(f"[删除] 数据库中未找到哈希值为 {self.hash} 的表情包记录。")
+                    result = 0 # Indicate no DB record was deleted
+                else:
+                    session.delete(will_delete_emoji)
+                    result = 1 # Successfully deleted one record
+                session.commit()
         except Exception as e:
             logger.error(f"[错误] 删除数据库记录时出错: {str(e)}")
             result = 0
@@ -403,35 +403,36 @@ class EmojiManager:
     def initialize(self) -> None:
         """初始化数据库连接和表情目录"""
-        try:
-            db.connect(reuse_if_open=True)
-            if db.is_closed():
-                raise RuntimeError("数据库连接失败")
-            _ensure_emoji_dir()
-            self._initialized = True # 标记为已初始化
-            logger.info("EmojiManager初始化成功")
-        except Exception as e:
-            logger.error(f"EmojiManager初始化失败: {e}")
-            self._initialized = False
-            raise
+        # try:
+        #     db.connect(reuse_if_open=True)
+        #     if db.is_closed():
+        #         raise RuntimeError("数据库连接失败")
+        #     _ensure_emoji_dir()
+        #     self._initialized = True # 标记为已初始化
+        #     logger.info("EmojiManager初始化成功")
+        # except Exception as e:
+        #     logger.error(f"EmojiManager初始化失败: {e}")
+        #     self._initialized = False
+        #     raise

-    def _ensure_db(self) -> None:
-        """确保数据库已初始化"""
-        if not self._initialized:
-            self.initialize()
-        if not self._initialized:
-            raise RuntimeError("EmojiManager not initialized")
+    # def _ensure_db(self) -> None:
+    #     """确保数据库已初始化"""
+    #     if not self._initialized:
+    #         self.initialize()
+    #     if not self._initialized:
+    #         raise RuntimeError("EmojiManager not initialized")

     def record_usage(self, emoji_hash: str) -> None:
         """记录表情使用次数"""
         try:
-            emoji_update = session.execute(select(Emoji).where(Emoji.emoji_hash == emoji_hash)).scalar_one_or_none()
-            if emoji_update is None:
-                logger.error(f"记录表情使用失败: 未找到 hash 为 {emoji_hash} 的表情包")
-            else:
-                emoji_update.usage_count += 1
-                emoji_update.last_used_time = time.time() # Update last used time
-                session.commit() # Persist changes to DB
+            with get_db_session() as session:
+                emoji_update = session.execute(select(Emoji).where(Emoji.emoji_hash == emoji_hash)).scalar_one_or_none()
+                if emoji_update is None:
+                    logger.error(f"记录表情使用失败: 未找到 hash 为 {emoji_hash} 的表情包")
+                else:
+                    emoji_update.usage_count += 1
+                    emoji_update.last_used_time = time.time() # Update last used time
+                    session.commit()
         except Exception as e:
             logger.error(f"记录表情使用失败: {str(e)}")
@@ -659,11 +660,12 @@ class EmojiManager:
     async def get_all_emoji_from_db(self) -> None:
        """获取所有表情包并初始化为MaiEmoji类对象更新 self.emoji_objects"""
         try:
-            self._ensure_db()
-            logger.debug("[数据库] 开始加载所有表情包记录 ...")
-            emoji_instances = session.execute(select(Emoji)).scalars().all()
-            emoji_objects, load_errors = _to_emoji_objects(emoji_instances)
+            with get_db_session() as session:
+                self._ensure_db()
+                logger.debug("[数据库] 开始加载所有表情包记录 ...")
+                emoji_instances = session.execute(select(Emoji)).scalars().all()
+                emoji_objects, load_errors = _to_emoji_objects(emoji_instances)

             # 更新内存中的列表和数量
             self.emoji_objects = emoji_objects
@@ -672,6 +674,7 @@ class EmojiManager:
logger.info(f"[数据库] 加载完成: 共加载 {self.emoji_num} 个表情包记录。")
if load_errors > 0:
logger.warning(f"[数据库] 加载过程中出现 {load_errors} 个错误。")
except Exception as e:
logger.error(f"[错误] 从数据库加载所有表情包对象失败: {str(e)}")
@@ -688,7 +691,8 @@ class EmojiManager:
list[MaiEmoji]: 表情包对象列表
"""
try:
self._ensure_db()
with get_db_session() as session:
self._ensure_db()
if emoji_hash:
query = session.execute(select(Emoji).where(Emoji.emoji_hash == emoji_hash)).scalars().all()
@@ -703,7 +707,6 @@ class EmojiManager:
if load_errors > 0:
logger.warning(f"[查询] 加载过程中出现 {load_errors} 个错误。")
return emoji_objects
except Exception as e:
@@ -776,13 +779,15 @@ class EmojiManager:
         # 如果内存中没有,从数据库查找
         self._ensure_db()
         try:
-            emoji_record = session.execute(select(Emoji).where(Emoji.emoji_hash == emoji_hash)).scalar_one_or_none()
-            if emoji_record and emoji_record.description:
-                logger.info(f"[缓存命中] 从数据库获取表情包描述: {emoji_record.description[:50]}...")
-                return emoji_record.description
+            with get_db_session() as session:
+                emoji_record = session.execute(select(Emoji).where(Emoji.emoji_hash == emoji_hash)).scalar_one_or_none()
+                if emoji_record and emoji_record.description:
+                    logger.info(f"[缓存命中] 从数据库获取表情包描述: {emoji_record.description[:50]}...")
+                    return emoji_record.description
+        except Exception as e:
+            logger.error(f"从数据库查询表情包描述时出错: {e}")

         return None
     except Exception as e:
@@ -937,13 +942,14 @@ class EmojiManager:
             # 尝试从Images表获取已有的详细描述可能在收到表情包时已生成
             existing_description = None
             try:
-                stmt = select(Images).where((Images.emoji_hash == image_hash) & (Images.type == "emoji"))
-                existing_image = session.execute(stmt).scalar_one_or_none()
-                if existing_image and existing_image.description:
-                    existing_description = existing_image.description
-                    logger.info(f"[复用描述] 找到已有详细描述: {existing_description[:50]}...")
+                with get_db_session() as session:
+                    # from src.common.database.database_model_compat import Images
+                    stmt = select(Images).where((Images.emoji_hash == image_hash) & (Images.type == "emoji"))
+                    existing_image = session.execute(stmt).scalar_one_or_none()
+                    if existing_image and existing_image.description:
+                        existing_description = existing_image.description
+                        logger.info(f"[复用描述] 找到已有详细描述: {existing_description[:50]}...")
             except Exception as e:
                 logger.debug(f"查询已有描述时出错: {e}")

View File

@@ -7,7 +7,7 @@ from datetime import datetime
 from typing import List, Dict, Optional, Any, Tuple

 from src.common.logger import get_logger
-from src.common.database.sqlalchemy_database_api import get_session
+from src.common.database.sqlalchemy_database_api import get_db_session
 from sqlalchemy import select
 from src.common.database.sqlalchemy_models import Expression
 from src.llm_models.utils_model import LLMRequest
@@ -22,7 +22,6 @@ DECAY_DAYS = 30 # 30天衰减到0.01
 DECAY_MIN = 0.01 # 最小衰减值

 logger = get_logger("expressor")
-session = get_session()

 def format_create_date(timestamp: float) -> str:
"""
@@ -179,7 +178,8 @@ class ExpressionLearner:
         learnt_grammar_expressions = []

         # 直接从数据库查询
-        style_query = session.execute(select(Expression).where((Expression.chat_id == self.chat_id) & (Expression.type == "style")))
+        with get_db_session() as session:
+            style_query = session.execute(select(Expression).where((Expression.chat_id == self.chat_id) & (Expression.type == "style")))
         for expr in style_query.scalars():
             # 确保create_date存在如果不存在则使用last_active_time
             create_date = expr.create_date if expr.create_date is not None else expr.last_active_time
@@ -222,8 +222,9 @@
         对数据库中的所有表达方式应用全局衰减
         """
         try:
-            # 获取所有表达方式
-            all_expressions = session.execute(select(Expression)).scalars()
+            with get_db_session() as session:
+                # 获取所有表达方式
+                all_expressions = session.execute(select(Expression)).scalars()

             updated_count = 0
             deleted_count = 0
@@ -240,19 +241,19 @@
                 if new_count <= 0.01:
                     # 如果count太小删除这个表达方式
                     session.delete(expr)
-                    session.commit()
                     deleted_count += 1
                 else:
                     # 更新count
                     expr.count = new_count
                     updated_count += 1
+                session.commit()

             if updated_count > 0 or deleted_count > 0:
                 logger.info(f"全局衰减完成:更新了 {updated_count} 个表达方式,删除了 {deleted_count} 个表达方式")
         except Exception as e:
             session.rollback()
             logger.error(f"数据库全局衰减失败: {e}")
def calculate_decay_factor(self, time_diff_days: float) -> float:
@@ -321,43 +322,46 @@
         for chat_id, expr_list in chat_dict.items():
             for new_expr in expr_list:
                 # 查找是否已存在相似表达方式
-                query = session.execute(select(Expression).where(
-                    (Expression.chat_id == chat_id)
-                    & (Expression.type == "style")
-                    & (Expression.situation == new_expr["situation"])
-                    & (Expression.style == new_expr["style"])
-                )).scalar()
-                if query:
-                    expr_obj = query
-                    # 50%概率替换内容
-                    if random.random() < 0.5:
-                        expr_obj.situation = new_expr["situation"]
-                        expr_obj.style = new_expr["style"]
-                    expr_obj.count = expr_obj.count + 1
-                    expr_obj.last_active_time = current_time
-                else:
-                    new_expression = Expression(
-                        situation=new_expr["situation"],
-                        style=new_expr["style"],
-                        count=1,
-                        last_active_time=current_time,
-                        chat_id=chat_id,
-                        type="style",
-                        create_date=current_time, # 手动设置创建日期
-                    )
-                    session.add(new_expression)
-                # 限制最大数量
-                exprs = list(
-                    session.execute(select(Expression)
-                    .where((Expression.chat_id == chat_id) & (Expression.type == type))
-                    .order_by(Expression.count.asc())).scalars()
-                )
-                if len(exprs) > MAX_EXPRESSION_COUNT:
-                    # 删除count最小的多余表达方式
-                    for expr in exprs[: len(exprs) - MAX_EXPRESSION_COUNT]:
-                        session.delete(expr)
-                    session.commit()
-        return learnt_expressions
+                with get_db_session() as session:
+                    query = session.execute(select(Expression).where(
+                        (Expression.chat_id == chat_id)
+                        & (Expression.type == "style")
+                        & (Expression.situation == new_expr["situation"])
+                        & (Expression.style == new_expr["style"])
+                    )).scalar()
+                    if query:
+                        expr_obj = query
+                        # 50%概率替换内容
+                        if random.random() < 0.5:
+                            expr_obj.situation = new_expr["situation"]
+                            expr_obj.style = new_expr["style"]
+                        expr_obj.count = expr_obj.count + 1
+                        expr_obj.last_active_time = current_time
+                    else:
+                        new_expression = Expression(
+                            situation=new_expr["situation"],
+                            style=new_expr["style"],
+                            count=1,
+                            last_active_time=current_time,
+                            chat_id=chat_id,
+                            type=type,
+                            create_date=current_time, # 手动设置创建日期
+                        )
+                        session.add(new_expression)
+                    session.commit()
+                    # 限制最大数量
+                    exprs = list(
+                        session.execute(select(Expression)
+                        .where((Expression.chat_id == chat_id) & (Expression.type == type))
+                        .order_by(Expression.count.asc())).scalars()
+                    )
+                    if len(exprs) > MAX_EXPRESSION_COUNT:
+                        # 删除count最小的多余表达方式
+                        for expr in exprs[: len(exprs) - MAX_EXPRESSION_COUNT]:
+                            session.delete(expr)
+                        session.commit()
+        return learnt_expressions
async def learn_expression(self, num: int = 10) -> Optional[Tuple[List[Tuple[str, str, str]], str]]:
"""从指定聊天流学习表达方式
@@ -535,8 +539,8 @@ class ExpressionLearnerManager:
                     continue

                 # 查重同chat_id+type+situation+style
-                query = session.execute(select(Expression).where(
+                with get_db_session() as session:
+                    query = session.execute(select(Expression).where(
                     (Expression.chat_id == chat_id)
                     & (Expression.type == type_str)
                     & (Expression.situation == situation)
@@ -557,6 +561,8 @@
                         create_date=last_active_time, # 迁移时使用last_active_time作为创建时间
                     )
                     session.add(new_expression)
+                    session.commit()
+
                     migrated_count += 1
                 logger.info(f"已迁移 {expr_file} 到数据库,包含 {len(expressions)} 个表达方式")
             except json.JSONDecodeError as e:
@@ -586,21 +592,22 @@
         使用last_active_time作为create_date的默认值
         """
         try:
-            # 查找所有create_date为空的表达方式
-            old_expressions = session.execute(select(Expression).where(Expression.create_date.is_(None))).scalars()
-            updated_count = 0
-
-            for expr in old_expressions:
-                # 使用last_active_time作为create_date
-                expr.create_date = expr.last_active_time
-                updated_count += 1
-            session.commit()
-
-            if updated_count > 0:
-                logger.info(f"已为 {updated_count} 个老的表达方式设置创建日期")
+            with get_db_session() as session:
+                # 查找所有create_date为空的表达方式
+                old_expressions = session.execute(select(Expression).where(Expression.create_date.is_(None))).scalars()
+                updated_count = 0
+
+                for expr in old_expressions:
+                    # 使用last_active_time作为create_date
+                    expr.create_date = expr.last_active_time
+                    updated_count += 1
+
+                if updated_count > 0:
+                    logger.info(f"已为 {updated_count} 个老的表达方式设置创建日期")
+                session.commit()
         except Exception as e:
             session.rollback()
             logger.error(f"迁移老数据创建日期失败: {e}")
def delete_all_grammar_expressions(self) -> int:

View File

@@ -12,8 +12,7 @@ from src.common.logger import get_logger
 from sqlalchemy import select
 from src.common.database.sqlalchemy_models import Expression
 from src.chat.utils.prompt_builder import Prompt, global_prompt_manager
-from src.common.database.sqlalchemy_database_api import get_session
-
-session = get_session()
+from src.common.database.sqlalchemy_database_api import get_db_session

 logger = get_logger("expression_selector")
@@ -132,14 +131,14 @@ class ExpressionSelector:
         # sourcery skip: extract-duplicate-method, move-assign
         # 支持多chat_id合并抽选
         related_chat_ids = self.get_related_chat_ids(chat_id)
-
-        # 优化一次性查询所有相关chat_id的表达方式
-        style_query = session.execute(select(Expression).where(
-            (Expression.chat_id.in_(related_chat_ids)) & (Expression.type == "style")
-        ))
-        grammar_query = session.execute(select(Expression).where(
-            (Expression.chat_id.in_(related_chat_ids)) & (Expression.type == "grammar")
-        ))
+        with get_db_session() as session:
+            # 优化一次性查询所有相关chat_id的表达方式
+            style_query = session.execute(select(Expression).where(
+                (Expression.chat_id.in_(related_chat_ids)) & (Expression.type == "style")
+            ))
+            grammar_query = session.execute(select(Expression).where(
+                (Expression.chat_id.in_(related_chat_ids)) & (Expression.type == "grammar")
+            ))

         style_exprs = [
             {
@@ -176,7 +175,13 @@
             selected_style = weighted_sample(style_exprs, style_weights, total_num)
         else:
             selected_style = []

-        return selected_style
+        if grammar_exprs:
+            grammar_weights = [expr.get("count", 1) for expr in grammar_exprs]
+            selected_grammar = weighted_sample(grammar_exprs, grammar_weights, grammar_num)
+        else:
+            selected_grammar = []
+
+        return selected_style, selected_grammar
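weighted_sample is not defined anywhere in this diff; from its call sites it draws up to n distinct expressions with probability proportional to their count. A minimal sketch of a sampler matching that contract (the body is an assumption):

import random

def weighted_sample(items, weights, k):
    """Draw up to k distinct items, probability proportional to weight."""
    items, weights = list(items), list(weights)
    picked = []
    while items and len(picked) < k:
        i = random.choices(range(len(items)), weights=weights, k=1)[0]
        picked.append(items.pop(i))  # remove so the same item can't repeat
        weights.pop(i)
    return picked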
def update_expressions_count_batch(self, expressions_to_update: List[Dict[str, Any]], increment: float = 0.1):
"""对一批表达方式更新count值按chat_id+type分组后一次性写入数据库"""
@@ -195,7 +200,8 @@
             if key not in updates_by_key:
                 updates_by_key[key] = expr

         for chat_id, expr_type, situation, style in updates_by_key:
-            query = session.execute(select(Expression).where(
+            with get_db_session() as session:
+                query = session.execute(select(Expression).where(
                 (Expression.chat_id == chat_id)
                 & (Expression.type == expr_type)
                 & (Expression.situation == situation)
@@ -207,10 +213,11 @@ class ExpressionSelector:
                 new_count = min(current_count + increment, 5.0)
                 expr_obj.count = new_count
                 expr_obj.last_active_time = time.time()
-                session.commit()
                 logger.debug(
                     f"表达方式激活: 原count={current_count:.3f}, 增量={increment}, 新count={new_count:.3f} in db"
                 )
+            session.commit()
async def select_suitable_expressions_llm(
self,

View File

@@ -19,7 +19,7 @@ from src.config.config import global_config, model_config
 from sqlalchemy import select,insert,update,delete
 from src.common.database.sqlalchemy_models import Messages, GraphNodes, GraphEdges # SQLAlchemy Models导入
 from src.common.logger import get_logger
-from src.common.database.sqlalchemy_database_api import get_session
+from src.common.database.sqlalchemy_database_api import get_db_session
 from src.chat.memory_system.sample_distribution import MemoryBuildScheduler # 分布生成器
 from src.chat.utils.chat_message_builder import (
     get_raw_msg_by_timestamp,
@@ -39,7 +39,6 @@ def cosine_similarity(v1, v2):
 install(extra_lines=3)

-session = get_session()

 def calculate_information_content(text):
     """计算文本的信息量(熵)"""
@@ -949,13 +948,13 @@ class EntorhinalCortex:
             for message in messages:
                 # 确保在更新前获取最新的 memorized_times
                 current_memorized_times = message.get("memorized_times", 0)

-                # 使用 SQLAlchemy 2.0 更新记录
-                session.execute(
-                    update(Messages)
-                    .where(Messages.message_id == message["message_id"])
-                    .values(memorized_times=current_memorized_times + 1)
-                )
-                session.commit()
+                with get_db_session() as session:
+                    session.execute(
+                        update(Messages)
+                        .where(Messages.message_id == message["message_id"])
+                        .values(memorized_times=current_memorized_times + 1)
+                    )
+                    session.commit()

             return messages # 直接返回原始的消息列表

         target_timestamp -= 120 # 如果第一次尝试失败,稍微向前调整时间戳再试
@@ -969,267 +968,261 @@ class EntorhinalCortex:
current_time = datetime.datetime.now().timestamp()
# 获取数据库中所有节点和内存中所有节点
db_nodes = {node.concept: node for node in session.execute(select(GraphNodes)).scalars()}
memory_nodes = list(self.memory_graph.G.nodes(data=True))
with get_db_session() as session:
db_nodes = {node.concept: node for node in session.execute(select(GraphNodes)).scalars()}
memory_nodes = list(self.memory_graph.G.nodes(data=True))
# 批量准备节点数据
nodes_to_create = []
nodes_to_update = []
nodes_to_delete = set()
# 批量准备节点数据
nodes_to_create = []
nodes_to_update = []
nodes_to_delete = set()
# 处理节点
for concept, data in memory_nodes:
if not concept or not isinstance(concept, str):
self.memory_graph.G.remove_node(concept)
continue
# 处理节点
for concept, data in memory_nodes:
if not concept or not isinstance(concept, str):
self.memory_graph.G.remove_node(concept)
continue
memory_items = data.get("memory_items", "")
# 直接检查字符串是否为空,不需要分割成列表
if not memory_items or memory_items.strip() == "":
self.memory_graph.G.remove_node(concept)
continue
memory_items = data.get("memory_items", [])
if not isinstance(memory_items, list):
memory_items = [memory_items] if memory_items else []
# 计算内存中节点的特征值
memory_hash = self.hippocampus.calculate_node_hash(concept, memory_items)
created_time = data.get("created_time", current_time)
last_modified = data.get("last_modified", current_time)
if not memory_items:
self.memory_graph.G.remove_node(concept)
continue
# memory_items直接作为字符串存储不需要JSON序列化
if not memory_items:
continue
# 计算内存中节点的特征值
memory_hash = self.hippocampus.calculate_node_hash(concept, memory_items)
created_time = data.get("created_time", current_time)
last_modified = data.get("last_modified", current_time)
# 获取权重属性
weight = data.get("weight", 1.0)
# 将memory_items转换为JSON字符串
try:
memory_items = [str(item) for item in memory_items]
memory_items_json = json.dumps(memory_items, ensure_ascii=False)
if not memory_items_json:
continue
except Exception:
self.memory_graph.G.remove_node(concept)
continue
if concept not in db_nodes:
nodes_to_create.append(
{
"concept": concept,
"memory_items": memory_items,
"weight": weight,
"hash": memory_hash,
"created_time": created_time,
"last_modified": last_modified,
}
)
else:
db_node = db_nodes[concept]
if db_node.hash != memory_hash:
nodes_to_update.append(
if concept not in db_nodes:
nodes_to_create.append(
{
"concept": concept,
"memory_items": memory_items,
"weight": weight,
"hash": memory_hash,
"created_time": created_time,
"last_modified": last_modified,
}
)
else:
db_node = db_nodes[concept]
if db_node.hash != memory_hash:
nodes_to_update.append(
{
"concept": concept,
"memory_items": memory_items_json,
"hash": memory_hash,
"last_modified": last_modified,
}
)
# 计算需要删除的节点
memory_concepts = {concept for concept, _ in memory_nodes}
nodes_to_delete = set(db_nodes.keys()) - memory_concepts
# 批量处理节点
if nodes_to_create:
batch_size = 100
for i in range(0, len(nodes_to_create), batch_size):
batch = nodes_to_create[i : i + batch_size]
session.execute(insert(GraphNodes), batch)
if nodes_to_update:
batch_size = 100
for i in range(0, len(nodes_to_update), batch_size):
batch = nodes_to_update[i : i + batch_size]
for node_data in batch:
session.execute(
update(GraphNodes)
.where(GraphNodes.concept == node_data["concept"])
.values(**{k: v for k, v in node_data.items() if k != "concept"})
)
if nodes_to_delete:
session.execute(delete(GraphNodes).where(GraphNodes.concept.in_(nodes_to_delete)))
# 处理边的信息
db_edges = list(session.execute(select(GraphEdges)).scalars())
memory_edges = list(self.memory_graph.G.edges(data=True))
# 创建边的哈希值字典
db_edge_dict = {}
for edge in db_edges:
edge_hash = self.hippocampus.calculate_edge_hash(edge.source, edge.target)
db_edge_dict[(edge.source, edge.target)] = {"hash": edge_hash, "strength": edge.strength}
# 批量准备边数据
edges_to_create = []
edges_to_update = []
# 处理边
for source, target, data in memory_edges:
edge_hash = self.hippocampus.calculate_edge_hash(source, target)
edge_key = (source, target)
strength = data.get("strength", 1)
created_time = data.get("created_time", current_time)
last_modified = data.get("last_modified", current_time)
if edge_key not in db_edge_dict:
edges_to_create.append(
{
"source": source,
"target": target,
"strength": strength,
"hash": edge_hash,
"created_time": created_time,
"last_modified": last_modified,
}
)
elif db_edge_dict[edge_key]["hash"] != edge_hash:
edges_to_update.append(
{
"source": source,
"target": target,
"strength": strength,
"hash": edge_hash,
"last_modified": last_modified,
}
)
# 计算需要删除的节点
memory_concepts = {concept for concept, _ in memory_nodes}
nodes_to_delete = set(db_nodes.keys()) - memory_concepts
# 计算需要删除的边
memory_edge_keys = {(source, target) for source, target, _ in memory_edges}
edges_to_delete = set(db_edge_dict.keys()) - memory_edge_keys
# 批量处理节点
if nodes_to_create:
batch_size = 100
for i in range(0, len(nodes_to_create), batch_size):
batch = nodes_to_create[i : i + batch_size]
session.execute(insert(GraphNodes), batch)
session.commit()
# 批量处理边
if edges_to_create:
batch_size = 100
for i in range(0, len(edges_to_create), batch_size):
batch = edges_to_create[i : i + batch_size]
session.execute(insert(GraphEdges), batch)
if nodes_to_update:
batch_size = 100
for i in range(0, len(nodes_to_update), batch_size):
batch = nodes_to_update[i : i + batch_size]
for node_data in batch:
if edges_to_update:
batch_size = 100
for i in range(0, len(edges_to_update), batch_size):
batch = edges_to_update[i : i + batch_size]
for edge_data in batch:
session.execute(
update(GraphEdges)
.where(
(GraphEdges.source == edge_data["source"]) & (GraphEdges.target == edge_data["target"])
)
.values(**{k: v for k, v in edge_data.items() if k not in ["source", "target"]})
)
if edges_to_delete:
for source, target in edges_to_delete:
session.execute(
update(GraphNodes)
.where(GraphNodes.concept == node_data["concept"])
.values(**{k: v for k, v in node_data.items() if k != "concept"})
delete(GraphEdges).where((GraphEdges.source == source) & (GraphEdges.target == target))
)
session.commit()
if nodes_to_delete:
session.execute(delete(GraphNodes).where(GraphNodes.concept.in_(nodes_to_delete)))
# 提交事务
session.commit()
# 处理边的信息
db_edges = list(session.execute(select(GraphEdges)).scalars())
memory_edges = list(self.memory_graph.G.edges(data=True))
# 创建边的哈希值字典
db_edge_dict = {}
for edge in db_edges:
edge_hash = self.hippocampus.calculate_edge_hash(edge.source, edge.target)
db_edge_dict[(edge.source, edge.target)] = {"hash": edge_hash, "strength": edge.strength}
# 批量准备边数据
edges_to_create = []
edges_to_update = []
# 处理边
for source, target, data in memory_edges:
edge_hash = self.hippocampus.calculate_edge_hash(source, target)
edge_key = (source, target)
strength = data.get("strength", 1)
created_time = data.get("created_time", current_time)
last_modified = data.get("last_modified", current_time)
if edge_key not in db_edge_dict:
edges_to_create.append(
{
"source": source,
"target": target,
"strength": strength,
"hash": edge_hash,
"created_time": created_time,
"last_modified": last_modified,
}
)
elif db_edge_dict[edge_key]["hash"] != edge_hash:
edges_to_update.append(
{
"source": source,
"target": target,
"strength": strength,
"hash": edge_hash,
"last_modified": last_modified,
}
)
# 计算需要删除的边
memory_edge_keys = {(source, target) for source, target, _ in memory_edges}
edges_to_delete = set(db_edge_dict.keys()) - memory_edge_keys
# 批量处理边
if edges_to_create:
batch_size = 100
for i in range(0, len(edges_to_create), batch_size):
batch = edges_to_create[i : i + batch_size]
session.execute(insert(GraphEdges), batch)
session.commit()
if edges_to_update:
batch_size = 100
for i in range(0, len(edges_to_update), batch_size):
batch = edges_to_update[i : i + batch_size]
for edge_data in batch:
session.execute(
update(GraphEdges)
.where(
(GraphEdges.source == edge_data["source"]) & (GraphEdges.target == edge_data["target"])
)
.values(**{k: v for k, v in edge_data.items() if k not in ["source", "target"]})
)
session.commit()
if edges_to_delete:
for source, target in edges_to_delete:
session.execute(
delete(GraphEdges).where((GraphEdges.source == source) & (GraphEdges.target == target))
)
session.commit()
end_time = time.time()
logger.info(f"[数据库] 同步完成,总耗时: {end_time - start_time:.2f}")
logger.info(f"[数据库] 同步了 {len(nodes_to_create) + len(nodes_to_update)} 个节点和 {len(edges_to_create) + len(edges_to_update)} 条边")
logger.info(f"[同步] 总耗时: {end_time - start_time:.2f}")
logger.info(f"[同步] 同步了 {len(memory_nodes)} 个节点和 {len(memory_edges)} 条边")
async def resync_memory_to_db(self):
"""清空数据库并重新同步所有记忆数据"""
start_time = time.time()
logger.info("[数据库] 开始重新同步所有记忆数据...")
# 清空数据库
clear_start = time.time()
session.execute(delete(GraphNodes))
session.execute(delete(GraphEdges))
session.commit()
clear_end = time.time()
logger.info(f"[数据库] 清空数据库耗时: {clear_end - clear_start:.2f}")
# 获取所有节点和边
memory_nodes = list(self.memory_graph.G.nodes(data=True))
memory_edges = list(self.memory_graph.G.edges(data=True))
current_time = datetime.datetime.now().timestamp()
# 批量准备节点数据
nodes_data = []
for concept, data in memory_nodes:
memory_items = data.get("memory_items", "")
with get_db_session() as session:
clear_start = time.time()
session.execute(delete(GraphNodes))
session.execute(delete(GraphEdges))
# 直接检查字符串是否为空,不需要分割成列表
if not memory_items or memory_items.strip() == "":
self.memory_graph.G.remove_node(concept)
continue
clear_end = time.time()
logger.info(f"[数据库] 清空数据库耗时: {clear_end - clear_start:.2f}")
# 计算内存中节点的特征值
memory_hash = self.hippocampus.calculate_node_hash(concept, memory_items)
created_time = data.get("created_time", current_time)
last_modified = data.get("last_modified", current_time)
# 获取所有节点和边
memory_nodes = list(self.memory_graph.G.nodes(data=True))
memory_edges = list(self.memory_graph.G.edges(data=True))
current_time = datetime.datetime.now().timestamp()
# memory_items直接作为字符串存储不需要JSON序列化
if not memory_items:
continue
# 批量准备节点数据
nodes_data = []
for concept, data in memory_nodes:
memory_items = data.get("memory_items", [])
if not isinstance(memory_items, list):
memory_items = [memory_items] if memory_items else []
# 获取权重属性
weight = data.get("weight", 1.0)
try:
memory_items = [str(item) for item in memory_items]
if memory_items_json := json.dumps(memory_items, ensure_ascii=False):
nodes_data.append(
{
"concept": concept,
"memory_items": memory_items_json,
"hash": self.hippocampus.calculate_node_hash(concept, memory_items),
"created_time": data.get("created_time", current_time),
"last_modified": data.get("last_modified", current_time),
}
)
nodes_data.append(
{
"concept": concept,
"memory_items": memory_items,
"weight": weight,
"hash": memory_hash,
"created_time": created_time,
"last_modified": last_modified,
}
)
except Exception as e:
logger.error(f"准备节点 {concept} 数据时发生错误: {e}")
continue
# 批量插入节点
if nodes_data:
batch_size = 100
for i in range(0, len(nodes_data), batch_size):
batch = nodes_data[i : i + batch_size]
GraphNodes.insert_many(batch).execute()
# 批量准备边数据
edges_data = []
for source, target, data in memory_edges:
try:
edges_data.append(
{
"source": source,
"target": target,
"strength": data.get("strength", 1),
"hash": self.hippocampus.calculate_edge_hash(source, target),
"created_time": data.get("created_time", current_time),
"last_modified": data.get("last_modified", current_time),
}
)
except Exception as e:
logger.error(f"准备边 {source}-{target} 数据时发生错误: {e}")
continue
# 批量准备边数据
edges_data = []
for source, target, data in memory_edges:
try:
edges_data.append(
{
"source": source,
"target": target,
"strength": data.get("strength", 1),
"hash": self.hippocampus.calculate_edge_hash(source, target),
"created_time": data.get("created_time", current_time),
"last_modified": data.get("last_modified", current_time),
}
)
except Exception as e:
logger.error(f"准备边 {source}-{target} 数据时发生错误: {e}")
continue
# 批量写入节点
node_start = time.time()
if nodes_data:
batch_size = 500 # 增加批量大小
for i in range(0, len(nodes_data), batch_size):
batch = nodes_data[i : i + batch_size]
session.execute(insert(GraphNodes), batch)
node_end = time.time()
logger.info(f"[数据库] 写入 {len(nodes_data)} 个节点耗时: {node_end - node_start:.2f}")
# 批量写入节点
node_start = time.time()
if nodes_data:
batch_size = 500 # 增加批量大小
for i in range(0, len(nodes_data), batch_size):
batch = nodes_data[i : i + batch_size]
session.execute(insert(GraphNodes), batch)
session.commit()
node_end = time.time()
logger.info(f"[数据库] 写入 {len(nodes_data)} 个节点耗时: {node_end - node_start:.2f}")
# 批量写入边
edge_start = time.time()
if edges_data:
batch_size = 500 # 增加批量大小
for i in range(0, len(edges_data), batch_size):
batch = edges_data[i : i + batch_size]
session.execute(insert(GraphEdges), batch)
session.commit()
# 批量写入边
edge_start = time.time()
if edges_data:
batch_size = 500 # 增加批量大小
for i in range(0, len(edges_data), batch_size):
batch = edges_data[i : i + batch_size]
session.execute(insert(GraphEdges), batch)
session.commit()
edge_end = time.time()
logger.info(f"[数据库] 写入 {len(edges_data)} 条边耗时: {edge_end - edge_start:.2f}")
@@ -1251,89 +1244,79 @@ class EntorhinalCortex:
skipped_nodes = 0
# 从数据库加载所有节点
nodes = list(session.execute(select(GraphNodes)).scalars())
for node in nodes:
concept = node.concept
try:
# 处理空字符串或None的情况
if not node.memory_items or node.memory_items.strip() == "":
logger.warning(f"节点 {concept}memory_items为空,跳过")
skipped_nodes += 1
with get_db_session() as session:
nodes = list(session.execute(select(GraphNodes)).scalars())
for node in nodes:
concept = node.concept
try:
memory_items = json.loads(node.memory_items)
if not isinstance(memory_items, list):
memory_items = [memory_items] if memory_items else []
# 检查时间字段是否存在
if not node.created_time or not node.last_modified:
need_update = True
# 更新数据库中的节点
update_data = {}
if not node.created_time:
update_data["created_time"] = current_time
if not node.last_modified:
update_data["last_modified"] = current_time
session.execute(
update(GraphNodes).where(GraphNodes.concept == concept).values(**update_data)
)
# 获取时间信息(如果不存在则使用当前时间)
created_time = node.created_time or current_time
last_modified = node.last_modified or current_time
# 添加节点到图中
self.memory_graph.G.add_node(
concept, memory_items=memory_items, created_time=created_time, last_modified=last_modified
)
except Exception as e:
logger.error(f"加载节点 {concept} 时发生错误: {e}")
continue
# 直接使用memory_items
memory_items = node.memory_items.strip()
# 从数据库加载所有边
edges = list(session.execute(select(GraphEdges)).scalars())
for edge in edges:
source = edge.source
target = edge.target
strength = edge.strength
# 检查时间字段是否存在
if not node.created_time or not node.last_modified:
# 更新数据库中的节点
if not edge.created_time or not edge.last_modified:
need_update = True
# 更新数据库中的边
update_data = {}
if not node.created_time:
if not edge.created_time:
update_data["created_time"] = current_time
if not node.last_modified:
if not edge.last_modified:
update_data["last_modified"] = current_time
session.execute(
update(GraphNodes).where(GraphNodes.concept == concept).values(**update_data)
update(GraphEdges)
.where((GraphEdges.source == source) & (GraphEdges.target == target))
.values(**update_data)
)
session.commit()
# 获取时间信息(如果不存在则使用当前时间)
created_time = node.created_time or current_time
last_modified = node.last_modified or current_time
created_time = edge.created_time or current_time
last_modified = edge.last_modified or current_time
# 获取权重属性
weight = node.weight if hasattr(node, 'weight') and node.weight is not None else 1.0
# 添加节点到图中
self.memory_graph.G.add_node(
concept, memory_items=memory_items, weight=weight, created_time=created_time, last_modified=last_modified
)
loaded_nodes += 1
except Exception as e:
logger.error(f"加载节点 {concept} 时发生错误: {e}")
skipped_nodes += 1
continue
# 只有当源节点和目标节点都存在时才添加边
if source in self.memory_graph.G and target in self.memory_graph.G:
self.memory_graph.G.add_edge(
source, target, strength=strength, created_time=created_time, last_modified=last_modified
)
session.commit()
# 从数据库加载所有边
edges = list(session.execute(select(GraphEdges)).scalars())
for edge in edges:
source = edge.source
target = edge.target
strength = edge.strength
# 检查时间字段是否存在
if not edge.created_time or not edge.last_modified:
need_update = True
# 更新数据库中的边
update_data = {}
if not edge.created_time:
update_data["created_time"] = current_time
if not edge.last_modified:
update_data["last_modified"] = current_time
session.execute(
update(GraphEdges)
.where((GraphEdges.source == source) & (GraphEdges.target == target))
.values(**update_data)
)
session.commit()
# 获取时间信息(如果不存在则使用当前时间)
created_time = edge.created_time or current_time
last_modified = edge.last_modified or current_time
# 只有当源节点和目标节点都存在时才添加边
if source in self.memory_graph.G and target in self.memory_graph.G:
self.memory_graph.G.add_edge(
source, target, strength=strength, created_time=created_time, last_modified=last_modified
)
if need_update:
logger.info("[数据库] 已为缺失的时间字段进行补充")
# 输出加载统计信息
logger.info(f"[数据库] 记忆加载完成: 总计 {total_nodes} 个节点, 成功加载 {loaded_nodes} 个, 跳过 {skipped_nodes}")
if need_update:
logger.info("[数据库] 已为缺失的时间字段进行补充")
# 负责整合,遗忘,合并记忆

View File

@@ -11,12 +11,11 @@ from datetime import datetime, timedelta
 from src.llm_models.utils_model import LLMRequest
 from src.common.logger import get_logger
 from src.common.database.sqlalchemy_models import Memory # SQLAlchemy Models导入
-from src.common.database.sqlalchemy_database_api import get_session
+from src.common.database.sqlalchemy_database_api import get_db_session
 from src.config.config import model_config
 from sqlalchemy import select

 logger = get_logger(__name__)
-session = get_session()

 class MemoryItem:
     def __init__(self, memory_id: str, chat_id: str, memory_text: str, keywords: list[str]):
@@ -113,7 +112,8 @@ class InstantMemory:
             logger.info(f"不需要记忆:{text}")

     async def store_memory(self, memory_item: MemoryItem):
-        memory = Memory(
+        with get_db_session() as session:
+            memory = Memory(
                 memory_id=memory_item.memory_id,
                 chat_id=memory_item.chat_id,
                 memory_text=memory_item.memory_text,
@@ -121,8 +121,8 @@
                 create_time=memory_item.create_time,
                 last_view_time=memory_item.last_view_time,
             )
-        session.add(memory)
-        session.commit()
+            session.add(memory)
+            session.commit()
async def get_memory(self, target: str):
from json_repair import repair_json
@@ -165,17 +165,18 @@
         logger.info(f"start_time: {start_time}, end_time: {end_time}")

         # 检索包含关键词的记忆
         memories_set = set()
-        if start_time and end_time:
-            start_ts = start_time.timestamp()
-            end_ts = end_time.timestamp()
-            query = session.execute(select(Memory).where(
-                (Memory.chat_id == self.chat_id)
-                & (Memory.create_time >= start_ts)
-                & (Memory.create_time < end_ts)
-            )).scalars()
-        else:
-            query = session.execute(select(Memory).where(Memory.chat_id == self.chat_id)).scalars()
+        with get_db_session() as session:
+            if start_time and end_time:
+                start_ts = start_time.timestamp()
+                end_ts = end_time.timestamp()
+                query = session.execute(select(Memory).where(
+                    (Memory.chat_id == self.chat_id)
+                    & (Memory.create_time >= start_ts)
+                    & (Memory.create_time < end_ts)
+                )).scalars()
+            else:
+                query = session.execute(select(Memory).where(Memory.chat_id == self.chat_id)).scalars()

             for mem in query:
                 # 对每条记忆
                 mem_keywords = mem.keywords or ""
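The hunk ends inside this loop, but the comma-joined keywords column follows the same convention as Emoji.emotion earlier in the commit. Purely as an illustration of matching such a column — a hypothetical helper, not the code that actually follows in the file:

def keyword_overlap(mem_keywords: str, query_keywords: list[str]) -> int:
    """Count query keywords present in a comma-separated keyword string."""
    stored = {k.strip() for k in mem_keywords.split(",") if k.strip()}
    return sum(1 for k in query_keywords if k in stored)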

View File

@@ -277,6 +277,7 @@ class ChatBot:
# logger.warning(f"检测到消息中含有违法,色情,暴力,反动,敏感内容,消息内容:{message.processed_plain_text},发送者:{message.message_info.user_info.user_nickname}")
# return
# 过滤检查
if _check_ban_words(message.processed_plain_text, chat, user_info) or _check_ban_regex( # type: ignore
message.raw_message, # type: ignore

View File

@@ -12,7 +12,7 @@ from sqlalchemy import select, text
 from sqlalchemy.dialects.sqlite import insert as sqlite_insert
 from sqlalchemy.dialects.mysql import insert as mysql_insert
 from src.common.database.sqlalchemy_models import ChatStreams # 新增导入
-from src.common.database.sqlalchemy_database_api import get_session
+from src.common.database.sqlalchemy_database_api import get_db_session
 from src.config.config import global_config # 新增导入

 # 避免循环导入使用TYPE_CHECKING进行类型提示
 if TYPE_CHECKING:
@@ -23,7 +23,6 @@ install(extra_lines=3)

 logger = get_logger("chat_stream")
-session = get_session()

 class ChatMessageContext:
     """聊天消息上下文,存储消息的上下文信息"""
@@ -132,13 +131,14 @@ class ChatManager:
         if not self._initialized:
             self.streams: Dict[str, ChatStream] = {} # stream_id -> ChatStream
             self.last_messages: Dict[str, "MessageRecv"] = {} # stream_id -> last_message
-            try:
-                db.connect(reuse_if_open=True)
-                # 确保 ChatStreams 表存在
-                session.execute(text("CREATE TABLE IF NOT EXISTS chat_streams (stream_id TEXT PRIMARY KEY, platform TEXT, create_time REAL, last_active_time REAL, user_platform TEXT, user_id TEXT, user_nickname TEXT, user_cardname TEXT, group_platform TEXT, group_id TEXT, group_name TEXT)"))
-                session.commit()
-            except Exception as e:
-                logger.error(f"数据库连接或 ChatStreams 表创建失败: {e}")
+            # try:
+            #     with get_db_session() as session:
+            #         db.connect(reuse_if_open=True)
+            #         # 确保 ChatStreams 表存在
+            #         session.execute(text("CREATE TABLE IF NOT EXISTS chat_streams (stream_id TEXT PRIMARY KEY, platform TEXT, create_time REAL, last_active_time REAL, user_platform TEXT, user_id TEXT, user_nickname TEXT, user_cardname TEXT, group_platform TEXT, group_id TEXT, group_name TEXT)"))
+            #         session.commit()
+            # except Exception as e:
+            #     logger.error(f"数据库连接或 ChatStreams 表创建失败: {e}")
             self._initialized = True

             # 在事件循环中启动初始化
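With the raw CREATE TABLE IF NOT EXISTS fallback commented out, table creation presumably happens once at engine setup through the declarative metadata — the SQLAlchemy-native equivalent of that DDL. A sketch (the Base export and engine URL are assumptions about the project layout):

from sqlalchemy import create_engine
from src.common.database.sqlalchemy_models import Base  # assumed declarative Base

engine = create_engine("sqlite:///bot.db")  # hypothetical URL
Base.metadata.create_all(engine)  # idempotent: creates only missing tables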
@@ -236,7 +236,8 @@ class ChatManager:
         # 检查数据库中是否存在
         def _db_find_stream_sync(s_id: str):
-            return session.execute(select(ChatStreams).where(ChatStreams.stream_id == s_id)).scalar()
+            with get_db_session() as session:
+                return session.execute(select(ChatStreams).where(ChatStreams.stream_id == s_id)).scalar()

         model_instance = await asyncio.to_thread(_db_find_stream_sync, stream_id)
@@ -331,44 +332,45 @@ class ChatManager:
         stream_data_dict = stream.to_dict()

         def _db_save_stream_sync(s_data_dict: dict):
-            user_info_d = s_data_dict.get("user_info")
-            group_info_d = s_data_dict.get("group_info")
-
-            fields_to_save = {
-                "platform": s_data_dict["platform"],
-                "create_time": s_data_dict["create_time"],
-                "last_active_time": s_data_dict["last_active_time"],
-                "user_platform": user_info_d["platform"] if user_info_d else "",
-                "user_id": user_info_d["user_id"] if user_info_d else "",
-                "user_nickname": user_info_d["user_nickname"] if user_info_d else "",
-                "user_cardname": user_info_d.get("user_cardname", "") if user_info_d else None,
-                "group_platform": group_info_d["platform"] if group_info_d else "",
-                "group_id": group_info_d["group_id"] if group_info_d else "",
-                "group_name": group_info_d["group_name"] if group_info_d else "",
-            }
-
-            # 根据数据库类型选择插入语句
-            if global_config.database.database_type == "sqlite":
-                stmt = sqlite_insert(ChatStreams).values(stream_id=s_data_dict["stream_id"], **fields_to_save)
-                stmt = stmt.on_conflict_do_update(
-                    index_elements=['stream_id'],
-                    set_=fields_to_save
-                )
-            elif global_config.database.database_type == "mysql":
-                stmt = mysql_insert(ChatStreams).values(stream_id=s_data_dict["stream_id"], **fields_to_save)
-                stmt = stmt.on_duplicate_key_update(
-                    **{key: value for key, value in fields_to_save.items() if key != "stream_id"}
-                )
-            else:
-                # 默认使用通用插入尝试SQLite语法
-                stmt = sqlite_insert(ChatStreams).values(stream_id=s_data_dict["stream_id"], **fields_to_save)
-                stmt = stmt.on_conflict_do_update(
-                    index_elements=['stream_id'],
-                    set_=fields_to_save
-                )
-
-            session.execute(stmt)
-            session.commit()
+            with get_db_session() as session:
+                user_info_d = s_data_dict.get("user_info")
+                group_info_d = s_data_dict.get("group_info")
+
+                fields_to_save = {
+                    "platform": s_data_dict["platform"],
+                    "create_time": s_data_dict["create_time"],
+                    "last_active_time": s_data_dict["last_active_time"],
+                    "user_platform": user_info_d["platform"] if user_info_d else "",
+                    "user_id": user_info_d["user_id"] if user_info_d else "",
+                    "user_nickname": user_info_d["user_nickname"] if user_info_d else "",
+                    "user_cardname": user_info_d.get("user_cardname", "") if user_info_d else None,
+                    "group_platform": group_info_d["platform"] if group_info_d else "",
+                    "group_id": group_info_d["group_id"] if group_info_d else "",
+                    "group_name": group_info_d["group_name"] if group_info_d else "",
+                }
+
+                # 根据数据库类型选择插入语句
+                if global_config.database.database_type == "sqlite":
+                    stmt = sqlite_insert(ChatStreams).values(stream_id=s_data_dict["stream_id"], **fields_to_save)
+                    stmt = stmt.on_conflict_do_update(
+                        index_elements=['stream_id'],
+                        set_=fields_to_save
+                    )
+                elif global_config.database.database_type == "mysql":
+                    stmt = mysql_insert(ChatStreams).values(stream_id=s_data_dict["stream_id"], **fields_to_save)
+                    stmt = stmt.on_duplicate_key_update(
+                        **{key: value for key, value in fields_to_save.items() if key != "stream_id"}
+                    )
+                else:
+                    # 默认使用通用插入尝试SQLite语法
+                    stmt = sqlite_insert(ChatStreams).values(stream_id=s_data_dict["stream_id"], **fields_to_save)
+                    stmt = stmt.on_conflict_do_update(
+                        index_elements=['stream_id'],
+                        set_=fields_to_save
+                    )

+                session.execute(stmt)
+                session.commit()

         try:
             await asyncio.to_thread(_db_save_stream_sync, stream_data_dict)
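Since the Session used here is synchronous, every DB helper in this class is a plain function handed to asyncio.to_thread, which runs it in a worker thread and keeps the event loop unblocked. The pattern in isolation:

import asyncio

def _blocking_write(data: dict) -> None:
    with get_db_session() as session:
        ...  # synchronous execute/commit work stays inside the helper

async def save(data: dict) -> None:
    # Await the threaded call; the loop stays free for other chats.
    await asyncio.to_thread(_blocking_write, data)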
@@ -387,30 +389,32 @@
         def _db_load_all_streams_sync():
             loaded_streams_data = []
-            for model_instance in session.execute(select(ChatStreams)).scalars():
-                user_info_data = {
-                    "platform": model_instance.user_platform,
-                    "user_id": model_instance.user_id,
-                    "user_nickname": model_instance.user_nickname,
-                    "user_cardname": model_instance.user_cardname or "",
-                }
-                group_info_data = None
-                if model_instance.group_id:
-                    group_info_data = {
-                        "platform": model_instance.group_platform,
-                        "group_id": model_instance.group_id,
-                        "group_name": model_instance.group_name,
-                    }
-                data_for_from_dict = {
-                    "stream_id": model_instance.stream_id,
-                    "platform": model_instance.platform,
-                    "user_info": user_info_data,
-                    "group_info": group_info_data,
-                    "create_time": model_instance.create_time,
-                    "last_active_time": model_instance.last_active_time,
-                }
-                loaded_streams_data.append(data_for_from_dict)
+            with get_db_session() as session:
+                for model_instance in session.execute(select(ChatStreams)).scalars():
+                    user_info_data = {
+                        "platform": model_instance.user_platform,
+                        "user_id": model_instance.user_id,
+                        "user_nickname": model_instance.user_nickname,
+                        "user_cardname": model_instance.user_cardname or "",
+                    }
+                    group_info_data = None
+                    if model_instance.group_id:
+                        group_info_data = {
+                            "platform": model_instance.group_platform,
+                            "group_id": model_instance.group_id,
+                            "group_name": model_instance.group_name,
+                        }
+                    data_for_from_dict = {
+                        "stream_id": model_instance.stream_id,
+                        "platform": model_instance.platform,
+                        "user_info": user_info_data,
+                        "group_info": group_info_data,
+                        "create_time": model_instance.create_time,
+                        "last_active_time": model_instance.last_active_time,
+                    }
+                    loaded_streams_data.append(data_for_from_dict)
+                session.commit()
             return loaded_streams_data

         try:

View File

@@ -8,7 +8,7 @@ from src.common.database.sqlalchemy_models import Messages, Images
 from src.common.logger import get_logger
 from .chat_stream import ChatStream
 from .message import MessageSending, MessageRecv
-from src.common.database.sqlalchemy_database_api import get_session
+from src.common.database.sqlalchemy_database_api import get_db_session
 from sqlalchemy import select, update, desc

 logger = get_logger("message_storage")
@@ -95,7 +95,6 @@ class MessageStorage:
                 priority_info_json = json.dumps(priority_info) if priority_info else None

-                # 获取数据库会话
-                session = get_session()

                 new_message = Messages(
                     message_id=msg_id,
@@ -132,8 +131,10 @@ class MessageStorage:
                     key_words_lite=key_words_lite,
                     selected_expressions=selected_expressions,
                 )
-                session.add(new_message)
-                session.commit()
+                with get_db_session() as session:
+                    session.add(new_message)
+                    session.commit()
         except Exception:
             logger.exception("存储消息失败")
             logger.error(f"消息:{message}")
@@ -183,7 +184,8 @@ class MessageStorage:
                     session.execute(
                         update(Messages).where(Messages.id == matched_message.id).values(message_id=qq_message_id)
                     )
-                    # session.commit() 会在上下文管理器中自动调用
+                    session.commit()
+                    # 会在上下文管理器中自动调用
                     logger.debug(f"更新消息ID成功: {matched_message.message_id} -> {qq_message_id}")
                 else:
                     logger.warning(f"未找到匹配的消息记录: {mmc_message_id}")
@@ -212,6 +214,7 @@ class MessageStorage:
                 image_record = session.execute(
                     select(Images).where(Images.description == description).order_by(desc(Images.timestamp))
                 ).scalar()
+                session.commit()
                 return f"[picid:{image_record.image_id}]" if image_record else match.group(0)
             except Exception:
                 return match.group(0)

View File

@@ -10,7 +10,7 @@ from src.common.message_repository import find_messages, count_messages
 from src.common.database.sqlalchemy_models import ActionRecords, Images
 from src.person_info.person_info import PersonInfoManager, get_person_info_manager
 from src.chat.utils.utils import translate_timestamp_to_human_readable, assign_message_ids
-from src.common.database.sqlalchemy_database_api import get_session
+from src.common.database.sqlalchemy_database_api import get_db_session
 from sqlalchemy import select, and_

 install(extra_lines=3)

View File

@@ -432,7 +432,7 @@ class StatisticOutputTask(AsyncTask):
                     stats[period_key][COST_BY_MODULE][module_name] += cost

                     # 收集time_cost数据
-                    time_cost = record.time_cost or 0.0
+                    time_cost = record.get('time_cost') or 0.0
                     if time_cost > 0: # 只记录有效的time_cost
                         stats[period_key][TIME_COST_BY_TYPE][request_type].append(time_cost)
                         stats[period_key][TIME_COST_BY_USER][user_id].append(time_cost)

View File

@@ -41,12 +41,12 @@ class ImageManager:
         self._initialized = True
         self.vlm = LLMRequest(model_set=model_config.model_task_config.vlm, request_type="image")

-        try:
-            db.connect(reuse_if_open=True)
-            # 使用SQLAlchemy创建表已在初始化时完成
-            logger.debug("使用SQLAlchemy进行表管理")
-        except Exception as e:
-            logger.error(f"数据库连接失败: {e}")
+        # try:
+        #     db.connect(reuse_if_open=True)
+        #     # 使用SQLAlchemy创建表已在初始化时完成
+        #     logger.debug("使用SQLAlchemy进行表管理")
+        # except Exception as e:
+        #     logger.error(f"数据库连接失败: {e}")

         self._initialized = True
@@ -105,7 +105,8 @@ class ImageManager:
                     timestamp=current_timestamp
                 )
                 session.add(new_desc)
-                # session.commit() 会在上下文管理器中自动调用
+                session.commit()
+                # 会在上下文管理器中自动调用
         except Exception as e:
             logger.error(f"保存描述到数据库失败 (SQLAlchemy): {str(e)}")
@@ -249,7 +250,8 @@ class ImageManager:
                         timestamp=current_timestamp,
                     )
                     session.add(new_img)
-                    # session.commit() 会在上下文管理器中自动调用
+                    session.commit()
+                    # 会在上下文管理器中自动调用
         except Exception as e:
             logger.error(f"保存到Images表失败: {str(e)}")
@@ -326,7 +328,7 @@ class ImageManager:
existing_image.image_id = str(uuid.uuid4())
if not hasattr(existing_image, "vlm_processed") or existing_image.vlm_processed is None:
existing_image.vlm_processed = True
session.commit()
logger.debug(f"[数据库] 更新已有图片记录: {image_hash[:8]}...")
else:
new_img = Images(
@@ -340,7 +342,7 @@ class ImageManager:
count=1,
)
session.add(new_img)
session.commit()
logger.debug(f"[数据库] 创建新图片记录: {image_hash[:8]}...")
except Exception as e:
logger.error(f"保存图片文件或元数据失败: {str(e)}")
@@ -514,35 +516,35 @@ class ImageManager:
existing_image.vlm_processed = False
existing_image.count += 1
session.commit()
return existing_image.image_id, f"[picid:{existing_image.image_id}]"
# print(f"图片不存在: {image_hash}")
image_id = str(uuid.uuid4())
# print(f"图片不存在: {image_hash}")
image_id = str(uuid.uuid4())
# 保存新图片
current_timestamp = time.time()
image_dir = os.path.join(self.IMAGE_DIR, "images")
os.makedirs(image_dir, exist_ok=True)
filename = f"{image_id}.png"
file_path = os.path.join(image_dir, filename)
# 保存新图片
current_timestamp = time.time()
image_dir = os.path.join(self.IMAGE_DIR, "images")
os.makedirs(image_dir, exist_ok=True)
filename = f"{image_id}.png"
file_path = os.path.join(image_dir, filename)
# 保存文件
with open(file_path, "wb") as f:
f.write(image_bytes)
# 保存文件
with open(file_path, "wb") as f:
f.write(image_bytes)
# 保存到数据库
new_img = Images(
image_id=image_id,
emoji_hash=image_hash,
path=file_path,
type="image",
timestamp=current_timestamp,
vlm_processed=False,
count=1,
)
session.add(new_img)
session.commit()
# 保存到数据库
new_img = Images(
image_id=image_id,
emoji_hash=image_hash,
path=file_path,
type="image",
timestamp=current_timestamp,
vlm_processed=False,
count=1,
)
session.add(new_img)
session.commit()
# 启动异步VLM处理
asyncio.create_task(self._process_image_with_vlm(image_id, image_base64))
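Deduplication in this method hinges on image_hash, computed from the raw bytes before the Images lookup; the hashing itself sits outside this hunk. A typical content hash for the purpose (the algorithm is an assumption):

import hashlib

def hash_image(image_bytes: bytes) -> str:
    """Content fingerprint used to recognize re-sent images."""
    return hashlib.md5(image_bytes).hexdigest()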
@@ -584,7 +586,7 @@ class ImageManager:
logger.debug(f"[缓存复用] 从其他相同图片记录复用描述: {existing_with_description.description[:50]}...")
image.description = existing_with_description.description
image.vlm_processed = True
session.commit()
# 同时保存到ImageDescriptions表作为备用缓存
self._save_description_to_db(image_hash, existing_with_description.description, "image")
return
@@ -594,7 +596,7 @@ class ImageManager:
logger.debug(f"[缓存复用] 从ImageDescriptions表复用描述: {cached_description[:50]}...")
image.description = cached_description
image.vlm_processed = True
session.commit()
return
# 获取图片格式