fix:修复一处关系构建错误,修复一处表达方式错误

This commit is contained in:
SengokuCola
2025-07-26 09:28:14 +08:00
parent 7b5520a667
commit 16e238a1c8
3 changed files with 95 additions and 112 deletions

View File

@@ -330,48 +330,8 @@ class ExpressionLearner:
"""
current_time = time.time()
# 全局衰减所有已存储的表达方式
for type in ["style", "grammar"]:
base_dir = os.path.join("data", "expression", f"learnt_{type}")
if not os.path.exists(base_dir):
logger.debug(f"目录不存在,跳过衰减: {base_dir}")
continue
try:
chat_ids = os.listdir(base_dir)
logger.debug(f"{base_dir} 中找到 {len(chat_ids)} 个聊天ID目录进行衰减")
except Exception as e:
logger.error(f"读取目录失败 {base_dir}: {e}")
continue
for chat_id in chat_ids:
file_path = os.path.join(base_dir, chat_id, "expressions.json")
if not os.path.exists(file_path):
continue
try:
with open(file_path, "r", encoding="utf-8") as f:
expressions = json.load(f)
if not isinstance(expressions, list):
logger.warning(f"表达方式文件格式错误,跳过衰减: {file_path}")
continue
# 应用全局衰减
decayed_expressions = self.apply_decay_to_expressions(expressions, current_time)
# 保存衰减后的结果
with open(file_path, "w", encoding="utf-8") as f:
json.dump(decayed_expressions, f, ensure_ascii=False, indent=2)
logger.debug(f"已对 {file_path} 应用衰减,剩余 {len(decayed_expressions)} 个表达方式")
except json.JSONDecodeError as e:
logger.error(f"JSON解析失败跳过衰减 {file_path}: {e}")
except PermissionError as e:
logger.error(f"权限不足,无法更新 {file_path}: {e}")
except Exception as e:
logger.error(f"全局衰减{type}表达方式失败 {file_path}: {e}")
continue
# 全局衰减所有已存储的表达方式(直接操作数据库)
self._apply_global_decay_to_database(current_time)
learnt_style: Optional[List[Tuple[str, str, str]]] = []
learnt_grammar: Optional[List[Tuple[str, str, str]]] = []
@@ -388,6 +348,42 @@ class ExpressionLearner:
return learnt_style, learnt_grammar
def _apply_global_decay_to_database(self, current_time: float) -> None:
"""
对数据库中的所有表达方式应用全局衰减
"""
try:
# 获取所有表达方式
all_expressions = Expression.select()
updated_count = 0
deleted_count = 0
for expr in all_expressions:
# 计算时间差
last_active = expr.last_active_time
time_diff_days = (current_time - last_active) / (24 * 3600) # 转换为天
# 计算衰减值
decay_value = self.calculate_decay_factor(time_diff_days)
new_count = max(0.01, expr.count - decay_value)
if new_count <= 0.01:
# 如果count太小删除这个表达方式
expr.delete_instance()
deleted_count += 1
else:
# 更新count
expr.count = new_count
expr.save()
updated_count += 1
if updated_count > 0 or deleted_count > 0:
logger.info(f"全局衰减完成:更新了 {updated_count} 个表达方式,删除了 {deleted_count} 个表达方式")
except Exception as e:
logger.error(f"数据库全局衰减失败: {e}")
def calculate_decay_factor(self, time_diff_days: float) -> float:
"""
计算衰减值
@@ -410,30 +406,6 @@ class ExpressionLearner:
return min(0.01, decay)
def apply_decay_to_expressions(
self, expressions: List[Dict[str, Any]], current_time: float
) -> List[Dict[str, Any]]:
"""
对表达式列表应用衰减
返回衰减后的表达式列表移除count小于0的项
"""
result = []
for expr in expressions:
# 确保last_active_time存在如果不存在则使用current_time
if "last_active_time" not in expr:
expr["last_active_time"] = current_time
last_active = expr["last_active_time"]
time_diff_days = (current_time - last_active) / (24 * 3600) # 转换为天
decay_value = self.calculate_decay_factor(time_diff_days)
expr["count"] = max(0.01, expr.get("count", 1) - decay_value)
if expr["count"] > 0:
result.append(expr)
return result
async def learn_and_store(self, type: str, num: int = 10) -> List[Tuple[str, str, str]]:
# sourcery skip: use-join
"""

View File

@@ -2,7 +2,7 @@ import json
import time
import random
from typing import List, Dict, Tuple, Optional
from typing import List, Dict, Tuple, Optional, Any
from json_repair import repair_json
from src.llm_models.utils_model import LLMRequest
@@ -117,36 +117,42 @@ class ExpressionSelector:
def get_random_expressions(
self, chat_id: str, total_num: int, style_percentage: float, grammar_percentage: float
) -> Tuple[List[Dict[str, str]], List[Dict[str, str]]]:
) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
# 支持多chat_id合并抽选
related_chat_ids = self.get_related_chat_ids(chat_id)
style_exprs = []
grammar_exprs = []
for cid in related_chat_ids:
style_query = Expression.select().where((Expression.chat_id == cid) & (Expression.type == "style"))
grammar_query = Expression.select().where((Expression.chat_id == cid) & (Expression.type == "grammar"))
style_exprs.extend([
{
"situation": expr.situation,
"style": expr.style,
"count": expr.count,
"last_active_time": expr.last_active_time,
"source_id": cid,
"type": "style",
"create_date": expr.create_date if expr.create_date is not None else expr.last_active_time,
} for expr in style_query
])
grammar_exprs.extend([
{
"situation": expr.situation,
"style": expr.style,
"count": expr.count,
"last_active_time": expr.last_active_time,
"source_id": cid,
"type": "grammar",
"create_date": expr.create_date if expr.create_date is not None else expr.last_active_time,
} for expr in grammar_query
])
# 优化一次性查询所有相关chat_id的表达方式
style_query = Expression.select().where(
(Expression.chat_id.in_(related_chat_ids)) & (Expression.type == "style")
)
grammar_query = Expression.select().where(
(Expression.chat_id.in_(related_chat_ids)) & (Expression.type == "grammar")
)
style_exprs = [
{
"situation": expr.situation,
"style": expr.style,
"count": expr.count,
"last_active_time": expr.last_active_time,
"source_id": expr.chat_id,
"type": "style",
"create_date": expr.create_date if expr.create_date is not None else expr.last_active_time,
} for expr in style_query
]
grammar_exprs = [
{
"situation": expr.situation,
"style": expr.style,
"count": expr.count,
"last_active_time": expr.last_active_time,
"source_id": expr.chat_id,
"type": "grammar",
"create_date": expr.create_date if expr.create_date is not None else expr.last_active_time,
} for expr in grammar_query
]
style_num = int(total_num * style_percentage)
grammar_num = int(total_num * grammar_percentage)
# 按权重抽样使用count作为权重
@@ -162,7 +168,7 @@ class ExpressionSelector:
selected_grammar = []
return selected_style, selected_grammar
def update_expressions_count_batch(self, expressions_to_update: List[Dict[str, str]], increment: float = 0.1):
def update_expressions_count_batch(self, expressions_to_update: List[Dict[str, Any]], increment: float = 0.1):
"""对一批表达方式更新count值按chat_id+type分组后一次性写入数据库"""
if not expressions_to_update:
return
@@ -203,7 +209,7 @@ class ExpressionSelector:
max_num: int = 10,
min_num: int = 5,
target_message: Optional[str] = None,
) -> List[Dict[str, str]]:
) -> List[Dict[str, Any]]:
# sourcery skip: inline-variable, list-comprehension
"""使用LLM选择适合的表达方式"""

View File

@@ -139,7 +139,7 @@ class RelationshipManager:
请用json格式输出引起了你的兴趣或者有什么需要你记忆的点。
并为每个点赋予1-10的权重权重越高表示越重要。
格式如下:
{{
[
{{
"point": "{person_name}想让我记住他的生日我回答确认了他的生日是11月23日",
"weight": 10
@@ -156,13 +156,10 @@ class RelationshipManager:
"point": "{person_name}喜欢吃辣具体来说没有辣的食物ta都不喜欢吃可能是因为ta是湖南人。",
"weight": 7
}}
}}
]
如果没有就输出none,或points为空
{{
"point": "none",
"weight": 0
}}
如果没有就输出none,或返回空数组
[]
"""
# 调用LLM生成印象
@@ -184,17 +181,25 @@ class RelationshipManager:
try:
points = repair_json(points)
points_data = json.loads(points)
if points_data == "none" or not points_data or points_data.get("point") == "none":
# 只处理正确的格式,错误格式直接跳过
if points_data == "none" or not points_data:
points_list = []
elif isinstance(points_data, str) and points_data.lower() == "none":
points_list = []
elif isinstance(points_data, list):
# 正确格式:数组格式 [{"point": "...", "weight": 10}, ...]
if not points_data: # 空数组
points_list = []
else:
points_list = [(item["point"], float(item["weight"]), current_time) for item in points_data]
else:
# logger.info(f"points_data: {points_data}")
if isinstance(points_data, dict) and "points" in points_data:
points_data = points_data["points"]
if not isinstance(points_data, list):
points_data = [points_data]
# 添加可读时间到每个point
points_list = [(item["point"], float(item["weight"]), current_time) for item in points_data]
# 错误格式,直接跳过不解析
logger.warning(f"LLM返回了错误的JSON格式跳过解析: {type(points_data)}, 内容: {points_data}")
points_list = []
# 权重过滤逻辑
if points_list:
original_points_list = list(points_list)
points_list.clear()
discarded_count = 0