better:重构personinfo,使用Person类和类属性

This commit is contained in:
SengokuCola
2025-08-12 14:33:13 +08:00
parent 1e7f3a92a6
commit ae254de494
21 changed files with 468 additions and 1202 deletions

View File

@@ -1,11 +1,11 @@
import copy
import hashlib
import datetime
import asyncio
import json
import time
import random
from json_repair import repair_json
from typing import Any, Callable, Dict, Union, Optional
from typing import Union
from src.common.logger import get_logger
from src.common.database.database import db
@@ -14,45 +14,276 @@ from src.llm_models.utils_model import LLMRequest
from src.config.config import global_config, model_config
"""
PersonInfoManager 类方法功能摘要:
1. get_person_id - 根据平台和用户ID生成MD5哈希的唯一person_id
2. create_person_info - 创建新个人信息文档(自动合并默认值)
3. update_one_field - 更新单个字段值(若文档不存在则创建)
4. del_one_document - 删除指定person_id的文档
5. get_value - 获取单个字段值(返回实际值或默认值)
6. get_values - 批量获取字段值(任一字段无效则返回空字典)
7. del_all_undefined_field - 清理全集合中未定义的字段
8. get_specific_value_list - 根据指定条件返回person_id,value字典
"""
logger = get_logger("person_info")
JSON_SERIALIZED_FIELDS = ["points"]
def get_person_id(platform: str, user_id: Union[int, str]) -> str:
"""获取唯一id"""
if "-" in platform:
platform = platform.split("-")[1]
components = [platform, str(user_id)]
key = "_".join(components)
return hashlib.md5(key.encode()).hexdigest()
person_info_default = {
"person_id": None,
"person_name": None,
"name_reason": None, # Corrected from person_name_reason to match common usage if intended
"platform": "unknown",
"user_id": "unknown",
"nickname": "Unknown",
"know_times": 0,
"know_since": None,
"last_know": None,
"attitude_to_me": "0,1",
"friendly_value": 50,
"rudeness":50,
"neuroticism":"5,1",
"conscientiousness": 50,
"likeness": 50,
"points": None,
}
def get_person_id_by_person_name(person_name: str) -> str:
"""根据用户名获取用户ID"""
try:
record = PersonInfo.get_or_none(PersonInfo.person_name == person_name)
return record.person_id if record else ""
except Exception as e:
logger.error(f"根据用户名 {person_name} 获取用户ID时出错 (Peewee): {e}")
return ""
class Person:
def __init__(self, platform: str = "", user_id: str = "",person_id: str = "",person_name: str = "",nickname: str = ""):
if person_id:
self.person_id = person_id
elif person_name:
self.person_id = get_person_id_by_person_name(person_name)
if not self.person_id:
logger.error(f"根据用户名 {person_name} 获取用户ID时出错不存在用户{person_name}")
return ""
elif platform and user_id:
self.person_id = get_person_id(platform, user_id)
else:
logger.error("Person 初始化失败,缺少必要参数")
return ""
self.is_known = False
self.platform = platform
self.user_id = user_id
# 初始化默认值
self.nickname = nickname
self.person_name = None
self.name_reason = None
self.know_times = 0
self.know_since = None
self.last_know = None
self.points = []
# 初始化性格特征相关字段
self.attitude_to_me:float = 0
self.attitude_to_me_confidence:float = 1
self.neuroticism:float = 5
self.neuroticism_confidence:float = 1
self.friendly_value:float = 50
self.friendly_value_confidence:float = 1
self.rudeness:float = 50
self.rudeness_confidence:float = 1
self.conscientiousness:float = 50
self.conscientiousness_confidence:float = 1
self.likeness:float = 50
self.likeness_confidence:float = 1
# 从数据库加载数据
self.load_from_database()
def load_from_database(self):
"""从数据库加载个人信息数据"""
try:
# 查询数据库中的记录
record = PersonInfo.get_or_none(PersonInfo.person_id == self.person_id)
if record:
self.is_known = record.is_known if record.is_known else False
self.nickname = record.nickname if record.nickname else self.nickname
if not self.is_known:
if self.nickname:
self.is_known = True
self.person_name = self.nickname
logger.info(f"用户 {self.person_id} 已认识,昵称:{self.nickname}")
else:
logger.warning(f"用户 {self.person_id} 尚未认识,昵称为空")
else:
self.person_name = record.person_name if record.person_name else self.nickname
self.name_reason = record.name_reason if record.name_reason else None
self.know_times = record.know_times if record.know_times else 0
self.know_since = record.know_since if record.know_since else time.time()
self.last_know = record.last_know if record.last_know else time.time()
# 处理points字段JSON格式的列表
if record.points:
try:
self.points = json.loads(record.points)
except (json.JSONDecodeError, TypeError):
logger.warning(f"解析用户 {self.person_id} 的points字段失败使用默认值")
self.points = []
else:
self.points = []
# 加载性格特征相关字段
if record.attitude_to_me and not isinstance(record.attitude_to_me, str):
self.attitude_to_me = record.attitude_to_me
if record.attitude_to_me_confidence is not None:
self.attitude_to_me_confidence = float(record.attitude_to_me_confidence)
if record.friendly_value is not None:
self.friendly_value = float(record.friendly_value)
if record.friendly_value_confidence is not None:
self.friendly_value_confidence = float(record.friendly_value_confidence)
if record.rudeness is not None:
self.rudeness = float(record.rudeness)
if record.rudeness_confidence is not None:
self.rudeness_confidence = float(record.rudeness_confidence)
if record.neuroticism and not isinstance(record.neuroticism, str):
self.neuroticism = float(record.neuroticism)
if record.neuroticism_confidence is not None:
self.neuroticism_confidence = float(record.neuroticism_confidence)
if record.conscientiousness is not None:
self.conscientiousness = float(record.conscientiousness)
if record.conscientiousness_confidence is not None:
self.conscientiousness_confidence = float(record.conscientiousness_confidence)
if record.likeness is not None:
self.likeness = float(record.likeness)
if record.likeness_confidence is not None:
self.likeness_confidence = float(record.likeness_confidence)
logger.info(f"已从数据库加载用户 {self.person_id} 的信息")
else:
self.sync_to_database()
logger.info(f"用户 {self.person_id} 在数据库中不存在,使用默认值并创建")
except Exception as e:
logger.error(f"从数据库加载用户 {self.person_id} 信息时出错: {e}")
# 出错时保持默认值
def sync_to_database(self):
"""将所有属性同步回数据库"""
try:
# 准备数据
data = {
'person_id': self.person_id,
'is_known': self.is_known,
'platform': self.platform,
'user_id': self.user_id,
'nickname': self.nickname,
'person_name': self.person_name,
'name_reason': self.name_reason,
'know_times': self.know_times,
'know_since': self.know_since,
'last_know': self.last_know,
'points': json.dumps(self.points, ensure_ascii=False) if self.points else json.dumps([], ensure_ascii=False),
'attitude_to_me': self.attitude_to_me,
'attitude_to_me_confidence': self.attitude_to_me_confidence,
'friendly_value': self.friendly_value,
'friendly_value_confidence': self.friendly_value_confidence,
'rudeness': self.rudeness,
'rudeness_confidence': self.rudeness_confidence,
'neuroticism': self.neuroticism,
'neuroticism_confidence': self.neuroticism_confidence,
'conscientiousness': self.conscientiousness,
'conscientiousness_confidence': self.conscientiousness_confidence,
'likeness': self.likeness,
'likeness_confidence': self.likeness_confidence,
}
# 检查记录是否存在
record = PersonInfo.get_or_none(PersonInfo.person_id == self.person_id)
if record:
# 更新现有记录
for field, value in data.items():
if hasattr(record, field):
setattr(record, field, value)
record.save()
logger.debug(f"已同步用户 {self.person_id} 的信息到数据库")
else:
# 创建新记录
PersonInfo.create(**data)
logger.debug(f"已创建用户 {self.person_id} 的信息到数据库")
except Exception as e:
logger.error(f"同步用户 {self.person_id} 信息到数据库时出错: {e}")
def build_relationship(self,points_num=3):
if not self.is_known:
return ""
# 按时间排序forgotten_points
current_points = self.points
current_points.sort(key=lambda x: x[2])
# 按权重加权随机抽取最多3个不重复的pointspoint[1]的值在1-10之间权重越高被抽到概率越大
if len(current_points) > points_num:
# point[1] 取值范围1-10直接作为权重
weights = [max(1, min(10, int(point[1]))) for point in current_points]
# 使用加权采样不放回,保证不重复
indices = list(range(len(current_points)))
points = []
for _ in range(points_num):
if not indices:
break
sub_weights = [weights[i] for i in indices]
chosen_idx = random.choices(indices, weights=sub_weights, k=1)[0]
points.append(current_points[chosen_idx])
indices.remove(chosen_idx)
else:
points = current_points
# 构建points文本
points_text = "\n".join([f"{point[2]}{point[0]}" for point in points])
nickname_str = ""
if self.person_name != nickname_str:
nickname_str = f"(ta在{self.platform}上的昵称是{nickname_str})"
relation_info = ""
attitude_info = ""
if self.attitude_to_me:
if self.attitude_to_me > 8:
attitude_info = f"{self.person_name}对你的态度十分好,"
elif self.attitude_to_me > 5:
attitude_info = f"{self.person_name}对你的态度较好,"
if self.attitude_to_me < -8:
attitude_info = f"{self.person_name}对你的态度十分恶劣,"
elif self.attitude_to_me < -4:
attitude_info = f"{self.person_name}对你的态度不好,"
elif self.attitude_to_me < 0:
attitude_info = f"{self.person_name}对你的态度一般,"
neuroticism_info = ""
if self.neuroticism:
if self.neuroticism > 8:
neuroticism_info = f"{self.person_name}的情绪十分活跃,容易情绪化,"
elif self.neuroticism > 6:
neuroticism_info = f"{self.person_name}的情绪比较活跃,"
elif self.neuroticism > 4:
neuroticism_info = ""
elif self.neuroticism > 2:
neuroticism_info = f"{self.person_name}的情绪比较稳定,"
else:
neuroticism_info = f"{self.person_name}的情绪非常稳定,毫无波动"
points_info = ""
if points_text:
points_info = f"你还记得ta最近做的事{points_text}"
relation_info = f"{self.person_name}:{nickname_str}{attitude_info}{neuroticism_info}{points_info}"
return relation_info
class PersonInfoManager:
def __init__(self):
self.person_name_list = {}
self.qv_name_llm = LLMRequest(model_set=model_config.model_task_config.utils, request_type="relation.qv_name")
try:
@@ -77,158 +308,11 @@ class PersonInfoManager:
logger.debug(f"已加载 {len(self.person_name_list)} 个用户名称 (Peewee)")
except Exception as e:
logger.error(f"从 Peewee 加载 person_name_list 失败: {e}")
def get_person(self, platform: str, user_id: Union[int, str]) -> Person:
person = Person(platform, user_id)
return person
@staticmethod
def get_person_id(platform: str, user_id: Union[int, str]) -> str:
"""获取唯一id"""
# 添加空值检查,防止 platform 为 None 时出错
if platform is None:
platform = "unknown"
elif "-" in platform:
platform = platform.split("-")[1]
components = [platform, str(user_id)]
key = "_".join(components)
return hashlib.md5(key.encode()).hexdigest()
async def is_person_known(self, platform: str, user_id: int):
"""判断是否认识某人"""
person_id = self.get_person_id(platform, user_id)
def _db_check_known_sync(p_id: str):
return PersonInfo.get_or_none(PersonInfo.person_id == p_id) is not None
try:
return await asyncio.to_thread(_db_check_known_sync, person_id)
except Exception as e:
logger.error(f"检查用户 {person_id} 是否已知时出错 (Peewee): {e}")
return False
def get_person_id_by_person_name(self, person_name: str) -> str:
"""根据用户名获取用户ID"""
try:
record = PersonInfo.get_or_none(PersonInfo.person_name == person_name)
return record.person_id if record else ""
except Exception as e:
logger.error(f"根据用户名 {person_name} 获取用户ID时出错 (Peewee): {e}")
return ""
async def _safe_create_person_info(self, person_id: str, data: Optional[dict] = None):
"""安全地创建用户信息,处理竞态条件"""
if not person_id:
logger.debug("创建失败person_id不存在")
return
_person_info_default = copy.deepcopy(person_info_default)
model_fields = PersonInfo._meta.fields.keys() # type: ignore
final_data = {"person_id": person_id}
# Start with defaults for all model fields
for key, default_value in _person_info_default.items():
if key in model_fields:
final_data[key] = default_value
# Override with provided data
if data:
for key, value in data.items():
if key in model_fields:
final_data[key] = value
# Ensure person_id is correctly set from the argument
final_data["person_id"] = person_id
# Serialize JSON fields
for key in JSON_SERIALIZED_FIELDS:
if key in final_data:
if isinstance(final_data[key], (list, dict)):
final_data[key] = json.dumps(final_data[key], ensure_ascii=False)
elif final_data[key] is None: # Default for lists is [], store as "[]"
final_data[key] = json.dumps([], ensure_ascii=False)
def _db_safe_create_sync(p_data: dict):
try:
# 首先检查是否已存在
existing = PersonInfo.get_or_none(PersonInfo.person_id == p_data["person_id"])
if existing:
logger.debug(f"用户 {p_data['person_id']} 已存在,跳过创建")
return True
# 尝试创建
PersonInfo.create(**p_data)
return True
except Exception as e:
if "UNIQUE constraint failed" in str(e):
logger.debug(f"检测到并发创建用户 {p_data.get('person_id')},跳过错误")
return True # 其他协程已创建,视为成功
else:
logger.error(f"创建 PersonInfo 记录 {p_data.get('person_id')} 失败 (Peewee): {e}")
return False
await asyncio.to_thread(_db_safe_create_sync, final_data)
async def update_one_field(self, person_id: str, field_name: str, value, data: Optional[Dict] = None):
"""更新某一个字段,会补全"""
if field_name not in PersonInfo._meta.fields: # type: ignore
logger.debug(f"更新'{field_name}'失败,未在 PersonInfo Peewee 模型中定义的字段。")
return
processed_value = value
if field_name in JSON_SERIALIZED_FIELDS:
if isinstance(value, (list, dict)):
processed_value = json.dumps(value, ensure_ascii=False, indent=None)
elif value is None: # Store None as "[]" for JSON list fields
processed_value = json.dumps([], ensure_ascii=False, indent=None)
def _db_update_sync(p_id: str, f_name: str, val_to_set):
import time
start_time = time.time()
try:
record = PersonInfo.get_or_none(PersonInfo.person_id == p_id)
query_time = time.time()
if record:
setattr(record, f_name, val_to_set)
record.save()
save_time = time.time()
total_time = save_time - start_time
if total_time > 0.5: # 如果超过500ms就记录日志
logger.warning(
f"数据库更新操作耗时 {total_time:.3f}秒 (查询: {query_time - start_time:.3f}s, 保存: {save_time - query_time:.3f}s) person_id={p_id}, field={f_name}"
)
return True, False # Found and updated, no creation needed
else:
total_time = time.time() - start_time
if total_time > 0.5:
logger.warning(f"数据库查询操作耗时 {total_time:.3f}秒 person_id={p_id}, field={f_name}")
return False, True # Not found, needs creation
except Exception as e:
total_time = time.time() - start_time
logger.error(f"数据库操作异常,耗时 {total_time:.3f}秒: {e}")
raise
found, needs_creation = await asyncio.to_thread(_db_update_sync, person_id, field_name, processed_value)
if needs_creation:
logger.info(f"{person_id} 不存在,将新建。")
creation_data = data if data is not None else {}
# Ensure platform and user_id are present for context if available from 'data'
# but primarily, set the field that triggered the update.
# The create_person_info will handle defaults and serialization.
creation_data[field_name] = value # Pass original value to create_person_info
# Ensure platform and user_id are in creation_data if available,
# otherwise create_person_info will use defaults.
if data and "platform" in data:
creation_data["platform"] = data["platform"]
if data and "user_id" in data:
creation_data["user_id"] = data["user_id"]
# 使用安全的创建方法,处理竞态条件
await self._safe_create_person_info(person_id, creation_data)
@staticmethod
def _extract_json_from_text(text: str) -> dict:
@@ -279,8 +363,9 @@ class PersonInfoManager:
logger.debug("取名失败person_id不能为空")
return None
old_name = await self.get_value(person_id, "person_name")
old_reason = await self.get_value(person_id, "name_reason")
person = Person(person_id=person_id)
old_name = person.person_name
old_reason = person.name_reason
max_retries = 8
current_try = 0
@@ -338,8 +423,9 @@ class PersonInfoManager:
current_name_set.add(generated_nickname)
if not is_duplicate:
await self.update_one_field(person_id, "person_name", generated_nickname)
await self.update_one_field(person_id, "name_reason", result.get("reason", "未提供理由"))
person.person_name = generated_nickname
person.name_reason = result.get("reason", "未提供理由")
person.sync_to_database()
logger.info(
f"成功给用户{user_nickname} {person_id} 取名 {generated_nickname},理由:{result.get('reason', '未提供理由')}"
@@ -357,186 +443,11 @@ class PersonInfoManager:
# 如果多次尝试后仍未成功,使用唯一的 user_nickname 作为默认值
unique_nickname = await self._generate_unique_person_name(user_nickname)
logger.warning(f"{max_retries}次尝试后未能生成唯一昵称,使用默认昵称 {unique_nickname}")
await self.update_one_field(person_id, "person_name", unique_nickname)
await self.update_one_field(person_id, "name_reason", "使用用户原始昵称作为默认值")
person.person_name = unique_nickname
person.name_reason = "使用用户原始昵称作为默认值"
person.sync_to_database()
self.person_name_list[person_id] = unique_nickname
return {"nickname": unique_nickname, "reason": "使用用户原始昵称作为默认值"}
@staticmethod
async def get_value(person_id: str, field_name: str):
"""获取指定用户指定字段的值"""
default_value_for_field = person_info_default.get(field_name)
if field_name in JSON_SERIALIZED_FIELDS and default_value_for_field is None:
default_value_for_field = [] # Ensure JSON fields default to [] if not in DB
def _db_get_value_sync(p_id: str, f_name: str):
record = PersonInfo.get_or_none(PersonInfo.person_id == p_id)
if record:
val = getattr(record, f_name, None)
if f_name in JSON_SERIALIZED_FIELDS:
if isinstance(val, str):
try:
return json.loads(val)
except json.JSONDecodeError:
logger.warning(f"字段 {f_name} for {p_id} 包含无效JSON: {val}. 返回默认值.")
return [] # Default for JSON fields on error
elif val is None: # Field exists in DB but is None
return [] # Default for JSON fields
# If val is already a list/dict (e.g. if somehow set without serialization)
return val # Should ideally not happen if update_one_field is always used
return val
return None # Record not found
try:
value_from_db = await asyncio.to_thread(_db_get_value_sync, person_id, field_name)
if value_from_db is not None:
return value_from_db
if field_name in person_info_default:
return default_value_for_field
logger.warning(f"字段 {field_name} 在 person_info_default 中未定义,且在数据库中未找到。")
return None # Ultimate fallback
except Exception as e:
logger.error(f"获取字段 {field_name} for {person_id} 时出错 (Peewee): {e}")
# Fallback to default in case of any error during DB access
return default_value_for_field if field_name in person_info_default else None
@staticmethod
def get_value_sync(person_id: str, field_name: str):
"""同步获取指定用户指定字段的值"""
default_value_for_field = person_info_default.get(field_name)
if field_name in JSON_SERIALIZED_FIELDS and default_value_for_field is None:
default_value_for_field = []
record = PersonInfo.get_or_none(PersonInfo.person_id == person_id)
if record:
val = getattr(record, field_name, None)
if field_name in JSON_SERIALIZED_FIELDS:
if isinstance(val, str):
try:
return json.loads(val)
except json.JSONDecodeError:
logger.warning(f"字段 {field_name} for {person_id} 包含无效JSON: {val}. 返回默认值.")
return []
elif val is None:
return []
return val
return val
if field_name in person_info_default:
return default_value_for_field
logger.warning(f"字段 {field_name} 在 person_info_default 中未定义,且在数据库中未找到。")
return None
@staticmethod
async def get_values(person_id: str, field_names: list) -> dict:
"""获取指定person_id文档的多个字段值若不存在该字段则返回该字段的全局默认值"""
if not person_id:
logger.debug("get_values获取失败person_id不能为空")
return {}
result = {}
def _db_get_record_sync(p_id: str):
return PersonInfo.get_or_none(PersonInfo.person_id == p_id)
record = await asyncio.to_thread(_db_get_record_sync, person_id)
for field_name in field_names:
if field_name not in PersonInfo._meta.fields: # type: ignore
if field_name in person_info_default:
result[field_name] = copy.deepcopy(person_info_default[field_name])
logger.debug(f"字段'{field_name}'不在Peewee模型中使用默认配置值。")
else:
logger.debug(f"get_values查询失败字段'{field_name}'未在Peewee模型和默认配置中定义。")
result[field_name] = None
continue
if record:
value = getattr(record, field_name)
if value is not None:
result[field_name] = value
else:
result[field_name] = copy.deepcopy(person_info_default.get(field_name))
else:
result[field_name] = copy.deepcopy(person_info_default.get(field_name))
return result
async def get_or_create_person(
self, platform: str, user_id: int, nickname: str, user_cardname: str, user_avatar: Optional[str] = None
) -> str:
"""
根据 platform 和 user_id 获取 person_id。
如果对应的用户不存在,则使用提供的可选信息创建新用户。
使用try-except处理竞态条件避免重复创建错误。
"""
person_id = self.get_person_id(platform, user_id)
def _db_get_or_create_sync(p_id: str, init_data: dict):
"""原子性的获取或创建操作"""
# 首先尝试获取现有记录
record = PersonInfo.get_or_none(PersonInfo.person_id == p_id)
if record:
return record, False # 记录存在,未创建
# 记录不存在,尝试创建
try:
PersonInfo.create(**init_data)
return PersonInfo.get(PersonInfo.person_id == p_id), True # 创建成功
except Exception as e:
# 如果创建失败(可能是因为竞态条件),再次尝试获取
if "UNIQUE constraint failed" in str(e):
logger.debug(f"检测到并发创建用户 {p_id},获取现有记录")
record = PersonInfo.get_or_none(PersonInfo.person_id == p_id)
if record:
return record, False # 其他协程已创建,返回现有记录
# 如果仍然失败,重新抛出异常
raise e
unique_nickname = await self._generate_unique_person_name(nickname)
initial_data = {
"person_id": person_id,
"platform": platform,
"user_id": str(user_id),
"nickname": nickname,
"person_name": unique_nickname, # 使用群昵称作为person_name
"name_reason": "从群昵称获取",
"know_times": 0,
"know_since": int(datetime.datetime.now().timestamp()),
"last_know": int(datetime.datetime.now().timestamp()),
"impression": None,
"points": [],
"forgotten_points": [],
}
# 序列化JSON字段
for key in JSON_SERIALIZED_FIELDS:
if key in initial_data:
if isinstance(initial_data[key], (list, dict)):
initial_data[key] = json.dumps(initial_data[key], ensure_ascii=False)
elif initial_data[key] is None:
initial_data[key] = json.dumps([], ensure_ascii=False)
model_fields = PersonInfo._meta.fields.keys() # type: ignore
filtered_initial_data = {k: v for k, v in initial_data.items() if v is not None and k in model_fields}
record, was_created = await asyncio.to_thread(_db_get_or_create_sync, person_id, filtered_initial_data)
if was_created:
logger.info(f"用户 {platform}:{user_id} (person_id: {person_id}) 不存在,将创建新记录 (Peewee)。")
logger.info(f"已为 {person_id} 创建新记录,初始数据 (filtered for model): {filtered_initial_data}")
else:
logger.debug(f"用户 {platform}:{user_id} (person_id: {person_id}) 已存在,返回现有记录。")
return person_id
person_info_manager = None
def get_person_info_manager():
global person_info_manager
if person_info_manager is None:
person_info_manager = PersonInfoManager()
return person_info_manager
person_info_manager = PersonInfoManager()