refactor: 全部代码格式化

This commit is contained in:
Rikki
2025-03-30 04:56:46 +08:00
parent 7adaa2f5a8
commit b2fc824afd
21 changed files with 491 additions and 514 deletions

View File

@@ -56,7 +56,7 @@ class ChatBot:
5. 更新关系
6. 更新情绪
"""
message = MessageRecv(message_data)
groupinfo = message.message_info.group_info
userinfo = message.message_info.user_info
@@ -68,7 +68,7 @@ class ChatBot:
chat = await chat_manager.get_or_create_stream(
platform=messageinfo.platform,
user_info=userinfo,
group_info=groupinfo,
group_info=groupinfo,
)
message.update_chat_stream(chat)
@@ -81,15 +81,12 @@ class ChatBot:
logger.debug(f"2消息处理时间: {timer2 - timer1}")
# 过滤词/正则表达式过滤
if (
self._check_ban_words(message.processed_plain_text, chat, userinfo)
or self._check_ban_regex(message.raw_message, chat, userinfo)
if self._check_ban_words(message.processed_plain_text, chat, userinfo) or self._check_ban_regex(
message.raw_message, chat, userinfo
):
return
await self.storage.store_message(message, chat)
timer1 = time.time()
interested_rate = 0
@@ -99,7 +96,6 @@ class ChatBot:
timer2 = time.time()
logger.debug(f"3记忆激活时间: {timer2 - timer1}")
is_mentioned = is_mentioned_bot_in_message(message)
if global_config.enable_think_flow:
@@ -124,17 +120,17 @@ class ChatBot:
timer2 = time.time()
logger.debug(f"4计算意愿激活时间: {timer2 - timer1}")
#神秘的消息流数据结构处理
# 神秘的消息流数据结构处理
if chat.group_info:
if chat.group_info.group_name:
mes_name_dict = chat.group_info.group_name
mes_name = mes_name_dict.get('group_name', '无名群聊')
mes_name = mes_name_dict.get("group_name", "无名群聊")
else:
mes_name = '群聊'
mes_name = "群聊"
else:
mes_name = '私聊'
#打印收到的信息的信息
mes_name = "私聊"
# 打印收到的信息的信息
current_time = time.strftime("%H:%M:%S", time.localtime(messageinfo.time))
logger.info(
f"[{current_time}][{mes_name}]"
@@ -145,48 +141,47 @@ class ChatBot:
if message.message_info.additional_config:
if "maimcore_reply_probability_gain" in message.message_info.additional_config.keys():
reply_probability += message.message_info.additional_config["maimcore_reply_probability_gain"]
# 开始组织语言
if random() < reply_probability:
timer1 = time.time()
response_set, thinking_id = await self._generate_response_from_message(message, chat, userinfo, messageinfo)
timer2 = time.time()
logger.info(f"5生成回复时间: {timer2 - timer1}")
if not response_set:
logger.info("为什么生成回复失败?")
return
# 发送消息
timer1 = time.time()
await self._send_response_messages(message, chat, response_set, thinking_id)
timer2 = time.time()
logger.info(f"7发送消息时间: {timer2 - timer1}")
# 处理表情包
timer1 = time.time()
await self._handle_emoji(message, chat, response_set)
timer2 = time.time()
logger.debug(f"8处理表情包时间: {timer2 - timer1}")
timer1 = time.time()
await self._update_using_response(message, chat, response_set)
timer2 = time.time()
logger.info(f"6更新htfl时间: {timer2 - timer1}")
# 更新情绪和关系
# await self._update_emotion_and_relationship(message, chat, response_set)
async def _generate_response_from_message(self, message, chat, userinfo, messageinfo):
"""生成回复内容
Args:
message: 接收到的消息
chat: 聊天流对象
userinfo: 用户信息对象
messageinfo: 消息信息对象
Returns:
tuple: (response, raw_content) 回复内容和原始内容
"""
@@ -195,7 +190,7 @@ class ChatBot:
user_nickname=global_config.BOT_NICKNAME,
platform=messageinfo.platform,
)
thinking_time_point = round(time.time(), 2)
thinking_id = "mt" + str(thinking_time_point)
thinking_message = MessageThinking(
@@ -208,9 +203,9 @@ class ChatBot:
message_manager.add_message(thinking_message)
willing_manager.change_reply_willing_sent(chat)
response_set = await self.gpt.generate_response(message)
return response_set, thinking_id
async def _update_using_response(self, message, response_set):
@@ -221,14 +216,13 @@ class ChatBot:
chat_talking_prompt = get_recent_group_detailed_plain_text(
stream_id, limit=global_config.MAX_CONTEXT_SIZE, combine=True
)
heartflow.get_subheartflow(stream_id).do_after_reply(response_set, chat_talking_prompt)
heartflow.get_subheartflow(stream_id).do_after_reply(response_set, chat_talking_prompt)
async def _send_response_messages(self, message, chat, response_set, thinking_id):
container = message_manager.get_container(chat.stream_id)
thinking_message = None
# logger.info(f"开始发送消息准备")
for msg in container.messages:
if isinstance(msg, MessageThinking) and msg.message_info.message_id == thinking_id:
@@ -243,7 +237,7 @@ class ChatBot:
# logger.info(f"开始发送消息")
thinking_start_time = thinking_message.thinking_start_time
message_set = MessageSet(chat, thinking_id)
mark_head = False
for msg in response_set:
message_segment = Seg(type="text", data=msg)
@@ -270,7 +264,7 @@ class ChatBot:
async def _handle_emoji(self, message, chat, response):
"""处理表情包
Args:
message: 接收到的消息
chat: 聊天流对象
@@ -281,10 +275,10 @@ class ChatBot:
if emoji_raw:
emoji_path, description = emoji_raw
emoji_cq = image_path_to_base64(emoji_path)
thinking_time_point = round(message.message_info.time, 2)
bot_response_time = thinking_time_point + (1 if random() < 0.5 else -1)
message_segment = Seg(type="emoji", data=emoji_cq)
bot_message = MessageSending(
message_id="mt" + str(thinking_time_point),
@@ -304,7 +298,7 @@ class ChatBot:
async def _update_emotion_and_relationship(self, message, chat, response, raw_content):
"""更新情绪和关系
Args:
message: 接收到的消息
chat: 聊天流对象
@@ -313,27 +307,24 @@ class ChatBot:
"""
stance, emotion = await self.gpt._get_emotion_tags(raw_content, message.processed_plain_text)
logger.debug(f"'{response}' 立场为:{stance} 获取到的情感标签为:{emotion}")
await relationship_manager.calculate_update_relationship_value(
chat_stream=chat, label=emotion, stance=stance
)
await relationship_manager.calculate_update_relationship_value(chat_stream=chat, label=emotion, stance=stance)
self.mood_manager.update_mood_from_emotion(emotion, global_config.mood_intensity_factor)
def _check_ban_words(self, text: str, chat, userinfo) -> bool:
"""检查消息中是否包含过滤词
Args:
text: 要检查的文本
chat: 聊天流对象
userinfo: 用户信息对象
Returns:
bool: 如果包含过滤词返回True否则返回False
"""
for word in global_config.ban_words:
if word in text:
logger.info(
f"[{chat.group_info.group_name if chat.group_info else '私聊'}]"
f"{userinfo.user_nickname}:{text}"
f"[{chat.group_info.group_name if chat.group_info else '私聊'}]{userinfo.user_nickname}:{text}"
)
logger.info(f"[过滤词识别]消息中含有{word}filtered")
return True
@@ -341,24 +332,24 @@ class ChatBot:
def _check_ban_regex(self, text: str, chat, userinfo) -> bool:
"""检查消息是否匹配过滤正则表达式
Args:
text: 要检查的文本
chat: 聊天流对象
userinfo: 用户信息对象
Returns:
bool: 如果匹配过滤正则返回True否则返回False
"""
for pattern in global_config.ban_msgs_regex:
if re.search(pattern, text):
logger.info(
f"[{chat.group_info.group_name if chat.group_info else '私聊'}]"
f"{userinfo.user_nickname}:{text}"
f"[{chat.group_info.group_name if chat.group_info else '私聊'}]{userinfo.user_nickname}:{text}"
)
logger.info(f"[正则表达式过滤]消息匹配到{pattern}filtered")
return True
return False
# 创建全局ChatBot实例
chat_bot = ChatBot()

View File

@@ -343,7 +343,7 @@ class EmojiManager:
while True:
logger.info("[扫描] 开始扫描新表情包...")
await self.scan_new_emojis()
await asyncio.sleep(global_config.EMOJI_CHECK_INTERVAL * 60)
await asyncio.sleep(global_config.EMOJI_CHECK_INTERVAL * 60)
def check_emoji_file_integrity(self):
"""检查表情包文件完整性

View File

@@ -31,12 +31,9 @@ class ResponseGenerator:
request_type="response",
)
self.model_normal = LLM_request(
model=global_config.llm_normal,
temperature=0.7,
max_tokens=3000,
request_type="response"
model=global_config.llm_normal, temperature=0.7, max_tokens=3000, request_type="response"
)
self.model_sum = LLM_request(
model=global_config.llm_summary_by_topic, temperature=0.7, max_tokens=3000, request_type="relation"
)
@@ -53,8 +50,9 @@ class ResponseGenerator:
self.current_model_type = "浅浅的"
current_model = self.model_normal
logger.info(f"{self.current_model_type}思考:{message.processed_plain_text[:30] + '...' if len(message.processed_plain_text) > 30 else message.processed_plain_text}") # noqa: E501
logger.info(
f"{self.current_model_type}思考:{message.processed_plain_text[:30] + '...' if len(message.processed_plain_text) > 30 else message.processed_plain_text}"
) # noqa: E501
model_response = await self._generate_response_with_model(message, current_model)
@@ -64,7 +62,6 @@ class ResponseGenerator:
logger.info(f"{global_config.BOT_NICKNAME}的回复是:{model_response}")
model_response = await self._process_response(model_response)
return model_response
else:
logger.info(f"{self.current_model_type}思考,失败")
@@ -93,7 +90,7 @@ class ResponseGenerator:
)
timer2 = time.time()
logger.info(f"构建prompt时间: {timer2 - timer1}")
try:
content, reasoning_content, self.current_model_name = await model.generate_response(prompt)
except Exception:

View File

@@ -37,7 +37,6 @@ class PromptBuilder:
current_mind_info = heartflow.get_subheartflow(stream_id).current_mind
# relation_prompt = ""
# for person in who_chat_in_group:
# relation_prompt += relationship_manager.build_relationship_info(person)
@@ -52,7 +51,7 @@ class PromptBuilder:
# 心情
mood_manager = MoodManager.get_instance()
mood_prompt = mood_manager.get_prompt()
logger.info(f"心情prompt: {mood_prompt}")
# 日程构建
@@ -72,13 +71,12 @@ class PromptBuilder:
chat_in_group = False
chat_talking_prompt = chat_talking_prompt
# print(f"\033[1;34m[调试]\033[0m 已从数据库获取群 {group_id} 的消息记录:{chat_talking_prompt}")
# 使用新的记忆获取方法
memory_prompt = ""
start_time = time.time()
#调用 hippocampus 的 get_relevant_memories 方法
# 调用 hippocampus 的 get_relevant_memories 方法
relevant_memories = await HippocampusManager.get_instance().get_memory_from_text(
text=message_txt, max_memory_num=3, max_memory_length=2, max_depth=2, fast_retrieval=False
)
@@ -165,11 +163,8 @@ class PromptBuilder:
请注意不要输出多余内容(包括前后缀,冒号和引号,括号,表情等),只输出回复内容。
{moderation_prompt}不要输出多余内容(包括前后缀冒号和引号括号表情包at或 @等 )。"""
return prompt
def _build_initiative_prompt_select(self, group_id, probability_1=0.8, probability_2=0.1):
current_date = time.strftime("%Y-%m-%d", time.localtime())
current_time = time.strftime("%H:%M:%S", time.localtime())

View File

@@ -9,9 +9,7 @@ logger = get_module_logger("message_storage")
class MessageStorage:
async def store_message(
self, message: Union[MessageSending, MessageRecv], chat_stream: ChatStream
) -> None:
async def store_message(self, message: Union[MessageSending, MessageRecv], chat_stream: ChatStream) -> None:
"""存储消息到数据库"""
try:
message_data = {

View File

@@ -11,7 +11,7 @@ from collections import Counter
from ...common.database import db
from ...plugins.models.utils_model import LLM_request
from src.common.logger import get_module_logger, LogConfig, MEMORY_STYLE_CONFIG
from src.plugins.memory_system.sample_distribution import MemoryBuildScheduler #分布生成器
from src.plugins.memory_system.sample_distribution import MemoryBuildScheduler # 分布生成器
from .memory_config import MemoryConfig
@@ -56,6 +56,7 @@ def get_closest_chat_from_db(length: int, timestamp: str):
return []
def calculate_information_content(text):
"""计算文本的信息量(熵)"""
char_count = Counter(text)
@@ -68,6 +69,7 @@ def calculate_information_content(text):
return entropy
def cosine_similarity(v1, v2):
"""计算余弦相似度"""
dot_product = np.dot(v1, v2)
@@ -223,7 +225,8 @@ class Memory_graph:
return None
#负责海马体与其他部分的交互
# 负责海马体与其他部分的交互
class EntorhinalCortex:
def __init__(self, hippocampus):
self.hippocampus = hippocampus
@@ -243,7 +246,7 @@ class EntorhinalCortex:
n_hours2=self.config.memory_build_distribution[3],
std_hours2=self.config.memory_build_distribution[4],
weight2=self.config.memory_build_distribution[5],
total_samples=self.config.build_memory_sample_num
total_samples=self.config.build_memory_sample_num,
)
timestamps = sample_scheduler.get_timestamp_array()
@@ -251,9 +254,7 @@ class EntorhinalCortex:
chat_samples = []
for timestamp in timestamps:
messages = self.random_get_msg_snippet(
timestamp,
self.config.build_memory_sample_length,
max_memorized_time_per_msg
timestamp, self.config.build_memory_sample_length, max_memorized_time_per_msg
)
if messages:
time_diff = (datetime.datetime.now().timestamp() - timestamp) / 3600
@@ -455,25 +456,25 @@ class EntorhinalCortex:
"""清空数据库并重新同步所有记忆数据"""
start_time = time.time()
logger.info("[数据库] 开始重新同步所有记忆数据...")
# 清空数据库
clear_start = time.time()
db.graph_data.nodes.delete_many({})
db.graph_data.edges.delete_many({})
clear_end = time.time()
logger.info(f"[数据库] 清空数据库耗时: {clear_end - clear_start:.2f}")
# 获取所有节点和边
memory_nodes = list(self.memory_graph.G.nodes(data=True))
memory_edges = list(self.memory_graph.G.edges(data=True))
# 重新写入节点
node_start = time.time()
for concept, data in memory_nodes:
memory_items = data.get("memory_items", [])
if not isinstance(memory_items, list):
memory_items = [memory_items] if memory_items else []
node_data = {
"concept": concept,
"memory_items": memory_items,
@@ -484,7 +485,7 @@ class EntorhinalCortex:
db.graph_data.nodes.insert_one(node_data)
node_end = time.time()
logger.info(f"[数据库] 写入 {len(memory_nodes)} 个节点耗时: {node_end - node_start:.2f}")
# 重新写入边
edge_start = time.time()
for source, target, data in memory_edges:
@@ -499,12 +500,13 @@ class EntorhinalCortex:
db.graph_data.edges.insert_one(edge_data)
edge_end = time.time()
logger.info(f"[数据库] 写入 {len(memory_edges)} 条边耗时: {edge_end - edge_start:.2f}")
end_time = time.time()
logger.success(f"[数据库] 重新同步完成,总耗时: {end_time - start_time:.2f}")
logger.success(f"[数据库] 同步了 {len(memory_nodes)} 个节点和 {len(memory_edges)} 条边")
#负责整合,遗忘,合并记忆
# 负责整合,遗忘,合并记忆
class ParahippocampalGyrus:
def __init__(self, hippocampus):
self.hippocampus = hippocampus
@@ -567,26 +569,26 @@ class ParahippocampalGyrus:
topic_num = self.hippocampus.calculate_topic_num(input_text, compress_rate)
topics_response = await self.hippocampus.llm_topic_judge.generate_response(
self.hippocampus.find_topic_llm(input_text, topic_num))
self.hippocampus.find_topic_llm(input_text, topic_num)
)
# 使用正则表达式提取<>中的内容
topics = re.findall(r'<([^>]+)>', topics_response[0])
topics = re.findall(r"<([^>]+)>", topics_response[0])
# 如果没有找到<>包裹的内容,返回['none']
if not topics:
topics = ['none']
topics = ["none"]
else:
# 处理提取出的话题
topics = [
topic.strip()
for topic in ','.join(topics).replace("", ",").replace("", ",").replace(" ", ",").split(",")
for topic in ",".join(topics).replace("", ",").replace("", ",").replace(" ", ",").split(",")
if topic.strip()
]
# 过滤掉包含禁用关键词的topic
filtered_topics = [
topic for topic in topics
if not any(keyword in topic for keyword in self.config.memory_ban_words)
topic for topic in topics if not any(keyword in topic for keyword in self.config.memory_ban_words)
]
logger.debug(f"过滤后话题: {filtered_topics}")
@@ -601,12 +603,12 @@ class ParahippocampalGyrus:
# 等待所有任务完成
compressed_memory = set()
similar_topics_dict = {}
for topic, task in tasks:
response = await task
if response:
compressed_memory.add((topic, response[0]))
existing_topics = list(self.memory_graph.G.nodes())
similar_topics = []
@@ -651,7 +653,7 @@ class ParahippocampalGyrus:
current_time = datetime.datetime.now().timestamp()
logger.debug(f"添加节点: {', '.join(topic for topic, _ in compressed_memory)}")
all_added_nodes.extend(topic for topic, _ in compressed_memory)
for topic, memory in compressed_memory:
self.memory_graph.add_dot(topic, memory)
all_topics.append(topic)
@@ -661,13 +663,13 @@ class ParahippocampalGyrus:
for similar_topic, similarity in similar_topics:
if topic != similar_topic:
strength = int(similarity * 10)
logger.debug(f"连接相似节点: {topic}{similar_topic} (强度: {strength})")
all_added_edges.append(f"{topic}-{similar_topic}")
all_connected_nodes.append(topic)
all_connected_nodes.append(similar_topic)
self.memory_graph.G.add_edge(
topic,
similar_topic,
@@ -685,14 +687,11 @@ class ParahippocampalGyrus:
logger.success(f"更新记忆: {', '.join(all_added_nodes)}")
logger.debug(f"强化连接: {', '.join(all_added_edges)}")
logger.info(f"强化连接节点: {', '.join(all_connected_nodes)}")
await self.hippocampus.entorhinal_cortex.sync_memory_to_db()
end_time = time.time()
logger.success(
f"---------------------记忆构建耗时: {end_time - start_time:.2f} "
"秒---------------------"
)
logger.success(f"---------------------记忆构建耗时: {end_time - start_time:.2f} 秒---------------------")
async def operation_forget_topic(self, percentage=0.005):
start_time = time.time()
@@ -714,11 +713,11 @@ class ParahippocampalGyrus:
# 使用列表存储变化信息
edge_changes = {
"weakened": [], # 存储减弱的边
"removed": [] # 存储移除的边
"removed": [], # 存储移除的边
}
node_changes = {
"reduced": [], # 存储减少记忆的节点
"removed": [] # 存储移除的节点
"reduced": [], # 存储减少记忆的节点
"removed": [], # 存储移除的节点
}
current_time = datetime.datetime.now().timestamp()
@@ -771,35 +770,40 @@ class ParahippocampalGyrus:
if any(edge_changes.values()) or any(node_changes.values()):
sync_start = time.time()
await self.hippocampus.entorhinal_cortex.resync_memory_to_db()
sync_end = time.time()
logger.info(f"[遗忘] 数据库同步耗时: {sync_end - sync_start:.2f}")
# 汇总输出所有变化
logger.info("[遗忘] 遗忘操作统计:")
if edge_changes["weakened"]:
logger.info(
f"[遗忘] 减弱的连接 ({len(edge_changes['weakened'])}个): {', '.join(edge_changes['weakened'])}")
f"[遗忘] 减弱的连接 ({len(edge_changes['weakened'])}个): {', '.join(edge_changes['weakened'])}"
)
if edge_changes["removed"]:
logger.info(
f"[遗忘] 移除的连接 ({len(edge_changes['removed'])}个): {', '.join(edge_changes['removed'])}")
f"[遗忘] 移除的连接 ({len(edge_changes['removed'])}个): {', '.join(edge_changes['removed'])}"
)
if node_changes["reduced"]:
logger.info(
f"[遗忘] 减少记忆的节点 ({len(node_changes['reduced'])}个): {', '.join(node_changes['reduced'])}")
f"[遗忘] 减少记忆的节点 ({len(node_changes['reduced'])}个): {', '.join(node_changes['reduced'])}"
)
if node_changes["removed"]:
logger.info(
f"[遗忘] 移除的节点 ({len(node_changes['removed'])}个): {', '.join(node_changes['removed'])}")
f"[遗忘] 移除的节点 ({len(node_changes['removed'])}个): {', '.join(node_changes['removed'])}"
)
else:
logger.info("[遗忘] 本次检查没有节点或连接满足遗忘条件")
end_time = time.time()
logger.info(f"[遗忘] 总耗时: {end_time - start_time:.2f}")
# 海马体
class Hippocampus:
def __init__(self):
@@ -817,8 +821,8 @@ class Hippocampus:
self.parahippocampal_gyrus = ParahippocampalGyrus(self)
# 从数据库加载记忆图
self.entorhinal_cortex.sync_memory_from_db()
self.llm_topic_judge = LLM_request(self.config.llm_topic_judge,request_type="memory")
self.llm_summary_by_topic = LLM_request(self.config.llm_summary_by_topic,request_type="memory")
self.llm_topic_judge = LLM_request(self.config.llm_topic_judge, request_type="memory")
self.llm_summary_by_topic = LLM_request(self.config.llm_summary_by_topic, request_type="memory")
def get_all_node_names(self) -> list:
"""获取记忆图中所有节点的名字列表"""
@@ -901,16 +905,21 @@ class Hippocampus:
memory_items = node_data.get("memory_items", [])
if not isinstance(memory_items, list):
memory_items = [memory_items] if memory_items else []
memories.append((node, memory_items, similarity))
# 按相似度降序排序
memories.sort(key=lambda x: x[2], reverse=True)
return memories
async def get_memory_from_text(self, text: str, max_memory_num: int = 3, max_memory_length: int = 2,
max_depth: int = 3,
fast_retrieval: bool = False) -> list:
async def get_memory_from_text(
self,
text: str,
max_memory_num: int = 3,
max_memory_length: int = 2,
max_depth: int = 3,
fast_retrieval: bool = False,
) -> list:
"""从文本中提取关键词并获取相关记忆。
Args:
@@ -943,18 +952,16 @@ class Hippocampus:
# 使用LLM提取关键词
topic_num = min(5, max(1, int(len(text) * 0.1))) # 根据文本长度动态调整关键词数量
# logger.info(f"提取关键词数量: {topic_num}")
topics_response = await self.llm_topic_judge.generate_response(
self.find_topic_llm(text, topic_num)
)
topics_response = await self.llm_topic_judge.generate_response(self.find_topic_llm(text, topic_num))
# 提取关键词
keywords = re.findall(r'<([^>]+)>', topics_response[0])
keywords = re.findall(r"<([^>]+)>", topics_response[0])
if not keywords:
keywords = []
else:
keywords = [
keyword.strip()
for keyword in ','.join(keywords).replace("", ",").replace("", ",").replace(" ", ",").split(",")
for keyword in ",".join(keywords).replace("", ",").replace("", ",").replace(" ", ",").split(",")
if keyword.strip()
]
@@ -965,7 +972,7 @@ class Hippocampus:
if not valid_keywords:
logger.info("没有找到有效的关键词节点")
return []
logger.info(f"有效的关键词: {', '.join(valid_keywords)}")
# 从每个关键词获取记忆
@@ -981,35 +988,36 @@ class Hippocampus:
visited_nodes = {keyword}
# 待处理的节点队列,每个元素是(节点, 激活值, 当前深度)
nodes_to_process = [(keyword, 1.0, 0)]
while nodes_to_process:
current_node, current_activation, current_depth = nodes_to_process.pop(0)
# 如果激活值小于0或超过最大深度停止扩散
if current_activation <= 0 or current_depth >= max_depth:
continue
# 获取当前节点的所有邻居
neighbors = list(self.memory_graph.G.neighbors(current_node))
for neighbor in neighbors:
if neighbor in visited_nodes:
continue
# 获取连接强度
edge_data = self.memory_graph.G[current_node][neighbor]
strength = edge_data.get("strength", 1)
# 计算新的激活值
new_activation = current_activation - (1 / strength)
if new_activation > 0:
activation_values[neighbor] = new_activation
visited_nodes.add(neighbor)
nodes_to_process.append((neighbor, new_activation, current_depth + 1))
logger.debug(
f"节点 '{neighbor}' 被激活,激活值: {new_activation:.2f} (通过 '{current_node}' 连接,强度: {strength}, 深度: {current_depth + 1})") # noqa: E501
f"节点 '{neighbor}' 被激活,激活值: {new_activation:.2f} (通过 '{current_node}' 连接,强度: {strength}, 深度: {current_depth + 1})"
) # noqa: E501
# 更新激活映射
for node, activation_value in activation_values.items():
if activation_value > 0:
@@ -1017,7 +1025,7 @@ class Hippocampus:
activate_map[node] += activation_value
else:
activate_map[node] = activation_value
# 输出激活映射
# logger.info("激活映射统计:")
# for node, total_activation in sorted(activate_map.items(), key=lambda x: x[1], reverse=True):
@@ -1026,28 +1034,24 @@ class Hippocampus:
# 基于激活值平方的独立概率选择
remember_map = {}
# logger.info("基于激活值平方的归一化选择:")
# 计算所有激活值的平方和
total_squared_activation = sum(activation ** 2 for activation in activate_map.values())
total_squared_activation = sum(activation**2 for activation in activate_map.values())
if total_squared_activation > 0:
# 计算归一化的激活值
normalized_activations = {
node: (activation ** 2) / total_squared_activation
for node, activation in activate_map.items()
node: (activation**2) / total_squared_activation for node, activation in activate_map.items()
}
# 按归一化激活值排序并选择前max_memory_num个
sorted_nodes = sorted(
normalized_activations.items(),
key=lambda x: x[1],
reverse=True
)[:max_memory_num]
sorted_nodes = sorted(normalized_activations.items(), key=lambda x: x[1], reverse=True)[:max_memory_num]
# 将选中的节点添加到remember_map
for node, normalized_activation in sorted_nodes:
remember_map[node] = activate_map[node] # 使用原始激活值
logger.debug(
f"节点 '{node}' (归一化激活值: {normalized_activation:.2f}, 激活值: {activate_map[node]:.2f})")
f"节点 '{node}' (归一化激活值: {normalized_activation:.2f}, 激活值: {activate_map[node]:.2f})"
)
else:
logger.info("没有有效的激活值")
@@ -1060,7 +1064,7 @@ class Hippocampus:
memory_items = node_data.get("memory_items", [])
if not isinstance(memory_items, list):
memory_items = [memory_items] if memory_items else []
if memory_items:
logger.debug(f"节点包含 {len(memory_items)} 条记忆")
# 计算每条记忆与输入文本的相似度
@@ -1079,7 +1083,7 @@ class Hippocampus:
memory_similarities.sort(key=lambda x: x[1], reverse=True)
# 获取最匹配的记忆
top_memories = memory_similarities[:max_memory_length]
# 添加到结果中
for memory, similarity in top_memories:
all_memories.append((node, [memory], similarity))
@@ -1106,11 +1110,10 @@ class Hippocampus:
memory = memory_items[0] # 因为每个topic只有一条记忆
result.append((topic, memory))
logger.info(f"选中记忆: {memory} (来自节点: {topic})")
return result
async def get_activate_from_text(self, text: str, max_depth: int = 3,
fast_retrieval: bool = False) -> float:
async def get_activate_from_text(self, text: str, max_depth: int = 3, fast_retrieval: bool = False) -> float:
"""从文本中提取关键词并获取相关记忆。
Args:
@@ -1140,18 +1143,16 @@ class Hippocampus:
# 使用LLM提取关键词
topic_num = min(5, max(1, int(len(text) * 0.1))) # 根据文本长度动态调整关键词数量
# logger.info(f"提取关键词数量: {topic_num}")
topics_response = await self.llm_topic_judge.generate_response(
self.find_topic_llm(text, topic_num)
)
topics_response = await self.llm_topic_judge.generate_response(self.find_topic_llm(text, topic_num))
# 提取关键词
keywords = re.findall(r'<([^>]+)>', topics_response[0])
keywords = re.findall(r"<([^>]+)>", topics_response[0])
if not keywords:
keywords = []
else:
keywords = [
keyword.strip()
for keyword in ','.join(keywords).replace("", ",").replace("", ",").replace(" ", ",").split(",")
for keyword in ",".join(keywords).replace("", ",").replace("", ",").replace(" ", ",").split(",")
if keyword.strip()
]
@@ -1162,7 +1163,7 @@ class Hippocampus:
if not valid_keywords:
logger.info("没有找到有效的关键词节点")
return 0
logger.info(f"有效的关键词: {', '.join(valid_keywords)}")
# 从每个关键词获取记忆
@@ -1177,35 +1178,35 @@ class Hippocampus:
visited_nodes = {keyword}
# 待处理的节点队列,每个元素是(节点, 激活值, 当前深度)
nodes_to_process = [(keyword, 1.0, 0)]
while nodes_to_process:
current_node, current_activation, current_depth = nodes_to_process.pop(0)
# 如果激活值小于0或超过最大深度停止扩散
if current_activation <= 0 or current_depth >= max_depth:
continue
# 获取当前节点的所有邻居
neighbors = list(self.memory_graph.G.neighbors(current_node))
for neighbor in neighbors:
if neighbor in visited_nodes:
continue
# 获取连接强度
edge_data = self.memory_graph.G[current_node][neighbor]
strength = edge_data.get("strength", 1)
# 计算新的激活值
new_activation = current_activation - (1 / strength)
if new_activation > 0:
activation_values[neighbor] = new_activation
visited_nodes.add(neighbor)
nodes_to_process.append((neighbor, new_activation, current_depth + 1))
# logger.debug(
# f"节点 '{neighbor}' 被激活,激活值: {new_activation:.2f} (通过 '{current_node}' 连接,强度: {strength}, 深度: {current_depth + 1})") # noqa: E501
# f"节点 '{neighbor}' 被激活,激活值: {new_activation:.2f} (通过 '{current_node}' 连接,强度: {strength}, 深度: {current_depth + 1})") # noqa: E501
# 更新激活映射
for node, activation_value in activation_values.items():
if activation_value > 0:
@@ -1213,23 +1214,24 @@ class Hippocampus:
activate_map[node] += activation_value
else:
activate_map[node] = activation_value
# 输出激活映射
# logger.info("激活映射统计:")
# for node, total_activation in sorted(activate_map.items(), key=lambda x: x[1], reverse=True):
# logger.info(f"节点 '{node}': 累计激活值 = {total_activation:.2f}")
# 计算激活节点数与总节点数的比值
total_activation = sum(activate_map.values())
logger.info(f"总激活值: {total_activation:.2f}")
total_nodes = len(self.memory_graph.G.nodes())
# activated_nodes = len(activate_map)
activation_ratio = total_activation / total_nodes if total_nodes > 0 else 0
activation_ratio = activation_ratio*60
activation_ratio = activation_ratio * 60
logger.info(f"总激活值: {total_activation:.2f}, 总节点数: {total_nodes}, 激活: {activation_ratio}")
return activation_ratio
class HippocampusManager:
_instance = None
_hippocampus = None
@@ -1252,12 +1254,12 @@ class HippocampusManager:
"""初始化海马体实例"""
if self._initialized:
return self._hippocampus
self._global_config = global_config
self._hippocampus = Hippocampus()
self._hippocampus.initialize(global_config)
self._initialized = True
# 输出记忆系统参数信息
config = self._hippocampus.config
@@ -1265,16 +1267,15 @@ class HippocampusManager:
memory_graph = self._hippocampus.memory_graph.G
node_count = len(memory_graph.nodes())
edge_count = len(memory_graph.edges())
logger.success(f'''--------------------------------
logger.success(f"""--------------------------------
记忆系统参数配置:
构建间隔: {global_config.build_memory_interval}秒|样本数: {config.build_memory_sample_num},长度: {config.build_memory_sample_length}|压缩率: {config.memory_compress_rate}
记忆构建分布: {config.memory_build_distribution}
遗忘间隔: {global_config.forget_memory_interval}秒|遗忘比例: {global_config.memory_forget_percentage}|遗忘: {config.memory_forget_time}小时之后
记忆图统计信息: 节点数量: {node_count}, 连接数量: {edge_count}
--------------------------------''') #noqa: E501
--------------------------------""") # noqa: E501
return self._hippocampus
async def build_memory(self):
@@ -1289,17 +1290,22 @@ class HippocampusManager:
raise RuntimeError("HippocampusManager 尚未初始化,请先调用 initialize 方法")
return await self._hippocampus.parahippocampal_gyrus.operation_forget_topic(percentage)
async def get_memory_from_text(self, text: str, max_memory_num: int = 3,
max_memory_length: int = 2, max_depth: int = 3,
fast_retrieval: bool = False) -> list:
async def get_memory_from_text(
self,
text: str,
max_memory_num: int = 3,
max_memory_length: int = 2,
max_depth: int = 3,
fast_retrieval: bool = False,
) -> list:
"""从文本中获取相关记忆的公共接口"""
if not self._initialized:
raise RuntimeError("HippocampusManager 尚未初始化,请先调用 initialize 方法")
return await self._hippocampus.get_memory_from_text(
text, max_memory_num, max_memory_length, max_depth, fast_retrieval)
text, max_memory_num, max_memory_length, max_depth, fast_retrieval
)
async def get_activate_from_text(self, text: str, max_depth: int = 3,
fast_retrieval: bool = False) -> float:
async def get_activate_from_text(self, text: str, max_depth: int = 3, fast_retrieval: bool = False) -> float:
"""从文本中获取激活值的公共接口"""
if not self._initialized:
raise RuntimeError("HippocampusManager 尚未初始化,请先调用 initialize 方法")
@@ -1316,5 +1322,3 @@ class HippocampusManager:
if not self._initialized:
raise RuntimeError("HippocampusManager 尚未初始化,请先调用 initialize 方法")
return self._hippocampus.get_all_node_names()

View File

@@ -3,11 +3,13 @@ import asyncio
import time
import sys
import os
# 添加项目根目录到系统路径
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))))
from src.plugins.memory_system.Hippocampus import HippocampusManager
from src.plugins.config.config import global_config
async def test_memory_system():
"""测试记忆系统的主要功能"""
try:
@@ -24,7 +26,7 @@ async def test_memory_system():
# 测试记忆检索
test_text = "千石可乐在群里聊天"
test_text = '''[03-24 10:39:37] 麦麦(ta的id:2814567326): 早说散步结果下雨改成室内运动啊
test_text = """[03-24 10:39:37] 麦麦(ta的id:2814567326): 早说散步结果下雨改成室内运动啊
[03-24 10:39:37] 麦麦(ta的id:2814567326): [回复:变量] 变量就像今天计划总变
[03-24 10:39:44] 状态异常(ta的id:535554838): 要把本地文件改成弹出来的路径吗
[03-24 10:40:35] 状态异常(ta的id:535554838): [图片这张图片显示的是Windows系统的环境变量设置界面。界面左侧列出了多个环境变量的值包括Intel Dev Redist、Windows、Windows PowerShell、OpenSSH、NVIDIA Corporation的目录等。右侧有新建、编辑、浏览、删除、上移、下移和编辑文本等操作按钮。图片下方有一个错误提示框显示"Windows找不到文件'mongodb\\bin\\mongod.exe'。请确定文件名是否正确后,再试一次。"这意味着用户试图运行MongoDB的mongod.exe程序时系统找不到该文件。这可能是因为MongoDB的安装路径未正确添加到系统环境变量中或者文件路径有误。
@@ -39,28 +41,21 @@ async def test_memory_system():
[03-24 10:46:12] (ta的id:3229291803): [表情包:这张表情包显示了一只手正在做"点赞"的动作,通常表示赞同、喜欢或支持。这个表情包所表达的情感是积极的、赞同的或支持的。]
[03-24 10:46:37] 星野風禾(ta的id:2890165435): 还能思考高达
[03-24 10:46:39] 星野風禾(ta的id:2890165435): 什么知识库
[03-24 10:46:49] ❦幻凌慌てない(ta的id:2459587037): 为什么改了回复系数麦麦还是不怎么回复?大佬们''' # noqa: E501
[03-24 10:46:49] ❦幻凌慌てない(ta的id:2459587037): 为什么改了回复系数麦麦还是不怎么回复?大佬们""" # noqa: E501
# test_text = '''千石可乐分不清AI的陪伴和人类的陪伴,是这样吗?'''
print(f"开始测试记忆检索,测试文本: {test_text}\n")
memories = await hippocampus_manager.get_memory_from_text(
text=test_text,
max_memory_num=3,
max_memory_length=2,
max_depth=3,
fast_retrieval=False
text=test_text, max_memory_num=3, max_memory_length=2, max_depth=3, fast_retrieval=False
)
await asyncio.sleep(1)
print("检索到的记忆:")
for topic, memory_items in memories:
print(f"主题: {topic}")
print(f"- {memory_items}")
# 测试记忆遗忘
# forget_start_time = time.time()
# # print("开始测试记忆遗忘...")
@@ -80,6 +75,7 @@ async def test_memory_system():
print(f"测试过程中出现错误: {e}")
raise
async def main():
"""主函数"""
try:
@@ -91,5 +87,6 @@ async def main():
print(f"程序执行出错: {e}")
raise
if __name__ == "__main__":
asyncio.run(main())
asyncio.run(main())

View File

@@ -1,24 +1,26 @@
from dataclasses import dataclass
from typing import List
@dataclass
class MemoryConfig:
"""记忆系统配置类"""
# 记忆构建相关配置
memory_build_distribution: List[float] # 记忆构建的时间分布参数
build_memory_sample_num: int # 每次构建记忆的样本数量
build_memory_sample_length: int # 每个样本的消息长度
memory_compress_rate: float # 记忆压缩率
# 记忆遗忘相关配置
memory_forget_time: int # 记忆遗忘时间(小时)
# 记忆过滤相关配置
memory_ban_words: List[str] # 记忆过滤词列表
llm_topic_judge: str # 话题判断模型
llm_summary_by_topic: str # 话题总结模型
@classmethod
def from_global_config(cls, global_config):
"""从全局配置创建记忆系统配置"""
@@ -30,5 +32,5 @@ class MemoryConfig:
memory_forget_time=global_config.memory_forget_time,
memory_ban_words=global_config.memory_ban_words,
llm_topic_judge=global_config.llm_topic_judge,
llm_summary_by_topic=global_config.llm_summary_by_topic
)
llm_summary_by_topic=global_config.llm_summary_by_topic,
)

View File

@@ -2,11 +2,12 @@ import numpy as np
from scipy import stats
from datetime import datetime, timedelta
class DistributionVisualizer:
def __init__(self, mean=0, std=1, skewness=0, sample_size=10):
"""
初始化分布可视化器
参数:
mean (float): 期望均值
std (float): 标准差
@@ -18,7 +19,7 @@ class DistributionVisualizer:
self.skewness = skewness
self.sample_size = sample_size
self.samples = None
def generate_samples(self):
"""生成具有指定参数的样本"""
if self.skewness == 0:
@@ -26,37 +27,28 @@ class DistributionVisualizer:
self.samples = np.random.normal(loc=self.mean, scale=self.std, size=self.sample_size)
else:
# 使用 scipy.stats 生成具有偏度的分布
self.samples = stats.skewnorm.rvs(a=self.skewness,
loc=self.mean,
scale=self.std,
size=self.sample_size)
self.samples = stats.skewnorm.rvs(a=self.skewness, loc=self.mean, scale=self.std, size=self.sample_size)
def get_weighted_samples(self):
"""获取加权后的样本数列"""
if self.samples is None:
self.generate_samples()
# 将样本值乘以样本大小
return self.samples * self.sample_size
def get_statistics(self):
"""获取分布的统计信息"""
if self.samples is None:
self.generate_samples()
return {
"均值": np.mean(self.samples),
"标准差": np.std(self.samples),
"实际偏度": stats.skew(self.samples)
}
return {"均值": np.mean(self.samples), "标准差": np.std(self.samples), "实际偏度": stats.skew(self.samples)}
class MemoryBuildScheduler:
def __init__(self,
n_hours1, std_hours1, weight1,
n_hours2, std_hours2, weight2,
total_samples=50):
def __init__(self, n_hours1, std_hours1, weight1, n_hours2, std_hours2, weight2, total_samples=50):
"""
初始化记忆构建调度器
参数:
n_hours1 (float): 第一个分布的均值(距离现在的小时数)
std_hours1 (float): 第一个分布的标准差(小时)
@@ -70,39 +62,31 @@ class MemoryBuildScheduler:
total_weight = weight1 + weight2
self.weight1 = weight1 / total_weight
self.weight2 = weight2 / total_weight
self.n_hours1 = n_hours1
self.std_hours1 = std_hours1
self.n_hours2 = n_hours2
self.std_hours2 = std_hours2
self.total_samples = total_samples
self.base_time = datetime.now()
def generate_time_samples(self):
"""生成混合分布的时间采样点"""
# 根据权重计算每个分布的样本数
samples1 = int(self.total_samples * self.weight1)
samples2 = self.total_samples - samples1
# 生成两个正态分布的小时偏移
hours_offset1 = np.random.normal(
loc=self.n_hours1,
scale=self.std_hours1,
size=samples1
)
hours_offset2 = np.random.normal(
loc=self.n_hours2,
scale=self.std_hours2,
size=samples2
)
hours_offset1 = np.random.normal(loc=self.n_hours1, scale=self.std_hours1, size=samples1)
hours_offset2 = np.random.normal(loc=self.n_hours2, scale=self.std_hours2, size=samples2)
# 合并两个分布的偏移
hours_offset = np.concatenate([hours_offset1, hours_offset2])
# 将偏移转换为实际时间戳(使用绝对值确保时间点在过去)
timestamps = [self.base_time - timedelta(hours=abs(offset)) for offset in hours_offset]
# 按时间排序(从最早到最近)
return sorted(timestamps)
@@ -111,54 +95,56 @@ class MemoryBuildScheduler:
timestamps = self.generate_time_samples()
return [int(t.timestamp()) for t in timestamps]
def print_time_samples(timestamps, show_distribution=True):
"""打印时间样本和分布信息"""
print(f"\n生成的{len(timestamps)}个时间点分布:")
print("序号".ljust(5), "时间戳".ljust(25), "距现在(小时)")
print("-" * 50)
now = datetime.now()
time_diffs = []
for i, timestamp in enumerate(timestamps, 1):
hours_diff = (now - timestamp).total_seconds() / 3600
time_diffs.append(hours_diff)
print(f"{str(i).ljust(5)} {timestamp.strftime('%Y-%m-%d %H:%M:%S').ljust(25)} {hours_diff:.2f}")
# 打印统计信息
print("\n统计信息:")
print(f"平均时间偏移:{np.mean(time_diffs):.2f}小时")
print(f"标准差:{np.std(time_diffs):.2f}小时")
print(f"最早时间:{min(timestamps).strftime('%Y-%m-%d %H:%M:%S')} ({max(time_diffs):.2f}小时前)")
print(f"最近时间:{max(timestamps).strftime('%Y-%m-%d %H:%M:%S')} ({min(time_diffs):.2f}小时前)")
if show_distribution:
# 计算时间分布的直方图
hist, bins = np.histogram(time_diffs, bins=40)
print("\n时间分布(每个*代表一个时间点):")
for i in range(len(hist)):
if hist[i] > 0:
print(f"{bins[i]:6.1f}-{bins[i+1]:6.1f}小时: {'*' * int(hist[i])}")
print(f"{bins[i]:6.1f}-{bins[i + 1]:6.1f}小时: {'*' * int(hist[i])}")
# 使用示例
if __name__ == "__main__":
# 创建一个双峰分布的记忆调度器
scheduler = MemoryBuildScheduler(
n_hours1=12, # 第一个分布均值12小时前
std_hours1=8, # 第一个分布标准差
weight1=0.7, # 第一个分布权重 70%
n_hours2=36, # 第二个分布均值36小时前
std_hours2=24, # 第二个分布标准差
weight2=0.3, # 第二个分布权重 30%
total_samples=50 # 总共生成50个时间点
n_hours1=12, # 第一个分布均值12小时前
std_hours1=8, # 第一个分布标准差
weight1=0.7, # 第一个分布权重 70%
n_hours2=36, # 第二个分布均值36小时前
std_hours2=24, # 第二个分布标准差
weight2=0.3, # 第二个分布权重 30%
total_samples=50, # 总共生成50个时间点
)
# 生成时间分布
timestamps = scheduler.generate_time_samples()
# 打印结果,包含分布可视化
print_time_samples(timestamps, show_distribution=True)
# 打印时间戳数组
timestamp_array = scheduler.get_timestamp_array()
print("\n时间戳数组Unix时间戳")
@@ -167,4 +153,4 @@ if __name__ == "__main__":
if i > 0:
print(", ", end="")
print(ts, end="")
print("]")
print("]")

View File

@@ -54,9 +54,7 @@ class TestLiveAPI(unittest.IsolatedAsyncioTestCase):
# 准备测试消息
user_info = UserInfo(user_id=12345678, user_nickname="测试用户", platform="qq")
group_info = GroupInfo(group_id=12345678, group_name="测试群", platform="qq")
format_info = FormatInfo(
content_format=["text"], accept_format=["text", "emoji", "reply"]
)
format_info = FormatInfo(content_format=["text"], accept_format=["text", "emoji", "reply"])
template_info = None
message_info = BaseMessageInfo(
platform="qq",

View File

@@ -35,6 +35,7 @@ else:
print(f"未找到环境变量文件: {env_path}")
print("将使用默认配置")
class ChatBasedPersonalityEvaluator:
def __init__(self):
self.personality_traits = {"开放性": 0, "严谨性": 0, "外向性": 0, "宜人性": 0, "神经质": 0}
@@ -50,16 +51,14 @@ class ChatBasedPersonalityEvaluator:
continue
scene_keys = list(scenes.keys())
selected_scenes = random.sample(scene_keys, min(3, len(scene_keys)))
for scene_key in selected_scenes:
scene = scenes[scene_key]
other_traits = [t for t in PERSONALITY_SCENES if t != trait]
secondary_trait = random.choice(other_traits)
self.scenarios.append({
"场景": scene["scenario"],
"评估维度": [trait, secondary_trait],
"场景编号": scene_key
})
self.scenarios.append(
{"场景": scene["scenario"], "评估维度": [trait, secondary_trait], "场景编号": scene_key}
)
def analyze_chat_context(self, messages: List[Dict]) -> str:
"""
@@ -67,20 +66,21 @@ class ChatBasedPersonalityEvaluator:
"""
context = ""
for msg in messages:
nickname = msg.get('user_info', {}).get('user_nickname', '未知用户')
content = msg.get('processed_plain_text', msg.get('detailed_plain_text', ''))
nickname = msg.get("user_info", {}).get("user_nickname", "未知用户")
content = msg.get("processed_plain_text", msg.get("detailed_plain_text", ""))
if content:
context += f"{nickname}: {content}\n"
return context
def evaluate_chat_response(
self, user_nickname: str, chat_context: str, dimensions: List[str] = None) -> Dict[str, float]:
self, user_nickname: str, chat_context: str, dimensions: List[str] = None
) -> Dict[str, float]:
"""
评估聊天内容在各个人格维度上的得分
"""
# 使用所有维度进行评估
dimensions = list(self.personality_traits.keys())
dimension_descriptions = []
for dim in dimensions:
desc = FACTOR_DESCRIPTIONS.get(dim, "")
@@ -136,18 +136,19 @@ class ChatBasedPersonalityEvaluator:
def evaluate_user_personality(self, qq_id: str, num_samples: int = 10, context_length: int = 5) -> Dict:
"""
基于用户的聊天记录评估人格特征
Args:
qq_id (str): 用户QQ号
num_samples (int): 要分析的聊天片段数量
context_length (int): 每个聊天片段的上下文长度
Returns:
Dict: 评估结果
"""
# 获取用户的随机消息及其上下文
chat_contexts, user_nickname = self.message_analyzer.get_user_random_contexts(
qq_id, num_messages=num_samples, context_length=context_length)
qq_id, num_messages=num_samples, context_length=context_length
)
if not chat_contexts:
return {"error": f"没有找到QQ号 {qq_id} 的消息记录"}
@@ -155,7 +156,7 @@ class ChatBasedPersonalityEvaluator:
final_scores = defaultdict(float)
dimension_counts = defaultdict(int)
chat_samples = []
# 清空历史记录
self.trait_scores_history.clear()
@@ -163,13 +164,11 @@ class ChatBasedPersonalityEvaluator:
for chat_context in chat_contexts:
# 评估这段聊天内容的所有维度
scores = self.evaluate_chat_response(user_nickname, chat_context)
# 记录样本
chat_samples.append({
"聊天内容": chat_context,
"评估维度": list(self.personality_traits.keys()),
"评分": scores
})
chat_samples.append(
{"聊天内容": chat_context, "评估维度": list(self.personality_traits.keys()), "评分": scores}
)
# 更新总分和历史记录
for dimension, score in scores.items():
@@ -196,7 +195,7 @@ class ChatBasedPersonalityEvaluator:
"人格特征评分": average_scores,
"维度评估次数": dict(dimension_counts),
"详细样本": chat_samples,
"特质得分历史": {k: v for k, v in self.trait_scores_history.items()}
"特质得分历史": {k: v for k, v in self.trait_scores_history.items()},
}
# 保存结果
@@ -215,40 +214,41 @@ class ChatBasedPersonalityEvaluator:
chinese_fonts = []
for f in fm.fontManager.ttflist:
try:
if '' in f.name or 'SC' in f.name or '' in f.name or '' in f.name or '微软' in f.name:
if "" in f.name or "SC" in f.name or "" in f.name or "" in f.name or "微软" in f.name:
chinese_fonts.append(f.name)
except Exception:
continue
if chinese_fonts:
plt.rcParams['font.sans-serif'] = chinese_fonts + ['SimHei', 'Microsoft YaHei', 'Arial Unicode MS']
plt.rcParams["font.sans-serif"] = chinese_fonts + ["SimHei", "Microsoft YaHei", "Arial Unicode MS"]
else:
# 如果没有找到中文字体,使用默认字体,并将中文昵称转换为拼音或英文
try:
from pypinyin import lazy_pinyin
user_nickname = ''.join(lazy_pinyin(user_nickname))
user_nickname = "".join(lazy_pinyin(user_nickname))
except ImportError:
user_nickname = "User" # 如果无法转换为拼音,使用默认英文
plt.rcParams['axes.unicode_minus'] = False # 解决负号显示问题
plt.rcParams["axes.unicode_minus"] = False # 解决负号显示问题
plt.figure(figsize=(12, 6))
plt.style.use('bmh') # 使用内置的bmh样式它有类似seaborn的美观效果
plt.style.use("bmh") # 使用内置的bmh样式它有类似seaborn的美观效果
colors = {
"开放性": "#FF9999",
"严谨性": "#66B2FF",
"外向性": "#99FF99",
"宜人性": "#FFCC99",
"神经质": "#FF99CC"
"神经质": "#FF99CC",
}
# 计算每个维度在每个时间点的累计平均分
cumulative_averages = {}
for trait, scores in self.trait_scores_history.items():
if not scores:
continue
averages = []
total = 0
valid_count = 0
@@ -264,25 +264,25 @@ class ChatBasedPersonalityEvaluator:
averages.append(averages[-1])
else:
continue # 跳过无效分数
if averages: # 只有在有有效分数的情况下才添加到累计平均中
cumulative_averages[trait] = averages
# 绘制每个维度的累计平均分变化趋势
for trait, averages in cumulative_averages.items():
x = range(1, len(averages) + 1)
plt.plot(x, averages, 'o-', label=trait, color=colors.get(trait), linewidth=2, markersize=8)
plt.plot(x, averages, "o-", label=trait, color=colors.get(trait), linewidth=2, markersize=8)
# 添加趋势线
z = np.polyfit(x, averages, 1)
p = np.poly1d(z)
plt.plot(x, p(x), '--', color=colors.get(trait), alpha=0.5)
plt.plot(x, p(x), "--", color=colors.get(trait), alpha=0.5)
plt.title(f"{user_nickname} 的人格特质累计平均分变化趋势", fontsize=14, pad=20)
plt.xlabel("评估次数", fontsize=12)
plt.ylabel("累计平均分", fontsize=12)
plt.grid(True, linestyle='--', alpha=0.7)
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
plt.grid(True, linestyle="--", alpha=0.7)
plt.legend(loc="center left", bbox_to_anchor=(1, 0.5))
plt.ylim(0, 7)
plt.tight_layout()
@@ -290,38 +290,39 @@ class ChatBasedPersonalityEvaluator:
os.makedirs("results/plots", exist_ok=True)
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
plot_file = f"results/plots/personality_trend_{qq_id}_{timestamp}.png"
plt.savefig(plot_file, dpi=300, bbox_inches='tight')
plt.savefig(plot_file, dpi=300, bbox_inches="tight")
plt.close()
def analyze_user_personality(qq_id: str, num_samples: int = 10, context_length: int = 5) -> str:
"""
分析用户人格特征的便捷函数
Args:
qq_id (str): 用户QQ号
num_samples (int): 要分析的聊天片段数量
context_length (int): 每个聊天片段的上下文长度
Returns:
str: 格式化的分析结果
"""
evaluator = ChatBasedPersonalityEvaluator()
result = evaluator.evaluate_user_personality(qq_id, num_samples, context_length)
if "error" in result:
return result["error"]
# 格式化输出
output = f"QQ号 {qq_id} ({result['用户昵称']}) 的人格特征分析结果:\n"
output += "=" * 50 + "\n\n"
output += "人格特征评分:\n"
for trait, score in result["人格特征评分"].items():
if score == 0:
output += f"{trait}: 数据不足,无法判断 (评估次数: {result['维度评估次数'].get(trait, 0)})\n"
else:
output += f"{trait}: {score}/6 (评估次数: {result['维度评估次数'].get(trait, 0)})\n"
# 添加变化趋势描述
if trait in result["特质得分历史"] and len(result["特质得分历史"][trait]) > 1:
scores = [s for s in result["特质得分历史"][trait] if s != 0] # 过滤掉无效分数
@@ -334,13 +335,14 @@ def analyze_user_personality(qq_id: str, num_samples: int = 10, context_length:
else:
trend_desc = "呈下降趋势"
output += f" 变化趋势: {trend_desc} (斜率: {trend:.2f})\n"
output += f"\n分析样本数量:{result['样本数量']}\n"
output += f"结果已保存至results/personality_result_{qq_id}.json\n"
output += "变化趋势图已保存至results/plots/目录\n"
return output
if __name__ == "__main__":
# 测试代码
# test_qq = "" # 替换为要测试的QQ号

View File

@@ -82,7 +82,6 @@ class PersonalityEvaluator_direct:
dimensions_text = "\n".join(dimension_descriptions)
prompt = f"""请根据以下场景和用户描述评估用户在大五人格模型中的相关维度得分1-6分
场景描述:

View File

@@ -14,18 +14,19 @@ sys.path.append(root_path)
from src.common.database import db # noqa: E402
class MessageAnalyzer:
def __init__(self):
self.messages_collection = db["messages"]
def get_message_context(self, message_id: int, context_length: int = 5) -> Optional[List[Dict]]:
"""
获取指定消息ID的上下文消息列表
Args:
message_id (int): 消息ID
context_length (int): 上下文长度(单侧,总长度为 2*context_length + 1
Returns:
Optional[List[Dict]]: 消息列表如果未找到则返回None
"""
@@ -33,110 +34,110 @@ class MessageAnalyzer:
target_message = self.messages_collection.find_one({"message_id": message_id})
if not target_message:
return None
# 获取该消息的stream_id
stream_id = target_message.get('chat_info', {}).get('stream_id')
stream_id = target_message.get("chat_info", {}).get("stream_id")
if not stream_id:
return None
# 获取同一stream_id的所有消息
stream_messages = list(self.messages_collection.find({
"chat_info.stream_id": stream_id
}).sort("time", 1))
stream_messages = list(self.messages_collection.find({"chat_info.stream_id": stream_id}).sort("time", 1))
# 找到目标消息在列表中的位置
target_index = None
for i, msg in enumerate(stream_messages):
if msg['message_id'] == message_id:
if msg["message_id"] == message_id:
target_index = i
break
if target_index is None:
return None
# 获取目标消息前后的消息
start_index = max(0, target_index - context_length)
end_index = min(len(stream_messages), target_index + context_length + 1)
return stream_messages[start_index:end_index]
def format_messages(self, messages: List[Dict], target_message_id: Optional[int] = None) -> str:
"""
格式化消息列表为可读字符串
Args:
messages (List[Dict]): 消息列表
target_message_id (Optional[int]): 目标消息ID用于标记
Returns:
str: 格式化的消息字符串
"""
if not messages:
return "没有消息记录"
reply = ""
for msg in messages:
# 消息时间
msg_time = datetime.datetime.fromtimestamp(int(msg['time'])).strftime("%Y-%m-%d %H:%M:%S")
msg_time = datetime.datetime.fromtimestamp(int(msg["time"])).strftime("%Y-%m-%d %H:%M:%S")
# 获取消息内容
message_text = msg.get('processed_plain_text', msg.get('detailed_plain_text', '无消息内容'))
nickname = msg.get('user_info', {}).get('user_nickname', '未知用户')
message_text = msg.get("processed_plain_text", msg.get("detailed_plain_text", "无消息内容"))
nickname = msg.get("user_info", {}).get("user_nickname", "未知用户")
# 标记当前消息
is_target = "" if target_message_id and msg['message_id'] == target_message_id else " "
is_target = "" if target_message_id and msg["message_id"] == target_message_id else " "
reply += f"{is_target}[{msg_time}] {nickname}: {message_text}\n"
if target_message_id and msg['message_id'] == target_message_id:
if target_message_id and msg["message_id"] == target_message_id:
reply += " " + "-" * 50 + "\n"
return reply
def get_user_random_contexts(
self, qq_id: str, num_messages: int = 10, context_length: int = 5) -> tuple[List[str], str]: # noqa: E501
self, qq_id: str, num_messages: int = 10, context_length: int = 5
) -> tuple[List[str], str]: # noqa: E501
"""
获取用户的随机消息及其上下文
Args:
qq_id (str): QQ号
num_messages (int): 要获取的随机消息数量
context_length (int): 每条消息的上下文长度(单侧)
Returns:
tuple[List[str], str]: (每个消息上下文的格式化字符串列表, 用户昵称)
"""
if not qq_id:
return [], ""
# 获取用户所有消息
all_messages = list(self.messages_collection.find({"user_info.user_id": int(qq_id)}))
if not all_messages:
return [], ""
# 获取用户昵称
user_nickname = all_messages[0].get('chat_info', {}).get('user_info', {}).get('user_nickname', '未知用户')
user_nickname = all_messages[0].get("chat_info", {}).get("user_info", {}).get("user_nickname", "未知用户")
# 随机选择指定数量的消息
selected_messages = random.sample(all_messages, min(num_messages, len(all_messages)))
# 按时间排序
selected_messages.sort(key=lambda x: int(x['time']))
selected_messages.sort(key=lambda x: int(x["time"]))
# 存储所有上下文消息
context_list = []
# 获取每条消息的上下文
for msg in selected_messages:
message_id = msg['message_id']
message_id = msg["message_id"]
# 获取消息上下文
context_messages = self.get_message_context(message_id, context_length)
if context_messages:
formatted_context = self.format_messages(context_messages, message_id)
context_list.append(formatted_context)
return context_list, user_nickname
if __name__ == "__main__":
# 测试代码
analyzer = MessageAnalyzer()
@@ -145,7 +146,7 @@ if __name__ == "__main__":
print("-" * 50)
# 获取5条消息每条消息前后各3条上下文
contexts, nickname = analyzer.get_user_random_contexts(test_qq, num_messages=5, context_length=3)
print(f"用户昵称: {nickname}\n")
# 打印每个上下文
for i, context in enumerate(contexts, 1):

View File

@@ -46,17 +46,15 @@ class LLMStatistics:
"""记录在线时间"""
current_time = datetime.now()
# 检查5分钟内是否已有记录
recent_record = db.online_time.find_one({
"timestamp": {
"$gte": current_time - timedelta(minutes=5)
}
})
recent_record = db.online_time.find_one({"timestamp": {"$gte": current_time - timedelta(minutes=5)}})
if not recent_record:
db.online_time.insert_one({
"timestamp": current_time,
"duration": 5 # 5分钟
})
db.online_time.insert_one(
{
"timestamp": current_time,
"duration": 5, # 5分钟
}
)
def _collect_statistics_for_period(self, start_time: datetime) -> Dict[str, Any]:
"""收集指定时间段的LLM请求统计数据

View File

@@ -41,10 +41,9 @@ class WillingManager:
interested_rate = interested_rate * config.response_interested_rate_amplifier
if interested_rate > 0.4:
current_willing += interested_rate - 0.3
if is_mentioned_bot and current_willing < 1.0:
current_willing += 1
elif is_mentioned_bot: