记忆系统接入关键词,重新启动自主发言功能
This commit is contained in:
SengokuCola
2025-03-06 14:27:22 +08:00
parent 8bca235ecf
commit eaa711ada7
10 changed files with 520 additions and 112 deletions

View File

@@ -15,7 +15,7 @@ from .message import Message_Thinking # 导入 Message_Thinking 类
from .relationship_manager import relationship_manager
from .willing_manager import willing_manager # 导入意愿管理器
from .utils import is_mentioned_bot_in_txt, calculate_typing_time
from ..memory_system.memory import memory_graph
from ..memory_system.memory import memory_graph,hippocampus
from loguru import logger
class ChatBot:
@@ -70,24 +70,12 @@ class ChatBot:
topic=await topic_identifier.identify_topic_llm(message.processed_plain_text)
# topic1 = topic_identifier.identify_topic_jieba(message.processed_plain_text)
# topic2 = await topic_identifier.identify_topic_llm(message.processed_plain_text)
# topic3 = topic_identifier.identify_topic_snownlp(message.processed_plain_text)
logger.info(f"\033[1;32m[主题识别]\033[0m 使用{global_config.topic_extract}主题: {topic}")
all_num = 0
interested_num = 0
if topic:
for current_topic in topic:
all_num += 1
first_layer_items, second_layer_items = memory_graph.get_related_item(current_topic, depth=2)
if first_layer_items:
interested_num += 1
print(f"\033[1;32m[前额叶]\033[0m 对|{current_topic}|有印象")
interested_rate = interested_num / all_num if all_num > 0 else 0
# topic=await topic_identifier.identify_topic_llm(message.processed_plain_text)
topic = ''
interested_rate = 0
interested_rate = await hippocampus.memory_activate_value(message.processed_plain_text)/100
print(f"\033[1;32m[记忆激活]\033[0m 对{message.processed_plain_text}的激活度:---------------------------------------{interested_rate}\n")
# logger.info(f"\033[1;32m[主题识别]\033[0m 使用{global_config.topic_extract}主题: {topic}")
await self.storage.store_message(message, topic[0] if topic else None)
@@ -134,7 +122,7 @@ class ChatBot:
if isinstance(msg, Message_Thinking) and msg.message_id == think_id:
thinking_message = msg
container.messages.remove(msg)
print(f"\033[1;32m[思考消息删除]\033[0m 已找到思考消息对象,开始删除")
# print(f"\033[1;32m[思考消息删除]\033[0m 已找到思考消息对象,开始删除")
break
#记录开始思考的时间,避免从思考到回复的时间太久
@@ -167,7 +155,7 @@ class ChatBot:
message_set.add_message(bot_message)
#message_set 可以直接加入 message_manager
print(f"\033[1;32m[回复]\033[0m 将回复载入发送容器")
# print(f"\033[1;32m[回复]\033[0m 将回复载入发送容器")
message_manager.add_message(message_set)
bot_response_time = tinking_time_point
@@ -205,7 +193,7 @@ class ChatBot:
)
message_manager.add_message(bot_message)
willing_manager.change_reply_willing_after_sent(event.group_id)
# willing_manager.change_reply_willing_after_sent(event.group_id)
# 创建全局ChatBot实例
chat_bot = ChatBot()

View File

@@ -40,6 +40,7 @@ class BotConfig:
llm_normal_minor: Dict[str, str] = field(default_factory=lambda: {})
embedding: Dict[str, str] = field(default_factory=lambda: {})
vlm: Dict[str, str] = field(default_factory=lambda: {})
rerank: Dict[str, str] = field(default_factory=lambda: {})
# 主题提取配置
topic_extract: str = 'snownlp' # 只支持jieba,snownlp,llm
@@ -136,6 +137,9 @@ class BotConfig:
if "embedding" in model_config:
config.embedding = model_config["embedding"]
if "rerank" in model_config:
config.rerank = model_config["rerank"]
if 'topic' in toml_dict:
topic_config=toml_dict['topic']
if 'topic_extract' in topic_config:

View File

@@ -63,10 +63,11 @@ class ResponseGenerator:
# 获取关系值
relationship_value = relationship_manager.get_relationship(message.user_id).relationship_value if relationship_manager.get_relationship(message.user_id) else 0.0
if relationship_value != 0.0:
print(f"\033[1;32m[关系管理]\033[0m 回复中_当前关系值: {relationship_value}")
# print(f"\033[1;32m[关系管理]\033[0m 回复中_当前关系值: {relationship_value}")
pass
# 构建prompt
prompt, prompt_check = prompt_builder._build_prompt(
prompt, prompt_check = await prompt_builder._build_prompt(
message_txt=message.processed_plain_text,
sender_name=sender_name,
relationship_value=relationship_value,

View File

@@ -103,7 +103,7 @@ class MessageContainer:
def add_message(self, message: Union[Message_Thinking, Message_Sending]) -> None:
"""添加消息到队列"""
print(f"\033[1;32m[添加消息]\033[0m 添加消息到对应群")
# print(f"\033[1;32m[添加消息]\033[0m 添加消息到对应群")
if isinstance(message, MessageSet):
for single_message in message.messages:
self.messages.append(single_message)
@@ -156,17 +156,13 @@ class MessageManager:
#最早的对象,可能是思考消息,也可能是发送消息
message_earliest = container.get_earliest_message() #一个message_thinking or message_sending
#一个月后删了
if not message_earliest:
print(f"\033[1;34m[BUG如果出现这个说明有BUG3月4日留]\033[0m ")
return
#如果是思考消息
if isinstance(message_earliest, Message_Thinking):
#优先等待这条消息
message_earliest.update_thinking_time()
thinking_time = message_earliest.thinking_time
print(f"\033[1;34m[调试]\033[0m 消息正在思考中,已思考{int(thinking_time)}")
if thinking_time % 10 == 0:
print(f"\033[1;34m[调试]\033[0m 消息正在思考中,已思考{int(thinking_time)}")
else:# 如果不是message_thinking就只能是message_sending
print(f"\033[1;34m[调试]\033[0m 消息'{message_earliest.processed_plain_text}'正在发送中")
#直接发,等什么呢

View File

@@ -2,13 +2,15 @@ import time
import random
from ..schedule.schedule_generator import bot_schedule
import os
from .utils import get_embedding, combine_messages, get_recent_group_detailed_plain_text
from .utils import get_embedding, combine_messages, get_recent_group_detailed_plain_text,find_similar_topics
from ...common.database import Database
from .config import global_config
from .topic_identifier import topic_identifier
from ..memory_system.memory import memory_graph
from ..memory_system.memory import memory_graph,hippocampus
from random import choice
import numpy as np
import jieba
from collections import Counter
class PromptBuilder:
def __init__(self):
@@ -16,7 +18,9 @@ class PromptBuilder:
self.activate_messages = ''
self.db = Database.get_instance()
def _build_prompt(self,
async def _build_prompt(self,
message_txt: str,
sender_name: str = "某人",
relationship_value: float = 0.0,
@@ -31,60 +35,7 @@ class PromptBuilder:
Returns:
str: 构建好的prompt
"""
memory_prompt = ''
start_time = time.time() # 记录开始时间
# topic = await topic_identifier.identify_topic_llm(message_txt)
topic = topic_identifier.identify_topic_snownlp(message_txt)
# print(f"\033[1;32m[pb主题识别]\033[0m 主题: {topic}")
all_first_layer_items = [] # 存储所有第一层记忆
all_second_layer_items = {} # 用字典存储每个topic的第二层记忆
overlapping_second_layer = set() # 存储重叠的第二层记忆
if topic:
# 遍历所有topic
for current_topic in topic:
first_layer_items, second_layer_items = memory_graph.get_related_item(current_topic, depth=2)
# if first_layer_items:
# print(f"\033[1;32m[前额叶]\033[0m 主题 '{current_topic}' 的第一层记忆: {first_layer_items}")
# 记录第一层数据
all_first_layer_items.extend(first_layer_items)
# 记录第二层数据
all_second_layer_items[current_topic] = second_layer_items
# 检查是否有重叠的第二层数据
for other_topic, other_second_layer in all_second_layer_items.items():
if other_topic != current_topic:
# 找到重叠的记忆
overlap = set(second_layer_items) & set(other_second_layer)
if overlap:
# print(f"\033[1;32m[前额叶]\033[0m 发现主题 '{current_topic}' 和 '{other_topic}' 有共同的第二层记忆: {overlap}")
overlapping_second_layer.update(overlap)
selected_first_layer = random.sample(all_first_layer_items, min(2, len(all_first_layer_items))) if all_first_layer_items else []
selected_second_layer = random.sample(list(overlapping_second_layer), min(2, len(overlapping_second_layer))) if overlapping_second_layer else []
# 合并并去重
all_memories = list(set(selected_first_layer + selected_second_layer))
if all_memories:
print(f"\033[1;32m[前额叶]\033[0m 合并所有需要的记忆: {all_memories}")
random_item = " ".join(all_memories)
memory_prompt = f"看到这些聊天,你想起来{random_item}\n"
else:
memory_prompt = "" # 如果没有记忆,则返回空字符串
end_time = time.time() # 记录结束时间
print(f"\033[1;32m[回忆耗时]\033[0m 耗时: {(end_time - start_time):.3f}") # 输出耗时
"""
#先禁用关系
if 0 > 30:
relation_prompt = "关系特别特别好,你很喜欢喜欢他"
@@ -112,22 +63,48 @@ class PromptBuilder:
prompt_info = self.get_prompt_info(message_txt,threshold=0.5)
if prompt_info:
prompt_info = f'''\n----------------------------------------------------\n你有以下这些[知识]\n{prompt_info}\n请你记住上面的[知识],之后可能会用到\n----------------------------------------------------\n'''
# promt_info_prompt = '你有一些[知识],在上面可以参考。'
end_time = time.time()
print(f"\033[1;32m[知识检索]\033[0m 耗时: {(end_time - start_time):.3f}")
# print(f"\033[1;34m[调试]\033[0m 获取知识库内容结果: {prompt_info}")
# print(f"\033[1;34m[调试信息]\033[0m 正在构建聊天上下文")
# 获取聊天上下文
chat_talking_prompt = ''
if group_id:
chat_talking_prompt = get_recent_group_detailed_plain_text(self.db, group_id, limit=global_config.MAX_CONTEXT_SIZE,combine = True)
chat_talking_prompt = f"以下是群里正在聊天的内容:\n{chat_talking_prompt}"
# print(f"\033[1;34m[调试]\033[0m 已从数据库获取群 {group_id} 的消息记录:{chat_talking_prompt}")
# 使用新的记忆获取方法
memory_prompt = ''
start_time = time.time()
# 调用 hippocampus 的 get_relevant_memories 方法
relevant_memories = await hippocampus.get_relevant_memories(
text=message_txt,
max_topics=5,
similarity_threshold=0.4
)
if relevant_memories:
# 格式化记忆内容
memory_items = []
for memory in relevant_memories:
memory_items.append(f"关于「{memory['topic']}」的记忆:{memory['content']}")
memory_prompt = f"看到这些聊天,你想起来:\n" + "\n".join(memory_items) + "\n"
# 打印调试信息
print("\n\033[1;32m[记忆检索]\033[0m 找到以下相关记忆:")
for memory in relevant_memories:
print(f"- 主题「{memory['topic']}」[相似度: {memory['similarity']:.2f}]: {memory['content']}")
end_time = time.time()
print(f"\033[1;32m[回忆耗时]\033[0m 耗时: {(end_time - start_time):.3f}")
#激活prompt构建
activate_prompt = ''
activate_prompt = f"以上是群里正在进行的聊天,{memory_prompt} 现在昵称为 '{sender_name}' 的用户说的:{message_txt}。引起了你的注意,你和他{relation_prompt},你想要{relation_prompt_2}"
@@ -162,29 +139,19 @@ class PromptBuilder:
if random.random() < 0.01:
prompt_ger += '你喜欢用文言文'
#额外信息要求
extra_info = '''但是记得回复平淡一些,简短一些,尤其注意在没明确提到时不要过多提及自身的背景, 记住不要输出多余内容(包括前后缀,冒号和引号,括号,表情等),只需要输出回复内容就好,不要输出其他任何内容'''
#合并prompt
prompt = ""
prompt += f"{prompt_info}\n"
prompt += f"{prompt_date}\n"
prompt += f"{chat_talking_prompt}\n"
# prompt += f"{memory_prompt}\n"
# prompt += f"{activate_prompt}\n"
prompt += f"{prompt_personality}\n"
prompt += f"{prompt_ger}\n"
prompt += f"{extra_info}\n"
'''读空气prompt处理'''
activate_prompt_check=f"以上是群里正在进行的聊天,昵称为 '{sender_name}' 的用户说的:{message_txt}。引起了你的注意,你和他{relation_prompt},你想要{relation_prompt_2},但是这不一定是合适的时机,请你决定是否要回应这条消息。"
prompt_personality_check = ''
extra_check_info=f"请注意把握群里的聊天内容的基础上,综合群内的氛围,例如,和{global_config.BOT_NICKNAME}相关的话题要积极回复,如果是at自己的消息一定要回复如果自己正在和别人聊天一定要回复其他话题如果合适搭话也可以回复如果认为应该回复请输出yes否则输出no请注意是决定是否需要回复而不是编写回复内容除了yes和no不要输出任何回复内容。"

View File

@@ -42,7 +42,7 @@ class TopicIdentifier:
print(f"\033[1;32m[主题识别]\033[0m 主题: {topic_list}")
return topic_list if topic_list else None
def identify_topic_snownlp(self, text: str) -> Optional[List[str]]:
def identify_topic_snownlp(self, text: str,num:int=5) -> Optional[List[str]]:
"""使用 SnowNLP 进行主题识别
Args:
@@ -57,7 +57,7 @@ class TopicIdentifier:
try:
s = SnowNLP(text)
# Extract the top `num` keywords as topics
keywords = s.keywords(5)
keywords = s.keywords(num)
return keywords if keywords else None
except Exception as e:
print(f"\033[1;31m[错误]\033[0m SnowNLP 处理失败: {str(e)}")

View File

@@ -11,6 +11,8 @@ from collections import Counter
import math
from nonebot import get_driver
from ..models.utils_model import LLM_request
import aiohttp
import jieba
driver = get_driver()
config = driver.config
@@ -117,7 +119,7 @@ def get_cloest_chat_from_db(db, length: int, timestamp: str):
chat_text += record["detailed_plain_text"]
return chat_text
print(f"消息已读取3次跳过")
# print(f"消息已读取3次跳过")
return ''
def get_recent_group_messages(db, group_id: int, limit: int = 12) -> list:
@@ -421,3 +423,62 @@ def calculate_typing_time(input_string: str, chinese_time: float = 0.2, english_
return total_time
def find_similar_topics(message_txt: str, all_memory_topic: list, top_k: int = 5) -> list:
    """Find the memory topics most similar to the input text via the rerank API.

    Args:
        message_txt: Input text used as the rerank query.
        all_memory_topic: Candidate memory topics to rank.
        top_k: Maximum number of topics to return.

    Returns:
        list: The result of ``LLM_request.rerank_sync`` for the query
        (per the original docstring: the most similar topics together with
        their similarity scores). An empty list is returned when there are
        no candidates, when ``top_k`` is not positive, or when the rerank
        call raises.
    """
    # Nothing to rank, or nothing requested: skip building a client and
    # calling the API for a request that cannot yield any result.
    if not all_memory_topic or top_k <= 0:
        return []

    try:
        llm = LLM_request(model=global_config.rerank)
        return llm.rerank_sync(message_txt, all_memory_topic, top_k)
    except Exception as e:
        # Deliberate best-effort: report the failure and degrade to
        # "no similar topics" instead of propagating to the caller.
        print(f"重排序API调用出错: {str(e)}")
        return []
def cosine_similarity(v1, v2):
    """Compute the cosine similarity of two vectors.

    Args:
        v1: First vector (any sequence accepted by ``np.dot``).
        v2: Second vector, same length as ``v1``.

    Returns:
        float: ``dot(v1, v2) / (|v1| * |v2|)``, or ``0.0`` when either
        vector has zero norm (the ratio is undefined in that case).
    """
    dot_product = np.dot(v1, v2)
    norm1 = np.linalg.norm(v1)
    norm2 = np.linalg.norm(v2)
    if norm1 == 0 or norm2 == 0:
        # Return a float here too, so every path yields the same type.
        return 0.0
    return dot_product / (norm1 * norm2)
def text_to_vector(text):
    """Convert text into a term-frequency mapping.

    The text is segmented with ``jieba.lcut`` and the resulting tokens are
    counted.

    Returns:
        Counter: token -> number of occurrences in ``text``.
    """
    return Counter(jieba.lcut(text))
def find_similar_topics_simple(text: str, topics: list, top_k: int = 5) -> list:
    """Rank topics by bag-of-words cosine similarity to the input text.

    Args:
        text: Input text to compare against.
        topics: Candidate topic strings.
        top_k: Number of leading entries to keep after sorting.

    Returns:
        list: ``(topic, similarity)`` tuples sorted by similarity in
        descending order, truncated to the first ``top_k`` entries.
    """
    # Term frequencies of the query text are computed once and reused.
    query_counts = text_to_vector(text)

    scored = []
    for candidate in topics:
        candidate_counts = text_to_vector(candidate)
        # Shared vocabulary: every token seen in either text.
        vocabulary = set(query_counts.keys()) | set(candidate_counts.keys())
        # Both vectors are built over the same vocabulary iteration, so their
        # components line up position by position.
        query_vec = [query_counts.get(token, 0) for token in vocabulary]
        candidate_vec = [candidate_counts.get(token, 0) for token in vocabulary]
        scored.append((candidate, cosine_similarity(query_vec, candidate_vec)))

    # Highest similarity first, then keep the leading top_k entries.
    scored.sort(key=lambda pair: pair[1], reverse=True)
    return scored[:top_k]

View File

@@ -37,13 +37,13 @@ class WillingManager:
current_willing *= 0.15
print(f"表情包, 当前意愿: {current_willing}")
if interested_rate > 0.65:
if interested_rate > 0.4:
print(f"兴趣度: {interested_rate}, 当前意愿: {current_willing}")
current_willing += interested_rate-0.6
current_willing += interested_rate-0.1
self.group_reply_willing[group_id] = min(current_willing, 3.0)
reply_probability = max((current_willing - 0.55) * 1.9, 0)
reply_probability = max((current_willing - 0.45) * 2, 0)
if group_id not in config.talk_allowed_groups:
current_willing = 0
reply_probability = 0