From 467056d928eb2241e00c91a3b2eee3847ebf4557 Mon Sep 17 00:00:00 2001 From: SengokuCola <1026294844@qq.com> Date: Sat, 1 Mar 2025 13:24:41 +0800 Subject: [PATCH 1/6] v0.2.6 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 重新启动数据库 --- knowledge.bat | 5 ---- src/plugins/chat/message_send_control.py | 2 +- src/plugins/chat/prompt_builder.py | 35 ++++++++++-------------- 3 files changed, 15 insertions(+), 27 deletions(-) delete mode 100644 knowledge.bat diff --git a/knowledge.bat b/knowledge.bat deleted file mode 100644 index e6ad209e4..000000000 --- a/knowledge.bat +++ /dev/null @@ -1,5 +0,0 @@ -call conda activate niuniu -cd "C:\GitHub\MegMeg-bot" - -REM 执行nb run命令 -nb run \ No newline at end of file diff --git a/src/plugins/chat/message_send_control.py b/src/plugins/chat/message_send_control.py index c5bd122f7..cb45b3132 100644 --- a/src/plugins/chat/message_send_control.py +++ b/src/plugins/chat/message_send_control.py @@ -184,7 +184,7 @@ class MessageSendControl: message.update_thinking_time() thinking_time = message.thinking_time if thinking_time < 90: # 最少思考2秒 - if int(thinking_time) % 10 == 0: + if int(thinking_time) % 15 == 0: print(f"\033[1;34m[调试]\033[0m 消息正在思考中,已思考{thinking_time:.1f}秒") return else: diff --git a/src/plugins/chat/prompt_builder.py b/src/plugins/chat/prompt_builder.py index 5572ae1dc..ac865d9ef 100644 --- a/src/plugins/chat/prompt_builder.py +++ b/src/plugins/chat/prompt_builder.py @@ -54,22 +54,22 @@ class PromptBuilder: bot_schedule_now_time,bot_schedule_now_activity = bot_schedule.get_current_task() prompt_date = f'''今天是{current_date},现在是{current_time},你今天的日程是:\n{bot_schedule.today_schedule}\n你现在正在{bot_schedule_now_activity}\n''' - #知识构建(暂时禁用,因为知识库太少了) + #知识构建 prompt_info = '' promt_info_prompt = '' - prompt_info = self.get_prompt_info(message_txt) + prompt_info = self.get_prompt_info(message_txt,threshold=0.5) if prompt_info: - prompt_info = f'''\n----------------------------------------------------\n你有以下这些[知识]: - \n{prompt_info}\n - 请你记住上面的[知识],之后可能会用到\n----------------------------------------------------\n''' + prompt_info = f'''\n----------------------------------------------------\n你有以下这些[知识]:\n{prompt_info}\n请你记住上面的[知识],之后可能会用到\n----------------------------------------------------\n''' promt_info_prompt = '你有一些[知识],在上面可以参考。' + # print(f"\033[1;34m[调试]\033[0m 获取知识库内容结果: {prompt_info}") + - print(f"\033[1;34m[调试信息]\033[0m 正在构建聊天上下文") + # print(f"\033[1;34m[调试信息]\033[0m 正在构建聊天上下文") chat_talking_prompt = '' if group_id: chat_talking_prompt = get_recent_group_detailed_plain_text(self.db, group_id, limit=global_config.MAX_CONTEXT_SIZE,combine = True) - print(f"\033[1;34m[调试]\033[0m 已从数据库获取群 {group_id} 的消息记录:{chat_talking_prompt}") + # print(f"\033[1;34m[调试]\033[0m 已从数据库获取群 {group_id} 的消息记录:{chat_talking_prompt}") #激活prompt构建 activate_prompt = '' @@ -114,7 +114,7 @@ class PromptBuilder: #合并prompt prompt = "" - # prompt += f"{prompt_info}\n" + prompt += f"{prompt_info}\n" prompt += f"{prompt_date}\n" prompt += f"{chat_talking_prompt}\n" # prompt += f"{activate_prompt}\n" @@ -124,31 +124,23 @@ class PromptBuilder: return prompt - def get_prompt_info(self,message:str): + def get_prompt_info(self,message:str,threshold:float): related_info = '' if len(message) > 10: message_segments = [message[i:i+10] for i in range(0, len(message), 10)] for segment in message_segments: embedding = get_embedding(segment) - related_info += self.get_info_from_db(embedding) + related_info += self.get_info_from_db(embedding,threshold=threshold) else: embedding = get_embedding(message) - related_info += self.get_info_from_db(embedding) + related_info += self.get_info_from_db(embedding,threshold=threshold) + return related_info + def get_info_from_db(self, query_embedding: list, limit: int = 1, threshold: float = 0.5) -> str: - """ - 从知识库中查找与输入向量最相似的内容 - Args: - query_embedding: 查询向量 - limit: 返回结果数量,默认为2 - threshold: 相似度阈值,默认为0.5 - Returns: - str: 找到的相关信息,如果相似度低于阈值则返回空字符串 - """ if not query_embedding: return '' - # 使用余弦相似度计算 pipeline = [ { @@ -206,6 +198,7 @@ class PromptBuilder: ] results = list(self.db.db.knowledges.aggregate(pipeline)) + # print(f"\033[1;34m[调试]\033[0m获取知识库内容结果: {results}") if not results: return '' From 11f90d82f7d1abaec99c84db572b6bb3e1bc02e5 Mon Sep 17 00:00:00 2001 From: SengokuCola <1026294844@qq.com> Date: Sat, 1 Mar 2025 17:01:39 +0800 Subject: [PATCH 2/6] =?UTF-8?q?v0.3.0=20=E8=AE=B0=E5=BF=86=E5=92=8C?= =?UTF-8?q?=E7=9F=A5=E8=AF=86=E5=BA=93?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit beta --- src/plugins/chat/__init__.py | 4 + src/plugins/chat/bot.py | 8 +- src/plugins/chat/config.py | 10 +- .../chat/knowledege/knowledge_library.py | 186 +++++++++ src/plugins/chat/prompt_builder.py | 79 +++- src/plugins/memory_system/llm_module.py | 45 ++- src/plugins/memory_system/memory copy.py | 376 ++++++++++++++++++ src/plugins/memory_system/memory.py | 158 ++++++-- 8 files changed, 798 insertions(+), 68 deletions(-) create mode 100644 src/plugins/chat/knowledege/knowledge_library.py create mode 100644 src/plugins/memory_system/memory copy.py diff --git a/src/plugins/chat/__init__.py b/src/plugins/chat/__init__.py index 7a3e2c758..a2b54eaa5 100644 --- a/src/plugins/chat/__init__.py +++ b/src/plugins/chat/__init__.py @@ -10,6 +10,8 @@ from .relationship_manager import relationship_manager from ..schedule.schedule_generator import bot_schedule from .willing_manager import willing_manager +from ..memory_system.memory import memory_graph + # 获取驱动器 driver = get_driver() @@ -23,6 +25,8 @@ Database.initialize( print("\033[1;32m[初始化配置和数据库完成]\033[0m") + + # 导入其他模块 from .bot import ChatBot from .emoji_manager import emoji_manager diff --git a/src/plugins/chat/bot.py b/src/plugins/chat/bot.py index efa8e1014..09ee2f063 100644 --- a/src/plugins/chat/bot.py +++ b/src/plugins/chat/bot.py @@ -5,7 +5,7 @@ from .storage import MessageStorage from .llm_generator import LLMResponseGenerator from .message_stream import MessageStream, MessageStreamContainer from .topic_identifier import topic_identifier -from random import random +from random import random, choice from .emoji_manager import emoji_manager # 导入表情包管理器 import time import os @@ -15,6 +15,7 @@ from .message import Message_Thinking # 导入 Message_Thinking 类 from .relationship_manager import relationship_manager from .willing_manager import willing_manager # 导入意愿管理器 from .utils import is_mentioned_bot_in_txt, calculate_typing_time +from ..memory_system.memory import memory_graph class ChatBot: def __init__(self, config: BotConfig): @@ -99,6 +100,11 @@ class ChatBot: topic = topic_identifier.identify_topic_jieba(message.processed_plain_text) print(f"\033[1;32m[主题识别]\033[0m 主题: {topic}") + if topic: + for current_topic in topic: + first_layer_items, second_layer_items = memory_graph.get_related_item(current_topic, depth=2) + if first_layer_items: + print(f"\033[1;32m[记忆检索-bot]\033[0m 有印象:{current_topic}") await self.storage.store_message(message, topic[0] if topic else None) diff --git a/src/plugins/chat/config.py b/src/plugins/chat/config.py index b9965470c..f34317c92 100644 --- a/src/plugins/chat/config.py +++ b/src/plugins/chat/config.py @@ -133,8 +133,8 @@ llm_config.DEEP_SEEK_BASE_URL = os.getenv('DEEP_SEEK_BASE_URL') if not global_config.enable_advance_output: logger.remove() - logging.getLogger('nonebot').handlers.clear() - console_handler = logging.StreamHandler() - console_handler.setLevel(logging.WARNING) # 只输出 WARNING 及以上级别 - logging.getLogger('nonebot').addHandler(console_handler) - logging.getLogger('nonebot').setLevel(logging.WARNING) + # logging.getLogger('nonebot').handlers.clear() + # console_handler = logging.StreamHandler() + # console_handler.setLevel(logging.WARNING) # 只输出 WARNING 及以上级别 + # logging.getLogger('nonebot').addHandler(console_handler) + # logging.getLogger('nonebot').setLevel(logging.WARNING) diff --git a/src/plugins/chat/knowledege/knowledge_library.py b/src/plugins/chat/knowledege/knowledge_library.py new file mode 100644 index 000000000..40756b413 --- /dev/null +++ b/src/plugins/chat/knowledege/knowledge_library.py @@ -0,0 +1,186 @@ +import os +import sys +import numpy as np +import requests +import time + +# 添加项目根目录到 Python 路径 +root_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../..")) +sys.path.append(root_path) + +from src.common.database import Database +from src.plugins.chat.config import llm_config + +# 直接配置数据库连接信息 +Database.initialize( + "127.0.0.1", # MongoDB 主机 + 27017, # MongoDB 端口 + "MegBot" # 数据库名称 +) + +class KnowledgeLibrary: + def __init__(self): + self.db = Database.get_instance() + self.raw_info_dir = "data/raw_info" + self._ensure_dirs() + + def _ensure_dirs(self): + """确保必要的目录存在""" + os.makedirs(self.raw_info_dir, exist_ok=True) + + def get_embedding(self, text: str) -> list: + """获取文本的embedding向量""" + url = "https://api.siliconflow.cn/v1/embeddings" + payload = { + "model": "BAAI/bge-m3", + "input": text, + "encoding_format": "float" + } + headers = { + "Authorization": f"Bearer {llm_config.SILICONFLOW_API_KEY}", + "Content-Type": "application/json" + } + + response = requests.post(url, json=payload, headers=headers) + if response.status_code != 200: + print(f"获取embedding失败: {response.text}") + return None + + return response.json()['data'][0]['embedding'] + + def process_files(self): + """处理raw_info目录下的所有txt文件""" + for filename in os.listdir(self.raw_info_dir): + if filename.endswith('.txt'): + file_path = os.path.join(self.raw_info_dir, filename) + self.process_single_file(file_path) + + def process_single_file(self, file_path: str): + """处理单个文件""" + try: + # 检查文件是否已处理 + if self.db.db.processed_files.find_one({"file_path": file_path}): + print(f"文件已处理过,跳过: {file_path}") + return + + with open(file_path, 'r', encoding='utf-8') as f: + content = f.read() + + # 按1024字符分段 + segments = [content[i:i+300] for i in range(0, len(content), 300)] + + # 处理每个分段 + for segment in segments: + if not segment.strip(): # 跳过空段 + continue + + # 获取embedding + embedding = self.get_embedding(segment) + if not embedding: + continue + + # 存储到数据库 + doc = { + "content": segment, + "embedding": embedding, + "file_path": file_path, + "segment_length": len(segment) + } + + # 使用文本内容的哈希值作为唯一标识 + content_hash = hash(segment) + + # 更新或插入文档 + self.db.db.knowledges.update_one( + {"content_hash": content_hash}, + {"$set": doc}, + upsert=True + ) + + # 记录文件已处理 + self.db.db.processed_files.insert_one({ + "file_path": file_path, + "processed_time": time.time() + }) + + print(f"成功处理文件: {file_path}") + + except Exception as e: + print(f"处理文件 {file_path} 时出错: {str(e)}") + + def search_similar_segments(self, query: str, limit: int = 5) -> list: + """搜索与查询文本相似的片段""" + query_embedding = self.get_embedding(query) + if not query_embedding: + return [] + + # 使用余弦相似度计算 + pipeline = [ + { + "$addFields": { + "dotProduct": { + "$reduce": { + "input": {"$range": [0, {"$size": "$embedding"}]}, + "initialValue": 0, + "in": { + "$add": [ + "$$value", + {"$multiply": [ + {"$arrayElemAt": ["$embedding", "$$this"]}, + {"$arrayElemAt": [query_embedding, "$$this"]} + ]} + ] + } + } + }, + "magnitude1": { + "$sqrt": { + "$reduce": { + "input": "$embedding", + "initialValue": 0, + "in": {"$add": ["$$value", {"$multiply": ["$$this", "$$this"]}]} + } + } + }, + "magnitude2": { + "$sqrt": { + "$reduce": { + "input": query_embedding, + "initialValue": 0, + "in": {"$add": ["$$value", {"$multiply": ["$$this", "$$this"]}]} + } + } + } + } + }, + { + "$addFields": { + "similarity": { + "$divide": ["$dotProduct", {"$multiply": ["$magnitude1", "$magnitude2"]}] + } + } + }, + {"$sort": {"similarity": -1}}, + {"$limit": limit}, + {"$project": {"content": 1, "similarity": 1, "file_path": 1}} + ] + + results = list(self.db.db.knowledges.aggregate(pipeline)) + return results + +# 创建单例实例 +knowledge_library = KnowledgeLibrary() + +if __name__ == "__main__": + # 测试知识库功能 + print("开始处理知识库文件...") + knowledge_library.process_files() + + # 测试搜索功能 + test_query = "麦麦评价一下僕と花" + print(f"\n搜索与'{test_query}'相似的内容:") + results = knowledge_library.search_similar_segments(test_query) + for result in results: + print(f"相似度: {result['similarity']:.4f}") + print(f"内容: {result['content'][:100]}...") + print("-" * 50) diff --git a/src/plugins/chat/prompt_builder.py b/src/plugins/chat/prompt_builder.py index ac865d9ef..0116969a7 100644 --- a/src/plugins/chat/prompt_builder.py +++ b/src/plugins/chat/prompt_builder.py @@ -6,6 +6,9 @@ import os from .utils import get_embedding, combine_messages, get_recent_group_detailed_plain_text from ...common.database import Database from .config import global_config +from .topic_identifier import topic_identifier +from ..memory_system.memory import memory_graph +from random import choice # 获取当前文件的绝对路径 current_dir = os.path.dirname(os.path.abspath(__file__)) @@ -35,6 +38,59 @@ class PromptBuilder: Returns: str: 构建好的prompt """ + + + memory_prompt = '' + start_time = time.time() # 记录开始时间 + topic = topic_identifier.identify_topic_jieba(message_txt) + # print(f"\033[1;32m[pb主题识别]\033[0m 主题: {topic}") + + all_first_layer_items = [] # 存储所有第一层记忆 + all_second_layer_items = {} # 用字典存储每个topic的第二层记忆 + overlapping_second_layer = set() # 存储重叠的第二层记忆 + + if topic: + # 遍历所有topic + for current_topic in topic: + first_layer_items, second_layer_items = memory_graph.get_related_item(current_topic, depth=2) + if first_layer_items: + print(f"\033[1;32m[pb记忆检索]\033[0m 主题 '{current_topic}' 的第一层记忆: {first_layer_items}") + + # 记录第一层数据 + all_first_layer_items.extend(first_layer_items) + + # 记录第二层数据 + all_second_layer_items[current_topic] = second_layer_items + + # 检查是否有重叠的第二层数据 + for other_topic, other_second_layer in all_second_layer_items.items(): + if other_topic != current_topic: + # 找到重叠的记忆 + overlap = set(second_layer_items) & set(other_second_layer) + if overlap: + print(f"\033[1;32m[pb记忆检索]\033[0m 发现主题 '{current_topic}' 和 '{other_topic}' 有共同的第二层记忆: {overlap}") + overlapping_second_layer.update(overlap) + + # 合并所有需要的记忆 + if all_first_layer_items: + print(f"\033[1;32m[pb记忆检索]\033[0m 合并所有需要的记忆1: {all_first_layer_items}") + if overlapping_second_layer: + print(f"\033[1;32m[pb记忆检索]\033[0m 合并所有需要的记忆2: {list(overlapping_second_layer)}") + + all_memories = all_first_layer_items + list(overlapping_second_layer) + + if all_memories: # 只在列表非空时选择随机项 + random_item = choice(all_memories) + memory_prompt = f"看到这些聊天,你想起来{random_item}\n" + else: + memory_prompt = "" # 如果没有记忆,则返回空字符串 + + end_time = time.time() # 记录结束时间 + print(f"\033[1;32m[回忆耗时]\033[0m 耗时: {(end_time - start_time):.3f}秒") # 输出耗时 + + + + #先禁用关系 if 0 > 30: relation_prompt = "关系特别特别好,你很喜欢喜欢他" @@ -55,12 +111,17 @@ class PromptBuilder: prompt_date = f'''今天是{current_date},现在是{current_time},你今天的日程是:\n{bot_schedule.today_schedule}\n你现在正在{bot_schedule_now_activity}\n''' #知识构建 + start_time = time.time() + prompt_info = '' promt_info_prompt = '' prompt_info = self.get_prompt_info(message_txt,threshold=0.5) if prompt_info: prompt_info = f'''\n----------------------------------------------------\n你有以下这些[知识]:\n{prompt_info}\n请你记住上面的[知识],之后可能会用到\n----------------------------------------------------\n''' promt_info_prompt = '你有一些[知识],在上面可以参考。' + + end_time = time.time() + print(f"\033[1;32m[知识检索]\033[0m 耗时: {(end_time - start_time):.3f}秒") # print(f"\033[1;34m[调试]\033[0m 获取知识库内容结果: {prompt_info}") @@ -69,11 +130,13 @@ class PromptBuilder: chat_talking_prompt = '' if group_id: chat_talking_prompt = get_recent_group_detailed_plain_text(self.db, group_id, limit=global_config.MAX_CONTEXT_SIZE,combine = True) + + chat_talking_prompt = f"以下是群里正在聊天的内容:\n{chat_talking_prompt}" # print(f"\033[1;34m[调试]\033[0m 已从数据库获取群 {group_id} 的消息记录:{chat_talking_prompt}") #激活prompt构建 activate_prompt = '' - activate_prompt = f"以上是群里正在进行的聊天,昵称为 '{sender_name}' 的用户说的:{message_txt}。引起了你的注意,你和他{relation_prompt},你想要{relation_prompt_2}。" + activate_prompt = f"以上是群里正在进行的聊天,{memory_prompt} 现在昵称为 '{sender_name}' 的用户说的:{message_txt}。引起了你的注意,你和他{relation_prompt},你想要{relation_prompt_2}。" #检测机器人相关词汇 bot_keywords = ['人机', 'bot', '机器', '入机', 'robot', '机器人'] @@ -87,13 +150,12 @@ class PromptBuilder: prompt_personality = '' personality_choice = random.random() if personality_choice < 4/6: # 第一种人格 - prompt_personality = f'''你的网名叫{global_config.BOT_NICKNAME},是一个学习地质的女大学生,喜欢摄影,你会刷贴吧,你正在浏览qq群,{promt_info_prompt}, - {activate_prompt} + prompt_personality = f'''{activate_prompt}你的网名叫{global_config.BOT_NICKNAME},是一个学习地质的女大学生,喜欢摄影,你会刷贴吧,你正在浏览qq群,{promt_info_prompt}, 现在请你给出日常且口语化的回复,平淡一些,尽量简短一些。{is_bot_prompt} 请注意把握群里的聊天内容,不要回复的太有条理,可以有个性。''' elif personality_choice < 1: # 第二种人格 - prompt_personality = f'''你的网名叫{global_config.BOT_NICKNAME},是一个女大学生,你有黑色头发,你会刷小红书, 你正在浏览qq群,{promt_info_prompt}, - {activate_prompt} + prompt_personality = f'''{activate_prompt}你的网名叫{global_config.BOT_NICKNAME},是一个女大学生,你有黑色头发,你会刷小红书, 你正在浏览qq群,{promt_info_prompt}, + 现在请你给出日常且口语化的回复,请表现你自己的见解,不要一昧迎合,尽量简短一些。{is_bot_prompt} 请你表达自己的见解和观点。可以有个性。''' @@ -108,7 +170,7 @@ class PromptBuilder: #额外信息要求 - extra_info = '''但是记得回复平淡一些,简短一些,不要过多提及自身的背景, 记住不要输出多余内容(包括前后缀,冒号和引号,括号,表情等),只需要输出回复内容就好,不要输出其他任何内容''' + extra_info = '''但是记得回复平淡一些,简短一些,记住不要输出多余内容(包括前后缀,冒号和引号,括号,表情等),只需要输出回复内容就好,不要输出其他任何内容''' @@ -116,7 +178,10 @@ class PromptBuilder: prompt = "" prompt += f"{prompt_info}\n" prompt += f"{prompt_date}\n" - prompt += f"{chat_talking_prompt}\n" + prompt += f"{chat_talking_prompt}\n" + + # prompt += f"{memory_prompt}\n" + # prompt += f"{activate_prompt}\n" prompt += f"{prompt_personality}\n" prompt += f"{prompt_ger}\n" diff --git a/src/plugins/memory_system/llm_module.py b/src/plugins/memory_system/llm_module.py index a5516012f..fa879afdc 100644 --- a/src/plugins/memory_system/llm_module.py +++ b/src/plugins/memory_system/llm_module.py @@ -2,6 +2,7 @@ import os import requests from dotenv import load_dotenv from typing import Tuple, Union +import time # 加载环境变量 load_dotenv() @@ -32,16 +33,34 @@ class LLMModel: # 发送请求到完整的chat/completions端点 api_url = f"{self.base_url.rstrip('/')}/chat/completions" - try: - response = requests.post(api_url, headers=headers, json=data) - response.raise_for_status() # 检查响应状态 - - result = response.json() - if "choices" in result and len(result["choices"]) > 0: - content = result["choices"][0]["message"]["content"] - reasoning_content = result["choices"][0]["message"].get("reasoning_content", "") - return content, reasoning_content # 返回内容和推理内容 - return "没有返回结果", "" # 返回两个值 - - except requests.exceptions.RequestException as e: - return f"请求失败: {str(e)}", "" # 返回错误信息和空字符串 \ No newline at end of file + max_retries = 3 + base_wait_time = 15 # 基础等待时间(秒) + + for retry in range(max_retries): + try: + response = requests.post(api_url, headers=headers, json=data) + + if response.status_code == 429: + wait_time = base_wait_time * (2 ** retry) # 指数退避 + print(f"遇到请求限制(429),等待{wait_time}秒后重试...") + time.sleep(wait_time) + continue + + response.raise_for_status() # 检查其他响应状态 + + result = response.json() + if "choices" in result and len(result["choices"]) > 0: + content = result["choices"][0]["message"]["content"] + reasoning_content = result["choices"][0]["message"].get("reasoning_content", "") + return content, reasoning_content + return "没有返回结果", "" + + except requests.exceptions.RequestException as e: + if retry < max_retries - 1: # 如果还有重试机会 + wait_time = base_wait_time * (2 ** retry) + print(f"请求失败,等待{wait_time}秒后重试... 错误: {str(e)}") + time.sleep(wait_time) + else: + return f"请求失败: {str(e)}", "" + + return "达到最大重试次数,请求仍然失败", "" \ No newline at end of file diff --git a/src/plugins/memory_system/memory copy.py b/src/plugins/memory_system/memory copy.py new file mode 100644 index 000000000..074a95b19 --- /dev/null +++ b/src/plugins/memory_system/memory copy.py @@ -0,0 +1,376 @@ +# -*- coding: utf-8 -*- +import sys +import jieba +from llm_module import LLMModel +import networkx as nx +import matplotlib.pyplot as plt +import math +from collections import Counter +import datetime +import random +import time +# from chat.config import global_config +import sys +sys.path.append("C:/GitHub/MaiMBot") # 添加项目根目录到 Python 路径 +from src.common.database import Database # 使用正确的导入语法 + +class Memory_graph: + def __init__(self): + self.G = nx.Graph() # 使用 networkx 的图结构 + self.db = Database.get_instance() + + def connect_dot(self, concept1, concept2): + self.G.add_edge(concept1, concept2) + + def add_dot(self, concept, memory): + if concept in self.G: + # 如果节点已存在,将新记忆添加到现有列表中 + if 'memory_items' in self.G.nodes[concept]: + if not isinstance(self.G.nodes[concept]['memory_items'], list): + # 如果当前不是列表,将其转换为列表 + self.G.nodes[concept]['memory_items'] = [self.G.nodes[concept]['memory_items']] + self.G.nodes[concept]['memory_items'].append(memory) + else: + self.G.nodes[concept]['memory_items'] = [memory] + else: + # 如果是新节点,创建新的记忆列表 + self.G.add_node(concept, memory_items=[memory]) + + def get_dot(self, concept): + # 检查节点是否存在于图中 + if concept in self.G: + # 从图中获取节点数据 + node_data = self.G.nodes[concept] + # print(node_data) + # 创建新的Memory_dot对象 + return concept,node_data + return None + + def get_related_item(self, topic, depth=1): + if topic not in self.G: + return [], [] + + first_layer_items = [] + second_layer_items = [] + + # 获取相邻节点 + neighbors = list(self.G.neighbors(topic)) + # print(f"第一层: {topic}") + + # 获取当前节点的记忆项 + node_data = self.get_dot(topic) + if node_data: + concept, data = node_data + if 'memory_items' in data: + memory_items = data['memory_items'] + if isinstance(memory_items, list): + first_layer_items.extend(memory_items) + else: + first_layer_items.append(memory_items) + + # 只在depth=2时获取第二层记忆 + if depth >= 2: + # 获取相邻节点的记忆项 + for neighbor in neighbors: + # print(f"第二层: {neighbor}") + node_data = self.get_dot(neighbor) + if node_data: + concept, data = node_data + if 'memory_items' in data: + memory_items = data['memory_items'] + if isinstance(memory_items, list): + second_layer_items.extend(memory_items) + else: + second_layer_items.append(memory_items) + + return first_layer_items, second_layer_items + + def store_memory(self): + for node in self.G.nodes(): + dot_data = { + "concept": node + } + self.db.db.store_memory_dots.insert_one(dot_data) + + @property + def dots(self): + # 返回所有节点对应的 Memory_dot 对象 + return [self.get_dot(node) for node in self.G.nodes()] + + + def get_random_chat_from_db(self, length: int, timestamp: str): + # 从数据库中根据时间戳获取离其最近的聊天记录 + chat_text = '' + closest_record = self.db.db.messages.find_one({"time": {"$lte": timestamp}}, sort=[('time', -1)]) # 调试输出 + print(f"距离time最近的消息时间: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(int(closest_record['time'])))}") + + if closest_record: + closest_time = closest_record['time'] + group_id = closest_record['group_id'] # 获取groupid + # 获取该时间戳之后的length条消息,且groupid相同 + chat_record = list(self.db.db.messages.find({"time": {"$gt": closest_time}, "group_id": group_id}).sort('time', 1).limit(length)) + for record in chat_record: + time_str = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(int(record['time']))) + chat_text += f'[{time_str}] {record["user_nickname"] or "用户" + str(record["user_id"])}: {record["processed_plain_text"]}\n' # 添加发送者和时间信息 + return chat_text + + return [] # 如果没有找到记录,返回空列表 + + def save_graph_to_db(self): + # 清空现有的图数据 + self.db.db.graph_data.delete_many({}) + # 保存节点 + for node in self.G.nodes(data=True): + node_data = { + 'concept': node[0], + 'memory_items': node[1].get('memory_items', []) # 默认为空列表 + } + self.db.db.graph_data.nodes.insert_one(node_data) + # 保存边 + for edge in self.G.edges(): + edge_data = { + 'source': edge[0], + 'target': edge[1] + } + self.db.db.graph_data.edges.insert_one(edge_data) + + def load_graph_from_db(self): + # 清空当前图 + self.G.clear() + # 加载节点 + nodes = self.db.db.graph_data.nodes.find() + for node in nodes: + memory_items = node.get('memory_items', []) + if not isinstance(memory_items, list): + memory_items = [memory_items] if memory_items else [] + self.G.add_node(node['concept'], memory_items=memory_items) + # 加载边 + edges = self.db.db.graph_data.edges.find() + for edge in edges: + self.G.add_edge(edge['source'], edge['target']) + +def calculate_information_content(text): + + """计算文本的信息量(熵)""" + # 统计字符频率 + char_count = Counter(text) + total_chars = len(text) + + # 计算熵 + entropy = 0 + for count in char_count.values(): + probability = count / total_chars + entropy -= probability * math.log2(probability) + + return entropy + + +# Database.initialize( +# global_config.MONGODB_HOST, +# global_config.MONGODB_PORT, +# global_config.DATABASE_NAME +# ) +# memory_graph = Memory_graph() + +# llm_model = LLMModel() +# llm_model_small = LLMModel(model_name="deepseek-ai/DeepSeek-V2.5") + +# memory_graph.load_graph_from_db() + + + +def main(): + # 初始化数据库 + Database.initialize( + "127.0.0.1", + 27017, + "MegBot" + ) + + memory_graph = Memory_graph() + # 创建LLM模型实例 + llm_model = LLMModel() + llm_model_small = LLMModel(model_name="deepseek-ai/DeepSeek-V2.5") + + # 使用当前时间戳进行测试 + current_timestamp = datetime.datetime.now().timestamp() + chat_text = [] + + chat_size =40 + + for _ in range(100): # 循环10次 + random_time = current_timestamp - random.randint(1, 3600*39) # 随机时间 + print(f"随机时间戳对应的时间: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(random_time))}") + chat_ = memory_graph.get_random_chat_from_db(chat_size, random_time) + chat_text.append(chat_) # 拼接所有text + # time.sleep(1) + + + + for i, input_text in enumerate(chat_text, 1): + + progress = (i / len(chat_text)) * 100 + bar_length = 30 + filled_length = int(bar_length * i // len(chat_text)) + bar = '█' * filled_length + '-' * (bar_length - filled_length) + print(f"\n进度: [{bar}] {progress:.1f}% ({i}/{len(chat_text)})") + + # print(input_text) + first_memory = set() + first_memory = memory_compress(input_text, llm_model_small, llm_model_small, rate=2.5) + time.sleep(5) + + #将记忆加入到图谱中 + for topic, memory in first_memory: + topics = segment_text(topic) + print(f"\033[1;34m话题\033[0m: {topic},节点: {topics}, 记忆: {memory}") + for split_topic in topics: + memory_graph.add_dot(split_topic,memory) + for split_topic in topics: + for other_split_topic in topics: + if split_topic != other_split_topic: + memory_graph.connect_dot(split_topic, other_split_topic) + + # memory_graph.store_memory() + + # 展示两种不同的可视化方式 + print("\n按连接数量着色的图谱:") + visualize_graph(memory_graph, color_by_memory=False) + + print("\n按记忆数量着色的图谱:") + visualize_graph(memory_graph, color_by_memory=True) + + memory_graph.save_graph_to_db() + # memory_graph.load_graph_from_db() + + while True: + query = input("请输入新的查询概念(输入'退出'以结束):") + if query.lower() == '退出': + break + items_list = memory_graph.get_related_item(query) + if items_list: + # print(items_list) + for memory_item in items_list: + print(memory_item) + else: + print("未找到相关记忆。") + + while True: + query = input("请输入问题:") + + if query.lower() == '退出': + break + + topic_prompt = find_topic(query, 3) + topic_response = llm_model.generate_response(topic_prompt) + # 检查 topic_response 是否为元组 + if isinstance(topic_response, tuple): + topics = topic_response[0].split(",") # 假设第一个元素是我们需要的字符串 + else: + topics = topic_response.split(",") + print(topics) + + for keyword in topics: + items_list = memory_graph.get_related_item(keyword) + if items_list: + print(items_list) + +def memory_compress(input_text, llm_model, llm_model_small, rate=1): + information_content = calculate_information_content(input_text) + print(f"文本的信息量(熵): {information_content:.4f} bits") + topic_num = max(1, min(5, int(information_content * rate / 4))) + print(topic_num) + topic_prompt = find_topic(input_text, topic_num) + topic_response = llm_model.generate_response(topic_prompt) + # 检查 topic_response 是否为元组 + if isinstance(topic_response, tuple): + topics = topic_response[0].split(",") # 假设第一个元素是我们需要的字符串 + else: + topics = topic_response.split(",") + print(topics) + compressed_memory = set() + for topic in topics: + topic_what_prompt = topic_what(input_text,topic) + topic_what_response = llm_model_small.generate_response(topic_what_prompt) + compressed_memory.add((topic.strip(), topic_what_response[0])) # 将话题和记忆作为元组存储 + return compressed_memory + + +def segment_text(text): + seg_text = list(jieba.cut(text)) + return seg_text + +def find_topic(text, topic_num): + prompt = f'这是一段文字:{text}。请你从这段话中总结出{topic_num}个话题,帮我列出来,用逗号隔开,尽可能精简。只需要列举{topic_num}个话题就好,不要告诉我其他内容。' + return prompt + +def topic_what(text, topic): + prompt = f'这是一段文字:{text}。我想知道这记忆里有什么关于{topic}的话题,帮我总结成一句自然的话,可以包含时间和人物。只输出这句话就好' + return prompt + +def visualize_graph(memory_graph: Memory_graph, color_by_memory: bool = False): + # 设置中文字体 + plt.rcParams['font.sans-serif'] = ['SimHei'] # 用来正常显示中文标签 + plt.rcParams['axes.unicode_minus'] = False # 用来正常显示负号 + + G = memory_graph.G + + # 保存图到本地 + nx.write_gml(G, "memory_graph.gml") # 保存为 GML 格式 + + # 根据连接条数或记忆数量设置节点颜色 + node_colors = [] + nodes = list(G.nodes()) # 获取图中实际的节点列表 + + if color_by_memory: + # 计算每个节点的记忆数量 + memory_counts = [] + for node in nodes: + memory_items = G.nodes[node].get('memory_items', []) + if isinstance(memory_items, list): + count = len(memory_items) + else: + count = 1 if memory_items else 0 + memory_counts.append(count) + max_memories = max(memory_counts) if memory_counts else 1 + + for count in memory_counts: + # 使用不同的颜色方案:红色表示记忆多,蓝色表示记忆少 + if max_memories > 0: + intensity = min(1.0, count / max_memories) + color = (intensity, 0, 1.0 - intensity) # 从蓝色渐变到红色 + else: + color = (0, 0, 1) # 如果没有记忆,则为蓝色 + node_colors.append(color) + else: + # 使用原来的连接数量着色方案 + max_degree = max(G.degree(), key=lambda x: x[1])[1] if G.degree() else 1 + for node in nodes: + degree = G.degree(node) + if max_degree > 0: + red = min(1.0, degree / max_degree) + blue = 1.0 - red + color = (red, 0, blue) + else: + color = (0, 0, 1) + node_colors.append(color) + + # 绘制图形 + plt.figure(figsize=(12, 8)) + pos = nx.spring_layout(G, k=1, iterations=50) + nx.draw(G, pos, + with_labels=True, + node_color=node_colors, + node_size=2000, + font_size=10, + font_family='SimHei', + font_weight='bold') + + title = '记忆图谱可视化 - ' + ('按记忆数量着色' if color_by_memory else '按连接数量着色') + plt.title(title, fontsize=16, fontfamily='SimHei') + plt.show() + +if __name__ == "__main__": + main() + + diff --git a/src/plugins/memory_system/memory.py b/src/plugins/memory_system/memory.py index d8f644d7c..3f216997f 100644 --- a/src/plugins/memory_system/memory.py +++ b/src/plugins/memory_system/memory.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- import sys import jieba -from llm_module import LLMModel +from .llm_module import LLMModel import networkx as nx import matplotlib.pyplot as plt import math @@ -9,9 +9,9 @@ from collections import Counter import datetime import random import time - +from ..chat.config import global_config import sys -sys.path.append("C:/GitHub/MegMeg-bot") # 添加项目根目录到 Python 路径 +sys.path.append("C:/GitHub/MaiMBot") # 添加项目根目录到 Python 路径 from src.common.database import Database # 使用正确的导入语法 class Memory_graph: @@ -23,44 +23,67 @@ class Memory_graph: self.G.add_edge(concept1, concept2) def add_dot(self, concept, memory): - self.G.add_node(concept, memory_items=memory) + if concept in self.G: + # 如果节点已存在,将新记忆添加到现有列表中 + if 'memory_items' in self.G.nodes[concept]: + if not isinstance(self.G.nodes[concept]['memory_items'], list): + # 如果当前不是列表,将其转换为列表 + self.G.nodes[concept]['memory_items'] = [self.G.nodes[concept]['memory_items']] + self.G.nodes[concept]['memory_items'].append(memory) + else: + self.G.nodes[concept]['memory_items'] = [memory] + else: + # 如果是新节点,创建新的记忆列表 + self.G.add_node(concept, memory_items=[memory]) def get_dot(self, concept): # 检查节点是否存在于图中 if concept in self.G: # 从图中获取节点数据 node_data = self.G.nodes[concept] - print(node_data) + # print(node_data) # 创建新的Memory_dot对象 return concept,node_data return None def get_related_item(self, topic, depth=1): if topic not in self.G: - return set() + return [], [] - items_set = set() + first_layer_items = [] + second_layer_items = [] + # 获取相邻节点 neighbors = list(self.G.neighbors(topic)) - print(f"第一层: {topic}") + # print(f"第一层: {topic}") # 获取当前节点的记忆项 node_data = self.get_dot(topic) if node_data: concept, data = node_data if 'memory_items' in data: - items_set.add(data['memory_items']) + memory_items = data['memory_items'] + if isinstance(memory_items, list): + first_layer_items.extend(memory_items) + else: + first_layer_items.append(memory_items) - # 获取相邻节点的记忆项 - for neighbor in neighbors: - print(f"第二层: {neighbor}") - node_data = self.get_dot(neighbor) - if node_data: - concept, data = node_data - if 'memory_items' in data: - items_set.add(data['memory_items']) + # 只在depth=2时获取第二层记忆 + if depth >= 2: + # 获取相邻节点的记忆项 + for neighbor in neighbors: + # print(f"第二层: {neighbor}") + node_data = self.get_dot(neighbor) + if node_data: + concept, data = node_data + if 'memory_items' in data: + memory_items = data['memory_items'] + if isinstance(memory_items, list): + second_layer_items.extend(memory_items) + else: + second_layer_items.append(memory_items) - return items_set + return first_layer_items, second_layer_items def store_memory(self): for node in self.G.nodes(): @@ -100,7 +123,7 @@ class Memory_graph: for node in self.G.nodes(data=True): node_data = { 'concept': node[0], - 'memory_items': node[1].get('memory_items', None) + 'memory_items': node[1].get('memory_items', []) # 默认为空列表 } self.db.db.graph_data.nodes.insert_one(node_data) # 保存边 @@ -117,7 +140,10 @@ class Memory_graph: # 加载节点 nodes = self.db.db.graph_data.nodes.find() for node in nodes: - self.G.add_node(node['concept'], memory_items=node['memory_items']) + memory_items = node.get('memory_items', []) + if not isinstance(memory_items, list): + memory_items = [memory_items] if memory_items else [] + self.G.add_node(node['concept'], memory_items=memory_items) # 加载边 edges = self.db.db.graph_data.edges.find() for edge in edges: @@ -138,6 +164,26 @@ def calculate_information_content(text): return entropy + +start_time = time.time() + +Database.initialize( + global_config.MONGODB_HOST, + global_config.MONGODB_PORT, + global_config.DATABASE_NAME +) +memory_graph = Memory_graph() + +llm_model = LLMModel() +llm_model_small = LLMModel(model_name="deepseek-ai/DeepSeek-V2.5") + +memory_graph.load_graph_from_db() + +end_time = time.time() +print(f"加载海马体耗时: {end_time - start_time:.2f} 秒") + + + def main(): # 初始化数据库 Database.initialize( @@ -155,13 +201,14 @@ def main(): current_timestamp = datetime.datetime.now().timestamp() chat_text = [] - chat_size =30 + chat_size =40 - for _ in range(60): # 循环10次 - random_time = current_timestamp - random.randint(1, 3600*3) # 随机时间 + for _ in range(100): # 循环10次 + random_time = current_timestamp - random.randint(1, 3600*39) # 随机时间 print(f"随机时间戳对应的时间: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(random_time))}") chat_ = memory_graph.get_random_chat_from_db(chat_size, random_time) chat_text.append(chat_) # 拼接所有text + time.sleep(5) @@ -173,7 +220,7 @@ def main(): #将记忆加入到图谱中 for topic, memory in first_memory: topics = segment_text(topic) - print(f"话题: {topic},节点: {topics}, 记忆: {memory}") + print(f"\033[1;34m话题\033[0m: {topic},节点: {topics}, 记忆: {memory}") for split_topic in topics: memory_graph.add_dot(split_topic,memory) for split_topic in topics: @@ -182,7 +229,13 @@ def main(): memory_graph.connect_dot(split_topic, other_split_topic) # memory_graph.store_memory() - visualize_graph(memory_graph) + + # 展示两种不同的可视化方式 + print("\n按连接数量着色的图谱:") + visualize_graph(memory_graph, color_by_memory=False) + + print("\n按记忆数量着色的图谱:") + visualize_graph(memory_graph, color_by_memory=True) memory_graph.save_graph_to_db() # memory_graph.load_graph_from_db() @@ -252,45 +305,66 @@ def topic_what(text, topic): prompt = f'这是一段文字:{text}。我想知道这记忆里有什么关于{topic}的话题,帮我总结成一句自然的话,可以包含时间和人物。只输出这句话就好' return prompt -def visualize_graph(memory_graph: Memory_graph): +def visualize_graph(memory_graph: Memory_graph, color_by_memory: bool = False): # 设置中文字体 plt.rcParams['font.sans-serif'] = ['SimHei'] # 用来正常显示中文标签 plt.rcParams['axes.unicode_minus'] = False # 用来正常显示负号 G = memory_graph.G - # 保存图到本地 nx.write_gml(G, "memory_graph.gml") # 保存为 GML 格式 - # 根据连接条数设置节点颜色 + # 根据连接条数或记忆数量设置节点颜色 node_colors = [] nodes = list(G.nodes()) # 获取图中实际的节点列表 - max_degree = max(G.degree(), key=lambda x: x[1])[1] if G.degree() else 1 # 获取最大连接数 - for node in nodes: - degree = G.degree(node) # 获取节点的度 - # 计算颜色,使用渐变效果 - if max_degree > 0: - red = min(1.0, degree / max_degree) # 红色分量随连接数增加而增加 - blue = 1.0 - red # 蓝色分量随连接数增加而减少 - color = (red, 0, blue) - else: - color = (0, 0, 1) # 如果没有连接,则为蓝色 - node_colors.append(color) + if color_by_memory: + # 计算每个节点的记忆数量 + memory_counts = [] + for node in nodes: + memory_items = G.nodes[node].get('memory_items', []) + if isinstance(memory_items, list): + count = len(memory_items) + else: + count = 1 if memory_items else 0 + memory_counts.append(count) + max_memories = max(memory_counts) if memory_counts else 1 + + for count in memory_counts: + # 使用不同的颜色方案:红色表示记忆多,蓝色表示记忆少 + if max_memories > 0: + intensity = min(1.0, count / max_memories) + color = (intensity, 0, 1.0 - intensity) # 从蓝色渐变到红色 + else: + color = (0, 0, 1) # 如果没有记忆,则为蓝色 + node_colors.append(color) + else: + # 使用原来的连接数量着色方案 + max_degree = max(G.degree(), key=lambda x: x[1])[1] if G.degree() else 1 + for node in nodes: + degree = G.degree(node) + if max_degree > 0: + red = min(1.0, degree / max_degree) + blue = 1.0 - red + color = (red, 0, blue) + else: + color = (0, 0, 1) + node_colors.append(color) # 绘制图形 plt.figure(figsize=(12, 8)) - pos = nx.spring_layout(G, k=1, iterations=50) # 使用弹簧布局,调整参数使布局更合理 + pos = nx.spring_layout(G, k=1, iterations=50) nx.draw(G, pos, with_labels=True, node_color=node_colors, node_size=2000, font_size=10, - font_family='SimHei', # 设置节点标签的字体 + font_family='SimHei', font_weight='bold') - plt.title('记忆图谱可视化', fontsize=16, fontfamily='SimHei') + title = '记忆图谱可视化 - ' + ('按记忆数量着色' if color_by_memory else '按连接数量着色') + plt.title(title, fontsize=16, fontfamily='SimHei') plt.show() if __name__ == "__main__": From fd615f33277d4de959409b27f4b3b43512160a56 Mon Sep 17 00:00:00 2001 From: SengokuCola <1026294844@qq.com> Date: Sat, 1 Mar 2025 17:34:29 +0800 Subject: [PATCH 3/6] Update memory copy.py --- src/plugins/memory_system/memory copy.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/plugins/memory_system/memory copy.py b/src/plugins/memory_system/memory copy.py index 074a95b19..07dea2a8b 100644 --- a/src/plugins/memory_system/memory copy.py +++ b/src/plugins/memory_system/memory copy.py @@ -196,10 +196,10 @@ def main(): current_timestamp = datetime.datetime.now().timestamp() chat_text = [] - chat_size =40 + chat_size =20 - for _ in range(100): # 循环10次 - random_time = current_timestamp - random.randint(1, 3600*39) # 随机时间 + for _ in range(10): # 循环10次 + random_time = current_timestamp - random.randint(1, 3600*3) # 随机时间 print(f"随机时间戳对应的时间: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(random_time))}") chat_ = memory_graph.get_random_chat_from_db(chat_size, random_time) chat_text.append(chat_) # 拼接所有text From 0dd5b0ba94d3c46cdcf0a15a40ff2ec96e929eec Mon Sep 17 00:00:00 2001 From: SengokuCola <1026294844@qq.com> Date: Sat, 1 Mar 2025 20:56:05 +0800 Subject: [PATCH 4/6] =?UTF-8?q?=E9=BA=A6=E9=BA=A6=E8=BD=AC=E8=A1=8C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1 --- README.md | 2 +- src/plugins/chat/config.py | 10 +++++----- src/plugins/chat/prompt_builder.py | 2 +- src/plugins/schedule/schedule_generator.py | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 3e2da652a..d0a41209c 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@
-![Python Version](https://img.shields.io/badge/Python-3.8-blue) +![Python Version](https://img.shields.io/badge/Python-3.x-blue) ![License](https://img.shields.io/github/license/SengokuCola/MaiMBot) ![Status](https://img.shields.io/badge/状态-开发中-yellow) diff --git a/src/plugins/chat/config.py b/src/plugins/chat/config.py index f34317c92..fa34f2a74 100644 --- a/src/plugins/chat/config.py +++ b/src/plugins/chat/config.py @@ -5,6 +5,7 @@ from nonebot.log import logger, default_format import logging import configparser import tomli +import sys @@ -131,10 +132,9 @@ llm_config.DEEP_SEEK_BASE_URL = os.getenv('DEEP_SEEK_BASE_URL') if not global_config.enable_advance_output: + # 只降低日志级别而不是完全移除 logger.remove() + logger.add(sys.stderr, level="WARNING") # 添加一个只输出 WARNING 及以上级别的处理器 - # logging.getLogger('nonebot').handlers.clear() - # console_handler = logging.StreamHandler() - # console_handler.setLevel(logging.WARNING) # 只输出 WARNING 及以上级别 - # logging.getLogger('nonebot').addHandler(console_handler) - # logging.getLogger('nonebot').setLevel(logging.WARNING) + # 设置 nonebot 的日志级别 + logging.getLogger('nonebot').setLevel(logging.WARNING) diff --git a/src/plugins/chat/prompt_builder.py b/src/plugins/chat/prompt_builder.py index 0116969a7..4e72c6304 100644 --- a/src/plugins/chat/prompt_builder.py +++ b/src/plugins/chat/prompt_builder.py @@ -150,7 +150,7 @@ class PromptBuilder: prompt_personality = '' personality_choice = random.random() if personality_choice < 4/6: # 第一种人格 - prompt_personality = f'''{activate_prompt}你的网名叫{global_config.BOT_NICKNAME},是一个学习地质的女大学生,喜欢摄影,你会刷贴吧,你正在浏览qq群,{promt_info_prompt}, + prompt_personality = f'''{activate_prompt}你的网名叫{global_config.BOT_NICKNAME},曾经是一个学习地质的女大学生,现在学习心理学和脑科学,你会刷贴吧,你正在浏览qq群,{promt_info_prompt}, 现在请你给出日常且口语化的回复,平淡一些,尽量简短一些。{is_bot_prompt} 请注意把握群里的聊天内容,不要回复的太有条理,可以有个性。''' elif personality_choice < 1: # 第二种人格 diff --git a/src/plugins/schedule/schedule_generator.py b/src/plugins/schedule/schedule_generator.py index 13b6ebb88..f342d119a 100644 --- a/src/plugins/schedule/schedule_generator.py +++ b/src/plugins/schedule/schedule_generator.py @@ -59,7 +59,7 @@ class ScheduleGenerator: elif read_only == False: print(f"{date_str}的日程不存在,准备生成新的日程。") - prompt = f"""我是{global_config.BOT_NICKNAME},一个地质学大二女大学生,喜欢刷qq,贴吧,知乎和小红书,请为我生成{date_str}({weekday})的日程安排,包括: + prompt = f"""我是{global_config.BOT_NICKNAME},一个曾经学习地质,现在学习心理学和脑科学的女大学生,喜欢刷qq,贴吧,知乎和小红书,请为我生成{date_str}({weekday})的日程安排,包括: 1. 早上的学习和工作安排 2. 下午的活动和任务 3. 晚上的计划和休息时间 From f501f7d21845090207ad5d53a14a5cbbba59a136 Mon Sep 17 00:00:00 2001 From: HYY1116 Date: Sat, 1 Mar 2025 21:07:54 +0800 Subject: [PATCH 5/6] =?UTF-8?q?chore:=E4=BF=AE=E6=94=B9toml=E9=85=8D?= =?UTF-8?q?=E7=BD=AE=E6=96=87=E4=BB=B6=E4=BD=8D=E7=BD=AE=EF=BC=8C=E5=A2=9E?= =?UTF-8?q?=E5=8A=A0=E9=BB=98=E8=AE=A4=E9=85=8D=E7=BD=AE=E6=96=87=E4=BB=B6?= =?UTF-8?q?=EF=BC=8C=E4=BF=AE=E6=94=B9docs?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 33 ++++++++++--------- .../chat/bot_config_toml => bot_config.toml | 18 +++++----- env.example | 4 +-- src/plugins/chat/config.py | 2 +- 4 files changed, 29 insertions(+), 28 deletions(-) rename src/plugins/chat/bot_config_toml => bot_config.toml (68%) diff --git a/README.md b/README.md index 3e2da652a..add3a72b5 100644 --- a/README.md +++ b/README.md @@ -52,11 +52,9 @@ NAPCAT_UID=$(id -u) NAPCAT_GID=$(id -g) docker compose restart #### 手动运行 1. **创建Python环境** - 推荐使用conda或其他环境管理来管理你的python环境 + 推荐使用conda或其他虚拟环境进行依赖安装,防止出现依赖版本冲突问题 ```bash - # 安装requirements(还没检查好,可能有包漏了) - conda activate 你的环境 - cd 对应路径 + # 安装requirements pip install -r requirements.txt ``` 2. **MongoDB设置** @@ -68,8 +66,8 @@ NAPCAT_UID=$(id -u) NAPCAT_GID=$(id -g) docker compose restart - 在Napcat的网络设置中添加ws反向代理:ws://localhost:8080/onebot/v11/ws 4. **配置文件设置** - - 把env.example改成.env,并填上你的apikey(硅基流动或deepseekapi) - - 把bot_config_toml改名为bot_config.toml,并填写相关内容,不然无法正常运行 + - 将.env文件打开,填上你的apikey(硅基流动或deepseekapi) + - 将bot_config.toml文件打开,并填写相关内容,不然无法正常运行 #### .env 文件配置说明 ```ini @@ -92,14 +90,10 @@ NAPCAT_UID=$(id -u) NAPCAT_GID=$(id -g) docker compose restart MONGODB_PASSWORD="" # MongoDB密码(可选) MONGODB_AUTH_SOURCE="" # MongoDB认证源(可选) - # API密钥配置 - CHAT_ANY_WHERE_KEY= # ChatAnyWhere API密钥 - SILICONFLOW_KEY= # 硅基流动 API密钥(必填) - DEEP_SEEK_KEY= # DeepSeek API密钥(必填) - - # API地址配置 - CHAT_ANY_WHERE_BASE_URL=https://api.chatanywhere.tech/v1 + #api配置项,建议siliconflow必填,识图需要这个 + SILICONFLOW_KEY= SILICONFLOW_BASE_URL=https://api.siliconflow.cn/v1/ + DEEP_SEEK_KEY= DEEP_SEEK_BASE_URL=https://api.deepseek.com/v1 ``` @@ -158,9 +152,8 @@ NAPCAT_UID=$(id -u) NAPCAT_GID=$(id -g) docker compose restart ``` 5. **运行麦麦** + 在含有bot.py程序的目录下运行(如果使用了虚拟环境需要先进入虚拟环境) ```bash - conda activate 你的环境 - cd 对应路径 nb run ``` 6. **运行其他组件** @@ -205,3 +198,13 @@ NAPCAT_UID=$(id -u) NAPCAT_GID=$(id -g) docker compose restart 纯编程外行,面向cursor编程,很多代码史一样多多包涵 > ⚠️ **警告**:本应用生成内容来自人工智能模型,由 AI 生成,请仔细甄别,请勿用于违反法律的用途,AI生成内容不代表本人观点和立场。 + +## 致谢 +[nonebot2](https://github.com/nonebot/nonebot2): 跨平台 Python 异步聊天机器人框架 +[NapCat](https://github.com/NapNeko/NapCatQQ): 现代化的基于 NTQQ 的 Bot 协议端实现 + +### 贡献者 + +感谢各位大佬! + +[![Contributors](https://contributors-img.web.app/image?repo=SengokuCola/MaiMBot)](https://github.com/SengokuCola/MaiMBot/graphs/contributors) diff --git a/src/plugins/chat/bot_config_toml b/bot_config.toml similarity index 68% rename from src/plugins/chat/bot_config_toml rename to bot_config.toml index fe6b702d8..6730f0481 100644 --- a/src/plugins/chat/bot_config_toml +++ b/bot_config.toml @@ -7,8 +7,8 @@ password = "" # 默认空值 auth_source = "" # 默认空值 [bot] -qq = #填入你的机器人QQ -nickname = "麦麦" +qq = 123456 #填入你的机器人QQ +nickname = "麦麦" #你希望bot被称呼的名字 [message] min_text_length = 2 # 与麦麦聊天时麦麦只会回答文本大于等于此数的消息 @@ -24,7 +24,7 @@ enable_pic_translate = false [response] -api_using = "siliconflow" # 选择大模型API +api_using = "siliconflow" # 选择大模型API,可选值为siliconflow,deepseek,建议使用siliconflow,因为识图api目前只支持siliconflow的deepseek-vl2模型 model_r1_probability = 0.8 # 麦麦回答时选择R1模型的概率 model_v3_probability = 0.1 # 麦麦回答时选择V3模型的概率 model_r1_distill_probability = 0.1 # 麦麦回答时选择R1蒸馏模型的概率 @@ -36,13 +36,13 @@ enable_advance_output = true # 开启后输出更多日志,false关闭true开启 [groups] talk_allowed = [ - #可以回复消息的群 -] + 123456,12345678 +] #可以回复消息的群 talk_frequency_down = [ - #降低回复频率的群 -] + 123456,12345678 +] #降低回复频率的群 ban_user_id = [ - #禁止回复消息的QQ号 -] + 123456,12345678 +] #禁止回复消息的QQ号 diff --git a/env.example b/env.example index c8ed650d6..9988d58f3 100644 --- a/env.example +++ b/env.example @@ -15,10 +15,8 @@ MONGODB_USERNAME = "" # 默认空值 MONGODB_PASSWORD = "" # 默认空值 MONGODB_AUTH_SOURCE = "" # 默认空值 -#key and url -CHAT_ANY_WHERE_KEY= +#api配置项 SILICONFLOW_KEY= -CHAT_ANY_WHERE_BASE_URL=https://api.chatanywhere.tech/v1 SILICONFLOW_BASE_URL=https://api.siliconflow.cn/v1/ DEEP_SEEK_KEY= DEEP_SEEK_BASE_URL=https://api.deepseek.com/v1 diff --git a/src/plugins/chat/config.py b/src/plugins/chat/config.py index b9965470c..43df5bb1f 100644 --- a/src/plugins/chat/config.py +++ b/src/plugins/chat/config.py @@ -107,7 +107,7 @@ class BotConfig: return config -global_config = BotConfig.load_config("./src/plugins/chat/bot_config.toml") +global_config = BotConfig.load_config(".bot_config.toml") from dotenv import load_dotenv current_dir = os.path.dirname(os.path.abspath(__file__)) From 50c1765b81331afad227b1a0e4c965cb798e36c8 Mon Sep 17 00:00:00 2001 From: SengokuCola <1026294844@qq.com> Date: Sun, 2 Mar 2025 00:14:25 +0800 Subject: [PATCH 6/6] =?UTF-8?q?v0.3.1=20=E5=AE=9E=E8=A3=85=E4=BA=86?= =?UTF-8?q?=E8=AE=B0=E5=BF=86=E7=B3=BB=E7=BB=9F=E5=92=8C=E8=87=AA=E5=8A=A8?= =?UTF-8?q?=E5=8F=91=E8=A8=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 哈哈哈 --- .gitignore | 2 +- README.md | 14 +- bot_config.toml => config/bot_config_toml | 5 + env.example => config/env.example | 0 src/plugins/chat/__init__.py | 43 +- src/plugins/chat/bot.py | 17 +- src/plugins/chat/config.py | 53 ++- src/plugins/chat/llm_generator.py | 4 +- src/plugins/chat/message.py | 10 +- src/plugins/chat/message_send_control.py | 10 +- src/plugins/chat/prompt_builder.py | 10 +- src/plugins/chat/utils.py | 35 ++ src/plugins/chat/utils_image.py | 16 +- src/plugins/chat/willing_manager.py | 27 +- .../knowledege/knowledge_library.py | 0 src/plugins/memory_system/draw_memory.py | 264 ++++++++++++ .../memory_system/llm_module_memory_make.py | 82 ++++ src/plugins/memory_system/memory.py | 377 +++++++----------- .../{memory copy.py => memory_make.py} | 90 ++++- 19 files changed, 732 insertions(+), 327 deletions(-) rename bot_config.toml => config/bot_config_toml (95%) rename env.example => config/env.example (100%) rename src/plugins/{chat => }/knowledege/knowledge_library.py (100%) create mode 100644 src/plugins/memory_system/draw_memory.py create mode 100644 src/plugins/memory_system/llm_module_memory_make.py rename src/plugins/memory_system/{memory copy.py => memory_make.py} (82%) diff --git a/.gitignore b/.gitignore index a70c66cdf..265108181 100644 --- a/.gitignore +++ b/.gitignore @@ -3,7 +3,7 @@ mongodb/ NapCat.Framework.Windows.Once/ log/ src/plugins/memory -src/plugins/chat/bot_config.toml +config/bot_config.toml /test message_queue_content.txt message_queue_content.bat diff --git a/README.md b/README.md index 2366fe87b..a85fcc4e8 100644 --- a/README.md +++ b/README.md @@ -16,11 +16,19 @@ 基于llm、napcat、nonebot和mongodb的专注于群聊天的qqbot +
+ + 麦麦演示视频 +
+ 👆 点击观看麦麦演示视频 👆 +
+
+ > ⚠️ **警告**:代码可能随时更改,目前版本不一定是稳定版本 > ⚠️ **警告**:请自行了解qqbot的风险,麦麦有时候一天被腾讯肘七八次 > ⚠️ **警告**:由于麦麦一直在迭代,所以可能存在一些bug,请自行测试,包括胡言乱语( -关于麦麦的开发和部署相关的讨论群(不建议发布无关消息)这里不会有麦麦发言! +关于麦麦的开发和建议相关的讨论群(不建议发布无关消息)这里不会有麦麦发言! ## 开发计划TODO:LIST @@ -29,6 +37,10 @@ - 对思考链长度限制 - 修复已知bug - 完善文档 +- 修复转发 +- config自动生成和检测 +- log别用print +- 给发送消息写专门的类
diff --git a/bot_config.toml b/config/bot_config_toml similarity index 95% rename from bot_config.toml rename to config/bot_config_toml index 6730f0481..b5011c7f9 100644 --- a/bot_config.toml +++ b/config/bot_config_toml @@ -29,6 +29,11 @@ model_r1_probability = 0.8 # 麦麦回答时选择R1模型的概率 model_v3_probability = 0.1 # 麦麦回答时选择V3模型的概率 model_r1_distill_probability = 0.1 # 麦麦回答时选择R1蒸馏模型的概率 +[memory] +build_memory_interval = 300 # 记忆构建间隔 + + + [others] enable_advance_output = true # 开启后输出更多日志,false关闭true开启 diff --git a/env.example b/config/env.example similarity index 100% rename from env.example rename to config/env.example diff --git a/src/plugins/chat/__init__.py b/src/plugins/chat/__init__.py index a2b54eaa5..1c25a24f1 100644 --- a/src/plugins/chat/__init__.py +++ b/src/plugins/chat/__init__.py @@ -1,3 +1,4 @@ +from loguru import logger from nonebot import on_message, on_command, require, get_driver from nonebot.adapters.onebot.v11 import Bot, GroupMessageEvent, Message, MessageSegment from nonebot.typing import T_State @@ -10,9 +11,6 @@ from .relationship_manager import relationship_manager from ..schedule.schedule_generator import bot_schedule from .willing_manager import willing_manager -from ..memory_system.memory import memory_graph - - # 获取驱动器 driver = get_driver() @@ -21,10 +19,7 @@ Database.initialize( global_config.MONGODB_PORT, global_config.DATABASE_NAME ) - -print("\033[1;32m[初始化配置和数据库完成]\033[0m") - - +print("\033[1;32m[初始化数据库完成]\033[0m") # 导入其他模块 @@ -32,6 +27,7 @@ from .bot import ChatBot from .emoji_manager import emoji_manager from .message_send_control import message_sender from .relationship_manager import relationship_manager +from ..memory_system.memory import memory_graph,hippocampus # 初始化表情管理器 emoji_manager.initialize() @@ -39,21 +35,26 @@ emoji_manager.initialize() print(f"\033[1;32m正在唤醒{global_config.BOT_NICKNAME}......\033[0m") # 创建机器人实例 chat_bot = ChatBot(global_config) - # 注册消息处理器 group_msg = on_message() - # 创建定时任务 scheduler = require("nonebot_plugin_apscheduler").scheduler -# 启动后台任务 + + @driver.on_startup async def start_background_tasks(): """启动后台任务""" # 只启动表情包管理任务 asyncio.create_task(emoji_manager.start_periodic_check(interval_MINS=global_config.EMOJI_CHECK_INTERVAL)) - bot_schedule.print_schedule() + +@driver.on_startup +async def init_relationships(): + """在 NoneBot2 启动时初始化关系管理器""" + print("\033[1;32m[初始化]\033[0m 正在加载用户关系数据...") + await relationship_manager.load_all_relationships() + asyncio.create_task(relationship_manager._start_relationship_manager()) @driver.on_bot_connect async def _(bot: Bot): @@ -68,19 +69,23 @@ async def _(bot: Bot): print("\033[1;38;5;208m-----------开始偷表情包!-----------\033[0m") # 启动消息发送控制任务 -@driver.on_startup -async def init_relationships(): - """在 NoneBot2 启动时初始化关系管理器""" - print("\033[1;32m[初始化]\033[0m 正在加载用户关系数据...") - await relationship_manager.load_all_relationships() - asyncio.create_task(relationship_manager._start_relationship_manager()) - @group_msg.handle() async def _(bot: Bot, event: GroupMessageEvent, state: T_State): await chat_bot.handle_message(event, bot) - + +''' @scheduler.scheduled_job("interval", seconds=300000, id="monitor_relationships") async def monitor_relationships(): """每15秒打印一次关系数据""" relationship_manager.print_all_relationships() +''' +# 添加build_memory定时任务 +@scheduler.scheduled_job("interval", seconds=global_config.build_memory_interval, id="build_memory") +async def build_memory_task(): + """每30秒执行一次记忆构建""" + print("\033[1;32m[记忆构建]\033[0m 开始构建记忆...") + hippocampus.build_memory(chat_size=12) + print("\033[1;32m[记忆构建]\033[0m 记忆构建完成") + + diff --git a/src/plugins/chat/bot.py b/src/plugins/chat/bot.py index 09ee2f063..1b5201645 100644 --- a/src/plugins/chat/bot.py +++ b/src/plugins/chat/bot.py @@ -83,7 +83,7 @@ class ChatBot: await relationship_manager.update_relationship(user_id = event.user_id, data = sender_info) await relationship_manager.update_relationship_value(user_id = event.user_id, relationship_value = 0.5) - print(f"\033[1;32m[关系管理]\033[0m 更新关系值: {relationship_manager.get_relationship(event.user_id).relationship_value}") + # print(f"\033[1;32m[关系管理]\033[0m 更新关系值: {relationship_manager.get_relationship(event.user_id).relationship_value}") message = Message( @@ -100,14 +100,19 @@ class ChatBot: topic = topic_identifier.identify_topic_jieba(message.processed_plain_text) print(f"\033[1;32m[主题识别]\033[0m 主题: {topic}") + all_num = 0 + interested_num = 0 if topic: for current_topic in topic: + all_num += 1 first_layer_items, second_layer_items = memory_graph.get_related_item(current_topic, depth=2) if first_layer_items: - print(f"\033[1;32m[记忆检索-bot]\033[0m 有印象:{current_topic}") + interested_num += 1 + print(f"\033[1;32m[前额叶]\033[0m 对|{current_topic}|有印象") + interested_rate = interested_num / all_num if all_num > 0 else 0 + await self.storage.store_message(message, topic[0] if topic else None) - is_mentioned = is_mentioned_bot_in_txt(message.processed_plain_text) @@ -117,7 +122,8 @@ class ChatBot: is_mentioned, self.config, event.user_id, - message.is_emoji + message.is_emoji, + interested_rate ) current_willing = willing_manager.get_willing(event.group_id) @@ -188,7 +194,8 @@ class ChatBot: user_nickname=global_config.BOT_NICKNAME, group_name=message.group_name, time=bot_response_time, - is_emoji=True + is_emoji=True, + translate_cq=False ) message_sender.send_temp_container.add_message(bot_message) diff --git a/src/plugins/chat/config.py b/src/plugins/chat/config.py index 69e59ed5b..05d492789 100644 --- a/src/plugins/chat/config.py +++ b/src/plugins/chat/config.py @@ -6,6 +6,8 @@ import logging import configparser import tomli import sys +from loguru import logger +from dotenv import load_dotenv @@ -21,7 +23,7 @@ class BotConfig: MONGODB_PASSWORD: Optional[str] = None # 默认空值 MONGODB_AUTH_SOURCE: Optional[str] = None # 默认空值 - BOT_QQ: Optional[int] = None + BOT_QQ: Optional[int] = 1 BOT_NICKNAME: Optional[str] = None # 消息处理相关配置 @@ -35,6 +37,7 @@ class BotConfig: talk_frequency_down_groups = set() ban_user_id = set() + build_memory_interval: int = 60 # 记忆构建间隔(秒) EMOJI_CHECK_INTERVAL: int = 120 # 表情包检查间隔(分钟) EMOJI_REGISTER_INTERVAL: int = 10 # 表情包注册间隔(分钟) @@ -45,9 +48,21 @@ class BotConfig: enable_advance_output: bool = False # 是否启用高级输出 + @staticmethod + def get_default_config_path() -> str: + """获取默认配置文件路径""" + current_dir = os.path.dirname(os.path.abspath(__file__)) + root_dir = os.path.abspath(os.path.join(current_dir, '..', '..', '..')) + config_dir = os.path.join(root_dir, 'config') + return os.path.join(config_dir, 'bot_config.toml') + @classmethod - def load_config(cls, config_path: str = "bot_config.toml") -> "BotConfig": + def load_config(cls, config_path: str = None) -> "BotConfig": """从TOML配置文件加载配置""" + if config_path is None: + config_path = cls.get_default_config_path() + logger.info(f"使用默认配置文件路径: {config_path}") + config = cls() if os.path.exists(config_path): with open(config_path, "rb") as f: @@ -93,6 +108,10 @@ class BotConfig: config.MAX_CONTEXT_SIZE = msg_config.get("max_context_size", config.MAX_CONTEXT_SIZE) config.emoji_chance = msg_config.get("emoji_chance", config.emoji_chance) + if "memory" in toml_dict: + memory_config = toml_dict["memory"] + config.build_memory_interval = memory_config.get("build_memory_interval", config.build_memory_interval) + # 群组配置 if "groups" in toml_dict: groups_config = toml_dict["groups"] @@ -104,16 +123,26 @@ class BotConfig: others_config = toml_dict["others"] config.enable_advance_output = others_config.get("enable_advance_output", config.enable_advance_output) - print(f"\033[1;32m成功加载配置文件: {config_path}\033[0m") + logger.success(f"成功加载配置文件: {config_path}") return config -global_config = BotConfig.load_config(".bot_config.toml") +# 获取配置文件路径 +bot_config_path = BotConfig.get_default_config_path() +config_dir = os.path.dirname(bot_config_path) +env_path = os.path.join(config_dir, '.env') -from dotenv import load_dotenv -current_dir = os.path.dirname(os.path.abspath(__file__)) -root_dir = os.path.abspath(os.path.join(current_dir, '..', '..', '..')) -load_dotenv(os.path.join(root_dir, '.env')) +logger.info(f"尝试从 {bot_config_path} 加载机器人配置") +global_config = BotConfig.load_config(config_path=bot_config_path) + +# 加载环境变量 + +logger.info(f"尝试从 {env_path} 加载环境变量配置") +if os.path.exists(env_path): + load_dotenv(env_path) + logger.success("成功加载环境变量配置") +else: + logger.error(f"环境变量配置文件不存在: {env_path}") @dataclass class LLMConfig: @@ -132,9 +161,5 @@ llm_config.DEEP_SEEK_BASE_URL = os.getenv('DEEP_SEEK_BASE_URL') if not global_config.enable_advance_output: - # 只降低日志级别而不是完全移除 - logger.remove() - logger.add(sys.stderr, level="WARNING") # 添加一个只输出 WARNING 及以上级别的处理器 - - # 设置 nonebot 的日志级别 - logging.getLogger('nonebot').setLevel(logging.WARNING) + # logger.remove() + pass diff --git a/src/plugins/chat/llm_generator.py b/src/plugins/chat/llm_generator.py index bb68d3618..2ea4d7f24 100644 --- a/src/plugins/chat/llm_generator.py +++ b/src/plugins/chat/llm_generator.py @@ -4,7 +4,7 @@ import asyncio import requests from functools import partial from .message import Message -from .config import BotConfig +from .config import BotConfig, global_config from ...common.database import Database import random import time @@ -255,4 +255,4 @@ class LLMResponseGenerator: return processed_response, emotion_tags # 创建全局实例 -llm_response = LLMResponseGenerator(config=BotConfig()) \ No newline at end of file +llm_response = LLMResponseGenerator(global_config) \ No newline at end of file diff --git a/src/plugins/chat/message.py b/src/plugins/chat/message.py index 2e91f530e..f5ea0db0d 100644 --- a/src/plugins/chat/message.py +++ b/src/plugins/chat/message.py @@ -6,17 +6,13 @@ import os from datetime import datetime from ...common.database import Database from PIL import Image -from .config import BotConfig, global_config +from .config import global_config import urllib3 from .utils_user import get_user_nickname from .utils_cq import parse_cq_code from .cq_code import cq_code_tool,CQCode Message = ForwardRef('Message') # 添加这行 - -# 加载配置 -bot_config = BotConfig.load_config() - # 禁用SSL警告 urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) @@ -48,6 +44,8 @@ class Message: is_emoji: bool = False # 是否是表情包 has_emoji: bool = False # 是否包含表情包 + + translate_cq: bool = True # 是否翻译cq码 reply_benefits: float = 0.0 @@ -99,7 +97,7 @@ class Message: - cq_code_list:分割出的聊天对象,包括文本和CQ码 - trans_list:翻译后的对象列表 """ - print(f"\033[1;34m[调试信息]\033[0m 正在处理消息: {message}") + # print(f"\033[1;34m[调试信息]\033[0m 正在处理消息: {message}") cq_code_dict_list = [] trans_list = [] diff --git a/src/plugins/chat/message_send_control.py b/src/plugins/chat/message_send_control.py index cb45b3132..0ddb79c5f 100644 --- a/src/plugins/chat/message_send_control.py +++ b/src/plugins/chat/message_send_control.py @@ -208,7 +208,15 @@ class MessageSendControl: print(f"\033[1;34m[调试]\033[0m 消息发送时间: {cost_time}秒") current_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(message.time)) print(f"\033[1;32m群 {group_id} 消息, 用户 {global_config.BOT_NICKNAME}, 时间: {current_time}:\033[0m {str(message.processed_plain_text)}") - await self.storage.store_message(message, None) + + if message.is_emoji: + message.processed_plain_text = "[表情包]" + await self.storage.store_message(message, None) + else: + await self.storage.store_message(message, None) + + + queue.update_send_time() if queue.has_messages(): await asyncio.sleep( diff --git a/src/plugins/chat/prompt_builder.py b/src/plugins/chat/prompt_builder.py index 4e72c6304..da9037cfa 100644 --- a/src/plugins/chat/prompt_builder.py +++ b/src/plugins/chat/prompt_builder.py @@ -53,8 +53,8 @@ class PromptBuilder: # 遍历所有topic for current_topic in topic: first_layer_items, second_layer_items = memory_graph.get_related_item(current_topic, depth=2) - if first_layer_items: - print(f"\033[1;32m[pb记忆检索]\033[0m 主题 '{current_topic}' 的第一层记忆: {first_layer_items}") + # if first_layer_items: + # print(f"\033[1;32m[前额叶]\033[0m 主题 '{current_topic}' 的第一层记忆: {first_layer_items}") # 记录第一层数据 all_first_layer_items.extend(first_layer_items) @@ -68,14 +68,14 @@ class PromptBuilder: # 找到重叠的记忆 overlap = set(second_layer_items) & set(other_second_layer) if overlap: - print(f"\033[1;32m[pb记忆检索]\033[0m 发现主题 '{current_topic}' 和 '{other_topic}' 有共同的第二层记忆: {overlap}") + # print(f"\033[1;32m[前额叶]\033[0m 发现主题 '{current_topic}' 和 '{other_topic}' 有共同的第二层记忆: {overlap}") overlapping_second_layer.update(overlap) # 合并所有需要的记忆 if all_first_layer_items: - print(f"\033[1;32m[pb记忆检索]\033[0m 合并所有需要的记忆1: {all_first_layer_items}") + print(f"\033[1;32m[前额叶]\033[0m 合并所有需要的记忆1: {all_first_layer_items}") if overlapping_second_layer: - print(f"\033[1;32m[pb记忆检索]\033[0m 合并所有需要的记忆2: {list(overlapping_second_layer)}") + print(f"\033[1;32m[前额叶]\033[0m 合并所有需要的记忆2: {list(overlapping_second_layer)}") all_memories = all_first_layer_items + list(overlapping_second_layer) diff --git a/src/plugins/chat/utils.py b/src/plugins/chat/utils.py index 58e2280cc..4e2235805 100644 --- a/src/plugins/chat/utils.py +++ b/src/plugins/chat/utils.py @@ -7,6 +7,8 @@ import numpy as np from .config import llm_config, global_config import re from typing import Dict +from collections import Counter +import math def combine_messages(messages: List[Message]) -> str: @@ -81,6 +83,39 @@ def cosine_similarity(v1, v2): norm2 = np.linalg.norm(v2) return dot_product / (norm1 * norm2) +def calculate_information_content(text): + """计算文本的信息量(熵)""" + # 统计字符频率 + char_count = Counter(text) + total_chars = len(text) + + # 计算熵 + entropy = 0 + for count in char_count.values(): + probability = count / total_chars + entropy -= probability * math.log2(probability) + + return entropy + +def get_cloest_chat_from_db(db, length: int, timestamp: str): + # 从数据库中根据时间戳获取离其最近的聊天记录 + chat_text = '' + closest_record = db.db.messages.find_one({"time": {"$lte": timestamp}}, sort=[('time', -1)]) # 调试输出 + # print(f"距离time最近的消息时间: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(int(closest_record['time'])))}") + + if closest_record: + closest_time = closest_record['time'] + group_id = closest_record['group_id'] # 获取groupid + # 获取该时间戳之后的length条消息,且groupid相同 + chat_record = list(db.db.messages.find({"time": {"$gt": closest_time}, "group_id": group_id}).sort('time', 1).limit(length)) + for record in chat_record: + time_str = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(int(record['time']))) + chat_text += f'[{time_str}] {record["user_nickname"] or "用户" + str(record["user_id"])}: {record["processed_plain_text"]}\n' # 添加发送者和时间信息 + return chat_text + + return [] # 如果没有找到记录,返回空列表 + + def get_recent_group_messages(db, group_id: int, limit: int = 12) -> list: """从数据库获取群组最近的消息记录 diff --git a/src/plugins/chat/utils_image.py b/src/plugins/chat/utils_image.py index e1a882341..9fe2c40cc 100644 --- a/src/plugins/chat/utils_image.py +++ b/src/plugins/chat/utils_image.py @@ -4,11 +4,9 @@ import hashlib import time import os from ...common.database import Database -from .config import BotConfig import zlib # 用于 CRC32 import base64 - -bot_config = BotConfig.load_config() +from .config import global_config def storage_image(image_data: bytes,type: str, max_size: int = 200) -> bytes: @@ -39,12 +37,12 @@ def storage_compress_image(image_data: bytes, max_size: int = 200) -> bytes: # 连接数据库 db = Database( - host=bot_config.MONGODB_HOST, - port=bot_config.MONGODB_PORT, - db_name=bot_config.DATABASE_NAME, - username=bot_config.MONGODB_USERNAME, - password=bot_config.MONGODB_PASSWORD, - auth_source=bot_config.MONGODB_AUTH_SOURCE + host=global_config.MONGODB_HOST, + port=global_config.MONGODB_PORT, + db_name=global_config.DATABASE_NAME, + username=global_config.MONGODB_USERNAME, + password=global_config.MONGODB_PASSWORD, + auth_source=global_config.MONGODB_AUTH_SOURCE ) # 检查是否已存在相同哈希值的图片 diff --git a/src/plugins/chat/willing_manager.py b/src/plugins/chat/willing_manager.py index df41ba42f..037c2d517 100644 --- a/src/plugins/chat/willing_manager.py +++ b/src/plugins/chat/willing_manager.py @@ -22,22 +22,31 @@ class WillingManager: """设置指定群组的回复意愿""" self.group_reply_willing[group_id] = willing - def change_reply_willing_received(self, group_id: int, topic: str, is_mentioned_bot: bool, config, user_id: int = None, is_emoji: bool = False) -> float: + def change_reply_willing_received(self, group_id: int, topic: str, is_mentioned_bot: bool, config, user_id: int = None, is_emoji: bool = False, interested_rate: float = 0) -> float: """改变指定群组的回复意愿并返回回复概率""" current_willing = self.group_reply_willing.get(group_id, 0) - if topic and current_willing < 1: - current_willing += 0.2 - elif topic: - current_willing += 0.05 + print(f"初始意愿: {current_willing}") + + # if topic and current_willing < 1: + # current_willing += 0.2 + # elif topic: + # current_willing += 0.05 if is_mentioned_bot and current_willing < 1.0: current_willing += 0.9 + print(f"被提及, 当前意愿: {current_willing}") elif is_mentioned_bot: current_willing += 0.05 + print(f"被重复提及, 当前意愿: {current_willing}") if is_emoji: - current_willing *= 0.2 + current_willing *= 0.15 + print(f"表情包, 当前意愿: {current_willing}") + + if interested_rate > 0.6: + print(f"兴趣度: {interested_rate}, 当前意愿: {current_willing}") + current_willing += interested_rate-0.45 self.group_reply_willing[group_id] = min(current_willing, 3.0) @@ -55,15 +64,15 @@ class WillingManager: return reply_probability def change_reply_willing_sent(self, group_id: int): - """发送消息后降低群组的回复意愿""" + """开始思考后降低群组的回复意愿""" current_willing = self.group_reply_willing.get(group_id, 0) - self.group_reply_willing[group_id] = max(0, current_willing - 1.8) + self.group_reply_willing[group_id] = max(0, current_willing - 2) def change_reply_willing_after_sent(self, group_id: int): """发送消息后提高群组的回复意愿""" current_willing = self.group_reply_willing.get(group_id, 0) if current_willing < 1: - self.group_reply_willing[group_id] = min(1, current_willing + 0.4) + self.group_reply_willing[group_id] = min(1, current_willing + 0.3) async def ensure_started(self): """确保衰减任务已启动""" diff --git a/src/plugins/chat/knowledege/knowledge_library.py b/src/plugins/knowledege/knowledge_library.py similarity index 100% rename from src/plugins/chat/knowledege/knowledge_library.py rename to src/plugins/knowledege/knowledge_library.py diff --git a/src/plugins/memory_system/draw_memory.py b/src/plugins/memory_system/draw_memory.py new file mode 100644 index 000000000..651d5fbca --- /dev/null +++ b/src/plugins/memory_system/draw_memory.py @@ -0,0 +1,264 @@ +# -*- coding: utf-8 -*- +import sys +import jieba +from llm_module import LLMModel +import networkx as nx +import matplotlib.pyplot as plt +import math +from collections import Counter +import datetime +import random +import time +# from chat.config import global_config +import sys +sys.path.append("C:/GitHub/MaiMBot") # 添加项目根目录到 Python 路径 +from src.common.database import Database # 使用正确的导入语法 + +class Memory_graph: + def __init__(self): + self.G = nx.Graph() # 使用 networkx 的图结构 + self.db = Database.get_instance() + + def connect_dot(self, concept1, concept2): + self.G.add_edge(concept1, concept2) + + def add_dot(self, concept, memory): + if concept in self.G: + # 如果节点已存在,将新记忆添加到现有列表中 + if 'memory_items' in self.G.nodes[concept]: + if not isinstance(self.G.nodes[concept]['memory_items'], list): + # 如果当前不是列表,将其转换为列表 + self.G.nodes[concept]['memory_items'] = [self.G.nodes[concept]['memory_items']] + self.G.nodes[concept]['memory_items'].append(memory) + else: + self.G.nodes[concept]['memory_items'] = [memory] + else: + # 如果是新节点,创建新的记忆列表 + self.G.add_node(concept, memory_items=[memory]) + + def get_dot(self, concept): + # 检查节点是否存在于图中 + if concept in self.G: + # 从图中获取节点数据 + node_data = self.G.nodes[concept] + # print(node_data) + # 创建新的Memory_dot对象 + return concept,node_data + return None + + def get_related_item(self, topic, depth=1): + if topic not in self.G: + return [], [] + + first_layer_items = [] + second_layer_items = [] + + # 获取相邻节点 + neighbors = list(self.G.neighbors(topic)) + # print(f"第一层: {topic}") + + # 获取当前节点的记忆项 + node_data = self.get_dot(topic) + if node_data: + concept, data = node_data + if 'memory_items' in data: + memory_items = data['memory_items'] + if isinstance(memory_items, list): + first_layer_items.extend(memory_items) + else: + first_layer_items.append(memory_items) + + # 只在depth=2时获取第二层记忆 + if depth >= 2: + # 获取相邻节点的记忆项 + for neighbor in neighbors: + # print(f"第二层: {neighbor}") + node_data = self.get_dot(neighbor) + if node_data: + concept, data = node_data + if 'memory_items' in data: + memory_items = data['memory_items'] + if isinstance(memory_items, list): + second_layer_items.extend(memory_items) + else: + second_layer_items.append(memory_items) + + return first_layer_items, second_layer_items + + def store_memory(self): + for node in self.G.nodes(): + dot_data = { + "concept": node + } + self.db.db.store_memory_dots.insert_one(dot_data) + + @property + def dots(self): + # 返回所有节点对应的 Memory_dot 对象 + return [self.get_dot(node) for node in self.G.nodes()] + + + def get_random_chat_from_db(self, length: int, timestamp: str): + # 从数据库中根据时间戳获取离其最近的聊天记录 + chat_text = '' + closest_record = self.db.db.messages.find_one({"time": {"$lte": timestamp}}, sort=[('time', -1)]) # 调试输出 + print(f"距离time最近的消息时间: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(int(closest_record['time'])))}") + + if closest_record: + closest_time = closest_record['time'] + group_id = closest_record['group_id'] # 获取groupid + # 获取该时间戳之后的length条消息,且groupid相同 + chat_record = list(self.db.db.messages.find({"time": {"$gt": closest_time}, "group_id": group_id}).sort('time', 1).limit(length)) + for record in chat_record: + time_str = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(int(record['time']))) + chat_text += f'[{time_str}] {record["user_nickname"] or "用户" + str(record["user_id"])}: {record["processed_plain_text"]}\n' # 添加发送者和时间信息 + return chat_text + + return [] # 如果没有找到记录,返回空列表 + + def save_graph_to_db(self): + # 清空现有的图数据 + self.db.db.graph_data.delete_many({}) + # 保存节点 + for node in self.G.nodes(data=True): + node_data = { + 'concept': node[0], + 'memory_items': node[1].get('memory_items', []) # 默认为空列表 + } + self.db.db.graph_data.nodes.insert_one(node_data) + # 保存边 + for edge in self.G.edges(): + edge_data = { + 'source': edge[0], + 'target': edge[1] + } + self.db.db.graph_data.edges.insert_one(edge_data) + + def load_graph_from_db(self): + # 清空当前图 + self.G.clear() + # 加载节点 + nodes = self.db.db.graph_data.nodes.find() + for node in nodes: + memory_items = node.get('memory_items', []) + if not isinstance(memory_items, list): + memory_items = [memory_items] if memory_items else [] + self.G.add_node(node['concept'], memory_items=memory_items) + # 加载边 + edges = self.db.db.graph_data.edges.find() + for edge in edges: + self.G.add_edge(edge['source'], edge['target']) + + +def main(): + # 初始化数据库 + Database.initialize( + "127.0.0.1", + 27017, + "MegBot" + ) + + memory_graph = Memory_graph() + # 创建LLM模型实例 + + memory_graph.load_graph_from_db() + # 展示两种不同的可视化方式 + print("\n按连接数量着色的图谱:") + visualize_graph(memory_graph, color_by_memory=False) + + print("\n按记忆数量着色的图谱:") + visualize_graph(memory_graph, color_by_memory=True) + + # memory_graph.save_graph_to_db() + + while True: + query = input("请输入新的查询概念(输入'退出'以结束):") + if query.lower() == '退出': + break + items_list = memory_graph.get_related_item(query) + if items_list: + # print(items_list) + for memory_item in items_list: + print(memory_item) + else: + print("未找到相关记忆。") + + +def segment_text(text): + seg_text = list(jieba.cut(text)) + return seg_text + +def find_topic(text, topic_num): + prompt = f'这是一段文字:{text}。请你从这段话中总结出{topic_num}个话题,帮我列出来,用逗号隔开,尽可能精简。只需要列举{topic_num}个话题就好,不要告诉我其他内容。' + return prompt + +def topic_what(text, topic): + prompt = f'这是一段文字:{text}。我想知道这记忆里有什么关于{topic}的话题,帮我总结成一句自然的话,可以包含时间和人物。只输出这句话就好' + return prompt + +def visualize_graph(memory_graph: Memory_graph, color_by_memory: bool = False): + # 设置中文字体 + plt.rcParams['font.sans-serif'] = ['SimHei'] # 用来正常显示中文标签 + plt.rcParams['axes.unicode_minus'] = False # 用来正常显示负号 + + G = memory_graph.G + + # 保存图到本地 + nx.write_gml(G, "memory_graph.gml") # 保存为 GML 格式 + + # 根据连接条数或记忆数量设置节点颜色 + node_colors = [] + nodes = list(G.nodes()) # 获取图中实际的节点列表 + + if color_by_memory: + # 计算每个节点的记忆数量 + memory_counts = [] + for node in nodes: + memory_items = G.nodes[node].get('memory_items', []) + if isinstance(memory_items, list): + count = len(memory_items) + else: + count = 1 if memory_items else 0 + memory_counts.append(count) + max_memories = max(memory_counts) if memory_counts else 1 + + for count in memory_counts: + # 使用不同的颜色方案:红色表示记忆多,蓝色表示记忆少 + if max_memories > 0: + intensity = min(1.0, count / max_memories) + color = (intensity, 0, 1.0 - intensity) # 从蓝色渐变到红色 + else: + color = (0, 0, 1) # 如果没有记忆,则为蓝色 + node_colors.append(color) + else: + # 使用原来的连接数量着色方案 + max_degree = max(G.degree(), key=lambda x: x[1])[1] if G.degree() else 1 + for node in nodes: + degree = G.degree(node) + if max_degree > 0: + red = min(1.0, degree / max_degree) + blue = 1.0 - red + color = (red, 0, blue) + else: + color = (0, 0, 1) + node_colors.append(color) + + # 绘制图形 + plt.figure(figsize=(12, 8)) + pos = nx.spring_layout(G, k=1, iterations=50) + nx.draw(G, pos, + with_labels=True, + node_color=node_colors, + node_size=2000, + font_size=10, + font_family='SimHei', + font_weight='bold') + + title = '记忆图谱可视化 - ' + ('按记忆数量着色' if color_by_memory else '按连接数量着色') + plt.title(title, fontsize=16, fontfamily='SimHei') + plt.show() + +if __name__ == "__main__": + main() + + diff --git a/src/plugins/memory_system/llm_module_memory_make.py b/src/plugins/memory_system/llm_module_memory_make.py new file mode 100644 index 000000000..1abfdb2c6 --- /dev/null +++ b/src/plugins/memory_system/llm_module_memory_make.py @@ -0,0 +1,82 @@ +import os +import requests +from dotenv import load_dotenv +from typing import Tuple, Union +import time +from ..chat.config import BotConfig + +# 获取当前文件的绝对路径 +current_dir = os.path.dirname(os.path.abspath(__file__)) +root_dir = os.path.abspath(os.path.join(current_dir, '..', '..', '..')) +env_path = os.path.join(root_dir, 'config', '.env') + +# 加载环境变量 +print(f"尝试从 {env_path} 加载环境变量配置") +if os.path.exists(env_path): + load_dotenv(env_path) + print("成功加载环境变量配置") +else: + print(f"环境变量配置文件不存在: {env_path}") + +class LLMModel: + # def __init__(self, model_name="deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", **kwargs): + def __init__(self, model_name="Pro/deepseek-ai/DeepSeek-V3", **kwargs): + self.model_name = model_name + self.params = kwargs + self.api_key = os.getenv("SILICONFLOW_KEY") + self.base_url = os.getenv("SILICONFLOW_BASE_URL") + + if not self.api_key or not self.base_url: + raise ValueError("环境变量未正确加载:SILICONFLOW_KEY 或 SILICONFLOW_BASE_URL 未设置") + + print(f"API URL: {self.base_url}") # 打印 base_url 用于调试 + + def generate_response(self, prompt: str) -> Tuple[str, str]: + """根据输入的提示生成模型的响应""" + headers = { + "Authorization": f"Bearer {self.api_key}", + "Content-Type": "application/json" + } + + # 构建请求体 + data = { + "model": self.model_name, + "messages": [{"role": "user", "content": prompt}], + "temperature": 0.5, + **self.params + } + + # 发送请求到完整的chat/completions端点 + api_url = f"{self.base_url.rstrip('/')}/chat/completions" + + max_retries = 3 + base_wait_time = 15 # 基础等待时间(秒) + + for retry in range(max_retries): + try: + response = requests.post(api_url, headers=headers, json=data) + + if response.status_code == 429: + wait_time = base_wait_time * (2 ** retry) # 指数退避 + print(f"遇到请求限制(429),等待{wait_time}秒后重试...") + time.sleep(wait_time) + continue + + response.raise_for_status() # 检查其他响应状态 + + result = response.json() + if "choices" in result and len(result["choices"]) > 0: + content = result["choices"][0]["message"]["content"] + reasoning_content = result["choices"][0]["message"].get("reasoning_content", "") + return content, reasoning_content + return "没有返回结果", "" + + except requests.exceptions.RequestException as e: + if retry < max_retries - 1: # 如果还有重试机会 + wait_time = base_wait_time * (2 ** retry) + print(f"请求失败,等待{wait_time}秒后重试... 错误: {str(e)}") + time.sleep(wait_time) + else: + return f"请求失败: {str(e)}", "" + + return "达到最大重试次数,请求仍然失败", "" \ No newline at end of file diff --git a/src/plugins/memory_system/memory.py b/src/plugins/memory_system/memory.py index 3f216997f..af6aab39a 100644 --- a/src/plugins/memory_system/memory.py +++ b/src/plugins/memory_system/memory.py @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- -import sys import jieba from .llm_module import LLMModel import networkx as nx @@ -11,8 +10,8 @@ import random import time from ..chat.config import global_config import sys -sys.path.append("C:/GitHub/MaiMBot") # 添加项目根目录到 Python 路径 -from src.common.database import Database # 使用正确的导入语法 +from ...common.database import Database # 使用正确的导入语法 +from ..chat.utils import calculate_information_content, get_cloest_chat_from_db class Memory_graph: def __init__(self): @@ -85,54 +84,66 @@ class Memory_graph: return first_layer_items, second_layer_items - def store_memory(self): - for node in self.G.nodes(): - dot_data = { - "concept": node - } - self.db.db.store_memory_dots.insert_one(dot_data) - @property def dots(self): # 返回所有节点对应的 Memory_dot 对象 return [self.get_dot(node) for node in self.G.nodes()] - - - def get_random_chat_from_db(self, length: int, timestamp: str): - # 从数据库中根据时间戳获取离其最近的聊天记录 - chat_text = '' - closest_record = self.db.db.messages.find_one({"time": {"$lte": timestamp}}, sort=[('time', -1)]) # 调试输出 - print(f"距离time最近的消息时间: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(int(closest_record['time'])))}") - - if closest_record: - closest_time = closest_record['time'] - group_id = closest_record['group_id'] # 获取groupid - # 获取该时间戳之后的length条消息,且groupid相同 - chat_record = list(self.db.db.messages.find({"time": {"$gt": closest_time}, "group_id": group_id}).sort('time', 1).limit(length)) - for record in chat_record: - time_str = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(int(record['time']))) - chat_text += f'[{time_str}] {record["user_nickname"] or "用户" + str(record["user_id"])}: {record["processed_plain_text"]}\n' # 添加发送者和时间信息 - return chat_text - - return [] # 如果没有找到记录,返回空列表 def save_graph_to_db(self): - # 清空现有的图数据 - self.db.db.graph_data.delete_many({}) # 保存节点 for node in self.G.nodes(data=True): - node_data = { - 'concept': node[0], - 'memory_items': node[1].get('memory_items', []) # 默认为空列表 - } - self.db.db.graph_data.nodes.insert_one(node_data) + concept = node[0] + memory_items = node[1].get('memory_items', []) + + # 查找是否存在同名节点 + existing_node = self.db.db.graph_data.nodes.find_one({'concept': concept}) + if existing_node: + # 如果存在,合并memory_items并去重 + existing_items = existing_node.get('memory_items', []) + if not isinstance(existing_items, list): + existing_items = [existing_items] if existing_items else [] + + # 合并并去重 + all_items = list(set(existing_items + memory_items)) + + # 更新节点 + self.db.db.graph_data.nodes.update_one( + {'concept': concept}, + {'$set': {'memory_items': all_items}} + ) + else: + # 如果不存在,创建新节点 + node_data = { + 'concept': concept, + 'memory_items': memory_items + } + self.db.db.graph_data.nodes.insert_one(node_data) + # 保存边 for edge in self.G.edges(): - edge_data = { - 'source': edge[0], - 'target': edge[1] - } - self.db.db.graph_data.edges.insert_one(edge_data) + source, target = edge + + # 查找是否存在同样的边 + existing_edge = self.db.db.graph_data.edges.find_one({ + 'source': source, + 'target': target + }) + + if existing_edge: + # 如果存在,增加num属性 + num = existing_edge.get('num', 1) + 1 + self.db.db.graph_data.edges.update_one( + {'source': source, 'target': target}, + {'$set': {'num': num}} + ) + else: + # 如果不存在,创建新边 + edge_data = { + 'source': source, + 'target': target, + 'num': 1 + } + self.db.db.graph_data.edges.insert_one(edge_data) def load_graph_from_db(self): # 清空当前图 @@ -147,150 +158,92 @@ class Memory_graph: # 加载边 edges = self.db.db.graph_data.edges.find() for edge in edges: - self.G.add_edge(edge['source'], edge['target']) - -def calculate_information_content(text): - - """计算文本的信息量(熵)""" - # 统计字符频率 - char_count = Counter(text) - total_chars = len(text) - - # 计算熵 - entropy = 0 - for count in char_count.values(): - probability = count / total_chars - entropy -= probability * math.log2(probability) - - return entropy - - -start_time = time.time() - -Database.initialize( - global_config.MONGODB_HOST, - global_config.MONGODB_PORT, - global_config.DATABASE_NAME -) -memory_graph = Memory_graph() - -llm_model = LLMModel() -llm_model_small = LLMModel(model_name="deepseek-ai/DeepSeek-V2.5") - -memory_graph.load_graph_from_db() - -end_time = time.time() -print(f"加载海马体耗时: {end_time - start_time:.2f} 秒") + self.G.add_edge(edge['source'], edge['target'], num=edge.get('num', 1)) -def main(): - # 初始化数据库 - Database.initialize( - "127.0.0.1", - 27017, - "MegBot" - ) - - memory_graph = Memory_graph() - # 创建LLM模型实例 - llm_model = LLMModel() - llm_model_small = LLMModel(model_name="deepseek-ai/DeepSeek-V2.5") - - # 使用当前时间戳进行测试 - current_timestamp = datetime.datetime.now().timestamp() - chat_text = [] - - chat_size =40 - - for _ in range(100): # 循环10次 - random_time = current_timestamp - random.randint(1, 3600*39) # 随机时间 - print(f"随机时间戳对应的时间: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(random_time))}") - chat_ = memory_graph.get_random_chat_from_db(chat_size, random_time) - chat_text.append(chat_) # 拼接所有text - time.sleep(5) - - for input_text in chat_text: - print(input_text) - first_memory = set() - first_memory = memory_compress(input_text, llm_model_small, llm_model_small, rate=2.5) +# 海马体 +class Hippocampus: + def __init__(self,memory_graph:Memory_graph): + self.memory_graph = memory_graph + self.llm_model = LLMModel() + self.llm_model_small = LLMModel(model_name="deepseek-ai/DeepSeek-V2.5") - #将记忆加入到图谱中 - for topic, memory in first_memory: - topics = segment_text(topic) - print(f"\033[1;34m话题\033[0m: {topic},节点: {topics}, 记忆: {memory}") - for split_topic in topics: - memory_graph.add_dot(split_topic,memory) - for split_topic in topics: - for other_split_topic in topics: - if split_topic != other_split_topic: - memory_graph.connect_dot(split_topic, other_split_topic) + def get_memory_sample(self,chat_size=20,time_frequency:dict={'near':2,'mid':4,'far':3}): + current_timestamp = datetime.datetime.now().timestamp() + chat_text = [] + #短期:1h 中期:4h 长期:24h + for _ in range(time_frequency.get('near')): # 循环10次 + random_time = current_timestamp - random.randint(1, 3600) # 随机时间 + # print(f"获得 最近 随机时间戳对应的时间: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(random_time))}") + chat_ = get_cloest_chat_from_db(db=self.memory_graph.db, length=chat_size, timestamp=random_time) + chat_text.append(chat_) + for _ in range(time_frequency.get('mid')): # 循环10次 + random_time = current_timestamp - random.randint(3600, 3600*4) # 随机时间 + # print(f"获得 最近 随机时间戳对应的时间: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(random_time))}") + chat_ = get_cloest_chat_from_db(db=self.memory_graph.db, length=chat_size, timestamp=random_time) + chat_text.append(chat_) + for _ in range(time_frequency.get('far')): # 循环10次 + random_time = current_timestamp - random.randint(3600*4, 3600*24) # 随机时间 + # print(f"获得 最近 随机时间戳对应的时间: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(random_time))}") + chat_ = get_cloest_chat_from_db(db=self.memory_graph.db, length=chat_size, timestamp=random_time) + chat_text.append(chat_) + return chat_text - # memory_graph.store_memory() - - # 展示两种不同的可视化方式 - print("\n按连接数量着色的图谱:") - visualize_graph(memory_graph, color_by_memory=False) - - print("\n按记忆数量着色的图谱:") - visualize_graph(memory_graph, color_by_memory=True) - - memory_graph.save_graph_to_db() - # memory_graph.load_graph_from_db() - - while True: - query = input("请输入新的查询概念(输入'退出'以结束):") - if query.lower() == '退出': - break - items_list = memory_graph.get_related_item(query) - if items_list: - # print(items_list) - for memory_item in items_list: - print(memory_item) - else: - print("未找到相关记忆。") + def build_memory(self,chat_size=12): + #最近消息获取频率 + time_frequency = {'near':1,'mid':2,'far':2} + memory_sample = self.get_memory_sample(chat_size,time_frequency) + # print(f"\033[1;32m[记忆构建]\033[0m 获取记忆样本: {memory_sample}") + + + for i, input_text in enumerate(memory_sample, 1): + #加载进度可视化 + progress = (i / len(memory_sample)) * 100 + bar_length = 30 + filled_length = int(bar_length * i // len(memory_sample)) + bar = '█' * filled_length + '-' * (bar_length - filled_length) + print(f"\n进度: [{bar}] {progress:.1f}% ({i}/{len(memory_sample)})") - while True: - query = input("请输入问题:") - - if query.lower() == '退出': - break - - topic_prompt = find_topic(query, 3) - topic_response = llm_model.generate_response(topic_prompt) + # 生成压缩后记忆 + first_memory = set() + first_memory = self.memory_compress(input_text, 2.5) + # 延时防止访问超频 + # time.sleep(5) + #将记忆加入到图谱中 + for topic, memory in first_memory: + topics = segment_text(topic) + print(f"\033[1;34m话题\033[0m: {topic},节点: {topics}, 记忆: {memory}") + for split_topic in topics: + self.memory_graph.add_dot(split_topic,memory) + for split_topic in topics: + for other_split_topic in topics: + if split_topic != other_split_topic: + self.memory_graph.connect_dot(split_topic, other_split_topic) + + self.memory_graph.save_graph_to_db() + + def memory_compress(self, input_text, rate=1): + information_content = calculate_information_content(input_text) + print(f"文本的信息量(熵): {information_content:.4f} bits") + topic_num = max(1, min(5, int(information_content * rate / 4))) + # print(topic_num) + topic_prompt = find_topic(input_text, topic_num) + topic_response = self.llm_model.generate_response(topic_prompt) # 检查 topic_response 是否为元组 if isinstance(topic_response, tuple): topics = topic_response[0].split(",") # 假设第一个元素是我们需要的字符串 else: topics = topic_response.split(",") - print(topics) - - for keyword in topics: - items_list = memory_graph.get_related_item(keyword) - if items_list: - print(items_list) - -def memory_compress(input_text, llm_model, llm_model_small, rate=1): - information_content = calculate_information_content(input_text) - print(f"文本的信息量(熵): {information_content:.4f} bits") - topic_num = max(1, min(5, int(information_content * rate / 4))) - print(topic_num) - topic_prompt = find_topic(input_text, topic_num) - topic_response = llm_model.generate_response(topic_prompt) - # 检查 topic_response 是否为元组 - if isinstance(topic_response, tuple): - topics = topic_response[0].split(",") # 假设第一个元素是我们需要的字符串 - else: - topics = topic_response.split(",") - print(topics) - compressed_memory = set() - for topic in topics: - topic_what_prompt = topic_what(input_text,topic) - topic_what_response = llm_model_small.generate_response(topic_what_prompt) - compressed_memory.add((topic.strip(), topic_what_response[0])) # 将话题和记忆作为元组存储 - return compressed_memory + # print(topics) + compressed_memory = set() + for topic in topics: + topic_what_prompt = topic_what(input_text,topic) + topic_what_response = self.llm_model_small.generate_response(topic_what_prompt) + compressed_memory.add((topic.strip(), topic_what_response[0])) # 将话题和记忆作为元组存储 + return compressed_memory def segment_text(text): @@ -305,69 +258,21 @@ def topic_what(text, topic): prompt = f'这是一段文字:{text}。我想知道这记忆里有什么关于{topic}的话题,帮我总结成一句自然的话,可以包含时间和人物。只输出这句话就好' return prompt -def visualize_graph(memory_graph: Memory_graph, color_by_memory: bool = False): - # 设置中文字体 - plt.rcParams['font.sans-serif'] = ['SimHei'] # 用来正常显示中文标签 - plt.rcParams['axes.unicode_minus'] = False # 用来正常显示负号 - - G = memory_graph.G - - # 保存图到本地 - nx.write_gml(G, "memory_graph.gml") # 保存为 GML 格式 - - # 根据连接条数或记忆数量设置节点颜色 - node_colors = [] - nodes = list(G.nodes()) # 获取图中实际的节点列表 - - if color_by_memory: - # 计算每个节点的记忆数量 - memory_counts = [] - for node in nodes: - memory_items = G.nodes[node].get('memory_items', []) - if isinstance(memory_items, list): - count = len(memory_items) - else: - count = 1 if memory_items else 0 - memory_counts.append(count) - max_memories = max(memory_counts) if memory_counts else 1 - - for count in memory_counts: - # 使用不同的颜色方案:红色表示记忆多,蓝色表示记忆少 - if max_memories > 0: - intensity = min(1.0, count / max_memories) - color = (intensity, 0, 1.0 - intensity) # 从蓝色渐变到红色 - else: - color = (0, 0, 1) # 如果没有记忆,则为蓝色 - node_colors.append(color) - else: - # 使用原来的连接数量着色方案 - max_degree = max(G.degree(), key=lambda x: x[1])[1] if G.degree() else 1 - for node in nodes: - degree = G.degree(node) - if max_degree > 0: - red = min(1.0, degree / max_degree) - blue = 1.0 - red - color = (red, 0, blue) - else: - color = (0, 0, 1) - node_colors.append(color) - - # 绘制图形 - plt.figure(figsize=(12, 8)) - pos = nx.spring_layout(G, k=1, iterations=50) - nx.draw(G, pos, - with_labels=True, - node_color=node_colors, - node_size=2000, - font_size=10, - font_family='SimHei', - font_weight='bold') - - title = '记忆图谱可视化 - ' + ('按记忆数量着色' if color_by_memory else '按连接数量着色') - plt.title(title, fontsize=16, fontfamily='SimHei') - plt.show() - -if __name__ == "__main__": - main() +start_time = time.time() + +Database.initialize( + global_config.MONGODB_HOST, + global_config.MONGODB_PORT, + global_config.DATABASE_NAME +) +#创建记忆图 +memory_graph = Memory_graph() +#加载数据库中存储的记忆图 +memory_graph.load_graph_from_db() +#创建海马体 +hippocampus = Hippocampus(memory_graph) + +end_time = time.time() +print(f"\033[32m[加载海马体耗时: {end_time - start_time:.2f} 秒]\033[0m") \ No newline at end of file diff --git a/src/plugins/memory_system/memory copy.py b/src/plugins/memory_system/memory_make.py similarity index 82% rename from src/plugins/memory_system/memory copy.py rename to src/plugins/memory_system/memory_make.py index 07dea2a8b..244838e21 100644 --- a/src/plugins/memory_system/memory copy.py +++ b/src/plugins/memory_system/memory_make.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- import sys import jieba -from llm_module import LLMModel import networkx as nx import matplotlib.pyplot as plt import math @@ -9,10 +8,12 @@ from collections import Counter import datetime import random import time +import os +from dotenv import load_dotenv # from chat.config import global_config -import sys sys.path.append("C:/GitHub/MaiMBot") # 添加项目根目录到 Python 路径 from src.common.database import Database # 使用正确的导入语法 +from src.plugins.memory_system.llm_module import LLMModel class Memory_graph: def __init__(self): @@ -117,22 +118,60 @@ class Memory_graph: return [] # 如果没有找到记录,返回空列表 def save_graph_to_db(self): - # 清空现有的图数据 - self.db.db.graph_data.delete_many({}) # 保存节点 for node in self.G.nodes(data=True): - node_data = { - 'concept': node[0], - 'memory_items': node[1].get('memory_items', []) # 默认为空列表 - } - self.db.db.graph_data.nodes.insert_one(node_data) + concept = node[0] + memory_items = node[1].get('memory_items', []) + + # 查找是否存在同名节点 + existing_node = self.db.db.graph_data.nodes.find_one({'concept': concept}) + if existing_node: + # 如果存在,合并memory_items并去重 + existing_items = existing_node.get('memory_items', []) + if not isinstance(existing_items, list): + existing_items = [existing_items] if existing_items else [] + + # 合并并去重 + all_items = list(set(existing_items + memory_items)) + + # 更新节点 + self.db.db.graph_data.nodes.update_one( + {'concept': concept}, + {'$set': {'memory_items': all_items}} + ) + else: + # 如果不存在,创建新节点 + node_data = { + 'concept': concept, + 'memory_items': memory_items + } + self.db.db.graph_data.nodes.insert_one(node_data) + # 保存边 for edge in self.G.edges(): - edge_data = { - 'source': edge[0], - 'target': edge[1] - } - self.db.db.graph_data.edges.insert_one(edge_data) + source, target = edge + + # 查找是否存在同样的边 + existing_edge = self.db.db.graph_data.edges.find_one({ + 'source': source, + 'target': target + }) + + if existing_edge: + # 如果存在,增加num属性 + num = existing_edge.get('num', 1) + 1 + self.db.db.graph_data.edges.update_one( + {'source': source, 'target': target}, + {'$set': {'num': num}} + ) + else: + # 如果不存在,创建新边 + edge_data = { + 'source': source, + 'target': target, + 'num': 1 + } + self.db.db.graph_data.edges.insert_one(edge_data) def load_graph_from_db(self): # 清空当前图 @@ -147,7 +186,7 @@ class Memory_graph: # 加载边 edges = self.db.db.graph_data.edges.find() for edge in edges: - self.G.add_edge(edge['source'], edge['target']) + self.G.add_edge(edge['source'], edge['target'], num=edge.get('num', 1)) def calculate_information_content(text): @@ -180,6 +219,19 @@ def calculate_information_content(text): def main(): + # 获取当前文件的绝对路径 + current_dir = os.path.dirname(os.path.abspath(__file__)) + root_dir = os.path.abspath(os.path.join(current_dir, '..', '..', '..')) + env_path = os.path.join(root_dir, 'config', '.env') + + # 加载环境变量 + print(f"尝试从 {env_path} 加载环境变量配置") + if os.path.exists(env_path): + load_dotenv(env_path) + print("成功加载环境变量配置") + else: + print(f"环境变量配置文件不存在: {env_path}") + # 初始化数据库 Database.initialize( "127.0.0.1", @@ -196,10 +248,10 @@ def main(): current_timestamp = datetime.datetime.now().timestamp() chat_text = [] - chat_size =20 + chat_size =25 - for _ in range(10): # 循环10次 - random_time = current_timestamp - random.randint(1, 3600*3) # 随机时间 + for _ in range(30): # 循环10次 + random_time = current_timestamp - random.randint(1, 3600*10) # 随机时间 print(f"随机时间戳对应的时间: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(random_time))}") chat_ = memory_graph.get_random_chat_from_db(chat_size, random_time) chat_text.append(chat_) # 拼接所有text @@ -218,7 +270,7 @@ def main(): # print(input_text) first_memory = set() first_memory = memory_compress(input_text, llm_model_small, llm_model_small, rate=2.5) - time.sleep(5) + # time.sleep(5) #将记忆加入到图谱中 for topic, memory in first_memory: