diff --git a/.github/workflows/docker-image.yml b/.github/workflows/docker-image.yml
index 2a5f497fd..5b09b8cda 100644
--- a/.github/workflows/docker-image.yml
+++ b/.github/workflows/docker-image.yml
@@ -5,6 +5,7 @@ on:
     branches:
       - main
       - debug # 新增 debug 分支触发
+      - stable-dev
     tags:
       - 'v*'
   workflow_dispatch:
@@ -34,6 +35,8 @@ jobs:
             echo "tags=${{ secrets.DOCKERHUB_USERNAME }}/maimbot:main,${{ secrets.DOCKERHUB_USERNAME }}/maimbot:latest" >> $GITHUB_OUTPUT
           elif [ "${{ github.ref }}" == "refs/heads/debug" ]; then
             echo "tags=${{ secrets.DOCKERHUB_USERNAME }}/maimbot:debug" >> $GITHUB_OUTPUT
+          elif [ "${{ github.ref }}" == "refs/heads/stable-dev" ]; then
+            echo "tags=${{ secrets.DOCKERHUB_USERNAME }}/maimbot:stable-dev" >> $GITHUB_OUTPUT
           fi

       - name: Build and Push Docker Image
diff --git a/.gitignore b/.gitignore
index 4e1606a54..e51abc5cc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -193,9 +193,9 @@ cython_debug/
 
 # jieba
 jieba.cache
-
-# vscode
-/.vscode
+# .vscode:忽略目录内容,仅保留共享的 settings.json(否定模式需要先有忽略规则才会生效)
+.vscode/*
+!.vscode/settings.json
 
 # direnv
 /.direnv
\ No newline at end of file
diff --git a/README.md b/README.md
index 533d38383..f4ebca07d 100644
--- a/README.md
+++ b/README.md
@@ -3,7 +3,7 @@
-![Python Version](https://img.shields.io/badge/Python-3.x-blue) +![Python Version](https://img.shields.io/badge/Python-3.9+-blue) ![License](https://img.shields.io/github/license/SengokuCola/MaiMBot) ![Status](https://img.shields.io/badge/状态-开发中-yellow) @@ -29,15 +29,21 @@
-> ⚠️ **注意事项** +> [!WARNING] > - 项目处于活跃开发阶段,代码可能随时更改 > - 文档未完善,有问题可以提交 Issue 或者 Discussion > - QQ机器人存在被限制风险,请自行了解,谨慎使用 > - 由于持续迭代,可能存在一些已知或未知的bug > - 由于开发中,可能消耗较多token -**交流群**: 766798517 一群人较多,建议加下面的(开发和建议相关讨论)不一定有空回复,会优先写文档和代码 -**交流群**: 571780722 另一个群(开发和建议相关讨论)不一定有空回复,会优先写文档和代码 +## 💬交流群 +- [一群](https://qm.qq.com/q/VQ3XZrWgMs) 766798517 ,建议加下面的(开发和建议相关讨论)不一定有空回复,会优先写文档和代码 +- [二群](https://qm.qq.com/q/RzmCiRtHEW) 571780722 (开发和建议相关讨论)不一定有空回复,会优先写文档和代码 +- [三群](https://qm.qq.com/q/wlH5eT8OmQ) 1035228475(开发和建议相关讨论)不一定有空回复,会优先写文档和代码 + +**其他平台版本** + +- (由 [CabLate](https://github.com/cablate) 贡献) [Telegram 与其他平台(未来可能会有)的版本](https://github.com/cablate/MaiMBot/tree/telegram) - [集中讨论串](https://github.com/SengokuCola/MaiMBot/discussions/149) ##
@@ -46,11 +52,16 @@ ### 部署方式 -如果你不知道Docker是什么,建议寻找相关教程或使用手动部署 +- 📦 **Windows 一键傻瓜式部署**:请运行项目根目录中的 `run.bat`,部署完成后请参照后续配置指南进行配置 + +- [📦 Windows 手动部署指南 ](docs/manual_deploy_windows.md) + +- [📦 Linux 手动部署指南 ](docs/manual_deploy_linux.md) + +如果你不知道Docker是什么,建议寻找相关教程或使用手动部署 **(现在不建议使用docker,更新慢,可能不适配)** - [🐳 Docker部署指南](docs/docker_deploy.md) -- [📦 手动部署指南](docs/manual_deploy.md) ### 配置说明 - [🎀 新手配置指南](docs/installation_cute.md) - 通俗易懂的配置教程,适合初次使用的猫娘 @@ -129,9 +140,10 @@ ## 📌 注意事项 -SengokuCola纯编程外行,面向cursor编程,很多代码史一样多多包涵 -> ⚠️ **警告**:本应用生成内容来自人工智能模型,由 AI 生成,请仔细甄别,请勿用于违反法律的用途,AI生成内容不代表本人观点和立场。 +SengokuCola纯编程外行,面向cursor编程,很多代码史一样多多包涵 +> [!WARNING] +> 本应用生成内容来自人工智能模型,由 AI 生成,请仔细甄别,请勿用于违反法律的用途,AI生成内容不代表本人观点和立场。 ## 致谢 [nonebot2](https://github.com/nonebot/nonebot2): 跨平台 Python 异步聊天机器人框架 @@ -142,7 +154,7 @@ SengokuCola纯编程外行,面向cursor编程,很多代码史一样多多包 感谢各位大佬! - + diff --git a/bot.py b/bot.py index 51979a5ea..471a98eaf 100644 --- a/bot.py +++ b/bot.py @@ -1,88 +1,233 @@ +import asyncio import os +import shutil +import sys import nonebot +import time + +import uvicorn from dotenv import load_dotenv from loguru import logger from nonebot.adapters.onebot.v11 import Adapter +import platform -'''彩蛋''' -from colorama import Fore, init +# 获取没有加载env时的环境变量 +env_mask = {key: os.getenv(key) for key in os.environ} -init() -text = "多年以后,面对AI行刑队,张三将会回想起他2023年在会议上讨论人工智能的那个下午" -rainbow_colors = [Fore.RED, Fore.YELLOW, Fore.GREEN, Fore.CYAN, Fore.BLUE, Fore.MAGENTA] -rainbow_text = "" -for i, char in enumerate(text): - rainbow_text += rainbow_colors[i % len(rainbow_colors)] + char -print(rainbow_text) -'''彩蛋''' +uvicorn_server = None -# 初次启动检测 -if not os.path.exists("config/bot_config.toml"): - logger.warning("检测到bot_config.toml不存在,正在从模板复制") - import shutil - # 检查config目录是否存在 - if not os.path.exists("config"): - os.makedirs("config") - logger.info("创建config目录") - shutil.copy("template/bot_config_template.toml", "config/bot_config.toml") - logger.info("复制完成,请修改config/bot_config.toml和.env.prod中的配置后重新启动") +def easter_egg(): + # 彩蛋 + from colorama import init, Fore -# 初始化.env 默认ENVIRONMENT=prod -if not os.path.exists(".env"): - with open(".env", "w") as f: - f.write("ENVIRONMENT=prod") + init() + text = "多年以后,面对AI行刑队,张三将会回想起他2023年在会议上讨论人工智能的那个下午" + rainbow_colors = [Fore.RED, Fore.YELLOW, Fore.GREEN, Fore.CYAN, Fore.BLUE, Fore.MAGENTA] + rainbow_text = "" + for i, char in enumerate(text): + rainbow_text += rainbow_colors[i % len(rainbow_colors)] + char + print(rainbow_text) - # 检测.env.prod文件是否存在 - if not os.path.exists(".env.prod"): - logger.error("检测到.env.prod文件不存在") - shutil.copy("template.env", "./.env.prod") -# 首先加载基础环境变量.env -if os.path.exists(".env"): - load_dotenv(".env") - logger.success("成功加载基础环境变量配置") +def init_config(): + # 初次启动检测 + if not os.path.exists("config/bot_config.toml"): + logger.warning("检测到bot_config.toml不存在,正在从模板复制") -# 根据 ENVIRONMENT 加载对应的环境配置 -if os.getenv("ENVIRONMENT") == "prod": - logger.success("加载生产环境变量配置") - load_dotenv(".env.prod", override=True) # override=True 允许覆盖已存在的环境变量 -elif os.getenv("ENVIRONMENT") == "dev": - logger.success("加载开发环境变量配置") - load_dotenv(".env.dev", override=True) # override=True 允许覆盖已存在的环境变量 -elif os.path.exists(f".env.{os.getenv('ENVIRONMENT')}"): - logger.success(f"加载{os.getenv('ENVIRONMENT')}环境变量配置") - load_dotenv(f".env.{os.getenv('ENVIRONMENT')}", override=True) # override=True 允许覆盖已存在的环境变量 -else: - logger.error(f"ENVIRONMENT配置错误,请检查.env文件中的ENVIRONMENT变量对应的.env.{os.getenv('ENVIRONMENT')}是否存在") - exit(1) + # 检查config目录是否存在 + if not os.path.exists("config"): + 
os.makedirs("config") + logger.info("创建config目录") -# 检测Key是否存在 -if not os.getenv("SILICONFLOW_KEY"): - logger.error("缺失必要的API KEY") - logger.error(f"请至少在.env.{os.getenv('ENVIRONMENT')}文件中填写SILICONFLOW_KEY后重新启动") - exit(1) + shutil.copy("template/bot_config_template.toml", "config/bot_config.toml") + logger.info("复制完成,请修改config/bot_config.toml和.env.prod中的配置后重新启动") -# 获取所有环境变量 -env_config = {key: os.getenv(key) for key in os.environ} -# 设置基础配置 -base_config = { - "websocket_port": int(env_config.get("PORT", 8080)), - "host": env_config.get("HOST", "127.0.0.1"), - "log_level": "INFO", -} +def init_env(): + # 初始化.env 默认ENVIRONMENT=prod + if not os.path.exists(".env"): + with open(".env", "w") as f: + f.write("ENVIRONMENT=prod") -# 合并配置 -nonebot.init(**base_config, **env_config) + # 检测.env.prod文件是否存在 + if not os.path.exists(".env.prod"): + logger.error("检测到.env.prod文件不存在") + shutil.copy("template.env", "./.env.prod") -# 注册适配器 -driver = nonebot.get_driver() -driver.register_adapter(Adapter) + # 检测.env.dev文件是否存在,不存在的话直接复制生产环境配置 + if not os.path.exists(".env.dev"): + logger.error("检测到.env.dev文件不存在") + shutil.copy(".env.prod", "./.env.dev") + + # 首先加载基础环境变量.env + if os.path.exists(".env"): + load_dotenv(".env") + logger.success("成功加载基础环境变量配置") + + +def load_env(): + # 使用闭包实现对加载器的横向扩展,避免大量重复判断 + def prod(): + logger.success("加载生产环境变量配置") + load_dotenv(".env.prod", override=True) # override=True 允许覆盖已存在的环境变量 + + def dev(): + logger.success("加载开发环境变量配置") + load_dotenv(".env.dev", override=True) # override=True 允许覆盖已存在的环境变量 + + fn_map = { + "prod": prod, + "dev": dev + } + + env = os.getenv("ENVIRONMENT") + logger.info(f"[load_env] 当前的 ENVIRONMENT 变量值:{env}") + + if env in fn_map: + fn_map[env]() # 根据映射执行闭包函数 + + elif os.path.exists(f".env.{env}"): + logger.success(f"加载{env}环境变量配置") + load_dotenv(f".env.{env}", override=True) # override=True 允许覆盖已存在的环境变量 + + else: + logger.error(f"ENVIRONMENT 配置错误,请检查 .env 文件中的 ENVIRONMENT 变量及对应 .env.{env} 是否存在") + RuntimeError(f"ENVIRONMENT 配置错误,请检查 .env 文件中的 ENVIRONMENT 变量及对应 .env.{env} 是否存在") + + +def load_logger(): + logger.remove() # 移除默认配置 + logger.add( + sys.stderr, + format="{time:YYYY-MM-DD HH:mm:ss.SSS} | {level: <7} | {name:.<8}:{function:.<8}:{line: >4} - {message}", + colorize=True, + level=os.getenv("LOG_LEVEL", "INFO"), # 根据环境设置日志级别,默认为INFO + filter=lambda record: "nonebot" not in record["name"] + ) + + + +def scan_provider(env_config: dict): + provider = {} + + # 利用未初始化 env 时获取的 env_mask 来对新的环境变量集去重 + # 避免 GPG_KEY 这样的变量干扰检查 + env_config = dict(filter(lambda item: item[0] not in env_mask, env_config.items())) + + # 遍历 env_config 的所有键 + for key in env_config: + # 检查键是否符合 {provider}_BASE_URL 或 {provider}_KEY 的格式 + if key.endswith("_BASE_URL") or key.endswith("_KEY"): + # 提取 provider 名称 + provider_name = key.split("_", 1)[0] # 从左分割一次,取第一部分 + + # 初始化 provider 的字典(如果尚未初始化) + if provider_name not in provider: + provider[provider_name] = {"url": None, "key": None} + + # 根据键的类型填充 url 或 key + if key.endswith("_BASE_URL"): + provider[provider_name]["url"] = env_config[key] + elif key.endswith("_KEY"): + provider[provider_name]["key"] = env_config[key] + + # 检查每个 provider 是否同时存在 url 和 key + for provider_name, config in provider.items(): + if config["url"] is None or config["key"] is None: + logger.error( + f"provider 内容:{config}\n" + f"env_config 内容:{env_config}" + ) + raise ValueError(f"请检查 '{provider_name}' 提供商配置是否丢失 BASE_URL 或 KEY 环境变量") + + +async def graceful_shutdown(): + try: + global uvicorn_server + if uvicorn_server: + uvicorn_server.force_exit = True # 
+            await uvicorn_server.shutdown()
+
+        tasks = [t for t in asyncio.all_tasks() if t is not asyncio.current_task()]
+        for task in tasks:
+            task.cancel()
+        await asyncio.gather(*tasks, return_exceptions=True)
+
+    except Exception as e:
+        logger.error(f"麦麦关闭失败: {e}")
+
+
+async def uvicorn_main():
+    global uvicorn_server
+    config = uvicorn.Config(
+        app="__main__:app",
+        host=os.getenv("HOST", "127.0.0.1"),
+        port=int(os.getenv("PORT", 8080)),
+        reload=os.getenv("ENVIRONMENT") == "dev",
+        timeout_graceful_shutdown=5,
+        log_config=None,
+        access_log=False
+    )
+    server = uvicorn.Server(config)
+    uvicorn_server = server
+    await server.serve()
+
+
+def raw_main():
+    # 利用 TZ 环境变量设定程序工作的时区
+    # 仅保证行为一致,不依赖 localtime(),实际对生产环境几乎没有作用
+    if platform.system().lower() != 'windows':
+        time.tzset()
+
+    easter_egg()
+    load_logger()
+    init_config()
+    init_env()
+    load_env()
+    load_logger()  # 环境变量加载完成后再次初始化日志,使 .env 中的 LOG_LEVEL 生效
+
+    env_config = {key: os.getenv(key) for key in os.environ}
+    scan_provider(env_config)
+
+    # 设置基础配置
+    base_config = {
+        "websocket_port": int(env_config.get("PORT", 8080)),
+        "host": env_config.get("HOST", "127.0.0.1"),
+        "log_level": "INFO",
+    }
+
+    # 合并配置
+    nonebot.init(**base_config, **env_config)
+
+    # 注册适配器
+    global driver
+    driver = nonebot.get_driver()
+    driver.register_adapter(Adapter)
+
+    # 加载插件
+    nonebot.load_plugins("src/plugins")
 
-# 加载插件
-nonebot.load_plugins("src/plugins")
 if __name__ == "__main__":
-    nonebot.run()
+
+    loop = None  # 提前声明,避免 raw_main() 抛出异常时 finally 中引用未定义的 loop
+    try:
+        raw_main()
+
+        global app
+        app = nonebot.get_asgi()
+
+        loop = asyncio.new_event_loop()
+        asyncio.set_event_loop(loop)
+        loop.run_until_complete(uvicorn_main())
+    except KeyboardInterrupt:
+        logger.warning("麦麦会努力做的更好的!正在停止中......")
+    except Exception as e:
+        logger.error(f"主程序异常: {e}")
+    finally:
+        if loop is not None:
+            loop.run_until_complete(graceful_shutdown())
+            loop.close()
+        logger.info("进程终止完毕,麦麦开始休眠......下次再见哦!")
diff --git a/changelog.md b/changelog.md
new file mode 100644
index 000000000..c68a16ad9
--- /dev/null
+++ b/changelog.md
@@ -0,0 +1,6 @@
+# Changelog
+
+## [0.5.12] - 2025-3-9
+### Added
+- 新增了 我是测试
+
diff --git a/changelog_config.md b/changelog_config.md
new file mode 100644
index 000000000..c4c560644
--- /dev/null
+++ b/changelog_config.md
@@ -0,0 +1,12 @@
+# Changelog
+
+## [0.0.5] - 2025-3-11
+### Added
+- 新增了 `alias_names` 配置项,用于指定麦麦的别名。
+
+## [0.0.4] - 2025-3-9
+### Added
+- 新增了 `memory_ban_words` 配置项,用于指定不希望记忆的词汇。
+
+
+
diff --git a/docker-compose.yml b/docker-compose.yml
index dd2650b23..227df606b 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -2,47 +2,47 @@ services:
   napcat:
     container_name: napcat
     environment:
-      - tz=Asia/Shanghai
+      - TZ=Asia/Shanghai
       - NAPCAT_UID=${NAPCAT_UID}
-      - NAPCAT_GID=${NAPCAT_GID}
+      - NAPCAT_GID=${NAPCAT_GID} # 让 NapCat 获取当前用户 GID,UID,防止权限问题
     ports:
-      - 3000:3000
-      - 3001:3001
       - 6099:6099
-    restart: always
+    restart: unless-stopped
     volumes:
-      - napcatQQ:/app/.config/QQ
-      - napcatCONFIG:/app/napcat/config
-      - maimbotDATA:/MaiMBot/data # 麦麦的图片等要给napcat不然发送图片会有问题
+      - napcatQQ:/app/.config/QQ # 持久化 QQ 本体
+      - napcatCONFIG:/app/napcat/config # 持久化 NapCat 配置文件
+      - maimbotDATA:/MaiMBot/data # NapCat 和 NoneBot 共享此卷,否则发送图片会有问题
     image: mlikiowa/napcat-docker:latest
   mongodb:
    container_name: mongodb
     environment:
-      - tz=Asia/Shanghai
+      - TZ=Asia/Shanghai
+      # - MONGO_INITDB_ROOT_USERNAME=your_username
+      # - MONGO_INITDB_ROOT_PASSWORD=your_password
     expose:
       - "27017"
-    restart: always
+    restart: unless-stopped
     volumes:
-      - mongodb:/data/db
-      - mongodbCONFIG:/data/configdb
+      - mongodb:/data/db # 持久化 MongoDB 数据库
+      - mongodbCONFIG:/data/configdb # 持久化 MongoDB 
配置文件 image: mongo:latest maimbot: container_name: maimbot environment: - - tz=Asia/Shanghai + - TZ=Asia/Shanghai expose: - "8080" - restart: always + restart: unless-stopped depends_on: - mongodb - napcat volumes: - - napcatCONFIG:/MaiMBot/napcat # 自动根据配置中的qq号创建ws反向客户端配置 - - ./bot_config.toml:/MaiMBot/config/bot_config.toml - - maimbotDATA:/MaiMBot/data - - ./.env.prod:/MaiMBot/.env.prod + - napcatCONFIG:/MaiMBot/napcat # 自动根据配置中的 QQ 号创建 ws 反向客户端配置 + - ./bot_config.toml:/MaiMBot/config/bot_config.toml # Toml 配置文件映射 + - maimbotDATA:/MaiMBot/data # NapCat 和 NoneBot 共享此卷,否则发送图片会有问题 + - ./.env.prod:/MaiMBot/.env.prod # Toml 配置文件映射 image: sengokucola/maimbot:latest volumes: diff --git a/docs/Jonathan R.md b/docs/Jonathan R.md new file mode 100644 index 000000000..660caaeec --- /dev/null +++ b/docs/Jonathan R.md @@ -0,0 +1,20 @@ +Jonathan R. Wolpaw 在 “Memory in neuroscience: rhetoric versus reality.” 一文中提到,从神经科学的感觉运动假设出发,整个神经系统的功能是将经验与适当的行为联系起来,而不是单纯的信息存储。 +Jonathan R,Wolpaw. (2019). Memory in neuroscience: rhetoric versus reality.. Behavioral and cognitive neuroscience reviews(2). + +1. **单一过程理论** + - 单一过程理论认为,识别记忆主要是基于熟悉性这一单一因素的影响。熟悉性是指对刺激的一种自动的、无意识的感知,它可以使我们在没有回忆起具体细节的情况下,判断一个刺激是否曾经出现过。 + - 例如,在一些实验中,研究者发现被试可以在没有回忆起具体学习情境的情况下,对曾经出现过的刺激做出正确的判断,这被认为是熟悉性在起作用1。 +2. **双重过程理论** + - 双重过程理论则认为,识别记忆是基于两个过程:回忆和熟悉性。回忆是指对过去经验的有意识的回忆,它可以使我们回忆起具体的细节和情境;熟悉性则是一种自动的、无意识的感知。 + - 该理论认为,在识别记忆中,回忆和熟悉性共同作用,使我们能够判断一个刺激是否曾经出现过。例如,在 “记得 / 知道” 范式中,被试被要求判断他们对一个刺激的记忆是基于回忆还是熟悉性。研究发现,被试可以区分这两种不同的记忆过程,这为双重过程理论提供了支持1。 + + + +1. **神经元节点与连接**:借鉴神经网络原理,将每个记忆单元视为一个神经元节点。节点之间通过连接相互关联,连接的强度代表记忆之间的关联程度。在形态学联想记忆中,具有相似形态特征的记忆节点连接强度较高。例如,苹果和橘子的记忆节点,由于在形状、都是水果等形态语义特征上相似,它们之间的连接强度大于苹果与汽车记忆节点间的连接强度。 +2. **记忆聚类与层次结构**:依据形态特征的相似性对记忆进行聚类,形成不同的记忆簇。每个记忆簇内部的记忆具有较高的相似性,而不同记忆簇之间的记忆相似性较低。同时,构建记忆的层次结构,高层次的记忆节点代表更抽象、概括的概念,低层次的记忆节点对应具体的实例。比如,“水果” 作为高层次记忆节点,连接着 “苹果”“橘子”“香蕉” 等低层次具体水果的记忆节点。 +3. **网络的动态更新**:随着新记忆的不断加入,记忆网络动态调整。新记忆节点根据其形态特征与现有网络中的节点建立连接,同时影响相关连接的强度。若新记忆与某个记忆簇的特征高度相似,则被纳入该记忆簇;若具有独特特征,则可能引发新的记忆簇的形成。例如,当系统学习到一种新的水果 “番石榴”,它会根据番石榴的形态、语义等特征,在记忆网络中找到与之最相似的区域(如水果记忆簇),并建立相应连接,同时调整周围节点连接强度以适应这一新记忆。 + + + +- **相似性联想**:该理论认为,当两个或多个事物在形态上具有相似性时,它们在记忆中会形成关联。例如,梨和苹果在形状和都是水果这一属性上有相似性,所以当我们看到梨时,很容易通过形态学联想记忆联想到苹果。这种相似性联想有助于我们对新事物进行分类和理解,当遇到一个新的类似水果时,我们可以通过与已有的水果记忆进行相似性匹配,来推测它的一些特征。 +- **时空关联性联想**:除了相似性联想,MAM 还强调时空关联性联想。如果两个事物在时间或空间上经常同时出现,它们也会在记忆中形成关联。比如,每次在公园里看到花的时候,都能听到鸟儿的叫声,那么花和鸟儿叫声的形态特征(花的视觉形态和鸟叫的听觉形态)就会在记忆中形成关联,以后听到鸟叫可能就会联想到公园里的花。 \ No newline at end of file diff --git a/docs/docker_deploy.md b/docs/docker_deploy.md index c9b069309..db759dfd0 100644 --- a/docs/docker_deploy.md +++ b/docs/docker_deploy.md @@ -1,24 +1,97 @@ # 🐳 Docker 部署指南 -## 部署步骤(推荐,但不一定是最新) +## 部署步骤 (推荐,但不一定是最新) + +**"更新镜像与容器"部分在本文档 [Part 6](#6-更新镜像与容器)** + +### 0. 前提说明 + +**本文假设读者已具备一定的 Docker 基础知识。若您对 Docker 不熟悉,建议先参考相关教程或文档进行学习,或选择使用 [📦Linux手动部署指南](./manual_deploy_linux.md) 或 [📦Windows手动部署指南](./manual_deploy_windows.md) 。** + + +### 1. 获取Docker配置文件 + +- 建议先单独创建好一个文件夹并进入,作为工作目录 -1. 获取配置文件: ```bash -wget https://raw.githubusercontent.com/SengokuCola/MaiMBot/main/docker-compose.yml +wget https://raw.githubusercontent.com/SengokuCola/MaiMBot/main/docker-compose.yml -O docker-compose.yml ``` -2. 启动服务: +- 若需要启用MongoDB数据库的用户名和密码,可进入docker-compose.yml,取消MongoDB处的注释并修改变量旁 `=` 后方的值为你的用户名和密码\ +修改后请注意在之后配置 `.env.prod` 文件时指定MongoDB数据库的用户名密码 + + +### 2. 启动服务 + +- **!!! 
请在第一次启动前确保当前工作目录下 `.env.prod` 与 `bot_config.toml` 文件存在 !!!**\ +由于Docker文件映射行为的特殊性,若宿主机的映射路径不存在,可能导致意外的目录创建,而不会创建文件,由于此处需要文件映射到文件,需提前确保文件存在且路径正确,可使用如下命令: + +```bash +touch .env.prod +touch bot_config.toml +``` + +- 启动Docker容器: + ```bash NAPCAT_UID=$(id -u) NAPCAT_GID=$(id -g) docker compose up -d +# 旧版Docker中可能找不到docker compose,请使用docker-compose工具替代 +NAPCAT_UID=$(id -u) NAPCAT_GID=$(id -g) docker-compose up -d ``` -3. 修改配置后重启: + +### 3. 修改配置并重启Docker + +- 请前往 [🎀新手配置指南](./installation_cute.md) 或 [⚙️标准配置指南](./installation_standard.md) 完成 `.env.prod` 与 `bot_config.toml` 配置文件的编写\ +**需要注意 `.env.prod` 中HOST处IP的填写,Docker中部署和系统中直接安装的配置会有所不同** + +- 重启Docker容器: + +```bash +docker restart maimbot # 若修改过容器名称则替换maimbot为你自定的名称 +``` + +- 下方命令可以但不推荐,只是同时重启NapCat、MongoDB、MaiMBot三个服务 + ```bash NAPCAT_UID=$(id -u) NAPCAT_GID=$(id -g) docker compose restart +# 旧版Docker中可能找不到docker compose,请使用docker-compose工具替代 +NAPCAT_UID=$(id -u) NAPCAT_GID=$(id -g) docker-compose restart ``` + +### 4. 登入NapCat管理页添加反向WebSocket + +- 在浏览器地址栏输入 `http://<宿主机IP>:6099/` 进入NapCat的管理Web页,添加一个Websocket客户端 + +> 网络配置 -> 新建 -> Websocket客户端 + +- Websocket客户端的名称自定,URL栏填入 `ws://maimbot:8080/onebot/v11/ws`,启用并保存即可\ +(若修改过容器名称则替换maimbot为你自定的名称) + + +### 5. 部署完成,愉快地和麦麦对话吧! + + +### 6. 更新镜像与容器 + +- 拉取最新镜像 + +```bash +docker-compose pull +``` + +- 执行启动容器指令,该指令会自动重建镜像有更新的容器并启动 + +```bash +NAPCAT_UID=$(id -u) NAPCAT_GID=$(id -g) docker compose up -d +# 旧版Docker中可能找不到docker compose,请使用docker-compose工具替代 +NAPCAT_UID=$(id -u) NAPCAT_GID=$(id -g) docker-compose up -d +``` + + ## ⚠️ 注意事项 - 目前部署方案仍在测试中,可能存在未知问题 - 配置文件中的API密钥请妥善保管,不要泄露 -- 建议先在测试环境中运行,确认无误后再部署到生产环境 \ No newline at end of file +- 建议先在测试环境中运行,确认无误后再部署到生产环境 \ No newline at end of file diff --git a/docs/installation_cute.md b/docs/installation_cute.md index 278cbfe20..4465660f9 100644 --- a/docs/installation_cute.md +++ b/docs/installation_cute.md @@ -52,12 +52,12 @@ key = "SILICONFLOW_KEY" # 用同一张门票就可以啦 如果你想用DeepSeek官方的服务,就要这样改: ```toml [model.llm_reasoning] -name = "Pro/deepseek-ai/DeepSeek-R1" +name = "deepseek-reasoner" # 改成对应的模型名称,这里为DeepseekR1 base_url = "DEEP_SEEK_BASE_URL" # 改成去DeepSeek游乐园 key = "DEEP_SEEK_KEY" # 用DeepSeek的门票 [model.llm_normal] -name = "Pro/deepseek-ai/DeepSeek-V3" +name = "deepseek-chat" # 改成对应的模型名称,这里为DeepseekV3 base_url = "DEEP_SEEK_BASE_URL" # 也去DeepSeek游乐园 key = "DEEP_SEEK_KEY" # 用同一张DeepSeek门票 ``` @@ -88,11 +88,11 @@ CHAT_ANY_WHERE_KEY=your_key CHAT_ANY_WHERE_BASE_URL=https://api.chatanywhere.tech/v1 # 如果你不知道这是什么,那么下面这些不用改,保持原样就好啦 -HOST=127.0.0.1 +HOST=127.0.0.1 # 如果使用Docker部署,需要改成0.0.0.0喵,不然听不见群友讲话了喵 PORT=8080 # 这些是数据库设置,一般也不用改呢 -MONGODB_HOST=127.0.0.1 +MONGODB_HOST=127.0.0.1 # 如果使用Docker部署,需要改成数据库容器的名字喵,默认是mongodb喵 MONGODB_PORT=27017 DATABASE_NAME=MegBot MONGODB_USERNAME = "" # 如果数据库需要用户名,就在这里填写喵 @@ -110,7 +110,8 @@ PLUGINS=["src2.plugins.chat"] # 这里是机器人的插件列表呢 ```toml [bot] qq = "把这里改成你的机器人QQ号喵" # 填写你的机器人QQ号 -nickname = "麦麦" # 机器人的名字,你可以改成你喜欢的任何名字哦 +nickname = "麦麦" # 机器人的名字,你可以改成你喜欢的任何名字哦,建议和机器人QQ名称/群昵称一样哦 +alias_names = ["小麦", "阿麦"] # 也可以用这个招呼机器人,可以不设置呢 [personality] # 这里可以设置机器人的性格呢,让它更有趣一些喵 diff --git a/docs/installation_standard.md b/docs/installation_standard.md index 6e4920220..03b66dc46 100644 --- a/docs/installation_standard.md +++ b/docs/installation_standard.md @@ -8,7 +8,7 @@ ## API配置说明 -`.env.prod`和`bot_config.toml`中的API配置关系如下: +`.env.prod` 和 `bot_config.toml` 中的API配置关系如下: ### 在.env.prod中定义API凭证: ```ini @@ -34,7 +34,7 @@ key = "SILICONFLOW_KEY" # 引用.env.prod中定义的密钥 如需切换到其他API服务,只需修改引用: ```toml [model.llm_reasoning] -name = 
"Pro/deepseek-ai/DeepSeek-R1" +name = "deepseek-reasoner" # 改成对应的模型名称,这里为DeepseekR1 base_url = "DEEP_SEEK_BASE_URL" # 切换为DeepSeek服务 key = "DEEP_SEEK_KEY" # 使用DeepSeek密钥 ``` @@ -52,12 +52,12 @@ CHAT_ANY_WHERE_KEY=your_key CHAT_ANY_WHERE_BASE_URL=https://api.chatanywhere.tech/v1 # 服务配置 -HOST=127.0.0.1 -PORT=8080 +HOST=127.0.0.1 # 如果使用Docker部署,需要改成0.0.0.0,否则QQ消息无法传入 +PORT=8080 # 与反向端口相同 # 数据库配置 -MONGODB_HOST=127.0.0.1 -MONGODB_PORT=27017 +MONGODB_HOST=127.0.0.1 # 如果使用Docker部署,需要改成数据库容器的名字,默认是mongodb +MONGODB_PORT=27017 # MongoDB端口 DATABASE_NAME=MegBot MONGODB_USERNAME = "" # 数据库用户名 MONGODB_PASSWORD = "" # 数据库密码 @@ -72,6 +72,9 @@ PLUGINS=["src2.plugins.chat"] [bot] qq = "机器人QQ号" # 必填 nickname = "麦麦" # 机器人昵称 +# alias_names: 配置机器人可使用的别名。当机器人在群聊或对话中被调用时,别名可以作为直接命令或提及机器人的关键字使用。 +# 该配置项为字符串数组。例如: ["小麦", "阿麦"] +alias_names = ["小麦", "阿麦"] # 机器人别名 [personality] prompt_personality = [ diff --git a/docs/manual_deploy_linux.md b/docs/manual_deploy_linux.md new file mode 100644 index 000000000..41f0390b8 --- /dev/null +++ b/docs/manual_deploy_linux.md @@ -0,0 +1,115 @@ +# 📦 Linux系统如何手动部署MaiMbot麦麦? + +## 准备工作 +- 一台联网的Linux设备(本教程以Ubuntu/Debian系为例) +- QQ小号(QQ框架的使用可能导致qq被风控,严重(小概率)可能会导致账号封禁,强烈不推荐使用大号) +- 可用的大模型API +- 一个AI助手,网上随便搜一家打开来用都行,可以帮你解决一些不懂的问题 +- 以下内容假设你对Linux系统有一定的了解,如果觉得难以理解,请直接用Windows系统部署[Windows系统部署指南](./manual_deploy_windows.md) + +## 你需要知道什么? + +- 如何正确向AI助手提问,来学习新知识 + +- Python是什么 + +- Python的虚拟环境是什么?如何创建虚拟环境 + +- 命令行是什么 + +- 数据库是什么?如何安装并启动MongoDB + +- 如何运行一个QQ机器人,以及NapCat框架是什么 +--- + +## 环境配置 + +### 1️⃣ **确认Python版本** + +需确保Python版本为3.9及以上 + +```bash +python --version +# 或 +python3 --version +``` +如果版本低于3.9,请更新Python版本。 +```bash +# Ubuntu/Debian +sudo apt update +sudo apt install python3.9 +# 如执行了这一步,建议在执行时将python3指向python3.9 +# 更新替代方案,设置 python3.9 为默认的 python3 版本: +sudo update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.9 1 +sudo update-alternatives --config python3 +``` + +### 2️⃣ **创建虚拟环境** +```bash +# 方法1:使用venv(推荐) +python3 -m venv maimbot +source maimbot/bin/activate # 激活环境 + +# 方法2:使用conda(需先安装Miniconda) +wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh +bash Miniconda3-latest-Linux-x86_64.sh +conda create -n maimbot python=3.9 +conda activate maimbot + +# 通过以上方法创建并进入虚拟环境后,再执行以下命令 + +# 安装依赖(任选一种环境) +pip install -r requirements.txt +``` + +--- + +## 数据库配置 +### 3️⃣ **安装并启动MongoDB** +- 安装与启动: Debian参考[官方文档](https://docs.mongodb.com/manual/tutorial/install-mongodb-on-debian/),Ubuntu参考[官方文档](https://docs.mongodb.com/manual/tutorial/install-mongodb-on-ubuntu/) + +- 默认连接本地27017端口 +--- + +## NapCat配置 +### 4️⃣ **安装NapCat框架** + +- 参考[NapCat官方文档](https://www.napcat.wiki/guide/boot/Shell#napcat-installer-linux%E4%B8%80%E9%94%AE%E4%BD%BF%E7%94%A8%E8%84%9A%E6%9C%AC-%E6%94%AF%E6%8C%81ubuntu-20-debian-10-centos9)安装 + +- 使用QQ小号登录,添加反向WS地址: `ws://127.0.0.1:8080/onebot/v11/ws` + +--- + +## 配置文件设置 +### 5️⃣ **配置文件设置,让麦麦Bot正常工作** +- 修改环境配置文件: `.env.prod` +- 修改机器人配置文件: `bot_config.toml` + + +--- + +## 启动机器人 +### 6️⃣ **启动麦麦机器人** +```bash +# 在项目目录下操作 +nb run +# 或 +python3 bot.py +``` + +--- + +## **其他组件(可选)** +- 直接运行 knowledge.py生成知识库 + + +--- + +## 常见问题 +🔧 权限问题: 在命令前加 `sudo` +🔌 端口占用: 使用 `sudo lsof -i :8080` 查看端口占用 +🛡️ 防火墙: 确保8080/27017端口开放 +```bash +sudo ufw allow 8080/tcp +sudo ufw allow 27017/tcp +``` \ No newline at end of file diff --git a/docs/manual_deploy.md b/docs/manual_deploy_windows.md similarity index 90% rename from docs/manual_deploy.md rename to docs/manual_deploy_windows.md index 6d53beb4e..eebdc4f41 100644 --- a/docs/manual_deploy.md +++ 
b/docs/manual_deploy_windows.md @@ -1,4 +1,4 @@ -# 📦 如何手动部署MaiMbot麦麦? +# 📦 Windows系统如何手动部署MaiMbot麦麦? ## 你需要什么? @@ -30,7 +30,7 @@ 在创建虚拟环境之前,请确保你的电脑上安装了Python 3.9及以上版本。如果没有,可以按以下步骤安装: -1. 访问Python官网下载页面:https://www.python.org/downloads/release/python-3913/ +1. 访问Python官网下载页面: https://www.python.org/downloads/release/python-3913/ 2. 下载Windows安装程序 (64-bit): `python-3.9.13-amd64.exe` 3. 运行安装程序,并确保勾选"Add Python 3.9 to PATH"选项 4. 点击"Install Now"开始安装 @@ -79,11 +79,11 @@ pip install -r requirements.txt ### 3️⃣ **配置NapCat,让麦麦bot与qq取得联系** - 安装并登录NapCat(用你的qq小号) -- 添加反向WS:`ws://localhost:8080/onebot/v11/ws` +- 添加反向WS: `ws://127.0.0.1:8080/onebot/v11/ws` ### 4️⃣ **配置文件设置,让麦麦Bot正常工作** -- 修改环境配置文件:`.env.prod` -- 修改机器人配置文件:`bot_config.toml` +- 修改环境配置文件: `.env.prod` +- 修改机器人配置文件: `bot_config.toml` ### 5️⃣ **启动麦麦机器人** - 打开命令行,cd到对应路径 diff --git a/flake.nix b/flake.nix index 54737d640..3586857f0 100644 --- a/flake.nix +++ b/flake.nix @@ -22,6 +22,7 @@ pythonEnv = pkgs.python3.withPackages ( ps: with ps; [ + ruff pymongo python-dotenv pydantic diff --git a/pyproject.toml b/pyproject.toml index e54dcdacd..0a4805744 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,23 +1,51 @@ [project] -name = "Megbot" +name = "MaiMaiBot" version = "0.1.0" -description = "New Bot Project" +description = "MaiMaiBot" [tool.nonebot] plugins = ["src.plugins.chat"] -plugin_dirs = ["src/plugins"] +plugin_dirs = ["src/plugins"] [tool.ruff] -# 设置 Python 版本 -target-version = "py39" + +include = ["*.py"] + +# 行长度设置 +line-length = 120 + +[tool.ruff.lint] +fixable = ["ALL"] +unfixable = [] + +# 如果一个变量的名称以下划线开头,即使它未被使用,也不应该被视为错误或警告。 +dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" # 启用的规则 select = [ - "E", # pycodestyle 错误 - "F", # pyflakes - "I", # isort - "B", # flake8-bugbear + "E", # pycodestyle 错误 + "F", # pyflakes + "B", # flake8-bugbear ] -# 行长度设置 -line-length = 88 \ No newline at end of file +ignore = ["E711"] + +[tool.ruff.format] +docstring-code-format = true +indent-style = "space" + + +# 使用双引号表示字符串 +quote-style = "double" + +# 尊重魔法尾随逗号 +# 例如: +# items = [ +# "apple", +# "banana", +# "cherry", +# ] +skip-magic-trailing-comma = false + +# 自动检测合适的换行符 +line-ending = "auto" diff --git a/requirements.txt b/requirements.txt index 4f969682f..0acaade5e 100644 Binary files a/requirements.txt and b/requirements.txt differ diff --git a/run.bat b/run.bat index 1d1385671..91904bc34 100644 --- a/run.bat +++ b/run.bat @@ -1,6 +1,10 @@ @ECHO OFF chcp 65001 -REM python -m venv venv -call venv\Scripts\activate.bat -REM pip install -i https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple --upgrade -r requirements.txt +if not exist "venv" ( + python -m venv venv + call venv\Scripts\activate.bat + pip install -i https://mirrors.aliyun.com/pypi/simple --upgrade -r requirements.txt + ) else ( + call venv\Scripts\activate.bat +) python run.py \ No newline at end of file diff --git a/run.py b/run.py index 0a195544f..50e312c37 100644 --- a/run.py +++ b/run.py @@ -1,7 +1,7 @@ import os import subprocess import zipfile - +import sys import requests from tqdm import tqdm @@ -37,7 +37,7 @@ def extract_files(zip_path, target_dir): f.write(zip_ref.read(file)) -def run_cmd(command: str, open_new_window: bool = False): +def run_cmd(command: str, open_new_window: bool = True): """ 运行 cmd 命令 @@ -45,26 +45,19 @@ def run_cmd(command: str, open_new_window: bool = False): command (str): 指定要运行的命令 open_new_window (bool): 指定是否新建一个 cmd 窗口运行 """ - creationflags = 0 if open_new_window: - creationflags = subprocess.CREATE_NEW_CONSOLE - subprocess.Popen( - [ 
- "cmd.exe", - "/c", - command, - ], - creationflags=creationflags, - ) + command = "start " + command + subprocess.Popen(command, shell=True) def run_maimbot(): run_cmd(r"napcat\NapCatWinBootMain.exe 10001", False) + if not os.path.exists(r"mongodb\db"): + os.makedirs(r"mongodb\db") run_cmd( - r"mongodb\bin\mongod.exe --dbpath=" + os.getcwd() + r"\mongodb\db --port 27017", - True, + r"mongodb\bin\mongod.exe --dbpath=" + os.getcwd() + r"\mongodb\db --port 27017" ) - run_cmd("nb run", True) + run_cmd("nb run") def install_mongodb(): @@ -87,17 +80,35 @@ def install_mongodb(): for data in resp.iter_content(chunk_size=1024): size = file.write(data) bar.update(size) - extract_files("mongodb.zip", "mongodb") - print("MongoDB 下载完成") - os.remove("mongodb.zip") + extract_files("mongodb.zip", "mongodb") + print("MongoDB 下载完成") + os.remove("mongodb.zip") + choice = input( + "是否安装 MongoDB Compass?此软件可以以可视化的方式修改数据库,建议安装(Y/n)" + ).upper() + if choice == "Y" or choice == "": + install_mongodb_compass() + + +def install_mongodb_compass(): + run_cmd( + r"powershell Start-Process powershell -Verb runAs 'Set-ExecutionPolicy RemoteSigned'" + ) + input("请在弹出的用户账户控制中点击“是”后按任意键继续安装") + run_cmd(r"powershell mongodb\bin\Install-Compass.ps1") + input("按任意键启动麦麦") + input("如不需要启动此窗口可直接关闭,无需等待 Compass 安装完成") + run_maimbot() def install_napcat(): - run_cmd("start https://github.com/NapNeko/NapCatQQ/releases", True) + run_cmd("start https://github.com/NapNeko/NapCatQQ/releases", False) print("请检查弹出的浏览器窗口,点击**第一个**蓝色的“Win64无头” 下载 napcat") napcat_filename = input( "下载完成后请把文件复制到此文件夹,并将**不包含后缀的文件名**输入至此窗口,如 NapCat.32793.Shell:" ) + if(napcat_filename[-4:] == ".zip"): + napcat_filename = napcat_filename[:-4] extract_files(napcat_filename + ".zip", "napcat") print("NapCat 安装完成") os.remove(napcat_filename + ".zip") @@ -105,11 +116,15 @@ def install_napcat(): if __name__ == "__main__": os.system("cls") + if sys.version_info < (3, 9): + print("当前 Python 版本过低,最低版本为 3.9,请更新 Python 版本") + print("按任意键退出") + input() + exit(1) choice = input( "请输入要进行的操作:\n" "1.首次安装\n" "2.运行麦麦\n" - "3.运行麦麦并启动可视化推理界面\n" ) os.system("cls") if choice == "1": @@ -117,6 +132,9 @@ if __name__ == "__main__": install_mongodb() elif choice == "2": run_maimbot() - elif choice == "3": - run_maimbot() - run_cmd("python src/gui/reasoning_gui.py", True) + choice = input("是否启动推理可视化?(y/N)").upper() + if choice == "Y": + run_cmd(r"python src\gui\reasoning_gui.py") + choice = input("是否启动记忆可视化?(y/N)").upper() + if choice == "Y": + run_cmd(r"python src/plugins/memory_system/memory_manual_build.py") diff --git a/run_memory_vis.bat b/run_memory_vis.bat new file mode 100644 index 000000000..b1feb0cb2 --- /dev/null +++ b/run_memory_vis.bat @@ -0,0 +1,29 @@ +@echo on +chcp 65001 > nul +set /p CONDA_ENV="请输入要激活的 conda 环境名称: " +call conda activate %CONDA_ENV% +if errorlevel 1 ( + echo 激活 conda 环境失败 + pause + exit /b 1 +) +echo Conda 环境 "%CONDA_ENV%" 激活成功 + +set /p OPTION="请选择运行选项 (1: 运行全部绘制, 2: 运行简单绘制): " +if "%OPTION%"=="1" ( + python src/plugins/memory_system/memory_manual_build.py +) else if "%OPTION%"=="2" ( + python src/plugins/memory_system/draw_memory.py +) else ( + echo 无效的选项 + pause + exit /b 1 +) + +if errorlevel 1 ( + echo 命令执行失败,错误代码 %errorlevel% + pause + exit /b 1 +) +echo 脚本成功完成 +pause \ No newline at end of file diff --git a/src/gui/reasoning_gui.py b/src/gui/reasoning_gui.py index 340791ee3..5768ddc09 100644 --- a/src/gui/reasoning_gui.py +++ b/src/gui/reasoning_gui.py @@ -5,6 +5,9 @@ import threading import time from datetime import datetime from typing import 
Dict, List +from loguru import logger +from typing import Optional +from pymongo import MongoClient import customtkinter as ctk from dotenv import load_dotenv @@ -17,23 +20,20 @@ root_dir = os.path.abspath(os.path.join(current_dir, '..', '..')) # 加载环境变量 if os.path.exists(os.path.join(root_dir, '.env.dev')): load_dotenv(os.path.join(root_dir, '.env.dev')) - print("成功加载开发环境配置") + logger.info("成功加载开发环境配置") elif os.path.exists(os.path.join(root_dir, '.env.prod')): load_dotenv(os.path.join(root_dir, '.env.prod')) - print("成功加载生产环境配置") + logger.info("成功加载生产环境配置") else: - print("未找到环境配置文件") + logger.error("未找到环境配置文件") sys.exit(1) -from typing import Optional - -from pymongo import MongoClient - class Database: _instance: Optional["Database"] = None - - def __init__(self, host: str, port: int, db_name: str, username: str = None, password: str = None, auth_source: str = None): + + def __init__(self, host: str, port: int, db_name: str, username: str = None, password: str = None, + auth_source: str = None): if username and password: self.client = MongoClient( host=host, @@ -45,96 +45,96 @@ class Database: else: self.client = MongoClient(host, port) self.db = self.client[db_name] - + @classmethod - def initialize(cls, host: str, port: int, db_name: str, username: str = None, password: str = None, auth_source: str = None) -> "Database": + def initialize(cls, host: str, port: int, db_name: str, username: str = None, password: str = None, + auth_source: str = None) -> "Database": if cls._instance is None: cls._instance = cls(host, port, db_name, username, password, auth_source) return cls._instance - + @classmethod def get_instance(cls) -> "Database": if cls._instance is None: raise RuntimeError("Database not initialized") - return cls._instance - + return cls._instance class ReasoningGUI: def __init__(self): # 记录启动时间戳,转换为Unix时间戳 self.start_timestamp = datetime.now().timestamp() - print(f"程序启动时间戳: {self.start_timestamp}") - + logger.info(f"程序启动时间戳: {self.start_timestamp}") + # 设置主题 ctk.set_appearance_mode("dark") ctk.set_default_color_theme("blue") - + # 创建主窗口 self.root = ctk.CTk() self.root.title('麦麦推理') self.root.geometry('800x600') self.root.protocol("WM_DELETE_WINDOW", self._on_closing) - + # 初始化数据库连接 try: self.db = Database.get_instance().db - print("数据库连接成功") + logger.success("数据库连接成功") except RuntimeError: - print("数据库未初始化,正在尝试初始化...") + logger.warning("数据库未初始化,正在尝试初始化...") try: - Database.initialize("localhost", 27017, "maimai_bot") + Database.initialize("127.0.0.1", 27017, "maimai_bot") self.db = Database.get_instance().db - print("数据库初始化成功") - except Exception as e: - print(f"数据库初始化失败: {e}") + logger.success("数据库初始化成功") + except Exception: + logger.exception("数据库初始化失败") sys.exit(1) - + # 存储群组数据 self.group_data: Dict[str, List[dict]] = {} - + # 创建更新队列 self.update_queue = queue.Queue() - + # 创建主框架 self.frame = ctk.CTkFrame(self.root) self.frame.pack(pady=20, padx=20, fill="both", expand=True) - + # 添加标题 self.title = ctk.CTkLabel(self.frame, text="麦麦的脑内所想", font=("Arial", 24)) self.title.pack(pady=10, padx=10) - + # 创建左右分栏 self.paned = ctk.CTkFrame(self.frame) self.paned.pack(fill="both", expand=True, padx=10, pady=10) - + # 左侧群组列表 self.left_frame = ctk.CTkFrame(self.paned, width=200) self.left_frame.pack(side="left", fill="y", padx=5, pady=5) - + self.group_label = ctk.CTkLabel(self.left_frame, text="群组列表", font=("Arial", 16)) self.group_label.pack(pady=5) - + # 创建可滚动框架来容纳群组按钮 self.group_scroll_frame = ctk.CTkScrollableFrame(self.left_frame, width=180, height=400) 
self.group_scroll_frame.pack(pady=5, padx=5, fill="both", expand=True) - + # 存储群组按钮的字典 self.group_buttons: Dict[str, ctk.CTkButton] = {} # 当前选中的群组ID self.selected_group_id: Optional[str] = None - + # 右侧内容显示 self.right_frame = ctk.CTkFrame(self.paned) self.right_frame.pack(side="right", fill="both", expand=True, padx=5, pady=5) - + self.content_label = ctk.CTkLabel(self.right_frame, text="推理内容", font=("Arial", 16)) self.content_label.pack(pady=5) - + # 创建富文本显示框 self.content_text = ctk.CTkTextbox(self.right_frame, width=500, height=400) self.content_text.pack(pady=5, padx=5, fill="both", expand=True) - + # 配置文本标签 - 只使用颜色 self.content_text.tag_config("timestamp", foreground="#888888") # 时间戳使用灰色 self.content_text.tag_config("user", foreground="#4CAF50") # 用户名使用绿色 @@ -144,11 +144,11 @@ class ReasoningGUI: self.content_text.tag_config("reasoning", foreground="#FF9800") # 推理过程使用橙色 self.content_text.tag_config("response", foreground="#E91E63") # 回复使用粉色 self.content_text.tag_config("separator", foreground="#666666") # 分隔符使用深灰色 - + # 底部控制栏 self.control_frame = ctk.CTkFrame(self.frame) self.control_frame.pack(fill="x", padx=10, pady=5) - + self.clear_button = ctk.CTkButton( self.control_frame, text="清除显示", @@ -156,19 +156,19 @@ class ReasoningGUI: width=120 ) self.clear_button.pack(side="left", padx=5) - + # 启动自动更新线程 self.update_thread = threading.Thread(target=self._auto_update, daemon=True) self.update_thread.start() - + # 启动GUI更新检查 self.root.after(100, self._process_queue) - + def _on_closing(self): """处理窗口关闭事件""" self.root.quit() sys.exit(0) - + def _process_queue(self): """处理更新队列中的任务""" try: @@ -183,14 +183,14 @@ class ReasoningGUI: finally: # 继续检查队列 self.root.after(100, self._process_queue) - + def _update_group_list_gui(self): """在主线程中更新群组列表""" # 清除现有按钮 for button in self.group_buttons.values(): button.destroy() self.group_buttons.clear() - + # 创建新的群组按钮 for group_id in self.group_data.keys(): button = ctk.CTkButton( @@ -203,16 +203,16 @@ class ReasoningGUI: ) button.pack(pady=2, padx=5) self.group_buttons[group_id] = button - + # 如果有选中的群组,保持其高亮状态 if self.selected_group_id and self.selected_group_id in self.group_buttons: self._highlight_selected_group(self.selected_group_id) - + def _on_group_select(self, group_id: str): """处理群组选择事件""" self._highlight_selected_group(group_id) self._update_display_gui(group_id) - + def _highlight_selected_group(self, group_id: str): """高亮显示选中的群组按钮""" # 重置所有按钮的颜色 @@ -223,9 +223,9 @@ class ReasoningGUI: else: # 恢复其他按钮的默认颜色 button.configure(fg_color="#2B2B2B", hover_color="#404040") - + self.selected_group_id = group_id - + def _update_display_gui(self, group_id: str): """在主线程中更新显示内容""" if group_id in self.group_data: @@ -234,19 +234,19 @@ class ReasoningGUI: # 时间戳 time_str = item['time'].strftime("%Y-%m-%d %H:%M:%S") self.content_text.insert("end", f"[{time_str}]\n", "timestamp") - + # 用户信息 self.content_text.insert("end", "用户: ", "timestamp") self.content_text.insert("end", f"{item.get('user', '未知')}\n", "user") - + # 消息内容 self.content_text.insert("end", "消息: ", "timestamp") self.content_text.insert("end", f"{item.get('message', '')}\n", "message") - + # 模型信息 self.content_text.insert("end", "模型: ", "timestamp") self.content_text.insert("end", f"{item.get('model', '')}\n", "model") - + # Prompt内容 self.content_text.insert("end", "Prompt内容:\n", "timestamp") prompt_text = item.get('prompt', '') @@ -257,7 +257,7 @@ class ReasoningGUI: self.content_text.insert("end", " " + line + "\n", "prompt") else: self.content_text.insert("end", " 无Prompt内容\n", "prompt") - + # 推理过程 
self.content_text.insert("end", "推理过程:\n", "timestamp") reasoning_text = item.get('reasoning', '') @@ -268,53 +268,53 @@ class ReasoningGUI: self.content_text.insert("end", " " + line + "\n", "reasoning") else: self.content_text.insert("end", " 无推理过程\n", "reasoning") - + # 回复内容 self.content_text.insert("end", "回复: ", "timestamp") self.content_text.insert("end", f"{item.get('response', '')}\n", "response") - + # 分隔符 - self.content_text.insert("end", f"\n{'='*50}\n\n", "separator") - + self.content_text.insert("end", f"\n{'=' * 50}\n\n", "separator") + # 滚动到顶部 self.content_text.see("1.0") - + def _auto_update(self): """自动更新函数""" while True: try: # 从数据库获取最新数据,只获取启动时间之后的记录 query = {"time": {"$gt": self.start_timestamp}} - print(f"查询条件: {query}") - + logger.debug(f"查询条件: {query}") + # 先获取一条记录检查时间格式 sample = self.db.reasoning_logs.find_one() if sample: - print(f"样本记录时间格式: {type(sample['time'])} 值: {sample['time']}") - + logger.debug(f"样本记录时间格式: {type(sample['time'])} 值: {sample['time']}") + cursor = self.db.reasoning_logs.find(query).sort("time", -1) new_data = {} total_count = 0 - + for item in cursor: # 调试输出 if total_count == 0: - print(f"记录时间: {item['time']}, 类型: {type(item['time'])}") - + logger.debug(f"记录时间: {item['time']}, 类型: {type(item['time'])}") + total_count += 1 group_id = str(item.get('group_id', 'unknown')) if group_id not in new_data: new_data[group_id] = [] - + # 转换时间戳为datetime对象 if isinstance(item['time'], (int, float)): time_obj = datetime.fromtimestamp(item['time']) elif isinstance(item['time'], datetime): time_obj = item['time'] else: - print(f"未知的时间格式: {type(item['time'])}") + logger.warning(f"未知的时间格式: {type(item['time'])}") time_obj = datetime.now() # 使用当前时间作为后备 - + new_data[group_id].append({ 'time': time_obj, 'user': item.get('user', '未知'), @@ -324,13 +324,13 @@ class ReasoningGUI: 'response': item.get('response', ''), 'prompt': item.get('prompt', '') # 添加prompt字段 }) - - print(f"从数据库加载了 {total_count} 条记录,分布在 {len(new_data)} 个群组中") - + + logger.info(f"从数据库加载了 {total_count} 条记录,分布在 {len(new_data)} 个群组中") + # 更新数据 if new_data != self.group_data: self.group_data = new_data - print("数据已更新,正在刷新显示...") + logger.info("数据已更新,正在刷新显示...") # 将更新任务添加到队列 self.update_queue.put({'type': 'update_group_list'}) if self.group_data: @@ -341,16 +341,16 @@ class ReasoningGUI: 'type': 'update_display', 'group_id': self.selected_group_id }) - except Exception as e: - print(f"自动更新出错: {e}") - + except Exception: + logger.exception("自动更新出错") + # 每5秒更新一次 time.sleep(5) - + def clear_display(self): """清除显示内容""" self.content_text.delete("1.0", "end") - + def run(self): """运行GUI""" self.root.mainloop() @@ -359,18 +359,17 @@ class ReasoningGUI: def main(): """主函数""" Database.initialize( - host= os.getenv("MONGODB_HOST"), - port= int(os.getenv("MONGODB_PORT")), - db_name= os.getenv("DATABASE_NAME"), - username= os.getenv("MONGODB_USERNAME"), - password= os.getenv("MONGODB_PASSWORD"), + host=os.getenv("MONGODB_HOST"), + port=int(os.getenv("MONGODB_PORT")), + db_name=os.getenv("DATABASE_NAME"), + username=os.getenv("MONGODB_USERNAME"), + password=os.getenv("MONGODB_PASSWORD"), auth_source=os.getenv("MONGODB_AUTH_SOURCE") ) - + app = ReasoningGUI() app.run() - if __name__ == "__main__": main() diff --git a/src/plugins/chat/__init__.py b/src/plugins/chat/__init__.py index a62343d0c..9f9c6a45c 100644 --- a/src/plugins/chat/__init__.py +++ b/src/plugins/chat/__init__.py @@ -1,12 +1,9 @@ import asyncio -import os -import random import time from loguru import logger -from nonebot import get_driver, on_command, 
on_message, require +from nonebot import get_driver, on_message, require from nonebot.adapters.onebot.v11 import Bot, GroupMessageEvent, Message, MessageSegment -from nonebot.rule import to_me from nonebot.typing import T_State from ...common.database import Database @@ -19,6 +16,10 @@ from .emoji_manager import emoji_manager from .relationship_manager import relationship_manager from .willing_manager import willing_manager from .chat_stream import chat_manager +from ..memory_system.memory import hippocampus, memory_graph +from .bot import ChatBot +from .message_sender import message_manager, message_sender + # 创建LLM统计实例 llm_stats = LLMStatistics("llm_statistics.txt") @@ -31,27 +32,20 @@ driver = get_driver() config = driver.config Database.initialize( - host= config.MONGODB_HOST, - port= int(config.MONGODB_PORT), - db_name= config.DATABASE_NAME, - username= config.MONGODB_USERNAME, - password= config.MONGODB_PASSWORD, - auth_source= config.MONGODB_AUTH_SOURCE + host=config.MONGODB_HOST, + port=int(config.MONGODB_PORT), + db_name=config.DATABASE_NAME, + username=config.MONGODB_USERNAME, + password=config.MONGODB_PASSWORD, + auth_source=config.MONGODB_AUTH_SOURCE ) -print("\033[1;32m[初始化数据库完成]\033[0m") +logger.success("初始化数据库成功") -# 导入其他模块 -from ..memory_system.memory import hippocampus, memory_graph -from .bot import ChatBot - -# from .message_send_control import message_sender -from .message_sender import message_manager, message_sender - # 初始化表情管理器 emoji_manager.initialize() -print(f"\033[1;32m正在唤醒{global_config.BOT_NICKNAME}......\033[0m") +logger.debug(f"正在唤醒{global_config.BOT_NICKNAME}......") # 创建机器人实例 chat_bot = ChatBot() # 注册群消息处理器 @@ -60,71 +54,80 @@ group_msg = on_message(priority=5) scheduler = require("nonebot_plugin_apscheduler").scheduler - @driver.on_startup async def start_background_tasks(): """启动后台任务""" # 启动LLM统计 llm_stats.start() - print("\033[1;32m[初始化]\033[0m LLM统计功能已启动") - + logger.success("LLM统计功能启动成功") + # 初始化并启动情绪管理器 mood_manager = MoodManager.get_instance() mood_manager.start_mood_update(update_interval=global_config.mood_update_interval) - print("\033[1;32m[初始化]\033[0m 情绪管理器已启动") - + logger.success("情绪管理器启动成功") + # 只启动表情包管理任务 asyncio.create_task(emoji_manager.start_periodic_check(interval_MINS=global_config.EMOJI_CHECK_INTERVAL)) await bot_schedule.initialize() bot_schedule.print_schedule() - + + @driver.on_startup async def init_relationships(): """在 NoneBot2 启动时初始化关系管理器""" - print("\033[1;32m[初始化]\033[0m 正在加载用户关系数据...") + logger.debug("正在加载用户关系数据...") await relationship_manager.load_all_relationships() asyncio.create_task(relationship_manager._start_relationship_manager()) + @driver.on_bot_connect async def _(bot: Bot): """Bot连接成功时的处理""" global _message_manager_started - print(f"\033[1;38;5;208m-----------{global_config.BOT_NICKNAME}成功连接!-----------\033[0m") + logger.debug(f"-----------{global_config.BOT_NICKNAME}成功连接!-----------") await willing_manager.ensure_started() - + message_sender.set_bot(bot) - print("\033[1;38;5;208m-----------消息发送器已启动!-----------\033[0m") - + logger.success("-----------消息发送器已启动!-----------") + if not _message_manager_started: asyncio.create_task(message_manager.start_processor()) _message_manager_started = True - print("\033[1;38;5;208m-----------消息处理器已启动!-----------\033[0m") - + logger.success("-----------消息处理器已启动!-----------") + asyncio.create_task(emoji_manager._periodic_scan(interval_MINS=global_config.EMOJI_REGISTER_INTERVAL)) - print("\033[1;38;5;208m-----------开始偷表情包!-----------\033[0m") + 
logger.success("-----------开始偷表情包!-----------") asyncio.create_task(chat_manager._initialize()) asyncio.create_task(chat_manager._auto_save_task()) - + + @group_msg.handle() async def _(bot: Bot, event: GroupMessageEvent, state: T_State): await chat_bot.handle_message(event, bot) + # 添加build_memory定时任务 @scheduler.scheduled_job("interval", seconds=global_config.build_memory_interval, id="build_memory") async def build_memory_task(): """每build_memory_interval秒执行一次记忆构建""" - print("\033[1;32m[记忆构建]\033[0m -------------------------------------------开始构建记忆-------------------------------------------") + logger.debug( + "[记忆构建]" + "------------------------------------开始构建记忆--------------------------------------") start_time = time.time() await hippocampus.operation_build_memory(chat_size=20) end_time = time.time() - print(f"\033[1;32m[记忆构建]\033[0m -------------------------------------------记忆构建完成:耗时: {end_time - start_time:.2f} 秒-------------------------------------------") - -@scheduler.scheduled_job("interval", seconds=global_config.forget_memory_interval, id="forget_memory") + logger.success( + f"[记忆构建]--------------------------记忆构建完成:耗时: {end_time - start_time:.2f} " + "秒-------------------------------------------") + + +@scheduler.scheduled_job("interval", seconds=global_config.forget_memory_interval, id="forget_memory") async def forget_memory_task(): """每30秒执行一次记忆构建""" - # print("\033[1;32m[记忆遗忘]\033[0m 开始遗忘记忆...") - # await hippocampus.operation_forget_topic(percentage=0.1) - # print("\033[1;32m[记忆遗忘]\033[0m 记忆遗忘完成") + print("\033[1;32m[记忆遗忘]\033[0m 开始遗忘记忆...") + await hippocampus.operation_forget_topic(percentage=0.1) + print("\033[1;32m[记忆遗忘]\033[0m 记忆遗忘完成") + @scheduler.scheduled_job("interval", seconds=global_config.build_memory_interval + 10, id="merge_memory") async def merge_memory_task(): @@ -133,9 +136,9 @@ async def merge_memory_task(): # await hippocampus.operation_merge_memory(percentage=0.1) # print("\033[1;32m[记忆整合]\033[0m 记忆整合完成") + @scheduler.scheduled_job("interval", seconds=30, id="print_mood") async def print_mood_task(): """每30秒打印一次情绪状态""" mood_manager = MoodManager.get_instance() mood_manager.print_mood_status() - diff --git a/src/plugins/chat/bot.py b/src/plugins/chat/bot.py index a5f4ac476..a695cea77 100644 --- a/src/plugins/chat/bot.py +++ b/src/plugins/chat/bot.py @@ -1,3 +1,4 @@ +import re import time from random import random from loguru import logger @@ -31,10 +32,10 @@ class ChatBot: self._started = False self.mood_manager = MoodManager.get_instance() # 获取情绪管理器单例 self.mood_manager.start_mood_update() # 启动情绪更新 - + self.emoji_chance = 0.2 # 发送表情包的基础概率 # self.message_streams = MessageStreamContainer() - + async def _ensure_started(self): """确保所有任务已启动""" if not self._started: @@ -42,9 +43,9 @@ class ChatBot: async def handle_message(self, event: GroupMessageEvent, bot: Bot) -> None: """处理收到的群消息""" - + self.bot = bot # 更新 bot 实例 - + # group_info = await bot.get_group_info(group_id=event.group_id) # sender_info = await bot.get_group_member_info(group_id=event.group_id, user_id=event.user_id, no_cache=True) @@ -96,8 +97,17 @@ class ChatBot: # 过滤词 for word in global_config.ban_words: if word in message.processed_plain_text: - logger.info(f"\033[1;32m[{groupinfo.group_name}]{userinfo.user_nickname}:\033[0m {message.processed_plain_text}") - logger.info(f"\033[1;32m[过滤词识别]\033[0m 消息中含有{word},filtered") + logger.info( + f"[{groupinfo.group_name}]{userinfo.user_nickname}:{message.processed_plain_text}") + logger.info(f"[过滤词识别]消息中含有{word},filtered") + return + + # 正则表达式过滤 + 
for pattern in global_config.ban_msgs_regex: + if re.search(pattern, message.raw_message): + logger.info( + f"[{message.group_name}]{message.user_nickname}:{message.raw_message}") + logger.info(f"[正则表达式过滤]消息匹配到{pattern},filtered") return current_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(messageinfo.time)) @@ -107,8 +117,9 @@ class ChatBot: # topic=await topic_identifier.identify_topic_llm(message.processed_plain_text) topic = '' interested_rate = 0 - interested_rate = await hippocampus.memory_activate_value(message.processed_plain_text)/100 - print(f"\033[1;32m[记忆激活]\033[0m 对{message.processed_plain_text}的激活度:---------------------------------------{interested_rate}\n") + interested_rate = await hippocampus.memory_activate_value(message.processed_plain_text) / 100 + logger.debug(f"对{message.processed_plain_text}" + f"的激活度:{interested_rate}") # logger.info(f"\033[1;32m[主题识别]\033[0m 使用{global_config.topic_extract}主题: {topic}") await self.storage.store_message(message,chat, topic[0] if topic else None) @@ -124,7 +135,10 @@ class ChatBot: ) current_willing = willing_manager.get_willing(chat_stream=chat) - print(f"\033[1;32m[{current_time}][{chat.group_info.group_name}]{chat.user_info.user_nickname}:\033[0m {message.processed_plain_text}\033[1;36m[回复意愿:{current_willing:.2f}][概率:{reply_probability * 100:.1f}%]\033[0m") + logger.info( + f"[{current_time}][{chat.group_info.group_name}]{chat.user_info.user_nickname}:" + f"{message.processed_plain_text}[回复意愿:{current_willing:.2f}][概率:{reply_probability * 100:.1f}%]" + ) response = None @@ -159,13 +173,13 @@ class ChatBot: thinking_message = msg container.messages.remove(msg) break - + # 如果找不到思考消息,直接返回 if not thinking_message: - print(f"\033[1;33m[警告]\033[0m 未找到对应的思考消息,可能已超时被移除") + logger.warning("未找到对应的思考消息,可能已超时被移除") return - - #记录开始思考的时间,避免从思考到回复的时间太久 + + # 记录开始思考的时间,避免从思考到回复的时间太久 thinking_start_time = thinking_message.thinking_start_time message_set = MessageSet(chat, think_id) message_set = MessageSet(chat, think_id) @@ -175,7 +189,7 @@ class ChatBot: mark_head = False for msg in response: # print(f"\033[1;32m[回复内容]\033[0m {msg}") - #通过时间改变时间戳 + # 通过时间改变时间戳 typing_time = calculate_typing_time(msg) accu_typing_time += typing_time timepoint = tinking_time_point + accu_typing_time @@ -193,19 +207,19 @@ class ChatBot: if not mark_head: mark_head = True message_set.add_message(bot_message) - - #message_set 可以直接加入 message_manager + + # message_set 可以直接加入 message_manager # print(f"\033[1;32m[回复]\033[0m 将回复载入发送容器") message_manager.add_message(message_set) - + bot_response_time = tinking_time_point if random() < global_config.emoji_chance: emoji_raw = await emoji_manager.get_emoji_for_text(response) - + # 检查是否 <没有找到> emoji if emoji_raw != None: - emoji_path,discription = emoji_raw + emoji_path, description = emoji_raw emoji_cq = image_path_to_base64(emoji_path) @@ -226,8 +240,8 @@ class ChatBot: ) message_manager.add_message(bot_message) emotion = await self.gpt._get_emotion_tags(raw_content) - print(f"为 '{response}' 获取到的情感标签为:{emotion}") - valuedict={ + logger.debug(f"为 '{response}' 获取到的情感标签为:{emotion}") + valuedict = { 'happy': 0.5, 'angry': -1, 'sad': -0.5, @@ -240,9 +254,10 @@ class ChatBot: # 使用情绪管理器更新情绪 self.mood_manager.update_mood_from_emotion(emotion[0], global_config.mood_intensity_factor) - willing_manager.change_reply_willing_after_sent( - chat_stream=chat - ) + # willing_manager.change_reply_willing_after_sent( + # chat_stream=chat + # ) + # 创建全局ChatBot实例 -chat_bot = ChatBot() \ No newline at end of file +chat_bot = ChatBot() 
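上面 `src/plugins/chat/bot.py` 的改动为消息处理新增了基于正则表达式的过滤(`ban_msgs_regex`):只要原始消息命中任意一个配置的模式,消息就会被直接丢弃。下面是该逻辑的一个最小示意,其中的正则模式是为演示假设的,实际值来自 `bot_config.toml` 中 `[message]` 段的 `ban_msgs_regex`:

```python
import re

# 演示用的假设模式;实际模式由 bot_config.toml 的 ban_msgs_regex 提供
ban_msgs_regex = [r"\[CQ:video.*?\]", r"^/\w+"]

def is_filtered(raw_message: str) -> bool:
    """命中任意被禁模式即过滤(对应 handle_message 中的提前 return)"""
    return any(re.search(pattern, raw_message) for pattern in ban_msgs_regex)

print(is_filtered("[CQ:video,file=a.mp4]"))  # True:视频 CQ 码被过滤
print(is_filtered("早上好"))                 # False:普通文本正常放行
```

下面 `config.py` 的改动引入了 `INNER_VERSION`,用 `packaging` 的 SpecifierSet 按配置文件声明的版本决定是否读取某个字段。其核心写法示意如下(`alias_names` 的取值仅为示例):

```python
from packaging.version import Version
from packaging.specifiers import SpecifierSet

inner_version = Version("0.0.5")  # 对应配置文件 [inner] 段的 version

# Version 对象可以直接用 in 判断是否满足版本约束表达式
if inner_version in SpecifierSet(">=0.0.5"):
    # alias_names 是 0.0.5 才引入的字段,旧版本配置不会读取
    alias_names = ["小麦", "阿麦"]
```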
diff --git a/src/plugins/chat/config.py b/src/plugins/chat/config.py index fd65c116d..7aed9eee8 100644 --- a/src/plugins/chat/config.py +++ b/src/plugins/chat/config.py @@ -1,46 +1,54 @@ import os from dataclasses import dataclass, field -from typing import Dict, Optional +from typing import Dict, List, Optional import tomli from loguru import logger +from packaging import version +from packaging.version import Version, InvalidVersion +from packaging.specifiers import SpecifierSet, InvalidSpecifier @dataclass class BotConfig: - """机器人配置类""" + """机器人配置类""" + + INNER_VERSION: Version = None + BOT_QQ: Optional[int] = 1 BOT_NICKNAME: Optional[str] = None - + BOT_ALIAS_NAMES: List[str] = field(default_factory=list) # 别名,可以通过这个叫它 + # 消息处理相关配置 MIN_TEXT_LENGTH: int = 2 # 最小处理文本长度 MAX_CONTEXT_SIZE: int = 15 # 上下文最大消息数 emoji_chance: float = 0.2 # 发送表情包的基础概率 - + ENABLE_PIC_TRANSLATE: bool = True # 是否启用图片翻译 - + talk_allowed_groups = set() talk_frequency_down_groups = set() thinking_timeout: int = 100 # 思考时间 - + response_willing_amplifier: float = 1.0 # 回复意愿放大系数 response_interested_rate_amplifier: float = 1.0 # 回复兴趣度放大系数 down_frequency_rate: float = 3.5 # 降低回复频率的群组回复意愿降低系数 - + ban_user_id = set() - + build_memory_interval: int = 30 # 记忆构建间隔(秒) forget_memory_interval: int = 300 # 记忆遗忘间隔(秒) EMOJI_CHECK_INTERVAL: int = 120 # 表情包检查间隔(分钟) EMOJI_REGISTER_INTERVAL: int = 10 # 表情包注册间隔(分钟) EMOJI_SAVE: bool = True # 偷表情包 - EMOJI_CHECK: bool = False #是否开启过滤 - EMOJI_CHECK_PROMPT: str = "符合公序良俗" # 表情包过滤要求 + EMOJI_CHECK: bool = False # 是否开启过滤 + EMOJI_CHECK_PROMPT: str = "符合公序良俗" # 表情包过滤要求 ban_words = set() + ban_msgs_regex = set() max_response_length: int = 1024 # 最大回复长度 - + # 模型配置 llm_reasoning: Dict[str, str] = field(default_factory=lambda: {}) llm_reasoning_minor: Dict[str, str] = field(default_factory=lambda: {}) @@ -56,176 +64,359 @@ class BotConfig: MODEL_R1_PROBABILITY: float = 0.8 # R1模型概率 MODEL_V3_PROBABILITY: float = 0.1 # V3模型概率 MODEL_R1_DISTILL_PROBABILITY: float = 0.1 # R1蒸馏模型概率 - + enable_advance_output: bool = False # 是否启用高级输出 - enable_kuuki_read: bool = True # 是否启用读空气功能 - - mood_update_interval: float = 1.0 # 情绪更新间隔 单位秒 - mood_decay_rate: float = 0.95 # 情绪衰减率 - mood_intensity_factor: float = 0.7 # 情绪强度因子 + enable_kuuki_read: bool = True # 是否启用读空气功能 + + mood_update_interval: float = 1.0 # 情绪更新间隔 单位秒 + mood_decay_rate: float = 0.95 # 情绪衰减率 + mood_intensity_factor: float = 0.7 # 情绪强度因子 + + keywords_reaction_rules = [] # 关键词回复规则 + + chinese_typo_enable = True # 是否启用中文错别字生成器 + chinese_typo_error_rate = 0.03 # 单字替换概率 + chinese_typo_min_freq = 7 # 最小字频阈值 + chinese_typo_tone_error_rate = 0.2 # 声调错误概率 + chinese_typo_word_replace_rate = 0.02 # 整词替换概率 # 默认人设 - PROMPT_PERSONALITY=[ + PROMPT_PERSONALITY = [ "曾经是一个学习地质的女大学生,现在学习心理学和脑科学,你会刷贴吧", "是一个女大学生,你有黑色头发,你会刷小红书", - "是一个女大学生,你会刷b站,对ACG文化感兴趣" + "是一个女大学生,你会刷b站,对ACG文化感兴趣", ] - PROMPT_SCHEDULE_GEN="一个曾经学习地质,现在学习心理学和脑科学的女大学生,喜欢刷qq,贴吧,知乎和小红书" - - PERSONALITY_1: float = 0.6 # 第一种人格概率 - PERSONALITY_2: float = 0.3 # 第二种人格概率 - PERSONALITY_3: float = 0.1 # 第三种人格概率 - + + PROMPT_SCHEDULE_GEN = "一个曾经学习地质,现在学习心理学和脑科学的女大学生,喜欢刷qq,贴吧,知乎和小红书" + + PERSONALITY_1: float = 0.6 # 第一种人格概率 + PERSONALITY_2: float = 0.3 # 第二种人格概率 + PERSONALITY_3: float = 0.1 # 第三种人格概率 + + memory_ban_words: list = field( + default_factory=lambda: ["表情包", "图片", "回复", "聊天记录"] + ) # 添加新的配置项默认值 + @staticmethod def get_config_dir() -> str: """获取配置文件目录""" current_dir = os.path.dirname(os.path.abspath(__file__)) - root_dir = os.path.abspath(os.path.join(current_dir, '..', '..', '..')) - config_dir = 
os.path.join(root_dir, 'config') + root_dir = os.path.abspath(os.path.join(current_dir, "..", "..", "..")) + config_dir = os.path.join(root_dir, "config") if not os.path.exists(config_dir): os.makedirs(config_dir) return config_dir - + @classmethod + def convert_to_specifierset(cls, value: str) -> SpecifierSet: + """将 字符串 版本表达式转换成 SpecifierSet + Args: + value[str]: 版本表达式(字符串) + Returns: + SpecifierSet + """ + + try: + converted = SpecifierSet(value) + except InvalidSpecifier: + logger.error(f"{value} 分类使用了错误的版本约束表达式\n", "请阅读 https://semver.org/lang/zh-CN/ 修改代码") + exit(1) + + return converted + + @classmethod + def get_config_version(cls, toml: dict) -> Version: + """提取配置文件的 SpecifierSet 版本数据 + Args: + toml[dict]: 输入的配置文件字典 + Returns: + Version + """ + + if "inner" in toml: + try: + config_version: str = toml["inner"]["version"] + except KeyError as e: + logger.error("配置文件中 inner 段 不存在, 这是错误的配置文件") + raise KeyError(f"配置文件中 inner 段 不存在 {e}, 这是错误的配置文件") from e + else: + toml["inner"] = {"version": "0.0.0"} + config_version = toml["inner"]["version"] + + try: + ver = version.parse(config_version) + except InvalidVersion as e: + logger.error( + "配置文件中 inner段 的 version 键是错误的版本描述\n" + "请阅读 https://semver.org/lang/zh-CN/ 修改配置,并参考本项目指定的模板进行修改\n" + "本项目在不同的版本下有不同的模板,请注意识别" + ) + raise InvalidVersion("配置文件中 inner段 的 version 键是错误的版本描述\n") from e + + return ver + @classmethod def load_config(cls, config_path: str = None) -> "BotConfig": """从TOML配置文件加载配置""" config = cls() + + def personality(parent: dict): + personality_config = parent["personality"] + personality = personality_config.get("prompt_personality") + if len(personality) >= 2: + logger.debug(f"载入自定义人格:{personality}") + config.PROMPT_PERSONALITY = personality_config.get("prompt_personality", config.PROMPT_PERSONALITY) + logger.info(f"载入自定义日程prompt:{personality_config.get('prompt_schedule', config.PROMPT_SCHEDULE_GEN)}") + config.PROMPT_SCHEDULE_GEN = personality_config.get("prompt_schedule", config.PROMPT_SCHEDULE_GEN) + + if config.INNER_VERSION in SpecifierSet(">=0.0.2"): + config.PERSONALITY_1 = personality_config.get("personality_1_probability", config.PERSONALITY_1) + config.PERSONALITY_2 = personality_config.get("personality_2_probability", config.PERSONALITY_2) + config.PERSONALITY_3 = personality_config.get("personality_3_probability", config.PERSONALITY_3) + + def emoji(parent: dict): + emoji_config = parent["emoji"] + config.EMOJI_CHECK_INTERVAL = emoji_config.get("check_interval", config.EMOJI_CHECK_INTERVAL) + config.EMOJI_REGISTER_INTERVAL = emoji_config.get("register_interval", config.EMOJI_REGISTER_INTERVAL) + config.EMOJI_CHECK_PROMPT = emoji_config.get("check_prompt", config.EMOJI_CHECK_PROMPT) + config.EMOJI_SAVE = emoji_config.get("auto_save", config.EMOJI_SAVE) + config.EMOJI_CHECK = emoji_config.get("enable_check", config.EMOJI_CHECK) + + def cq_code(parent: dict): + cq_code_config = parent["cq_code"] + config.ENABLE_PIC_TRANSLATE = cq_code_config.get("enable_pic_translate", config.ENABLE_PIC_TRANSLATE) + + def bot(parent: dict): + # 机器人基础配置 + bot_config = parent["bot"] + bot_qq = bot_config.get("qq") + config.BOT_QQ = int(bot_qq) + config.BOT_NICKNAME = bot_config.get("nickname", config.BOT_NICKNAME) + + if config.INNER_VERSION in SpecifierSet(">=0.0.5"): + config.BOT_ALIAS_NAMES = bot_config.get("alias_names", config.BOT_ALIAS_NAMES) + + def response(parent: dict): + response_config = parent["response"] + config.MODEL_R1_PROBABILITY = response_config.get("model_r1_probability", config.MODEL_R1_PROBABILITY) + 
config.MODEL_V3_PROBABILITY = response_config.get("model_v3_probability", config.MODEL_V3_PROBABILITY) + config.MODEL_R1_DISTILL_PROBABILITY = response_config.get( + "model_r1_distill_probability", config.MODEL_R1_DISTILL_PROBABILITY + ) + config.max_response_length = response_config.get("max_response_length", config.max_response_length) + + def model(parent: dict): + # 加载模型配置 + model_config: dict = parent["model"] + + config_list = [ + "llm_reasoning", + "llm_reasoning_minor", + "llm_normal", + "llm_normal_minor", + "llm_topic_judge", + "llm_summary_by_topic", + "llm_emotion_judge", + "vlm", + "embedding", + "moderation", + ] + + for item in config_list: + if item in model_config: + cfg_item: dict = model_config[item] + + # base_url 的例子: SILICONFLOW_BASE_URL + # key 的例子: SILICONFLOW_KEY + cfg_target = {"name": "", "base_url": "", "key": "", "pri_in": 0, "pri_out": 0} + + if config.INNER_VERSION in SpecifierSet("<=0.0.0"): + cfg_target = cfg_item + + elif config.INNER_VERSION in SpecifierSet(">=0.0.1"): + stable_item = ["name", "pri_in", "pri_out"] + pricing_item = ["pri_in", "pri_out"] + # 从配置中原始拷贝稳定字段 + for i in stable_item: + # 如果 字段 属于计费项 且获取不到,那默认值是 0 + if i in pricing_item and i not in cfg_item: + cfg_target[i] = 0 + else: + # 没有特殊情况则原样复制 + try: + cfg_target[i] = cfg_item[i] + except KeyError as e: + logger.error(f"{item} 中的必要字段不存在,请检查") + raise KeyError(f"{item} 中的必要字段 {e} 不存在,请检查") from e + + provider = cfg_item.get("provider") + if provider is None: + logger.error(f"provider 字段在模型配置 {item} 中不存在,请检查") + raise KeyError(f"provider 字段在模型配置 {item} 中不存在,请检查") + + cfg_target["base_url"] = f"{provider}_BASE_URL" + cfg_target["key"] = f"{provider}_KEY" + + # 如果 列表中的项目在 model_config 中,利用反射来设置对应项目 + setattr(config, item, cfg_target) + else: + logger.error(f"模型 {item} 在config中不存在,请检查") + raise KeyError(f"模型 {item} 在config中不存在,请检查") + + def message(parent: dict): + msg_config = parent["message"] + config.MIN_TEXT_LENGTH = msg_config.get("min_text_length", config.MIN_TEXT_LENGTH) + config.MAX_CONTEXT_SIZE = msg_config.get("max_context_size", config.MAX_CONTEXT_SIZE) + config.emoji_chance = msg_config.get("emoji_chance", config.emoji_chance) + config.ban_words = msg_config.get("ban_words", config.ban_words) + + if config.INNER_VERSION in SpecifierSet(">=0.0.2"): + config.thinking_timeout = msg_config.get("thinking_timeout", config.thinking_timeout) + config.response_willing_amplifier = msg_config.get( + "response_willing_amplifier", config.response_willing_amplifier + ) + config.response_interested_rate_amplifier = msg_config.get( + "response_interested_rate_amplifier", config.response_interested_rate_amplifier + ) + config.down_frequency_rate = msg_config.get("down_frequency_rate", config.down_frequency_rate) + + if config.INNER_VERSION in SpecifierSet(">=0.0.6"): + config.ban_msgs_regex = msg_config.get("ban_msgs_regex", config.ban_msgs_regex) + + def memory(parent: dict): + memory_config = parent["memory"] + config.build_memory_interval = memory_config.get("build_memory_interval", config.build_memory_interval) + config.forget_memory_interval = memory_config.get("forget_memory_interval", config.forget_memory_interval) + + # 在版本 >= 0.0.4 时才处理新增的配置项 + if config.INNER_VERSION in SpecifierSet(">=0.0.4"): + config.memory_ban_words = set(memory_config.get("memory_ban_words", [])) + + def mood(parent: dict): + mood_config = parent["mood"] + config.mood_update_interval = mood_config.get("mood_update_interval", config.mood_update_interval) + config.mood_decay_rate = mood_config.get("mood_decay_rate", 
config.mood_decay_rate) + config.mood_intensity_factor = mood_config.get("mood_intensity_factor", config.mood_intensity_factor) + + def keywords_reaction(parent: dict): + keywords_reaction_config = parent["keywords_reaction"] + if keywords_reaction_config.get("enable", False): + config.keywords_reaction_rules = keywords_reaction_config.get("rules", config.keywords_reaction_rules) + + def chinese_typo(parent: dict): + chinese_typo_config = parent["chinese_typo"] + config.chinese_typo_enable = chinese_typo_config.get("enable", config.chinese_typo_enable) + config.chinese_typo_error_rate = chinese_typo_config.get("error_rate", config.chinese_typo_error_rate) + config.chinese_typo_min_freq = chinese_typo_config.get("min_freq", config.chinese_typo_min_freq) + config.chinese_typo_tone_error_rate = chinese_typo_config.get( + "tone_error_rate", config.chinese_typo_tone_error_rate + ) + config.chinese_typo_word_replace_rate = chinese_typo_config.get( + "word_replace_rate", config.chinese_typo_word_replace_rate + ) + + def groups(parent: dict): + groups_config = parent["groups"] + config.talk_allowed_groups = set(groups_config.get("talk_allowed", [])) + config.talk_frequency_down_groups = set(groups_config.get("talk_frequency_down", [])) + config.ban_user_id = set(groups_config.get("ban_user_id", [])) + + def others(parent: dict): + others_config = parent["others"] + config.enable_advance_output = others_config.get("enable_advance_output", config.enable_advance_output) + config.enable_kuuki_read = others_config.get("enable_kuuki_read", config.enable_kuuki_read) + + # 版本表达式:>=1.0.0,<2.0.0 + # 允许字段:func: method, support: str, notice: str, necessary: bool + # 如果使用 notice 字段,在该组配置加载时,会展示该字段对用户的警示 + # 例如:"notice": "personality 将在 1.3.2 后被移除",那么在有效版本中的用户就会虽然可以 + # 正常执行程序,但是会看到这条自定义提示 + include_configs = { + "personality": {"func": personality, "support": ">=0.0.0"}, + "emoji": {"func": emoji, "support": ">=0.0.0"}, + "cq_code": {"func": cq_code, "support": ">=0.0.0"}, + "bot": {"func": bot, "support": ">=0.0.0"}, + "response": {"func": response, "support": ">=0.0.0"}, + "model": {"func": model, "support": ">=0.0.0"}, + "message": {"func": message, "support": ">=0.0.0"}, + "memory": {"func": memory, "support": ">=0.0.0", "necessary": False}, + "mood": {"func": mood, "support": ">=0.0.0"}, + "keywords_reaction": {"func": keywords_reaction, "support": ">=0.0.2", "necessary": False}, + "chinese_typo": {"func": chinese_typo, "support": ">=0.0.3", "necessary": False}, + "groups": {"func": groups, "support": ">=0.0.0"}, + "others": {"func": others, "support": ">=0.0.0"}, + } + + # 原地修改,将 字符串版本表达式 转换成 版本对象 + for key in include_configs: + item_support = include_configs[key]["support"] + include_configs[key]["support"] = cls.convert_to_specifierset(item_support) + if os.path.exists(config_path): with open(config_path, "rb") as f: try: toml_dict = tomli.load(f) - except(tomli.TOMLDecodeError) as e: + except tomli.TOMLDecodeError as e: logger.critical(f"配置文件bot_config.toml填写有误,请检查第{e.lineno}行第{e.colno}处:{e.msg}") exit(1) - - if 'personality' in toml_dict: - personality_config=toml_dict['personality'] - personality=personality_config.get('prompt_personality') - if len(personality) >= 2: - logger.info(f"载入自定义人格:{personality}") - config.PROMPT_PERSONALITY=personality_config.get('prompt_personality',config.PROMPT_PERSONALITY) - logger.info(f"载入自定义日程prompt:{personality_config.get('prompt_schedule',config.PROMPT_SCHEDULE_GEN)}") - 
config.PROMPT_SCHEDULE_GEN=personality_config.get('prompt_schedule',config.PROMPT_SCHEDULE_GEN) - config.PERSONALITY_1=personality_config.get('personality_1_probability',config.PERSONALITY_1) - config.PERSONALITY_2=personality_config.get('personality_2_probability',config.PERSONALITY_2) - config.PERSONALITY_3=personality_config.get('personality_3_probability',config.PERSONALITY_3) - if "emoji" in toml_dict: - emoji_config = toml_dict["emoji"] - config.EMOJI_CHECK_INTERVAL = emoji_config.get("check_interval", config.EMOJI_CHECK_INTERVAL) - config.EMOJI_REGISTER_INTERVAL = emoji_config.get("register_interval", config.EMOJI_REGISTER_INTERVAL) - config.EMOJI_CHECK_PROMPT = emoji_config.get('check_prompt',config.EMOJI_CHECK_PROMPT) - config.EMOJI_SAVE = emoji_config.get('auto_save',config.EMOJI_SAVE) - config.EMOJI_CHECK = emoji_config.get('enable_check',config.EMOJI_CHECK) - - if "cq_code" in toml_dict: - cq_code_config = toml_dict["cq_code"] - config.ENABLE_PIC_TRANSLATE = cq_code_config.get("enable_pic_translate", config.ENABLE_PIC_TRANSLATE) - - # 机器人基础配置 - if "bot" in toml_dict: - bot_config = toml_dict["bot"] - bot_qq = bot_config.get("qq") - config.BOT_QQ = int(bot_qq) - config.BOT_NICKNAME = bot_config.get("nickname", config.BOT_NICKNAME) - - if "response" in toml_dict: - response_config = toml_dict["response"] - config.MODEL_R1_PROBABILITY = response_config.get("model_r1_probability", config.MODEL_R1_PROBABILITY) - config.MODEL_V3_PROBABILITY = response_config.get("model_v3_probability", config.MODEL_V3_PROBABILITY) - config.MODEL_R1_DISTILL_PROBABILITY = response_config.get("model_r1_distill_probability", config.MODEL_R1_DISTILL_PROBABILITY) - config.max_response_length = response_config.get("max_response_length", config.max_response_length) - - # 加载模型配置 - if "model" in toml_dict: - model_config = toml_dict["model"] - - if "llm_reasoning" in model_config: - config.llm_reasoning = model_config["llm_reasoning"] - - if "llm_reasoning_minor" in model_config: - config.llm_reasoning_minor = model_config["llm_reasoning_minor"] - - if "llm_normal" in model_config: - config.llm_normal = model_config["llm_normal"] - - if "llm_normal_minor" in model_config: - config.llm_normal_minor = model_config["llm_normal_minor"] - - if "llm_topic_judge" in model_config: - config.llm_topic_judge = model_config["llm_topic_judge"] - - if "llm_summary_by_topic" in model_config: - config.llm_summary_by_topic = model_config["llm_summary_by_topic"] - - if "llm_emotion_judge" in model_config: - config.llm_emotion_judge = model_config["llm_emotion_judge"] - - if "vlm" in model_config: - config.vlm = model_config["vlm"] - - if "embedding" in model_config: - config.embedding = model_config["embedding"] - - if "moderation" in model_config: - config.moderation = model_config["moderation"] - - # 消息配置 - if "message" in toml_dict: - msg_config = toml_dict["message"] - config.MIN_TEXT_LENGTH = msg_config.get("min_text_length", config.MIN_TEXT_LENGTH) - config.MAX_CONTEXT_SIZE = msg_config.get("max_context_size", config.MAX_CONTEXT_SIZE) - config.emoji_chance = msg_config.get("emoji_chance", config.emoji_chance) - config.ban_words=msg_config.get("ban_words",config.ban_words) - config.thinking_timeout = msg_config.get("thinking_timeout", config.thinking_timeout) - config.response_willing_amplifier = msg_config.get("response_willing_amplifier", config.response_willing_amplifier) - config.response_interested_rate_amplifier = msg_config.get("response_interested_rate_amplifier", config.response_interested_rate_amplifier) - 
config.down_frequency_rate = msg_config.get("down_frequency_rate", config.down_frequency_rate) + # 获取配置文件版本 + config.INNER_VERSION = cls.get_config_version(toml_dict) + + # 如果在配置中找到了需要的项,调用对应项的闭包函数处理 + for key in include_configs: + if key in toml_dict: + group_specifierset: SpecifierSet = include_configs[key]["support"] + + # 检查配置文件版本是否在支持范围内 + if config.INNER_VERSION in group_specifierset: + # 如果版本在支持范围内,检查是否存在通知 + if "notice" in include_configs[key]: + logger.warning(include_configs[key]["notice"]) + + include_configs[key]["func"](toml_dict) + + else: + # 如果版本不在支持范围内,崩溃并提示用户 + logger.error( + f"配置文件中的 '{key}' 字段的版本 ({config.INNER_VERSION}) 不在支持范围内。\n" + f"当前程序仅支持以下版本范围: {group_specifierset}" + ) + raise InvalidVersion(f"当前程序仅支持以下版本范围: {group_specifierset}") + + # 如果 necessary 项目存在,而且显式声明是 False,进入特殊处理 + elif "necessary" in include_configs[key] and include_configs[key].get("necessary") is False: + # 通过 pass 处理的项虽然直接忽略也是可以的,但是为了不增加理解困难,依然需要在这里显式处理 + if key == "keywords_reaction": + pass + + else: + # 如果用户根本没有需要的配置项,提示缺少配置 + logger.error(f"配置文件中缺少必需的字段: '{key}'") + raise KeyError(f"配置文件中缺少必需的字段: '{key}'") + + logger.success(f"成功加载配置文件: {config_path}") + + return config + - if "memory" in toml_dict: - memory_config = toml_dict["memory"] - config.build_memory_interval = memory_config.get("build_memory_interval", config.build_memory_interval) - config.forget_memory_interval = memory_config.get("forget_memory_interval", config.forget_memory_interval) - - if "mood" in toml_dict: - mood_config = toml_dict["mood"] - config.mood_update_interval = mood_config.get("mood_update_interval", config.mood_update_interval) - config.mood_decay_rate = mood_config.get("mood_decay_rate", config.mood_decay_rate) - config.mood_intensity_factor = mood_config.get("mood_intensity_factor", config.mood_intensity_factor) - - # 群组配置 - if "groups" in toml_dict: - groups_config = toml_dict["groups"] - config.talk_allowed_groups = set(groups_config.get("talk_allowed", [])) - config.talk_frequency_down_groups = set(groups_config.get("talk_frequency_down", [])) - config.ban_user_id = set(groups_config.get("ban_user_id", [])) - - if "others" in toml_dict: - others_config = toml_dict["others"] - config.enable_advance_output = others_config.get("enable_advance_output", config.enable_advance_output) - config.enable_kuuki_read = others_config.get("enable_kuuki_read", config.enable_kuuki_read) - - logger.success(f"成功加载配置文件: {config_path}") - - return config - # 获取配置文件路径 - bot_config_floder_path = BotConfig.get_config_dir() -print(f"正在品鉴配置文件目录: {bot_config_floder_path}") +logger.debug(f"正在品鉴配置文件目录: {bot_config_floder_path}") + bot_config_path = os.path.join(bot_config_floder_path, "bot_config.toml") + if os.path.exists(bot_config_path): # 如果开发环境配置文件不存在,则使用默认配置文件 - print(f"异常的新鲜,异常的美味: {bot_config_path}") + logger.debug(f"异常的新鲜,异常的美味: {bot_config_path}") logger.info("使用bot配置文件") else: - logger.info("没有找到美味") + # 配置文件不存在 + logger.error("配置文件不存在,请检查路径: {bot_config_path}") + raise FileNotFoundError(f"配置文件不存在: {bot_config_path}") global_config = BotConfig.load_config(config_path=bot_config_path) - if not global_config.enable_advance_output: logger.remove() pass - diff --git a/src/plugins/chat/cq_code.py b/src/plugins/chat/cq_code.py index 6030b893f..d0f50c5ae 100644 --- a/src/plugins/chat/cq_code.py +++ b/src/plugins/chat/cq_code.py @@ -170,11 +170,11 @@ class CQCode: except (requests.exceptions.SSLError, requests.exceptions.HTTPError) as e: if retry == max_retries - 1: - print(f"\033[1;31m[致命错误]\033[0m 最终请求失败: {str(e)}") + 
logger.error(f"最终请求失败: {str(e)}") time.sleep(1.5**retry) # 指数退避 - except Exception as e: - print(f"\033[1;33m[未知错误]\033[0m {str(e)}") + except Exception: + logger.exception("[未知错误]") return None return None diff --git a/src/plugins/chat/emoji_manager.py b/src/plugins/chat/emoji_manager.py index f3728ce92..a26f4dc4b 100644 --- a/src/plugins/chat/emoji_manager.py +++ b/src/plugins/chat/emoji_manager.py @@ -24,24 +24,26 @@ image_manager = ImageManager() class EmojiManager: _instance = None EMOJI_DIR = "data/emoji" # 表情包存储目录 - + def __new__(cls): if cls._instance is None: cls._instance = super().__new__(cls) cls._instance.db = None cls._instance._initialized = False return cls._instance - + def __init__(self): self.db = Database.get_instance() self._scan_task = None self.vlm = LLM_request(model=global_config.vlm, temperature=0.3, max_tokens=1000) - self.llm_emotion_judge = LLM_request(model=global_config.llm_normal_minor, max_tokens=60,temperature=0.8) #更高的温度,更少的token(后续可以根据情绪来调整温度) - + self.llm_emotion_judge = LLM_request(model=global_config.llm_normal_minor, max_tokens=60, + temperature=0.8) # 更高的温度,更少的token(后续可以根据情绪来调整温度) + + def _ensure_emoji_dir(self): """确保表情存储目录存在""" os.makedirs(self.EMOJI_DIR, exist_ok=True) - + def initialize(self): """初始化数据库连接和表情目录""" if not self._initialized: @@ -52,16 +54,16 @@ class EmojiManager: self._initialized = True # 启动时执行一次完整性检查 self.check_emoji_file_integrity() - except Exception as e: - logger.error(f"初始化表情管理器失败: {str(e)}") - + except Exception: + logger.exception("初始化表情管理器失败") + def _ensure_db(self): """确保数据库已初始化""" if not self._initialized: self.initialize() if not self._initialized: raise RuntimeError("EmojiManager not initialized") - + def _ensure_emoji_collection(self): """确保emoji集合存在并创建索引 @@ -78,7 +80,7 @@ class EmojiManager: self.db.db.create_collection('emoji') self.db.db.emoji.create_index([('embedding', '2dsphere')]) self.db.db.emoji.create_index([('filename', 1)], unique=True) - + def record_usage(self, emoji_id: str): """记录表情使用次数""" try: @@ -104,9 +106,9 @@ class EmojiManager: """ try: self._ensure_db() - + # 获取文本的embedding - text_for_search= await self._get_kimoji_for_text(text) + text_for_search = await self._get_kimoji_for_text(text) if not text_for_search: logger.error("无法获取文本的情绪") return None @@ -114,15 +116,15 @@ class EmojiManager: if not text_embedding: logger.error("无法获取文本的embedding") return None - + try: # 获取所有表情包 - all_emojis = list(self.db.db.emoji.find({}, {'_id': 1, 'path': 1, 'embedding': 1, 'discription': 1})) - + all_emojis = list(self.db.db.emoji.find({}, {'_id': 1, 'path': 1, 'embedding': 1, 'description': 1})) + if not all_emojis: logger.warning("数据库中没有任何表情包") return None - + # 计算余弦相似度并排序 def cosine_similarity(v1, v2): if not v1 or not v2: @@ -133,23 +135,23 @@ class EmojiManager: if norm_v1 == 0 or norm_v2 == 0: return 0 return dot_product / (norm_v1 * norm_v2) - + # 计算所有表情包与输入文本的相似度 emoji_similarities = [ (emoji, cosine_similarity(text_embedding, emoji.get('embedding', []))) for emoji in all_emojis ] - + # 按相似度降序排序 emoji_similarities.sort(key=lambda x: x[1], reverse=True) - + # 获取前3个最相似的表情包 top_10_emojis = emoji_similarities[:10 if len(emoji_similarities) > 10 else len(emoji_similarities)] if not top_10_emojis: logger.warning("未找到匹配的表情包") return None - + # 从前3个中随机选择一个 selected_emoji, similarity = random.choice(top_10_emojis) @@ -159,16 +161,17 @@ class EmojiManager: {'_id': selected_emoji['_id']}, {'$inc': {'usage_count': 1}} ) - logger.success(f"找到匹配的表情包: {selected_emoji.get('discription', '无描述')} (相似度: 
{similarity:.4f})") + logger.success( + f"找到匹配的表情包: {selected_emoji.get('description', '无描述')} (相似度: {similarity:.4f})") # 稍微改一下文本描述,不然容易产生幻觉,描述已经包含 表情包 了 - return selected_emoji['path'],"[ %s ]" % selected_emoji.get('discription', '无描述') - + return selected_emoji['path'], "[ %s ]" % selected_emoji.get('description', '无描述') + except Exception as search_error: logger.error(f"搜索表情包失败: {str(search_error)}") return None - + return None - + except Exception as e: logger.error(f"获取表情包失败: {str(e)}") return None @@ -185,31 +188,31 @@ class EmojiManager: except Exception as e: logger.error(f"获取标签失败: {str(e)}") return None - + async def _check_emoji(self, image_base64: str) -> str: try: prompt = f'这是一个表情包,请回答这个表情包是否满足\"{global_config.EMOJI_CHECK_PROMPT}\"的要求,是则回答是,否则回答否,不要出现任何其他内容' - + content, _ = await self.vlm.generate_response_for_image(prompt, image_base64) logger.debug(f"输出描述: {content}") return content - + except Exception as e: logger.error(f"获取标签失败: {str(e)}") return None - - async def _get_kimoji_for_text(self, text:str): + + async def _get_kimoji_for_text(self, text: str): try: prompt = f'这是{global_config.BOT_NICKNAME}将要发送的消息内容:\n{text}\n若要为其配上表情包,请你输出这个表情包应该表达怎样的情感,应该给人什么样的感觉,不要太简洁也不要太长,注意不要输出任何对消息内容的分析内容,只输出\"一种什么样的感觉\"中间的形容词部分。' - - content, _ = await self.llm_emotion_judge.generate_response_async(prompt) + + content, _ = await self.llm_emotion_judge.generate_response_async(prompt,temperature=1.5) logger.info(f"输出描述: {content}") return content - + except Exception as e: logger.error(f"获取标签失败: {str(e)}") return None - + async def scan_new_emojis(self): """扫描新的表情包""" try: @@ -217,8 +220,9 @@ class EmojiManager: os.makedirs(emoji_dir, exist_ok=True) # 获取所有支持的图片文件 - files_to_process = [f for f in os.listdir(emoji_dir) if f.lower().endswith(('.jpg', '.jpeg', '.png', '.gif'))] - + files_to_process = [f for f in os.listdir(emoji_dir) if + f.lower().endswith(('.jpg', '.jpeg', '.png', '.gif'))] + for filename in files_to_process: image_path = os.path.join(emoji_dir, filename) @@ -273,10 +277,14 @@ class EmojiManager: if '是' not in check: os.remove(image_path) logger.info(f"描述: {description}") + logger.info(f"描述: {description}") logger.info(f"其不满足过滤规则,被剔除 {check}") continue logger.info(f"check通过 {check}") + if description is not None: + embedding = await get_embedding(description) + if description is not None: embedding = await get_embedding(description) # 准备数据库记录 @@ -312,19 +320,17 @@ class EmojiManager: logger.success(f"同步保存到images集合: {filename}") else: logger.warning(f"跳过表情包: {filename}") - - except Exception as e: - logger.error(f"扫描表情包失败: {str(e)}") - logger.error(traceback.format_exc()) - + + except Exception: + logger.exception("扫描表情包失败") + async def _periodic_scan(self, interval_MINS: int = 10): """定期扫描新表情包""" while True: - print("\033[1;36m[表情包]\033[0m 开始扫描新表情包...") + logger.info("开始扫描新表情包...") await self.scan_new_emojis() await asyncio.sleep(interval_MINS * 60) # 每600秒扫描一次 - def check_emoji_file_integrity(self): """检查表情包文件完整性 如果文件已被删除,则从数据库中移除对应记录 @@ -335,7 +341,7 @@ class EmojiManager: all_emojis = list(self.db.db.emoji.find()) removed_count = 0 total_count = len(all_emojis) - + for emoji in all_emojis: try: if 'path' not in emoji: @@ -343,27 +349,27 @@ class EmojiManager: self.db.db.emoji.delete_one({'_id': emoji['_id']}) removed_count += 1 continue - + if 'embedding' not in emoji: logger.warning(f"发现过时记录(缺少embedding字段),ID: {emoji.get('_id', 'unknown')}") self.db.db.emoji.delete_one({'_id': emoji['_id']}) removed_count += 1 continue - + # 检查文件是否存在 if not 
os.path.exists(emoji['path']): logger.warning(f"表情包文件已被删除: {emoji['path']}") # 从数据库中删除记录 result = self.db.db.emoji.delete_one({'_id': emoji['_id']}) if result.deleted_count > 0: - logger.success(f"成功删除数据库记录: {emoji['_id']}") + logger.debug(f"成功删除数据库记录: {emoji['_id']}") removed_count += 1 else: logger.error(f"删除数据库记录失败: {emoji['_id']}") except Exception as item_error: logger.error(f"处理表情包记录时出错: {str(item_error)}") continue - + # 验证清理结果 remaining_count = self.db.db.emoji.count_documents({}) if removed_count > 0: @@ -371,7 +377,7 @@ class EmojiManager: logger.info(f"清理前总数: {total_count} | 清理后总数: {remaining_count}") else: logger.info(f"已检查 {total_count} 个表情包记录") - + except Exception as e: logger.error(f"检查表情包完整性失败: {str(e)}") logger.error(traceback.format_exc()) @@ -382,6 +388,6 @@ class EmojiManager: await asyncio.sleep(interval_MINS * 60) - # 创建全局单例 -emoji_manager = EmojiManager() \ No newline at end of file +emoji_manager = EmojiManager() + diff --git a/src/plugins/chat/llm_generator.py b/src/plugins/chat/llm_generator.py index bfd5eec2e..517e8aa7a 100644 --- a/src/plugins/chat/llm_generator.py +++ b/src/plugins/chat/llm_generator.py @@ -3,6 +3,7 @@ import time from typing import List, Optional, Tuple, Union from nonebot import get_driver +from loguru import logger from ...common.database import Database from ..models.utils_model import LLM_request @@ -55,9 +56,7 @@ class ResponseGenerator: self.current_model_type = "r1_distill" current_model = self.model_r1_distill - print( - f"+++++++++++++++++{global_config.BOT_NICKNAME}{self.current_model_type}思考中+++++++++++++++++" - ) + logger.info(f"{global_config.BOT_NICKNAME}{self.current_model_type}思考中") model_response = await self._generate_response_with_model( message, current_model @@ -65,7 +64,7 @@ class ResponseGenerator: raw_content = model_response if model_response: - print(f"{global_config.BOT_NICKNAME}的回复是:{model_response}") + logger.info(f'{global_config.BOT_NICKNAME}的回复是:{model_response}') model_response = await self._process_response(model_response) if model_response: return model_response, raw_content @@ -122,8 +121,8 @@ class ResponseGenerator: # 生成回复 try: content, reasoning_content = await model.generate_response(prompt) - except Exception as e: - print(f"生成回复时出错: {e}") + except Exception: + logger.exception("生成回复时出错") return None # 保存到数据库 @@ -219,7 +218,7 @@ class InitiativeMessageGenerate: prompt_builder._build_initiative_prompt_select(message.group_id) ) content_select, reasoning = self.model_v3.generate_response(topic_select_prompt) - print(f"[DEBUG] {content_select} {reasoning}") + logger.debug(f"{content_select} {reasoning}") topics_list = [dot[0] for dot in dots_for_select] if content_select: if content_select in topics_list: @@ -232,12 +231,12 @@ class InitiativeMessageGenerate: select_dot[1], prompt_template ) content_check, reasoning_check = self.model_v3.generate_response(prompt_check) - print(f"[DEBUG] {content_check} {reasoning_check}") + logger.info(f"{content_check} {reasoning_check}") if "yes" not in content_check.lower(): return None prompt = prompt_builder._build_initiative_prompt( select_dot, prompt_template, memory ) content, reasoning = self.model_r1.generate_response_async(prompt) - print(f"[DEBUG] {content} {reasoning}") + logger.debug(f"[DEBUG] {content} {reasoning}") return content diff --git a/src/plugins/chat/message_sender.py b/src/plugins/chat/message_sender.py index 2c3880bb8..d5f710bbf 100644 --- a/src/plugins/chat/message_sender.py +++ b/src/plugins/chat/message_sender.py @@ -2,6 +2,7 @@ import 
asyncio import time from typing import Dict, List, Optional, Union +from loguru import logger from nonebot.adapters.onebot.v11 import Bot from .cq_code import cq_code_tool @@ -14,11 +15,12 @@ from .chat_stream import chat_manager class Message_Sender: """发送器""" + def __init__(self): self.message_interval = (0.5, 1) # 消息间隔时间范围(秒) self.last_send_time = 0 self._current_bot = None - + def set_bot(self, bot: Bot): """设置当前bot实例""" self._current_bot = bot @@ -41,10 +43,10 @@ class Message_Sender: message=message_send.raw_message, auto_escape=False ) - print(f"\033[1;34m[调试]\033[0m 发送消息{message.processed_plain_text}成功") + logger.success(f"[调试] 发送消息{message.processed_plain_text}成功") except Exception as e: - print(f"发生错误 {e}") - print(f"\033[1;34m[调试]\033[0m 发送消息{message.processed_plain_text}失败") + logger.error(f"[调试] 发生错误 {e}") + logger.error(f"[调试] 发送消息{message.processed_plain_text}失败") else: try: await self._current_bot.send_private_msg( @@ -52,10 +54,10 @@ class Message_Sender: message=message_send.raw_message, auto_escape=False ) - print(f"\033[1;34m[调试]\033[0m 发送消息{message.processed_plain_text}成功") + logger.success(f"[调试] 发送消息{message.processed_plain_text}成功") except Exception as e: - print(f"发生错误 {e}") - print(f"\033[1;34m[调试]\033[0m 发送消息{message.processed_plain_text}失败") + logger.error(f"发生错误 {e}") + logger.error(f"[调试] 发送消息{message.processed_plain_text}失败") class MessageContainer: @@ -71,15 +73,15 @@ class MessageContainer: """获取所有超时的Message_Sending对象(思考时间超过30秒),按thinking_start_time排序""" current_time = time.time() timeout_messages = [] - + for msg in self.messages: if isinstance(msg, MessageSending): if current_time - msg.thinking_start_time > self.thinking_timeout: timeout_messages.append(msg) - + # 按thinking_start_time排序,时间早的在前面 timeout_messages.sort(key=lambda x: x.thinking_start_time) - + return timeout_messages def get_earliest_message(self) -> Optional[Union[MessageThinking, MessageSending]]: @@ -88,11 +90,11 @@ class MessageContainer: return None earliest_time = float('inf') earliest_message = None - for msg in self.messages: + for msg in self.messages: msg_time = msg.thinking_start_time if msg_time < earliest_time: earliest_time = msg_time - earliest_message = msg + earliest_message = msg return earliest_message def add_message(self, message: Union[MessageThinking, MessageSending]) -> None: @@ -110,10 +112,10 @@ class MessageContainer: self.messages.remove(message) return True return False - except Exception as e: - print(f"\033[1;31m[错误]\033[0m 移除消息时发生错误: {e}") + except Exception: + logger.exception("移除消息时发生错误") return False - + def has_messages(self) -> bool: """检查是否有待发送的消息""" return bool(self.messages) @@ -121,7 +123,7 @@ class MessageContainer: def get_all_messages(self) -> List[Union[MessageSending, MessageThinking]]: """获取所有消息""" return list(self.messages) - + class MessageManager: """管理所有聊天流的消息容器""" @@ -152,11 +154,11 @@ class MessageManager: if isinstance(message_earliest, MessageThinking): message_earliest.update_thinking_time() thinking_time = message_earliest.thinking_time - print(f"\033[1;34m[调试]\033[0m 消息正在思考中,已思考{int(thinking_time)}秒\033[K\r", end='', flush=True) - + print(f"消息正在思考中,已思考{int(thinking_time)}秒\r", end='', flush=True) + # 检查是否超时 if thinking_time > global_config.thinking_timeout: - print(f"\033[1;33m[警告]\033[0m 消息思考超时({thinking_time}秒),移除该消息") + logger.warning(f"消息思考超时({thinking_time}秒),移除该消息") container.remove_message(message_earliest) else: print(f"\033[1;34m[调试]\033[0m 消息'{message_earliest.processed_plain_text}'正在发送中") @@ -174,7 +176,7 @@ class 
MessageManager: message_timeout = container.get_timeout_messages() if message_timeout: - print(f"\033[1;34m[调试]\033[0m 发现{len(message_timeout)}条超时消息") + logger.warning(f"发现{len(message_timeout)}条超时消息") for msg in message_timeout: if msg == message_earliest: continue @@ -191,11 +193,11 @@ class MessageManager: await self.storage.store_message(msg,msg.chat_stream, None) if not container.remove_message(msg): - print("\033[1;33m[警告]\033[0m 尝试删除不存在的消息") - except Exception as e: - print(f"\033[1;31m[错误]\033[0m 处理超时消息时发生错误: {e}") + logger.warning("尝试删除不存在的消息") + except Exception: + logger.exception("处理超时消息时发生错误") continue - + async def start_processor(self): """启动消息处理器""" while self._running: @@ -206,6 +208,7 @@ class MessageManager: await asyncio.gather(*tasks) + # 创建全局消息管理器实例 message_manager = MessageManager() # 创建全局发送器实例 diff --git a/src/plugins/chat/prompt_builder.py b/src/plugins/chat/prompt_builder.py index 46adc343e..fec6c7926 100644 --- a/src/plugins/chat/prompt_builder.py +++ b/src/plugins/chat/prompt_builder.py @@ -1,6 +1,7 @@ import random import time from typing import Optional +from loguru import logger from ...common.database import Database from ..memory_system.memory import hippocampus, memory_graph @@ -34,44 +35,43 @@ class PromptBuilder: Returns: str: 构建好的prompt - """ - #先禁用关系 + """ + # 先禁用关系 if 0 > 30: relation_prompt = "关系特别特别好,你很喜欢喜欢他" relation_prompt_2 = "热情发言或者回复" - elif 0 <-20: + elif 0 < -20: relation_prompt = "关系很差,你很讨厌他" relation_prompt_2 = "骂他" else: relation_prompt = "关系一般" relation_prompt_2 = "发言或者回复" - - #开始构建prompt - - - #心情 + + # 开始构建prompt + + # 心情 mood_manager = MoodManager.get_instance() mood_prompt = mood_manager.get_prompt() - - - #日程构建 + + # 日程构建 current_date = time.strftime("%Y-%m-%d", time.localtime()) current_time = time.strftime("%H:%M:%S", time.localtime()) - bot_schedule_now_time,bot_schedule_now_activity = bot_schedule.get_current_task() + bot_schedule_now_time, bot_schedule_now_activity = bot_schedule.get_current_task() prompt_date = f'''今天是{current_date},现在是{current_time},你今天的日程是:\n{bot_schedule.today_schedule}\n你现在正在{bot_schedule_now_activity}\n''' - #知识构建 + # 知识构建 start_time = time.time() - + prompt_info = '' promt_info_prompt = '' - prompt_info = await self.get_prompt_info(message_txt,threshold=0.5) + prompt_info = await self.get_prompt_info(message_txt, threshold=0.5) if prompt_info: - prompt_info = f'''\n----------------------------------------------------\n你有以下这些[知识]:\n{prompt_info}\n请你记住上面的[知识],之后可能会用到\n----------------------------------------------------\n''' - + prompt_info = f'''你有以下这些[知识]:{prompt_info}请你记住上面的[ + 知识],之后可能会用到-''' + end_time = time.time() - print(f"\033[1;32m[知识检索]\033[0m 耗时: {(end_time - start_time):.3f}秒") - + logger.debug(f"知识检索耗时: {(end_time - start_time):.3f}秒") + # 获取聊天上下文 chat_in_group=True chat_talking_prompt = '' @@ -90,7 +90,7 @@ class PromptBuilder: # 使用新的记忆获取方法 memory_prompt = '' start_time = time.time() - + # 调用 hippocampus 的 get_relevant_memories 方法 relevant_memories = await hippocampus.get_relevant_memories( text=message_txt, @@ -98,64 +98,64 @@ class PromptBuilder: similarity_threshold=0.4, max_memory_num=5 ) - + if relevant_memories: # 格式化记忆内容 memory_items = [] for memory in relevant_memories: memory_items.append(f"关于「{memory['topic']}」的记忆:{memory['content']}") - + memory_prompt = "看到这些聊天,你想起来:\n" + "\n".join(memory_items) + "\n" - + # 打印调试信息 - print("\n\033[1;32m[记忆检索]\033[0m 找到以下相关记忆:") + logger.debug("[记忆检索]找到以下相关记忆:") for memory in relevant_memories: - print(f"- 主题「{memory['topic']}」[相似度: 
{memory['similarity']:.2f}]: {memory['content']}") - + logger.debug(f"- 主题「{memory['topic']}」[相似度: {memory['similarity']:.2f}]: {memory['content']}") + end_time = time.time() - print(f"\033[1;32m[回忆耗时]\033[0m 耗时: {(end_time - start_time):.3f}秒") - - - - #激活prompt构建 + logger.info(f"回忆耗时: {(end_time - start_time):.3f}秒") + + # 激活prompt构建 activate_prompt = '' if chat_in_group: activate_prompt = f"以上是群里正在进行的聊天,{memory_prompt} 现在昵称为 '{sender_name}' 的用户说的:{message_txt}。引起了你的注意,你和ta{relation_prompt},{mood_prompt},你想要{relation_prompt_2}。" else: activate_prompt = f"以上是你正在和{sender_name}私聊的内容,{memory_prompt} 现在昵称为 '{sender_name}' 的用户说的:{message_txt}。引起了你的注意,你和ta{relation_prompt},{mood_prompt},你想要{relation_prompt_2}。" - #检测机器人相关词汇 - bot_keywords = ['人机', 'bot', '机器', '入机', 'robot', '机器人'] - is_bot = any(keyword in message_txt.lower() for keyword in bot_keywords) - if is_bot: - is_bot_prompt = '有人说你是人机或者机器人,否定这一事实,攻击或反问他。注意,只有认为你是机器人才回复,否则不要否认' - else: - is_bot_prompt = '' + + # 关键词检测与反应 + keywords_reaction_prompt = '' + for rule in global_config.keywords_reaction_rules: + if rule.get("enable", False): + if any(keyword in message_txt.lower() for keyword in rule.get("keywords", [])): + logger.info(f"检测到以下关键词之一:{rule.get('keywords', [])},触发反应:{rule.get('reaction', '')}") + keywords_reaction_prompt += rule.get("reaction", "") + ',' #人格选择 personality=global_config.PROMPT_PERSONALITY probability_1 = global_config.PERSONALITY_1 probability_2 = global_config.PERSONALITY_2 probability_3 = global_config.PERSONALITY_3 - prompt_personality = '' + + prompt_personality = f'{activate_prompt}你的网名叫{global_config.BOT_NICKNAME},你还有很多别名:{"/".join(global_config.BOT_ALIAS_NAMES)},' personality_choice = random.random() if chat_in_group: prompt_in_group=f"你正在浏览{chat_stream.platform}群" else: prompt_in_group=f"你正在{chat_stream.platform}上和{sender_name}私聊" if personality_choice < probability_1: # 第一种人格 - prompt_personality = f'''{activate_prompt}你的网名叫{global_config.BOT_NICKNAME},{personality[0]},{prompt_in_group},{promt_info_prompt}, - 现在请你给出日常且口语化的回复,平淡一些,尽量简短一些。{is_bot_prompt} + prompt_personality += f'''{personality[0]}, 你正在浏览qq群,{promt_info_prompt}, + 现在请你给出日常且口语化的回复,平淡一些,尽量简短一些。{keywords_reaction_prompt} 请注意把握群里的聊天内容,不要刻意突出自身学科背景,不要回复的太有条理,可以有个性。''' elif personality_choice < probability_1 + probability_2: # 第二种人格 - prompt_personality = f'''{activate_prompt}你的网名叫{global_config.BOT_NICKNAME},{personality[1]},{prompt_in_group},{promt_info_prompt}, - 现在请你给出日常且口语化的回复,请表现你自己的见解,不要一昧迎合,尽量简短一些。{is_bot_prompt} + prompt_personality += f'''{personality[1]}, 你正在浏览qq群,{promt_info_prompt}, + 现在请你给出日常且口语化的回复,请表现你自己的见解,不要一昧迎合,尽量简短一些。{keywords_reaction_prompt} 请你表达自己的见解和观点。可以有个性。''' else: # 第三种人格 - prompt_personality = f'''{activate_prompt}你的网名叫{global_config.BOT_NICKNAME},{personality[2]},{prompt_in_group},{promt_info_prompt}, - 现在请你给出日常且口语化的回复,请表现你自己的见解,不要一昧迎合,尽量简短一些。{is_bot_prompt} + prompt_personality += f'''{personality[2]}, 你正在浏览qq群,{promt_info_prompt}, + 现在请你给出日常且口语化的回复,请表现你自己的见解,不要一昧迎合,尽量简短一些。{keywords_reaction_prompt} 请你表达自己的见解和观点。可以有个性。''' - - #中文高手(新加的好玩功能) + + # 中文高手(新加的好玩功能) prompt_ger = '' if random.random() < 0.04: prompt_ger += '你喜欢用倒装句' @@ -163,23 +163,23 @@ class PromptBuilder: prompt_ger += '你喜欢用反问句' if random.random() < 0.01: prompt_ger += '你喜欢用文言文' - - #额外信息要求 - extra_info = '''但是记得回复平淡一些,简短一些,尤其注意在没明确提到时不要过多提及自身的背景, 不要直接回复别人发的表情包,记住不要输出多余内容(包括前后缀,冒号和引号,括号,表情等),只需要输出回复内容就好,不要输出其他任何内容''' - - #合并prompt + + # 额外信息要求 + extra_info = '''但是记得回复平淡一些,简短一些,尤其注意在没明确提到时不要过多提及自身的背景, 
不要直接回复别人发的表情包,记住不要输出多余内容(包括前后缀,冒号和引号,括号,表情等),只需要输出回复内容就好,不要输出其他任何内容''' + + # 合并prompt prompt = "" prompt += f"{prompt_info}\n" prompt += f"{prompt_date}\n" - prompt += f"{chat_talking_prompt}\n" + prompt += f"{chat_talking_prompt}\n" prompt += f"{prompt_personality}\n" prompt += f"{prompt_ger}\n" - prompt += f"{extra_info}\n" - - '''读空气prompt处理''' - activate_prompt_check=f"以上是群里正在进行的聊天,昵称为 '{sender_name}' 的用户说的:{message_txt}。引起了你的注意,你和他{relation_prompt},你想要{relation_prompt_2},但是这不一定是合适的时机,请你决定是否要回应这条消息。" + prompt += f"{extra_info}\n" + + '''读空气prompt处理''' + activate_prompt_check = f"以上是群里正在进行的聊天,昵称为 '{sender_name}' 的用户说的:{message_txt}。引起了你的注意,你和他{relation_prompt},你想要{relation_prompt_2},但是这不一定是合适的时机,请你决定是否要回应这条消息。" prompt_personality_check = '' - extra_check_info=f"请注意把握群里的聊天内容的基础上,综合群内的氛围,例如,和{global_config.BOT_NICKNAME}相关的话题要积极回复,如果是at自己的消息一定要回复,如果自己正在和别人聊天一定要回复,其他话题如果合适搭话也可以回复,如果认为应该回复请输出yes,否则输出no,请注意是决定是否需要回复,而不是编写回复内容,除了yes和no不要输出任何回复内容。" + extra_check_info = f"请注意把握群里的聊天内容的基础上,综合群内的氛围,例如,和{global_config.BOT_NICKNAME}相关的话题要积极回复,如果是at自己的消息一定要回复,如果自己正在和别人聊天一定要回复,其他话题如果合适搭话也可以回复,如果认为应该回复请输出yes,否则输出no,请注意是决定是否需要回复,而不是编写回复内容,除了yes和no不要输出任何回复内容。" if personality_choice < probability_1: # 第一种人格 prompt_personality_check = f'''你的网名叫{global_config.BOT_NICKNAME},{personality[0]}, 你正在浏览qq群,{promt_info_prompt} {activate_prompt_check} {extra_check_info}''' elif personality_choice < probability_1 + probability_2: # 第二种人格 @@ -187,34 +187,36 @@ class PromptBuilder: else: # 第三种人格 prompt_personality_check = f'''你的网名叫{global_config.BOT_NICKNAME},{personality[2]}, 你正在浏览qq群,{promt_info_prompt} {activate_prompt_check} {extra_check_info}''' - prompt_check_if_response=f"{prompt_info}\n{prompt_date}\n{chat_talking_prompt}\n{prompt_personality_check}" - - return prompt,prompt_check_if_response - - def _build_initiative_prompt_select(self,group_id): + prompt_check_if_response = f"{prompt_info}\n{prompt_date}\n{chat_talking_prompt}\n{prompt_personality_check}" + + return prompt, prompt_check_if_response + + def _build_initiative_prompt_select(self, group_id, probability_1=0.8, probability_2=0.1): current_date = time.strftime("%Y-%m-%d", time.localtime()) current_time = time.strftime("%H:%M:%S", time.localtime()) - bot_schedule_now_time,bot_schedule_now_activity = bot_schedule.get_current_task() + bot_schedule_now_time, bot_schedule_now_activity = bot_schedule.get_current_task() prompt_date = f'''今天是{current_date},现在是{current_time},你今天的日程是:\n{bot_schedule.today_schedule}\n你现在正在{bot_schedule_now_activity}\n''' chat_talking_prompt = '' if group_id: - chat_talking_prompt = get_recent_group_detailed_plain_text(self.db, group_id, limit=global_config.MAX_CONTEXT_SIZE,combine = True) - + chat_talking_prompt = get_recent_group_detailed_plain_text(self.db, group_id, + limit=global_config.MAX_CONTEXT_SIZE, + combine=True) + chat_talking_prompt = f"以下是群里正在聊天的内容:\n{chat_talking_prompt}" - # print(f"\033[1;34m[调试]\033[0m 已从数据库获取群 {group_id} 的消息记录:{chat_talking_prompt}") + # print(f"\033[1;34m[调试]\033[0m 已从数据库获取群 {group_id} 的消息记录:{chat_talking_prompt}") # 获取主动发言的话题 - all_nodes=memory_graph.dots - all_nodes=filter(lambda dot:len(dot[1]['memory_items'])>3,all_nodes) - nodes_for_select=random.sample(all_nodes,5) - topics=[info[0] for info in nodes_for_select] - infos=[info[1] for info in nodes_for_select] + all_nodes = memory_graph.dots + all_nodes = filter(lambda dot: len(dot[1]['memory_items']) > 3, all_nodes) + nodes_for_select = random.sample(all_nodes, 5) + topics = [info[0] for info in nodes_for_select] + infos = [info[1] for 
info in nodes_for_select] - #激活prompt构建 + # 激活prompt构建 activate_prompt = '' activate_prompt = "以上是群里正在进行的聊天。" - personality=global_config.PROMPT_PERSONALITY + personality = global_config.PROMPT_PERSONALITY prompt_personality = '' personality_choice = random.random() if personality_choice < probability_1: # 第一种人格 @@ -223,32 +225,31 @@ class PromptBuilder: prompt_personality = f'''{activate_prompt}你的网名叫{global_config.BOT_NICKNAME},{personality[1]}''' else: # 第三种人格 prompt_personality = f'''{activate_prompt}你的网名叫{global_config.BOT_NICKNAME},{personality[2]}''' - - topics_str=','.join(f"\"{topics}\"") - prompt_for_select=f"你现在想在群里发言,回忆了一下,想到几个话题,分别是{topics_str},综合当前状态以及群内气氛,请你在其中选择一个合适的话题,注意只需要输出话题,除了话题什么也不要输出(双引号也不要输出)" - - prompt_initiative_select=f"{prompt_date}\n{prompt_personality}\n{prompt_for_select}" - prompt_regular=f"{prompt_date}\n{prompt_personality}" - return prompt_initiative_select,nodes_for_select,prompt_regular - - def _build_initiative_prompt_check(self,selected_node,prompt_regular): - memory=random.sample(selected_node['memory_items'],3) - memory='\n'.join(memory) - prompt_for_check=f"{prompt_regular}你现在想在群里发言,回忆了一下,想到一个话题,是{selected_node['concept']},关于这个话题的记忆有\n{memory}\n,以这个作为主题发言合适吗?请在把握群里的聊天内容的基础上,综合群内的氛围,如果认为应该发言请输出yes,否则输出no,请注意是决定是否需要发言,而不是编写回复内容,除了yes和no不要输出任何回复内容。" - return prompt_for_check,memory - - def _build_initiative_prompt(self,selected_node,prompt_regular,memory): - prompt_for_initiative=f"{prompt_regular}你现在想在群里发言,回忆了一下,想到一个话题,是{selected_node['concept']},关于这个话题的记忆有\n{memory}\n,请在把握群里的聊天内容的基础上,综合群内的氛围,以日常且口语化的口吻,简短且随意一点进行发言,不要说的太有条理,可以有个性。记住不要输出多余内容(包括前后缀,冒号和引号,括号,表情等)" + topics_str = ','.join(f"\"{topics}\"") + prompt_for_select = f"你现在想在群里发言,回忆了一下,想到几个话题,分别是{topics_str},综合当前状态以及群内气氛,请你在其中选择一个合适的话题,注意只需要输出话题,除了话题什么也不要输出(双引号也不要输出)" + + prompt_initiative_select = f"{prompt_date}\n{prompt_personality}\n{prompt_for_select}" + prompt_regular = f"{prompt_date}\n{prompt_personality}" + + return prompt_initiative_select, nodes_for_select, prompt_regular + + def _build_initiative_prompt_check(self, selected_node, prompt_regular): + memory = random.sample(selected_node['memory_items'], 3) + memory = '\n'.join(memory) + prompt_for_check = f"{prompt_regular}你现在想在群里发言,回忆了一下,想到一个话题,是{selected_node['concept']},关于这个话题的记忆有\n{memory}\n,以这个作为主题发言合适吗?请在把握群里的聊天内容的基础上,综合群内的氛围,如果认为应该发言请输出yes,否则输出no,请注意是决定是否需要发言,而不是编写回复内容,除了yes和no不要输出任何回复内容。" + return prompt_for_check, memory + + def _build_initiative_prompt(self, selected_node, prompt_regular, memory): + prompt_for_initiative = f"{prompt_regular}你现在想在群里发言,回忆了一下,想到一个话题,是{selected_node['concept']},关于这个话题的记忆有\n{memory}\n,请在把握群里的聊天内容的基础上,综合群内的氛围,以日常且口语化的口吻,简短且随意一点进行发言,不要说的太有条理,可以有个性。记住不要输出多余内容(包括前后缀,冒号和引号,括号,表情等)" return prompt_for_initiative - - async def get_prompt_info(self,message:str,threshold:float): + async def get_prompt_info(self, message: str, threshold: float): related_info = '' - print(f"\033[1;34m[调试]\033[0m 获取知识库内容,元消息:{message[:30]}...,消息长度: {len(message)}") + logger.debug(f"获取知识库内容,元消息:{message[:30]}...,消息长度: {len(message)}") embedding = await get_embedding(message) - related_info += self.get_info_from_db(embedding,threshold=threshold) - + related_info += self.get_info_from_db(embedding, threshold=threshold) + return related_info def get_info_from_db(self, query_embedding: list, limit: int = 1, threshold: float = 0.5) -> str: @@ -309,14 +310,15 @@ class PromptBuilder: {"$limit": limit}, {"$project": {"content": 1, "similarity": 1}} ] - + results = list(self.db.db.knowledges.aggregate(pipeline)) # 
print(f"\033[1;34m[调试]\033[0m获取知识库内容结果: {results}") - + if not results: return '' - + # 返回所有找到的内容,用换行分隔 return '\n'.join(str(result['content']) for result in results) - -prompt_builder = PromptBuilder() \ No newline at end of file + + +prompt_builder = PromptBuilder() diff --git a/src/plugins/chat/relationship_manager.py b/src/plugins/chat/relationship_manager.py index 5552aee8c..9e7cafda0 100644 --- a/src/plugins/chat/relationship_manager.py +++ b/src/plugins/chat/relationship_manager.py @@ -1,6 +1,7 @@ import asyncio from typing import Optional, Union from typing import Optional, Union +from loguru import logger from ...common.database import Database from .message_base import UserInfo @@ -10,9 +11,10 @@ class Impression: traits: str = None called: str = None know_time: float = None - + relationship_value: float = None + class Relationship: user_id: int = None platform: str = None @@ -79,7 +81,7 @@ class RelationshipManager: # 保存到数据库 await self.storage_relationship(relationship) relationship.saved = True - + return relationship async def update_relationship_value(self, @@ -121,7 +123,7 @@ class RelationshipManager: # 如果不存在且提供了user_info,则创建新的关系 if user_info is not None: return await self.update_relationship(chat_stream=chat_stream, **kwargs) - print(f"\033[1;31m[关系管理]\033[0m 用户 {user_id}({platform}) 不存在,无法更新") + logger.warning(f"[关系管理] 用户 {user_id}({platform}) 不存在,无法更新") return None def get_relationship(self, @@ -151,7 +153,7 @@ class RelationshipManager: return self.relationships[key] else: return 0 - + async def load_relationship(self, data: dict) -> Relationship: """从数据库加载或创建新的关系对象""" # 确保data中有platform字段,如果没有则默认为'qq' @@ -163,14 +165,14 @@ class RelationshipManager: key = (rela.user_id, rela.platform) self.relationships[key] = rela return rela - + async def load_all_relationships(self): """加载所有关系对象""" db = Database.get_instance() all_relationships = db.db.relationships.find({}) for data in all_relationships: await self.load_relationship(data) - + async def _start_relationship_manager(self): """每5分钟自动保存一次关系数据""" db = Database.get_instance() @@ -179,15 +181,15 @@ class RelationshipManager: # 依次加载每条记录 for data in all_relationships: await self.load_relationship(data) - print(f"\033[1;32m[关系管理]\033[0m 已加载 {len(self.relationships)} 条关系记录") + logger.debug(f"[关系管理] 已加载 {len(self.relationships)} 条关系记录") while True: - print("\033[1;32m[关系管理]\033[0m 正在自动保存关系") + logger.debug("正在自动保存关系") await asyncio.sleep(300) # 等待300秒(5分钟) await self._save_all_relationships() - + async def _save_all_relationships(self): - """将所有关系数据保存到数据库""" + """将所有关系数据保存到数据库""" # 保存所有关系数据 for (userid, platform), relationship in self.relationships.items(): if not relationship.saved: @@ -203,7 +205,7 @@ class RelationshipManager: gender = relationship.gender age = relationship.age saved = relationship.saved - + db = Database.get_instance() db.db.relationships.update_one( {'user_id': user_id, 'platform': platform}, @@ -252,4 +254,4 @@ class RelationshipManager: return "某人" -relationship_manager = RelationshipManager() \ No newline at end of file +relationship_manager = RelationshipManager() diff --git a/src/plugins/chat/storage.py b/src/plugins/chat/storage.py index 614246d26..f403b2c8b 100644 --- a/src/plugins/chat/storage.py +++ b/src/plugins/chat/storage.py @@ -5,6 +5,8 @@ from ...common.database import Database from .message_base import MessageBase from .message import MessageSending, MessageRecv from .chat_stream import ChatStream +from loguru import logger + class MessageStorage: def __init__(self): @@ -24,7 +26,7 @@ class 
MessageStorage:
                 "topic": topic,
             }
             self.db.db.messages.insert_one(message_data)
-        except Exception as e:
-            print(f"\033[1;31m[错误]\033[0m 存储消息失败: {e}")
+        except Exception:
+            logger.exception("存储消息失败")

-# 如果需要其他存储相关的函数,可以在这里添加
\ No newline at end of file
+# 如果需要其他存储相关的函数,可以在这里添加
diff --git a/src/plugins/chat/topic_identifier.py b/src/plugins/chat/topic_identifier.py
index 3296d0895..a0c5bae30 100644
--- a/src/plugins/chat/topic_identifier.py
+++ b/src/plugins/chat/topic_identifier.py
@@ -4,9 +4,11 @@ from nonebot import get_driver

 from ..models.utils_model import LLM_request
 from .config import global_config
+from loguru import logger

 driver = get_driver()
-config = driver.config
+config = driver.config
+

 class TopicIdentifier:
     def __init__(self):
@@ -23,19 +25,20 @@ class TopicIdentifier:

         # 使用 LLM_request 类进行请求
         topic, _ = await self.llm_topic_judge.generate_response(prompt)
-        
+
         if not topic:
-            print("\033[1;31m[错误]\033[0m LLM API 返回为空")
+            logger.error("LLM API 返回为空")
             return None
-        
+
         # 直接在这里处理主题解析
         if not topic or topic == "无主题":
             return None
-        
+
         # 解析主题字符串为列表
         topic_list = [t.strip() for t in topic.split(",") if t.strip()]
-        
-        print(f"\033[1;32m[主题识别]\033[0m 主题: {topic_list}")
+
+        logger.info(f"主题: {topic_list}")
         return topic_list if topic_list else None

-topic_identifier = TopicIdentifier()
\ No newline at end of file
+
+topic_identifier = TopicIdentifier()
diff --git a/src/plugins/chat/utils.py b/src/plugins/chat/utils.py
index 495d0480d..a889ef177 100644
--- a/src/plugins/chat/utils.py
+++ b/src/plugins/chat/utils.py
@@ -7,6 +7,7 @@ from typing import Dict, List
 import jieba
 import numpy as np
 from nonebot import get_driver
+from loguru import logger

 from ..models.utils_model import LLM_request
 from ..utils.typo_generator import ChineseTypoGenerator
@@ -21,16 +22,16 @@ config = driver.config


 def db_message_to_str(message_dict: Dict) -> str:
-    print(f"message_dict: {message_dict}")
+    logger.debug(f"message_dict: {message_dict}")
     time_str = time.strftime("%m-%d %H:%M:%S", time.localtime(message_dict["time"]))
     try:
         name = "[(%s)%s]%s" % (
-        message_dict['user_id'], message_dict.get("user_nickname", ""), message_dict.get("user_cardname", ""))
+            message_dict['user_id'], message_dict.get("user_nickname", ""), message_dict.get("user_cardname", ""))
     except:
         name = message_dict.get("user_nickname", "") or f"用户{message_dict['user_id']}"
     content = message_dict.get("processed_plain_text", "")
     result = f"[{time_str}] {name}: {content}\n"
-    print(f"result: {result}")
+    logger.debug(f"result: {result}")
     return result


@@ -71,37 +72,43 @@ def calculate_information_content(text):


 def get_cloest_chat_from_db(db, length: int, timestamp: str):
-    """从数据库中获取最接近指定时间戳的聊天记录,并记录读取次数"""
-    chat_text = ''
+    """从数据库中获取最接近指定时间戳的聊天记录,并记录读取次数
+
+    Returns:
+        list: 消息记录字典列表,每个字典包含消息内容和时间信息
+    """
+    chat_records = []
     closest_record = db.db.messages.find_one({"time": {"$lte": timestamp}}, sort=[('time', -1)])
-    
-    if closest_record and closest_record.get('memorized', 0) < 4:
+
+    if closest_record and closest_record.get('memorized', 0) < 4:
         closest_time = closest_record['time']
         chat_id = closest_record['chat_id']  # 获取groupid
         # 获取该时间戳之后的length条消息,且groupid相同
-        chat_records = list(db.db.messages.find(
+        records = list(db.db.messages.find(
             {"time": {"$gt": closest_time}, "chat_id": chat_id}
         ).sort('time', 1).limit(length))
-        
+
         # 更新每条消息的memorized属性
-        for record in chat_records:
-            # 检查当前记录的memorized值
+        for record in records:
             current_memorized = record.get('memorized', 0)
             if current_memorized > 3:
-                # print(f"消息已读取3次,跳过")
+                logger.debug("消息已读取3次,跳过")
-                return ''
+                return []

             # 更新memorized值
             db.db.messages.update_one(
                 {"_id": record["_id"]},
                 {"$set": {"memorized": current_memorized + 1}}
             )
-        
-            chat_text += record["detailed_plain_text"]
-        
-        return chat_text
-    # print(f"消息已读取3次,跳过")
-    return ''
+
+            # 添加到记录列表中
+            chat_records.append({
+                'text': record["detailed_plain_text"],
+                'time': record["time"],
+                'group_id': record["group_id"]
+            })
+
+    return chat_records


 async def get_recent_group_messages(db, chat_id:str, limit: int = 12) -> list:
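(评注)上面的改动把 get_cloest_chat_from_db 的返回值从拼接字符串改为记录列表,并继续用 memorized 字段限制同一段聊天被记忆系统重复读取的次数。下面是一个脱离数据库的最小示意(假设性示例,非仓库实现),演示这一计数语义:

    # 假设性示例:memorized 表示该消息已被记忆系统读取的次数,
    # 任意一条超过 3 次时,整批记录直接放弃
    def pick_records(records: list) -> list:
        chat_records = []
        for record in records:
            if record.get("memorized", 0) > 3:
                return []  # 有消息读取次数超限,整批跳过
            record["memorized"] = record.get("memorized", 0) + 1  # 模拟 update_one 自增
            chat_records.append({"text": record["text"], "time": record["time"]})
        return chat_records

    msgs = [{"text": "hi", "time": 1, "memorized": 0}, {"text": "yo", "time": 2, "memorized": 3}]
    print(pick_records(msgs))  # 两条均未超限:各自计数+1后全部返回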
@@ -142,7 +149,7 @@ async def get_recent_group_messages(db, chat_id:str, limit: int = 12) -> list:
             )
             message_objects.append(msg)
         except KeyError:
-            print("[WARNING] 数据库中存在无效的消息")
+            logger.warning("数据库中存在无效的消息")
             continue

     # 按时间正序排列
@@ -259,11 +266,10 @@ def split_into_sentences_w_remove_punctuation(text: str) -> List[str]:
             sentence = sentence.replace(',', ' ').replace(',', ' ')
         sentences_done.append(sentence)

-    print(f"处理后的句子: {sentences_done}")
+    logger.info(f"处理后的句子: {sentences_done}")
     return sentences_done

-
 def random_remove_punctuation(text: str) -> str:
     """随机处理标点符号,模拟人类打字习惯

@@ -291,43 +297,70 @@ def random_remove_punctuation(text: str) -> str:
     return result

-
 def process_llm_response(text: str) -> List[str]:
     # processed_response = process_text_with_typos(content)
-    if len(text) > 300:
-        print(f"回复过长 ({len(text)} 字符),返回默认回复")
+    if len(text) > 200:
+        logger.warning(f"回复过长 ({len(text)} 字符),返回默认回复")
         return ['懒得说']

     # 处理长消息
     typo_generator = ChineseTypoGenerator(
-        error_rate=0.03,
-        min_freq=7,
-        tone_error_rate=0.2,
-        word_replace_rate=0.02
+        error_rate=global_config.chinese_typo_error_rate,
+        min_freq=global_config.chinese_typo_min_freq,
+        tone_error_rate=global_config.chinese_typo_tone_error_rate,
+        word_replace_rate=global_config.chinese_typo_word_replace_rate
     )
-    typoed_text = typo_generator.create_typo_sentence(text)[0]
-    sentences = split_into_sentences_w_remove_punctuation(typoed_text)
+    split_sentences = split_into_sentences_w_remove_punctuation(text)
+    sentences = []
+    for sentence in split_sentences:
+        if global_config.chinese_typo_enable:
+            typoed_text, typo_corrections = typo_generator.create_typo_sentence(sentence)
+            sentences.append(typoed_text)
+            if typo_corrections:
+                sentences.append(typo_corrections)
+        else:
+            sentences.append(sentence)

     # 检查分割后的消息数量是否过多(超过3条)
-    if len(sentences) > 4:
-        print(f"分割后消息数量过多 ({len(sentences)} 条),返回默认回复")
+
+    if len(sentences) > 5:
+        logger.warning(f"分割后消息数量过多 ({len(sentences)} 条),返回默认回复")
         return [f'{global_config.BOT_NICKNAME}不知道哦']

     return sentences

-def calculate_typing_time(input_string: str, chinese_time: float = 0.2, english_time: float = 0.1) -> float:
+def calculate_typing_time(input_string: str, chinese_time: float = 0.4, english_time: float = 0.2) -> float:
     """
     计算输入字符串所需的时间,中文和英文字符有不同的输入时间

     input_string (str): 输入的字符串
-    chinese_time (float): 中文字符的输入时间,默认为0.3秒
-    english_time (float): 英文字符的输入时间,默认为0.15秒
+    chinese_time (float): 中文字符的输入时间,默认为0.4秒
+    english_time (float): 英文字符的输入时间,默认为0.2秒
+
+    特殊情况:
+    - 如果只有一个中文字符,将使用3倍的中文输入时间
+    - 在所有输入结束后,额外加上回车时间0.3秒
     """
+    mood_manager = MoodManager.get_instance()
+    # 唤醒度(arousal)取值范围为-1到1
+    mood_arousal = mood_manager.current_mood.arousal
+    # 映射到约0.67~1.5倍的速度系数
+    typing_speed_multiplier = 1.5 ** mood_arousal  # 唤醒度为1时速度约1.5倍,为-1时约0.67倍
+    chinese_time *= 1 / typing_speed_multiplier
+    english_time *= 1 / typing_speed_multiplier
+    # 计算中文字符数
+    chinese_chars = sum(1 for char in input_string if '\u4e00' <= char <= '\u9fff')
+
+    # 如果只有一个中文字符,使用3倍时间
+    if chinese_chars == 1 and len(input_string.strip()) == 1:
+        return chinese_time * 3 + 0.3  # 加上回车时间
+
+    # 正常计算所有字符的输入时间
     total_time = 0.0
     for char in input_string:
         if '\u4e00' <= char <= '\u9fff':  # 判断是否为中文字符
             total_time += chinese_time
         else:  # 其他字符(如英文)
             total_time += english_time
-    return total_time
+    return total_time + 0.3  # 加上回车时间


 def cosine_similarity(v1, v2):
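(评注)上面 calculate_typing_time 的改动让模拟打字速度随情绪唤醒度变化,并在结尾加上 0.3 秒的"回车时间"。按补丁中的默认参数手算一个小例子(假设唤醒度为 0,即速度系数 1.5**0 == 1;示意代码,非仓库实现):

    # "你好hello" 共 7 个字符:2 个中文 + 5 个英文
    # 2*0.4 + 5*0.2 + 0.3(回车) = 2.1 秒
    text = "你好hello"
    chinese = sum(1 for c in text if '\u4e00' <= c <= '\u9fff')
    total = chinese * 0.4 + (len(text) - chinese) * 0.2 + 0.3
    print(f"{total:.1f} 秒")  # 输出: 2.1 秒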
diff --git a/src/plugins/chat/willing_manager.py b/src/plugins/chat/willing_manager.py
index d430ac74d..96cf74095 100644
--- a/src/plugins/chat/willing_manager.py
+++ b/src/plugins/chat/willing_manager.py
@@ -16,7 +16,10 @@ class WillingManager:
         self.chat_reply_willing: Dict[str, float] = {}  # 存储每个聊天流的回复意愿
         self._decay_task = None
         self._started = False
-        
+        self.group_reply_willing: Dict[int, float] = {}  # 存储每个群组的回复意愿
+        self.min_reply_willing = 0.01
+        self.attenuation_coefficient = 0.75
+
     async def _decay_reply_willing(self):
         """定期衰减回复意愿"""
         while True:
@@ -33,12 +36,9 @@ class WillingManager:
             return self.chat_reply_willing.get(stream.stream_id, 0)
         return 0

-    def set_willing(self, chat_id: str, willing: float):
-        """设置指定聊天流的回复意愿"""
-        self.chat_reply_willing[chat_id] = willing
-    def set_willing(self, chat_id: str, willing: float):
-        """设置指定聊天流的回复意愿"""
-        self.chat_reply_willing[chat_id] = willing
+    def set_willing(self, chat_id: int, willing: float):
+        """设置指定群组的回复意愿"""
+        self.group_reply_willing[chat_id] = willing

     async def change_reply_willing_received(self,
                                             chat_stream:ChatStream,
@@ -51,47 +51,68 @@ class WillingManager:
         # 获取或创建聊天流
         stream = chat_stream
         chat_id = stream.stream_id
+        group_id = stream.group_info.group_id if stream.group_info else None
+
+        # 若非目标回复群组(或私聊),则直接return
+        if group_id is None or group_id not in config.talk_allowed_groups:
+            reply_probability = 0
+            return reply_probability
+
         current_willing = self.chat_reply_willing.get(chat_id, 0)

-        # print(f"初始意愿: {current_willing}")
-        if is_mentioned_bot and current_willing < 1.0:
-            current_willing += 0.9
-            print(f"被提及, 当前意愿: {current_willing}")
-        elif is_mentioned_bot:
-            current_willing += 0.05
-            print(f"被重复提及, 当前意愿: {current_willing}")
-        
+        logger.debug(f"[{chat_id}]的初始回复意愿: {current_willing}")
+
+        # 根据消息类型(被cue/表情包)调控
+        if is_mentioned_bot:
+            current_willing = min(
+                3.0,
+                current_willing + 0.9
+            )
+            logger.debug(f"被提及, 当前意愿: {current_willing}")
+
         if is_emoji:
             current_willing *= 0.1
-            print(f"表情包, 当前意愿: {current_willing}")
-        
-        print(f"放大系数_interested_rate: {global_config.response_interested_rate_amplifier}")
-        interested_rate *= global_config.response_interested_rate_amplifier  #放大回复兴趣度
-        if interested_rate > 0.4:
-            # print(f"兴趣度: {interested_rate}, 当前意愿: {current_willing}")
-            current_willing += interested_rate-0.4
-        
-        current_willing *= global_config.response_willing_amplifier  #放大回复意愿
-        # print(f"放大系数_willing: {global_config.response_willing_amplifier}, 当前意愿: {current_willing}")
-        
-        reply_probability = max((current_willing - 0.45) * 2, 0)
-        
-        # 检查群组权限(如果是群聊)
-        if chat_stream.group_info:
-            if chat_stream.group_info.group_id not in config.talk_allowed_groups:
-                current_willing = 0
-                reply_probability = 0
-            
-            if chat_stream.group_info.group_id in config.talk_frequency_down_groups:
-                reply_probability = reply_probability / global_config.down_frequency_rate
+            logger.debug(f"表情包, 当前意愿: {current_willing}")
+
+        # 兴趣放大系数,若兴趣 > 0.4则增加回复概率
+        interested_rate_amplifier = global_config.response_interested_rate_amplifier
+        logger.debug(f"放大系数_interested_rate: {interested_rate_amplifier}")
+        interested_rate *= interested_rate_amplifier
+
+        current_willing += max(
+            0.0,
+            interested_rate - 0.4
+        )
+
+        # 回复意愿系数调控,独立乘区
+        willing_amplifier = max(
+            global_config.response_willing_amplifier,
+            self.min_reply_willing
+        )
+        current_willing *= willing_amplifier
+        logger.debug(f"放大系数_willing: {global_config.response_willing_amplifier}, 当前意愿: 
{current_willing}") + + # 回复概率迭代,保底0.01回复概率 + reply_probability = max( + (current_willing - 0.45) * 2, + self.min_reply_willing + ) + + # 降低目标低频群组回复概率 + down_frequency_rate = max( + 1.0, + global_config.down_frequency_rate + ) + if group_id in config.talk_frequency_down_groups: + reply_probability = reply_probability / down_frequency_rate reply_probability = min(reply_probability, 1) - if reply_probability < 0: - reply_probability = 0 - - self.chat_reply_willing[chat_id] = min(current_willing, 3.0) - self.chat_reply_willing[chat_id] = min(current_willing, 3.0) + + self.group_reply_willing[group_id] = min(current_willing, 3.0) + logger.debug(f"当前群组{group_id}回复概率:{reply_probability}") return reply_probability def change_reply_willing_sent(self, chat_stream:ChatStream): @@ -116,5 +135,6 @@ class WillingManager: self._decay_task = asyncio.create_task(self._decay_reply_willing()) self._started = True + # 创建全局实例 -willing_manager = WillingManager() +willing_manager = WillingManager() diff --git a/src/plugins/knowledege/knowledge_library.py b/src/plugins/knowledege/knowledge_library.py index d2408e24f..4bf6227bb 100644 --- a/src/plugins/knowledege/knowledge_library.py +++ b/src/plugins/knowledege/knowledge_library.py @@ -19,7 +19,7 @@ from src.common.database import Database # 从环境变量获取配置 Database.initialize( - host=os.getenv("MONGODB_HOST", "localhost"), + host=os.getenv("MONGODB_HOST", "127.0.0.1"), port=int(os.getenv("MONGODB_PORT", "27017")), db_name=os.getenv("DATABASE_NAME", "maimai"), username=os.getenv("MONGODB_USERNAME"), @@ -79,7 +79,7 @@ class KnowledgeLibrary: content = f.read() # 按1024字符分段 - segments = [content[i:i+600] for i in range(0, len(content), 600)] + segments = [content[i:i+600] for i in range(0, len(content), 300)] # 处理每个分段 for segment in segments: diff --git a/src/plugins/memory_system/__init__.py b/src/plugins/memory_system/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/plugins/memory_system/draw_memory.py b/src/plugins/memory_system/draw_memory.py index 006991bcb..6da330d95 100644 --- a/src/plugins/memory_system/draw_memory.py +++ b/src/plugins/memory_system/draw_memory.py @@ -7,6 +7,7 @@ import jieba import matplotlib.pyplot as plt import networkx as nx from dotenv import load_dotenv +from loguru import logger sys.path.append("C:/GitHub/MaiMBot") # 添加项目根目录到 Python 路径 from src.common.database import Database # 使用正确的导入语法 @@ -15,15 +16,15 @@ from src.common.database import Database # 使用正确的导入语法 env_path = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))), '.env.dev') load_dotenv(env_path) - + class Memory_graph: def __init__(self): self.G = nx.Graph() # 使用 networkx 的图结构 self.db = Database.get_instance() - + def connect_dot(self, concept1, concept2): self.G.add_edge(concept1, concept2) - + def add_dot(self, concept, memory): if concept in self.G: # 如果节点已存在,将新记忆添加到现有列表中 @@ -37,7 +38,7 @@ class Memory_graph: else: # 如果是新节点,创建新的记忆列表 self.G.add_node(concept, memory_items=[memory]) - + def get_dot(self, concept): # 检查节点是否存在于图中 if concept in self.G: @@ -45,20 +46,20 @@ class Memory_graph: node_data = self.G.nodes[concept] # print(node_data) # 创建新的Memory_dot对象 - return concept,node_data + return concept, node_data return None def get_related_item(self, topic, depth=1): if topic not in self.G: return [], [] - + first_layer_items = [] second_layer_items = [] - + # 获取相邻节点 neighbors = list(self.G.neighbors(topic)) # print(f"第一层: {topic}") - + # 获取当前节点的记忆项 node_data = self.get_dot(topic) if node_data: @@ -69,7 +70,7 @@ 
class Memory_graph: first_layer_items.extend(memory_items) else: first_layer_items.append(memory_items) - + # 只在depth=2时获取第二层记忆 if depth >= 2: # 获取相邻节点的记忆项 @@ -84,42 +85,44 @@ class Memory_graph: second_layer_items.extend(memory_items) else: second_layer_items.append(memory_items) - + return first_layer_items, second_layer_items - + def store_memory(self): for node in self.G.nodes(): dot_data = { "concept": node } self.db.db.store_memory_dots.insert_one(dot_data) - + @property def dots(self): # 返回所有节点对应的 Memory_dot 对象 return [self.get_dot(node) for node in self.G.nodes()] - - + def get_random_chat_from_db(self, length: int, timestamp: str): # 从数据库中根据时间戳获取离其最近的聊天记录 chat_text = '' closest_record = self.db.db.messages.find_one({"time": {"$lte": timestamp}}, sort=[('time', -1)]) # 调试输出 - print(f"距离time最近的消息时间: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(int(closest_record['time'])))}") - + logger.info( + f"距离time最近的消息时间: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(int(closest_record['time'])))}") + if closest_record: closest_time = closest_record['time'] group_id = closest_record['group_id'] # 获取groupid # 获取该时间戳之后的length条消息,且groupid相同 - chat_record = list(self.db.db.messages.find({"time": {"$gt": closest_time}, "group_id": group_id}).sort('time', 1).limit(length)) + chat_record = list( + self.db.db.messages.find({"time": {"$gt": closest_time}, "group_id": group_id}).sort('time', 1).limit( + length)) for record in chat_record: time_str = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(int(record['time']))) try: - displayname="[(%s)%s]%s" % (record["user_id"],record["user_nickname"],record["user_cardname"]) + displayname = "[(%s)%s]%s" % (record["user_id"], record["user_nickname"], record["user_cardname"]) except: - displayname=record["user_nickname"] or "用户" + str(record["user_id"]) + displayname = record["user_nickname"] or "用户" + str(record["user_id"]) chat_text += f'[{time_str}] {displayname}: {record["processed_plain_text"]}\n' # 添加发送者和时间信息 return chat_text - + return [] # 如果没有找到记录,返回空列表 def save_graph_to_db(self): @@ -166,138 +169,78 @@ def main(): password=os.getenv("MONGODB_PASSWORD", ""), auth_source=os.getenv("MONGODB_AUTH_SOURCE", "") ) - + memory_graph = Memory_graph() memory_graph.load_graph_from_db() - + # 只显示一次优化后的图形 visualize_graph_lite(memory_graph) - + while True: query = input("请输入新的查询概念(输入'退出'以结束):") if query.lower() == '退出': break first_layer_items, second_layer_items = memory_graph.get_related_item(query) if first_layer_items or second_layer_items: - print("\n第一层记忆:") + logger.debug("第一层记忆:") for item in first_layer_items: - print(item) - print("\n第二层记忆:") + logger.debug(item) + logger.debug("第二层记忆:") for item in second_layer_items: - print(item) + logger.debug(item) else: - print("未找到相关记忆。") - + logger.debug("未找到相关记忆。") + def segment_text(text): seg_text = list(jieba.cut(text)) - return seg_text + return seg_text + def find_topic(text, topic_num): prompt = f'这是一段文字:{text}。请你从这段话中总结出{topic_num}个话题,帮我列出来,用逗号隔开,尽可能精简。只需要列举{topic_num}个话题就好,不要告诉我其他内容。' return prompt + def topic_what(text, topic): prompt = f'这是一段文字:{text}。我想知道这记忆里有什么关于{topic}的话题,帮我总结成一句自然的话,可以包含时间和人物。只输出这句话就好' return prompt -def visualize_graph(memory_graph: Memory_graph, color_by_memory: bool = False): - # 设置中文字体 - plt.rcParams['font.sans-serif'] = ['SimHei'] # 用来正常显示中文标签 - plt.rcParams['axes.unicode_minus'] = False # 用来正常显示负号 - - G = memory_graph.G - - # 保存图到本地 - nx.write_gml(G, "memory_graph.gml") # 保存为 GML 格式 - - # 根据连接条数或记忆数量设置节点颜色 - node_colors = [] - nodes = list(G.nodes()) # 获取图中实际的节点列表 - 
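Reviewer's note: the visualisation changes in this file converge on a single degree-based colour gradient. A toy sketch of that gradient, assuming nothing beyond networkx and matplotlib; the 0.3 exponent is the one introduced in visualize_graph_lite below.

import matplotlib.pyplot as plt
import networkx as nx

G = nx.barbell_graph(5, 2)  # toy graph with both hubs and bridge nodes

max_degree = max(d for _, d in G.degree())
colors = []
for node in G.nodes():
    red = (G.degree(node) / max_degree) ** 0.3  # the 0.3 exponent flattens the gradient
    colors.append((min(1.0, red), 0.1, max(0.0, 1.0 - red)))

nx.draw(G, nx.spring_layout(G, k=1, iterations=50),
        node_color=colors, with_labels=True,
        edge_color='gray', width=0.5, alpha=0.9)
plt.show()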
- if color_by_memory: - # 计算每个节点的记忆数量 - memory_counts = [] - for node in nodes: - memory_items = G.nodes[node].get('memory_items', []) - if isinstance(memory_items, list): - count = len(memory_items) - else: - count = 1 if memory_items else 0 - memory_counts.append(count) - max_memories = max(memory_counts) if memory_counts else 1 - - for count in memory_counts: - # 使用不同的颜色方案:红色表示记忆多,蓝色表示记忆少 - if max_memories > 0: - intensity = min(1.0, count / max_memories) - color = (intensity, 0, 1.0 - intensity) # 从蓝色渐变到红色 - else: - color = (0, 0, 1) # 如果没有记忆,则为蓝色 - node_colors.append(color) - else: - # 使用原来的连接数量着色方案 - max_degree = max(G.degree(), key=lambda x: x[1])[1] if G.degree() else 1 - for node in nodes: - degree = G.degree(node) - if max_degree > 0: - red = min(1.0, degree / max_degree) - blue = 1.0 - red - color = (red, 0, blue) - else: - color = (0, 0, 1) - node_colors.append(color) - - # 绘制图形 - plt.figure(figsize=(12, 8)) - pos = nx.spring_layout(G, k=1, iterations=50) - nx.draw(G, pos, - with_labels=True, - node_color=node_colors, - node_size=200, - font_size=10, - font_family='SimHei', - font_weight='bold') - - title = '记忆图谱可视化 - ' + ('按记忆数量着色' if color_by_memory else '按连接数量着色') - plt.title(title, fontsize=16, fontfamily='SimHei') - plt.show() - def visualize_graph_lite(memory_graph: Memory_graph, color_by_memory: bool = False): # 设置中文字体 plt.rcParams['font.sans-serif'] = ['SimHei'] # 用来正常显示中文标签 plt.rcParams['axes.unicode_minus'] = False # 用来正常显示负号 - + G = memory_graph.G - + # 创建一个新图用于可视化 H = G.copy() - + # 移除只有一条记忆的节点和连接数少于3的节点 nodes_to_remove = [] for node in H.nodes(): memory_items = H.nodes[node].get('memory_items', []) memory_count = len(memory_items) if isinstance(memory_items, list) else (1 if memory_items else 0) degree = H.degree(node) - if memory_count < 5 or degree < 2: # 改为小于2而不是小于等于2 + if memory_count < 3 or degree < 2: # 改为小于2而不是小于等于2 nodes_to_remove.append(node) - + H.remove_nodes_from(nodes_to_remove) - + # 如果过滤后没有节点,则返回 if len(H.nodes()) == 0: - print("过滤后没有符合条件的节点可显示") + logger.debug("过滤后没有符合条件的节点可显示") return - + # 保存图到本地 - nx.write_gml(H, "memory_graph.gml") # 保存为 GML 格式 + # nx.write_gml(H, "memory_graph.gml") # 保存为 GML 格式 # 计算节点大小和颜色 node_colors = [] node_sizes = [] nodes = list(H.nodes()) - + # 获取最大记忆数和最大度数用于归一化 max_memories = 1 max_degree = 1 @@ -307,7 +250,7 @@ def visualize_graph_lite(memory_graph: Memory_graph, color_by_memory: bool = Fal degree = H.degree(node) max_memories = max(max_memories, memory_count) max_degree = max(max_degree, degree) - + # 计算每个节点的大小和颜色 for node in nodes: # 计算节点大小(基于记忆数量) @@ -315,37 +258,38 @@ def visualize_graph_lite(memory_graph: Memory_graph, color_by_memory: bool = Fal memory_count = len(memory_items) if isinstance(memory_items, list) else (1 if memory_items else 0) # 使用指数函数使变化更明显 ratio = memory_count / max_memories - size = 500 + 5000 * (ratio ** 2) # 使用平方函数使差异更明显 + size = 500 + 5000 * (ratio) # 使用1.5次方函数使差异不那么明显 node_sizes.append(size) - + # 计算节点颜色(基于连接数) degree = H.degree(node) # 红色分量随着度数增加而增加 - red = min(1.0, degree / max_degree) + r = (degree / max_degree) ** 0.3 + red = min(1.0, r) # 蓝色分量随着度数减少而增加 - blue = 1.0 - red - color = (red, 0, blue) + blue = max(0.0, 1 - red) + # blue = 1 + color = (red, 0.1, blue) node_colors.append(color) - + # 绘制图形 plt.figure(figsize=(12, 8)) - pos = nx.spring_layout(H, k=1.5, iterations=50) # 增加k值使节点分布更开 - nx.draw(H, pos, - with_labels=True, - node_color=node_colors, - node_size=node_sizes, - font_size=10, - font_family='SimHei', - font_weight='bold', - edge_color='gray', - width=0.5, - alpha=0.7) - + 
pos = nx.spring_layout(H, k=1, iterations=50) # 增加k值使节点分布更开 + nx.draw(H, pos, + with_labels=True, + node_color=node_colors, + node_size=node_sizes, + font_size=10, + font_family='SimHei', + font_weight='bold', + edge_color='gray', + width=0.5, + alpha=0.9) + title = '记忆图谱可视化 - 节点大小表示记忆数量,颜色表示连接数' plt.title(title, fontsize=16, fontfamily='SimHei') plt.show() - - - + + if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/src/plugins/memory_system/memory.py b/src/plugins/memory_system/memory.py index f88888aa4..0730f9e57 100644 --- a/src/plugins/memory_system/memory.py +++ b/src/plugins/memory_system/memory.py @@ -7,6 +7,7 @@ import time import jieba import networkx as nx +from loguru import logger from ...common.database import Database # 使用正确的导入语法 from ..chat.config import global_config from ..chat.utils import ( @@ -22,29 +23,49 @@ class Memory_graph: def __init__(self): self.G = nx.Graph() # 使用 networkx 的图结构 self.db = Database.get_instance() - + def connect_dot(self, concept1, concept2): - # 如果边已存在,增加 strength + # 避免自连接 + if concept1 == concept2: + return + + current_time = datetime.datetime.now().timestamp() + + # 如果边已存在,增加 strength if self.G.has_edge(concept1, concept2): self.G[concept1][concept2]['strength'] = self.G[concept1][concept2].get('strength', 1) + 1 + # 更新最后修改时间 + self.G[concept1][concept2]['last_modified'] = current_time else: - # 如果是新边,初始化 strength 为 1 - self.G.add_edge(concept1, concept2, strength=1) - + # 如果是新边,初始化 strength 为 1 + self.G.add_edge(concept1, concept2, + strength=1, + created_time=current_time, # 添加创建时间 + last_modified=current_time) # 添加最后修改时间 + def add_dot(self, concept, memory): + current_time = datetime.datetime.now().timestamp() + if concept in self.G: - # 如果节点已存在,将新记忆添加到现有列表中 if 'memory_items' in self.G.nodes[concept]: if not isinstance(self.G.nodes[concept]['memory_items'], list): - # 如果当前不是列表,将其转换为列表 self.G.nodes[concept]['memory_items'] = [self.G.nodes[concept]['memory_items']] self.G.nodes[concept]['memory_items'].append(memory) + # 更新最后修改时间 + self.G.nodes[concept]['last_modified'] = current_time else: self.G.nodes[concept]['memory_items'] = [memory] + # 如果节点存在但没有memory_items,说明是第一次添加memory,设置created_time + if 'created_time' not in self.G.nodes[concept]: + self.G.nodes[concept]['created_time'] = current_time + self.G.nodes[concept]['last_modified'] = current_time else: - # 如果是新节点,创建新的记忆列表 - self.G.add_node(concept, memory_items=[memory]) - + # 如果是新节点,创建新的记忆列表 + self.G.add_node(concept, + memory_items=[memory], + created_time=current_time, # 添加创建时间 + last_modified=current_time) # 添加最后修改时间 + def get_dot(self, concept): # 检查节点是否存在于图中 if concept in self.G: @@ -56,13 +77,13 @@ class Memory_graph: def get_related_item(self, topic, depth=1): if topic not in self.G: return [], [] - + first_layer_items = [] second_layer_items = [] - + # 获取相邻节点 neighbors = list(self.G.neighbors(topic)) - + # 获取当前节点的记忆项 node_data = self.get_dot(topic) if node_data: @@ -73,7 +94,7 @@ class Memory_graph: first_layer_items.extend(memory_items) else: first_layer_items.append(memory_items) - + # 只在depth=2时获取第二层记忆 if depth >= 2: # 获取相邻节点的记忆项 @@ -87,9 +108,9 @@ class Memory_graph: second_layer_items.extend(memory_items) else: second_layer_items.append(memory_items) - + return first_layer_items, second_layer_items - + @property def dots(self): # 返回所有节点对应的 Memory_dot 对象 @@ -99,43 +120,43 @@ class Memory_graph: """随机删除指定话题中的一条记忆,如果话题没有记忆则移除该话题节点""" if topic not in self.G: return None - + # 获取话题节点数据 node_data = self.G.nodes[topic] - + # 如果节点存在memory_items 
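Reviewer's note: the memory.py diff that follows adds created_time/last_modified bookkeeping and strength accumulation to the concept graph. A minimal sketch of that pattern on a bare networkx graph (module-level names here are illustrative, not the project's API):

import time

import networkx as nx

G = nx.Graph()

def connect_dot(concept1: str, concept2: str) -> None:
    if concept1 == concept2:  # the patch explicitly skips self-loops
        return
    now = time.time()
    if G.has_edge(concept1, concept2):
        # re-connecting strengthens the edge instead of duplicating it
        G[concept1][concept2]['strength'] = G[concept1][concept2].get('strength', 1) + 1
        G[concept1][concept2]['last_modified'] = now
    else:
        G.add_edge(concept1, concept2, strength=1,
                   created_time=now, last_modified=now)

connect_dot("梨", "苹果")
connect_dot("梨", "苹果")  # second call bumps strength to 2
print(G["梨"]["苹果"])      # {'strength': 2, 'created_time': ..., 'last_modified': ...}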
if 'memory_items' in node_data: memory_items = node_data['memory_items'] - + # 确保memory_items是列表 if not isinstance(memory_items, list): memory_items = [memory_items] if memory_items else [] - + # 如果有记忆项可以删除 if memory_items: # 随机选择一个记忆项删除 removed_item = random.choice(memory_items) memory_items.remove(removed_item) - + # 更新节点的记忆项 if memory_items: self.G.nodes[topic]['memory_items'] = memory_items else: # 如果没有记忆项了,删除整个节点 self.G.remove_node(topic) - + return removed_item - + return None # 海马体 class Hippocampus: - def __init__(self,memory_graph:Memory_graph): + def __init__(self, memory_graph: Memory_graph): self.memory_graph = memory_graph - self.llm_topic_judge = LLM_request(model = global_config.llm_topic_judge,temperature=0.5) - self.llm_summary_by_topic = LLM_request(model = global_config.llm_summary_by_topic,temperature=0.5) - + self.llm_topic_judge = LLM_request(model=global_config.llm_topic_judge, temperature=0.5) + self.llm_summary_by_topic = LLM_request(model=global_config.llm_summary_by_topic, temperature=0.5) + def get_all_node_names(self) -> list: """获取记忆图中所有节点的名字列表 @@ -156,98 +177,167 @@ class Hippocampus: """计算边的特征值""" nodes = sorted([source, target]) return hash(f"{nodes[0]}:{nodes[1]}") + + def get_memory_sample(self, chat_size=20, time_frequency: dict = {'near': 2, 'mid': 4, 'far': 3}): + """获取记忆样本 - def get_memory_sample(self,chat_size=20,time_frequency:dict={'near':2,'mid':4,'far':3}): + Returns: + list: 消息记录列表,每个元素是一个消息记录字典列表 + """ current_timestamp = datetime.datetime.now().timestamp() - chat_text = [] - #短期:1h 中期:4h 长期:24h - for _ in range(time_frequency.get('near')): # 循环10次 - random_time = current_timestamp - random.randint(1, 3600) # 随机时间 - chat_ = get_cloest_chat_from_db(db=self.memory_graph.db, length=chat_size, timestamp=random_time) - chat_text.append(chat_) - for _ in range(time_frequency.get('mid')): # 循环10次 - random_time = current_timestamp - random.randint(3600, 3600*4) # 随机时间 - chat_ = get_cloest_chat_from_db(db=self.memory_graph.db, length=chat_size, timestamp=random_time) - chat_text.append(chat_) - for _ in range(time_frequency.get('far')): # 循环10次 - random_time = current_timestamp - random.randint(3600*4, 3600*24) # 随机时间 - chat_ = get_cloest_chat_from_db(db=self.memory_graph.db, length=chat_size, timestamp=random_time) - chat_text.append(chat_) - return [text for text in chat_text if text] - - async def memory_compress(self, input_text, compress_rate=0.1): - print(input_text) + chat_samples = [] + + # 短期:1h 中期:4h 长期:24h + for _ in range(time_frequency.get('near')): + random_time = current_timestamp - random.randint(1, 3600) + messages = get_cloest_chat_from_db(db=self.memory_graph.db, length=chat_size, timestamp=random_time) + if messages: + chat_samples.append(messages) + + for _ in range(time_frequency.get('mid')): + random_time = current_timestamp - random.randint(3600, 3600 * 4) + messages = get_cloest_chat_from_db(db=self.memory_graph.db, length=chat_size, timestamp=random_time) + if messages: + chat_samples.append(messages) + + for _ in range(time_frequency.get('far')): + random_time = current_timestamp - random.randint(3600 * 4, 3600 * 24) + messages = get_cloest_chat_from_db(db=self.memory_graph.db, length=chat_size, timestamp=random_time) + if messages: + chat_samples.append(messages) + + return chat_samples + + async def memory_compress(self, messages: list, compress_rate=0.1): + """压缩消息记录为记忆 - #获取topics + Returns: + tuple: (压缩记忆集合, 相似主题字典) + """ + if not messages: + return set(), {} + + # 合并消息文本,同时保留时间信息 + input_text = "" + time_info = "" + # 
计算最早和最晚时间 + earliest_time = min(msg['time'] for msg in messages) + latest_time = max(msg['time'] for msg in messages) + + earliest_dt = datetime.datetime.fromtimestamp(earliest_time) + latest_dt = datetime.datetime.fromtimestamp(latest_time) + + # 如果是同一年 + if earliest_dt.year == latest_dt.year: + earliest_str = earliest_dt.strftime("%m-%d %H:%M:%S") + latest_str = latest_dt.strftime("%m-%d %H:%M:%S") + time_info += f"是在{earliest_dt.year}年,{earliest_str} 到 {latest_str} 的对话:\n" + else: + earliest_str = earliest_dt.strftime("%Y-%m-%d %H:%M:%S") + latest_str = latest_dt.strftime("%Y-%m-%d %H:%M:%S") + time_info += f"是从 {earliest_str} 到 {latest_str} 的对话:\n" + + for msg in messages: + input_text += f"{msg['text']}\n" + + logger.debug(input_text) + topic_num = self.calculate_topic_num(input_text, compress_rate) topics_response = await self.llm_topic_judge.generate_response(self.find_topic_llm(input_text, topic_num)) - # 修改话题处理逻辑 - # 定义需要过滤的关键词 - filter_keywords = ['表情包', '图片', '回复', '聊天记录'] - + # 过滤topics - topics = [topic.strip() for topic in topics_response[0].replace(",", ",").replace("、", ",").replace(" ", ",").split(",") if topic.strip()] + filter_keywords = global_config.memory_ban_words + topics = [topic.strip() for topic in + topics_response[0].replace(",", ",").replace("、", ",").replace(" ", ",").split(",") if topic.strip()] filtered_topics = [topic for topic in topics if not any(keyword in topic for keyword in filter_keywords)] - - # print(f"原始话题: {topics}") - print(f"过滤后话题: {filtered_topics}") - - # 使用过滤后的话题继续处理 + + logger.info(f"过滤后话题: {filtered_topics}") + + # 创建所有话题的请求任务 tasks = [] for topic in filtered_topics: - topic_what_prompt = self.topic_what(input_text, topic) - # 创建异步任务 + topic_what_prompt = self.topic_what(input_text, topic, time_info) task = self.llm_summary_by_topic.generate_response_async(topic_what_prompt) tasks.append((topic.strip(), task)) - + # 等待所有任务完成 compressed_memory = set() + similar_topics_dict = {} # 存储每个话题的相似主题列表 for topic, task in tasks: response = await task if response: compressed_memory.add((topic, response[0])) + # 为每个话题查找相似的已存在主题 + existing_topics = list(self.memory_graph.G.nodes()) + similar_topics = [] - return compressed_memory + for existing_topic in existing_topics: + topic_words = set(jieba.cut(topic)) + existing_words = set(jieba.cut(existing_topic)) + + all_words = topic_words | existing_words + v1 = [1 if word in topic_words else 0 for word in all_words] + v2 = [1 if word in existing_words else 0 for word in all_words] + + similarity = cosine_similarity(v1, v2) + + if similarity >= 0.6: + similar_topics.append((existing_topic, similarity)) + + similar_topics.sort(key=lambda x: x[1], reverse=True) + similar_topics = similar_topics[:5] + similar_topics_dict[topic] = similar_topics - def calculate_topic_num(self,text, compress_rate): + return compressed_memory, similar_topics_dict + + def calculate_topic_num(self, text, compress_rate): """计算文本的话题数量""" information_content = calculate_information_content(text) - topic_by_length = text.count('\n')*compress_rate - topic_by_information_content = max(1, min(5, int((information_content-3) * 2))) - topic_num = int((topic_by_length + topic_by_information_content)/2) - print(f"topic_by_length: {topic_by_length}, topic_by_information_content: {topic_by_information_content}, topic_num: {topic_num}") + topic_by_length = text.count('\n') * compress_rate + topic_by_information_content = max(1, min(5, int((information_content - 3) * 2))) + topic_num = int((topic_by_length + topic_by_information_content) / 2) + 
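Reviewer's note: the similar-topic lookup added to memory_compress segments both topic strings with jieba, builds binary bag-of-words vectors over their joint vocabulary, and compares them with cosine similarity. A self-contained sketch (the 0.6 threshold and top-5 cut are the patch's values; the sample topics are made up):

import math

import jieba

def topic_similarity(topic_a: str, topic_b: str) -> float:
    words_a = set(jieba.cut(topic_a))
    words_b = set(jieba.cut(topic_b))
    vocab = words_a | words_b
    v1 = [1 if w in words_a else 0 for w in vocab]
    v2 = [1 if w in words_b else 0 for w in vocab]
    dot = sum(a * b for a, b in zip(v1, v2))
    n1 = math.sqrt(sum(v1))  # entries are 0/1, so sum(a*a) == sum(a)
    n2 = math.sqrt(sum(v2))
    return dot / (n1 * n2) if n1 and n2 else 0.0

existing_topics = ["人工智能模型", "水果摊", "AI绘画"]
matches = sorted(((t, topic_similarity("人工智能", t)) for t in existing_topics),
                 key=lambda x: x[1], reverse=True)
print([m for m in matches if m[1] >= 0.6][:5])  # threshold 0.6, top 5, as in the patch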
logger.debug( + f"topic_by_length: {topic_by_length}, topic_by_information_content: {topic_by_information_content}, " + f"topic_num: {topic_num}") return topic_num - async def operation_build_memory(self,chat_size=20): - # 最近消息获取频率 - time_frequency = {'near':2,'mid':4,'far':2} - memory_sample = self.get_memory_sample(chat_size,time_frequency) + async def operation_build_memory(self, chat_size=20): + time_frequency = {'near': 3, 'mid': 8, 'far': 5} + memory_samples = self.get_memory_sample(chat_size, time_frequency) - for i, input_text in enumerate(memory_sample, 1): - # 加载进度可视化 + for i, messages in enumerate(memory_samples, 1): all_topics = [] - progress = (i / len(memory_sample)) * 100 + # 加载进度可视化 + progress = (i / len(memory_samples)) * 100 bar_length = 30 - filled_length = int(bar_length * i // len(memory_sample)) + filled_length = int(bar_length * i // len(memory_samples)) bar = '█' * filled_length + '-' * (bar_length - filled_length) - print(f"\n进度: [{bar}] {progress:.1f}% ({i}/{len(memory_sample)})") + logger.debug(f"进度: [{bar}] {progress:.1f}% ({i}/{len(memory_samples)})") - # 生成压缩后记忆 ,表现为 (话题,记忆) 的元组 - compressed_memory = set() compress_rate = 0.1 - compressed_memory = await self.memory_compress(input_text, compress_rate) - print(f"\033[1;33m压缩后记忆数量\033[0m: {len(compressed_memory)}") + compressed_memory, similar_topics_dict = await self.memory_compress(messages, compress_rate) + logger.info(f"压缩后记忆数量: {len(compressed_memory)},似曾相识的话题: {len(similar_topics_dict)}") - # 将记忆加入到图谱中 for topic, memory in compressed_memory: - print(f"\033[1;32m添加节点\033[0m: {topic}") + logger.info(f"添加节点: {topic}") self.memory_graph.add_dot(topic, memory) - all_topics.append(topic) # 收集所有话题 + all_topics.append(topic) + + # 连接相似的已存在主题 + if topic in similar_topics_dict: + similar_topics = similar_topics_dict[topic] + for similar_topic, similarity in similar_topics: + if topic != similar_topic: + strength = int(similarity * 10) + logger.info(f"连接相似节点: {topic} 和 {similar_topic} (强度: {strength})") + self.memory_graph.G.add_edge(topic, similar_topic, strength=strength) + + # 连接同批次的相关话题 for i in range(len(all_topics)): for j in range(i + 1, len(all_topics)): - print(f"\033[1;32m连接节点\033[0m: {all_topics[i]} 和 {all_topics[j]}") + logger.info(f"连接同批次节点: {all_topics[i]} 和 {all_topics[j]}") self.memory_graph.connect_dot(all_topics[i], all_topics[j]) - + self.sync_memory_to_db() def sync_memory_to_db(self): @@ -255,52 +345,54 @@ class Hippocampus: # 获取数据库中所有节点和内存中所有节点 db_nodes = list(self.memory_graph.db.db.graph_data.nodes.find()) memory_nodes = list(self.memory_graph.G.nodes(data=True)) - - # 转换数据库节点为字典格式,方便查找 + + # 转换数据库节点为字典格式,方便查找 db_nodes_dict = {node['concept']: node for node in db_nodes} - + # 检查并更新节点 for concept, data in memory_nodes: memory_items = data.get('memory_items', []) if not isinstance(memory_items, list): memory_items = [memory_items] if memory_items else [] - + # 计算内存中节点的特征值 memory_hash = self.calculate_node_hash(concept, memory_items) - + + # 获取时间信息 + created_time = data.get('created_time', datetime.datetime.now().timestamp()) + last_modified = data.get('last_modified', datetime.datetime.now().timestamp()) + if concept not in db_nodes_dict: - # 数据库中缺少的节点,添加 + # 数据库中缺少的节点,添加 node_data = { 'concept': concept, 'memory_items': memory_items, - 'hash': memory_hash + 'hash': memory_hash, + 'created_time': created_time, + 'last_modified': last_modified } self.memory_graph.db.db.graph_data.nodes.insert_one(node_data) else: # 获取数据库中节点的特征值 db_node = db_nodes_dict[concept] db_hash = db_node.get('hash', None) - 
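Reviewer's note: sync_memory_to_db (below) avoids rewriting unchanged nodes by hashing the concept plus its sorted memory items and comparing against the stored hash. A condensed sketch of that idea; a plain dict stands in for the MongoDB collection, so the real insert_one/update_one calls are elided.

from typing import Any

fake_db: "dict[str, dict[str, Any]]" = {}  # concept -> stored document

def node_hash(concept: str, memory_items: "list[str]") -> int:
    return hash(f"{concept}:{'|'.join(sorted(memory_items))}")

def sync_node(concept: str, memory_items: "list[str]") -> str:
    h = node_hash(concept, memory_items)
    doc = fake_db.get(concept)
    if doc is None:
        fake_db[concept] = {'memory_items': memory_items, 'hash': h}
        return 'inserted'
    if doc['hash'] != h:
        fake_db[concept] = {'memory_items': memory_items, 'hash': h}
        return 'updated'
    return 'unchanged'  # hash match -> skip the write round-trip

print(sync_node("苹果", ["是水果"]))          # inserted
print(sync_node("苹果", ["是水果"]))          # unchanged
print(sync_node("苹果", ["是水果", "像梨"]))  # updated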
- # 如果特征值不同,则更新节点 + + # 如果特征值不同,则更新节点 if db_hash != memory_hash: self.memory_graph.db.db.graph_data.nodes.update_one( {'concept': concept}, {'$set': { 'memory_items': memory_items, - 'hash': memory_hash + 'hash': memory_hash, + 'created_time': created_time, + 'last_modified': last_modified }} ) - - # 检查并删除数据库中多余的节点 - memory_concepts = set(node[0] for node in memory_nodes) - for db_node in db_nodes: - if db_node['concept'] not in memory_concepts: - self.memory_graph.db.db.graph_data.nodes.delete_one({'concept': db_node['concept']}) - + # 处理边的信息 db_edges = list(self.memory_graph.db.db.graph_data.edges.find()) - memory_edges = list(self.memory_graph.G.edges()) - + memory_edges = list(self.memory_graph.G.edges(data=True)) + # 创建边的哈希值字典 db_edge_dict = {} for edge in db_edges: @@ -309,20 +401,26 @@ class Hippocampus: 'hash': edge_hash, 'strength': edge.get('strength', 1) } - + # 检查并更新边 - for source, target in memory_edges: + for source, target, data in memory_edges: edge_hash = self.calculate_edge_hash(source, target) edge_key = (source, target) - strength = self.memory_graph.G[source][target].get('strength', 1) + strength = data.get('strength', 1) + # 获取边的时间信息 + created_time = data.get('created_time', datetime.datetime.now().timestamp()) + last_modified = data.get('last_modified', datetime.datetime.now().timestamp()) + if edge_key not in db_edge_dict: # 添加新边 edge_data = { 'source': source, 'target': target, 'strength': strength, - 'hash': edge_hash + 'hash': edge_hash, + 'created_time': created_time, + 'last_modified': last_modified } self.memory_graph.db.db.graph_data.edges.insert_one(edge_data) else: @@ -332,25 +430,17 @@ class Hippocampus: {'source': source, 'target': target}, {'$set': { 'hash': edge_hash, - 'strength': strength + 'strength': strength, + 'created_time': created_time, + 'last_modified': last_modified }} ) - - # 删除多余的边 - memory_edge_set = set(memory_edges) - for edge_key in db_edge_dict: - if edge_key not in memory_edge_set: - source, target = edge_key - self.memory_graph.db.db.graph_data.edges.delete_one({ - 'source': source, - 'target': target - }) def sync_memory_from_db(self): """从数据库同步数据到内存中的图结构""" # 清空当前图 self.memory_graph.G.clear() - + # 从数据库加载所有节点 nodes = self.memory_graph.db.db.graph_data.nodes.find() for node in nodes: @@ -359,61 +449,107 @@ class Hippocampus: # 确保memory_items是列表 if not isinstance(memory_items, list): memory_items = [memory_items] if memory_items else [] - # 添加节点到图中 - self.memory_graph.G.add_node(concept, memory_items=memory_items) + # 获取时间信息 + created_time = node.get('created_time', datetime.datetime.now().timestamp()) + last_modified = node.get('last_modified', datetime.datetime.now().timestamp()) + + # 添加节点到图中 + self.memory_graph.G.add_node(concept, + memory_items=memory_items, + created_time=created_time, + last_modified=last_modified) + # 从数据库加载所有边 edges = self.memory_graph.db.db.graph_data.edges.find() for edge in edges: source = edge['source'] target = edge['target'] - strength = edge.get('strength', 1) # 获取 strength,默认为 1 + strength = edge.get('strength', 1) # 获取 strength,默认为 1 + + # 获取时间信息 + created_time = edge.get('created_time', datetime.datetime.now().timestamp()) + last_modified = edge.get('last_modified', datetime.datetime.now().timestamp()) + # 只有当源节点和目标节点都存在时才添加边 if source in self.memory_graph.G and target in self.memory_graph.G: - self.memory_graph.G.add_edge(source, target, strength=strength) - + self.memory_graph.G.add_edge(source, target, + strength=strength, + created_time=created_time, + last_modified=last_modified) + async def 
operation_forget_topic(self, percentage=0.1): - """随机选择图中一定比例的节点进行检查,根据条件决定是否遗忘""" - # 获取所有节点 + """随机选择图中一定比例的节点和边进行检查,根据时间条件决定是否遗忘""" all_nodes = list(self.memory_graph.G.nodes()) - # 计算要检查的节点数量 - check_count = max(1, int(len(all_nodes) * percentage)) - # 随机选择节点 - nodes_to_check = random.sample(all_nodes, check_count) + all_edges = list(self.memory_graph.G.edges()) - forgotten_nodes = [] + check_nodes_count = max(1, int(len(all_nodes) * percentage)) + check_edges_count = max(1, int(len(all_edges) * percentage)) + + nodes_to_check = random.sample(all_nodes, check_nodes_count) + edges_to_check = random.sample(all_edges, check_edges_count) + + edge_changes = {'weakened': 0, 'removed': 0} + node_changes = {'reduced': 0, 'removed': 0} + + current_time = datetime.datetime.now().timestamp() + + # 检查并遗忘连接 + logger.info("开始检查连接...") + for source, target in edges_to_check: + edge_data = self.memory_graph.G[source][target] + last_modified = edge_data.get('last_modified') + # print(source,target) + # print(f"float(last_modified):{float(last_modified)}" ) + # print(f"current_time:{current_time}") + # print(f"current_time - last_modified:{current_time - last_modified}") + if current_time - last_modified > 3600*24: # test + current_strength = edge_data.get('strength', 1) + new_strength = current_strength - 1 + + if new_strength <= 0: + self.memory_graph.G.remove_edge(source, target) + edge_changes['removed'] += 1 + logger.info(f"\033[1;31m[连接移除]\033[0m {source} - {target}") + else: + edge_data['strength'] = new_strength + edge_data['last_modified'] = current_time + edge_changes['weakened'] += 1 + logger.info(f"\033[1;34m[连接减弱]\033[0m {source} - {target} (强度: {current_strength} -> {new_strength})") + + # 检查并遗忘话题 + logger.info("开始检查节点...") for node in nodes_to_check: - # 获取节点的连接数 - connections = self.memory_graph.G.degree(node) + node_data = self.memory_graph.G.nodes[node] + last_modified = node_data.get('last_modified', current_time) - # 获取节点的内容条数 - memory_items = self.memory_graph.G.nodes[node].get('memory_items', []) - if not isinstance(memory_items, list): - memory_items = [memory_items] if memory_items else [] - content_count = len(memory_items) - - # 检查连接强度 - weak_connections = True - if connections > 1: # 只有当连接数大于1时才检查强度 - for neighbor in self.memory_graph.G.neighbors(node): - strength = self.memory_graph.G[node][neighbor].get('strength', 1) - if strength > 2: - weak_connections = False - break - - # 如果满足遗忘条件 - if (connections <= 1 and weak_connections) or content_count <= 2: - removed_item = self.memory_graph.forget_topic(node) - if removed_item: - forgotten_nodes.append((node, removed_item)) - print(f"遗忘节点 {node} 的记忆: {removed_item}") + if current_time - last_modified > 3600*24: # test + memory_items = node_data.get('memory_items', []) + if not isinstance(memory_items, list): + memory_items = [memory_items] if memory_items else [] + + if memory_items: + current_count = len(memory_items) + removed_item = random.choice(memory_items) + memory_items.remove(removed_item) + + if memory_items: + self.memory_graph.G.nodes[node]['memory_items'] = memory_items + self.memory_graph.G.nodes[node]['last_modified'] = current_time + node_changes['reduced'] += 1 + logger.info(f"\033[1;33m[记忆减少]\033[0m {node} (记忆数量: {current_count} -> {len(memory_items)})") + else: + self.memory_graph.G.remove_node(node) + node_changes['removed'] += 1 + logger.info(f"\033[1;31m[节点移除]\033[0m {node}") - # 同步到数据库 - if forgotten_nodes: + if any(count > 0 for count in edge_changes.values()) or any(count > 0 for count in 
node_changes.values()): self.sync_memory_to_db() - print(f"完成遗忘操作,共遗忘 {len(forgotten_nodes)} 个节点的记忆") + logger.info("\n遗忘操作统计:") + logger.info(f"连接变化: {edge_changes['weakened']} 个减弱, {edge_changes['removed']} 个移除") + logger.info(f"节点变化: {node_changes['reduced']} 个减少记忆, {node_changes['removed']} 个移除") else: - print("本次检查没有节点满足遗忘条件") + logger.info("\n本次检查没有节点或连接满足遗忘条件") async def merge_memory(self, topic): """ @@ -426,35 +562,35 @@ class Hippocampus: memory_items = self.memory_graph.G.nodes[topic].get('memory_items', []) if not isinstance(memory_items, list): memory_items = [memory_items] if memory_items else [] - + # 如果记忆项不足,直接返回 if len(memory_items) < 10: return - + # 随机选择10条记忆 selected_memories = random.sample(memory_items, 10) - + # 拼接成文本 merged_text = "\n".join(selected_memories) - print(f"\n[合并记忆] 话题: {topic}") - print(f"选择的记忆:\n{merged_text}") - + logger.debug(f"\n[合并记忆] 话题: {topic}") + logger.debug(f"选择的记忆:\n{merged_text}") + # 使用memory_compress生成新的压缩记忆 - compressed_memories = await self.memory_compress(merged_text, 0.1) - + compressed_memories, _ = await self.memory_compress(selected_memories, 0.1) + # 从原记忆列表中移除被选中的记忆 for memory in selected_memories: memory_items.remove(memory) - + # 添加新的压缩记忆 for _, compressed_memory in compressed_memories: memory_items.append(compressed_memory) - print(f"添加压缩记忆: {compressed_memory}") - + logger.info(f"添加压缩记忆: {compressed_memory}") + # 更新节点的记忆项 self.memory_graph.G.nodes[topic]['memory_items'] = memory_items - print(f"完成记忆合并,当前记忆数量: {len(memory_items)}") - + logger.debug(f"完成记忆合并,当前记忆数量: {len(memory_items)}") + async def operation_merge_memory(self, percentage=0.1): """ 随机检查一定比例的节点,对内容数量超过100的节点进行记忆合并 @@ -468,7 +604,7 @@ class Hippocampus: check_count = max(1, int(len(all_nodes) * percentage)) # 随机选择节点 nodes_to_check = random.sample(all_nodes, check_count) - + merged_nodes = [] for node in nodes_to_check: # 获取节点的内容条数 @@ -476,26 +612,26 @@ class Hippocampus: if not isinstance(memory_items, list): memory_items = [memory_items] if memory_items else [] content_count = len(memory_items) - + # 如果内容数量超过100,进行合并 if content_count > 100: - print(f"\n检查节点: {node}, 当前记忆数量: {content_count}") + logger.debug(f"检查节点: {node}, 当前记忆数量: {content_count}") await self.merge_memory(node) merged_nodes.append(node) - + # 同步到数据库 if merged_nodes: self.sync_memory_to_db() - print(f"\n完成记忆合并操作,共处理 {len(merged_nodes)} 个节点") + logger.debug(f"完成记忆合并操作,共处理 {len(merged_nodes)} 个节点") else: - print("\n本次检查没有需要合并的节点") + logger.debug("本次检查没有需要合并的节点") - def find_topic_llm(self,text, topic_num): + def find_topic_llm(self, text, topic_num): prompt = f'这是一段文字:{text}。请你从这段话中总结出{topic_num}个关键的概念,可以是名词,动词,或者特定人物,帮我列出来,用逗号,隔开,尽可能精简。只需要列举{topic_num}个话题就好,不要有序号,不要告诉我其他内容。' return prompt - def topic_what(self,text, topic): - prompt = f'这是一段文字:{text}。我想让你基于这段文字来概括"{topic}"这个概念,帮我总结成一句自然的话,可以包含时间和人物,以及具体的观点。只输出这句话就好' + def topic_what(self, text, topic, time_info): + prompt = f'这是一段文字,{time_info}:{text}。我想让你基于这段文字来概括"{topic}"这个概念,帮我总结成一句自然的话,可以包含时间和人物,以及具体的观点。只输出这句话就好' return prompt async def _identify_topics(self, text: str) -> list: @@ -509,11 +645,12 @@ class Hippocampus: """ topics_response = await self.llm_topic_judge.generate_response(self.find_topic_llm(text, 5)) # print(f"话题: {topics_response[0]}") - topics = [topic.strip() for topic in topics_response[0].replace(",", ",").replace("、", ",").replace(" ", ",").split(",") if topic.strip()] + topics = [topic.strip() for topic in + topics_response[0].replace(",", ",").replace("、", ",").replace(" ", ",").split(",") if topic.strip()] # print(f"话题: 
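Reviewer's note: a condensed sketch of the age-based edge forgetting in operation_forget_topic above, assuming the 24h window (the value marked "# test" in the patch). Unlike the patch, this sketch defaults a missing last_modified to "now", so legacy edges without timestamps do not raise.

import time
from typing import Optional

import networkx as nx

FORGET_AFTER = 3600 * 24  # forgetting window from the patch

def forget_edges(G: nx.Graph, now: Optional[float] = None) -> dict:
    now = time.time() if now is None else now
    stats = {'weakened': 0, 'removed': 0}
    for source, target in list(G.edges()):
        data = G[source][target]
        # default a missing timestamp to "now" so untimestamped edges survive
        if now - data.get('last_modified', now) > FORGET_AFTER:
            data['strength'] = data.get('strength', 1) - 1
            if data['strength'] <= 0:
                G.remove_edge(source, target)
                stats['removed'] += 1
            else:
                data['last_modified'] = now
                stats['weakened'] += 1
    return stats

G = nx.Graph()
G.add_edge("梨", "苹果", strength=2, last_modified=time.time() - 2 * FORGET_AFTER)
print(forget_edges(G))  # {'weakened': 1, 'removed': 0}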
{topics}") - + return topics - + def _find_similar_topics(self, topics: list, similarity_threshold: float = 0.4, debug_info: str = "") -> list: """查找与给定主题相似的记忆主题 @@ -527,16 +664,16 @@ class Hippocampus: """ all_memory_topics = self.get_all_node_names() all_similar_topics = [] - + # 计算每个识别出的主题与记忆主题的相似度 for topic in topics: if debug_info: # print(f"\033[1;32m[{debug_info}]\033[0m 正在思考有没有见过: {topic}") pass - + topic_vector = text_to_vector(topic) has_similar_topic = False - + for memory_topic in all_memory_topics: memory_vector = text_to_vector(memory_topic) # 获取所有唯一词 @@ -546,20 +683,20 @@ class Hippocampus: v2 = [memory_vector.get(word, 0) for word in all_words] # 计算相似度 similarity = cosine_similarity(v1, v2) - + if similarity >= similarity_threshold: has_similar_topic = True if debug_info: # print(f"\033[1;32m[{debug_info}]\033[0m 找到相似主题: {topic} -> {memory_topic} (相似度: {similarity:.2f})") pass all_similar_topics.append((memory_topic, similarity)) - + if not has_similar_topic and debug_info: # print(f"\033[1;31m[{debug_info}]\033[0m 没有见过: {topic} ,呃呃") pass - + return all_similar_topics - + def _get_top_topics(self, similar_topics: list, max_topics: int = 5) -> list: """获取相似度最高的主题 @@ -572,36 +709,36 @@ class Hippocampus: """ seen_topics = set() top_topics = [] - + for topic, score in sorted(similar_topics, key=lambda x: x[1], reverse=True): if topic not in seen_topics and len(top_topics) < max_topics: seen_topics.add(topic) top_topics.append((topic, score)) - + return top_topics async def memory_activate_value(self, text: str, max_topics: int = 5, similarity_threshold: float = 0.3) -> int: """计算输入文本对记忆的激活程度""" - print(f"\033[1;32m[记忆激活]\033[0m 识别主题: {await self._identify_topics(text)}") - + logger.info(f"识别主题: {await self._identify_topics(text)}") + # 识别主题 identified_topics = await self._identify_topics(text) if not identified_topics: return 0 - + # 查找相似主题 all_similar_topics = self._find_similar_topics( - identified_topics, + identified_topics, similarity_threshold=similarity_threshold, debug_info="记忆激活" ) - + if not all_similar_topics: return 0 - + # 获取最相关的主题 top_topics = self._get_top_topics(all_similar_topics, max_topics) - + # 如果只找到一个主题,进行惩罚 if len(top_topics) == 1: topic, score = top_topics[0] @@ -611,15 +748,16 @@ class Hippocampus: memory_items = [memory_items] if memory_items else [] content_count = len(memory_items) penalty = 1.0 / (1 + math.log(content_count + 1)) - + activation = int(score * 50 * penalty) - print(f"\033[1;32m[记忆激活]\033[0m 单主题「{topic}」- 相似度: {score:.3f}, 内容数: {content_count}, 激活值: {activation}") + logger.info( + f"[记忆激活]单主题「{topic}」- 相似度: {score:.3f}, 内容数: {content_count}, 激活值: {activation}") return activation - + # 计算关键词匹配率,同时考虑内容数量 matched_topics = set() topic_similarities = {} - + for memory_topic, similarity in top_topics: # 计算内容数量惩罚 memory_items = self.memory_graph.G.nodes[memory_topic].get('memory_items', []) @@ -627,7 +765,7 @@ class Hippocampus: memory_items = [memory_items] if memory_items else [] content_count = len(memory_items) penalty = 1.0 / (1 + math.log(content_count + 1)) - + # 对每个记忆主题,检查它与哪些输入主题相似 for input_topic in identified_topics: topic_vector = text_to_vector(input_topic) @@ -640,33 +778,36 @@ class Hippocampus: matched_topics.add(input_topic) adjusted_sim = sim * penalty topic_similarities[input_topic] = max(topic_similarities.get(input_topic, 0), adjusted_sim) - print(f"\033[1;32m[记忆激活]\033[0m 主题「{input_topic}」-> 「{memory_topic}」(内容数: {content_count}, 相似度: {adjusted_sim:.3f})") - + logger.info( + f"[记忆激活]主题「{input_topic}」-> 
「{memory_topic}」(内容数: {content_count}, 相似度: {adjusted_sim:.3f})") + # 计算主题匹配率和平均相似度 topic_match = len(matched_topics) / len(identified_topics) average_similarities = sum(topic_similarities.values()) / len(topic_similarities) if topic_similarities else 0 - + # 计算最终激活值 activation = int((topic_match + average_similarities) / 2 * 100) - print(f"\033[1;32m[记忆激活]\033[0m 匹配率: {topic_match:.3f}, 平均相似度: {average_similarities:.3f}, 激活值: {activation}") - + logger.info( + f"[记忆激活]匹配率: {topic_match:.3f}, 平均相似度: {average_similarities:.3f}, 激活值: {activation}") + return activation - async def get_relevant_memories(self, text: str, max_topics: int = 5, similarity_threshold: float = 0.4, max_memory_num: int = 5) -> list: + async def get_relevant_memories(self, text: str, max_topics: int = 5, similarity_threshold: float = 0.4, + max_memory_num: int = 5) -> list: """根据输入文本获取相关的记忆内容""" # 识别主题 identified_topics = await self._identify_topics(text) - + # 查找相似主题 all_similar_topics = self._find_similar_topics( - identified_topics, + identified_topics, similarity_threshold=similarity_threshold, debug_info="记忆检索" ) - + # 获取最相关的主题 relevant_topics = self._get_top_topics(all_similar_topics, max_topics) - + # 获取相关记忆内容 relevant_memories = [] for topic, score in relevant_topics: @@ -674,8 +815,8 @@ class Hippocampus: first_layer, _ = self.memory_graph.get_related_item(topic, depth=1) if first_layer: # 如果记忆条数超过限制,随机选择指定数量的记忆 - if len(first_layer) > max_memory_num/2: - first_layer = random.sample(first_layer, max_memory_num//2) + if len(first_layer) > max_memory_num / 2: + first_layer = random.sample(first_layer, max_memory_num // 2) # 为每条记忆添加来源主题和相似度信息 for memory in first_layer: relevant_memories.append({ @@ -683,20 +824,20 @@ class Hippocampus: 'similarity': score, 'content': memory }) - + # 如果记忆数量超过5个,随机选择5个 # 按相似度排序 relevant_memories.sort(key=lambda x: x['similarity'], reverse=True) - + if len(relevant_memories) > max_memory_num: relevant_memories = random.sample(relevant_memories, max_memory_num) - + return relevant_memories def segment_text(text): seg_text = list(jieba.cut(text)) - return seg_text + return seg_text from nonebot import get_driver @@ -707,19 +848,19 @@ config = driver.config start_time = time.time() Database.initialize( - host= config.MONGODB_HOST, - port= config.MONGODB_PORT, - db_name= config.DATABASE_NAME, - username= config.MONGODB_USERNAME, - password= config.MONGODB_PASSWORD, + host=config.MONGODB_HOST, + port=config.MONGODB_PORT, + db_name=config.DATABASE_NAME, + username=config.MONGODB_USERNAME, + password=config.MONGODB_PASSWORD, auth_source=config.MONGODB_AUTH_SOURCE ) -#创建记忆图 +# 创建记忆图 memory_graph = Memory_graph() -#创建海马体 +# 创建海马体 hippocampus = Hippocampus(memory_graph) -#从数据库加载记忆图 +# 从数据库加载记忆图 hippocampus.sync_memory_from_db() end_time = time.time() -print(f"\033[32m[加载海马体耗时: {end_time - start_time:.2f} 秒]\033[0m") \ No newline at end of file +logger.success(f"加载海马体耗时: {end_time - start_time:.2f} 秒") diff --git a/src/plugins/memory_system/memory_manual_build.py b/src/plugins/memory_system/memory_manual_build.py index 3124bc8e4..3c120f21b 100644 --- a/src/plugins/memory_system/memory_manual_build.py +++ b/src/plugins/memory_system/memory_manual_build.py @@ -13,6 +13,7 @@ import networkx as nx import pymongo from dotenv import load_dotenv from loguru import logger +import jieba # from chat.config import global_config sys.path.append("C:/GitHub/MaiMBot") # 添加项目根目录到 Python 路径 @@ -86,23 +87,26 @@ def calculate_information_content(text): return entropy def get_cloest_chat_from_db(db, length: int, 
timestamp: str): - """从数据库中获取最接近指定时间戳的聊天记录,并记录读取次数""" - chat_text = '' + """从数据库中获取最接近指定时间戳的聊天记录,并记录读取次数 + + Returns: + list: 消息记录字典列表,每个字典包含消息内容和时间信息 + """ + chat_records = [] closest_record = db.db.messages.find_one({"time": {"$lte": timestamp}}, sort=[('time', -1)]) if closest_record and closest_record.get('memorized', 0) < 4: closest_time = closest_record['time'] - group_id = closest_record['group_id'] # 获取groupid + group_id = closest_record['group_id'] # 获取该时间戳之后的length条消息,且groupid相同 - chat_records = list(db.db.messages.find( + records = list(db.db.messages.find( {"time": {"$gt": closest_time}, "group_id": group_id} ).sort('time', 1).limit(length)) # 更新每条消息的memorized属性 - for record in chat_records: - # 检查当前记录的memorized值 + for record in records: current_memorized = record.get('memorized', 0) - if current_memorized > 3: + if current_memorized > 3: print("消息已读取3次,跳过") return '' @@ -112,11 +116,14 @@ def get_cloest_chat_from_db(db, length: int, timestamp: str): {"$set": {"memorized": current_memorized + 1}} ) - chat_text += record["detailed_plain_text"] + # 添加到记录列表中 + chat_records.append({ + 'text': record["detailed_plain_text"], + 'time': record["time"], + 'group_id': record["group_id"] + }) - return chat_text - print("消息已读取3次,跳过") - return '' + return chat_records class Memory_graph: def __init__(self): @@ -205,22 +212,34 @@ class Hippocampus: self.llm_model_summary = LLMModel(model_name="Qwen/Qwen2.5-32B-Instruct") def get_memory_sample(self, chat_size=20, time_frequency:dict={'near':2,'mid':4,'far':3}): + """获取记忆样本 + + Returns: + list: 消息记录列表,每个元素是一个消息记录字典列表 + """ current_timestamp = datetime.datetime.now().timestamp() - chat_text = [] - #短期:1h 中期:4h 长期:24h - for _ in range(time_frequency.get('near')): # 循环10次 - random_time = current_timestamp - random.randint(1, 3600*4) # 随机时间 - chat_ = get_cloest_chat_from_db(db=self.memory_graph.db, length=chat_size, timestamp=random_time) - chat_text.append(chat_) - for _ in range(time_frequency.get('mid')): # 循环10次 - random_time = current_timestamp - random.randint(3600*4, 3600*24) # 随机时间 - chat_ = get_cloest_chat_from_db(db=self.memory_graph.db, length=chat_size, timestamp=random_time) - chat_text.append(chat_) - for _ in range(time_frequency.get('far')): # 循环10次 - random_time = current_timestamp - random.randint(3600*24, 3600*24*7) # 随机时间 - chat_ = get_cloest_chat_from_db(db=self.memory_graph.db, length=chat_size, timestamp=random_time) - chat_text.append(chat_) - return [chat for chat in chat_text if chat] + chat_samples = [] + + # 短期:1h 中期:4h 长期:24h + for _ in range(time_frequency.get('near')): + random_time = current_timestamp - random.randint(1, 3600*4) + messages = get_cloest_chat_from_db(db=self.memory_graph.db, length=chat_size, timestamp=random_time) + if messages: + chat_samples.append(messages) + + for _ in range(time_frequency.get('mid')): + random_time = current_timestamp - random.randint(3600*4, 3600*24) + messages = get_cloest_chat_from_db(db=self.memory_graph.db, length=chat_size, timestamp=random_time) + if messages: + chat_samples.append(messages) + + for _ in range(time_frequency.get('far')): + random_time = current_timestamp - random.randint(3600*24, 3600*24*7) + messages = get_cloest_chat_from_db(db=self.memory_graph.db, length=chat_size, timestamp=random_time) + if messages: + chat_samples.append(messages) + + return chat_samples def calculate_topic_num(self,text, compress_rate): """计算文本的话题数量""" @@ -231,16 +250,49 @@ class Hippocampus: print(f"topic_by_length: {topic_by_length}, topic_by_information_content: 
{topic_by_information_content}, topic_num: {topic_num}") return topic_num - async def memory_compress(self, input_text, compress_rate=0.1): + async def memory_compress(self, messages: list, compress_rate=0.1): + """压缩消息记录为记忆 + + Args: + messages: 消息记录字典列表,每个字典包含text和time字段 + compress_rate: 压缩率 + + Returns: + set: (话题, 记忆) 元组集合 + """ + if not messages: + return set() + + # 合并消息文本,同时保留时间信息 + input_text = "" + time_info = "" + # 计算最早和最晚时间 + earliest_time = min(msg['time'] for msg in messages) + latest_time = max(msg['time'] for msg in messages) + + earliest_dt = datetime.datetime.fromtimestamp(earliest_time) + latest_dt = datetime.datetime.fromtimestamp(latest_time) + + # 如果是同一年 + if earliest_dt.year == latest_dt.year: + earliest_str = earliest_dt.strftime("%m-%d %H:%M:%S") + latest_str = latest_dt.strftime("%m-%d %H:%M:%S") + time_info += f"是在{earliest_dt.year}年,{earliest_str} 到 {latest_str} 的对话:\n" + else: + earliest_str = earliest_dt.strftime("%Y-%m-%d %H:%M:%S") + latest_str = latest_dt.strftime("%Y-%m-%d %H:%M:%S") + time_info += f"是从 {earliest_str} 到 {latest_str} 的对话:\n" + + for msg in messages: + input_text += f"{msg['text']}\n" + print(input_text) topic_num = self.calculate_topic_num(input_text, compress_rate) topics_response = self.llm_model_get_topic.generate_response(self.find_topic_llm(input_text, topic_num)) - # 修改话题处理逻辑 - # 定义需要过滤的关键词 - filter_keywords = ['表情包', '图片', '回复', '聊天记录'] # 过滤topics + filter_keywords = ['表情包', '图片', '回复', '聊天记录'] topics = [topic.strip() for topic in topics_response[0].replace(",", ",").replace("、", ",").replace(" ", ",").split(",") if topic.strip()] filtered_topics = [topic for topic in topics if not any(keyword in topic for keyword in filter_keywords)] @@ -250,7 +302,7 @@ class Hippocampus: # 创建所有话题的请求任务 tasks = [] for topic in filtered_topics: - topic_what_prompt = self.topic_what(input_text, topic) + topic_what_prompt = self.topic_what(input_text, topic , time_info) # 创建异步任务 task = self.llm_model_small.generate_response_async(topic_what_prompt) tasks.append((topic.strip(), task)) @@ -267,37 +319,35 @@ class Hippocampus: async def operation_build_memory(self, chat_size=12): # 最近消息获取频率 time_frequency = {'near': 3, 'mid': 8, 'far': 5} - memory_sample = self.get_memory_sample(chat_size, time_frequency) + memory_samples = self.get_memory_sample(chat_size, time_frequency) all_topics = [] # 用于存储所有话题 - for i, input_text in enumerate(memory_sample, 1): + for i, messages in enumerate(memory_samples, 1): # 加载进度可视化 all_topics = [] - progress = (i / len(memory_sample)) * 100 + progress = (i / len(memory_samples)) * 100 bar_length = 30 - filled_length = int(bar_length * i // len(memory_sample)) + filled_length = int(bar_length * i // len(memory_samples)) bar = '█' * filled_length + '-' * (bar_length - filled_length) - print(f"\n进度: [{bar}] {progress:.1f}% ({i}/{len(memory_sample)})") + print(f"\n进度: [{bar}] {progress:.1f}% ({i}/{len(memory_samples)})") - # 生成压缩后记忆 ,表现为 (话题,记忆) 的元组 - compressed_memory = set() + # 生成压缩后记忆 compress_rate = 0.1 - compressed_memory = await self.memory_compress(input_text, compress_rate) + compressed_memory = await self.memory_compress(messages, compress_rate) print(f"\033[1;33m压缩后记忆数量\033[0m: {len(compressed_memory)}") # 将记忆加入到图谱中 for topic, memory in compressed_memory: print(f"\033[1;32m添加节点\033[0m: {topic}") self.memory_graph.add_dot(topic, memory) - all_topics.append(topic) # 收集所有话题 + all_topics.append(topic) + + # 连接相关话题 for i in range(len(all_topics)): for j in range(i + 1, len(all_topics)): print(f"\033[1;32m连接节点\033[0m: 
{all_topics[i]} 和 {all_topics[j]}") self.memory_graph.connect_dot(all_topics[i], all_topics[j]) - - - self.sync_memory_to_db() @@ -375,7 +425,7 @@ class Hippocampus: if concept not in db_nodes_dict: # 数据库中缺少的节点,添加 - logger.info(f"添加新节点: {concept}") + # logger.info(f"添加新节点: {concept}") node_data = { 'concept': concept, 'memory_items': memory_items, @@ -389,7 +439,7 @@ class Hippocampus: # 如果特征值不同,则更新节点 if db_hash != memory_hash: - logger.info(f"更新节点内容: {concept}") + # logger.info(f"更新节点内容: {concept}") self.memory_graph.db.db.graph_data.nodes.update_one( {'concept': concept}, {'$set': { @@ -402,7 +452,7 @@ class Hippocampus: memory_concepts = set(node[0] for node in memory_nodes) for db_node in db_nodes: if db_node['concept'] not in memory_concepts: - logger.info(f"删除多余节点: {db_node['concept']}") + # logger.info(f"删除多余节点: {db_node['concept']}") self.memory_graph.db.db.graph_data.nodes.delete_one({'concept': db_node['concept']}) # 处理边的信息 @@ -460,9 +510,10 @@ class Hippocampus: prompt = f'这是一段文字:{text}。请你从这段话中总结出{topic_num}个关键的概念,可以是名词,动词,或者特定人物,帮我列出来,用逗号,隔开,尽可能精简。只需要列举{topic_num}个话题就好,不要有序号,不要告诉我其他内容。' return prompt - def topic_what(self,text, topic): + def topic_what(self,text, topic, time_info): # prompt = f'这是一段文字:{text}。我想知道这段文字里有什么关于{topic}的话题,帮我总结成一句自然的话,可以包含时间和人物,以及具体的观点。只输出这句话就好' - prompt = f'这是一段文字:{text}。我想让你基于这段文字来概括"{topic}"这个概念,帮我总结成一句自然的话,可以包含时间和人物,以及具体的观点。只输出这句话就好' + # 获取当前时间 + prompt = f'这是一段文字,{time_info}:{text}。我想让你基于这段文字来概括"{topic}"这个概念,帮我总结成一句自然的话,可以包含时间和人物,以及具体的观点。只输出这句话就好' return prompt def remove_node_from_db(self, topic): @@ -597,7 +648,7 @@ class Hippocampus: print(f"选择的记忆:\n{merged_text}") # 使用memory_compress生成新的压缩记忆 - compressed_memories = await self.memory_compress(merged_text, 0.1) + compressed_memories = await self.memory_compress(selected_memories, 0.1) # 从原记忆列表中移除被选中的记忆 for memory in selected_memories: @@ -647,6 +698,164 @@ class Hippocampus: else: print("\n本次检查没有需要合并的节点") + async def _identify_topics(self, text: str) -> list: + """从文本中识别可能的主题""" + topics_response = self.llm_model_get_topic.generate_response(self.find_topic_llm(text, 5)) + topics = [topic.strip() for topic in topics_response[0].replace(",", ",").replace("、", ",").replace(" ", ",").split(",") if topic.strip()] + return topics + + def _find_similar_topics(self, topics: list, similarity_threshold: float = 0.4, debug_info: str = "") -> list: + """查找与给定主题相似的记忆主题""" + all_memory_topics = list(self.memory_graph.G.nodes()) + all_similar_topics = [] + + for topic in topics: + if debug_info: + pass + + topic_vector = text_to_vector(topic) + has_similar_topic = False + + for memory_topic in all_memory_topics: + memory_vector = text_to_vector(memory_topic) + all_words = set(topic_vector.keys()) | set(memory_vector.keys()) + v1 = [topic_vector.get(word, 0) for word in all_words] + v2 = [memory_vector.get(word, 0) for word in all_words] + similarity = cosine_similarity(v1, v2) + + if similarity >= similarity_threshold: + has_similar_topic = True + all_similar_topics.append((memory_topic, similarity)) + + return all_similar_topics + + def _get_top_topics(self, similar_topics: list, max_topics: int = 5) -> list: + """获取相似度最高的主题""" + seen_topics = set() + top_topics = [] + + for topic, score in sorted(similar_topics, key=lambda x: x[1], reverse=True): + if topic not in seen_topics and len(top_topics) < max_topics: + seen_topics.add(topic) + top_topics.append((topic, score)) + + return top_topics + + async def memory_activate_value(self, text: str, max_topics: int = 5, similarity_threshold: float = 0.3) -> int: + 
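Reviewer's note: the activation scoring in memory_activate_value (both the memory.py version above and this copy) reduces to two small formulas: a log-based crowding penalty for single-topic hits, and an average of match rate and mean similarity otherwise. A sketch with made-up inputs:

import math

def single_topic_activation(similarity: float, content_count: int) -> int:
    # nodes that already hold many memories are damped: 1 / (1 + ln(n + 1))
    penalty = 1.0 / (1 + math.log(content_count + 1))
    return int(similarity * 50 * penalty)

def multi_topic_activation(matched: int, identified: int,
                           similarities: "list[float]") -> int:
    topic_match = matched / identified
    avg_sim = sum(similarities) / len(similarities) if similarities else 0
    return int((topic_match + avg_sim) / 2 * 100)

print(single_topic_activation(0.8, 12))          # crowded node -> damped, prints 11
print(multi_topic_activation(2, 4, [0.5, 0.7]))  # (0.5 + 0.6) / 2 * 100 -> 55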
"""计算输入文本对记忆的激活程度""" + logger.info(f"[记忆激活]识别主题: {await self._identify_topics(text)}") + + identified_topics = await self._identify_topics(text) + if not identified_topics: + return 0 + + all_similar_topics = self._find_similar_topics( + identified_topics, + similarity_threshold=similarity_threshold, + debug_info="记忆激活" + ) + + if not all_similar_topics: + return 0 + + top_topics = self._get_top_topics(all_similar_topics, max_topics) + + if len(top_topics) == 1: + topic, score = top_topics[0] + memory_items = self.memory_graph.G.nodes[topic].get('memory_items', []) + if not isinstance(memory_items, list): + memory_items = [memory_items] if memory_items else [] + content_count = len(memory_items) + penalty = 1.0 / (1 + math.log(content_count + 1)) + + activation = int(score * 50 * penalty) + print(f"\033[1;32m[记忆激活]\033[0m 单主题「{topic}」- 相似度: {score:.3f}, 内容数: {content_count}, 激活值: {activation}") + return activation + + matched_topics = set() + topic_similarities = {} + + for memory_topic, similarity in top_topics: + memory_items = self.memory_graph.G.nodes[memory_topic].get('memory_items', []) + if not isinstance(memory_items, list): + memory_items = [memory_items] if memory_items else [] + content_count = len(memory_items) + penalty = 1.0 / (1 + math.log(content_count + 1)) + + for input_topic in identified_topics: + topic_vector = text_to_vector(input_topic) + memory_vector = text_to_vector(memory_topic) + all_words = set(topic_vector.keys()) | set(memory_vector.keys()) + v1 = [topic_vector.get(word, 0) for word in all_words] + v2 = [memory_vector.get(word, 0) for word in all_words] + sim = cosine_similarity(v1, v2) + if sim >= similarity_threshold: + matched_topics.add(input_topic) + adjusted_sim = sim * penalty + topic_similarities[input_topic] = max(topic_similarities.get(input_topic, 0), adjusted_sim) + print(f"\033[1;32m[记忆激活]\033[0m 主题「{input_topic}」-> 「{memory_topic}」(内容数: {content_count}, 相似度: {adjusted_sim:.3f})") + + topic_match = len(matched_topics) / len(identified_topics) + average_similarities = sum(topic_similarities.values()) / len(topic_similarities) if topic_similarities else 0 + + activation = int((topic_match + average_similarities) / 2 * 100) + print(f"\033[1;32m[记忆激活]\033[0m 匹配率: {topic_match:.3f}, 平均相似度: {average_similarities:.3f}, 激活值: {activation}") + + return activation + + async def get_relevant_memories(self, text: str, max_topics: int = 5, similarity_threshold: float = 0.4, max_memory_num: int = 5) -> list: + """根据输入文本获取相关的记忆内容""" + identified_topics = await self._identify_topics(text) + + all_similar_topics = self._find_similar_topics( + identified_topics, + similarity_threshold=similarity_threshold, + debug_info="记忆检索" + ) + + relevant_topics = self._get_top_topics(all_similar_topics, max_topics) + + relevant_memories = [] + for topic, score in relevant_topics: + first_layer, _ = self.memory_graph.get_related_item(topic, depth=1) + if first_layer: + if len(first_layer) > max_memory_num/2: + first_layer = random.sample(first_layer, max_memory_num//2) + for memory in first_layer: + relevant_memories.append({ + 'topic': topic, + 'similarity': score, + 'content': memory + }) + + relevant_memories.sort(key=lambda x: x['similarity'], reverse=True) + + if len(relevant_memories) > max_memory_num: + relevant_memories = random.sample(relevant_memories, max_memory_num) + + return relevant_memories + +def segment_text(text): + """使用jieba进行文本分词""" + seg_text = list(jieba.cut(text)) + return seg_text + +def text_to_vector(text): + """将文本转换为词频向量""" + words = 
segment_text(text) + vector = {} + for word in words: + vector[word] = vector.get(word, 0) + 1 + return vector + +def cosine_similarity(v1, v2): + """计算两个向量的余弦相似度""" + dot_product = sum(a * b for a, b in zip(v1, v2)) + norm1 = math.sqrt(sum(a * a for a in v1)) + norm2 = math.sqrt(sum(b * b for b in v2)) + if norm1 == 0 or norm2 == 0: + return 0 + return dot_product / (norm1 * norm2) def visualize_graph_lite(memory_graph: Memory_graph, color_by_memory: bool = False): # 设置中文字体 @@ -735,7 +944,7 @@ async def main(): db = Database.get_instance() start_time = time.time() - test_pare = {'do_build_memory':True,'do_forget_topic':False,'do_visualize_graph':True,'do_query':False,'do_merge_memory':False} + test_pare = {'do_build_memory':False,'do_forget_topic':False,'do_visualize_graph':True,'do_query':False,'do_merge_memory':False} # 创建记忆图 memory_graph = Memory_graph() diff --git a/src/plugins/memory_system/memory_test1.py b/src/plugins/memory_system/memory_test1.py new file mode 100644 index 000000000..bbd734ec2 --- /dev/null +++ b/src/plugins/memory_system/memory_test1.py @@ -0,0 +1,1208 @@ +# -*- coding: utf-8 -*- +import datetime +import math +import os +import random +import sys +import time +from collections import Counter +from pathlib import Path + +import matplotlib.pyplot as plt +import networkx as nx +import pymongo +from dotenv import load_dotenv +from loguru import logger +import jieba + +''' +该理论认为,当两个或多个事物在形态上具有相似性时, +它们在记忆中会形成关联。 +例如,梨和苹果在形状和都是水果这一属性上有相似性, +所以当我们看到梨时,很容易通过形态学联想记忆联想到苹果。 +这种相似性联想有助于我们对新事物进行分类和理解, +当遇到一个新的类似水果时, +我们可以通过与已有的水果记忆进行相似性匹配, +来推测它的一些特征。 + + + +时空关联性联想: +除了相似性联想,MAM 还强调时空关联性联想。 +如果两个事物在时间或空间上经常同时出现,它们也会在记忆中形成关联。 +比如,每次在公园里看到花的时候,都能听到鸟儿的叫声, +那么花和鸟儿叫声的形态特征(花的视觉形态和鸟叫的听觉形态)就会在记忆中形成关联, +以后听到鸟叫可能就会联想到公园里的花。 + +''' + +# from chat.config import global_config +sys.path.append("C:/GitHub/MaiMBot") # 添加项目根目录到 Python 路径 +from src.common.database import Database +from src.plugins.memory_system.offline_llm import LLMModel + +# 获取当前文件的目录 +current_dir = Path(__file__).resolve().parent +# 获取项目根目录(上三层目录) +project_root = current_dir.parent.parent.parent +# env.dev文件路径 +env_path = project_root / ".env.dev" + +# 加载环境变量 +if env_path.exists(): + logger.info(f"从 {env_path} 加载环境变量") + load_dotenv(env_path) +else: + logger.warning(f"未找到环境变量文件: {env_path}") + logger.info("将使用默认配置") + +class Database: + _instance = None + db = None + + @classmethod + def get_instance(cls): + if cls._instance is None: + cls._instance = cls() + return cls._instance + + def __init__(self): + if not Database.db: + Database.initialize( + host=os.getenv("MONGODB_HOST"), + port=int(os.getenv("MONGODB_PORT")), + db_name=os.getenv("DATABASE_NAME"), + username=os.getenv("MONGODB_USERNAME"), + password=os.getenv("MONGODB_PASSWORD"), + auth_source=os.getenv("MONGODB_AUTH_SOURCE") + ) + + @classmethod + def initialize(cls, host, port, db_name, username=None, password=None, auth_source="admin"): + try: + if username and password: + uri = f"mongodb://{username}:{password}@{host}:{port}/{db_name}?authSource={auth_source}" + else: + uri = f"mongodb://{host}:{port}" + + client = pymongo.MongoClient(uri) + cls.db = client[db_name] + # 测试连接 + client.server_info() + logger.success("MongoDB连接成功!") + + except Exception as e: + logger.error(f"初始化MongoDB失败: {str(e)}") + raise + +def calculate_information_content(text): + """计算文本的信息量(熵)""" + char_count = Counter(text) + total_chars = len(text) + + entropy = 0 + for count in char_count.values(): + probability = count / total_chars + entropy -= probability * math.log2(probability) + 
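Reviewer's note: calculate_information_content and calculate_topic_num (defined earlier in this file) combine into one small heuristic: character-level Shannon entropy plus line count decide how many topics to extract. A worked sketch with toy chat text and the patch's constants:

import math
from collections import Counter

def information_content(text: str) -> float:
    counts = Counter(text)
    total = len(text)
    return -sum((c / total) * math.log2(c / total) for c in counts.values())

def topic_num(text: str, compress_rate: float = 0.1) -> int:
    by_length = text.count('\n') * compress_rate           # longer chats -> more topics
    by_entropy = max(1, min(5, int((information_content(text) - 3) * 2)))
    return int((by_length + by_entropy) / 2)

chat = "今天去公园看花\n听到鸟叫\n想起苹果和梨\n" * 4
print(round(information_content(chat), 2), topic_num(chat))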
+ return entropy + +def get_cloest_chat_from_db(db, length: int, timestamp: str): + """从数据库中获取最接近指定时间戳的聊天记录,并记录读取次数 + + Returns: + list: 消息记录字典列表,每个字典包含消息内容和时间信息 + """ + chat_records = [] + closest_record = db.db.messages.find_one({"time": {"$lte": timestamp}}, sort=[('time', -1)]) + + if closest_record and closest_record.get('memorized', 0) < 4: + closest_time = closest_record['time'] + group_id = closest_record['group_id'] + # 获取该时间戳之后的length条消息,且groupid相同 + records = list(db.db.messages.find( + {"time": {"$gt": closest_time}, "group_id": group_id} + ).sort('time', 1).limit(length)) + + # 更新每条消息的memorized属性 + for record in records: + current_memorized = record.get('memorized', 0) + if current_memorized > 3: + print("消息已读取3次,跳过") + return '' + + # 更新memorized值 + db.db.messages.update_one( + {"_id": record["_id"]}, + {"$set": {"memorized": current_memorized + 1}} + ) + + # 添加到记录列表中 + chat_records.append({ + 'text': record["detailed_plain_text"], + 'time': record["time"], + 'group_id': record["group_id"] + }) + + return chat_records + +class Memory_cortex: + def __init__(self, memory_graph: 'Memory_graph'): + self.memory_graph = memory_graph + + def sync_memory_from_db(self): + """ + 从数据库同步数据到内存中的图结构 + 将清空当前内存中的图,并从数据库重新加载所有节点和边 + """ + # 清空当前图 + self.memory_graph.G.clear() + + # 获取当前时间作为默认时间 + default_time = datetime.datetime.now().timestamp() + + # 从数据库加载所有节点 + nodes = self.memory_graph.db.db.graph_data.nodes.find() + for node in nodes: + concept = node['concept'] + memory_items = node.get('memory_items', []) + # 确保memory_items是列表 + if not isinstance(memory_items, list): + memory_items = [memory_items] if memory_items else [] + + # 获取时间属性,如果不存在则使用默认时间 + created_time = node.get('created_time') + last_modified = node.get('last_modified') + + # 如果时间属性不存在,则更新数据库 + if created_time is None or last_modified is None: + created_time = default_time + last_modified = default_time + # 更新数据库中的节点 + self.memory_graph.db.db.graph_data.nodes.update_one( + {'concept': concept}, + {'$set': { + 'created_time': created_time, + 'last_modified': last_modified + }} + ) + logger.info(f"为节点 {concept} 添加默认时间属性") + + # 添加节点到图中,包含时间属性 + self.memory_graph.G.add_node(concept, + memory_items=memory_items, + created_time=created_time, + last_modified=last_modified) + + # 从数据库加载所有边 + edges = self.memory_graph.db.db.graph_data.edges.find() + for edge in edges: + source = edge['source'] + target = edge['target'] + + # 只有当源节点和目标节点都存在时才添加边 + if source in self.memory_graph.G and target in self.memory_graph.G: + # 获取时间属性,如果不存在则使用默认时间 + created_time = edge.get('created_time') + last_modified = edge.get('last_modified') + + # 如果时间属性不存在,则更新数据库 + if created_time is None or last_modified is None: + created_time = default_time + last_modified = default_time + # 更新数据库中的边 + self.memory_graph.db.db.graph_data.edges.update_one( + {'source': source, 'target': target}, + {'$set': { + 'created_time': created_time, + 'last_modified': last_modified + }} + ) + logger.info(f"为边 {source} - {target} 添加默认时间属性") + + self.memory_graph.G.add_edge(source, target, + strength=edge.get('strength', 1), + created_time=created_time, + last_modified=last_modified) + + logger.success("从数据库同步记忆图谱完成") + + def calculate_node_hash(self, concept, memory_items): + """ + 计算节点的特征值 + """ + if not isinstance(memory_items, list): + memory_items = [memory_items] if memory_items else [] + # 将记忆项排序以确保相同内容生成相同的哈希值 + sorted_items = sorted(memory_items) + # 组合概念和记忆项生成特征值 + content = f"{concept}:{'|'.join(sorted_items)}" + return hash(content) + + def calculate_edge_hash(self, 
source, target): + """ + 计算边的特征值 + """ + # 对源节点和目标节点排序以确保相同的边生成相同的哈希值 + nodes = sorted([source, target]) + return hash(f"{nodes[0]}:{nodes[1]}") + + def sync_memory_to_db(self): + """ + 检查并同步内存中的图结构与数据库 + 使用特征值(哈希值)快速判断是否需要更新 + """ + current_time = datetime.datetime.now().timestamp() + + # 获取数据库中所有节点和内存中所有节点 + db_nodes = list(self.memory_graph.db.db.graph_data.nodes.find()) + memory_nodes = list(self.memory_graph.G.nodes(data=True)) + + # 转换数据库节点为字典格式,方便查找 + db_nodes_dict = {node['concept']: node for node in db_nodes} + + # 检查并更新节点 + for concept, data in memory_nodes: + memory_items = data.get('memory_items', []) + if not isinstance(memory_items, list): + memory_items = [memory_items] if memory_items else [] + + # 计算内存中节点的特征值 + memory_hash = self.calculate_node_hash(concept, memory_items) + + if concept not in db_nodes_dict: + # 数据库中缺少的节点,添加 + node_data = { + 'concept': concept, + 'memory_items': memory_items, + 'hash': memory_hash, + 'created_time': data.get('created_time', current_time), + 'last_modified': data.get('last_modified', current_time) + } + self.memory_graph.db.db.graph_data.nodes.insert_one(node_data) + else: + # 获取数据库中节点的特征值 + db_node = db_nodes_dict[concept] + db_hash = db_node.get('hash', None) + + # 如果特征值不同,则更新节点 + if db_hash != memory_hash: + self.memory_graph.db.db.graph_data.nodes.update_one( + {'concept': concept}, + {'$set': { + 'memory_items': memory_items, + 'hash': memory_hash, + 'last_modified': current_time + }} + ) + + # 检查并删除数据库中多余的节点 + memory_concepts = set(node[0] for node in memory_nodes) + for db_node in db_nodes: + if db_node['concept'] not in memory_concepts: + self.memory_graph.db.db.graph_data.nodes.delete_one({'concept': db_node['concept']}) + + # 处理边的信息 + db_edges = list(self.memory_graph.db.db.graph_data.edges.find()) + memory_edges = list(self.memory_graph.G.edges(data=True)) + + # 创建边的哈希值字典 + db_edge_dict = {} + for edge in db_edges: + edge_hash = self.calculate_edge_hash(edge['source'], edge['target']) + db_edge_dict[(edge['source'], edge['target'])] = { + 'hash': edge_hash, + 'strength': edge.get('strength', 1) + } + + # 检查并更新边 + for source, target, data in memory_edges: + edge_hash = self.calculate_edge_hash(source, target) + edge_key = (source, target) + strength = data.get('strength', 1) + + if edge_key not in db_edge_dict: + # 添加新边 + edge_data = { + 'source': source, + 'target': target, + 'strength': strength, + 'hash': edge_hash, + 'created_time': data.get('created_time', current_time), + 'last_modified': data.get('last_modified', current_time) + } + self.memory_graph.db.db.graph_data.edges.insert_one(edge_data) + else: + # 检查边的特征值是否变化 + if db_edge_dict[edge_key]['hash'] != edge_hash: + self.memory_graph.db.db.graph_data.edges.update_one( + {'source': source, 'target': target}, + {'$set': { + 'hash': edge_hash, + 'strength': strength, + 'last_modified': current_time + }} + ) + + # 删除多余的边 + memory_edge_set = set((source, target) for source, target, _ in memory_edges) + for edge_key in db_edge_dict: + if edge_key not in memory_edge_set: + source, target = edge_key + self.memory_graph.db.db.graph_data.edges.delete_one({ + 'source': source, + 'target': target + }) + + logger.success("完成记忆图谱与数据库的差异同步") + + def remove_node_from_db(self, topic): + """ + 从数据库中删除指定节点及其相关的边 + + Args: + topic: 要删除的节点概念 + """ + # 删除节点 + self.memory_graph.db.db.graph_data.nodes.delete_one({'concept': topic}) + # 删除所有涉及该节点的边 + self.memory_graph.db.db.graph_data.edges.delete_many({ + '$or': [ + {'source': topic}, + {'target': topic} + ] + }) + +class Memory_graph: + def 
__init__(self): + self.G = nx.Graph() # 使用 networkx 的图结构 + self.db = Database.get_instance() + + def connect_dot(self, concept1, concept2): + # 避免自连接 + if concept1 == concept2: + return + + current_time = datetime.datetime.now().timestamp() + + # 如果边已存在,增加 strength + if self.G.has_edge(concept1, concept2): + self.G[concept1][concept2]['strength'] = self.G[concept1][concept2].get('strength', 1) + 1 + # 更新最后修改时间 + self.G[concept1][concept2]['last_modified'] = current_time + else: + # 如果是新边,初始化 strength 为 1 + self.G.add_edge(concept1, concept2, + strength=1, + created_time=current_time, + last_modified=current_time) + + def add_dot(self, concept, memory): + current_time = datetime.datetime.now().timestamp() + + if concept in self.G: + # 如果节点已存在,将新记忆添加到现有列表中 + if 'memory_items' in self.G.nodes[concept]: + if not isinstance(self.G.nodes[concept]['memory_items'], list): + # 如果当前不是列表,将其转换为列表 + self.G.nodes[concept]['memory_items'] = [self.G.nodes[concept]['memory_items']] + self.G.nodes[concept]['memory_items'].append(memory) + # 更新最后修改时间 + self.G.nodes[concept]['last_modified'] = current_time + else: + self.G.nodes[concept]['memory_items'] = [memory] + self.G.nodes[concept]['last_modified'] = current_time + else: + # 如果是新节点,创建新的记忆列表 + self.G.add_node(concept, + memory_items=[memory], + created_time=current_time, + last_modified=current_time) + + def get_dot(self, concept): + # 检查节点是否存在于图中 + if concept in self.G: + # 从图中获取节点数据 + node_data = self.G.nodes[concept] + return concept, node_data + return None + + def get_related_item(self, topic, depth=1): + if topic not in self.G: + return [], [] + + first_layer_items = [] + second_layer_items = [] + + # 获取相邻节点 + neighbors = list(self.G.neighbors(topic)) + + # 获取当前节点的记忆项 + node_data = self.get_dot(topic) + if node_data: + concept, data = node_data + if 'memory_items' in data: + memory_items = data['memory_items'] + if isinstance(memory_items, list): + first_layer_items.extend(memory_items) + else: + first_layer_items.append(memory_items) + + # 只在depth=2时获取第二层记忆 + if depth >= 2: + # 获取相邻节点的记忆项 + for neighbor in neighbors: + node_data = self.get_dot(neighbor) + if node_data: + concept, data = node_data + if 'memory_items' in data: + memory_items = data['memory_items'] + if isinstance(memory_items, list): + second_layer_items.extend(memory_items) + else: + second_layer_items.append(memory_items) + + return first_layer_items, second_layer_items + + @property + def dots(self): + # 返回所有节点对应的 Memory_dot 对象 + return [self.get_dot(node) for node in self.G.nodes()] + +# 海马体 +class Hippocampus: + def __init__(self, memory_graph: Memory_graph): + self.memory_graph = memory_graph + self.memory_cortex = Memory_cortex(memory_graph) + self.llm_model = LLMModel() + self.llm_model_small = LLMModel(model_name="deepseek-ai/DeepSeek-V2.5") + self.llm_model_get_topic = LLMModel(model_name="Pro/Qwen/Qwen2.5-7B-Instruct") + self.llm_model_summary = LLMModel(model_name="Qwen/Qwen2.5-32B-Instruct") + + def get_memory_sample(self, chat_size=20, time_frequency:dict={'near':2,'mid':4,'far':3}): + """获取记忆样本 + + Returns: + list: 消息记录列表,每个元素是一个消息记录字典列表 + """ + current_timestamp = datetime.datetime.now().timestamp() + chat_samples = [] + + # 短期:1h 中期:4h 长期:24h + for _ in range(time_frequency.get('near')): + random_time = current_timestamp - random.randint(1, 3600*4) + messages = get_cloest_chat_from_db(db=self.memory_graph.db, length=chat_size, timestamp=random_time) + if messages: + chat_samples.append(messages) + + for _ in range(time_frequency.get('mid')): + random_time = 
current_timestamp - random.randint(3600*4, 3600*24) + messages = get_cloest_chat_from_db(db=self.memory_graph.db, length=chat_size, timestamp=random_time) + if messages: + chat_samples.append(messages) + + for _ in range(time_frequency.get('far')): + random_time = current_timestamp - random.randint(3600*24, 3600*24*7) + messages = get_cloest_chat_from_db(db=self.memory_graph.db, length=chat_size, timestamp=random_time) + if messages: + chat_samples.append(messages) + + return chat_samples + + def calculate_topic_num(self,text, compress_rate): + """计算文本的话题数量""" + information_content = calculate_information_content(text) + topic_by_length = text.count('\n')*compress_rate + topic_by_information_content = max(1, min(5, int((information_content-3) * 2))) + topic_num = int((topic_by_length + topic_by_information_content)/2) + print(f"topic_by_length: {topic_by_length}, topic_by_information_content: {topic_by_information_content}, topic_num: {topic_num}") + return topic_num + + async def memory_compress(self, messages: list, compress_rate=0.1): + """压缩消息记录为记忆 + + Args: + messages: 消息记录字典列表,每个字典包含text和time字段 + compress_rate: 压缩率 + + Returns: + tuple: (压缩记忆集合, 相似主题字典) + - 压缩记忆集合: set of (话题, 记忆) 元组 + - 相似主题字典: dict of {话题: [(相似主题, 相似度), ...]} + """ + if not messages: + return set(), {} + + # 合并消息文本,同时保留时间信息 + input_text = "" + time_info = "" + # 计算最早和最晚时间 + earliest_time = min(msg['time'] for msg in messages) + latest_time = max(msg['time'] for msg in messages) + + earliest_dt = datetime.datetime.fromtimestamp(earliest_time) + latest_dt = datetime.datetime.fromtimestamp(latest_time) + + # 如果是同一年 + if earliest_dt.year == latest_dt.year: + earliest_str = earliest_dt.strftime("%m-%d %H:%M:%S") + latest_str = latest_dt.strftime("%m-%d %H:%M:%S") + time_info += f"是在{earliest_dt.year}年,{earliest_str} 到 {latest_str} 的对话:\n" + else: + earliest_str = earliest_dt.strftime("%Y-%m-%d %H:%M:%S") + latest_str = latest_dt.strftime("%Y-%m-%d %H:%M:%S") + time_info += f"是从 {earliest_str} 到 {latest_str} 的对话:\n" + + for msg in messages: + input_text += f"{msg['text']}\n" + + print(input_text) + + topic_num = self.calculate_topic_num(input_text, compress_rate) + topics_response = self.llm_model_get_topic.generate_response(self.find_topic_llm(input_text, topic_num)) + + # 过滤topics + filter_keywords = ['表情包', '图片', '回复', '聊天记录'] + topics = [topic.strip() for topic in topics_response[0].replace(",", ",").replace("、", ",").replace(" ", ",").split(",") if topic.strip()] + filtered_topics = [topic for topic in topics if not any(keyword in topic for keyword in filter_keywords)] + + print(f"过滤后话题: {filtered_topics}") + + # 为每个话题查找相似的已存在主题 + print("\n检查相似主题:") + similar_topics_dict = {} # 存储每个话题的相似主题列表 + + for topic in filtered_topics: + # 获取所有现有节点 + existing_topics = list(self.memory_graph.G.nodes()) + similar_topics = [] + + # 对每个现有节点计算相似度 + for existing_topic in existing_topics: + # 使用jieba分词并计算余弦相似度 + topic_words = set(jieba.cut(topic)) + existing_words = set(jieba.cut(existing_topic)) + + # 计算词向量 + all_words = topic_words | existing_words + v1 = [1 if word in topic_words else 0 for word in all_words] + v2 = [1 if word in existing_words else 0 for word in all_words] + + # 计算余弦相似度 + similarity = cosine_similarity(v1, v2) + + # 如果相似度超过阈值,添加到结果中 + if similarity >= 0.6: # 设置相似度阈值 + similar_topics.append((existing_topic, similarity)) + + # 按相似度降序排序 + similar_topics.sort(key=lambda x: x[1], reverse=True) + # 只保留前5个最相似的主题 + similar_topics = similar_topics[:5] + + # 存储到字典中 + similar_topics_dict[topic] = similar_topics + + # 输出结果 + 
if similar_topics: + print(f"\n主题「{topic}」的相似主题:") + for similar_topic, score in similar_topics: + print(f"- {similar_topic} (相似度: {score:.3f})") + else: + print(f"\n主题「{topic}」没有找到相似主题") + + # 创建所有话题的请求任务 + tasks = [] + for topic in filtered_topics: + topic_what_prompt = self.topic_what(input_text, topic , time_info) + # 创建异步任务 + task = self.llm_model_small.generate_response_async(topic_what_prompt) + tasks.append((topic.strip(), task)) + + # 等待所有任务完成 + compressed_memory = set() + for topic, task in tasks: + response = await task + if response: + compressed_memory.add((topic, response[0])) + + return compressed_memory, similar_topics_dict + + async def operation_build_memory(self, chat_size=12): + # 最近消息获取频率 + time_frequency = {'near': 3, 'mid': 8, 'far': 5} + memory_samples = self.get_memory_sample(chat_size, time_frequency) + + all_topics = [] # 用于存储所有话题 + + for i, messages in enumerate(memory_samples, 1): + # 加载进度可视化 + all_topics = [] + progress = (i / len(memory_samples)) * 100 + bar_length = 30 + filled_length = int(bar_length * i // len(memory_samples)) + bar = '█' * filled_length + '-' * (bar_length - filled_length) + print(f"\n进度: [{bar}] {progress:.1f}% ({i}/{len(memory_samples)})") + + # 生成压缩后记忆 + compress_rate = 0.1 + compressed_memory, similar_topics_dict = await self.memory_compress(messages, compress_rate) + print(f"\033[1;33m压缩后记忆数量\033[0m: {len(compressed_memory)},似曾相识的话题: {len(similar_topics_dict)}") + + # 将记忆加入到图谱中 + for topic, memory in compressed_memory: + print(f"\033[1;32m添加节点\033[0m: {topic}") + self.memory_graph.add_dot(topic, memory) + all_topics.append(topic) + + # 连接相似的已存在主题 + if topic in similar_topics_dict: + similar_topics = similar_topics_dict[topic] + for similar_topic, similarity in similar_topics: + # 避免自连接 + if topic != similar_topic: + # 根据相似度设置连接强度 + strength = int(similarity * 10) # 将0.3-1.0的相似度映射到3-10的强度 + print(f"\033[1;36m连接相似节点\033[0m: {topic} 和 {similar_topic} (强度: {strength})") + # 使用相似度作为初始连接强度 + self.memory_graph.G.add_edge(topic, similar_topic, strength=strength) + + # 连接同批次的相关话题 + for i in range(len(all_topics)): + for j in range(i + 1, len(all_topics)): + print(f"\033[1;32m连接同批次节点\033[0m: {all_topics[i]} 和 {all_topics[j]}") + self.memory_graph.connect_dot(all_topics[i], all_topics[j]) + + self.memory_cortex.sync_memory_to_db() + + def forget_connection(self, source, target): + """ + 检查并可能遗忘一个连接 + + Args: + source: 连接的源节点 + target: 连接的目标节点 + + Returns: + tuple: (是否有变化, 变化类型, 变化详情) + 变化类型: 0-无变化, 1-强度减少, 2-连接移除 + """ + current_time = datetime.datetime.now().timestamp() + # 获取边的属性 + edge_data = self.memory_graph.G[source][target] + last_modified = edge_data.get('last_modified', current_time) + + # 如果连接超过7天未更新 + if current_time - last_modified > 6000: # test + # 获取当前强度 + current_strength = edge_data.get('strength', 1) + # 减少连接强度 + new_strength = current_strength - 1 + edge_data['strength'] = new_strength + edge_data['last_modified'] = current_time + + # 如果强度降为0,移除连接 + if new_strength <= 0: + self.memory_graph.G.remove_edge(source, target) + return True, 2, f"移除连接: {source} - {target} (强度降至0)" + else: + return True, 1, f"减弱连接: {source} - {target} (强度: {current_strength} -> {new_strength})" + + return False, 0, "" + + def forget_topic(self, topic): + """ + 检查并可能遗忘一个话题的记忆 + + Args: + topic: 要检查的话题 + + Returns: + tuple: (是否有变化, 变化类型, 变化详情) + 变化类型: 0-无变化, 1-记忆减少, 2-节点移除 + """ + current_time = datetime.datetime.now().timestamp() + # 获取节点的最后修改时间 + node_data = self.memory_graph.G.nodes[topic] + last_modified = node_data.get('last_modified', 
current_time)
+
+        # 如果话题长时间未更新(此处为3000秒的测试阈值,正式版应为7天)
+        if current_time - last_modified > 3000:  # test
+            memory_items = node_data.get('memory_items', [])
+            if not isinstance(memory_items, list):
+                memory_items = [memory_items] if memory_items else []
+
+            if memory_items:
+                # 获取当前记忆数量
+                current_count = len(memory_items)
+                # 随机选择一条记忆删除
+                removed_item = random.choice(memory_items)
+                memory_items.remove(removed_item)
+
+                if memory_items:
+                    # 更新节点的记忆项和最后修改时间
+                    self.memory_graph.G.nodes[topic]['memory_items'] = memory_items
+                    self.memory_graph.G.nodes[topic]['last_modified'] = current_time
+                    return True, 1, f"减少记忆: {topic} (记忆数量: {current_count} -> {len(memory_items)})\n被移除的记忆: {removed_item}"
+                else:
+                    # 如果没有记忆了,删除节点及其所有连接
+                    self.memory_graph.G.remove_node(topic)
+                    return True, 2, f"移除节点: {topic} (无剩余记忆)\n最后一条记忆: {removed_item}"
+
+        return False, 0, ""
+
+    async def operation_forget_topic(self, percentage=0.1):
+        """
+        随机选择图中一定比例的节点和边进行检查,根据时间条件决定是否遗忘
+
+        Args:
+            percentage: 要检查的节点和边的比例,默认为0.1(10%)
+        """
+        # 获取所有节点和边
+        all_nodes = list(self.memory_graph.G.nodes())
+        all_edges = list(self.memory_graph.G.edges())
+
+        # 计算要检查的数量
+        check_nodes_count = max(1, int(len(all_nodes) * percentage))
+        check_edges_count = max(1, int(len(all_edges) * percentage))
+
+        # 随机选择要检查的节点和边
+        nodes_to_check = random.sample(all_nodes, check_nodes_count)
+        edges_to_check = random.sample(all_edges, check_edges_count)
+
+        # 用于统计不同类型的变化
+        edge_changes = {'weakened': 0, 'removed': 0}
+        node_changes = {'reduced': 0, 'removed': 0}
+
+        # 检查并遗忘连接
+        print("\n开始检查连接...")
+        for source, target in edges_to_check:
+            changed, change_type, details = self.forget_connection(source, target)
+            if changed:
+                if change_type == 1:
+                    edge_changes['weakened'] += 1
+                    logger.info(f"\033[1;34m[连接减弱]\033[0m {details}")
+                elif change_type == 2:
+                    edge_changes['removed'] += 1
+                    logger.info(f"\033[1;31m[连接移除]\033[0m {details}")
+
+        # 检查并遗忘话题
+        print("\n开始检查节点...")
+        for node in nodes_to_check:
+            changed, change_type, details = self.forget_topic(node)
+            if changed:
+                if change_type == 1:
+                    node_changes['reduced'] += 1
+                    logger.info(f"\033[1;33m[记忆减少]\033[0m {details}")
+                elif change_type == 2:
+                    node_changes['removed'] += 1
+                    logger.info(f"\033[1;31m[节点移除]\033[0m {details}")
+
+        # 同步到数据库
+        if any(count > 0 for count in edge_changes.values()) or any(count > 0 for count in node_changes.values()):
+            self.memory_cortex.sync_memory_to_db()
+            print("\n遗忘操作统计:")
+            print(f"连接变化: {edge_changes['weakened']} 个减弱, {edge_changes['removed']} 个移除")
+            print(f"节点变化: {node_changes['reduced']} 个减少记忆, {node_changes['removed']} 个移除")
+        else:
+            print("\n本次检查没有节点或连接满足遗忘条件")
+
+    async def merge_memory(self, topic):
+        """
+        对指定话题的记忆进行合并压缩
+
+        Args:
+            topic: 要合并的话题节点
+        """
+        # 获取节点的记忆项
+        memory_items = self.memory_graph.G.nodes[topic].get('memory_items', [])
+        if not isinstance(memory_items, list):
+            memory_items = [memory_items] if memory_items else []
+
+        # 如果记忆项不足,直接返回
+        if len(memory_items) < 10:
+            return
+
+        # 随机选择10条记忆
+        selected_memories = random.sample(memory_items, 10)
+
+        # 拼接成文本
+        merged_text = "\n".join(selected_memories)
+        print(f"\n[合并记忆] 话题: {topic}")
+        print(f"选择的记忆:\n{merged_text}")
+
+        # memory_compress期望的入参是包含text和time字段的消息字典列表,
+        # 这里将纯文本记忆包装成该格式(时间统一取当前时间)
+        current_time = datetime.datetime.now().timestamp()
+        wrapped_memories = [{'text': memory, 'time': current_time} for memory in selected_memories]
+
+        # 使用memory_compress生成新的压缩记忆
+        compressed_memories, _ = await self.memory_compress(wrapped_memories, 0.1)
+
+        # 从原记忆列表中移除被选中的记忆
+        for memory in selected_memories:
+            memory_items.remove(memory)
+
+        # 添加新的压缩记忆
+        for _, compressed_memory in compressed_memories:
+            memory_items.append(compressed_memory)
+            print(f"添加压缩记忆: {compressed_memory}")
+
+        # 更新节点的记忆项
+        
self.memory_graph.G.nodes[topic]['memory_items'] = memory_items + print(f"完成记忆合并,当前记忆数量: {len(memory_items)}") + + async def operation_merge_memory(self, percentage=0.1): + """ + 随机检查一定比例的节点,对内容数量超过100的节点进行记忆合并 + + Args: + percentage: 要检查的节点比例,默认为0.1(10%) + """ + # 获取所有节点 + all_nodes = list(self.memory_graph.G.nodes()) + # 计算要检查的节点数量 + check_count = max(1, int(len(all_nodes) * percentage)) + # 随机选择节点 + nodes_to_check = random.sample(all_nodes, check_count) + + merged_nodes = [] + for node in nodes_to_check: + # 获取节点的内容条数 + memory_items = self.memory_graph.G.nodes[node].get('memory_items', []) + if not isinstance(memory_items, list): + memory_items = [memory_items] if memory_items else [] + content_count = len(memory_items) + + # 如果内容数量超过100,进行合并 + if content_count > 100: + print(f"\n检查节点: {node}, 当前记忆数量: {content_count}") + await self.merge_memory(node) + merged_nodes.append(node) + + # 同步到数据库 + if merged_nodes: + self.memory_cortex.sync_memory_to_db() + print(f"\n完成记忆合并操作,共处理 {len(merged_nodes)} 个节点") + else: + print("\n本次检查没有需要合并的节点") + + async def _identify_topics(self, text: str) -> list: + """从文本中识别可能的主题""" + topics_response = self.llm_model_get_topic.generate_response(self.find_topic_llm(text, 5)) + topics = [topic.strip() for topic in topics_response[0].replace(",", ",").replace("、", ",").replace(" ", ",").split(",") if topic.strip()] + return topics + + def _find_similar_topics(self, topics: list, similarity_threshold: float = 0.4, debug_info: str = "") -> list: + """查找与给定主题相似的记忆主题""" + all_memory_topics = list(self.memory_graph.G.nodes()) + all_similar_topics = [] + + for topic in topics: + if debug_info: + pass + + topic_vector = text_to_vector(topic) + has_similar_topic = False + + for memory_topic in all_memory_topics: + memory_vector = text_to_vector(memory_topic) + all_words = set(topic_vector.keys()) | set(memory_vector.keys()) + v1 = [topic_vector.get(word, 0) for word in all_words] + v2 = [memory_vector.get(word, 0) for word in all_words] + similarity = cosine_similarity(v1, v2) + + if similarity >= similarity_threshold: + has_similar_topic = True + all_similar_topics.append((memory_topic, similarity)) + + return all_similar_topics + + def _get_top_topics(self, similar_topics: list, max_topics: int = 5) -> list: + """获取相似度最高的主题""" + seen_topics = set() + top_topics = [] + + for topic, score in sorted(similar_topics, key=lambda x: x[1], reverse=True): + if topic not in seen_topics and len(top_topics) < max_topics: + seen_topics.add(topic) + top_topics.append((topic, score)) + + return top_topics + + async def memory_activate_value(self, text: str, max_topics: int = 5, similarity_threshold: float = 0.3) -> int: + """计算输入文本对记忆的激活程度""" + logger.info(f"[记忆激活]识别主题: {await self._identify_topics(text)}") + + identified_topics = await self._identify_topics(text) + if not identified_topics: + return 0 + + all_similar_topics = self._find_similar_topics( + identified_topics, + similarity_threshold=similarity_threshold, + debug_info="记忆激活" + ) + + if not all_similar_topics: + return 0 + + top_topics = self._get_top_topics(all_similar_topics, max_topics) + + if len(top_topics) == 1: + topic, score = top_topics[0] + memory_items = self.memory_graph.G.nodes[topic].get('memory_items', []) + if not isinstance(memory_items, list): + memory_items = [memory_items] if memory_items else [] + content_count = len(memory_items) + penalty = 1.0 / (1 + math.log(content_count + 1)) + + activation = int(score * 50 * penalty) + print(f"\033[1;32m[记忆激活]\033[0m 单主题「{topic}」- 相似度: {score:.3f}, 内容数: 
{content_count}, 激活值: {activation}") + return activation + + matched_topics = set() + topic_similarities = {} + + for memory_topic, similarity in top_topics: + memory_items = self.memory_graph.G.nodes[memory_topic].get('memory_items', []) + if not isinstance(memory_items, list): + memory_items = [memory_items] if memory_items else [] + content_count = len(memory_items) + penalty = 1.0 / (1 + math.log(content_count + 1)) + + for input_topic in identified_topics: + topic_vector = text_to_vector(input_topic) + memory_vector = text_to_vector(memory_topic) + all_words = set(topic_vector.keys()) | set(memory_vector.keys()) + v1 = [topic_vector.get(word, 0) for word in all_words] + v2 = [memory_vector.get(word, 0) for word in all_words] + sim = cosine_similarity(v1, v2) + if sim >= similarity_threshold: + matched_topics.add(input_topic) + adjusted_sim = sim * penalty + topic_similarities[input_topic] = max(topic_similarities.get(input_topic, 0), adjusted_sim) + print(f"\033[1;32m[记忆激活]\033[0m 主题「{input_topic}」-> 「{memory_topic}」(内容数: {content_count}, 相似度: {adjusted_sim:.3f})") + + topic_match = len(matched_topics) / len(identified_topics) + average_similarities = sum(topic_similarities.values()) / len(topic_similarities) if topic_similarities else 0 + + activation = int((topic_match + average_similarities) / 2 * 100) + print(f"\033[1;32m[记忆激活]\033[0m 匹配率: {topic_match:.3f}, 平均相似度: {average_similarities:.3f}, 激活值: {activation}") + + return activation + + async def get_relevant_memories(self, text: str, max_topics: int = 5, similarity_threshold: float = 0.4, max_memory_num: int = 5) -> list: + """根据输入文本获取相关的记忆内容""" + identified_topics = await self._identify_topics(text) + + all_similar_topics = self._find_similar_topics( + identified_topics, + similarity_threshold=similarity_threshold, + debug_info="记忆检索" + ) + + relevant_topics = self._get_top_topics(all_similar_topics, max_topics) + + relevant_memories = [] + for topic, score in relevant_topics: + first_layer, _ = self.memory_graph.get_related_item(topic, depth=1) + if first_layer: + if len(first_layer) > max_memory_num/2: + first_layer = random.sample(first_layer, max_memory_num//2) + for memory in first_layer: + relevant_memories.append({ + 'topic': topic, + 'similarity': score, + 'content': memory + }) + + relevant_memories.sort(key=lambda x: x['similarity'], reverse=True) + + if len(relevant_memories) > max_memory_num: + relevant_memories = random.sample(relevant_memories, max_memory_num) + + return relevant_memories + + def find_topic_llm(self,text, topic_num): + prompt = f'这是一段文字:{text}。请你从这段话中总结出{topic_num}个关键的概念,可以是名词,动词,或者特定人物,帮我列出来,用逗号,隔开,尽可能精简。只需要列举{topic_num}个话题就好,不要有序号,不要告诉我其他内容。' + return prompt + + def topic_what(self,text, topic, time_info): + prompt = f'这是一段文字,{time_info}:{text}。我想让你基于这段文字来概括"{topic}"这个概念,帮我总结成一句自然的话,可以包含时间和人物,以及具体的观点。只输出这句话就好' + return prompt + +def segment_text(text): + """使用jieba进行文本分词""" + seg_text = list(jieba.cut(text)) + return seg_text + +def text_to_vector(text): + """将文本转换为词频向量""" + words = segment_text(text) + vector = {} + for word in words: + vector[word] = vector.get(word, 0) + 1 + return vector + +def cosine_similarity(v1, v2): + """计算两个向量的余弦相似度""" + dot_product = sum(a * b for a, b in zip(v1, v2)) + norm1 = math.sqrt(sum(a * a for a in v1)) + norm2 = math.sqrt(sum(b * b for b in v2)) + if norm1 == 0 or norm2 == 0: + return 0 + return dot_product / (norm1 * norm2) + +def visualize_graph_lite(memory_graph: Memory_graph, color_by_memory: bool = False): + # 设置中文字体 + plt.rcParams['font.sans-serif'] = 
['SimHei'] # 用来正常显示中文标签
+    plt.rcParams['axes.unicode_minus'] = False # 用来正常显示负号
+
+    G = memory_graph.G
+
+    # 创建一个新图用于可视化
+    H = G.copy()
+
+    # 过滤掉内容数量小于2的节点
+    nodes_to_remove = []
+    for node in H.nodes():
+        memory_items = H.nodes[node].get('memory_items', [])
+        memory_count = len(memory_items) if isinstance(memory_items, list) else (1 if memory_items else 0)
+        if memory_count < 2:
+            nodes_to_remove.append(node)
+
+    H.remove_nodes_from(nodes_to_remove)
+
+    # 如果没有符合条件的节点,直接返回
+    if len(H.nodes()) == 0:
+        print("没有找到内容数量大于等于2的节点")
+        return
+
+    # 计算节点大小和颜色
+    node_colors = []
+    node_sizes = []
+    nodes = list(H.nodes())
+
+    # 获取最大记忆数用于归一化节点大小
+    max_memories = 1
+    for node in nodes:
+        memory_items = H.nodes[node].get('memory_items', [])
+        memory_count = len(memory_items) if isinstance(memory_items, list) else (1 if memory_items else 0)
+        max_memories = max(max_memories, memory_count)
+
+    # 计算每个节点的大小和颜色
+    for node in nodes:
+        # 计算节点大小(基于记忆数量)
+        memory_items = H.nodes[node].get('memory_items', [])
+        memory_count = len(memory_items) if isinstance(memory_items, list) else (1 if memory_items else 0)
+        # 使用指数函数使变化更明显
+        ratio = memory_count / max_memories
+        size = 400 + 2000 * (ratio ** 2)  # 增大节点大小
+        node_sizes.append(size)
+
+        # 计算节点颜色(基于连接数)
+        degree = H.degree(node)
+        if degree >= 30:
+            node_colors.append((1.0, 0, 0))  # 亮红色 (#FF0000)
+        else:
+            # 将度数1-30映射到0-1的范围
+            color_ratio = (degree - 1) / 29.0 if degree > 1 else 0
+            # 使用蓝到红的渐变
+            red = min(0.9, color_ratio)
+            blue = max(0.0, 1.0 - color_ratio)
+            node_colors.append((red, 0, blue))
+
+    # 绘制图形
+    plt.figure(figsize=(16, 12))  # 减小图形尺寸
+    pos = nx.spring_layout(H,
+                           k=1,  # 调整节点间斥力
+                           iterations=100,  # 增加迭代次数
+                           scale=1.5,  # 减小布局尺寸
+                           weight='strength')  # 使用边的strength属性作为权重
+
+    nx.draw(H, pos,
+            with_labels=True,
+            node_color=node_colors,
+            node_size=node_sizes,
+            font_size=12,  # 保持增大的字体大小
+            font_family='SimHei',
+            font_weight='bold',
+            edge_color='gray',
+            width=1.5)  # 统一的边宽度
+
+    title = '记忆图谱可视化(仅显示内容≥2的节点)\n节点大小表示记忆数量\n节点颜色:蓝(弱连接)到红(强连接)渐变\n连接强度越大的节点距离越近'
+    plt.title(title, fontsize=16, fontfamily='SimHei')
+    plt.show()
+
+async def main():
+    # 初始化数据库
+    logger.info("正在初始化数据库连接...")
+    db = Database.get_instance()
+    start_time = time.time()
+
+    test_params = {'do_build_memory': True, 'do_forget_topic': False, 'do_visualize_graph': True, 'do_query': False, 'do_merge_memory': False}
+
+    # 创建记忆图
+    memory_graph = Memory_graph()
+
+    # 创建海马体
+    hippocampus = Hippocampus(memory_graph)
+
+    # 从数据库同步数据
+    hippocampus.memory_cortex.sync_memory_from_db()
+
+    end_time = time.time()
+    logger.info(f"\033[32m[加载海马体耗时: {end_time - start_time:.2f} 秒]\033[0m")
+
+    # 构建记忆
+    if test_params['do_build_memory']:
+        logger.info("开始构建记忆...")
+        chat_size = 20
+        await hippocampus.operation_build_memory(chat_size=chat_size)
+
+        end_time = time.time()
+        logger.info(f"\033[32m[构建记忆耗时: {end_time - start_time:.2f} 秒,chat_size={chat_size},chat_count = 16]\033[0m")
+
+    if test_params['do_forget_topic']:
+        logger.info("开始遗忘记忆...")
+        await hippocampus.operation_forget_topic(percentage=0.01)
+
+        end_time = time.time()
+        logger.info(f"\033[32m[遗忘记忆耗时: {end_time - start_time:.2f} 秒]\033[0m")
+
+    if test_params['do_merge_memory']:
+        logger.info("开始合并记忆...")
+        await hippocampus.operation_merge_memory(percentage=0.1)
+
+        end_time = time.time()
+        logger.info(f"\033[32m[合并记忆耗时: {end_time - start_time:.2f} 秒]\033[0m")
+
+    if test_params['do_visualize_graph']:
+        # 展示优化后的图形
+        logger.info("生成记忆图谱可视化...")
+        print("\n生成优化后的记忆图谱:")
+        visualize_graph_lite(memory_graph)
+
+    if test_params['do_query']:
+        # 交互式查询
+        while True:
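+            # 交互式检索说明:输入概念词后,get_related_item会返回第一层(节点自身)与第二层(相邻节点)两组记忆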
+ query = input("\n请输入新的查询概念(输入'退出'以结束):") + if query.lower() == '退出': + break + + items_list = memory_graph.get_related_item(query) + if items_list: + first_layer, second_layer = items_list + if first_layer: + print("\n直接相关的记忆:") + for item in first_layer: + print(f"- {item}") + if second_layer: + print("\n间接相关的记忆:") + for item in second_layer: + print(f"- {item}") + else: + print("未找到相关记忆。") + + +if __name__ == "__main__": + import asyncio + asyncio.run(main()) + + diff --git a/src/plugins/models/utils_model.py b/src/plugins/models/utils_model.py index 56ed80693..c6ed6b619 100644 --- a/src/plugins/models/utils_model.py +++ b/src/plugins/models/utils_model.py @@ -24,14 +24,15 @@ class LLM_request: self.api_key = getattr(config, model["key"]) self.base_url = getattr(config, model["base_url"]) except AttributeError as e: + logger.error(f"原始 model dict 信息:{model}") logger.error(f"配置错误:找不到对应的配置项 - {str(e)}") raise ValueError(f"配置错误:找不到对应的配置项 - {str(e)}") from e self.model_name = model["name"] self.params = kwargs - + self.pri_in = model.get("pri_in", 0) self.pri_out = model.get("pri_out", 0) - + # 获取数据库实例 self.db = Database.get_instance() self._init_database() @@ -44,12 +45,12 @@ class LLM_request: self.db.db.llm_usage.create_index([("model_name", 1)]) self.db.db.llm_usage.create_index([("user_id", 1)]) self.db.db.llm_usage.create_index([("request_type", 1)]) - except Exception as e: - logger.error(f"创建数据库索引失败: {e}") + except Exception: + logger.error("创建数据库索引失败") - def _record_usage(self, prompt_tokens: int, completion_tokens: int, total_tokens: int, - user_id: str = "system", request_type: str = "chat", - endpoint: str = "/chat/completions"): + def _record_usage(self, prompt_tokens: int, completion_tokens: int, total_tokens: int, + user_id: str = "system", request_type: str = "chat", + endpoint: str = "/chat/completions"): """记录模型使用情况到数据库 Args: prompt_tokens: 输入token数 @@ -79,8 +80,8 @@ class LLM_request: f"提示词: {prompt_tokens}, 完成: {completion_tokens}, " f"总计: {total_tokens}" ) - except Exception as e: - logger.error(f"记录token使用情况失败: {e}") + except Exception: + logger.error("记录token使用情况失败") def _calculate_cost(self, prompt_tokens: int, completion_tokens: int) -> float: """计算API调用成本 @@ -140,12 +141,12 @@ class LLM_request: } api_url = f"{self.base_url.rstrip('/')}/{endpoint.lstrip('/')}" - #判断是否为流式 + # 判断是否为流式 stream_mode = self.params.get("stream", False) if self.params.get("stream", False) is True: - logger.info(f"进入流式输出模式,发送请求到URL: {api_url}") + logger.debug(f"进入流式输出模式,发送请求到URL: {api_url}") else: - logger.info(f"发送请求到URL: {api_url}") + logger.debug(f"发送请求到URL: {api_url}") logger.info(f"使用模型: {self.model_name}") # 构建请求体 @@ -158,7 +159,7 @@ class LLM_request: try: # 使用上下文管理器处理会话 headers = await self._build_headers() - #似乎是openai流式必须要的东西,不过阿里云的qwq-plus加了这个没有影响 + # 似乎是openai流式必须要的东西,不过阿里云的qwq-plus加了这个没有影响 if stream_mode: headers["Accept"] = "text/event-stream" @@ -182,11 +183,33 @@ class LLM_request: continue elif response.status in policy["abort_codes"]: logger.error(f"错误码: {response.status} - {error_code_mapping.get(response.status)}") + if response.status == 403: + # 尝试降级Pro模型 + if self.model_name.startswith( + "Pro/") and self.base_url == "https://api.siliconflow.cn/v1/": + old_model_name = self.model_name + self.model_name = self.model_name[4:] # 移除"Pro/"前缀 + logger.warning(f"检测到403错误,模型从 {old_model_name} 降级为 {self.model_name}") + + # 对全局配置进行更新 + if hasattr(global_config, 'llm_normal') and global_config.llm_normal.get( + 'name') == old_model_name: + global_config.llm_normal['name'] = 
self.model_name + logger.warning("已将全局配置中的 llm_normal 模型降级") + + # 更新payload中的模型名 + if payload and 'model' in payload: + payload['model'] = self.model_name + + # 重新尝试请求 + retry -= 1 # 不计入重试次数 + continue + raise RuntimeError(f"请求被拒绝: {error_code_mapping.get(response.status)}") - + response.raise_for_status() - - #将流式输出转化为非流式输出 + + # 将流式输出转化为非流式输出 if stream_mode: accumulated_content = "" async for line_bytes in response.content: @@ -204,8 +227,8 @@ class LLM_request: if delta_content is None: delta_content = "" accumulated_content += delta_content - except Exception as e: - logger.error(f"解析流式输出错误: {e}") + except Exception: + logger.exception("解析流式输出错") content = accumulated_content reasoning_content = "" think_match = re.search(r'(.*?)', content, re.DOTALL) @@ -213,12 +236,15 @@ class LLM_request: reasoning_content = think_match.group(1).strip() content = re.sub(r'.*?', '', content, flags=re.DOTALL).strip() # 构造一个伪result以便调用自定义响应处理器或默认处理器 - result = {"choices": [{"message": {"content": content, "reasoning_content": reasoning_content}}]} - return response_handler(result) if response_handler else self._default_response_handler(result, user_id, request_type, endpoint) + result = { + "choices": [{"message": {"content": content, "reasoning_content": reasoning_content}}]} + return response_handler(result) if response_handler else self._default_response_handler( + result, user_id, request_type, endpoint) else: result = await response.json() # 使用自定义处理器或默认处理 - return response_handler(result) if response_handler else self._default_response_handler(result, user_id, request_type, endpoint) + return response_handler(result) if response_handler else self._default_response_handler( + result, user_id, request_type, endpoint) except Exception as e: if retry < policy["max_retries"] - 1: @@ -232,8 +258,8 @@ class LLM_request: logger.error("达到最大重试次数,请求仍然失败") raise RuntimeError("达到最大重试次数,API请求仍然失败") - - async def _transform_parameters(self, params: dict) ->dict: + + async def _transform_parameters(self, params: dict) -> dict: """ 根据模型名称转换参数: - 对于需要转换的OpenAI CoT系列模型(例如 "o3-mini"),删除 'temprature' 参数, @@ -242,7 +268,8 @@ class LLM_request: # 复制一份参数,避免直接修改原始数据 new_params = dict(params) # 定义需要转换的模型列表 - models_needing_transformation = ["o3-mini", "o1-mini", "o1-preview", "o1-2024-12-17", "o1-preview-2024-09-12", "o3-mini-2025-01-31", "o1-mini-2024-09-12"] + models_needing_transformation = ["o3-mini", "o1-mini", "o1-preview", "o1-2024-12-17", "o1-preview-2024-09-12", + "o3-mini-2025-01-31", "o1-mini-2024-09-12"] if self.model_name.lower() in models_needing_transformation: # 删除 'temprature' 参数(如果存在) new_params.pop("temperature", None) @@ -278,13 +305,13 @@ class LLM_request: **params_copy } # 如果 payload 中依然存在 max_tokens 且需要转换,在这里进行再次检查 - if self.model_name.lower() in ["o3-mini", "o1-mini", "o1-preview", "o1-2024-12-17", "o1-preview-2024-09-12", "o3-mini-2025-01-31", "o1-mini-2024-09-12"] and "max_tokens" in payload: + if self.model_name.lower() in ["o3-mini", "o1-mini", "o1-preview", "o1-2024-12-17", "o1-preview-2024-09-12", + "o3-mini-2025-01-31", "o1-mini-2024-09-12"] and "max_tokens" in payload: payload["max_completion_tokens"] = payload.pop("max_tokens") return payload - - def _default_response_handler(self, result: dict, user_id: str = "system", - request_type: str = "chat", endpoint: str = "/chat/completions") -> Tuple: + def _default_response_handler(self, result: dict, user_id: str = "system", + request_type: str = "chat", endpoint: str = "/chat/completions") -> Tuple: """默认响应解析""" if "choices" in result and 
result["choices"]: message = result["choices"][0]["message"] @@ -329,15 +356,15 @@ class LLM_request: """构建请求头""" if no_key: return { - "Authorization": f"Bearer **********", + "Authorization": "Bearer **********", "Content-Type": "application/json" } else: return { "Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json" - } - # 防止小朋友们截图自己的key + } + # 防止小朋友们截图自己的key async def generate_response(self, prompt: str) -> Tuple[str, str]: """根据输入的提示生成模型的异步响应""" @@ -384,6 +411,7 @@ class LLM_request: Returns: list: embedding向量,如果失败则返回None """ + def embedding_handler(result): """处理响应""" if "data" in result and len(result["data"]) > 0: diff --git a/src/plugins/moods/moods.py b/src/plugins/moods/moods.py index 32b900b0b..c37bfc81d 100644 --- a/src/plugins/moods/moods.py +++ b/src/plugins/moods/moods.py @@ -4,7 +4,7 @@ import time from dataclasses import dataclass from ..chat.config import global_config - +from loguru import logger @dataclass class MoodState: @@ -51,11 +51,11 @@ class MoodManager: # 情绪词映射表 (valence, arousal) self.emotion_map = { 'happy': (0.8, 0.6), # 高愉悦度,中等唤醒度 - 'angry': (-0.7, 0.8), # 负愉悦度,高唤醒度 + 'angry': (-0.7, 0.7), # 负愉悦度,高唤醒度 'sad': (-0.6, 0.3), # 负愉悦度,低唤醒度 - 'surprised': (0.4, 0.9), # 中等愉悦度,高唤醒度 + 'surprised': (0.4, 0.8), # 中等愉悦度,高唤醒度 'disgusted': (-0.8, 0.5), # 高负愉悦度,中等唤醒度 - 'fearful': (-0.7, 0.7), # 负愉悦度,高唤醒度 + 'fearful': (-0.7, 0.6), # 负愉悦度,高唤醒度 'neutral': (0.0, 0.5), # 中性愉悦度,中等唤醒度 } @@ -64,15 +64,20 @@ class MoodManager: # 第一象限:高唤醒,正愉悦 (0.5, 0.7): "兴奋", (0.3, 0.8): "快乐", + (0.2, 0.65): "满足", # 第二象限:高唤醒,负愉悦 (-0.5, 0.7): "愤怒", (-0.3, 0.8): "焦虑", + (-0.2, 0.65): "烦躁", # 第三象限:低唤醒,负愉悦 (-0.5, 0.3): "悲伤", - (-0.3, 0.2): "疲倦", + (-0.3, 0.35): "疲倦", + (-0.4, 0.15): "疲倦", # 第四象限:低唤醒,正愉悦 - (0.5, 0.3): "放松", - (0.3, 0.2): "平静" + (0.2, 0.45): "平静", + (0.3, 0.4): "安宁", + (0.5, 0.3): "放松" + } @classmethod @@ -119,9 +124,13 @@ class MoodManager: current_time = time.time() time_diff = current_time - self.last_update - # 应用衰减公式 - self.current_mood.valence *= math.pow(1 - self.decay_rate_valence, time_diff) - self.current_mood.arousal *= math.pow(1 - self.decay_rate_arousal, time_diff) + # Valence 向中性(0)回归 + valence_target = 0.0 + self.current_mood.valence = valence_target + (self.current_mood.valence - valence_target) * math.exp(-self.decay_rate_valence * time_diff) + + # Arousal 向中性(0.5)回归 + arousal_target = 0.5 + self.current_mood.arousal = arousal_target + (self.current_mood.arousal - arousal_target) * math.exp(-self.decay_rate_arousal * time_diff) # 确保值在合理范围内 self.current_mood.valence = max(-1.0, min(1.0, self.current_mood.valence)) @@ -201,7 +210,7 @@ class MoodManager: def print_mood_status(self) -> None: """打印当前情绪状态""" - print(f"\033[1;35m[情绪状态]\033[0m 愉悦度: {self.current_mood.valence:.2f}, " + logger.info(f"[情绪状态]愉悦度: {self.current_mood.valence:.2f}, " f"唤醒度: {self.current_mood.arousal:.2f}, " f"心情: {self.current_mood.text}") diff --git a/src/plugins/schedule/schedule_generator.py b/src/plugins/schedule/schedule_generator.py index 8a036152c..e280c6bce 100644 --- a/src/plugins/schedule/schedule_generator.py +++ b/src/plugins/schedule/schedule_generator.py @@ -13,21 +13,21 @@ from ..models.utils_model import LLM_request driver = get_driver() config = driver.config - Database.initialize( - host= config.MONGODB_HOST, - port= int(config.MONGODB_PORT), - db_name= config.DATABASE_NAME, - username= config.MONGODB_USERNAME, - password= config.MONGODB_PASSWORD, - auth_source=config.MONGODB_AUTH_SOURCE - ) + host=config.MONGODB_HOST, + port=int(config.MONGODB_PORT), + 
db_name=config.DATABASE_NAME, + username=config.MONGODB_USERNAME, + password=config.MONGODB_PASSWORD, + auth_source=config.MONGODB_AUTH_SOURCE +) + class ScheduleGenerator: def __init__(self): - #根据global_config.llm_normal这一字典配置指定模型 + # 根据global_config.llm_normal这一字典配置指定模型 # self.llm_scheduler = LLMModel(model = global_config.llm_normal,temperature=0.9) - self.llm_scheduler = LLM_request(model = global_config.llm_normal,temperature=0.9) + self.llm_scheduler = LLM_request(model=global_config.llm_normal, temperature=0.9) self.db = Database.get_instance() self.today_schedule_text = "" self.today_schedule = {} @@ -35,39 +35,41 @@ class ScheduleGenerator: self.tomorrow_schedule = {} self.yesterday_schedule_text = "" self.yesterday_schedule = {} - + async def initialize(self): today = datetime.datetime.now() tomorrow = datetime.datetime.now() + datetime.timedelta(days=1) yesterday = datetime.datetime.now() - datetime.timedelta(days=1) - + self.today_schedule_text, self.today_schedule = await self.generate_daily_schedule(target_date=today) - self.tomorrow_schedule_text, self.tomorrow_schedule = await self.generate_daily_schedule(target_date=tomorrow,read_only=True) - self.yesterday_schedule_text, self.yesterday_schedule = await self.generate_daily_schedule(target_date=yesterday,read_only=True) - - async def generate_daily_schedule(self, target_date: datetime.datetime = None,read_only:bool = False) -> Dict[str, str]: - + self.tomorrow_schedule_text, self.tomorrow_schedule = await self.generate_daily_schedule(target_date=tomorrow, + read_only=True) + self.yesterday_schedule_text, self.yesterday_schedule = await self.generate_daily_schedule( + target_date=yesterday, read_only=True) + + async def generate_daily_schedule(self, target_date: datetime.datetime = None, read_only: bool = False) -> Dict[ + str, str]: + date_str = target_date.strftime("%Y-%m-%d") weekday = target_date.strftime("%A") - schedule_text = str - + existing_schedule = self.db.db.schedule.find_one({"date": date_str}) if existing_schedule: - print(f"{date_str}的日程已存在:") + logger.debug(f"{date_str}的日程已存在:") schedule_text = existing_schedule["schedule"] # print(self.schedule_text) - elif read_only == False: - print(f"{date_str}的日程不存在,准备生成新的日程。") - prompt = f"""我是{global_config.BOT_NICKNAME},{global_config.PROMPT_SCHEDULE_GEN},请为我生成{date_str}({weekday})的日程安排,包括:"""+\ - """ + elif not read_only: + logger.debug(f"{date_str}的日程不存在,准备生成新的日程。") + prompt = f"""我是{global_config.BOT_NICKNAME},{global_config.PROMPT_SCHEDULE_GEN},请为我生成{date_str}({weekday})的日程安排,包括:""" + \ + """ 1. 早上的学习和工作安排 2. 下午的活动和任务 3. 
晚上的计划和休息时间 - 请按照时间顺序列出具体时间点和对应的活动,用一个时间点而不是时间段来表示时间,用JSON格式返回日程表,仅返回内容,不要返回注释,时间采用24小时制,格式为{"时间": "活动","时间": "活动",...}。""" - + 请按照时间顺序列出具体时间点和对应的活动,用一个时间点而不是时间段来表示时间,用JSON格式返回日程表,仅返回内容,不要返回注释,不要添加任何markdown或代码块样式,时间采用24小时制,格式为{"时间": "活动","时间": "活动",...}。""" + try: schedule_text, _ = await self.llm_scheduler.generate_response(prompt) self.db.db.schedule.insert_one({"date": date_str, "schedule": schedule_text}) @@ -76,36 +78,35 @@ class ScheduleGenerator: schedule_text = "生成日程时出错了" # print(self.schedule_text) else: - print(f"{date_str}的日程不存在。") + logger.debug(f"{date_str}的日程不存在。") schedule_text = "忘了" - return schedule_text,None - + return schedule_text, None + schedule_form = self._parse_schedule(schedule_text) - return schedule_text,schedule_form - + return schedule_text, schedule_form + def _parse_schedule(self, schedule_text: str) -> Union[bool, Dict[str, str]]: """解析日程文本,转换为时间和活动的字典""" - try: + try: schedule_dict = json.loads(schedule_text) return schedule_dict - except json.JSONDecodeError as e: - print(schedule_text) - print(f"解析日程失败: {str(e)}") + except json.JSONDecodeError: + logger.exception("解析日程失败: {}".format(schedule_text)) return False - + def _parse_time(self, time_str: str) -> str: """解析时间字符串,转换为时间""" return datetime.datetime.strptime(time_str, "%H:%M") - + def get_current_task(self) -> str: """获取当前时间应该进行的任务""" current_time = datetime.datetime.now().strftime("%H:%M") - + # 找到最接近当前时间的任务 closest_time = None min_diff = float('inf') - + # 检查今天的日程 if not self.today_schedule: return "摸鱼" @@ -114,7 +115,7 @@ class ScheduleGenerator: if closest_time is None or diff < min_diff: closest_time = time_str min_diff = diff - + # 检查昨天的日程中的晚间任务 if self.yesterday_schedule: for time_str in self.yesterday_schedule.keys(): @@ -125,17 +126,17 @@ class ScheduleGenerator: closest_time = time_str min_diff = diff return closest_time, self.yesterday_schedule[closest_time] - + if closest_time: return closest_time, self.today_schedule[closest_time] return "摸鱼" - + def _time_diff(self, time1: str, time2: str) -> int: """计算两个时间字符串之间的分钟差""" - if time1=="24:00": - time1="23:59" - if time2=="24:00": - time2="23:59" + if time1 == "24:00": + time1 = "23:59" + if time2 == "24:00": + time2 = "23:59" t1 = datetime.datetime.strptime(time1, "%H:%M") t2 = datetime.datetime.strptime(time2, "%H:%M") diff = int((t2 - t1).total_seconds() / 60) @@ -146,17 +147,18 @@ class ScheduleGenerator: diff -= 1440 # 减一天的分钟 # print(f"时间1[{time1}]: 时间2[{time2}],差值[{diff}]分钟") return diff - + def print_schedule(self): """打印完整的日程安排""" if not self._parse_schedule(self.today_schedule_text): - print("今日日程有误,将在下次运行时重新生成") + logger.warning("今日日程有误,将在下次运行时重新生成") self.db.db.schedule.delete_one({"date": datetime.datetime.now().strftime("%Y-%m-%d")}) else: - print("\n=== 今日日程安排 ===") + logger.info("=== 今日日程安排 ===") for time_str, activity in self.today_schedule.items(): - print(f"时间[{time_str}]: 活动[{activity}]") - print("==================\n") + logger.info(f"时间[{time_str}]: 活动[{activity}]") + logger.info("==================") + # def main(): # # 使用示例 @@ -165,7 +167,7 @@ class ScheduleGenerator: # scheduler.print_schedule() # print("\n当前任务:") # print(scheduler.get_current_task()) - + # print("昨天日程:") # print(scheduler.yesterday_schedule) # print("今天日程:") @@ -175,5 +177,5 @@ class ScheduleGenerator: # if __name__ == "__main__": # main() - + bot_schedule = ScheduleGenerator() diff --git a/src/plugins/utils/statistic.py b/src/plugins/utils/statistic.py index d7248e869..2974389e6 100644 --- a/src/plugins/utils/statistic.py +++ 
b/src/plugins/utils/statistic.py @@ -3,6 +3,7 @@ import time from collections import defaultdict from datetime import datetime, timedelta from typing import Any, Dict +from loguru import logger from ...common.database import Database @@ -153,8 +154,8 @@ class LLMStatistics: try: all_stats = self._collect_all_statistics() self._save_statistics(all_stats) - except Exception as e: - print(f"\033[1;31m[错误]\033[0m 统计数据处理失败: {e}") + except Exception: + logger.exception("统计数据处理失败") # 等待1分钟 for _ in range(60): diff --git a/src/plugins/utils/typo_generator.py b/src/plugins/utils/typo_generator.py index c743ec6ec..aa72c387f 100644 --- a/src/plugins/utils/typo_generator.py +++ b/src/plugins/utils/typo_generator.py @@ -284,10 +284,13 @@ class ChineseTypoGenerator: 返回: typo_sentence: 包含错别字的句子 - typo_info: 错别字信息列表 + correction_suggestion: 随机选择的一个纠正建议,返回正确的字/词 """ result = [] typo_info = [] + word_typos = [] # 记录词语错误对(错词,正确词) + char_typos = [] # 记录单字错误对(错字,正确字) + current_pos = 0 # 分词 words = self._segment_sentence(sentence) @@ -296,6 +299,7 @@ class ChineseTypoGenerator: # 如果是标点符号或空格,直接添加 if all(not self._is_chinese_char(c) for c in word): result.append(word) + current_pos += len(word) continue # 获取词语的拼音 @@ -316,6 +320,8 @@ class ChineseTypoGenerator: ' '.join(word_pinyin), ' '.join(self._get_word_pinyin(typo_word)), orig_freq, typo_freq)) + word_typos.append((typo_word, word)) # 记录(错词,正确词)对 + current_pos += len(typo_word) continue # 如果不进行整词替换,则进行单字替换 @@ -333,11 +339,15 @@ class ChineseTypoGenerator: result.append(typo_char) typo_py = pinyin(typo_char, style=Style.TONE3)[0][0] typo_info.append((char, typo_char, py, typo_py, orig_freq, typo_freq)) + char_typos.append((typo_char, char)) # 记录(错字,正确字)对 + current_pos += 1 continue result.append(char) + current_pos += 1 else: # 处理多字词的单字替换 word_result = [] + word_start_pos = current_pos for i, (char, py) in enumerate(zip(word, word_pinyin)): # 词中的字替换概率降低 word_error_rate = self.error_rate * (0.7 ** (len(word) - 1)) @@ -353,11 +363,24 @@ class ChineseTypoGenerator: word_result.append(typo_char) typo_py = pinyin(typo_char, style=Style.TONE3)[0][0] typo_info.append((char, typo_char, py, typo_py, orig_freq, typo_freq)) + char_typos.append((typo_char, char)) # 记录(错字,正确字)对 continue word_result.append(char) result.append(''.join(word_result)) + current_pos += len(word) - return ''.join(result), typo_info + # 优先从词语错误中选择,如果没有则从单字错误中选择 + correction_suggestion = None + # 50%概率返回纠正建议 + if random.random() < 0.5: + if word_typos: + wrong_word, correct_word = random.choice(word_typos) + correction_suggestion = correct_word + elif char_typos: + wrong_char, correct_char = random.choice(char_typos) + correction_suggestion = correct_char + + return ''.join(result), correction_suggestion def format_typo_info(self, typo_info): """ @@ -419,16 +442,16 @@ def main(): # 创建包含错别字的句子 start_time = time.time() - typo_sentence, typo_info = typo_generator.create_typo_sentence(sentence) + typo_sentence, correction_suggestion = typo_generator.create_typo_sentence(sentence) # 打印结果 print("\n原句:", sentence) print("错字版:", typo_sentence) - # 打印错别字信息 - if typo_info: - print("\n错别字信息:") - print(typo_generator.format_typo_info(typo_info)) + # 打印纠正建议 + if correction_suggestion: + print("\n随机纠正建议:") + print(f"应该改为:{correction_suggestion}") # 计算并打印总耗时 end_time = time.time() diff --git a/src/test/typo.py b/src/test/typo.py index 16834200f..1378eae7d 100644 --- a/src/test/typo.py +++ b/src/test/typo.py @@ -11,12 +11,14 @@ from pathlib import Path import random import math import time +from loguru import 
logger + class ChineseTypoGenerator: - def __init__(self, - error_rate=0.3, - min_freq=5, - tone_error_rate=0.2, + def __init__(self, + error_rate=0.3, + min_freq=5, + tone_error_rate=0.2, word_replace_rate=0.3, max_freq_diff=200): """ @@ -34,27 +36,27 @@ class ChineseTypoGenerator: self.tone_error_rate = tone_error_rate self.word_replace_rate = word_replace_rate self.max_freq_diff = max_freq_diff - + # 加载数据 - print("正在加载汉字数据库,请稍候...") + logger.debug("正在加载汉字数据库,请稍候...") self.pinyin_dict = self._create_pinyin_dict() self.char_frequency = self._load_or_create_char_frequency() - + def _load_or_create_char_frequency(self): """ 加载或创建汉字频率字典 """ cache_file = Path("char_frequency.json") - + # 如果缓存文件存在,直接加载 if cache_file.exists(): with open(cache_file, 'r', encoding='utf-8') as f: return json.load(f) - + # 使用内置的词频文件 char_freq = defaultdict(int) dict_path = os.path.join(os.path.dirname(jieba.__file__), 'dict.txt') - + # 读取jieba的词典文件 with open(dict_path, 'r', encoding='utf-8') as f: for line in f: @@ -63,15 +65,15 @@ class ChineseTypoGenerator: for char in word: if self._is_chinese_char(char): char_freq[char] += int(freq) - + # 归一化频率值 max_freq = max(char_freq.values()) - normalized_freq = {char: freq/max_freq * 1000 for char, freq in char_freq.items()} - + normalized_freq = {char: freq / max_freq * 1000 for char, freq in char_freq.items()} + # 保存到缓存文件 with open(cache_file, 'w', encoding='utf-8') as f: json.dump(normalized_freq, f, ensure_ascii=False, indent=2) - + return normalized_freq def _create_pinyin_dict(self): @@ -81,7 +83,7 @@ class ChineseTypoGenerator: # 常用汉字范围 chars = [chr(i) for i in range(0x4e00, 0x9fff)] pinyin_dict = defaultdict(list) - + # 为每个汉字建立拼音映射 for char in chars: try: @@ -89,7 +91,7 @@ class ChineseTypoGenerator: pinyin_dict[py].append(char) except Exception: continue - + return pinyin_dict def _is_chinese_char(self, char): @@ -107,7 +109,7 @@ class ChineseTypoGenerator: """ # 将句子拆分成单个字符 characters = list(sentence) - + # 获取每个字符的拼音 result = [] for char in characters: @@ -117,7 +119,7 @@ class ChineseTypoGenerator: # 获取拼音(数字声调) py = pinyin(char, style=Style.TONE3)[0][0] result.append((char, py)) - + return result def _get_similar_tone_pinyin(self, py): @@ -127,19 +129,19 @@ class ChineseTypoGenerator: # 检查拼音是否为空或无效 if not py or len(py) < 1: return py - + # 如果最后一个字符不是数字,说明可能是轻声或其他特殊情况 if not py[-1].isdigit(): # 为非数字结尾的拼音添加数字声调1 return py + '1' - + base = py[:-1] # 去掉声调 tone = int(py[-1]) # 获取声调 - + # 处理轻声(通常用5表示)或无效声调 if tone not in [1, 2, 3, 4]: return base + str(random.choice([1, 2, 3, 4])) - + # 正常处理声调 possible_tones = [1, 2, 3, 4] possible_tones.remove(tone) # 移除原声调 @@ -152,11 +154,11 @@ class ChineseTypoGenerator: """ if target_freq > orig_freq: return 1.0 # 如果替换字频率更高,保持原有概率 - + freq_diff = orig_freq - target_freq if freq_diff > self.max_freq_diff: return 0.0 # 频率差太大,不替换 - + # 使用指数衰减函数计算概率 # 频率差为0时概率为1,频率差为max_freq_diff时概率接近0 return math.exp(-3 * freq_diff / self.max_freq_diff) @@ -166,42 +168,42 @@ class ChineseTypoGenerator: 获取与给定字频率相近的同音字,可能包含声调错误 """ homophones = [] - + # 有一定概率使用错误声调 if random.random() < self.tone_error_rate: wrong_tone_py = self._get_similar_tone_pinyin(py) homophones.extend(self.pinyin_dict[wrong_tone_py]) - + # 添加正确声调的同音字 homophones.extend(self.pinyin_dict[py]) - + if not homophones: return None - + # 获取原字的频率 orig_freq = self.char_frequency.get(char, 0) - + # 计算所有同音字与原字的频率差,并过滤掉低频字 - freq_diff = [(h, self.char_frequency.get(h, 0)) - for h in homophones - if h != char and self.char_frequency.get(h, 0) >= self.min_freq] - + freq_diff = [(h, 
self.char_frequency.get(h, 0)) + for h in homophones + if h != char and self.char_frequency.get(h, 0) >= self.min_freq] + if not freq_diff: return None - + # 计算每个候选字的替换概率 candidates_with_prob = [] for h, freq in freq_diff: prob = self._calculate_replacement_probability(orig_freq, freq) if prob > 0: # 只保留有效概率的候选字 candidates_with_prob.append((h, prob)) - + if not candidates_with_prob: return None - + # 根据概率排序 candidates_with_prob.sort(key=lambda x: x[1], reverse=True) - + # 返回概率最高的几个字 return [char for char, _ in candidates_with_prob[:num_candidates]] @@ -223,10 +225,10 @@ class ChineseTypoGenerator: """ if len(word) == 1: return [] - + # 获取词的拼音 word_pinyin = self._get_word_pinyin(word) - + # 遍历所有可能的同音字组合 candidates = [] for py in word_pinyin: @@ -234,11 +236,11 @@ class ChineseTypoGenerator: if not chars: return [] candidates.append(chars) - + # 生成所有可能的组合 import itertools all_combinations = itertools.product(*candidates) - + # 获取jieba词典和词频信息 dict_path = os.path.join(os.path.dirname(jieba.__file__), 'dict.txt') valid_words = {} # 改用字典存储词语及其频率 @@ -249,11 +251,11 @@ class ChineseTypoGenerator: word_text = parts[0] word_freq = float(parts[1]) # 获取词频 valid_words[word_text] = word_freq - + # 获取原词的词频作为参考 original_word_freq = valid_words.get(word, 0) min_word_freq = original_word_freq * 0.1 # 设置最小词频为原词频的10% - + # 过滤和计算频率 homophones = [] for combo in all_combinations: @@ -268,7 +270,7 @@ class ChineseTypoGenerator: combined_score = (new_word_freq * 0.7 + char_avg_freq * 0.3) if combined_score >= self.min_freq: homophones.append((new_word, combined_score)) - + # 按综合分数排序并限制返回数量 sorted_homophones = sorted(homophones, key=lambda x: x[1], reverse=True) return [word for word, _ in sorted_homophones[:5]] # 限制返回前5个结果 @@ -286,19 +288,19 @@ class ChineseTypoGenerator: """ result = [] typo_info = [] - + # 分词 words = self._segment_sentence(sentence) - + for word in words: # 如果是标点符号或空格,直接添加 if all(not self._is_chinese_char(c) for c in word): result.append(word) continue - + # 获取词语的拼音 word_pinyin = self._get_word_pinyin(word) - + # 尝试整词替换 if len(word) > 1 and random.random() < self.word_replace_rate: word_homophones = self._get_word_homophones(word) @@ -307,15 +309,15 @@ class ChineseTypoGenerator: # 计算词的平均频率 orig_freq = sum(self.char_frequency.get(c, 0) for c in word) / len(word) typo_freq = sum(self.char_frequency.get(c, 0) for c in typo_word) / len(typo_word) - + # 添加到结果中 result.append(typo_word) - typo_info.append((word, typo_word, - ' '.join(word_pinyin), - ' '.join(self._get_word_pinyin(typo_word)), - orig_freq, typo_freq)) + typo_info.append((word, typo_word, + ' '.join(word_pinyin), + ' '.join(self._get_word_pinyin(typo_word)), + orig_freq, typo_freq)) continue - + # 如果不进行整词替换,则进行单字替换 if len(word) == 1: char = word @@ -339,7 +341,7 @@ class ChineseTypoGenerator: for i, (char, py) in enumerate(zip(word, word_pinyin)): # 词中的字替换概率降低 word_error_rate = self.error_rate * (0.7 ** (len(word) - 1)) - + if random.random() < word_error_rate: similar_chars = self._get_similar_frequency_chars(char, py) if similar_chars: @@ -354,7 +356,7 @@ class ChineseTypoGenerator: continue word_result.append(char) result.append(''.join(word_result)) - + return ''.join(result), typo_info def format_typo_info(self, typo_info): @@ -369,7 +371,7 @@ class ChineseTypoGenerator: """ if not typo_info: return "未生成错别字" - + result = [] for orig, typo, orig_py, typo_py, orig_freq, typo_freq in typo_info: # 判断是否为词语替换 @@ -379,12 +381,12 @@ class ChineseTypoGenerator: else: tone_error = orig_py[:-1] == typo_py[:-1] and orig_py[-1] != typo_py[-1] 
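+                # 末位数字即声调:声母韵母相同而声调不同记为“声调错误”,否则记为“同音字替换”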
                 error_type = "声调错误" if tone_error else "同音字替换"
-
+
             result.append(f"原文:{orig}({orig_py}) [频率:{orig_freq:.2f}] -> "
-                         f"替换:{typo}({typo_py}) [频率:{typo_freq:.2f}] [{error_type}]")
-
+                          f"替换:{typo}({typo_py}) [频率:{typo_freq:.2f}] [{error_type}]")
+
         return "\n".join(result)
-
+
     def set_params(self, **kwargs):
         """
         设置参数
@@ -399,9 +401,10 @@
         for key, value in kwargs.items():
             if hasattr(self, key):
                 setattr(self, key, value)
-                print(f"参数 {key} 已设置为 {value}")
+                logger.debug(f"参数 {key} 已设置为 {value}")
             else:
-                print(f"警告: 参数 {key} 不存在")
+                logger.warning(f"参数 {key} 不存在")
+

 def main():
     # 创建错别字生成器实例
@@ -411,27 +414,27 @@
         tone_error_rate=0.02,
         word_replace_rate=0.3
     )
-
+
     # 获取用户输入
     sentence = input("请输入中文句子:")
-
+
     # 创建包含错别字的句子
     start_time = time.time()
     typo_sentence, typo_info = typo_generator.create_typo_sentence(sentence)
-
+
     # 打印结果
-    print("\n原句:", sentence)
-    print("错字版:", typo_sentence)
-
+    logger.debug(f"原句:{sentence}")
+    logger.debug(f"错字版:{typo_sentence}")
+
     # 打印错别字信息
     if typo_info:
-        print("\n错别字信息:")
-        print(typo_generator.format_typo_info(typo_info))
-
+        logger.debug(f"错别字信息:{typo_generator.format_typo_info(typo_info)}")
+
     # 计算并打印总耗时
     end_time = time.time()
     total_time = end_time - start_time
-    print(f"\n总耗时:{total_time:.2f}秒")
+    logger.debug(f"总耗时:{total_time:.2f}秒")
+

 if __name__ == "__main__":
     main()
diff --git a/template/auto_format.py b/template/auto_format.py
deleted file mode 100644
index d99e29e34..000000000
--- a/template/auto_format.py
+++ /dev/null
@@ -1,48 +0,0 @@
-import os
-import sys
-from pathlib import Path
-
-import tomli
-import tomli_w
-
-
-def sync_configs():
-    # 读取两个配置文件
-    try:
-        with open('bot_config_dev.toml', 'rb') as f:  # tomli需要使用二进制模式读取
-            dev_config = tomli.load(f)
-
-        with open('bot_config.toml', 'rb') as f:
-            prod_config = tomli.load(f)
-    except FileNotFoundError as e:
-        print(f"错误:找不到配置文件 - {e}")
-        sys.exit(1)
-    except tomli.TOMLDecodeError as e:
-        print(f"错误:TOML格式解析失败 - {e}")
-        sys.exit(1)
-
-    # 递归合并配置
-    def merge_configs(source, target):
-        for key, value in source.items():
-            if key not in target:
-                target[key] = value
-            elif isinstance(value, dict) and isinstance(target[key], dict):
-                merge_configs(value, target[key])
-
-    # 将dev配置的新属性合并到prod配置中
-    merge_configs(dev_config, prod_config)
-
-    # 保存更新后的配置
-    try:
-        with open('bot_config.toml', 'wb') as f:  # tomli_w需要使用二进制模式写入
-            tomli_w.dump(prod_config, f)
-        print("配置文件同步完成!")
-    except Exception as e:
-        print(f"错误:保存配置文件失败 - {e}")
-        sys.exit(1)
-
-if __name__ == '__main__':
-    # 确保在正确的目录下运行
-    script_dir = Path(__file__).parent
-    os.chdir(script_dir)
-    sync_configs()
diff --git a/template/bot_config_template.toml b/template/bot_config_template.toml
index 6584bc4d7..126fc501d 100644
--- a/template/bot_config_template.toml
+++ b/template/bot_config_template.toml
@@ -1,6 +1,21 @@
+[inner]
+version = "0.0.6"
+
+#如果你想要修改配置文件,请在修改后将version的值进行变更
+#如果新增项目,请在BotConfig类下新增相应的变量
+#1.如果你修改的是[]层级项目,例如你新增了 [memory],那么请在config.py的 load_config函数中的include_configs字典中新增"内容":{
+#"func":memory,
+#"support":">=0.0.0", #新的版本号
+#"necessary":False #是否必须
+#}
+#2.如果你修改的是[]下的项目,例如你新增了[memory]下的 memory_ban_words ,那么请在config.py的 load_config函数中的 memory函数下新增版本判断:
+    # if config.INNER_VERSION in SpecifierSet(">=0.0.2"):
+    #     config.memory_ban_words = set(memory_config.get("memory_ban_words", []))
+
 [bot]
 qq = 123
 nickname = "麦麦"
+alias_names = ["小麦", "阿麦"]
 
 [personality]
 prompt_personality = [
@@ -26,6 +41,13 @@ ban_words = [
     # "403","张三"
 ]
 
+ban_msgs_regex = [
+    # 需要过滤的消息(原始消息)匹配的正则表达式,匹配到的消息将被过滤(支持CQ码),若不了解正则表达式请勿修改
+    
#"https?://[^\\s]+", # 匹配https链接 + #"\\d{4}-\\d{2}-\\d{2}", # 匹配日期 + # "\\[CQ:at,qq=\\d+\\]" # 匹配@ +] + [emoji] check_interval = 120 # 检查表情包的时间间隔 register_interval = 10 # 注册表情包的时间间隔 @@ -46,11 +68,35 @@ max_response_length = 1024 # 麦麦回答的最大token数 build_memory_interval = 300 # 记忆构建间隔 单位秒 forget_memory_interval = 300 # 记忆遗忘间隔 单位秒 +memory_ban_words = [ #不希望记忆的词 + # "403","张三" +] + [mood] mood_update_interval = 1.0 # 情绪更新间隔 单位秒 mood_decay_rate = 0.95 # 情绪衰减率 mood_intensity_factor = 1.0 # 情绪强度因子 +[keywords_reaction] # 针对某个关键词作出反应 +enable = true # 关键词反应功能的总开关 + +[[keywords_reaction.rules]] # 如果想要新增多个关键词,直接复制本条,修改keywords和reaction即可 +enable = true # 是否启用此条(为了人类在未来AI战争能更好地识别AI(bushi),默认开启) +keywords = ["人机", "bot", "机器", "入机", "robot", "机器人"] # 会触发反应的关键词 +reaction = "有人说你是人机或者机器人,否定这一事实,攻击或反问他。注意,只有认为你是机器人才回复,否则不要否认" # 触发之后添加的提示词 + +[[keywords_reaction.rules]] # 就像这样复制 +enable = false # 仅作示例,不会触发 +keywords = ["测试关键词回复","test",""] +reaction = "回答“测试成功”" + +[chinese_typo] +enable = true # 是否启用中文错别字生成器 +error_rate=0.006 # 单字替换概率 +min_freq=7 # 最小字频阈值 +tone_error_rate=0.2 # 声调错误概率 +word_replace_rate=0.006 # 整词替换概率 + [others] enable_advance_output = true # 是否启用高级输出 enable_kuuki_read = true # 是否启用读空气功能 @@ -80,49 +126,42 @@ ban_user_id = [] #禁止回复消息的QQ号 [model.llm_reasoning] #回复模型1 主要回复模型 name = "Pro/deepseek-ai/DeepSeek-R1" -base_url = "SILICONFLOW_BASE_URL" -key = "SILICONFLOW_KEY" +provider = "SILICONFLOW" pri_in = 0 #模型的输入价格(非必填,可以记录消耗) pri_out = 0 #模型的输出价格(非必填,可以记录消耗) + [model.llm_reasoning_minor] #回复模型3 次要回复模型 name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" -base_url = "SILICONFLOW_BASE_URL" -key = "SILICONFLOW_KEY" +provider = "SILICONFLOW" #非推理模型 [model.llm_normal] #V3 回复模型2 次要回复模型 name = "Pro/deepseek-ai/DeepSeek-V3" -base_url = "SILICONFLOW_BASE_URL" -key = "SILICONFLOW_KEY" +provider = "SILICONFLOW" [model.llm_normal_minor] #V2.5 name = "deepseek-ai/DeepSeek-V2.5" -base_url = "SILICONFLOW_BASE_URL" -key = "SILICONFLOW_KEY" +provider = "SILICONFLOW" [model.llm_emotion_judge] #主题判断 0.7/m name = "Qwen/Qwen2.5-14B-Instruct" -base_url = "SILICONFLOW_BASE_URL" -key = "SILICONFLOW_KEY" +provider = "SILICONFLOW" [model.llm_topic_judge] #主题判断:建议使用qwen2.5 7b name = "Pro/Qwen/Qwen2.5-7B-Instruct" -base_url = "SILICONFLOW_BASE_URL" -key = "SILICONFLOW_KEY" +provider = "SILICONFLOW" [model.llm_summary_by_topic] #建议使用qwen2.5 32b 及以上 name = "Qwen/Qwen2.5-32B-Instruct" -base_url = "SILICONFLOW_BASE_URL" -key = "SILICONFLOW_KEY" +provider = "SILICONFLOW" pri_in = 0 pri_out = 0 [model.moderation] #内容审核 未启用 name = "" -base_url = "SILICONFLOW_BASE_URL" -key = "SILICONFLOW_KEY" +provider = "SILICONFLOW" pri_in = 0 pri_out = 0 @@ -130,8 +169,7 @@ pri_out = 0 [model.vlm] #图像识别 0.35/m name = "Pro/Qwen/Qwen2-VL-7B-Instruct" -base_url = "SILICONFLOW_BASE_URL" -key = "SILICONFLOW_KEY" +provider = "SILICONFLOW" @@ -139,5 +177,4 @@ key = "SILICONFLOW_KEY" [model.embedding] #嵌入 name = "BAAI/bge-m3" -base_url = "SILICONFLOW_BASE_URL" -key = "SILICONFLOW_KEY" +provider = "SILICONFLOW"
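+#注(推测):provider的取值对应.env中以其为前缀的环境变量,
+#例如 SILICONFLOW 会读取 SILICONFLOW_BASE_URL 与 SILICONFLOW_KEY,即旧版配置中base_url/key两项的取值来源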