diff --git a/.github/workflows/docker-image.yml b/.github/workflows/docker-image.yml
index 2a5f497fd..5b09b8cda 100644
--- a/.github/workflows/docker-image.yml
+++ b/.github/workflows/docker-image.yml
@@ -5,6 +5,7 @@ on:
branches:
- main
- debug # 新增 debug 分支触发
+ - stable-dev
tags:
- 'v*'
workflow_dispatch:
@@ -34,6 +35,8 @@ jobs:
echo "tags=${{ secrets.DOCKERHUB_USERNAME }}/maimbot:main,${{ secrets.DOCKERHUB_USERNAME }}/maimbot:latest" >> $GITHUB_OUTPUT
elif [ "${{ github.ref }}" == "refs/heads/debug" ]; then
echo "tags=${{ secrets.DOCKERHUB_USERNAME }}/maimbot:debug" >> $GITHUB_OUTPUT
+ elif [ "${{ github.ref }}" == "refs/heads/stable-dev" ]; then
+ echo "tags=${{ secrets.DOCKERHUB_USERNAME }}/maimbot:stable-dev" >> $GITHUB_OUTPUT
fi
- name: Build and Push Docker Image
diff --git a/.gitignore b/.gitignore
index 4e1606a54..e51abc5cc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -193,9 +193,8 @@ cython_debug/
# jieba
jieba.cache
-
-# vscode
-/.vscode
+# .vscode
+!.vscode/settings.json
# direnv
/.direnv
\ No newline at end of file
diff --git a/README.md b/README.md
index 533d38383..f4ebca07d 100644
--- a/README.md
+++ b/README.md
@@ -3,7 +3,7 @@
-
+


@@ -29,15 +29,21 @@
-> ⚠️ **注意事项**
+> [!WARNING]
> - 项目处于活跃开发阶段,代码可能随时更改
> - 文档未完善,有问题可以提交 Issue 或者 Discussion
> - QQ机器人存在被限制风险,请自行了解,谨慎使用
> - 由于持续迭代,可能存在一些已知或未知的bug
> - 由于开发中,可能消耗较多token
-**交流群**: 766798517 一群人较多,建议加下面的(开发和建议相关讨论)不一定有空回复,会优先写文档和代码
-**交流群**: 571780722 另一个群(开发和建议相关讨论)不一定有空回复,会优先写文档和代码
+## 💬交流群
+- [一群](https://qm.qq.com/q/VQ3XZrWgMs) 766798517,建议加下面的(开发和建议相关讨论)不一定有空回复,会优先写文档和代码
+- [二群](https://qm.qq.com/q/RzmCiRtHEW) 571780722 (开发和建议相关讨论)不一定有空回复,会优先写文档和代码
+- [三群](https://qm.qq.com/q/wlH5eT8OmQ) 1035228475(开发和建议相关讨论)不一定有空回复,会优先写文档和代码
+
+**其他平台版本**
+
+- (由 [CabLate](https://github.com/cablate) 贡献) [Telegram 与其他平台(未来可能会有)的版本](https://github.com/cablate/MaiMBot/tree/telegram) - [集中讨论串](https://github.com/SengokuCola/MaiMBot/discussions/149)
##
@@ -46,11 +52,16 @@
### 部署方式
-如果你不知道Docker是什么,建议寻找相关教程或使用手动部署
+- 📦 **Windows 一键傻瓜式部署**:请运行项目根目录中的 `run.bat`,部署完成后请参照后续配置指南进行配置
+
+- [📦 Windows 手动部署指南 ](docs/manual_deploy_windows.md)
+
+- [📦 Linux 手动部署指南 ](docs/manual_deploy_linux.md)
+
+如果你不知道Docker是什么,建议寻找相关教程或使用手动部署 **(现在不建议使用docker,更新慢,可能不适配)**
- [🐳 Docker部署指南](docs/docker_deploy.md)
-- [📦 手动部署指南](docs/manual_deploy.md)
### 配置说明
- [🎀 新手配置指南](docs/installation_cute.md) - 通俗易懂的配置教程,适合初次使用的猫娘
@@ -129,9 +140,10 @@
## 📌 注意事项
-SengokuCola纯编程外行,面向cursor编程,很多代码史一样多多包涵
-> ⚠️ **警告**:本应用生成内容来自人工智能模型,由 AI 生成,请仔细甄别,请勿用于违反法律的用途,AI生成内容不代表本人观点和立场。
+SengokuCola纯编程外行,面向cursor编程,很多代码史一样,多多包涵
+> [!WARNING]
+> 本应用生成内容来自人工智能模型,由 AI 生成,请仔细甄别,请勿用于违反法律的用途,AI生成内容不代表本人观点和立场。
## 致谢
[nonebot2](https://github.com/nonebot/nonebot2): 跨平台 Python 异步聊天机器人框架
@@ -142,7 +154,7 @@ SengokuCola纯编程外行,面向cursor编程,很多代码史一样多多包
感谢各位大佬!
-
+
diff --git a/bot.py b/bot.py
index 51979a5ea..471a98eaf 100644
--- a/bot.py
+++ b/bot.py
@@ -1,88 +1,233 @@
+import asyncio
import os
+import shutil
+import sys
import nonebot
+import time
+
+import uvicorn
from dotenv import load_dotenv
from loguru import logger
from nonebot.adapters.onebot.v11 import Adapter
+import platform
-'''彩蛋'''
-from colorama import Fore, init
+# 获取没有加载env时的环境变量
+env_mask = {key: os.getenv(key) for key in os.environ}
-init()
-text = "多年以后,面对AI行刑队,张三将会回想起他2023年在会议上讨论人工智能的那个下午"
-rainbow_colors = [Fore.RED, Fore.YELLOW, Fore.GREEN, Fore.CYAN, Fore.BLUE, Fore.MAGENTA]
-rainbow_text = ""
-for i, char in enumerate(text):
- rainbow_text += rainbow_colors[i % len(rainbow_colors)] + char
-print(rainbow_text)
-'''彩蛋'''
+uvicorn_server = None
-# 初次启动检测
-if not os.path.exists("config/bot_config.toml"):
- logger.warning("检测到bot_config.toml不存在,正在从模板复制")
- import shutil
- # 检查config目录是否存在
- if not os.path.exists("config"):
- os.makedirs("config")
- logger.info("创建config目录")
- shutil.copy("template/bot_config_template.toml", "config/bot_config.toml")
- logger.info("复制完成,请修改config/bot_config.toml和.env.prod中的配置后重新启动")
+def easter_egg():
+ # 彩蛋
+ from colorama import init, Fore
-# 初始化.env 默认ENVIRONMENT=prod
-if not os.path.exists(".env"):
- with open(".env", "w") as f:
- f.write("ENVIRONMENT=prod")
+ init()
+ text = "多年以后,面对AI行刑队,张三将会回想起他2023年在会议上讨论人工智能的那个下午"
+ rainbow_colors = [Fore.RED, Fore.YELLOW, Fore.GREEN, Fore.CYAN, Fore.BLUE, Fore.MAGENTA]
+ rainbow_text = ""
+ for i, char in enumerate(text):
+ rainbow_text += rainbow_colors[i % len(rainbow_colors)] + char
+ print(rainbow_text)
- # 检测.env.prod文件是否存在
- if not os.path.exists(".env.prod"):
- logger.error("检测到.env.prod文件不存在")
- shutil.copy("template.env", "./.env.prod")
-# 首先加载基础环境变量.env
-if os.path.exists(".env"):
- load_dotenv(".env")
- logger.success("成功加载基础环境变量配置")
+def init_config():
+ # 初次启动检测
+ if not os.path.exists("config/bot_config.toml"):
+ logger.warning("检测到bot_config.toml不存在,正在从模板复制")
-# 根据 ENVIRONMENT 加载对应的环境配置
-if os.getenv("ENVIRONMENT") == "prod":
- logger.success("加载生产环境变量配置")
- load_dotenv(".env.prod", override=True) # override=True 允许覆盖已存在的环境变量
-elif os.getenv("ENVIRONMENT") == "dev":
- logger.success("加载开发环境变量配置")
- load_dotenv(".env.dev", override=True) # override=True 允许覆盖已存在的环境变量
-elif os.path.exists(f".env.{os.getenv('ENVIRONMENT')}"):
- logger.success(f"加载{os.getenv('ENVIRONMENT')}环境变量配置")
- load_dotenv(f".env.{os.getenv('ENVIRONMENT')}", override=True) # override=True 允许覆盖已存在的环境变量
-else:
- logger.error(f"ENVIRONMENT配置错误,请检查.env文件中的ENVIRONMENT变量对应的.env.{os.getenv('ENVIRONMENT')}是否存在")
- exit(1)
+ # 检查config目录是否存在
+ if not os.path.exists("config"):
+ os.makedirs("config")
+ logger.info("创建config目录")
-# 检测Key是否存在
-if not os.getenv("SILICONFLOW_KEY"):
- logger.error("缺失必要的API KEY")
- logger.error(f"请至少在.env.{os.getenv('ENVIRONMENT')}文件中填写SILICONFLOW_KEY后重新启动")
- exit(1)
+ shutil.copy("template/bot_config_template.toml", "config/bot_config.toml")
+ logger.info("复制完成,请修改config/bot_config.toml和.env.prod中的配置后重新启动")
-# 获取所有环境变量
-env_config = {key: os.getenv(key) for key in os.environ}
-# 设置基础配置
-base_config = {
- "websocket_port": int(env_config.get("PORT", 8080)),
- "host": env_config.get("HOST", "127.0.0.1"),
- "log_level": "INFO",
-}
+def init_env():
+ # 初始化.env 默认ENVIRONMENT=prod
+ if not os.path.exists(".env"):
+ with open(".env", "w") as f:
+ f.write("ENVIRONMENT=prod")
-# 合并配置
-nonebot.init(**base_config, **env_config)
+ # 检测.env.prod文件是否存在
+ if not os.path.exists(".env.prod"):
+        logger.warning("检测到.env.prod文件不存在,正在从模板复制")
+ shutil.copy("template.env", "./.env.prod")
-# 注册适配器
-driver = nonebot.get_driver()
-driver.register_adapter(Adapter)
+ # 检测.env.dev文件是否存在,不存在的话直接复制生产环境配置
+ if not os.path.exists(".env.dev"):
+        logger.warning("检测到.env.dev文件不存在,正在从.env.prod复制")
+ shutil.copy(".env.prod", "./.env.dev")
+
+ # 首先加载基础环境变量.env
+ if os.path.exists(".env"):
+ load_dotenv(".env")
+ logger.success("成功加载基础环境变量配置")
+
+
+def load_env():
+ # 使用闭包实现对加载器的横向扩展,避免大量重复判断
+ def prod():
+ logger.success("加载生产环境变量配置")
+ load_dotenv(".env.prod", override=True) # override=True 允许覆盖已存在的环境变量
+
+ def dev():
+ logger.success("加载开发环境变量配置")
+ load_dotenv(".env.dev", override=True) # override=True 允许覆盖已存在的环境变量
+
+ fn_map = {
+ "prod": prod,
+ "dev": dev
+ }
+
+ env = os.getenv("ENVIRONMENT")
+ logger.info(f"[load_env] 当前的 ENVIRONMENT 变量值:{env}")
+
+ if env in fn_map:
+ fn_map[env]() # 根据映射执行闭包函数
+
+ elif os.path.exists(f".env.{env}"):
+ logger.success(f"加载{env}环境变量配置")
+ load_dotenv(f".env.{env}", override=True) # override=True 允许覆盖已存在的环境变量
+
+ else:
+ logger.error(f"ENVIRONMENT 配置错误,请检查 .env 文件中的 ENVIRONMENT 变量及对应 .env.{env} 是否存在")
+        raise RuntimeError(f"ENVIRONMENT 配置错误,请检查 .env 文件中的 ENVIRONMENT 变量及对应 .env.{env} 是否存在")
+
+
+def load_logger():
+ logger.remove() # 移除默认配置
+ logger.add(
+ sys.stderr,
+        format="{time:YYYY-MM-DD HH:mm:ss.SSS} |> {level: <7} |> {name:.<8}:{function:.<8}:{line: >4} -> {message}",
+ colorize=True,
+ level=os.getenv("LOG_LEVEL", "INFO"), # 根据环境设置日志级别,默认为INFO
+ filter=lambda record: "nonebot" not in record["name"]
+ )
+
+
+
+def scan_provider(env_config: dict):
+ provider = {}
+
+ # 利用未初始化 env 时获取的 env_mask 来对新的环境变量集去重
+ # 避免 GPG_KEY 这样的变量干扰检查
+ env_config = dict(filter(lambda item: item[0] not in env_mask, env_config.items()))
+
+ # 遍历 env_config 的所有键
+ for key in env_config:
+ # 检查键是否符合 {provider}_BASE_URL 或 {provider}_KEY 的格式
+ if key.endswith("_BASE_URL") or key.endswith("_KEY"):
+ # 提取 provider 名称
+ provider_name = key.split("_", 1)[0] # 从左分割一次,取第一部分
+
+ # 初始化 provider 的字典(如果尚未初始化)
+ if provider_name not in provider:
+ provider[provider_name] = {"url": None, "key": None}
+
+ # 根据键的类型填充 url 或 key
+ if key.endswith("_BASE_URL"):
+ provider[provider_name]["url"] = env_config[key]
+ elif key.endswith("_KEY"):
+ provider[provider_name]["key"] = env_config[key]
+
+ # 检查每个 provider 是否同时存在 url 和 key
+ for provider_name, config in provider.items():
+ if config["url"] is None or config["key"] is None:
+ logger.error(
+ f"provider 内容:{config}\n"
+ f"env_config 内容:{env_config}"
+ )
+ raise ValueError(f"请检查 '{provider_name}' 提供商配置是否丢失 BASE_URL 或 KEY 环境变量")
+
+
+async def graceful_shutdown():
+ try:
+ global uvicorn_server
+ if uvicorn_server:
+ uvicorn_server.force_exit = True # 强制退出
+ await uvicorn_server.shutdown()
+
+ tasks = [t for t in asyncio.all_tasks() if t is not asyncio.current_task()]
+ for task in tasks:
+ task.cancel()
+ await asyncio.gather(*tasks, return_exceptions=True)
+
+ except Exception as e:
+ logger.error(f"麦麦关闭失败: {e}")
+
+
+async def uvicorn_main():
+ global uvicorn_server
+ config = uvicorn.Config(
+ app="__main__:app",
+ host=os.getenv("HOST", "127.0.0.1"),
+ port=int(os.getenv("PORT", 8080)),
+ reload=os.getenv("ENVIRONMENT") == "dev",
+ timeout_graceful_shutdown=5,
+ log_config=None,
+ access_log=False
+ )
+ server = uvicorn.Server(config)
+ uvicorn_server = server
+ await server.serve()
+
+
+def raw_main():
+ # 利用 TZ 环境变量设定程序工作的时区
+ # 仅保证行为一致,不依赖 localtime(),实际对生产环境几乎没有作用
+ if platform.system().lower() != 'windows':
+ time.tzset()
+
+ easter_egg()
+ load_logger()
+ init_config()
+ init_env()
+ load_env()
+ load_logger()
+
+ env_config = {key: os.getenv(key) for key in os.environ}
+ scan_provider(env_config)
+
+ # 设置基础配置
+ base_config = {
+ "websocket_port": int(env_config.get("PORT", 8080)),
+ "host": env_config.get("HOST", "127.0.0.1"),
+ "log_level": "INFO",
+ }
+
+ # 合并配置
+ nonebot.init(**base_config, **env_config)
+
+ # 注册适配器
+ global driver
+ driver = nonebot.get_driver()
+ driver.register_adapter(Adapter)
+
+ # 加载插件
+ nonebot.load_plugins("src/plugins")
-# 加载插件
-nonebot.load_plugins("src/plugins")
if __name__ == "__main__":
- nonebot.run()
+
+    loop = None  # 提前定义,防止 raw_main 出错时 finally 中引用未定义的 loop
+    try:
+ raw_main()
+
+ global app
+ app = nonebot.get_asgi()
+
+ loop = asyncio.new_event_loop()
+ asyncio.set_event_loop(loop)
+ loop.run_until_complete(uvicorn_main())
+ except KeyboardInterrupt:
+ logger.warning("麦麦会努力做的更好的!正在停止中......")
+ except Exception as e:
+ logger.error(f"主程序异常: {e}")
+    finally:
+        if loop is not None:
+            loop.run_until_complete(graceful_shutdown())
+            loop.close()
+ logger.info("进程终止完毕,麦麦开始休眠......下次再见哦!")
diff --git a/changelog.md b/changelog.md
new file mode 100644
index 000000000..c68a16ad9
--- /dev/null
+++ b/changelog.md
@@ -0,0 +1,6 @@
+# Changelog
+
+## [0.5.12] - 2025-3-9
+### Added
+- 新增了 我是测试
+
diff --git a/changelog_config.md b/changelog_config.md
new file mode 100644
index 000000000..c4c560644
--- /dev/null
+++ b/changelog_config.md
@@ -0,0 +1,12 @@
+# Changelog
+
+## [0.0.5] - 2025-3-11
+### Added
+- 新增了 `alias_names` 配置项,用于指定麦麦的别名。
+
+## [0.0.4] - 2025-3-9
+### Added
+- 新增了 `memory_ban_words` 配置项,用于指定不希望记忆的词汇。
+
+
+
diff --git a/docker-compose.yml b/docker-compose.yml
index dd2650b23..227df606b 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -2,47 +2,47 @@ services:
napcat:
container_name: napcat
environment:
- - tz=Asia/Shanghai
+ - TZ=Asia/Shanghai
- NAPCAT_UID=${NAPCAT_UID}
- - NAPCAT_GID=${NAPCAT_GID}
+ - NAPCAT_GID=${NAPCAT_GID} # 让 NapCat 获取当前用户 GID,UID,防止权限问题
ports:
- - 3000:3000
- - 3001:3001
- 6099:6099
- restart: always
+ restart: unless-stopped
volumes:
- - napcatQQ:/app/.config/QQ
- - napcatCONFIG:/app/napcat/config
- - maimbotDATA:/MaiMBot/data # 麦麦的图片等要给napcat不然发送图片会有问题
+ - napcatQQ:/app/.config/QQ # 持久化 QQ 本体
+ - napcatCONFIG:/app/napcat/config # 持久化 NapCat 配置文件
+ - maimbotDATA:/MaiMBot/data # NapCat 和 NoneBot 共享此卷,否则发送图片会有问题
image: mlikiowa/napcat-docker:latest
mongodb:
container_name: mongodb
environment:
- - tz=Asia/Shanghai
+ - TZ=Asia/Shanghai
+ # - MONGO_INITDB_ROOT_USERNAME=your_username
+ # - MONGO_INITDB_ROOT_PASSWORD=your_password
expose:
- "27017"
- restart: always
+ restart: unless-stopped
volumes:
- - mongodb:/data/db
- - mongodbCONFIG:/data/configdb
+ - mongodb:/data/db # 持久化 MongoDB 数据库
+ - mongodbCONFIG:/data/configdb # 持久化 MongoDB 配置文件
image: mongo:latest
maimbot:
container_name: maimbot
environment:
- - tz=Asia/Shanghai
+ - TZ=Asia/Shanghai
expose:
- "8080"
- restart: always
+ restart: unless-stopped
depends_on:
- mongodb
- napcat
volumes:
- - napcatCONFIG:/MaiMBot/napcat # 自动根据配置中的qq号创建ws反向客户端配置
- - ./bot_config.toml:/MaiMBot/config/bot_config.toml
- - maimbotDATA:/MaiMBot/data
- - ./.env.prod:/MaiMBot/.env.prod
+ - napcatCONFIG:/MaiMBot/napcat # 自动根据配置中的 QQ 号创建 ws 反向客户端配置
+ - ./bot_config.toml:/MaiMBot/config/bot_config.toml # Toml 配置文件映射
+ - maimbotDATA:/MaiMBot/data # NapCat 和 NoneBot 共享此卷,否则发送图片会有问题
+      - ./.env.prod:/MaiMBot/.env.prod # env 环境配置文件映射
image: sengokucola/maimbot:latest
volumes:
diff --git a/docs/Jonathan R.md b/docs/Jonathan R.md
new file mode 100644
index 000000000..660caaeec
--- /dev/null
+++ b/docs/Jonathan R.md
@@ -0,0 +1,20 @@
+Jonathan R. Wolpaw 在 “Memory in neuroscience: rhetoric versus reality” 一文中提到,从神经科学的感觉运动假设出发,整个神经系统的功能是将经验与适当的行为联系起来,而不是单纯的信息存储。
+Wolpaw, J. R. (2002). Memory in neuroscience: rhetoric versus reality. Behavioral and Cognitive Neuroscience Reviews, 1(2), 130-163.
+
+1. **单一过程理论**
+ - 单一过程理论认为,识别记忆主要是基于熟悉性这一单一因素的影响。熟悉性是指对刺激的一种自动的、无意识的感知,它可以使我们在没有回忆起具体细节的情况下,判断一个刺激是否曾经出现过。
+   - 例如,在一些实验中,研究者发现被试可以在没有回忆起具体学习情境的情况下,对曾经出现过的刺激做出正确的判断,这被认为是熟悉性在起作用。
+2. **双重过程理论**
+ - 双重过程理论则认为,识别记忆是基于两个过程:回忆和熟悉性。回忆是指对过去经验的有意识的回忆,它可以使我们回忆起具体的细节和情境;熟悉性则是一种自动的、无意识的感知。
+   - 该理论认为,在识别记忆中,回忆和熟悉性共同作用,使我们能够判断一个刺激是否曾经出现过。例如,在 “记得 / 知道” 范式中,被试被要求判断他们对一个刺激的记忆是基于回忆还是熟悉性。研究发现,被试可以区分这两种不同的记忆过程,这为双重过程理论提供了支持。
+
+
+
+1. **神经元节点与连接**:借鉴神经网络原理,将每个记忆单元视为一个神经元节点。节点之间通过连接相互关联,连接的强度代表记忆之间的关联程度。在形态学联想记忆中,具有相似形态特征的记忆节点连接强度较高。例如,苹果和橘子的记忆节点,由于在形状、都是水果等形态语义特征上相似,它们之间的连接强度大于苹果与汽车记忆节点间的连接强度。
+2. **记忆聚类与层次结构**:依据形态特征的相似性对记忆进行聚类,形成不同的记忆簇。每个记忆簇内部的记忆具有较高的相似性,而不同记忆簇之间的记忆相似性较低。同时,构建记忆的层次结构,高层次的记忆节点代表更抽象、概括的概念,低层次的记忆节点对应具体的实例。比如,“水果” 作为高层次记忆节点,连接着 “苹果”“橘子”“香蕉” 等低层次具体水果的记忆节点。
+3. **网络的动态更新**:随着新记忆的不断加入,记忆网络动态调整。新记忆节点根据其形态特征与现有网络中的节点建立连接,同时影响相关连接的强度。若新记忆与某个记忆簇的特征高度相似,则被纳入该记忆簇;若具有独特特征,则可能引发新的记忆簇的形成。例如,当系统学习到一种新的水果 “番石榴”,它会根据番石榴的形态、语义等特征,在记忆网络中找到与之最相似的区域(如水果记忆簇),并建立相应连接,同时调整周围节点连接强度以适应这一新记忆。
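+
+下面用一个极简的 Python 草图示意上述记忆网络(仅为假设性的示意实现,并非 MaiMBot 中 memory_graph 的实际代码):节点表示记忆单元,带权边表示关联强度,新记忆按特征相似度并入网络。
+
+```python
+from dataclasses import dataclass, field
+
+
+@dataclass
+class MemoryNode:
+    concept: str  # 记忆内容,如 "苹果"
+    features: set = field(default_factory=set)  # 形态/语义特征,如 {"圆形", "水果"}
+
+
+class MemoryGraph:
+    def __init__(self):
+        self.nodes: list[MemoryNode] = []
+        self.edges: dict[tuple[int, int], float] = {}  # 节点索引对 -> 连接强度
+
+    @staticmethod
+    def similarity(a: MemoryNode, b: MemoryNode) -> float:
+        # 用特征集合的 Jaccard 相似度近似“形态相似性”
+        if not a.features or not b.features:
+            return 0.0
+        return len(a.features & b.features) / len(a.features | b.features)
+
+    def add_memory(self, node: MemoryNode) -> None:
+        # 网络的动态更新:新节点按相似度与现有节点建立连接
+        new_index = len(self.nodes)
+        for i, other in enumerate(self.nodes):
+            s = self.similarity(node, other)
+            if s > 0:
+                self.edges[(i, new_index)] = s
+        self.nodes.append(node)
+
+
+graph = MemoryGraph()
+graph.add_memory(MemoryNode("苹果", {"圆形", "水果"}))
+graph.add_memory(MemoryNode("橘子", {"圆形", "水果", "橙色"}))
+graph.add_memory(MemoryNode("汽车", {"交通工具", "金属"}))
+# “苹果-橘子” 间的连接强度高于 “苹果-汽车”,与上文的描述一致
+```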
+
+
+
+- **相似性联想**:该理论认为,当两个或多个事物在形态上具有相似性时,它们在记忆中会形成关联。例如,梨和苹果在形状和都是水果这一属性上有相似性,所以当我们看到梨时,很容易通过形态学联想记忆联想到苹果。这种相似性联想有助于我们对新事物进行分类和理解,当遇到一个新的类似水果时,我们可以通过与已有的水果记忆进行相似性匹配,来推测它的一些特征。
+- **时空关联性联想**:除了相似性联想,MAM 还强调时空关联性联想。如果两个事物在时间或空间上经常同时出现,它们也会在记忆中形成关联。比如,每次在公园里看到花的时候,都能听到鸟儿的叫声,那么花和鸟儿叫声的形态特征(花的视觉形态和鸟叫的听觉形态)就会在记忆中形成关联,以后听到鸟叫可能就会联想到公园里的花。
\ No newline at end of file
diff --git a/docs/docker_deploy.md b/docs/docker_deploy.md
index c9b069309..db759dfd0 100644
--- a/docs/docker_deploy.md
+++ b/docs/docker_deploy.md
@@ -1,24 +1,97 @@
# 🐳 Docker 部署指南
-## 部署步骤(推荐,但不一定是最新)
+## 部署步骤 (推荐,但不一定是最新)
+
+**"更新镜像与容器"部分在本文档 [Part 6](#6-更新镜像与容器)**
+
+### 0. 前提说明
+
+**本文假设读者已具备一定的 Docker 基础知识。若您对 Docker 不熟悉,建议先参考相关教程或文档进行学习,或选择使用 [📦Linux手动部署指南](./manual_deploy_linux.md) 或 [📦Windows手动部署指南](./manual_deploy_windows.md) 。**
+
+
+### 1. 获取Docker配置文件
+
+- 建议先单独创建好一个文件夹并进入,作为工作目录
-1. 获取配置文件:
```bash
-wget https://raw.githubusercontent.com/SengokuCola/MaiMBot/main/docker-compose.yml
+wget https://raw.githubusercontent.com/SengokuCola/MaiMBot/main/docker-compose.yml -O docker-compose.yml
```
-2. 启动服务:
+- 若需要启用MongoDB数据库的用户名和密码,可进入docker-compose.yml,取消MongoDB处的注释并修改变量旁 `=` 后方的值为你的用户名和密码\
+修改后请注意在之后配置 `.env.prod` 文件时指定MongoDB数据库的用户名密码
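+
+取消注释后,`docker-compose.yml` 中 MongoDB 的环境变量部分大致如下(用户名与密码仅为占位示例,请自行替换):
+
+```yaml
+  mongodb:
+    environment:
+      - TZ=Asia/Shanghai
+      - MONGO_INITDB_ROOT_USERNAME=your_username
+      - MONGO_INITDB_ROOT_PASSWORD=your_password
+```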
+
+
+### 2. 启动服务
+
+- **!!! 请在第一次启动前确保当前工作目录下 `.env.prod` 与 `bot_config.toml` 文件存在 !!!**\
+由于Docker文件映射行为的特殊性,若宿主机的映射路径不存在,Docker会意外地创建同名目录而不是文件。此处需要文件到文件的映射,因此请提前确保这两个文件存在且路径正确,可使用如下命令:
+
+```bash
+touch .env.prod
+touch bot_config.toml
+```
+
+- 启动Docker容器:
+
```bash
NAPCAT_UID=$(id -u) NAPCAT_GID=$(id -g) docker compose up -d
+# 旧版Docker中可能找不到docker compose,请使用docker-compose工具替代
+NAPCAT_UID=$(id -u) NAPCAT_GID=$(id -g) docker-compose up -d
```
-3. 修改配置后重启:
+
+### 3. 修改配置并重启Docker
+
+- 请前往 [🎀新手配置指南](./installation_cute.md) 或 [⚙️标准配置指南](./installation_standard.md) 完成 `.env.prod` 与 `bot_config.toml` 配置文件的编写\
+**需要注意 `.env.prod` 中HOST处IP的填写,Docker中部署和系统中直接安装的配置会有所不同**
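+
+例如,Docker 部署时 `.env.prod` 中与网络相关的配置大致如下(节选,仅为示意):
+
+```ini
+HOST=0.0.0.0          # Docker部署需监听所有网卡,否则收不到NapCat转发的QQ消息
+PORT=8080
+MONGODB_HOST=mongodb  # 填数据库容器的名字,而不是127.0.0.1
+MONGODB_PORT=27017
+```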
+
+- 重启Docker容器:
+
+```bash
+docker restart maimbot # 若修改过容器名称则替换maimbot为你自定的名称
+```
+
+- 也可以使用下方命令同时重启NapCat、MongoDB、MaiMBot三个服务,但通常没有必要,不推荐
+
```bash
NAPCAT_UID=$(id -u) NAPCAT_GID=$(id -g) docker compose restart
+# 旧版Docker中可能找不到docker compose,请使用docker-compose工具替代
+NAPCAT_UID=$(id -u) NAPCAT_GID=$(id -g) docker-compose restart
```
+
+### 4. 登入NapCat管理页添加反向WebSocket
+
+- 在浏览器地址栏输入 `http://<宿主机IP>:6099/` 进入NapCat的管理Web页,添加一个Websocket客户端
+
+> 网络配置 -> 新建 -> Websocket客户端
+
+- Websocket客户端的名称自定,URL栏填入 `ws://maimbot:8080/onebot/v11/ws`,启用并保存即可\
+(若修改过容器名称则替换maimbot为你自定的名称)
+
+
+### 5. 部署完成,愉快地和麦麦对话吧!
+
+
+### 6. 更新镜像与容器
+
+- 拉取最新镜像
+
+```bash
+docker-compose pull
+```
+
+- 执行启动容器指令,该指令会自动重建镜像已更新的容器并启动
+
+```bash
+NAPCAT_UID=$(id -u) NAPCAT_GID=$(id -g) docker compose up -d
+# 旧版Docker中可能找不到docker compose,请使用docker-compose工具替代
+NAPCAT_UID=$(id -u) NAPCAT_GID=$(id -g) docker-compose up -d
+```
+
+
## ⚠️ 注意事项
- 目前部署方案仍在测试中,可能存在未知问题
- 配置文件中的API密钥请妥善保管,不要泄露
-- 建议先在测试环境中运行,确认无误后再部署到生产环境
\ No newline at end of file
+- 建议先在测试环境中运行,确认无误后再部署到生产环境
\ No newline at end of file
diff --git a/docs/installation_cute.md b/docs/installation_cute.md
index 278cbfe20..4465660f9 100644
--- a/docs/installation_cute.md
+++ b/docs/installation_cute.md
@@ -52,12 +52,12 @@ key = "SILICONFLOW_KEY" # 用同一张门票就可以啦
如果你想用DeepSeek官方的服务,就要这样改:
```toml
[model.llm_reasoning]
-name = "Pro/deepseek-ai/DeepSeek-R1"
+name = "deepseek-reasoner" # 改成对应的模型名称,这里为DeepseekR1
base_url = "DEEP_SEEK_BASE_URL" # 改成去DeepSeek游乐园
key = "DEEP_SEEK_KEY" # 用DeepSeek的门票
[model.llm_normal]
-name = "Pro/deepseek-ai/DeepSeek-V3"
+name = "deepseek-chat" # 改成对应的模型名称,这里为DeepseekV3
base_url = "DEEP_SEEK_BASE_URL" # 也去DeepSeek游乐园
key = "DEEP_SEEK_KEY" # 用同一张DeepSeek门票
```
@@ -88,11 +88,11 @@ CHAT_ANY_WHERE_KEY=your_key
CHAT_ANY_WHERE_BASE_URL=https://api.chatanywhere.tech/v1
# 如果你不知道这是什么,那么下面这些不用改,保持原样就好啦
-HOST=127.0.0.1
+HOST=127.0.0.1 # 如果使用Docker部署,需要改成0.0.0.0喵,不然听不见群友讲话了喵
PORT=8080
# 这些是数据库设置,一般也不用改呢
-MONGODB_HOST=127.0.0.1
+MONGODB_HOST=127.0.0.1 # 如果使用Docker部署,需要改成数据库容器的名字喵,默认是mongodb喵
MONGODB_PORT=27017
DATABASE_NAME=MegBot
MONGODB_USERNAME = "" # 如果数据库需要用户名,就在这里填写喵
@@ -110,7 +110,8 @@ PLUGINS=["src2.plugins.chat"] # 这里是机器人的插件列表呢
```toml
[bot]
qq = "把这里改成你的机器人QQ号喵" # 填写你的机器人QQ号
-nickname = "麦麦" # 机器人的名字,你可以改成你喜欢的任何名字哦
+nickname = "麦麦" # 机器人的名字,你可以改成你喜欢的任何名字哦,建议和机器人QQ名称/群昵称一样哦
+alias_names = ["小麦", "阿麦"] # 也可以用这个招呼机器人,可以不设置呢
[personality]
# 这里可以设置机器人的性格呢,让它更有趣一些喵
diff --git a/docs/installation_standard.md b/docs/installation_standard.md
index 6e4920220..03b66dc46 100644
--- a/docs/installation_standard.md
+++ b/docs/installation_standard.md
@@ -8,7 +8,7 @@
## API配置说明
-`.env.prod`和`bot_config.toml`中的API配置关系如下:
+`.env.prod` 和 `bot_config.toml` 中的API配置关系如下:
### 在.env.prod中定义API凭证:
```ini
@@ -34,7 +34,7 @@ key = "SILICONFLOW_KEY" # 引用.env.prod中定义的密钥
如需切换到其他API服务,只需修改引用:
```toml
[model.llm_reasoning]
-name = "Pro/deepseek-ai/DeepSeek-R1"
+name = "deepseek-reasoner" # 改成对应的模型名称,这里为DeepseekR1
base_url = "DEEP_SEEK_BASE_URL" # 切换为DeepSeek服务
key = "DEEP_SEEK_KEY" # 使用DeepSeek密钥
```
@@ -52,12 +52,12 @@ CHAT_ANY_WHERE_KEY=your_key
CHAT_ANY_WHERE_BASE_URL=https://api.chatanywhere.tech/v1
# 服务配置
-HOST=127.0.0.1
-PORT=8080
+HOST=127.0.0.1 # 如果使用Docker部署,需要改成0.0.0.0,否则QQ消息无法传入
+PORT=8080 # 与反向端口相同
# 数据库配置
-MONGODB_HOST=127.0.0.1
-MONGODB_PORT=27017
+MONGODB_HOST=127.0.0.1 # 如果使用Docker部署,需要改成数据库容器的名字,默认是mongodb
+MONGODB_PORT=27017 # MongoDB端口
DATABASE_NAME=MegBot
MONGODB_USERNAME = "" # 数据库用户名
MONGODB_PASSWORD = "" # 数据库密码
@@ -72,6 +72,9 @@ PLUGINS=["src2.plugins.chat"]
[bot]
qq = "机器人QQ号" # 必填
nickname = "麦麦" # 机器人昵称
+# alias_names: 配置机器人可使用的别名。当机器人在群聊或对话中被调用时,别名可以作为直接命令或提及机器人的关键字使用。
+# 该配置项为字符串数组。例如: ["小麦", "阿麦"]
+alias_names = ["小麦", "阿麦"] # 机器人别名
[personality]
prompt_personality = [
diff --git a/docs/manual_deploy_linux.md b/docs/manual_deploy_linux.md
new file mode 100644
index 000000000..41f0390b8
--- /dev/null
+++ b/docs/manual_deploy_linux.md
@@ -0,0 +1,115 @@
+# 📦 Linux系统如何手动部署MaiMbot麦麦?
+
+## 准备工作
+- 一台联网的Linux设备(本教程以Ubuntu/Debian系为例)
+- QQ小号(QQ框架的使用可能导致QQ号被风控,严重时(小概率)可能导致账号封禁,强烈不推荐使用大号)
+- 可用的大模型API
+- 一个AI助手,网上随便搜一家打开来用都行,可以帮你解决一些不懂的问题
+- 以下内容假设你对Linux系统有一定的了解,如果觉得难以理解,请直接用Windows系统部署[Windows系统部署指南](./manual_deploy_windows.md)
+
+## 你需要知道什么?
+
+- 如何正确向AI助手提问,来学习新知识
+
+- Python是什么
+
+- Python的虚拟环境是什么?如何创建虚拟环境
+
+- 命令行是什么
+
+- 数据库是什么?如何安装并启动MongoDB
+
+- 如何运行一个QQ机器人,以及NapCat框架是什么
+---
+
+## 环境配置
+
+### 1️⃣ **确认Python版本**
+
+需确保Python版本为3.9及以上
+
+```bash
+python --version
+# 或
+python3 --version
+```
+如果版本低于3.9,请更新Python版本。
+```bash
+# Ubuntu/Debian
+sudo apt update
+sudo apt install python3.9
+# 如执行了这一步,建议在执行时将python3指向python3.9
+# 更新替代方案,设置 python3.9 为默认的 python3 版本:
+sudo update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.9 1
+sudo update-alternatives --config python3
+```
+
+### 2️⃣ **创建虚拟环境**
+```bash
+# 方法1:使用venv(推荐)
+python3 -m venv maimbot
+source maimbot/bin/activate # 激活环境
+
+# 方法2:使用conda(需先安装Miniconda)
+wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
+bash Miniconda3-latest-Linux-x86_64.sh
+conda create -n maimbot python=3.9
+conda activate maimbot
+
+# 通过以上方法创建并进入虚拟环境后,再执行以下命令
+
+# 安装依赖(任选一种环境)
+pip install -r requirements.txt
+```
+
+---
+
+## 数据库配置
+### 3️⃣ **安装并启动MongoDB**
+- 安装与启动: Debian参考[官方文档](https://docs.mongodb.com/manual/tutorial/install-mongodb-on-debian/),Ubuntu参考[官方文档](https://docs.mongodb.com/manual/tutorial/install-mongodb-on-ubuntu/)
+
+- 默认连接本地27017端口
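+- 安装完成后可用如下命令确认服务状态并设置开机自启(服务名以发行版和官方文档为准,通常为 mongod):
+
+```bash
+sudo systemctl status mongod        # 查看 MongoDB 服务是否在运行
+sudo systemctl enable --now mongod  # 设置开机自启并立即启动
+```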
+---
+
+## NapCat配置
+### 4️⃣ **安装NapCat框架**
+
+- 参考[NapCat官方文档](https://www.napcat.wiki/guide/boot/Shell#napcat-installer-linux%E4%B8%80%E9%94%AE%E4%BD%BF%E7%94%A8%E8%84%9A%E6%9C%AC-%E6%94%AF%E6%8C%81ubuntu-20-debian-10-centos9)安装
+
+- 使用QQ小号登录,添加反向WS地址: `ws://127.0.0.1:8080/onebot/v11/ws`
+
+---
+
+## 配置文件设置
+### 5️⃣ **配置文件设置,让麦麦Bot正常工作**
+- 修改环境配置文件: `.env.prod`
+- 修改机器人配置文件: `bot_config.toml`
+
+
+---
+
+## 启动机器人
+### 6️⃣ **启动麦麦机器人**
+```bash
+# 在项目目录下操作
+nb run
+# 或
+python3 bot.py
+```
+
+---
+
+## **其他组件(可选)**
+- 直接运行 knowledge.py 生成知识库
+
+
+---
+
+## 常见问题
+- 🔧 权限问题: 在命令前加 `sudo`
+- 🔌 端口占用: 使用 `sudo lsof -i :8080` 查看端口占用
+- 🛡️ 防火墙: 确保8080/27017端口开放
+```bash
+sudo ufw allow 8080/tcp
+sudo ufw allow 27017/tcp
+```
\ No newline at end of file
diff --git a/docs/manual_deploy.md b/docs/manual_deploy_windows.md
similarity index 90%
rename from docs/manual_deploy.md
rename to docs/manual_deploy_windows.md
index 6d53beb4e..eebdc4f41 100644
--- a/docs/manual_deploy.md
+++ b/docs/manual_deploy_windows.md
@@ -1,4 +1,4 @@
-# 📦 如何手动部署MaiMbot麦麦?
+# 📦 Windows系统如何手动部署MaiMbot麦麦?
## 你需要什么?
@@ -30,7 +30,7 @@
在创建虚拟环境之前,请确保你的电脑上安装了Python 3.9及以上版本。如果没有,可以按以下步骤安装:
-1. 访问Python官网下载页面:https://www.python.org/downloads/release/python-3913/
+1. 访问Python官网下载页面: https://www.python.org/downloads/release/python-3913/
2. 下载Windows安装程序 (64-bit): `python-3.9.13-amd64.exe`
3. 运行安装程序,并确保勾选"Add Python 3.9 to PATH"选项
4. 点击"Install Now"开始安装
@@ -79,11 +79,11 @@ pip install -r requirements.txt
### 3️⃣ **配置NapCat,让麦麦bot与qq取得联系**
- 安装并登录NapCat(用你的qq小号)
-- 添加反向WS:`ws://localhost:8080/onebot/v11/ws`
+- 添加反向WS: `ws://127.0.0.1:8080/onebot/v11/ws`
### 4️⃣ **配置文件设置,让麦麦Bot正常工作**
-- 修改环境配置文件:`.env.prod`
-- 修改机器人配置文件:`bot_config.toml`
+- 修改环境配置文件: `.env.prod`
+- 修改机器人配置文件: `bot_config.toml`
### 5️⃣ **启动麦麦机器人**
- 打开命令行,cd到对应路径
diff --git a/flake.nix b/flake.nix
index 54737d640..3586857f0 100644
--- a/flake.nix
+++ b/flake.nix
@@ -22,6 +22,7 @@
pythonEnv = pkgs.python3.withPackages (
ps: with ps; [
+ ruff
pymongo
python-dotenv
pydantic
diff --git a/pyproject.toml b/pyproject.toml
index e54dcdacd..0a4805744 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,23 +1,51 @@
[project]
-name = "Megbot"
+name = "MaiMaiBot"
version = "0.1.0"
-description = "New Bot Project"
+description = "MaiMaiBot"
[tool.nonebot]
plugins = ["src.plugins.chat"]
-plugin_dirs = ["src/plugins"]
+plugin_dirs = ["src/plugins"]
[tool.ruff]
-# 设置 Python 版本
-target-version = "py39"
+
+include = ["*.py"]
+
+# 行长度设置
+line-length = 120
+
+[tool.ruff.lint]
+fixable = ["ALL"]
+unfixable = []
+
+# 如果一个变量的名称以下划线开头,即使它未被使用,也不应该被视为错误或警告。
+dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
# 启用的规则
select = [
- "E", # pycodestyle 错误
- "F", # pyflakes
- "I", # isort
- "B", # flake8-bugbear
+ "E", # pycodestyle 错误
+ "F", # pyflakes
+ "B", # flake8-bugbear
]
-# 行长度设置
-line-length = 88
\ No newline at end of file
+ignore = ["E711"]
+
+[tool.ruff.format]
+docstring-code-format = true
+indent-style = "space"
+
+
+# 使用双引号表示字符串
+quote-style = "double"
+
+# 尊重魔法尾随逗号
+# 例如:
+# items = [
+# "apple",
+# "banana",
+# "cherry",
+# ]
+skip-magic-trailing-comma = false
+
+# 自动检测合适的换行符
+line-ending = "auto"
diff --git a/requirements.txt b/requirements.txt
index 4f969682f..0acaade5e 100644
Binary files a/requirements.txt and b/requirements.txt differ
diff --git a/run.bat b/run.bat
index 1d1385671..91904bc34 100644
--- a/run.bat
+++ b/run.bat
@@ -1,6 +1,10 @@
@ECHO OFF
chcp 65001
-REM python -m venv venv
-call venv\Scripts\activate.bat
-REM pip install -i https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple --upgrade -r requirements.txt
+if not exist "venv" (
+ python -m venv venv
+ call venv\Scripts\activate.bat
+ pip install -i https://mirrors.aliyun.com/pypi/simple --upgrade -r requirements.txt
+ ) else (
+ call venv\Scripts\activate.bat
+)
python run.py
\ No newline at end of file
diff --git a/run.py b/run.py
index 0a195544f..50e312c37 100644
--- a/run.py
+++ b/run.py
@@ -1,7 +1,7 @@
import os
import subprocess
import zipfile
-
+import sys
import requests
from tqdm import tqdm
@@ -37,7 +37,7 @@ def extract_files(zip_path, target_dir):
f.write(zip_ref.read(file))
-def run_cmd(command: str, open_new_window: bool = False):
+def run_cmd(command: str, open_new_window: bool = True):
"""
运行 cmd 命令
@@ -45,26 +45,19 @@ def run_cmd(command: str, open_new_window: bool = False):
command (str): 指定要运行的命令
open_new_window (bool): 指定是否新建一个 cmd 窗口运行
"""
- creationflags = 0
if open_new_window:
- creationflags = subprocess.CREATE_NEW_CONSOLE
- subprocess.Popen(
- [
- "cmd.exe",
- "/c",
- command,
- ],
- creationflags=creationflags,
- )
+ command = "start " + command
+ subprocess.Popen(command, shell=True)
def run_maimbot():
run_cmd(r"napcat\NapCatWinBootMain.exe 10001", False)
+ if not os.path.exists(r"mongodb\db"):
+ os.makedirs(r"mongodb\db")
run_cmd(
- r"mongodb\bin\mongod.exe --dbpath=" + os.getcwd() + r"\mongodb\db --port 27017",
- True,
+ r"mongodb\bin\mongod.exe --dbpath=" + os.getcwd() + r"\mongodb\db --port 27017"
)
- run_cmd("nb run", True)
+ run_cmd("nb run")
def install_mongodb():
@@ -87,17 +80,35 @@ def install_mongodb():
for data in resp.iter_content(chunk_size=1024):
size = file.write(data)
bar.update(size)
- extract_files("mongodb.zip", "mongodb")
- print("MongoDB 下载完成")
- os.remove("mongodb.zip")
+ extract_files("mongodb.zip", "mongodb")
+ print("MongoDB 下载完成")
+ os.remove("mongodb.zip")
+ choice = input(
+ "是否安装 MongoDB Compass?此软件可以以可视化的方式修改数据库,建议安装(Y/n)"
+ ).upper()
+ if choice == "Y" or choice == "":
+ install_mongodb_compass()
+
+
+def install_mongodb_compass():
+ run_cmd(
+ r"powershell Start-Process powershell -Verb runAs 'Set-ExecutionPolicy RemoteSigned'"
+ )
+ input("请在弹出的用户账户控制中点击“是”后按任意键继续安装")
+ run_cmd(r"powershell mongodb\bin\Install-Compass.ps1")
+ input("按任意键启动麦麦")
+ input("如不需要启动此窗口可直接关闭,无需等待 Compass 安装完成")
+ run_maimbot()
def install_napcat():
- run_cmd("start https://github.com/NapNeko/NapCatQQ/releases", True)
+ run_cmd("start https://github.com/NapNeko/NapCatQQ/releases", False)
print("请检查弹出的浏览器窗口,点击**第一个**蓝色的“Win64无头” 下载 napcat")
napcat_filename = input(
"下载完成后请把文件复制到此文件夹,并将**不包含后缀的文件名**输入至此窗口,如 NapCat.32793.Shell:"
)
+    if napcat_filename.endswith(".zip"):
+ napcat_filename = napcat_filename[:-4]
extract_files(napcat_filename + ".zip", "napcat")
print("NapCat 安装完成")
os.remove(napcat_filename + ".zip")
@@ -105,11 +116,15 @@ def install_napcat():
if __name__ == "__main__":
os.system("cls")
+ if sys.version_info < (3, 9):
+ print("当前 Python 版本过低,最低版本为 3.9,请更新 Python 版本")
+ print("按任意键退出")
+ input()
+ exit(1)
choice = input(
"请输入要进行的操作:\n"
"1.首次安装\n"
"2.运行麦麦\n"
- "3.运行麦麦并启动可视化推理界面\n"
)
os.system("cls")
if choice == "1":
@@ -117,6 +132,9 @@ if __name__ == "__main__":
install_mongodb()
elif choice == "2":
run_maimbot()
- elif choice == "3":
- run_maimbot()
- run_cmd("python src/gui/reasoning_gui.py", True)
+ choice = input("是否启动推理可视化?(y/N)").upper()
+ if choice == "Y":
+ run_cmd(r"python src\gui\reasoning_gui.py")
+ choice = input("是否启动记忆可视化?(y/N)").upper()
+ if choice == "Y":
+        run_cmd(r"python src\plugins\memory_system\memory_manual_build.py")
diff --git a/run_memory_vis.bat b/run_memory_vis.bat
new file mode 100644
index 000000000..b1feb0cb2
--- /dev/null
+++ b/run_memory_vis.bat
@@ -0,0 +1,29 @@
+@echo off
+chcp 65001 > nul
+set /p CONDA_ENV="请输入要激活的 conda 环境名称: "
+call conda activate %CONDA_ENV%
+if errorlevel 1 (
+ echo 激活 conda 环境失败
+ pause
+ exit /b 1
+)
+echo Conda 环境 "%CONDA_ENV%" 激活成功
+
+set /p OPTION="请选择运行选项 (1: 运行全部绘制, 2: 运行简单绘制): "
+if "%OPTION%"=="1" (
+ python src/plugins/memory_system/memory_manual_build.py
+) else if "%OPTION%"=="2" (
+ python src/plugins/memory_system/draw_memory.py
+) else (
+ echo 无效的选项
+ pause
+ exit /b 1
+)
+
+if errorlevel 1 (
+ echo 命令执行失败,错误代码 %errorlevel%
+ pause
+ exit /b 1
+)
+echo 脚本成功完成
+pause
\ No newline at end of file
diff --git a/src/gui/reasoning_gui.py b/src/gui/reasoning_gui.py
index 340791ee3..5768ddc09 100644
--- a/src/gui/reasoning_gui.py
+++ b/src/gui/reasoning_gui.py
@@ -5,6 +5,9 @@ import threading
import time
from datetime import datetime
from typing import Dict, List
+from loguru import logger
+from typing import Optional
+from pymongo import MongoClient
import customtkinter as ctk
from dotenv import load_dotenv
@@ -17,23 +20,20 @@ root_dir = os.path.abspath(os.path.join(current_dir, '..', '..'))
# 加载环境变量
if os.path.exists(os.path.join(root_dir, '.env.dev')):
load_dotenv(os.path.join(root_dir, '.env.dev'))
- print("成功加载开发环境配置")
+ logger.info("成功加载开发环境配置")
elif os.path.exists(os.path.join(root_dir, '.env.prod')):
load_dotenv(os.path.join(root_dir, '.env.prod'))
- print("成功加载生产环境配置")
+ logger.info("成功加载生产环境配置")
else:
- print("未找到环境配置文件")
+ logger.error("未找到环境配置文件")
sys.exit(1)
-from typing import Optional
-
-from pymongo import MongoClient
-
class Database:
_instance: Optional["Database"] = None
-
- def __init__(self, host: str, port: int, db_name: str, username: str = None, password: str = None, auth_source: str = None):
+
+ def __init__(self, host: str, port: int, db_name: str, username: str = None, password: str = None,
+ auth_source: str = None):
if username and password:
self.client = MongoClient(
host=host,
@@ -45,96 +45,96 @@ class Database:
else:
self.client = MongoClient(host, port)
self.db = self.client[db_name]
-
+
@classmethod
- def initialize(cls, host: str, port: int, db_name: str, username: str = None, password: str = None, auth_source: str = None) -> "Database":
+ def initialize(cls, host: str, port: int, db_name: str, username: str = None, password: str = None,
+ auth_source: str = None) -> "Database":
if cls._instance is None:
cls._instance = cls(host, port, db_name, username, password, auth_source)
return cls._instance
-
+
@classmethod
def get_instance(cls) -> "Database":
if cls._instance is None:
raise RuntimeError("Database not initialized")
- return cls._instance
-
+ return cls._instance
class ReasoningGUI:
def __init__(self):
# 记录启动时间戳,转换为Unix时间戳
self.start_timestamp = datetime.now().timestamp()
- print(f"程序启动时间戳: {self.start_timestamp}")
-
+ logger.info(f"程序启动时间戳: {self.start_timestamp}")
+
# 设置主题
ctk.set_appearance_mode("dark")
ctk.set_default_color_theme("blue")
-
+
# 创建主窗口
self.root = ctk.CTk()
self.root.title('麦麦推理')
self.root.geometry('800x600')
self.root.protocol("WM_DELETE_WINDOW", self._on_closing)
-
+
# 初始化数据库连接
try:
self.db = Database.get_instance().db
- print("数据库连接成功")
+ logger.success("数据库连接成功")
except RuntimeError:
- print("数据库未初始化,正在尝试初始化...")
+ logger.warning("数据库未初始化,正在尝试初始化...")
try:
- Database.initialize("localhost", 27017, "maimai_bot")
+ Database.initialize("127.0.0.1", 27017, "maimai_bot")
self.db = Database.get_instance().db
- print("数据库初始化成功")
- except Exception as e:
- print(f"数据库初始化失败: {e}")
+ logger.success("数据库初始化成功")
+ except Exception:
+ logger.exception("数据库初始化失败")
sys.exit(1)
-
+
# 存储群组数据
self.group_data: Dict[str, List[dict]] = {}
-
+
# 创建更新队列
self.update_queue = queue.Queue()
-
+
# 创建主框架
self.frame = ctk.CTkFrame(self.root)
self.frame.pack(pady=20, padx=20, fill="both", expand=True)
-
+
# 添加标题
self.title = ctk.CTkLabel(self.frame, text="麦麦的脑内所想", font=("Arial", 24))
self.title.pack(pady=10, padx=10)
-
+
# 创建左右分栏
self.paned = ctk.CTkFrame(self.frame)
self.paned.pack(fill="both", expand=True, padx=10, pady=10)
-
+
# 左侧群组列表
self.left_frame = ctk.CTkFrame(self.paned, width=200)
self.left_frame.pack(side="left", fill="y", padx=5, pady=5)
-
+
self.group_label = ctk.CTkLabel(self.left_frame, text="群组列表", font=("Arial", 16))
self.group_label.pack(pady=5)
-
+
# 创建可滚动框架来容纳群组按钮
self.group_scroll_frame = ctk.CTkScrollableFrame(self.left_frame, width=180, height=400)
self.group_scroll_frame.pack(pady=5, padx=5, fill="both", expand=True)
-
+
# 存储群组按钮的字典
self.group_buttons: Dict[str, ctk.CTkButton] = {}
# 当前选中的群组ID
self.selected_group_id: Optional[str] = None
-
+
# 右侧内容显示
self.right_frame = ctk.CTkFrame(self.paned)
self.right_frame.pack(side="right", fill="both", expand=True, padx=5, pady=5)
-
+
self.content_label = ctk.CTkLabel(self.right_frame, text="推理内容", font=("Arial", 16))
self.content_label.pack(pady=5)
-
+
# 创建富文本显示框
self.content_text = ctk.CTkTextbox(self.right_frame, width=500, height=400)
self.content_text.pack(pady=5, padx=5, fill="both", expand=True)
-
+
# 配置文本标签 - 只使用颜色
self.content_text.tag_config("timestamp", foreground="#888888") # 时间戳使用灰色
self.content_text.tag_config("user", foreground="#4CAF50") # 用户名使用绿色
@@ -144,11 +144,11 @@ class ReasoningGUI:
self.content_text.tag_config("reasoning", foreground="#FF9800") # 推理过程使用橙色
self.content_text.tag_config("response", foreground="#E91E63") # 回复使用粉色
self.content_text.tag_config("separator", foreground="#666666") # 分隔符使用深灰色
-
+
# 底部控制栏
self.control_frame = ctk.CTkFrame(self.frame)
self.control_frame.pack(fill="x", padx=10, pady=5)
-
+
self.clear_button = ctk.CTkButton(
self.control_frame,
text="清除显示",
@@ -156,19 +156,19 @@ class ReasoningGUI:
width=120
)
self.clear_button.pack(side="left", padx=5)
-
+
# 启动自动更新线程
self.update_thread = threading.Thread(target=self._auto_update, daemon=True)
self.update_thread.start()
-
+
# 启动GUI更新检查
self.root.after(100, self._process_queue)
-
+
def _on_closing(self):
"""处理窗口关闭事件"""
self.root.quit()
sys.exit(0)
-
+
def _process_queue(self):
"""处理更新队列中的任务"""
try:
@@ -183,14 +183,14 @@ class ReasoningGUI:
finally:
# 继续检查队列
self.root.after(100, self._process_queue)
-
+
def _update_group_list_gui(self):
"""在主线程中更新群组列表"""
# 清除现有按钮
for button in self.group_buttons.values():
button.destroy()
self.group_buttons.clear()
-
+
# 创建新的群组按钮
for group_id in self.group_data.keys():
button = ctk.CTkButton(
@@ -203,16 +203,16 @@ class ReasoningGUI:
)
button.pack(pady=2, padx=5)
self.group_buttons[group_id] = button
-
+
# 如果有选中的群组,保持其高亮状态
if self.selected_group_id and self.selected_group_id in self.group_buttons:
self._highlight_selected_group(self.selected_group_id)
-
+
def _on_group_select(self, group_id: str):
"""处理群组选择事件"""
self._highlight_selected_group(group_id)
self._update_display_gui(group_id)
-
+
def _highlight_selected_group(self, group_id: str):
"""高亮显示选中的群组按钮"""
# 重置所有按钮的颜色
@@ -223,9 +223,9 @@ class ReasoningGUI:
else:
# 恢复其他按钮的默认颜色
button.configure(fg_color="#2B2B2B", hover_color="#404040")
-
+
self.selected_group_id = group_id
-
+
def _update_display_gui(self, group_id: str):
"""在主线程中更新显示内容"""
if group_id in self.group_data:
@@ -234,19 +234,19 @@ class ReasoningGUI:
# 时间戳
time_str = item['time'].strftime("%Y-%m-%d %H:%M:%S")
self.content_text.insert("end", f"[{time_str}]\n", "timestamp")
-
+
# 用户信息
self.content_text.insert("end", "用户: ", "timestamp")
self.content_text.insert("end", f"{item.get('user', '未知')}\n", "user")
-
+
# 消息内容
self.content_text.insert("end", "消息: ", "timestamp")
self.content_text.insert("end", f"{item.get('message', '')}\n", "message")
-
+
# 模型信息
self.content_text.insert("end", "模型: ", "timestamp")
self.content_text.insert("end", f"{item.get('model', '')}\n", "model")
-
+
# Prompt内容
self.content_text.insert("end", "Prompt内容:\n", "timestamp")
prompt_text = item.get('prompt', '')
@@ -257,7 +257,7 @@ class ReasoningGUI:
self.content_text.insert("end", " " + line + "\n", "prompt")
else:
self.content_text.insert("end", " 无Prompt内容\n", "prompt")
-
+
# 推理过程
self.content_text.insert("end", "推理过程:\n", "timestamp")
reasoning_text = item.get('reasoning', '')
@@ -268,53 +268,53 @@ class ReasoningGUI:
self.content_text.insert("end", " " + line + "\n", "reasoning")
else:
self.content_text.insert("end", " 无推理过程\n", "reasoning")
-
+
# 回复内容
self.content_text.insert("end", "回复: ", "timestamp")
self.content_text.insert("end", f"{item.get('response', '')}\n", "response")
-
+
# 分隔符
- self.content_text.insert("end", f"\n{'='*50}\n\n", "separator")
-
+ self.content_text.insert("end", f"\n{'=' * 50}\n\n", "separator")
+
# 滚动到顶部
self.content_text.see("1.0")
-
+
def _auto_update(self):
"""自动更新函数"""
while True:
try:
# 从数据库获取最新数据,只获取启动时间之后的记录
query = {"time": {"$gt": self.start_timestamp}}
- print(f"查询条件: {query}")
-
+ logger.debug(f"查询条件: {query}")
+
# 先获取一条记录检查时间格式
sample = self.db.reasoning_logs.find_one()
if sample:
- print(f"样本记录时间格式: {type(sample['time'])} 值: {sample['time']}")
-
+ logger.debug(f"样本记录时间格式: {type(sample['time'])} 值: {sample['time']}")
+
cursor = self.db.reasoning_logs.find(query).sort("time", -1)
new_data = {}
total_count = 0
-
+
for item in cursor:
# 调试输出
if total_count == 0:
- print(f"记录时间: {item['time']}, 类型: {type(item['time'])}")
-
+ logger.debug(f"记录时间: {item['time']}, 类型: {type(item['time'])}")
+
total_count += 1
group_id = str(item.get('group_id', 'unknown'))
if group_id not in new_data:
new_data[group_id] = []
-
+
# 转换时间戳为datetime对象
if isinstance(item['time'], (int, float)):
time_obj = datetime.fromtimestamp(item['time'])
elif isinstance(item['time'], datetime):
time_obj = item['time']
else:
- print(f"未知的时间格式: {type(item['time'])}")
+ logger.warning(f"未知的时间格式: {type(item['time'])}")
time_obj = datetime.now() # 使用当前时间作为后备
-
+
new_data[group_id].append({
'time': time_obj,
'user': item.get('user', '未知'),
@@ -324,13 +324,13 @@ class ReasoningGUI:
'response': item.get('response', ''),
'prompt': item.get('prompt', '') # 添加prompt字段
})
-
- print(f"从数据库加载了 {total_count} 条记录,分布在 {len(new_data)} 个群组中")
-
+
+ logger.info(f"从数据库加载了 {total_count} 条记录,分布在 {len(new_data)} 个群组中")
+
# 更新数据
if new_data != self.group_data:
self.group_data = new_data
- print("数据已更新,正在刷新显示...")
+ logger.info("数据已更新,正在刷新显示...")
# 将更新任务添加到队列
self.update_queue.put({'type': 'update_group_list'})
if self.group_data:
@@ -341,16 +341,16 @@ class ReasoningGUI:
'type': 'update_display',
'group_id': self.selected_group_id
})
- except Exception as e:
- print(f"自动更新出错: {e}")
-
+ except Exception:
+ logger.exception("自动更新出错")
+
# 每5秒更新一次
time.sleep(5)
-
+
def clear_display(self):
"""清除显示内容"""
self.content_text.delete("1.0", "end")
-
+
def run(self):
"""运行GUI"""
self.root.mainloop()
@@ -359,18 +359,17 @@ class ReasoningGUI:
def main():
"""主函数"""
Database.initialize(
- host= os.getenv("MONGODB_HOST"),
- port= int(os.getenv("MONGODB_PORT")),
- db_name= os.getenv("DATABASE_NAME"),
- username= os.getenv("MONGODB_USERNAME"),
- password= os.getenv("MONGODB_PASSWORD"),
+ host=os.getenv("MONGODB_HOST"),
+ port=int(os.getenv("MONGODB_PORT")),
+ db_name=os.getenv("DATABASE_NAME"),
+ username=os.getenv("MONGODB_USERNAME"),
+ password=os.getenv("MONGODB_PASSWORD"),
auth_source=os.getenv("MONGODB_AUTH_SOURCE")
)
-
+
app = ReasoningGUI()
app.run()
-
if __name__ == "__main__":
main()
diff --git a/src/plugins/chat/__init__.py b/src/plugins/chat/__init__.py
index a62343d0c..9f9c6a45c 100644
--- a/src/plugins/chat/__init__.py
+++ b/src/plugins/chat/__init__.py
@@ -1,12 +1,9 @@
import asyncio
-import os
-import random
import time
from loguru import logger
-from nonebot import get_driver, on_command, on_message, require
+from nonebot import get_driver, on_message, require
from nonebot.adapters.onebot.v11 import Bot, GroupMessageEvent, Message, MessageSegment
-from nonebot.rule import to_me
from nonebot.typing import T_State
from ...common.database import Database
@@ -19,6 +16,10 @@ from .emoji_manager import emoji_manager
from .relationship_manager import relationship_manager
from .willing_manager import willing_manager
from .chat_stream import chat_manager
+from ..memory_system.memory import hippocampus, memory_graph
+from .bot import ChatBot
+from .message_sender import message_manager, message_sender
+
# 创建LLM统计实例
llm_stats = LLMStatistics("llm_statistics.txt")
@@ -31,27 +32,20 @@ driver = get_driver()
config = driver.config
Database.initialize(
- host= config.MONGODB_HOST,
- port= int(config.MONGODB_PORT),
- db_name= config.DATABASE_NAME,
- username= config.MONGODB_USERNAME,
- password= config.MONGODB_PASSWORD,
- auth_source= config.MONGODB_AUTH_SOURCE
+ host=config.MONGODB_HOST,
+ port=int(config.MONGODB_PORT),
+ db_name=config.DATABASE_NAME,
+ username=config.MONGODB_USERNAME,
+ password=config.MONGODB_PASSWORD,
+ auth_source=config.MONGODB_AUTH_SOURCE
)
-print("\033[1;32m[初始化数据库完成]\033[0m")
+logger.success("初始化数据库成功")
-# 导入其他模块
-from ..memory_system.memory import hippocampus, memory_graph
-from .bot import ChatBot
-
-# from .message_send_control import message_sender
-from .message_sender import message_manager, message_sender
-
# 初始化表情管理器
emoji_manager.initialize()
-print(f"\033[1;32m正在唤醒{global_config.BOT_NICKNAME}......\033[0m")
+logger.debug(f"正在唤醒{global_config.BOT_NICKNAME}......")
# 创建机器人实例
chat_bot = ChatBot()
# 注册群消息处理器
@@ -60,71 +54,80 @@ group_msg = on_message(priority=5)
scheduler = require("nonebot_plugin_apscheduler").scheduler
-
@driver.on_startup
async def start_background_tasks():
"""启动后台任务"""
# 启动LLM统计
llm_stats.start()
- print("\033[1;32m[初始化]\033[0m LLM统计功能已启动")
-
+ logger.success("LLM统计功能启动成功")
+
# 初始化并启动情绪管理器
mood_manager = MoodManager.get_instance()
mood_manager.start_mood_update(update_interval=global_config.mood_update_interval)
- print("\033[1;32m[初始化]\033[0m 情绪管理器已启动")
-
+ logger.success("情绪管理器启动成功")
+
# 只启动表情包管理任务
asyncio.create_task(emoji_manager.start_periodic_check(interval_MINS=global_config.EMOJI_CHECK_INTERVAL))
await bot_schedule.initialize()
bot_schedule.print_schedule()
-
+
+
@driver.on_startup
async def init_relationships():
"""在 NoneBot2 启动时初始化关系管理器"""
- print("\033[1;32m[初始化]\033[0m 正在加载用户关系数据...")
+ logger.debug("正在加载用户关系数据...")
await relationship_manager.load_all_relationships()
asyncio.create_task(relationship_manager._start_relationship_manager())
+
@driver.on_bot_connect
async def _(bot: Bot):
"""Bot连接成功时的处理"""
global _message_manager_started
- print(f"\033[1;38;5;208m-----------{global_config.BOT_NICKNAME}成功连接!-----------\033[0m")
+ logger.debug(f"-----------{global_config.BOT_NICKNAME}成功连接!-----------")
await willing_manager.ensure_started()
-
+
message_sender.set_bot(bot)
- print("\033[1;38;5;208m-----------消息发送器已启动!-----------\033[0m")
-
+ logger.success("-----------消息发送器已启动!-----------")
+
if not _message_manager_started:
asyncio.create_task(message_manager.start_processor())
_message_manager_started = True
- print("\033[1;38;5;208m-----------消息处理器已启动!-----------\033[0m")
-
+ logger.success("-----------消息处理器已启动!-----------")
+
asyncio.create_task(emoji_manager._periodic_scan(interval_MINS=global_config.EMOJI_REGISTER_INTERVAL))
- print("\033[1;38;5;208m-----------开始偷表情包!-----------\033[0m")
+ logger.success("-----------开始偷表情包!-----------")
asyncio.create_task(chat_manager._initialize())
asyncio.create_task(chat_manager._auto_save_task())
-
+
+
@group_msg.handle()
async def _(bot: Bot, event: GroupMessageEvent, state: T_State):
await chat_bot.handle_message(event, bot)
+
# 添加build_memory定时任务
@scheduler.scheduled_job("interval", seconds=global_config.build_memory_interval, id="build_memory")
async def build_memory_task():
"""每build_memory_interval秒执行一次记忆构建"""
- print("\033[1;32m[记忆构建]\033[0m -------------------------------------------开始构建记忆-------------------------------------------")
+ logger.debug(
+ "[记忆构建]"
+ "------------------------------------开始构建记忆--------------------------------------")
start_time = time.time()
await hippocampus.operation_build_memory(chat_size=20)
end_time = time.time()
- print(f"\033[1;32m[记忆构建]\033[0m -------------------------------------------记忆构建完成:耗时: {end_time - start_time:.2f} 秒-------------------------------------------")
-
-@scheduler.scheduled_job("interval", seconds=global_config.forget_memory_interval, id="forget_memory")
+ logger.success(
+ f"[记忆构建]--------------------------记忆构建完成:耗时: {end_time - start_time:.2f} "
+ "秒-------------------------------------------")
+
+
+@scheduler.scheduled_job("interval", seconds=global_config.forget_memory_interval, id="forget_memory")
async def forget_memory_task():
"""每30秒执行一次记忆构建"""
- # print("\033[1;32m[记忆遗忘]\033[0m 开始遗忘记忆...")
- # await hippocampus.operation_forget_topic(percentage=0.1)
- # print("\033[1;32m[记忆遗忘]\033[0m 记忆遗忘完成")
+    logger.debug("[记忆遗忘] 开始遗忘记忆...")
+    await hippocampus.operation_forget_topic(percentage=0.1)
+    logger.success("[记忆遗忘] 记忆遗忘完成")
+
@scheduler.scheduled_job("interval", seconds=global_config.build_memory_interval + 10, id="merge_memory")
async def merge_memory_task():
@@ -133,9 +136,9 @@ async def merge_memory_task():
# await hippocampus.operation_merge_memory(percentage=0.1)
# print("\033[1;32m[记忆整合]\033[0m 记忆整合完成")
+
@scheduler.scheduled_job("interval", seconds=30, id="print_mood")
async def print_mood_task():
"""每30秒打印一次情绪状态"""
mood_manager = MoodManager.get_instance()
mood_manager.print_mood_status()
-
diff --git a/src/plugins/chat/bot.py b/src/plugins/chat/bot.py
index a5f4ac476..a695cea77 100644
--- a/src/plugins/chat/bot.py
+++ b/src/plugins/chat/bot.py
@@ -1,3 +1,4 @@
+import re
import time
from random import random
from loguru import logger
@@ -31,10 +32,10 @@ class ChatBot:
self._started = False
self.mood_manager = MoodManager.get_instance() # 获取情绪管理器单例
self.mood_manager.start_mood_update() # 启动情绪更新
-
+
self.emoji_chance = 0.2 # 发送表情包的基础概率
# self.message_streams = MessageStreamContainer()
-
+
async def _ensure_started(self):
"""确保所有任务已启动"""
if not self._started:
@@ -42,9 +43,9 @@ class ChatBot:
async def handle_message(self, event: GroupMessageEvent, bot: Bot) -> None:
"""处理收到的群消息"""
-
+
self.bot = bot # 更新 bot 实例
-
+
# group_info = await bot.get_group_info(group_id=event.group_id)
# sender_info = await bot.get_group_member_info(group_id=event.group_id, user_id=event.user_id, no_cache=True)
@@ -96,8 +97,17 @@ class ChatBot:
# 过滤词
for word in global_config.ban_words:
if word in message.processed_plain_text:
- logger.info(f"\033[1;32m[{groupinfo.group_name}]{userinfo.user_nickname}:\033[0m {message.processed_plain_text}")
- logger.info(f"\033[1;32m[过滤词识别]\033[0m 消息中含有{word},filtered")
+ logger.info(
+ f"[{groupinfo.group_name}]{userinfo.user_nickname}:{message.processed_plain_text}")
+ logger.info(f"[过滤词识别]消息中含有{word},filtered")
+ return
+
+ # 正则表达式过滤
+ for pattern in global_config.ban_msgs_regex:
+ if re.search(pattern, message.raw_message):
+ logger.info(
+                    f"[{groupinfo.group_name}]{userinfo.user_nickname}:{message.raw_message}")
+ logger.info(f"[正则表达式过滤]消息匹配到{pattern},filtered")
return
current_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(messageinfo.time))
@@ -107,8 +117,9 @@ class ChatBot:
# topic=await topic_identifier.identify_topic_llm(message.processed_plain_text)
topic = ''
interested_rate = 0
- interested_rate = await hippocampus.memory_activate_value(message.processed_plain_text)/100
- print(f"\033[1;32m[记忆激活]\033[0m 对{message.processed_plain_text}的激活度:---------------------------------------{interested_rate}\n")
+ interested_rate = await hippocampus.memory_activate_value(message.processed_plain_text) / 100
+ logger.debug(f"对{message.processed_plain_text}"
+ f"的激活度:{interested_rate}")
# logger.info(f"\033[1;32m[主题识别]\033[0m 使用{global_config.topic_extract}主题: {topic}")
await self.storage.store_message(message,chat, topic[0] if topic else None)
@@ -124,7 +135,10 @@ class ChatBot:
)
current_willing = willing_manager.get_willing(chat_stream=chat)
- print(f"\033[1;32m[{current_time}][{chat.group_info.group_name}]{chat.user_info.user_nickname}:\033[0m {message.processed_plain_text}\033[1;36m[回复意愿:{current_willing:.2f}][概率:{reply_probability * 100:.1f}%]\033[0m")
+ logger.info(
+ f"[{current_time}][{chat.group_info.group_name}]{chat.user_info.user_nickname}:"
+ f"{message.processed_plain_text}[回复意愿:{current_willing:.2f}][概率:{reply_probability * 100:.1f}%]"
+ )
response = None
@@ -159,13 +173,13 @@ class ChatBot:
thinking_message = msg
container.messages.remove(msg)
break
-
+
# 如果找不到思考消息,直接返回
if not thinking_message:
- print(f"\033[1;33m[警告]\033[0m 未找到对应的思考消息,可能已超时被移除")
+ logger.warning("未找到对应的思考消息,可能已超时被移除")
return
-
- #记录开始思考的时间,避免从思考到回复的时间太久
+
+ # 记录开始思考的时间,避免从思考到回复的时间太久
thinking_start_time = thinking_message.thinking_start_time
        message_set = MessageSet(chat, think_id)
@@ -175,7 +189,7 @@ class ChatBot:
mark_head = False
for msg in response:
# print(f"\033[1;32m[回复内容]\033[0m {msg}")
- #通过时间改变时间戳
+ # 通过时间改变时间戳
typing_time = calculate_typing_time(msg)
accu_typing_time += typing_time
timepoint = tinking_time_point + accu_typing_time
@@ -193,19 +207,19 @@ class ChatBot:
if not mark_head:
mark_head = True
message_set.add_message(bot_message)
-
- #message_set 可以直接加入 message_manager
+
+ # message_set 可以直接加入 message_manager
# print(f"\033[1;32m[回复]\033[0m 将回复载入发送容器")
message_manager.add_message(message_set)
-
+
bot_response_time = tinking_time_point
if random() < global_config.emoji_chance:
emoji_raw = await emoji_manager.get_emoji_for_text(response)
-
+
# 检查是否 <没有找到> emoji
if emoji_raw != None:
- emoji_path,discription = emoji_raw
+ emoji_path, description = emoji_raw
emoji_cq = image_path_to_base64(emoji_path)
@@ -226,8 +240,8 @@ class ChatBot:
)
message_manager.add_message(bot_message)
emotion = await self.gpt._get_emotion_tags(raw_content)
- print(f"为 '{response}' 获取到的情感标签为:{emotion}")
- valuedict={
+ logger.debug(f"为 '{response}' 获取到的情感标签为:{emotion}")
+ valuedict = {
'happy': 0.5,
'angry': -1,
'sad': -0.5,
@@ -240,9 +254,10 @@ class ChatBot:
# 使用情绪管理器更新情绪
self.mood_manager.update_mood_from_emotion(emotion[0], global_config.mood_intensity_factor)
- willing_manager.change_reply_willing_after_sent(
- chat_stream=chat
- )
+ # willing_manager.change_reply_willing_after_sent(
+ # chat_stream=chat
+ # )
+
# 创建全局ChatBot实例
-chat_bot = ChatBot()
\ No newline at end of file
+chat_bot = ChatBot()
diff --git a/src/plugins/chat/config.py b/src/plugins/chat/config.py
index fd65c116d..7aed9eee8 100644
--- a/src/plugins/chat/config.py
+++ b/src/plugins/chat/config.py
@@ -1,46 +1,54 @@
import os
from dataclasses import dataclass, field
-from typing import Dict, Optional
+from typing import Dict, List, Optional
import tomli
from loguru import logger
+from packaging import version
+from packaging.version import Version, InvalidVersion
+from packaging.specifiers import SpecifierSet, InvalidSpecifier
@dataclass
class BotConfig:
- """机器人配置类"""
+ """机器人配置类"""
+
+ INNER_VERSION: Version = None
+
BOT_QQ: Optional[int] = 1
BOT_NICKNAME: Optional[str] = None
-
+ BOT_ALIAS_NAMES: List[str] = field(default_factory=list) # 别名,可以通过这个叫它
+
# 消息处理相关配置
MIN_TEXT_LENGTH: int = 2 # 最小处理文本长度
MAX_CONTEXT_SIZE: int = 15 # 上下文最大消息数
emoji_chance: float = 0.2 # 发送表情包的基础概率
-
+
ENABLE_PIC_TRANSLATE: bool = True # 是否启用图片翻译
-
+
talk_allowed_groups = set()
talk_frequency_down_groups = set()
thinking_timeout: int = 100 # 思考时间
-
+
response_willing_amplifier: float = 1.0 # 回复意愿放大系数
response_interested_rate_amplifier: float = 1.0 # 回复兴趣度放大系数
down_frequency_rate: float = 3.5 # 降低回复频率的群组回复意愿降低系数
-
+
ban_user_id = set()
-
+
build_memory_interval: int = 30 # 记忆构建间隔(秒)
forget_memory_interval: int = 300 # 记忆遗忘间隔(秒)
EMOJI_CHECK_INTERVAL: int = 120 # 表情包检查间隔(分钟)
EMOJI_REGISTER_INTERVAL: int = 10 # 表情包注册间隔(分钟)
EMOJI_SAVE: bool = True # 偷表情包
- EMOJI_CHECK: bool = False #是否开启过滤
- EMOJI_CHECK_PROMPT: str = "符合公序良俗" # 表情包过滤要求
+ EMOJI_CHECK: bool = False # 是否开启过滤
+ EMOJI_CHECK_PROMPT: str = "符合公序良俗" # 表情包过滤要求
ban_words = set()
+ ban_msgs_regex = set()
max_response_length: int = 1024 # 最大回复长度
-
+
# 模型配置
llm_reasoning: Dict[str, str] = field(default_factory=lambda: {})
llm_reasoning_minor: Dict[str, str] = field(default_factory=lambda: {})
@@ -56,176 +64,359 @@ class BotConfig:
MODEL_R1_PROBABILITY: float = 0.8 # R1模型概率
MODEL_V3_PROBABILITY: float = 0.1 # V3模型概率
MODEL_R1_DISTILL_PROBABILITY: float = 0.1 # R1蒸馏模型概率
-
+
enable_advance_output: bool = False # 是否启用高级输出
- enable_kuuki_read: bool = True # 是否启用读空气功能
-
- mood_update_interval: float = 1.0 # 情绪更新间隔 单位秒
- mood_decay_rate: float = 0.95 # 情绪衰减率
- mood_intensity_factor: float = 0.7 # 情绪强度因子
+ enable_kuuki_read: bool = True # 是否启用读空气功能
+
+ mood_update_interval: float = 1.0 # 情绪更新间隔 单位秒
+ mood_decay_rate: float = 0.95 # 情绪衰减率
+ mood_intensity_factor: float = 0.7 # 情绪强度因子
+
+ keywords_reaction_rules = [] # 关键词回复规则
+
+ chinese_typo_enable = True # 是否启用中文错别字生成器
+ chinese_typo_error_rate = 0.03 # 单字替换概率
+ chinese_typo_min_freq = 7 # 最小字频阈值
+ chinese_typo_tone_error_rate = 0.2 # 声调错误概率
+ chinese_typo_word_replace_rate = 0.02 # 整词替换概率
# 默认人设
- PROMPT_PERSONALITY=[
+ PROMPT_PERSONALITY = [
"曾经是一个学习地质的女大学生,现在学习心理学和脑科学,你会刷贴吧",
"是一个女大学生,你有黑色头发,你会刷小红书",
- "是一个女大学生,你会刷b站,对ACG文化感兴趣"
+ "是一个女大学生,你会刷b站,对ACG文化感兴趣",
]
- PROMPT_SCHEDULE_GEN="一个曾经学习地质,现在学习心理学和脑科学的女大学生,喜欢刷qq,贴吧,知乎和小红书"
-
- PERSONALITY_1: float = 0.6 # 第一种人格概率
- PERSONALITY_2: float = 0.3 # 第二种人格概率
- PERSONALITY_3: float = 0.1 # 第三种人格概率
-
+
+ PROMPT_SCHEDULE_GEN = "一个曾经学习地质,现在学习心理学和脑科学的女大学生,喜欢刷qq,贴吧,知乎和小红书"
+
+ PERSONALITY_1: float = 0.6 # 第一种人格概率
+ PERSONALITY_2: float = 0.3 # 第二种人格概率
+ PERSONALITY_3: float = 0.1 # 第三种人格概率
+
+ memory_ban_words: list = field(
+ default_factory=lambda: ["表情包", "图片", "回复", "聊天记录"]
+ ) # 添加新的配置项默认值
+
@staticmethod
def get_config_dir() -> str:
"""获取配置文件目录"""
current_dir = os.path.dirname(os.path.abspath(__file__))
- root_dir = os.path.abspath(os.path.join(current_dir, '..', '..', '..'))
- config_dir = os.path.join(root_dir, 'config')
+ root_dir = os.path.abspath(os.path.join(current_dir, "..", "..", ".."))
+ config_dir = os.path.join(root_dir, "config")
if not os.path.exists(config_dir):
os.makedirs(config_dir)
return config_dir
-
+ @classmethod
+ def convert_to_specifierset(cls, value: str) -> SpecifierSet:
+ """将 字符串 版本表达式转换成 SpecifierSet
+ Args:
+ value[str]: 版本表达式(字符串)
+ Returns:
+ SpecifierSet
+ """
+
+ try:
+ converted = SpecifierSet(value)
+ except InvalidSpecifier:
+            logger.error(f"{value} 分类使用了错误的版本约束表达式,请阅读 https://semver.org/lang/zh-CN/ 修改代码")
+ exit(1)
+
+ return converted
+
+ @classmethod
+ def get_config_version(cls, toml: dict) -> Version:
+        """提取配置文件的版本号
+ Args:
+ toml[dict]: 输入的配置文件字典
+ Returns:
+ Version
+ """
+
+ if "inner" in toml:
+ try:
+ config_version: str = toml["inner"]["version"]
+ except KeyError as e:
+ logger.error("配置文件中 inner 段 不存在, 这是错误的配置文件")
+ raise KeyError(f"配置文件中 inner 段 不存在 {e}, 这是错误的配置文件") from e
+ else:
+ toml["inner"] = {"version": "0.0.0"}
+ config_version = toml["inner"]["version"]
+
+ try:
+ ver = version.parse(config_version)
+ except InvalidVersion as e:
+ logger.error(
+ "配置文件中 inner段 的 version 键是错误的版本描述\n"
+ "请阅读 https://semver.org/lang/zh-CN/ 修改配置,并参考本项目指定的模板进行修改\n"
+ "本项目在不同的版本下有不同的模板,请注意识别"
+ )
+ raise InvalidVersion("配置文件中 inner段 的 version 键是错误的版本描述\n") from e
+
+ return ver
+
@classmethod
def load_config(cls, config_path: str = None) -> "BotConfig":
"""从TOML配置文件加载配置"""
config = cls()
+
+ def personality(parent: dict):
+ personality_config = parent["personality"]
+ personality = personality_config.get("prompt_personality")
+            if personality and len(personality) >= 2:
+ logger.debug(f"载入自定义人格:{personality}")
+ config.PROMPT_PERSONALITY = personality_config.get("prompt_personality", config.PROMPT_PERSONALITY)
+ logger.info(f"载入自定义日程prompt:{personality_config.get('prompt_schedule', config.PROMPT_SCHEDULE_GEN)}")
+ config.PROMPT_SCHEDULE_GEN = personality_config.get("prompt_schedule", config.PROMPT_SCHEDULE_GEN)
+
+ if config.INNER_VERSION in SpecifierSet(">=0.0.2"):
+ config.PERSONALITY_1 = personality_config.get("personality_1_probability", config.PERSONALITY_1)
+ config.PERSONALITY_2 = personality_config.get("personality_2_probability", config.PERSONALITY_2)
+ config.PERSONALITY_3 = personality_config.get("personality_3_probability", config.PERSONALITY_3)
+
+ def emoji(parent: dict):
+ emoji_config = parent["emoji"]
+ config.EMOJI_CHECK_INTERVAL = emoji_config.get("check_interval", config.EMOJI_CHECK_INTERVAL)
+ config.EMOJI_REGISTER_INTERVAL = emoji_config.get("register_interval", config.EMOJI_REGISTER_INTERVAL)
+ config.EMOJI_CHECK_PROMPT = emoji_config.get("check_prompt", config.EMOJI_CHECK_PROMPT)
+ config.EMOJI_SAVE = emoji_config.get("auto_save", config.EMOJI_SAVE)
+ config.EMOJI_CHECK = emoji_config.get("enable_check", config.EMOJI_CHECK)
+
+ def cq_code(parent: dict):
+ cq_code_config = parent["cq_code"]
+ config.ENABLE_PIC_TRANSLATE = cq_code_config.get("enable_pic_translate", config.ENABLE_PIC_TRANSLATE)
+
+ def bot(parent: dict):
+ # 机器人基础配置
+ bot_config = parent["bot"]
+ bot_qq = bot_config.get("qq")
+ if bot_qq is None:
+ raise KeyError("配置文件中 bot.qq 不存在,请检查")
+ config.BOT_QQ = int(bot_qq)
+ config.BOT_NICKNAME = bot_config.get("nickname", config.BOT_NICKNAME)
+
+ if config.INNER_VERSION in SpecifierSet(">=0.0.5"):
+ config.BOT_ALIAS_NAMES = bot_config.get("alias_names", config.BOT_ALIAS_NAMES)
+
+ def response(parent: dict):
+ response_config = parent["response"]
+ config.MODEL_R1_PROBABILITY = response_config.get("model_r1_probability", config.MODEL_R1_PROBABILITY)
+ config.MODEL_V3_PROBABILITY = response_config.get("model_v3_probability", config.MODEL_V3_PROBABILITY)
+ config.MODEL_R1_DISTILL_PROBABILITY = response_config.get(
+ "model_r1_distill_probability", config.MODEL_R1_DISTILL_PROBABILITY
+ )
+ config.max_response_length = response_config.get("max_response_length", config.max_response_length)
+
+ def model(parent: dict):
+ # 加载模型配置
+ model_config: dict = parent["model"]
+
+ config_list = [
+ "llm_reasoning",
+ "llm_reasoning_minor",
+ "llm_normal",
+ "llm_normal_minor",
+ "llm_topic_judge",
+ "llm_summary_by_topic",
+ "llm_emotion_judge",
+ "vlm",
+ "embedding",
+ "moderation",
+ ]
+
+ for item in config_list:
+ if item in model_config:
+ cfg_item: dict = model_config[item]
+
+ # base_url 的例子: SILICONFLOW_BASE_URL
+ # key 的例子: SILICONFLOW_KEY
+ cfg_target = {"name": "", "base_url": "", "key": "", "pri_in": 0, "pri_out": 0}
+
+ if config.INNER_VERSION in SpecifierSet("<=0.0.0"):
+ cfg_target = cfg_item
+
+ elif config.INNER_VERSION in SpecifierSet(">=0.0.1"):
+ stable_item = ["name", "pri_in", "pri_out"]
+ pricing_item = ["pri_in", "pri_out"]
+ # 从配置中原始拷贝稳定字段
+ for i in stable_item:
+ # 如果 字段 属于计费项 且获取不到,那默认值是 0
+ if i in pricing_item and i not in cfg_item:
+ cfg_target[i] = 0
+ else:
+ # 没有特殊情况则原样复制
+ try:
+ cfg_target[i] = cfg_item[i]
+ except KeyError as e:
+ logger.error(f"{item} 中的必要字段不存在,请检查")
+ raise KeyError(f"{item} 中的必要字段 {e} 不存在,请检查") from e
+
+ provider = cfg_item.get("provider")
+ if provider is None:
+ logger.error(f"provider 字段在模型配置 {item} 中不存在,请检查")
+ raise KeyError(f"provider 字段在模型配置 {item} 中不存在,请检查")
+
+ cfg_target["base_url"] = f"{provider}_BASE_URL"
+ cfg_target["key"] = f"{provider}_KEY"
+
+ # 如果 列表中的项目在 model_config 中,利用反射来设置对应项目
+ setattr(config, item, cfg_target)
+ else:
+ logger.error(f"模型 {item} 在config中不存在,请检查")
+ raise KeyError(f"模型 {item} 在config中不存在,请检查")
+
+ def message(parent: dict):
+ msg_config = parent["message"]
+ config.MIN_TEXT_LENGTH = msg_config.get("min_text_length", config.MIN_TEXT_LENGTH)
+ config.MAX_CONTEXT_SIZE = msg_config.get("max_context_size", config.MAX_CONTEXT_SIZE)
+ config.emoji_chance = msg_config.get("emoji_chance", config.emoji_chance)
+ config.ban_words = msg_config.get("ban_words", config.ban_words)
+
+ if config.INNER_VERSION in SpecifierSet(">=0.0.2"):
+ config.thinking_timeout = msg_config.get("thinking_timeout", config.thinking_timeout)
+ config.response_willing_amplifier = msg_config.get(
+ "response_willing_amplifier", config.response_willing_amplifier
+ )
+ config.response_interested_rate_amplifier = msg_config.get(
+ "response_interested_rate_amplifier", config.response_interested_rate_amplifier
+ )
+ config.down_frequency_rate = msg_config.get("down_frequency_rate", config.down_frequency_rate)
+
+ if config.INNER_VERSION in SpecifierSet(">=0.0.6"):
+ config.ban_msgs_regex = msg_config.get("ban_msgs_regex", config.ban_msgs_regex)
+
+ def memory(parent: dict):
+ memory_config = parent["memory"]
+ config.build_memory_interval = memory_config.get("build_memory_interval", config.build_memory_interval)
+ config.forget_memory_interval = memory_config.get("forget_memory_interval", config.forget_memory_interval)
+
+ # 在版本 >= 0.0.4 时才处理新增的配置项
+ if config.INNER_VERSION in SpecifierSet(">=0.0.4"):
+ config.memory_ban_words = set(memory_config.get("memory_ban_words", config.memory_ban_words))
+
+ def mood(parent: dict):
+ mood_config = parent["mood"]
+ config.mood_update_interval = mood_config.get("mood_update_interval", config.mood_update_interval)
+ config.mood_decay_rate = mood_config.get("mood_decay_rate", config.mood_decay_rate)
+ config.mood_intensity_factor = mood_config.get("mood_intensity_factor", config.mood_intensity_factor)
+
+ def keywords_reaction(parent: dict):
+ keywords_reaction_config = parent["keywords_reaction"]
+ if keywords_reaction_config.get("enable", False):
+ config.keywords_reaction_rules = keywords_reaction_config.get("rules", config.keywords_reaction_rules)
+
+ def chinese_typo(parent: dict):
+ chinese_typo_config = parent["chinese_typo"]
+ config.chinese_typo_enable = chinese_typo_config.get("enable", config.chinese_typo_enable)
+ config.chinese_typo_error_rate = chinese_typo_config.get("error_rate", config.chinese_typo_error_rate)
+ config.chinese_typo_min_freq = chinese_typo_config.get("min_freq", config.chinese_typo_min_freq)
+ config.chinese_typo_tone_error_rate = chinese_typo_config.get(
+ "tone_error_rate", config.chinese_typo_tone_error_rate
+ )
+ config.chinese_typo_word_replace_rate = chinese_typo_config.get(
+ "word_replace_rate", config.chinese_typo_word_replace_rate
+ )
+
+ def groups(parent: dict):
+ groups_config = parent["groups"]
+ config.talk_allowed_groups = set(groups_config.get("talk_allowed", []))
+ config.talk_frequency_down_groups = set(groups_config.get("talk_frequency_down", []))
+ config.ban_user_id = set(groups_config.get("ban_user_id", []))
+
+ def others(parent: dict):
+ others_config = parent["others"]
+ config.enable_advance_output = others_config.get("enable_advance_output", config.enable_advance_output)
+ config.enable_kuuki_read = others_config.get("enable_kuuki_read", config.enable_kuuki_read)
+
+ # 版本表达式:>=1.0.0,<2.0.0
+ # 允许字段:func: method, support: str, notice: str, necessary: bool
+ # 如果使用 notice 字段,在该组配置加载时,会展示该字段对用户的警示
+ # 例如:"notice": "personality 将在 1.3.2 后被移除",那么在有效版本内的用户虽然可以
+ # 正常执行程序,但是会看到这条自定义提示
+ include_configs = {
+ "personality": {"func": personality, "support": ">=0.0.0"},
+ "emoji": {"func": emoji, "support": ">=0.0.0"},
+ "cq_code": {"func": cq_code, "support": ">=0.0.0"},
+ "bot": {"func": bot, "support": ">=0.0.0"},
+ "response": {"func": response, "support": ">=0.0.0"},
+ "model": {"func": model, "support": ">=0.0.0"},
+ "message": {"func": message, "support": ">=0.0.0"},
+ "memory": {"func": memory, "support": ">=0.0.0", "necessary": False},
+ "mood": {"func": mood, "support": ">=0.0.0"},
+ "keywords_reaction": {"func": keywords_reaction, "support": ">=0.0.2", "necessary": False},
+ "chinese_typo": {"func": chinese_typo, "support": ">=0.0.3", "necessary": False},
+ "groups": {"func": groups, "support": ">=0.0.0"},
+ "others": {"func": others, "support": ">=0.0.0"},
+ }
+
+ # 原地修改,将 字符串版本表达式 转换成 版本对象
+ for key in include_configs:
+ item_support = include_configs[key]["support"]
+ include_configs[key]["support"] = cls.convert_to_specifierset(item_support)
+
if os.path.exists(config_path):
with open(config_path, "rb") as f:
try:
toml_dict = tomli.load(f)
- except(tomli.TOMLDecodeError) as e:
+ except tomli.TOMLDecodeError as e:
logger.critical(f"配置文件bot_config.toml填写有误,请检查第{e.lineno}行第{e.colno}处:{e.msg}")
exit(1)
-
- if 'personality' in toml_dict:
- personality_config=toml_dict['personality']
- personality=personality_config.get('prompt_personality')
- if len(personality) >= 2:
- logger.info(f"载入自定义人格:{personality}")
- config.PROMPT_PERSONALITY=personality_config.get('prompt_personality',config.PROMPT_PERSONALITY)
- logger.info(f"载入自定义日程prompt:{personality_config.get('prompt_schedule',config.PROMPT_SCHEDULE_GEN)}")
- config.PROMPT_SCHEDULE_GEN=personality_config.get('prompt_schedule',config.PROMPT_SCHEDULE_GEN)
- config.PERSONALITY_1=personality_config.get('personality_1_probability',config.PERSONALITY_1)
- config.PERSONALITY_2=personality_config.get('personality_2_probability',config.PERSONALITY_2)
- config.PERSONALITY_3=personality_config.get('personality_3_probability',config.PERSONALITY_3)
- if "emoji" in toml_dict:
- emoji_config = toml_dict["emoji"]
- config.EMOJI_CHECK_INTERVAL = emoji_config.get("check_interval", config.EMOJI_CHECK_INTERVAL)
- config.EMOJI_REGISTER_INTERVAL = emoji_config.get("register_interval", config.EMOJI_REGISTER_INTERVAL)
- config.EMOJI_CHECK_PROMPT = emoji_config.get('check_prompt',config.EMOJI_CHECK_PROMPT)
- config.EMOJI_SAVE = emoji_config.get('auto_save',config.EMOJI_SAVE)
- config.EMOJI_CHECK = emoji_config.get('enable_check',config.EMOJI_CHECK)
-
- if "cq_code" in toml_dict:
- cq_code_config = toml_dict["cq_code"]
- config.ENABLE_PIC_TRANSLATE = cq_code_config.get("enable_pic_translate", config.ENABLE_PIC_TRANSLATE)
-
- # 机器人基础配置
- if "bot" in toml_dict:
- bot_config = toml_dict["bot"]
- bot_qq = bot_config.get("qq")
- config.BOT_QQ = int(bot_qq)
- config.BOT_NICKNAME = bot_config.get("nickname", config.BOT_NICKNAME)
-
- if "response" in toml_dict:
- response_config = toml_dict["response"]
- config.MODEL_R1_PROBABILITY = response_config.get("model_r1_probability", config.MODEL_R1_PROBABILITY)
- config.MODEL_V3_PROBABILITY = response_config.get("model_v3_probability", config.MODEL_V3_PROBABILITY)
- config.MODEL_R1_DISTILL_PROBABILITY = response_config.get("model_r1_distill_probability", config.MODEL_R1_DISTILL_PROBABILITY)
- config.max_response_length = response_config.get("max_response_length", config.max_response_length)
-
- # 加载模型配置
- if "model" in toml_dict:
- model_config = toml_dict["model"]
-
- if "llm_reasoning" in model_config:
- config.llm_reasoning = model_config["llm_reasoning"]
-
- if "llm_reasoning_minor" in model_config:
- config.llm_reasoning_minor = model_config["llm_reasoning_minor"]
-
- if "llm_normal" in model_config:
- config.llm_normal = model_config["llm_normal"]
-
- if "llm_normal_minor" in model_config:
- config.llm_normal_minor = model_config["llm_normal_minor"]
-
- if "llm_topic_judge" in model_config:
- config.llm_topic_judge = model_config["llm_topic_judge"]
-
- if "llm_summary_by_topic" in model_config:
- config.llm_summary_by_topic = model_config["llm_summary_by_topic"]
-
- if "llm_emotion_judge" in model_config:
- config.llm_emotion_judge = model_config["llm_emotion_judge"]
-
- if "vlm" in model_config:
- config.vlm = model_config["vlm"]
-
- if "embedding" in model_config:
- config.embedding = model_config["embedding"]
-
- if "moderation" in model_config:
- config.moderation = model_config["moderation"]
-
- # 消息配置
- if "message" in toml_dict:
- msg_config = toml_dict["message"]
- config.MIN_TEXT_LENGTH = msg_config.get("min_text_length", config.MIN_TEXT_LENGTH)
- config.MAX_CONTEXT_SIZE = msg_config.get("max_context_size", config.MAX_CONTEXT_SIZE)
- config.emoji_chance = msg_config.get("emoji_chance", config.emoji_chance)
- config.ban_words=msg_config.get("ban_words",config.ban_words)
- config.thinking_timeout = msg_config.get("thinking_timeout", config.thinking_timeout)
- config.response_willing_amplifier = msg_config.get("response_willing_amplifier", config.response_willing_amplifier)
- config.response_interested_rate_amplifier = msg_config.get("response_interested_rate_amplifier", config.response_interested_rate_amplifier)
- config.down_frequency_rate = msg_config.get("down_frequency_rate", config.down_frequency_rate)
+ # 获取配置文件版本
+ config.INNER_VERSION = cls.get_config_version(toml_dict)
+
+ # 如果在配置中找到了需要的项,调用对应项的闭包函数处理
+ for key in include_configs:
+ if key in toml_dict:
+ group_specifierset: SpecifierSet = include_configs[key]["support"]
+
+ # 检查配置文件版本是否在支持范围内
+ if config.INNER_VERSION in group_specifierset:
+ # 如果版本在支持范围内,检查是否存在通知
+ if "notice" in include_configs[key]:
+ logger.warning(include_configs[key]["notice"])
+
+ include_configs[key]["func"](toml_dict)
+
+ else:
+ # 如果版本不在支持范围内,崩溃并提示用户
+ logger.error(
+ f"配置文件中的 '{key}' 字段的版本 ({config.INNER_VERSION}) 不在支持范围内。\n"
+ f"当前程序仅支持以下版本范围: {group_specifierset}"
+ )
+ raise InvalidVersion(f"当前程序仅支持以下版本范围: {group_specifierset}")
+
+ # 如果 necessary 显式声明为 False,说明该配置组是可选的,缺失时直接跳过
+ elif include_configs[key].get("necessary") is False:
+ pass
+
+ else:
+ # 如果用户根本没有需要的配置项,提示缺少配置
+ logger.error(f"配置文件中缺少必需的字段: '{key}'")
+ raise KeyError(f"配置文件中缺少必需的字段: '{key}'")
+
+ logger.success(f"成功加载配置文件: {config_path}")
+
+ return config
+
- if "memory" in toml_dict:
- memory_config = toml_dict["memory"]
- config.build_memory_interval = memory_config.get("build_memory_interval", config.build_memory_interval)
- config.forget_memory_interval = memory_config.get("forget_memory_interval", config.forget_memory_interval)
-
- if "mood" in toml_dict:
- mood_config = toml_dict["mood"]
- config.mood_update_interval = mood_config.get("mood_update_interval", config.mood_update_interval)
- config.mood_decay_rate = mood_config.get("mood_decay_rate", config.mood_decay_rate)
- config.mood_intensity_factor = mood_config.get("mood_intensity_factor", config.mood_intensity_factor)
-
- # 群组配置
- if "groups" in toml_dict:
- groups_config = toml_dict["groups"]
- config.talk_allowed_groups = set(groups_config.get("talk_allowed", []))
- config.talk_frequency_down_groups = set(groups_config.get("talk_frequency_down", []))
- config.ban_user_id = set(groups_config.get("ban_user_id", []))
-
- if "others" in toml_dict:
- others_config = toml_dict["others"]
- config.enable_advance_output = others_config.get("enable_advance_output", config.enable_advance_output)
- config.enable_kuuki_read = others_config.get("enable_kuuki_read", config.enable_kuuki_read)
-
- logger.success(f"成功加载配置文件: {config_path}")
-
- return config
-
# 获取配置文件路径
-
bot_config_floder_path = BotConfig.get_config_dir()
-print(f"正在品鉴配置文件目录: {bot_config_floder_path}")
+logger.debug(f"正在品鉴配置文件目录: {bot_config_floder_path}")
+
bot_config_path = os.path.join(bot_config_floder_path, "bot_config.toml")
+
if os.path.exists(bot_config_path):
# 如果开发环境配置文件不存在,则使用默认配置文件
- print(f"异常的新鲜,异常的美味: {bot_config_path}")
+ logger.debug(f"异常的新鲜,异常的美味: {bot_config_path}")
logger.info("使用bot配置文件")
else:
- logger.info("没有找到美味")
+ # 配置文件不存在
+ logger.error(f"配置文件不存在,请检查路径: {bot_config_path}")
+ raise FileNotFoundError(f"配置文件不存在: {bot_config_path}")
global_config = BotConfig.load_config(config_path=bot_config_path)
-
if not global_config.enable_advance_output:
logger.remove()
pass
-
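
上面 load_config 的核心是"按版本门控配置组":每个配置组声明自己支持的配置文件版本范围,加载时用 packaging 的 SpecifierSet 判断当前版本是否在范围内,可选组(necessary 为 False)缺失时跳过,必需组缺失时报错。下面是脱离本项目的最小示意(组名、版本号均为假设,仅演示 packaging 的用法):

    from packaging.specifiers import SpecifierSet
    from packaging.version import Version

    # 每个配置组声明支持的版本范围,necessary=False 表示可选
    include_configs = {
        "memory": {"support": SpecifierSet(">=0.0.4"), "necessary": False},
        "groups": {"support": SpecifierSet(">=0.0.0")},
    }

    toml_dict = {"groups": {"talk_allowed": [123456]}}  # 假设已用 tomli 读入
    config_version = Version("0.0.5")                   # 假设来自 inner.version

    for key, meta in include_configs.items():
        if key in toml_dict:
            if config_version not in meta["support"]:
                raise ValueError(f"'{key}' 不支持配置版本 {config_version}")
            print(f"加载配置组 {key}")
        elif meta.get("necessary") is False:
            pass  # 可选配置组缺失时直接跳过
        else:
            raise KeyError(f"配置文件中缺少必需的字段: '{key}'")
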
diff --git a/src/plugins/chat/cq_code.py b/src/plugins/chat/cq_code.py
index 6030b893f..d0f50c5ae 100644
--- a/src/plugins/chat/cq_code.py
+++ b/src/plugins/chat/cq_code.py
@@ -170,11 +170,11 @@ class CQCode:
except (requests.exceptions.SSLError, requests.exceptions.HTTPError) as e:
if retry == max_retries - 1:
- print(f"\033[1;31m[致命错误]\033[0m 最终请求失败: {str(e)}")
+ logger.error(f"最终请求失败: {str(e)}")
time.sleep(1.5**retry) # 指数退避
- except Exception as e:
- print(f"\033[1;33m[未知错误]\033[0m {str(e)}")
+ except Exception:
+ logger.exception("[未知错误]")
return None
return None
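
上面 cq_code.py 的重试用的是 1.5**retry 的指数退避:失败次数越多,等待越久。一个独立可运行的小示意(URL 与超时参数为假设):

    import time
    import requests
    from loguru import logger

    def fetch_with_backoff(url: str, max_retries: int = 3):
        """带指数退避的请求重试:第 retry 次失败后等待 1.5**retry 秒"""
        for retry in range(max_retries):
            try:
                resp = requests.get(url, timeout=10)
                resp.raise_for_status()
                return resp
            except (requests.exceptions.SSLError, requests.exceptions.HTTPError) as e:
                if retry == max_retries - 1:
                    logger.error(f"最终请求失败: {e}")
                else:
                    time.sleep(1.5 ** retry)  # 指数退避
        return None
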
diff --git a/src/plugins/chat/emoji_manager.py b/src/plugins/chat/emoji_manager.py
index f3728ce92..a26f4dc4b 100644
--- a/src/plugins/chat/emoji_manager.py
+++ b/src/plugins/chat/emoji_manager.py
@@ -24,24 +24,26 @@ image_manager = ImageManager()
class EmojiManager:
_instance = None
EMOJI_DIR = "data/emoji" # 表情包存储目录
-
+
def __new__(cls):
if cls._instance is None:
cls._instance = super().__new__(cls)
cls._instance.db = None
cls._instance._initialized = False
return cls._instance
-
+
def __init__(self):
self.db = Database.get_instance()
self._scan_task = None
self.vlm = LLM_request(model=global_config.vlm, temperature=0.3, max_tokens=1000)
- self.llm_emotion_judge = LLM_request(model=global_config.llm_normal_minor, max_tokens=60,temperature=0.8) #更高的温度,更少的token(后续可以根据情绪来调整温度)
-
+ self.llm_emotion_judge = LLM_request(model=global_config.llm_normal_minor, max_tokens=60,
+ temperature=0.8) # 更高的温度,更少的token(后续可以根据情绪来调整温度)
+
+
def _ensure_emoji_dir(self):
"""确保表情存储目录存在"""
os.makedirs(self.EMOJI_DIR, exist_ok=True)
-
+
def initialize(self):
"""初始化数据库连接和表情目录"""
if not self._initialized:
@@ -52,16 +54,16 @@ class EmojiManager:
self._initialized = True
# 启动时执行一次完整性检查
self.check_emoji_file_integrity()
- except Exception as e:
- logger.error(f"初始化表情管理器失败: {str(e)}")
-
+ except Exception:
+ logger.exception("初始化表情管理器失败")
+
def _ensure_db(self):
"""确保数据库已初始化"""
if not self._initialized:
self.initialize()
if not self._initialized:
raise RuntimeError("EmojiManager not initialized")
-
+
def _ensure_emoji_collection(self):
"""确保emoji集合存在并创建索引
@@ -78,7 +80,7 @@ class EmojiManager:
self.db.db.create_collection('emoji')
self.db.db.emoji.create_index([('embedding', '2dsphere')])
self.db.db.emoji.create_index([('filename', 1)], unique=True)
-
+
def record_usage(self, emoji_id: str):
"""记录表情使用次数"""
try:
@@ -104,9 +106,9 @@ class EmojiManager:
"""
try:
self._ensure_db()
-
+
# 获取文本的embedding
- text_for_search= await self._get_kimoji_for_text(text)
+ text_for_search = await self._get_kimoji_for_text(text)
if not text_for_search:
logger.error("无法获取文本的情绪")
return None
@@ -114,15 +116,15 @@ class EmojiManager:
if not text_embedding:
logger.error("无法获取文本的embedding")
return None
-
+
try:
# 获取所有表情包
- all_emojis = list(self.db.db.emoji.find({}, {'_id': 1, 'path': 1, 'embedding': 1, 'discription': 1}))
-
+ all_emojis = list(self.db.db.emoji.find({}, {'_id': 1, 'path': 1, 'embedding': 1, 'description': 1}))
+
if not all_emojis:
logger.warning("数据库中没有任何表情包")
return None
-
+
# 计算余弦相似度并排序
def cosine_similarity(v1, v2):
if not v1 or not v2:
@@ -133,23 +135,23 @@ class EmojiManager:
if norm_v1 == 0 or norm_v2 == 0:
return 0
return dot_product / (norm_v1 * norm_v2)
-
+
# 计算所有表情包与输入文本的相似度
emoji_similarities = [
(emoji, cosine_similarity(text_embedding, emoji.get('embedding', [])))
for emoji in all_emojis
]
-
+
# 按相似度降序排序
emoji_similarities.sort(key=lambda x: x[1], reverse=True)
-
+
# 获取前3个最相似的表情包
top_10_emojis = emoji_similarities[:10 if len(emoji_similarities) > 10 else len(emoji_similarities)]
if not top_10_emojis:
logger.warning("未找到匹配的表情包")
return None
-
+
# 从前3个中随机选择一个
selected_emoji, similarity = random.choice(top_10_emojis)
@@ -159,16 +161,17 @@ class EmojiManager:
{'_id': selected_emoji['_id']},
{'$inc': {'usage_count': 1}}
)
- logger.success(f"找到匹配的表情包: {selected_emoji.get('discription', '无描述')} (相似度: {similarity:.4f})")
+ logger.success(
+ f"找到匹配的表情包: {selected_emoji.get('description', '无描述')} (相似度: {similarity:.4f})")
# 稍微改一下文本描述,不然容易产生幻觉,描述已经包含 表情包 了
- return selected_emoji['path'],"[ %s ]" % selected_emoji.get('discription', '无描述')
-
+ return selected_emoji['path'], "[ %s ]" % selected_emoji.get('description', '无描述')
+
except Exception as search_error:
logger.error(f"搜索表情包失败: {str(search_error)}")
return None
-
+
return None
-
+
except Exception as e:
logger.error(f"获取表情包失败: {str(e)}")
return None
@@ -185,31 +188,31 @@ class EmojiManager:
except Exception as e:
logger.error(f"获取标签失败: {str(e)}")
return None
-
+
async def _check_emoji(self, image_base64: str) -> str:
try:
prompt = f'这是一个表情包,请回答这个表情包是否满足\"{global_config.EMOJI_CHECK_PROMPT}\"的要求,是则回答是,否则回答否,不要出现任何其他内容'
-
+
content, _ = await self.vlm.generate_response_for_image(prompt, image_base64)
logger.debug(f"输出描述: {content}")
return content
-
+
except Exception as e:
logger.error(f"获取标签失败: {str(e)}")
return None
-
- async def _get_kimoji_for_text(self, text:str):
+
+ async def _get_kimoji_for_text(self, text: str):
try:
prompt = f'这是{global_config.BOT_NICKNAME}将要发送的消息内容:\n{text}\n若要为其配上表情包,请你输出这个表情包应该表达怎样的情感,应该给人什么样的感觉,不要太简洁也不要太长,注意不要输出任何对消息内容的分析内容,只输出\"一种什么样的感觉\"中间的形容词部分。'
-
- content, _ = await self.llm_emotion_judge.generate_response_async(prompt)
+
+ content, _ = await self.llm_emotion_judge.generate_response_async(prompt, temperature=1.5)
logger.info(f"输出描述: {content}")
return content
-
+
except Exception as e:
logger.error(f"获取标签失败: {str(e)}")
return None
-
+
async def scan_new_emojis(self):
"""扫描新的表情包"""
try:
@@ -217,8 +220,9 @@ class EmojiManager:
os.makedirs(emoji_dir, exist_ok=True)
# 获取所有支持的图片文件
- files_to_process = [f for f in os.listdir(emoji_dir) if f.lower().endswith(('.jpg', '.jpeg', '.png', '.gif'))]
-
+ files_to_process = [f for f in os.listdir(emoji_dir) if
+ f.lower().endswith(('.jpg', '.jpeg', '.png', '.gif'))]
+
for filename in files_to_process:
image_path = os.path.join(emoji_dir, filename)
@@ -273,10 +277,14 @@ class EmojiManager:
if '是' not in check:
os.remove(image_path)
logger.info(f"描述: {description}")
logger.info(f"其不满足过滤规则,被剔除 {check}")
continue
logger.info(f"check通过 {check}")
if description is not None:
embedding = await get_embedding(description)
# 准备数据库记录
@@ -312,19 +320,17 @@ class EmojiManager:
logger.success(f"同步保存到images集合: {filename}")
else:
logger.warning(f"跳过表情包: {filename}")
-
- except Exception as e:
- logger.error(f"扫描表情包失败: {str(e)}")
- logger.error(traceback.format_exc())
-
+
+ except Exception:
+ logger.exception("扫描表情包失败")
+
async def _periodic_scan(self, interval_MINS: int = 10):
"""定期扫描新表情包"""
while True:
- print("\033[1;36m[表情包]\033[0m 开始扫描新表情包...")
+ logger.info("开始扫描新表情包...")
await self.scan_new_emojis()
await asyncio.sleep(interval_MINS * 60) # 每600秒扫描一次
-
def check_emoji_file_integrity(self):
"""检查表情包文件完整性
如果文件已被删除,则从数据库中移除对应记录
@@ -335,7 +341,7 @@ class EmojiManager:
all_emojis = list(self.db.db.emoji.find())
removed_count = 0
total_count = len(all_emojis)
-
+
for emoji in all_emojis:
try:
if 'path' not in emoji:
@@ -343,27 +349,27 @@ class EmojiManager:
self.db.db.emoji.delete_one({'_id': emoji['_id']})
removed_count += 1
continue
-
+
if 'embedding' not in emoji:
logger.warning(f"发现过时记录(缺少embedding字段),ID: {emoji.get('_id', 'unknown')}")
self.db.db.emoji.delete_one({'_id': emoji['_id']})
removed_count += 1
continue
-
+
# 检查文件是否存在
if not os.path.exists(emoji['path']):
logger.warning(f"表情包文件已被删除: {emoji['path']}")
# 从数据库中删除记录
result = self.db.db.emoji.delete_one({'_id': emoji['_id']})
if result.deleted_count > 0:
- logger.success(f"成功删除数据库记录: {emoji['_id']}")
+ logger.debug(f"成功删除数据库记录: {emoji['_id']}")
removed_count += 1
else:
logger.error(f"删除数据库记录失败: {emoji['_id']}")
except Exception as item_error:
logger.error(f"处理表情包记录时出错: {str(item_error)}")
continue
-
+
# 验证清理结果
remaining_count = self.db.db.emoji.count_documents({})
if removed_count > 0:
@@ -371,7 +377,7 @@ class EmojiManager:
logger.info(f"清理前总数: {total_count} | 清理后总数: {remaining_count}")
else:
logger.info(f"已检查 {total_count} 个表情包记录")
-
+
except Exception as e:
logger.error(f"检查表情包完整性失败: {str(e)}")
logger.error(traceback.format_exc())
@@ -382,6 +388,6 @@ class EmojiManager:
await asyncio.sleep(interval_MINS * 60)
-
# 创建全局单例
-emoji_manager = EmojiManager()
\ No newline at end of file
+emoji_manager = EmojiManager()
+
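
emoji_manager 的选图思路:把候选表情的 embedding 与文本 embedding 逐一算余弦相似度,按相似度取前若干名,再从中随机挑一个,在"贴切"与"多样"之间折中。纯 Python 最小示意(数据结构为简化假设):

    import math
    import random

    def cosine_similarity(v1, v2):
        dot = sum(a * b for a, b in zip(v1, v2))
        n1 = math.sqrt(sum(a * a for a in v1))
        n2 = math.sqrt(sum(b * b for b in v2))
        if n1 == 0 or n2 == 0:
            return 0.0
        return dot / (n1 * n2)

    def pick_emoji(text_embedding, all_emojis, top_k=10):
        """按相似度降序取前 top_k 个,再随机选一个,返回 (表情, 相似度)"""
        scored = [(e, cosine_similarity(text_embedding, e.get("embedding", [])))
                  for e in all_emojis]
        scored.sort(key=lambda x: x[1], reverse=True)
        candidates = scored[:top_k]
        return random.choice(candidates) if candidates else None
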
diff --git a/src/plugins/chat/llm_generator.py b/src/plugins/chat/llm_generator.py
index bfd5eec2e..517e8aa7a 100644
--- a/src/plugins/chat/llm_generator.py
+++ b/src/plugins/chat/llm_generator.py
@@ -3,6 +3,7 @@ import time
from typing import List, Optional, Tuple, Union
from nonebot import get_driver
+from loguru import logger
from ...common.database import Database
from ..models.utils_model import LLM_request
@@ -55,9 +56,7 @@ class ResponseGenerator:
self.current_model_type = "r1_distill"
current_model = self.model_r1_distill
- print(
- f"+++++++++++++++++{global_config.BOT_NICKNAME}{self.current_model_type}思考中+++++++++++++++++"
- )
+ logger.info(f"{global_config.BOT_NICKNAME}{self.current_model_type}思考中")
model_response = await self._generate_response_with_model(
message, current_model
@@ -65,7 +64,7 @@ class ResponseGenerator:
raw_content = model_response
if model_response:
- print(f"{global_config.BOT_NICKNAME}的回复是:{model_response}")
+ logger.info(f'{global_config.BOT_NICKNAME}的回复是:{model_response}')
model_response = await self._process_response(model_response)
if model_response:
return model_response, raw_content
@@ -122,8 +121,8 @@ class ResponseGenerator:
# 生成回复
try:
content, reasoning_content = await model.generate_response(prompt)
- except Exception as e:
- print(f"生成回复时出错: {e}")
+ except Exception:
+ logger.exception("生成回复时出错")
return None
# 保存到数据库
@@ -219,7 +218,7 @@ class InitiativeMessageGenerate:
prompt_builder._build_initiative_prompt_select(message.group_id)
)
content_select, reasoning = self.model_v3.generate_response(topic_select_prompt)
- print(f"[DEBUG] {content_select} {reasoning}")
+ logger.debug(f"{content_select} {reasoning}")
topics_list = [dot[0] for dot in dots_for_select]
if content_select:
if content_select in topics_list:
@@ -232,12 +231,12 @@ class InitiativeMessageGenerate:
select_dot[1], prompt_template
)
content_check, reasoning_check = self.model_v3.generate_response(prompt_check)
- print(f"[DEBUG] {content_check} {reasoning_check}")
+ logger.debug(f"{content_check} {reasoning_check}")
if "yes" not in content_check.lower():
return None
prompt = prompt_builder._build_initiative_prompt(
select_dot, prompt_template, memory
)
content, reasoning = self.model_r1.generate_response_async(prompt)
- print(f"[DEBUG] {content} {reasoning}")
+ logger.debug(f"{content} {reasoning}")
return content
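
llm_generator 按配置概率在 r1 / v3 / r1_distill 之间抽取模型,是典型的"累积概率区间"写法。示意如下(概率取值仅为示例,实际来自 response 配置组):

    import random

    def choose_model(p_r1=0.8, p_v3=0.1, p_r1_distill=0.1):
        """按累积概率区间抽取模型,三个概率之和应为1"""
        roll = random.random()
        if roll < p_r1:
            return "r1"
        if roll < p_r1 + p_v3:
            return "v3"
        return "r1_distill"

    print(choose_model())  # 约八成情况下输出 "r1"
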
diff --git a/src/plugins/chat/message_sender.py b/src/plugins/chat/message_sender.py
index 2c3880bb8..d5f710bbf 100644
--- a/src/plugins/chat/message_sender.py
+++ b/src/plugins/chat/message_sender.py
@@ -2,6 +2,7 @@ import asyncio
import time
from typing import Dict, List, Optional, Union
+from loguru import logger
from nonebot.adapters.onebot.v11 import Bot
from .cq_code import cq_code_tool
@@ -14,11 +15,12 @@ from .chat_stream import chat_manager
class Message_Sender:
"""发送器"""
+
def __init__(self):
self.message_interval = (0.5, 1) # 消息间隔时间范围(秒)
self.last_send_time = 0
self._current_bot = None
-
+
def set_bot(self, bot: Bot):
"""设置当前bot实例"""
self._current_bot = bot
@@ -41,10 +43,10 @@ class Message_Sender:
message=message_send.raw_message,
auto_escape=False
)
- print(f"\033[1;34m[调试]\033[0m 发送消息{message.processed_plain_text}成功")
+ logger.success(f"发送消息{message.processed_plain_text}成功")
except Exception as e:
- print(f"发生错误 {e}")
- print(f"\033[1;34m[调试]\033[0m 发送消息{message.processed_plain_text}失败")
+ logger.error(f"发生错误 {e}")
+ logger.error(f"发送消息{message.processed_plain_text}失败")
else:
try:
await self._current_bot.send_private_msg(
@@ -52,10 +54,10 @@ class Message_Sender:
message=message_send.raw_message,
auto_escape=False
)
- print(f"\033[1;34m[调试]\033[0m 发送消息{message.processed_plain_text}成功")
+ logger.success(f"发送消息{message.processed_plain_text}成功")
except Exception as e:
- print(f"发生错误 {e}")
- print(f"\033[1;34m[调试]\033[0m 发送消息{message.processed_plain_text}失败")
+ logger.error(f"发生错误 {e}")
+ logger.error(f"发送消息{message.processed_plain_text}失败")
class MessageContainer:
@@ -71,15 +73,15 @@ class MessageContainer:
"""获取所有超时的Message_Sending对象(思考时间超过30秒),按thinking_start_time排序"""
current_time = time.time()
timeout_messages = []
-
+
for msg in self.messages:
if isinstance(msg, MessageSending):
if current_time - msg.thinking_start_time > self.thinking_timeout:
timeout_messages.append(msg)
-
+
# 按thinking_start_time排序,时间早的在前面
timeout_messages.sort(key=lambda x: x.thinking_start_time)
-
+
return timeout_messages
def get_earliest_message(self) -> Optional[Union[MessageThinking, MessageSending]]:
@@ -88,11 +90,11 @@ class MessageContainer:
return None
earliest_time = float('inf')
earliest_message = None
- for msg in self.messages:
+ for msg in self.messages:
msg_time = msg.thinking_start_time
if msg_time < earliest_time:
earliest_time = msg_time
- earliest_message = msg
+ earliest_message = msg
return earliest_message
def add_message(self, message: Union[MessageThinking, MessageSending]) -> None:
@@ -110,10 +112,10 @@ class MessageContainer:
self.messages.remove(message)
return True
return False
- except Exception as e:
- print(f"\033[1;31m[错误]\033[0m 移除消息时发生错误: {e}")
+ except Exception:
+ logger.exception("移除消息时发生错误")
return False
-
+
def has_messages(self) -> bool:
"""检查是否有待发送的消息"""
return bool(self.messages)
@@ -121,7 +123,7 @@ class MessageContainer:
def get_all_messages(self) -> List[Union[MessageSending, MessageThinking]]:
"""获取所有消息"""
return list(self.messages)
-
+
class MessageManager:
"""管理所有聊天流的消息容器"""
@@ -152,11 +154,11 @@ class MessageManager:
if isinstance(message_earliest, MessageThinking):
message_earliest.update_thinking_time()
thinking_time = message_earliest.thinking_time
- print(f"\033[1;34m[调试]\033[0m 消息正在思考中,已思考{int(thinking_time)}秒\033[K\r", end='', flush=True)
-
+ print(f"消息正在思考中,已思考{int(thinking_time)}秒\r", end='', flush=True)
+
# 检查是否超时
if thinking_time > global_config.thinking_timeout:
- print(f"\033[1;33m[警告]\033[0m 消息思考超时({thinking_time}秒),移除该消息")
+ logger.warning(f"消息思考超时({thinking_time}秒),移除该消息")
container.remove_message(message_earliest)
else:
print(f"\033[1;34m[调试]\033[0m 消息'{message_earliest.processed_plain_text}'正在发送中")
@@ -174,7 +176,7 @@ class MessageManager:
message_timeout = container.get_timeout_messages()
if message_timeout:
- print(f"\033[1;34m[调试]\033[0m 发现{len(message_timeout)}条超时消息")
+ logger.warning(f"发现{len(message_timeout)}条超时消息")
for msg in message_timeout:
if msg == message_earliest:
continue
@@ -191,11 +193,11 @@ class MessageManager:
await self.storage.store_message(msg,msg.chat_stream, None)
if not container.remove_message(msg):
- print("\033[1;33m[警告]\033[0m 尝试删除不存在的消息")
- except Exception as e:
- print(f"\033[1;31m[错误]\033[0m 处理超时消息时发生错误: {e}")
+ logger.warning("尝试删除不存在的消息")
+ except Exception:
+ logger.exception("处理超时消息时发生错误")
continue
-
+
async def start_processor(self):
"""启动消息处理器"""
while self._running:
@@ -206,6 +208,7 @@ class MessageManager:
await asyncio.gather(*tasks)
+
# 创建全局消息管理器实例
message_manager = MessageManager()
# 创建全局发送器实例
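
message_sender 里 MessageContainer 对"思考超时"的处理:筛出思考时长超过阈值的消息,并按思考开始时间从早到晚排序。脱离 nonebot 的简化示意(消息类为假设的简化版):

    import time
    from dataclasses import dataclass

    @dataclass
    class ThinkingMessage:
        text: str
        thinking_start_time: float

    def get_timeout_messages(messages, timeout: float = 30.0):
        """返回思考超过 timeout 秒的消息,按 thinking_start_time 从早到晚排序"""
        now = time.time()
        late = [m for m in messages if now - m.thinking_start_time > timeout]
        late.sort(key=lambda m: m.thinking_start_time)
        return late

    msgs = [ThinkingMessage("a", time.time() - 40), ThinkingMessage("b", time.time() - 5)]
    print([m.text for m in get_timeout_messages(msgs)])  # ['a']
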
diff --git a/src/plugins/chat/prompt_builder.py b/src/plugins/chat/prompt_builder.py
index 46adc343e..fec6c7926 100644
--- a/src/plugins/chat/prompt_builder.py
+++ b/src/plugins/chat/prompt_builder.py
@@ -1,6 +1,7 @@
import random
import time
from typing import Optional
+from loguru import logger
from ...common.database import Database
from ..memory_system.memory import hippocampus, memory_graph
@@ -34,44 +35,43 @@ class PromptBuilder:
Returns:
str: 构建好的prompt
- """
- #先禁用关系
+ """
+ # 先禁用关系
if 0 > 30:
relation_prompt = "关系特别特别好,你很喜欢喜欢他"
relation_prompt_2 = "热情发言或者回复"
- elif 0 <-20:
+ elif 0 < -20:
relation_prompt = "关系很差,你很讨厌他"
relation_prompt_2 = "骂他"
else:
relation_prompt = "关系一般"
relation_prompt_2 = "发言或者回复"
-
- #开始构建prompt
-
-
- #心情
+
+ # 开始构建prompt
+
+ # 心情
mood_manager = MoodManager.get_instance()
mood_prompt = mood_manager.get_prompt()
-
-
- #日程构建
+
+ # 日程构建
current_date = time.strftime("%Y-%m-%d", time.localtime())
current_time = time.strftime("%H:%M:%S", time.localtime())
- bot_schedule_now_time,bot_schedule_now_activity = bot_schedule.get_current_task()
+ bot_schedule_now_time, bot_schedule_now_activity = bot_schedule.get_current_task()
prompt_date = f'''今天是{current_date},现在是{current_time},你今天的日程是:\n{bot_schedule.today_schedule}\n你现在正在{bot_schedule_now_activity}\n'''
- #知识构建
+ # 知识构建
start_time = time.time()
-
+
prompt_info = ''
promt_info_prompt = ''
- prompt_info = await self.get_prompt_info(message_txt,threshold=0.5)
+ prompt_info = await self.get_prompt_info(message_txt, threshold=0.5)
if prompt_info:
- prompt_info = f'''\n----------------------------------------------------\n你有以下这些[知识]:\n{prompt_info}\n请你记住上面的[知识],之后可能会用到\n----------------------------------------------------\n'''
-
+ prompt_info = f'''你有以下这些[知识]:{prompt_info}请你记住上面的[知识],之后可能会用到'''
+
end_time = time.time()
- print(f"\033[1;32m[知识检索]\033[0m 耗时: {(end_time - start_time):.3f}秒")
-
+ logger.debug(f"知识检索耗时: {(end_time - start_time):.3f}秒")
+
# 获取聊天上下文
chat_in_group=True
chat_talking_prompt = ''
@@ -90,7 +90,7 @@ class PromptBuilder:
# 使用新的记忆获取方法
memory_prompt = ''
start_time = time.time()
-
+
# 调用 hippocampus 的 get_relevant_memories 方法
relevant_memories = await hippocampus.get_relevant_memories(
text=message_txt,
@@ -98,64 +98,64 @@ class PromptBuilder:
similarity_threshold=0.4,
max_memory_num=5
)
-
+
if relevant_memories:
# 格式化记忆内容
memory_items = []
for memory in relevant_memories:
memory_items.append(f"关于「{memory['topic']}」的记忆:{memory['content']}")
-
+
memory_prompt = "看到这些聊天,你想起来:\n" + "\n".join(memory_items) + "\n"
-
+
# 打印调试信息
- print("\n\033[1;32m[记忆检索]\033[0m 找到以下相关记忆:")
+ logger.debug("[记忆检索]找到以下相关记忆:")
for memory in relevant_memories:
- print(f"- 主题「{memory['topic']}」[相似度: {memory['similarity']:.2f}]: {memory['content']}")
-
+ logger.debug(f"- 主题「{memory['topic']}」[相似度: {memory['similarity']:.2f}]: {memory['content']}")
+
end_time = time.time()
- print(f"\033[1;32m[回忆耗时]\033[0m 耗时: {(end_time - start_time):.3f}秒")
-
-
-
- #激活prompt构建
+ logger.info(f"回忆耗时: {(end_time - start_time):.3f}秒")
+
+ # 激活prompt构建
activate_prompt = ''
if chat_in_group:
activate_prompt = f"以上是群里正在进行的聊天,{memory_prompt} 现在昵称为 '{sender_name}' 的用户说的:{message_txt}。引起了你的注意,你和ta{relation_prompt},{mood_prompt},你想要{relation_prompt_2}。"
else:
activate_prompt = f"以上是你正在和{sender_name}私聊的内容,{memory_prompt} 现在昵称为 '{sender_name}' 的用户说的:{message_txt}。引起了你的注意,你和ta{relation_prompt},{mood_prompt},你想要{relation_prompt_2}。"
- #检测机器人相关词汇
- bot_keywords = ['人机', 'bot', '机器', '入机', 'robot', '机器人']
- is_bot = any(keyword in message_txt.lower() for keyword in bot_keywords)
- if is_bot:
- is_bot_prompt = '有人说你是人机或者机器人,否定这一事实,攻击或反问他。注意,只有认为你是机器人才回复,否则不要否认'
- else:
- is_bot_prompt = ''
+
+ # 关键词检测与反应
+ keywords_reaction_prompt = ''
+ for rule in global_config.keywords_reaction_rules:
+ if rule.get("enable", False):
+ if any(keyword in message_txt.lower() for keyword in rule.get("keywords", [])):
+ logger.info(f"检测到以下关键词之一:{rule.get('keywords', [])},触发反应:{rule.get('reaction', '')}")
+ keywords_reaction_prompt += rule.get("reaction", "") + ','
#人格选择
personality=global_config.PROMPT_PERSONALITY
probability_1 = global_config.PERSONALITY_1
probability_2 = global_config.PERSONALITY_2
probability_3 = global_config.PERSONALITY_3
- prompt_personality = ''
+
+ prompt_personality = f'{activate_prompt}你的网名叫{global_config.BOT_NICKNAME},你还有很多别名:{"/".join(global_config.BOT_ALIAS_NAMES)},'
personality_choice = random.random()
if chat_in_group:
prompt_in_group=f"你正在浏览{chat_stream.platform}群"
else:
prompt_in_group=f"你正在{chat_stream.platform}上和{sender_name}私聊"
if personality_choice < probability_1: # 第一种人格
- prompt_personality = f'''{activate_prompt}你的网名叫{global_config.BOT_NICKNAME},{personality[0]},{prompt_in_group},{promt_info_prompt},
- 现在请你给出日常且口语化的回复,平淡一些,尽量简短一些。{is_bot_prompt}
+ prompt_personality += f'''{personality[0]},{prompt_in_group},{promt_info_prompt},
+ 现在请你给出日常且口语化的回复,平淡一些,尽量简短一些。{keywords_reaction_prompt}
请注意把握群里的聊天内容,不要刻意突出自身学科背景,不要回复的太有条理,可以有个性。'''
elif personality_choice < probability_1 + probability_2: # 第二种人格
- prompt_personality = f'''{activate_prompt}你的网名叫{global_config.BOT_NICKNAME},{personality[1]},{prompt_in_group},{promt_info_prompt},
- 现在请你给出日常且口语化的回复,请表现你自己的见解,不要一昧迎合,尽量简短一些。{is_bot_prompt}
+ prompt_personality += f'''{personality[1]},{prompt_in_group},{promt_info_prompt},
+ 现在请你给出日常且口语化的回复,请表现你自己的见解,不要一昧迎合,尽量简短一些。{keywords_reaction_prompt}
请你表达自己的见解和观点。可以有个性。'''
else: # 第三种人格
- prompt_personality = f'''{activate_prompt}你的网名叫{global_config.BOT_NICKNAME},{personality[2]},{prompt_in_group},{promt_info_prompt},
- 现在请你给出日常且口语化的回复,请表现你自己的见解,不要一昧迎合,尽量简短一些。{is_bot_prompt}
+ prompt_personality += f'''{personality[2]},{prompt_in_group},{promt_info_prompt},
+ 现在请你给出日常且口语化的回复,请表现你自己的见解,不要一昧迎合,尽量简短一些。{keywords_reaction_prompt}
请你表达自己的见解和观点。可以有个性。'''
-
- #中文高手(新加的好玩功能)
+
+ # 中文高手(新加的好玩功能)
prompt_ger = ''
if random.random() < 0.04:
prompt_ger += '你喜欢用倒装句'
@@ -163,23 +163,23 @@ class PromptBuilder:
prompt_ger += '你喜欢用反问句'
if random.random() < 0.01:
prompt_ger += '你喜欢用文言文'
-
- #额外信息要求
- extra_info = '''但是记得回复平淡一些,简短一些,尤其注意在没明确提到时不要过多提及自身的背景, 不要直接回复别人发的表情包,记住不要输出多余内容(包括前后缀,冒号和引号,括号,表情等),只需要输出回复内容就好,不要输出其他任何内容'''
-
- #合并prompt
+
+ # 额外信息要求
+ extra_info = '''但是记得回复平淡一些,简短一些,尤其注意在没明确提到时不要过多提及自身的背景, 不要直接回复别人发的表情包,记住不要输出多余内容(包括前后缀,冒号和引号,括号,表情等),只需要输出回复内容就好,不要输出其他任何内容'''
+
+ # 合并prompt
prompt = ""
prompt += f"{prompt_info}\n"
prompt += f"{prompt_date}\n"
- prompt += f"{chat_talking_prompt}\n"
+ prompt += f"{chat_talking_prompt}\n"
prompt += f"{prompt_personality}\n"
prompt += f"{prompt_ger}\n"
- prompt += f"{extra_info}\n"
-
- '''读空气prompt处理'''
- activate_prompt_check=f"以上是群里正在进行的聊天,昵称为 '{sender_name}' 的用户说的:{message_txt}。引起了你的注意,你和他{relation_prompt},你想要{relation_prompt_2},但是这不一定是合适的时机,请你决定是否要回应这条消息。"
+ prompt += f"{extra_info}\n"
+
+ '''读空气prompt处理'''
+ activate_prompt_check = f"以上是群里正在进行的聊天,昵称为 '{sender_name}' 的用户说的:{message_txt}。引起了你的注意,你和他{relation_prompt},你想要{relation_prompt_2},但是这不一定是合适的时机,请你决定是否要回应这条消息。"
prompt_personality_check = ''
- extra_check_info=f"请注意把握群里的聊天内容的基础上,综合群内的氛围,例如,和{global_config.BOT_NICKNAME}相关的话题要积极回复,如果是at自己的消息一定要回复,如果自己正在和别人聊天一定要回复,其他话题如果合适搭话也可以回复,如果认为应该回复请输出yes,否则输出no,请注意是决定是否需要回复,而不是编写回复内容,除了yes和no不要输出任何回复内容。"
+ extra_check_info = f"请注意把握群里的聊天内容的基础上,综合群内的氛围,例如,和{global_config.BOT_NICKNAME}相关的话题要积极回复,如果是at自己的消息一定要回复,如果自己正在和别人聊天一定要回复,其他话题如果合适搭话也可以回复,如果认为应该回复请输出yes,否则输出no,请注意是决定是否需要回复,而不是编写回复内容,除了yes和no不要输出任何回复内容。"
if personality_choice < probability_1: # 第一种人格
prompt_personality_check = f'''你的网名叫{global_config.BOT_NICKNAME},{personality[0]}, 你正在浏览qq群,{promt_info_prompt} {activate_prompt_check} {extra_check_info}'''
elif personality_choice < probability_1 + probability_2: # 第二种人格
@@ -187,34 +187,36 @@ class PromptBuilder:
else: # 第三种人格
prompt_personality_check = f'''你的网名叫{global_config.BOT_NICKNAME},{personality[2]}, 你正在浏览qq群,{promt_info_prompt} {activate_prompt_check} {extra_check_info}'''
- prompt_check_if_response=f"{prompt_info}\n{prompt_date}\n{chat_talking_prompt}\n{prompt_personality_check}"
-
- return prompt,prompt_check_if_response
-
- def _build_initiative_prompt_select(self,group_id):
+ prompt_check_if_response = f"{prompt_info}\n{prompt_date}\n{chat_talking_prompt}\n{prompt_personality_check}"
+
+ return prompt, prompt_check_if_response
+
+ def _build_initiative_prompt_select(self, group_id, probability_1=0.8, probability_2=0.1):
current_date = time.strftime("%Y-%m-%d", time.localtime())
current_time = time.strftime("%H:%M:%S", time.localtime())
- bot_schedule_now_time,bot_schedule_now_activity = bot_schedule.get_current_task()
+ bot_schedule_now_time, bot_schedule_now_activity = bot_schedule.get_current_task()
prompt_date = f'''今天是{current_date},现在是{current_time},你今天的日程是:\n{bot_schedule.today_schedule}\n你现在正在{bot_schedule_now_activity}\n'''
chat_talking_prompt = ''
if group_id:
- chat_talking_prompt = get_recent_group_detailed_plain_text(self.db, group_id, limit=global_config.MAX_CONTEXT_SIZE,combine = True)
-
+ chat_talking_prompt = get_recent_group_detailed_plain_text(self.db, group_id,
+ limit=global_config.MAX_CONTEXT_SIZE,
+ combine=True)
+
chat_talking_prompt = f"以下是群里正在聊天的内容:\n{chat_talking_prompt}"
- # print(f"\033[1;34m[调试]\033[0m 已从数据库获取群 {group_id} 的消息记录:{chat_talking_prompt}")
+ # print(f"\033[1;34m[调试]\033[0m 已从数据库获取群 {group_id} 的消息记录:{chat_talking_prompt}")
# 获取主动发言的话题
- all_nodes=memory_graph.dots
- all_nodes=filter(lambda dot:len(dot[1]['memory_items'])>3,all_nodes)
- nodes_for_select=random.sample(all_nodes,5)
- topics=[info[0] for info in nodes_for_select]
- infos=[info[1] for info in nodes_for_select]
+ all_nodes = memory_graph.dots
+ all_nodes = list(filter(lambda dot: len(dot[1]['memory_items']) > 3, all_nodes))
+ nodes_for_select = random.sample(all_nodes, min(5, len(all_nodes)))
+ topics = [info[0] for info in nodes_for_select]
+ infos = [info[1] for info in nodes_for_select]
- #激活prompt构建
+ # 激活prompt构建
activate_prompt = ''
activate_prompt = "以上是群里正在进行的聊天。"
- personality=global_config.PROMPT_PERSONALITY
+ personality = global_config.PROMPT_PERSONALITY
prompt_personality = ''
personality_choice = random.random()
if personality_choice < probability_1: # 第一种人格
@@ -223,32 +225,31 @@ class PromptBuilder:
prompt_personality = f'''{activate_prompt}你的网名叫{global_config.BOT_NICKNAME},{personality[1]}'''
else: # 第三种人格
prompt_personality = f'''{activate_prompt}你的网名叫{global_config.BOT_NICKNAME},{personality[2]}'''
-
- topics_str=','.join(f"\"{topics}\"")
- prompt_for_select=f"你现在想在群里发言,回忆了一下,想到几个话题,分别是{topics_str},综合当前状态以及群内气氛,请你在其中选择一个合适的话题,注意只需要输出话题,除了话题什么也不要输出(双引号也不要输出)"
-
- prompt_initiative_select=f"{prompt_date}\n{prompt_personality}\n{prompt_for_select}"
- prompt_regular=f"{prompt_date}\n{prompt_personality}"
- return prompt_initiative_select,nodes_for_select,prompt_regular
-
- def _build_initiative_prompt_check(self,selected_node,prompt_regular):
- memory=random.sample(selected_node['memory_items'],3)
- memory='\n'.join(memory)
- prompt_for_check=f"{prompt_regular}你现在想在群里发言,回忆了一下,想到一个话题,是{selected_node['concept']},关于这个话题的记忆有\n{memory}\n,以这个作为主题发言合适吗?请在把握群里的聊天内容的基础上,综合群内的氛围,如果认为应该发言请输出yes,否则输出no,请注意是决定是否需要发言,而不是编写回复内容,除了yes和no不要输出任何回复内容。"
- return prompt_for_check,memory
-
- def _build_initiative_prompt(self,selected_node,prompt_regular,memory):
- prompt_for_initiative=f"{prompt_regular}你现在想在群里发言,回忆了一下,想到一个话题,是{selected_node['concept']},关于这个话题的记忆有\n{memory}\n,请在把握群里的聊天内容的基础上,综合群内的氛围,以日常且口语化的口吻,简短且随意一点进行发言,不要说的太有条理,可以有个性。记住不要输出多余内容(包括前后缀,冒号和引号,括号,表情等)"
+ topics_str = ','.join(f'"{topic}"' for topic in topics)
+ prompt_for_select = f"你现在想在群里发言,回忆了一下,想到几个话题,分别是{topics_str},综合当前状态以及群内气氛,请你在其中选择一个合适的话题,注意只需要输出话题,除了话题什么也不要输出(双引号也不要输出)"
+
+ prompt_initiative_select = f"{prompt_date}\n{prompt_personality}\n{prompt_for_select}"
+ prompt_regular = f"{prompt_date}\n{prompt_personality}"
+
+ return prompt_initiative_select, nodes_for_select, prompt_regular
+
+ def _build_initiative_prompt_check(self, selected_node, prompt_regular):
+ memory = random.sample(selected_node['memory_items'], 3)
+ memory = '\n'.join(memory)
+ prompt_for_check = f"{prompt_regular}你现在想在群里发言,回忆了一下,想到一个话题,是{selected_node['concept']},关于这个话题的记忆有\n{memory}\n,以这个作为主题发言合适吗?请在把握群里的聊天内容的基础上,综合群内的氛围,如果认为应该发言请输出yes,否则输出no,请注意是决定是否需要发言,而不是编写回复内容,除了yes和no不要输出任何回复内容。"
+ return prompt_for_check, memory
+
+ def _build_initiative_prompt(self, selected_node, prompt_regular, memory):
+ prompt_for_initiative = f"{prompt_regular}你现在想在群里发言,回忆了一下,想到一个话题,是{selected_node['concept']},关于这个话题的记忆有\n{memory}\n,请在把握群里的聊天内容的基础上,综合群内的氛围,以日常且口语化的口吻,简短且随意一点进行发言,不要说的太有条理,可以有个性。记住不要输出多余内容(包括前后缀,冒号和引号,括号,表情等)"
return prompt_for_initiative
-
- async def get_prompt_info(self,message:str,threshold:float):
+ async def get_prompt_info(self, message: str, threshold: float):
related_info = ''
- print(f"\033[1;34m[调试]\033[0m 获取知识库内容,元消息:{message[:30]}...,消息长度: {len(message)}")
+ logger.debug(f"获取知识库内容,元消息:{message[:30]}...,消息长度: {len(message)}")
embedding = await get_embedding(message)
- related_info += self.get_info_from_db(embedding,threshold=threshold)
-
+ related_info += self.get_info_from_db(embedding, threshold=threshold)
+
return related_info
def get_info_from_db(self, query_embedding: list, limit: int = 1, threshold: float = 0.5) -> str:
@@ -309,14 +310,15 @@ class PromptBuilder:
{"$limit": limit},
{"$project": {"content": 1, "similarity": 1}}
]
-
+
results = list(self.db.db.knowledges.aggregate(pipeline))
# print(f"\033[1;34m[调试]\033[0m获取知识库内容结果: {results}")
-
+
if not results:
return ''
-
+
# 返回所有找到的内容,用换行分隔
return '\n'.join(str(result['content']) for result in results)
-
-prompt_builder = PromptBuilder()
\ No newline at end of file
+
+
+prompt_builder = PromptBuilder()
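
prompt_builder 把原先写死的"机器人关键词"反应换成了可配置的 keywords_reaction 规则:命中任一启用规则的关键词,就把该规则的 reaction 拼进 prompt。最小示意(规则内容为假设):

    keywords_reaction_rules = [
        {"enable": True, "keywords": ["人机", "bot", "机器人"],
         "reaction": "有人说你是机器人,否定这一事实"},
    ]

    def build_keywords_reaction_prompt(message_txt: str, rules) -> str:
        """命中启用规则的任一关键词时,追加其 reaction"""
        prompt = ""
        for rule in rules:
            if rule.get("enable", False) and any(
                    k in message_txt.lower() for k in rule.get("keywords", [])):
                prompt += rule.get("reaction", "") + ","
        return prompt

    print(build_keywords_reaction_prompt("你是bot吗", keywords_reaction_rules))
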
diff --git a/src/plugins/chat/relationship_manager.py b/src/plugins/chat/relationship_manager.py
index 5552aee8c..9e7cafda0 100644
--- a/src/plugins/chat/relationship_manager.py
+++ b/src/plugins/chat/relationship_manager.py
@@ -1,6 +1,7 @@
import asyncio
from typing import Optional, Union
-from typing import Optional, Union
+from loguru import logger
from ...common.database import Database
from .message_base import UserInfo
@@ -10,9 +11,10 @@ class Impression:
traits: str = None
called: str = None
know_time: float = None
-
+
relationship_value: float = None
+
class Relationship:
user_id: int = None
platform: str = None
@@ -79,7 +81,7 @@ class RelationshipManager:
# 保存到数据库
await self.storage_relationship(relationship)
relationship.saved = True
-
+
return relationship
async def update_relationship_value(self,
@@ -121,7 +123,7 @@ class RelationshipManager:
# 如果不存在且提供了user_info,则创建新的关系
if user_info is not None:
return await self.update_relationship(chat_stream=chat_stream, **kwargs)
- print(f"\033[1;31m[关系管理]\033[0m 用户 {user_id}({platform}) 不存在,无法更新")
+ logger.warning(f"[关系管理] 用户 {user_id}({platform}) 不存在,无法更新")
return None
def get_relationship(self,
@@ -151,7 +153,7 @@ class RelationshipManager:
return self.relationships[key]
else:
return 0
-
+
async def load_relationship(self, data: dict) -> Relationship:
"""从数据库加载或创建新的关系对象"""
# 确保data中有platform字段,如果没有则默认为'qq'
@@ -163,14 +165,14 @@ class RelationshipManager:
key = (rela.user_id, rela.platform)
self.relationships[key] = rela
return rela
-
+
async def load_all_relationships(self):
"""加载所有关系对象"""
db = Database.get_instance()
all_relationships = db.db.relationships.find({})
for data in all_relationships:
await self.load_relationship(data)
-
+
async def _start_relationship_manager(self):
"""每5分钟自动保存一次关系数据"""
db = Database.get_instance()
@@ -179,15 +181,15 @@ class RelationshipManager:
# 依次加载每条记录
for data in all_relationships:
await self.load_relationship(data)
- print(f"\033[1;32m[关系管理]\033[0m 已加载 {len(self.relationships)} 条关系记录")
+ logger.debug(f"[关系管理] 已加载 {len(self.relationships)} 条关系记录")
while True:
- print("\033[1;32m[关系管理]\033[0m 正在自动保存关系")
+ logger.debug("正在自动保存关系")
await asyncio.sleep(300) # 等待300秒(5分钟)
await self._save_all_relationships()
-
+
async def _save_all_relationships(self):
- """将所有关系数据保存到数据库"""
+ """将所有关系数据保存到数据库"""
# 保存所有关系数据
for (userid, platform), relationship in self.relationships.items():
if not relationship.saved:
@@ -203,7 +205,7 @@ class RelationshipManager:
gender = relationship.gender
age = relationship.age
saved = relationship.saved
-
+
db = Database.get_instance()
db.db.relationships.update_one(
{'user_id': user_id, 'platform': platform},
@@ -252,4 +254,4 @@ class RelationshipManager:
return "某人"
-relationship_manager = RelationshipManager()
\ No newline at end of file
+relationship_manager = RelationshipManager()
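
RelationshipManager 的自动保存本质上是"睡一个间隔、存一次"的协程循环,抽象出来大致如下(仅为示意,间隔调小以便演示):

    import asyncio
    import contextlib

    async def autosave_loop(save_fn, interval_seconds: float = 300):
        """每 interval_seconds 秒调用一次 save_fn"""
        while True:
            await asyncio.sleep(interval_seconds)
            await save_fn()

    async def main():
        async def save():
            print("保存关系数据")
        task = asyncio.create_task(autosave_loop(save, interval_seconds=0.1))
        await asyncio.sleep(0.25)  # 演示:让它保存两次
        task.cancel()
        with contextlib.suppress(asyncio.CancelledError):
            await task

    asyncio.run(main())
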
diff --git a/src/plugins/chat/storage.py b/src/plugins/chat/storage.py
index 614246d26..f403b2c8b 100644
--- a/src/plugins/chat/storage.py
+++ b/src/plugins/chat/storage.py
@@ -5,6 +5,8 @@ from ...common.database import Database
from .message_base import MessageBase
from .message import MessageSending, MessageRecv
from .chat_stream import ChatStream
+from loguru import logger
+
class MessageStorage:
def __init__(self):
@@ -24,7 +26,7 @@ class MessageStorage:
"topic": topic,
}
self.db.db.messages.insert_one(message_data)
- except Exception as e:
- print(f"\033[1;31m[错误]\033[0m 存储消息失败: {e}")
+ except Exception:
+ logger.exception("存储消息失败")
-# 如果需要其他存储相关的函数,可以在这里添加
\ No newline at end of file
+# 如果需要其他存储相关的函数,可以在这里添加
diff --git a/src/plugins/chat/topic_identifier.py b/src/plugins/chat/topic_identifier.py
index 3296d0895..a0c5bae30 100644
--- a/src/plugins/chat/topic_identifier.py
+++ b/src/plugins/chat/topic_identifier.py
@@ -4,9 +4,11 @@ from nonebot import get_driver
from ..models.utils_model import LLM_request
from .config import global_config
+from loguru import logger
driver = get_driver()
-config = driver.config
+config = driver.config
+
class TopicIdentifier:
def __init__(self):
@@ -23,19 +25,20 @@ class TopicIdentifier:
# 使用 LLM_request 类进行请求
topic, _ = await self.llm_topic_judge.generate_response(prompt)
-
+
if not topic:
- print("\033[1;31m[错误]\033[0m LLM API 返回为空")
+ logger.error("LLM API 返回为空")
return None
-
+
# 直接在这里处理主题解析
if not topic or topic == "无主题":
return None
-
+
# 解析主题字符串为列表
topic_list = [t.strip() for t in topic.split(",") if t.strip()]
-
- print(f"\033[1;32m[主题识别]\033[0m 主题: {topic_list}")
+
+ logger.info(f"主题: {topic_list}")
return topic_list if topic_list else None
-topic_identifier = TopicIdentifier()
\ No newline at end of file
+
+topic_identifier = TopicIdentifier()
diff --git a/src/plugins/chat/utils.py b/src/plugins/chat/utils.py
index 495d0480d..a889ef177 100644
--- a/src/plugins/chat/utils.py
+++ b/src/plugins/chat/utils.py
@@ -7,6 +7,7 @@ from typing import Dict, List
import jieba
import numpy as np
from nonebot import get_driver
+from loguru import logger
from ..models.utils_model import LLM_request
from ..utils.typo_generator import ChineseTypoGenerator
@@ -21,16 +22,16 @@ config = driver.config
def db_message_to_str(message_dict: Dict) -> str:
- print(f"message_dict: {message_dict}")
+ logger.debug(f"message_dict: {message_dict}")
time_str = time.strftime("%m-%d %H:%M:%S", time.localtime(message_dict["time"]))
try:
name = "[(%s)%s]%s" % (
- message_dict['user_id'], message_dict.get("user_nickname", ""), message_dict.get("user_cardname", ""))
+ message_dict['user_id'], message_dict.get("user_nickname", ""), message_dict.get("user_cardname", ""))
except:
name = message_dict.get("user_nickname", "") or f"用户{message_dict['user_id']}"
content = message_dict.get("processed_plain_text", "")
result = f"[{time_str}] {name}: {content}\n"
- print(f"result: {result}")
+ logger.debug(f"result: {result}")
return result
@@ -71,37 +72,43 @@ def calculate_information_content(text):
def get_cloest_chat_from_db(db, length: int, timestamp: str):
- """从数据库中获取最接近指定时间戳的聊天记录,并记录读取次数"""
- chat_text = ''
+ """从数据库中获取最接近指定时间戳的聊天记录,并记录读取次数
+
+ Returns:
+ list: 消息记录字典列表,每个字典包含消息内容和时间信息
+ """
+ chat_records = []
closest_record = db.db.messages.find_one({"time": {"$lte": timestamp}}, sort=[('time', -1)])
-
- if closest_record and closest_record.get('memorized', 0) < 4:
+
+ if closest_record and closest_record.get('memorized', 0) < 4:
closest_time = closest_record['time']
chat_id = closest_record['chat_id'] # 获取groupid
# 获取该时间戳之后的length条消息,且groupid相同
chat_records = list(db.db.messages.find(
{"time": {"$gt": closest_time}, "chat_id": chat_id}
).sort('time', 1).limit(length))
-
+
# 更新每条消息的memorized属性
- for record in chat_records:
- # 检查当前记录的memorized值
+ for record in chat_records:
current_memorized = record.get('memorized', 0)
if current_memorized > 3:
- # print(f"消息已读取3次,跳过")
+ logger.debug("消息已读取3次,跳过")
- return ''
+ return []
-
+
# 更新memorized值
db.db.messages.update_one(
{"_id": record["_id"]},
{"$set": {"memorized": current_memorized + 1}}
)
-
- chat_text += record["detailed_plain_text"]
-
- return chat_text
- # print(f"消息已读取3次,跳过")
- return ''
+
+ # 添加到记录列表中
+ chat_records.append({
+ 'text': record["detailed_plain_text"],
+ 'time': record["time"],
+ 'group_id': record.get("group_id")
+ })
+
+ return chat_records
async def get_recent_group_messages(db, chat_id:str, limit: int = 12) -> list:
@@ -142,7 +149,7 @@ async def get_recent_group_messages(db, chat_id:str, limit: int = 12) -> list:
)
message_objects.append(msg)
except KeyError:
- print("[WARNING] 数据库中存在无效的消息")
+ logger.warning("数据库中存在无效的消息")
continue
# 按时间正序排列
@@ -259,11 +266,10 @@ def split_into_sentences_w_remove_punctuation(text: str) -> List[str]:
sentence = sentence.replace(',', ' ').replace(',', ' ')
sentences_done.append(sentence)
- print(f"处理后的句子: {sentences_done}")
+ logger.info(f"处理后的句子: {sentences_done}")
return sentences_done
-
def random_remove_punctuation(text: str) -> str:
"""随机处理标点符号,模拟人类打字习惯
@@ -291,43 +297,70 @@ def random_remove_punctuation(text: str) -> str:
return result
-
def process_llm_response(text: str) -> List[str]:
# processed_response = process_text_with_typos(content)
- if len(text) > 300:
- print(f"回复过长 ({len(text)} 字符),返回默认回复")
+ if len(text) > 200:
+ logger.warning(f"回复过长 ({len(text)} 字符),返回默认回复")
return ['懒得说']
# 处理长消息
typo_generator = ChineseTypoGenerator(
- error_rate=0.03,
- min_freq=7,
- tone_error_rate=0.2,
- word_replace_rate=0.02
+ error_rate=global_config.chinese_typo_error_rate,
+ min_freq=global_config.chinese_typo_min_freq,
+ tone_error_rate=global_config.chinese_typo_tone_error_rate,
+ word_replace_rate=global_config.chinese_typo_word_replace_rate
)
- typoed_text = typo_generator.create_typo_sentence(text)[0]
- sentences = split_into_sentences_w_remove_punctuation(typoed_text)
+ split_sentences = split_into_sentences_w_remove_punctuation(text)
+ sentences = []
+ for sentence in split_sentences:
+ if global_config.chinese_typo_enable:
+ typoed_text, typo_corrections = typo_generator.create_typo_sentence(sentence)
+ sentences.append(typoed_text)
+ if typo_corrections:
+ sentences.append(typo_corrections)
+ else:
+ sentences.append(sentence)
- # 检查分割后的消息数量是否过多(超过3条)
+ # 检查分割后的消息数量是否过多(超过5条)
- if len(sentences) > 4:
- print(f"分割后消息数量过多 ({len(sentences)} 条),返回默认回复")
+
+ if len(sentences) > 5:
+ logger.warning(f"分割后消息数量过多 ({len(sentences)} 条),返回默认回复")
return [f'{global_config.BOT_NICKNAME}不知道哦']
return sentences
-def calculate_typing_time(input_string: str, chinese_time: float = 0.2, english_time: float = 0.1) -> float:
+def calculate_typing_time(input_string: str, chinese_time: float = 0.4, english_time: float = 0.2) -> float:
"""
计算输入字符串所需的时间,中文和英文字符有不同的输入时间
input_string (str): 输入的字符串
- chinese_time (float): 中文字符的输入时间,默认为0.3秒
- english_time (float): 英文字符的输入时间,默认为0.15秒
+ chinese_time (float): 中文字符的输入时间,默认为0.4秒
+ english_time (float): 英文字符的输入时间,默认为0.2秒
+
+ 特殊情况:
+ - 如果只有一个中文字符,将使用3倍的中文输入时间
+ - 在所有输入结束后,额外加上回车时间0.3秒
"""
+ mood_manager = MoodManager.get_instance()
+ # 唤醒度(arousal)取值约在-1到1之间
+ mood_arousal = mood_manager.current_mood.arousal
+ # 1.5**arousal 映射到约0.67到1.5倍的速度系数
+ typing_speed_multiplier = 1.5 ** mood_arousal # 唤醒度为1时约1.5倍速,为-1时约0.67倍速
+ chinese_time /= typing_speed_multiplier
+ english_time /= typing_speed_multiplier
+ # 计算中文字符数
+ chinese_chars = sum(1 for char in input_string if '\u4e00' <= char <= '\u9fff')
+
+ # 如果只有一个中文字符,使用3倍时间
+ if chinese_chars == 1 and len(input_string.strip()) == 1:
+ return chinese_time * 3 + 0.3 # 加上回车时间
+
+ # 正常计算所有字符的输入时间
total_time = 0.0
for char in input_string:
if '\u4e00' <= char <= '\u9fff': # 判断是否为中文字符
total_time += chinese_time
else: # 其他字符(如英文)
total_time += english_time
- return total_time
+ return total_time + 0.3 # 加上回车时间
def cosine_similarity(v1, v2):
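
calculate_typing_time 的新逻辑:中英文按不同单字耗时累加,用 1.5**arousal 缩放打字速度,单个中文字按 3 倍耗时,最后统一加 0.3 秒"回车"时间。脱离 MoodManager 的等价示意(假设 arousal 在 -1 到 1 之间):

    def typing_time_sketch(text: str, arousal: float = 0.0,
                           chinese_time: float = 0.4, english_time: float = 0.2) -> float:
        """估算打字耗时(秒),arousal 越高打字越快"""
        speed = 1.5 ** arousal          # 约 0.67~1.5 倍速
        chinese_time /= speed
        english_time /= speed
        chinese_chars = sum(1 for ch in text if '\u4e00' <= ch <= '\u9fff')
        if chinese_chars == 1 and len(text.strip()) == 1:
            return chinese_time * 3 + 0.3
        total = sum(chinese_time if '\u4e00' <= ch <= '\u9fff' else english_time
                    for ch in text)
        return total + 0.3              # 加上回车时间

    print(f"{typing_time_sketch('你好hello', arousal=1.0):.2f}秒")
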
diff --git a/src/plugins/chat/willing_manager.py b/src/plugins/chat/willing_manager.py
index d430ac74d..96cf74095 100644
--- a/src/plugins/chat/willing_manager.py
+++ b/src/plugins/chat/willing_manager.py
@@ -16,7 +16,9 @@ class WillingManager:
self.chat_reply_willing: Dict[str, float] = {} # 存储每个聊天流的回复意愿
self._decay_task = None
self._started = False
-
+ self.min_reply_willing = 0.01
+ self.attenuation_coefficient = 0.75
+
async def _decay_reply_willing(self):
"""定期衰减回复意愿"""
while True:
@@ -33,12 +35,9 @@ class WillingManager:
return self.chat_reply_willing.get(stream.stream_id, 0)
return 0
- def set_willing(self, chat_id: str, willing: float):
- """设置指定聊天流的回复意愿"""
- self.chat_reply_willing[chat_id] = willing
- def set_willing(self, chat_id: str, willing: float):
- """设置指定聊天流的回复意愿"""
- self.chat_reply_willing[chat_id] = willing
+ def set_willing(self, chat_id: str, willing: float):
+ """设置指定聊天流的回复意愿"""
+ self.chat_reply_willing[chat_id] = willing
async def change_reply_willing_received(self,
chat_stream:ChatStream,
@@ -51,47 +50,67 @@ class WillingManager:
# 获取或创建聊天流
stream = chat_stream
chat_id = stream.stream_id
+ group_id = stream.group_info.group_id if stream.group_info else None
+
+ # 若是群聊且非目标回复群组,则直接return
+ if group_id is not None and group_id not in config.talk_allowed_groups:
+ return 0
+
current_willing = self.chat_reply_willing.get(chat_id, 0)
- # print(f"初始意愿: {current_willing}")
- if is_mentioned_bot and current_willing < 1.0:
- current_willing += 0.9
- print(f"被提及, 当前意愿: {current_willing}")
- elif is_mentioned_bot:
- current_willing += 0.05
- print(f"被重复提及, 当前意愿: {current_willing}")
-
+ logger.debug(f"[{chat_id}]的初始回复意愿: {current_willing}")
+
+
+ # 根据消息类型(被cue/表情包)调控
+ if is_mentioned_bot:
+ current_willing = min(
+ 3.0,
+ current_willing + 0.9
+ )
+ logger.debug(f"被提及, 当前意愿: {current_willing}")
+
if is_emoji:
current_willing *= 0.1
- print(f"表情包, 当前意愿: {current_willing}")
-
- print(f"放大系数_interested_rate: {global_config.response_interested_rate_amplifier}")
- interested_rate *= global_config.response_interested_rate_amplifier #放大回复兴趣度
- if interested_rate > 0.4:
- # print(f"兴趣度: {interested_rate}, 当前意愿: {current_willing}")
- current_willing += interested_rate-0.4
-
- current_willing *= global_config.response_willing_amplifier #放大回复意愿
- # print(f"放大系数_willing: {global_config.response_willing_amplifier}, 当前意愿: {current_willing}")
-
- reply_probability = max((current_willing - 0.45) * 2, 0)
-
- # 检查群组权限(如果是群聊)
- if chat_stream.group_info:
- if chat_stream.group_info.group_id not in config.talk_allowed_groups:
- current_willing = 0
- reply_probability = 0
-
- if chat_stream.group_info.group_id in config.talk_frequency_down_groups:
- reply_probability = reply_probability / global_config.down_frequency_rate
+ logger.debug(f"表情包, 当前意愿: {current_willing}")
+
+ # 兴趣放大系数,若兴趣 > 0.4则增加回复概率
+ interested_rate_amplifier = global_config.response_interested_rate_amplifier
+ logger.debug(f"放大系数_interested_rate: {interested_rate_amplifier}")
+ interested_rate *= interested_rate_amplifier
+
+ current_willing += max(
+ 0.0,
+ interested_rate - 0.4
+ )
+
+ # 回复意愿系数调控,独立乘区
+ willing_amplifier = max(
+ global_config.response_willing_amplifier,
+ self.min_reply_willing
+ )
+ current_willing *= willing_amplifier
+ logger.debug(f"放大系数_willing: {global_config.response_willing_amplifier}, 当前意愿: {current_willing}")
+
+ # 回复概率迭代,保底0.01回复概率
+ reply_probability = max(
+ (current_willing - 0.45) * 2,
+ self.min_reply_willing
+ )
+
+ # 降低目标低频群组回复概率
+ down_frequency_rate = max(
+ 1.0,
+ global_config.down_frequency_rate
+ )
+ if group_id in config.talk_frequency_down_groups:
+ reply_probability = reply_probability / down_frequency_rate
reply_probability = min(reply_probability, 1)
- if reply_probability < 0:
- reply_probability = 0
-
- self.chat_reply_willing[chat_id] = min(current_willing, 3.0)
- self.chat_reply_willing[chat_id] = min(current_willing, 3.0)
+
+        self.chat_reply_willing[chat_id] = min(current_willing, 3.0)
+ logger.debug(f"当前群组{group_id}回复概率:{reply_probability}")
return reply_probability
def change_reply_willing_sent(self, chat_stream:ChatStream):
@@ -116,5 +135,6 @@ class WillingManager:
self._decay_task = asyncio.create_task(self._decay_reply_willing())
self._started = True
+
# 创建全局实例
-willing_manager = WillingManager()
+willing_manager = WillingManager()
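
For reference, the willingness-to-probability pipeline this file now implements can be condensed into one pure function. A sketch with the config lookups inlined as parameters (the parameter names here are illustrative, not the real config fields, and the per-group frequency-down division is omitted):

def reply_probability(willing: float, mentioned: bool, is_emoji: bool,
                      interest: float, interest_amp: float = 1.0,
                      willing_amp: float = 1.0, min_prob: float = 0.01) -> float:
    if mentioned:
        willing = min(3.0, willing + 0.9)       # being @-ed boosts willingness, capped at 3
    if is_emoji:
        willing *= 0.1                          # emoji-only messages rarely warrant a reply
    willing += max(0.0, interest * interest_amp - 0.4)  # interest above 0.4 adds willingness
    willing *= max(willing_amp, min_prob)       # independent global multiplier
    prob = max((willing - 0.45) * 2, min_prob)  # floor of 0.01 reply probability
    return min(prob, 1.0)

# reply_probability(0.5, mentioned=True, is_emoji=False, interest=0.6) -> 1.0
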
diff --git a/src/plugins/knowledege/knowledge_library.py b/src/plugins/knowledege/knowledge_library.py
index d2408e24f..4bf6227bb 100644
--- a/src/plugins/knowledege/knowledge_library.py
+++ b/src/plugins/knowledege/knowledge_library.py
@@ -19,7 +19,7 @@ from src.common.database import Database
# 从环境变量获取配置
Database.initialize(
- host=os.getenv("MONGODB_HOST", "localhost"),
+ host=os.getenv("MONGODB_HOST", "127.0.0.1"),
port=int(os.getenv("MONGODB_PORT", "27017")),
db_name=os.getenv("DATABASE_NAME", "maimai"),
username=os.getenv("MONGODB_USERNAME"),
@@ -79,7 +79,7 @@ class KnowledgeLibrary:
content = f.read()
-        # 按1024字符分段
-        segments = [content[i:i+600] for i in range(0, len(content), 600)]
+        # 按600字符分段,步长300,相邻分段有一半重叠
+        segments = [content[i:i+600] for i in range(0, len(content), 300)]
# 处理每个分段
for segment in segments:
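
Halving the step from 600 to 300 while keeping the window at 600 characters makes consecutive segments overlap by half, so a fact that straddles a boundary still appears whole in at least one chunk. A sketch of the pattern:

def split_overlapping(content: str, size: int = 600, step: int = 300) -> list:
    # step < size produces overlapping windows; step == size restores disjoint chunks
    return [content[i:i + size] for i in range(0, len(content), step)]

# len(split_overlapping("x" * 1200)) == 4: slices [0:600], [300:900], [600:1200], [900:1200]
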
diff --git a/src/plugins/memory_system/__init__.py b/src/plugins/memory_system/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/src/plugins/memory_system/draw_memory.py b/src/plugins/memory_system/draw_memory.py
index 006991bcb..6da330d95 100644
--- a/src/plugins/memory_system/draw_memory.py
+++ b/src/plugins/memory_system/draw_memory.py
@@ -7,6 +7,7 @@ import jieba
import matplotlib.pyplot as plt
import networkx as nx
from dotenv import load_dotenv
+from loguru import logger
sys.path.append("C:/GitHub/MaiMBot") # 添加项目根目录到 Python 路径
from src.common.database import Database # 使用正确的导入语法
@@ -15,15 +16,15 @@ from src.common.database import Database # 使用正确的导入语法
env_path = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))), '.env.dev')
load_dotenv(env_path)
-
+
class Memory_graph:
def __init__(self):
self.G = nx.Graph() # 使用 networkx 的图结构
self.db = Database.get_instance()
-
+
def connect_dot(self, concept1, concept2):
self.G.add_edge(concept1, concept2)
-
+
def add_dot(self, concept, memory):
if concept in self.G:
# 如果节点已存在,将新记忆添加到现有列表中
@@ -37,7 +38,7 @@ class Memory_graph:
else:
# 如果是新节点,创建新的记忆列表
self.G.add_node(concept, memory_items=[memory])
-
+
def get_dot(self, concept):
# 检查节点是否存在于图中
if concept in self.G:
@@ -45,20 +46,20 @@ class Memory_graph:
node_data = self.G.nodes[concept]
# print(node_data)
# 创建新的Memory_dot对象
- return concept,node_data
+ return concept, node_data
return None
def get_related_item(self, topic, depth=1):
if topic not in self.G:
return [], []
-
+
first_layer_items = []
second_layer_items = []
-
+
# 获取相邻节点
neighbors = list(self.G.neighbors(topic))
# print(f"第一层: {topic}")
-
+
# 获取当前节点的记忆项
node_data = self.get_dot(topic)
if node_data:
@@ -69,7 +70,7 @@ class Memory_graph:
first_layer_items.extend(memory_items)
else:
first_layer_items.append(memory_items)
-
+
# 只在depth=2时获取第二层记忆
if depth >= 2:
# 获取相邻节点的记忆项
@@ -84,42 +85,44 @@ class Memory_graph:
second_layer_items.extend(memory_items)
else:
second_layer_items.append(memory_items)
-
+
return first_layer_items, second_layer_items
-
+
def store_memory(self):
for node in self.G.nodes():
dot_data = {
"concept": node
}
self.db.db.store_memory_dots.insert_one(dot_data)
-
+
@property
def dots(self):
# 返回所有节点对应的 Memory_dot 对象
return [self.get_dot(node) for node in self.G.nodes()]
-
-
+
def get_random_chat_from_db(self, length: int, timestamp: str):
# 从数据库中根据时间戳获取离其最近的聊天记录
chat_text = ''
closest_record = self.db.db.messages.find_one({"time": {"$lte": timestamp}}, sort=[('time', -1)]) # 调试输出
- print(f"距离time最近的消息时间: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(int(closest_record['time'])))}")
-
+ logger.info(
+ f"距离time最近的消息时间: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(int(closest_record['time'])))}")
+
if closest_record:
closest_time = closest_record['time']
group_id = closest_record['group_id'] # 获取groupid
# 获取该时间戳之后的length条消息,且groupid相同
- chat_record = list(self.db.db.messages.find({"time": {"$gt": closest_time}, "group_id": group_id}).sort('time', 1).limit(length))
+ chat_record = list(
+ self.db.db.messages.find({"time": {"$gt": closest_time}, "group_id": group_id}).sort('time', 1).limit(
+ length))
for record in chat_record:
time_str = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(int(record['time'])))
try:
- displayname="[(%s)%s]%s" % (record["user_id"],record["user_nickname"],record["user_cardname"])
+ displayname = "[(%s)%s]%s" % (record["user_id"], record["user_nickname"], record["user_cardname"])
-            except:
+            except KeyError:
- displayname=record["user_nickname"] or "用户" + str(record["user_id"])
+ displayname = record["user_nickname"] or "用户" + str(record["user_id"])
chat_text += f'[{time_str}] {displayname}: {record["processed_plain_text"]}\n' # 添加发送者和时间信息
return chat_text
-
+
return [] # 如果没有找到记录,返回空列表
def save_graph_to_db(self):
@@ -166,138 +169,78 @@ def main():
password=os.getenv("MONGODB_PASSWORD", ""),
auth_source=os.getenv("MONGODB_AUTH_SOURCE", "")
)
-
+
memory_graph = Memory_graph()
memory_graph.load_graph_from_db()
-
+
# 只显示一次优化后的图形
visualize_graph_lite(memory_graph)
-
+
while True:
query = input("请输入新的查询概念(输入'退出'以结束):")
if query.lower() == '退出':
break
first_layer_items, second_layer_items = memory_graph.get_related_item(query)
if first_layer_items or second_layer_items:
- print("\n第一层记忆:")
+ logger.debug("第一层记忆:")
for item in first_layer_items:
- print(item)
- print("\n第二层记忆:")
+ logger.debug(item)
+ logger.debug("第二层记忆:")
for item in second_layer_items:
- print(item)
+ logger.debug(item)
else:
- print("未找到相关记忆。")
-
+ logger.debug("未找到相关记忆。")
+
def segment_text(text):
seg_text = list(jieba.cut(text))
- return seg_text
+ return seg_text
+
def find_topic(text, topic_num):
prompt = f'这是一段文字:{text}。请你从这段话中总结出{topic_num}个话题,帮我列出来,用逗号隔开,尽可能精简。只需要列举{topic_num}个话题就好,不要告诉我其他内容。'
return prompt
+
def topic_what(text, topic):
prompt = f'这是一段文字:{text}。我想知道这记忆里有什么关于{topic}的话题,帮我总结成一句自然的话,可以包含时间和人物。只输出这句话就好'
return prompt
-def visualize_graph(memory_graph: Memory_graph, color_by_memory: bool = False):
- # 设置中文字体
- plt.rcParams['font.sans-serif'] = ['SimHei'] # 用来正常显示中文标签
- plt.rcParams['axes.unicode_minus'] = False # 用来正常显示负号
-
- G = memory_graph.G
-
- # 保存图到本地
- nx.write_gml(G, "memory_graph.gml") # 保存为 GML 格式
-
- # 根据连接条数或记忆数量设置节点颜色
- node_colors = []
- nodes = list(G.nodes()) # 获取图中实际的节点列表
-
- if color_by_memory:
- # 计算每个节点的记忆数量
- memory_counts = []
- for node in nodes:
- memory_items = G.nodes[node].get('memory_items', [])
- if isinstance(memory_items, list):
- count = len(memory_items)
- else:
- count = 1 if memory_items else 0
- memory_counts.append(count)
- max_memories = max(memory_counts) if memory_counts else 1
-
- for count in memory_counts:
- # 使用不同的颜色方案:红色表示记忆多,蓝色表示记忆少
- if max_memories > 0:
- intensity = min(1.0, count / max_memories)
- color = (intensity, 0, 1.0 - intensity) # 从蓝色渐变到红色
- else:
- color = (0, 0, 1) # 如果没有记忆,则为蓝色
- node_colors.append(color)
- else:
- # 使用原来的连接数量着色方案
- max_degree = max(G.degree(), key=lambda x: x[1])[1] if G.degree() else 1
- for node in nodes:
- degree = G.degree(node)
- if max_degree > 0:
- red = min(1.0, degree / max_degree)
- blue = 1.0 - red
- color = (red, 0, blue)
- else:
- color = (0, 0, 1)
- node_colors.append(color)
-
- # 绘制图形
- plt.figure(figsize=(12, 8))
- pos = nx.spring_layout(G, k=1, iterations=50)
- nx.draw(G, pos,
- with_labels=True,
- node_color=node_colors,
- node_size=200,
- font_size=10,
- font_family='SimHei',
- font_weight='bold')
-
- title = '记忆图谱可视化 - ' + ('按记忆数量着色' if color_by_memory else '按连接数量着色')
- plt.title(title, fontsize=16, fontfamily='SimHei')
- plt.show()
-
def visualize_graph_lite(memory_graph: Memory_graph, color_by_memory: bool = False):
# 设置中文字体
plt.rcParams['font.sans-serif'] = ['SimHei'] # 用来正常显示中文标签
plt.rcParams['axes.unicode_minus'] = False # 用来正常显示负号
-
+
G = memory_graph.G
-
+
# 创建一个新图用于可视化
H = G.copy()
-
+
# 移除只有一条记忆的节点和连接数少于3的节点
nodes_to_remove = []
for node in H.nodes():
memory_items = H.nodes[node].get('memory_items', [])
memory_count = len(memory_items) if isinstance(memory_items, list) else (1 if memory_items else 0)
degree = H.degree(node)
- if memory_count < 5 or degree < 2: # 改为小于2而不是小于等于2
+        if memory_count < 3 or degree < 2:  # 过滤记忆数少于3条或连接数少于2的节点
nodes_to_remove.append(node)
-
+
H.remove_nodes_from(nodes_to_remove)
-
+
# 如果过滤后没有节点,则返回
if len(H.nodes()) == 0:
- print("过滤后没有符合条件的节点可显示")
+ logger.debug("过滤后没有符合条件的节点可显示")
return
-
+
# 保存图到本地
- nx.write_gml(H, "memory_graph.gml") # 保存为 GML 格式
+ # nx.write_gml(H, "memory_graph.gml") # 保存为 GML 格式
# 计算节点大小和颜色
node_colors = []
node_sizes = []
nodes = list(H.nodes())
-
+
# 获取最大记忆数和最大度数用于归一化
max_memories = 1
max_degree = 1
@@ -307,7 +250,7 @@ def visualize_graph_lite(memory_graph: Memory_graph, color_by_memory: bool = Fal
degree = H.degree(node)
max_memories = max(max_memories, memory_count)
max_degree = max(max_degree, degree)
-
+
# 计算每个节点的大小和颜色
for node in nodes:
# 计算节点大小(基于记忆数量)
@@ -315,37 +258,38 @@ def visualize_graph_lite(memory_graph: Memory_graph, color_by_memory: bool = Fal
memory_count = len(memory_items) if isinstance(memory_items, list) else (1 if memory_items else 0)
# 使用指数函数使变化更明显
ratio = memory_count / max_memories
- size = 500 + 5000 * (ratio ** 2) # 使用平方函数使差异更明显
+        size = 500 + 5000 * ratio  # 改为线性映射,使大小差异不那么夸张
node_sizes.append(size)
-
+
# 计算节点颜色(基于连接数)
degree = H.degree(node)
# 红色分量随着度数增加而增加
- red = min(1.0, degree / max_degree)
+ r = (degree / max_degree) ** 0.3
+ red = min(1.0, r)
# 蓝色分量随着度数减少而增加
- blue = 1.0 - red
- color = (red, 0, blue)
+ blue = max(0.0, 1 - red)
+ # blue = 1
+ color = (red, 0.1, blue)
node_colors.append(color)
-
+
# 绘制图形
plt.figure(figsize=(12, 8))
- pos = nx.spring_layout(H, k=1.5, iterations=50) # 增加k值使节点分布更开
- nx.draw(H, pos,
- with_labels=True,
- node_color=node_colors,
- node_size=node_sizes,
- font_size=10,
- font_family='SimHei',
- font_weight='bold',
- edge_color='gray',
- width=0.5,
- alpha=0.7)
-
+    pos = nx.spring_layout(H, k=1, iterations=50)  # k 控制节点间距
+ nx.draw(H, pos,
+ with_labels=True,
+ node_color=node_colors,
+ node_size=node_sizes,
+ font_size=10,
+ font_family='SimHei',
+ font_weight='bold',
+ edge_color='gray',
+ width=0.5,
+ alpha=0.9)
+
title = '记忆图谱可视化 - 节点大小表示记忆数量,颜色表示连接数'
plt.title(title, fontsize=16, fontfamily='SimHei')
plt.show()
-
-
-
+
+
if __name__ == "__main__":
- main()
\ No newline at end of file
+ main()
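
The lite visualizer now maps memory count linearly to node size and passes degree through a 0.3 exponent before using it as the red channel, which keeps low-degree nodes from washing out to pure blue. Both mappings, isolated from networkx (the function name is illustrative):

def node_style(memory_count: int, degree: int, max_memories: int, max_degree: int):
    size = 500 + 5000 * (memory_count / max_memories)  # linear in memory count
    red = min(1.0, (degree / max_degree) ** 0.3)       # 1/10 of max degree still gives ~0.5 red
    blue = max(0.0, 1.0 - red)
    return size, (red, 0.1, blue)

# node_style(5, 1, 10, 10) -> (3000.0, (0.501..., 0.1, 0.498...))
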
diff --git a/src/plugins/memory_system/memory.py b/src/plugins/memory_system/memory.py
index f88888aa4..0730f9e57 100644
--- a/src/plugins/memory_system/memory.py
+++ b/src/plugins/memory_system/memory.py
@@ -7,6 +7,7 @@ import time
import jieba
import networkx as nx
+from loguru import logger
from ...common.database import Database # 使用正确的导入语法
from ..chat.config import global_config
from ..chat.utils import (
@@ -22,29 +23,49 @@ class Memory_graph:
def __init__(self):
self.G = nx.Graph() # 使用 networkx 的图结构
self.db = Database.get_instance()
-
+
def connect_dot(self, concept1, concept2):
- # 如果边已存在,增加 strength
+ # 避免自连接
+ if concept1 == concept2:
+ return
+
+ current_time = datetime.datetime.now().timestamp()
+
+ # 如果边已存在,增加 strength
if self.G.has_edge(concept1, concept2):
self.G[concept1][concept2]['strength'] = self.G[concept1][concept2].get('strength', 1) + 1
+ # 更新最后修改时间
+ self.G[concept1][concept2]['last_modified'] = current_time
else:
- # 如果是新边,初始化 strength 为 1
- self.G.add_edge(concept1, concept2, strength=1)
-
+ # 如果是新边,初始化 strength 为 1
+ self.G.add_edge(concept1, concept2,
+ strength=1,
+ created_time=current_time, # 添加创建时间
+ last_modified=current_time) # 添加最后修改时间
+
def add_dot(self, concept, memory):
+ current_time = datetime.datetime.now().timestamp()
+
if concept in self.G:
- # 如果节点已存在,将新记忆添加到现有列表中
if 'memory_items' in self.G.nodes[concept]:
if not isinstance(self.G.nodes[concept]['memory_items'], list):
- # 如果当前不是列表,将其转换为列表
self.G.nodes[concept]['memory_items'] = [self.G.nodes[concept]['memory_items']]
self.G.nodes[concept]['memory_items'].append(memory)
+ # 更新最后修改时间
+ self.G.nodes[concept]['last_modified'] = current_time
else:
self.G.nodes[concept]['memory_items'] = [memory]
+ # 如果节点存在但没有memory_items,说明是第一次添加memory,设置created_time
+ if 'created_time' not in self.G.nodes[concept]:
+ self.G.nodes[concept]['created_time'] = current_time
+ self.G.nodes[concept]['last_modified'] = current_time
else:
- # 如果是新节点,创建新的记忆列表
- self.G.add_node(concept, memory_items=[memory])
-
+ # 如果是新节点,创建新的记忆列表
+ self.G.add_node(concept,
+ memory_items=[memory],
+ created_time=current_time, # 添加创建时间
+ last_modified=current_time) # 添加最后修改时间
+
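
connect_dot and add_dot now stamp every node and edge with created_time and last_modified, which the forgetting pass reads later. The same pattern on a bare networkx graph:

import datetime
import networkx as nx

G = nx.Graph()
now = datetime.datetime.now().timestamp()

# new association: strength 1 plus both timestamps
G.add_edge("苹果", "梨", strength=1, created_time=now, last_modified=now)

# reinforcing an existing edge: bump strength, refresh last_modified only
if G.has_edge("苹果", "梨"):
    G["苹果"]["梨"]["strength"] += 1
    G["苹果"]["梨"]["last_modified"] = datetime.datetime.now().timestamp()
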
def get_dot(self, concept):
# 检查节点是否存在于图中
if concept in self.G:
@@ -56,13 +77,13 @@ class Memory_graph:
def get_related_item(self, topic, depth=1):
if topic not in self.G:
return [], []
-
+
first_layer_items = []
second_layer_items = []
-
+
# 获取相邻节点
neighbors = list(self.G.neighbors(topic))
-
+
# 获取当前节点的记忆项
node_data = self.get_dot(topic)
if node_data:
@@ -73,7 +94,7 @@ class Memory_graph:
first_layer_items.extend(memory_items)
else:
first_layer_items.append(memory_items)
-
+
# 只在depth=2时获取第二层记忆
if depth >= 2:
# 获取相邻节点的记忆项
@@ -87,9 +108,9 @@ class Memory_graph:
second_layer_items.extend(memory_items)
else:
second_layer_items.append(memory_items)
-
+
return first_layer_items, second_layer_items
-
+
@property
def dots(self):
# 返回所有节点对应的 Memory_dot 对象
@@ -99,43 +120,43 @@ class Memory_graph:
"""随机删除指定话题中的一条记忆,如果话题没有记忆则移除该话题节点"""
if topic not in self.G:
return None
-
+
# 获取话题节点数据
node_data = self.G.nodes[topic]
-
+
# 如果节点存在memory_items
if 'memory_items' in node_data:
memory_items = node_data['memory_items']
-
+
# 确保memory_items是列表
if not isinstance(memory_items, list):
memory_items = [memory_items] if memory_items else []
-
+
# 如果有记忆项可以删除
if memory_items:
# 随机选择一个记忆项删除
removed_item = random.choice(memory_items)
memory_items.remove(removed_item)
-
+
# 更新节点的记忆项
if memory_items:
self.G.nodes[topic]['memory_items'] = memory_items
else:
# 如果没有记忆项了,删除整个节点
self.G.remove_node(topic)
-
+
return removed_item
-
+
return None
# 海马体
class Hippocampus:
- def __init__(self,memory_graph:Memory_graph):
+ def __init__(self, memory_graph: Memory_graph):
self.memory_graph = memory_graph
- self.llm_topic_judge = LLM_request(model = global_config.llm_topic_judge,temperature=0.5)
- self.llm_summary_by_topic = LLM_request(model = global_config.llm_summary_by_topic,temperature=0.5)
-
+ self.llm_topic_judge = LLM_request(model=global_config.llm_topic_judge, temperature=0.5)
+ self.llm_summary_by_topic = LLM_request(model=global_config.llm_summary_by_topic, temperature=0.5)
+
def get_all_node_names(self) -> list:
"""获取记忆图中所有节点的名字列表
@@ -156,98 +177,167 @@ class Hippocampus:
"""计算边的特征值"""
nodes = sorted([source, target])
return hash(f"{nodes[0]}:{nodes[1]}")
+
+ def get_memory_sample(self, chat_size=20, time_frequency: dict = {'near': 2, 'mid': 4, 'far': 3}):
+ """获取记忆样本
- def get_memory_sample(self,chat_size=20,time_frequency:dict={'near':2,'mid':4,'far':3}):
+ Returns:
+ list: 消息记录列表,每个元素是一个消息记录字典列表
+ """
current_timestamp = datetime.datetime.now().timestamp()
- chat_text = []
- #短期:1h 中期:4h 长期:24h
- for _ in range(time_frequency.get('near')): # 循环10次
- random_time = current_timestamp - random.randint(1, 3600) # 随机时间
- chat_ = get_cloest_chat_from_db(db=self.memory_graph.db, length=chat_size, timestamp=random_time)
- chat_text.append(chat_)
- for _ in range(time_frequency.get('mid')): # 循环10次
- random_time = current_timestamp - random.randint(3600, 3600*4) # 随机时间
- chat_ = get_cloest_chat_from_db(db=self.memory_graph.db, length=chat_size, timestamp=random_time)
- chat_text.append(chat_)
- for _ in range(time_frequency.get('far')): # 循环10次
- random_time = current_timestamp - random.randint(3600*4, 3600*24) # 随机时间
- chat_ = get_cloest_chat_from_db(db=self.memory_graph.db, length=chat_size, timestamp=random_time)
- chat_text.append(chat_)
- return [text for text in chat_text if text]
-
- async def memory_compress(self, input_text, compress_rate=0.1):
- print(input_text)
+ chat_samples = []
+
+ # 短期:1h 中期:4h 长期:24h
+ for _ in range(time_frequency.get('near')):
+ random_time = current_timestamp - random.randint(1, 3600)
+ messages = get_cloest_chat_from_db(db=self.memory_graph.db, length=chat_size, timestamp=random_time)
+ if messages:
+ chat_samples.append(messages)
+
+ for _ in range(time_frequency.get('mid')):
+ random_time = current_timestamp - random.randint(3600, 3600 * 4)
+ messages = get_cloest_chat_from_db(db=self.memory_graph.db, length=chat_size, timestamp=random_time)
+ if messages:
+ chat_samples.append(messages)
+
+ for _ in range(time_frequency.get('far')):
+ random_time = current_timestamp - random.randint(3600 * 4, 3600 * 24)
+ messages = get_cloest_chat_from_db(db=self.memory_graph.db, length=chat_size, timestamp=random_time)
+ if messages:
+ chat_samples.append(messages)
+
+ return chat_samples
+
+ async def memory_compress(self, messages: list, compress_rate=0.1):
+ """压缩消息记录为记忆
- #获取topics
+ Returns:
+ tuple: (压缩记忆集合, 相似主题字典)
+ """
+ if not messages:
+ return set(), {}
+
+ # 合并消息文本,同时保留时间信息
+ input_text = ""
+ time_info = ""
+ # 计算最早和最晚时间
+ earliest_time = min(msg['time'] for msg in messages)
+ latest_time = max(msg['time'] for msg in messages)
+
+ earliest_dt = datetime.datetime.fromtimestamp(earliest_time)
+ latest_dt = datetime.datetime.fromtimestamp(latest_time)
+
+ # 如果是同一年
+ if earliest_dt.year == latest_dt.year:
+ earliest_str = earliest_dt.strftime("%m-%d %H:%M:%S")
+ latest_str = latest_dt.strftime("%m-%d %H:%M:%S")
+ time_info += f"是在{earliest_dt.year}年,{earliest_str} 到 {latest_str} 的对话:\n"
+ else:
+ earliest_str = earliest_dt.strftime("%Y-%m-%d %H:%M:%S")
+ latest_str = latest_dt.strftime("%Y-%m-%d %H:%M:%S")
+ time_info += f"是从 {earliest_str} 到 {latest_str} 的对话:\n"
+
+ for msg in messages:
+ input_text += f"{msg['text']}\n"
+
+ logger.debug(input_text)
+
topic_num = self.calculate_topic_num(input_text, compress_rate)
topics_response = await self.llm_topic_judge.generate_response(self.find_topic_llm(input_text, topic_num))
- # 修改话题处理逻辑
- # 定义需要过滤的关键词
- filter_keywords = ['表情包', '图片', '回复', '聊天记录']
-
+
# 过滤topics
- topics = [topic.strip() for topic in topics_response[0].replace(",", ",").replace("、", ",").replace(" ", ",").split(",") if topic.strip()]
+ filter_keywords = global_config.memory_ban_words
+ topics = [topic.strip() for topic in
+ topics_response[0].replace(",", ",").replace("、", ",").replace(" ", ",").split(",") if topic.strip()]
filtered_topics = [topic for topic in topics if not any(keyword in topic for keyword in filter_keywords)]
-
- # print(f"原始话题: {topics}")
- print(f"过滤后话题: {filtered_topics}")
-
- # 使用过滤后的话题继续处理
+
+ logger.info(f"过滤后话题: {filtered_topics}")
+
+ # 创建所有话题的请求任务
tasks = []
for topic in filtered_topics:
- topic_what_prompt = self.topic_what(input_text, topic)
- # 创建异步任务
+ topic_what_prompt = self.topic_what(input_text, topic, time_info)
task = self.llm_summary_by_topic.generate_response_async(topic_what_prompt)
tasks.append((topic.strip(), task))
-
+
# 等待所有任务完成
compressed_memory = set()
+ similar_topics_dict = {} # 存储每个话题的相似主题列表
for topic, task in tasks:
response = await task
if response:
compressed_memory.add((topic, response[0]))
+ # 为每个话题查找相似的已存在主题
+ existing_topics = list(self.memory_graph.G.nodes())
+ similar_topics = []
- return compressed_memory
+ for existing_topic in existing_topics:
+ topic_words = set(jieba.cut(topic))
+ existing_words = set(jieba.cut(existing_topic))
+
+ all_words = topic_words | existing_words
+ v1 = [1 if word in topic_words else 0 for word in all_words]
+ v2 = [1 if word in existing_words else 0 for word in all_words]
+
+ similarity = cosine_similarity(v1, v2)
+
+ if similarity >= 0.6:
+ similar_topics.append((existing_topic, similarity))
+
+ similar_topics.sort(key=lambda x: x[1], reverse=True)
+ similar_topics = similar_topics[:5]
+ similar_topics_dict[topic] = similar_topics
- def calculate_topic_num(self,text, compress_rate):
+ return compressed_memory, similar_topics_dict
+
+ def calculate_topic_num(self, text, compress_rate):
"""计算文本的话题数量"""
information_content = calculate_information_content(text)
- topic_by_length = text.count('\n')*compress_rate
- topic_by_information_content = max(1, min(5, int((information_content-3) * 2)))
- topic_num = int((topic_by_length + topic_by_information_content)/2)
- print(f"topic_by_length: {topic_by_length}, topic_by_information_content: {topic_by_information_content}, topic_num: {topic_num}")
+ topic_by_length = text.count('\n') * compress_rate
+ topic_by_information_content = max(1, min(5, int((information_content - 3) * 2)))
+ topic_num = int((topic_by_length + topic_by_information_content) / 2)
+ logger.debug(
+ f"topic_by_length: {topic_by_length}, topic_by_information_content: {topic_by_information_content}, "
+ f"topic_num: {topic_num}")
return topic_num
- async def operation_build_memory(self,chat_size=20):
- # 最近消息获取频率
- time_frequency = {'near':2,'mid':4,'far':2}
- memory_sample = self.get_memory_sample(chat_size,time_frequency)
+ async def operation_build_memory(self, chat_size=20):
+ time_frequency = {'near': 3, 'mid': 8, 'far': 5}
+ memory_samples = self.get_memory_sample(chat_size, time_frequency)
- for i, input_text in enumerate(memory_sample, 1):
- # 加载进度可视化
+ for i, messages in enumerate(memory_samples, 1):
all_topics = []
- progress = (i / len(memory_sample)) * 100
+ # 加载进度可视化
+ progress = (i / len(memory_samples)) * 100
bar_length = 30
- filled_length = int(bar_length * i // len(memory_sample))
+ filled_length = int(bar_length * i // len(memory_samples))
bar = '█' * filled_length + '-' * (bar_length - filled_length)
- print(f"\n进度: [{bar}] {progress:.1f}% ({i}/{len(memory_sample)})")
+ logger.debug(f"进度: [{bar}] {progress:.1f}% ({i}/{len(memory_samples)})")
- # 生成压缩后记忆 ,表现为 (话题,记忆) 的元组
- compressed_memory = set()
compress_rate = 0.1
- compressed_memory = await self.memory_compress(input_text, compress_rate)
- print(f"\033[1;33m压缩后记忆数量\033[0m: {len(compressed_memory)}")
+ compressed_memory, similar_topics_dict = await self.memory_compress(messages, compress_rate)
+ logger.info(f"压缩后记忆数量: {len(compressed_memory)},似曾相识的话题: {len(similar_topics_dict)}")
- # 将记忆加入到图谱中
for topic, memory in compressed_memory:
- print(f"\033[1;32m添加节点\033[0m: {topic}")
+ logger.info(f"添加节点: {topic}")
self.memory_graph.add_dot(topic, memory)
- all_topics.append(topic) # 收集所有话题
+ all_topics.append(topic)
+
+ # 连接相似的已存在主题
+ if topic in similar_topics_dict:
+ similar_topics = similar_topics_dict[topic]
+ for similar_topic, similarity in similar_topics:
+ if topic != similar_topic:
+ strength = int(similarity * 10)
+ logger.info(f"连接相似节点: {topic} 和 {similar_topic} (强度: {strength})")
+ self.memory_graph.G.add_edge(topic, similar_topic, strength=strength)
+
+ # 连接同批次的相关话题
for i in range(len(all_topics)):
for j in range(i + 1, len(all_topics)):
- print(f"\033[1;32m连接节点\033[0m: {all_topics[i]} 和 {all_topics[j]}")
+ logger.info(f"连接同批次节点: {all_topics[i]} 和 {all_topics[j]}")
self.memory_graph.connect_dot(all_topics[i], all_topics[j])
-
+
self.sync_memory_to_db()
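
memory_compress now also compares each fresh topic against every existing graph node using cosine similarity over binary jieba word vectors, keeping matches above 0.6 and at most the top 5. That matching step, extracted into standalone helpers (the helper names are illustrative):

import math
import jieba

def word_set_similarity(a: str, b: str) -> float:
    # cosine over binary bag-of-words vectors built from jieba tokens
    wa, wb = set(jieba.cut(a)), set(jieba.cut(b))
    words = wa | wb
    v1 = [1 if w in wa else 0 for w in words]
    v2 = [1 if w in wb else 0 for w in words]
    dot = sum(x * y for x, y in zip(v1, v2))
    n1, n2 = math.sqrt(sum(v1)), math.sqrt(sum(v2))  # binary vectors: sum equals sum of squares
    return dot / (n1 * n2) if n1 and n2 else 0.0

def find_similar(topic: str, existing: list, threshold: float = 0.6, top_k: int = 5):
    scored = [(t, word_set_similarity(topic, t)) for t in existing if t != topic]
    scored = [(t, s) for t, s in scored if s >= threshold]
    return sorted(scored, key=lambda x: x[1], reverse=True)[:top_k]
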
def sync_memory_to_db(self):
@@ -255,52 +345,54 @@ class Hippocampus:
# 获取数据库中所有节点和内存中所有节点
db_nodes = list(self.memory_graph.db.db.graph_data.nodes.find())
memory_nodes = list(self.memory_graph.G.nodes(data=True))
-
- # 转换数据库节点为字典格式,方便查找
+
+ # 转换数据库节点为字典格式,方便查找
db_nodes_dict = {node['concept']: node for node in db_nodes}
-
+
# 检查并更新节点
for concept, data in memory_nodes:
memory_items = data.get('memory_items', [])
if not isinstance(memory_items, list):
memory_items = [memory_items] if memory_items else []
-
+
# 计算内存中节点的特征值
memory_hash = self.calculate_node_hash(concept, memory_items)
-
+
+ # 获取时间信息
+ created_time = data.get('created_time', datetime.datetime.now().timestamp())
+ last_modified = data.get('last_modified', datetime.datetime.now().timestamp())
+
if concept not in db_nodes_dict:
- # 数据库中缺少的节点,添加
+ # 数据库中缺少的节点,添加
node_data = {
'concept': concept,
'memory_items': memory_items,
- 'hash': memory_hash
+ 'hash': memory_hash,
+ 'created_time': created_time,
+ 'last_modified': last_modified
}
self.memory_graph.db.db.graph_data.nodes.insert_one(node_data)
else:
# 获取数据库中节点的特征值
db_node = db_nodes_dict[concept]
db_hash = db_node.get('hash', None)
-
- # 如果特征值不同,则更新节点
+
+ # 如果特征值不同,则更新节点
if db_hash != memory_hash:
self.memory_graph.db.db.graph_data.nodes.update_one(
{'concept': concept},
{'$set': {
'memory_items': memory_items,
- 'hash': memory_hash
+ 'hash': memory_hash,
+ 'created_time': created_time,
+ 'last_modified': last_modified
}}
)
-
- # 检查并删除数据库中多余的节点
- memory_concepts = set(node[0] for node in memory_nodes)
- for db_node in db_nodes:
- if db_node['concept'] not in memory_concepts:
- self.memory_graph.db.db.graph_data.nodes.delete_one({'concept': db_node['concept']})
-
+
# 处理边的信息
db_edges = list(self.memory_graph.db.db.graph_data.edges.find())
- memory_edges = list(self.memory_graph.G.edges())
-
+ memory_edges = list(self.memory_graph.G.edges(data=True))
+
# 创建边的哈希值字典
db_edge_dict = {}
for edge in db_edges:
@@ -309,20 +401,26 @@ class Hippocampus:
'hash': edge_hash,
'strength': edge.get('strength', 1)
}
-
+
# 检查并更新边
- for source, target in memory_edges:
+ for source, target, data in memory_edges:
edge_hash = self.calculate_edge_hash(source, target)
edge_key = (source, target)
- strength = self.memory_graph.G[source][target].get('strength', 1)
+ strength = data.get('strength', 1)
+ # 获取边的时间信息
+ created_time = data.get('created_time', datetime.datetime.now().timestamp())
+ last_modified = data.get('last_modified', datetime.datetime.now().timestamp())
+
if edge_key not in db_edge_dict:
# 添加新边
edge_data = {
'source': source,
'target': target,
'strength': strength,
- 'hash': edge_hash
+ 'hash': edge_hash,
+ 'created_time': created_time,
+ 'last_modified': last_modified
}
self.memory_graph.db.db.graph_data.edges.insert_one(edge_data)
else:
@@ -332,25 +430,17 @@ class Hippocampus:
{'source': source, 'target': target},
{'$set': {
'hash': edge_hash,
- 'strength': strength
+ 'strength': strength,
+ 'created_time': created_time,
+ 'last_modified': last_modified
}}
)
-
- # 删除多余的边
- memory_edge_set = set(memory_edges)
- for edge_key in db_edge_dict:
- if edge_key not in memory_edge_set:
- source, target = edge_key
- self.memory_graph.db.db.graph_data.edges.delete_one({
- 'source': source,
- 'target': target
- })
def sync_memory_from_db(self):
"""从数据库同步数据到内存中的图结构"""
# 清空当前图
self.memory_graph.G.clear()
-
+
# 从数据库加载所有节点
nodes = self.memory_graph.db.db.graph_data.nodes.find()
for node in nodes:
@@ -359,61 +449,107 @@ class Hippocampus:
# 确保memory_items是列表
if not isinstance(memory_items, list):
memory_items = [memory_items] if memory_items else []
- # 添加节点到图中
- self.memory_graph.G.add_node(concept, memory_items=memory_items)
+ # 获取时间信息
+ created_time = node.get('created_time', datetime.datetime.now().timestamp())
+ last_modified = node.get('last_modified', datetime.datetime.now().timestamp())
+
+ # 添加节点到图中
+ self.memory_graph.G.add_node(concept,
+ memory_items=memory_items,
+ created_time=created_time,
+ last_modified=last_modified)
+
# 从数据库加载所有边
edges = self.memory_graph.db.db.graph_data.edges.find()
for edge in edges:
source = edge['source']
target = edge['target']
- strength = edge.get('strength', 1) # 获取 strength,默认为 1
+ strength = edge.get('strength', 1) # 获取 strength,默认为 1
+
+ # 获取时间信息
+ created_time = edge.get('created_time', datetime.datetime.now().timestamp())
+ last_modified = edge.get('last_modified', datetime.datetime.now().timestamp())
+
# 只有当源节点和目标节点都存在时才添加边
if source in self.memory_graph.G and target in self.memory_graph.G:
- self.memory_graph.G.add_edge(source, target, strength=strength)
-
+ self.memory_graph.G.add_edge(source, target,
+ strength=strength,
+ created_time=created_time,
+ last_modified=last_modified)
+
async def operation_forget_topic(self, percentage=0.1):
- """随机选择图中一定比例的节点进行检查,根据条件决定是否遗忘"""
- # 获取所有节点
+ """随机选择图中一定比例的节点和边进行检查,根据时间条件决定是否遗忘"""
all_nodes = list(self.memory_graph.G.nodes())
- # 计算要检查的节点数量
- check_count = max(1, int(len(all_nodes) * percentage))
- # 随机选择节点
- nodes_to_check = random.sample(all_nodes, check_count)
+ all_edges = list(self.memory_graph.G.edges())
- forgotten_nodes = []
+ check_nodes_count = max(1, int(len(all_nodes) * percentage))
+ check_edges_count = max(1, int(len(all_edges) * percentage))
+
+ nodes_to_check = random.sample(all_nodes, check_nodes_count)
+ edges_to_check = random.sample(all_edges, check_edges_count)
+
+ edge_changes = {'weakened': 0, 'removed': 0}
+ node_changes = {'reduced': 0, 'removed': 0}
+
+ current_time = datetime.datetime.now().timestamp()
+
+ # 检查并遗忘连接
+ logger.info("开始检查连接...")
+ for source, target in edges_to_check:
+ edge_data = self.memory_graph.G[source][target]
+            last_modified = edge_data.get('last_modified', current_time)
+            if current_time - last_modified > 3600 * 24:  # 超过24小时未被强化的连接才衰减
+ current_strength = edge_data.get('strength', 1)
+ new_strength = current_strength - 1
+
+ if new_strength <= 0:
+ self.memory_graph.G.remove_edge(source, target)
+ edge_changes['removed'] += 1
+ logger.info(f"\033[1;31m[连接移除]\033[0m {source} - {target}")
+ else:
+ edge_data['strength'] = new_strength
+ edge_data['last_modified'] = current_time
+ edge_changes['weakened'] += 1
+ logger.info(f"\033[1;34m[连接减弱]\033[0m {source} - {target} (强度: {current_strength} -> {new_strength})")
+
+ # 检查并遗忘话题
+ logger.info("开始检查节点...")
for node in nodes_to_check:
- # 获取节点的连接数
- connections = self.memory_graph.G.degree(node)
+ node_data = self.memory_graph.G.nodes[node]
+ last_modified = node_data.get('last_modified', current_time)
- # 获取节点的内容条数
- memory_items = self.memory_graph.G.nodes[node].get('memory_items', [])
- if not isinstance(memory_items, list):
- memory_items = [memory_items] if memory_items else []
- content_count = len(memory_items)
-
- # 检查连接强度
- weak_connections = True
- if connections > 1: # 只有当连接数大于1时才检查强度
- for neighbor in self.memory_graph.G.neighbors(node):
- strength = self.memory_graph.G[node][neighbor].get('strength', 1)
- if strength > 2:
- weak_connections = False
- break
-
- # 如果满足遗忘条件
- if (connections <= 1 and weak_connections) or content_count <= 2:
- removed_item = self.memory_graph.forget_topic(node)
- if removed_item:
- forgotten_nodes.append((node, removed_item))
- print(f"遗忘节点 {node} 的记忆: {removed_item}")
+            if current_time - last_modified > 3600 * 24:  # 超过24小时未被更新的节点才遗忘
+ memory_items = node_data.get('memory_items', [])
+ if not isinstance(memory_items, list):
+ memory_items = [memory_items] if memory_items else []
+
+ if memory_items:
+ current_count = len(memory_items)
+ removed_item = random.choice(memory_items)
+ memory_items.remove(removed_item)
+
+ if memory_items:
+ self.memory_graph.G.nodes[node]['memory_items'] = memory_items
+ self.memory_graph.G.nodes[node]['last_modified'] = current_time
+ node_changes['reduced'] += 1
+ logger.info(f"\033[1;33m[记忆减少]\033[0m {node} (记忆数量: {current_count} -> {len(memory_items)})")
+ else:
+ self.memory_graph.G.remove_node(node)
+ node_changes['removed'] += 1
+ logger.info(f"\033[1;31m[节点移除]\033[0m {node}")
- # 同步到数据库
- if forgotten_nodes:
+ if any(count > 0 for count in edge_changes.values()) or any(count > 0 for count in node_changes.values()):
self.sync_memory_to_db()
- print(f"完成遗忘操作,共遗忘 {len(forgotten_nodes)} 个节点的记忆")
+ logger.info("\n遗忘操作统计:")
+ logger.info(f"连接变化: {edge_changes['weakened']} 个减弱, {edge_changes['removed']} 个移除")
+ logger.info(f"节点变化: {node_changes['reduced']} 个减少记忆, {node_changes['removed']} 个移除")
else:
- print("本次检查没有节点满足遗忘条件")
+ logger.info("\n本次检查没有节点或连接满足遗忘条件")
async def merge_memory(self, topic):
"""
@@ -426,35 +562,35 @@ class Hippocampus:
memory_items = self.memory_graph.G.nodes[topic].get('memory_items', [])
if not isinstance(memory_items, list):
memory_items = [memory_items] if memory_items else []
-
+
# 如果记忆项不足,直接返回
if len(memory_items) < 10:
return
-
+
# 随机选择10条记忆
selected_memories = random.sample(memory_items, 10)
-
+
# 拼接成文本
merged_text = "\n".join(selected_memories)
- print(f"\n[合并记忆] 话题: {topic}")
- print(f"选择的记忆:\n{merged_text}")
-
+ logger.debug(f"\n[合并记忆] 话题: {topic}")
+ logger.debug(f"选择的记忆:\n{merged_text}")
+
# 使用memory_compress生成新的压缩记忆
- compressed_memories = await self.memory_compress(merged_text, 0.1)
-
+ compressed_memories, _ = await self.memory_compress(selected_memories, 0.1)
+
# 从原记忆列表中移除被选中的记忆
for memory in selected_memories:
memory_items.remove(memory)
-
+
# 添加新的压缩记忆
for _, compressed_memory in compressed_memories:
memory_items.append(compressed_memory)
- print(f"添加压缩记忆: {compressed_memory}")
-
+ logger.info(f"添加压缩记忆: {compressed_memory}")
+
# 更新节点的记忆项
self.memory_graph.G.nodes[topic]['memory_items'] = memory_items
- print(f"完成记忆合并,当前记忆数量: {len(memory_items)}")
-
+ logger.debug(f"完成记忆合并,当前记忆数量: {len(memory_items)}")
+
async def operation_merge_memory(self, percentage=0.1):
"""
随机检查一定比例的节点,对内容数量超过100的节点进行记忆合并
@@ -468,7 +604,7 @@ class Hippocampus:
check_count = max(1, int(len(all_nodes) * percentage))
# 随机选择节点
nodes_to_check = random.sample(all_nodes, check_count)
-
+
merged_nodes = []
for node in nodes_to_check:
# 获取节点的内容条数
@@ -476,26 +612,26 @@ class Hippocampus:
if not isinstance(memory_items, list):
memory_items = [memory_items] if memory_items else []
content_count = len(memory_items)
-
+
# 如果内容数量超过100,进行合并
if content_count > 100:
- print(f"\n检查节点: {node}, 当前记忆数量: {content_count}")
+ logger.debug(f"检查节点: {node}, 当前记忆数量: {content_count}")
await self.merge_memory(node)
merged_nodes.append(node)
-
+
# 同步到数据库
if merged_nodes:
self.sync_memory_to_db()
- print(f"\n完成记忆合并操作,共处理 {len(merged_nodes)} 个节点")
+ logger.debug(f"完成记忆合并操作,共处理 {len(merged_nodes)} 个节点")
else:
- print("\n本次检查没有需要合并的节点")
+ logger.debug("本次检查没有需要合并的节点")
- def find_topic_llm(self,text, topic_num):
+ def find_topic_llm(self, text, topic_num):
prompt = f'这是一段文字:{text}。请你从这段话中总结出{topic_num}个关键的概念,可以是名词,动词,或者特定人物,帮我列出来,用逗号,隔开,尽可能精简。只需要列举{topic_num}个话题就好,不要有序号,不要告诉我其他内容。'
return prompt
- def topic_what(self,text, topic):
- prompt = f'这是一段文字:{text}。我想让你基于这段文字来概括"{topic}"这个概念,帮我总结成一句自然的话,可以包含时间和人物,以及具体的观点。只输出这句话就好'
+ def topic_what(self, text, topic, time_info):
+ prompt = f'这是一段文字,{time_info}:{text}。我想让你基于这段文字来概括"{topic}"这个概念,帮我总结成一句自然的话,可以包含时间和人物,以及具体的观点。只输出这句话就好'
return prompt
async def _identify_topics(self, text: str) -> list:
@@ -509,11 +645,12 @@ class Hippocampus:
"""
topics_response = await self.llm_topic_judge.generate_response(self.find_topic_llm(text, 5))
# print(f"话题: {topics_response[0]}")
- topics = [topic.strip() for topic in topics_response[0].replace(",", ",").replace("、", ",").replace(" ", ",").split(",") if topic.strip()]
+ topics = [topic.strip() for topic in
+ topics_response[0].replace(",", ",").replace("、", ",").replace(" ", ",").split(",") if topic.strip()]
# print(f"话题: {topics}")
-
+
return topics
-
+
def _find_similar_topics(self, topics: list, similarity_threshold: float = 0.4, debug_info: str = "") -> list:
"""查找与给定主题相似的记忆主题
@@ -527,16 +664,16 @@ class Hippocampus:
"""
all_memory_topics = self.get_all_node_names()
all_similar_topics = []
-
+
# 计算每个识别出的主题与记忆主题的相似度
for topic in topics:
if debug_info:
# print(f"\033[1;32m[{debug_info}]\033[0m 正在思考有没有见过: {topic}")
pass
-
+
topic_vector = text_to_vector(topic)
has_similar_topic = False
-
+
for memory_topic in all_memory_topics:
memory_vector = text_to_vector(memory_topic)
# 获取所有唯一词
@@ -546,20 +683,20 @@ class Hippocampus:
v2 = [memory_vector.get(word, 0) for word in all_words]
# 计算相似度
similarity = cosine_similarity(v1, v2)
-
+
if similarity >= similarity_threshold:
has_similar_topic = True
if debug_info:
# print(f"\033[1;32m[{debug_info}]\033[0m 找到相似主题: {topic} -> {memory_topic} (相似度: {similarity:.2f})")
pass
all_similar_topics.append((memory_topic, similarity))
-
+
if not has_similar_topic and debug_info:
# print(f"\033[1;31m[{debug_info}]\033[0m 没有见过: {topic} ,呃呃")
pass
-
+
return all_similar_topics
-
+
def _get_top_topics(self, similar_topics: list, max_topics: int = 5) -> list:
"""获取相似度最高的主题
@@ -572,36 +709,36 @@ class Hippocampus:
"""
seen_topics = set()
top_topics = []
-
+
for topic, score in sorted(similar_topics, key=lambda x: x[1], reverse=True):
if topic not in seen_topics and len(top_topics) < max_topics:
seen_topics.add(topic)
top_topics.append((topic, score))
-
+
return top_topics
async def memory_activate_value(self, text: str, max_topics: int = 5, similarity_threshold: float = 0.3) -> int:
"""计算输入文本对记忆的激活程度"""
- print(f"\033[1;32m[记忆激活]\033[0m 识别主题: {await self._identify_topics(text)}")
-
+ logger.info(f"识别主题: {await self._identify_topics(text)}")
+
# 识别主题
identified_topics = await self._identify_topics(text)
if not identified_topics:
return 0
-
+
# 查找相似主题
all_similar_topics = self._find_similar_topics(
- identified_topics,
+ identified_topics,
similarity_threshold=similarity_threshold,
debug_info="记忆激活"
)
-
+
if not all_similar_topics:
return 0
-
+
# 获取最相关的主题
top_topics = self._get_top_topics(all_similar_topics, max_topics)
-
+
# 如果只找到一个主题,进行惩罚
if len(top_topics) == 1:
topic, score = top_topics[0]
@@ -611,15 +748,16 @@ class Hippocampus:
memory_items = [memory_items] if memory_items else []
content_count = len(memory_items)
penalty = 1.0 / (1 + math.log(content_count + 1))
-
+
activation = int(score * 50 * penalty)
- print(f"\033[1;32m[记忆激活]\033[0m 单主题「{topic}」- 相似度: {score:.3f}, 内容数: {content_count}, 激活值: {activation}")
+ logger.info(
+ f"[记忆激活]单主题「{topic}」- 相似度: {score:.3f}, 内容数: {content_count}, 激活值: {activation}")
return activation
-
+
# 计算关键词匹配率,同时考虑内容数量
matched_topics = set()
topic_similarities = {}
-
+
for memory_topic, similarity in top_topics:
# 计算内容数量惩罚
memory_items = self.memory_graph.G.nodes[memory_topic].get('memory_items', [])
@@ -627,7 +765,7 @@ class Hippocampus:
memory_items = [memory_items] if memory_items else []
content_count = len(memory_items)
penalty = 1.0 / (1 + math.log(content_count + 1))
-
+
# 对每个记忆主题,检查它与哪些输入主题相似
for input_topic in identified_topics:
topic_vector = text_to_vector(input_topic)
@@ -640,33 +778,36 @@ class Hippocampus:
matched_topics.add(input_topic)
adjusted_sim = sim * penalty
topic_similarities[input_topic] = max(topic_similarities.get(input_topic, 0), adjusted_sim)
- print(f"\033[1;32m[记忆激活]\033[0m 主题「{input_topic}」-> 「{memory_topic}」(内容数: {content_count}, 相似度: {adjusted_sim:.3f})")
-
+ logger.info(
+ f"[记忆激活]主题「{input_topic}」-> 「{memory_topic}」(内容数: {content_count}, 相似度: {adjusted_sim:.3f})")
+
# 计算主题匹配率和平均相似度
topic_match = len(matched_topics) / len(identified_topics)
average_similarities = sum(topic_similarities.values()) / len(topic_similarities) if topic_similarities else 0
-
+
# 计算最终激活值
activation = int((topic_match + average_similarities) / 2 * 100)
- print(f"\033[1;32m[记忆激活]\033[0m 匹配率: {topic_match:.3f}, 平均相似度: {average_similarities:.3f}, 激活值: {activation}")
-
+ logger.info(
+ f"[记忆激活]匹配率: {topic_match:.3f}, 平均相似度: {average_similarities:.3f}, 激活值: {activation}")
+
return activation
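
The activation arithmetic above is easy to miss inside the logging: topics that already hold many memories are damped by penalty = 1 / (1 + ln(n + 1)), and the final value averages the topic match rate with the mean penalty-adjusted similarity. A worked sketch with hypothetical numbers:

import math

def activation(matched: int, identified: int, adjusted_sims: list) -> int:
    topic_match = matched / identified
    avg_sim = sum(adjusted_sims) / len(adjusted_sims) if adjusted_sims else 0
    return int((topic_match + avg_sim) / 2 * 100)

penalty = 1.0 / (1 + math.log(3 + 1))  # a topic holding 3 memories is damped to ~0.42
print(activation(2, 3, [0.5, 0.3]))    # 2 of 3 input topics matched -> 53
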
- async def get_relevant_memories(self, text: str, max_topics: int = 5, similarity_threshold: float = 0.4, max_memory_num: int = 5) -> list:
+ async def get_relevant_memories(self, text: str, max_topics: int = 5, similarity_threshold: float = 0.4,
+ max_memory_num: int = 5) -> list:
"""根据输入文本获取相关的记忆内容"""
# 识别主题
identified_topics = await self._identify_topics(text)
-
+
# 查找相似主题
all_similar_topics = self._find_similar_topics(
- identified_topics,
+ identified_topics,
similarity_threshold=similarity_threshold,
debug_info="记忆检索"
)
-
+
# 获取最相关的主题
relevant_topics = self._get_top_topics(all_similar_topics, max_topics)
-
+
# 获取相关记忆内容
relevant_memories = []
for topic, score in relevant_topics:
@@ -674,8 +815,8 @@ class Hippocampus:
first_layer, _ = self.memory_graph.get_related_item(topic, depth=1)
if first_layer:
# 如果记忆条数超过限制,随机选择指定数量的记忆
- if len(first_layer) > max_memory_num/2:
- first_layer = random.sample(first_layer, max_memory_num//2)
+ if len(first_layer) > max_memory_num / 2:
+ first_layer = random.sample(first_layer, max_memory_num // 2)
# 为每条记忆添加来源主题和相似度信息
for memory in first_layer:
relevant_memories.append({
@@ -683,20 +824,20 @@ class Hippocampus:
'similarity': score,
'content': memory
})
-
+
# 如果记忆数量超过5个,随机选择5个
# 按相似度排序
relevant_memories.sort(key=lambda x: x['similarity'], reverse=True)
-
+
if len(relevant_memories) > max_memory_num:
relevant_memories = random.sample(relevant_memories, max_memory_num)
-
+
return relevant_memories
def segment_text(text):
seg_text = list(jieba.cut(text))
- return seg_text
+ return seg_text
from nonebot import get_driver
@@ -707,19 +848,19 @@ config = driver.config
start_time = time.time()
Database.initialize(
- host= config.MONGODB_HOST,
- port= config.MONGODB_PORT,
- db_name= config.DATABASE_NAME,
- username= config.MONGODB_USERNAME,
- password= config.MONGODB_PASSWORD,
+ host=config.MONGODB_HOST,
+ port=config.MONGODB_PORT,
+ db_name=config.DATABASE_NAME,
+ username=config.MONGODB_USERNAME,
+ password=config.MONGODB_PASSWORD,
auth_source=config.MONGODB_AUTH_SOURCE
)
-#创建记忆图
+# 创建记忆图
memory_graph = Memory_graph()
-#创建海马体
+# 创建海马体
hippocampus = Hippocampus(memory_graph)
-#从数据库加载记忆图
+# 从数据库加载记忆图
hippocampus.sync_memory_from_db()
end_time = time.time()
-print(f"\033[32m[加载海马体耗时: {end_time - start_time:.2f} 秒]\033[0m")
\ No newline at end of file
+logger.success(f"加载海马体耗时: {end_time - start_time:.2f} 秒")
diff --git a/src/plugins/memory_system/memory_manual_build.py b/src/plugins/memory_system/memory_manual_build.py
index 3124bc8e4..3c120f21b 100644
--- a/src/plugins/memory_system/memory_manual_build.py
+++ b/src/plugins/memory_system/memory_manual_build.py
@@ -13,6 +13,7 @@ import networkx as nx
import pymongo
from dotenv import load_dotenv
from loguru import logger
+import jieba
# from chat.config import global_config
sys.path.append("C:/GitHub/MaiMBot") # 添加项目根目录到 Python 路径
@@ -86,23 +87,26 @@ def calculate_information_content(text):
return entropy
def get_cloest_chat_from_db(db, length: int, timestamp: str):
- """从数据库中获取最接近指定时间戳的聊天记录,并记录读取次数"""
- chat_text = ''
+ """从数据库中获取最接近指定时间戳的聊天记录,并记录读取次数
+
+ Returns:
+ list: 消息记录字典列表,每个字典包含消息内容和时间信息
+ """
+ chat_records = []
closest_record = db.db.messages.find_one({"time": {"$lte": timestamp}}, sort=[('time', -1)])
if closest_record and closest_record.get('memorized', 0) < 4:
closest_time = closest_record['time']
- group_id = closest_record['group_id'] # 获取groupid
+ group_id = closest_record['group_id']
# 获取该时间戳之后的length条消息,且groupid相同
- chat_records = list(db.db.messages.find(
+ records = list(db.db.messages.find(
{"time": {"$gt": closest_time}, "group_id": group_id}
).sort('time', 1).limit(length))
# 更新每条消息的memorized属性
- for record in chat_records:
- # 检查当前记录的memorized值
+ for record in records:
current_memorized = record.get('memorized', 0)
- if current_memorized > 3:
+                if current_memorized > 3:
-                    print("消息已读取3次,跳过")
-                    return ''
+                    print("消息已读取超过3次,跳过")
+                    return []
@@ -112,11 +116,14 @@ def get_cloest_chat_from_db(db, length: int, timestamp: str):
{"$set": {"memorized": current_memorized + 1}}
)
- chat_text += record["detailed_plain_text"]
+ # 添加到记录列表中
+ chat_records.append({
+ 'text': record["detailed_plain_text"],
+ 'time': record["time"],
+ 'group_id': record["group_id"]
+ })
- return chat_text
- print("消息已读取3次,跳过")
- return ''
+ return chat_records
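
get_cloest_chat_from_db now rations training data through a memorized counter: each message may feed memory building at most four times, after which the whole sample is discarded. The guard, sketched against a pymongo collection with the same field names (the function name is illustrative):

def take_records(messages, start_time: float, group_id: int, length: int = 20):
    records = list(messages.find({"time": {"$gt": start_time}, "group_id": group_id})
                   .sort('time', 1).limit(length))
    out = []
    for record in records:
        memorized = record.get('memorized', 0)
        if memorized > 3:
            return []  # an over-read message poisons the sample: discard it all
        messages.update_one({"_id": record["_id"]},
                            {"$set": {"memorized": memorized + 1}})
        out.append({'text': record["detailed_plain_text"],
                    'time': record["time"], 'group_id': record["group_id"]})
    return out
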
class Memory_graph:
def __init__(self):
@@ -205,22 +212,34 @@ class Hippocampus:
self.llm_model_summary = LLMModel(model_name="Qwen/Qwen2.5-32B-Instruct")
def get_memory_sample(self, chat_size=20, time_frequency:dict={'near':2,'mid':4,'far':3}):
+ """获取记忆样本
+
+ Returns:
+ list: 消息记录列表,每个元素是一个消息记录字典列表
+ """
current_timestamp = datetime.datetime.now().timestamp()
- chat_text = []
- #短期:1h 中期:4h 长期:24h
- for _ in range(time_frequency.get('near')): # 循环10次
- random_time = current_timestamp - random.randint(1, 3600*4) # 随机时间
- chat_ = get_cloest_chat_from_db(db=self.memory_graph.db, length=chat_size, timestamp=random_time)
- chat_text.append(chat_)
- for _ in range(time_frequency.get('mid')): # 循环10次
- random_time = current_timestamp - random.randint(3600*4, 3600*24) # 随机时间
- chat_ = get_cloest_chat_from_db(db=self.memory_graph.db, length=chat_size, timestamp=random_time)
- chat_text.append(chat_)
- for _ in range(time_frequency.get('far')): # 循环10次
- random_time = current_timestamp - random.randint(3600*24, 3600*24*7) # 随机时间
- chat_ = get_cloest_chat_from_db(db=self.memory_graph.db, length=chat_size, timestamp=random_time)
- chat_text.append(chat_)
- return [chat for chat in chat_text if chat]
+ chat_samples = []
+
+ # 短期:1h 中期:4h 长期:24h
+ for _ in range(time_frequency.get('near')):
+ random_time = current_timestamp - random.randint(1, 3600*4)
+ messages = get_cloest_chat_from_db(db=self.memory_graph.db, length=chat_size, timestamp=random_time)
+ if messages:
+ chat_samples.append(messages)
+
+ for _ in range(time_frequency.get('mid')):
+ random_time = current_timestamp - random.randint(3600*4, 3600*24)
+ messages = get_cloest_chat_from_db(db=self.memory_graph.db, length=chat_size, timestamp=random_time)
+ if messages:
+ chat_samples.append(messages)
+
+ for _ in range(time_frequency.get('far')):
+ random_time = current_timestamp - random.randint(3600*24, 3600*24*7)
+ messages = get_cloest_chat_from_db(db=self.memory_graph.db, length=chat_size, timestamp=random_time)
+ if messages:
+ chat_samples.append(messages)
+
+ return chat_samples
def calculate_topic_num(self,text, compress_rate):
"""计算文本的话题数量"""
@@ -231,16 +250,49 @@ class Hippocampus:
print(f"topic_by_length: {topic_by_length}, topic_by_information_content: {topic_by_information_content}, topic_num: {topic_num}")
return topic_num
- async def memory_compress(self, input_text, compress_rate=0.1):
+ async def memory_compress(self, messages: list, compress_rate=0.1):
+ """压缩消息记录为记忆
+
+ Args:
+ messages: 消息记录字典列表,每个字典包含text和time字段
+ compress_rate: 压缩率
+
+ Returns:
+ set: (话题, 记忆) 元组集合
+ """
+ if not messages:
+ return set()
+
+ # 合并消息文本,同时保留时间信息
+ input_text = ""
+ time_info = ""
+ # 计算最早和最晚时间
+ earliest_time = min(msg['time'] for msg in messages)
+ latest_time = max(msg['time'] for msg in messages)
+
+ earliest_dt = datetime.datetime.fromtimestamp(earliest_time)
+ latest_dt = datetime.datetime.fromtimestamp(latest_time)
+
+ # 如果是同一年
+ if earliest_dt.year == latest_dt.year:
+ earliest_str = earliest_dt.strftime("%m-%d %H:%M:%S")
+ latest_str = latest_dt.strftime("%m-%d %H:%M:%S")
+ time_info += f"是在{earliest_dt.year}年,{earliest_str} 到 {latest_str} 的对话:\n"
+ else:
+ earliest_str = earliest_dt.strftime("%Y-%m-%d %H:%M:%S")
+ latest_str = latest_dt.strftime("%Y-%m-%d %H:%M:%S")
+ time_info += f"是从 {earliest_str} 到 {latest_str} 的对话:\n"
+
+ for msg in messages:
+ input_text += f"{msg['text']}\n"
+
print(input_text)
topic_num = self.calculate_topic_num(input_text, compress_rate)
topics_response = self.llm_model_get_topic.generate_response(self.find_topic_llm(input_text, topic_num))
- # 修改话题处理逻辑
- # 定义需要过滤的关键词
- filter_keywords = ['表情包', '图片', '回复', '聊天记录']
# 过滤topics
+ filter_keywords = ['表情包', '图片', '回复', '聊天记录']
topics = [topic.strip() for topic in topics_response[0].replace(",", ",").replace("、", ",").replace(" ", ",").split(",") if topic.strip()]
filtered_topics = [topic for topic in topics if not any(keyword in topic for keyword in filter_keywords)]
@@ -250,7 +302,7 @@ class Hippocampus:
# 创建所有话题的请求任务
tasks = []
for topic in filtered_topics:
- topic_what_prompt = self.topic_what(input_text, topic)
+            topic_what_prompt = self.topic_what(input_text, topic, time_info)
# 创建异步任务
task = self.llm_model_small.generate_response_async(topic_what_prompt)
tasks.append((topic.strip(), task))
@@ -267,37 +319,35 @@ class Hippocampus:
async def operation_build_memory(self, chat_size=12):
# 最近消息获取频率
time_frequency = {'near': 3, 'mid': 8, 'far': 5}
- memory_sample = self.get_memory_sample(chat_size, time_frequency)
+ memory_samples = self.get_memory_sample(chat_size, time_frequency)
all_topics = [] # 用于存储所有话题
- for i, input_text in enumerate(memory_sample, 1):
+ for i, messages in enumerate(memory_samples, 1):
# 加载进度可视化
all_topics = []
- progress = (i / len(memory_sample)) * 100
+ progress = (i / len(memory_samples)) * 100
bar_length = 30
- filled_length = int(bar_length * i // len(memory_sample))
+ filled_length = int(bar_length * i // len(memory_samples))
bar = '█' * filled_length + '-' * (bar_length - filled_length)
- print(f"\n进度: [{bar}] {progress:.1f}% ({i}/{len(memory_sample)})")
+ print(f"\n进度: [{bar}] {progress:.1f}% ({i}/{len(memory_samples)})")
- # 生成压缩后记忆 ,表现为 (话题,记忆) 的元组
- compressed_memory = set()
+ # 生成压缩后记忆
compress_rate = 0.1
- compressed_memory = await self.memory_compress(input_text, compress_rate)
+ compressed_memory = await self.memory_compress(messages, compress_rate)
print(f"\033[1;33m压缩后记忆数量\033[0m: {len(compressed_memory)}")
# 将记忆加入到图谱中
for topic, memory in compressed_memory:
print(f"\033[1;32m添加节点\033[0m: {topic}")
self.memory_graph.add_dot(topic, memory)
- all_topics.append(topic) # 收集所有话题
+ all_topics.append(topic)
+
+ # 连接相关话题
for i in range(len(all_topics)):
for j in range(i + 1, len(all_topics)):
print(f"\033[1;32m连接节点\033[0m: {all_topics[i]} 和 {all_topics[j]}")
self.memory_graph.connect_dot(all_topics[i], all_topics[j])
-
-
-
self.sync_memory_to_db()
@@ -375,7 +425,7 @@ class Hippocampus:
if concept not in db_nodes_dict:
# 数据库中缺少的节点,添加
- logger.info(f"添加新节点: {concept}")
+ # logger.info(f"添加新节点: {concept}")
node_data = {
'concept': concept,
'memory_items': memory_items,
@@ -389,7 +439,7 @@ class Hippocampus:
# 如果特征值不同,则更新节点
if db_hash != memory_hash:
- logger.info(f"更新节点内容: {concept}")
+ # logger.info(f"更新节点内容: {concept}")
self.memory_graph.db.db.graph_data.nodes.update_one(
{'concept': concept},
{'$set': {
@@ -402,7 +452,7 @@ class Hippocampus:
memory_concepts = set(node[0] for node in memory_nodes)
for db_node in db_nodes:
if db_node['concept'] not in memory_concepts:
- logger.info(f"删除多余节点: {db_node['concept']}")
+ # logger.info(f"删除多余节点: {db_node['concept']}")
self.memory_graph.db.db.graph_data.nodes.delete_one({'concept': db_node['concept']})
# 处理边的信息
@@ -460,9 +510,10 @@ class Hippocampus:
prompt = f'这是一段文字:{text}。请你从这段话中总结出{topic_num}个关键的概念,可以是名词,动词,或者特定人物,帮我列出来,用逗号,隔开,尽可能精简。只需要列举{topic_num}个话题就好,不要有序号,不要告诉我其他内容。'
return prompt
- def topic_what(self,text, topic):
+ def topic_what(self,text, topic, time_info):
# prompt = f'这是一段文字:{text}。我想知道这段文字里有什么关于{topic}的话题,帮我总结成一句自然的话,可以包含时间和人物,以及具体的观点。只输出这句话就好'
- prompt = f'这是一段文字:{text}。我想让你基于这段文字来概括"{topic}"这个概念,帮我总结成一句自然的话,可以包含时间和人物,以及具体的观点。只输出这句话就好'
+ # 获取当前时间
+ prompt = f'这是一段文字,{time_info}:{text}。我想让你基于这段文字来概括"{topic}"这个概念,帮我总结成一句自然的话,可以包含时间和人物,以及具体的观点。只输出这句话就好'
return prompt
def remove_node_from_db(self, topic):
@@ -597,7 +648,7 @@ class Hippocampus:
print(f"选择的记忆:\n{merged_text}")
# 使用memory_compress生成新的压缩记忆
- compressed_memories = await self.memory_compress(merged_text, 0.1)
+ compressed_memories = await self.memory_compress(selected_memories, 0.1)
# 从原记忆列表中移除被选中的记忆
for memory in selected_memories:
@@ -647,6 +698,164 @@ class Hippocampus:
else:
print("\n本次检查没有需要合并的节点")
+ async def _identify_topics(self, text: str) -> list:
+ """从文本中识别可能的主题"""
+ topics_response = self.llm_model_get_topic.generate_response(self.find_topic_llm(text, 5))
+ topics = [topic.strip() for topic in topics_response[0].replace(",", ",").replace("、", ",").replace(" ", ",").split(",") if topic.strip()]
+ return topics
+
+ def _find_similar_topics(self, topics: list, similarity_threshold: float = 0.4, debug_info: str = "") -> list:
+ """查找与给定主题相似的记忆主题"""
+ all_memory_topics = list(self.memory_graph.G.nodes())
+ all_similar_topics = []
+
+ for topic in topics:
+            topic_vector = text_to_vector(topic)
+
+ for memory_topic in all_memory_topics:
+ memory_vector = text_to_vector(memory_topic)
+ all_words = set(topic_vector.keys()) | set(memory_vector.keys())
+ v1 = [topic_vector.get(word, 0) for word in all_words]
+ v2 = [memory_vector.get(word, 0) for word in all_words]
+ similarity = cosine_similarity(v1, v2)
+
+                if similarity >= similarity_threshold:
+                    all_similar_topics.append((memory_topic, similarity))
+
+ return all_similar_topics
+
+ def _get_top_topics(self, similar_topics: list, max_topics: int = 5) -> list:
+ """获取相似度最高的主题"""
+ seen_topics = set()
+ top_topics = []
+
+ for topic, score in sorted(similar_topics, key=lambda x: x[1], reverse=True):
+ if topic not in seen_topics and len(top_topics) < max_topics:
+ seen_topics.add(topic)
+ top_topics.append((topic, score))
+
+ return top_topics
+
+ async def memory_activate_value(self, text: str, max_topics: int = 5, similarity_threshold: float = 0.3) -> int:
+ """计算输入文本对记忆的激活程度"""
+ logger.info(f"[记忆激活]识别主题: {await self._identify_topics(text)}")
+
+ identified_topics = await self._identify_topics(text)
+ if not identified_topics:
+ return 0
+
+ all_similar_topics = self._find_similar_topics(
+ identified_topics,
+ similarity_threshold=similarity_threshold,
+ debug_info="记忆激活"
+ )
+
+ if not all_similar_topics:
+ return 0
+
+ top_topics = self._get_top_topics(all_similar_topics, max_topics)
+
+ if len(top_topics) == 1:
+ topic, score = top_topics[0]
+ memory_items = self.memory_graph.G.nodes[topic].get('memory_items', [])
+ if not isinstance(memory_items, list):
+ memory_items = [memory_items] if memory_items else []
+ content_count = len(memory_items)
+ penalty = 1.0 / (1 + math.log(content_count + 1))
+
+ activation = int(score * 50 * penalty)
+ print(f"\033[1;32m[记忆激活]\033[0m 单主题「{topic}」- 相似度: {score:.3f}, 内容数: {content_count}, 激活值: {activation}")
+ return activation
+
+ matched_topics = set()
+ topic_similarities = {}
+
+ for memory_topic, similarity in top_topics:
+ memory_items = self.memory_graph.G.nodes[memory_topic].get('memory_items', [])
+ if not isinstance(memory_items, list):
+ memory_items = [memory_items] if memory_items else []
+ content_count = len(memory_items)
+ penalty = 1.0 / (1 + math.log(content_count + 1))
+
+ for input_topic in identified_topics:
+ topic_vector = text_to_vector(input_topic)
+ memory_vector = text_to_vector(memory_topic)
+ all_words = set(topic_vector.keys()) | set(memory_vector.keys())
+ v1 = [topic_vector.get(word, 0) for word in all_words]
+ v2 = [memory_vector.get(word, 0) for word in all_words]
+ sim = cosine_similarity(v1, v2)
+ if sim >= similarity_threshold:
+ matched_topics.add(input_topic)
+ adjusted_sim = sim * penalty
+ topic_similarities[input_topic] = max(topic_similarities.get(input_topic, 0), adjusted_sim)
+ print(f"\033[1;32m[记忆激活]\033[0m 主题「{input_topic}」-> 「{memory_topic}」(内容数: {content_count}, 相似度: {adjusted_sim:.3f})")
+
+ topic_match = len(matched_topics) / len(identified_topics)
+ average_similarities = sum(topic_similarities.values()) / len(topic_similarities) if topic_similarities else 0
+
+ activation = int((topic_match + average_similarities) / 2 * 100)
+ print(f"\033[1;32m[记忆激活]\033[0m 匹配率: {topic_match:.3f}, 平均相似度: {average_similarities:.3f}, 激活值: {activation}")
+
+ return activation
+
+ async def get_relevant_memories(self, text: str, max_topics: int = 5, similarity_threshold: float = 0.4, max_memory_num: int = 5) -> list:
+ """根据输入文本获取相关的记忆内容"""
+ identified_topics = await self._identify_topics(text)
+
+ all_similar_topics = self._find_similar_topics(
+ identified_topics,
+ similarity_threshold=similarity_threshold,
+ debug_info="记忆检索"
+ )
+
+ relevant_topics = self._get_top_topics(all_similar_topics, max_topics)
+
+ relevant_memories = []
+ for topic, score in relevant_topics:
+ first_layer, _ = self.memory_graph.get_related_item(topic, depth=1)
+ if first_layer:
+ if len(first_layer) > max_memory_num/2:
+ first_layer = random.sample(first_layer, max_memory_num//2)
+ for memory in first_layer:
+ relevant_memories.append({
+ 'topic': topic,
+ 'similarity': score,
+ 'content': memory
+ })
+
+ relevant_memories.sort(key=lambda x: x['similarity'], reverse=True)
+
+ if len(relevant_memories) > max_memory_num:
+ relevant_memories = random.sample(relevant_memories, max_memory_num)
+
+ return relevant_memories
+
+def segment_text(text):
+ """使用jieba进行文本分词"""
+ seg_text = list(jieba.cut(text))
+ return seg_text
+
+def text_to_vector(text):
+ """将文本转换为词频向量"""
+ words = segment_text(text)
+ vector = {}
+ for word in words:
+ vector[word] = vector.get(word, 0) + 1
+ return vector
+
+def cosine_similarity(v1, v2):
+ """计算两个向量的余弦相似度"""
+ dot_product = sum(a * b for a, b in zip(v1, v2))
+ norm1 = math.sqrt(sum(a * a for a in v1))
+ norm2 = math.sqrt(sum(b * b for b in v2))
+ if norm1 == 0 or norm2 == 0:
+ return 0
+ return dot_product / (norm1 * norm2)
def visualize_graph_lite(memory_graph: Memory_graph, color_by_memory: bool = False):
# 设置中文字体
@@ -735,7 +944,7 @@ async def main():
db = Database.get_instance()
start_time = time.time()
- test_pare = {'do_build_memory':True,'do_forget_topic':False,'do_visualize_graph':True,'do_query':False,'do_merge_memory':False}
+ test_pare = {'do_build_memory':False,'do_forget_topic':False,'do_visualize_graph':True,'do_query':False,'do_merge_memory':False}
# 创建记忆图
memory_graph = Memory_graph()
diff --git a/src/plugins/memory_system/memory_test1.py b/src/plugins/memory_system/memory_test1.py
new file mode 100644
index 000000000..bbd734ec2
--- /dev/null
+++ b/src/plugins/memory_system/memory_test1.py
@@ -0,0 +1,1208 @@
+# -*- coding: utf-8 -*-
+import datetime
+import math
+import os
+import random
+import sys
+import time
+from collections import Counter
+from pathlib import Path
+
+import matplotlib.pyplot as plt
+import networkx as nx
+import pymongo
+from dotenv import load_dotenv
+from loguru import logger
+import jieba
+
+'''
+形态学联想记忆(MAM)理论认为,当两个或多个事物在形态上具有相似性时,
+它们在记忆中会形成关联。
+例如,梨和苹果在形状和都是水果这一属性上有相似性,
+所以当我们看到梨时,很容易通过形态学联想记忆联想到苹果。
+这种相似性联想有助于我们对新事物进行分类和理解,
+当遇到一个新的类似水果时,
+我们可以通过与已有的水果记忆进行相似性匹配,
+来推测它的一些特征。
+
+时空关联性联想:
+除了相似性联想,MAM 还强调时空关联性联想。
+如果两个事物在时间或空间上经常同时出现,它们也会在记忆中形成关联。
+比如,每次在公园里看到花的时候,都能听到鸟儿的叫声,
+那么花和鸟儿叫声的形态特征(花的视觉形态和鸟叫的听觉形态)就会在记忆中形成关联,
+以后听到鸟叫可能就会联想到公园里的花。
+
+'''
+
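+# 一个极简示意:上述"相似性联想"在代码中的落地方式——两个主题分词后的
+# 词频向量余弦相似度超过阈值时,就在记忆图中连一条边。
+# 注意:该函数只是假设性示例,项目中并未调用;text_to_vector /
+# cosine_similarity 定义在本文件末尾,Memory_graph 定义在下文,
+# Python 在调用时才解析这些名字,因此可以先行定义。
+def _demo_similarity_association(graph: 'Memory_graph', topic_a: str, topic_b: str,
+                                 threshold: float = 0.6) -> float:
+    """若两个主题足够相似,则在记忆图中建立关联(仅作示意)"""
+    v_a, v_b = text_to_vector(topic_a), text_to_vector(topic_b)
+    all_words = set(v_a) | set(v_b)
+    sim = cosine_similarity([v_a.get(w, 0) for w in all_words],
+                            [v_b.get(w, 0) for w in all_words])
+    if sim >= threshold:
+        graph.connect_dot(topic_a, topic_b)  # 例如"梨"和"苹果"会被连边
+    return sim
+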
+# from chat.config import global_config
+sys.path.append("C:/GitHub/MaiMBot")  # 添加项目根目录到 Python 路径(需按本机实际路径调整)
+from src.common.database import Database
+from src.plugins.memory_system.offline_llm import LLMModel
+
+# 获取当前文件的目录
+current_dir = Path(__file__).resolve().parent
+# 获取项目根目录(上三层目录)
+project_root = current_dir.parent.parent.parent
+# env.dev文件路径
+env_path = project_root / ".env.dev"
+
+# 加载环境变量
+if env_path.exists():
+ logger.info(f"从 {env_path} 加载环境变量")
+ load_dotenv(env_path)
+else:
+ logger.warning(f"未找到环境变量文件: {env_path}")
+ logger.info("将使用默认配置")
+
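+# 注意:下面重新定义的 Database 会覆盖上面从 src.common.database 导入的同名类,
+# 目的是让本测试脚本能独立初始化 MongoDB 连接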
+class Database:
+ _instance = None
+ db = None
+
+ @classmethod
+ def get_instance(cls):
+ if cls._instance is None:
+ cls._instance = cls()
+ return cls._instance
+
+ def __init__(self):
+ if not Database.db:
+ Database.initialize(
+ host=os.getenv("MONGODB_HOST"),
+ port=int(os.getenv("MONGODB_PORT")),
+ db_name=os.getenv("DATABASE_NAME"),
+ username=os.getenv("MONGODB_USERNAME"),
+ password=os.getenv("MONGODB_PASSWORD"),
+ auth_source=os.getenv("MONGODB_AUTH_SOURCE")
+ )
+
+ @classmethod
+ def initialize(cls, host, port, db_name, username=None, password=None, auth_source="admin"):
+ try:
+ if username and password:
+ uri = f"mongodb://{username}:{password}@{host}:{port}/{db_name}?authSource={auth_source}"
+ else:
+ uri = f"mongodb://{host}:{port}"
+
+ client = pymongo.MongoClient(uri)
+ cls.db = client[db_name]
+ # 测试连接
+ client.server_info()
+ logger.success("MongoDB连接成功!")
+
+ except Exception as e:
+ logger.error(f"初始化MongoDB失败: {str(e)}")
+ raise
+
+def calculate_information_content(text):
+ """计算文本的信息量(熵)"""
+ char_count = Counter(text)
+ total_chars = len(text)
+
+ entropy = 0
+ for count in char_count.values():
+ probability = count / total_chars
+ entropy -= probability * math.log2(probability)
+
+ return entropy
+
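+# 上面熵计算的算例:文本 "aab" 的字符分布为 {a: 2/3, b: 1/3},
+# 熵 = -(2/3)*log2(2/3) - (1/3)*log2(1/3) ≈ 0.918 比特;文本越杂,熵越大。
+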
+def get_cloest_chat_from_db(db, length: int, timestamp: float):
+ """从数据库中获取最接近指定时间戳的聊天记录,并记录读取次数
+
+ Returns:
+ list: 消息记录字典列表,每个字典包含消息内容和时间信息
+ """
+ chat_records = []
+ closest_record = db.db.messages.find_one({"time": {"$lte": timestamp}}, sort=[('time', -1)])
+
+ if closest_record and closest_record.get('memorized', 0) < 4:
+ closest_time = closest_record['time']
+ group_id = closest_record['group_id']
+ # 获取该时间戳之后的length条消息,且groupid相同
+ records = list(db.db.messages.find(
+ {"time": {"$gt": closest_time}, "group_id": group_id}
+ ).sort('time', 1).limit(length))
+
+ # 更新每条消息的memorized属性
+ for record in records:
+ current_memorized = record.get('memorized', 0)
+            if current_memorized > 3:
+                print("消息已达到最大读取次数,跳过")
+                return []
+
+ # 更新memorized值
+ db.db.messages.update_one(
+ {"_id": record["_id"]},
+ {"$set": {"memorized": current_memorized + 1}}
+ )
+
+ # 添加到记录列表中
+ chat_records.append({
+ 'text': record["detailed_plain_text"],
+ 'time': record["time"],
+ 'group_id': record["group_id"]
+ })
+
+ return chat_records
+
+class Memory_cortex:
+ def __init__(self, memory_graph: 'Memory_graph'):
+ self.memory_graph = memory_graph
+
+ def sync_memory_from_db(self):
+ """
+ 从数据库同步数据到内存中的图结构
+ 将清空当前内存中的图,并从数据库重新加载所有节点和边
+ """
+ # 清空当前图
+ self.memory_graph.G.clear()
+
+ # 获取当前时间作为默认时间
+ default_time = datetime.datetime.now().timestamp()
+
+ # 从数据库加载所有节点
+ nodes = self.memory_graph.db.db.graph_data.nodes.find()
+ for node in nodes:
+ concept = node['concept']
+ memory_items = node.get('memory_items', [])
+ # 确保memory_items是列表
+ if not isinstance(memory_items, list):
+ memory_items = [memory_items] if memory_items else []
+
+ # 获取时间属性,如果不存在则使用默认时间
+ created_time = node.get('created_time')
+ last_modified = node.get('last_modified')
+
+ # 如果时间属性不存在,则更新数据库
+ if created_time is None or last_modified is None:
+ created_time = default_time
+ last_modified = default_time
+ # 更新数据库中的节点
+ self.memory_graph.db.db.graph_data.nodes.update_one(
+ {'concept': concept},
+ {'$set': {
+ 'created_time': created_time,
+ 'last_modified': last_modified
+ }}
+ )
+ logger.info(f"为节点 {concept} 添加默认时间属性")
+
+ # 添加节点到图中,包含时间属性
+ self.memory_graph.G.add_node(concept,
+ memory_items=memory_items,
+ created_time=created_time,
+ last_modified=last_modified)
+
+ # 从数据库加载所有边
+ edges = self.memory_graph.db.db.graph_data.edges.find()
+ for edge in edges:
+ source = edge['source']
+ target = edge['target']
+
+ # 只有当源节点和目标节点都存在时才添加边
+ if source in self.memory_graph.G and target in self.memory_graph.G:
+ # 获取时间属性,如果不存在则使用默认时间
+ created_time = edge.get('created_time')
+ last_modified = edge.get('last_modified')
+
+ # 如果时间属性不存在,则更新数据库
+ if created_time is None or last_modified is None:
+ created_time = default_time
+ last_modified = default_time
+ # 更新数据库中的边
+ self.memory_graph.db.db.graph_data.edges.update_one(
+ {'source': source, 'target': target},
+ {'$set': {
+ 'created_time': created_time,
+ 'last_modified': last_modified
+ }}
+ )
+ logger.info(f"为边 {source} - {target} 添加默认时间属性")
+
+ self.memory_graph.G.add_edge(source, target,
+ strength=edge.get('strength', 1),
+ created_time=created_time,
+ last_modified=last_modified)
+
+ logger.success("从数据库同步记忆图谱完成")
+
+ def calculate_node_hash(self, concept, memory_items):
+ """
+ 计算节点的特征值
+ """
+ if not isinstance(memory_items, list):
+ memory_items = [memory_items] if memory_items else []
+ # 将记忆项排序以确保相同内容生成相同的哈希值
+ sorted_items = sorted(memory_items)
+ # 组合概念和记忆项生成特征值
+ content = f"{concept}:{'|'.join(sorted_items)}"
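+        # 注意:内置 hash() 对字符串受 PYTHONHASHSEED 影响,跨进程运行结果不稳定;
+        # 若哈希要持久化到数据库做比对,可改用 hashlib.md5(content.encode()).hexdigest()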
+ return hash(content)
+
+ def calculate_edge_hash(self, source, target):
+ """
+ 计算边的特征值
+ """
+ # 对源节点和目标节点排序以确保相同的边生成相同的哈希值
+ nodes = sorted([source, target])
+ return hash(f"{nodes[0]}:{nodes[1]}")
+
+ def sync_memory_to_db(self):
+ """
+ 检查并同步内存中的图结构与数据库
+ 使用特征值(哈希值)快速判断是否需要更新
+ """
+ current_time = datetime.datetime.now().timestamp()
+
+ # 获取数据库中所有节点和内存中所有节点
+ db_nodes = list(self.memory_graph.db.db.graph_data.nodes.find())
+ memory_nodes = list(self.memory_graph.G.nodes(data=True))
+
+ # 转换数据库节点为字典格式,方便查找
+ db_nodes_dict = {node['concept']: node for node in db_nodes}
+
+ # 检查并更新节点
+ for concept, data in memory_nodes:
+ memory_items = data.get('memory_items', [])
+ if not isinstance(memory_items, list):
+ memory_items = [memory_items] if memory_items else []
+
+ # 计算内存中节点的特征值
+ memory_hash = self.calculate_node_hash(concept, memory_items)
+
+ if concept not in db_nodes_dict:
+ # 数据库中缺少的节点,添加
+ node_data = {
+ 'concept': concept,
+ 'memory_items': memory_items,
+ 'hash': memory_hash,
+ 'created_time': data.get('created_time', current_time),
+ 'last_modified': data.get('last_modified', current_time)
+ }
+ self.memory_graph.db.db.graph_data.nodes.insert_one(node_data)
+ else:
+ # 获取数据库中节点的特征值
+ db_node = db_nodes_dict[concept]
+ db_hash = db_node.get('hash', None)
+
+ # 如果特征值不同,则更新节点
+ if db_hash != memory_hash:
+ self.memory_graph.db.db.graph_data.nodes.update_one(
+ {'concept': concept},
+ {'$set': {
+ 'memory_items': memory_items,
+ 'hash': memory_hash,
+ 'last_modified': current_time
+ }}
+ )
+
+ # 检查并删除数据库中多余的节点
+ memory_concepts = set(node[0] for node in memory_nodes)
+ for db_node in db_nodes:
+ if db_node['concept'] not in memory_concepts:
+ self.memory_graph.db.db.graph_data.nodes.delete_one({'concept': db_node['concept']})
+
+ # 处理边的信息
+ db_edges = list(self.memory_graph.db.db.graph_data.edges.find())
+ memory_edges = list(self.memory_graph.G.edges(data=True))
+
+ # 创建边的哈希值字典
+ db_edge_dict = {}
+ for edge in db_edges:
+ edge_hash = self.calculate_edge_hash(edge['source'], edge['target'])
+ db_edge_dict[(edge['source'], edge['target'])] = {
+ 'hash': edge_hash,
+ 'strength': edge.get('strength', 1)
+ }
+
+ # 检查并更新边
+ for source, target, data in memory_edges:
+ edge_hash = self.calculate_edge_hash(source, target)
+ edge_key = (source, target)
+ strength = data.get('strength', 1)
+
+ if edge_key not in db_edge_dict:
+ # 添加新边
+ edge_data = {
+ 'source': source,
+ 'target': target,
+ 'strength': strength,
+ 'hash': edge_hash,
+ 'created_time': data.get('created_time', current_time),
+ 'last_modified': data.get('last_modified', current_time)
+ }
+ self.memory_graph.db.db.graph_data.edges.insert_one(edge_data)
+ else:
+ # 检查边的特征值是否变化
+ if db_edge_dict[edge_key]['hash'] != edge_hash:
+ self.memory_graph.db.db.graph_data.edges.update_one(
+ {'source': source, 'target': target},
+ {'$set': {
+ 'hash': edge_hash,
+ 'strength': strength,
+ 'last_modified': current_time
+ }}
+ )
+
+ # 删除多余的边
+ memory_edge_set = set((source, target) for source, target, _ in memory_edges)
+ for edge_key in db_edge_dict:
+ if edge_key not in memory_edge_set:
+ source, target = edge_key
+ self.memory_graph.db.db.graph_data.edges.delete_one({
+ 'source': source,
+ 'target': target
+ })
+
+ logger.success("完成记忆图谱与数据库的差异同步")
+
+ def remove_node_from_db(self, topic):
+ """
+ 从数据库中删除指定节点及其相关的边
+
+ Args:
+ topic: 要删除的节点概念
+ """
+ # 删除节点
+ self.memory_graph.db.db.graph_data.nodes.delete_one({'concept': topic})
+ # 删除所有涉及该节点的边
+ self.memory_graph.db.db.graph_data.edges.delete_many({
+ '$or': [
+ {'source': topic},
+ {'target': topic}
+ ]
+ })
+
+class Memory_graph:
+ def __init__(self):
+ self.G = nx.Graph() # 使用 networkx 的图结构
+ self.db = Database.get_instance()
+
+ def connect_dot(self, concept1, concept2):
+ # 避免自连接
+ if concept1 == concept2:
+ return
+
+ current_time = datetime.datetime.now().timestamp()
+
+ # 如果边已存在,增加 strength
+ if self.G.has_edge(concept1, concept2):
+ self.G[concept1][concept2]['strength'] = self.G[concept1][concept2].get('strength', 1) + 1
+ # 更新最后修改时间
+ self.G[concept1][concept2]['last_modified'] = current_time
+ else:
+ # 如果是新边,初始化 strength 为 1
+ self.G.add_edge(concept1, concept2,
+ strength=1,
+ created_time=current_time,
+ last_modified=current_time)
+
+ def add_dot(self, concept, memory):
+ current_time = datetime.datetime.now().timestamp()
+
+ if concept in self.G:
+ # 如果节点已存在,将新记忆添加到现有列表中
+ if 'memory_items' in self.G.nodes[concept]:
+ if not isinstance(self.G.nodes[concept]['memory_items'], list):
+ # 如果当前不是列表,将其转换为列表
+ self.G.nodes[concept]['memory_items'] = [self.G.nodes[concept]['memory_items']]
+ self.G.nodes[concept]['memory_items'].append(memory)
+ # 更新最后修改时间
+ self.G.nodes[concept]['last_modified'] = current_time
+ else:
+ self.G.nodes[concept]['memory_items'] = [memory]
+ self.G.nodes[concept]['last_modified'] = current_time
+ else:
+ # 如果是新节点,创建新的记忆列表
+ self.G.add_node(concept,
+ memory_items=[memory],
+ created_time=current_time,
+ last_modified=current_time)
+
+ def get_dot(self, concept):
+ # 检查节点是否存在于图中
+ if concept in self.G:
+ # 从图中获取节点数据
+ node_data = self.G.nodes[concept]
+ return concept, node_data
+ return None
+
+ def get_related_item(self, topic, depth=1):
+ if topic not in self.G:
+ return [], []
+
+ first_layer_items = []
+ second_layer_items = []
+
+ # 获取相邻节点
+ neighbors = list(self.G.neighbors(topic))
+
+ # 获取当前节点的记忆项
+ node_data = self.get_dot(topic)
+ if node_data:
+ concept, data = node_data
+ if 'memory_items' in data:
+ memory_items = data['memory_items']
+ if isinstance(memory_items, list):
+ first_layer_items.extend(memory_items)
+ else:
+ first_layer_items.append(memory_items)
+
+ # 只在depth=2时获取第二层记忆
+ if depth >= 2:
+ # 获取相邻节点的记忆项
+ for neighbor in neighbors:
+ node_data = self.get_dot(neighbor)
+ if node_data:
+ concept, data = node_data
+ if 'memory_items' in data:
+ memory_items = data['memory_items']
+ if isinstance(memory_items, list):
+ second_layer_items.extend(memory_items)
+ else:
+ second_layer_items.append(memory_items)
+
+ return first_layer_items, second_layer_items
+
+ @property
+ def dots(self):
+ # 返回所有节点对应的 Memory_dot 对象
+ return [self.get_dot(node) for node in self.G.nodes()]
+
+# 海马体
+class Hippocampus:
+ def __init__(self, memory_graph: Memory_graph):
+ self.memory_graph = memory_graph
+ self.memory_cortex = Memory_cortex(memory_graph)
+ self.llm_model = LLMModel()
+ self.llm_model_small = LLMModel(model_name="deepseek-ai/DeepSeek-V2.5")
+ self.llm_model_get_topic = LLMModel(model_name="Pro/Qwen/Qwen2.5-7B-Instruct")
+ self.llm_model_summary = LLMModel(model_name="Qwen/Qwen2.5-32B-Instruct")
+
+    def get_memory_sample(self, chat_size=20, time_frequency: dict = None):
+        """获取记忆样本
+
+        Returns:
+            list: 消息记录列表,每个元素是一个消息记录字典列表
+        """
+        # 避免可变默认参数:默认采样频率在此处给定
+        if time_frequency is None:
+            time_frequency = {'near': 2, 'mid': 4, 'far': 3}
+        current_timestamp = datetime.datetime.now().timestamp()
+ chat_samples = []
+
+        # 近期:4小时内 中期:4小时~1天 远期:1天~7天
+ for _ in range(time_frequency.get('near')):
+ random_time = current_timestamp - random.randint(1, 3600*4)
+ messages = get_cloest_chat_from_db(db=self.memory_graph.db, length=chat_size, timestamp=random_time)
+ if messages:
+ chat_samples.append(messages)
+
+ for _ in range(time_frequency.get('mid')):
+ random_time = current_timestamp - random.randint(3600*4, 3600*24)
+ messages = get_cloest_chat_from_db(db=self.memory_graph.db, length=chat_size, timestamp=random_time)
+ if messages:
+ chat_samples.append(messages)
+
+ for _ in range(time_frequency.get('far')):
+ random_time = current_timestamp - random.randint(3600*24, 3600*24*7)
+ messages = get_cloest_chat_from_db(db=self.memory_graph.db, length=chat_size, timestamp=random_time)
+ if messages:
+ chat_samples.append(messages)
+
+ return chat_samples
+
+ def calculate_topic_num(self,text, compress_rate):
+ """计算文本的话题数量"""
+ information_content = calculate_information_content(text)
+ topic_by_length = text.count('\n')*compress_rate
+ topic_by_information_content = max(1, min(5, int((information_content-3) * 2)))
+ topic_num = int((topic_by_length + topic_by_information_content)/2)
+ print(f"topic_by_length: {topic_by_length}, topic_by_information_content: {topic_by_information_content}, topic_num: {topic_num}")
+ return topic_num
+
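+    # 上面 calculate_topic_num 的算例(数字均为假设):文本含20个换行、compress_rate=0.1
+    # 时 topic_by_length=2.0;信息熵为6时 topic_by_information_content=min(5, int((6-3)*2))=5;
+    # 最终 topic_num = int((2.0+5)/2) = 3。
+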
+ async def memory_compress(self, messages: list, compress_rate=0.1):
+ """压缩消息记录为记忆
+
+ Args:
+ messages: 消息记录字典列表,每个字典包含text和time字段
+ compress_rate: 压缩率
+
+ Returns:
+ tuple: (压缩记忆集合, 相似主题字典)
+ - 压缩记忆集合: set of (话题, 记忆) 元组
+ - 相似主题字典: dict of {话题: [(相似主题, 相似度), ...]}
+ """
+ if not messages:
+ return set(), {}
+
+ # 合并消息文本,同时保留时间信息
+ input_text = ""
+ time_info = ""
+ # 计算最早和最晚时间
+ earliest_time = min(msg['time'] for msg in messages)
+ latest_time = max(msg['time'] for msg in messages)
+
+ earliest_dt = datetime.datetime.fromtimestamp(earliest_time)
+ latest_dt = datetime.datetime.fromtimestamp(latest_time)
+
+ # 如果是同一年
+ if earliest_dt.year == latest_dt.year:
+ earliest_str = earliest_dt.strftime("%m-%d %H:%M:%S")
+ latest_str = latest_dt.strftime("%m-%d %H:%M:%S")
+ time_info += f"是在{earliest_dt.year}年,{earliest_str} 到 {latest_str} 的对话:\n"
+ else:
+ earliest_str = earliest_dt.strftime("%Y-%m-%d %H:%M:%S")
+ latest_str = latest_dt.strftime("%Y-%m-%d %H:%M:%S")
+ time_info += f"是从 {earliest_str} 到 {latest_str} 的对话:\n"
+
+ for msg in messages:
+ input_text += f"{msg['text']}\n"
+
+ print(input_text)
+
+ topic_num = self.calculate_topic_num(input_text, compress_rate)
+ topics_response = self.llm_model_get_topic.generate_response(self.find_topic_llm(input_text, topic_num))
+
+ # 过滤topics
+ filter_keywords = ['表情包', '图片', '回复', '聊天记录']
+ topics = [topic.strip() for topic in topics_response[0].replace(",", ",").replace("、", ",").replace(" ", ",").split(",") if topic.strip()]
+ filtered_topics = [topic for topic in topics if not any(keyword in topic for keyword in filter_keywords)]
+
+ print(f"过滤后话题: {filtered_topics}")
+
+ # 为每个话题查找相似的已存在主题
+ print("\n检查相似主题:")
+ similar_topics_dict = {} # 存储每个话题的相似主题列表
+
+ for topic in filtered_topics:
+ # 获取所有现有节点
+ existing_topics = list(self.memory_graph.G.nodes())
+ similar_topics = []
+
+ # 对每个现有节点计算相似度
+ for existing_topic in existing_topics:
+ # 使用jieba分词并计算余弦相似度
+ topic_words = set(jieba.cut(topic))
+ existing_words = set(jieba.cut(existing_topic))
+
+ # 计算词向量
+ all_words = topic_words | existing_words
+ v1 = [1 if word in topic_words else 0 for word in all_words]
+ v2 = [1 if word in existing_words else 0 for word in all_words]
+
+ # 计算余弦相似度
+ similarity = cosine_similarity(v1, v2)
+
+ # 如果相似度超过阈值,添加到结果中
+ if similarity >= 0.6: # 设置相似度阈值
+ similar_topics.append((existing_topic, similarity))
+
+ # 按相似度降序排序
+ similar_topics.sort(key=lambda x: x[1], reverse=True)
+ # 只保留前5个最相似的主题
+ similar_topics = similar_topics[:5]
+
+ # 存储到字典中
+ similar_topics_dict[topic] = similar_topics
+
+ # 输出结果
+ if similar_topics:
+ print(f"\n主题「{topic}」的相似主题:")
+ for similar_topic, score in similar_topics:
+ print(f"- {similar_topic} (相似度: {score:.3f})")
+ else:
+ print(f"\n主题「{topic}」没有找到相似主题")
+
+ # 创建所有话题的请求任务
+ tasks = []
+ for topic in filtered_topics:
+ topic_what_prompt = self.topic_what(input_text, topic , time_info)
+ # 创建异步任务
+ task = self.llm_model_small.generate_response_async(topic_what_prompt)
+ tasks.append((topic.strip(), task))
+
+ # 等待所有任务完成
+ compressed_memory = set()
+ for topic, task in tasks:
+ response = await task
+ if response:
+ compressed_memory.add((topic, response[0]))
+
+ return compressed_memory, similar_topics_dict
+
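+    # memory_compress 返回值的结构示意(内容为虚构示例,仅说明格式):
+    #   compressed_memory   = {("考试", "三月初群里讨论了期末考试安排")}
+    #   similar_topics_dict = {"考试": [("期末考试", 0.82)]}
+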
+ async def operation_build_memory(self, chat_size=12):
+ # 最近消息获取频率
+ time_frequency = {'near': 3, 'mid': 8, 'far': 5}
+ memory_samples = self.get_memory_sample(chat_size, time_frequency)
+
+
+ for i, messages in enumerate(memory_samples, 1):
+            all_topics = []  # 存储当前批次的话题
+            # 加载进度可视化
+ progress = (i / len(memory_samples)) * 100
+ bar_length = 30
+ filled_length = int(bar_length * i // len(memory_samples))
+ bar = '█' * filled_length + '-' * (bar_length - filled_length)
+ print(f"\n进度: [{bar}] {progress:.1f}% ({i}/{len(memory_samples)})")
+
+ # 生成压缩后记忆
+ compress_rate = 0.1
+ compressed_memory, similar_topics_dict = await self.memory_compress(messages, compress_rate)
+ print(f"\033[1;33m压缩后记忆数量\033[0m: {len(compressed_memory)},似曾相识的话题: {len(similar_topics_dict)}")
+
+ # 将记忆加入到图谱中
+ for topic, memory in compressed_memory:
+ print(f"\033[1;32m添加节点\033[0m: {topic}")
+ self.memory_graph.add_dot(topic, memory)
+ all_topics.append(topic)
+
+ # 连接相似的已存在主题
+ if topic in similar_topics_dict:
+ similar_topics = similar_topics_dict[topic]
+ for similar_topic, similarity in similar_topics:
+ # 避免自连接
+ if topic != similar_topic:
+ # 根据相似度设置连接强度
+                        strength = int(similarity * 10)  # 将0.6~1.0的相似度映射到6~10的强度(阈值见上文0.6)
+ print(f"\033[1;36m连接相似节点\033[0m: {topic} 和 {similar_topic} (强度: {strength})")
+ # 使用相似度作为初始连接强度
+ self.memory_graph.G.add_edge(topic, similar_topic, strength=strength)
+
+ # 连接同批次的相关话题
+ for i in range(len(all_topics)):
+ for j in range(i + 1, len(all_topics)):
+ print(f"\033[1;32m连接同批次节点\033[0m: {all_topics[i]} 和 {all_topics[j]}")
+ self.memory_graph.connect_dot(all_topics[i], all_topics[j])
+
+ self.memory_cortex.sync_memory_to_db()
+
+ def forget_connection(self, source, target):
+ """
+ 检查并可能遗忘一个连接
+
+ Args:
+ source: 连接的源节点
+ target: 连接的目标节点
+
+ Returns:
+ tuple: (是否有变化, 变化类型, 变化详情)
+ 变化类型: 0-无变化, 1-强度减少, 2-连接移除
+ """
+ current_time = datetime.datetime.now().timestamp()
+ # 获取边的属性
+ edge_data = self.memory_graph.G[source][target]
+ last_modified = edge_data.get('last_modified', current_time)
+
+        # 正式逻辑:连接超过7天(604800秒)未更新才触发遗忘;此处用6000秒便于测试
+        if current_time - last_modified > 6000:  # test
+ # 获取当前强度
+ current_strength = edge_data.get('strength', 1)
+ # 减少连接强度
+ new_strength = current_strength - 1
+ edge_data['strength'] = new_strength
+ edge_data['last_modified'] = current_time
+
+ # 如果强度降为0,移除连接
+ if new_strength <= 0:
+ self.memory_graph.G.remove_edge(source, target)
+ return True, 2, f"移除连接: {source} - {target} (强度降至0)"
+ else:
+ return True, 1, f"减弱连接: {source} - {target} (强度: {current_strength} -> {new_strength})"
+
+ return False, 0, ""
+
+ def forget_topic(self, topic):
+ """
+ 检查并可能遗忘一个话题的记忆
+
+ Args:
+ topic: 要检查的话题
+
+ Returns:
+ tuple: (是否有变化, 变化类型, 变化详情)
+ 变化类型: 0-无变化, 1-记忆减少, 2-节点移除
+ """
+ current_time = datetime.datetime.now().timestamp()
+ # 获取节点的最后修改时间
+ node_data = self.memory_graph.G.nodes[topic]
+ last_modified = node_data.get('last_modified', current_time)
+
+        # 正式逻辑:话题超过7天(604800秒)未更新才触发遗忘;此处用3000秒便于测试
+        if current_time - last_modified > 3000:  # test
+ memory_items = node_data.get('memory_items', [])
+ if not isinstance(memory_items, list):
+ memory_items = [memory_items] if memory_items else []
+
+ if memory_items:
+ # 获取当前记忆数量
+ current_count = len(memory_items)
+ # 随机选择一条记忆删除
+ removed_item = random.choice(memory_items)
+ memory_items.remove(removed_item)
+
+ if memory_items:
+ # 更新节点的记忆项和最后修改时间
+ self.memory_graph.G.nodes[topic]['memory_items'] = memory_items
+ self.memory_graph.G.nodes[topic]['last_modified'] = current_time
+ return True, 1, f"减少记忆: {topic} (记忆数量: {current_count} -> {len(memory_items)})\n被移除的记忆: {removed_item}"
+ else:
+ # 如果没有记忆了,删除节点及其所有连接
+ self.memory_graph.G.remove_node(topic)
+ return True, 2, f"移除节点: {topic} (无剩余记忆)\n最后一条记忆: {removed_item}"
+
+ return False, 0, ""
+
+ async def operation_forget_topic(self, percentage=0.1):
+ """
+ 随机选择图中一定比例的节点和边进行检查,根据时间条件决定是否遗忘
+
+ Args:
+ percentage: 要检查的节点和边的比例,默认为0.1(10%)
+ """
+ # 获取所有节点和边
+ all_nodes = list(self.memory_graph.G.nodes())
+ all_edges = list(self.memory_graph.G.edges())
+
+ # 计算要检查的数量
+ check_nodes_count = max(1, int(len(all_nodes) * percentage))
+ check_edges_count = max(1, int(len(all_edges) * percentage))
+
+ # 随机选择要检查的节点和边
+ nodes_to_check = random.sample(all_nodes, check_nodes_count)
+ edges_to_check = random.sample(all_edges, check_edges_count)
+
+ # 用于统计不同类型的变化
+ edge_changes = {'weakened': 0, 'removed': 0}
+ node_changes = {'reduced': 0, 'removed': 0}
+
+ # 检查并遗忘连接
+ print("\n开始检查连接...")
+ for source, target in edges_to_check:
+ changed, change_type, details = self.forget_connection(source, target)
+ if changed:
+ if change_type == 1:
+ edge_changes['weakened'] += 1
+ logger.info(f"\033[1;34m[连接减弱]\033[0m {details}")
+ elif change_type == 2:
+ edge_changes['removed'] += 1
+ logger.info(f"\033[1;31m[连接移除]\033[0m {details}")
+
+ # 检查并遗忘话题
+ print("\n开始检查节点...")
+ for node in nodes_to_check:
+ changed, change_type, details = self.forget_topic(node)
+ if changed:
+ if change_type == 1:
+ node_changes['reduced'] += 1
+ logger.info(f"\033[1;33m[记忆减少]\033[0m {details}")
+ elif change_type == 2:
+ node_changes['removed'] += 1
+ logger.info(f"\033[1;31m[节点移除]\033[0m {details}")
+
+ # 同步到数据库
+ if any(count > 0 for count in edge_changes.values()) or any(count > 0 for count in node_changes.values()):
+ self.memory_cortex.sync_memory_to_db()
+ print("\n遗忘操作统计:")
+ print(f"连接变化: {edge_changes['weakened']} 个减弱, {edge_changes['removed']} 个移除")
+ print(f"节点变化: {node_changes['reduced']} 个减少记忆, {node_changes['removed']} 个移除")
+ else:
+ print("\n本次检查没有节点或连接满足遗忘条件")
+
+ async def merge_memory(self, topic):
+ """
+ 对指定话题的记忆进行合并压缩
+
+ Args:
+ topic: 要合并的话题节点
+ """
+ # 获取节点的记忆项
+ memory_items = self.memory_graph.G.nodes[topic].get('memory_items', [])
+ if not isinstance(memory_items, list):
+ memory_items = [memory_items] if memory_items else []
+
+ # 如果记忆项不足,直接返回
+ if len(memory_items) < 10:
+ return
+
+ # 随机选择10条记忆
+ selected_memories = random.sample(memory_items, 10)
+
+ # 拼接成文本
+ merged_text = "\n".join(selected_memories)
+ print(f"\n[合并记忆] 话题: {topic}")
+ print(f"选择的记忆:\n{merged_text}")
+
+        # 使用memory_compress生成新的压缩记忆(其参数需为带text/time字段的字典列表)
+        wrapped_memories = [{'text': m, 'time': datetime.datetime.now().timestamp()} for m in selected_memories]
+        compressed_memories, _ = await self.memory_compress(wrapped_memories, 0.1)
+
+ # 从原记忆列表中移除被选中的记忆
+ for memory in selected_memories:
+ memory_items.remove(memory)
+
+ # 添加新的压缩记忆
+ for _, compressed_memory in compressed_memories:
+ memory_items.append(compressed_memory)
+ print(f"添加压缩记忆: {compressed_memory}")
+
+ # 更新节点的记忆项
+ self.memory_graph.G.nodes[topic]['memory_items'] = memory_items
+ print(f"完成记忆合并,当前记忆数量: {len(memory_items)}")
+
+ async def operation_merge_memory(self, percentage=0.1):
+ """
+ 随机检查一定比例的节点,对内容数量超过100的节点进行记忆合并
+
+ Args:
+ percentage: 要检查的节点比例,默认为0.1(10%)
+ """
+ # 获取所有节点
+ all_nodes = list(self.memory_graph.G.nodes())
+ # 计算要检查的节点数量
+ check_count = max(1, int(len(all_nodes) * percentage))
+ # 随机选择节点
+ nodes_to_check = random.sample(all_nodes, check_count)
+
+ merged_nodes = []
+ for node in nodes_to_check:
+ # 获取节点的内容条数
+ memory_items = self.memory_graph.G.nodes[node].get('memory_items', [])
+ if not isinstance(memory_items, list):
+ memory_items = [memory_items] if memory_items else []
+ content_count = len(memory_items)
+
+ # 如果内容数量超过100,进行合并
+ if content_count > 100:
+ print(f"\n检查节点: {node}, 当前记忆数量: {content_count}")
+ await self.merge_memory(node)
+ merged_nodes.append(node)
+
+ # 同步到数据库
+ if merged_nodes:
+ self.memory_cortex.sync_memory_to_db()
+ print(f"\n完成记忆合并操作,共处理 {len(merged_nodes)} 个节点")
+ else:
+ print("\n本次检查没有需要合并的节点")
+
+ async def _identify_topics(self, text: str) -> list:
+ """从文本中识别可能的主题"""
+ topics_response = self.llm_model_get_topic.generate_response(self.find_topic_llm(text, 5))
+ topics = [topic.strip() for topic in topics_response[0].replace(",", ",").replace("、", ",").replace(" ", ",").split(",") if topic.strip()]
+ return topics
+
+ def _find_similar_topics(self, topics: list, similarity_threshold: float = 0.4, debug_info: str = "") -> list:
+ """查找与给定主题相似的记忆主题"""
+ all_memory_topics = list(self.memory_graph.G.nodes())
+ all_similar_topics = []
+
+ for topic in topics:
+            if debug_info:
+                logger.debug(f"[{debug_info}] 正在匹配主题: {topic}")
+
+ topic_vector = text_to_vector(topic)
+
+ for memory_topic in all_memory_topics:
+ memory_vector = text_to_vector(memory_topic)
+ all_words = set(topic_vector.keys()) | set(memory_vector.keys())
+ v1 = [topic_vector.get(word, 0) for word in all_words]
+ v2 = [memory_vector.get(word, 0) for word in all_words]
+ similarity = cosine_similarity(v1, v2)
+
+                if similarity >= similarity_threshold:
+                    all_similar_topics.append((memory_topic, similarity))
+
+ return all_similar_topics
+
+ def _get_top_topics(self, similar_topics: list, max_topics: int = 5) -> list:
+ """获取相似度最高的主题"""
+ seen_topics = set()
+ top_topics = []
+
+ for topic, score in sorted(similar_topics, key=lambda x: x[1], reverse=True):
+ if topic not in seen_topics and len(top_topics) < max_topics:
+ seen_topics.add(topic)
+ top_topics.append((topic, score))
+
+ return top_topics
+
+ async def memory_activate_value(self, text: str, max_topics: int = 5, similarity_threshold: float = 0.3) -> int:
+ """计算输入文本对记忆的激活程度"""
+ logger.info(f"[记忆激活]识别主题: {await self._identify_topics(text)}")
+
+ identified_topics = await self._identify_topics(text)
+ if not identified_topics:
+ return 0
+
+ all_similar_topics = self._find_similar_topics(
+ identified_topics,
+ similarity_threshold=similarity_threshold,
+ debug_info="记忆激活"
+ )
+
+ if not all_similar_topics:
+ return 0
+
+ top_topics = self._get_top_topics(all_similar_topics, max_topics)
+
+ if len(top_topics) == 1:
+ topic, score = top_topics[0]
+ memory_items = self.memory_graph.G.nodes[topic].get('memory_items', [])
+ if not isinstance(memory_items, list):
+ memory_items = [memory_items] if memory_items else []
+ content_count = len(memory_items)
+ penalty = 1.0 / (1 + math.log(content_count + 1))
+
+ activation = int(score * 50 * penalty)
+ print(f"\033[1;32m[记忆激活]\033[0m 单主题「{topic}」- 相似度: {score:.3f}, 内容数: {content_count}, 激活值: {activation}")
+ return activation
+
+ matched_topics = set()
+ topic_similarities = {}
+
+ for memory_topic, similarity in top_topics:
+ memory_items = self.memory_graph.G.nodes[memory_topic].get('memory_items', [])
+ if not isinstance(memory_items, list):
+ memory_items = [memory_items] if memory_items else []
+ content_count = len(memory_items)
+ penalty = 1.0 / (1 + math.log(content_count + 1))
+
+ for input_topic in identified_topics:
+ topic_vector = text_to_vector(input_topic)
+ memory_vector = text_to_vector(memory_topic)
+ all_words = set(topic_vector.keys()) | set(memory_vector.keys())
+ v1 = [topic_vector.get(word, 0) for word in all_words]
+ v2 = [memory_vector.get(word, 0) for word in all_words]
+ sim = cosine_similarity(v1, v2)
+ if sim >= similarity_threshold:
+ matched_topics.add(input_topic)
+ adjusted_sim = sim * penalty
+ topic_similarities[input_topic] = max(topic_similarities.get(input_topic, 0), adjusted_sim)
+ print(f"\033[1;32m[记忆激活]\033[0m 主题「{input_topic}」-> 「{memory_topic}」(内容数: {content_count}, 相似度: {adjusted_sim:.3f})")
+
+ topic_match = len(matched_topics) / len(identified_topics)
+ average_similarities = sum(topic_similarities.values()) / len(topic_similarities) if topic_similarities else 0
+
+ activation = int((topic_match + average_similarities) / 2 * 100)
+ print(f"\033[1;32m[记忆激活]\033[0m 匹配率: {topic_match:.3f}, 平均相似度: {average_similarities:.3f}, 激活值: {activation}")
+
+ return activation
+
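+    # 上面 memory_activate_value 的算例(数字均为假设):识别出3个主题,其中2个
+    # 匹配到记忆节点,匹配率≈0.667;惩罚后平均相似度0.5,
+    # 则 activation = int((0.667+0.5)/2*100) = 58。
+    # 惩罚系数 penalty = 1/(1+ln(n+1)):节点记忆条数 n 越多,单条相似度的贡献越低。
+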
+ async def get_relevant_memories(self, text: str, max_topics: int = 5, similarity_threshold: float = 0.4, max_memory_num: int = 5) -> list:
+ """根据输入文本获取相关的记忆内容"""
+ identified_topics = await self._identify_topics(text)
+
+ all_similar_topics = self._find_similar_topics(
+ identified_topics,
+ similarity_threshold=similarity_threshold,
+ debug_info="记忆检索"
+ )
+
+ relevant_topics = self._get_top_topics(all_similar_topics, max_topics)
+
+ relevant_memories = []
+ for topic, score in relevant_topics:
+ first_layer, _ = self.memory_graph.get_related_item(topic, depth=1)
+ if first_layer:
+ if len(first_layer) > max_memory_num/2:
+ first_layer = random.sample(first_layer, max_memory_num//2)
+ for memory in first_layer:
+ relevant_memories.append({
+ 'topic': topic,
+ 'similarity': score,
+ 'content': memory
+ })
+
+ relevant_memories.sort(key=lambda x: x['similarity'], reverse=True)
+
+ if len(relevant_memories) > max_memory_num:
+ relevant_memories = random.sample(relevant_memories, max_memory_num)
+
+ return relevant_memories
+
+ def find_topic_llm(self,text, topic_num):
+ prompt = f'这是一段文字:{text}。请你从这段话中总结出{topic_num}个关键的概念,可以是名词,动词,或者特定人物,帮我列出来,用逗号,隔开,尽可能精简。只需要列举{topic_num}个话题就好,不要有序号,不要告诉我其他内容。'
+ return prompt
+
+ def topic_what(self,text, topic, time_info):
+ prompt = f'这是一段文字,{time_info}:{text}。我想让你基于这段文字来概括"{topic}"这个概念,帮我总结成一句自然的话,可以包含时间和人物,以及具体的观点。只输出这句话就好'
+ return prompt
+
+def segment_text(text):
+ """使用jieba进行文本分词"""
+ seg_text = list(jieba.cut(text))
+ return seg_text
+
+def text_to_vector(text):
+ """将文本转换为词频向量"""
+ words = segment_text(text)
+ vector = {}
+ for word in words:
+ vector[word] = vector.get(word, 0) + 1
+ return vector
+
+def cosine_similarity(v1, v2):
+ """计算两个向量的余弦相似度"""
+ dot_product = sum(a * b for a, b in zip(v1, v2))
+ norm1 = math.sqrt(sum(a * a for a in v1))
+ norm2 = math.sqrt(sum(b * b for b in v2))
+ if norm1 == 0 or norm2 == 0:
+ return 0
+ return dot_product / (norm1 * norm2)
+
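+# 余弦相似度算例:v1=[1,1,0]、v2=[1,0,1] 时点积为1,两向量的模均为√2,
+# 相似度 = 1/(√2*√2) = 0.5;词频完全一致时为1,无公共词时为0。
+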
+def visualize_graph_lite(memory_graph: Memory_graph, color_by_memory: bool = False):
+ # 设置中文字体
+ plt.rcParams['font.sans-serif'] = ['SimHei'] # 用来正常显示中文标签
+ plt.rcParams['axes.unicode_minus'] = False # 用来正常显示负号
+
+ G = memory_graph.G
+
+ # 创建一个新图用于可视化
+ H = G.copy()
+
+ # 过滤掉内容数量小于2的节点
+ nodes_to_remove = []
+ for node in H.nodes():
+ memory_items = H.nodes[node].get('memory_items', [])
+ memory_count = len(memory_items) if isinstance(memory_items, list) else (1 if memory_items else 0)
+ if memory_count < 2:
+ nodes_to_remove.append(node)
+
+ H.remove_nodes_from(nodes_to_remove)
+
+ # 如果没有符合条件的节点,直接返回
+ if len(H.nodes()) == 0:
+ print("没有找到内容数量大于等于2的节点")
+ return
+
+ # 计算节点大小和颜色
+ node_colors = []
+ node_sizes = []
+ nodes = list(H.nodes())
+
+ # 获取最大记忆数用于归一化节点大小
+ max_memories = 1
+ for node in nodes:
+ memory_items = H.nodes[node].get('memory_items', [])
+ memory_count = len(memory_items) if isinstance(memory_items, list) else (1 if memory_items else 0)
+ max_memories = max(max_memories, memory_count)
+
+ # 计算每个节点的大小和颜色
+ for node in nodes:
+ # 计算节点大小(基于记忆数量)
+ memory_items = H.nodes[node].get('memory_items', [])
+ memory_count = len(memory_items) if isinstance(memory_items, list) else (1 if memory_items else 0)
+ # 使用指数函数使变化更明显
+ ratio = memory_count / max_memories
+ size = 400 + 2000 * (ratio ** 2) # 增大节点大小
+ node_sizes.append(size)
+
+ # 计算节点颜色(基于连接数)
+ degree = H.degree(node)
+ if degree >= 30:
+ node_colors.append((1.0, 0, 0)) # 亮红色 (#FF0000)
+ else:
+ # 将1-10映射到0-1的范围
+ color_ratio = (degree - 1) / 29.0 if degree > 1 else 0
+ # 使用蓝到红的渐变
+ red = min(0.9, color_ratio)
+ blue = max(0.0, 1.0 - color_ratio)
+ node_colors.append((red, 0, blue))
+
+ # 绘制图形
+ plt.figure(figsize=(16, 12)) # 减小图形尺寸
+ pos = nx.spring_layout(H,
+ k=1, # 调整节点间斥力
+ iterations=100, # 增加迭代次数
+ scale=1.5, # 减小布局尺寸
+ weight='strength') # 使用边的strength属性作为权重
+
+ nx.draw(H, pos,
+ with_labels=True,
+ node_color=node_colors,
+ node_size=node_sizes,
+ font_size=12, # 保持增大的字体大小
+ font_family='SimHei',
+ font_weight='bold',
+ edge_color='gray',
+ width=1.5) # 统一的边宽度
+
+    title = '记忆图谱可视化(仅显示内容≥2的节点)\n节点大小表示记忆数量\n节点颜色:蓝(弱连接)到红(强连接)渐变\n连接强度越大的节点距离越近'
+ plt.title(title, fontsize=16, fontfamily='SimHei')
+ plt.show()
+
+async def main():
+ # 初始化数据库
+ logger.info("正在初始化数据库连接...")
+ db = Database.get_instance()
+ start_time = time.time()
+
+ test_pare = {'do_build_memory':True,'do_forget_topic':False,'do_visualize_graph':True,'do_query':False,'do_merge_memory':False}
+
+ # 创建记忆图
+ memory_graph = Memory_graph()
+
+ # 创建海马体
+ hippocampus = Hippocampus(memory_graph)
+
+ # 从数据库同步数据
+ hippocampus.memory_cortex.sync_memory_from_db()
+
+ end_time = time.time()
+ logger.info(f"\033[32m[加载海马体耗时: {end_time - start_time:.2f} 秒]\033[0m")
+
+ # 构建记忆
+ if test_pare['do_build_memory']:
+ logger.info("开始构建记忆...")
+ chat_size = 20
+ await hippocampus.operation_build_memory(chat_size=chat_size)
+
+ end_time = time.time()
+ logger.info(f"\033[32m[构建记忆耗时: {end_time - start_time:.2f} 秒,chat_size={chat_size},chat_count = 16]\033[0m")
+
+ if test_pare['do_forget_topic']:
+ logger.info("开始遗忘记忆...")
+ await hippocampus.operation_forget_topic(percentage=0.01)
+
+ end_time = time.time()
+ logger.info(f"\033[32m[遗忘记忆耗时: {end_time - start_time:.2f} 秒]\033[0m")
+
+ if test_pare['do_merge_memory']:
+ logger.info("开始合并记忆...")
+ await hippocampus.operation_merge_memory(percentage=0.1)
+
+ end_time = time.time()
+ logger.info(f"\033[32m[合并记忆耗时: {end_time - start_time:.2f} 秒]\033[0m")
+
+ if test_pare['do_visualize_graph']:
+ # 展示优化后的图形
+ logger.info("生成记忆图谱可视化...")
+ print("\n生成优化后的记忆图谱:")
+ visualize_graph_lite(memory_graph)
+
+ if test_pare['do_query']:
+ # 交互式查询
+ while True:
+ query = input("\n请输入新的查询概念(输入'退出'以结束):")
+ if query.lower() == '退出':
+ break
+
+            first_layer, second_layer = memory_graph.get_related_item(query)
+            if first_layer or second_layer:
+ if first_layer:
+ print("\n直接相关的记忆:")
+ for item in first_layer:
+ print(f"- {item}")
+ if second_layer:
+ print("\n间接相关的记忆:")
+ for item in second_layer:
+ print(f"- {item}")
+ else:
+ print("未找到相关记忆。")
+
+
+if __name__ == "__main__":
+ import asyncio
+ asyncio.run(main())
+
+
diff --git a/src/plugins/models/utils_model.py b/src/plugins/models/utils_model.py
index 56ed80693..c6ed6b619 100644
--- a/src/plugins/models/utils_model.py
+++ b/src/plugins/models/utils_model.py
@@ -24,14 +24,15 @@ class LLM_request:
self.api_key = getattr(config, model["key"])
self.base_url = getattr(config, model["base_url"])
except AttributeError as e:
+ logger.error(f"原始 model dict 信息:{model}")
logger.error(f"配置错误:找不到对应的配置项 - {str(e)}")
raise ValueError(f"配置错误:找不到对应的配置项 - {str(e)}") from e
self.model_name = model["name"]
self.params = kwargs
-
+
self.pri_in = model.get("pri_in", 0)
self.pri_out = model.get("pri_out", 0)
-
+
# 获取数据库实例
self.db = Database.get_instance()
self._init_database()
@@ -44,12 +45,12 @@ class LLM_request:
self.db.db.llm_usage.create_index([("model_name", 1)])
self.db.db.llm_usage.create_index([("user_id", 1)])
self.db.db.llm_usage.create_index([("request_type", 1)])
- except Exception as e:
- logger.error(f"创建数据库索引失败: {e}")
+ except Exception:
+ logger.error("创建数据库索引失败")
- def _record_usage(self, prompt_tokens: int, completion_tokens: int, total_tokens: int,
- user_id: str = "system", request_type: str = "chat",
- endpoint: str = "/chat/completions"):
+ def _record_usage(self, prompt_tokens: int, completion_tokens: int, total_tokens: int,
+ user_id: str = "system", request_type: str = "chat",
+ endpoint: str = "/chat/completions"):
"""记录模型使用情况到数据库
Args:
prompt_tokens: 输入token数
@@ -79,8 +80,8 @@ class LLM_request:
f"提示词: {prompt_tokens}, 完成: {completion_tokens}, "
f"总计: {total_tokens}"
)
- except Exception as e:
- logger.error(f"记录token使用情况失败: {e}")
+ except Exception:
+ logger.error("记录token使用情况失败")
def _calculate_cost(self, prompt_tokens: int, completion_tokens: int) -> float:
"""计算API调用成本
@@ -140,12 +141,12 @@ class LLM_request:
}
api_url = f"{self.base_url.rstrip('/')}/{endpoint.lstrip('/')}"
- #判断是否为流式
+ # 判断是否为流式
stream_mode = self.params.get("stream", False)
if self.params.get("stream", False) is True:
- logger.info(f"进入流式输出模式,发送请求到URL: {api_url}")
+ logger.debug(f"进入流式输出模式,发送请求到URL: {api_url}")
else:
- logger.info(f"发送请求到URL: {api_url}")
+ logger.debug(f"发送请求到URL: {api_url}")
logger.info(f"使用模型: {self.model_name}")
# 构建请求体
@@ -158,7 +159,7 @@ class LLM_request:
try:
# 使用上下文管理器处理会话
headers = await self._build_headers()
- #似乎是openai流式必须要的东西,不过阿里云的qwq-plus加了这个没有影响
+ # 似乎是openai流式必须要的东西,不过阿里云的qwq-plus加了这个没有影响
if stream_mode:
headers["Accept"] = "text/event-stream"
@@ -182,11 +183,33 @@ class LLM_request:
continue
elif response.status in policy["abort_codes"]:
logger.error(f"错误码: {response.status} - {error_code_mapping.get(response.status)}")
+ if response.status == 403:
+ # 尝试降级Pro模型
+ if self.model_name.startswith(
+ "Pro/") and self.base_url == "https://api.siliconflow.cn/v1/":
+ old_model_name = self.model_name
+ self.model_name = self.model_name[4:] # 移除"Pro/"前缀
+ logger.warning(f"检测到403错误,模型从 {old_model_name} 降级为 {self.model_name}")
+
+ # 对全局配置进行更新
+ if hasattr(global_config, 'llm_normal') and global_config.llm_normal.get(
+ 'name') == old_model_name:
+ global_config.llm_normal['name'] = self.model_name
+ logger.warning("已将全局配置中的 llm_normal 模型降级")
+
+ # 更新payload中的模型名
+ if payload and 'model' in payload:
+ payload['model'] = self.model_name
+
+ # 重新尝试请求
+ retry -= 1 # 不计入重试次数
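+                                    # 提示:若外层重试循环为 for retry in range(...) 形式,
+                                    # 此处对 retry 的自减并不会真正增加尝试次数;
+                                    # 如需不计数的重试,应改用 while 循环维护计数器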
+ continue
+
raise RuntimeError(f"请求被拒绝: {error_code_mapping.get(response.status)}")
-
+
response.raise_for_status()
-
- #将流式输出转化为非流式输出
+
+ # 将流式输出转化为非流式输出
if stream_mode:
accumulated_content = ""
async for line_bytes in response.content:
@@ -204,8 +227,8 @@ class LLM_request:
if delta_content is None:
delta_content = ""
accumulated_content += delta_content
- except Exception as e:
- logger.error(f"解析流式输出错误: {e}")
+ except Exception:
+                                        logger.exception("解析流式输出错误")
content = accumulated_content
reasoning_content = ""
think_match = re.search(r'(.*?)', content, re.DOTALL)
@@ -213,12 +236,15 @@ class LLM_request:
reasoning_content = think_match.group(1).strip()
content = re.sub(r'.*?', '', content, flags=re.DOTALL).strip()
# 构造一个伪result以便调用自定义响应处理器或默认处理器
- result = {"choices": [{"message": {"content": content, "reasoning_content": reasoning_content}}]}
- return response_handler(result) if response_handler else self._default_response_handler(result, user_id, request_type, endpoint)
+ result = {
+ "choices": [{"message": {"content": content, "reasoning_content": reasoning_content}}]}
+ return response_handler(result) if response_handler else self._default_response_handler(
+ result, user_id, request_type, endpoint)
else:
result = await response.json()
# 使用自定义处理器或默认处理
- return response_handler(result) if response_handler else self._default_response_handler(result, user_id, request_type, endpoint)
+ return response_handler(result) if response_handler else self._default_response_handler(
+ result, user_id, request_type, endpoint)
except Exception as e:
if retry < policy["max_retries"] - 1:
@@ -232,8 +258,8 @@ class LLM_request:
logger.error("达到最大重试次数,请求仍然失败")
raise RuntimeError("达到最大重试次数,API请求仍然失败")
-
- async def _transform_parameters(self, params: dict) ->dict:
+
+ async def _transform_parameters(self, params: dict) -> dict:
"""
根据模型名称转换参数:
- 对于需要转换的OpenAI CoT系列模型(例如 "o3-mini"),删除 'temprature' 参数,
@@ -242,7 +268,8 @@ class LLM_request:
# 复制一份参数,避免直接修改原始数据
new_params = dict(params)
# 定义需要转换的模型列表
- models_needing_transformation = ["o3-mini", "o1-mini", "o1-preview", "o1-2024-12-17", "o1-preview-2024-09-12", "o3-mini-2025-01-31", "o1-mini-2024-09-12"]
+ models_needing_transformation = ["o3-mini", "o1-mini", "o1-preview", "o1-2024-12-17", "o1-preview-2024-09-12",
+ "o3-mini-2025-01-31", "o1-mini-2024-09-12"]
if self.model_name.lower() in models_needing_transformation:
# 删除 'temprature' 参数(如果存在)
new_params.pop("temperature", None)
@@ -278,13 +305,13 @@ class LLM_request:
**params_copy
}
# 如果 payload 中依然存在 max_tokens 且需要转换,在这里进行再次检查
- if self.model_name.lower() in ["o3-mini", "o1-mini", "o1-preview", "o1-2024-12-17", "o1-preview-2024-09-12", "o3-mini-2025-01-31", "o1-mini-2024-09-12"] and "max_tokens" in payload:
+ if self.model_name.lower() in ["o3-mini", "o1-mini", "o1-preview", "o1-2024-12-17", "o1-preview-2024-09-12",
+ "o3-mini-2025-01-31", "o1-mini-2024-09-12"] and "max_tokens" in payload:
payload["max_completion_tokens"] = payload.pop("max_tokens")
return payload
-
- def _default_response_handler(self, result: dict, user_id: str = "system",
- request_type: str = "chat", endpoint: str = "/chat/completions") -> Tuple:
+ def _default_response_handler(self, result: dict, user_id: str = "system",
+ request_type: str = "chat", endpoint: str = "/chat/completions") -> Tuple:
"""默认响应解析"""
if "choices" in result and result["choices"]:
message = result["choices"][0]["message"]
@@ -329,15 +356,15 @@ class LLM_request:
"""构建请求头"""
if no_key:
return {
- "Authorization": f"Bearer **********",
+ "Authorization": "Bearer **********",
"Content-Type": "application/json"
}
else:
return {
"Authorization": f"Bearer {self.api_key}",
"Content-Type": "application/json"
- }
- # 防止小朋友们截图自己的key
+ }
+ # 防止小朋友们截图自己的key
async def generate_response(self, prompt: str) -> Tuple[str, str]:
"""根据输入的提示生成模型的异步响应"""
@@ -384,6 +411,7 @@ class LLM_request:
Returns:
list: embedding向量,如果失败则返回None
"""
+
def embedding_handler(result):
"""处理响应"""
if "data" in result and len(result["data"]) > 0:
diff --git a/src/plugins/moods/moods.py b/src/plugins/moods/moods.py
index 32b900b0b..c37bfc81d 100644
--- a/src/plugins/moods/moods.py
+++ b/src/plugins/moods/moods.py
@@ -4,7 +4,7 @@ import time
from dataclasses import dataclass
from ..chat.config import global_config
-
+from loguru import logger
@dataclass
class MoodState:
@@ -51,11 +51,11 @@ class MoodManager:
# 情绪词映射表 (valence, arousal)
self.emotion_map = {
'happy': (0.8, 0.6), # 高愉悦度,中等唤醒度
- 'angry': (-0.7, 0.8), # 负愉悦度,高唤醒度
+ 'angry': (-0.7, 0.7), # 负愉悦度,高唤醒度
'sad': (-0.6, 0.3), # 负愉悦度,低唤醒度
- 'surprised': (0.4, 0.9), # 中等愉悦度,高唤醒度
+ 'surprised': (0.4, 0.8), # 中等愉悦度,高唤醒度
'disgusted': (-0.8, 0.5), # 高负愉悦度,中等唤醒度
- 'fearful': (-0.7, 0.7), # 负愉悦度,高唤醒度
+ 'fearful': (-0.7, 0.6), # 负愉悦度,高唤醒度
'neutral': (0.0, 0.5), # 中性愉悦度,中等唤醒度
}
@@ -64,15 +64,20 @@ class MoodManager:
# 第一象限:高唤醒,正愉悦
(0.5, 0.7): "兴奋",
(0.3, 0.8): "快乐",
+ (0.2, 0.65): "满足",
# 第二象限:高唤醒,负愉悦
(-0.5, 0.7): "愤怒",
(-0.3, 0.8): "焦虑",
+ (-0.2, 0.65): "烦躁",
# 第三象限:低唤醒,负愉悦
(-0.5, 0.3): "悲伤",
- (-0.3, 0.2): "疲倦",
+ (-0.3, 0.35): "疲倦",
+ (-0.4, 0.15): "疲倦",
# 第四象限:低唤醒,正愉悦
- (0.5, 0.3): "放松",
- (0.3, 0.2): "平静"
+ (0.2, 0.45): "平静",
+ (0.3, 0.4): "安宁",
+ (0.5, 0.3): "放松"
+
}
@classmethod
@@ -119,9 +124,13 @@ class MoodManager:
current_time = time.time()
time_diff = current_time - self.last_update
- # 应用衰减公式
- self.current_mood.valence *= math.pow(1 - self.decay_rate_valence, time_diff)
- self.current_mood.arousal *= math.pow(1 - self.decay_rate_arousal, time_diff)
+ # Valence 向中性(0)回归
+ valence_target = 0.0
+ self.current_mood.valence = valence_target + (self.current_mood.valence - valence_target) * math.exp(-self.decay_rate_valence * time_diff)
+
+ # Arousal 向中性(0.5)回归
+ arousal_target = 0.5
+ self.current_mood.arousal = arousal_target + (self.current_mood.arousal - arousal_target) * math.exp(-self.decay_rate_arousal * time_diff)
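+
+        # 衰减算例(假设 decay_rate_valence=0.01、距上次更新60秒):
+        # valence 0.8 -> 0 + 0.8*exp(-0.6) ≈ 0.44;
+        # 与旧的乘法衰减不同,新公式让 arousal 回归到中性值0.5,而不是一路衰减到0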
# 确保值在合理范围内
self.current_mood.valence = max(-1.0, min(1.0, self.current_mood.valence))
@@ -201,7 +210,7 @@ class MoodManager:
def print_mood_status(self) -> None:
"""打印当前情绪状态"""
- print(f"\033[1;35m[情绪状态]\033[0m 愉悦度: {self.current_mood.valence:.2f}, "
+ logger.info(f"[情绪状态]愉悦度: {self.current_mood.valence:.2f}, "
f"唤醒度: {self.current_mood.arousal:.2f}, "
f"心情: {self.current_mood.text}")
diff --git a/src/plugins/schedule/schedule_generator.py b/src/plugins/schedule/schedule_generator.py
index 8a036152c..e280c6bce 100644
--- a/src/plugins/schedule/schedule_generator.py
+++ b/src/plugins/schedule/schedule_generator.py
@@ -13,21 +13,21 @@ from ..models.utils_model import LLM_request
driver = get_driver()
config = driver.config
-
Database.initialize(
- host= config.MONGODB_HOST,
- port= int(config.MONGODB_PORT),
- db_name= config.DATABASE_NAME,
- username= config.MONGODB_USERNAME,
- password= config.MONGODB_PASSWORD,
- auth_source=config.MONGODB_AUTH_SOURCE
- )
+ host=config.MONGODB_HOST,
+ port=int(config.MONGODB_PORT),
+ db_name=config.DATABASE_NAME,
+ username=config.MONGODB_USERNAME,
+ password=config.MONGODB_PASSWORD,
+ auth_source=config.MONGODB_AUTH_SOURCE
+)
+
class ScheduleGenerator:
def __init__(self):
- #根据global_config.llm_normal这一字典配置指定模型
+ # 根据global_config.llm_normal这一字典配置指定模型
# self.llm_scheduler = LLMModel(model = global_config.llm_normal,temperature=0.9)
- self.llm_scheduler = LLM_request(model = global_config.llm_normal,temperature=0.9)
+ self.llm_scheduler = LLM_request(model=global_config.llm_normal, temperature=0.9)
self.db = Database.get_instance()
self.today_schedule_text = ""
self.today_schedule = {}
@@ -35,39 +35,41 @@ class ScheduleGenerator:
self.tomorrow_schedule = {}
self.yesterday_schedule_text = ""
self.yesterday_schedule = {}
-
+
async def initialize(self):
today = datetime.datetime.now()
tomorrow = datetime.datetime.now() + datetime.timedelta(days=1)
yesterday = datetime.datetime.now() - datetime.timedelta(days=1)
-
+
self.today_schedule_text, self.today_schedule = await self.generate_daily_schedule(target_date=today)
- self.tomorrow_schedule_text, self.tomorrow_schedule = await self.generate_daily_schedule(target_date=tomorrow,read_only=True)
- self.yesterday_schedule_text, self.yesterday_schedule = await self.generate_daily_schedule(target_date=yesterday,read_only=True)
-
- async def generate_daily_schedule(self, target_date: datetime.datetime = None,read_only:bool = False) -> Dict[str, str]:
-
+ self.tomorrow_schedule_text, self.tomorrow_schedule = await self.generate_daily_schedule(target_date=tomorrow,
+ read_only=True)
+ self.yesterday_schedule_text, self.yesterday_schedule = await self.generate_daily_schedule(
+ target_date=yesterday, read_only=True)
+
+ async def generate_daily_schedule(self, target_date: datetime.datetime = None, read_only: bool = False) -> Dict[
+ str, str]:
+
date_str = target_date.strftime("%Y-%m-%d")
weekday = target_date.strftime("%A")
-
schedule_text = str
-
+
existing_schedule = self.db.db.schedule.find_one({"date": date_str})
if existing_schedule:
- print(f"{date_str}的日程已存在:")
+ logger.debug(f"{date_str}的日程已存在:")
schedule_text = existing_schedule["schedule"]
# print(self.schedule_text)
- elif read_only == False:
- print(f"{date_str}的日程不存在,准备生成新的日程。")
- prompt = f"""我是{global_config.BOT_NICKNAME},{global_config.PROMPT_SCHEDULE_GEN},请为我生成{date_str}({weekday})的日程安排,包括:"""+\
- """
+ elif not read_only:
+ logger.debug(f"{date_str}的日程不存在,准备生成新的日程。")
+ prompt = f"""我是{global_config.BOT_NICKNAME},{global_config.PROMPT_SCHEDULE_GEN},请为我生成{date_str}({weekday})的日程安排,包括:""" + \
+ """
1. 早上的学习和工作安排
2. 下午的活动和任务
3. 晚上的计划和休息时间
- 请按照时间顺序列出具体时间点和对应的活动,用一个时间点而不是时间段来表示时间,用JSON格式返回日程表,仅返回内容,不要返回注释,时间采用24小时制,格式为{"时间": "活动","时间": "活动",...}。"""
-
+ 请按照时间顺序列出具体时间点和对应的活动,用一个时间点而不是时间段来表示时间,用JSON格式返回日程表,仅返回内容,不要返回注释,不要添加任何markdown或代码块样式,时间采用24小时制,格式为{"时间": "活动","时间": "活动",...}。"""
+
try:
schedule_text, _ = await self.llm_scheduler.generate_response(prompt)
self.db.db.schedule.insert_one({"date": date_str, "schedule": schedule_text})
@@ -76,36 +78,35 @@ class ScheduleGenerator:
schedule_text = "生成日程时出错了"
# print(self.schedule_text)
else:
- print(f"{date_str}的日程不存在。")
+ logger.debug(f"{date_str}的日程不存在。")
schedule_text = "忘了"
- return schedule_text,None
-
+ return schedule_text, None
+
schedule_form = self._parse_schedule(schedule_text)
- return schedule_text,schedule_form
-
+ return schedule_text, schedule_form
+
def _parse_schedule(self, schedule_text: str) -> Union[bool, Dict[str, str]]:
"""解析日程文本,转换为时间和活动的字典"""
- try:
+ try:
schedule_dict = json.loads(schedule_text)
return schedule_dict
- except json.JSONDecodeError as e:
- print(schedule_text)
- print(f"解析日程失败: {str(e)}")
+ except json.JSONDecodeError:
+ logger.exception("解析日程失败: {}".format(schedule_text))
return False
-
+
+    def _parse_time(self, time_str: str) -> datetime.datetime:
"""解析时间字符串,转换为时间"""
return datetime.datetime.strptime(time_str, "%H:%M")
-
+
+    def get_current_task(self) -> Union[str, tuple]:
"""获取当前时间应该进行的任务"""
current_time = datetime.datetime.now().strftime("%H:%M")
-
+
# 找到最接近当前时间的任务
closest_time = None
min_diff = float('inf')
-
+
# 检查今天的日程
if not self.today_schedule:
return "摸鱼"
@@ -114,7 +115,7 @@ class ScheduleGenerator:
if closest_time is None or diff < min_diff:
closest_time = time_str
min_diff = diff
-
+
# 检查昨天的日程中的晚间任务
if self.yesterday_schedule:
for time_str in self.yesterday_schedule.keys():
@@ -125,17 +126,17 @@ class ScheduleGenerator:
closest_time = time_str
min_diff = diff
return closest_time, self.yesterday_schedule[closest_time]
-
+
if closest_time:
return closest_time, self.today_schedule[closest_time]
return "摸鱼"
-
+
def _time_diff(self, time1: str, time2: str) -> int:
"""计算两个时间字符串之间的分钟差"""
- if time1=="24:00":
- time1="23:59"
- if time2=="24:00":
- time2="23:59"
+ if time1 == "24:00":
+ time1 = "23:59"
+ if time2 == "24:00":
+ time2 = "23:59"
t1 = datetime.datetime.strptime(time1, "%H:%M")
t2 = datetime.datetime.strptime(time2, "%H:%M")
diff = int((t2 - t1).total_seconds() / 60)
@@ -146,17 +147,18 @@ class ScheduleGenerator:
diff -= 1440 # 减一天的分钟
# print(f"时间1[{time1}]: 时间2[{time2}],差值[{diff}]分钟")
return diff
-
+
def print_schedule(self):
"""打印完整的日程安排"""
if not self._parse_schedule(self.today_schedule_text):
- print("今日日程有误,将在下次运行时重新生成")
+ logger.warning("今日日程有误,将在下次运行时重新生成")
self.db.db.schedule.delete_one({"date": datetime.datetime.now().strftime("%Y-%m-%d")})
else:
- print("\n=== 今日日程安排 ===")
+ logger.info("=== 今日日程安排 ===")
for time_str, activity in self.today_schedule.items():
- print(f"时间[{time_str}]: 活动[{activity}]")
- print("==================\n")
+ logger.info(f"时间[{time_str}]: 活动[{activity}]")
+ logger.info("==================")
+
# def main():
# # 使用示例
@@ -165,7 +167,7 @@ class ScheduleGenerator:
# scheduler.print_schedule()
# print("\n当前任务:")
# print(scheduler.get_current_task())
-
+
# print("昨天日程:")
# print(scheduler.yesterday_schedule)
# print("今天日程:")
@@ -175,5 +177,5 @@ class ScheduleGenerator:
# if __name__ == "__main__":
# main()
-
+
bot_schedule = ScheduleGenerator()
diff --git a/src/plugins/utils/statistic.py b/src/plugins/utils/statistic.py
index d7248e869..2974389e6 100644
--- a/src/plugins/utils/statistic.py
+++ b/src/plugins/utils/statistic.py
@@ -3,6 +3,7 @@ import time
from collections import defaultdict
from datetime import datetime, timedelta
from typing import Any, Dict
+from loguru import logger
from ...common.database import Database
@@ -153,8 +154,8 @@ class LLMStatistics:
try:
all_stats = self._collect_all_statistics()
self._save_statistics(all_stats)
- except Exception as e:
- print(f"\033[1;31m[错误]\033[0m 统计数据处理失败: {e}")
+ except Exception:
+ logger.exception("统计数据处理失败")
# 等待1分钟
for _ in range(60):
diff --git a/src/plugins/utils/typo_generator.py b/src/plugins/utils/typo_generator.py
index c743ec6ec..aa72c387f 100644
--- a/src/plugins/utils/typo_generator.py
+++ b/src/plugins/utils/typo_generator.py
@@ -284,10 +284,13 @@ class ChineseTypoGenerator:
返回:
typo_sentence: 包含错别字的句子
- typo_info: 错别字信息列表
+ correction_suggestion: 随机选择的一个纠正建议,返回正确的字/词
"""
result = []
typo_info = []
+ word_typos = [] # 记录词语错误对(错词,正确词)
+ char_typos = [] # 记录单字错误对(错字,正确字)
+ current_pos = 0
# 分词
words = self._segment_sentence(sentence)
@@ -296,6 +299,7 @@ class ChineseTypoGenerator:
# 如果是标点符号或空格,直接添加
if all(not self._is_chinese_char(c) for c in word):
result.append(word)
+ current_pos += len(word)
continue
# 获取词语的拼音
@@ -316,6 +320,8 @@ class ChineseTypoGenerator:
' '.join(word_pinyin),
' '.join(self._get_word_pinyin(typo_word)),
orig_freq, typo_freq))
+ word_typos.append((typo_word, word)) # 记录(错词,正确词)对
+ current_pos += len(typo_word)
continue
# 如果不进行整词替换,则进行单字替换
@@ -333,11 +339,15 @@ class ChineseTypoGenerator:
result.append(typo_char)
typo_py = pinyin(typo_char, style=Style.TONE3)[0][0]
typo_info.append((char, typo_char, py, typo_py, orig_freq, typo_freq))
+ char_typos.append((typo_char, char)) # 记录(错字,正确字)对
+ current_pos += 1
continue
result.append(char)
+ current_pos += 1
else:
# 处理多字词的单字替换
word_result = []
+ word_start_pos = current_pos
for i, (char, py) in enumerate(zip(word, word_pinyin)):
# 词中的字替换概率降低
word_error_rate = self.error_rate * (0.7 ** (len(word) - 1))
@@ -353,11 +363,24 @@ class ChineseTypoGenerator:
word_result.append(typo_char)
typo_py = pinyin(typo_char, style=Style.TONE3)[0][0]
typo_info.append((char, typo_char, py, typo_py, orig_freq, typo_freq))
+ char_typos.append((typo_char, char)) # 记录(错字,正确字)对
continue
word_result.append(char)
result.append(''.join(word_result))
+ current_pos += len(word)
- return ''.join(result), typo_info
+ # 优先从词语错误中选择,如果没有则从单字错误中选择
+ correction_suggestion = None
+ # 50%概率返回纠正建议
+ if random.random() < 0.5:
+ if word_typos:
+ wrong_word, correct_word = random.choice(word_typos)
+ correction_suggestion = correct_word
+ elif char_typos:
+ wrong_char, correct_char = random.choice(char_typos)
+ correction_suggestion = correct_char
+
+ return ''.join(result), correction_suggestion
def format_typo_info(self, typo_info):
"""
@@ -419,16 +442,16 @@ def main():
# 创建包含错别字的句子
start_time = time.time()
- typo_sentence, typo_info = typo_generator.create_typo_sentence(sentence)
+ typo_sentence, correction_suggestion = typo_generator.create_typo_sentence(sentence)
# 打印结果
print("\n原句:", sentence)
print("错字版:", typo_sentence)
- # 打印错别字信息
- if typo_info:
- print("\n错别字信息:")
- print(typo_generator.format_typo_info(typo_info))
+ # 打印纠正建议
+ if correction_suggestion:
+ print("\n随机纠正建议:")
+ print(f"应该改为:{correction_suggestion}")
# 计算并打印总耗时
end_time = time.time()
diff --git a/src/test/typo.py b/src/test/typo.py
index 16834200f..1378eae7d 100644
--- a/src/test/typo.py
+++ b/src/test/typo.py
@@ -11,12 +11,14 @@ from pathlib import Path
import random
import math
import time
+from loguru import logger
+
class ChineseTypoGenerator:
- def __init__(self,
- error_rate=0.3,
- min_freq=5,
- tone_error_rate=0.2,
+ def __init__(self,
+ error_rate=0.3,
+ min_freq=5,
+ tone_error_rate=0.2,
word_replace_rate=0.3,
max_freq_diff=200):
"""
@@ -34,27 +36,27 @@ class ChineseTypoGenerator:
self.tone_error_rate = tone_error_rate
self.word_replace_rate = word_replace_rate
self.max_freq_diff = max_freq_diff
-
+
# 加载数据
- print("正在加载汉字数据库,请稍候...")
+ logger.debug("正在加载汉字数据库,请稍候...")
self.pinyin_dict = self._create_pinyin_dict()
self.char_frequency = self._load_or_create_char_frequency()
-
+
def _load_or_create_char_frequency(self):
"""
Load or create the character-frequency dictionary
"""
cache_file = Path("char_frequency.json")
-
+
# Load straight from the cache file if it exists
if cache_file.exists():
with open(cache_file, 'r', encoding='utf-8') as f:
return json.load(f)
-
+
# Use jieba's built-in word-frequency file
char_freq = defaultdict(int)
dict_path = os.path.join(os.path.dirname(jieba.__file__), 'dict.txt')
-
+
# Read jieba's dictionary file
with open(dict_path, 'r', encoding='utf-8') as f:
for line in f:
@@ -63,15 +65,15 @@ class ChineseTypoGenerator:
for char in word:
if self._is_chinese_char(char):
char_freq[char] += int(freq)
-
+
# Normalize the frequency values
max_freq = max(char_freq.values())
- normalized_freq = {char: freq/max_freq * 1000 for char, freq in char_freq.items()}
-
+ normalized_freq = {char: freq / max_freq * 1000 for char, freq in char_freq.items()}
+
# Save to the cache file
with open(cache_file, 'w', encoding='utf-8') as f:
json.dump(normalized_freq, f, ensure_ascii=False, indent=2)
-
+
return normalized_freq
def _create_pinyin_dict(self):
@@ -81,7 +83,7 @@ class ChineseTypoGenerator:
# Range of common CJK ideographs
chars = [chr(i) for i in range(0x4e00, 0x9fff)]
pinyin_dict = defaultdict(list)
-
+
# Build a pinyin mapping for each character
for char in chars:
try:
@@ -89,7 +91,7 @@ class ChineseTypoGenerator:
pinyin_dict[py].append(char)
except Exception:
continue
-
+
return pinyin_dict
def _is_chinese_char(self, char):
@@ -107,7 +109,7 @@ class ChineseTypoGenerator:
"""
# Split the sentence into individual characters
characters = list(sentence)
-
+
# Get each character's pinyin
result = []
for char in characters:
@@ -117,7 +119,7 @@ class ChineseTypoGenerator:
# Get the pinyin (numeric tone)
py = pinyin(char, style=Style.TONE3)[0][0]
result.append((char, py))
-
+
return result
def _get_similar_tone_pinyin(self, py):
@@ -127,19 +129,19 @@ class ChineseTypoGenerator:
# Check whether the pinyin is empty or invalid
if not py or len(py) < 1:
return py
-
+
# A non-digit final character suggests a neutral tone or other special case
if not py[-1].isdigit():
# Append numeric tone 1 to pinyin that lacks one
return py + '1'
-
+
base = py[:-1] # strip the tone
tone = int(py[-1]) # extract the tone
-
+
# Handle the neutral tone (usually written 5) or invalid tones
if tone not in [1, 2, 3, 4]:
return base + str(random.choice([1, 2, 3, 4]))
-
+
# Normal tone handling
possible_tones = [1, 2, 3, 4]
possible_tones.remove(tone) # drop the original tone
@@ -152,11 +154,11 @@ class ChineseTypoGenerator:
"""
if target_freq > orig_freq:
return 1.0 # keep full probability if the replacement char is more frequent
-
+
freq_diff = orig_freq - target_freq
if freq_diff > self.max_freq_diff:
return 0.0 # frequency gap too large; do not replace
-
+
# Exponential decay: probability 1 at zero gap,
# approaching 0 as the gap nears max_freq_diff
return math.exp(-3 * freq_diff / self.max_freq_diff)
@@ -166,42 +168,42 @@ class ChineseTypoGenerator:
Get homophones whose frequency is close to the given character's, possibly with tone errors
"""
homophones = []
-
+
# With some probability, use a wrong tone
if random.random() < self.tone_error_rate:
wrong_tone_py = self._get_similar_tone_pinyin(py)
homophones.extend(self.pinyin_dict[wrong_tone_py])
-
+
# Add homophones with the correct tone
homophones.extend(self.pinyin_dict[py])
-
+
if not homophones:
return None
-
+
# Get the original character's frequency
orig_freq = self.char_frequency.get(char, 0)
-
+
# Compute each homophone's frequency gap and filter out low-frequency characters
- freq_diff = [(h, self.char_frequency.get(h, 0))
- for h in homophones
- if h != char and self.char_frequency.get(h, 0) >= self.min_freq]
-
+ freq_diff = [(h, self.char_frequency.get(h, 0))
+ for h in homophones
+ if h != char and self.char_frequency.get(h, 0) >= self.min_freq]
+
if not freq_diff:
return None
-
+
# Compute the replacement probability of each candidate
candidates_with_prob = []
for h, freq in freq_diff:
prob = self._calculate_replacement_probability(orig_freq, freq)
if prob > 0: # keep only candidates with a nonzero probability
candidates_with_prob.append((h, prob))
-
+
if not candidates_with_prob:
return None
-
+
# Sort by probability
candidates_with_prob.sort(key=lambda x: x[1], reverse=True)
-
+
# Return the highest-probability candidates
return [char for char, _ in candidates_with_prob[:num_candidates]]
@@ -223,10 +225,10 @@ class ChineseTypoGenerator:
"""
if len(word) == 1:
return []
-
+
# Get the word's pinyin
word_pinyin = self._get_word_pinyin(word)
-
+
# Enumerate all possible homophone combinations
candidates = []
for py in word_pinyin:
@@ -234,11 +236,11 @@ class ChineseTypoGenerator:
if not chars:
return []
candidates.append(chars)
-
+
# Generate every possible combination
import itertools
all_combinations = itertools.product(*candidates)
-
+
# Load jieba's dictionary and word-frequency data
dict_path = os.path.join(os.path.dirname(jieba.__file__), 'dict.txt')
valid_words = {} # dict of word -> frequency
@@ -249,11 +251,11 @@ class ChineseTypoGenerator:
word_text = parts[0]
word_freq = float(parts[1]) # word frequency
valid_words[word_text] = word_freq
-
+
# Use the original word's frequency as a reference
original_word_freq = valid_words.get(word, 0)
min_word_freq = original_word_freq * 0.1 # floor at 10% of the original frequency
-
+
# Filter and score the candidates
homophones = []
for combo in all_combinations:
@@ -268,7 +270,7 @@ class ChineseTypoGenerator:
combined_score = (new_word_freq * 0.7 + char_avg_freq * 0.3)
if combined_score >= self.min_freq:
homophones.append((new_word, combined_score))
-
+
# Sort by combined score and cap the number of results
sorted_homophones = sorted(homophones, key=lambda x: x[1], reverse=True)
return [word for word, _ in sorted_homophones[:5]] # top 5 results only
@@ -286,19 +288,19 @@ class ChineseTypoGenerator:
"""
result = []
typo_info = []
-
+
# Segment the sentence into words
words = self._segment_sentence(sentence)
-
+
for word in words:
# Punctuation and spaces are appended unchanged
if all(not self._is_chinese_char(c) for c in word):
result.append(word)
continue
-
+
# Get the word's pinyin
word_pinyin = self._get_word_pinyin(word)
-
+
# Try whole-word replacement
if len(word) > 1 and random.random() < self.word_replace_rate:
word_homophones = self._get_word_homophones(word)
@@ -307,15 +309,15 @@ class ChineseTypoGenerator:
# Compute the word's average character frequency
orig_freq = sum(self.char_frequency.get(c, 0) for c in word) / len(word)
typo_freq = sum(self.char_frequency.get(c, 0) for c in typo_word) / len(typo_word)
-
+
# Append to the result
result.append(typo_word)
- typo_info.append((word, typo_word,
- ' '.join(word_pinyin),
- ' '.join(self._get_word_pinyin(typo_word)),
- orig_freq, typo_freq))
+ typo_info.append((word, typo_word,
+ ' '.join(word_pinyin),
+ ' '.join(self._get_word_pinyin(typo_word)),
+ orig_freq, typo_freq))
continue
-
+
# Otherwise fall back to single-character replacement
if len(word) == 1:
char = word
@@ -339,7 +341,7 @@ class ChineseTypoGenerator:
for i, (char, py) in enumerate(zip(word, word_pinyin)):
# Characters inside words are replaced with reduced probability
word_error_rate = self.error_rate * (0.7 ** (len(word) - 1))
-
+
if random.random() < word_error_rate:
similar_chars = self._get_similar_frequency_chars(char, py)
if similar_chars:
@@ -354,7 +356,7 @@ class ChineseTypoGenerator:
continue
word_result.append(char)
result.append(''.join(word_result))
-
+
return ''.join(result), typo_info
def format_typo_info(self, typo_info):
@@ -369,7 +371,7 @@ class ChineseTypoGenerator:
"""
if not typo_info:
return "未生成错别字"
-
+
result = []
for orig, typo, orig_py, typo_py, orig_freq, typo_freq in typo_info:
# Determine whether this was a word-level replacement
@@ -379,12 +381,12 @@ class ChineseTypoGenerator:
else:
tone_error = orig_py[:-1] == typo_py[:-1] and orig_py[-1] != typo_py[-1]
error_type = "tone error" if tone_error else "homophone substitution"
-
+
result.append(f"原文:{orig}({orig_py}) [频率:{orig_freq:.2f}] -> "
- f"替换:{typo}({typo_py}) [频率:{typo_freq:.2f}] [{error_type}]")
-
+ f"替换:{typo}({typo_py}) [频率:{typo_freq:.2f}] [{error_type}]")
+
return "\n".join(result)
-
+
def set_params(self, **kwargs):
"""
Set parameters
@@ -399,9 +401,10 @@ class ChineseTypoGenerator:
for key, value in kwargs.items():
if hasattr(self, key):
setattr(self, key, value)
- print(f"参数 {key} 已设置为 {value}")
+ logger.debug(f"参数 {key} 已设置为 {value}")
else:
- print(f"警告: 参数 {key} 不存在")
+ logger.warning(f"警告: 参数 {key} 不存在")
+
def main():
# Create a typo-generator instance
@@ -411,27 +414,27 @@ def main():
tone_error_rate=0.02,
word_replace_rate=0.3
)
-
+
# Get user input
sentence = input("Enter a Chinese sentence: ")
-
+
# Generate the typo-laden sentence
start_time = time.time()
typo_sentence, typo_info = typo_generator.create_typo_sentence(sentence)
-
+
# Log the results
- print("\nOriginal:", sentence)
- print("Typo version:", typo_sentence)
-
+ logger.debug(f"Original: {sentence}")
+ logger.debug(f"Typo version: {typo_sentence}")
+
# Log typo details
if typo_info:
- print("\nTypo details:")
- print(typo_generator.format_typo_info(typo_info))
-
+ logger.debug(f"Typo details: {typo_generator.format_typo_info(typo_info)}")
+
# Compute and log the total elapsed time
end_time = time.time()
total_time = end_time - start_time
- print(f"\nTotal time: {total_time:.2f}s")
+ logger.debug(f"Total time: {total_time:.2f}s")
+
if __name__ == "__main__":
main()
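
One pitfall when converting `print` calls to loguru, as this file does: loguru passes extra positional arguments through `str.format`, so a message without `{}` placeholders silently drops them. The f-strings used above avoid that. A quick illustration:

```python
from loguru import logger

sentence = "今天天气真好"
logger.debug("Original:", sentence)     # BUG: no {} placeholder, sentence is silently dropped
logger.debug("Original: {}", sentence)  # loguru's deferred str.format style
logger.debug(f"Original: {sentence}")   # eager f-string, as used in this patch
```
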
diff --git a/template/auto_format.py b/template/auto_format.py
deleted file mode 100644
index d99e29e34..000000000
--- a/template/auto_format.py
+++ /dev/null
@@ -1,48 +0,0 @@
-import os
-import sys
-from pathlib import Path
-
-import tomli
-import tomli_w
-
-
-def sync_configs():
- # Read both config files
- try:
- with open('bot_config_dev.toml', 'rb') as f: # tomli requires binary mode
- dev_config = tomli.load(f)
-
- with open('bot_config.toml', 'rb') as f:
- prod_config = tomli.load(f)
- except FileNotFoundError as e:
- print(f"错误:找不到配置文件 - {e}")
- sys.exit(1)
- except tomli.TOMLDecodeError as e:
- print(f"错误:TOML格式解析失败 - {e}")
- sys.exit(1)
-
- # Recursively merge configs
- def merge_configs(source, target):
- for key, value in source.items():
- if key not in target:
- target[key] = value
- elif isinstance(value, dict) and isinstance(target[key], dict):
- merge_configs(value, target[key])
-
- # Merge new keys from the dev config into the prod config
- merge_configs(dev_config, prod_config)
-
- # Save the updated config
- try:
- with open('bot_config.toml', 'wb') as f: # tomli_w requires binary mode
- tomli_w.dump(prod_config, f)
- print("Config files synchronized!")
- except Exception as e:
- print(f"Error: failed to save config file - {e}")
- sys.exit(1)
-
-if __name__ == '__main__':
- # Make sure we run from the script's directory
- script_dir = Path(__file__).parent
- os.chdir(script_dir)
- sync_configs()
diff --git a/template/bot_config_template.toml b/template/bot_config_template.toml
index 6584bc4d7..126fc501d 100644
--- a/template/bot_config_template.toml
+++ b/template/bot_config_template.toml
@@ -1,6 +1,21 @@
+[inner]
+version = "0.0.6"
+
+# If you modify this config file, bump the version value after your change.
+# If you add a new item, also add the matching variable to the BotConfig class.
+# 1. For a new top-level [] table, e.g. a new [memory], add an entry to the
+#    include_configs dict inside config.py's load_config function: "memory": {
+#        "func": memory,
+#        "support": ">=0.0.0",  # the new version number
+#        "necessary": False     # whether the table is required
+#    }
+# 2. For a new key under an existing table, e.g. memory_ban_words under [memory],
+#    add a version check inside the memory function of config.py's load_config
+#    (a hedged sketch of this pattern follows this diff):
+    # if config.INNER_VERSION in SpecifierSet(">=0.0.2"):
+    #     config.memory_ban_words = set(memory_config.get("memory_ban_words", []))
+
[bot]
qq = 123
nickname = "麦麦"
+alias_names = ["小麦", "阿麦"]
[personality]
prompt_personality = [
@@ -26,6 +41,13 @@ ban_words = [
# "403","张三"
]
+ban_msgs_regex = [
+ # Regexes matched against the raw message; matches are filtered out (CQ codes supported).
+ # Do not modify unless you understand regular expressions.
+ #"https?://[^\\s]+", # match HTTP(S) links
+ #"\\d{4}-\\d{2}-\\d{2}", # match dates
+ # "\\[CQ:at,qq=\\d+\\]" # match @mentions
+]
+
[emoji]
check_interval = 120 # interval for checking emoji stickers
register_interval = 10 # interval for registering emoji stickers
@@ -46,11 +68,35 @@ max_response_length = 1024 # max number of tokens in 麦麦's replies
build_memory_interval = 300 # memory build interval, in seconds
forget_memory_interval = 300 # memory forgetting interval, in seconds
+memory_ban_words = [ # words that should not be memorized
+ # "403","张三"
+]
+
[mood]
mood_update_interval = 1.0 # mood update interval, in seconds
mood_decay_rate = 0.95 # mood decay rate
mood_intensity_factor = 1.0 # mood intensity factor
+[keywords_reaction] # react to specific keywords
+enable = true # master switch for keyword reactions
+
+[[keywords_reaction.rules]] # to add more rules, copy this block and edit keywords and reaction
+enable = true # whether this rule is active (on by default, so humans can better identify AI in the future AI war (just kidding))
+keywords = ["人机", "bot", "机器", "入机", "robot", "机器人"] # keywords that trigger the reaction
+reaction = "有人说你是人机或者机器人,否定这一事实,攻击或反问他。注意,只有认为你是机器人才回复,否则不要否认" # prompt appended when triggered
+
+[[keywords_reaction.rules]] # copy blocks like this
+enable = false # example only; never triggers
+keywords = ["测试关键词回复","test",""]
+reaction = "回答“测试成功”"
+
+[chinese_typo]
+enable = true # whether to enable the Chinese typo generator
+error_rate = 0.006 # single-character replacement probability
+min_freq = 7 # minimum character-frequency threshold
+tone_error_rate = 0.2 # tone-error probability
+word_replace_rate = 0.006 # whole-word replacement probability
+
[others]
enable_advance_output = true # whether to enable advanced output
enable_kuuki_read = true # whether to enable the "reading the air" feature
@@ -80,49 +126,42 @@ ban_user_id = [] # QQ IDs whose messages will never get a reply
[model.llm_reasoning] # reply model 1: primary reply model
name = "Pro/deepseek-ai/DeepSeek-R1"
-base_url = "SILICONFLOW_BASE_URL"
-key = "SILICONFLOW_KEY"
+provider = "SILICONFLOW"
pri_in = 0 # model input price (optional; for tracking spend)
pri_out = 0 # model output price (optional; for tracking spend)
+
[model.llm_reasoning_minor] # reply model 3: secondary reply model
name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"
-base_url = "SILICONFLOW_BASE_URL"
-key = "SILICONFLOW_KEY"
+provider = "SILICONFLOW"
# Non-reasoning models
[model.llm_normal] # V3, reply model 2: secondary reply model
name = "Pro/deepseek-ai/DeepSeek-V3"
-base_url = "SILICONFLOW_BASE_URL"
-key = "SILICONFLOW_KEY"
+provider = "SILICONFLOW"
[model.llm_normal_minor] #V2.5
name = "deepseek-ai/DeepSeek-V2.5"
-base_url = "SILICONFLOW_BASE_URL"
-key = "SILICONFLOW_KEY"
+provider = "SILICONFLOW"
[model.llm_emotion_judge] # emotion judgement, 0.7/m
name = "Qwen/Qwen2.5-14B-Instruct"
-base_url = "SILICONFLOW_BASE_URL"
-key = "SILICONFLOW_KEY"
+provider = "SILICONFLOW"
[model.llm_topic_judge] # topic judgement: qwen2.5 7b recommended
name = "Pro/Qwen/Qwen2.5-7B-Instruct"
-base_url = "SILICONFLOW_BASE_URL"
-key = "SILICONFLOW_KEY"
+provider = "SILICONFLOW"
[model.llm_summary_by_topic] # qwen2.5 32b or larger recommended
name = "Qwen/Qwen2.5-32B-Instruct"
-base_url = "SILICONFLOW_BASE_URL"
-key = "SILICONFLOW_KEY"
+provider = "SILICONFLOW"
pri_in = 0
pri_out = 0
[model.moderation] # content moderation (not yet enabled)
name = ""
-base_url = "SILICONFLOW_BASE_URL"
-key = "SILICONFLOW_KEY"
+provider = "SILICONFLOW"
pri_in = 0
pri_out = 0
@@ -130,8 +169,7 @@ pri_out = 0
[model.vlm] # image recognition, 0.35/m
name = "Pro/Qwen/Qwen2-VL-7B-Instruct"
-base_url = "SILICONFLOW_BASE_URL"
-key = "SILICONFLOW_KEY"
+provider = "SILICONFLOW"
@@ -139,5 +177,4 @@ key = "SILICONFLOW_KEY"
[model.embedding] # embeddings
name = "BAAI/bge-m3"
-base_url = "SILICONFLOW_BASE_URL"
-key = "SILICONFLOW_KEY"
+provider = "SILICONFLOW"