Merge remote-tracking branch 'upstream/debug' into debug

This commit is contained in:
tcmofashi
2025-03-12 08:25:42 +08:00
45 changed files with 1842 additions and 659 deletions

1
.gitignore vendored
View File

@@ -1,4 +1,5 @@
data/
data1/
mongodb/
NapCat.Framework.Windows.Once/
log/

View File

@@ -17,7 +17,11 @@
- MongoDB 提供数据持久化支持
- NapCat 作为QQ协议端支持
**最新版本: v0.5.***
**最新版本: v0.5.13**
> [!WARNING]
> 注意:3月12日发布的 v0.5.13 版本更新较大,建议单独开文件夹部署,然后转移 /data 文件和数据库。数据库可能需要删除 messages 下的内容(不需要删除记忆)。
<div align="center">
<a href="https://www.bilibili.com/video/BV1amAneGE3P" target="_blank">
@@ -40,7 +44,12 @@
- [二群](https://qm.qq.com/q/RzmCiRtHEW) 571780722 (开发和建议相关讨论)不一定有空回复,会优先写文档和代码
- [三群](https://qm.qq.com/q/wlH5eT8OmQ) 1035228475 (开发和建议相关讨论)不一定有空回复,会优先写文档和代码
**其他平台版本**
**📚 有热心网友创作的wiki:** https://maimbot.pages.dev/
**😊 其他平台版本**
- (由 [CabLate](https://github.com/cablate) 贡献) [Telegram 与其他平台(未来可能会有)的版本](https://github.com/cablate/MaiMBot/tree/telegram) - [集中讨论串](https://github.com/SengokuCola/MaiMBot/discussions/149)

55
bot.py
View File

@@ -12,6 +12,8 @@ from loguru import logger
from nonebot.adapters.onebot.v11 import Adapter
import platform
from src.common.database import Database
# 获取没有加载env时的环境变量
env_mask = {key: os.getenv(key) for key in os.environ}
@@ -51,19 +53,19 @@ def init_env():
with open(".env", "w") as f:
f.write("ENVIRONMENT=prod")
# 检测.env.prod文件是否存在
if not os.path.exists(".env.prod"):
logger.error("检测到.env.prod文件不存在")
shutil.copy("template.env", "./.env.prod")
# 检测.env.prod文件是否存在
if not os.path.exists(".env.prod"):
logger.error("检测到.env.prod文件不存在")
shutil.copy("template.env", "./.env.prod")
# 检测.env.dev文件是否存在不存在的话直接复制生产环境配置
if not os.path.exists(".env.dev"):
logger.error("检测到.env.dev文件不存在")
shutil.copy("template.env", "./.env.dev")
shutil.copy(".env.prod", "./.env.dev")
# 首先加载基础环境变量.env
if os.path.exists(".env"):
load_dotenv(".env")
load_dotenv(".env",override=True)
logger.success("成功加载基础环境变量配置")
@@ -96,18 +98,39 @@ def load_env():
logger.error(f"ENVIRONMENT 配置错误,请检查 .env 文件中的 ENVIRONMENT 变量及对应 .env.{env} 是否存在")
RuntimeError(f"ENVIRONMENT 配置错误,请检查 .env 文件中的 ENVIRONMENT 变量及对应 .env.{env} 是否存在")
def init_database():
    """Initialise the shared database connection from environment variables.

    Reads the ``MONGODB_*`` / ``DATABASE_NAME`` variables and hands them to
    ``Database.initialize``. Host, port and database name fall back to
    ``127.0.0.1``, ``27017`` and ``MegBot`` when unset; the remaining values
    are passed as ``None`` when unset.
    """
    env = os.getenv
    # Values are read in the same order as the keyword arguments are declared.
    connection_settings = {
        "uri": env("MONGODB_URI"),
        "host": env("MONGODB_HOST", "127.0.0.1"),
        "port": int(env("MONGODB_PORT", "27017")),
        "db_name": env("DATABASE_NAME", "MegBot"),
        "username": env("MONGODB_USERNAME"),
        "password": env("MONGODB_PASSWORD"),
        "auth_source": env("MONGODB_AUTH_SOURCE"),
    }
    Database.initialize(**connection_settings)
def load_logger():
    """Replace loguru's default sink with a single colourised stderr sink.

    The sink depends on the ``ENVIRONMENT`` variable:

    * ``dev``: default level ``DEBUG``, no record filtering.
    * anything else: default level ``INFO``, and records whose logger name
      contains ``"nonebot"`` are dropped.

    In both cases ``LOG_LEVEL`` overrides the default level. Exactly one
    handler is registered, so each record is emitted once.
    """
    logger.remove()  # drop the default handler before installing ours

    # One shared format for every environment, instead of a copy per branch.
    log_format = (
        "<green>{time:YYYY-MM-DD HH:mm:ss.SSS}</green> <fg #777777>|</> <level>{level: <7}</level> <fg "
        "#777777>|</> <cyan>{name:.<8}</cyan>:<cyan>{function:.<8}</cyan>:<cyan>{line: >4}</cyan> <fg "
        "#777777>-</> <level>{message}</level>"
    )

    if os.getenv("ENVIRONMENT") == "dev":
        logger.add(
            sys.stderr,
            format=log_format,
            colorize=True,
            level=os.getenv("LOG_LEVEL", "DEBUG"),  # dev defaults to DEBUG
        )
    else:
        logger.add(
            sys.stderr,
            format=log_format,
            colorize=True,
            level=os.getenv("LOG_LEVEL", "INFO"),  # non-dev defaults to INFO
            filter=lambda record: "nonebot" not in record["name"],
        )
@@ -188,6 +211,7 @@ def raw_main():
init_config()
init_env()
load_env()
init_database() # 加载完成环境后初始化database
load_logger()
env_config = {key: os.getenv(key) for key in os.environ}
@@ -213,7 +237,6 @@ def raw_main():
if __name__ == "__main__":
try:
raw_main()

View File

@@ -1,6 +1,84 @@
# Changelog
## [0.5.12] - 2025-3-9
### Added
- 新增了 我是测试
## [0.5.13] - 2025-3-12
AI总结
### 🌟 核心功能增强
#### 记忆系统升级
- 新增了记忆系统的时间戳功能,包括创建时间和最后修改时间
- 新增了记忆图节点和边的时间追踪功能
- 新增了自动补充缺失时间字段的功能
- 新增了记忆遗忘机制,基于时间条件自动遗忘旧记忆
- 优化了记忆系统的数据同步机制
- 优化了记忆系统的数据结构,确保所有数据类型的一致性
#### 私聊功能完善
- 新增了完整的私聊功能支持,包括消息处理和回复
- 新增了聊天流管理器,支持群聊和私聊的上下文管理
- 新增了私聊过滤开关功能
- 优化了关系管理系统,支持跨平台用户关系
#### 消息处理升级
- 新增了消息队列管理系统,支持按时间顺序处理消息
- 新增了消息发送控制器,实现人性化的发送速度和间隔
- 新增了JSON格式分享卡片读取支持
- 新增了Base64格式表情包CQ码支持
- 改进了消息处理流程,支持多种消息类型
### 💻 系统架构优化
#### 配置系统改进
- 新增了配置文件自动更新和版本检测功能
- 新增了配置文件热重载API接口
- 新增了配置文件版本兼容性检查
- 新增了根据不同环境(dev/prod)显示不同级别的日志功能
- 优化了配置文件格式和结构
#### 部署支持扩展
- 新增了Linux系统部署指南
- 新增了Docker部署支持的详细文档
- 新增了NixOS环境支持(使用venv方式)
- 新增了优雅的shutdown机制
- 优化了Docker部署文档
### 🛠️ 开发体验提升
#### 工具链升级
- 新增了ruff代码格式化和检查工具
- 新增了知识库一键启动脚本
- 新增了自动保存脚本,定期保存聊天记录和关系数据
- 新增了表情包自动获取脚本
- 优化了日志记录,使用logger.debug替代print
- 精简了日志输出,禁用了Uvicorn/NoneBot默认日志
#### 安全性强化
- 新增了API密钥安全管理机制
- 新增了数据库完整性检查功能
- 新增了表情包文件完整性自动检查
- 新增了异常处理和自动恢复机制
- 优化了安全性检查机制
### 🐛 关键问题修复
#### 系统稳定性
- 修复了systemctl强制停止的问题
- 修复了ENVIRONMENT变量在同一终端下不能被覆盖的问题
- 修复了libc++.so依赖问题
- 修复了数据库索引创建失败的问题
- 修复了MongoDB连接配置相关问题
- 修复了消息队列溢出问题
- 修复了配置文件加载时的版本兼容性问题
#### 功能完善性
- 修复了私聊时产生reply消息的bug
- 修复了回复消息无法识别的问题
- 修复了CQ码解析错误
- 修复了情绪管理器导入问题
- 修复了小名无效的问题
- 修复了表情包发送时的参数缺失问题
- 修复了表情包重复注册问题
- 修复了变量拼写错误问题
### 主要改进方向
1. 提升记忆系统的智能性和可靠性
2. 完善私聊功能的完整生态
3. 优化系统架构和部署便利性
4. 提升开发体验和代码质量
5. 加强系统安全性和稳定性

59
config/auto_update.py Normal file
View File

@@ -0,0 +1,59 @@
import os
import shutil
import tomlkit
from pathlib import Path
def update_config():
    """Regenerate ``config/bot_config.toml`` from the template, keeping user values.

    The template ``template/bot_config_template.toml`` replaces the current
    config file, then every value from the previous config whose key still
    exists in the template is written back into it. Keys named ``version``
    are never carried over, so the result keeps the template's version.
    Keys present only in the old config are dropped. Comments and layout of
    the template are preserved by tomlkit.

    Raises:
        FileNotFoundError: if the template file is missing. This is checked
            before the existing config is removed, so a missing template can
            no longer destroy the user's configuration.
    """
    # The project root is the parent of the directory holding this script.
    root_dir = Path(__file__).parent.parent
    template_path = root_dir / "template" / "bot_config_template.toml"
    config_path = root_dir / "config" / "bot_config.toml"

    # Fail early, before anything on disk is modified.
    if not template_path.is_file():
        raise FileNotFoundError(f"Config template not found: {template_path}")

    # Load the user's current settings, if there are any.
    old_config = {}
    if config_path.exists():
        with open(config_path, "r", encoding="utf-8") as f:
            old_config = tomlkit.load(f)
        # Remove the old file so the template copy starts from a clean slate.
        os.remove(config_path)

    # Start from a fresh copy of the template.
    shutil.copy2(template_path, config_path)
    with open(config_path, "r", encoding="utf-8") as f:
        new_config = tomlkit.load(f)

    def update_dict(target, source):
        """Recursively copy values from *source* into keys that exist in *target*."""
        for key, value in source.items():
            # The version always comes from the template.
            if key == "version":
                continue
            if key not in target:
                continue
            if isinstance(value, dict) and isinstance(target[key], (dict, tomlkit.items.Table)):
                update_dict(target[key], value)
            else:
                try:
                    # Wrap the value as a tomlkit item.
                    target[key] = tomlkit.item(value)
                except (TypeError, ValueError):
                    # Fall back to plain assignment if tomlkit cannot wrap it.
                    target[key] = value

    # Carry the old values over into the new config.
    update_dict(new_config, old_config)

    # Write the merged document back.
    with open(config_path, "w", encoding="utf-8") as f:
        f.write(tomlkit.dumps(new_config))
if __name__ == "__main__":
update_config()

View File

@@ -0,0 +1,444 @@
# 面向纯新手的Linux服务器麦麦部署指南
## 你得先有一个服务器
为了能使麦麦在你的电脑关机之后还能运行,你需要一台不间断开机的主机,也就是我们常说的服务器。
华为云、阿里云、腾讯云等等都是国内可以选择的云服务商。
租一台最低配置的就足够用了,按月租大概十几块钱就能租到。
我们假设你已经租好了一台Linux架构的云服务器。我用的是阿里云ubuntu24.04,其他的原理相似。
## 0.我们就从零开始吧
### 网络问题
为了访问github相关界面,推荐去下载一款加速器,新手可以试试watttoolkit。
### 安装包下载
#### MongoDB
对于ubuntu24.04 x86来说是这个
https://repo.mongodb.org/apt/ubuntu/dists/noble/mongodb-org/8.0/multiverse/binary-amd64/mongodb-org-server_8.0.5_amd64.deb
如果不是就在这里自行选择对应版本
https://www.mongodb.com/try/download/community
#### Napcat
在这里选择对应版本。
https://github.com/NapNeko/NapCatQQ/releases/tag/v4.6.7
对于ubuntu24.04 x86来说是这个
https://dldir1.qq.com/qqfile/qq/QQNT/ee4bd910/linuxqq_3.2.16-32793_amd64.deb
#### 麦麦
https://github.com/SengokuCola/MaiMBot/archive/refs/tags/0.5.8-alpha.zip
下载这个官方压缩包。
### 路径
我把麦麦相关文件放在了/moi/mai里面,你可以凭喜好更改,记得适当调整下面涉及到的部分即可。
文件结构:
```
moi
└─ mai
├─ linuxqq_3.2.16-32793_amd64.deb
├─ mongodb-org-server_8.0.5_amd64.deb
└─ bot
└─ MaiMBot-0.5.8-alpha.zip
```
### 网络
你可以在你的服务器控制台网页更改防火墙规则,允许6099、8080、27017这几个端口的出入。
## 1.正式开始!
远程连接你的服务器,你会看到一个黑框框闪着白方格,这就是我们要进行设置的场所——终端了。以下的bash命令都是在这里输入。
## 2. Python的安装
- 导入 Python 的稳定版 PPA
```bash
sudo add-apt-repository ppa:deadsnakes/ppa
```
- 导入 PPA 后,更新 APT 缓存:
```bash
sudo apt update
```
- 在「终端」中执行以下命令来安装 Python 3.12
```bash
sudo apt install python3.12
```
- 验证安装是否成功:
```bash
python3.12 --version
```
- 在「终端」中,执行以下命令安装 pip
```bash
sudo apt install python3-pip
```
- 检查Pip是否安装成功
```bash
pip --version
```
- 安装必要组件
``` bash
sudo apt install python-is-python3
```
## 3.MongoDB的安装
``` bash
cd /moi/mai
```
``` bash
dpkg -i mongodb-org-server_8.0.5_amd64.deb
```
``` bash
mkdir -p /root/data/mongodb/{data,log}
```
## 4.MongoDB的运行
```bash
service mongod start
```
```bash
systemctl status mongod #通过这条指令检查运行状态
```
有需要的话可以把这个服务注册成开机自启
```bash
sudo systemctl enable mongod
```
## 5.napcat的安装
``` bash
curl -o napcat.sh https://nclatest.znin.net/NapNeko/NapCat-Installer/main/script/install.sh && sudo bash napcat.sh
```
上面的不行试试下面的
``` bash
dpkg -i linuxqq_3.2.16-32793_amd64.deb
apt-get install -f
dpkg -i linuxqq_3.2.16-32793_amd64.deb
```
成功的标志是输入``` napcat ```出来炫酷的彩虹色界面
## 6.napcat的运行
此时你就可以根据提示在```napcat```里面登录你的QQ号了。
```bash
napcat start <你的QQ号>
napcat status #检查运行状态
```
然后你就可以登录napcat的webui进行设置了
```http://<你服务器的公网IP>:6099/webui?token=napcat```
第一次是这个,后续改了密码之后token就会对应修改。你也可以使用```napcat log <你的QQ号>```来查看webui地址,把里面的```127.0.0.1```改成<你服务器的公网IP>即可。
登录上之后,在网络配置界面添加websocket客户端,名称随便输一个,url改成`ws://127.0.0.1:8080/onebot/v11/ws`,保存之后点启用,就大功告成了。
## 7.麦麦的安装
### step 1 安装解压软件
```
sudo apt-get install unzip
```
### step 2 解压文件
```bash
cd /moi/mai/bot # 注意:要切换到压缩包的目录中去
unzip MaiMBot-0.5.8-alpha.zip
```
### step 3 进入虚拟环境安装库
```bash
cd /moi/mai/bot
python -m venv venv
source venv/bin/activate
pip install -r requirements.txt
```
### step 4 试运行
```bash
cd /moi/mai/bot
python -m venv venv
source venv/bin/activate
python bot.py
```
肯定运行不成功,不过你会发现结束之后多了一些文件
```
bot
├─ .env.prod
└─ config
└─ bot_config.toml
```
你要是会vim,直接在终端里修改也行;不过也可以把它们下载到本地,改好再传上去。
### step 5 文件配置
本项目需要配置两个主要文件:
1. `.env.prod` - 配置API服务和系统环境
2. `bot_config.toml` - 配置机器人行为和模型
#### API
你可以注册一个硅基流动的账号,通过邀请码注册有14块钱的免费额度:https://cloud.siliconflow.cn/i/7Yld7cfg
#### 在.env.prod中定义API凭证
```
# API凭证配置
SILICONFLOW_KEY=your_key # 硅基流动API密钥
SILICONFLOW_BASE_URL=https://api.siliconflow.cn/v1/ # 硅基流动API地址
DEEP_SEEK_KEY=your_key # DeepSeek API密钥
DEEP_SEEK_BASE_URL=https://api.deepseek.com/v1 # DeepSeek API地址
CHAT_ANY_WHERE_KEY=your_key # ChatAnyWhere API密钥
CHAT_ANY_WHERE_BASE_URL=https://api.chatanywhere.tech/v1 # ChatAnyWhere API地址
```
#### 在bot_config.toml中引用API凭证
```
[model.llm_reasoning]
name = "Pro/deepseek-ai/DeepSeek-R1"
base_url = "SILICONFLOW_BASE_URL" # 引用.env.prod中定义的地址
key = "SILICONFLOW_KEY" # 引用.env.prod中定义的密钥
```
如需切换到其他API服务,只需修改引用:
```
[model.llm_reasoning]
name = "Pro/deepseek-ai/DeepSeek-R1"
base_url = "DEEP_SEEK_BASE_URL" # 切换为DeepSeek服务
key = "DEEP_SEEK_KEY" # 使用DeepSeek密钥
```
#### 配置文件详解
##### 环境配置文件 (.env.prod)
```
# API配置
SILICONFLOW_KEY=your_key
SILICONFLOW_BASE_URL=https://api.siliconflow.cn/v1/
DEEP_SEEK_KEY=your_key
DEEP_SEEK_BASE_URL=https://api.deepseek.com/v1
CHAT_ANY_WHERE_KEY=your_key
CHAT_ANY_WHERE_BASE_URL=https://api.chatanywhere.tech/v1
# 服务配置
HOST=127.0.0.1 # 如果使用Docker部署需要改成0.0.0.0否则QQ消息无法传入
PORT=8080
# 数据库配置
MONGODB_HOST=127.0.0.1 # 如果使用Docker部署需要改成数据库容器的名字默认是mongodb
MONGODB_PORT=27017
DATABASE_NAME=MegBot
MONGODB_USERNAME = "" # 数据库用户名
MONGODB_PASSWORD = "" # 数据库密码
MONGODB_AUTH_SOURCE = "" # 认证数据库
# 插件配置
PLUGINS=["src2.plugins.chat"]
```
##### 机器人配置文件 (bot_config.toml)
```
[bot]
qq = "机器人QQ号" # 必填
nickname = "麦麦" # 机器人昵称(你希望机器人怎么称呼它自己)
[personality]
prompt_personality = [
"曾经是一个学习地质的女大学生,现在学习心理学和脑科学,你会刷贴吧",
"是一个女大学生,你有黑色头发,你会刷小红书"
]
prompt_schedule = "一个曾经学习地质,现在学习心理学和脑科学的女大学生喜欢刷qq贴吧知乎和小红书"
[message]
min_text_length = 2 # 最小回复长度
max_context_size = 15 # 上下文记忆条数
emoji_chance = 0.2 # 表情使用概率
ban_words = [] # 禁用词列表
[emoji]
auto_save = true # 自动保存表情
enable_check = false # 启用表情审核
check_prompt = "符合公序良俗"
[groups]
talk_allowed = [] # 允许对话的群号
talk_frequency_down = [] # 降低回复频率的群号
ban_user_id = [] # 禁止回复的用户QQ号
[others]
enable_advance_output = true # 启用详细日志
enable_kuuki_read = true # 启用场景理解
# 模型配置
[model.llm_reasoning] # 推理模型
name = "Pro/deepseek-ai/DeepSeek-R1"
base_url = "SILICONFLOW_BASE_URL"
key = "SILICONFLOW_KEY"
[model.llm_reasoning_minor] # 轻量推理模型
name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"
base_url = "SILICONFLOW_BASE_URL"
key = "SILICONFLOW_KEY"
[model.llm_normal] # 对话模型
name = "Pro/deepseek-ai/DeepSeek-V3"
base_url = "SILICONFLOW_BASE_URL"
key = "SILICONFLOW_KEY"
[model.llm_normal_minor] # 备用对话模型
name = "deepseek-ai/DeepSeek-V2.5"
base_url = "SILICONFLOW_BASE_URL"
key = "SILICONFLOW_KEY"
[model.vlm] # 图像识别模型
name = "deepseek-ai/deepseek-vl2"
base_url = "SILICONFLOW_BASE_URL"
key = "SILICONFLOW_KEY"
[model.embedding] # 文本向量模型
name = "BAAI/bge-m3"
base_url = "SILICONFLOW_BASE_URL"
key = "SILICONFLOW_KEY"
[topic.llm_topic]
name = "Pro/deepseek-ai/DeepSeek-V3"
base_url = "SILICONFLOW_BASE_URL"
key = "SILICONFLOW_KEY"
```
### step 6 运行
现在再运行
```bash
cd /moi/mai/bot
python -m venv venv
source venv/bin/activate
python bot.py
```
应该就能运行成功了。
## 8.事后配置
可是现在还有个问题:只要你一关闭终端,bot.py就会停止运行。那该怎么办呢?我们可以把bot.py注册成服务。
重启服务器,打开MongoDB和napcat服务。
新建一个文件,名为`bot.service`,内容如下
```
[Unit]
Description=maimai bot
[Service]
WorkingDirectory=/moi/mai/bot
ExecStart=/moi/mai/bot/venv/bin/python /moi/mai/bot/bot.py
Restart=on-failure
User=root
[Install]
WantedBy=multi-user.target
```
里面的路径视自己的情况更改。
把它放到`/etc/systemd/system`里面。
重新加载 `systemd` 配置:
```bash
sudo systemctl daemon-reload
```
启动服务:
```bash
sudo systemctl start bot.service # 启动服务
sudo systemctl restart bot.service # 或者重启服务
```
检查服务状态:
```bash
sudo systemctl status bot.service
```
现在再关闭终端检查麦麦能不能正常回复QQ信息。如果可以的话就大功告成了
## 9.命令速查
```bash
service mongod start # 启动mongod服务
napcat start <你的QQ号> # 登录napcat
cd /moi/mai/bot # 切换路径
python -m venv venv # 创建虚拟环境
source venv/bin/activate # 激活虚拟环境
sudo systemctl daemon-reload # 重新加载systemd配置
sudo systemctl start bot.service # 启动bot服务
sudo systemctl enable bot.service # 设置bot服务开机自启
sudo systemctl status bot.service # 检查bot服务状态
```
```
python bot.py
```

View File

@@ -110,6 +110,57 @@ python3 bot.py
---
### 7⃣ **使用systemctl管理maimbot**
使用以下命令添加服务文件:
```bash
sudo nano /etc/systemd/system/maimbot.service
```
输入以下内容:
`<maimbot_directory>`你的maimbot目录
`<venv_directory>`你的venv环境就是上文创建环境后执行的代码`source maimbot/bin/activate`中source后面的路径的绝对路径
```ini
[Unit]
Description=MaiMbot 麦麦
After=network.target mongod.service
[Service]
Type=simple
WorkingDirectory=<maimbot_directory>
ExecStart=<venv_directory>/python3 bot.py
ExecStop=/bin/kill -2 $MAINPID
Restart=always
RestartSec=10s
[Install]
WantedBy=multi-user.target
```
输入以下命令重新加载systemd
```bash
sudo systemctl daemon-reload
```
启动并设置开机自启:
```bash
sudo systemctl start maimbot
sudo systemctl enable maimbot
```
输入以下命令查看日志:
```bash
sudo journalctl -xeu maimbot
```
---
## **其他组件(可选)**
- 直接运行 knowledge.py生成知识库

56
flake.lock generated
View File

@@ -1,43 +1,21 @@
{
"nodes": {
"flake-utils": {
"inputs": {
"systems": "systems"
},
"locked": {
"lastModified": 1731533236,
"narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=",
"owner": "numtide",
"repo": "flake-utils",
"rev": "11707dc2f618dd54ca8739b309ec4fc024de578b",
"type": "github"
},
"original": {
"owner": "numtide",
"repo": "flake-utils",
"type": "github"
}
},
"nixpkgs": {
"locked": {
"lastModified": 1741196730,
"narHash": "sha256-0Sj6ZKjCpQMfWnN0NURqRCQn2ob7YtXTAOTwCuz7fkA=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "48913d8f9127ea6530a2a2f1bd4daa1b8685d8a3",
"type": "github"
"lastModified": 0,
"narHash": "sha256-nJj8f78AYAxl/zqLiFGXn5Im1qjFKU8yBPKoWEeZN5M=",
"path": "/nix/store/f30jn7l0bf7a01qj029fq55i466vmnkh-source",
"type": "path"
},
"original": {
"owner": "NixOS",
"ref": "nixos-24.11",
"repo": "nixpkgs",
"type": "github"
"id": "nixpkgs",
"type": "indirect"
}
},
"root": {
"inputs": {
"flake-utils": "flake-utils",
"nixpkgs": "nixpkgs"
"nixpkgs": "nixpkgs",
"utils": "utils"
}
},
"systems": {
@@ -54,6 +32,24 @@
"repo": "default",
"type": "github"
}
},
"utils": {
"inputs": {
"systems": "systems"
},
"locked": {
"lastModified": 1731533236,
"narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=",
"owner": "numtide",
"repo": "flake-utils",
"rev": "11707dc2f618dd54ca8739b309ec4fc024de578b",
"type": "github"
},
"original": {
"owner": "numtide",
"repo": "flake-utils",
"type": "github"
}
}
},
"root": "root",

View File

@@ -1,62 +1,38 @@
{
description = "MaiMBot Nix Dev Env";
# 本配置仅方便用于开发,但是因为 nb-cli 上游打包中并未包含 nonebot2因此目前本配置并不能用于运行和调试
inputs = {
nixpkgs.url = "github:NixOS/nixpkgs/nixos-24.11";
flake-utils.url = "github:numtide/flake-utils";
utils.url = "github:numtide/flake-utils";
};
outputs =
{
self,
nixpkgs,
flake-utils,
}:
flake-utils.lib.eachDefaultSystem (
system:
let
pkgs = import nixpkgs {
inherit system;
};
outputs = {
self,
nixpkgs,
utils,
...
}:
utils.lib.eachDefaultSystem (system: let
pkgs = import nixpkgs {inherit system;};
pythonPackages = pkgs.python3Packages;
in {
devShells.default = pkgs.mkShell {
name = "python-venv";
venvDir = "./.venv";
buildInputs = [
pythonPackages.python
pythonPackages.venvShellHook
pythonPackages.numpy
];
pythonEnv = pkgs.python3.withPackages (
ps: with ps; [
ruff
pymongo
python-dotenv
pydantic
jieba
openai
aiohttp
requests
urllib3
numpy
pandas
matplotlib
networkx
python-dateutil
APScheduler
loguru
tomli
customtkinter
colorama
pypinyin
pillow
setuptools
]
);
in
{
devShell = pkgs.mkShell {
buildInputs = [
pythonEnv
pkgs.nb-cli
];
postVenvCreation = ''
unset SOURCE_DATE_EPOCH
pip install -r requirements.txt
'';
shellHook = ''
'';
};
}
);
postShellHook = ''
# allow pip to install wheels
unset SOURCE_DATE_EPOCH
'';
};
});
}

View File

@@ -0,0 +1,141 @@
cbb569e - Create 如果你更新了版本,点我.txt
a91ef7b - 自动升级配置文件脚本
ed18f2e - 新增了知识库一键启动漂亮脚本
80ed568 - fix: 删除print调试代码
c681a82 - 修复小名无效问题
e54038f - fix: 从 nixpkgs 增加 numpy 依赖,以避免出现 libc++.so 找不到的问题
26782c9 - fix: 修复 ENVIRONMENT 变量在同一终端下不能被覆盖的问题
8c34637 - 提高健壮性
2688a96 - close SengokuCola/MaiMBot#225 让麦麦可以正确读取分享卡片
cd16e68 - 修复表情包发送时的缺失参数
b362c35 - feat: 更新 flake.nix ,采用 venv 的方式生成环境nixos用户也可以本机运行项目了
3c8c897 - 屏蔽一个臃肿的debug信息
9d0152a - 修复了合并过程中造成的代码重复
956135c - 添加一些注释
a412741 - 将print变为logger.debug
3180426 - 修复了没有改掉的typo字段
aea3bff - 添加私聊过滤开关,更新config,增加约束
cda6281 - chore: update emoji_manager.py
baed856 - 修正了私聊屏蔽词输出
66a0f18 - 修复了私聊时产生reply消息的bug
3bf5cd6 - feat: 新增运行时重载配置文件;新增根据不同环境(dev;prod)显示不同级别的log
33cd83b - 添加私聊功能
aa41f0d - fix: 放反了
ef8691c - fix: 修改message继承逻辑修复回复消息无法识别
7d017be - fix:模型降级
e1019ad - fix: 修复变量拼写错误并优化代码可读性
c24bb70 - fix: 流式输出模式增加结束判断与token用量记录
60a9376 - 添加logger的debug输出开关,默认为不开启
bfa9a3c - fix: 添加群信息获取的错误处理 (#173)
4cc5c8e - 修正.env.prod和.env.dev的生成
dea14c1 - fix: 模型降级目前只对硅基流动的V3和R1生效
b6edbea - fix: 图片保存路径不正确
01a6fa8 - fix: 删除神秘test
20f009d - 修复systemctl强制停止maimbot的问题
af962c2 - 修复了情绪管理器没有正确导入导致发布出消息
0586700 - 按照Sourcery提供的建议修改systemctl管理指南
e48b32a - 在手动部署教程中增加使用systemctl管理
5760412 - fix: 小修
1c9b0cc - fix: 修复部分cq码解析错误merge
b6867b9 - fix: 统一使用os.getenv获取数据库连接信息避免从config对象获取不存在的值时出现KeyError
5e069f7 - 修复记忆保存时无时间信息的bug
73a3e41 - 修复记忆更新bug
52c93ba - refactor: use Base64 for emoji CQ codes
67f6d7c - fix: 保证能运行的小修改
c32c4fb - refactor: 修改配置文件的版本号
a54ca8c - Merge remote-tracking branch 'upstream/debug' into feat_regix
8cbf9bb - feat: 史上最好的消息流重构和图片管理
9e41c4f - feat: 修改 bot_config 0.0.5 版本的变更日志
eede406 - fix: 修复nonebot无法加载项目的问题
00e02ed - fix: 0.0.5 版本的增加分层控制项
0f99d6a - Update docs/docker_deploy.md
c789074 - feat: 增加ruff依赖
ff65ab8 - feat: 修改默认的ruff配置文件同时消除config的所有不符合规范的地方
bf97013 - feat: 精简日志禁用Uvicorn/NoneBot默认日志启动方式改为显示加载uvicorn以便优雅shutdown
d9a2863 - 优化Docker部署文档更新容器部分
efcf00f - Docker部署文档追加更新部分
a63ce96 - fix: 更新情感判断模型配置(使配置文件里的 llm_emotion_judge 生效)
1294c88 - feat: 增加标准化格式化设置
2e8cd47 - fix: 避免可能出现的日程解析错误
043a724 - 修一下文档跳转,小美化(
e4b8865 - 支持别名,可以用不同名称召唤机器人
7b35ddd - ruff 哥又有新点子
7899e67 - feat: 重构完成开始测试debug
354d6d0 - 记忆系统优化
6cef8fd - 修复时区删去napcat用不到的端口
cd96644 - 添加使用说明
84495f8 - fix
204744c - 修改配置名与修改过滤对象为raw_message
a03b490 - Update README.md
2b2b342 - feat: 增加 ruff 依赖
72a6749 - fix: 修复docker部署时区指定问题
ee579bc - Update README.md
1b611ec - resolve SengokuCola/MaiMBot#167 根据正则表达式过滤消息
6e2ea82 - refractor: 几乎写完了,进入测试阶段
2ffdfef - More
e680405 - fix: typo 'discription'
68b3f57 - Minor Doc Update
312f065 - Create linux_deploy_guide_for_beginners.md
ed505a4 - fix: 使用动态路径替换硬编码的项目路径
8ff7bb6 - docs: 更新文档,修正格式并添加必要的换行符
6e36a56 - feat: 增加 MONGODB_URI 的配置项并将所有env文件的注释单独放在一行python的dotenv有时无法正确处理行内注释
4baa6c6 - feat: 实现MongoDB URI方式连接并统一数据库连接代码。
8a32d18 - feat: 优化willing_manager逻辑增加回复保底概率
c9f1244 - docs: 改进README.md文档格式和排版
e1b484a - docs: 添加CLAUDE.md开发指南文件用于Claude Code
a43f949 - fix: remove duplicate message(CR comments)
fddb641 - fix: 修复错误的空值检测逻辑
8b7876c - fix: 修复没有上传tag的问题
6b4130e - feat: 增加stable-dev分支的打包
052e67b - refactor: 日志打印优化(终于改完了,爽了
a7f9d05 - 修复记忆整理传入格式问题
536bb1d - fix: 更新情感判断模型配置
8d99592 - fix: logger初始化顺序
052802c - refactor: logger promotion
8661d94 - doc: README.md - telegram version information
5746afa - refactor: logger in src\plugins\chat\bot.py
288dbb6 - refactor: logger in src\plugins\chat\__init__.py
8428a06 - fix: memory logger optimization (CR comment)
665c459 - 改进了可视化脚本
6c35704 - fix: 调用了错误的函数
3223153 - feat: 一键脚本新增记忆可视化
3149dd3 - fix: mongodb.zip 无法解压 fix:更换执行命令的方法 fix:当 db 不存在时自动创建 feat: 一键安装完成后启动麦麦
089d6a6 - feat: 针对硅基流动的Pro模型添加了自动降级功能
c4b0917 - 一个记忆可视化小脚本
6a71ea4 - 修复了记忆时间bug,config添加了记忆屏蔽关键词
1b5344f - fix: 优化bot初始化的日志&格式
41aa974 - fix: 优化chat/config.py的日志&格式
980cde7 - fix: 优化scheduler_generator日志&格式
31a5514 - fix: 调整全局logger加载顺序
8baef07 - feat: 添加全局logger初始化设置
5566f17 - refractor: 几乎写完了,进入测试阶段
6a66933 - feat: 添加开发环境.env.dev初始化
411ff1a - feat: 安装 MongoDB Compass
0de9eba - feat: 增加实时更新贡献者列表的功能
f327f45 - fix: 优化src/plugins/chat/__init__.py的import
826daa5 - fix: 当虚拟环境存在时跳过创建
f54de42 - fix: time.tzset 仅在类 Unix 系统可用
47c4990 - fix: 修复docker部署场景下时间错误的问题
e23a371 - docs: 添加 compose 注释
1002822 - docs: 标注 Python 最低版本
564350d - feat: 校验 Python 版本
4cc4482 - docs: 添加傻瓜式脚本
757173a - 带麦麦看了心理医生,让她没那么容易陷入负面情绪
39bb99c - 将错别字生成提取到配置,一句一个错别字太烦了!
fe36847 - feat: 超大型重构
e304dd7 - Update README.md
b7cfe6d - feat: 发布第 0.0.2 版本配置模板
ca929d5 - 补充Docker部署文档
1e97120 - 补充Docker部署文档
25f7052 - fix: 修复兼容性选项和目前第一个版本之间的版本间隙 0.0.0 版,并将所有的直接退出修改为抛出异常
c5bdc4f - 防ipv6炸虽然小概率事件
d86610d - fix: 修复不能加载环境变量的问题
2306ebf - feat: 因为判断临界版本范围比较麻烦,增加 notice 字段,删除原本的判断逻辑(存在故障)
dd09576 - fix: 修复 TypeError: BotConfig.convert_to_specifierset() takes 1 positional argument but 2 were given
18f839b - fix: 修复 missing 1 required positional argument: 'INNER_VERSION'
6adb5ed - 调整一些细节docker部署时可选数据库账密
07f48e9 - fix: 利用filter来过滤环境变量避免直接删除key造成的 RuntimeError: dictionary changed size during iteration
5856074 - fix: 修复无法进行基础设置的问题
32aa032 - feat: 发布 0.0.1 版本的配置文件
edc07ac - feat: 重构配置加载器,增加配置文件版本控制和程序兼容能力
0f492ed - fix: 修复 BASE_URL/KEY 组合检查中被 GPG_KEY 干扰的问题

Binary file not shown.

12
run.py
View File

@@ -128,13 +128,17 @@ if __name__ == "__main__":
)
os.system("cls")
if choice == "1":
install_napcat()
install_mongodb()
confirm = input("首次安装将下载并配置所需组件\n1.确认\n2.取消\n")
if confirm == "1":
install_napcat()
install_mongodb()
else:
print("已取消安装")
elif choice == "2":
run_maimbot()
choice = input("是否启动推理可视化y/N").upper()
choice = input("是否启动推理可视化?(未完善)(y/N").upper()
if choice == "Y":
run_cmd(r"python src\gui\reasoning_gui.py")
choice = input("是否启动记忆可视化y/N").upper()
choice = input("是否启动记忆可视化?(未完善)(y/N").upper()
if choice == "Y":
run_cmd(r"python src/plugins/memory_system/memory_manual_build.py")

View File

@@ -1,7 +1,6 @@
from typing import Optional
from pymongo import MongoClient
from pymongo.database import Database as MongoDatabase
class Database:
_instance: Optional["Database"] = None
@@ -27,7 +26,7 @@ class Database:
else:
# 否则使用无认证连接
self.client = MongoClient(host, port)
self.db = self.client[db_name]
self.db: MongoDatabase = self.client[db_name]
@classmethod
def initialize(
@@ -39,18 +38,18 @@ class Database:
password: Optional[str] = None,
auth_source: Optional[str] = None,
uri: Optional[str] = None,
) -> "Database":
) -> MongoDatabase:
if cls._instance is None:
cls._instance = cls(
host, port, db_name, username, password, auth_source, uri
)
return cls._instance
return cls._instance.db
@classmethod
def get_instance(cls) -> "Database":
def get_instance(cls) -> MongoDatabase:
if cls._instance is None:
raise RuntimeError("Database not initialized")
return cls._instance
return cls._instance.db
#测试用

View File

@@ -46,7 +46,7 @@ class ReasoningGUI:
# 初始化数据库连接
try:
self.db = Database.get_instance().db
self.db = Database.get_instance()
logger.success("数据库连接成功")
except RuntimeError:
logger.warning("数据库未初始化,正在尝试初始化...")
@@ -60,7 +60,7 @@ class ReasoningGUI:
password=os.getenv("MONGODB_PASSWORD"),
auth_source=os.getenv("MONGODB_AUTH_SOURCE"),
)
self.db = Database.get_instance().db
self.db = Database.get_instance()
logger.success("数据库初始化成功")
except Exception:
logger.exception("数据库初始化失败")

View File

@@ -4,7 +4,7 @@ import os
from loguru import logger
from nonebot import get_driver, on_message, require
from nonebot.adapters.onebot.v11 import Bot, GroupMessageEvent, Message, MessageSegment
from nonebot.adapters.onebot.v11 import Bot, GroupMessageEvent, Message, MessageSegment,MessageEvent
from nonebot.typing import T_State
from ...common.database import Database
@@ -32,26 +32,14 @@ _message_manager_started = False
driver = get_driver()
config = driver.config
Database.initialize(
uri=os.getenv("MONGODB_URI"),
host=os.getenv("MONGODB_HOST", "127.0.0.1"),
port=int(os.getenv("MONGODB_PORT", "27017")),
db_name=os.getenv("DATABASE_NAME", "MegBot"),
username=os.getenv("MONGODB_USERNAME"),
password=os.getenv("MONGODB_PASSWORD"),
auth_source=os.getenv("MONGODB_AUTH_SOURCE"),
)
logger.success("初始化数据库成功")
# 初始化表情管理器
emoji_manager.initialize()
logger.debug(f"正在唤醒{global_config.BOT_NICKNAME}......")
# 创建机器人实例
chat_bot = ChatBot()
# 注册消息处理器
group_msg = on_message(priority=5)
# 注册消息处理器
msg_in = on_message(priority=5)
# 创建定时任务
scheduler = require("nonebot_plugin_apscheduler").scheduler
@@ -103,8 +91,8 @@ async def _(bot: Bot):
asyncio.create_task(chat_manager._auto_save_task())
@group_msg.handle()
async def _(bot: Bot, event: GroupMessageEvent, state: T_State):
@msg_in.handle()
async def _(bot: Bot, event: MessageEvent, state: T_State):
await chat_bot.handle_message(event, bot)
@@ -127,7 +115,7 @@ async def build_memory_task():
async def forget_memory_task():
"""每30秒执行一次记忆构建"""
print("\033[1;32m[记忆遗忘]\033[0m 开始遗忘记忆...")
await hippocampus.operation_forget_topic(percentage=0.1)
await hippocampus.operation_forget_topic(percentage=global_config.memory_forget_percentage)
print("\033[1;32m[记忆遗忘]\033[0m 记忆遗忘完成")

View File

@@ -2,12 +2,16 @@ import re
import time
from random import random
from loguru import logger
from nonebot.adapters.onebot.v11 import Bot, GroupMessageEvent
from nonebot.adapters.onebot.v11 import (
Bot,
GroupMessageEvent,
MessageEvent,
PrivateMessageEvent,
)
from ..memory_system.memory import hippocampus
from ..moods.moods import MoodManager # 导入情绪管理器
from .config import global_config
from .cq_code import CQCode, cq_code_tool # 导入CQCode模块
from .emoji_manager import emoji_manager # 导入表情包管理器
from .llm_generator import ResponseGenerator
from .message import MessageSending, MessageRecv, MessageThinking, MessageSet
@@ -42,39 +46,53 @@ class ChatBot:
if not self._started:
self._started = True
async def handle_message(self, event: GroupMessageEvent, bot: Bot) -> None:
"""处理收到的消息"""
async def handle_message(self, event: MessageEvent, bot: Bot) -> None:
"""处理收到的消息"""
self.bot = bot # 更新 bot 实例
try:
group_info_api = await bot.get_group_info(group_id=event.group_id)
logger.info(f"成功获取群信息: {group_info_api}")
group_name = group_info_api["group_name"]
except Exception as e:
logger.error(f"获取群信息失败: {str(e)}")
group_name = None
# 白名单设定由nontbot侧完成
# 消息过滤涉及到config有待更新
if event.group_id:
if event.group_id not in global_config.talk_allowed_groups:
return
# 用户屏蔽,不区分私聊/群聊
if event.user_id in global_config.ban_user_id:
return
user_info = UserInfo(
user_id=event.user_id,
user_nickname=event.sender.nickname,
user_cardname=event.sender.card or None,
platform="qq",
)
# 处理私聊消息
if isinstance(event, PrivateMessageEvent):
if not global_config.enable_friend_chat: # 私聊过滤
return
else:
try:
user_info = UserInfo(
user_id=event.user_id,
user_nickname=(await bot.get_stranger_info(user_id=event.user_id, no_cache=True))["nickname"],
user_cardname=None,
platform="qq",
)
except Exception as e:
logger.error(f"获取陌生人信息失败: {e}")
return
logger.debug(user_info)
group_info = GroupInfo(
group_id=event.group_id,
group_name=group_name, # 使用获取到的群名称或None
platform="qq",
)
# group_info = GroupInfo(group_id=0, group_name="私聊", platform="qq")
group_info = None
# 处理群聊消息
else:
# 白名单设定由nontbot侧完成
if event.group_id:
if event.group_id not in global_config.talk_allowed_groups:
return
user_info = UserInfo(
user_id=event.user_id,
user_nickname=event.sender.nickname,
user_cardname=event.sender.card or None,
platform="qq",
)
group_info = GroupInfo(group_id=event.group_id, group_name=None, platform="qq")
# group_info = await bot.get_group_info(group_id=event.group_id)
# sender_info = await bot.get_group_member_info(group_id=event.group_id, user_id=event.user_id, no_cache=True)
message_cq = MessageRecvCQ(
message_id=event.message_id,
@@ -88,7 +106,6 @@ class ChatBot:
# 进入maimbot
message = MessageRecv(message_json)
groupinfo = message.message_info.group_info
userinfo = message.message_info.user_info
messageinfo = message.message_info
@@ -108,7 +125,9 @@ class ChatBot:
# 过滤词
for word in global_config.ban_words:
if word in message.processed_plain_text:
logger.info(f"[群{groupinfo.group_id}]{userinfo.user_nickname}:{message.processed_plain_text}")
logger.info(
f"[{chat.group_info.group_name if chat.group_info.group_id else '私聊'}]{userinfo.user_nickname}:{message.processed_plain_text}"
)
logger.info(f"[过滤词识别]消息中含有{word}filtered")
return
@@ -116,7 +135,7 @@ class ChatBot:
for pattern in global_config.ban_msgs_regex:
if re.search(pattern, message.raw_message):
logger.info(
f"[{message.message_info.group_info.group_id}]{message.user_nickname}:{message.raw_message}"
f"[{chat.group_info.group_name if chat.group_info.group_id else '私聊'}]{message.user_nickname}:{message.raw_message}"
)
logger.info(f"[正则表达式过滤]消息匹配到{pattern}filtered")
return
@@ -124,8 +143,8 @@ class ChatBot:
current_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(messageinfo.time))
# topic=await topic_identifier.identify_topic_llm(message.processed_plain_text)
topic = ""
interested_rate = 0
interested_rate = await hippocampus.memory_activate_value(message.processed_plain_text) / 100
logger.debug(f"{message.processed_plain_text}的激活度:{interested_rate}")
# logger.info(f"\033[1;32m[主题识别]\033[0m 使用{global_config.topic_extract}主题: {topic}")
@@ -144,7 +163,7 @@ class ChatBot:
current_willing = willing_manager.get_willing(chat_stream=chat)
logger.info(
f"[{current_time}][{chat.group_info.group_id}]{chat.user_info.user_nickname}:"
f"[{current_time}][{chat.group_info.group_name if chat.group_info.group_id else '私聊'}]{chat.user_info.user_nickname}:"
f"{message.processed_plain_text}[回复意愿:{current_willing:.2f}][概率:{reply_probability * 100:.1f}%]"
)
@@ -152,12 +171,17 @@ class ChatBot:
if random() < reply_probability:
bot_user_info = UserInfo(
user_id=global_config.BOT_QQ, user_nickname=global_config.BOT_NICKNAME, platform=messageinfo.platform
user_id=global_config.BOT_QQ,
user_nickname=global_config.BOT_NICKNAME,
platform=messageinfo.platform,
)
thinking_time_point = round(time.time(), 2)
think_id = "mt" + str(thinking_time_point)
thinking_message = MessageThinking(
message_id=think_id, chat_stream=chat, bot_user_info=bot_user_info, reply=message
message_id=think_id,
chat_stream=chat,
bot_user_info=bot_user_info,
reply=message,
)
message_manager.add_message(thinking_message)
@@ -196,15 +220,16 @@ class ChatBot:
# print(f"\033[1;32m[回复内容]\033[0m {msg}")
# 通过时间改变时间戳
typing_time = calculate_typing_time(msg)
print(f"typing_time: {typing_time}")
logger.debug(f"typing_time: {typing_time}")
accu_typing_time += typing_time
timepoint = thinking_time_point + accu_typing_time
message_segment = Seg(type="text", data=msg)
print(f"message_segment: {message_segment}")
# logger.debug(f"message_segment: {message_segment}")
bot_message = MessageSending(
message_id=think_id,
chat_stream=chat,
bot_user_info=bot_user_info,
sender_info=userinfo,
message_segment=message_segment,
reply=message,
is_head=not mark_head,
@@ -218,7 +243,9 @@ class ChatBot:
# message_set 可以直接加入 message_manager
# print(f"\033[1;32m[回复]\033[0m 将回复载入发送容器")
print(f"添加message_set到message_manager")
logger.debug("添加message_set到message_manager")
message_manager.add_message(message_set)
bot_response_time = thinking_time_point
@@ -242,6 +269,7 @@ class ChatBot:
message_id=think_id,
chat_stream=chat,
bot_user_info=bot_user_info,
sender_info=userinfo,
message_segment=message_segment,
reply=message,
is_head=False,

View File

@@ -111,11 +111,11 @@ class ChatManager:
def _ensure_collection(self):
"""确保数据库集合存在并创建索引"""
if "chat_streams" not in self.db.db.list_collection_names():
self.db.db.create_collection("chat_streams")
if "chat_streams" not in self.db.list_collection_names():
self.db.create_collection("chat_streams")
# 创建索引
self.db.db.chat_streams.create_index([("stream_id", 1)], unique=True)
self.db.db.chat_streams.create_index(
self.db.chat_streams.create_index([("stream_id", 1)], unique=True)
self.db.chat_streams.create_index(
[("platform", 1), ("user_info.user_id", 1), ("group_info.group_id", 1)]
)
@@ -168,7 +168,7 @@ class ChatManager:
return stream
# 检查数据库中是否存在
data = self.db.db.chat_streams.find_one({"stream_id": stream_id})
data = self.db.chat_streams.find_one({"stream_id": stream_id})
if data:
stream = ChatStream.from_dict(data)
# 更新用户信息和群组信息
@@ -204,7 +204,7 @@ class ChatManager:
async def _save_stream(self, stream: ChatStream):
"""保存聊天流到数据库"""
if not stream.saved:
self.db.db.chat_streams.update_one(
self.db.chat_streams.update_one(
{"stream_id": stream.stream_id}, {"$set": stream.to_dict()}, upsert=True
)
stream.saved = True
@@ -216,7 +216,7 @@ class ChatManager:
async def load_all_streams(self):
"""从数据库加载所有聊天流"""
all_streams = self.db.db.chat_streams.find({})
all_streams = self.db.chat_streams.find({})
for data in all_streams:
stream = ChatStream.from_dict(data)
self.streams[stream.stream_id] = stream

View File

@@ -37,8 +37,7 @@ class BotConfig:
ban_user_id = set()
build_memory_interval: int = 30 # 记忆构建间隔(秒)
forget_memory_interval: int = 300 # 记忆遗忘间隔(秒)
EMOJI_CHECK_INTERVAL: int = 120 # 表情包检查间隔(分钟)
EMOJI_REGISTER_INTERVAL: int = 10 # 表情包注册间隔(分钟)
EMOJI_SAVE: bool = True # 偷表情包
@@ -69,6 +68,7 @@ class BotConfig:
enable_advance_output: bool = False # 是否启用高级输出
enable_kuuki_read: bool = True # 是否启用读空气功能
enable_debug_output: bool = False # 是否启用调试输出
enable_friend_chat: bool = False # 是否启用好友聊天
mood_update_interval: float = 1.0 # 情绪更新间隔 单位秒
mood_decay_rate: float = 0.95 # 情绪衰减率
@@ -95,6 +95,12 @@ class BotConfig:
PERSONALITY_2: float = 0.3 # 第二种人格概率
PERSONALITY_3: float = 0.1 # 第三种人格概率
build_memory_interval: int = 600 # 记忆构建间隔(秒)
forget_memory_interval: int = 600 # 记忆遗忘间隔(秒)
memory_forget_time: int = 24 # 记忆遗忘时间(小时)
memory_forget_percentage: float = 0.01 # 记忆遗忘比例
memory_compress_rate: float = 0.1 # 记忆压缩率
memory_ban_words: list = field(
default_factory=lambda: ["表情包", "图片", "回复", "聊天记录"]
) # 添加新的配置项默认值
@@ -294,6 +300,11 @@ class BotConfig:
if config.INNER_VERSION in SpecifierSet(">=0.0.4"):
config.memory_ban_words = set(memory_config.get("memory_ban_words", []))
if config.INNER_VERSION in SpecifierSet(">=0.0.7"):
config.memory_forget_time = memory_config.get("memory_forget_time", config.memory_forget_time)
config.memory_forget_percentage = memory_config.get("memory_forget_percentage", config.memory_forget_percentage)
config.memory_compress_rate = memory_config.get("memory_compress_rate", config.memory_compress_rate)
def mood(parent: dict):
mood_config = parent["mood"]
config.mood_update_interval = mood_config.get("mood_update_interval", config.mood_update_interval)
@@ -327,7 +338,9 @@ class BotConfig:
others_config = parent["others"]
config.enable_advance_output = others_config.get("enable_advance_output", config.enable_advance_output)
config.enable_kuuki_read = others_config.get("enable_kuuki_read", config.enable_kuuki_read)
config.enable_debug_output = others_config.get("enable_debug_output", config.enable_debug_output)
if config.INNER_VERSION in SpecifierSet(">=0.0.7"):
config.enable_debug_output = others_config.get("enable_debug_output", config.enable_debug_output)
config.enable_friend_chat = others_config.get("enable_friend_chat", config.enable_friend_chat)
# 版本表达式:>=1.0.0,<2.0.0
# 允许字段func: method, support: str, notice: str, necessary: bool

View File

@@ -76,16 +76,16 @@ class EmojiManager:
没有索引的话,数据库每次查询都需要扫描全部数据,建立索引后可以大大提高查询效率。
"""
if 'emoji' not in self.db.db.list_collection_names():
self.db.db.create_collection('emoji')
self.db.db.emoji.create_index([('embedding', '2dsphere')])
self.db.db.emoji.create_index([('filename', 1)], unique=True)
if 'emoji' not in self.db.list_collection_names():
self.db.create_collection('emoji')
self.db.emoji.create_index([('embedding', '2dsphere')])
self.db.emoji.create_index([('filename', 1)], unique=True)
def record_usage(self, emoji_id: str):
"""记录表情使用次数"""
try:
self._ensure_db()
self.db.db.emoji.update_one(
self.db.emoji.update_one(
{'_id': emoji_id},
{'$inc': {'usage_count': 1}}
)
@@ -119,7 +119,7 @@ class EmojiManager:
try:
# 获取所有表情包
all_emojis = list(self.db.db.emoji.find({}, {'_id': 1, 'path': 1, 'embedding': 1, 'description': 1}))
all_emojis = list(self.db.emoji.find({}, {'_id': 1, 'path': 1, 'embedding': 1, 'description': 1}))
if not all_emojis:
logger.warning("数据库中没有任何表情包")
@@ -157,10 +157,11 @@ class EmojiManager:
if selected_emoji and 'path' in selected_emoji:
# 更新使用次数
self.db.db.emoji.update_one(
self.db.emoji.update_one(
{'_id': selected_emoji['_id']},
{'$inc': {'usage_count': 1}}
)
logger.success(
f"找到匹配的表情包: {selected_emoji.get('description', '无描述')} (相似度: {similarity:.4f})")
# 稍微改一下文本描述,不然容易产生幻觉,描述已经包含 表情包 了
@@ -176,8 +177,10 @@ class EmojiManager:
logger.error(f"获取表情包失败: {str(e)}")
return None
async def _get_emoji_discription(self, image_base64: str) -> str:
"""获取表情包的标签使用image_manager的描述生成功能"""
try:
# 使用image_manager获取描述去掉前后的方括号和"表情包:"前缀
description = await image_manager.get_emoji_description(image_base64)
@@ -236,7 +239,7 @@ class EmojiManager:
image_hash = hashlib.md5(image_bytes).hexdigest()
# 检查是否已经注册过
existing_emoji = self.db.db['emoji'].find_one({'filename': filename})
existing_emoji = self.db['emoji'].find_one({'filename': filename})
description = None
if existing_emoji:
@@ -272,11 +275,14 @@ class EmojiManager:
# 获取表情包的描述
description = await self._get_emoji_discription(image_base64)
if global_config.EMOJI_CHECK:
check = await self._check_emoji(image_base64)
if '' not in check:
os.remove(image_path)
logger.info(f"描述: {description}")
logger.info(f"描述: {description}")
logger.info(f"其不满足过滤规则,被剔除 {check}")
continue
@@ -287,6 +293,7 @@ class EmojiManager:
if description is not None:
embedding = await get_embedding(description)
# 准备数据库记录
emoji_record = {
'filename': filename,
@@ -298,10 +305,11 @@ class EmojiManager:
}
# 保存到emoji数据库
self.db.db['emoji'].insert_one(emoji_record)
self.db['emoji'].insert_one(emoji_record)
logger.success(f"注册新表情包: {filename}")
logger.info(f"描述: {description}")
# 保存到images数据库
image_doc = {
'hash': image_hash,
@@ -338,7 +346,7 @@ class EmojiManager:
try:
self._ensure_db()
# 获取所有表情包记录
all_emojis = list(self.db.db.emoji.find())
all_emojis = list(self.db.emoji.find())
removed_count = 0
total_count = len(all_emojis)
@@ -346,13 +354,13 @@ class EmojiManager:
try:
if 'path' not in emoji:
logger.warning(f"发现无效记录缺少path字段ID: {emoji.get('_id', 'unknown')}")
self.db.db.emoji.delete_one({'_id': emoji['_id']})
self.db.emoji.delete_one({'_id': emoji['_id']})
removed_count += 1
continue
if 'embedding' not in emoji:
logger.warning(f"发现过时记录缺少embedding字段ID: {emoji.get('_id', 'unknown')}")
self.db.db.emoji.delete_one({'_id': emoji['_id']})
self.db.emoji.delete_one({'_id': emoji['_id']})
removed_count += 1
continue
@@ -360,7 +368,7 @@ class EmojiManager:
if not os.path.exists(emoji['path']):
logger.warning(f"表情包文件已被删除: {emoji['path']}")
# 从数据库中删除记录
result = self.db.db.emoji.delete_one({'_id': emoji['_id']})
result = self.db.emoji.delete_one({'_id': emoji['_id']})
if result.deleted_count > 0:
logger.debug(f"成功删除数据库记录: {emoji['_id']}")
removed_count += 1
@@ -371,7 +379,7 @@ class EmojiManager:
continue
# 验证清理结果
remaining_count = self.db.db.emoji.count_documents({})
remaining_count = self.db.emoji.count_documents({})
if removed_count > 0:
logger.success(f"已清理 {removed_count} 个失效的表情包记录")
logger.info(f"清理前总数: {total_count} | 清理后总数: {remaining_count}")
@@ -389,5 +397,7 @@ class EmojiManager:
# 创建全局单例
emoji_manager = EmojiManager()

View File

@@ -8,7 +8,7 @@ from loguru import logger
from ...common.database import Database
from ..models.utils_model import LLM_request
from .config import global_config
from .message import MessageRecv, MessageThinking, MessageSending,Message
from .message import MessageRecv, MessageThinking, Message
from .prompt_builder import prompt_builder
from .relationship_manager import relationship_manager
from .utils import process_llm_response
@@ -154,7 +154,7 @@ class ResponseGenerator:
reasoning_content: str,
):
"""保存对话记录到数据库"""
self.db.db.reasoning_logs.insert_one(
self.db.reasoning_logs.insert_one(
{
"time": time.time(),
"chat_id": message.chat_stream.stream_id,

View File

@@ -1,19 +1,25 @@
import time
import html
import re
import json
from dataclasses import dataclass
from typing import Dict, ForwardRef, List, Optional, Union
from typing import Dict, List, Optional
import urllib3
from loguru import logger
from .utils_image import image_manager
from .message_base import Seg, GroupInfo, UserInfo, BaseMessageInfo, MessageBase
from .chat_stream import ChatStream, chat_manager
# 禁用SSL警告
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
#这个类是消息数据类,用于存储和管理消息数据。
#它定义了消息的属性包括群组ID、用户ID、消息ID、原始消息内容、纯文本内容和时间戳。
#它还定义了两个辅助属性keywords用于提取消息的关键词is_plain_text用于判断消息是否为纯文本。
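# Message data class: stores and manages the data of a single message.
# It defines the message attributes: group ID, user ID, message ID, raw message content, plain-text content and timestamp.
# It also defines two helper attributes: keywords (extracts the keywords of the message) and is_plain_text (tells whether the message is plain text).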
@dataclass
class Message(MessageBase):
@@ -69,6 +75,20 @@ class MessageRecv(Message):
message_dict: MessageCQ序列化后的字典
"""
self.message_info = BaseMessageInfo.from_dict(message_dict.get('message_info', {}))
message_segment = message_dict.get('message_segment', {})

if message_segment.get('data', '') == '[json]':
    # JSON card message: replace the "[json]" placeholder with the card's
    # human-readable "prompt" text taken from the raw CQ code.
    pattern = r'\[CQ:json,data=(?P<json_data>.+?)\]'
    # raw_message may be missing or None; search an empty string in that case.
    match = re.search(pattern, message_dict.get('raw_message') or '')
    json_message = {}
    if match:  # re.search returns None when the raw message has no CQ:json code
        raw_json = html.unescape(match.group('json_data'))
        try:
            json_message = json.loads(raw_json)
        except json.JSONDecodeError:
            json_message = {}
    # json.loads can yield a list/str/number; only a dict can carry "prompt".
    if not isinstance(json_message, dict):
        json_message = {}
    message_segment['data'] = json_message.get('prompt', '')
self.message_segment = Seg.from_dict(message_dict.get('message_segment', {}))
self.raw_message = message_dict.get('raw_message')
@@ -86,7 +106,9 @@ class MessageRecv(Message):
这个方法必须在创建实例后显式调用,因为它包含异步操作。
"""
self.processed_plain_text = await self._process_message_segments(self.message_segment)
self.processed_plain_text = await self._process_message_segments(
self.message_segment
)
self.detailed_plain_text = self._generate_detailed_text()
async def _process_message_segments(self, segment: Seg) -> str:
@@ -98,14 +120,14 @@ class MessageRecv(Message):
Returns:
str: 处理后的文本
"""
if segment.type == 'seglist':
if segment.type == "seglist":
# 处理消息段列表
segments_text = []
for seg in segment.data:
processed = await self._process_message_segments(seg)
if processed:
segments_text.append(processed)
return ' '.join(segments_text)
return " ".join(segments_text)
else:
# 处理单个消息段
return await self._process_single_segment(segment)
@@ -120,31 +142,35 @@ class MessageRecv(Message):
str: 处理后的文本
"""
try:
if seg.type == 'text':
if seg.type == "text":
return seg.data
elif seg.type == 'image':
elif seg.type == "image":
# 如果是base64图片数据
if isinstance(seg.data, str):
return await image_manager.get_image_description(seg.data)
return '[图片]'
elif seg.type == 'emoji':
self.is_emoji=True
return "[图片]"
elif seg.type == "emoji":
self.is_emoji = True
if isinstance(seg.data, str):
return await image_manager.get_emoji_description(seg.data)
return '[表情]'
return "[表情]"
else:
return f"[{seg.type}:{str(seg.data)}]"
except Exception as e:
logger.error(f"处理消息段失败: {str(e)}, 类型: {seg.type}, 数据: {seg.data}")
logger.error(
f"处理消息段失败: {str(e)}, 类型: {seg.type}, 数据: {seg.data}"
)
return f"[处理失败的{seg.type}消息]"
def _generate_detailed_text(self) -> str:
"""生成详细文本,包含时间和用户信息"""
time_str = time.strftime("%m-%d %H:%M:%S", time.localtime(self.message_info.time))
time_str = time.strftime(
"%m-%d %H:%M:%S", time.localtime(self.message_info.time)
)
user_info = self.message_info.user_info
name = (
f"{user_info.user_nickname}(ta的昵称:{user_info.user_cardname},ta的id:{user_info.user_id})"
if user_info.user_cardname!=''
if user_info.user_cardname != ""
else f"{user_info.user_nickname}(ta的id:{user_info.user_id})"
)
return f"[{time_str}] {name}: {self.processed_plain_text}\n"
@@ -160,7 +186,7 @@ class MessageProcessBase(Message):
chat_stream: ChatStream,
bot_user_info: UserInfo,
message_segment: Optional[Seg] = None,
reply: Optional['MessageRecv'] = None
reply: Optional["MessageRecv"] = None,
):
# 调用父类初始化
super().__init__(
@@ -169,7 +195,7 @@ class MessageProcessBase(Message):
chat_stream=chat_stream,
user_info=bot_user_info,
message_segment=message_segment,
reply=reply
reply=reply,
)
# 处理状态相关属性
@@ -190,14 +216,14 @@ class MessageProcessBase(Message):
Returns:
str: 处理后的文本
"""
if segment.type == 'seglist':
if segment.type == "seglist":
# 处理消息段列表
segments_text = []
for seg in segment.data:
processed = await self._process_message_segments(seg)
if processed:
segments_text.append(processed)
return ' '.join(segments_text)
return " ".join(segments_text)
else:
# 处理单个消息段
return await self._process_single_segment(segment)
@@ -212,39 +238,44 @@ class MessageProcessBase(Message):
str: 处理后的文本
"""
try:
if seg.type == 'text':
if seg.type == "text":
return seg.data
elif seg.type == 'image':
elif seg.type == "image":
# 如果是base64图片数据
if isinstance(seg.data, str):
return await image_manager.get_image_description(seg.data)
return '[图片]'
elif seg.type == 'emoji':
return "[图片]"
elif seg.type == "emoji":
if isinstance(seg.data, str):
return await image_manager.get_emoji_description(seg.data)
return '[表情]'
elif seg.type == 'at':
return "[表情]"
elif seg.type == "at":
return f"[@{seg.data}]"
elif seg.type == 'reply':
if self.reply and hasattr(self.reply, 'processed_plain_text'):
elif seg.type == "reply":
if self.reply and hasattr(self.reply, "processed_plain_text"):
return f"[回复:{self.reply.processed_plain_text}]"
else:
return f"[{seg.type}:{str(seg.data)}]"
except Exception as e:
logger.error(f"处理消息段失败: {str(e)}, 类型: {seg.type}, 数据: {seg.data}")
logger.error(
f"处理消息段失败: {str(e)}, 类型: {seg.type}, 数据: {seg.data}"
)
return f"[处理失败的{seg.type}消息]"
def _generate_detailed_text(self) -> str:
"""生成详细文本,包含时间和用户信息"""
time_str = time.strftime("%m-%d %H:%M:%S", time.localtime(self.message_info.time))
time_str = time.strftime(
"%m-%d %H:%M:%S", time.localtime(self.message_info.time)
)
user_info = self.message_info.user_info
name = (
f"{user_info.user_nickname}(ta的昵称:{user_info.user_cardname},ta的id:{user_info.user_id})"
if user_info.user_cardname != ''
if user_info.user_cardname != ""
else f"{user_info.user_nickname}(ta的id:{user_info.user_id})"
)
return f"[{time_str}] {name}: {self.processed_plain_text}\n"
@dataclass
class MessageThinking(MessageProcessBase):
"""思考状态的消息类"""
@@ -254,7 +285,7 @@ class MessageThinking(MessageProcessBase):
message_id: str,
chat_stream: ChatStream,
bot_user_info: UserInfo,
reply: Optional['MessageRecv'] = None
reply: Optional["MessageRecv"] = None,
):
# 调用父类初始化
super().__init__(
@@ -262,12 +293,13 @@ class MessageThinking(MessageProcessBase):
chat_stream=chat_stream,
bot_user_info=bot_user_info,
message_segment=None, # 思考状态不需要消息段
reply=reply
reply=reply,
)
# 思考状态特有属性
self.interrupt = False
@dataclass
class MessageSending(MessageProcessBase):
"""发送状态的消息类"""
@@ -277,10 +309,11 @@ class MessageSending(MessageProcessBase):
message_id: str,
chat_stream: ChatStream,
bot_user_info: UserInfo,
sender_info: UserInfo, # 用来记录发送者信息,用于私聊回复
message_segment: Seg,
reply: Optional['MessageRecv'] = None,
reply: Optional["MessageRecv"] = None,
is_head: bool = False,
is_emoji: bool = False
is_emoji: bool = False,
):
# 调用父类初始化
super().__init__(
@@ -288,28 +321,34 @@ class MessageSending(MessageProcessBase):
chat_stream=chat_stream,
bot_user_info=bot_user_info,
message_segment=message_segment,
reply=reply
reply=reply,
)
# 发送状态特有属性
self.sender_info = sender_info
self.reply_to_message_id = reply.message_info.message_id if reply else None
self.is_head = is_head
self.is_emoji = is_emoji
def set_reply(self, reply: Optional['MessageRecv']) -> None:
def set_reply(self, reply: Optional["MessageRecv"]) -> None:
"""设置回复消息"""
if reply:
self.reply = reply
self.reply_to_message_id = self.reply.message_info.message_id
self.message_segment = Seg(type='seglist', data=[
Seg(type='reply', data=reply.message_info.message_id),
self.message_segment
])
self.message_segment = Seg(
type="seglist",
data=[
Seg(type="reply", data=reply.message_info.message_id),
self.message_segment,
],
)
async def process(self) -> None:
"""处理消息内容,生成纯文本和详细文本"""
if self.message_segment:
self.processed_plain_text = await self._process_message_segments(self.message_segment)
self.processed_plain_text = await self._process_message_segments(
self.message_segment
)
self.detailed_plain_text = self._generate_detailed_text()
@classmethod
@@ -318,8 +357,8 @@ class MessageSending(MessageProcessBase):
thinking: MessageThinking,
message_segment: Seg,
is_head: bool = False,
is_emoji: bool = False
) -> 'MessageSending':
is_emoji: bool = False,
) -> "MessageSending":
"""从思考状态消息创建发送状态消息"""
return cls(
message_id=thinking.message_info.message_id,
@@ -328,17 +367,26 @@ class MessageSending(MessageProcessBase):
bot_user_info=thinking.message_info.user_info,
reply=thinking.reply,
is_head=is_head,
is_emoji=is_emoji
is_emoji=is_emoji,
)
def to_dict(self):
ret= super().to_dict()
ret['message_info']['user_info']=self.chat_stream.user_info.to_dict()
ret = super().to_dict()
ret["message_info"]["user_info"] = self.chat_stream.user_info.to_dict()
return ret
def is_private_message(self) -> bool:
    """Tell whether this message belongs to a private (non-group) chat.

    Returns:
        True when the message carries no group info, or when the group info
        has no group id; False otherwise.
    """
    group = self.message_info.group_info
    if group is None:
        return True
    return group.group_id is None
@dataclass
class MessageSet:
"""消息集合类,可以存储多个发送消息"""
def __init__(self, chat_stream: ChatStream, message_id: str):
self.chat_stream = chat_stream
self.message_id = message_id
@@ -389,6 +437,3 @@ class MessageSet:
def __len__(self) -> int:
return len(self.messages)

View File

@@ -1,5 +1,5 @@
from dataclasses import dataclass, asdict
from typing import List, Optional, Union, Any, Dict
from typing import List, Optional, Union, Dict
@dataclass
class Seg:

View File

@@ -1,19 +1,21 @@
import time
from dataclasses import dataclass
from typing import Dict, ForwardRef, List, Optional, Union
from typing import Dict, Optional
import urllib3
from .cq_code import CQCode, cq_code_tool
from .cq_code import cq_code_tool
from .utils_cq import parse_cq_code
from .utils_user import get_groupname, get_user_cardname, get_user_nickname
from .utils_user import get_groupname
from .message_base import Seg, GroupInfo, UserInfo, BaseMessageInfo, MessageBase
# 禁用SSL警告
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
#这个类是消息数据类,用于存储和管理消息数据。
#它定义了消息的属性包括群组ID、用户ID、消息ID、原始消息内容、纯文本内容和时间戳。
#它还定义了两个辅助属性keywords用于提取消息的关键词is_plain_text用于判断消息是否为纯文本。
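# Message data class: stores and manages the data of a single message.
# It defines the message attributes: group ID, user ID, message ID, raw message content, plain-text content and timestamp.
# It also defines two helper attributes: keywords (extracts the keywords of the message) and is_plain_text (tells whether the message is plain text).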
@dataclass
class MessageCQ(MessageBase):
@@ -24,27 +26,17 @@ class MessageCQ(MessageBase):
- user_id: 发送者/接收者ID
- platform: 平台标识(默认为"qq"
"""
def __init__(
self,
message_id: int,
user_info: UserInfo,
group_info: Optional[GroupInfo] = None,
platform: str = "qq"
self, message_id: int, user_info: UserInfo, group_info: Optional[GroupInfo] = None, platform: str = "qq"
):
# 构造基础消息信息
message_info = BaseMessageInfo(
platform=platform,
message_id=message_id,
time=int(time.time()),
group_info=group_info,
user_info=user_info
platform=platform, message_id=message_id, time=int(time.time()), group_info=group_info, user_info=user_info
)
# 调用父类初始化message_segment 由子类设置
super().__init__(
message_info=message_info,
message_segment=None,
raw_message=None
)
super().__init__(message_info=message_info, message_segment=None, raw_message=None)
@dataclass
class MessageRecvCQ(MessageCQ):
@@ -62,7 +54,11 @@ class MessageRecvCQ(MessageCQ):
# 调用父类初始化
super().__init__(message_id, user_info, group_info, platform)
if group_info and group_info.group_name is None:
# 私聊消息不携带group_info
if group_info is None:
pass
elif group_info.group_name is None:
group_info.group_name = get_groupname(group_info.group_id)
# 解析消息段
@@ -76,7 +72,7 @@ class MessageRecvCQ(MessageCQ):
start = 0
while True:
cq_start = message.find('[CQ:', start)
cq_start = message.find("[CQ:", start)
if cq_start == -1:
if start < len(message):
text = message[start:].strip()
@@ -89,20 +85,20 @@ class MessageRecvCQ(MessageCQ):
if text:
cq_code_dict_list.append(parse_cq_code(text))
cq_end = message.find(']', cq_start)
cq_end = message.find("]", cq_start)
if cq_end == -1:
text = message[cq_start:].strip()
if text:
cq_code_dict_list.append(parse_cq_code(text))
break
cq_code = message[cq_start:cq_end + 1]
cq_code = message[cq_start : cq_end + 1]
cq_code_dict_list.append(parse_cq_code(cq_code))
start = cq_end + 1
# 转换CQ码为Seg对象
for code_item in cq_code_dict_list:
message_obj = cq_code_tool.cq_from_dict_to_class(code_item,msg=self,reply=reply_message)
message_obj = cq_code_tool.cq_from_dict_to_class(code_item, msg=self, reply=reply_message)
if message_obj.translated_segments:
segments.append(message_obj.translated_segments)
@@ -111,59 +107,58 @@ class MessageRecvCQ(MessageCQ):
return segments[0]
# 否则返回seglist类型的Seg
return Seg(type='seglist', data=segments)
return Seg(type="seglist", data=segments)
def to_dict(self) -> Dict:
    """Serialize this message to a dict by delegating to the parent class.

    Returns:
        The dictionary produced by the parent's ``to_dict``, unmodified.
    """
    return super().to_dict()
@dataclass
class MessageSendCQ(MessageCQ):
"""QQ发送消息类用于将Seg对象转换为raw_message"""
def __init__(
self,
data: Dict
):
def __init__(self, data: Dict):
# 调用父类初始化
message_info = BaseMessageInfo.from_dict(data.get('message_info', {}))
message_segment = Seg.from_dict(data.get('message_segment', {}))
message_info = BaseMessageInfo.from_dict(data.get("message_info", {}))
message_segment = Seg.from_dict(data.get("message_segment", {}))
super().__init__(
message_info.message_id,
message_info.user_info,
message_info.group_info if message_info.group_info else None,
message_info.platform
)
message_info.platform,
)
self.message_segment = message_segment
self.raw_message = self._generate_raw_message()
def _generate_raw_message(self, ) -> str:
def _generate_raw_message(
self,
) -> str:
"""将Seg对象转换为raw_message"""
segments = []
# 处理消息段
if self.message_segment.type == 'seglist':
if self.message_segment.type == "seglist":
for seg in self.message_segment.data:
segments.append(self._seg_to_cq_code(seg))
else:
segments.append(self._seg_to_cq_code(self.message_segment))
return ''.join(segments)
return "".join(segments)
def _seg_to_cq_code(self, seg: Seg) -> str:
"""将单个Seg对象转换为CQ码字符串"""
if seg.type == 'text':
if seg.type == "text":
return str(seg.data)
elif seg.type == 'image':
elif seg.type == "image":
return cq_code_tool.create_image_cq_base64(seg.data)
elif seg.type == 'emoji':
elif seg.type == "emoji":
return cq_code_tool.create_emoji_cq_base64(seg.data)
elif seg.type == 'at':
elif seg.type == "at":
return f"[CQ:at,qq={seg.data}]"
elif seg.type == 'reply':
elif seg.type == "reply":
return cq_code_tool.create_reply_cq(int(seg.data))
else:
return f"[{seg.data}]"

View File

@@ -5,12 +5,12 @@ from typing import Dict, List, Optional, Union
from loguru import logger
from nonebot.adapters.onebot.v11 import Bot
from .cq_code import cq_code_tool
from .message_cq import MessageSendCQ
from .message import MessageSending, MessageThinking, MessageRecv,MessageSet
from .message import MessageSending, MessageThinking, MessageRecv, MessageSet
from .storage import MessageStorage
from .config import global_config
from .chat_stream import chat_manager
from .utils import truncate_message
class Message_Sender:
@@ -26,42 +26,47 @@ class Message_Sender:
self._current_bot = bot
async def send_message(
self,
message: MessageSending,
self,
message: MessageSending,
) -> None:
"""发送消息"""
if isinstance(message, MessageSending):
message_json = message.to_dict()
message_send=MessageSendCQ(
data=message_json
)
if message_send.message_info.group_info:
message_send = MessageSendCQ(data=message_json)
# logger.debug(message_send.message_info,message_send.raw_message)
message_preview = truncate_message(message.processed_plain_text)
if (
message_send.message_info.group_info
and message_send.message_info.group_info.group_id
):
try:
await self._current_bot.send_group_msg(
group_id=message.message_info.group_info.group_id,
message=message_send.raw_message,
auto_escape=False
auto_escape=False,
)
logger.success(f"[调试] 发送消息{message.processed_plain_text}成功")
logger.success(f"[调试] 发送消息{message_preview}成功")
except Exception as e:
logger.error(f"[调试] 发生错误 {e}")
logger.error(f"[调试] 发送消息{message.processed_plain_text}失败")
logger.error(f"[调试] 发送消息{message_preview}失败")
else:
try:
logger.debug(message.message_info.user_info)
await self._current_bot.send_private_msg(
user_id=message.message_info.user_info.user_id,
user_id=message.sender_info.user_id,
message=message_send.raw_message,
auto_escape=False
auto_escape=False,
)
logger.success(f"[调试] 发送消息{message.processed_plain_text}成功")
logger.success(f"[调试] 发送消息{message_preview}成功")
except Exception as e:
logger.error(f"发生错误 {e}")
logger.error(f"[调试] 发送消息{message.processed_plain_text}失败")
logger.error(f"[调试] 发生错误 {e}")
logger.error(f"[调试] 发送消息{message_preview}失败")
class MessageContainer:
"""单个聊天流的发送/思考消息容器"""
def __init__(self, chat_id: str, max_size: int = 100):
self.chat_id = chat_id
self.max_size = max_size
@@ -88,7 +93,7 @@ class MessageContainer:
"""获取thinking_start_time最早的消息对象"""
if not self.messages:
return None
earliest_time = float('inf')
earliest_time = float("inf")
earliest_message = None
for msg in self.messages:
msg_time = msg.thinking_start_time
@@ -127,6 +132,7 @@ class MessageContainer:
class MessageManager:
"""管理所有聊天流的消息容器"""
def __init__(self):
self.containers: Dict[str, MessageContainer] = {} # chat_id -> MessageContainer
self.storage = MessageStorage()
@@ -138,7 +144,9 @@ class MessageManager:
self.containers[chat_id] = MessageContainer(chat_id)
return self.containers[chat_id]
def add_message(self, message: Union[MessageThinking, MessageSending, MessageSet]) -> None:
def add_message(
self, message: Union[MessageThinking, MessageSending, MessageSet]
) -> None:
chat_stream = message.chat_stream
if not chat_stream:
raise ValueError("无法找到对应的聊天流")
@@ -155,7 +163,11 @@ class MessageManager:
if isinstance(message_earliest, MessageThinking):
message_earliest.update_thinking_time()
thinking_time = message_earliest.thinking_time
print(f"消息正在思考中,已思考{int(thinking_time)}\r", end='', flush=True)
print(
f"消息正在思考中,已思考{int(thinking_time)}\r",
end="",
flush=True,
)
# 检查是否超时
if thinking_time > global_config.thinking_timeout:
@@ -163,15 +175,23 @@ class MessageManager:
container.remove_message(message_earliest)
else:
if message_earliest.is_head and message_earliest.update_thinking_time() > 30:
if (
message_earliest.is_head
and message_earliest.update_thinking_time() > 30
and not message_earliest.is_private_message() # 避免在私聊时插入reply
):
await message_sender.send_message(message_earliest.set_reply())
else:
await message_sender.send_message(message_earliest)
await message_earliest.process()
print(f"\033[1;34m[调试]\033[0m 消息'{message_earliest.processed_plain_text}'正在发送中")
print(
f"\033[1;34m[调试]\033[0m 消息“{truncate_message(message_earliest.processed_plain_text)}”正在发送中"
)
await self.storage.store_message(message_earliest, message_earliest.chat_stream,None)
await self.storage.store_message(
message_earliest, message_earliest.chat_stream, None
)
container.remove_message(message_earliest)
@@ -183,7 +203,11 @@ class MessageManager:
continue
try:
if msg.is_head and msg.update_thinking_time() > 30:
if (
msg.is_head
and msg.update_thinking_time() > 30
and not message_earliest.is_private_message() # 避免在私聊时插入reply
):
await message_sender.send_message(msg.set_reply())
else:
await message_sender.send_message(msg)
@@ -191,7 +215,7 @@ class MessageManager:
# if msg.is_emoji:
# msg.processed_plain_text = "[表情包]"
await msg.process()
await self.storage.store_message(msg,msg.chat_stream, None)
await self.storage.store_message(msg, msg.chat_stream, None)
if not container.remove_message(msg):
logger.warning("尝试删除不存在的消息")

View File

@@ -9,7 +9,7 @@ from ..moods.moods import MoodManager
from ..schedule.schedule_generator import bot_schedule
from .config import global_config
from .utils import get_embedding, get_recent_group_detailed_plain_text
from .chat_stream import ChatStream, chat_manager
from .chat_stream import chat_manager
class PromptBuilder:
@@ -311,7 +311,7 @@ class PromptBuilder:
{"$project": {"content": 1, "similarity": 1}}
]
results = list(self.db.db.knowledges.aggregate(pipeline))
results = list(self.db.knowledges.aggregate(pipeline))
# print(f"\033[1;34m[调试]\033[0m获取知识库内容结果: {results}")
if not results:

View File

@@ -1,6 +1,5 @@
import asyncio
from typing import Optional, Union
from typing import Optional, Union
from typing import Optional
from loguru import logger
from ...common.database import Database
@@ -169,7 +168,7 @@ class RelationshipManager:
async def load_all_relationships(self):
"""加载所有关系对象"""
db = Database.get_instance()
all_relationships = db.db.relationships.find({})
all_relationships = db.relationships.find({})
for data in all_relationships:
await self.load_relationship(data)
@@ -177,7 +176,7 @@ class RelationshipManager:
"""每5分钟自动保存一次关系数据"""
db = Database.get_instance()
# 获取所有关系记录
all_relationships = db.db.relationships.find({})
all_relationships = db.relationships.find({})
# 依次加载每条记录
for data in all_relationships:
await self.load_relationship(data)
@@ -207,7 +206,7 @@ class RelationshipManager:
saved = relationship.saved
db = Database.get_instance()
db.db.relationships.update_one(
db.relationships.update_one(
{'user_id': user_id, 'platform': platform},
{'$set': {
'platform': platform,

View File

@@ -1,8 +1,6 @@
from typing import Optional, Union
from typing import Optional, Union
from ...common.database import Database
from .message_base import MessageBase
from .message import MessageSending, MessageRecv
from .chat_stream import ChatStream
from loguru import logger
@@ -25,7 +23,7 @@ class MessageStorage:
"detailed_plain_text": message.detailed_plain_text,
"topic": topic,
}
self.db.db.messages.insert_one(message_data)
self.db.messages.insert_one(message_data)
except Exception:
logger.exception("存储消息失败")

View File

@@ -12,8 +12,8 @@ from loguru import logger
from ..models.utils_model import LLM_request
from ..utils.typo_generator import ChineseTypoGenerator
from .config import global_config
from .message import MessageThinking, MessageRecv,MessageSending,MessageProcessBase,Message
from .message_base import MessageBase,BaseMessageInfo,UserInfo,GroupInfo
from .message import MessageRecv,Message
from .message_base import UserInfo
from .chat_stream import ChatStream
from ..moods.moods import MoodManager
@@ -39,9 +39,13 @@ def db_message_to_str(message_dict: Dict) -> str:
def is_mentioned_bot_in_message(message: MessageRecv) -> bool:
    """Check whether the message text mentions the bot.

    The bot counts as mentioned when the processed plain text contains the
    configured bot nickname or any of the configured alias names.

    Args:
        message: Received message whose ``processed_plain_text`` is searched.

    Returns:
        True if the nickname or an alias occurs in the text, else False.
    """
    primary_names = [global_config.BOT_NICKNAME]
    alias_names = global_config.BOT_ALIAS_NAMES

    # The nickname is tested first; aliases are only consulted when it is absent.
    if any(name in message.processed_plain_text for name in primary_names):
        return True
    return any(alias in message.processed_plain_text for alias in alias_names)
@@ -402,3 +406,10 @@ def find_similar_topics_simple(text: str, topics: list, top_k: int = 5) -> list:
# 按相似度降序排序并返回前k个
return sorted(similarities, key=lambda x: x[1], reverse=True)[:top_k]
def truncate_message(message: str, max_length: int = 20) -> str:
    """Return a short preview of ``message`` suitable for log lines.

    Args:
        message: Text to shorten. ``None`` or an empty string yields ``""``
            so that callers logging a not-yet-processed message do not crash.
        max_length: Number of leading characters kept before the ellipsis.

    Returns:
        ``message`` unchanged when it has at most ``max_length`` characters;
        otherwise its first ``max_length`` characters followed by ``"..."``
        (so the result may be up to three characters longer than
        ``max_length``).
    """
    if not message:
        return ""
    if len(message) > max_length:
        return message[:max_length] + "..."
    return message

View File

@@ -1,16 +1,12 @@
import base64
import io
import os
import time
import zlib
import aiohttp
import hashlib
from typing import Optional, Tuple, Union
from urllib.parse import urlparse
from typing import Optional, Union
from loguru import logger
from nonebot import get_driver
from PIL import Image
from ...common.database import Database
from ..chat.config import global_config
@@ -44,20 +40,20 @@ class ImageManager:
def _ensure_image_collection(self):
"""Ensure the ``images`` collection exists and create its indexes."""
if 'images' not in self.db.db.list_collection_names():
self.db.db.create_collection('images')
if 'images' not in self.db.list_collection_names():
self.db.create_collection('images')
# Create the indexes (unique on hash; plain on url and path)
self.db.db.images.create_index([('hash', 1)], unique=True)
self.db.db.images.create_index([('url', 1)])
self.db.db.images.create_index([('path', 1)])
self.db.images.create_index([('hash', 1)], unique=True)
self.db.images.create_index([('url', 1)])
self.db.images.create_index([('path', 1)])
def _ensure_description_collection(self):
"""Ensure the ``image_descriptions`` collection exists and create its indexes."""
if 'image_descriptions' not in self.db.db.list_collection_names():
self.db.db.create_collection('image_descriptions')
if 'image_descriptions' not in self.db.list_collection_names():
self.db.create_collection('image_descriptions')
# Create the indexes (unique on hash; plain on type)
self.db.db.image_descriptions.create_index([('hash', 1)], unique=True)
self.db.db.image_descriptions.create_index([('type', 1)])
self.db.image_descriptions.create_index([('hash', 1)], unique=True)
self.db.image_descriptions.create_index([('type', 1)])
def _get_description_from_db(self, image_hash: str, description_type: str) -> Optional[str]:
"""从数据库获取图片描述
@@ -69,7 +65,7 @@ class ImageManager:
Returns:
Optional[str]: 描述文本如果不存在则返回None
"""
result= self.db.db.image_descriptions.find_one({
result= self.db.image_descriptions.find_one({
'hash': image_hash,
'type': description_type
})
@@ -83,7 +79,7 @@ class ImageManager:
description: 描述文本
description_type: 描述类型 ('emoji''image')
"""
self.db.db.image_descriptions.update_one(
self.db.image_descriptions.update_one(
{'hash': image_hash, 'type': description_type},
{
'$set': {
@@ -125,7 +121,7 @@ class ImageManager:
image_hash = hashlib.md5(image_bytes).hexdigest()
# 查重
existing = self.db.db.images.find_one({'hash': image_hash})
existing = self.db.images.find_one({'hash': image_hash})
if existing:
return existing['path']
@@ -146,7 +142,7 @@ class ImageManager:
'description': description,
'timestamp': timestamp
}
self.db.db.images.insert_one(image_doc)
self.db.images.insert_one(image_doc)
return file_path
@@ -163,7 +159,7 @@ class ImageManager:
"""
try:
# 先查找是否已存在
existing = self.db.db.images.find_one({'url': url})
existing = self.db.images.find_one({'url': url})
if existing:
return existing['path']
@@ -207,7 +203,7 @@ class ImageManager:
Returns:
bool: 是否存在
"""
return self.db.db.images.find_one({'url': url}) is not None
return self.db.images.find_one({'url': url}) is not None
def check_hash_exists(self, image_data: Union[str, bytes], is_base64: bool = False) -> bool:
"""检查图像是否已存在
@@ -230,7 +226,7 @@ class ImageManager:
return False
image_hash = hashlib.md5(image_bytes).hexdigest()
return self.db.db.images.find_one({'hash': image_hash}) is not None
return self.db.images.find_one({'hash': image_hash}) is not None
except Exception as e:
logger.error(f"检查哈希失败: {str(e)}")
@@ -273,7 +269,7 @@ class ImageManager:
'description': description,
'timestamp': timestamp
}
self.db.db.images.update_one(
self.db.images.update_one(
{'hash': image_hash},
{'$set': image_doc},
upsert=True
@@ -330,7 +326,7 @@ class ImageManager:
'description': description,
'timestamp': timestamp
}
self.db.db.images.update_one(
self.db.images.update_one(
{'hash': image_hash},
{'$set': image_doc},
upsert=True

View File

@@ -1,13 +1,9 @@
import asyncio
from typing import Dict
from loguru import logger
from typing import Dict
from loguru import logger
from .config import global_config
from .message_base import UserInfo, GroupInfo
from .chat_stream import chat_manager,ChatStream
from .chat_stream import ChatStream
class WillingManager:

View File

@@ -0,0 +1,10 @@
from nonebot import get_app
from .api import router
from loguru import logger
# Fetch the main application instance and mount the router under /api
app = get_app()
app.include_router(router, prefix="/api")
# Log a message so it is easy to confirm that the API was registered
logger.success("配置重载API已注册可通过 /api/reload-config 访问")

View File

@@ -0,0 +1,17 @@
from fastapi import APIRouter, HTTPException
from src.plugins.chat.config import BotConfig
import os
# 创建APIRouter而不是FastAPI实例
router = APIRouter()
@router.post("/reload-config")
async def reload_config():
    """Reload ``bot_config.toml`` from the bot's configuration directory.

    Returns:
        A dict with a human-readable message and a ``status`` field.

    Raises:
        HTTPException: 404 when the config file does not exist, 500 for any
            other failure while loading it.
    """
    # Imported here so the module attribute can be rebound below; the package
    # itself is already imported at file level for ``BotConfig``.
    import src.plugins.chat.config as chat_config

    try:
        bot_config_path = os.path.join(BotConfig.get_config_dir(), "bot_config.toml")
        new_config = BotConfig.load_config(config_path=bot_config_path)
        # Previously the result was bound to a local name only, so the reload
        # had no effect at all. Publish it on the config module instead.
        # NOTE(review): modules that did ``from .config import global_config``
        # still hold the old object - confirm whether they need refreshing too.
        chat_config.global_config = new_config
        return {"message": "配置重载成功", "status": "success"}
    except FileNotFoundError as e:
        raise HTTPException(status_code=404, detail=str(e)) from e
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"重载配置时发生错误: {str(e)}") from e

View File

@@ -0,0 +1,3 @@
import requests
# Manual smoke test: ask the bot listening on localhost:8080 to reload its
# configuration and print the JSON body of the reply.
response = requests.post("http://localhost:8080/api/reload-config")
print(response.json())

View File

@@ -1,199 +0,0 @@
import os
import sys
import time
import requests
from dotenv import load_dotenv
# Add the project root directory to the Python path
root_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../.."))
sys.path.append(root_path)
# Load the .env.dev file from the project root
env_path = os.path.join(root_path, ".env.dev")
if not os.path.exists(env_path):
    raise FileNotFoundError(f"配置文件不存在: {env_path}")
load_dotenv(env_path)
from src.common.database import Database
# Read the database connection settings from environment variables
Database.initialize(
    uri=os.getenv("MONGODB_URI"),
    host=os.getenv("MONGODB_HOST", "127.0.0.1"),
    port=int(os.getenv("MONGODB_PORT", "27017")),
    db_name=os.getenv("DATABASE_NAME", "MegBot"),
    username=os.getenv("MONGODB_USERNAME"),
    password=os.getenv("MONGODB_PASSWORD"),
    auth_source=os.getenv("MONGODB_AUTH_SOURCE"),
)
class KnowledgeLibrary:
    """Import text files into MongoDB as embedded segments and search them.

    Files under ``data/raw_info`` are cut into overlapping segments, embedded
    through the SiliconFlow embeddings endpoint and upserted into the
    ``knowledges`` collection; ``processed_files`` records imported files.
    """

    # Seconds to wait for the embeddings endpoint before giving up.
    REQUEST_TIMEOUT = 30

    def __init__(self):
        self.db = Database.get_instance()
        self.raw_info_dir = "data/raw_info"
        self._ensure_dirs()
        self.api_key = os.getenv("SILICONFLOW_KEY")
        if not self.api_key:
            # The message now names the variable that is actually read.
            raise ValueError("SILICONFLOW_KEY 环境变量未设置")

    def _ensure_dirs(self):
        """Create the raw-info directory if it does not exist yet."""
        os.makedirs(self.raw_info_dir, exist_ok=True)

    def get_embedding(self, text: str) -> list:
        """Return the embedding vector for *text*, or ``None`` on a non-200 reply."""
        url = "https://api.siliconflow.cn/v1/embeddings"
        payload = {
            "model": "BAAI/bge-m3",
            "input": text,
            "encoding_format": "float"
        }
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json"
        }
        # A timeout keeps an unresponsive endpoint from hanging the import.
        response = requests.post(url, json=payload, headers=headers, timeout=self.REQUEST_TIMEOUT)
        if response.status_code != 200:
            print(f"获取embedding失败: {response.text}")
            return None
        return response.json()['data'][0]['embedding']

    def process_files(self):
        """Process every ``.txt`` file in the raw-info directory."""
        for filename in os.listdir(self.raw_info_dir):
            if filename.endswith('.txt'):
                file_path = os.path.join(self.raw_info_dir, filename)
                self.process_single_file(file_path)

    def process_single_file(self, file_path: str):
        """Embed and store one file unless it was already processed.

        Errors are reported on stdout and do not propagate.
        """
        # Local import: this file does not import hashlib at module level.
        import hashlib

        try:
            # Skip files that were already imported
            if self.db.db.processed_files.find_one({"file_path": file_path}):
                print(f"文件已处理过,跳过: {file_path}")
                return
            with open(file_path, 'r', encoding='utf-8') as f:
                content = f.read()
            # 600-character windows that advance 300 characters at a time,
            # so consecutive segments overlap by half.
            segments = [content[i:i+600] for i in range(0, len(content), 300)]
            for segment in segments:
                if not segment.strip():  # skip blank segments
                    continue
                embedding = self.get_embedding(segment)
                if not embedding:
                    continue
                doc = {
                    "content": segment,
                    "embedding": embedding,
                    "file_path": file_path,
                    "segment_length": len(segment)
                }
                # Built-in hash() of a str is randomized per process, so it
                # cannot identify the same text across runs; use a digest.
                content_hash = hashlib.sha256(segment.encode('utf-8')).hexdigest()
                # Update the existing document for this text, or insert it
                self.db.db.knowledges.update_one(
                    {"content_hash": content_hash},
                    {"$set": doc},
                    upsert=True
                )
            # Remember that this file has been imported
            self.db.db.processed_files.insert_one({
                "file_path": file_path,
                "processed_time": time.time()
            })
            print(f"成功处理文件: {file_path}")
        except Exception as e:
            print(f"处理文件 {file_path} 时出错: {str(e)}")

    def search_similar_segments(self, query: str, limit: int = 5) -> list:
        """Return up to *limit* stored segments ranked by cosine similarity to *query*.

        Returns an empty list when no embedding could be obtained for *query*.
        """
        query_embedding = self.get_embedding(query)
        if not query_embedding:
            return []
        # Cosine similarity computed inside the aggregation pipeline
        pipeline = [
            {
                "$addFields": {
                    "dotProduct": {
                        "$reduce": {
                            "input": {"$range": [0, {"$size": "$embedding"}]},
                            "initialValue": 0,
                            "in": {
                                "$add": [
                                    "$$value",
                                    {"$multiply": [
                                        {"$arrayElemAt": ["$embedding", "$$this"]},
                                        {"$arrayElemAt": [query_embedding, "$$this"]}
                                    ]}
                                ]
                            }
                        }
                    },
                    "magnitude1": {
                        "$sqrt": {
                            "$reduce": {
                                "input": "$embedding",
                                "initialValue": 0,
                                "in": {"$add": ["$$value", {"$multiply": ["$$this", "$$this"]}]}
                            }
                        }
                    },
                    "magnitude2": {
                        "$sqrt": {
                            "$reduce": {
                                "input": query_embedding,
                                "initialValue": 0,
                                "in": {"$add": ["$$value", {"$multiply": ["$$this", "$$this"]}]}
                            }
                        }
                    }
                }
            },
            {
                "$addFields": {
                    "similarity": {
                        "$divide": ["$dotProduct", {"$multiply": ["$magnitude1", "$magnitude2"]}]
                    }
                }
            },
            {"$sort": {"similarity": -1}},
            {"$limit": limit},
            {"$project": {"content": 1, "similarity": 1, "file_path": 1}}
        ]
        results = list(self.db.db.knowledges.aggregate(pipeline))
        return results
# Create the module-level singleton instance
knowledge_library = KnowledgeLibrary()
if __name__ == "__main__":
    # Exercise the import step of the knowledge base
    print("开始处理知识库文件...")
    knowledge_library.process_files()
    # Exercise the search step with a sample query
    test_query = "麦麦评价一下僕と花"
    print(f"\n搜索与'{test_query}'相似的内容:")
    results = knowledge_library.search_similar_segments(test_query)
    for result in results:
        print(f"相似度: {result['similarity']:.4f}")
        print(f"内容: {result['content'][:100]}...")
        print("-" * 50)

View File

@@ -96,7 +96,7 @@ class Memory_graph:
dot_data = {
"concept": node
}
self.db.db.store_memory_dots.insert_one(dot_data)
self.db.store_memory_dots.insert_one(dot_data)
@property
def dots(self):
@@ -106,7 +106,7 @@ class Memory_graph:
def get_random_chat_from_db(self, length: int, timestamp: str):
# 从数据库中根据时间戳获取离其最近的聊天记录
chat_text = ''
closest_record = self.db.db.messages.find_one({"time": {"$lte": timestamp}}, sort=[('time', -1)]) # 调试输出
closest_record = self.db.messages.find_one({"time": {"$lte": timestamp}}, sort=[('time', -1)]) # 调试输出
logger.info(
f"距离time最近的消息时间: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(int(closest_record['time'])))}")
@@ -115,7 +115,7 @@ class Memory_graph:
group_id = closest_record['group_id'] # 获取groupid
# 获取该时间戳之后的length条消息且groupid相同
chat_record = list(
self.db.db.messages.find({"time": {"$gt": closest_time}, "group_id": group_id}).sort('time', 1).limit(
self.db.messages.find({"time": {"$gt": closest_time}, "group_id": group_id}).sort('time', 1).limit(
length))
for record in chat_record:
time_str = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(int(record['time'])))
@@ -130,34 +130,34 @@ class Memory_graph:
def save_graph_to_db(self):
    """Persist the in-memory graph ``self.G``, replacing the stored one.

    Nodes are written to ``graph_data.nodes`` and edges to
    ``graph_data.edges``.
    """
    # Clear the existing graph data. ``graph_data.nodes`` and
    # ``graph_data.edges`` are collections of their own, separate from
    # ``graph_data``; clearing only the latter left every previously saved
    # node and edge in place, so removed items came back on the next load.
    self.db.graph_data.delete_many({})
    self.db.graph_data.nodes.delete_many({})
    self.db.graph_data.edges.delete_many({})
    # Save the nodes
    for node in self.G.nodes(data=True):
        node_data = {
            'concept': node[0],
            'memory_items': node[1].get('memory_items', [])  # defaults to an empty list
        }
        self.db.graph_data.nodes.insert_one(node_data)
    # Save the edges
    for edge in self.G.edges():
        edge_data = {
            'source': edge[0],
            'target': edge[1]
        }
        self.db.graph_data.edges.insert_one(edge_data)
def load_graph_from_db(self):
    """Rebuild ``self.G`` from the node and edge documents in the database."""
    # Start from an empty graph
    self.G.clear()
    # Nodes: ``memory_items`` is always attached as a list
    node_docs = self.db.graph_data.nodes.find()
    for node_doc in node_docs:
        items = node_doc.get('memory_items', [])
        if not isinstance(items, list):
            # A lone truthy value becomes a one-element list, anything falsy an empty one
            items = [items] if items else []
        self.G.add_node(node_doc['concept'], memory_items=items)
    # Edges
    edge_docs = self.db.graph_data.edges.find()
    for edge_doc in edge_docs:
        self.G.add_edge(edge_doc['source'], edge_doc['target'])

View File

@@ -303,7 +303,7 @@ class Hippocampus:
return topic_num
async def operation_build_memory(self, chat_size=20):
time_frequency = {'near': 3, 'mid': 8, 'far': 5}
time_frequency = {'near': 1, 'mid': 4, 'far': 4}
memory_samples = self.get_memory_sample(chat_size, time_frequency)
for i, messages in enumerate(memory_samples, 1):
@@ -315,7 +315,7 @@ class Hippocampus:
bar = '' * filled_length + '-' * (bar_length - filled_length)
logger.debug(f"进度: [{bar}] {progress:.1f}% ({i}/{len(memory_samples)})")
compress_rate = 0.1
compress_rate = global_config.memory_compress_rate
compressed_memory, similar_topics_dict = await self.memory_compress(messages, compress_rate)
logger.info(f"压缩后记忆数量: {len(compressed_memory)},似曾相识的话题: {len(similar_topics_dict)}")
@@ -523,9 +523,14 @@ class Hippocampus:
async def operation_forget_topic(self, percentage=0.1):
"""随机选择图中一定比例的节点和边进行检查,根据时间条件决定是否遗忘"""
# 检查数据库是否为空
all_nodes = list(self.memory_graph.G.nodes())
all_edges = list(self.memory_graph.G.edges())
if not all_nodes and not all_edges:
logger.info("记忆图为空,无需进行遗忘操作")
return
check_nodes_count = max(1, int(len(all_nodes) * percentage))
check_edges_count = max(1, int(len(all_edges) * percentage))
@@ -546,7 +551,7 @@ class Hippocampus:
# print(f"float(last_modified):{float(last_modified)}" )
# print(f"current_time:{current_time}")
# print(f"current_time - last_modified:{current_time - last_modified}")
if current_time - last_modified > 3600*24: # test
if current_time - last_modified > 3600*global_config.memory_forget_time: # test
current_strength = edge_data.get('strength', 1)
new_strength = current_strength - 1
@@ -887,15 +892,6 @@ config = driver.config
start_time = time.time()
Database.initialize(
uri=os.getenv("MONGODB_URI"),
host=os.getenv("MONGODB_HOST", "127.0.0.1"),
port=int(os.getenv("MONGODB_PORT", "27017")),
db_name=os.getenv("DATABASE_NAME", "MegBot"),
username=os.getenv("MONGODB_USERNAME"),
password=os.getenv("MONGODB_PASSWORD"),
auth_source=os.getenv("MONGODB_AUTH_SOURCE"),
)
# 创建记忆图
memory_graph = Memory_graph()
# 创建海马体

View File

@@ -10,7 +10,6 @@ from pathlib import Path
import matplotlib.pyplot as plt
import networkx as nx
import pymongo
from dotenv import load_dotenv
from loguru import logger
import jieba

View File

@@ -41,10 +41,10 @@ class LLM_request:
"""初始化数据库集合"""
try:
# 创建llm_usage集合的索引
self.db.db.llm_usage.create_index([("timestamp", 1)])
self.db.db.llm_usage.create_index([("model_name", 1)])
self.db.db.llm_usage.create_index([("user_id", 1)])
self.db.db.llm_usage.create_index([("request_type", 1)])
self.db.llm_usage.create_index([("timestamp", 1)])
self.db.llm_usage.create_index([("model_name", 1)])
self.db.llm_usage.create_index([("user_id", 1)])
self.db.llm_usage.create_index([("request_type", 1)])
except Exception:
logger.error("创建数据库索引失败")
@@ -73,7 +73,7 @@ class LLM_request:
"status": "success",
"timestamp": datetime.now()
}
self.db.db.llm_usage.insert_one(usage_data)
self.db.llm_usage.insert_one(usage_data)
logger.info(
f"Token使用情况 - 模型: {self.model_name}, "
f"用户: {user_id}, 类型: {request_type}, "

View File

@@ -14,16 +14,6 @@ from ..models.utils_model import LLM_request
driver = get_driver()
config = driver.config
Database.initialize(
uri=os.getenv("MONGODB_URI"),
host=os.getenv("MONGODB_HOST", "127.0.0.1"),
port=int(os.getenv("MONGODB_PORT", "27017")),
db_name=os.getenv("DATABASE_NAME", "MegBot"),
username=os.getenv("MONGODB_USERNAME"),
password=os.getenv("MONGODB_PASSWORD"),
auth_source=os.getenv("MONGODB_AUTH_SOURCE"),
)
class ScheduleGenerator:
def __init__(self):
# 根据global_config.llm_normal这一字典配置指定模型
@@ -56,7 +46,7 @@ class ScheduleGenerator:
schedule_text = str
existing_schedule = self.db.db.schedule.find_one({"date": date_str})
existing_schedule = self.db.schedule.find_one({"date": date_str})
if existing_schedule:
logger.debug(f"{date_str}的日程已存在:")
schedule_text = existing_schedule["schedule"]
@@ -73,7 +63,7 @@ class ScheduleGenerator:
try:
schedule_text, _ = await self.llm_scheduler.generate_response(prompt)
self.db.db.schedule.insert_one({"date": date_str, "schedule": schedule_text})
self.db.schedule.insert_one({"date": date_str, "schedule": schedule_text})
except Exception as e:
logger.error(f"生成日程失败: {str(e)}")
schedule_text = "生成日程时出错了"
@@ -153,7 +143,7 @@ class ScheduleGenerator:
"""打印完整的日程安排"""
if not self._parse_schedule(self.today_schedule_text):
logger.warning("今日日程有误,将在下次运行时重新生成")
self.db.db.schedule.delete_one({"date": datetime.datetime.now().strftime("%Y-%m-%d")})
self.db.schedule.delete_one({"date": datetime.datetime.now().strftime("%Y-%m-%d")})
else:
logger.info("=== 今日日程安排 ===")
for time_str, activity in self.today_schedule.items():

View File

@@ -53,7 +53,7 @@ class LLMStatistics:
"costs_by_model": defaultdict(float)
}
cursor = self.db.db.llm_usage.find({
cursor = self.db.llm_usage.find({
"timestamp": {"$gte": start_time}
})

View File

@@ -0,0 +1,383 @@
import os
import sys
import time
import requests
from dotenv import load_dotenv
import hashlib
from datetime import datetime
from tqdm import tqdm
from rich.console import Console
from rich.table import Table
# Add the project root directory to the Python path
root_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../.."))
sys.path.append(root_path)
# The src package can be imported now that the root is on the path
from src.common.database import Database
# Load the .env.prod file from the project root
env_path = os.path.join(root_path, ".env.prod")
if not os.path.exists(env_path):
    raise FileNotFoundError(f"配置文件不存在: {env_path}")
load_dotenv(env_path)
class KnowledgeLibrary:
    """Import text files into MongoDB as embedded chunks and search them.

    Text files under ``data/raw_info`` are split into chunks, embedded through
    the SiliconFlow embeddings endpoint and stored in the ``knowledges``
    collection. ``processed_files`` remembers, per file, the content hash and
    the split lengths already imported.
    """

    # Seconds to wait for the embeddings endpoint before giving up.
    REQUEST_TIMEOUT = 30
    # Full-width full stop, exclamation mark and question mark: the marks
    # after which a paragraph may be broken into sentences.
    _SENTENCE_ENDINGS = ("\u3002", "\uff01", "\uff1f")

    def __init__(self):
        # Initialise the database connection unless it already exists.
        # NOTE(review): other files in this change moved from ``self.db.db.X``
        # to ``self.db.X``; confirm ``self.db.db`` is still right here.
        if Database._instance is None:
            Database.initialize(
                uri=os.getenv("MONGODB_URI"),
                host=os.getenv("MONGODB_HOST", "127.0.0.1"),
                port=int(os.getenv("MONGODB_PORT", "27017")),
                db_name=os.getenv("DATABASE_NAME", "MegBot"),
                username=os.getenv("MONGODB_USERNAME"),
                password=os.getenv("MONGODB_PASSWORD"),
                auth_source=os.getenv("MONGODB_AUTH_SOURCE"),
            )
        self.db = Database.get_instance()
        self.raw_info_dir = "data/raw_info"
        self._ensure_dirs()
        self.api_key = os.getenv("SILICONFLOW_KEY")
        if not self.api_key:
            # The message now names the variable that is actually read.
            raise ValueError("SILICONFLOW_KEY 环境变量未设置")
        self.console = Console()

    def _ensure_dirs(self):
        """Create the raw-info directory if it does not exist yet."""
        os.makedirs(self.raw_info_dir, exist_ok=True)

    def read_file(self, file_path: str) -> str:
        """Return the UTF-8 decoded content of *file_path*."""
        with open(file_path, 'r', encoding='utf-8') as f:
            return f.read()

    def split_content(self, content: str, max_length: int = 512) -> list:
        """Split *content* into chunks, keeping paragraphs whole where possible.

        Paragraphs (separated by blank lines) are packed together, joined by a
        newline, until the next one would not fit. A paragraph longer than
        ``max_length`` is broken into sentences, and a sentence longer than
        ``max_length`` is cut at fixed offsets.

        Args:
            content: The text to split.
            max_length: Maximum length of each chunk; must be positive.

        Returns:
            list: The non-empty text chunks, in document order.

        Raises:
            ValueError: If ``max_length`` is not positive.
        """
        if max_length <= 0:
            raise ValueError("max_length must be a positive integer")

        # Split into paragraphs first
        paragraphs = [p.strip() for p in content.split('\n\n') if p.strip()]
        chunks = []
        current_chunk = []
        current_length = 0
        for para in paragraphs:
            para_length = len(para)
            if para_length > max_length:
                # Flush what has been collected, then split the long paragraph
                if current_chunk:
                    chunks.append('\n'.join(current_chunk))
                    current_chunk = []
                    current_length = 0
                chunks.extend(self._split_long_paragraph(para, max_length))
            elif current_length + para_length + 1 <= max_length:
                # The paragraph still fits (the +1 accounts for the joining newline)
                current_chunk.append(para)
                current_length += para_length + 1
            else:
                # Start a new chunk. The pending one is only emitted when it
                # holds something: a paragraph of exactly ``max_length``
                # characters lands here with nothing pending, which used to
                # add an empty chunk.
                if current_chunk:
                    chunks.append('\n'.join(current_chunk))
                current_chunk = [para]
                current_length = para_length
        # Emit the last chunk
        if current_chunk:
            chunks.append('\n'.join(current_chunk))
        return chunks

    @classmethod
    def _split_long_paragraph(cls, para: str, max_length: int) -> list:
        """Break one over-long paragraph into sentence-based chunks."""
        # Put a line break after each sentence-ending mark (the mark itself is
        # kept), then split on the line breaks.
        marked = para
        for mark in cls._SENTENCE_ENDINGS:
            marked = marked.replace(mark, mark + '\n')
        sentences = [s.strip() for s in marked.split('\n') if s.strip()]

        pieces = []
        temp_chunk = []
        temp_length = 0
        for sentence in sentences:
            sentence_length = len(sentence)
            if sentence_length > max_length:
                # A single over-long sentence is cut at fixed offsets
                if temp_chunk:
                    pieces.append('\n'.join(temp_chunk))
                    temp_chunk = []
                    temp_length = 0
                pieces.extend(
                    sentence[i:i + max_length]
                    for i in range(0, sentence_length, max_length)
                )
            elif temp_length + sentence_length + 1 <= max_length:
                temp_chunk.append(sentence)
                temp_length += sentence_length + 1
            else:
                # Same guard as for paragraphs: never emit an empty chunk
                if temp_chunk:
                    pieces.append('\n'.join(temp_chunk))
                temp_chunk = [sentence]
                temp_length = sentence_length
        if temp_chunk:
            pieces.append('\n'.join(temp_chunk))
        return pieces

    def get_embedding(self, text: str) -> list:
        """Return the embedding vector for *text*, or ``None`` on a non-200 reply."""
        url = "https://api.siliconflow.cn/v1/embeddings"
        payload = {
            "model": "BAAI/bge-m3",
            "input": text,
            "encoding_format": "float"
        }
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json"
        }
        # A timeout keeps an unresponsive endpoint from hanging the import;
        # the resulting exception propagates like any other request error.
        response = requests.post(url, json=payload, headers=headers, timeout=self.REQUEST_TIMEOUT)
        if response.status_code != 200:
            print(f"获取embedding失败: {response.text}")
            return None
        return response.json()['data'][0]['embedding']

    def process_files(self, knowledge_length:int=512):
        """Process every ``.txt`` file in the raw-info directory and print a summary.

        Args:
            knowledge_length: Maximum chunk length passed on for each file.
        """
        txt_files = [f for f in os.listdir(self.raw_info_dir) if f.endswith('.txt')]
        if not txt_files:
            self.console.print("[red]警告:在 {} 目录下没有找到任何txt文件[/red]".format(self.raw_info_dir))
            self.console.print("[yellow]请将需要处理的文本文件放入该目录后再运行程序[/yellow]")
            return
        total_stats = {
            "processed_files": 0,
            "total_chunks": 0,
            "failed_files": [],
            "skipped_files": []
        }
        self.console.print(f"\n[bold blue]开始处理知识库文件 - 共{len(txt_files)}个文件[/bold blue]")
        for filename in tqdm(txt_files, desc="处理文件进度"):
            file_path = os.path.join(self.raw_info_dir, filename)
            result = self.process_single_file(file_path, knowledge_length)
            self._update_stats(total_stats, result, filename)
        self._display_processing_results(total_stats)

    def process_single_file(self, file_path: str, knowledge_length: int = 512):
        """Embed and store one file unless this content was already imported at this length.

        Returns:
            dict: ``status`` (``"success"``, ``"skipped"`` or ``"failed"``),
            ``chunks_processed`` and ``error`` (message text or ``None``).
        """
        result = {
            "status": "success",
            "chunks_processed": 0,
            "error": None
        }
        try:
            current_hash = self.calculate_file_hash(file_path)
            processed_record = self.db.db.processed_files.find_one({"file_path": file_path})
            # Split lengths recorded earlier only count while the content is
            # unchanged. They used to be carried over after a hash change,
            # marking the new content as imported at lengths never run on it.
            unchanged = bool(processed_record) and processed_record.get("hash") == current_hash
            split_by = list(processed_record.get("split_by", [])) if unchanged else []
            if knowledge_length in split_by:
                result["status"] = "skipped"
                return result
            # NOTE(review): chunks stored for an older version of the file are
            # not removed here - confirm whether they should be.
            content = self.read_file(file_path)
            chunks = self.split_content(content, knowledge_length)
            for chunk in tqdm(chunks, desc=f"处理 {os.path.basename(file_path)} 的文本块", leave=False):
                embedding = self.get_embedding(chunk)
                if embedding:
                    knowledge = {
                        "content": chunk,
                        "embedding": embedding,
                        "source_file": file_path,
                        "split_length": knowledge_length,
                        "created_at": datetime.now()
                    }
                    self.db.db.knowledges.insert_one(knowledge)
                    result["chunks_processed"] += 1
            split_by.append(knowledge_length)
            self.db.db.processed_files.update_one(
                {"file_path": file_path},
                {
                    "$set": {
                        "hash": current_hash,
                        "last_processed": datetime.now(),
                        "split_by": split_by
                    }
                },
                upsert=True
            )
        except Exception as e:
            result["status"] = "failed"
            result["error"] = str(e)
        return result

    def _update_stats(self, total_stats, result, filename):
        """Fold the *result* of one file into the running *total_stats* dict."""
        if result["status"] == "success":
            total_stats["processed_files"] += 1
            total_stats["total_chunks"] += result["chunks_processed"]
        elif result["status"] == "failed":
            total_stats["failed_files"].append((filename, result["error"]))
        elif result["status"] == "skipped":
            total_stats["skipped_files"].append(filename)

    def _display_processing_results(self, stats):
        """Print a summary table, followed by the failed and skipped files."""
        self.console.print("\n[bold green]处理完成!统计信息如下:[/bold green]")
        table = Table(show_header=True, header_style="bold magenta")
        table.add_column("统计项", style="dim")
        table.add_column("数值")
        table.add_row("成功处理文件数", str(stats["processed_files"]))
        table.add_row("处理的知识块总数", str(stats["total_chunks"]))
        table.add_row("跳过的文件数", str(len(stats["skipped_files"])))
        table.add_row("失败的文件数", str(len(stats["failed_files"])))
        self.console.print(table)
        if stats["failed_files"]:
            self.console.print("\n[bold red]处理失败的文件:[/bold red]")
            for filename, error in stats["failed_files"]:
                self.console.print(f"[red]- {filename}: {error}[/red]")
        if stats["skipped_files"]:
            self.console.print("\n[bold yellow]跳过的文件(已处理):[/bold yellow]")
            for filename in stats["skipped_files"]:
                self.console.print(f"[yellow]- {filename}[/yellow]")

    def calculate_file_hash(self, file_path):
        """Return the hexadecimal MD5 digest of the file, read in 4 KiB blocks."""
        hash_md5 = hashlib.md5()
        with open(file_path, "rb") as f:
            for chunk in iter(lambda: f.read(4096), b""):
                hash_md5.update(chunk)
        return hash_md5.hexdigest()

    def search_similar_segments(self, query: str, limit: int = 5) -> list:
        """Return up to *limit* stored chunks ranked by cosine similarity to *query*.

        Returns an empty list when no embedding could be obtained for *query*.
        """
        query_embedding = self.get_embedding(query)
        if not query_embedding:
            return []
        # Cosine similarity computed inside the aggregation pipeline
        pipeline = [
            {
                "$addFields": {
                    "dotProduct": {
                        "$reduce": {
                            "input": {"$range": [0, {"$size": "$embedding"}]},
                            "initialValue": 0,
                            "in": {
                                "$add": [
                                    "$$value",
                                    {"$multiply": [
                                        {"$arrayElemAt": ["$embedding", "$$this"]},
                                        {"$arrayElemAt": [query_embedding, "$$this"]}
                                    ]}
                                ]
                            }
                        }
                    },
                    "magnitude1": {
                        "$sqrt": {
                            "$reduce": {
                                "input": "$embedding",
                                "initialValue": 0,
                                "in": {"$add": ["$$value", {"$multiply": ["$$this", "$$this"]}]}
                            }
                        }
                    },
                    "magnitude2": {
                        "$sqrt": {
                            "$reduce": {
                                "input": query_embedding,
                                "initialValue": 0,
                                "in": {"$add": ["$$value", {"$multiply": ["$$this", "$$this"]}]}
                            }
                        }
                    }
                }
            },
            {
                "$addFields": {
                    "similarity": {
                        "$divide": ["$dotProduct", {"$multiply": ["$magnitude1", "$magnitude2"]}]
                    }
                }
            },
            {"$sort": {"similarity": -1}},
            {"$limit": limit},
            # Chunks written by process_single_file carry ``source_file``;
            # ``file_path`` is kept for documents that use the older key.
            {"$project": {"content": 1, "similarity": 1, "file_path": 1, "source_file": 1}}
        ]
        results = list(self.db.db.knowledges.aggregate(pipeline))
        return results
# Create the module-level singleton instance
knowledge_library = KnowledgeLibrary()
if __name__ == "__main__":
    # Interactive menu: import knowledge, wipe stored knowledge, or quit
    console = Console()
    console.print("[bold green]知识库处理工具[/bold green]")
    while True:
        console.print("\n请选择要执行的操作:")
        console.print("[1] 麦麦开始学习")
        console.print("[2] 麦麦全部忘光光(仅知识)")
        console.print("[q] 退出程序")
        choice = input("\n请输入选项: ").strip()
        if choice.lower() == 'q':
            console.print("[yellow]程序退出[/yellow]")
            sys.exit(0)
        elif choice == '2':
            # Deletes every document in the knowledges collection after confirmation
            confirm = input("确定要删除所有知识吗?这个操作不可撤销!(y/n): ").strip().lower()
            if confirm == 'y':
                knowledge_library.db.db.knowledges.delete_many({})
                console.print("[green]已清空所有知识![/green]")
            continue
        elif choice == '1':
            if not os.path.exists(knowledge_library.raw_info_dir):
                console.print(f"[yellow]创建目录:{knowledge_library.raw_info_dir}[/yellow]")
                os.makedirs(knowledge_library.raw_info_dir, exist_ok=True)
            # Ask for the split length until a valid value, an empty line or 'q' is given
            while True:
                try:
                    length_input = input("请输入知识分割长度默认512输入q退出回车使用默认值: ").strip()
                    if length_input.lower() == 'q':
                        break
                    if not length_input:  # plain Enter selects the default
                        knowledge_length = 512
                        break
                    knowledge_length = int(length_input)
                    if knowledge_length <= 0:
                        print("分割长度必须大于0请重新输入")
                        continue
                    break
                except ValueError:
                    print("请输入有效的数字")
                    continue
            # 'q' at the length prompt returns to the main menu
            if length_input.lower() == 'q':
                continue
            # Run the import with the chosen split length
            print(f"开始处理知识库文件,使用分割长度: {knowledge_length}...")
            knowledge_library.process_files(knowledge_length=knowledge_length)
        else:
            console.print("[red]无效的选项,请重新选择[/red]")
            continue

View File

@@ -1,5 +1,5 @@
[inner]
version = "0.0.6"
version = "0.0.8"
#如果你想要修改配置文件请在修改后将version的值进行变更
#如果新增项目请在BotConfig类下新增相应的变量
@@ -65,8 +65,13 @@ model_r1_distill_probability = 0.1 # 麦麦回答时选择次要回复模型3
max_response_length = 1024 # 麦麦回答的最大token数
[memory]
build_memory_interval = 300 # 记忆构建间隔 单位秒
forget_memory_interval = 300 # 记忆遗忘间隔 单位秒
build_memory_interval = 600 # 记忆构建间隔 单位秒 间隔越低,麦麦学习越多,但是冗余信息也会增多
memory_compress_rate = 0.1 # 记忆压缩率 控制记忆精简程度 建议保持默认,调高可以获得更多信息,但是冗余信息也会增多
forget_memory_interval = 600 # 记忆遗忘间隔 单位秒 间隔越低,麦麦遗忘越频繁,记忆更精简,但更难学习
memory_forget_time = 24 #多长时间后的记忆会被遗忘 单位小时
memory_forget_percentage = 0.01 # 记忆遗忘比例 控制记忆遗忘程度 越大遗忘越多 建议保持默认
memory_ban_words = [ #不希望记忆的词
# "403","张三"
@@ -101,6 +106,7 @@ word_replace_rate=0.006 # 整词替换概率
enable_advance_output = true # 是否启用高级输出
enable_kuuki_read = true # 是否启用读空气功能
enable_debug_output = false # 是否启用调试输出
enable_friend_chat = false # 是否启用好友聊天
[groups]
talk_allowed = [

View File

@@ -0,0 +1,4 @@
更新版本后,建议删除数据库 messages 集合中的所有内容,否则会出现报错。
该操作不会影响你的记忆。
如果提示配置文件版本过低,请运行根目录下的 bat 脚本。

View File

@@ -0,0 +1,45 @@
@echo off
rem Runs config/auto_update.py with either the local venv or a conda environment.
setlocal enabledelayedexpansion
chcp 65001
cd /d %~dp0
echo =====================================
echo 选择Python环境:
echo 1 - venv (推荐)
echo 2 - conda
echo =====================================
choice /c 12 /n /m "输入数字(1或2): "
if errorlevel 2 (
    echo =====================================
    set "CONDA_ENV="
    set /p CONDA_ENV="请输入要激活的 conda 环境名称: "
    rem Reject an empty environment name. rem is used here because a :: label
    rem comment inside a parenthesized block is fragile in cmd.
    if "!CONDA_ENV!"=="" (
        echo 错误:环境名称不能为空
        pause
        exit /b 1
    )
    call conda activate !CONDA_ENV!
    if errorlevel 1 (
        echo 激活 conda 环境失败
        pause
        exit /b 1
    )
    echo Conda 环境 "!CONDA_ENV!" 激活成功
    python config/auto_update.py
) else (
    if exist "venv\Scripts\python.exe" (
        venv\Scripts\python config/auto_update.py
    ) else (
        echo =====================================
        echo 错误: venv环境不存在请先创建虚拟环境
        pause
        exit /b 1
    )
)
endlocal
pause

45
麦麦开始学习.bat Normal file
View File

@@ -0,0 +1,45 @@
@echo off
rem Runs src/plugins/zhishi/knowledge_library.py with either the local venv
rem or a conda environment.
setlocal enabledelayedexpansion
chcp 65001
cd /d %~dp0
echo =====================================
echo 选择Python环境:
echo 1 - venv (推荐)
echo 2 - conda
echo =====================================
choice /c 12 /n /m "输入数字(1或2): "
if errorlevel 2 (
    echo =====================================
    set "CONDA_ENV="
    set /p CONDA_ENV="请输入要激活的 conda 环境名称: "
    rem Reject an empty environment name. rem is used here because a :: label
    rem comment inside a parenthesized block is fragile in cmd.
    if "!CONDA_ENV!"=="" (
        echo 错误:环境名称不能为空
        pause
        exit /b 1
    )
    call conda activate !CONDA_ENV!
    if errorlevel 1 (
        echo 激活 conda 环境失败
        pause
        exit /b 1
    )
    echo Conda 环境 "!CONDA_ENV!" 激活成功
    python src/plugins/zhishi/knowledge_library.py
) else (
    if exist "venv\Scripts\python.exe" (
        venv\Scripts\python src/plugins/zhishi/knowledge_library.py
    ) else (
        echo =====================================
        echo 错误: venv环境不存在请先创建虚拟环境
        pause
        exit /b 1
    )
)
endlocal
pause