Merge remote-tracking branch 'upstream/debug' into tc_refractor

This commit is contained in:
Rikki
2025-03-11 06:01:54 +08:00
47 changed files with 3696 additions and 1392 deletions


@@ -5,6 +5,7 @@ on:
branches:
- main
- debug # new: trigger on the debug branch
- stable-dev
tags:
- 'v*'
workflow_dispatch:
@@ -34,6 +35,8 @@ jobs:
echo "tags=${{ secrets.DOCKERHUB_USERNAME }}/maimbot:main,${{ secrets.DOCKERHUB_USERNAME }}/maimbot:latest" >> $GITHUB_OUTPUT
elif [ "${{ github.ref }}" == "refs/heads/debug" ]; then
echo "tags=${{ secrets.DOCKERHUB_USERNAME }}/maimbot:debug" >> $GITHUB_OUTPUT
elif [ "${{ github.ref }}" == "refs/heads/stable-dev" ]; then
echo "tags=${{ secrets.DOCKERHUB_USERNAME }}/maimbot:stable-dev" >> $GITHUB_OUTPUT
fi
- name: Build and Push Docker Image
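For reference, the branch-to-tags mapping this step computes is simple enough to sketch in Python (the repository name is illustrative; the real workflow reads it from secrets):

```python
# Sketch of the workflow's ref-to-tags logic; "user/maimbot" is a stand-in
# for the ${DOCKERHUB_USERNAME}/maimbot repository configured via secrets.
def docker_tags(ref: str, repo: str = "user/maimbot") -> list[str]:
    mapping = {
        "refs/heads/main": [f"{repo}:main", f"{repo}:latest"],
        "refs/heads/debug": [f"{repo}:debug"],
        "refs/heads/stable-dev": [f"{repo}:stable-dev"],
    }
    return mapping.get(ref, [])

assert docker_tags("refs/heads/debug") == ["user/maimbot:debug"]
```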

.gitignore

@@ -193,9 +193,8 @@ cython_debug/
# jieba
jieba.cache
# vscode
/.vscode
# .vscode
!.vscode/settings.json
# direnv
/.direnv


@@ -3,7 +3,7 @@
<div align="center">
![Python Version](https://img.shields.io/badge/Python-3.x-blue)
![Python Version](https://img.shields.io/badge/Python-3.9+-blue)
![License](https://img.shields.io/github/license/SengokuCola/MaiMBot)
![Status](https://img.shields.io/badge/状态-开发中-yellow)
@@ -29,15 +29,21 @@
</a>
</div>
> ⚠️ **Notes**
> [!WARNING]
> - The project is under active development and the code may change at any time
> - Documentation is incomplete; feel free to open an Issue or a Discussion with questions
> - QQ bots carry a risk of account restrictions; understand the risks and use with caution
> - Rapid iteration means there may be known or unknown bugs
> - The development build may consume a fair number of tokens
**Chat group**: 766798517, fairly full; consider the ones below instead (development and suggestion discussion). Replies are not guaranteed; documentation and code take priority
**Chat group**: 571780722, another group (development and suggestion discussion). Replies are not guaranteed; documentation and code take priority
## 💬 Chat groups
- [Group 1](https://qm.qq.com/q/VQ3XZrWgMs) 766798517, fairly full; consider the ones below (development and suggestion discussion). Replies are not guaranteed; documentation and code take priority
- [Group 2](https://qm.qq.com/q/RzmCiRtHEW) 571780722 (development and suggestion discussion). Replies are not guaranteed; documentation and code take priority
- [Group 3](https://qm.qq.com/q/wlH5eT8OmQ) 1035228475 (development and suggestion discussion). Replies are not guaranteed; documentation and code take priority
**Other platform versions**
- (contributed by [CabLate](https://github.com/cablate)) [Telegram and other platforms (more may follow)](https://github.com/cablate/MaiMBot/tree/telegram) - [central discussion thread](https://github.com/SengokuCola/MaiMBot/discussions/149)
##
<div align="left">
@@ -46,11 +52,16 @@
### Deployment options
If you don't know what Docker is, find a tutorial or use manual deployment
- 📦 **Windows one-click deployment**: run `run.bat` in the project root, then follow the configuration guides below
- [📦 Windows manual deployment guide](docs/manual_deploy_windows.md)
- [📦 Linux manual deployment guide](docs/manual_deploy_linux.md)
If you don't know what Docker is, find a tutorial or use manual deployment. **Docker is currently not recommended: the image updates slowly and may not match the code**
- [🐳 Docker deployment guide](docs/docker_deploy.md)
- [📦 Manual deployment guide](docs/manual_deploy.md)
### Configuration
- [🎀 Beginner's configuration guide](docs/installation_cute.md) - an easy-to-understand tutorial for first-time catgirls
@@ -129,9 +140,10 @@
## 📌 Notes
SengokuCola is a complete programming layman coding by way of Cursor; much of the code is a mess, please bear with it
> ⚠️ **Warning**: This application's output is generated by AI models. Screen it carefully, do not use it for unlawful purposes, and note that AI-generated content does not represent the author's views or positions.
SengokuCola is a complete programming layman coding by way of Cursor; much of the code is a mess, please bear with it
> [!WARNING]
> This application's output is generated by AI models. Screen it carefully, do not use it for unlawful purposes, and note that AI-generated content does not represent the author's views or positions.
## Acknowledgements
[nonebot2](https://github.com/nonebot/nonebot2): a cross-platform Python asynchronous chatbot framework
@@ -142,7 +154,7 @@ SengokuCola is a complete programming layman coding by way of Cursor; much of the code is a mess
Thanks to all the contributors!
<a href="https://github.com/SengokuCola/MaiMBot/graphs/contributors">
<img src="https://contrib.rocks/image?repo=SengokuCola/MaiMBot&time=true" />
<img src="https://contrib.rocks/image?repo=SengokuCola/MaiMBot" />
</a>

bot.py

@@ -1,12 +1,26 @@
import asyncio
import os
import shutil
import sys
import nonebot
import time
import uvicorn
from dotenv import load_dotenv
from loguru import logger
from nonebot.adapters.onebot.v11 import Adapter
import platform
'''Easter egg'''
from colorama import Fore, init
# Capture the environment variables that exist before any .env file is loaded
env_mask = {key: os.getenv(key) for key in os.environ}
uvicorn_server = None
def easter_egg():
# Easter egg
from colorama import init, Fore
init()
text = "Many years later, facing the AI firing squad, Zhang San would remember that distant afternoon in 2023 when he discussed artificial intelligence in a meeting"
@@ -15,12 +29,13 @@ rainbow_text = ""
for i, char in enumerate(text):
rainbow_text += rainbow_colors[i % len(rainbow_colors)] + char
print(rainbow_text)
'''Easter egg'''
def init_config():
# First-run check
if not os.path.exists("config/bot_config.toml"):
logger.warning("bot_config.toml not found; copying it from the template")
import shutil
# Make sure the config directory exists
if not os.path.exists("config"):
os.makedirs("config")
@@ -29,6 +44,8 @@ if not os.path.exists("config/bot_config.toml"):
shutil.copy("template/bot_config_template.toml", "config/bot_config.toml")
logger.info("Copy complete; edit the settings in config/bot_config.toml and .env.prod, then restart")
def init_env():
# Initialize .env with a default of ENVIRONMENT=prod
if not os.path.exists(".env"):
with open(".env", "w") as f:
@@ -39,33 +56,142 @@ if not os.path.exists(".env"):
logger.error(".env.prod file not found")
shutil.copy("template.env", "./.env.prod")
# If .env.dev does not exist, copy the production config as a starting point
if not os.path.exists(".env.dev"):
logger.error(".env.dev file not found")
shutil.copy(".env.prod", "./.env.dev")
# Load the base environment variables (.env) first
if os.path.exists(".env"):
load_dotenv(".env")
logger.success("Loaded the base environment variable configuration")
# Load the matching environment config based on ENVIRONMENT
if os.getenv("ENVIRONMENT") == "prod":
def load_env():
# Use closures so the loaders can be extended laterally without piles of repeated ifs
def prod():
logger.success("Loading production environment variables")
load_dotenv(".env.prod", override=True) # override=True allows overwriting existing environment variables
elif os.getenv("ENVIRONMENT") == "dev":
def dev():
logger.success("Loading development environment variables")
load_dotenv(".env.dev", override=True) # override=True allows overwriting existing environment variables
elif os.path.exists(f".env.{os.getenv('ENVIRONMENT')}"):
logger.success(f"Loading {os.getenv('ENVIRONMENT')} environment variables")
load_dotenv(f".env.{os.getenv('ENVIRONMENT')}", override=True) # override=True allows overwriting existing environment variables
fn_map = {
"prod": prod,
"dev": dev
}
env = os.getenv("ENVIRONMENT")
logger.info(f"[load_env] current ENVIRONMENT value: {env}")
if env in fn_map:
fn_map[env]() # dispatch to the mapped closure
elif os.path.exists(f".env.{env}"):
logger.success(f"Loading {env} environment variables")
load_dotenv(f".env.{env}", override=True) # override=True allows overwriting existing environment variables
else:
logger.error(f"ENVIRONMENT is misconfigured; check that the ENVIRONMENT variable in .env has a matching .env.{os.getenv('ENVIRONMENT')} file")
exit(1)
logger.error(f"ENVIRONMENT is misconfigured; check that the ENVIRONMENT variable in .env has a matching .env.{env} file")
raise RuntimeError(f"ENVIRONMENT is misconfigured; check the ENVIRONMENT variable in .env and that the matching .env.{env} file exists")
# Check that the required key exists
if not os.getenv("SILICONFLOW_KEY"):
logger.error("Missing a required API KEY")
logger.error(f"Please set at least SILICONFLOW_KEY in .env.{os.getenv('ENVIRONMENT')}, then restart")
exit(1)
# Snapshot all environment variables
def load_logger():
logger.remove() # remove the default handler
logger.add(
sys.stderr,
format="<green>{time:YYYY-MM-DD HH:mm:ss.SSS}</green> <fg #777777>|</> <level>{level: <7}</level> <fg "
"#777777>|</> <cyan>{name:.<8}</cyan>:<cyan>{function:.<8}</cyan>:<cyan>{line: >4}</cyan> <fg "
"#777777>-</> <level>{message}</level>",
colorize=True,
level=os.getenv("LOG_LEVEL", "INFO"), # log level from the environment; defaults to INFO
filter=lambda record: "nonebot" not in record["name"]
)
def scan_provider(env_config: dict):
provider = {}
# Use env_mask (captured before .env was loaded) to deduplicate the new environment set,
# so variables like GPG_KEY don't interfere with the check
env_config = dict(filter(lambda item: item[0] not in env_mask, env_config.items()))
# Walk every key in env_config
for key in env_config:
# Match keys shaped like {provider}_BASE_URL or {provider}_KEY
if key.endswith("_BASE_URL") or key.endswith("_KEY"):
# Extract the provider name
provider_name = key.split("_", 1)[0] # split once from the left and take the first part
# Initialize this provider's dict if needed
if provider_name not in provider:
provider[provider_name] = {"url": None, "key": None}
# Fill in url or key depending on the key's suffix
if key.endswith("_BASE_URL"):
provider[provider_name]["url"] = env_config[key]
elif key.endswith("_KEY"):
provider[provider_name]["key"] = env_config[key]
# Verify that every provider has both a url and a key
for provider_name, config in provider.items():
if config["url"] is None or config["key"] is None:
logger.error(
f"provider contents: {config}\n"
f"env_config contents: {env_config}"
)
raise ValueError(f"Provider '{provider_name}' is missing its BASE_URL or KEY environment variable; please check")
async def graceful_shutdown():
try:
global uvicorn_server
if uvicorn_server:
uvicorn_server.force_exit = True # force exit
await uvicorn_server.shutdown()
tasks = [t for t in asyncio.all_tasks() if t is not asyncio.current_task()]
for task in tasks:
task.cancel()
await asyncio.gather(*tasks, return_exceptions=True)
except Exception as e:
logger.error(f"麦麦 failed to shut down: {e}")
async def uvicorn_main():
global uvicorn_server
config = uvicorn.Config(
app="__main__:app",
host=os.getenv("HOST", "127.0.0.1"),
port=int(os.getenv("PORT", 8080)),
reload=os.getenv("ENVIRONMENT") == "dev",
timeout_graceful_shutdown=5,
log_config=None,
access_log=False
)
server = uvicorn.Server(config)
uvicorn_server = server
await server.serve()
def raw_main():
# Use the TZ environment variable to set the program's working timezone
# This only keeps behavior consistent instead of relying on localtime(); it matters little in production
if platform.system().lower() != 'windows':
time.tzset()
easter_egg()
load_logger()
init_config()
init_env()
load_env()
load_logger()
env_config = {key: os.getenv(key) for key in os.environ}
scan_provider(env_config)
# Base configuration
base_config = {
@@ -78,11 +204,30 @@ base_config = {
nonebot.init(**base_config, **env_config)
# Register the adapter
global driver
driver = nonebot.get_driver()
driver.register_adapter(Adapter)
# Load plugins
nonebot.load_plugins("src/plugins")
if __name__ == "__main__":
nonebot.run()
try:
raw_main()
global app
app = nonebot.get_asgi()
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
loop.run_until_complete(uvicorn_main())
except KeyboardInterrupt:
logger.warning("麦麦 will try to do better! Shutting down......")
except Exception as e:
logger.error(f"Main program exception: {e}")
finally:
loop.run_until_complete(graceful_shutdown())
loop.close()
logger.info("Process terminated; 麦麦 is going to sleep......see you next time!")

changelog.md

@@ -0,0 +1,6 @@
# Changelog
## [0.5.12] - 2025-3-9
### Added
- Added: I am a test

changelog_config.md

@@ -0,0 +1,12 @@
# Changelog
## [0.0.5] - 2025-3-11
### Added
- Added the `alias_names` option for setting 麦麦's aliases.
## [0.0.4] - 2025-3-9
### Added
- Added the `memory_ban_words` option for listing words that should not be memorized.


@@ -2,47 +2,47 @@ services:
napcat:
container_name: napcat
environment:
- tz=Asia/Shanghai
- TZ=Asia/Shanghai
- NAPCAT_UID=${NAPCAT_UID}
- NAPCAT_GID=${NAPCAT_GID}
- NAPCAT_GID=${NAPCAT_GID} # give NapCat the current user's GID/UID to avoid permission problems
ports:
- 3000:3000
- 3001:3001
- 6099:6099
restart: always
restart: unless-stopped
volumes:
- napcatQQ:/app/.config/QQ
- napcatCONFIG:/app/napcat/config
- maimbotDATA:/MaiMBot/data # 麦麦's images must be shared with napcat, or sending pictures breaks
- napcatQQ:/app/.config/QQ # persist the QQ client itself
- napcatCONFIG:/app/napcat/config # persist NapCat's config files
- maimbotDATA:/MaiMBot/data # shared by NapCat and NoneBot, or sending pictures breaks
image: mlikiowa/napcat-docker:latest
mongodb:
container_name: mongodb
environment:
- tz=Asia/Shanghai
- TZ=Asia/Shanghai
# - MONGO_INITDB_ROOT_USERNAME=your_username
# - MONGO_INITDB_ROOT_PASSWORD=your_password
expose:
- "27017"
restart: always
restart: unless-stopped
volumes:
- mongodb:/data/db
- mongodbCONFIG:/data/configdb
- mongodb:/data/db # persist the MongoDB database
- mongodbCONFIG:/data/configdb # persist MongoDB's config files
image: mongo:latest
maimbot:
container_name: maimbot
environment:
- tz=Asia/Shanghai
- TZ=Asia/Shanghai
expose:
- "8080"
restart: always
restart: unless-stopped
depends_on:
- mongodb
- napcat
volumes:
- napcatCONFIG:/MaiMBot/napcat # auto-creates the reverse WS client config from the QQ number in the config
- ./bot_config.toml:/MaiMBot/config/bot_config.toml
- maimbotDATA:/MaiMBot/data
- ./.env.prod:/MaiMBot/.env.prod
- napcatCONFIG:/MaiMBot/napcat # auto-creates the reverse WS client config from the QQ number in the config
- ./bot_config.toml:/MaiMBot/config/bot_config.toml # TOML config file mapping
- maimbotDATA:/MaiMBot/data # shared by NapCat and NoneBot, or sending pictures breaks
- ./.env.prod:/MaiMBot/.env.prod # env config file mapping
image: sengokucola/maimbot:latest
volumes:

docs/Jonathan R.md

@@ -0,0 +1,20 @@
Jonathan R. Wolpaw, in "Memory in neuroscience: rhetoric versus reality," argues from neuroscience's sensorimotor hypothesis that the function of the entire nervous system is to link experience to appropriate behavior, not simply to store information.
Wolpaw, J. R. (2019). Memory in neuroscience: rhetoric versus reality. Behavioral and Cognitive Neuroscience Reviews, (2).
1. **Single-process theories**
- Single-process theories hold that recognition memory rests mainly on a single factor: familiarity. Familiarity is an automatic, unconscious sense of a stimulus that lets us judge whether it has appeared before without recalling any specific details.
- For example, in some experiments participants correctly judged that a stimulus had appeared before without recalling the specific learning context, which is taken as familiarity at work.
2. **Dual-process theories**
- Dual-process theories hold that recognition memory rests on two processes: recollection and familiarity. Recollection is the conscious retrieval of past experience, bringing back specific details and context; familiarity is the automatic, unconscious sense described above.
- On this view, recollection and familiarity work together to let us judge whether a stimulus has appeared before. In the "remember/know" paradigm, for instance, participants judge whether their memory of a stimulus rests on recollection or on familiarity; the finding that they can distinguish the two processes supports dual-process theories.
1. **Neuron nodes and connections**: Borrowing from neural-network principles, each memory unit is treated as a neuron node. Nodes are linked by connections whose strength encodes how closely the memories are associated. In morphological associative memory, nodes for memories with similar morphological features are strongly connected. For example, the nodes for apple and orange, similar in shape and in both being fruit, are more strongly connected than the nodes for apple and car.
2. **Memory clustering and hierarchy**: Memories are clustered by similarity of morphological features, forming memory clusters; memories within a cluster are highly similar, while memories across clusters are not. A hierarchy is built on top: high-level nodes stand for more abstract, general concepts, and low-level nodes for concrete instances. "Fruit," for instance, is a high-level node connected to low-level nodes for "apple," "orange," "banana," and so on.
3. **Dynamic network updates**: As new memories arrive, the network adjusts. A new memory node connects to existing nodes according to its morphological features and shifts the strengths of the related connections. If the new memory closely matches an existing cluster's features it joins that cluster; if its features are distinctive it may seed a new one. When the system learns a new fruit such as "guava," it finds the most similar region of the network (the fruit cluster) based on guava's morphological and semantic features, builds the corresponding connections, and adjusts the surrounding connection strengths to accommodate the new memory.
- **Similarity-based association**: When two or more things are morphologically similar, they become associated in memory. Pears and apples, similar in shape and both being fruit, are associated, so seeing a pear readily calls an apple to mind through morphological associative memory. Similarity-based association helps us classify and understand new things: given a new fruit-like object, we can match it against existing fruit memories to infer some of its properties.
- **Spatiotemporal association**: Besides similarity, MAM stresses association by co-occurrence in time or space. If two things often appear together, they become associated in memory. If flowers in a park are regularly accompanied by birdsong, the morphological features of the two (the flowers' visual form and the birdsong's auditory form) become linked in memory, and later hearing birdsong may call the park's flowers to mind.
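The node-and-edge scheme described above maps naturally onto a weighted graph; a toy Python sketch (not from this repository) of similarity-weighted linking:

```python
# Toy associative-memory graph: nodes are concepts, edge weights encode
# association strength, and new memories link to sufficiently similar ones.
from difflib import SequenceMatcher

class MemoryGraph:
    def __init__(self):
        self.edges: dict[tuple[str, str], float] = {}

    @staticmethod
    def similarity(a: str, b: str) -> float:
        # Stand-in for a real morphological/semantic similarity measure.
        return SequenceMatcher(None, a, b).ratio()

    def add(self, concept: str, known: list[str], threshold: float = 0.3) -> None:
        for other in known:
            s = self.similarity(concept, other)
            if s >= threshold:  # strengthen links between similar memories
                key = tuple(sorted((concept, other)))
                self.edges[key] = max(self.edges.get(key, 0.0), s)

g = MemoryGraph()
g.add("apple", ["apricot", "car"])  # links "apple"-"apricot"; "car" stays unlinked
```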


@@ -1,22 +1,95 @@
# 🐳 Docker Deployment Guide
## Deployment steps, recommended but not necessarily the newest
## Deployment steps (recommended, but not necessarily the newest)
**See [Part 6](#6-更新镜像与容器) of this document for "updating the image and containers"**
### 0. Prerequisites
**This guide assumes basic familiarity with Docker. If you are new to Docker, work through a tutorial first, or use the [📦 Linux manual deployment guide](./manual_deploy_linux.md) or the [📦 Windows manual deployment guide](./manual_deploy_windows.md) instead.**
### 1. Fetch the Docker configuration
- Create a dedicated directory first and work inside it
1. Fetch the configuration file:
```bash
wget https://raw.githubusercontent.com/SengokuCola/MaiMBot/main/docker-compose.yml
wget https://raw.githubusercontent.com/SengokuCola/MaiMBot/main/docker-compose.yml -O docker-compose.yml
```
2. Start the services:
- To enable a username and password for MongoDB, open docker-compose.yml, uncomment the MongoDB lines, and change the values after `=` to your username and password\
If you do, remember to set the MongoDB username and password later when configuring `.env.prod`
### 2. Start the services
- **!!! Before the first start, make sure `.env.prod` and `bot_config.toml` exist in the current working directory !!!**\
Because of how Docker handles file mappings, a missing host-side path may be created as an unexpected directory instead of a file. Since these mappings are file-to-file, make sure the files exist at the right paths first, for example with:
```bash
touch .env.prod
touch bot_config.toml
```
- Start the Docker containers:
```bash
NAPCAT_UID=$(id -u) NAPCAT_GID=$(id -g) docker compose up -d
# Older Docker releases may not have `docker compose`; use the docker-compose tool instead
NAPCAT_UID=$(id -u) NAPCAT_GID=$(id -g) docker-compose up -d
```
3. Restart after changing the configuration:
### 3. Edit the configuration and restart Docker
- Follow the [🎀 beginner's configuration guide](./installation_cute.md) or the [⚙️ standard configuration guide](./installation_standard.md) to fill in `.env.prod` and `bot_config.toml`\
**Pay attention to the HOST IP in `.env.prod`: it differs between a Docker deployment and a direct install**
- Restart the Docker container:
```bash
docker restart maimbot # if you renamed the container, replace maimbot with your name
```
- The command below also works, though it is not recommended; it restarts NapCat, MongoDB, and MaiMBot all at once
```bash
NAPCAT_UID=$(id -u) NAPCAT_GID=$(id -g) docker compose restart
# Older Docker releases may not have `docker compose`; use the docker-compose tool instead
NAPCAT_UID=$(id -u) NAPCAT_GID=$(id -g) docker-compose restart
```
### 4. Log in to the NapCat admin page and add a reverse WebSocket
- Open `http://<host IP>:6099/` in a browser to reach NapCat's web admin page and add a WebSocket client
> Network config -> New -> WebSocket client
- Name the WebSocket client whatever you like, set the URL to `ws://maimbot:8080/onebot/v11/ws`, enable it, and save\
(if you renamed the container, replace maimbot with your name)
### 5. Deployment complete; enjoy chatting with 麦麦!
### 6. Updating the image and containers
- Pull the latest image
```bash
docker-compose pull
```
- Run the start command; it automatically recreates and starts any containers whose image was updated
```bash
NAPCAT_UID=$(id -u) NAPCAT_GID=$(id -g) docker compose up -d
# Older Docker releases may not have `docker compose`; use the docker-compose tool instead
NAPCAT_UID=$(id -u) NAPCAT_GID=$(id -g) docker-compose up -d
```
## ⚠️ Notes
- This deployment path is still being tested and may have unknown issues


@@ -52,12 +52,12 @@ key = "SILICONFLOW_KEY" # the same ticket works here too!
If you want DeepSeek's official service instead, change it like this:
```toml
[model.llm_reasoning]
name = "Pro/deepseek-ai/DeepSeek-R1"
name = "deepseek-reasoner" # change to the matching model name; this is DeepSeek R1
base_url = "DEEP_SEEK_BASE_URL" # head to the DeepSeek playground instead
key = "DEEP_SEEK_KEY" # use your DeepSeek ticket
[model.llm_normal]
name = "Pro/deepseek-ai/DeepSeek-V3"
name = "deepseek-chat" # change to the matching model name; this is DeepSeek V3
base_url = "DEEP_SEEK_BASE_URL" # also the DeepSeek playground
key = "DEEP_SEEK_KEY" # the same DeepSeek ticket
```
@@ -88,11 +88,11 @@ CHAT_ANY_WHERE_KEY=your_key
CHAT_ANY_WHERE_BASE_URL=https://api.chatanywhere.tech/v1
# If you don't know what this is, just leave the lines below as they are
HOST=127.0.0.1
HOST=127.0.0.1 # with a Docker deployment this needs to be 0.0.0.0, meow, otherwise the bot can't hear the group chatting, meow
PORT=8080
# These are database settings; usually no need to change them either
MONGODB_HOST=127.0.0.1
MONGODB_HOST=127.0.0.1 # with a Docker deployment this needs to be the database container's name, meow; the default is mongodb, meow
MONGODB_PORT=27017
DATABASE_NAME=MegBot
MONGODB_USERNAME = "" # if the database needs a username, fill it in here, meow
@@ -110,7 +110,8 @@ PLUGINS=["src2.plugins.chat"] # this is the bot's plugin list
```toml
[bot]
qq = "put your bot's QQ number here, meow" # fill in your bot's QQ number
nickname = "麦麦" # the bot's name; change it to anything you like
nickname = "麦麦" # the bot's name; change it to anything you like (matching the bot's QQ name/group nickname is recommended!)
alias_names = ["小麦", "阿麦"] # you can address the bot with these too; feel free to leave it unset
[personality]
# Set the bot's personality here to make it more fun, meow


@@ -34,7 +34,7 @@ key = "SILICONFLOW_KEY" # references the key defined in .env.prod
To switch to another API service, just change the references:
```toml
[model.llm_reasoning]
name = "Pro/deepseek-ai/DeepSeek-R1"
name = "deepseek-reasoner" # change to the matching model name; this is DeepSeek R1
base_url = "DEEP_SEEK_BASE_URL" # switch to the DeepSeek service
key = "DEEP_SEEK_KEY" # use the DeepSeek key
```
@@ -52,12 +52,12 @@ CHAT_ANY_WHERE_KEY=your_key
CHAT_ANY_WHERE_BASE_URL=https://api.chatanywhere.tech/v1
# Service configuration
HOST=127.0.0.1
PORT=8080
HOST=127.0.0.1 # with a Docker deployment this must be 0.0.0.0, or QQ messages cannot reach the bot
PORT=8080 # same as the reverse WS port
# Database configuration
MONGODB_HOST=127.0.0.1
MONGODB_PORT=27017
MONGODB_HOST=127.0.0.1 # with a Docker deployment this must be the database container's name; the default is mongodb
MONGODB_PORT=27017 # MongoDB port
DATABASE_NAME=MegBot
MONGODB_USERNAME = "" # database username
MONGODB_PASSWORD = "" # database password
@@ -72,6 +72,9 @@ PLUGINS=["src2.plugins.chat"]
[bot]
qq = "the bot's QQ number" # required
nickname = "麦麦" # the bot's nickname
# alias_names: aliases the bot answers to. When the bot is invoked in a group or conversation, an alias works as a direct command or as a keyword for mentioning the bot.
# This option is an array of strings, e.g. ["小麦", "阿麦"]
alias_names = ["小麦", "阿麦"] # bot aliases
[personality]
prompt_personality = [

docs/manual_deploy_linux.md

@@ -0,0 +1,115 @@
# 📦 Deploying MaiMbot (麦麦) manually on Linux
## Prerequisites
- A Linux machine with internet access (this guide uses Ubuntu/Debian as the example)
- A secondary QQ account (QQ frameworks can trigger risk controls; in severe but rare cases the account may be banned, so using your main account is strongly discouraged)
- A usable large-model API
- An AI assistant (any one you find online will do) to help with anything you don't understand
- The following assumes some familiarity with Linux; if it feels hard to follow, deploy on Windows instead: [Windows deployment guide](./manual_deploy_windows.md)
## What you should know
- How to ask an AI assistant good questions to learn new things
- What Python is
- What a Python virtual environment is and how to create one
- What a command line is
- What a database is and how to install and start MongoDB
- How to run a QQ bot and what the NapCat framework is
---
## Environment setup
### 1⃣ **Check the Python version**
Python 3.9 or later is required:
```bash
python --version
# or
python3 --version
```
If the version is below 3.9, update Python.
```bash
# Ubuntu/Debian
sudo apt update
sudo apt install python3.9
# If you do this, it is best to point python3 at python3.9 at the same time
# Update the alternatives so python3.9 becomes the default python3:
sudo update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.9 1
sudo update-alternatives --config python3
```
### 2⃣ **Create a virtual environment**
```bash
# Option 1: venv (recommended)
python3 -m venv maimbot
source maimbot/bin/activate # activate the environment
# Option 2: conda (requires Miniconda)
wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
bash Miniconda3-latest-Linux-x86_64.sh
conda create -n maimbot python=3.9
conda activate maimbot
# After creating and entering a virtual environment with either option, run:
# Install dependencies (either environment works)
pip install -r requirements.txt
```
---
## Database setup
### 3⃣ **Install and start MongoDB**
- Installation and startup: for Debian see the [official docs](https://docs.mongodb.com/manual/tutorial/install-mongodb-on-debian/); for Ubuntu see the [official docs](https://docs.mongodb.com/manual/tutorial/install-mongodb-on-ubuntu/)
- Connects to local port 27017 by default
---
## NapCat setup
### 4⃣ **Install the NapCat framework**
- Install per the [NapCat official docs](https://www.napcat.wiki/guide/boot/Shell#napcat-installer-linux%E4%B8%80%E9%94%AE%E4%BD%BF%E7%94%A8%E8%84%9A%E6%9C%AC-%E6%94%AF%E6%8C%81ubuntu-20-debian-10-centos9)
- Log in with your secondary QQ account and add the reverse WS address: `ws://127.0.0.1:8080/onebot/v11/ws`
---
## Configuration files
### 5⃣ **Configure the files so 麦麦Bot works properly**
- Edit the environment config file: `.env.prod`
- Edit the bot config file: `bot_config.toml`
---
## Start the bot
### 6⃣ **Start 麦麦**
```bash
# From the project directory
nb run
# or
python3 bot.py
```
---
## **Other components (optional)**
- Run knowledge.py directly (builds the knowledge base)
---
## FAQ
🔧 Permission problems: prefix the command with `sudo`
🔌 Port conflicts: check with `sudo lsof -i :8080`
🛡️ Firewall: make sure ports 8080/27017 are open
```bash
sudo ufw allow 8080/tcp
sudo ufw allow 27017/tcp
```


@@ -1,4 +1,4 @@
# 📦 Deploying MaiMbot (麦麦) manually
# 📦 Deploying MaiMbot (麦麦) manually on Windows
## What you need
@@ -30,7 +30,7 @@
Before creating a virtual environment, make sure Python 3.9 or later is installed. If not, install it as follows:
1. Visit the Python download page: https://www.python.org/downloads/release/python-3913/
1. Visit the Python download page: https://www.python.org/downloads/release/python-3913/
2. Download the Windows installer (64-bit): `python-3.9.13-amd64.exe`
3. Run the installer and make sure "Add Python 3.9 to PATH" is checked
4. Click "Install Now" to begin the installation
@@ -79,11 +79,11 @@ pip install -r requirements.txt
### 3⃣ **Configure NapCat so 麦麦bot can reach QQ**
- Install NapCat and log in (with your secondary QQ account)
- Add a reverse WS: `ws://localhost:8080/onebot/v11/ws`
- Add a reverse WS: `ws://127.0.0.1:8080/onebot/v11/ws`
### 4⃣ **Configure the files so 麦麦Bot works properly**
- Edit the environment config file `.env.prod`
- Edit the bot config file `bot_config.toml`
- Edit the environment config file: `.env.prod`
- Edit the bot config file: `bot_config.toml`
### 5⃣ **Start 麦麦**
- Open a command line and cd to the project path


@@ -22,6 +22,7 @@
pythonEnv = pkgs.python3.withPackages (
ps: with ps; [
ruff
pymongo
python-dotenv
pydantic


@@ -1,23 +1,51 @@
[project]
name = "Megbot"
name = "MaiMaiBot"
version = "0.1.0"
description = "New Bot Project"
description = "MaiMaiBot"
[tool.nonebot]
plugins = ["src.plugins.chat"]
plugin_dirs = ["src/plugins"]
[tool.ruff]
# Target Python version
target-version = "py39"
include = ["*.py"]
# Line-length setting
line-length = 120
[tool.ruff.lint]
fixable = ["ALL"]
unfixable = []
# A variable whose name starts with an underscore should not be flagged even if unused.
dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
# Enabled rules
select = [
"E", # pycodestyle errors
"F", # pyflakes
"I", # isort
"B", # flake8-bugbear
]
# Line-length setting
line-length = 88
ignore = ["E711"]
[tool.ruff.format]
docstring-code-format = true
indent-style = "space"
# Use double quotes for strings
quote-style = "double"
# Respect magic trailing commas
# For example:
# items = [
# "apple",
# "banana",
# "cherry",
# ]
skip-magic-trailing-comma = false
# Auto-detect the appropriate line ending
line-ending = "auto"
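For reference, E711 is the pycodestyle rule against equality comparison with None; with it ignored, code like this illustrative sketch passes lint:

```python
# With E711 ignored, equality comparison against None is not flagged.
value = None
if value == None:  # E711 would normally flag this line
    pass
if value is None:  # the idiomatic identity check
    pass
```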

Binary file not shown.


@@ -1,6 +1,10 @@
@ECHO OFF
chcp 65001
REM python -m venv venv
if not exist "venv" (
python -m venv venv
call venv\Scripts\activate.bat
REM pip install -i https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple --upgrade -r requirements.txt
pip install -i https://mirrors.aliyun.com/pypi/simple --upgrade -r requirements.txt
) else (
call venv\Scripts\activate.bat
)
python run.py

run.py

@@ -1,7 +1,7 @@
import os
import subprocess
import zipfile
import sys
import requests
from tqdm import tqdm
@@ -37,7 +37,7 @@ def extract_files(zip_path, target_dir):
f.write(zip_ref.read(file))
def run_cmd(command: str, open_new_window: bool = False):
def run_cmd(command: str, open_new_window: bool = True):
"""
运行 cmd 命令
@@ -45,26 +45,19 @@ def run_cmd(command: str, open_new_window: bool = False):
command (str): 指定要运行的命令
open_new_window (bool): 指定是否新建一个 cmd 窗口运行
"""
creationflags = 0
if open_new_window:
creationflags = subprocess.CREATE_NEW_CONSOLE
subprocess.Popen(
[
"cmd.exe",
"/c",
command,
],
creationflags=creationflags,
)
command = "start " + command
subprocess.Popen(command, shell=True)
def run_maimbot():
run_cmd(r"napcat\NapCatWinBootMain.exe 10001", False)
if not os.path.exists(r"mongodb\db"):
os.makedirs(r"mongodb\db")
run_cmd(
r"mongodb\bin\mongod.exe --dbpath=" + os.getcwd() + r"\mongodb\db --port 27017",
True,
r"mongodb\bin\mongod.exe --dbpath=" + os.getcwd() + r"\mongodb\db --port 27017"
)
run_cmd("nb run", True)
run_cmd("nb run")
def install_mongodb():
@@ -90,14 +83,32 @@ def install_mongodb():
extract_files("mongodb.zip", "mongodb")
print("MongoDB 下载完成")
os.remove("mongodb.zip")
choice = input(
"Install MongoDB Compass? It lets you view and edit the database visually; recommended. (Y/n)"
).upper()
if choice == "Y" or choice == "":
install_mongodb_compass()
def install_mongodb_compass():
run_cmd(
r"powershell Start-Process powershell -Verb runAs 'Set-ExecutionPolicy RemoteSigned'"
)
input("Click “Yes” in the UAC prompt that appears, then press any key to continue the installation")
run_cmd(r"powershell mongodb\bin\Install-Compass.ps1")
input("Press any key to start 麦麦")
input("If you don't need to start it, you can close this window directly; no need to wait for the Compass installation to finish")
run_maimbot()
def install_napcat():
run_cmd("start https://github.com/NapNeko/NapCatQQ/releases", True)
run_cmd("start https://github.com/NapNeko/NapCatQQ/releases", False)
print("Check the browser window that opened and click the **first** blue “Win64 headless” entry to download napcat")
napcat_filename = input(
"After the download finishes, copy the file into this folder and enter the **file name without the extension** here, e.g. NapCat.32793.Shell"
)
if napcat_filename.endswith(".zip"):
napcat_filename = napcat_filename[:-4]
extract_files(napcat_filename + ".zip", "napcat")
print("NapCat installation complete")
os.remove(napcat_filename + ".zip")
@@ -105,11 +116,15 @@ def install_napcat():
if __name__ == "__main__":
os.system("cls")
if sys.version_info < (3, 9):
print("当前 Python 版本过低,最低版本为 3.9,请更新 Python 版本")
print("按任意键退出")
input()
exit(1)
choice = input(
"请输入要进行的操作:\n"
"1.首次安装\n"
"2.运行麦麦\n"
"3.运行麦麦并启动可视化推理界面\n"
)
os.system("cls")
if choice == "1":
@@ -117,6 +132,9 @@ if __name__ == "__main__":
install_mongodb()
elif choice == "2":
run_maimbot()
elif choice == "3":
run_maimbot()
run_cmd("python src/gui/reasoning_gui.py", True)
choice = input("是否启动推理可视化y/N").upper()
if choice == "Y":
run_cmd(r"python src\gui\reasoning_gui.py")
choice = input("是否启动记忆可视化y/N").upper()
if choice == "Y":
run_cmd(r"python src/plugins/memory_system/memory_manual_build.py")

run_memory_vis.bat

@@ -0,0 +1,29 @@
@echo on
chcp 65001 > nul
set /p CONDA_ENV="Enter the conda environment to activate: "
call conda activate %CONDA_ENV%
if errorlevel 1 (
echo Failed to activate the conda environment
pause
exit /b 1
)
echo Conda environment "%CONDA_ENV%" activated
set /p OPTION="Choose an option (1: full drawing, 2: simple drawing): "
if "%OPTION%"=="1" (
python src/plugins/memory_system/memory_manual_build.py
) else if "%OPTION%"=="2" (
python src/plugins/memory_system/draw_memory.py
) else (
echo Invalid option
pause
exit /b 1
)
if errorlevel 1 (
echo Command failed with error code %errorlevel%
pause
exit /b 1
)
echo Script completed successfully
pause


@@ -5,6 +5,9 @@ import threading
import time
from datetime import datetime
from typing import Dict, List
from loguru import logger
from typing import Optional
from pymongo import MongoClient
import customtkinter as ctk
from dotenv import load_dotenv
@@ -17,23 +20,20 @@ root_dir = os.path.abspath(os.path.join(current_dir, '..', '..'))
# Load environment variables
if os.path.exists(os.path.join(root_dir, '.env.dev')):
load_dotenv(os.path.join(root_dir, '.env.dev'))
print("Loaded development environment config")
logger.info("Loaded development environment config")
elif os.path.exists(os.path.join(root_dir, '.env.prod')):
load_dotenv(os.path.join(root_dir, '.env.prod'))
print("Loaded production environment config")
logger.info("Loaded production environment config")
else:
print("No environment config file found")
logger.error("No environment config file found")
sys.exit(1)
from typing import Optional
from pymongo import MongoClient
class Database:
_instance: Optional["Database"] = None
def __init__(self, host: str, port: int, db_name: str, username: str = None, password: str = None, auth_source: str = None):
def __init__(self, host: str, port: int, db_name: str, username: str = None, password: str = None,
auth_source: str = None):
if username and password:
self.client = MongoClient(
host=host,
@@ -47,7 +47,8 @@ class Database:
self.db = self.client[db_name]
@classmethod
def initialize(cls, host: str, port: int, db_name: str, username: str = None, password: str = None, auth_source: str = None) -> "Database":
def initialize(cls, host: str, port: int, db_name: str, username: str = None, password: str = None,
auth_source: str = None) -> "Database":
if cls._instance is None:
cls._instance = cls(host, port, db_name, username, password, auth_source)
return cls._instance
@@ -59,12 +60,11 @@ class Database:
return cls._instance
class ReasoningGUI:
def __init__(self):
# Record the startup timestamp (converted to a Unix timestamp)
self.start_timestamp = datetime.now().timestamp()
print(f"Program start timestamp: {self.start_timestamp}")
logger.info(f"Program start timestamp: {self.start_timestamp}")
# Set the theme
ctk.set_appearance_mode("dark")
@@ -79,15 +79,15 @@ class ReasoningGUI:
# Initialize the database connection
try:
self.db = Database.get_instance().db
print("Database connection established")
logger.success("Database connection established")
except RuntimeError:
print("Database not initialized; attempting to initialize...")
logger.warning("Database not initialized; attempting to initialize...")
try:
Database.initialize("localhost", 27017, "maimai_bot")
Database.initialize("127.0.0.1", 27017, "maimai_bot")
self.db = Database.get_instance().db
print("Database initialized")
except Exception as e:
print(f"Database initialization failed: {e}")
logger.success("Database initialized")
except Exception:
logger.exception("Database initialization failed")
sys.exit(1)
# Group data store
@@ -285,12 +285,12 @@ class ReasoningGUI:
try:
# Fetch fresh data from the database, keeping only records after the startup time
query = {"time": {"$gt": self.start_timestamp}}
print(f"Query filter: {query}")
logger.debug(f"Query filter: {query}")
# Fetch one record first to check the time format
sample = self.db.reasoning_logs.find_one()
if sample:
print(f"Sample record time type: {type(sample['time'])} value: {sample['time']}")
logger.debug(f"Sample record time type: {type(sample['time'])} value: {sample['time']}")
cursor = self.db.reasoning_logs.find(query).sort("time", -1)
new_data = {}
@@ -299,7 +299,7 @@ class ReasoningGUI:
for item in cursor:
# Debug output
if total_count == 0:
print(f"Record time: {item['time']}, type: {type(item['time'])}")
logger.debug(f"Record time: {item['time']}, type: {type(item['time'])}")
total_count += 1
group_id = str(item.get('group_id', 'unknown'))
@@ -312,7 +312,7 @@ class ReasoningGUI:
elif isinstance(item['time'], datetime):
time_obj = item['time']
else:
print(f"Unknown time format: {type(item['time'])}")
logger.warning(f"Unknown time format: {type(item['time'])}")
time_obj = datetime.now() # fall back to the current time
new_data[group_id].append({
@@ -325,12 +325,12 @@ class ReasoningGUI:
'prompt': item.get('prompt', '') # include the prompt field
})
print(f"Loaded {total_count} records across {len(new_data)} groups from the database")
logger.info(f"Loaded {total_count} records across {len(new_data)} groups from the database")
# Update the data
if new_data != self.group_data:
self.group_data = new_data
print("Data updated; refreshing the display...")
logger.info("Data updated; refreshing the display...")
# Queue the update task
self.update_queue.put({'type': 'update_group_list'})
if self.group_data:
@@ -341,8 +341,8 @@ class ReasoningGUI:
'type': 'update_display',
'group_id': self.selected_group_id
})
except Exception as e:
print(f"Auto-update error: {e}")
except Exception:
logger.exception("Auto-update error")
# Refresh every 5 seconds
time.sleep(5)
@@ -371,6 +371,5 @@ def main():
app.run()
if __name__ == "__main__":
main()


@@ -1,12 +1,9 @@
import asyncio
import os
import random
import time
from loguru import logger
from nonebot import get_driver, on_command, on_message, require
from nonebot import get_driver, on_message, require
from nonebot.adapters.onebot.v11 import Bot, GroupMessageEvent, Message, MessageSegment
from nonebot.rule import to_me
from nonebot.typing import T_State
from ...common.database import Database
@@ -19,6 +16,10 @@ from .emoji_manager import emoji_manager
from .relationship_manager import relationship_manager
from .willing_manager import willing_manager
from .chat_stream import chat_manager
from ..memory_system.memory import hippocampus, memory_graph
from .bot import ChatBot
from .message_sender import message_manager, message_sender
# Create the LLM statistics instance
llm_stats = LLMStatistics("llm_statistics.txt")
@@ -38,20 +39,13 @@ Database.initialize(
password=config.MONGODB_PASSWORD,
auth_source=config.MONGODB_AUTH_SOURCE
)
print("\033[1;32m[初始化数据库完成]\033[0m")
logger.success("初始化数据库成功")
# 导入其他模块
from ..memory_system.memory import hippocampus, memory_graph
from .bot import ChatBot
# from .message_send_control import message_sender
from .message_sender import message_manager, message_sender
# 初始化表情管理器
emoji_manager.initialize()
print(f"\033[1;32m正在唤醒{global_config.BOT_NICKNAME}......\033[0m")
logger.debug(f"正在唤醒{global_config.BOT_NICKNAME}......")
# 创建机器人实例
chat_bot = ChatBot()
# 注册群消息处理器
@@ -60,71 +54,80 @@ group_msg = on_message(priority=5)
scheduler = require("nonebot_plugin_apscheduler").scheduler
@driver.on_startup
async def start_background_tasks():
"""Start background tasks"""
# Start LLM statistics
llm_stats.start()
print("\033[1;32m[init]\033[0m LLM statistics started")
logger.success("LLM statistics started")
# Initialize and start the mood manager
mood_manager = MoodManager.get_instance()
mood_manager.start_mood_update(update_interval=global_config.mood_update_interval)
print("\033[1;32m[init]\033[0m Mood manager started")
logger.success("Mood manager started")
# Start only the periodic emoji-management task
asyncio.create_task(emoji_manager.start_periodic_check(interval_MINS=global_config.EMOJI_CHECK_INTERVAL))
await bot_schedule.initialize()
bot_schedule.print_schedule()
@driver.on_startup
async def init_relationships():
"""Initialize the relationship manager when NoneBot2 starts"""
print("\033[1;32m[init]\033[0m Loading user relationship data...")
logger.debug("Loading user relationship data...")
await relationship_manager.load_all_relationships()
asyncio.create_task(relationship_manager._start_relationship_manager())
@driver.on_bot_connect
async def _(bot: Bot):
"""Handle a successful bot connection"""
global _message_manager_started
print(f"\033[1;38;5;208m-----------{global_config.BOT_NICKNAME} connected!-----------\033[0m")
logger.debug(f"-----------{global_config.BOT_NICKNAME} connected!-----------")
await willing_manager.ensure_started()
message_sender.set_bot(bot)
print("\033[1;38;5;208m-----------Message sender started!-----------\033[0m")
logger.success("-----------Message sender started!-----------")
if not _message_manager_started:
asyncio.create_task(message_manager.start_processor())
_message_manager_started = True
print("\033[1;38;5;208m-----------Message processor started!-----------\033[0m")
logger.success("-----------Message processor started!-----------")
asyncio.create_task(emoji_manager._periodic_scan(interval_MINS=global_config.EMOJI_REGISTER_INTERVAL))
print("\033[1;38;5;208m-----------Starting to steal emoji packs!-----------\033[0m")
logger.success("-----------Starting to steal emoji packs!-----------")
asyncio.create_task(chat_manager._initialize())
asyncio.create_task(chat_manager._auto_save_task())
@group_msg.handle()
async def _(bot: Bot, event: GroupMessageEvent, state: T_State):
await chat_bot.handle_message(event, bot)
# Add the build_memory scheduled job
@scheduler.scheduled_job("interval", seconds=global_config.build_memory_interval, id="build_memory")
async def build_memory_task():
"""Build memory every build_memory_interval seconds"""
print("\033[1;32m[memory build]\033[0m -------------------------------------------Starting memory build-------------------------------------------")
logger.debug(
"[memory build]"
"------------------------------------Starting memory build--------------------------------------")
start_time = time.time()
await hippocampus.operation_build_memory(chat_size=20)
end_time = time.time()
print(f"\033[1;32m[memory build]\033[0m -------------------------------------------Memory build finished, took {end_time - start_time:.2f} s-------------------------------------------")
logger.success(
f"[memory build]--------------------------Memory build finished, took {end_time - start_time:.2f} "
"s-------------------------------------------")
@scheduler.scheduled_job("interval", seconds=global_config.forget_memory_interval, id="forget_memory")
async def forget_memory_task():
"""Forget memories every forget_memory_interval seconds"""
# print("\033[1;32m[memory forgetting]\033[0m Starting to forget memories...")
# await hippocampus.operation_forget_topic(percentage=0.1)
# print("\033[1;32m[memory forgetting]\033[0m Memory forgetting finished")
print("\033[1;32m[memory forgetting]\033[0m Starting to forget memories...")
await hippocampus.operation_forget_topic(percentage=0.1)
print("\033[1;32m[memory forgetting]\033[0m Memory forgetting finished")
@scheduler.scheduled_job("interval", seconds=global_config.build_memory_interval + 10, id="merge_memory")
async def merge_memory_task():
@@ -133,9 +136,9 @@ async def merge_memory_task():
# await hippocampus.operation_merge_memory(percentage=0.1)
# print("\033[1;32m[memory merge]\033[0m Memory merge finished")
@scheduler.scheduled_job("interval", seconds=30, id="print_mood")
async def print_mood_task():
"""Print the mood state every 30 seconds"""
mood_manager = MoodManager.get_instance()
mood_manager.print_mood_status()


@@ -1,3 +1,4 @@
import re
import time
from random import random
from loguru import logger
@@ -96,8 +97,17 @@ class ChatBot:
# Banned-word filter
for word in global_config.ban_words:
if word in message.processed_plain_text:
logger.info(f"\033[1;32m[{groupinfo.group_name}]{userinfo.user_nickname}:\033[0m {message.processed_plain_text}")
logger.info(f"\033[1;32m[banned-word check]\033[0m message contains {word}, filtered")
logger.info(
f"[{groupinfo.group_name}]{userinfo.user_nickname}:{message.processed_plain_text}")
logger.info(f"[banned-word check] message contains {word}, filtered")
return
# Regex filter
for pattern in global_config.ban_msgs_regex:
if re.search(pattern, message.raw_message):
logger.info(
f"[{message.group_name}]{message.user_nickname}:{message.raw_message}")
logger.info(f"[regex filter] message matched {pattern}, filtered")
return
current_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(messageinfo.time))
@@ -108,7 +118,8 @@ class ChatBot:
topic = ''
interested_rate = 0
interested_rate = await hippocampus.memory_activate_value(message.processed_plain_text) / 100
print(f"\033[1;32m[memory activation]\033[0m activation for {message.processed_plain_text}:---------------------------------------{interested_rate}\n")
logger.debug(f"Activation for {message.processed_plain_text}"
f": {interested_rate}")
# logger.info(f"\033[1;32m[topic detection]\033[0m using {global_config.topic_extract}, topic: {topic}")
await self.storage.store_message(message,chat, topic[0] if topic else None)
@@ -124,7 +135,10 @@ class ChatBot:
)
current_willing = willing_manager.get_willing(chat_stream=chat)
print(f"\033[1;32m[{current_time}][{chat.group_info.group_name}]{chat.user_info.user_nickname}:\033[0m {message.processed_plain_text}\033[1;36m[reply willingness:{current_willing:.2f}][probability:{reply_probability * 100:.1f}%]\033[0m")
logger.info(
f"[{current_time}][{chat.group_info.group_name}]{chat.user_info.user_nickname}:"
f"{message.processed_plain_text}[reply willingness:{current_willing:.2f}][probability:{reply_probability * 100:.1f}%]"
)
response = None
@@ -162,7 +176,7 @@ class ChatBot:
# If the thinking message cannot be found, return immediately
if not thinking_message:
print(f"\033[1;33m[warning]\033[0m matching thinking message not found; it may have timed out and been removed")
logger.warning("Matching thinking message not found; it may have timed out and been removed")
return
# Record when thinking started, so the gap between thinking and replying doesn't grow too long
@@ -205,7 +219,7 @@ class ChatBot:
# Check whether the emoji was <not found>
if emoji_raw != None:
emoji_path,discription = emoji_raw
emoji_path, description = emoji_raw
emoji_cq = image_path_to_base64(emoji_path)
@@ -226,7 +240,7 @@ class ChatBot:
)
message_manager.add_message(bot_message)
emotion = await self.gpt._get_emotion_tags(raw_content)
print(f"'{response}' 获取到的情感标签为:{emotion}")
logger.debug(f"'{response}' 获取到的情感标签为:{emotion}")
valuedict = {
'happy': 0.5,
'angry': -1,
@@ -240,9 +254,10 @@ class ChatBot:
# Update the mood through the mood manager
self.mood_manager.update_mood_from_emotion(emotion[0], global_config.mood_intensity_factor)
willing_manager.change_reply_willing_after_sent(
chat_stream=chat
)
# willing_manager.change_reply_willing_after_sent(
# chat_stream=chat
# )
# Create the global ChatBot instance
chat_bot = ChatBot()
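A minimal sketch of the regex filtering step added above (the patterns here are made up, not project defaults):

```python
import re

# Hypothetical patterns: drop CQ reply segments and all-digit messages.
ban_msgs_regex = {r"\[CQ:reply.*?\]", r"^\d+$"}

def is_banned(raw_message: str) -> bool:
    return any(re.search(p, raw_message) for p in ban_msgs_regex)

assert is_banned("[CQ:reply,id=123] hello")
assert not is_banned("hello")
```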


@@ -1,16 +1,23 @@
import os
from dataclasses import dataclass, field
from typing import Dict, Optional
from typing import Dict, List, Optional
import tomli
from loguru import logger
from packaging import version
from packaging.version import Version, InvalidVersion
from packaging.specifiers import SpecifierSet, InvalidSpecifier
@dataclass
class BotConfig:
"""Bot configuration class"""
INNER_VERSION: Version = None
BOT_QQ: Optional[int] = 1
BOT_NICKNAME: Optional[str] = None
BOT_ALIAS_NAMES: List[str] = field(default_factory=list) # aliases the bot also answers to
# Message-processing settings
MIN_TEXT_LENGTH: int = 2 # minimum length of text to process
@@ -38,6 +45,7 @@ class BotConfig:
EMOJI_CHECK_PROMPT: str = "符合公序良俗" # emoji filter requirement
ban_words = set()
ban_msgs_regex = set()
max_response_length: int = 1024 # maximum reply length
@@ -64,168 +72,351 @@ class BotConfig:
mood_decay_rate: float = 0.95 # mood decay rate
mood_intensity_factor: float = 0.7 # mood intensity factor
keywords_reaction_rules = [] # keyword-triggered reply rules
chinese_typo_enable = True # enable the Chinese typo generator
chinese_typo_error_rate = 0.03 # per-character substitution probability
chinese_typo_min_freq = 7 # minimum character-frequency threshold
chinese_typo_tone_error_rate = 0.2 # tone-error probability
chinese_typo_word_replace_rate = 0.02 # whole-word replacement probability
# Default personas
PROMPT_PERSONALITY = [
"曾经是一个学习地质的女大学生,现在学习心理学和脑科学,你会刷贴吧",
"是一个女大学生,你有黑色头发,你会刷小红书",
"是一个女大学生你会刷b站对ACG文化感兴趣"
"是一个女大学生你会刷b站对ACG文化感兴趣",
]
PROMPT_SCHEDULE_GEN = "一个曾经学习地质,现在学习心理学和脑科学的女大学生喜欢刷qq贴吧知乎和小红书"
PERSONALITY_1: float = 0.6 # probability of persona 1
PERSONALITY_2: float = 0.3 # probability of persona 2
PERSONALITY_3: float = 0.1 # probability of persona 3
memory_ban_words: list = field(
default_factory=lambda: ["表情包", "图片", "回复", "聊天记录"]
) # default for the newly added option
@staticmethod
def get_config_dir() -> str:
"""获取配置文件目录"""
current_dir = os.path.dirname(os.path.abspath(__file__))
root_dir = os.path.abspath(os.path.join(current_dir, '..', '..', '..'))
config_dir = os.path.join(root_dir, 'config')
root_dir = os.path.abspath(os.path.join(current_dir, "..", "..", ".."))
config_dir = os.path.join(root_dir, "config")
if not os.path.exists(config_dir):
os.makedirs(config_dir)
return config_dir
@classmethod
def convert_to_specifierset(cls, value: str) -> SpecifierSet:
"""将 字符串 版本表达式转换成 SpecifierSet
Args:
value[str]: 版本表达式(字符串)
Returns:
SpecifierSet
"""
try:
converted = SpecifierSet(value)
except InvalidSpecifier:
logger.error(f"{value} 分类使用了错误的版本约束表达式\n", "请阅读 https://semver.org/lang/zh-CN/ 修改代码")
exit(1)
return converted
@classmethod
def get_config_version(cls, toml: dict) -> Version:
"""提取配置文件的 SpecifierSet 版本数据
Args:
toml[dict]: 输入的配置文件字典
Returns:
Version
"""
if "inner" in toml:
try:
config_version: str = toml["inner"]["version"]
except KeyError as e:
logger.error("配置文件中 inner 段 不存在, 这是错误的配置文件")
raise KeyError(f"配置文件中 inner 段 不存在 {e}, 这是错误的配置文件") from e
else:
toml["inner"] = {"version": "0.0.0"}
config_version = toml["inner"]["version"]
try:
ver = version.parse(config_version)
except InvalidVersion as e:
logger.error(
"配置文件中 inner段 的 version 键是错误的版本描述\n"
"请阅读 https://semver.org/lang/zh-CN/ 修改配置,并参考本项目指定的模板进行修改\n"
"本项目在不同的版本下有不同的模板,请注意识别"
)
raise InvalidVersion("配置文件中 inner段 的 version 键是错误的版本描述\n") from e
return ver
@classmethod
def load_config(cls, config_path: str = None) -> "BotConfig":
"""从TOML配置文件加载配置"""
config = cls()
if os.path.exists(config_path):
with open(config_path, "rb") as f:
try:
toml_dict = tomli.load(f)
except(tomli.TOMLDecodeError) as e:
logger.critical(f"bot_config.toml is malformed; check line {e.lineno}, column {e.colno}: {e.msg}")
exit(1)
if 'personality' in toml_dict:
personality_config=toml_dict['personality']
personality=personality_config.get('prompt_personality')
def personality(parent: dict):
personality_config = parent["personality"]
personality = personality_config.get("prompt_personality")
if len(personality) >= 2:
logger.info(f"载入自定义人格:{personality}")
config.PROMPT_PERSONALITY=personality_config.get('prompt_personality',config.PROMPT_PERSONALITY)
logger.debug(f"载入自定义人格:{personality}")
config.PROMPT_PERSONALITY = personality_config.get("prompt_personality", config.PROMPT_PERSONALITY)
logger.info(f"载入自定义日程prompt:{personality_config.get('prompt_schedule', config.PROMPT_SCHEDULE_GEN)}")
config.PROMPT_SCHEDULE_GEN=personality_config.get('prompt_schedule',config.PROMPT_SCHEDULE_GEN)
config.PERSONALITY_1=personality_config.get('personality_1_probability',config.PERSONALITY_1)
config.PERSONALITY_2=personality_config.get('personality_2_probability',config.PERSONALITY_2)
config.PERSONALITY_3=personality_config.get('personality_3_probability',config.PERSONALITY_3)
config.PROMPT_SCHEDULE_GEN = personality_config.get("prompt_schedule", config.PROMPT_SCHEDULE_GEN)
if "emoji" in toml_dict:
emoji_config = toml_dict["emoji"]
if config.INNER_VERSION in SpecifierSet(">=0.0.2"):
config.PERSONALITY_1 = personality_config.get("personality_1_probability", config.PERSONALITY_1)
config.PERSONALITY_2 = personality_config.get("personality_2_probability", config.PERSONALITY_2)
config.PERSONALITY_3 = personality_config.get("personality_3_probability", config.PERSONALITY_3)
def emoji(parent: dict):
emoji_config = parent["emoji"]
config.EMOJI_CHECK_INTERVAL = emoji_config.get("check_interval", config.EMOJI_CHECK_INTERVAL)
config.EMOJI_REGISTER_INTERVAL = emoji_config.get("register_interval", config.EMOJI_REGISTER_INTERVAL)
config.EMOJI_CHECK_PROMPT = emoji_config.get('check_prompt',config.EMOJI_CHECK_PROMPT)
config.EMOJI_SAVE = emoji_config.get('auto_save',config.EMOJI_SAVE)
config.EMOJI_CHECK = emoji_config.get('enable_check',config.EMOJI_CHECK)
config.EMOJI_CHECK_PROMPT = emoji_config.get("check_prompt", config.EMOJI_CHECK_PROMPT)
config.EMOJI_SAVE = emoji_config.get("auto_save", config.EMOJI_SAVE)
config.EMOJI_CHECK = emoji_config.get("enable_check", config.EMOJI_CHECK)
if "cq_code" in toml_dict:
cq_code_config = toml_dict["cq_code"]
def cq_code(parent: dict):
cq_code_config = parent["cq_code"]
config.ENABLE_PIC_TRANSLATE = cq_code_config.get("enable_pic_translate", config.ENABLE_PIC_TRANSLATE)
def bot(parent: dict):
# Basic bot settings
if "bot" in toml_dict:
bot_config = toml_dict["bot"]
bot_config = parent["bot"]
bot_qq = bot_config.get("qq")
config.BOT_QQ = int(bot_qq)
config.BOT_NICKNAME = bot_config.get("nickname", config.BOT_NICKNAME)
if "response" in toml_dict:
response_config = toml_dict["response"]
if config.INNER_VERSION in SpecifierSet(">=0.0.5"):
config.BOT_ALIAS_NAMES = bot_config.get("alias_names", config.BOT_ALIAS_NAMES)
def response(parent: dict):
response_config = parent["response"]
config.MODEL_R1_PROBABILITY = response_config.get("model_r1_probability", config.MODEL_R1_PROBABILITY)
config.MODEL_V3_PROBABILITY = response_config.get("model_v3_probability", config.MODEL_V3_PROBABILITY)
config.MODEL_R1_DISTILL_PROBABILITY = response_config.get("model_r1_distill_probability", config.MODEL_R1_DISTILL_PROBABILITY)
config.MODEL_R1_DISTILL_PROBABILITY = response_config.get(
"model_r1_distill_probability", config.MODEL_R1_DISTILL_PROBABILITY
)
config.max_response_length = response_config.get("max_response_length", config.max_response_length)
def model(parent: dict):
# Load model configurations
if "model" in toml_dict:
model_config = toml_dict["model"]
model_config: dict = parent["model"]
if "llm_reasoning" in model_config:
config.llm_reasoning = model_config["llm_reasoning"]
config_list = [
"llm_reasoning",
"llm_reasoning_minor",
"llm_normal",
"llm_normal_minor",
"llm_topic_judge",
"llm_summary_by_topic",
"llm_emotion_judge",
"vlm",
"embedding",
"moderation",
]
if "llm_reasoning_minor" in model_config:
config.llm_reasoning_minor = model_config["llm_reasoning_minor"]
for item in config_list:
if item in model_config:
cfg_item: dict = model_config[item]
if "llm_normal" in model_config:
config.llm_normal = model_config["llm_normal"]
# base_url example: SILICONFLOW_BASE_URL
# key example: SILICONFLOW_KEY
cfg_target = {"name": "", "base_url": "", "key": "", "pri_in": 0, "pri_out": 0}
if "llm_normal_minor" in model_config:
config.llm_normal_minor = model_config["llm_normal_minor"]
if config.INNER_VERSION in SpecifierSet("<=0.0.0"):
cfg_target = cfg_item
if "llm_topic_judge" in model_config:
config.llm_topic_judge = model_config["llm_topic_judge"]
elif config.INNER_VERSION in SpecifierSet(">=0.0.1"):
stable_item = ["name", "pri_in", "pri_out"]
pricing_item = ["pri_in", "pri_out"]
# Copy the stable fields straight from the config
for i in stable_item:
# If the field is a pricing item and absent, it defaults to 0
if i in pricing_item and i not in cfg_item:
cfg_target[i] = 0
else:
# Otherwise copy it over unchanged
try:
cfg_target[i] = cfg_item[i]
except KeyError as e:
logger.error(f"A required field in {item} is missing; please check")
raise KeyError(f"Required field {e} in {item} is missing; please check") from e
if "llm_summary_by_topic" in model_config:
config.llm_summary_by_topic = model_config["llm_summary_by_topic"]
provider = cfg_item.get("provider")
if provider is None:
logger.error(f"provider 字段在模型配置 {item} 中不存在,请检查")
raise KeyError(f"provider 字段在模型配置 {item} 中不存在,请检查")
if "llm_emotion_judge" in model_config:
config.llm_emotion_judge = model_config["llm_emotion_judge"]
cfg_target["base_url"] = f"{provider}_BASE_URL"
cfg_target["key"] = f"{provider}_KEY"
if "vlm" in model_config:
config.vlm = model_config["vlm"]
# When a listed item exists in model_config, set the matching attribute via reflection
setattr(config, item, cfg_target)
else:
logger.error(f"Model {item} is missing from the config; please check")
raise KeyError(f"Model {item} is missing from the config; please check")
if "embedding" in model_config:
config.embedding = model_config["embedding"]
if "moderation" in model_config:
config.moderation = model_config["moderation"]
# Message settings
if "message" in toml_dict:
msg_config = toml_dict["message"]
def message(parent: dict):
msg_config = parent["message"]
config.MIN_TEXT_LENGTH = msg_config.get("min_text_length", config.MIN_TEXT_LENGTH)
config.MAX_CONTEXT_SIZE = msg_config.get("max_context_size", config.MAX_CONTEXT_SIZE)
config.emoji_chance = msg_config.get("emoji_chance", config.emoji_chance)
config.ban_words = msg_config.get("ban_words", config.ban_words)
if config.INNER_VERSION in SpecifierSet(">=0.0.2"):
config.thinking_timeout = msg_config.get("thinking_timeout", config.thinking_timeout)
config.response_willing_amplifier = msg_config.get("response_willing_amplifier", config.response_willing_amplifier)
config.response_interested_rate_amplifier = msg_config.get("response_interested_rate_amplifier", config.response_interested_rate_amplifier)
config.response_willing_amplifier = msg_config.get(
"response_willing_amplifier", config.response_willing_amplifier
)
config.response_interested_rate_amplifier = msg_config.get(
"response_interested_rate_amplifier", config.response_interested_rate_amplifier
)
config.down_frequency_rate = msg_config.get("down_frequency_rate", config.down_frequency_rate)
if "memory" in toml_dict:
memory_config = toml_dict["memory"]
if config.INNER_VERSION in SpecifierSet(">=0.0.6"):
config.ban_msgs_regex = msg_config.get("ban_msgs_regex", config.ban_msgs_regex)
def memory(parent: dict):
memory_config = parent["memory"]
config.build_memory_interval = memory_config.get("build_memory_interval", config.build_memory_interval)
config.forget_memory_interval = memory_config.get("forget_memory_interval", config.forget_memory_interval)
if "mood" in toml_dict:
mood_config = toml_dict["mood"]
# Only handle the newly added options when the version is >= 0.0.4
if config.INNER_VERSION in SpecifierSet(">=0.0.4"):
config.memory_ban_words = set(memory_config.get("memory_ban_words", []))
def mood(parent: dict):
mood_config = parent["mood"]
config.mood_update_interval = mood_config.get("mood_update_interval", config.mood_update_interval)
config.mood_decay_rate = mood_config.get("mood_decay_rate", config.mood_decay_rate)
config.mood_intensity_factor = mood_config.get("mood_intensity_factor", config.mood_intensity_factor)
# Group settings
if "groups" in toml_dict:
groups_config = toml_dict["groups"]
def keywords_reaction(parent: dict):
keywords_reaction_config = parent["keywords_reaction"]
if keywords_reaction_config.get("enable", False):
config.keywords_reaction_rules = keywords_reaction_config.get("rules", config.keywords_reaction_rules)
def chinese_typo(parent: dict):
chinese_typo_config = parent["chinese_typo"]
config.chinese_typo_enable = chinese_typo_config.get("enable", config.chinese_typo_enable)
config.chinese_typo_error_rate = chinese_typo_config.get("error_rate", config.chinese_typo_error_rate)
config.chinese_typo_min_freq = chinese_typo_config.get("min_freq", config.chinese_typo_min_freq)
config.chinese_typo_tone_error_rate = chinese_typo_config.get(
"tone_error_rate", config.chinese_typo_tone_error_rate
)
config.chinese_typo_word_replace_rate = chinese_typo_config.get(
"word_replace_rate", config.chinese_typo_word_replace_rate
)
def groups(parent: dict):
groups_config = parent["groups"]
config.talk_allowed_groups = set(groups_config.get("talk_allowed", []))
config.talk_frequency_down_groups = set(groups_config.get("talk_frequency_down", []))
config.ban_user_id = set(groups_config.get("ban_user_id", []))
if "others" in toml_dict:
others_config = toml_dict["others"]
def others(parent: dict):
others_config = parent["others"]
config.enable_advance_output = others_config.get("enable_advance_output", config.enable_advance_output)
config.enable_kuuki_read = others_config.get("enable_kuuki_read", config.enable_kuuki_read)
# Version expression: >=1.0.0,<2.0.0
# Allowed fields: func: method, support: str, notice: str, necessary: bool
# If the notice field is used, its text is shown to the user as a warning when that config group loads
# e.g. "notice": "personality will be removed after 1.3.2"; users on affected versions can still
# run the program normally but will see this custom notice
include_configs = {
"personality": {"func": personality, "support": ">=0.0.0"},
"emoji": {"func": emoji, "support": ">=0.0.0"},
"cq_code": {"func": cq_code, "support": ">=0.0.0"},
"bot": {"func": bot, "support": ">=0.0.0"},
"response": {"func": response, "support": ">=0.0.0"},
"model": {"func": model, "support": ">=0.0.0"},
"message": {"func": message, "support": ">=0.0.0"},
"memory": {"func": memory, "support": ">=0.0.0", "necessary": False},
"mood": {"func": mood, "support": ">=0.0.0"},
"keywords_reaction": {"func": keywords_reaction, "support": ">=0.0.2", "necessary": False},
"chinese_typo": {"func": chinese_typo, "support": ">=0.0.3", "necessary": False},
"groups": {"func": groups, "support": ">=0.0.0"},
"others": {"func": others, "support": ">=0.0.0"},
}
# Convert the string version expressions into version objects, in place
for key in include_configs:
item_support = include_configs[key]["support"]
include_configs[key]["support"] = cls.convert_to_specifierset(item_support)
if os.path.exists(config_path):
with open(config_path, "rb") as f:
try:
toml_dict = tomli.load(f)
except tomli.TOMLDecodeError as e:
logger.critical(f"bot_config.toml is malformed; check line {e.lineno}, column {e.colno}: {e.msg}")
exit(1)
# Read the config file's version
config.INNER_VERSION = cls.get_config_version(toml_dict)
# When a needed section is found in the config, call its handler closure
for key in include_configs:
if key in toml_dict:
group_specifierset: SpecifierSet = include_configs[key]["support"]
# Check whether the config-file version is within the supported range
if config.INNER_VERSION in group_specifierset:
# If the version is supported, check for a notice
if "notice" in include_configs[key]:
logger.warning(include_configs[key]["notice"])
include_configs[key]["func"](toml_dict)
else:
# If the version is unsupported, crash and tell the user
logger.error(
f"The '{key}' section's version in the config file ({config.INNER_VERSION}) is outside the supported range.\n"
f"This program only supports the following range: {group_specifierset}"
)
raise InvalidVersion(f"This program only supports the following range: {group_specifierset}")
# If a necessary field exists and is explicitly False, handle it specially
elif "necessary" in include_configs[key] and include_configs[key].get("necessary") is False:
# Items handled with pass could simply be ignored, but they are handled explicitly here to avoid confusion
if key == "keywords_reaction":
pass
else:
# If the user is missing a required section entirely, report the missing config
logger.error(f"Required section missing from the config file: '{key}'")
raise KeyError(f"Required section missing from the config file: '{key}'")
logger.success(f"Loaded config file: {config_path}")
return config
# Resolve the config file path
# Resolve the config file path
bot_config_floder_path = BotConfig.get_config_dir()
print(f"Savoring the config directory: {bot_config_floder_path}")
logger.debug(f"Savoring the config directory: {bot_config_floder_path}")
bot_config_path = os.path.join(bot_config_floder_path, "bot_config.toml")
if os.path.exists(bot_config_path):
# If the development config file is missing, fall back to the default config file
print(f"Unusually fresh, unusually tasty: {bot_config_path}")
logger.debug(f"Unusually fresh, unusually tasty: {bot_config_path}")
logger.info("Using the bot config file")
else:
logger.info("No deliciousness found")
# The config file does not exist
logger.error(f"Config file not found; check the path: {bot_config_path}")
raise FileNotFoundError(f"Config file not found: {bot_config_path}")
global_config = BotConfig.load_config(config_path=bot_config_path)
if not global_config.enable_advance_output:
logger.remove()
pass
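A stripped-down sketch of the version-gated section dispatch that load_config now uses (section names and handlers are illustrative):

```python
from packaging.specifiers import SpecifierSet
from packaging.version import Version

# Each section maps to a handler plus the config versions it supports.
def load(toml_dict: dict) -> None:
    inner = Version(toml_dict.get("inner", {}).get("version", "0.0.0"))
    handlers = {
        "bot": (lambda sec: print("bot:", sec), SpecifierSet(">=0.0.0")),
        "chinese_typo": (lambda sec: print("typo:", sec), SpecifierSet(">=0.0.3")),
    }
    for name, (func, supported) in handlers.items():
        if name in toml_dict:
            if inner in supported:
                func(toml_dict[name])
            else:
                raise ValueError(f"section '{name}' requires version {supported}")

load({"inner": {"version": "0.0.5"}, "bot": {"qq": 10001}})  # prints bot: {'qq': 10001}
```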


@@ -170,11 +170,11 @@ class CQCode:
except (requests.exceptions.SSLError, requests.exceptions.HTTPError) as e:
if retry == max_retries - 1:
print(f"\033[1;31m[致命错误]\033[0m 最终请求失败: {str(e)}")
logger.error(f"最终请求失败: {str(e)}")
time.sleep(1.5**retry) # 指数退避
except Exception as e:
print(f"\033[1;33m[未知错误]\033[0m {str(e)}")
except Exception:
logger.exception("[未知错误]")
return None
return None


@@ -36,7 +36,9 @@ class EmojiManager:
self.db = Database.get_instance()
self._scan_task = None
self.vlm = LLM_request(model=global_config.vlm, temperature=0.3, max_tokens=1000)
self.llm_emotion_judge = LLM_request(model=global_config.llm_normal_minor, max_tokens=60,temperature=0.8) # higher temperature, fewer tokens; the temperature could later be tuned by mood
self.llm_emotion_judge = LLM_request(model=global_config.llm_normal_minor, max_tokens=60,
temperature=0.8) # higher temperature, fewer tokens; the temperature could later be tuned by mood
def _ensure_emoji_dir(self):
"""Make sure the emoji storage directory exists"""
@@ -52,8 +54,8 @@ class EmojiManager:
self._initialized = True
# Run a full integrity check at startup
self.check_emoji_file_integrity()
except Exception as e:
logger.error(f"Failed to initialize the emoji manager: {str(e)}")
except Exception:
logger.exception("Failed to initialize the emoji manager")
def _ensure_db(self):
"""Make sure the database is initialized"""
@@ -117,7 +119,7 @@ class EmojiManager:
try:
# Fetch all emoji records
all_emojis = list(self.db.db.emoji.find({}, {'_id': 1, 'path': 1, 'embedding': 1, 'discription': 1}))
all_emojis = list(self.db.db.emoji.find({}, {'_id': 1, 'path': 1, 'embedding': 1, 'description': 1}))
if not all_emojis:
logger.warning("No emoji packs in the database")
@@ -159,9 +161,10 @@ class EmojiManager:
{'_id': selected_emoji['_id']},
{'$inc': {'usage_count': 1}}
)
logger.success(f"找到匹配的表情包: {selected_emoji.get('discription', '无描述')} (相似度: {similarity:.4f})")
logger.success(
f"找到匹配的表情包: {selected_emoji.get('description', '无描述')} (相似度: {similarity:.4f})")
# 稍微改一下文本描述,不然容易产生幻觉,描述已经包含 表情包 了
return selected_emoji['path'],"[ %s ]" % selected_emoji.get('discription', '无描述')
return selected_emoji['path'], "[ %s ]" % selected_emoji.get('description', '无描述')
except Exception as search_error:
logger.error(f"搜索表情包失败: {str(search_error)}")
@@ -202,7 +205,7 @@ class EmojiManager:
try:
prompt = f'这是{global_config.BOT_NICKNAME}将要发送的消息内容:\n{text}\n若要为其配上表情包,请你输出这个表情包应该表达怎样的情感,应该给人什么样的感觉,不要太简洁也不要太长,注意不要输出任何对消息内容的分析内容,只输出\"一种什么样的感觉\"中间的形容词部分。'
content, _ = await self.llm_emotion_judge.generate_response_async(prompt)
content, _ = await self.llm_emotion_judge.generate_response_async(prompt,temperature=1.5)
logger.info(f"输出描述: {content}")
return content
@@ -217,7 +220,8 @@ class EmojiManager:
os.makedirs(emoji_dir, exist_ok=True)
# Collect all supported image files
files_to_process = [f for f in os.listdir(emoji_dir) if f.lower().endswith(('.jpg', '.jpeg', '.png', '.gif'))]
files_to_process = [f for f in os.listdir(emoji_dir) if
f.lower().endswith(('.jpg', '.jpeg', '.png', '.gif'))]
for filename in files_to_process:
image_path = os.path.join(emoji_dir, filename)
@@ -273,10 +277,14 @@ class EmojiManager:
if '' not in check:
os.remove(image_path)
logger.info(f"Description: {description}")
logger.info(f"Description: {description}")
logger.info(f"It failed the filter rules and was removed: {check}")
continue
logger.info(f"Check passed: {check}")
if description is not None:
embedding = await get_embedding(description)
if description is not None:
embedding = await get_embedding(description)
# Prepare the database record
@@ -313,18 +321,16 @@ class EmojiManager:
else:
logger.warning(f"Skipping emoji: {filename}")
except Exception as e:
logger.error(f"Emoji scan failed: {str(e)}")
logger.error(traceback.format_exc())
except Exception:
logger.exception("Emoji scan failed")
async def _periodic_scan(self, interval_MINS: int = 10):
"""Periodically scan for new emoji packs"""
while True:
print("\033[1;36m[emoji]\033[0m Scanning for new emoji packs...")
logger.info("Scanning for new emoji packs...")
await self.scan_new_emojis()
await asyncio.sleep(interval_MINS * 60) # scan every interval_MINS minutes
def check_emoji_file_integrity(self):
"""检查表情包文件完整性
如果文件已被删除,则从数据库中移除对应记录
@@ -356,7 +362,7 @@ class EmojiManager:
# Delete the record from the database
result = self.db.db.emoji.delete_one({'_id': emoji['_id']})
if result.deleted_count > 0:
logger.success(f"Deleted database record: {emoji['_id']}")
logger.debug(f"Deleted database record: {emoji['_id']}")
removed_count += 1
else:
logger.error(f"Failed to delete database record: {emoji['_id']}")
@@ -382,6 +388,6 @@ class EmojiManager:
await asyncio.sleep(interval_MINS * 60)
# 创建全局单例
emoji_manager = EmojiManager()

View File

@@ -3,6 +3,7 @@ import time
from typing import List, Optional, Tuple, Union
from nonebot import get_driver
from loguru import logger
from ...common.database import Database
from ..models.utils_model import LLM_request
@@ -55,9 +56,7 @@ class ResponseGenerator:
self.current_model_type = "r1_distill"
current_model = self.model_r1_distill
print(
f"+++++++++++++++++{global_config.BOT_NICKNAME}{self.current_model_type}思考中+++++++++++++++++"
)
logger.info(f"{global_config.BOT_NICKNAME}{self.current_model_type}思考中")
model_response = await self._generate_response_with_model(
message, current_model
@@ -65,7 +64,7 @@ class ResponseGenerator:
raw_content = model_response
if model_response:
print(f"{global_config.BOT_NICKNAME}的回复是:{model_response}")
logger.info(f'{global_config.BOT_NICKNAME}的回复是:{model_response}')
model_response = await self._process_response(model_response)
if model_response:
return model_response, raw_content
@@ -122,8 +121,8 @@ class ResponseGenerator:
# 生成回复
try:
content, reasoning_content = await model.generate_response(prompt)
except Exception as e:
print(f"生成回复时出错: {e}")
except Exception:
logger.exception("生成回复时出错")
return None
# 保存到数据库
@@ -219,7 +218,7 @@ class InitiativeMessageGenerate:
prompt_builder._build_initiative_prompt_select(message.group_id)
)
content_select, reasoning = self.model_v3.generate_response(topic_select_prompt)
print(f"[DEBUG] {content_select} {reasoning}")
logger.debug(f"{content_select} {reasoning}")
topics_list = [dot[0] for dot in dots_for_select]
if content_select:
if content_select in topics_list:
@@ -232,12 +231,12 @@ class InitiativeMessageGenerate:
select_dot[1], prompt_template
)
content_check, reasoning_check = self.model_v3.generate_response(prompt_check)
print(f"[DEBUG] {content_check} {reasoning_check}")
logger.info(f"{content_check} {reasoning_check}")
if "yes" not in content_check.lower():
return None
prompt = prompt_builder._build_initiative_prompt(
select_dot, prompt_template, memory
)
content, reasoning = self.model_r1.generate_response_async(prompt)
print(f"[DEBUG] {content} {reasoning}")
logger.debug(f"[DEBUG] {content} {reasoning}")
return content

View File

@@ -2,6 +2,7 @@ import asyncio
import time
from typing import Dict, List, Optional, Union
from loguru import logger
from nonebot.adapters.onebot.v11 import Bot
from .cq_code import cq_code_tool
@@ -14,6 +15,7 @@ from .chat_stream import chat_manager
class Message_Sender:
"""发送器"""
def __init__(self):
self.message_interval = (0.5, 1) # 消息间隔时间范围(秒)
self.last_send_time = 0
@@ -41,10 +43,10 @@ class Message_Sender:
message=message_send.raw_message,
auto_escape=False
)
print(f"\033[1;34m[调试]\033[0m 发送消息{message.processed_plain_text}成功")
logger.success(f"[调试] 发送消息{message.processed_plain_text}成功")
except Exception as e:
print(f"发生错误 {e}")
print(f"\033[1;34m[调试]\033[0m 发送消息{message.processed_plain_text}失败")
logger.error(f"[调试] 发生错误 {e}")
logger.error(f"[调试] 发送消息{message.processed_plain_text}失败")
else:
try:
await self._current_bot.send_private_msg(
@@ -52,10 +54,10 @@ class Message_Sender:
message=message_send.raw_message,
auto_escape=False
)
print(f"\033[1;34m[调试]\033[0m 发送消息{message.processed_plain_text}成功")
logger.success(f"[调试] 发送消息{message.processed_plain_text}成功")
except Exception as e:
print(f"发生错误 {e}")
print(f"\033[1;34m[调试]\033[0m 发送消息{message.processed_plain_text}失败")
logger.error(f"发生错误 {e}")
logger.error(f"[调试] 发送消息{message.processed_plain_text}失败")
class MessageContainer:
@@ -110,8 +112,8 @@ class MessageContainer:
self.messages.remove(message)
return True
return False
except Exception as e:
print(f"\033[1;31m[错误]\033[0m 移除消息时发生错误: {e}")
except Exception:
logger.exception("移除消息时发生错误")
return False
def has_messages(self) -> bool:
@@ -152,11 +154,11 @@ class MessageManager:
if isinstance(message_earliest, MessageThinking):
message_earliest.update_thinking_time()
thinking_time = message_earliest.thinking_time
print(f"\033[1;34m[调试]\033[0m 消息正在思考中,已思考{int(thinking_time)}\033[K\r", end='', flush=True)
print(f"消息正在思考中,已思考{int(thinking_time)}\r", end='', flush=True)
# 检查是否超时
if thinking_time > global_config.thinking_timeout:
print(f"\033[1;33m[警告]\033[0m 消息思考超时({thinking_time}秒),移除该消息")
logger.warning(f"消息思考超时({thinking_time}秒),移除该消息")
container.remove_message(message_earliest)
else:
print(f"\033[1;34m[调试]\033[0m 消息'{message_earliest.processed_plain_text}'正在发送中")
@@ -174,7 +176,7 @@ class MessageManager:
message_timeout = container.get_timeout_messages()
if message_timeout:
print(f"\033[1;34m[调试]\033[0m 发现{len(message_timeout)}条超时消息")
logger.warning(f"发现{len(message_timeout)}条超时消息")
for msg in message_timeout:
if msg == message_earliest:
continue
@@ -191,9 +193,9 @@ class MessageManager:
await self.storage.store_message(msg,msg.chat_stream, None)
if not container.remove_message(msg):
print("\033[1;33m[警告]\033[0m 尝试删除不存在的消息")
except Exception as e:
print(f"\033[1;31m[错误]\033[0m 处理超时消息时发生错误: {e}")
logger.warning("尝试删除不存在的消息")
except Exception:
logger.exception("处理超时消息时发生错误")
continue
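
For context, the timeout branch above works because each thinking placeholder tracks its own age. A small sketch of that bookkeeping, with the container API assumed from the `remove_message` calls in this diff:

import time

class ThinkingMessage:
    def __init__(self):
        self.start_time = time.time()
        self.thinking_time = 0.0

    def update_thinking_time(self):
        # 思考时长 = 当前时间 - 创建时间
        self.thinking_time = time.time() - self.start_time

def prune_if_timed_out(container, message, timeout_s: float) -> bool:
    # 超时则移除,返回是否发生移除
    message.update_thinking_time()
    if message.thinking_time > timeout_s:
        return container.remove_message(message)
    return False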
async def start_processor(self):
@@ -206,6 +208,7 @@ class MessageManager:
await asyncio.gather(*tasks)
# 创建全局消息管理器实例
message_manager = MessageManager()
# 创建全局发送器实例

View File

@@ -1,6 +1,7 @@
import random
import time
from typing import Optional
from loguru import logger
from ...common.database import Database
from ..memory_system.memory import hippocampus, memory_graph
@@ -48,12 +49,10 @@ class PromptBuilder:
# 开始构建prompt
# 心情
mood_manager = MoodManager.get_instance()
mood_prompt = mood_manager.get_prompt()
# 日程构建
current_date = time.strftime("%Y-%m-%d", time.localtime())
current_time = time.strftime("%H:%M:%S", time.localtime())
@@ -67,10 +66,11 @@ class PromptBuilder:
promt_info_prompt = ''
prompt_info = await self.get_prompt_info(message_txt, threshold=0.5)
if prompt_info:
prompt_info = f'''\n----------------------------------------------------\n你有以下这些[知识]\n{prompt_info}\n请你记住上面的[知识],之后可能会用到\n----------------------------------------------------\n'''
prompt_info = f'''你有以下这些[知识]:{prompt_info}请你记住上面的[知识],之后可能会用到-'''
end_time = time.time()
print(f"\033[1;32m[知识检索]\033[0m 耗时: {(end_time - start_time):.3f}")
logger.debug(f"知识检索耗时: {(end_time - start_time):.3f}")
# 获取聊天上下文
chat_in_group=True
@@ -108,14 +108,12 @@ class PromptBuilder:
memory_prompt = "看到这些聊天,你想起来:\n" + "\n".join(memory_items) + "\n"
# 打印调试信息
print("\n\033[1;32m[记忆检索]\033[0m 找到以下相关记忆:")
logger.debug("[记忆检索]找到以下相关记忆:")
for memory in relevant_memories:
print(f"- 主题「{memory['topic']}」[相似度: {memory['similarity']:.2f}]: {memory['content']}")
logger.debug(f"- 主题「{memory['topic']}」[相似度: {memory['similarity']:.2f}]: {memory['content']}")
end_time = time.time()
print(f"\033[1;32m[回忆耗时]\033[0m 耗时: {(end_time - start_time):.3f}")
logger.info(f"回忆耗时: {(end_time - start_time):.3f}")
# 激活prompt构建
activate_prompt = ''
@@ -123,36 +121,38 @@ class PromptBuilder:
activate_prompt = f"以上是群里正在进行的聊天,{memory_prompt} 现在昵称为 '{sender_name}' 的用户说的:{message_txt}。引起了你的注意,你和ta{relation_prompt},{mood_prompt},你想要{relation_prompt_2}"
else:
activate_prompt = f"以上是你正在和{sender_name}私聊的内容,{memory_prompt} 现在昵称为 '{sender_name}' 的用户说的:{message_txt}。引起了你的注意,你和ta{relation_prompt},{mood_prompt},你想要{relation_prompt_2}"
#检测机器人相关词汇
bot_keywords = ['人机', 'bot', '机器', '入机', 'robot', '机器人']
is_bot = any(keyword in message_txt.lower() for keyword in bot_keywords)
if is_bot:
is_bot_prompt = '有人说你是人机或者机器人,否定这一事实,攻击或反问他。注意,只有认为你是机器人才回复,否则不要否认'
else:
is_bot_prompt = ''
# 关键词检测与反应
keywords_reaction_prompt = ''
for rule in global_config.keywords_reaction_rules:
if rule.get("enable", False):
if any(keyword in message_txt.lower() for keyword in rule.get("keywords", [])):
logger.info(f"检测到以下关键词之一:{rule.get('keywords', [])},触发反应:{rule.get('reaction', '')}")
keywords_reaction_prompt += rule.get("reaction", "") + ''
#人格选择
personality=global_config.PROMPT_PERSONALITY
probability_1 = global_config.PERSONALITY_1
probability_2 = global_config.PERSONALITY_2
probability_3 = global_config.PERSONALITY_3
prompt_personality = ''
prompt_personality = f'{activate_prompt}你的网名叫{global_config.BOT_NICKNAME},你还有很多别名:{"/".join(global_config.BOT_ALIAS_NAMES)}'
personality_choice = random.random()
if chat_in_group:
prompt_in_group=f"你正在浏览{chat_stream.platform}"
else:
prompt_in_group=f"你正在{chat_stream.platform}上和{sender_name}私聊"
if personality_choice < probability_1: # 第一种人格
prompt_personality = f'''{activate_prompt}你的网名叫{global_config.BOT_NICKNAME}{personality[0]}{prompt_in_group},{promt_info_prompt},
现在请你给出日常且口语化的回复,平淡一些,尽量简短一些。{is_bot_prompt}
prompt_personality += f'''{personality[0]}, 你正在浏览qq群,{promt_info_prompt},
现在请你给出日常且口语化的回复,平淡一些,尽量简短一些。{keywords_reaction_prompt}
请注意把握群里的聊天内容,不要刻意突出自身学科背景,不要回复的太有条理,可以有个性。'''
elif personality_choice < probability_1 + probability_2: # 第二种人格
prompt_personality = f'''{activate_prompt}你的网名叫{global_config.BOT_NICKNAME}{personality[1]}{prompt_in_group}{promt_info_prompt},
现在请你给出日常且口语化的回复,请表现你自己的见解,不要一昧迎合,尽量简短一些。{is_bot_prompt}
prompt_personality += f'''{personality[1]}, 你正在浏览qq群{promt_info_prompt},
现在请你给出日常且口语化的回复,请表现你自己的见解,不要一昧迎合,尽量简短一些。{keywords_reaction_prompt}
请你表达自己的见解和观点。可以有个性。'''
else: # 第三种人格
prompt_personality = f'''{activate_prompt}你的网名叫{global_config.BOT_NICKNAME}{personality[2]}{prompt_in_group}{promt_info_prompt},
现在请你给出日常且口语化的回复,请表现你自己的见解,不要一昧迎合,尽量简短一些。{is_bot_prompt}
prompt_personality += f'''{personality[2]}, 你正在浏览qq群{promt_info_prompt},
现在请你给出日常且口语化的回复,请表现你自己的见解,不要一昧迎合,尽量简短一些。{keywords_reaction_prompt}
请你表达自己的见解和观点。可以有个性。'''
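
The three personality branches above amount to a cumulative-probability pick: one uniform draw compared against running sums of the configured weights. A compact equivalent, assuming the weights sum to at most 1 so the last branch absorbs the remainder:

import random

def pick_personality(personalities, weights):
    # weights 形如 (0.6, 0.3, 0.1),按累积区间选择一条人格描述
    r = random.random()
    cumulative = 0.0
    for text, w in zip(personalities, weights):
        cumulative += w
        if r < cumulative:
            return text
    return personalities[-1]  # 兜底:权重和不足 1 或浮点误差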
# 中文高手(新加的好玩功能)
@@ -191,7 +191,7 @@ class PromptBuilder:
return prompt, prompt_check_if_response
def _build_initiative_prompt_select(self,group_id):
def _build_initiative_prompt_select(self, group_id, probability_1=0.8, probability_2=0.1):
current_date = time.strftime("%Y-%m-%d", time.localtime())
current_time = time.strftime("%H:%M:%S", time.localtime())
bot_schedule_now_time, bot_schedule_now_activity = bot_schedule.get_current_task()
@@ -199,7 +199,9 @@ class PromptBuilder:
chat_talking_prompt = ''
if group_id:
chat_talking_prompt = get_recent_group_detailed_plain_text(self.db, group_id, limit=global_config.MAX_CONTEXT_SIZE,combine = True)
chat_talking_prompt = get_recent_group_detailed_plain_text(self.db, group_id,
limit=global_config.MAX_CONTEXT_SIZE,
combine=True)
chat_talking_prompt = f"以下是群里正在聊天的内容:\n{chat_talking_prompt}"
# print(f"\033[1;34m[调试]\033[0m 已从数据库获取群 {group_id} 的消息记录:{chat_talking_prompt}")
@@ -242,10 +244,9 @@ class PromptBuilder:
prompt_for_initiative = f"{prompt_regular}你现在想在群里发言,回忆了一下,想到一个话题,是{selected_node['concept']},关于这个话题的记忆有\n{memory}\n,请在把握群里的聊天内容的基础上,综合群内的氛围,以日常且口语化的口吻,简短且随意一点进行发言,不要说的太有条理,可以有个性。记住不要输出多余内容(包括前后缀,冒号和引号,括号,表情等)"
return prompt_for_initiative
async def get_prompt_info(self, message: str, threshold: float):
related_info = ''
print(f"\033[1;34m[调试]\033[0m 获取知识库内容,元消息:{message[:30]}...,消息长度: {len(message)}")
logger.debug(f"获取知识库内容,元消息:{message[:30]}...,消息长度: {len(message)}")
embedding = await get_embedding(message)
related_info += self.get_info_from_db(embedding, threshold=threshold)
@@ -319,4 +320,5 @@ class PromptBuilder:
# 返回所有找到的内容,用换行分隔
return '\n'.join(str(result['content']) for result in results)
prompt_builder = PromptBuilder()

View File

@@ -1,6 +1,7 @@
import asyncio
from typing import Optional, Union
from typing import Optional, Union
from loguru import logger
from ...common.database import Database
from .message_base import UserInfo
@@ -13,6 +14,7 @@ class Impression:
relationship_value: float = None
class Relationship:
user_id: int = None
platform: str = None
@@ -121,7 +123,7 @@ class RelationshipManager:
# 如果不存在且提供了user_info则创建新的关系
if user_info is not None:
return await self.update_relationship(chat_stream=chat_stream, **kwargs)
print(f"\033[1;31m[关系管理]\033[0m 用户 {user_id}({platform}) 不存在,无法更新")
logger.warning(f"[关系管理] 用户 {user_id}({platform}) 不存在,无法更新")
return None
def get_relationship(self,
@@ -179,10 +181,10 @@ class RelationshipManager:
# 依次加载每条记录
for data in all_relationships:
await self.load_relationship(data)
print(f"\033[1;32m[关系管理]\033[0m 已加载 {len(self.relationships)} 条关系记录")
logger.debug(f"[关系管理] 已加载 {len(self.relationships)} 条关系记录")
while True:
print("\033[1;32m[关系管理]\033[0m 正在自动保存关系")
logger.debug("正在自动保存关系")
await asyncio.sleep(300) # 等待300秒(5分钟)
await self._save_all_relationships()

View File

@@ -5,6 +5,8 @@ from ...common.database import Database
from .message_base import MessageBase
from .message import MessageSending, MessageRecv
from .chat_stream import ChatStream
from loguru import logger
class MessageStorage:
def __init__(self):
@@ -24,7 +26,7 @@ class MessageStorage:
"topic": topic,
}
self.db.db.messages.insert_one(message_data)
except Exception as e:
print(f"\033[1;31m[错误]\033[0m 存储消息失败: {e}")
except Exception:
logger.exception("存储消息失败")
# 如果需要其他存储相关的函数,可以在这里添加

View File

@@ -4,10 +4,12 @@ from nonebot import get_driver
from ..models.utils_model import LLM_request
from .config import global_config
from loguru import logger
driver = get_driver()
config = driver.config
class TopicIdentifier:
def __init__(self):
self.llm_topic_judge = LLM_request(model=global_config.llm_topic_judge)
@@ -25,7 +27,7 @@ class TopicIdentifier:
topic, _ = await self.llm_topic_judge.generate_response(prompt)
if not topic:
print("\033[1;31m[错误]\033[0m LLM API 返回为空")
logger.error("LLM API 返回为空")
return None
# 直接在这里处理主题解析
@@ -35,7 +37,8 @@ class TopicIdentifier:
# 解析主题字符串为列表
topic_list = [t.strip() for t in topic.split(",") if t.strip()]
print(f"\033[1;32m[主题识别]\033[0m 主题: {topic_list}")
logger.info(f"主题: {topic_list}")
return topic_list if topic_list else None
topic_identifier = TopicIdentifier()

View File

@@ -7,6 +7,7 @@ from typing import Dict, List
import jieba
import numpy as np
from nonebot import get_driver
from loguru import logger
from ..models.utils_model import LLM_request
from ..utils.typo_generator import ChineseTypoGenerator
@@ -21,7 +22,7 @@ config = driver.config
def db_message_to_str(message_dict: Dict) -> str:
print(f"message_dict: {message_dict}")
logger.debug(f"message_dict: {message_dict}")
time_str = time.strftime("%m-%d %H:%M:%S", time.localtime(message_dict["time"]))
try:
name = "[(%s)%s]%s" % (
@@ -30,7 +31,7 @@ def db_message_to_str(message_dict: Dict) -> str:
name = message_dict.get("user_nickname", "") or f"用户{message_dict['user_id']}"
content = message_dict.get("processed_plain_text", "")
result = f"[{time_str}] {name}: {content}\n"
print(f"result: {result}")
logger.debug(f"result: {result}")
return result
@@ -71,8 +72,12 @@ def calculate_information_content(text):
def get_cloest_chat_from_db(db, length: int, timestamp: str):
"""从数据库中获取最接近指定时间戳的聊天记录,并记录读取次数"""
chat_text = ''
"""从数据库中获取最接近指定时间戳的聊天记录,并记录读取次数
Returns:
list: 消息记录字典列表,每个字典包含消息内容和时间信息
"""
chat_records = []
closest_record = db.db.messages.find_one({"time": {"$lte": timestamp}}, sort=[('time', -1)])
if closest_record and closest_record.get('memorized', 0) < 4:
@@ -84,11 +89,10 @@ def get_cloest_chat_from_db(db, length: int, timestamp: str):
).sort('time', 1).limit(length))
# 更新每条消息的memorized属性
for record in chat_records:
# 检查当前记录的memorized值
for record in records:
current_memorized = record.get('memorized', 0)
if current_memorized > 3:
# print(f"消息已读取3次跳过")
print("消息已读取3次跳过")
return ''
# 更新memorized值
@@ -97,11 +101,14 @@ def get_cloest_chat_from_db(db, length: int, timestamp: str):
{"$set": {"memorized": current_memorized + 1}}
)
chat_text += record["detailed_plain_text"]
# 添加到记录列表中
chat_records.append({
'text': record["detailed_plain_text"],
'time': record["time"],
'group_id': record["group_id"]
})
return chat_text
# print(f"消息已读取3次跳过")
return ''
return chat_records
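
The rewritten fetch caps how often a message can feed memory building: every read bumps a `memorized` counter, and a sample containing any message already read more than 3 times is discarded wholesale. A sketch of that read-and-mark pass over a pymongo collection, with field names as in the diff:

def fetch_and_mark(messages_coll, start_time, group_id, length, max_reads=3):
    records = list(messages_coll.find(
        {"time": {"$gt": start_time}, "group_id": group_id}
    ).sort('time', 1).limit(length))
    out = []
    for record in records:
        seen = record.get('memorized', 0)
        if seen > max_reads:
            return []  # 任一消息读取超限,整个样本作废
        messages_coll.update_one({"_id": record["_id"]},
                                 {"$set": {"memorized": seen + 1}})
        out.append({'text': record["detailed_plain_text"],
                    'time': record["time"],
                    'group_id': record["group_id"]})
    return out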
async def get_recent_group_messages(db, chat_id:str, limit: int = 12) -> list:
@@ -142,7 +149,7 @@ async def get_recent_group_messages(db, chat_id:str, limit: int = 12) -> list:
)
message_objects.append(msg)
except KeyError:
print("[WARNING] 数据库中存在无效的消息")
logger.warning("数据库中存在无效的消息")
continue
# 按时间正序排列
@@ -259,11 +266,10 @@ def split_into_sentences_w_remove_punctuation(text: str) -> List[str]:
sentence = sentence.replace('', ' ').replace(',', ' ')
sentences_done.append(sentence)
print(f"处理后的句子: {sentences_done}")
logger.info(f"处理后的句子: {sentences_done}")
return sentences_done
def random_remove_punctuation(text: str) -> str:
"""随机处理标点符号,模拟人类打字习惯
@@ -291,43 +297,70 @@ def random_remove_punctuation(text: str) -> str:
return result
def process_llm_response(text: str) -> List[str]:
# processed_response = process_text_with_typos(content)
if len(text) > 300:
print(f"回复过长 ({len(text)} 字符),返回默认回复")
if len(text) > 200:
logger.warning(f"回复过长 ({len(text)} 字符),返回默认回复")
return ['懒得说']
# 处理长消息
typo_generator = ChineseTypoGenerator(
error_rate=0.03,
min_freq=7,
tone_error_rate=0.2,
word_replace_rate=0.02
error_rate=global_config.chinese_typo_error_rate,
min_freq=global_config.chinese_typo_min_freq,
tone_error_rate=global_config.chinese_typo_tone_error_rate,
word_replace_rate=global_config.chinese_typo_word_replace_rate
)
typoed_text = typo_generator.create_typo_sentence(text)[0]
sentences = split_into_sentences_w_remove_punctuation(typoed_text)
split_sentences = split_into_sentences_w_remove_punctuation(text)
sentences = []
for sentence in split_sentences:
if global_config.chinese_typo_enable:
typoed_text, typo_corrections = typo_generator.create_typo_sentence(sentence)
sentences.append(typoed_text)
if typo_corrections:
sentences.append(typo_corrections)
else:
sentences.append(sentence)
# 检查分割后的消息数量是否过多超过3条
if len(sentences) > 4:
print(f"分割后消息数量过多 ({len(sentences)} 条),返回默认回复")
if len(sentences) > 5:
logger.warning(f"分割后消息数量过多 ({len(sentences)} 条),返回默认回复")
return [f'{global_config.BOT_NICKNAME}不知道哦']
return sentences
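
Note the ordering change above: the text is split into sentences first, and typos are injected per sentence; when a typo lands, the correction string is appended as a separate message, imitating a human sending a quick follow-up fix. A sketch of that loop, assuming (per the diff) `create_typo_sentence` returns `(typoed_text, correction_or_None)`:

def apply_typos(sentences, typo_generator, enabled: bool):
    out = []
    for sentence in sentences:
        if not enabled:
            out.append(sentence)
            continue
        typoed, correction = typo_generator.create_typo_sentence(sentence)
        out.append(typoed)
        if correction:           # 打错字后补发一条更正消息
            out.append(correction)
    return out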
def calculate_typing_time(input_string: str, chinese_time: float = 0.2, english_time: float = 0.1) -> float:
def calculate_typing_time(input_string: str, chinese_time: float = 0.4, english_time: float = 0.2) -> float:
"""
计算输入字符串所需的时间,中文和英文字符有不同的输入时间
input_string (str): 输入的字符串
chinese_time (float): 中文字符的输入时间默认为0.3
english_time (float): 英文字符的输入时间默认为0.15
chinese_time (float): 中文字符的输入时间默认为0.2
english_time (float): 英文字符的输入时间默认为0.1秒
特殊情况:
- 如果只有一个中文字符将使用3倍的中文输入时间
- 在所有输入结束后额外加上回车时间0.3秒
"""
mood_manager = MoodManager.get_instance()
# 将0-1的唤醒度映射到-1到1
mood_arousal = mood_manager.current_mood.arousal
# 映射到0.5到2倍的速度系数
typing_speed_multiplier = 1.5 ** mood_arousal # 唤醒度为1时速度翻倍,为-1时速度减半
chinese_time *= 1 / typing_speed_multiplier
english_time *= 1 / typing_speed_multiplier
# 计算中文字符数
chinese_chars = sum(1 for char in input_string if '\u4e00' <= char <= '\u9fff')
# 如果只有一个中文字符使用3倍时间
if chinese_chars == 1 and len(input_string.strip()) == 1:
return chinese_time * 3 + 0.3 # 加上回车时间
# 正常计算所有字符的输入时间
total_time = 0.0
for char in input_string:
if '\u4e00' <= char <= '\u9fff': # 判断是否为中文字符
total_time += chinese_time
else: # 其他字符(如英文)
total_time += english_time
return total_time
return total_time + 0.3 # 加上回车时间
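
The arousal hook above converts mood into typing speed: `1.5 ** arousal` is 1.5 at arousal 1 and about 0.67 at −1, and both per-character costs are divided by it. A worked example with the new defaults (0.4 s per Chinese character, 0.2 s per other character, 0.3 s for the closing Enter), assuming neutral arousal:

chinese_time, english_time = 0.4, 0.2
multiplier = 1.5 ** 0.0            # 中性心情,系数为 1.0
text = "你好ok"                    # 2 个中文字符 + 2 个其他字符
total = (2 * chinese_time + 2 * english_time) / multiplier + 0.3
print(total)                       # 1.5 秒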
def cosine_similarity(v1, v2):

View File

@@ -16,6 +16,8 @@ class WillingManager:
self.chat_reply_willing: Dict[str, float] = {} # 存储每个聊天流的回复意愿
self._decay_task = None
self._started = False
self.min_reply_willing = 0.01
self.attenuation_coefficient = 0.75
async def _decay_reply_willing(self):
"""定期衰减回复意愿"""
@@ -33,12 +35,9 @@ class WillingManager:
return self.chat_reply_willing.get(stream.stream_id, 0)
return 0
def set_willing(self, chat_id: str, willing: float):
"""设置指定聊天流的回复意愿"""
self.chat_reply_willing[chat_id] = willing
def set_willing(self, chat_id: str, willing: float):
"""设置指定聊天流的回复意愿"""
self.chat_reply_willing[chat_id] = willing
def set_willing(self, chat_id: int, willing: float):
"""设置指定群组的回复意愿"""
self.group_reply_willing[chat_id] = willing
async def change_reply_willing_received(self,
chat_stream:ChatStream,
@@ -51,47 +50,67 @@ class WillingManager:
# 获取或创建聊天流
stream = chat_stream
chat_id = stream.stream_id
group_id = stream.group_info.group_id
# 若非目标回复群组则直接return
if group_id not in config.talk_allowed_groups:
reply_probability = 0
return reply_probability
current_willing = self.chat_reply_willing.get(chat_id, 0)
# print(f"初始意愿: {current_willing}")
if is_mentioned_bot and current_willing < 1.0:
current_willing += 0.9
print(f"被提及, 当前意愿: {current_willing}")
elif is_mentioned_bot:
current_willing += 0.05
print(f"被重复提及, 当前意愿: {current_willing}")
logger.debug(f"[{chat_id}]的初始回复意愿: {current_willing}")
# 根据消息类型被cue/表情包)调控
if is_mentioned_bot:
current_willing = min(
3.0,
current_willing + 0.9
)
logger.debug(f"被提及, 当前意愿: {current_willing}")
if is_emoji:
current_willing *= 0.1
print(f"表情包, 当前意愿: {current_willing}")
logger.debug(f"表情包, 当前意愿: {current_willing}")
print(f"放大系数_interested_rate: {global_config.response_interested_rate_amplifier}")
interested_rate *= global_config.response_interested_rate_amplifier #放大回复兴趣度
if interested_rate > 0.4:
# print(f"兴趣度: {interested_rate}, 当前意愿: {current_willing}")
current_willing += interested_rate-0.4
# 兴趣放大系数,若兴趣 > 0.4则增加回复概率
interested_rate_amplifier = global_config.response_interested_rate_amplifier
logger.debug(f"放大系数_interested_rate: {interested_rate_amplifier}")
interested_rate *= interested_rate_amplifier
current_willing *= global_config.response_willing_amplifier #放大回复意愿
# print(f"放大系数_willing: {global_config.response_willing_amplifier}, 当前意愿: {current_willing}")
current_willing += max(
0.0,
interested_rate - 0.4
)
reply_probability = max((current_willing - 0.45) * 2, 0)
# 回复意愿系数调控,独立乘区
willing_amplifier = max(
global_config.response_willing_amplifier,
self.min_reply_willing
)
current_willing *= willing_amplifier
logger.debug(f"放大系数_willing: {global_config.response_willing_amplifier}, 当前意愿: {current_willing}")
# 检查群组权限(如果是群聊)
if chat_stream.group_info:
if chat_stream.group_info.group_id not in config.talk_allowed_groups:
current_willing = 0
reply_probability = 0
# 回复概率迭代保底0.01回复概率
reply_probability = max(
(current_willing - 0.45) * 2,
self.min_reply_willing
)
if chat_stream.group_info.group_id in config.talk_frequency_down_groups:
reply_probability = reply_probability / global_config.down_frequency_rate
# 降低目标低频群组回复概率
down_frequency_rate = max(
1.0,
global_config.down_frequency_rate
)
if group_id in config.talk_frequency_down_groups:
reply_probability = reply_probability / down_frequency_rate
reply_probability = min(reply_probability, 1)
if reply_probability < 0:
reply_probability = 0
self.chat_reply_willing[chat_id] = min(current_willing, 3.0)
self.chat_reply_willing[chat_id] = min(current_willing, 3.0)
self.group_reply_willing[group_id] = min(current_willing, 3.0)
logger.debug(f"当前群组{group_id}回复概率:{reply_probability}")
return reply_probability
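
End to end, the rewritten method is a small pipeline: mentions add 0.9 willingness (capped at 3.0), emoji messages are damped to a tenth, interest above a 0.4 threshold adds the excess, a configurable amplifier scales the result, `(w - 0.45) * 2` maps willingness to probability with a 0.01 floor, and low-frequency groups divide it before clamping to [0, 1]. A condensed sketch with the constants from the diff (the function name and flat signature are illustrative):

def reply_probability(willing, mentioned, is_emoji, interest,
                      amplifier=1.0, in_down_group=False, down_rate=1.0):
    if mentioned:
        willing = min(3.0, willing + 0.9)
    if is_emoji:
        willing *= 0.1
    willing += max(0.0, interest - 0.4)
    willing *= max(amplifier, 0.01)
    p = max((willing - 0.45) * 2, 0.01)   # 保底 0.01 的回复概率
    if in_down_group:
        p /= max(1.0, down_rate)          # 低频群组进一步降低概率
    return min(max(p, 0.0), 1.0)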
def change_reply_willing_sent(self, chat_stream:ChatStream):
@@ -116,5 +135,6 @@ class WillingManager:
self._decay_task = asyncio.create_task(self._decay_reply_willing())
self._started = True
# 创建全局实例
willing_manager = WillingManager()

View File

@@ -19,7 +19,7 @@ from src.common.database import Database
# 从环境变量获取配置
Database.initialize(
host=os.getenv("MONGODB_HOST", "localhost"),
host=os.getenv("MONGODB_HOST", "127.0.0.1"),
port=int(os.getenv("MONGODB_PORT", "27017")),
db_name=os.getenv("DATABASE_NAME", "maimai"),
username=os.getenv("MONGODB_USERNAME"),
@@ -79,7 +79,7 @@ class KnowledgeLibrary:
content = f.read()
# 按1024字符分段
segments = [content[i:i+600] for i in range(0, len(content), 600)]
segments = [content[i:i+600] for i in range(0, len(content), 300)]
# 处理每个分段
for segment in segments:

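The stride change above is worth pausing on: the window stays at 600 characters but now advances only 300 at a time, so consecutive segments overlap by half and knowledge straddling a boundary still lands intact in at least one chunk. A quick illustration of the resulting windows:

content = "x" * 1000
segments = [content[i:i + 600] for i in range(0, len(content), 300)]
# 窗口起点:0, 300, 600, 900,相邻段重叠 300 字符
print([len(s) for s in segments])   # [600, 600, 400, 100]
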
View File


@@ -7,6 +7,7 @@ import jieba
import matplotlib.pyplot as plt
import networkx as nx
from dotenv import load_dotenv
from loguru import logger
sys.path.append("C:/GitHub/MaiMBot") # 添加项目根目录到 Python 路径
from src.common.database import Database # 使用正确的导入语法
@@ -99,18 +100,20 @@ class Memory_graph:
# 返回所有节点对应的 Memory_dot 对象
return [self.get_dot(node) for node in self.G.nodes()]
def get_random_chat_from_db(self, length: int, timestamp: str):
# 从数据库中根据时间戳获取离其最近的聊天记录
chat_text = ''
closest_record = self.db.db.messages.find_one({"time": {"$lte": timestamp}}, sort=[('time', -1)]) # 调试输出
print(f"距离time最近的消息时间: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(int(closest_record['time'])))}")
logger.info(
f"距离time最近的消息时间: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(int(closest_record['time'])))}")
if closest_record:
closest_time = closest_record['time']
group_id = closest_record['group_id'] # 获取groupid
# 获取该时间戳之后的length条消息且groupid相同
chat_record = list(self.db.db.messages.find({"time": {"$gt": closest_time}, "group_id": group_id}).sort('time', 1).limit(length))
chat_record = list(
self.db.db.messages.find({"time": {"$gt": closest_time}, "group_id": group_id}).sort('time', 1).limit(
length))
for record in chat_record:
time_str = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(int(record['time'])))
try:
@@ -179,90 +182,30 @@ def main():
break
first_layer_items, second_layer_items = memory_graph.get_related_item(query)
if first_layer_items or second_layer_items:
print("\n第一层记忆:")
logger.debug("第一层记忆:")
for item in first_layer_items:
print(item)
print("\n第二层记忆:")
logger.debug(item)
logger.debug("第二层记忆:")
for item in second_layer_items:
print(item)
logger.debug(item)
else:
print("未找到相关记忆。")
logger.debug("未找到相关记忆。")
def segment_text(text):
seg_text = list(jieba.cut(text))
return seg_text
def find_topic(text, topic_num):
prompt = f'这是一段文字:{text}。请你从这段话中总结出{topic_num}个话题,帮我列出来,用逗号隔开,尽可能精简。只需要列举{topic_num}个话题就好,不要告诉我其他内容。'
return prompt
def topic_what(text, topic):
prompt = f'这是一段文字:{text}。我想知道这记忆里有什么关于{topic}的话题,帮我总结成一句自然的话,可以包含时间和人物。只输出这句话就好'
return prompt
def visualize_graph(memory_graph: Memory_graph, color_by_memory: bool = False):
# 设置中文字体
plt.rcParams['font.sans-serif'] = ['SimHei'] # 用来正常显示中文标签
plt.rcParams['axes.unicode_minus'] = False # 用来正常显示负号
G = memory_graph.G
# 保存图到本地
nx.write_gml(G, "memory_graph.gml") # 保存为 GML 格式
# 根据连接条数或记忆数量设置节点颜色
node_colors = []
nodes = list(G.nodes()) # 获取图中实际的节点列表
if color_by_memory:
# 计算每个节点的记忆数量
memory_counts = []
for node in nodes:
memory_items = G.nodes[node].get('memory_items', [])
if isinstance(memory_items, list):
count = len(memory_items)
else:
count = 1 if memory_items else 0
memory_counts.append(count)
max_memories = max(memory_counts) if memory_counts else 1
for count in memory_counts:
# 使用不同的颜色方案:红色表示记忆多,蓝色表示记忆少
if max_memories > 0:
intensity = min(1.0, count / max_memories)
color = (intensity, 0, 1.0 - intensity) # 从蓝色渐变到红色
else:
color = (0, 0, 1) # 如果没有记忆,则为蓝色
node_colors.append(color)
else:
# 使用原来的连接数量着色方案
max_degree = max(G.degree(), key=lambda x: x[1])[1] if G.degree() else 1
for node in nodes:
degree = G.degree(node)
if max_degree > 0:
red = min(1.0, degree / max_degree)
blue = 1.0 - red
color = (red, 0, blue)
else:
color = (0, 0, 1)
node_colors.append(color)
# 绘制图形
plt.figure(figsize=(12, 8))
pos = nx.spring_layout(G, k=1, iterations=50)
nx.draw(G, pos,
with_labels=True,
node_color=node_colors,
node_size=200,
font_size=10,
font_family='SimHei',
font_weight='bold')
title = '记忆图谱可视化 - ' + ('按记忆数量着色' if color_by_memory else '按连接数量着色')
plt.title(title, fontsize=16, fontfamily='SimHei')
plt.show()
def visualize_graph_lite(memory_graph: Memory_graph, color_by_memory: bool = False):
# 设置中文字体
@@ -280,18 +223,18 @@ def visualize_graph_lite(memory_graph: Memory_graph, color_by_memory: bool = Fal
memory_items = H.nodes[node].get('memory_items', [])
memory_count = len(memory_items) if isinstance(memory_items, list) else (1 if memory_items else 0)
degree = H.degree(node)
if memory_count < 5 or degree < 2: # 改为小于2而不是小于等于2
if memory_count < 3 or degree < 2: # 改为小于2而不是小于等于2
nodes_to_remove.append(node)
H.remove_nodes_from(nodes_to_remove)
# 如果过滤后没有节点,则返回
if len(H.nodes()) == 0:
print("过滤后没有符合条件的节点可显示")
logger.debug("过滤后没有符合条件的节点可显示")
return
# 保存图到本地
nx.write_gml(H, "memory_graph.gml") # 保存为 GML 格式
# nx.write_gml(H, "memory_graph.gml") # 保存为 GML 格式
# 计算节点大小和颜色
node_colors = []
@@ -315,21 +258,23 @@ def visualize_graph_lite(memory_graph: Memory_graph, color_by_memory: bool = Fal
memory_count = len(memory_items) if isinstance(memory_items, list) else (1 if memory_items else 0)
# 使用指数函数使变化更明显
ratio = memory_count / max_memories
size = 500 + 5000 * (ratio ** 2) # 使用平方函数使差异明显
size = 500 + 5000 * ratio # 线性映射,使差异不那么明显
node_sizes.append(size)
# 计算节点颜色(基于连接数)
degree = H.degree(node)
# 红色分量随着度数增加而增加
red = min(1.0, degree / max_degree)
r = (degree / max_degree) ** 0.3
red = min(1.0, r)
# 蓝色分量随着度数减少而增加
blue = 1.0 - red
color = (red, 0, blue)
blue = max(0.0, 1 - red)
# blue = 1
color = (red, 0.1, blue)
node_colors.append(color)
# 绘制图形
plt.figure(figsize=(12, 8))
pos = nx.spring_layout(H, k=1.5, iterations=50) # 增加k值使节点分布更开
pos = nx.spring_layout(H, k=1, iterations=50) # 增加k值使节点分布更开
nx.draw(H, pos,
with_labels=True,
node_color=node_colors,
@@ -339,13 +284,12 @@ def visualize_graph_lite(memory_graph: Memory_graph, color_by_memory: bool = Fal
font_weight='bold',
edge_color='gray',
width=0.5,
alpha=0.7)
alpha=0.9)
title = '记忆图谱可视化 - 节点大小表示记忆数量,颜色表示连接数'
plt.title(title, fontsize=16, fontfamily='SimHei')
plt.show()
if __name__ == "__main__":
main()
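
The styling tweaks in this file flatten two mappings: node size now grows linearly with memory count rather than quadratically, and the red channel uses `(degree / max_degree) ** 0.3`, which lifts low-degree nodes so sparsely connected topics still read as warm. The mapping in isolation, as a sketch:

def node_style(memory_count, max_memories, degree, max_degree):
    ratio = memory_count / max_memories if max_memories else 0.0
    size = 500 + 5000 * ratio                          # 线性:记忆越多节点越大
    red = min(1.0, (degree / max_degree) ** 0.3) if max_degree else 0.0
    blue = max(0.0, 1 - red)
    return size, (red, 0.1, blue)                      # 绿色通道固定为 0.1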

View File

@@ -7,6 +7,7 @@ import time
import jieba
import networkx as nx
from loguru import logger
from ...common.database import Database # 使用正确的导入语法
from ..chat.config import global_config
from ..chat.utils import (
@@ -24,26 +25,46 @@ class Memory_graph:
self.db = Database.get_instance()
def connect_dot(self, concept1, concept2):
# 如果边已存在,增加 strength
# 避免自连接
if concept1 == concept2:
return
current_time = datetime.datetime.now().timestamp()
# 如果边已存在,增加 strength
if self.G.has_edge(concept1, concept2):
self.G[concept1][concept2]['strength'] = self.G[concept1][concept2].get('strength', 1) + 1
# 更新最后修改时间
self.G[concept1][concept2]['last_modified'] = current_time
else:
# 如果是新边初始化 strength 为 1
self.G.add_edge(concept1, concept2, strength=1)
# 如果是新边,初始化 strength 为 1
self.G.add_edge(concept1, concept2,
strength=1,
created_time=current_time, # 添加创建时间
last_modified=current_time) # 添加最后修改时间
def add_dot(self, concept, memory):
current_time = datetime.datetime.now().timestamp()
if concept in self.G:
# 如果节点已存在,将新记忆添加到现有列表中
if 'memory_items' in self.G.nodes[concept]:
if not isinstance(self.G.nodes[concept]['memory_items'], list):
# 如果当前不是列表,将其转换为列表
self.G.nodes[concept]['memory_items'] = [self.G.nodes[concept]['memory_items']]
self.G.nodes[concept]['memory_items'].append(memory)
# 更新最后修改时间
self.G.nodes[concept]['last_modified'] = current_time
else:
self.G.nodes[concept]['memory_items'] = [memory]
# 如果节点存在但没有memory_items,说明是第一次添加memory,设置created_time
if 'created_time' not in self.G.nodes[concept]:
self.G.nodes[concept]['created_time'] = current_time
self.G.nodes[concept]['last_modified'] = current_time
else:
# 如果是新节点创建新的记忆列表
self.G.add_node(concept, memory_items=[memory])
# 如果是新节点,创建新的记忆列表
self.G.add_node(concept,
memory_items=[memory],
created_time=current_time, # 添加创建时间
last_modified=current_time) # 添加最后修改时间
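
With `created_time` and `last_modified` stamped on every node and edge, the graph carries the metadata the time-based forgetting pass below depends on. A minimal networkx check that the attributes land where expected:

import datetime
import networkx as nx

G = nx.Graph()
now = datetime.datetime.now().timestamp()
G.add_node("话题A", memory_items=["某条记忆"], created_time=now, last_modified=now)
G.add_edge("话题A", "话题B", strength=1, created_time=now, last_modified=now)
print(G.nodes["话题A"]["last_modified"], G["话题A"]["话题B"]["strength"])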
def get_dot(self, concept):
# 检查节点是否存在于图中
@@ -158,56 +179,116 @@ class Hippocampus:
return hash(f"{nodes[0]}:{nodes[1]}")
def get_memory_sample(self, chat_size=20, time_frequency: dict = {'near': 2, 'mid': 4, 'far': 3}):
"""获取记忆样本
Returns:
list: 消息记录列表,每个元素是一个消息记录字典列表
"""
current_timestamp = datetime.datetime.now().timestamp()
chat_text = []
chat_samples = []
# 短期1h 中期4h 长期24h
for _ in range(time_frequency.get('near')): # 循环10次
random_time = current_timestamp - random.randint(1, 3600) # 随机时间
chat_ = get_cloest_chat_from_db(db=self.memory_graph.db, length=chat_size, timestamp=random_time)
chat_text.append(chat_)
for _ in range(time_frequency.get('mid')): # 循环10次
random_time = current_timestamp - random.randint(3600, 3600*4) # 随机时间
chat_ = get_cloest_chat_from_db(db=self.memory_graph.db, length=chat_size, timestamp=random_time)
chat_text.append(chat_)
for _ in range(time_frequency.get('far')): # 循环10次
random_time = current_timestamp - random.randint(3600*4, 3600*24) # 随机时间
chat_ = get_cloest_chat_from_db(db=self.memory_graph.db, length=chat_size, timestamp=random_time)
chat_text.append(chat_)
return [text for text in chat_text if text]
for _ in range(time_frequency.get('near')):
random_time = current_timestamp - random.randint(1, 3600)
messages = get_cloest_chat_from_db(db=self.memory_graph.db, length=chat_size, timestamp=random_time)
if messages:
chat_samples.append(messages)
async def memory_compress(self, input_text, compress_rate=0.1):
print(input_text)
for _ in range(time_frequency.get('mid')):
random_time = current_timestamp - random.randint(3600, 3600 * 4)
messages = get_cloest_chat_from_db(db=self.memory_graph.db, length=chat_size, timestamp=random_time)
if messages:
chat_samples.append(messages)
for _ in range(time_frequency.get('far')):
random_time = current_timestamp - random.randint(3600 * 4, 3600 * 24)
messages = get_cloest_chat_from_db(db=self.memory_graph.db, length=chat_size, timestamp=random_time)
if messages:
chat_samples.append(messages)
return chat_samples
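
The sampler draws `near`/`mid`/`far` counts of anchor timestamps from three windows (within the last hour, 1–4 h, 4–24 h) and pulls a chat slice around each, so recent conversation dominates while older material still surfaces. A condensed sketch, with `fetch` standing in for `get_cloest_chat_from_db`:

import datetime
import random

WINDOWS = {'near': (1, 3600), 'mid': (3600, 3600 * 4), 'far': (3600 * 4, 3600 * 24)}

def sample_timestamps(time_frequency):
    now = datetime.datetime.now().timestamp()
    for bucket, count in time_frequency.items():
        lo, hi = WINDOWS[bucket]
        for _ in range(count):
            yield now - random.randint(lo, hi)

# samples = [m for ts in sample_timestamps({'near': 3, 'mid': 8, 'far': 5})
#            if (m := fetch(db, chat_size, ts))]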
async def memory_compress(self, messages: list, compress_rate=0.1):
"""压缩消息记录为记忆
Returns:
tuple: (压缩记忆集合, 相似主题字典)
"""
if not messages:
return set(), {}
# 合并消息文本,同时保留时间信息
input_text = ""
time_info = ""
# 计算最早和最晚时间
earliest_time = min(msg['time'] for msg in messages)
latest_time = max(msg['time'] for msg in messages)
earliest_dt = datetime.datetime.fromtimestamp(earliest_time)
latest_dt = datetime.datetime.fromtimestamp(latest_time)
# 如果是同一年
if earliest_dt.year == latest_dt.year:
earliest_str = earliest_dt.strftime("%m-%d %H:%M:%S")
latest_str = latest_dt.strftime("%m-%d %H:%M:%S")
time_info += f"是在{earliest_dt.year}年,{earliest_str}{latest_str} 的对话:\n"
else:
earliest_str = earliest_dt.strftime("%Y-%m-%d %H:%M:%S")
latest_str = latest_dt.strftime("%Y-%m-%d %H:%M:%S")
time_info += f"是从 {earliest_str}{latest_str} 的对话:\n"
for msg in messages:
input_text += f"{msg['text']}\n"
logger.debug(input_text)
#获取topics
topic_num = self.calculate_topic_num(input_text, compress_rate)
topics_response = await self.llm_topic_judge.generate_response(self.find_topic_llm(input_text, topic_num))
# 修改话题处理逻辑
# 定义需要过滤的关键词
filter_keywords = ['表情包', '图片', '回复', '聊天记录']
# 过滤topics
topics = [topic.strip() for topic in topics_response[0].replace("", ",").replace("", ",").replace(" ", ",").split(",") if topic.strip()]
filter_keywords = global_config.memory_ban_words
topics = [topic.strip() for topic in
topics_response[0].replace("", ",").replace("", ",").replace(" ", ",").split(",") if topic.strip()]
filtered_topics = [topic for topic in topics if not any(keyword in topic for keyword in filter_keywords)]
# print(f"原始话题: {topics}")
print(f"过滤后话题: {filtered_topics}")
logger.info(f"过滤后话题: {filtered_topics}")
# 使用过滤后的话题继续处理
# 创建所有话题的请求任务
tasks = []
for topic in filtered_topics:
topic_what_prompt = self.topic_what(input_text, topic)
# 创建异步任务
topic_what_prompt = self.topic_what(input_text, topic, time_info)
task = self.llm_summary_by_topic.generate_response_async(topic_what_prompt)
tasks.append((topic.strip(), task))
# 等待所有任务完成
compressed_memory = set()
similar_topics_dict = {} # 存储每个话题的相似主题列表
for topic, task in tasks:
response = await task
if response:
compressed_memory.add((topic, response[0]))
# 为每个话题查找相似的已存在主题
existing_topics = list(self.memory_graph.G.nodes())
similar_topics = []
return compressed_memory
for existing_topic in existing_topics:
topic_words = set(jieba.cut(topic))
existing_words = set(jieba.cut(existing_topic))
all_words = topic_words | existing_words
v1 = [1 if word in topic_words else 0 for word in all_words]
v2 = [1 if word in existing_words else 0 for word in all_words]
similarity = cosine_similarity(v1, v2)
if similarity >= 0.6:
similar_topics.append((existing_topic, similarity))
similar_topics.sort(key=lambda x: x[1], reverse=True)
similar_topics = similar_topics[:5]
similar_topics_dict[topic] = similar_topics
return compressed_memory, similar_topics_dict
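
The similar-topic linking above builds a binary bag-of-words over the union of jieba tokens from both topic strings and takes cosine similarity, keeping the top 5 matches at or above 0.6. A self-contained version of that similarity test:

import jieba

def topic_similarity(a: str, b: str) -> float:
    wa, wb = set(jieba.cut(a)), set(jieba.cut(b))
    vocab = wa | wb
    v1 = [1 if w in wa else 0 for w in vocab]
    v2 = [1 if w in wb else 0 for w in vocab]
    dot = sum(x * y for x, y in zip(v1, v2))
    norm = (sum(v1) * sum(v2)) ** 0.5   # 0/1 向量的模长乘积
    return dot / norm if norm else 0.0

# 相似度 >= 0.6 时连边,边强度取 int(similarity * 10)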
def calculate_topic_num(self, text, compress_rate):
"""计算文本的话题数量"""
@@ -215,37 +296,46 @@ class Hippocampus:
topic_by_length = text.count('\n') * compress_rate
topic_by_information_content = max(1, min(5, int((information_content - 3) * 2)))
topic_num = int((topic_by_length + topic_by_information_content) / 2)
print(f"topic_by_length: {topic_by_length}, topic_by_information_content: {topic_by_information_content}, topic_num: {topic_num}")
logger.debug(
f"topic_by_length: {topic_by_length}, topic_by_information_content: {topic_by_information_content}, "
f"topic_num: {topic_num}")
return topic_num
async def operation_build_memory(self, chat_size=20):
# 最近消息获取频率
time_frequency = {'near':2,'mid':4,'far':2}
memory_sample = self.get_memory_sample(chat_size,time_frequency)
time_frequency = {'near': 3, 'mid': 8, 'far': 5}
memory_samples = self.get_memory_sample(chat_size, time_frequency)
for i, input_text in enumerate(memory_sample, 1):
# 加载进度可视化
for i, messages in enumerate(memory_samples, 1):
all_topics = []
progress = (i / len(memory_sample)) * 100
# 加载进度可视化
progress = (i / len(memory_samples)) * 100
bar_length = 30
filled_length = int(bar_length * i // len(memory_sample))
filled_length = int(bar_length * i // len(memory_samples))
bar = '' * filled_length + '-' * (bar_length - filled_length)
print(f"\n进度: [{bar}] {progress:.1f}% ({i}/{len(memory_sample)})")
logger.debug(f"进度: [{bar}] {progress:.1f}% ({i}/{len(memory_samples)})")
# 生成压缩后记忆 ,表现为 (话题,记忆) 的元组
compressed_memory = set()
compress_rate = 0.1
compressed_memory = await self.memory_compress(input_text, compress_rate)
print(f"\033[1;33m压缩后记忆数量\033[0m: {len(compressed_memory)}")
compressed_memory, similar_topics_dict = await self.memory_compress(messages, compress_rate)
logger.info(f"压缩后记忆数量: {len(compressed_memory)},似曾相识的话题: {len(similar_topics_dict)}")
# 将记忆加入到图谱中
for topic, memory in compressed_memory:
print(f"\033[1;32m添加节点\033[0m: {topic}")
logger.info(f"添加节点: {topic}")
self.memory_graph.add_dot(topic, memory)
all_topics.append(topic) # 收集所有话题
all_topics.append(topic)
# 连接相似的已存在主题
if topic in similar_topics_dict:
similar_topics = similar_topics_dict[topic]
for similar_topic, similarity in similar_topics:
if topic != similar_topic:
strength = int(similarity * 10)
logger.info(f"连接相似节点: {topic}{similar_topic} (强度: {strength})")
self.memory_graph.G.add_edge(topic, similar_topic, strength=strength)
# 连接同批次的相关话题
for i in range(len(all_topics)):
for j in range(i + 1, len(all_topics)):
print(f"\033[1;32m连接节点\033[0m: {all_topics[i]}{all_topics[j]}")
logger.info(f"连接同批次节点: {all_topics[i]}{all_topics[j]}")
self.memory_graph.connect_dot(all_topics[i], all_topics[j])
self.sync_memory_to_db()
@@ -256,7 +346,7 @@ class Hippocampus:
db_nodes = list(self.memory_graph.db.db.graph_data.nodes.find())
memory_nodes = list(self.memory_graph.G.nodes(data=True))
# 转换数据库节点为字典格式方便查找
# 转换数据库节点为字典格式,方便查找
db_nodes_dict = {node['concept']: node for node in db_nodes}
# 检查并更新节点
@@ -268,12 +358,18 @@ class Hippocampus:
# 计算内存中节点的特征值
memory_hash = self.calculate_node_hash(concept, memory_items)
# 获取时间信息
created_time = data.get('created_time', datetime.datetime.now().timestamp())
last_modified = data.get('last_modified', datetime.datetime.now().timestamp())
if concept not in db_nodes_dict:
# 数据库中缺少的节点添加
# 数据库中缺少的节点,添加
node_data = {
'concept': concept,
'memory_items': memory_items,
'hash': memory_hash
'hash': memory_hash,
'created_time': created_time,
'last_modified': last_modified
}
self.memory_graph.db.db.graph_data.nodes.insert_one(node_data)
else:
@@ -281,25 +377,21 @@ class Hippocampus:
db_node = db_nodes_dict[concept]
db_hash = db_node.get('hash', None)
# 如果特征值不同则更新节点
# 如果特征值不同,则更新节点
if db_hash != memory_hash:
self.memory_graph.db.db.graph_data.nodes.update_one(
{'concept': concept},
{'$set': {
'memory_items': memory_items,
'hash': memory_hash
'hash': memory_hash,
'created_time': created_time,
'last_modified': last_modified
}}
)
# 检查并删除数据库中多余的节点
memory_concepts = set(node[0] for node in memory_nodes)
for db_node in db_nodes:
if db_node['concept'] not in memory_concepts:
self.memory_graph.db.db.graph_data.nodes.delete_one({'concept': db_node['concept']})
# 处理边的信息
db_edges = list(self.memory_graph.db.db.graph_data.edges.find())
memory_edges = list(self.memory_graph.G.edges())
memory_edges = list(self.memory_graph.G.edges(data=True))
# 创建边的哈希值字典
db_edge_dict = {}
@@ -311,10 +403,14 @@ class Hippocampus:
}
# 检查并更新边
for source, target in memory_edges:
for source, target, data in memory_edges:
edge_hash = self.calculate_edge_hash(source, target)
edge_key = (source, target)
strength = self.memory_graph.G[source][target].get('strength', 1)
strength = data.get('strength', 1)
# 获取边的时间信息
created_time = data.get('created_time', datetime.datetime.now().timestamp())
last_modified = data.get('last_modified', datetime.datetime.now().timestamp())
if edge_key not in db_edge_dict:
# 添加新边
@@ -322,7 +418,9 @@ class Hippocampus:
'source': source,
'target': target,
'strength': strength,
'hash': edge_hash
'hash': edge_hash,
'created_time': created_time,
'last_modified': last_modified
}
self.memory_graph.db.db.graph_data.edges.insert_one(edge_data)
else:
@@ -332,20 +430,12 @@ class Hippocampus:
{'source': source, 'target': target},
{'$set': {
'hash': edge_hash,
'strength': strength
'strength': strength,
'created_time': created_time,
'last_modified': last_modified
}}
)
# 删除多余的边
memory_edge_set = set(memory_edges)
for edge_key in db_edge_dict:
if edge_key not in memory_edge_set:
source, target = edge_key
self.memory_graph.db.db.graph_data.edges.delete_one({
'source': source,
'target': target
})
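
The sync routine avoids rewriting unchanged documents by keeping a content hash next to each node and edge: recompute in memory, compare against the stored value, and write only on mismatch. A sketch of the node-side upsert decision, with the collection layout from the diff; the hash line mirrors the built-in `hash` used by `calculate_edge_hash` above, which is process-local, so a persistent deployment would want a stable digest instead:

def sync_node(nodes_coll, concept, memory_items, created_time, last_modified):
    memory_hash = hash(f"{concept}:{sorted(memory_items)}")  # 示意;跨进程持久化宜用稳定摘要
    doc = {'concept': concept, 'memory_items': memory_items,
           'hash': memory_hash, 'created_time': created_time,
           'last_modified': last_modified}
    existing = nodes_coll.find_one({'concept': concept})
    if existing is None:
        nodes_coll.insert_one(doc)                            # 新节点,直接插入
    elif existing.get('hash') != memory_hash:
        nodes_coll.update_one({'concept': concept}, {'$set': doc})  # 内容变化才写库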
def sync_memory_from_db(self):
"""从数据库同步数据到内存中的图结构"""
# 清空当前图
@@ -359,61 +449,107 @@ class Hippocampus:
# 确保memory_items是列表
if not isinstance(memory_items, list):
memory_items = [memory_items] if memory_items else []
# 获取时间信息
created_time = node.get('created_time', datetime.datetime.now().timestamp())
last_modified = node.get('last_modified', datetime.datetime.now().timestamp())
# 添加节点到图中
self.memory_graph.G.add_node(concept, memory_items=memory_items)
self.memory_graph.G.add_node(concept,
memory_items=memory_items,
created_time=created_time,
last_modified=last_modified)
# 从数据库加载所有边
edges = self.memory_graph.db.db.graph_data.edges.find()
for edge in edges:
source = edge['source']
target = edge['target']
strength = edge.get('strength', 1) # 获取 strength默认为 1
strength = edge.get('strength', 1) # 获取 strength,默认为 1
# 获取时间信息
created_time = edge.get('created_time', datetime.datetime.now().timestamp())
last_modified = edge.get('last_modified', datetime.datetime.now().timestamp())
# 只有当源节点和目标节点都存在时才添加边
if source in self.memory_graph.G and target in self.memory_graph.G:
self.memory_graph.G.add_edge(source, target, strength=strength)
self.memory_graph.G.add_edge(source, target,
strength=strength,
created_time=created_time,
last_modified=last_modified)
async def operation_forget_topic(self, percentage=0.1):
"""随机选择图中一定比例的节点进行检查根据条件决定是否遗忘"""
# 获取所有节点
"""随机选择图中一定比例的节点和边进行检查,根据时间条件决定是否遗忘"""
all_nodes = list(self.memory_graph.G.nodes())
# 计算要检查的节点数量
check_count = max(1, int(len(all_nodes) * percentage))
# 随机选择节点
nodes_to_check = random.sample(all_nodes, check_count)
all_edges = list(self.memory_graph.G.edges())
forgotten_nodes = []
check_nodes_count = max(1, int(len(all_nodes) * percentage))
check_edges_count = max(1, int(len(all_edges) * percentage))
nodes_to_check = random.sample(all_nodes, check_nodes_count)
edges_to_check = random.sample(all_edges, check_edges_count)
edge_changes = {'weakened': 0, 'removed': 0}
node_changes = {'reduced': 0, 'removed': 0}
current_time = datetime.datetime.now().timestamp()
# 检查并遗忘连接
logger.info("开始检查连接...")
for source, target in edges_to_check:
edge_data = self.memory_graph.G[source][target]
last_modified = edge_data.get('last_modified')
# print(source,target)
# print(f"float(last_modified):{float(last_modified)}" )
# print(f"current_time:{current_time}")
# print(f"current_time - last_modified:{current_time - last_modified}")
if current_time - last_modified > 3600*24: # test
current_strength = edge_data.get('strength', 1)
new_strength = current_strength - 1
if new_strength <= 0:
self.memory_graph.G.remove_edge(source, target)
edge_changes['removed'] += 1
logger.info(f"\033[1;31m[连接移除]\033[0m {source} - {target}")
else:
edge_data['strength'] = new_strength
edge_data['last_modified'] = current_time
edge_changes['weakened'] += 1
logger.info(f"\033[1;34m[连接减弱]\033[0m {source} - {target} (强度: {current_strength} -> {new_strength})")
# 检查并遗忘话题
logger.info("开始检查节点...")
for node in nodes_to_check:
# 获取节点的连接数
connections = self.memory_graph.G.degree(node)
node_data = self.memory_graph.G.nodes[node]
last_modified = node_data.get('last_modified', current_time)
# 获取节点的内容条数
memory_items = self.memory_graph.G.nodes[node].get('memory_items', [])
if current_time - last_modified > 3600*24: # test
memory_items = node_data.get('memory_items', [])
if not isinstance(memory_items, list):
memory_items = [memory_items] if memory_items else []
content_count = len(memory_items)
# 检查连接强度
weak_connections = True
if connections > 1: # 只有当连接数大于1时才检查强度
for neighbor in self.memory_graph.G.neighbors(node):
strength = self.memory_graph.G[node][neighbor].get('strength', 1)
if strength > 2:
weak_connections = False
break
if memory_items:
current_count = len(memory_items)
removed_item = random.choice(memory_items)
memory_items.remove(removed_item)
# 如果满足遗忘条件
if (connections <= 1 and weak_connections) or content_count <= 2:
removed_item = self.memory_graph.forget_topic(node)
if removed_item:
forgotten_nodes.append((node, removed_item))
print(f"遗忘节点 {node} 的记忆: {removed_item}")
# 同步到数据库
if forgotten_nodes:
self.sync_memory_to_db()
print(f"完成遗忘操作,共遗忘 {len(forgotten_nodes)} 个节点的记忆")
if memory_items:
self.memory_graph.G.nodes[node]['memory_items'] = memory_items
self.memory_graph.G.nodes[node]['last_modified'] = current_time
node_changes['reduced'] += 1
logger.info(f"\033[1;33m[记忆减少]\033[0m {node} (记忆数量: {current_count} -> {len(memory_items)})")
else:
print("本次检查没有节点满足遗忘条件")
self.memory_graph.G.remove_node(node)
node_changes['removed'] += 1
logger.info(f"\033[1;31m[节点移除]\033[0m {node}")
if any(count > 0 for count in edge_changes.values()) or any(count > 0 for count in node_changes.values()):
self.sync_memory_to_db()
logger.info("\n遗忘操作统计:")
logger.info(f"连接变化: {edge_changes['weakened']} 个减弱, {edge_changes['removed']} 个移除")
logger.info(f"节点变化: {node_changes['reduced']} 个减少记忆, {node_changes['removed']} 个移除")
else:
logger.info("\n本次检查没有节点或连接满足遗忘条件")
async def merge_memory(self, topic):
"""
@@ -436,11 +572,11 @@ class Hippocampus:
# 拼接成文本
merged_text = "\n".join(selected_memories)
print(f"\n[合并记忆] 话题: {topic}")
print(f"选择的记忆:\n{merged_text}")
logger.debug(f"\n[合并记忆] 话题: {topic}")
logger.debug(f"选择的记忆:\n{merged_text}")
# 使用memory_compress生成新的压缩记忆
compressed_memories = await self.memory_compress(merged_text, 0.1)
compressed_memories, _ = await self.memory_compress(selected_memories, 0.1)
# 从原记忆列表中移除被选中的记忆
for memory in selected_memories:
@@ -449,11 +585,11 @@ class Hippocampus:
# 添加新的压缩记忆
for _, compressed_memory in compressed_memories:
memory_items.append(compressed_memory)
print(f"添加压缩记忆: {compressed_memory}")
logger.info(f"添加压缩记忆: {compressed_memory}")
# 更新节点的记忆项
self.memory_graph.G.nodes[topic]['memory_items'] = memory_items
print(f"完成记忆合并,当前记忆数量: {len(memory_items)}")
logger.debug(f"完成记忆合并,当前记忆数量: {len(memory_items)}")
async def operation_merge_memory(self, percentage=0.1):
"""
@@ -479,23 +615,23 @@ class Hippocampus:
# 如果内容数量超过100进行合并
if content_count > 100:
print(f"\n检查节点: {node}, 当前记忆数量: {content_count}")
logger.debug(f"检查节点: {node}, 当前记忆数量: {content_count}")
await self.merge_memory(node)
merged_nodes.append(node)
# 同步到数据库
if merged_nodes:
self.sync_memory_to_db()
print(f"\n完成记忆合并操作,共处理 {len(merged_nodes)} 个节点")
logger.debug(f"完成记忆合并操作,共处理 {len(merged_nodes)} 个节点")
else:
print("\n本次检查没有需要合并的节点")
logger.debug("本次检查没有需要合并的节点")
def find_topic_llm(self, text, topic_num):
prompt = f'这是一段文字:{text}。请你从这段话中总结出{topic_num}个关键的概念,可以是名词,动词,或者特定人物,帮我列出来,用逗号,隔开,尽可能精简。只需要列举{topic_num}个话题就好,不要有序号,不要告诉我其他内容。'
return prompt
def topic_what(self,text, topic):
prompt = f'这是一段文字:{text}。我想让你基于这段文字来概括"{topic}"这个概念,帮我总结成一句自然的话,可以包含时间和人物,以及具体的观点。只输出这句话就好'
def topic_what(self, text, topic, time_info):
prompt = f'这是一段文字{time_info}{text}。我想让你基于这段文字来概括"{topic}"这个概念,帮我总结成一句自然的话,可以包含时间和人物,以及具体的观点。只输出这句话就好'
return prompt
async def _identify_topics(self, text: str) -> list:
@@ -509,7 +645,8 @@ class Hippocampus:
"""
topics_response = await self.llm_topic_judge.generate_response(self.find_topic_llm(text, 5))
# print(f"话题: {topics_response[0]}")
topics = [topic.strip() for topic in topics_response[0].replace("", ",").replace("", ",").replace(" ", ",").split(",") if topic.strip()]
topics = [topic.strip() for topic in
topics_response[0].replace("", ",").replace("", ",").replace(" ", ",").split(",") if topic.strip()]
# print(f"话题: {topics}")
return topics
@@ -582,7 +719,7 @@ class Hippocampus:
async def memory_activate_value(self, text: str, max_topics: int = 5, similarity_threshold: float = 0.3) -> int:
"""计算输入文本对记忆的激活程度"""
print(f"\033[1;32m[记忆激活]\033[0m 识别主题: {await self._identify_topics(text)}")
logger.info(f"识别主题: {await self._identify_topics(text)}")
# 识别主题
identified_topics = await self._identify_topics(text)
@@ -613,7 +750,8 @@ class Hippocampus:
penalty = 1.0 / (1 + math.log(content_count + 1))
activation = int(score * 50 * penalty)
print(f"\033[1;32m[记忆激活]\033[0m 单主题「{topic}」- 相似度: {score:.3f}, 内容数: {content_count}, 激活值: {activation}")
logger.info(
f"[记忆激活]单主题「{topic}」- 相似度: {score:.3f}, 内容数: {content_count}, 激活值: {activation}")
return activation
# 计算关键词匹配率,同时考虑内容数量
@@ -640,7 +778,8 @@ class Hippocampus:
matched_topics.add(input_topic)
adjusted_sim = sim * penalty
topic_similarities[input_topic] = max(topic_similarities.get(input_topic, 0), adjusted_sim)
print(f"\033[1;32m[记忆激活]\033[0m 主题「{input_topic}」-> 「{memory_topic}」(内容数: {content_count}, 相似度: {adjusted_sim:.3f})")
logger.info(
f"[记忆激活]主题「{input_topic}」-> 「{memory_topic}」(内容数: {content_count}, 相似度: {adjusted_sim:.3f})")
# 计算主题匹配率和平均相似度
topic_match = len(matched_topics) / len(identified_topics)
@@ -648,11 +787,13 @@ class Hippocampus:
# 计算最终激活值
activation = int((topic_match + average_similarities) / 2 * 100)
print(f"\033[1;32m[记忆激活]\033[0m 匹配率: {topic_match:.3f}, 平均相似度: {average_similarities:.3f}, 激活值: {activation}")
logger.info(
f"[记忆激活]匹配率: {topic_match:.3f}, 平均相似度: {average_similarities:.3f}, 激活值: {activation}")
return activation
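
Activation scoring damps topics that already hold many memories through `penalty = 1 / (1 + log(n + 1))`; the single-topic path scales similarity by 50 × penalty, while the multi-topic path averages match rate and penalized similarity and scales to 0–100. A worked single-topic example:

import math

score, content_count = 0.8, 20
penalty = 1.0 / (1 + math.log(content_count + 1))   # ≈ 0.247
activation = int(score * 50 * penalty)              # = 9
print(penalty, activation)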
async def get_relevant_memories(self, text: str, max_topics: int = 5, similarity_threshold: float = 0.4, max_memory_num: int = 5) -> list:
async def get_relevant_memories(self, text: str, max_topics: int = 5, similarity_threshold: float = 0.4,
max_memory_num: int = 5) -> list:
"""根据输入文本获取相关的记忆内容"""
# 识别主题
identified_topics = await self._identify_topics(text)
@@ -722,4 +863,4 @@ hippocampus = Hippocampus(memory_graph)
hippocampus.sync_memory_from_db()
end_time = time.time()
print(f"\033[32m[加载海马体耗时: {end_time - start_time:.2f}]\033[0m")
logger.success(f"加载海马体耗时: {end_time - start_time:.2f}")

View File

@@ -13,6 +13,7 @@ import networkx as nx
import pymongo
from dotenv import load_dotenv
from loguru import logger
import jieba
# from chat.config import global_config
sys.path.append("C:/GitHub/MaiMBot") # 添加项目根目录到 Python 路径
@@ -86,21 +87,24 @@ def calculate_information_content(text):
return entropy
def get_cloest_chat_from_db(db, length: int, timestamp: str):
"""从数据库中获取最接近指定时间戳的聊天记录,并记录读取次数"""
chat_text = ''
"""从数据库中获取最接近指定时间戳的聊天记录,并记录读取次数
Returns:
list: 消息记录字典列表,每个字典包含消息内容和时间信息
"""
chat_records = []
closest_record = db.db.messages.find_one({"time": {"$lte": timestamp}}, sort=[('time', -1)])
if closest_record and closest_record.get('memorized', 0) < 4:
closest_time = closest_record['time']
group_id = closest_record['group_id'] # 获取groupid
group_id = closest_record['group_id']
# 获取该时间戳之后的length条消息且groupid相同
chat_records = list(db.db.messages.find(
records = list(db.db.messages.find(
{"time": {"$gt": closest_time}, "group_id": group_id}
).sort('time', 1).limit(length))
# 更新每条消息的memorized属性
for record in chat_records:
# 检查当前记录的memorized值
for record in records:
current_memorized = record.get('memorized', 0)
if current_memorized > 3:
print("消息已读取3次跳过")
@@ -112,11 +116,14 @@ def get_cloest_chat_from_db(db, length: int, timestamp: str):
{"$set": {"memorized": current_memorized + 1}}
)
chat_text += record["detailed_plain_text"]
# 添加到记录列表中
chat_records.append({
'text': record["detailed_plain_text"],
'time': record["time"],
'group_id': record["group_id"]
})
return chat_text
print("消息已读取3次跳过")
return ''
return chat_records
class Memory_graph:
def __init__(self):
@@ -205,22 +212,34 @@ class Hippocampus:
self.llm_model_summary = LLMModel(model_name="Qwen/Qwen2.5-32B-Instruct")
def get_memory_sample(self, chat_size=20, time_frequency:dict={'near':2,'mid':4,'far':3}):
"""获取记忆样本
Returns:
list: 消息记录列表,每个元素是一个消息记录字典列表
"""
current_timestamp = datetime.datetime.now().timestamp()
chat_text = []
chat_samples = []
# 短期1h 中期4h 长期24h
for _ in range(time_frequency.get('near')): # 循环10次
random_time = current_timestamp - random.randint(1, 3600*4) # 随机时间
chat_ = get_cloest_chat_from_db(db=self.memory_graph.db, length=chat_size, timestamp=random_time)
chat_text.append(chat_)
for _ in range(time_frequency.get('mid')): # 循环10次
random_time = current_timestamp - random.randint(3600*4, 3600*24) # 随机时间
chat_ = get_cloest_chat_from_db(db=self.memory_graph.db, length=chat_size, timestamp=random_time)
chat_text.append(chat_)
for _ in range(time_frequency.get('far')): # 循环10次
random_time = current_timestamp - random.randint(3600*24, 3600*24*7) # 随机时间
chat_ = get_cloest_chat_from_db(db=self.memory_graph.db, length=chat_size, timestamp=random_time)
chat_text.append(chat_)
return [chat for chat in chat_text if chat]
for _ in range(time_frequency.get('near')):
random_time = current_timestamp - random.randint(1, 3600*4)
messages = get_cloest_chat_from_db(db=self.memory_graph.db, length=chat_size, timestamp=random_time)
if messages:
chat_samples.append(messages)
for _ in range(time_frequency.get('mid')):
random_time = current_timestamp - random.randint(3600*4, 3600*24)
messages = get_cloest_chat_from_db(db=self.memory_graph.db, length=chat_size, timestamp=random_time)
if messages:
chat_samples.append(messages)
for _ in range(time_frequency.get('far')):
random_time = current_timestamp - random.randint(3600*24, 3600*24*7)
messages = get_cloest_chat_from_db(db=self.memory_graph.db, length=chat_size, timestamp=random_time)
if messages:
chat_samples.append(messages)
return chat_samples
def calculate_topic_num(self,text, compress_rate):
"""计算文本的话题数量"""
@@ -231,16 +250,49 @@ class Hippocampus:
print(f"topic_by_length: {topic_by_length}, topic_by_information_content: {topic_by_information_content}, topic_num: {topic_num}")
return topic_num
async def memory_compress(self, input_text, compress_rate=0.1):
async def memory_compress(self, messages: list, compress_rate=0.1):
"""压缩消息记录为记忆
Args:
messages: 消息记录字典列表每个字典包含text和time字段
compress_rate: 压缩率
Returns:
set: (话题, 记忆) 元组集合
"""
if not messages:
return set()
# 合并消息文本,同时保留时间信息
input_text = ""
time_info = ""
# 计算最早和最晚时间
earliest_time = min(msg['time'] for msg in messages)
latest_time = max(msg['time'] for msg in messages)
earliest_dt = datetime.datetime.fromtimestamp(earliest_time)
latest_dt = datetime.datetime.fromtimestamp(latest_time)
# 如果是同一年
if earliest_dt.year == latest_dt.year:
earliest_str = earliest_dt.strftime("%m-%d %H:%M:%S")
latest_str = latest_dt.strftime("%m-%d %H:%M:%S")
time_info += f"是在{earliest_dt.year}年,{earliest_str}{latest_str} 的对话:\n"
else:
earliest_str = earliest_dt.strftime("%Y-%m-%d %H:%M:%S")
latest_str = latest_dt.strftime("%Y-%m-%d %H:%M:%S")
time_info += f"是从 {earliest_str}{latest_str} 的对话:\n"
for msg in messages:
input_text += f"{msg['text']}\n"
print(input_text)
topic_num = self.calculate_topic_num(input_text, compress_rate)
topics_response = self.llm_model_get_topic.generate_response(self.find_topic_llm(input_text, topic_num))
# 修改话题处理逻辑
# 定义需要过滤的关键词
filter_keywords = ['表情包', '图片', '回复', '聊天记录']
# 过滤topics
filter_keywords = ['表情包', '图片', '回复', '聊天记录']
topics = [topic.strip() for topic in topics_response[0].replace("", ",").replace("", ",").replace(" ", ",").split(",") if topic.strip()]
filtered_topics = [topic for topic in topics if not any(keyword in topic for keyword in filter_keywords)]
@@ -250,7 +302,7 @@ class Hippocampus:
# 创建所有话题的请求任务
tasks = []
for topic in filtered_topics:
topic_what_prompt = self.topic_what(input_text, topic)
topic_what_prompt = self.topic_what(input_text, topic , time_info)
# 创建异步任务
task = self.llm_model_small.generate_response_async(topic_what_prompt)
tasks.append((topic.strip(), task))
@@ -267,38 +319,36 @@ class Hippocampus:
async def operation_build_memory(self, chat_size=12):
# 最近消息获取频率
time_frequency = {'near': 3, 'mid': 8, 'far': 5}
memory_sample = self.get_memory_sample(chat_size, time_frequency)
memory_samples = self.get_memory_sample(chat_size, time_frequency)
all_topics = [] # 用于存储所有话题
for i, input_text in enumerate(memory_sample, 1):
for i, messages in enumerate(memory_samples, 1):
# 加载进度可视化
all_topics = []
progress = (i / len(memory_sample)) * 100
progress = (i / len(memory_samples)) * 100
bar_length = 30
filled_length = int(bar_length * i // len(memory_sample))
filled_length = int(bar_length * i // len(memory_samples))
bar = '█' * filled_length + '-' * (bar_length - filled_length)
print(f"\n进度: [{bar}] {progress:.1f}% ({i}/{len(memory_sample)})")
print(f"\n进度: [{bar}] {progress:.1f}% ({i}/{len(memory_samples)})")
# 生成压缩后记忆 ,表现为 (话题,记忆) 的元组
compressed_memory = set()
# 生成压缩后记忆
compress_rate = 0.1
compressed_memory = await self.memory_compress(input_text, compress_rate)
compressed_memory = await self.memory_compress(messages, compress_rate)
print(f"\033[1;33m压缩后记忆数量\033[0m: {len(compressed_memory)}")
# 将记忆加入到图谱中
for topic, memory in compressed_memory:
print(f"\033[1;32m添加节点\033[0m: {topic}")
self.memory_graph.add_dot(topic, memory)
all_topics.append(topic) # 收集所有话题
all_topics.append(topic)
# 连接相关话题
for i in range(len(all_topics)):
for j in range(i + 1, len(all_topics)):
print(f"\033[1;32m连接节点\033[0m: {all_topics[i]}{all_topics[j]}")
self.memory_graph.connect_dot(all_topics[i], all_topics[j])
self.sync_memory_to_db()
def sync_memory_from_db(self):
@@ -375,7 +425,7 @@ class Hippocampus:
if concept not in db_nodes_dict:
# 数据库中缺少的节点,添加
logger.info(f"添加新节点: {concept}")
# logger.info(f"添加新节点: {concept}")
node_data = {
'concept': concept,
'memory_items': memory_items,
@@ -389,7 +439,7 @@ class Hippocampus:
# 如果特征值不同,则更新节点
if db_hash != memory_hash:
logger.info(f"更新节点内容: {concept}")
# logger.info(f"更新节点内容: {concept}")
self.memory_graph.db.db.graph_data.nodes.update_one(
{'concept': concept},
{'$set': {
@@ -402,7 +452,7 @@ class Hippocampus:
memory_concepts = set(node[0] for node in memory_nodes)
for db_node in db_nodes:
if db_node['concept'] not in memory_concepts:
logger.info(f"删除多余节点: {db_node['concept']}")
# logger.info(f"删除多余节点: {db_node['concept']}")
self.memory_graph.db.db.graph_data.nodes.delete_one({'concept': db_node['concept']})
# 处理边的信息
@@ -460,9 +510,10 @@ class Hippocampus:
prompt = f'这是一段文字:{text}。请你从这段话中总结出{topic_num}个关键的概念,可以是名词,动词,或者特定人物,帮我列出来,用逗号,隔开,尽可能精简。只需要列举{topic_num}个话题就好,不要有序号,不要告诉我其他内容。'
return prompt
def topic_what(self,text, topic):
def topic_what(self, text, topic, time_info):
# prompt = f'这是一段文字:{text}。我想知道这段文字里有什么关于{topic}的话题,帮我总结成一句自然的话,可以包含时间和人物,以及具体的观点。只输出这句话就好'
prompt = f'这是一段文字:{text}。我想让你基于这段文字来概括"{topic}"这个概念,帮我总结成一句自然的话,可以包含时间和人物,以及具体的观点。只输出这句话就好'
prompt = f'这是一段文字,{time_info}{text}。我想让你基于这段文字来概括"{topic}"这个概念,帮我总结成一句自然的话,可以包含时间和人物,以及具体的观点。只输出这句话就好'
return prompt
def remove_node_from_db(self, topic):
@@ -597,7 +648,7 @@ class Hippocampus:
print(f"选择的记忆:\n{merged_text}")
# 使用memory_compress生成新的压缩记忆
compressed_memories = await self.memory_compress(merged_text, 0.1)
compressed_memories = await self.memory_compress(selected_memories, 0.1)
# 从原记忆列表中移除被选中的记忆
for memory in selected_memories:
@@ -647,6 +698,164 @@ class Hippocampus:
else:
print("\n本次检查没有需要合并的节点")
async def _identify_topics(self, text: str) -> list:
"""从文本中识别可能的主题"""
topics_response = self.llm_model_get_topic.generate_response(self.find_topic_llm(text, 5))
topics = [topic.strip() for topic in topics_response[0].replace("，", ",").replace("、", ",").replace(" ", ",").split(",") if topic.strip()]
return topics
def _find_similar_topics(self, topics: list, similarity_threshold: float = 0.4, debug_info: str = "") -> list:
"""查找与给定主题相似的记忆主题"""
all_memory_topics = list(self.memory_graph.G.nodes())
all_similar_topics = []
for topic in topics:
topic_vector = text_to_vector(topic)
has_similar_topic = False
for memory_topic in all_memory_topics:
memory_vector = text_to_vector(memory_topic)
all_words = set(topic_vector.keys()) | set(memory_vector.keys())
v1 = [topic_vector.get(word, 0) for word in all_words]
v2 = [memory_vector.get(word, 0) for word in all_words]
similarity = cosine_similarity(v1, v2)
if similarity >= similarity_threshold:
has_similar_topic = True
all_similar_topics.append((memory_topic, similarity))
return all_similar_topics
def _get_top_topics(self, similar_topics: list, max_topics: int = 5) -> list:
"""获取相似度最高的主题"""
seen_topics = set()
top_topics = []
for topic, score in sorted(similar_topics, key=lambda x: x[1], reverse=True):
if topic not in seen_topics and len(top_topics) < max_topics:
seen_topics.add(topic)
top_topics.append((topic, score))
return top_topics
async def memory_activate_value(self, text: str, max_topics: int = 5, similarity_threshold: float = 0.3) -> int:
"""计算输入文本对记忆的激活程度"""
logger.info(f"[记忆激活]识别主题: {await self._identify_topics(text)}")
identified_topics = await self._identify_topics(text)
if not identified_topics:
return 0
all_similar_topics = self._find_similar_topics(
identified_topics,
similarity_threshold=similarity_threshold,
debug_info="记忆激活"
)
if not all_similar_topics:
return 0
top_topics = self._get_top_topics(all_similar_topics, max_topics)
if len(top_topics) == 1:
topic, score = top_topics[0]
memory_items = self.memory_graph.G.nodes[topic].get('memory_items', [])
if not isinstance(memory_items, list):
memory_items = [memory_items] if memory_items else []
content_count = len(memory_items)
penalty = 1.0 / (1 + math.log(content_count + 1))
activation = int(score * 50 * penalty)
print(f"\033[1;32m[记忆激活]\033[0m 单主题「{topic}」- 相似度: {score:.3f}, 内容数: {content_count}, 激活值: {activation}")
return activation
matched_topics = set()
topic_similarities = {}
for memory_topic, similarity in top_topics:
memory_items = self.memory_graph.G.nodes[memory_topic].get('memory_items', [])
if not isinstance(memory_items, list):
memory_items = [memory_items] if memory_items else []
content_count = len(memory_items)
penalty = 1.0 / (1 + math.log(content_count + 1))
for input_topic in identified_topics:
topic_vector = text_to_vector(input_topic)
memory_vector = text_to_vector(memory_topic)
all_words = set(topic_vector.keys()) | set(memory_vector.keys())
v1 = [topic_vector.get(word, 0) for word in all_words]
v2 = [memory_vector.get(word, 0) for word in all_words]
sim = cosine_similarity(v1, v2)
if sim >= similarity_threshold:
matched_topics.add(input_topic)
adjusted_sim = sim * penalty
topic_similarities[input_topic] = max(topic_similarities.get(input_topic, 0), adjusted_sim)
print(f"\033[1;32m[记忆激活]\033[0m 主题「{input_topic}」-> 「{memory_topic}」(内容数: {content_count}, 相似度: {adjusted_sim:.3f})")
topic_match = len(matched_topics) / len(identified_topics)
average_similarities = sum(topic_similarities.values()) / len(topic_similarities) if topic_similarities else 0
activation = int((topic_match + average_similarities) / 2 * 100)
print(f"\033[1;32m[记忆激活]\033[0m 匹配率: {topic_match:.3f}, 平均相似度: {average_similarities:.3f}, 激活值: {activation}")
return activation
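# Editor's worked example of the multi-topic activation formula above
# (the numbers are illustrative, not from the source):
#   topic_match = 2 / 3             # 2 of 3 identified topics matched memory
#   average_similarities = 0.45     # mean penalty-adjusted cosine similarity
#   activation = int((2 / 3 + 0.45) / 2 * 100)  # -> 55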
async def get_relevant_memories(self, text: str, max_topics: int = 5, similarity_threshold: float = 0.4, max_memory_num: int = 5) -> list:
"""根据输入文本获取相关的记忆内容"""
identified_topics = await self._identify_topics(text)
all_similar_topics = self._find_similar_topics(
identified_topics,
similarity_threshold=similarity_threshold,
debug_info="记忆检索"
)
relevant_topics = self._get_top_topics(all_similar_topics, max_topics)
relevant_memories = []
for topic, score in relevant_topics:
first_layer, _ = self.memory_graph.get_related_item(topic, depth=1)
if first_layer:
if len(first_layer) > max_memory_num/2:
first_layer = random.sample(first_layer, max_memory_num//2)
for memory in first_layer:
relevant_memories.append({
'topic': topic,
'similarity': score,
'content': memory
})
relevant_memories.sort(key=lambda x: x['similarity'], reverse=True)
if len(relevant_memories) > max_memory_num:
relevant_memories = random.sample(relevant_memories, max_memory_num)
return relevant_memories
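# Editor's sketch of hypothetical usage (the `hippocampus` instance and the
# input text are assumptions, not from the source):
#   memories = await hippocampus.get_relevant_memories("明天要考试了,好紧张")
#   for m in memories:
#       print(f"[{m['topic']}] sim={m['similarity']:.2f}: {m['content']}")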
def segment_text(text):
"""使用jieba进行文本分词"""
seg_text = list(jieba.cut(text))
return seg_text
def text_to_vector(text):
"""将文本转换为词频向量"""
words = segment_text(text)
vector = {}
for word in words:
vector[word] = vector.get(word, 0) + 1
return vector
def cosine_similarity(v1, v2):
"""计算两个向量的余弦相似度"""
dot_product = sum(a * b for a, b in zip(v1, v2))
norm1 = math.sqrt(sum(a * a for a in v1))
norm2 = math.sqrt(sum(b * b for b in v2))
if norm1 == 0 or norm2 == 0:
return 0
return dot_product / (norm1 * norm2)
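# Editor's sketch: how the three helpers above combine to score two texts
# (the sample sentences are illustrative):
#   v1 = text_to_vector("今天天气不错")
#   v2 = text_to_vector("今天的天气很好")
#   words = set(v1) | set(v2)
#   sim = cosine_similarity([v1.get(w, 0) for w in words],
#                           [v2.get(w, 0) for w in words])  # 1.0 means identical word bags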
def visualize_graph_lite(memory_graph: Memory_graph, color_by_memory: bool = False):
# 设置中文字体
@@ -735,7 +944,7 @@ async def main():
db = Database.get_instance()
start_time = time.time()
test_pare = {'do_build_memory':True,'do_forget_topic':False,'do_visualize_graph':True,'do_query':False,'do_merge_memory':False}
test_pare = {'do_build_memory':False,'do_forget_topic':False,'do_visualize_graph':True,'do_query':False,'do_merge_memory':False}
# 创建记忆图
memory_graph = Memory_graph()

File diff suppressed because it is too large

View File

@@ -24,6 +24,7 @@ class LLM_request:
self.api_key = getattr(config, model["key"])
self.base_url = getattr(config, model["base_url"])
except AttributeError as e:
logger.error(f"原始 model dict 信息:{model}")
logger.error(f"配置错误:找不到对应的配置项 - {str(e)}")
raise ValueError(f"配置错误:找不到对应的配置项 - {str(e)}") from e
self.model_name = model["name"]
@@ -44,8 +45,8 @@ class LLM_request:
self.db.db.llm_usage.create_index([("model_name", 1)])
self.db.db.llm_usage.create_index([("user_id", 1)])
self.db.db.llm_usage.create_index([("request_type", 1)])
except Exception as e:
logger.error(f"创建数据库索引失败: {e}")
except Exception:
logger.error("创建数据库索引失败")
def _record_usage(self, prompt_tokens: int, completion_tokens: int, total_tokens: int,
user_id: str = "system", request_type: str = "chat",
@@ -79,8 +80,8 @@ class LLM_request:
f"提示词: {prompt_tokens}, 完成: {completion_tokens}, "
f"总计: {total_tokens}"
)
except Exception as e:
logger.error(f"记录token使用情况失败: {e}")
except Exception:
logger.error("记录token使用情况失败")
def _calculate_cost(self, prompt_tokens: int, completion_tokens: int) -> float:
"""计算API调用成本
@@ -143,9 +144,9 @@ class LLM_request:
# 判断是否为流式
stream_mode = self.params.get("stream", False)
if self.params.get("stream", False) is True:
logger.info(f"进入流式输出模式发送请求到URL: {api_url}")
logger.debug(f"进入流式输出模式发送请求到URL: {api_url}")
else:
logger.info(f"发送请求到URL: {api_url}")
logger.debug(f"发送请求到URL: {api_url}")
logger.info(f"使用模型: {self.model_name}")
# 构建请求体
@@ -182,6 +183,28 @@ class LLM_request:
continue
elif response.status in policy["abort_codes"]:
logger.error(f"错误码: {response.status} - {error_code_mapping.get(response.status)}")
if response.status == 403:
# 尝试降级Pro模型
if self.model_name.startswith(
"Pro/") and self.base_url == "https://api.siliconflow.cn/v1/":
old_model_name = self.model_name
self.model_name = self.model_name[4:] # 移除"Pro/"前缀
logger.warning(f"检测到403错误模型从 {old_model_name} 降级为 {self.model_name}")
# 对全局配置进行更新
if hasattr(global_config, 'llm_normal') and global_config.llm_normal.get(
'name') == old_model_name:
global_config.llm_normal['name'] = self.model_name
logger.warning("已将全局配置中的 llm_normal 模型降级")
# 更新payload中的模型名
if payload and 'model' in payload:
payload['model'] = self.model_name
# 重新尝试请求
retry -= 1 # 不计入重试次数
continue
raise RuntimeError(f"请求被拒绝: {error_code_mapping.get(response.status)}")
response.raise_for_status()
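# Editor's note on the 403 branch above: the "Pro/" prefix is stripped, e.g.
#   "Pro/deepseek-ai/DeepSeek-V3"[4:] -> "deepseek-ai/DeepSeek-V3"
# and `retry -= 1` before `continue` means the downgraded retry does not
# consume one of the configured retry attempts.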
@@ -204,8 +227,8 @@ class LLM_request:
if delta_content is None:
delta_content = ""
accumulated_content += delta_content
except Exception as e:
logger.error(f"解析流式输出错误: {e}")
except Exception:
logger.exception("解析流式输出错")
content = accumulated_content
reasoning_content = ""
think_match = re.search(r'<think>(.*?)</think>', content, re.DOTALL)
@@ -213,12 +236,15 @@ class LLM_request:
reasoning_content = think_match.group(1).strip()
content = re.sub(r'<think>.*?</think>', '', content, flags=re.DOTALL).strip()
# 构造一个伪result以便调用自定义响应处理器或默认处理器
result = {"choices": [{"message": {"content": content, "reasoning_content": reasoning_content}}]}
return response_handler(result) if response_handler else self._default_response_handler(result, user_id, request_type, endpoint)
result = {
"choices": [{"message": {"content": content, "reasoning_content": reasoning_content}}]}
return response_handler(result) if response_handler else self._default_response_handler(
result, user_id, request_type, endpoint)
else:
result = await response.json()
# 使用自定义处理器或默认处理
return response_handler(result) if response_handler else self._default_response_handler(result, user_id, request_type, endpoint)
return response_handler(result) if response_handler else self._default_response_handler(
result, user_id, request_type, endpoint)
except Exception as e:
if retry < policy["max_retries"] - 1:
@@ -242,7 +268,8 @@ class LLM_request:
# 复制一份参数,避免直接修改原始数据
new_params = dict(params)
# 定义需要转换的模型列表
models_needing_transformation = ["o3-mini", "o1-mini", "o1-preview", "o1-2024-12-17", "o1-preview-2024-09-12", "o3-mini-2025-01-31", "o1-mini-2024-09-12"]
models_needing_transformation = ["o3-mini", "o1-mini", "o1-preview", "o1-2024-12-17", "o1-preview-2024-09-12",
"o3-mini-2025-01-31", "o1-mini-2024-09-12"]
if self.model_name.lower() in models_needing_transformation:
# 删除 'temprature' 参数(如果存在)
new_params.pop("temperature", None)
@@ -278,11 +305,11 @@ class LLM_request:
**params_copy
}
# 如果 payload 中依然存在 max_tokens 且需要转换,在这里进行再次检查
if self.model_name.lower() in ["o3-mini", "o1-mini", "o1-preview", "o1-2024-12-17", "o1-preview-2024-09-12", "o3-mini-2025-01-31", "o1-mini-2024-09-12"] and "max_tokens" in payload:
if self.model_name.lower() in ["o3-mini", "o1-mini", "o1-preview", "o1-2024-12-17", "o1-preview-2024-09-12",
"o3-mini-2025-01-31", "o1-mini-2024-09-12"] and "max_tokens" in payload:
payload["max_completion_tokens"] = payload.pop("max_tokens")
return payload
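# Editor's sketch of the rewrite these branches perform for o1/o3-series models
# (payload values are illustrative):
#   {"model": "o3-mini", "temperature": 0.7, "max_tokens": 512}
#   becomes
#   {"model": "o3-mini", "max_completion_tokens": 512}
# i.e. `temperature` is dropped and `max_tokens` is renamed to `max_completion_tokens`.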
def _default_response_handler(self, result: dict, user_id: str = "system",
request_type: str = "chat", endpoint: str = "/chat/completions") -> Tuple:
"""默认响应解析"""
@@ -329,7 +356,7 @@ class LLM_request:
"""构建请求头"""
if no_key:
return {
"Authorization": f"Bearer **********",
"Authorization": "Bearer **********",
"Content-Type": "application/json"
}
else:
@@ -384,6 +411,7 @@ class LLM_request:
Returns:
list: embedding向量如果失败则返回None
"""
def embedding_handler(result):
"""处理响应"""
if "data" in result and len(result["data"]) > 0:

View File

@@ -4,7 +4,7 @@ import time
from dataclasses import dataclass
from ..chat.config import global_config
from loguru import logger
@dataclass
class MoodState:
@@ -51,11 +51,11 @@ class MoodManager:
# 情绪词映射表 (valence, arousal)
self.emotion_map = {
'happy': (0.8, 0.6), # 高愉悦度,中等唤醒度
'angry': (-0.7, 0.8), # 负愉悦度,高唤醒度
'angry': (-0.7, 0.7), # 负愉悦度,高唤醒度
'sad': (-0.6, 0.3), # 负愉悦度,低唤醒度
'surprised': (0.4, 0.9), # 中等愉悦度,高唤醒度
'surprised': (0.4, 0.8), # 中等愉悦度,高唤醒度
'disgusted': (-0.8, 0.5), # 高负愉悦度,中等唤醒度
'fearful': (-0.7, 0.7), # 负愉悦度,高唤醒度
'fearful': (-0.7, 0.6), # 负愉悦度,高唤醒度
'neutral': (0.0, 0.5), # 中性愉悦度,中等唤醒度
}
@@ -64,15 +64,20 @@ class MoodManager:
# 第一象限:高唤醒,正愉悦
(0.5, 0.7): "兴奋",
(0.3, 0.8): "快乐",
(0.2, 0.65): "满足",
# 第二象限:高唤醒,负愉悦
(-0.5, 0.7): "愤怒",
(-0.3, 0.8): "焦虑",
(-0.2, 0.65): "烦躁",
# 第三象限:低唤醒,负愉悦
(-0.5, 0.3): "悲伤",
(-0.3, 0.2): "疲倦",
(-0.3, 0.35): "疲倦",
(-0.4, 0.15): "疲倦",
# 第四象限:低唤醒,正愉悦
(0.5, 0.3): "放松",
(0.3, 0.2): "平静"
(0.2, 0.45): "平静",
(0.3, 0.4): "安宁",
(0.5, 0.3): "放松"
}
@classmethod
@@ -119,9 +124,13 @@ class MoodManager:
current_time = time.time()
time_diff = current_time - self.last_update
# 应用衰减公式
self.current_mood.valence *= math.pow(1 - self.decay_rate_valence, time_diff)
self.current_mood.arousal *= math.pow(1 - self.decay_rate_arousal, time_diff)
# Valence 向中性0回归
valence_target = 0.0
self.current_mood.valence = valence_target + (self.current_mood.valence - valence_target) * math.exp(-self.decay_rate_valence * time_diff)
# Arousal 向中性0.5)回归
arousal_target = 0.5
self.current_mood.arousal = arousal_target + (self.current_mood.arousal - arousal_target) * math.exp(-self.decay_rate_arousal * time_diff)
# 确保值在合理范围内
self.current_mood.valence = max(-1.0, min(1.0, self.current_mood.valence))
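# Editor's note: the update above is the closed form of dx/dt = -k * (x - target),
# so the decay is independent of how often it runs. Minimal sketch (the rate
# value is illustrative):
#   import math
#   def decay_toward(value, target, rate, dt):
#       return target + (value - target) * math.exp(-rate * dt)
#   # with rate=0.05/s, valence 0.8 relaxes halfway to 0 after ln(2)/0.05 ≈ 13.9 s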
@@ -201,7 +210,7 @@ class MoodManager:
def print_mood_status(self) -> None:
"""打印当前情绪状态"""
print(f"\033[1;35m[情绪状态]\033[0m 愉悦度: {self.current_mood.valence:.2f}, "
logger.info(f"[情绪状态]愉悦度: {self.current_mood.valence:.2f}, "
f"唤醒度: {self.current_mood.arousal:.2f}, "
f"心情: {self.current_mood.text}")

View File

@@ -13,7 +13,6 @@ from ..models.utils_model import LLM_request
driver = get_driver()
config = driver.config
Database.initialize(
host=config.MONGODB_HOST,
port=int(config.MONGODB_PORT),
@@ -23,6 +22,7 @@ Database.initialize(
auth_source=config.MONGODB_AUTH_SOURCE
)
class ScheduleGenerator:
def __init__(self):
# 根据global_config.llm_normal这一字典配置指定模型
@@ -42,31 +42,33 @@ class ScheduleGenerator:
yesterday = datetime.datetime.now() - datetime.timedelta(days=1)
self.today_schedule_text, self.today_schedule = await self.generate_daily_schedule(target_date=today)
self.tomorrow_schedule_text, self.tomorrow_schedule = await self.generate_daily_schedule(target_date=tomorrow,read_only=True)
self.yesterday_schedule_text, self.yesterday_schedule = await self.generate_daily_schedule(target_date=yesterday,read_only=True)
self.tomorrow_schedule_text, self.tomorrow_schedule = await self.generate_daily_schedule(target_date=tomorrow,
read_only=True)
self.yesterday_schedule_text, self.yesterday_schedule = await self.generate_daily_schedule(
target_date=yesterday, read_only=True)
async def generate_daily_schedule(self, target_date: datetime.datetime = None,read_only:bool = False) -> Dict[str, str]:
async def generate_daily_schedule(self, target_date: datetime.datetime = None, read_only: bool = False) -> Dict[
str, str]:
date_str = target_date.strftime("%Y-%m-%d")
weekday = target_date.strftime("%A")
schedule_text = ""
existing_schedule = self.db.db.schedule.find_one({"date": date_str})
if existing_schedule:
print(f"{date_str}的日程已存在:")
logger.debug(f"{date_str}的日程已存在:")
schedule_text = existing_schedule["schedule"]
# print(self.schedule_text)
elif read_only == False:
print(f"{date_str}的日程不存在,准备生成新的日程。")
elif not read_only:
logger.debug(f"{date_str}的日程不存在,准备生成新的日程。")
prompt = f"""我是{global_config.BOT_NICKNAME}{global_config.PROMPT_SCHEDULE_GEN},请为我生成{date_str}{weekday})的日程安排,包括:""" + \
"""
1. 早上的学习和工作安排
2. 下午的活动和任务
3. 晚上的计划和休息时间
请按照时间顺序列出具体时间点和对应的活动用一个时间点而不是时间段来表示时间用JSON格式返回日程表仅返回内容不要返回注释时间采用24小时制格式为{"时间": "活动","时间": "活动",...}。"""
请按照时间顺序列出具体时间点和对应的活动用一个时间点而不是时间段来表示时间用JSON格式返回日程表仅返回内容不要返回注释不要添加任何markdown或代码块样式时间采用24小时制格式为{"时间": "活动","时间": "活动",...}。"""
try:
schedule_text, _ = await self.llm_scheduler.generate_response(prompt)
@@ -76,7 +78,7 @@ class ScheduleGenerator:
schedule_text = "生成日程时出错了"
# print(self.schedule_text)
else:
print(f"{date_str}的日程不存在。")
logger.debug(f"{date_str}的日程不存在。")
schedule_text = "忘了"
return schedule_text, None
@@ -89,9 +91,8 @@ class ScheduleGenerator:
try:
schedule_dict = json.loads(schedule_text)
return schedule_dict
except json.JSONDecodeError as e:
print(schedule_text)
print(f"解析日程失败: {str(e)}")
except json.JSONDecodeError:
logger.exception("解析日程失败: {}".format(schedule_text))
return False
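# Editor's sketch of the input _parse_schedule expects (the format is fixed by
# the generation prompt above; the times and activities are illustrative):
#   schedule_text = '{"08:00": "起床洗漱", "09:00": "学习", "23:00": "睡觉"}'
#   json.loads(schedule_text)  # -> {"08:00": "起床洗漱", ...}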
def _parse_time(self, time_str: str) -> str:
@@ -150,13 +151,14 @@ class ScheduleGenerator:
def print_schedule(self):
"""打印完整的日程安排"""
if not self._parse_schedule(self.today_schedule_text):
print("今日日程有误,将在下次运行时重新生成")
logger.warning("今日日程有误,将在下次运行时重新生成")
self.db.db.schedule.delete_one({"date": datetime.datetime.now().strftime("%Y-%m-%d")})
else:
print("\n=== 今日日程安排 ===")
logger.info("=== 今日日程安排 ===")
for time_str, activity in self.today_schedule.items():
print(f"时间[{time_str}]: 活动[{activity}]")
print("==================\n")
logger.info(f"时间[{time_str}]: 活动[{activity}]")
logger.info("==================")
# def main():
# # 使用示例

View File

@@ -3,6 +3,7 @@ import time
from collections import defaultdict
from datetime import datetime, timedelta
from typing import Any, Dict
from loguru import logger
from ...common.database import Database
@@ -153,8 +154,8 @@ class LLMStatistics:
try:
all_stats = self._collect_all_statistics()
self._save_statistics(all_stats)
except Exception as e:
print(f"\033[1;31m[错误]\033[0m 统计数据处理失败: {e}")
except Exception:
logger.exception("统计数据处理失败")
# 等待1分钟
for _ in range(60):

View File

@@ -284,10 +284,13 @@ class ChineseTypoGenerator:
返回:
typo_sentence: 包含错别字的句子
correction_suggestion: 随机选择的一个纠正建议,返回正确的字/词
"""
result = []
typo_info = []
word_typos = [] # 记录词语错误对(错词,正确词)
char_typos = [] # 记录单字错误对(错字,正确字)
current_pos = 0
# 分词
words = self._segment_sentence(sentence)
@@ -296,6 +299,7 @@ class ChineseTypoGenerator:
# 如果是标点符号或空格,直接添加
if all(not self._is_chinese_char(c) for c in word):
result.append(word)
current_pos += len(word)
continue
# 获取词语的拼音
@@ -316,6 +320,8 @@ class ChineseTypoGenerator:
' '.join(word_pinyin),
' '.join(self._get_word_pinyin(typo_word)),
orig_freq, typo_freq))
word_typos.append((typo_word, word)) # 记录(错词,正确词)对
current_pos += len(typo_word)
continue
# 如果不进行整词替换,则进行单字替换
@@ -333,11 +339,15 @@ class ChineseTypoGenerator:
result.append(typo_char)
typo_py = pinyin(typo_char, style=Style.TONE3)[0][0]
typo_info.append((char, typo_char, py, typo_py, orig_freq, typo_freq))
char_typos.append((typo_char, char)) # 记录(错字,正确字)对
current_pos += 1
continue
result.append(char)
current_pos += 1
else:
# 处理多字词的单字替换
word_result = []
word_start_pos = current_pos
for i, (char, py) in enumerate(zip(word, word_pinyin)):
# 词中的字替换概率降低
word_error_rate = self.error_rate * (0.7 ** (len(word) - 1))
@@ -353,11 +363,24 @@ class ChineseTypoGenerator:
word_result.append(typo_char)
typo_py = pinyin(typo_char, style=Style.TONE3)[0][0]
typo_info.append((char, typo_char, py, typo_py, orig_freq, typo_freq))
char_typos.append((typo_char, char)) # 记录(错字,正确字)对
continue
word_result.append(char)
result.append(''.join(word_result))
current_pos += len(word)
return ''.join(result), typo_info
# 优先从词语错误中选择,如果没有则从单字错误中选择
correction_suggestion = None
# 50%概率返回纠正建议
if random.random() < 0.5:
if word_typos:
wrong_word, correct_word = random.choice(word_typos)
correction_suggestion = correct_word
elif char_typos:
wrong_char, correct_char = random.choice(char_typos)
correction_suggestion = correct_char
return ''.join(result), correction_suggestion
def format_typo_info(self, typo_info):
"""
@@ -419,16 +442,16 @@ def main():
# 创建包含错别字的句子
start_time = time.time()
typo_sentence, typo_info = typo_generator.create_typo_sentence(sentence)
typo_sentence, correction_suggestion = typo_generator.create_typo_sentence(sentence)
# 打印结果
print("\n原句:", sentence)
print("错字版:", typo_sentence)
# 打印错别字信息
if typo_info:
print("\n错别字信息")
print(typo_generator.format_typo_info(typo_info))
# 打印纠正建议
if correction_suggestion:
print("\n随机纠正建议")
print(f"应该改为:{correction_suggestion}")
# 计算并打印总耗时
end_time = time.time()

View File

@@ -11,6 +11,8 @@ from pathlib import Path
import random
import math
import time
from loguru import logger
class ChineseTypoGenerator:
def __init__(self,
@@ -36,7 +38,7 @@ class ChineseTypoGenerator:
self.max_freq_diff = max_freq_diff
# 加载数据
print("正在加载汉字数据库,请稍候...")
logger.debug("正在加载汉字数据库,请稍候...")
self.pinyin_dict = self._create_pinyin_dict()
self.char_frequency = self._load_or_create_char_frequency()
@@ -399,9 +401,10 @@ class ChineseTypoGenerator:
for key, value in kwargs.items():
if hasattr(self, key):
setattr(self, key, value)
print(f"参数 {key} 已设置为 {value}")
logger.debug(f"参数 {key} 已设置为 {value}")
else:
print(f"警告: 参数 {key} 不存在")
logger.warning(f"警告: 参数 {key} 不存在")
def main():
# 创建错别字生成器实例
@@ -420,18 +423,18 @@ def main():
typo_sentence, typo_info = typo_generator.create_typo_sentence(sentence)
# 打印结果
print("\n原句:", sentence)
print("错字版:", typo_sentence)
logger.debug("原句:", sentence)
logger.debug("错字版:", typo_sentence)
# 打印错别字信息
if typo_info:
print("\n错别字信息:")
print(typo_generator.format_typo_info(typo_info))
logger.debug(f"错别字信息:{typo_generator.format_typo_info(typo_info)})")
# 计算并打印总耗时
end_time = time.time()
total_time = end_time - start_time
print(f"\n总耗时:{total_time:.2f}")
logger.debug(f"总耗时:{total_time:.2f}")
if __name__ == "__main__":
main()

View File

@@ -1,48 +0,0 @@
import os
import sys
from pathlib import Path
import tomli
import tomli_w
def sync_configs():
# 读取两个配置文件
try:
with open('bot_config_dev.toml', 'rb') as f: # tomli需要使用二进制模式读取
dev_config = tomli.load(f)
with open('bot_config.toml', 'rb') as f:
prod_config = tomli.load(f)
except FileNotFoundError as e:
print(f"错误:找不到配置文件 - {e}")
sys.exit(1)
except tomli.TOMLDecodeError as e:
print(f"错误TOML格式解析失败 - {e}")
sys.exit(1)
# 递归合并配置
def merge_configs(source, target):
for key, value in source.items():
if key not in target:
target[key] = value
elif isinstance(value, dict) and isinstance(target[key], dict):
merge_configs(value, target[key])
# 将dev配置的新属性合并到prod配置中
merge_configs(dev_config, prod_config)
# 保存更新后的配置
try:
with open('bot_config.toml', 'wb') as f: # tomli_w需要使用二进制模式写入
tomli_w.dump(prod_config, f)
print("配置文件同步完成!")
except Exception as e:
print(f"错误:保存配置文件失败 - {e}")
sys.exit(1)
if __name__ == '__main__':
# 确保在正确的目录下运行
script_dir = Path(__file__).parent
os.chdir(script_dir)
sync_configs()

View File

@@ -1,6 +1,21 @@
[inner]
version = "0.0.6"
#如果你想要修改配置文件请在修改后将version的值进行变更
#如果新增项目请在BotConfig类下新增相应的变量
#1.如果你修改的是[]层级项目,例如你新增了 [memory],那么请在config.py的 load_config函数中的include_configs字典中新增"内容":{
#"func":memory,
#"support":">=0.0.0", #新的版本号
#"necessary":False #是否必须
#}
#2.如果你修改的是[]下的项目,例如你新增了[memory]下的 memory_ban_words ,那么请在config.py的 load_config函数中的 memory函数下新增版本判断:
# if config.INNER_VERSION in SpecifierSet(">=0.0.2"):
# config.memory_ban_words = set(memory_config.get("memory_ban_words", []))
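# Editor's sketch (hypothetical, following the convention described above) of the
# include_configs entry to add in config.py for a new [memory] section:
#   include_configs["memory"] = {"func": memory, "support": ">=0.0.2", "necessary": False}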
[bot]
qq = 123
nickname = "麦麦"
alias_names = ["小麦", "阿麦"]
[personality]
prompt_personality = [
@@ -26,6 +41,13 @@ ban_words = [
# "403","张三"
]
ban_msgs_regex = [
# 需要过滤的消息原始消息匹配的正则表达式匹配到的消息将被过滤支持CQ码若不了解正则表达式请勿修改
#"https?://[^\\s]+", # 匹配https链接
#"\\d{4}-\\d{2}-\\d{2}", # 匹配日期
# "\\[CQ:at,qq=\\d+\\]" # 匹配@
]
[emoji]
check_interval = 120 # 检查表情包的时间间隔
register_interval = 10 # 注册表情包的时间间隔
@@ -46,11 +68,35 @@ max_response_length = 1024 # 麦麦回答的最大token数
build_memory_interval = 300 # 记忆构建间隔 单位秒
forget_memory_interval = 300 # 记忆遗忘间隔 单位秒
memory_ban_words = [ #不希望记忆的词
# "403","张三"
]
[mood]
mood_update_interval = 1.0 # 情绪更新间隔 单位秒
mood_decay_rate = 0.95 # 情绪衰减率
mood_intensity_factor = 1.0 # 情绪强度因子
[keywords_reaction] # 针对某个关键词作出反应
enable = true # 关键词反应功能的总开关
[[keywords_reaction.rules]] # 如果想要新增多个关键词直接复制本条修改keywords和reaction即可
enable = true # 是否启用此条为了人类在未来AI战争能更好地识别AIbushi默认开启
keywords = ["人机", "bot", "机器", "入机", "robot", "机器人"] # 会触发反应的关键词
reaction = "有人说你是人机或者机器人,否定这一事实,攻击或反问他。注意,只有认为你是机器人才回复,否则不要否认" # 触发之后添加的提示词
[[keywords_reaction.rules]] # 就像这样复制
enable = false # 仅作示例,不会触发
keywords = ["测试关键词回复","test",""]
reaction = "回答“测试成功”"
[chinese_typo]
enable = true # 是否启用中文错别字生成器
error_rate=0.006 # 单字替换概率
min_freq=7 # 最小字频阈值
tone_error_rate=0.2 # 声调错误概率
word_replace_rate=0.006 # 整词替换概率
[others]
enable_advance_output = true # 是否启用高级输出
enable_kuuki_read = true # 是否启用读空气功能
@@ -80,49 +126,42 @@ ban_user_id = [] #禁止回复消息的QQ号
[model.llm_reasoning] #回复模型1 主要回复模型
name = "Pro/deepseek-ai/DeepSeek-R1"
base_url = "SILICONFLOW_BASE_URL"
key = "SILICONFLOW_KEY"
provider = "SILICONFLOW"
pri_in = 0 #模型的输入价格(非必填,可以记录消耗)
pri_out = 0 #模型的输出价格(非必填,可以记录消耗)
[model.llm_reasoning_minor] #回复模型3 次要回复模型
name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"
base_url = "SILICONFLOW_BASE_URL"
key = "SILICONFLOW_KEY"
provider = "SILICONFLOW"
#非推理模型
[model.llm_normal] #V3 回复模型2 次要回复模型
name = "Pro/deepseek-ai/DeepSeek-V3"
base_url = "SILICONFLOW_BASE_URL"
key = "SILICONFLOW_KEY"
provider = "SILICONFLOW"
[model.llm_normal_minor] #V2.5
name = "deepseek-ai/DeepSeek-V2.5"
base_url = "SILICONFLOW_BASE_URL"
key = "SILICONFLOW_KEY"
provider = "SILICONFLOW"
[model.llm_emotion_judge] #主题判断 0.7/m
name = "Qwen/Qwen2.5-14B-Instruct"
base_url = "SILICONFLOW_BASE_URL"
key = "SILICONFLOW_KEY"
provider = "SILICONFLOW"
[model.llm_topic_judge] #主题判断建议使用qwen2.5 7b
name = "Pro/Qwen/Qwen2.5-7B-Instruct"
base_url = "SILICONFLOW_BASE_URL"
key = "SILICONFLOW_KEY"
provider = "SILICONFLOW"
[model.llm_summary_by_topic] #建议使用qwen2.5 32b 及以上
name = "Qwen/Qwen2.5-32B-Instruct"
base_url = "SILICONFLOW_BASE_URL"
key = "SILICONFLOW_KEY"
provider = "SILICONFLOW"
pri_in = 0
pri_out = 0
[model.moderation] #内容审核 未启用
name = ""
base_url = "SILICONFLOW_BASE_URL"
key = "SILICONFLOW_KEY"
provider = "SILICONFLOW"
pri_in = 0
pri_out = 0
@@ -130,8 +169,7 @@ pri_out = 0
[model.vlm] #图像识别 0.35/m
name = "Pro/Qwen/Qwen2-VL-7B-Instruct"
base_url = "SILICONFLOW_BASE_URL"
key = "SILICONFLOW_KEY"
provider = "SILICONFLOW"
@@ -139,5 +177,4 @@ key = "SILICONFLOW_KEY"
[model.embedding] #嵌入
name = "BAAI/bge-m3"
base_url = "SILICONFLOW_BASE_URL"
key = "SILICONFLOW_KEY"
provider = "SILICONFLOW"