better: improved memory-extraction strategy and removed unused options

This commit is contained in:
SengokuCola
2025-03-21 14:37:19 +08:00
parent e0a7bf7e99
commit 6c3afa84c4
14 changed files with 547 additions and 1282 deletions

View File

@@ -147,9 +147,7 @@ enable_check = false # 是否要检查表情包是不是合适的喵
 check_prompt = "符合公序良俗" # 检查表情包的标准呢
 [others]
-enable_advance_output = true # 是否要显示更多的运行信息呢
 enable_kuuki_read = true # 让机器人能够"察言观色"喵
-enable_debug_output = false # 是否启用调试输出喵
 enable_friend_chat = false # 是否启用好友聊天喵
 [groups]

View File

@@ -115,9 +115,7 @@ talk_frequency_down = [] # 降低回复频率的群号
 ban_user_id = [] # 禁止回复的用户QQ号
 [others]
-enable_advance_output = true # 是否启用高级输出
 enable_kuuki_read = true # 是否启用读空气功能
-enable_debug_output = false # 是否启用调试输出
 enable_friend_chat = false # 是否启用好友聊天
 # 模型配置

View File

@@ -31,9 +31,10 @@ _handler_registry: Dict[str, List[int]] = {}
 current_file_path = Path(__file__).resolve()
 LOG_ROOT = "logs"
-ENABLE_ADVANCE_OUTPUT = False
+ENABLE_ADVANCE_OUTPUT = os.getenv("SIMPLE_OUTPUT", "false")
+print(f"ENABLE_ADVANCE_OUTPUT: {ENABLE_ADVANCE_OUTPUT}")
-if ENABLE_ADVANCE_OUTPUT:
+if not ENABLE_ADVANCE_OUTPUT:
     # 默认全局配置
     DEFAULT_CONFIG = {
         # 日志级别配置

View File

@@ -110,7 +110,7 @@ async def build_memory_task():
"""每build_memory_interval秒执行一次记忆构建""" """每build_memory_interval秒执行一次记忆构建"""
logger.debug("[记忆构建]------------------------------------开始构建记忆--------------------------------------") logger.debug("[记忆构建]------------------------------------开始构建记忆--------------------------------------")
start_time = time.time() start_time = time.time()
await hippocampus.operation_build_memory(chat_size=20) await hippocampus.operation_build_memory()
end_time = time.time() end_time = time.time()
logger.success( logger.success(
f"[记忆构建]--------------------------记忆构建完成:耗时: {end_time - start_time:.2f} " f"[记忆构建]--------------------------记忆构建完成:耗时: {end_time - start_time:.2f} "

View File

@@ -68,9 +68,9 @@ class BotConfig:
     MODEL_V3_PROBABILITY: float = 0.1  # V3模型概率
     MODEL_R1_DISTILL_PROBABILITY: float = 0.1  # R1蒸馏模型概率
-    enable_advance_output: bool = False  # 是否启用高级输出
+    # enable_advance_output: bool = False  # 是否启用高级输出
     enable_kuuki_read: bool = True  # 是否启用读空气功能
-    enable_debug_output: bool = False  # 是否启用调试输出
+    # enable_debug_output: bool = False  # 是否启用调试输出
     enable_friend_chat: bool = False  # 是否启用好友聊天
     mood_update_interval: float = 1.0  # 情绪更新间隔 单位秒
@@ -106,6 +106,11 @@ class BotConfig:
     memory_forget_time: int = 24  # 记忆遗忘时间(小时)
     memory_forget_percentage: float = 0.01  # 记忆遗忘比例
     memory_compress_rate: float = 0.1  # 记忆压缩率
+    build_memory_sample_num: int = 10  # 记忆构建采样数量
+    build_memory_sample_length: int = 20  # 记忆构建采样长度
+    memory_build_distribution: list = field(
+        default_factory=lambda: [4,2,0.6,24,8,0.4]
+    )  # 记忆构建分布参数:分布1均值、标准差、权重;分布2均值、标准差、权重
     memory_ban_words: list = field(
         default_factory=lambda: ["表情包", "图片", "回复", "聊天记录"]
     )  # 添加新的配置项默认值
@@ -315,6 +320,11 @@ class BotConfig:
"memory_forget_percentage", config.memory_forget_percentage "memory_forget_percentage", config.memory_forget_percentage
) )
config.memory_compress_rate = memory_config.get("memory_compress_rate", config.memory_compress_rate) config.memory_compress_rate = memory_config.get("memory_compress_rate", config.memory_compress_rate)
if config.INNER_VERSION in SpecifierSet(">=0.0.11"):
config.memory_build_distribution = memory_config.get("memory_build_distribution", config.memory_build_distribution)
config.build_memory_sample_num = memory_config.get("build_memory_sample_num", config.build_memory_sample_num)
config.build_memory_sample_length = memory_config.get("build_memory_sample_length", config.build_memory_sample_length)
def remote(parent: dict): def remote(parent: dict):
remote_config = parent["remote"] remote_config = parent["remote"]
@@ -351,10 +361,10 @@ class BotConfig:
         def others(parent: dict):
             others_config = parent["others"]
-            config.enable_advance_output = others_config.get("enable_advance_output", config.enable_advance_output)
+            # config.enable_advance_output = others_config.get("enable_advance_output", config.enable_advance_output)
             config.enable_kuuki_read = others_config.get("enable_kuuki_read", config.enable_kuuki_read)
             if config.INNER_VERSION in SpecifierSet(">=0.0.7"):
-                config.enable_debug_output = others_config.get("enable_debug_output", config.enable_debug_output)
+                # config.enable_debug_output = others_config.get("enable_debug_output", config.enable_debug_output)
                 config.enable_friend_chat = others_config.get("enable_friend_chat", config.enable_friend_chat)

         # 版本表达式:>=1.0.0,<2.0.0

View File

@@ -18,6 +18,7 @@ from ..chat.utils import (
 )
 from ..models.utils_model import LLM_request
 from src.common.logger import get_module_logger, LogConfig, MEMORY_STYLE_CONFIG
+from src.plugins.memory_system.sample_distribution import MemoryBuildScheduler

 # 定义日志配置
 memory_config = LogConfig(
@@ -195,19 +196,9 @@ class Hippocampus:
         return hash(f"{nodes[0]}:{nodes[1]}")

     def random_get_msg_snippet(self, target_timestamp: float, chat_size: int, max_memorized_time_per_msg: int) -> list:
-        """随机抽取一段时间内的消息片段
-        Args:
-            - target_timestamp: 目标时间戳
-            - chat_size: 抽取的消息数量
-            - max_memorized_time_per_msg: 每条消息的最大记忆次数
-        Returns:
-            - list: 抽取出的消息记录列表
-        """
         try_count = 0
-        # 最多尝试3次抽取
-        while try_count < 3:
+        # 最多尝试2次抽取
+        while try_count < 2:
             messages = get_closest_chat_from_db(length=chat_size, timestamp=target_timestamp)
             if messages:
                 # 检查messages是否均没有达到记忆次数限制
@@ -224,54 +215,37 @@ class Hippocampus:
                     )
                     return messages
             try_count += 1
-        # 三次尝试均失败
         return None

-    def get_memory_sample(self, chat_size=20, time_frequency=None):
+    def get_memory_sample(self):
-        """获取记忆样本
-        Returns:
-            list: 消息记录列表,每个元素是一个消息记录字典列表
-        """
         # 硬编码:每条消息最大记忆次数
         # 如有需求可写入global_config
-        if time_frequency is None:
-            time_frequency = {"near": 2, "mid": 4, "far": 3}
         max_memorized_time_per_msg = 3

-        current_timestamp = datetime.datetime.now().timestamp()
+        # 创建双峰分布的记忆调度器
+        scheduler = MemoryBuildScheduler(
+            n_hours1=global_config.memory_build_distribution[0],  # 第一个分布均值(4小时前)
+            std_hours1=global_config.memory_build_distribution[1],  # 第一个分布标准差
+            weight1=global_config.memory_build_distribution[2],  # 第一个分布权重 60%
+            n_hours2=global_config.memory_build_distribution[3],  # 第二个分布均值(24小时前)
+            std_hours2=global_config.memory_build_distribution[4],  # 第二个分布标准差
+            weight2=global_config.memory_build_distribution[5],  # 第二个分布权重 40%
+            total_samples=global_config.build_memory_sample_num  # 总共生成10个时间点
+        )
+
+        # 生成时间戳数组
+        timestamps = scheduler.get_timestamp_array()
+        logger.debug(f"生成的时间戳数组: {timestamps}")

         chat_samples = []
-        # 短期1h 中期4h 长期24h
-        logger.debug("正在抽取短期消息样本")
-        for i in range(time_frequency.get("near")):
-            random_time = current_timestamp - random.randint(1, 3600)
-            messages = self.random_get_msg_snippet(random_time, chat_size, max_memorized_time_per_msg)
+        for timestamp in timestamps:
+            messages = self.random_get_msg_snippet(timestamp, global_config.build_memory_sample_length, max_memorized_time_per_msg)
             if messages:
-                logger.debug(f"成功抽取短期消息样本{len(messages)}条")
+                time_diff = (datetime.datetime.now().timestamp() - timestamp) / 3600
+                logger.debug(f"成功抽取 {time_diff:.1f} 小时前的消息样本,共{len(messages)}条")
                 chat_samples.append(messages)
             else:
-                logger.warning(f"第{i}次短期消息样本抽取失败")
+                logger.warning(f"时间戳 {timestamp} 的消息样本抽取失败")
-        logger.debug("正在抽取中期消息样本")
-        for i in range(time_frequency.get("mid")):
-            random_time = current_timestamp - random.randint(3600, 3600 * 4)
-            messages = self.random_get_msg_snippet(random_time, chat_size, max_memorized_time_per_msg)
-            if messages:
-                logger.debug(f"成功抽取中期消息样本{len(messages)}条")
-                chat_samples.append(messages)
-            else:
-                logger.warning(f"第{i}次中期消息样本抽取失败")
-        logger.debug("正在抽取长期消息样本")
-        for i in range(time_frequency.get("far")):
-            random_time = current_timestamp - random.randint(3600 * 4, 3600 * 24)
-            messages = self.random_get_msg_snippet(random_time, chat_size, max_memorized_time_per_msg)
-            if messages:
-                logger.debug(f"成功抽取长期消息样本{len(messages)}条")
-                chat_samples.append(messages)
-            else:
-                logger.warning(f"第{i}次长期消息样本抽取失败")

         return chat_samples
@@ -372,9 +346,8 @@ class Hippocampus:
         )
         return topic_num

-    async def operation_build_memory(self, chat_size=20):
+    async def operation_build_memory(self):
-        time_frequency = {"near": 1, "mid": 4, "far": 4}
-        memory_samples = self.get_memory_sample(chat_size, time_frequency)
+        memory_samples = self.get_memory_sample()
         for i, messages in enumerate(memory_samples, 1):
             all_topics = []
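For readers tracing the new extraction flow, below is a minimal standalone sketch of how get_memory_sample now derives its sampling timestamps from the new configuration values. The parameter values are the defaults introduced in this commit; the snippet itself is illustrative and not part of the diff.

# Illustrative only: bimodal time sampling as wired up in the diff above
from src.plugins.memory_system.sample_distribution import MemoryBuildScheduler

dist = [4, 2, 0.6, 24, 8, 0.4]  # memory_build_distribution: mean, std, weight of peak 1, then peak 2 (hours in the past)
scheduler = MemoryBuildScheduler(
    n_hours1=dist[0], std_hours1=dist[1], weight1=dist[2],
    n_hours2=dist[3], std_hours2=dist[4], weight2=dist[5],
    total_samples=10,  # build_memory_sample_num
)
timestamps = scheduler.get_timestamp_array()  # Unix timestamps clustered around ~4h and ~24h ago
# each timestamp then seeds random_get_msg_snippet(timestamp, build_memory_sample_length, ...)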

View File

@@ -7,11 +7,9 @@ import sys
 import time
 from collections import Counter
 from pathlib import Path

 import matplotlib.pyplot as plt
 import networkx as nx
 from dotenv import load_dotenv
-from src.common.logger import get_module_logger

 import jieba

 # from chat.config import global_config
@@ -19,6 +17,7 @@ import jieba
 root_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../.."))
 sys.path.append(root_path)
+from src.common.logger import get_module_logger
 from src.common.database import db  # noqa E402
 from src.plugins.memory_system.offline_llm import LLMModel  # noqa E402

File diff suppressed because it is too large

View File

@@ -0,0 +1,172 @@
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
import time
from datetime import datetime, timedelta
class DistributionVisualizer:
def __init__(self, mean=0, std=1, skewness=0, sample_size=10):
"""
初始化分布可视化器
参数:
mean (float): 期望均值
std (float): 标准差
skewness (float): 偏度
sample_size (int): 样本大小
"""
self.mean = mean
self.std = std
self.skewness = skewness
self.sample_size = sample_size
self.samples = None
def generate_samples(self):
"""生成具有指定参数的样本"""
if self.skewness == 0:
# 对于无偏度的情况,直接使用正态分布
self.samples = np.random.normal(loc=self.mean, scale=self.std, size=self.sample_size)
else:
# 使用 scipy.stats 生成具有偏度的分布
self.samples = stats.skewnorm.rvs(a=self.skewness,
loc=self.mean,
scale=self.std,
size=self.sample_size)
def get_weighted_samples(self):
"""获取加权后的样本数列"""
if self.samples is None:
self.generate_samples()
# 将样本值乘以样本大小
return self.samples * self.sample_size
def get_statistics(self):
"""获取分布的统计信息"""
if self.samples is None:
self.generate_samples()
return {
"均值": np.mean(self.samples),
"标准差": np.std(self.samples),
"实际偏度": stats.skew(self.samples)
}
class MemoryBuildScheduler:
def __init__(self,
n_hours1, std_hours1, weight1,
n_hours2, std_hours2, weight2,
total_samples=50):
"""
初始化记忆构建调度器
参数:
n_hours1 (float): 第一个分布的均值(距离现在的小时数)
std_hours1 (float): 第一个分布的标准差(小时)
weight1 (float): 第一个分布的权重
n_hours2 (float): 第二个分布的均值(距离现在的小时数)
std_hours2 (float): 第二个分布的标准差(小时)
weight2 (float): 第二个分布的权重
total_samples (int): 要生成的总时间点数量
"""
# 归一化权重
total_weight = weight1 + weight2
self.weight1 = weight1 / total_weight
self.weight2 = weight2 / total_weight
self.n_hours1 = n_hours1
self.std_hours1 = std_hours1
self.n_hours2 = n_hours2
self.std_hours2 = std_hours2
self.total_samples = total_samples
self.base_time = datetime.now()
def generate_time_samples(self):
"""生成混合分布的时间采样点"""
# 根据权重计算每个分布的样本数
samples1 = int(self.total_samples * self.weight1)
samples2 = self.total_samples - samples1
# 生成两个正态分布的小时偏移
hours_offset1 = np.random.normal(
loc=self.n_hours1,
scale=self.std_hours1,
size=samples1
)
hours_offset2 = np.random.normal(
loc=self.n_hours2,
scale=self.std_hours2,
size=samples2
)
# 合并两个分布的偏移
hours_offset = np.concatenate([hours_offset1, hours_offset2])
# 将偏移转换为实际时间戳(使用绝对值确保时间点在过去)
timestamps = [self.base_time - timedelta(hours=abs(offset)) for offset in hours_offset]
# 按时间排序(从最早到最近)
return sorted(timestamps)
def get_timestamp_array(self):
"""返回时间戳数组"""
timestamps = self.generate_time_samples()
return [int(t.timestamp()) for t in timestamps]
def print_time_samples(timestamps, show_distribution=True):
"""打印时间样本和分布信息"""
print(f"\n生成的{len(timestamps)}个时间点分布:")
print("序号".ljust(5), "时间戳".ljust(25), "距现在(小时)")
print("-" * 50)
now = datetime.now()
time_diffs = []
for i, timestamp in enumerate(timestamps, 1):
hours_diff = (now - timestamp).total_seconds() / 3600
time_diffs.append(hours_diff)
print(f"{str(i).ljust(5)} {timestamp.strftime('%Y-%m-%d %H:%M:%S').ljust(25)} {hours_diff:.2f}")
# 打印统计信息
print("\n统计信息:")
print(f"平均时间偏移:{np.mean(time_diffs):.2f}小时")
print(f"标准差:{np.std(time_diffs):.2f}小时")
print(f"最早时间:{min(timestamps).strftime('%Y-%m-%d %H:%M:%S')} ({max(time_diffs):.2f}小时前)")
print(f"最近时间:{max(timestamps).strftime('%Y-%m-%d %H:%M:%S')} ({min(time_diffs):.2f}小时前)")
if show_distribution:
# 计算时间分布的直方图
hist, bins = np.histogram(time_diffs, bins=40)
print("\n时间分布(每个*代表一个时间点):")
for i in range(len(hist)):
if hist[i] > 0:
print(f"{bins[i]:6.1f}-{bins[i+1]:6.1f}小时: {'*' * int(hist[i])}")
# 使用示例
if __name__ == "__main__":
# 创建一个双峰分布的记忆调度器
scheduler = MemoryBuildScheduler(
n_hours1=12, # 第一个分布均值12小时前
std_hours1=8, # 第一个分布标准差
weight1=0.7, # 第一个分布权重 70%
n_hours2=36, # 第二个分布均值36小时前
std_hours2=24, # 第二个分布标准差
weight2=0.3, # 第二个分布权重 30%
total_samples=50 # 总共生成50个时间点
)
# 生成时间分布
timestamps = scheduler.generate_time_samples()
# 打印结果,包含分布可视化
print_time_samples(timestamps, show_distribution=True)
# 打印时间戳数组
timestamp_array = scheduler.get_timestamp_array()
print("\n时间戳数组Unix时间戳")
print("[", end="")
for i, ts in enumerate(timestamp_array):
if i > 0:
print(", ", end="")
print(ts, end="")
print("]")

View File

@@ -0,0 +1,123 @@
import asyncio
import os
import time
from typing import Tuple, Union
import aiohttp
import requests
from src.common.logger import get_module_logger
logger = get_module_logger("offline_llm")
class LLMModel:
def __init__(self, model_name="deepseek-ai/DeepSeek-V3", **kwargs):
self.model_name = model_name
self.params = kwargs
self.api_key = os.getenv("SILICONFLOW_KEY")
self.base_url = os.getenv("SILICONFLOW_BASE_URL")
if not self.api_key or not self.base_url:
raise ValueError("环境变量未正确加载SILICONFLOW_KEY 或 SILICONFLOW_BASE_URL 未设置")
logger.info(f"API URL: {self.base_url}") # 使用 logger 记录 base_url
def generate_response(self, prompt: str) -> Union[str, Tuple[str, str]]:
"""根据输入的提示生成模型的响应"""
headers = {"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json"}
# 构建请求体
data = {
"model": self.model_name,
"messages": [{"role": "user", "content": prompt}],
"temperature": 0.5,
**self.params,
}
# 发送请求到完整的 chat/completions 端点
api_url = f"{self.base_url.rstrip('/')}/chat/completions"
logger.info(f"Request URL: {api_url}") # 记录请求的 URL
max_retries = 3
base_wait_time = 15 # 基础等待时间(秒)
for retry in range(max_retries):
try:
response = requests.post(api_url, headers=headers, json=data)
if response.status_code == 429:
wait_time = base_wait_time * (2**retry) # 指数退避
logger.warning(f"遇到请求限制(429),等待{wait_time}秒后重试...")
time.sleep(wait_time)
continue
response.raise_for_status() # 检查其他响应状态
result = response.json()
if "choices" in result and len(result["choices"]) > 0:
content = result["choices"][0]["message"]["content"]
reasoning_content = result["choices"][0]["message"].get("reasoning_content", "")
return content, reasoning_content
return "没有返回结果", ""
except Exception as e:
if retry < max_retries - 1: # 如果还有重试机会
wait_time = base_wait_time * (2**retry)
logger.error(f"[回复]请求失败,等待{wait_time}秒后重试... 错误: {str(e)}")
time.sleep(wait_time)
else:
logger.error(f"请求失败: {str(e)}")
return f"请求失败: {str(e)}", ""
logger.error("达到最大重试次数,请求仍然失败")
return "达到最大重试次数,请求仍然失败", ""
async def generate_response_async(self, prompt: str) -> Union[str, Tuple[str, str]]:
"""异步方式根据输入的提示生成模型的响应"""
headers = {"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json"}
# 构建请求体
data = {
"model": self.model_name,
"messages": [{"role": "user", "content": prompt}],
"temperature": 0.5,
**self.params,
}
# 发送请求到完整的 chat/completions 端点
api_url = f"{self.base_url.rstrip('/')}/chat/completions"
logger.info(f"Request URL: {api_url}") # 记录请求的 URL
max_retries = 3
base_wait_time = 15
async with aiohttp.ClientSession() as session:
for retry in range(max_retries):
try:
async with session.post(api_url, headers=headers, json=data) as response:
if response.status == 429:
wait_time = base_wait_time * (2**retry) # 指数退避
logger.warning(f"遇到请求限制(429),等待{wait_time}秒后重试...")
await asyncio.sleep(wait_time)
continue
response.raise_for_status() # 检查其他响应状态
result = await response.json()
if "choices" in result and len(result["choices"]) > 0:
content = result["choices"][0]["message"]["content"]
reasoning_content = result["choices"][0]["message"].get("reasoning_content", "")
return content, reasoning_content
return "没有返回结果", ""
except Exception as e:
if retry < max_retries - 1: # 如果还有重试机会
wait_time = base_wait_time * (2**retry)
logger.error(f"[回复]请求失败,等待{wait_time}秒后重试... 错误: {str(e)}")
await asyncio.sleep(wait_time)
else:
logger.error(f"请求失败: {str(e)}")
return f"请求失败: {str(e)}", ""
logger.error("达到最大重试次数,请求仍然失败")
return "达到最大重试次数,请求仍然失败", ""

View File

@@ -0,0 +1,192 @@
import datetime
import json
import re
import os
import sys
from typing import Dict, Union
from nonebot import get_driver
# 添加项目根目录到 Python 路径
root_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../.."))
sys.path.append(root_path)
# from src.plugins.chat.config import global_config
from src.common.database import db # 使用正确的导入语法
from src.plugins.schedule.offline_llm import LLMModel
from src.common.logger import get_module_logger
logger = get_module_logger("scheduler")
class ScheduleGenerator:
enable_output: bool = True
def __init__(self):
# 使用离线LLM模型
self.llm_scheduler = LLMModel(model_name="Pro/deepseek-ai/DeepSeek-V3", temperature=0.9)
self.today_schedule_text = ""
self.today_schedule = {}
self.tomorrow_schedule_text = ""
self.tomorrow_schedule = {}
self.yesterday_schedule_text = ""
self.yesterday_schedule = {}
async def initialize(self):
today = datetime.datetime.now()
tomorrow = datetime.datetime.now() + datetime.timedelta(days=1)
yesterday = datetime.datetime.now() - datetime.timedelta(days=1)
self.today_schedule_text, self.today_schedule = await self.generate_daily_schedule(target_date=today)
self.tomorrow_schedule_text, self.tomorrow_schedule = await self.generate_daily_schedule(
target_date=tomorrow, read_only=True
)
self.yesterday_schedule_text, self.yesterday_schedule = await self.generate_daily_schedule(
target_date=yesterday, read_only=True
)
async def generate_daily_schedule(
self, target_date: datetime.datetime = None, read_only: bool = False
) -> Dict[str, str]:
date_str = target_date.strftime("%Y-%m-%d")
weekday = target_date.strftime("%A")
schedule_text = str
existing_schedule = db.schedule.find_one({"date": date_str})
if existing_schedule:
if self.enable_output:
logger.debug(f"{date_str}的日程已存在:")
schedule_text = existing_schedule["schedule"]
# print(self.schedule_text)
elif not read_only:
logger.debug(f"{date_str}的日程不存在,准备生成新的日程。")
prompt = (
f"""我是{global_config.BOT_NICKNAME}{global_config.PROMPT_SCHEDULE_GEN},请为我生成{date_str}{weekday})的日程安排,包括:"""
+ """
1. 早上的学习和工作安排
2. 下午的活动和任务
3. 晚上的计划和休息时间
请按照时间顺序列出具体时间点和对应的活动用一个时间点而不是时间段来表示时间用JSON格式返回日程表
仅返回内容不要返回注释不要添加任何markdown或代码块样式时间采用24小时制
格式为{"时间": "活动","时间": "活动",...}。"""
)
try:
schedule_text, _ = self.llm_scheduler.generate_response(prompt)
db.schedule.insert_one({"date": date_str, "schedule": schedule_text})
self.enable_output = True
except Exception as e:
logger.error(f"生成日程失败: {str(e)}")
schedule_text = "生成日程时出错了"
# print(self.schedule_text)
else:
if self.enable_output:
logger.debug(f"{date_str}的日程不存在。")
schedule_text = "忘了"
return schedule_text, None
schedule_form = self._parse_schedule(schedule_text)
return schedule_text, schedule_form
def _parse_schedule(self, schedule_text: str) -> Union[bool, Dict[str, str]]:
"""解析日程文本,转换为时间和活动的字典"""
try:
reg = r"\{(.|\r|\n)+\}"
matched = re.search(reg, schedule_text)[0]
schedule_dict = json.loads(matched)
return schedule_dict
except json.JSONDecodeError:
logger.exception("解析日程失败: {}".format(schedule_text))
return False
def _parse_time(self, time_str: str) -> str:
"""解析时间字符串,转换为时间"""
return datetime.datetime.strptime(time_str, "%H:%M")
def get_current_task(self) -> str:
"""获取当前时间应该进行的任务"""
current_time = datetime.datetime.now().strftime("%H:%M")
# 找到最接近当前时间的任务
closest_time = None
min_diff = float("inf")
# 检查今天的日程
if not self.today_schedule:
return "摸鱼"
for time_str in self.today_schedule.keys():
diff = abs(self._time_diff(current_time, time_str))
if closest_time is None or diff < min_diff:
closest_time = time_str
min_diff = diff
# 检查昨天的日程中的晚间任务
if self.yesterday_schedule:
for time_str in self.yesterday_schedule.keys():
if time_str >= "20:00": # 只考虑晚上8点之后的任务
# 计算与昨天这个时间点的差异需要加24小时
diff = abs(self._time_diff(current_time, time_str))
if diff < min_diff:
closest_time = time_str
min_diff = diff
return closest_time, self.yesterday_schedule[closest_time]
if closest_time:
return closest_time, self.today_schedule[closest_time]
return "摸鱼"
def _time_diff(self, time1: str, time2: str) -> int:
"""计算两个时间字符串之间的分钟差"""
if time1 == "24:00":
time1 = "23:59"
if time2 == "24:00":
time2 = "23:59"
t1 = datetime.datetime.strptime(time1, "%H:%M")
t2 = datetime.datetime.strptime(time2, "%H:%M")
diff = int((t2 - t1).total_seconds() / 60)
# 考虑时间的循环性
if diff < -720:
diff += 1440 # 加一天的分钟
elif diff > 720:
diff -= 1440 # 减一天的分钟
# print(f"时间1[{time1}]: 时间2[{time2}],差值[{diff}]分钟")
return diff
def print_schedule(self):
"""打印完整的日程安排"""
if not self._parse_schedule(self.today_schedule_text):
logger.warning("今日日程有误,将在下次运行时重新生成")
db.schedule.delete_one({"date": datetime.datetime.now().strftime("%Y-%m-%d")})
else:
logger.info("=== 今日日程安排 ===")
for time_str, activity in self.today_schedule.items():
logger.info(f"时间[{time_str}]: 活动[{activity}]")
logger.info("==================")
self.enable_output = False
async def main():
# 使用示例
scheduler = ScheduleGenerator()
await scheduler.initialize()
scheduler.print_schedule()
print("\n当前任务:")
print(await scheduler.get_current_task())
print("昨天日程:")
print(scheduler.yesterday_schedule)
print("今天日程:")
print(scheduler.today_schedule)
print("明天日程:")
print(scheduler.tomorrow_schedule)
# 当作为组件导入时使用的实例
bot_schedule = ScheduleGenerator()
if __name__ == "__main__":
import asyncio
# 当直接运行此文件时执行
asyncio.run(main())

View File

@@ -1,12 +1,15 @@
 import datetime
 import json
 import re
+import os
+import sys
 from typing import Dict, Union

 from nonebot import get_driver

-from src.plugins.chat.config import global_config
+# 添加项目根目录到 Python 路径
+from src.plugins.chat.config import global_config
 from ...common.database import db  # 使用正确的导入语法
 from ..models.utils_model import LLM_request
 from src.common.logger import get_module_logger
@@ -165,24 +168,5 @@ class ScheduleGenerator:
logger.info(f"时间[{time_str}]: 活动[{activity}]") logger.info(f"时间[{time_str}]: 活动[{activity}]")
logger.info("==================") logger.info("==================")
self.enable_output = False self.enable_output = False
# 当作为组件导入时使用的实例
# def main():
# # 使用示例
# scheduler = ScheduleGenerator()
# # new_schedule = scheduler.generate_daily_schedule()
# scheduler.print_schedule()
# print("\n当前任务")
# print(scheduler.get_current_task())
# print("昨天日程:")
# print(scheduler.yesterday_schedule)
# print("今天日程:")
# print(scheduler.today_schedule)
# print("明天日程:")
# print(scheduler.tomorrow_schedule)
# if __name__ == "__main__":
# main()
bot_schedule = ScheduleGenerator() bot_schedule = ScheduleGenerator()

View File

@@ -1,8 +1,6 @@
 HOST=127.0.0.1
 PORT=8080
-ENABLE_ADVANCE_OUTPUT=false

 # 插件配置
 PLUGINS=["src2.plugins.chat"]
@@ -31,6 +29,7 @@ CHAT_ANY_WHERE_KEY=
 SILICONFLOW_KEY=

 # 定义日志相关配置
+SIMPLE_OUTPUT=true # 精简控制台输出格式
 CONSOLE_LOG_LEVEL=INFO # 自定义日志的默认控制台输出日志级别
 FILE_LOG_LEVEL=DEBUG # 自定义日志的默认文件输出日志级别
 DEFAULT_CONSOLE_LOG_LEVEL=SUCCESS # 原生日志的控制台输出日志级别(nonebot就是这一类)

View File

@@ -1,5 +1,5 @@
 [inner]
-version = "0.0.10"
+version = "0.0.11"

 #以下是给开发人员阅读的,一般用户不需要阅读
 #如果你想要修改配置文件,请在修改后将version的值进行变更
@@ -66,12 +66,15 @@ model_r1_distill_probability = 0.1 # 麦麦回答时选择次要回复模型3
 max_response_length = 1024 # 麦麦回答的最大token数

 [willing]
-willing_mode = "classical"
+willing_mode = "classical" # 回复意愿模式 经典模式
-# willing_mode = "dynamic"
+# willing_mode = "dynamic" # 动态模式(可能不兼容)
-# willing_mode = "custom"
+# willing_mode = "custom" # 自定义模式(可自行调整)

 [memory]
 build_memory_interval = 2000 # 记忆构建间隔 单位秒 间隔越低,麦麦学习越多,但是冗余信息也会增多
+build_memory_distribution = [4,2,0.6,24,8,0.4] # 记忆构建分布参数:分布1均值、标准差、权重;分布2均值、标准差、权重
+build_memory_sample_num = 10 # 采样数量,数值越高记忆采样次数越多
+build_memory_sample_length = 20 # 采样长度,数值越高一段记忆内容越丰富
 memory_compress_rate = 0.1 # 记忆压缩率 控制记忆精简程度 建议保持默认,调高可以获得更多信息,但是冗余信息也会增多

 forget_memory_interval = 1000 # 记忆遗忘间隔 单位秒 间隔越低,麦麦遗忘越频繁,记忆更精简,但更难学习
@@ -109,9 +112,7 @@ tone_error_rate=0.2 # 声调错误概率
 word_replace_rate=0.006 # 整词替换概率

 [others]
-enable_advance_output = false # 是否启用高级输出
 enable_kuuki_read = true # 是否启用读空气功能
-enable_debug_output = false # 是否启用调试输出
 enable_friend_chat = false # 是否启用好友聊天

 [groups]
@@ -120,9 +121,9 @@ talk_allowed = [
     123,
 ] #可以回复消息的群
 talk_frequency_down = [] #降低回复频率的群
-ban_user_id = [] #禁止回复消息的QQ号
+ban_user_id = [] #禁止回复和读取消息的QQ号

-[remote] #测试功能,发送统计信息,主要是看全球有多少只麦麦
+[remote] #发送统计信息,主要是看全球有多少只麦麦
 enable = true