Merge remote-tracking branch 'upstream/debug' into tc_refractor
This commit is contained in:
@@ -1,12 +1,9 @@
|
||||
import asyncio
|
||||
import os
|
||||
import random
|
||||
import time
|
||||
|
||||
from loguru import logger
|
||||
from nonebot import get_driver, on_command, on_message, require
|
||||
from nonebot import get_driver, on_message, require
|
||||
from nonebot.adapters.onebot.v11 import Bot, GroupMessageEvent, Message, MessageSegment
|
||||
from nonebot.rule import to_me
|
||||
from nonebot.typing import T_State
|
||||
|
||||
from ...common.database import Database
|
||||
@@ -19,6 +16,10 @@ from .emoji_manager import emoji_manager
|
||||
from .relationship_manager import relationship_manager
|
||||
from .willing_manager import willing_manager
|
||||
from .chat_stream import chat_manager
|
||||
from ..memory_system.memory import hippocampus, memory_graph
|
||||
from .bot import ChatBot
|
||||
from .message_sender import message_manager, message_sender
|
||||
|
||||
|
||||
# 创建LLM统计实例
|
||||
llm_stats = LLMStatistics("llm_statistics.txt")
|
||||
@@ -31,27 +32,20 @@ driver = get_driver()
|
||||
config = driver.config
|
||||
|
||||
Database.initialize(
|
||||
host= config.MONGODB_HOST,
|
||||
port= int(config.MONGODB_PORT),
|
||||
db_name= config.DATABASE_NAME,
|
||||
username= config.MONGODB_USERNAME,
|
||||
password= config.MONGODB_PASSWORD,
|
||||
auth_source= config.MONGODB_AUTH_SOURCE
|
||||
host=config.MONGODB_HOST,
|
||||
port=int(config.MONGODB_PORT),
|
||||
db_name=config.DATABASE_NAME,
|
||||
username=config.MONGODB_USERNAME,
|
||||
password=config.MONGODB_PASSWORD,
|
||||
auth_source=config.MONGODB_AUTH_SOURCE
|
||||
)
|
||||
print("\033[1;32m[初始化数据库完成]\033[0m")
|
||||
logger.success("初始化数据库成功")
|
||||
|
||||
|
||||
# 导入其他模块
|
||||
from ..memory_system.memory import hippocampus, memory_graph
|
||||
from .bot import ChatBot
|
||||
|
||||
# from .message_send_control import message_sender
|
||||
from .message_sender import message_manager, message_sender
|
||||
|
||||
# 初始化表情管理器
|
||||
emoji_manager.initialize()
|
||||
|
||||
print(f"\033[1;32m正在唤醒{global_config.BOT_NICKNAME}......\033[0m")
|
||||
logger.debug(f"正在唤醒{global_config.BOT_NICKNAME}......")
|
||||
# 创建机器人实例
|
||||
chat_bot = ChatBot()
|
||||
# 注册群消息处理器
|
||||
@@ -60,71 +54,80 @@ group_msg = on_message(priority=5)
|
||||
scheduler = require("nonebot_plugin_apscheduler").scheduler
|
||||
|
||||
|
||||
|
||||
@driver.on_startup
async def start_background_tasks():
    """Start the background services when the driver starts up.

    Starts LLM statistics, the mood manager, the periodic emoji check and
    the bot schedule. Status messages go through the logger only; the
    legacy ANSI ``print`` lines that duplicated them are dropped.
    """
    # Start LLM usage statistics.
    llm_stats.start()
    logger.success("LLM统计功能启动成功")

    # Initialise and start the mood manager.
    mood_manager = MoodManager.get_instance()
    mood_manager.start_mood_update(update_interval=global_config.mood_update_interval)
    logger.success("情绪管理器启动成功")

    # Only the emoji management task is started here.
    # NOTE(review): the task reference is not kept; asyncio only holds weak
    # references to tasks, so consider storing it somewhere long-lived.
    asyncio.create_task(emoji_manager.start_periodic_check(interval_MINS=global_config.EMOJI_CHECK_INTERVAL))
    await bot_schedule.initialize()
    bot_schedule.print_schedule()
|
||||
|
||||
|
||||
|
||||
@driver.on_startup
async def init_relationships():
    """Load all relationship data and start the relationship manager at startup."""
    # Logged once through the logger; the duplicate ANSI print is dropped.
    logger.debug("正在加载用户关系数据...")
    await relationship_manager.load_all_relationships()
    asyncio.create_task(relationship_manager._start_relationship_manager())
|
||||
|
||||
|
||||
@driver.on_bot_connect
async def _(bot: Bot):
    """Handle a successful bot connection.

    Binds the connected bot to the message sender, starts the message
    processor once per process, and spawns the emoji scan and chat-stream
    maintenance tasks. Status messages are emitted through the logger only.
    """
    global _message_manager_started
    logger.debug(f"-----------{global_config.BOT_NICKNAME}成功连接!-----------")
    await willing_manager.ensure_started()

    message_sender.set_bot(bot)
    logger.success("-----------消息发送器已启动!-----------")

    # The message processor must only ever be started once, even if the
    # bot reconnects.
    if not _message_manager_started:
        asyncio.create_task(message_manager.start_processor())
        _message_manager_started = True
        logger.success("-----------消息处理器已启动!-----------")

    # NOTE(review): the tasks below are spawned on every (re)connect and are
    # not guarded by a flag — confirm they are safe to start repeatedly.
    asyncio.create_task(emoji_manager._periodic_scan(interval_MINS=global_config.EMOJI_REGISTER_INTERVAL))
    logger.success("-----------开始偷表情包!-----------")
    asyncio.create_task(chat_manager._initialize())
    asyncio.create_task(chat_manager._auto_save_task())
|
||||
|
||||
|
||||
|
||||
@group_msg.handle()
async def _(bot: Bot, event: GroupMessageEvent, state: T_State):
    """Forward every received group message to the shared ChatBot instance."""
    # ``state`` is part of the handler signature but is not used here.
    await chat_bot.handle_message(event, bot)
|
||||
|
||||
|
||||
# 添加build_memory定时任务
|
||||
# Scheduled job: build memory.
@scheduler.scheduled_job("interval", seconds=global_config.build_memory_interval, id="build_memory")
async def build_memory_task():
    """Run one memory-building pass every ``build_memory_interval`` seconds.

    Logs the start of the pass, awaits ``hippocampus.operation_build_memory``
    and logs how long the pass took.
    """
    logger.debug(
        "[记忆构建]"
        "------------------------------------开始构建记忆--------------------------------------")
    # perf_counter is monotonic, so the measured duration cannot be skewed
    # by wall-clock adjustments during the pass.
    start_time = time.perf_counter()
    await hippocampus.operation_build_memory(chat_size=20)
    end_time = time.perf_counter()
    logger.success(
        f"[记忆构建]--------------------------记忆构建完成:耗时: {end_time - start_time:.2f} "
        "秒-------------------------------------------")
|
||||
|
||||
|
||||
@scheduler.scheduled_job("interval", seconds=global_config.forget_memory_interval, id="forget_memory")
async def forget_memory_task():
    """Run one memory-forgetting pass every ``forget_memory_interval`` seconds.

    Awaits ``hippocampus.operation_forget_topic`` with ``percentage=0.1`` and
    prints a status line before and after the pass.
    """
    print("\033[1;32m[记忆遗忘]\033[0m 开始遗忘记忆...")
    await hippocampus.operation_forget_topic(percentage=0.1)
    print("\033[1;32m[记忆遗忘]\033[0m 记忆遗忘完成")
|
||||
|
||||
|
||||
@scheduler.scheduled_job("interval", seconds=global_config.build_memory_interval + 10, id="merge_memory")
|
||||
async def merge_memory_task():
|
||||
@@ -133,9 +136,9 @@ async def merge_memory_task():
|
||||
# await hippocampus.operation_merge_memory(percentage=0.1)
|
||||
# print("\033[1;32m[记忆整合]\033[0m 记忆整合完成")
|
||||
|
||||
|
||||
@scheduler.scheduled_job("interval", seconds=30, id="print_mood")
async def print_mood_task():
    """Print the current mood status once every 30 seconds."""
    MoodManager.get_instance().print_mood_status()
|
||||
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import re
|
||||
import time
|
||||
from random import random
|
||||
from loguru import logger
|
||||
@@ -31,10 +32,10 @@ class ChatBot:
|
||||
self._started = False
|
||||
self.mood_manager = MoodManager.get_instance() # 获取情绪管理器单例
|
||||
self.mood_manager.start_mood_update() # 启动情绪更新
|
||||
|
||||
|
||||
self.emoji_chance = 0.2 # 发送表情包的基础概率
|
||||
# self.message_streams = MessageStreamContainer()
|
||||
|
||||
|
||||
async def _ensure_started(self):
|
||||
"""确保所有任务已启动"""
|
||||
if not self._started:
|
||||
@@ -42,9 +43,9 @@ class ChatBot:
|
||||
|
||||
async def handle_message(self, event: GroupMessageEvent, bot: Bot) -> None:
|
||||
"""处理收到的群消息"""
|
||||
|
||||
|
||||
self.bot = bot # 更新 bot 实例
|
||||
|
||||
|
||||
# group_info = await bot.get_group_info(group_id=event.group_id)
|
||||
# sender_info = await bot.get_group_member_info(group_id=event.group_id, user_id=event.user_id, no_cache=True)
|
||||
|
||||
@@ -96,8 +97,17 @@ class ChatBot:
|
||||
# 过滤词
|
||||
for word in global_config.ban_words:
|
||||
if word in message.processed_plain_text:
|
||||
logger.info(f"\033[1;32m[{groupinfo.group_name}]{userinfo.user_nickname}:\033[0m {message.processed_plain_text}")
|
||||
logger.info(f"\033[1;32m[过滤词识别]\033[0m 消息中含有{word},filtered")
|
||||
logger.info(
|
||||
f"[{groupinfo.group_name}]{userinfo.user_nickname}:{message.processed_plain_text}")
|
||||
logger.info(f"[过滤词识别]消息中含有{word},filtered")
|
||||
return
|
||||
|
||||
# 正则表达式过滤
|
||||
for pattern in global_config.ban_msgs_regex:
|
||||
if re.search(pattern, message.raw_message):
|
||||
logger.info(
|
||||
f"[{message.group_name}]{message.user_nickname}:{message.raw_message}")
|
||||
logger.info(f"[正则表达式过滤]消息匹配到{pattern},filtered")
|
||||
return
|
||||
|
||||
current_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(messageinfo.time))
|
||||
@@ -107,8 +117,9 @@ class ChatBot:
|
||||
# topic=await topic_identifier.identify_topic_llm(message.processed_plain_text)
|
||||
topic = ''
|
||||
interested_rate = 0
|
||||
interested_rate = await hippocampus.memory_activate_value(message.processed_plain_text)/100
|
||||
print(f"\033[1;32m[记忆激活]\033[0m 对{message.processed_plain_text}的激活度:---------------------------------------{interested_rate}\n")
|
||||
interested_rate = await hippocampus.memory_activate_value(message.processed_plain_text) / 100
|
||||
logger.debug(f"对{message.processed_plain_text}"
|
||||
f"的激活度:{interested_rate}")
|
||||
# logger.info(f"\033[1;32m[主题识别]\033[0m 使用{global_config.topic_extract}主题: {topic}")
|
||||
|
||||
await self.storage.store_message(message,chat, topic[0] if topic else None)
|
||||
@@ -124,7 +135,10 @@ class ChatBot:
|
||||
)
|
||||
current_willing = willing_manager.get_willing(chat_stream=chat)
|
||||
|
||||
print(f"\033[1;32m[{current_time}][{chat.group_info.group_name}]{chat.user_info.user_nickname}:\033[0m {message.processed_plain_text}\033[1;36m[回复意愿:{current_willing:.2f}][概率:{reply_probability * 100:.1f}%]\033[0m")
|
||||
logger.info(
|
||||
f"[{current_time}][{chat.group_info.group_name}]{chat.user_info.user_nickname}:"
|
||||
f"{message.processed_plain_text}[回复意愿:{current_willing:.2f}][概率:{reply_probability * 100:.1f}%]"
|
||||
)
|
||||
|
||||
response = None
|
||||
|
||||
@@ -159,13 +173,13 @@ class ChatBot:
|
||||
thinking_message = msg
|
||||
container.messages.remove(msg)
|
||||
break
|
||||
|
||||
|
||||
# 如果找不到思考消息,直接返回
|
||||
if not thinking_message:
|
||||
print(f"\033[1;33m[警告]\033[0m 未找到对应的思考消息,可能已超时被移除")
|
||||
logger.warning("未找到对应的思考消息,可能已超时被移除")
|
||||
return
|
||||
|
||||
#记录开始思考的时间,避免从思考到回复的时间太久
|
||||
|
||||
# 记录开始思考的时间,避免从思考到回复的时间太久
|
||||
thinking_start_time = thinking_message.thinking_start_time
|
||||
message_set = MessageSet(chat, think_id)
|
||||
message_set = MessageSet(chat, think_id)
|
||||
@@ -175,7 +189,7 @@ class ChatBot:
|
||||
mark_head = False
|
||||
for msg in response:
|
||||
# print(f"\033[1;32m[回复内容]\033[0m {msg}")
|
||||
#通过时间改变时间戳
|
||||
# 通过时间改变时间戳
|
||||
typing_time = calculate_typing_time(msg)
|
||||
accu_typing_time += typing_time
|
||||
timepoint = tinking_time_point + accu_typing_time
|
||||
@@ -193,19 +207,19 @@ class ChatBot:
|
||||
if not mark_head:
|
||||
mark_head = True
|
||||
message_set.add_message(bot_message)
|
||||
|
||||
#message_set 可以直接加入 message_manager
|
||||
|
||||
# message_set 可以直接加入 message_manager
|
||||
# print(f"\033[1;32m[回复]\033[0m 将回复载入发送容器")
|
||||
message_manager.add_message(message_set)
|
||||
|
||||
|
||||
bot_response_time = tinking_time_point
|
||||
|
||||
if random() < global_config.emoji_chance:
|
||||
emoji_raw = await emoji_manager.get_emoji_for_text(response)
|
||||
|
||||
|
||||
# 检查是否 <没有找到> emoji
|
||||
if emoji_raw != None:
|
||||
emoji_path,discription = emoji_raw
|
||||
emoji_path, description = emoji_raw
|
||||
|
||||
emoji_cq = image_path_to_base64(emoji_path)
|
||||
|
||||
@@ -226,8 +240,8 @@ class ChatBot:
|
||||
)
|
||||
message_manager.add_message(bot_message)
|
||||
emotion = await self.gpt._get_emotion_tags(raw_content)
|
||||
print(f"为 '{response}' 获取到的情感标签为:{emotion}")
|
||||
valuedict={
|
||||
logger.debug(f"为 '{response}' 获取到的情感标签为:{emotion}")
|
||||
valuedict = {
|
||||
'happy': 0.5,
|
||||
'angry': -1,
|
||||
'sad': -0.5,
|
||||
@@ -240,9 +254,10 @@ class ChatBot:
|
||||
# 使用情绪管理器更新情绪
|
||||
self.mood_manager.update_mood_from_emotion(emotion[0], global_config.mood_intensity_factor)
|
||||
|
||||
willing_manager.change_reply_willing_after_sent(
|
||||
chat_stream=chat
|
||||
)
|
||||
# willing_manager.change_reply_willing_after_sent(
|
||||
# chat_stream=chat
|
||||
# )
|
||||
|
||||
|
||||
# 创建全局ChatBot实例
|
||||
chat_bot = ChatBot()
|
||||
chat_bot = ChatBot()
|
||||
|
||||
@@ -1,46 +1,54 @@
|
||||
import os
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Dict, Optional
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
import tomli
|
||||
from loguru import logger
|
||||
from packaging import version
|
||||
from packaging.version import Version, InvalidVersion
|
||||
from packaging.specifiers import SpecifierSet, InvalidSpecifier
|
||||
|
||||
|
||||
@dataclass
|
||||
class BotConfig:
|
||||
"""机器人配置类"""
|
||||
"""机器人配置类"""
|
||||
|
||||
INNER_VERSION: Version = None
|
||||
|
||||
BOT_QQ: Optional[int] = 1
|
||||
BOT_NICKNAME: Optional[str] = None
|
||||
|
||||
BOT_ALIAS_NAMES: List[str] = field(default_factory=list) # 别名,可以通过这个叫它
|
||||
|
||||
# 消息处理相关配置
|
||||
MIN_TEXT_LENGTH: int = 2 # 最小处理文本长度
|
||||
MAX_CONTEXT_SIZE: int = 15 # 上下文最大消息数
|
||||
emoji_chance: float = 0.2 # 发送表情包的基础概率
|
||||
|
||||
|
||||
ENABLE_PIC_TRANSLATE: bool = True # 是否启用图片翻译
|
||||
|
||||
|
||||
talk_allowed_groups = set()
|
||||
talk_frequency_down_groups = set()
|
||||
thinking_timeout: int = 100 # 思考时间
|
||||
|
||||
|
||||
response_willing_amplifier: float = 1.0 # 回复意愿放大系数
|
||||
response_interested_rate_amplifier: float = 1.0 # 回复兴趣度放大系数
|
||||
down_frequency_rate: float = 3.5 # 降低回复频率的群组回复意愿降低系数
|
||||
|
||||
|
||||
ban_user_id = set()
|
||||
|
||||
|
||||
build_memory_interval: int = 30 # 记忆构建间隔(秒)
|
||||
forget_memory_interval: int = 300 # 记忆遗忘间隔(秒)
|
||||
EMOJI_CHECK_INTERVAL: int = 120 # 表情包检查间隔(分钟)
|
||||
EMOJI_REGISTER_INTERVAL: int = 10 # 表情包注册间隔(分钟)
|
||||
EMOJI_SAVE: bool = True # 偷表情包
|
||||
EMOJI_CHECK: bool = False #是否开启过滤
|
||||
EMOJI_CHECK_PROMPT: str = "符合公序良俗" # 表情包过滤要求
|
||||
EMOJI_CHECK: bool = False # 是否开启过滤
|
||||
EMOJI_CHECK_PROMPT: str = "符合公序良俗" # 表情包过滤要求
|
||||
|
||||
ban_words = set()
|
||||
ban_msgs_regex = set()
|
||||
|
||||
max_response_length: int = 1024 # 最大回复长度
|
||||
|
||||
|
||||
# 模型配置
|
||||
llm_reasoning: Dict[str, str] = field(default_factory=lambda: {})
|
||||
llm_reasoning_minor: Dict[str, str] = field(default_factory=lambda: {})
|
||||
@@ -56,176 +64,359 @@ class BotConfig:
|
||||
MODEL_R1_PROBABILITY: float = 0.8 # R1模型概率
|
||||
MODEL_V3_PROBABILITY: float = 0.1 # V3模型概率
|
||||
MODEL_R1_DISTILL_PROBABILITY: float = 0.1 # R1蒸馏模型概率
|
||||
|
||||
|
||||
enable_advance_output: bool = False # 是否启用高级输出
|
||||
enable_kuuki_read: bool = True # 是否启用读空气功能
|
||||
|
||||
mood_update_interval: float = 1.0 # 情绪更新间隔 单位秒
|
||||
mood_decay_rate: float = 0.95 # 情绪衰减率
|
||||
mood_intensity_factor: float = 0.7 # 情绪强度因子
|
||||
enable_kuuki_read: bool = True # 是否启用读空气功能
|
||||
|
||||
mood_update_interval: float = 1.0 # 情绪更新间隔 单位秒
|
||||
mood_decay_rate: float = 0.95 # 情绪衰减率
|
||||
mood_intensity_factor: float = 0.7 # 情绪强度因子
|
||||
|
||||
keywords_reaction_rules = [] # 关键词回复规则
|
||||
|
||||
chinese_typo_enable = True # 是否启用中文错别字生成器
|
||||
chinese_typo_error_rate = 0.03 # 单字替换概率
|
||||
chinese_typo_min_freq = 7 # 最小字频阈值
|
||||
chinese_typo_tone_error_rate = 0.2 # 声调错误概率
|
||||
chinese_typo_word_replace_rate = 0.02 # 整词替换概率
|
||||
|
||||
# 默认人设
|
||||
PROMPT_PERSONALITY=[
|
||||
PROMPT_PERSONALITY = [
|
||||
"曾经是一个学习地质的女大学生,现在学习心理学和脑科学,你会刷贴吧",
|
||||
"是一个女大学生,你有黑色头发,你会刷小红书",
|
||||
"是一个女大学生,你会刷b站,对ACG文化感兴趣"
|
||||
"是一个女大学生,你会刷b站,对ACG文化感兴趣",
|
||||
]
|
||||
PROMPT_SCHEDULE_GEN="一个曾经学习地质,现在学习心理学和脑科学的女大学生,喜欢刷qq,贴吧,知乎和小红书"
|
||||
|
||||
PERSONALITY_1: float = 0.6 # 第一种人格概率
|
||||
PERSONALITY_2: float = 0.3 # 第二种人格概率
|
||||
PERSONALITY_3: float = 0.1 # 第三种人格概率
|
||||
|
||||
|
||||
PROMPT_SCHEDULE_GEN = "一个曾经学习地质,现在学习心理学和脑科学的女大学生,喜欢刷qq,贴吧,知乎和小红书"
|
||||
|
||||
PERSONALITY_1: float = 0.6 # 第一种人格概率
|
||||
PERSONALITY_2: float = 0.3 # 第二种人格概率
|
||||
PERSONALITY_3: float = 0.1 # 第三种人格概率
|
||||
|
||||
memory_ban_words: list = field(
|
||||
default_factory=lambda: ["表情包", "图片", "回复", "聊天记录"]
|
||||
) # 添加新的配置项默认值
|
||||
|
||||
@staticmethod
def get_config_dir() -> str:
    """Return the path of the project ``config`` directory, creating it if missing.

    The directory is resolved as ``config`` under the directory three levels
    above the directory containing this file.

    Returns:
        Absolute path of the configuration directory.
    """
    current_dir = os.path.dirname(os.path.abspath(__file__))
    root_dir = os.path.abspath(os.path.join(current_dir, "..", "..", ".."))
    config_dir = os.path.join(root_dir, "config")
    # exist_ok=True replaces the separate exists() check, which could race
    # with another process creating the directory in between.
    os.makedirs(config_dir, exist_ok=True)
    return config_dir
|
||||
|
||||
|
||||
@classmethod
def convert_to_specifierset(cls, value: str) -> SpecifierSet:
    """Convert a version-constraint expression string into a ``SpecifierSet``.

    Args:
        value: Version constraint expression, e.g. ``">=1.0.0,<2.0.0"``.

    Returns:
        The parsed ``SpecifierSet``.

    Raises:
        SystemExit: With exit code 1 when ``value`` is not a valid
            constraint expression.
    """
    try:
        return SpecifierSet(value)
    except InvalidSpecifier as e:
        # The hint must be part of the message itself: loguru treats extra
        # positional arguments as str.format() arguments, so a second string
        # passed separately would never be printed.
        logger.error(
            f"{value} 分类使用了错误的版本约束表达式\n"
            "请阅读 https://semver.org/lang/zh-CN/ 修改代码"
        )
        # Same effect as exit(1) without relying on the site-provided builtin.
        raise SystemExit(1) from e
|
||||
|
||||
@classmethod
def get_config_version(cls, toml: dict) -> Version:
    """Extract the config-file version from the parsed TOML dictionary.

    When the ``inner`` table is absent, the config is recorded as version
    ``0.0.0`` by inserting ``{"version": "0.0.0"}`` into ``toml`` in place.

    Args:
        toml: Parsed configuration dictionary. May be mutated (see above).

    Returns:
        The parsed ``Version`` of the configuration file.

    Raises:
        KeyError: The ``inner`` table exists but has no ``version`` key.
        InvalidVersion: The ``version`` value is not a valid version string.
    """
    if "inner" in toml:
        try:
            config_version: str = toml["inner"]["version"]
        except KeyError as e:
            # The inner table is present here; what is missing is its
            # version key, so report exactly that.
            logger.error("配置文件中 inner 段 缺少 version 键, 这是错误的配置文件")
            raise KeyError(f"配置文件中 inner 段 缺少 {e} 键, 这是错误的配置文件") from e
    else:
        toml["inner"] = {"version": "0.0.0"}
        config_version = toml["inner"]["version"]

    try:
        return version.parse(config_version)
    except (InvalidVersion, TypeError) as e:
        # TypeError covers a non-string value (e.g. a TOML number) so that it
        # is reported through the same, documented error path.
        logger.error(
            "配置文件中 inner段 的 version 键是错误的版本描述\n"
            "请阅读 https://semver.org/lang/zh-CN/ 修改配置,并参考本项目指定的模板进行修改\n"
            "本项目在不同的版本下有不同的模板,请注意识别"
        )
        raise InvalidVersion("配置文件中 inner段 的 version 键是错误的版本描述\n") from e
|
||||
|
||||
@classmethod
def load_config(cls, config_path: str = None) -> "BotConfig":
    """Load the bot configuration from a TOML file.

    Each top-level TOML table is handled by a dedicated closure registered in
    ``include_configs`` together with the config versions that support it.
    Tables marked ``"necessary": False`` may be absent; any other missing
    table is an error.

    Args:
        config_path: Path of the TOML file. When it is ``None`` or the file
            does not exist, a config holding only default values is returned.

    Returns:
        The populated ``BotConfig`` instance.

    Raises:
        SystemExit: The file is not valid TOML, or a support expression in
            ``include_configs`` is invalid.
        KeyError: A required table, model entry or model field is missing.
        InvalidVersion: The config version is invalid or outside the range
            supported by one of the tables present in the file.
    """
    config = cls()

    def personality(parent: dict):
        personality_config = parent["personality"]
        prompt_personality = personality_config.get("prompt_personality")
        # Guard against a missing key before taking len().
        if prompt_personality and len(prompt_personality) >= 2:
            logger.debug(f"载入自定义人格:{prompt_personality}")
            config.PROMPT_PERSONALITY = personality_config.get("prompt_personality", config.PROMPT_PERSONALITY)
        logger.info(f"载入自定义日程prompt:{personality_config.get('prompt_schedule', config.PROMPT_SCHEDULE_GEN)}")
        config.PROMPT_SCHEDULE_GEN = personality_config.get("prompt_schedule", config.PROMPT_SCHEDULE_GEN)

        if config.INNER_VERSION in SpecifierSet(">=0.0.2"):
            config.PERSONALITY_1 = personality_config.get("personality_1_probability", config.PERSONALITY_1)
            config.PERSONALITY_2 = personality_config.get("personality_2_probability", config.PERSONALITY_2)
            config.PERSONALITY_3 = personality_config.get("personality_3_probability", config.PERSONALITY_3)

    def emoji(parent: dict):
        emoji_config = parent["emoji"]
        config.EMOJI_CHECK_INTERVAL = emoji_config.get("check_interval", config.EMOJI_CHECK_INTERVAL)
        config.EMOJI_REGISTER_INTERVAL = emoji_config.get("register_interval", config.EMOJI_REGISTER_INTERVAL)
        config.EMOJI_CHECK_PROMPT = emoji_config.get("check_prompt", config.EMOJI_CHECK_PROMPT)
        config.EMOJI_SAVE = emoji_config.get("auto_save", config.EMOJI_SAVE)
        config.EMOJI_CHECK = emoji_config.get("enable_check", config.EMOJI_CHECK)

    def cq_code(parent: dict):
        cq_code_config = parent["cq_code"]
        config.ENABLE_PIC_TRANSLATE = cq_code_config.get("enable_pic_translate", config.ENABLE_PIC_TRANSLATE)

    def bot(parent: dict):
        # Basic bot identity settings.
        bot_config = parent["bot"]
        bot_qq = bot_config.get("qq")
        config.BOT_QQ = int(bot_qq)
        config.BOT_NICKNAME = bot_config.get("nickname", config.BOT_NICKNAME)

        if config.INNER_VERSION in SpecifierSet(">=0.0.5"):
            config.BOT_ALIAS_NAMES = bot_config.get("alias_names", config.BOT_ALIAS_NAMES)

    def response(parent: dict):
        response_config = parent["response"]
        config.MODEL_R1_PROBABILITY = response_config.get("model_r1_probability", config.MODEL_R1_PROBABILITY)
        config.MODEL_V3_PROBABILITY = response_config.get("model_v3_probability", config.MODEL_V3_PROBABILITY)
        config.MODEL_R1_DISTILL_PROBABILITY = response_config.get(
            "model_r1_distill_probability", config.MODEL_R1_DISTILL_PROBABILITY
        )
        config.max_response_length = response_config.get("max_response_length", config.max_response_length)

    def model(parent: dict):
        # Load the per-model settings; every listed model must be present.
        model_config: dict = parent["model"]

        config_list = [
            "llm_reasoning",
            "llm_reasoning_minor",
            "llm_normal",
            "llm_normal_minor",
            "llm_topic_judge",
            "llm_summary_by_topic",
            "llm_emotion_judge",
            "vlm",
            "embedding",
            "moderation",
        ]

        for item in config_list:
            if item not in model_config:
                logger.error(f"模型 {item} 在config中不存在,请检查")
                raise KeyError(f"模型 {item} 在config中不存在,请检查")

            cfg_item: dict = model_config[item]

            # base_url example: SILICONFLOW_BASE_URL
            # key example: SILICONFLOW_KEY
            cfg_target = {"name": "", "base_url": "", "key": "", "pri_in": 0, "pri_out": 0}

            if config.INNER_VERSION in SpecifierSet("<=0.0.0"):
                # Legacy layout: use the table exactly as written.
                cfg_target = cfg_item

            elif config.INNER_VERSION in SpecifierSet(">=0.0.1"):
                stable_item = ["name", "pri_in", "pri_out"]
                pricing_item = ["pri_in", "pri_out"]
                # Copy the stable fields verbatim from the config.
                for field_name in stable_item:
                    # A pricing field that is absent defaults to 0.
                    if field_name in pricing_item and field_name not in cfg_item:
                        cfg_target[field_name] = 0
                        continue
                    try:
                        cfg_target[field_name] = cfg_item[field_name]
                    except KeyError as e:
                        logger.error(f"{item} 中的必要字段不存在,请检查")
                        raise KeyError(f"{item} 中的必要字段 {e} 不存在,请检查") from e

                provider = cfg_item.get("provider")
                if provider is None:
                    logger.error(f"provider 字段在模型配置 {item} 中不存在,请检查")
                    raise KeyError(f"provider 字段在模型配置 {item} 中不存在,请检查")

                cfg_target["base_url"] = f"{provider}_BASE_URL"
                cfg_target["key"] = f"{provider}_KEY"

            # Store the result on the config attribute named after the model entry.
            setattr(config, item, cfg_target)

    def message(parent: dict):
        msg_config = parent["message"]
        config.MIN_TEXT_LENGTH = msg_config.get("min_text_length", config.MIN_TEXT_LENGTH)
        config.MAX_CONTEXT_SIZE = msg_config.get("max_context_size", config.MAX_CONTEXT_SIZE)
        config.emoji_chance = msg_config.get("emoji_chance", config.emoji_chance)
        config.ban_words = msg_config.get("ban_words", config.ban_words)

        if config.INNER_VERSION in SpecifierSet(">=0.0.2"):
            config.thinking_timeout = msg_config.get("thinking_timeout", config.thinking_timeout)
            config.response_willing_amplifier = msg_config.get(
                "response_willing_amplifier", config.response_willing_amplifier
            )
            config.response_interested_rate_amplifier = msg_config.get(
                "response_interested_rate_amplifier", config.response_interested_rate_amplifier
            )
            config.down_frequency_rate = msg_config.get("down_frequency_rate", config.down_frequency_rate)

        if config.INNER_VERSION in SpecifierSet(">=0.0.6"):
            config.ban_msgs_regex = msg_config.get("ban_msgs_regex", config.ban_msgs_regex)

    def memory(parent: dict):
        memory_config = parent["memory"]
        config.build_memory_interval = memory_config.get("build_memory_interval", config.build_memory_interval)
        config.forget_memory_interval = memory_config.get("forget_memory_interval", config.forget_memory_interval)

        # Options added in config version 0.0.4 are only read from then on.
        if config.INNER_VERSION in SpecifierSet(">=0.0.4"):
            config.memory_ban_words = set(memory_config.get("memory_ban_words", []))

    def mood(parent: dict):
        mood_config = parent["mood"]
        config.mood_update_interval = mood_config.get("mood_update_interval", config.mood_update_interval)
        config.mood_decay_rate = mood_config.get("mood_decay_rate", config.mood_decay_rate)
        config.mood_intensity_factor = mood_config.get("mood_intensity_factor", config.mood_intensity_factor)

    def keywords_reaction(parent: dict):
        keywords_reaction_config = parent["keywords_reaction"]
        if keywords_reaction_config.get("enable", False):
            config.keywords_reaction_rules = keywords_reaction_config.get("rules", config.keywords_reaction_rules)

    def chinese_typo(parent: dict):
        chinese_typo_config = parent["chinese_typo"]
        config.chinese_typo_enable = chinese_typo_config.get("enable", config.chinese_typo_enable)
        config.chinese_typo_error_rate = chinese_typo_config.get("error_rate", config.chinese_typo_error_rate)
        config.chinese_typo_min_freq = chinese_typo_config.get("min_freq", config.chinese_typo_min_freq)
        config.chinese_typo_tone_error_rate = chinese_typo_config.get(
            "tone_error_rate", config.chinese_typo_tone_error_rate
        )
        config.chinese_typo_word_replace_rate = chinese_typo_config.get(
            "word_replace_rate", config.chinese_typo_word_replace_rate
        )

    def groups(parent: dict):
        groups_config = parent["groups"]
        config.talk_allowed_groups = set(groups_config.get("talk_allowed", []))
        config.talk_frequency_down_groups = set(groups_config.get("talk_frequency_down", []))
        config.ban_user_id = set(groups_config.get("ban_user_id", []))

    def others(parent: dict):
        others_config = parent["others"]
        config.enable_advance_output = others_config.get("enable_advance_output", config.enable_advance_output)
        config.enable_kuuki_read = others_config.get("enable_kuuki_read", config.enable_kuuki_read)

    # Version expression example: >=1.0.0,<2.0.0
    # Allowed fields: func: method, support: str, notice: str, necessary: bool
    # When "notice" is present it is logged as a warning while that table is
    # loaded, e.g. "notice": "personality will be removed after 1.3.2" lets
    # users on a still-supported version run normally but see the message.
    include_configs = {
        "personality": {"func": personality, "support": ">=0.0.0"},
        "emoji": {"func": emoji, "support": ">=0.0.0"},
        "cq_code": {"func": cq_code, "support": ">=0.0.0"},
        "bot": {"func": bot, "support": ">=0.0.0"},
        "response": {"func": response, "support": ">=0.0.0"},
        "model": {"func": model, "support": ">=0.0.0"},
        "message": {"func": message, "support": ">=0.0.0"},
        "memory": {"func": memory, "support": ">=0.0.0", "necessary": False},
        "mood": {"func": mood, "support": ">=0.0.0"},
        "keywords_reaction": {"func": keywords_reaction, "support": ">=0.0.2", "necessary": False},
        "chinese_typo": {"func": chinese_typo, "support": ">=0.0.3", "necessary": False},
        "groups": {"func": groups, "support": ">=0.0.0"},
        "others": {"func": others, "support": ">=0.0.0"},
    }

    # Convert the string support expressions into SpecifierSet objects in place.
    for entry in include_configs.values():
        entry["support"] = cls.convert_to_specifierset(entry["support"])

    # No path given or no such file: return the defaults. The explicit None
    # check avoids the TypeError that os.path.exists(None) raises.
    if not config_path or not os.path.exists(config_path):
        return config

    # Only the parsing needs the file handle; close it before processing.
    with open(config_path, "rb") as f:
        try:
            toml_dict = tomli.load(f)
        except tomli.TOMLDecodeError as e:
            logger.critical(f"配置文件bot_config.toml填写有误,请检查第{e.lineno}行第{e.colno}处:{e.msg}")
            raise SystemExit(1) from e

    # Determine the config-file version before dispatching to the handlers,
    # which branch on config.INNER_VERSION.
    config.INNER_VERSION = cls.get_config_version(toml_dict)

    # Call the handler of every table that is present in the file.
    for key, entry in include_configs.items():
        if key in toml_dict:
            group_specifierset: SpecifierSet = entry["support"]

            # The config version must be inside the table's supported range.
            if config.INNER_VERSION not in group_specifierset:
                logger.error(
                    f"配置文件中的 '{key}' 字段的版本 ({config.INNER_VERSION}) 不在支持范围内。\n"
                    f"当前程序仅支持以下版本范围: {group_specifierset}"
                )
                raise InvalidVersion(f"当前程序仅支持以下版本范围: {group_specifierset}")

            if "notice" in entry:
                logger.warning(entry["notice"])

            entry["func"](toml_dict)

        elif entry.get("necessary") is False:
            # Explicitly optional table that is absent: nothing to do.
            continue

        else:
            # A required table is missing from the file.
            logger.error(f"配置文件中缺少必需的字段: '{key}'")
            raise KeyError(f"配置文件中缺少必需的字段: '{key}'")

    logger.success(f"成功加载配置文件: {config_path}")

    return config
|
||||
|
||||
|
||||
if "memory" in toml_dict:
|
||||
memory_config = toml_dict["memory"]
|
||||
config.build_memory_interval = memory_config.get("build_memory_interval", config.build_memory_interval)
|
||||
config.forget_memory_interval = memory_config.get("forget_memory_interval", config.forget_memory_interval)
|
||||
|
||||
if "mood" in toml_dict:
|
||||
mood_config = toml_dict["mood"]
|
||||
config.mood_update_interval = mood_config.get("mood_update_interval", config.mood_update_interval)
|
||||
config.mood_decay_rate = mood_config.get("mood_decay_rate", config.mood_decay_rate)
|
||||
config.mood_intensity_factor = mood_config.get("mood_intensity_factor", config.mood_intensity_factor)
|
||||
|
||||
# 群组配置
|
||||
if "groups" in toml_dict:
|
||||
groups_config = toml_dict["groups"]
|
||||
config.talk_allowed_groups = set(groups_config.get("talk_allowed", []))
|
||||
config.talk_frequency_down_groups = set(groups_config.get("talk_frequency_down", []))
|
||||
config.ban_user_id = set(groups_config.get("ban_user_id", []))
|
||||
|
||||
if "others" in toml_dict:
|
||||
others_config = toml_dict["others"]
|
||||
config.enable_advance_output = others_config.get("enable_advance_output", config.enable_advance_output)
|
||||
config.enable_kuuki_read = others_config.get("enable_kuuki_read", config.enable_kuuki_read)
|
||||
|
||||
logger.success(f"成功加载配置文件: {config_path}")
|
||||
|
||||
return config
|
||||
|
||||
# Locate the configuration directory and the bot configuration file inside it.
bot_config_floder_path = BotConfig.get_config_dir()
logger.debug(f"正在品鉴配置文件目录: {bot_config_floder_path}")

bot_config_path = os.path.join(bot_config_floder_path, "bot_config.toml")

if os.path.exists(bot_config_path):
    # The configuration file is present, so it is the one that gets loaded below.
    logger.debug(f"异常的新鲜,异常的美味: {bot_config_path}")
    logger.info("使用bot配置文件")
else:
    logger.info("没有找到美味")
    # The configuration file is missing: report the probed path and abort.
    # The message has to be an f-string; as a plain string the literal
    # "{bot_config_path}" placeholder was logged instead of the path.
    logger.error(f"配置文件不存在,请检查路径: {bot_config_path}")
    raise FileNotFoundError(f"配置文件不存在: {bot_config_path}")

# Module-level configuration object built from the file found above.
global_config = BotConfig.load_config(config_path=bot_config_path)

if not global_config.enable_advance_output:
    # Advanced output is disabled: remove the loguru handlers.
    logger.remove()
|
||||
|
||||
|
||||
@@ -170,11 +170,11 @@ class CQCode:
|
||||
|
||||
except (requests.exceptions.SSLError, requests.exceptions.HTTPError) as e:
|
||||
if retry == max_retries - 1:
|
||||
print(f"\033[1;31m[致命错误]\033[0m 最终请求失败: {str(e)}")
|
||||
logger.error(f"最终请求失败: {str(e)}")
|
||||
time.sleep(1.5**retry) # 指数退避
|
||||
|
||||
except Exception as e:
|
||||
print(f"\033[1;33m[未知错误]\033[0m {str(e)}")
|
||||
except Exception:
|
||||
logger.exception("[未知错误]")
|
||||
return None
|
||||
|
||||
return None
|
||||
|
||||
@@ -24,24 +24,26 @@ image_manager = ImageManager()
|
||||
class EmojiManager:
|
||||
_instance = None
|
||||
EMOJI_DIR = "data/emoji" # 表情包存储目录
|
||||
|
||||
|
||||
def __new__(cls):
|
||||
if cls._instance is None:
|
||||
cls._instance = super().__new__(cls)
|
||||
cls._instance.db = None
|
||||
cls._instance._initialized = False
|
||||
return cls._instance
|
||||
|
||||
|
||||
def __init__(self):
    """Bind the database handle and build the model clients used for emojis.

    Sets ``db`` from ``Database.get_instance()``, clears ``_scan_task`` and
    creates two ``LLM_request`` clients: ``vlm`` (model ``global_config.vlm``)
    and ``llm_emotion_judge`` (model ``global_config.llm_normal_minor``).
    """
    self.db = Database.get_instance()
    self._scan_task = None
    self.vlm = LLM_request(model=global_config.vlm, temperature=0.3, max_tokens=1000)
    # Higher temperature with fewer tokens; the temperature may later be
    # adjusted according to mood. Built once: the second, identical
    # construction of this client was redundant.
    self.llm_emotion_judge = LLM_request(
        model=global_config.llm_normal_minor,
        max_tokens=60,
        temperature=0.8,
    )
|
||||
|
||||
|
||||
def _ensure_emoji_dir(self):
|
||||
"""确保表情存储目录存在"""
|
||||
os.makedirs(self.EMOJI_DIR, exist_ok=True)
|
||||
|
||||
|
||||
def initialize(self):
|
||||
"""初始化数据库连接和表情目录"""
|
||||
if not self._initialized:
|
||||
@@ -52,16 +54,16 @@ class EmojiManager:
|
||||
self._initialized = True
|
||||
# 启动时执行一次完整性检查
|
||||
self.check_emoji_file_integrity()
|
||||
except Exception as e:
|
||||
logger.error(f"初始化表情管理器失败: {str(e)}")
|
||||
|
||||
except Exception:
|
||||
logger.exception("初始化表情管理器失败")
|
||||
|
||||
def _ensure_db(self):
|
||||
"""确保数据库已初始化"""
|
||||
if not self._initialized:
|
||||
self.initialize()
|
||||
if not self._initialized:
|
||||
raise RuntimeError("EmojiManager not initialized")
|
||||
|
||||
|
||||
def _ensure_emoji_collection(self):
|
||||
"""确保emoji集合存在并创建索引
|
||||
|
||||
@@ -78,7 +80,7 @@ class EmojiManager:
|
||||
self.db.db.create_collection('emoji')
|
||||
self.db.db.emoji.create_index([('embedding', '2dsphere')])
|
||||
self.db.db.emoji.create_index([('filename', 1)], unique=True)
|
||||
|
||||
|
||||
def record_usage(self, emoji_id: str):
|
||||
"""记录表情使用次数"""
|
||||
try:
|
||||
@@ -104,9 +106,9 @@ class EmojiManager:
|
||||
"""
|
||||
try:
|
||||
self._ensure_db()
|
||||
|
||||
|
||||
# 获取文本的embedding
|
||||
text_for_search= await self._get_kimoji_for_text(text)
|
||||
text_for_search = await self._get_kimoji_for_text(text)
|
||||
if not text_for_search:
|
||||
logger.error("无法获取文本的情绪")
|
||||
return None
|
||||
@@ -114,15 +116,15 @@ class EmojiManager:
|
||||
if not text_embedding:
|
||||
logger.error("无法获取文本的embedding")
|
||||
return None
|
||||
|
||||
|
||||
try:
|
||||
# 获取所有表情包
|
||||
all_emojis = list(self.db.db.emoji.find({}, {'_id': 1, 'path': 1, 'embedding': 1, 'discription': 1}))
|
||||
|
||||
all_emojis = list(self.db.db.emoji.find({}, {'_id': 1, 'path': 1, 'embedding': 1, 'description': 1}))
|
||||
|
||||
if not all_emojis:
|
||||
logger.warning("数据库中没有任何表情包")
|
||||
return None
|
||||
|
||||
|
||||
# 计算余弦相似度并排序
|
||||
def cosine_similarity(v1, v2):
|
||||
if not v1 or not v2:
|
||||
@@ -133,23 +135,23 @@ class EmojiManager:
|
||||
if norm_v1 == 0 or norm_v2 == 0:
|
||||
return 0
|
||||
return dot_product / (norm_v1 * norm_v2)
|
||||
|
||||
|
||||
# 计算所有表情包与输入文本的相似度
|
||||
emoji_similarities = [
|
||||
(emoji, cosine_similarity(text_embedding, emoji.get('embedding', [])))
|
||||
for emoji in all_emojis
|
||||
]
|
||||
|
||||
|
||||
# 按相似度降序排序
|
||||
emoji_similarities.sort(key=lambda x: x[1], reverse=True)
|
||||
|
||||
|
||||
# 获取前10个最相似的表情包
|
||||
top_10_emojis = emoji_similarities[:10 if len(emoji_similarities) > 10 else len(emoji_similarities)]
|
||||
|
||||
if not top_10_emojis:
|
||||
logger.warning("未找到匹配的表情包")
|
||||
return None
|
||||
|
||||
|
||||
# 从前10个中随机选择一个
|
||||
selected_emoji, similarity = random.choice(top_10_emojis)
|
||||
|
||||
@@ -159,16 +161,17 @@ class EmojiManager:
|
||||
{'_id': selected_emoji['_id']},
|
||||
{'$inc': {'usage_count': 1}}
|
||||
)
|
||||
logger.success(f"找到匹配的表情包: {selected_emoji.get('discription', '无描述')} (相似度: {similarity:.4f})")
|
||||
logger.success(
|
||||
f"找到匹配的表情包: {selected_emoji.get('description', '无描述')} (相似度: {similarity:.4f})")
|
||||
# 稍微改一下文本描述,不然容易产生幻觉,描述已经包含 表情包 了
|
||||
return selected_emoji['path'],"[ %s ]" % selected_emoji.get('discription', '无描述')
|
||||
|
||||
return selected_emoji['path'], "[ %s ]" % selected_emoji.get('description', '无描述')
|
||||
|
||||
except Exception as search_error:
|
||||
logger.error(f"搜索表情包失败: {str(search_error)}")
|
||||
return None
|
||||
|
||||
|
||||
return None
|
||||
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"获取表情包失败: {str(e)}")
|
||||
return None
|
||||
@@ -185,31 +188,31 @@ class EmojiManager:
|
||||
except Exception as e:
|
||||
logger.error(f"获取标签失败: {str(e)}")
|
||||
return None
|
||||
|
||||
|
||||
async def _check_emoji(self, image_base64: str) -> str:
    """Ask the vision model whether an emoji image passes the configured filter.

    Args:
        image_base64: Passed through unchanged to
            ``self.vlm.generate_response_for_image`` together with the prompt.

    Returns:
        The text answered by the model (the prompt asks for a bare yes/no
        answer in Chinese), or ``None`` if anything raised along the way.
    """
    try:
        check_prompt = f'这是一个表情包,请回答这个表情包是否满足\"{global_config.EMOJI_CHECK_PROMPT}\"的要求,是则回答是,否则回答否,不要出现任何其他内容'
        answer, _unused = await self.vlm.generate_response_for_image(check_prompt, image_base64)
        logger.debug(f"输出描述: {answer}")
        return answer
    except Exception as exc:
        # Best effort: log the failure and signal it to the caller with None.
        logger.error(f"获取标签失败: {str(exc)}")
        return None
|
||||
|
||||
async def _get_kimoji_for_text(self, text: str):
    """Ask the emotion model which feeling an emoji for ``text`` should convey.

    Args:
        text: The message the bot is about to send; it is embedded in the prompt.

    Returns:
        The description returned by ``self.llm_emotion_judge``, or ``None`` if
        anything raised while building the prompt or querying the model.
    """
    try:
        prompt = f'这是{global_config.BOT_NICKNAME}将要发送的消息内容:\n{text}\n若要为其配上表情包,请你输出这个表情包应该表达怎样的情感,应该给人什么样的感觉,不要太简洁也不要太长,注意不要输出任何对消息内容的分析内容,只输出\"一种什么样的感觉\"中间的形容词部分。'
        # One request at temperature 1.5. The extra default-temperature request
        # that used to precede it was discarded immediately and only cost a call.
        content, _ = await self.llm_emotion_judge.generate_response_async(prompt, temperature=1.5)
        logger.info(f"输出描述: {content}")
        return content
    except Exception as e:
        # Best effort: log the failure and signal it to the caller with None.
        logger.error(f"获取标签失败: {str(e)}")
        return None
|
||||
|
||||
|
||||
async def scan_new_emojis(self):
|
||||
"""扫描新的表情包"""
|
||||
try:
|
||||
@@ -217,8 +220,9 @@ class EmojiManager:
|
||||
os.makedirs(emoji_dir, exist_ok=True)
|
||||
|
||||
# 获取所有支持的图片文件
|
||||
files_to_process = [f for f in os.listdir(emoji_dir) if f.lower().endswith(('.jpg', '.jpeg', '.png', '.gif'))]
|
||||
|
||||
files_to_process = [f for f in os.listdir(emoji_dir) if
|
||||
f.lower().endswith(('.jpg', '.jpeg', '.png', '.gif'))]
|
||||
|
||||
for filename in files_to_process:
|
||||
image_path = os.path.join(emoji_dir, filename)
|
||||
|
||||
@@ -273,10 +277,14 @@ class EmojiManager:
|
||||
if '是' not in check:
|
||||
os.remove(image_path)
|
||||
logger.info(f"描述: {description}")
|
||||
logger.info(f"描述: {description}")
|
||||
logger.info(f"其不满足过滤规则,被剔除 {check}")
|
||||
continue
|
||||
logger.info(f"check通过 {check}")
|
||||
|
||||
if description is not None:
|
||||
embedding = await get_embedding(description)
|
||||
|
||||
if description is not None:
|
||||
embedding = await get_embedding(description)
|
||||
# 准备数据库记录
|
||||
@@ -312,19 +320,17 @@ class EmojiManager:
|
||||
logger.success(f"同步保存到images集合: {filename}")
|
||||
else:
|
||||
logger.warning(f"跳过表情包: {filename}")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"扫描表情包失败: {str(e)}")
|
||||
logger.error(traceback.format_exc())
|
||||
|
||||
|
||||
except Exception:
|
||||
logger.exception("扫描表情包失败")
|
||||
|
||||
async def _periodic_scan(self, interval_MINS: int = 10):
    """Scan for new emojis in an endless loop.

    Args:
        interval_MINS: Minutes to sleep between two scans (default 10).
    """
    while True:
        # Progress is reported through the logger only; the parallel print
        # produced the same message a second time.
        logger.info("开始扫描新表情包...")
        await self.scan_new_emojis()
        # The pause follows interval_MINS (600 seconds with the default of 10).
        await asyncio.sleep(interval_MINS * 60)
|
||||
|
||||
|
||||
def check_emoji_file_integrity(self):
|
||||
"""检查表情包文件完整性
|
||||
如果文件已被删除,则从数据库中移除对应记录
|
||||
@@ -335,7 +341,7 @@ class EmojiManager:
|
||||
all_emojis = list(self.db.db.emoji.find())
|
||||
removed_count = 0
|
||||
total_count = len(all_emojis)
|
||||
|
||||
|
||||
for emoji in all_emojis:
|
||||
try:
|
||||
if 'path' not in emoji:
|
||||
@@ -343,27 +349,27 @@ class EmojiManager:
|
||||
self.db.db.emoji.delete_one({'_id': emoji['_id']})
|
||||
removed_count += 1
|
||||
continue
|
||||
|
||||
|
||||
if 'embedding' not in emoji:
|
||||
logger.warning(f"发现过时记录(缺少embedding字段),ID: {emoji.get('_id', 'unknown')}")
|
||||
self.db.db.emoji.delete_one({'_id': emoji['_id']})
|
||||
removed_count += 1
|
||||
continue
|
||||
|
||||
|
||||
# 检查文件是否存在
|
||||
if not os.path.exists(emoji['path']):
|
||||
logger.warning(f"表情包文件已被删除: {emoji['path']}")
|
||||
# 从数据库中删除记录
|
||||
result = self.db.db.emoji.delete_one({'_id': emoji['_id']})
|
||||
if result.deleted_count > 0:
|
||||
logger.success(f"成功删除数据库记录: {emoji['_id']}")
|
||||
logger.debug(f"成功删除数据库记录: {emoji['_id']}")
|
||||
removed_count += 1
|
||||
else:
|
||||
logger.error(f"删除数据库记录失败: {emoji['_id']}")
|
||||
except Exception as item_error:
|
||||
logger.error(f"处理表情包记录时出错: {str(item_error)}")
|
||||
continue
|
||||
|
||||
|
||||
# 验证清理结果
|
||||
remaining_count = self.db.db.emoji.count_documents({})
|
||||
if removed_count > 0:
|
||||
@@ -371,7 +377,7 @@ class EmojiManager:
|
||||
logger.info(f"清理前总数: {total_count} | 清理后总数: {remaining_count}")
|
||||
else:
|
||||
logger.info(f"已检查 {total_count} 个表情包记录")
|
||||
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"检查表情包完整性失败: {str(e)}")
|
||||
logger.error(traceback.format_exc())
|
||||
@@ -382,6 +388,6 @@ class EmojiManager:
|
||||
await asyncio.sleep(interval_MINS * 60)
|
||||
|
||||
|
||||
|
||||
# 创建全局单例
|
||||
emoji_manager = EmojiManager()
|
||||
emoji_manager = EmojiManager()
|
||||
|
||||
|
||||
@@ -3,6 +3,7 @@ import time
|
||||
from typing import List, Optional, Tuple, Union
|
||||
|
||||
from nonebot import get_driver
|
||||
from loguru import logger
|
||||
|
||||
from ...common.database import Database
|
||||
from ..models.utils_model import LLM_request
|
||||
@@ -55,9 +56,7 @@ class ResponseGenerator:
|
||||
self.current_model_type = "r1_distill"
|
||||
current_model = self.model_r1_distill
|
||||
|
||||
print(
|
||||
f"+++++++++++++++++{global_config.BOT_NICKNAME}{self.current_model_type}思考中+++++++++++++++++"
|
||||
)
|
||||
logger.info(f"{global_config.BOT_NICKNAME}{self.current_model_type}思考中")
|
||||
|
||||
model_response = await self._generate_response_with_model(
|
||||
message, current_model
|
||||
@@ -65,7 +64,7 @@ class ResponseGenerator:
|
||||
raw_content = model_response
|
||||
|
||||
if model_response:
|
||||
print(f"{global_config.BOT_NICKNAME}的回复是:{model_response}")
|
||||
logger.info(f'{global_config.BOT_NICKNAME}的回复是:{model_response}')
|
||||
model_response = await self._process_response(model_response)
|
||||
if model_response:
|
||||
return model_response, raw_content
|
||||
@@ -122,8 +121,8 @@ class ResponseGenerator:
|
||||
# 生成回复
|
||||
try:
|
||||
content, reasoning_content = await model.generate_response(prompt)
|
||||
except Exception as e:
|
||||
print(f"生成回复时出错: {e}")
|
||||
except Exception:
|
||||
logger.exception("生成回复时出错")
|
||||
return None
|
||||
|
||||
# 保存到数据库
|
||||
@@ -219,7 +218,7 @@ class InitiativeMessageGenerate:
|
||||
prompt_builder._build_initiative_prompt_select(message.group_id)
|
||||
)
|
||||
content_select, reasoning = self.model_v3.generate_response(topic_select_prompt)
|
||||
print(f"[DEBUG] {content_select} {reasoning}")
|
||||
logger.debug(f"{content_select} {reasoning}")
|
||||
topics_list = [dot[0] for dot in dots_for_select]
|
||||
if content_select:
|
||||
if content_select in topics_list:
|
||||
@@ -232,12 +231,12 @@ class InitiativeMessageGenerate:
|
||||
select_dot[1], prompt_template
|
||||
)
|
||||
content_check, reasoning_check = self.model_v3.generate_response(prompt_check)
|
||||
print(f"[DEBUG] {content_check} {reasoning_check}")
|
||||
logger.info(f"{content_check} {reasoning_check}")
|
||||
if "yes" not in content_check.lower():
|
||||
return None
|
||||
prompt = prompt_builder._build_initiative_prompt(
|
||||
select_dot, prompt_template, memory
|
||||
)
|
||||
content, reasoning = self.model_r1.generate_response_async(prompt)
|
||||
print(f"[DEBUG] {content} {reasoning}")
|
||||
logger.debug(f"[DEBUG] {content} {reasoning}")
|
||||
return content
|
||||
|
||||
@@ -2,6 +2,7 @@ import asyncio
|
||||
import time
|
||||
from typing import Dict, List, Optional, Union
|
||||
|
||||
from loguru import logger
|
||||
from nonebot.adapters.onebot.v11 import Bot
|
||||
|
||||
from .cq_code import cq_code_tool
|
||||
@@ -14,11 +15,12 @@ from .chat_stream import chat_manager
|
||||
|
||||
class Message_Sender:
|
||||
"""发送器"""
|
||||
|
||||
def __init__(self):
|
||||
self.message_interval = (0.5, 1) # 消息间隔时间范围(秒)
|
||||
self.last_send_time = 0
|
||||
self._current_bot = None
|
||||
|
||||
|
||||
def set_bot(self, bot: Bot):
|
||||
"""设置当前bot实例"""
|
||||
self._current_bot = bot
|
||||
@@ -41,10 +43,10 @@ class Message_Sender:
|
||||
message=message_send.raw_message,
|
||||
auto_escape=False
|
||||
)
|
||||
print(f"\033[1;34m[调试]\033[0m 发送消息{message.processed_plain_text}成功")
|
||||
logger.success(f"[调试] 发送消息{message.processed_plain_text}成功")
|
||||
except Exception as e:
|
||||
print(f"发生错误 {e}")
|
||||
print(f"\033[1;34m[调试]\033[0m 发送消息{message.processed_plain_text}失败")
|
||||
logger.error(f"[调试] 发生错误 {e}")
|
||||
logger.error(f"[调试] 发送消息{message.processed_plain_text}失败")
|
||||
else:
|
||||
try:
|
||||
await self._current_bot.send_private_msg(
|
||||
@@ -52,10 +54,10 @@ class Message_Sender:
|
||||
message=message_send.raw_message,
|
||||
auto_escape=False
|
||||
)
|
||||
print(f"\033[1;34m[调试]\033[0m 发送消息{message.processed_plain_text}成功")
|
||||
logger.success(f"[调试] 发送消息{message.processed_plain_text}成功")
|
||||
except Exception as e:
|
||||
print(f"发生错误 {e}")
|
||||
print(f"\033[1;34m[调试]\033[0m 发送消息{message.processed_plain_text}失败")
|
||||
logger.error(f"发生错误 {e}")
|
||||
logger.error(f"[调试] 发送消息{message.processed_plain_text}失败")
|
||||
|
||||
|
||||
class MessageContainer:
|
||||
@@ -71,15 +73,15 @@ class MessageContainer:
|
||||
"""获取所有超时的Message_Sending对象(思考时间超过30秒),按thinking_start_time排序"""
|
||||
current_time = time.time()
|
||||
timeout_messages = []
|
||||
|
||||
|
||||
for msg in self.messages:
|
||||
if isinstance(msg, MessageSending):
|
||||
if current_time - msg.thinking_start_time > self.thinking_timeout:
|
||||
timeout_messages.append(msg)
|
||||
|
||||
|
||||
# 按thinking_start_time排序,时间早的在前面
|
||||
timeout_messages.sort(key=lambda x: x.thinking_start_time)
|
||||
|
||||
|
||||
return timeout_messages
|
||||
|
||||
def get_earliest_message(self) -> Optional[Union[MessageThinking, MessageSending]]:
|
||||
@@ -88,11 +90,11 @@ class MessageContainer:
|
||||
return None
|
||||
earliest_time = float('inf')
|
||||
earliest_message = None
|
||||
for msg in self.messages:
|
||||
for msg in self.messages:
|
||||
msg_time = msg.thinking_start_time
|
||||
if msg_time < earliest_time:
|
||||
earliest_time = msg_time
|
||||
earliest_message = msg
|
||||
earliest_message = msg
|
||||
return earliest_message
|
||||
|
||||
def add_message(self, message: Union[MessageThinking, MessageSending]) -> None:
|
||||
@@ -110,10 +112,10 @@ class MessageContainer:
|
||||
self.messages.remove(message)
|
||||
return True
|
||||
return False
|
||||
except Exception as e:
|
||||
print(f"\033[1;31m[错误]\033[0m 移除消息时发生错误: {e}")
|
||||
except Exception:
|
||||
logger.exception("移除消息时发生错误")
|
||||
return False
|
||||
|
||||
|
||||
def has_messages(self) -> bool:
|
||||
"""检查是否有待发送的消息"""
|
||||
return bool(self.messages)
|
||||
@@ -121,7 +123,7 @@ class MessageContainer:
|
||||
def get_all_messages(self) -> List[Union[MessageSending, MessageThinking]]:
|
||||
"""获取所有消息"""
|
||||
return list(self.messages)
|
||||
|
||||
|
||||
|
||||
class MessageManager:
|
||||
"""管理所有聊天流的消息容器"""
|
||||
@@ -152,11 +154,11 @@ class MessageManager:
|
||||
if isinstance(message_earliest, MessageThinking):
|
||||
message_earliest.update_thinking_time()
|
||||
thinking_time = message_earliest.thinking_time
|
||||
print(f"\033[1;34m[调试]\033[0m 消息正在思考中,已思考{int(thinking_time)}秒\033[K\r", end='', flush=True)
|
||||
|
||||
print(f"消息正在思考中,已思考{int(thinking_time)}秒\r", end='', flush=True)
|
||||
|
||||
# 检查是否超时
|
||||
if thinking_time > global_config.thinking_timeout:
|
||||
print(f"\033[1;33m[警告]\033[0m 消息思考超时({thinking_time}秒),移除该消息")
|
||||
logger.warning(f"消息思考超时({thinking_time}秒),移除该消息")
|
||||
container.remove_message(message_earliest)
|
||||
else:
|
||||
print(f"\033[1;34m[调试]\033[0m 消息'{message_earliest.processed_plain_text}'正在发送中")
|
||||
@@ -174,7 +176,7 @@ class MessageManager:
|
||||
|
||||
message_timeout = container.get_timeout_messages()
|
||||
if message_timeout:
|
||||
print(f"\033[1;34m[调试]\033[0m 发现{len(message_timeout)}条超时消息")
|
||||
logger.warning(f"发现{len(message_timeout)}条超时消息")
|
||||
for msg in message_timeout:
|
||||
if msg == message_earliest:
|
||||
continue
|
||||
@@ -191,11 +193,11 @@ class MessageManager:
|
||||
await self.storage.store_message(msg,msg.chat_stream, None)
|
||||
|
||||
if not container.remove_message(msg):
|
||||
print("\033[1;33m[警告]\033[0m 尝试删除不存在的消息")
|
||||
except Exception as e:
|
||||
print(f"\033[1;31m[错误]\033[0m 处理超时消息时发生错误: {e}")
|
||||
logger.warning("尝试删除不存在的消息")
|
||||
except Exception:
|
||||
logger.exception("处理超时消息时发生错误")
|
||||
continue
|
||||
|
||||
|
||||
async def start_processor(self):
|
||||
"""启动消息处理器"""
|
||||
while self._running:
|
||||
@@ -206,6 +208,7 @@ class MessageManager:
|
||||
|
||||
await asyncio.gather(*tasks)
|
||||
|
||||
|
||||
# 创建全局消息管理器实例
|
||||
message_manager = MessageManager()
|
||||
# 创建全局发送器实例
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import random
|
||||
import time
|
||||
from typing import Optional
|
||||
from loguru import logger
|
||||
|
||||
from ...common.database import Database
|
||||
from ..memory_system.memory import hippocampus, memory_graph
|
||||
@@ -34,44 +35,43 @@ class PromptBuilder:
|
||||
|
||||
Returns:
|
||||
str: 构建好的prompt
|
||||
"""
|
||||
#先禁用关系
|
||||
"""
|
||||
# 先禁用关系
|
||||
if 0 > 30:
|
||||
relation_prompt = "关系特别特别好,你很喜欢喜欢他"
|
||||
relation_prompt_2 = "热情发言或者回复"
|
||||
elif 0 <-20:
|
||||
elif 0 < -20:
|
||||
relation_prompt = "关系很差,你很讨厌他"
|
||||
relation_prompt_2 = "骂他"
|
||||
else:
|
||||
relation_prompt = "关系一般"
|
||||
relation_prompt_2 = "发言或者回复"
|
||||
|
||||
#开始构建prompt
|
||||
|
||||
|
||||
#心情
|
||||
|
||||
# 开始构建prompt
|
||||
|
||||
# 心情
|
||||
mood_manager = MoodManager.get_instance()
|
||||
mood_prompt = mood_manager.get_prompt()
|
||||
|
||||
|
||||
#日程构建
|
||||
|
||||
# 日程构建
|
||||
current_date = time.strftime("%Y-%m-%d", time.localtime())
|
||||
current_time = time.strftime("%H:%M:%S", time.localtime())
|
||||
bot_schedule_now_time,bot_schedule_now_activity = bot_schedule.get_current_task()
|
||||
bot_schedule_now_time, bot_schedule_now_activity = bot_schedule.get_current_task()
|
||||
prompt_date = f'''今天是{current_date},现在是{current_time},你今天的日程是:\n{bot_schedule.today_schedule}\n你现在正在{bot_schedule_now_activity}\n'''
|
||||
|
||||
#知识构建
|
||||
# 知识构建
|
||||
start_time = time.time()
|
||||
|
||||
|
||||
prompt_info = ''
|
||||
promt_info_prompt = ''
|
||||
prompt_info = await self.get_prompt_info(message_txt,threshold=0.5)
|
||||
prompt_info = await self.get_prompt_info(message_txt, threshold=0.5)
|
||||
if prompt_info:
|
||||
prompt_info = f'''\n----------------------------------------------------\n你有以下这些[知识]:\n{prompt_info}\n请你记住上面的[知识],之后可能会用到\n----------------------------------------------------\n'''
|
||||
|
||||
prompt_info = f'''你有以下这些[知识]:{prompt_info}请你记住上面的[
|
||||
知识],之后可能会用到-'''
|
||||
|
||||
end_time = time.time()
|
||||
print(f"\033[1;32m[知识检索]\033[0m 耗时: {(end_time - start_time):.3f}秒")
|
||||
|
||||
logger.debug(f"知识检索耗时: {(end_time - start_time):.3f}秒")
|
||||
|
||||
# 获取聊天上下文
|
||||
chat_in_group=True
|
||||
chat_talking_prompt = ''
|
||||
@@ -90,7 +90,7 @@ class PromptBuilder:
|
||||
# 使用新的记忆获取方法
|
||||
memory_prompt = ''
|
||||
start_time = time.time()
|
||||
|
||||
|
||||
# 调用 hippocampus 的 get_relevant_memories 方法
|
||||
relevant_memories = await hippocampus.get_relevant_memories(
|
||||
text=message_txt,
|
||||
@@ -98,64 +98,64 @@ class PromptBuilder:
|
||||
similarity_threshold=0.4,
|
||||
max_memory_num=5
|
||||
)
|
||||
|
||||
|
||||
if relevant_memories:
|
||||
# 格式化记忆内容
|
||||
memory_items = []
|
||||
for memory in relevant_memories:
|
||||
memory_items.append(f"关于「{memory['topic']}」的记忆:{memory['content']}")
|
||||
|
||||
|
||||
memory_prompt = "看到这些聊天,你想起来:\n" + "\n".join(memory_items) + "\n"
|
||||
|
||||
|
||||
# 打印调试信息
|
||||
print("\n\033[1;32m[记忆检索]\033[0m 找到以下相关记忆:")
|
||||
logger.debug("[记忆检索]找到以下相关记忆:")
|
||||
for memory in relevant_memories:
|
||||
print(f"- 主题「{memory['topic']}」[相似度: {memory['similarity']:.2f}]: {memory['content']}")
|
||||
|
||||
logger.debug(f"- 主题「{memory['topic']}」[相似度: {memory['similarity']:.2f}]: {memory['content']}")
|
||||
|
||||
end_time = time.time()
|
||||
print(f"\033[1;32m[回忆耗时]\033[0m 耗时: {(end_time - start_time):.3f}秒")
|
||||
|
||||
|
||||
|
||||
#激活prompt构建
|
||||
logger.info(f"回忆耗时: {(end_time - start_time):.3f}秒")
|
||||
|
||||
# 激活prompt构建
|
||||
activate_prompt = ''
|
||||
if chat_in_group:
|
||||
activate_prompt = f"以上是群里正在进行的聊天,{memory_prompt} 现在昵称为 '{sender_name}' 的用户说的:{message_txt}。引起了你的注意,你和ta{relation_prompt},{mood_prompt},你想要{relation_prompt_2}。"
|
||||
else:
|
||||
activate_prompt = f"以上是你正在和{sender_name}私聊的内容,{memory_prompt} 现在昵称为 '{sender_name}' 的用户说的:{message_txt}。引起了你的注意,你和ta{relation_prompt},{mood_prompt},你想要{relation_prompt_2}。"
|
||||
#检测机器人相关词汇
|
||||
bot_keywords = ['人机', 'bot', '机器', '入机', 'robot', '机器人']
|
||||
is_bot = any(keyword in message_txt.lower() for keyword in bot_keywords)
|
||||
if is_bot:
|
||||
is_bot_prompt = '有人说你是人机或者机器人,否定这一事实,攻击或反问他。注意,只有认为你是机器人才回复,否则不要否认'
|
||||
else:
|
||||
is_bot_prompt = ''
|
||||
|
||||
# 关键词检测与反应
|
||||
keywords_reaction_prompt = ''
|
||||
for rule in global_config.keywords_reaction_rules:
|
||||
if rule.get("enable", False):
|
||||
if any(keyword in message_txt.lower() for keyword in rule.get("keywords", [])):
|
||||
logger.info(f"检测到以下关键词之一:{rule.get('keywords', [])},触发反应:{rule.get('reaction', '')}")
|
||||
keywords_reaction_prompt += rule.get("reaction", "") + ','
|
||||
|
||||
#人格选择
|
||||
personality=global_config.PROMPT_PERSONALITY
|
||||
probability_1 = global_config.PERSONALITY_1
|
||||
probability_2 = global_config.PERSONALITY_2
|
||||
probability_3 = global_config.PERSONALITY_3
|
||||
prompt_personality = ''
|
||||
|
||||
prompt_personality = f'{activate_prompt}你的网名叫{global_config.BOT_NICKNAME},你还有很多别名:{"/".join(global_config.BOT_ALIAS_NAMES)},'
|
||||
personality_choice = random.random()
|
||||
if chat_in_group:
|
||||
prompt_in_group=f"你正在浏览{chat_stream.platform}群"
|
||||
else:
|
||||
prompt_in_group=f"你正在{chat_stream.platform}上和{sender_name}私聊"
|
||||
if personality_choice < probability_1: # 第一种人格
|
||||
prompt_personality = f'''{activate_prompt}你的网名叫{global_config.BOT_NICKNAME},{personality[0]},{prompt_in_group},{promt_info_prompt},
|
||||
现在请你给出日常且口语化的回复,平淡一些,尽量简短一些。{is_bot_prompt}
|
||||
prompt_personality += f'''{personality[0]}, 你正在浏览qq群,{promt_info_prompt},
|
||||
现在请你给出日常且口语化的回复,平淡一些,尽量简短一些。{keywords_reaction_prompt}
|
||||
请注意把握群里的聊天内容,不要刻意突出自身学科背景,不要回复的太有条理,可以有个性。'''
|
||||
elif personality_choice < probability_1 + probability_2: # 第二种人格
|
||||
prompt_personality = f'''{activate_prompt}你的网名叫{global_config.BOT_NICKNAME},{personality[1]},{prompt_in_group},{promt_info_prompt},
|
||||
现在请你给出日常且口语化的回复,请表现你自己的见解,不要一昧迎合,尽量简短一些。{is_bot_prompt}
|
||||
prompt_personality += f'''{personality[1]}, 你正在浏览qq群,{promt_info_prompt},
|
||||
现在请你给出日常且口语化的回复,请表现你自己的见解,不要一昧迎合,尽量简短一些。{keywords_reaction_prompt}
|
||||
请你表达自己的见解和观点。可以有个性。'''
|
||||
else: # 第三种人格
|
||||
prompt_personality = f'''{activate_prompt}你的网名叫{global_config.BOT_NICKNAME},{personality[2]},{prompt_in_group},{promt_info_prompt},
|
||||
现在请你给出日常且口语化的回复,请表现你自己的见解,不要一昧迎合,尽量简短一些。{is_bot_prompt}
|
||||
prompt_personality += f'''{personality[2]}, 你正在浏览qq群,{promt_info_prompt},
|
||||
现在请你给出日常且口语化的回复,请表现你自己的见解,不要一昧迎合,尽量简短一些。{keywords_reaction_prompt}
|
||||
请你表达自己的见解和观点。可以有个性。'''
|
||||
|
||||
#中文高手(新加的好玩功能)
|
||||
|
||||
# 中文高手(新加的好玩功能)
|
||||
prompt_ger = ''
|
||||
if random.random() < 0.04:
|
||||
prompt_ger += '你喜欢用倒装句'
|
||||
@@ -163,23 +163,23 @@ class PromptBuilder:
|
||||
prompt_ger += '你喜欢用反问句'
|
||||
if random.random() < 0.01:
|
||||
prompt_ger += '你喜欢用文言文'
|
||||
|
||||
#额外信息要求
|
||||
extra_info = '''但是记得回复平淡一些,简短一些,尤其注意在没明确提到时不要过多提及自身的背景, 不要直接回复别人发的表情包,记住不要输出多余内容(包括前后缀,冒号和引号,括号,表情等),只需要输出回复内容就好,不要输出其他任何内容'''
|
||||
|
||||
#合并prompt
|
||||
|
||||
# 额外信息要求
|
||||
extra_info = '''但是记得回复平淡一些,简短一些,尤其注意在没明确提到时不要过多提及自身的背景, 不要直接回复别人发的表情包,记住不要输出多余内容(包括前后缀,冒号和引号,括号,表情等),只需要输出回复内容就好,不要输出其他任何内容'''
|
||||
|
||||
# 合并prompt
|
||||
prompt = ""
|
||||
prompt += f"{prompt_info}\n"
|
||||
prompt += f"{prompt_date}\n"
|
||||
prompt += f"{chat_talking_prompt}\n"
|
||||
prompt += f"{chat_talking_prompt}\n"
|
||||
prompt += f"{prompt_personality}\n"
|
||||
prompt += f"{prompt_ger}\n"
|
||||
prompt += f"{extra_info}\n"
|
||||
|
||||
'''读空气prompt处理'''
|
||||
activate_prompt_check=f"以上是群里正在进行的聊天,昵称为 '{sender_name}' 的用户说的:{message_txt}。引起了你的注意,你和他{relation_prompt},你想要{relation_prompt_2},但是这不一定是合适的时机,请你决定是否要回应这条消息。"
|
||||
prompt += f"{extra_info}\n"
|
||||
|
||||
'''读空气prompt处理'''
|
||||
activate_prompt_check = f"以上是群里正在进行的聊天,昵称为 '{sender_name}' 的用户说的:{message_txt}。引起了你的注意,你和他{relation_prompt},你想要{relation_prompt_2},但是这不一定是合适的时机,请你决定是否要回应这条消息。"
|
||||
prompt_personality_check = ''
|
||||
extra_check_info=f"请注意把握群里的聊天内容的基础上,综合群内的氛围,例如,和{global_config.BOT_NICKNAME}相关的话题要积极回复,如果是at自己的消息一定要回复,如果自己正在和别人聊天一定要回复,其他话题如果合适搭话也可以回复,如果认为应该回复请输出yes,否则输出no,请注意是决定是否需要回复,而不是编写回复内容,除了yes和no不要输出任何回复内容。"
|
||||
extra_check_info = f"请注意把握群里的聊天内容的基础上,综合群内的氛围,例如,和{global_config.BOT_NICKNAME}相关的话题要积极回复,如果是at自己的消息一定要回复,如果自己正在和别人聊天一定要回复,其他话题如果合适搭话也可以回复,如果认为应该回复请输出yes,否则输出no,请注意是决定是否需要回复,而不是编写回复内容,除了yes和no不要输出任何回复内容。"
|
||||
if personality_choice < probability_1: # 第一种人格
|
||||
prompt_personality_check = f'''你的网名叫{global_config.BOT_NICKNAME},{personality[0]}, 你正在浏览qq群,{promt_info_prompt} {activate_prompt_check} {extra_check_info}'''
|
||||
elif personality_choice < probability_1 + probability_2: # 第二种人格
|
||||
@@ -187,34 +187,36 @@ class PromptBuilder:
|
||||
else: # 第三种人格
|
||||
prompt_personality_check = f'''你的网名叫{global_config.BOT_NICKNAME},{personality[2]}, 你正在浏览qq群,{promt_info_prompt} {activate_prompt_check} {extra_check_info}'''
|
||||
|
||||
prompt_check_if_response=f"{prompt_info}\n{prompt_date}\n{chat_talking_prompt}\n{prompt_personality_check}"
|
||||
|
||||
return prompt,prompt_check_if_response
|
||||
|
||||
def _build_initiative_prompt_select(self,group_id):
|
||||
prompt_check_if_response = f"{prompt_info}\n{prompt_date}\n{chat_talking_prompt}\n{prompt_personality_check}"
|
||||
|
||||
return prompt, prompt_check_if_response
|
||||
|
||||
def _build_initiative_prompt_select(self, group_id, probability_1=0.8, probability_2=0.1):
|
||||
current_date = time.strftime("%Y-%m-%d", time.localtime())
|
||||
current_time = time.strftime("%H:%M:%S", time.localtime())
|
||||
bot_schedule_now_time,bot_schedule_now_activity = bot_schedule.get_current_task()
|
||||
bot_schedule_now_time, bot_schedule_now_activity = bot_schedule.get_current_task()
|
||||
prompt_date = f'''今天是{current_date},现在是{current_time},你今天的日程是:\n{bot_schedule.today_schedule}\n你现在正在{bot_schedule_now_activity}\n'''
|
||||
|
||||
chat_talking_prompt = ''
|
||||
if group_id:
|
||||
chat_talking_prompt = get_recent_group_detailed_plain_text(self.db, group_id, limit=global_config.MAX_CONTEXT_SIZE,combine = True)
|
||||
|
||||
chat_talking_prompt = get_recent_group_detailed_plain_text(self.db, group_id,
|
||||
limit=global_config.MAX_CONTEXT_SIZE,
|
||||
combine=True)
|
||||
|
||||
chat_talking_prompt = f"以下是群里正在聊天的内容:\n{chat_talking_prompt}"
|
||||
# print(f"\033[1;34m[调试]\033[0m 已从数据库获取群 {group_id} 的消息记录:{chat_talking_prompt}")
|
||||
# print(f"\033[1;34m[调试]\033[0m 已从数据库获取群 {group_id} 的消息记录:{chat_talking_prompt}")
|
||||
|
||||
# 获取主动发言的话题
|
||||
all_nodes=memory_graph.dots
|
||||
all_nodes=filter(lambda dot:len(dot[1]['memory_items'])>3,all_nodes)
|
||||
nodes_for_select=random.sample(all_nodes,5)
|
||||
topics=[info[0] for info in nodes_for_select]
|
||||
infos=[info[1] for info in nodes_for_select]
|
||||
all_nodes = memory_graph.dots
|
||||
all_nodes = filter(lambda dot: len(dot[1]['memory_items']) > 3, all_nodes)
|
||||
nodes_for_select = random.sample(all_nodes, 5)
|
||||
topics = [info[0] for info in nodes_for_select]
|
||||
infos = [info[1] for info in nodes_for_select]
|
||||
|
||||
#激活prompt构建
|
||||
# 激活prompt构建
|
||||
activate_prompt = ''
|
||||
activate_prompt = "以上是群里正在进行的聊天。"
|
||||
personality=global_config.PROMPT_PERSONALITY
|
||||
personality = global_config.PROMPT_PERSONALITY
|
||||
prompt_personality = ''
|
||||
personality_choice = random.random()
|
||||
if personality_choice < probability_1: # 第一种人格
|
||||
@@ -223,32 +225,31 @@ class PromptBuilder:
|
||||
prompt_personality = f'''{activate_prompt}你的网名叫{global_config.BOT_NICKNAME},{personality[1]}'''
|
||||
else: # 第三种人格
|
||||
prompt_personality = f'''{activate_prompt}你的网名叫{global_config.BOT_NICKNAME},{personality[2]}'''
|
||||
|
||||
topics_str=','.join(f"\"{topics}\"")
|
||||
prompt_for_select=f"你现在想在群里发言,回忆了一下,想到几个话题,分别是{topics_str},综合当前状态以及群内气氛,请你在其中选择一个合适的话题,注意只需要输出话题,除了话题什么也不要输出(双引号也不要输出)"
|
||||
|
||||
prompt_initiative_select=f"{prompt_date}\n{prompt_personality}\n{prompt_for_select}"
|
||||
prompt_regular=f"{prompt_date}\n{prompt_personality}"
|
||||
|
||||
return prompt_initiative_select,nodes_for_select,prompt_regular
|
||||
|
||||
def _build_initiative_prompt_check(self,selected_node,prompt_regular):
|
||||
memory=random.sample(selected_node['memory_items'],3)
|
||||
memory='\n'.join(memory)
|
||||
prompt_for_check=f"{prompt_regular}你现在想在群里发言,回忆了一下,想到一个话题,是{selected_node['concept']},关于这个话题的记忆有\n{memory}\n,以这个作为主题发言合适吗?请在把握群里的聊天内容的基础上,综合群内的氛围,如果认为应该发言请输出yes,否则输出no,请注意是决定是否需要发言,而不是编写回复内容,除了yes和no不要输出任何回复内容。"
|
||||
return prompt_for_check,memory
|
||||
|
||||
def _build_initiative_prompt(self,selected_node,prompt_regular,memory):
|
||||
prompt_for_initiative=f"{prompt_regular}你现在想在群里发言,回忆了一下,想到一个话题,是{selected_node['concept']},关于这个话题的记忆有\n{memory}\n,请在把握群里的聊天内容的基础上,综合群内的氛围,以日常且口语化的口吻,简短且随意一点进行发言,不要说的太有条理,可以有个性。记住不要输出多余内容(包括前后缀,冒号和引号,括号,表情等)"
|
||||
topics_str = ','.join(f"\"{topics}\"")
|
||||
prompt_for_select = f"你现在想在群里发言,回忆了一下,想到几个话题,分别是{topics_str},综合当前状态以及群内气氛,请你在其中选择一个合适的话题,注意只需要输出话题,除了话题什么也不要输出(双引号也不要输出)"
|
||||
|
||||
prompt_initiative_select = f"{prompt_date}\n{prompt_personality}\n{prompt_for_select}"
|
||||
prompt_regular = f"{prompt_date}\n{prompt_personality}"
|
||||
|
||||
return prompt_initiative_select, nodes_for_select, prompt_regular
|
||||
|
||||
def _build_initiative_prompt_check(self, selected_node, prompt_regular):
|
||||
memory = random.sample(selected_node['memory_items'], 3)
|
||||
memory = '\n'.join(memory)
|
||||
prompt_for_check = f"{prompt_regular}你现在想在群里发言,回忆了一下,想到一个话题,是{selected_node['concept']},关于这个话题的记忆有\n{memory}\n,以这个作为主题发言合适吗?请在把握群里的聊天内容的基础上,综合群内的氛围,如果认为应该发言请输出yes,否则输出no,请注意是决定是否需要发言,而不是编写回复内容,除了yes和no不要输出任何回复内容。"
|
||||
return prompt_for_check, memory
|
||||
|
||||
def _build_initiative_prompt(self, selected_node, prompt_regular, memory):
|
||||
prompt_for_initiative = f"{prompt_regular}你现在想在群里发言,回忆了一下,想到一个话题,是{selected_node['concept']},关于这个话题的记忆有\n{memory}\n,请在把握群里的聊天内容的基础上,综合群内的氛围,以日常且口语化的口吻,简短且随意一点进行发言,不要说的太有条理,可以有个性。记住不要输出多余内容(包括前后缀,冒号和引号,括号,表情等)"
|
||||
return prompt_for_initiative
|
||||
|
||||
|
||||
async def get_prompt_info(self, message: str, threshold: float):
    """Look up knowledge-base text related to ``message``.

    The message is embedded with ``get_embedding`` and the embedding is handed
    to ``self.get_info_from_db`` together with ``threshold``.

    Returns:
        The text produced by ``get_info_from_db``.
    """
    logger.debug(f"获取知识库内容,元消息:{message[:30]}...,消息长度: {len(message)}")
    query_embedding = await get_embedding(message)
    related_info = ''
    related_info += self.get_info_from_db(query_embedding, threshold=threshold)
    return related_info
|
||||
|
||||
def get_info_from_db(self, query_embedding: list, limit: int = 1, threshold: float = 0.5) -> str:
|
||||
@@ -309,14 +310,15 @@ class PromptBuilder:
|
||||
{"$limit": limit},
|
||||
{"$project": {"content": 1, "similarity": 1}}
|
||||
]
|
||||
|
||||
|
||||
results = list(self.db.db.knowledges.aggregate(pipeline))
|
||||
# print(f"\033[1;34m[调试]\033[0m获取知识库内容结果: {results}")
|
||||
|
||||
|
||||
if not results:
|
||||
return ''
|
||||
|
||||
|
||||
# 返回所有找到的内容,用换行分隔
|
||||
return '\n'.join(str(result['content']) for result in results)
|
||||
|
||||
prompt_builder = PromptBuilder()
|
||||
|
||||
|
||||
prompt_builder = PromptBuilder()
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import asyncio
|
||||
from typing import Optional, Union
|
||||
from typing import Optional, Union
|
||||
from loguru import logger
|
||||
|
||||
from ...common.database import Database
|
||||
from .message_base import UserInfo
|
||||
@@ -10,9 +11,10 @@ class Impression:
|
||||
traits: str = None
|
||||
called: str = None
|
||||
know_time: float = None
|
||||
|
||||
|
||||
relationship_value: float = None
|
||||
|
||||
|
||||
class Relationship:
|
||||
user_id: int = None
|
||||
platform: str = None
|
||||
@@ -79,7 +81,7 @@ class RelationshipManager:
|
||||
# 保存到数据库
|
||||
await self.storage_relationship(relationship)
|
||||
relationship.saved = True
|
||||
|
||||
|
||||
return relationship
|
||||
|
||||
async def update_relationship_value(self,
|
||||
@@ -121,7 +123,7 @@ class RelationshipManager:
|
||||
# 如果不存在且提供了user_info,则创建新的关系
|
||||
if user_info is not None:
|
||||
return await self.update_relationship(chat_stream=chat_stream, **kwargs)
|
||||
print(f"\033[1;31m[关系管理]\033[0m 用户 {user_id}({platform}) 不存在,无法更新")
|
||||
logger.warning(f"[关系管理] 用户 {user_id}({platform}) 不存在,无法更新")
|
||||
return None
|
||||
|
||||
def get_relationship(self,
|
||||
@@ -151,7 +153,7 @@ class RelationshipManager:
|
||||
return self.relationships[key]
|
||||
else:
|
||||
return 0
|
||||
|
||||
|
||||
async def load_relationship(self, data: dict) -> Relationship:
|
||||
"""从数据库加载或创建新的关系对象"""
|
||||
# 确保data中有platform字段,如果没有则默认为'qq'
|
||||
@@ -163,14 +165,14 @@ class RelationshipManager:
|
||||
key = (rela.user_id, rela.platform)
|
||||
self.relationships[key] = rela
|
||||
return rela
|
||||
|
||||
|
||||
async def load_all_relationships(self):
    """Load every document of the ``relationships`` collection into this manager."""
    relationships = Database.get_instance().db.relationships
    # An empty filter selects every stored document.
    for document in relationships.find({}):
        await self.load_relationship(document)
|
||||
|
||||
|
||||
async def _start_relationship_manager(self):
|
||||
"""每5分钟自动保存一次关系数据"""
|
||||
db = Database.get_instance()
|
||||
@@ -179,15 +181,15 @@ class RelationshipManager:
|
||||
# 依次加载每条记录
|
||||
for data in all_relationships:
|
||||
await self.load_relationship(data)
|
||||
print(f"\033[1;32m[关系管理]\033[0m 已加载 {len(self.relationships)} 条关系记录")
|
||||
logger.debug(f"[关系管理] 已加载 {len(self.relationships)} 条关系记录")
|
||||
|
||||
while True:
|
||||
print("\033[1;32m[关系管理]\033[0m 正在自动保存关系")
|
||||
logger.debug("正在自动保存关系")
|
||||
await asyncio.sleep(300) # 等待300秒(5分钟)
|
||||
await self._save_all_relationships()
|
||||
|
||||
|
||||
async def _save_all_relationships(self):
|
||||
"""将所有关系数据保存到数据库"""
|
||||
"""将所有关系数据保存到数据库"""
|
||||
# 保存所有关系数据
|
||||
for (userid, platform), relationship in self.relationships.items():
|
||||
if not relationship.saved:
|
||||
@@ -203,7 +205,7 @@ class RelationshipManager:
|
||||
gender = relationship.gender
|
||||
age = relationship.age
|
||||
saved = relationship.saved
|
||||
|
||||
|
||||
db = Database.get_instance()
|
||||
db.db.relationships.update_one(
|
||||
{'user_id': user_id, 'platform': platform},
|
||||
@@ -252,4 +254,4 @@ class RelationshipManager:
|
||||
return "某人"
|
||||
|
||||
|
||||
relationship_manager = RelationshipManager()
|
||||
relationship_manager = RelationshipManager()
|
||||
|
||||
@@ -5,6 +5,8 @@ from ...common.database import Database
|
||||
from .message_base import MessageBase
|
||||
from .message import MessageSending, MessageRecv
|
||||
from .chat_stream import ChatStream
|
||||
from loguru import logger
|
||||
|
||||
|
||||
class MessageStorage:
|
||||
def __init__(self):
|
||||
@@ -24,7 +26,7 @@ class MessageStorage:
|
||||
"topic": topic,
|
||||
}
|
||||
self.db.db.messages.insert_one(message_data)
|
||||
except Exception as e:
|
||||
print(f"\033[1;31m[错误]\033[0m 存储消息失败: {e}")
|
||||
except Exception:
|
||||
logger.exception("存储消息失败")
|
||||
|
||||
# 如果需要其他存储相关的函数,可以在这里添加
|
||||
# 如果需要其他存储相关的函数,可以在这里添加
|
||||
|
||||
@@ -4,9 +4,11 @@ from nonebot import get_driver
|
||||
|
||||
from ..models.utils_model import LLM_request
|
||||
from .config import global_config
|
||||
from loguru import logger
|
||||
|
||||
driver = get_driver()
|
||||
config = driver.config
|
||||
config = driver.config
|
||||
|
||||
|
||||
class TopicIdentifier:
|
||||
def __init__(self):
|
||||
@@ -23,19 +25,20 @@ class TopicIdentifier:
|
||||
|
||||
# 使用 LLM_request 类进行请求
|
||||
topic, _ = await self.llm_topic_judge.generate_response(prompt)
|
||||
|
||||
|
||||
if not topic:
|
||||
print("\033[1;31m[错误]\033[0m LLM API 返回为空")
|
||||
logger.error("LLM API 返回为空")
|
||||
return None
|
||||
|
||||
|
||||
# 直接在这里处理主题解析
|
||||
if not topic or topic == "无主题":
|
||||
return None
|
||||
|
||||
|
||||
# 解析主题字符串为列表
|
||||
topic_list = [t.strip() for t in topic.split(",") if t.strip()]
|
||||
|
||||
print(f"\033[1;32m[主题识别]\033[0m 主题: {topic_list}")
|
||||
|
||||
logger.info(f"主题: {topic_list}")
|
||||
return topic_list if topic_list else None
|
||||
|
||||
topic_identifier = TopicIdentifier()
|
||||
|
||||
topic_identifier = TopicIdentifier()
|
||||
|
||||
@@ -7,6 +7,7 @@ from typing import Dict, List
|
||||
import jieba
|
||||
import numpy as np
|
||||
from nonebot import get_driver
|
||||
from loguru import logger
|
||||
|
||||
from ..models.utils_model import LLM_request
|
||||
from ..utils.typo_generator import ChineseTypoGenerator
|
||||
@@ -21,16 +22,16 @@ config = driver.config
|
||||
|
||||
|
||||
def db_message_to_str(message_dict: Dict) -> str:
    """Render one stored message document as a single chat-log line.

    Args:
        message_dict: Message document. "time" is required and is passed to
            ``time.localtime``; "user_id", "user_nickname", "user_cardname"
            and "processed_plain_text" are used when present.

    Returns:
        ``"[MM-DD HH:MM:SS] <name>: <content>\\n"``.
    """
    logger.debug(f"message_dict: {message_dict}")
    time_str = time.strftime("%m-%d %H:%M:%S", time.localtime(message_dict["time"]))
    nickname = message_dict.get("user_nickname", "")
    if "user_id" in message_dict:
        name = "[(%s)%s]%s" % (
            message_dict["user_id"], nickname, message_dict.get("user_cardname", ""))
    else:
        # The only thing the former bare `except:` could catch was a missing
        # 'user_id', and its fallback indexed 'user_id' again, so it could
        # re-raise. Fall back without touching the missing key.
        name = nickname or "用户"
    content = message_dict.get("processed_plain_text", "")
    result = f"[{time_str}] {name}: {content}\n"
    logger.debug(f"result: {result}")
    return result
|
||||
|
||||
|
||||
@@ -71,37 +72,43 @@ def calculate_information_content(text):
|
||||
|
||||
|
||||
def get_cloest_chat_from_db(db, length: int, timestamp: str):
    """Fetch the chat records that follow the message closest to ``timestamp``.

    The newest message at or before ``timestamp`` is used as the anchor; up to
    ``length`` later messages with the same ``chat_id`` are read, and the
    ``memorized`` counter of each one is incremented in the database.

    Args:
        db: Object exposing the messages collection as ``db.db.messages``.
        length: Maximum number of messages to fetch.
        timestamp: Reference time used to pick the anchor message.

    Returns:
        list: One dict per message with 'text', 'time' and 'group_id' keys.
        Empty when there is no anchor, when the anchor's ``memorized`` counter
        is 4 or more, or when a fetched message was already read more than
        3 times.
    """
    chat_records = []
    closest_record = db.db.messages.find_one({"time": {"$lte": timestamp}}, sort=[('time', -1)])
    if not closest_record or closest_record.get('memorized', 0) >= 4:
        return chat_records

    closest_time = closest_record['time']
    chat_id = closest_record['chat_id']
    # Keep the raw documents in their own name: the loop below previously
    # iterated over an undefined `records` while the query result was bound to
    # `chat_records`, the very list being filled.
    records = list(db.db.messages.find(
        {"time": {"$gt": closest_time}, "chat_id": chat_id}
    ).sort('time', 1).limit(length))

    for record in records:
        current_memorized = record.get('memorized', 0)
        if current_memorized > 3:
            logger.debug("消息已读取3次,跳过")
            # Return an empty list (not '') so the result type stays a list.
            return []

        # Record one more read of this message.
        db.db.messages.update_one(
            {"_id": record["_id"]},
            {"$set": {"memorized": current_memorized + 1}}
        )

        chat_records.append({
            'text': record["detailed_plain_text"],
            'time': record["time"],
            # NOTE(review): the query is keyed on chat_id, so documents may not
            # carry group_id; .get avoids a KeyError - confirm the schema.
            'group_id': record.get("group_id"),
        })

    return chat_records
|
||||
|
||||
|
||||
async def get_recent_group_messages(db, chat_id:str, limit: int = 12) -> list:
|
||||
@@ -142,7 +149,7 @@ async def get_recent_group_messages(db, chat_id:str, limit: int = 12) -> list:
|
||||
)
|
||||
message_objects.append(msg)
|
||||
except KeyError:
|
||||
print("[WARNING] 数据库中存在无效的消息")
|
||||
logger.warning("数据库中存在无效的消息")
|
||||
continue
|
||||
|
||||
# 按时间正序排列
|
||||
@@ -259,11 +266,10 @@ def split_into_sentences_w_remove_punctuation(text: str) -> List[str]:
|
||||
sentence = sentence.replace(',', ' ').replace(',', ' ')
|
||||
sentences_done.append(sentence)
|
||||
|
||||
print(f"处理后的句子: {sentences_done}")
|
||||
logger.info(f"处理后的句子: {sentences_done}")
|
||||
return sentences_done
|
||||
|
||||
|
||||
|
||||
def random_remove_punctuation(text: str) -> str:
|
||||
"""随机处理标点符号,模拟人类打字习惯
|
||||
|
||||
@@ -291,43 +297,70 @@ def random_remove_punctuation(text: str) -> str:
|
||||
return result
|
||||
|
||||
|
||||
|
||||
def process_llm_response(text: str) -> List[str]:
    """Turn a raw LLM reply into the list of chat messages to send.

    Replies longer than 200 characters are replaced by a canned answer.
    Otherwise the text is split into sentences; when
    ``global_config.chinese_typo_enable`` is set, each sentence is passed
    through ``ChineseTypoGenerator`` and any correction text it returns is
    queued right after the sentence. If more than 5 messages result, a second
    canned answer is returned instead.

    Args:
        text: Raw reply text.

    Returns:
        The messages to send, in order.
    """
    if len(text) > 200:
        logger.warning(f"回复过长 ({len(text)} 字符),返回默认回复")
        return ['懒得说']

    split_sentences = split_into_sentences_w_remove_punctuation(text)
    if global_config.chinese_typo_enable:
        # Build the generator only when typos are actually enabled.
        typo_generator = ChineseTypoGenerator(
            error_rate=global_config.chinese_typo_error_rate,
            min_freq=global_config.chinese_typo_min_freq,
            tone_error_rate=global_config.chinese_typo_tone_error_rate,
            word_replace_rate=global_config.chinese_typo_word_replace_rate
        )
        sentences = []
        for sentence in split_sentences:
            typoed_text, typo_corrections = typo_generator.create_typo_sentence(sentence)
            sentences.append(typoed_text)
            if typo_corrections:
                sentences.append(typo_corrections)
    else:
        sentences = list(split_sentences)

    # Too many messages after splitting (more than 5): use the default reply.
    if len(sentences) > 5:
        logger.warning(f"分割后消息数量过多 ({len(sentences)} 条),返回默认回复")
        return [f'{global_config.BOT_NICKNAME}不知道哦']

    return sentences
|
||||
|
||||
|
||||
def calculate_typing_time(input_string: str, chinese_time: float = 0.4, english_time: float = 0.2) -> float:
    """Estimate how many seconds it takes to type ``input_string``.

    Args:
        input_string: Text to be typed.
        chinese_time: Seconds per Chinese character (default 0.4).
        english_time: Seconds per any other character (default 0.2).

    Both per-character times are divided by ``1.5 ** arousal``, where
    ``arousal`` is read from the current mood of ``MoodManager``.

    Special cases:
        - A message that is exactly one Chinese character takes three times
          the Chinese per-character time.
        - 0.3 seconds are always added for pressing Enter.

    Returns:
        The estimated typing time in seconds.
    """
    mood_manager = MoodManager.get_instance()
    mood_arousal = mood_manager.current_mood.arousal
    # NOTE(review): earlier comments said arousal is remapped from 0..1 to
    # -1..1 and that the factor spans 0.5x-2x. The code does neither: it uses
    # the raw arousal value with base 1.5, i.e. 1.5x faster at arousal 1 and
    # 1/1.5x at arousal -1. Confirm the intended range of `arousal`.
    typing_speed_multiplier = 1.5 ** mood_arousal
    chinese_time *= 1 / typing_speed_multiplier
    english_time *= 1 / typing_speed_multiplier

    # Count characters in the CJK Unified Ideographs range U+4E00..U+9FFF.
    chinese_chars = sum(1 for char in input_string if '\u4e00' <= char <= '\u9fff')

    # A lone Chinese character: three times the per-character time, plus Enter.
    if chinese_chars == 1 and len(input_string.strip()) == 1:
        return chinese_time * 3 + 0.3

    total_time = 0.0
    for char in input_string:
        if '\u4e00' <= char <= '\u9fff':
            total_time += chinese_time
        else:  # every non-Chinese character, e.g. Latin letters
            total_time += english_time
    return total_time + 0.3  # plus the Enter key
|
||||
|
||||
|
||||
def cosine_similarity(v1, v2):
|
||||
|
||||
@@ -16,7 +16,9 @@ class WillingManager:
|
||||
self.chat_reply_willing: Dict[str, float] = {} # 存储每个聊天流的回复意愿
|
||||
self._decay_task = None
|
||||
self._started = False
|
||||
|
||||
self.min_reply_willing = 0.01
|
||||
self.attenuation_coefficient = 0.75
|
||||
|
||||
async def _decay_reply_willing(self):
|
||||
"""定期衰减回复意愿"""
|
||||
while True:
|
||||
@@ -33,12 +35,9 @@ class WillingManager:
|
||||
return self.chat_reply_willing.get(stream.stream_id, 0)
|
||||
return 0
|
||||
|
||||
def set_willing(self, chat_id: str, willing: float):
    """Set the reply willingness stored for one chat stream.

    Args:
        chat_id: Key of the chat stream in ``self.chat_reply_willing``.
        willing: New willingness value.
    """
    # Three copies of this method were stacked here; the last one (the one
    # Python would keep) wrote to `self.group_reply_willing`. Store the value
    # in `chat_reply_willing`, the per-stream mapping this class keeps.
    self.chat_reply_willing[chat_id] = willing
|
||||
|
||||
async def change_reply_willing_received(self,
|
||||
chat_stream:ChatStream,
|
||||
@@ -51,47 +50,67 @@ class WillingManager:
|
||||
# 获取或创建聊天流
|
||||
stream = chat_stream
|
||||
chat_id = stream.stream_id
|
||||
group_id = stream.group_info.group_id
|
||||
|
||||
# 若非目标回复群组,则直接return
|
||||
if group_id not in config.talk_allowed_groups:
|
||||
reply_probability = 0
|
||||
return reply_probability
|
||||
|
||||
|
||||
current_willing = self.chat_reply_willing.get(chat_id, 0)
|
||||
|
||||
# print(f"初始意愿: {current_willing}")
|
||||
if is_mentioned_bot and current_willing < 1.0:
|
||||
current_willing += 0.9
|
||||
print(f"被提及, 当前意愿: {current_willing}")
|
||||
elif is_mentioned_bot:
|
||||
current_willing += 0.05
|
||||
print(f"被重复提及, 当前意愿: {current_willing}")
|
||||
|
||||
logger.debug(f"[{chat_id}]的初始回复意愿: {current_willing}")
|
||||
|
||||
|
||||
# 根据消息类型(被cue/表情包)调控
|
||||
if is_mentioned_bot:
|
||||
current_willing = min(
|
||||
3.0,
|
||||
current_willing + 0.9
|
||||
)
|
||||
logger.debug(f"被提及, 当前意愿: {current_willing}")
|
||||
|
||||
if is_emoji:
|
||||
current_willing *= 0.1
|
||||
print(f"表情包, 当前意愿: {current_willing}")
|
||||
|
||||
print(f"放大系数_interested_rate: {global_config.response_interested_rate_amplifier}")
|
||||
interested_rate *= global_config.response_interested_rate_amplifier #放大回复兴趣度
|
||||
if interested_rate > 0.4:
|
||||
# print(f"兴趣度: {interested_rate}, 当前意愿: {current_willing}")
|
||||
current_willing += interested_rate-0.4
|
||||
|
||||
current_willing *= global_config.response_willing_amplifier #放大回复意愿
|
||||
# print(f"放大系数_willing: {global_config.response_willing_amplifier}, 当前意愿: {current_willing}")
|
||||
|
||||
reply_probability = max((current_willing - 0.45) * 2, 0)
|
||||
|
||||
# 检查群组权限(如果是群聊)
|
||||
if chat_stream.group_info:
|
||||
if chat_stream.group_info.group_id not in config.talk_allowed_groups:
|
||||
current_willing = 0
|
||||
reply_probability = 0
|
||||
|
||||
if chat_stream.group_info.group_id in config.talk_frequency_down_groups:
|
||||
reply_probability = reply_probability / global_config.down_frequency_rate
|
||||
logger.debug(f"表情包, 当前意愿: {current_willing}")
|
||||
|
||||
# 兴趣放大系数,若兴趣 > 0.4则增加回复概率
|
||||
interested_rate_amplifier = global_config.response_interested_rate_amplifier
|
||||
logger.debug(f"放大系数_interested_rate: {interested_rate_amplifier}")
|
||||
interested_rate *= interested_rate_amplifier
|
||||
|
||||
current_willing += max(
|
||||
0.0,
|
||||
interested_rate - 0.4
|
||||
)
|
||||
|
||||
# 回复意愿系数调控,独立乘区
|
||||
willing_amplifier = max(
|
||||
global_config.response_willing_amplifier,
|
||||
self.min_reply_willing
|
||||
)
|
||||
current_willing *= willing_amplifier
|
||||
logger.debug(f"放大系数_willing: {global_config.response_willing_amplifier}, 当前意愿: {current_willing}")
|
||||
|
||||
# 回复概率迭代,保底0.01回复概率
|
||||
reply_probability = max(
|
||||
(current_willing - 0.45) * 2,
|
||||
self.min_reply_willing
|
||||
)
|
||||
|
||||
# 降低目标低频群组回复概率
|
||||
down_frequency_rate = max(
|
||||
1.0,
|
||||
global_config.down_frequency_rate
|
||||
)
|
||||
if group_id in config.talk_frequency_down_groups:
|
||||
reply_probability = reply_probability / down_frequency_rate
|
||||
|
||||
reply_probability = min(reply_probability, 1)
|
||||
if reply_probability < 0:
|
||||
reply_probability = 0
|
||||
|
||||
self.chat_reply_willing[chat_id] = min(current_willing, 3.0)
|
||||
self.chat_reply_willing[chat_id] = min(current_willing, 3.0)
|
||||
|
||||
self.group_reply_willing[group_id] = min(current_willing, 3.0)
|
||||
logger.debug(f"当前群组{group_id}回复概率:{reply_probability}")
|
||||
return reply_probability
|
||||
|
||||
def change_reply_willing_sent(self, chat_stream:ChatStream):
|
||||
@@ -116,5 +135,6 @@ class WillingManager:
|
||||
self._decay_task = asyncio.create_task(self._decay_reply_willing())
|
||||
self._started = True
|
||||
|
||||
|
||||
# 创建全局实例
|
||||
willing_manager = WillingManager()
|
||||
willing_manager = WillingManager()
|
||||
|
||||
@@ -19,7 +19,7 @@ from src.common.database import Database
|
||||
|
||||
# 从环境变量获取配置
|
||||
Database.initialize(
|
||||
host=os.getenv("MONGODB_HOST", "localhost"),
|
||||
host=os.getenv("MONGODB_HOST", "127.0.0.1"),
|
||||
port=int(os.getenv("MONGODB_PORT", "27017")),
|
||||
db_name=os.getenv("DATABASE_NAME", "maimai"),
|
||||
username=os.getenv("MONGODB_USERNAME"),
|
||||
@@ -79,7 +79,7 @@ class KnowledgeLibrary:
|
||||
content = f.read()
|
||||
|
||||
# 按1024字符分段
|
||||
segments = [content[i:i+600] for i in range(0, len(content), 600)]
|
||||
segments = [content[i:i+600] for i in range(0, len(content), 300)]
|
||||
|
||||
# 处理每个分段
|
||||
for segment in segments:
|
||||
|
||||
0
src/plugins/memory_system/__init__.py
Normal file
0
src/plugins/memory_system/__init__.py
Normal file
@@ -7,6 +7,7 @@ import jieba
|
||||
import matplotlib.pyplot as plt
|
||||
import networkx as nx
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
sys.path.append("C:/GitHub/MaiMBot") # 添加项目根目录到 Python 路径
|
||||
from src.common.database import Database # 使用正确的导入语法
|
||||
@@ -15,15 +16,15 @@ from src.common.database import Database # 使用正确的导入语法
|
||||
env_path = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))), '.env.dev')
|
||||
load_dotenv(env_path)
|
||||
|
||||
|
||||
|
||||
class Memory_graph:
|
||||
def __init__(self):
    """Start with an empty graph and take the shared Database instance."""
    # Undirected networkx graph holding the concept nodes.
    self.G = nx.Graph()
    self.db = Database.get_instance()
|
||||
|
||||
|
||||
def connect_dot(self, concept1, concept2):
    """Add an edge between ``concept1`` and ``concept2`` to the graph."""
    graph = self.G
    graph.add_edge(concept1, concept2)
|
||||
|
||||
|
||||
def add_dot(self, concept, memory):
|
||||
if concept in self.G:
|
||||
# 如果节点已存在,将新记忆添加到现有列表中
|
||||
@@ -37,7 +38,7 @@ class Memory_graph:
|
||||
else:
|
||||
# 如果是新节点,创建新的记忆列表
|
||||
self.G.add_node(concept, memory_items=[memory])
|
||||
|
||||
|
||||
def get_dot(self, concept):
|
||||
# 检查节点是否存在于图中
|
||||
if concept in self.G:
|
||||
@@ -45,20 +46,20 @@ class Memory_graph:
|
||||
node_data = self.G.nodes[concept]
|
||||
# print(node_data)
|
||||
# 创建新的Memory_dot对象
|
||||
return concept,node_data
|
||||
return concept, node_data
|
||||
return None
|
||||
|
||||
def get_related_item(self, topic, depth=1):
|
||||
if topic not in self.G:
|
||||
return [], []
|
||||
|
||||
|
||||
first_layer_items = []
|
||||
second_layer_items = []
|
||||
|
||||
|
||||
# 获取相邻节点
|
||||
neighbors = list(self.G.neighbors(topic))
|
||||
# print(f"第一层: {topic}")
|
||||
|
||||
|
||||
# 获取当前节点的记忆项
|
||||
node_data = self.get_dot(topic)
|
||||
if node_data:
|
||||
@@ -69,7 +70,7 @@ class Memory_graph:
|
||||
first_layer_items.extend(memory_items)
|
||||
else:
|
||||
first_layer_items.append(memory_items)
|
||||
|
||||
|
||||
# 只在depth=2时获取第二层记忆
|
||||
if depth >= 2:
|
||||
# 获取相邻节点的记忆项
|
||||
@@ -84,42 +85,44 @@ class Memory_graph:
|
||||
second_layer_items.extend(memory_items)
|
||||
else:
|
||||
second_layer_items.append(memory_items)
|
||||
|
||||
|
||||
return first_layer_items, second_layer_items
|
||||
|
||||
|
||||
def store_memory(self):
    """Insert one ``{"concept": <node>}`` document per graph node into ``store_memory_dots``."""
    for concept in self.G.nodes():
        self.db.db.store_memory_dots.insert_one({"concept": concept})
|
||||
|
||||
|
||||
@property
def dots(self):
    """Return what ``get_dot`` yields for every node currently in the graph."""
    return list(map(self.get_dot, self.G.nodes()))
|
||||
|
||||
|
||||
|
||||
def get_random_chat_from_db(self, length: int, timestamp: str):
    """Return the chat text that follows the message closest to ``timestamp``.

    The newest message at or before ``timestamp`` is the anchor; up to
    ``length`` later messages with the same ``group_id`` are formatted as
    ``"[YYYY-MM-DD HH:MM:SS] <sender>: <text>\\n"`` lines and concatenated.

    Args:
        length: Maximum number of messages to read.
        timestamp: Reference time used to pick the anchor message.

    Returns:
        The concatenated chat text, or ``[]`` when no anchor message exists
        (the empty-list return is kept as-is for existing callers).
    """
    messages = self.db.db.messages
    closest_record = messages.find_one({"time": {"$lte": timestamp}}, sort=[('time', -1)])
    if not closest_record:
        # Checked before logging: the log line below indexes the record and
        # used to run even when nothing was found.
        return []

    logger.info(
        f"距离time最近的消息时间: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(int(closest_record['time'])))}")

    closest_time = closest_record['time']
    group_id = closest_record['group_id']
    # Up to `length` messages after the anchor, same group, oldest first.
    chat_record = list(
        messages.find({"time": {"$gt": closest_time}, "group_id": group_id}).sort('time', 1).limit(length))

    lines = []
    for record in chat_record:
        time_str = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(int(record['time'])))
        try:
            displayname = "[(%s)%s]%s" % (record["user_id"], record["user_nickname"], record["user_cardname"])
        except KeyError:
            # The fallback must not index the key that was just found missing.
            displayname = record.get("user_nickname") or "用户" + str(record.get("user_id", ""))
        lines.append(f'[{time_str}] {displayname}: {record["processed_plain_text"]}\n')
    return ''.join(lines)
|
||||
|
||||
def save_graph_to_db(self):
|
||||
@@ -166,138 +169,78 @@ def main():
|
||||
password=os.getenv("MONGODB_PASSWORD", ""),
|
||||
auth_source=os.getenv("MONGODB_AUTH_SOURCE", "")
|
||||
)
|
||||
|
||||
|
||||
memory_graph = Memory_graph()
|
||||
memory_graph.load_graph_from_db()
|
||||
|
||||
|
||||
# 只显示一次优化后的图形
|
||||
visualize_graph_lite(memory_graph)
|
||||
|
||||
|
||||
while True:
|
||||
query = input("请输入新的查询概念(输入'退出'以结束):")
|
||||
if query.lower() == '退出':
|
||||
break
|
||||
first_layer_items, second_layer_items = memory_graph.get_related_item(query)
|
||||
if first_layer_items or second_layer_items:
|
||||
print("\n第一层记忆:")
|
||||
logger.debug("第一层记忆:")
|
||||
for item in first_layer_items:
|
||||
print(item)
|
||||
print("\n第二层记忆:")
|
||||
logger.debug(item)
|
||||
logger.debug("第二层记忆:")
|
||||
for item in second_layer_items:
|
||||
print(item)
|
||||
logger.debug(item)
|
||||
else:
|
||||
print("未找到相关记忆。")
|
||||
|
||||
logger.debug("未找到相关记忆。")
|
||||
|
||||
|
||||
def segment_text(text):
    """Tokenise ``text`` with jieba and return the tokens as a list."""
    return list(jieba.cut(text))
|
||||
|
||||
|
||||
def find_topic(text, topic_num):
    """Return the prompt asking the model to list ``topic_num`` topics found in ``text``."""
    return (
        f'这是一段文字:{text}。'
        f'请你从这段话中总结出{topic_num}个话题,帮我列出来,用逗号隔开,尽可能精简。'
        f'只需要列举{topic_num}个话题就好,不要告诉我其他内容。'
    )
|
||||
|
||||
|
||||
def topic_what(text, topic):
    """Return the prompt asking the model to summarise what ``text`` says about ``topic``."""
    return (
        f'这是一段文字:{text}。'
        f'我想知道这记忆里有什么关于{topic}的话题,帮我总结成一句自然的话,可以包含时间和人物。'
        '只输出这句话就好'
    )
|
||||
|
||||
def visualize_graph(memory_graph: Memory_graph, color_by_memory: bool = False):
|
||||
# 设置中文字体
|
||||
plt.rcParams['font.sans-serif'] = ['SimHei'] # 用来正常显示中文标签
|
||||
plt.rcParams['axes.unicode_minus'] = False # 用来正常显示负号
|
||||
|
||||
G = memory_graph.G
|
||||
|
||||
# 保存图到本地
|
||||
nx.write_gml(G, "memory_graph.gml") # 保存为 GML 格式
|
||||
|
||||
# 根据连接条数或记忆数量设置节点颜色
|
||||
node_colors = []
|
||||
nodes = list(G.nodes()) # 获取图中实际的节点列表
|
||||
|
||||
if color_by_memory:
|
||||
# 计算每个节点的记忆数量
|
||||
memory_counts = []
|
||||
for node in nodes:
|
||||
memory_items = G.nodes[node].get('memory_items', [])
|
||||
if isinstance(memory_items, list):
|
||||
count = len(memory_items)
|
||||
else:
|
||||
count = 1 if memory_items else 0
|
||||
memory_counts.append(count)
|
||||
max_memories = max(memory_counts) if memory_counts else 1
|
||||
|
||||
for count in memory_counts:
|
||||
# 使用不同的颜色方案:红色表示记忆多,蓝色表示记忆少
|
||||
if max_memories > 0:
|
||||
intensity = min(1.0, count / max_memories)
|
||||
color = (intensity, 0, 1.0 - intensity) # 从蓝色渐变到红色
|
||||
else:
|
||||
color = (0, 0, 1) # 如果没有记忆,则为蓝色
|
||||
node_colors.append(color)
|
||||
else:
|
||||
# 使用原来的连接数量着色方案
|
||||
max_degree = max(G.degree(), key=lambda x: x[1])[1] if G.degree() else 1
|
||||
for node in nodes:
|
||||
degree = G.degree(node)
|
||||
if max_degree > 0:
|
||||
red = min(1.0, degree / max_degree)
|
||||
blue = 1.0 - red
|
||||
color = (red, 0, blue)
|
||||
else:
|
||||
color = (0, 0, 1)
|
||||
node_colors.append(color)
|
||||
|
||||
# 绘制图形
|
||||
plt.figure(figsize=(12, 8))
|
||||
pos = nx.spring_layout(G, k=1, iterations=50)
|
||||
nx.draw(G, pos,
|
||||
with_labels=True,
|
||||
node_color=node_colors,
|
||||
node_size=200,
|
||||
font_size=10,
|
||||
font_family='SimHei',
|
||||
font_weight='bold')
|
||||
|
||||
title = '记忆图谱可视化 - ' + ('按记忆数量着色' if color_by_memory else '按连接数量着色')
|
||||
plt.title(title, fontsize=16, fontfamily='SimHei')
|
||||
plt.show()
|
||||
|
||||
|
||||
def visualize_graph_lite(memory_graph: Memory_graph, color_by_memory: bool = False):
|
||||
# 设置中文字体
|
||||
plt.rcParams['font.sans-serif'] = ['SimHei'] # 用来正常显示中文标签
|
||||
plt.rcParams['axes.unicode_minus'] = False # 用来正常显示负号
|
||||
|
||||
|
||||
G = memory_graph.G
|
||||
|
||||
|
||||
# 创建一个新图用于可视化
|
||||
H = G.copy()
|
||||
|
||||
|
||||
# 移除只有一条记忆的节点和连接数少于3的节点
|
||||
nodes_to_remove = []
|
||||
for node in H.nodes():
|
||||
memory_items = H.nodes[node].get('memory_items', [])
|
||||
memory_count = len(memory_items) if isinstance(memory_items, list) else (1 if memory_items else 0)
|
||||
degree = H.degree(node)
|
||||
if memory_count < 5 or degree < 2: # 改为小于2而不是小于等于2
|
||||
if memory_count < 3 or degree < 2: # 改为小于2而不是小于等于2
|
||||
nodes_to_remove.append(node)
|
||||
|
||||
|
||||
H.remove_nodes_from(nodes_to_remove)
|
||||
|
||||
|
||||
# 如果过滤后没有节点,则返回
|
||||
if len(H.nodes()) == 0:
|
||||
print("过滤后没有符合条件的节点可显示")
|
||||
logger.debug("过滤后没有符合条件的节点可显示")
|
||||
return
|
||||
|
||||
|
||||
# 保存图到本地
|
||||
nx.write_gml(H, "memory_graph.gml") # 保存为 GML 格式
|
||||
# nx.write_gml(H, "memory_graph.gml") # 保存为 GML 格式
|
||||
|
||||
# 计算节点大小和颜色
|
||||
node_colors = []
|
||||
node_sizes = []
|
||||
nodes = list(H.nodes())
|
||||
|
||||
|
||||
# 获取最大记忆数和最大度数用于归一化
|
||||
max_memories = 1
|
||||
max_degree = 1
|
||||
@@ -307,7 +250,7 @@ def visualize_graph_lite(memory_graph: Memory_graph, color_by_memory: bool = Fal
|
||||
degree = H.degree(node)
|
||||
max_memories = max(max_memories, memory_count)
|
||||
max_degree = max(max_degree, degree)
|
||||
|
||||
|
||||
# 计算每个节点的大小和颜色
|
||||
for node in nodes:
|
||||
# 计算节点大小(基于记忆数量)
|
||||
@@ -315,37 +258,38 @@ def visualize_graph_lite(memory_graph: Memory_graph, color_by_memory: bool = Fal
|
||||
memory_count = len(memory_items) if isinstance(memory_items, list) else (1 if memory_items else 0)
|
||||
# 使用指数函数使变化更明显
|
||||
ratio = memory_count / max_memories
|
||||
size = 500 + 5000 * (ratio ** 2) # 使用平方函数使差异更明显
|
||||
size = 500 + 5000 * (ratio) # 使用1.5次方函数使差异不那么明显
|
||||
node_sizes.append(size)
|
||||
|
||||
|
||||
# 计算节点颜色(基于连接数)
|
||||
degree = H.degree(node)
|
||||
# 红色分量随着度数增加而增加
|
||||
red = min(1.0, degree / max_degree)
|
||||
r = (degree / max_degree) ** 0.3
|
||||
red = min(1.0, r)
|
||||
# 蓝色分量随着度数减少而增加
|
||||
blue = 1.0 - red
|
||||
color = (red, 0, blue)
|
||||
blue = max(0.0, 1 - red)
|
||||
# blue = 1
|
||||
color = (red, 0.1, blue)
|
||||
node_colors.append(color)
|
||||
|
||||
|
||||
# 绘制图形
|
||||
plt.figure(figsize=(12, 8))
|
||||
pos = nx.spring_layout(H, k=1.5, iterations=50) # 增加k值使节点分布更开
|
||||
nx.draw(H, pos,
|
||||
with_labels=True,
|
||||
node_color=node_colors,
|
||||
node_size=node_sizes,
|
||||
font_size=10,
|
||||
font_family='SimHei',
|
||||
font_weight='bold',
|
||||
edge_color='gray',
|
||||
width=0.5,
|
||||
alpha=0.7)
|
||||
|
||||
pos = nx.spring_layout(H, k=1, iterations=50) # 增加k值使节点分布更开
|
||||
nx.draw(H, pos,
|
||||
with_labels=True,
|
||||
node_color=node_colors,
|
||||
node_size=node_sizes,
|
||||
font_size=10,
|
||||
font_family='SimHei',
|
||||
font_weight='bold',
|
||||
edge_color='gray',
|
||||
width=0.5,
|
||||
alpha=0.9)
|
||||
|
||||
title = '记忆图谱可视化 - 节点大小表示记忆数量,颜色表示连接数'
|
||||
plt.title(title, fontsize=16, fontfamily='SimHei')
|
||||
plt.show()
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# Script entry point: run main() exactly once when executed directly.
if __name__ == "__main__":
    main()
|
||||
|
||||
@@ -7,6 +7,7 @@ import time
|
||||
import jieba
|
||||
import networkx as nx
|
||||
|
||||
from loguru import logger
|
||||
from ...common.database import Database # 使用正确的导入语法
|
||||
from ..chat.config import global_config
|
||||
from ..chat.utils import (
|
||||
@@ -22,29 +23,49 @@ class Memory_graph:
|
||||
def __init__(self):
    """Create an empty memory graph and obtain the database handle."""
    self.G = nx.Graph()  # networkx graph: nodes are concepts, edges link related concepts
    self.db = Database.get_instance()
|
||||
|
||||
|
||||
def connect_dot(self, concept1, concept2):
    """Link two concepts, strengthening the link if it already exists.

    A new edge starts with ``strength=1`` and records ``created_time`` and
    ``last_modified``; an existing edge gets its strength incremented and
    ``last_modified`` refreshed.

    Args:
        concept1: First concept (node key).
        concept2: Second concept (node key).
    """
    # Never create self-loops.
    if concept1 == concept2:
        return

    current_time = datetime.datetime.now().timestamp()

    if self.G.has_edge(concept1, concept2):
        # Existing edge: bump the strength and refresh the modification time.
        edge = self.G[concept1][concept2]
        edge['strength'] = edge.get('strength', 1) + 1
        edge['last_modified'] = current_time
    else:
        # New edge: start at strength 1 and stamp both timestamps.
        self.G.add_edge(concept1, concept2,
                        strength=1,
                        created_time=current_time,
                        last_modified=current_time)
|
||||
|
||||
def add_dot(self, concept, memory):
    """Attach a memory to a concept node, creating the node if needed.

    Args:
        concept: Node key.
        memory: Memory item appended to the node's ``memory_items`` list.
    """
    current_time = datetime.datetime.now().timestamp()

    if concept not in self.G:
        # Brand-new node: start its memory list and stamp both timestamps.
        self.G.add_node(concept,
                        memory_items=[memory],
                        created_time=current_time,
                        last_modified=current_time)
        return

    node = self.G.nodes[concept]
    if 'memory_items' in node:
        # Normalise a non-list payload into a list before appending.
        if not isinstance(node['memory_items'], list):
            node['memory_items'] = [node['memory_items']]
        node['memory_items'].append(memory)
    else:
        node['memory_items'] = [memory]
        # The node existed without memories, so this is its first memory:
        # set created_time unless something already recorded one.
        if 'created_time' not in node:
            node['created_time'] = current_time
    node['last_modified'] = current_time
|
||||
|
||||
def get_dot(self, concept):
|
||||
# 检查节点是否存在于图中
|
||||
if concept in self.G:
|
||||
@@ -56,13 +77,13 @@ class Memory_graph:
|
||||
def get_related_item(self, topic, depth=1):
|
||||
if topic not in self.G:
|
||||
return [], []
|
||||
|
||||
|
||||
first_layer_items = []
|
||||
second_layer_items = []
|
||||
|
||||
|
||||
# 获取相邻节点
|
||||
neighbors = list(self.G.neighbors(topic))
|
||||
|
||||
|
||||
# 获取当前节点的记忆项
|
||||
node_data = self.get_dot(topic)
|
||||
if node_data:
|
||||
@@ -73,7 +94,7 @@ class Memory_graph:
|
||||
first_layer_items.extend(memory_items)
|
||||
else:
|
||||
first_layer_items.append(memory_items)
|
||||
|
||||
|
||||
# 只在depth=2时获取第二层记忆
|
||||
if depth >= 2:
|
||||
# 获取相邻节点的记忆项
|
||||
@@ -87,9 +108,9 @@ class Memory_graph:
|
||||
second_layer_items.extend(memory_items)
|
||||
else:
|
||||
second_layer_items.append(memory_items)
|
||||
|
||||
|
||||
return first_layer_items, second_layer_items
|
||||
|
||||
|
||||
@property
|
||||
def dots(self):
|
||||
# 返回所有节点对应的 Memory_dot 对象
|
||||
@@ -99,43 +120,43 @@ class Memory_graph:
|
||||
"""随机删除指定话题中的一条记忆,如果话题没有记忆则移除该话题节点"""
|
||||
if topic not in self.G:
|
||||
return None
|
||||
|
||||
|
||||
# 获取话题节点数据
|
||||
node_data = self.G.nodes[topic]
|
||||
|
||||
|
||||
# 如果节点存在memory_items
|
||||
if 'memory_items' in node_data:
|
||||
memory_items = node_data['memory_items']
|
||||
|
||||
|
||||
# 确保memory_items是列表
|
||||
if not isinstance(memory_items, list):
|
||||
memory_items = [memory_items] if memory_items else []
|
||||
|
||||
|
||||
# 如果有记忆项可以删除
|
||||
if memory_items:
|
||||
# 随机选择一个记忆项删除
|
||||
removed_item = random.choice(memory_items)
|
||||
memory_items.remove(removed_item)
|
||||
|
||||
|
||||
# 更新节点的记忆项
|
||||
if memory_items:
|
||||
self.G.nodes[topic]['memory_items'] = memory_items
|
||||
else:
|
||||
# 如果没有记忆项了,删除整个节点
|
||||
self.G.remove_node(topic)
|
||||
|
||||
|
||||
return removed_item
|
||||
|
||||
|
||||
return None
|
||||
|
||||
|
||||
# 海马体
|
||||
class Hippocampus:
|
||||
def __init__(self, memory_graph: Memory_graph):
    """Bind the hippocampus to a memory graph and set up its two LLM clients.

    Args:
        memory_graph: The graph this object reads from and writes to.
    """
    self.memory_graph = memory_graph
    # Client used to extract topics from text.
    self.llm_topic_judge = LLM_request(model=global_config.llm_topic_judge, temperature=0.5)
    # Client used to summarise text with respect to one topic.
    self.llm_summary_by_topic = LLM_request(model=global_config.llm_summary_by_topic, temperature=0.5)
|
||||
|
||||
def get_all_node_names(self) -> list:
|
||||
"""获取记忆图中所有节点的名字列表
|
||||
|
||||
@@ -156,98 +177,167 @@ class Hippocampus:
|
||||
"""计算边的特征值"""
|
||||
nodes = sorted([source, target])
|
||||
return hash(f"{nodes[0]}:{nodes[1]}")
|
||||
|
||||
def get_memory_sample(self, chat_size=20, time_frequency: dict = None):
    """Sample chat history around random moments of the last 24 hours.

    Three look-back windows are sampled: ``near`` (up to 1 h ago), ``mid``
    (1 h to 4 h ago) and ``far`` (4 h to 24 h ago).

    Args:
        chat_size: Number of messages requested per sample.
        time_frequency: Maps ``'near'`` / ``'mid'`` / ``'far'`` to the number
            of samples to draw from that window. A missing key means no
            samples for that window. Defaults to
            ``{'near': 2, 'mid': 4, 'far': 3}``.

    Returns:
        list: One entry per non-empty sample; each entry is whatever
        ``get_cloest_chat_from_db`` returned for that timestamp.
    """
    # Build the default per call instead of sharing one mutable dict.
    if time_frequency is None:
        time_frequency = {'near': 2, 'mid': 4, 'far': 3}

    current_timestamp = datetime.datetime.now().timestamp()

    # (frequency key, minimum seconds back, maximum seconds back)
    windows = (
        ('near', 1, 3600),
        ('mid', 3600, 3600 * 4),
        ('far', 3600 * 4, 3600 * 24),
    )

    chat_samples = []
    for key, min_back, max_back in windows:
        for _ in range(time_frequency.get(key, 0)):
            random_time = current_timestamp - random.randint(min_back, max_back)
            messages = get_cloest_chat_from_db(db=self.memory_graph.db, length=chat_size, timestamp=random_time)
            if messages:
                chat_samples.append(messages)

    return chat_samples
|
||||
|
||||
async def memory_compress(self, messages: list, compress_rate=0.1):
    """Compress a batch of messages into per-topic memories.

    Args:
        messages: Message records. Each record is normally a dict with a
            ``time`` key (Unix timestamp) and a ``text`` key. Plain strings
            are accepted too and treated as text without a timestamp, because
            ``merge_memory`` passes stored memory strings here.
        compress_rate: Forwarded to ``calculate_topic_num``.

    Returns:
        tuple: ``(compressed_memory, similar_topics_dict)`` where
        ``compressed_memory`` is a set of ``(topic, memory)`` pairs and
        ``similar_topics_dict`` maps each topic to at most five
        ``(existing_topic, similarity)`` pairs, most similar first.
    """
    if not messages:
        return set(), {}

    # Separate timestamps and text; string entries carry no timestamp.
    timestamps = [msg['time'] for msg in messages if isinstance(msg, dict) and 'time' in msg]
    texts = [msg['text'] if isinstance(msg, dict) else str(msg) for msg in messages]

    # Describe the time span covered by the messages, when known.
    time_info = ""
    if timestamps:
        earliest_dt = datetime.datetime.fromtimestamp(min(timestamps))
        latest_dt = datetime.datetime.fromtimestamp(max(timestamps))
        if earliest_dt.year == latest_dt.year:
            # Same year: mention the year once and keep the stamps short.
            earliest_str = earliest_dt.strftime("%m-%d %H:%M:%S")
            latest_str = latest_dt.strftime("%m-%d %H:%M:%S")
            time_info += f"是在{earliest_dt.year}年,{earliest_str} 到 {latest_str} 的对话:\n"
        else:
            earliest_str = earliest_dt.strftime("%Y-%m-%d %H:%M:%S")
            latest_str = latest_dt.strftime("%Y-%m-%d %H:%M:%S")
            time_info += f"是从 {earliest_str} 到 {latest_str} 的对话:\n"

    input_text = "".join(f"{text}\n" for text in texts)
    logger.debug(input_text)

    topic_num = self.calculate_topic_num(input_text, compress_rate)
    topics_response = await self.llm_topic_judge.generate_response(self.find_topic_llm(input_text, topic_num))

    # Split the model's answer on full-width commas, ASCII commas,
    # enumeration commas and spaces, then drop banned topics.
    filter_keywords = global_config.memory_ban_words
    normalized = topics_response[0].replace(",", ",").replace("、", ",").replace(" ", ",")
    topics = [topic.strip() for topic in normalized.split(",") if topic.strip()]
    filtered_topics = [topic for topic in topics if not any(keyword in topic for keyword in filter_keywords)]

    logger.info(f"过滤后话题: {filtered_topics}")

    # Create one summary request per topic before awaiting any of them.
    tasks = []
    for topic in filtered_topics:
        topic_what_prompt = self.topic_what(input_text, topic, time_info)
        task = self.llm_summary_by_topic.generate_response_async(topic_what_prompt)
        tasks.append((topic.strip(), task))

    compressed_memory = set()
    similar_topics_dict = {}  # topic -> list of (existing_topic, similarity)
    for topic, task in tasks:
        response = await task
        if not response:
            continue
        compressed_memory.add((topic, response[0]))

        # Look for existing graph topics that resemble this one, using a
        # bag-of-words cosine similarity over jieba tokens.
        topic_words = set(jieba.cut(topic))  # tokenised once per topic
        similar_topics = []
        for existing_topic in list(self.memory_graph.G.nodes()):
            existing_words = set(jieba.cut(existing_topic))
            all_words = topic_words | existing_words
            v1 = [1 if word in topic_words else 0 for word in all_words]
            v2 = [1 if word in existing_words else 0 for word in all_words]
            similarity = cosine_similarity(v1, v2)
            if similarity >= 0.6:
                similar_topics.append((existing_topic, similarity))

        similar_topics.sort(key=lambda x: x[1], reverse=True)
        similar_topics_dict[topic] = similar_topics[:5]

    return compressed_memory, similar_topics_dict
|
||||
|
||||
def calculate_topic_num(self, text, compress_rate):
    """Estimate how many topics to extract from ``text``.

    The result is the truncated average of two estimates: the number of
    newlines scaled by ``compress_rate``, and a value clamped to 1..5
    derived from ``calculate_information_content(text)``.

    Args:
        text: Text to analyse; newlines are counted as message separators.
        compress_rate: Multiplier applied to the newline count.

    Returns:
        int: The number of topics (may be 0 for very short text).
    """
    information_content = calculate_information_content(text)
    topic_by_length = text.count('\n') * compress_rate
    topic_by_information_content = max(1, min(5, int((information_content - 3) * 2)))
    topic_num = int((topic_by_length + topic_by_information_content) / 2)
    logger.debug(
        f"topic_by_length: {topic_by_length}, topic_by_information_content: {topic_by_information_content}, "
        f"topic_num: {topic_num}")
    return topic_num
|
||||
|
||||
async def operation_build_memory(self, chat_size=20):
    """Sample recent chat history, compress it into memories and store them.

    For every sample the compressed ``(topic, memory)`` pairs are added to
    the graph, each topic is linked to similar topics already in the graph,
    and all topics from the same sample are linked to each other. The graph
    is written to the database once at the end.

    Args:
        chat_size: Number of messages requested per sample.
    """
    from itertools import combinations

    time_frequency = {'near': 3, 'mid': 8, 'far': 5}
    memory_samples = self.get_memory_sample(chat_size, time_frequency)
    total = len(memory_samples)
    graph = self.memory_graph.G

    for index, messages in enumerate(memory_samples, 1):
        all_topics = []

        # Progress bar for the debug log.
        progress = (index / total) * 100
        bar_length = 30
        filled_length = int(bar_length * index // total)
        bar = '█' * filled_length + '-' * (bar_length - filled_length)
        logger.debug(f"进度: [{bar}] {progress:.1f}% ({index}/{total})")

        compress_rate = 0.1
        compressed_memory, similar_topics_dict = await self.memory_compress(messages, compress_rate)
        logger.info(f"压缩后记忆数量: {len(compressed_memory)},似曾相识的话题: {len(similar_topics_dict)}")

        # Add every memory to the graph.
        for topic, memory in compressed_memory:
            logger.info(f"添加节点: {topic}")
            self.memory_graph.add_dot(topic, memory)
            all_topics.append(topic)

            # Link the topic to similar topics that already exist.
            for similar_topic, similarity in similar_topics_dict.get(topic, []):
                if topic == similar_topic:
                    continue
                strength = int(similarity * 10)
                logger.info(f"连接相似节点: {topic} 和 {similar_topic} (强度: {strength})")
                # Stamp the edge so the time-based forgetting pass can age
                # it; keep the original created_time of an existing edge.
                now = datetime.datetime.now().timestamp()
                edge_attrs = {'strength': strength, 'last_modified': now}
                if not graph.has_edge(topic, similar_topic):
                    edge_attrs['created_time'] = now
                graph.add_edge(topic, similar_topic, **edge_attrs)

        # Link every pair of topics that came from the same sample.
        for first, second in combinations(all_topics, 2):
            logger.info(f"连接同批次节点: {first} 和 {second}")
            self.memory_graph.connect_dot(first, second)

    self.sync_memory_to_db()
|
||||
|
||||
def sync_memory_to_db(self):
|
||||
@@ -255,52 +345,54 @@ class Hippocampus:
|
||||
# 获取数据库中所有节点和内存中所有节点
|
||||
db_nodes = list(self.memory_graph.db.db.graph_data.nodes.find())
|
||||
memory_nodes = list(self.memory_graph.G.nodes(data=True))
|
||||
|
||||
# 转换数据库节点为字典格式,方便查找
|
||||
|
||||
# 转换数据库节点为字典格式,方便查找
|
||||
db_nodes_dict = {node['concept']: node for node in db_nodes}
|
||||
|
||||
|
||||
# 检查并更新节点
|
||||
for concept, data in memory_nodes:
|
||||
memory_items = data.get('memory_items', [])
|
||||
if not isinstance(memory_items, list):
|
||||
memory_items = [memory_items] if memory_items else []
|
||||
|
||||
|
||||
# 计算内存中节点的特征值
|
||||
memory_hash = self.calculate_node_hash(concept, memory_items)
|
||||
|
||||
|
||||
# 获取时间信息
|
||||
created_time = data.get('created_time', datetime.datetime.now().timestamp())
|
||||
last_modified = data.get('last_modified', datetime.datetime.now().timestamp())
|
||||
|
||||
if concept not in db_nodes_dict:
|
||||
# 数据库中缺少的节点,添加
|
||||
# 数据库中缺少的节点,添加
|
||||
node_data = {
|
||||
'concept': concept,
|
||||
'memory_items': memory_items,
|
||||
'hash': memory_hash
|
||||
'hash': memory_hash,
|
||||
'created_time': created_time,
|
||||
'last_modified': last_modified
|
||||
}
|
||||
self.memory_graph.db.db.graph_data.nodes.insert_one(node_data)
|
||||
else:
|
||||
# 获取数据库中节点的特征值
|
||||
db_node = db_nodes_dict[concept]
|
||||
db_hash = db_node.get('hash', None)
|
||||
|
||||
# 如果特征值不同,则更新节点
|
||||
|
||||
# 如果特征值不同,则更新节点
|
||||
if db_hash != memory_hash:
|
||||
self.memory_graph.db.db.graph_data.nodes.update_one(
|
||||
{'concept': concept},
|
||||
{'$set': {
|
||||
'memory_items': memory_items,
|
||||
'hash': memory_hash
|
||||
'hash': memory_hash,
|
||||
'created_time': created_time,
|
||||
'last_modified': last_modified
|
||||
}}
|
||||
)
|
||||
|
||||
# 检查并删除数据库中多余的节点
|
||||
memory_concepts = set(node[0] for node in memory_nodes)
|
||||
for db_node in db_nodes:
|
||||
if db_node['concept'] not in memory_concepts:
|
||||
self.memory_graph.db.db.graph_data.nodes.delete_one({'concept': db_node['concept']})
|
||||
|
||||
|
||||
# 处理边的信息
|
||||
db_edges = list(self.memory_graph.db.db.graph_data.edges.find())
|
||||
memory_edges = list(self.memory_graph.G.edges())
|
||||
|
||||
memory_edges = list(self.memory_graph.G.edges(data=True))
|
||||
|
||||
# 创建边的哈希值字典
|
||||
db_edge_dict = {}
|
||||
for edge in db_edges:
|
||||
@@ -309,20 +401,26 @@ class Hippocampus:
|
||||
'hash': edge_hash,
|
||||
'strength': edge.get('strength', 1)
|
||||
}
|
||||
|
||||
|
||||
# 检查并更新边
|
||||
for source, target in memory_edges:
|
||||
for source, target, data in memory_edges:
|
||||
edge_hash = self.calculate_edge_hash(source, target)
|
||||
edge_key = (source, target)
|
||||
strength = self.memory_graph.G[source][target].get('strength', 1)
|
||||
strength = data.get('strength', 1)
|
||||
|
||||
# 获取边的时间信息
|
||||
created_time = data.get('created_time', datetime.datetime.now().timestamp())
|
||||
last_modified = data.get('last_modified', datetime.datetime.now().timestamp())
|
||||
|
||||
if edge_key not in db_edge_dict:
|
||||
# 添加新边
|
||||
edge_data = {
|
||||
'source': source,
|
||||
'target': target,
|
||||
'strength': strength,
|
||||
'hash': edge_hash
|
||||
'hash': edge_hash,
|
||||
'created_time': created_time,
|
||||
'last_modified': last_modified
|
||||
}
|
||||
self.memory_graph.db.db.graph_data.edges.insert_one(edge_data)
|
||||
else:
|
||||
@@ -332,25 +430,17 @@ class Hippocampus:
|
||||
{'source': source, 'target': target},
|
||||
{'$set': {
|
||||
'hash': edge_hash,
|
||||
'strength': strength
|
||||
'strength': strength,
|
||||
'created_time': created_time,
|
||||
'last_modified': last_modified
|
||||
}}
|
||||
)
|
||||
|
||||
# 删除多余的边
|
||||
memory_edge_set = set(memory_edges)
|
||||
for edge_key in db_edge_dict:
|
||||
if edge_key not in memory_edge_set:
|
||||
source, target = edge_key
|
||||
self.memory_graph.db.db.graph_data.edges.delete_one({
|
||||
'source': source,
|
||||
'target': target
|
||||
})
|
||||
|
||||
def sync_memory_from_db(self):
|
||||
"""从数据库同步数据到内存中的图结构"""
|
||||
# 清空当前图
|
||||
self.memory_graph.G.clear()
|
||||
|
||||
|
||||
# 从数据库加载所有节点
|
||||
nodes = self.memory_graph.db.db.graph_data.nodes.find()
|
||||
for node in nodes:
|
||||
@@ -359,61 +449,107 @@ class Hippocampus:
|
||||
# 确保memory_items是列表
|
||||
if not isinstance(memory_items, list):
|
||||
memory_items = [memory_items] if memory_items else []
|
||||
# 添加节点到图中
|
||||
self.memory_graph.G.add_node(concept, memory_items=memory_items)
|
||||
|
||||
# 获取时间信息
|
||||
created_time = node.get('created_time', datetime.datetime.now().timestamp())
|
||||
last_modified = node.get('last_modified', datetime.datetime.now().timestamp())
|
||||
|
||||
# 添加节点到图中
|
||||
self.memory_graph.G.add_node(concept,
|
||||
memory_items=memory_items,
|
||||
created_time=created_time,
|
||||
last_modified=last_modified)
|
||||
|
||||
# 从数据库加载所有边
|
||||
edges = self.memory_graph.db.db.graph_data.edges.find()
|
||||
for edge in edges:
|
||||
source = edge['source']
|
||||
target = edge['target']
|
||||
strength = edge.get('strength', 1) # 获取 strength,默认为 1
|
||||
strength = edge.get('strength', 1) # 获取 strength,默认为 1
|
||||
|
||||
# 获取时间信息
|
||||
created_time = edge.get('created_time', datetime.datetime.now().timestamp())
|
||||
last_modified = edge.get('last_modified', datetime.datetime.now().timestamp())
|
||||
|
||||
# 只有当源节点和目标节点都存在时才添加边
|
||||
if source in self.memory_graph.G and target in self.memory_graph.G:
|
||||
self.memory_graph.G.add_edge(source, target, strength=strength)
|
||||
|
||||
self.memory_graph.G.add_edge(source, target,
|
||||
strength=strength,
|
||||
created_time=created_time,
|
||||
last_modified=last_modified)
|
||||
|
||||
async def operation_forget_topic(self, percentage=0.1):
    """Randomly age a share of the graph's edges and nodes.

    A random sample of edges and nodes is inspected. Anything not modified
    for more than 24 hours decays: an edge loses one point of strength (and
    is removed at 0), a node loses one random memory item (and is removed
    when none are left). The graph is synced to the database if anything
    changed.

    Args:
        percentage: Fraction of nodes and of edges to inspect; at least one
            of each is inspected when available.
    """
    graph = self.memory_graph.G
    all_nodes = list(graph.nodes())
    all_edges = list(graph.edges())

    # Cap the sample size at the population size: random.sample raises
    # ValueError otherwise (e.g. on a graph with no edges).
    check_nodes_count = min(len(all_nodes), max(1, int(len(all_nodes) * percentage)))
    check_edges_count = min(len(all_edges), max(1, int(len(all_edges) * percentage)))

    nodes_to_check = random.sample(all_nodes, check_nodes_count)
    edges_to_check = random.sample(all_edges, check_edges_count)

    edge_changes = {'weakened': 0, 'removed': 0}
    node_changes = {'reduced': 0, 'removed': 0}

    current_time = datetime.datetime.now().timestamp()
    max_idle_seconds = 3600 * 24

    # Age the sampled edges.
    logger.info("开始检查连接...")
    for source, target in edges_to_check:
        edge_data = graph[source][target]
        # Edges without a timestamp are treated as just modified, the same
        # way nodes are handled below.
        last_modified = edge_data.get('last_modified', current_time)
        if current_time - last_modified > max_idle_seconds:
            current_strength = edge_data.get('strength', 1)
            new_strength = current_strength - 1

            if new_strength <= 0:
                graph.remove_edge(source, target)
                edge_changes['removed'] += 1
                logger.info(f"\033[1;31m[连接移除]\033[0m {source} - {target}")
            else:
                edge_data['strength'] = new_strength
                edge_data['last_modified'] = current_time
                edge_changes['weakened'] += 1
                logger.info(f"\033[1;34m[连接减弱]\033[0m {source} - {target} (强度: {current_strength} -> {new_strength})")

    # Age the sampled nodes.
    logger.info("开始检查节点...")
    for node in nodes_to_check:
        node_data = graph.nodes[node]
        last_modified = node_data.get('last_modified', current_time)

        if current_time - last_modified > max_idle_seconds:
            memory_items = node_data.get('memory_items', [])
            if not isinstance(memory_items, list):
                memory_items = [memory_items] if memory_items else []

            if memory_items:
                current_count = len(memory_items)
                removed_item = random.choice(memory_items)
                memory_items.remove(removed_item)

                if memory_items:
                    graph.nodes[node]['memory_items'] = memory_items
                    graph.nodes[node]['last_modified'] = current_time
                    node_changes['reduced'] += 1
                    logger.info(f"\033[1;33m[记忆减少]\033[0m {node} (记忆数量: {current_count} -> {len(memory_items)})")
                else:
                    # The last memory is gone: drop the node itself.
                    graph.remove_node(node)
                    node_changes['removed'] += 1
                    logger.info(f"\033[1;31m[节点移除]\033[0m {node}")

    # Persist only when something actually changed.
    if any(count > 0 for count in edge_changes.values()) or any(count > 0 for count in node_changes.values()):
        self.sync_memory_to_db()
        logger.info("\n遗忘操作统计:")
        logger.info(f"连接变化: {edge_changes['weakened']} 个减弱, {edge_changes['removed']} 个移除")
        logger.info(f"节点变化: {node_changes['reduced']} 个减少记忆, {node_changes['removed']} 个移除")
    else:
        logger.info("\n本次检查没有节点或连接满足遗忘条件")
|
||||
|
||||
async def merge_memory(self, topic):
|
||||
"""
|
||||
@@ -426,35 +562,35 @@ class Hippocampus:
|
||||
memory_items = self.memory_graph.G.nodes[topic].get('memory_items', [])
|
||||
if not isinstance(memory_items, list):
|
||||
memory_items = [memory_items] if memory_items else []
|
||||
|
||||
|
||||
# 如果记忆项不足,直接返回
|
||||
if len(memory_items) < 10:
|
||||
return
|
||||
|
||||
|
||||
# 随机选择10条记忆
|
||||
selected_memories = random.sample(memory_items, 10)
|
||||
|
||||
|
||||
# 拼接成文本
|
||||
merged_text = "\n".join(selected_memories)
|
||||
print(f"\n[合并记忆] 话题: {topic}")
|
||||
print(f"选择的记忆:\n{merged_text}")
|
||||
|
||||
logger.debug(f"\n[合并记忆] 话题: {topic}")
|
||||
logger.debug(f"选择的记忆:\n{merged_text}")
|
||||
|
||||
# 使用memory_compress生成新的压缩记忆
|
||||
compressed_memories = await self.memory_compress(merged_text, 0.1)
|
||||
|
||||
compressed_memories, _ = await self.memory_compress(selected_memories, 0.1)
|
||||
|
||||
# 从原记忆列表中移除被选中的记忆
|
||||
for memory in selected_memories:
|
||||
memory_items.remove(memory)
|
||||
|
||||
|
||||
# 添加新的压缩记忆
|
||||
for _, compressed_memory in compressed_memories:
|
||||
memory_items.append(compressed_memory)
|
||||
print(f"添加压缩记忆: {compressed_memory}")
|
||||
|
||||
logger.info(f"添加压缩记忆: {compressed_memory}")
|
||||
|
||||
# 更新节点的记忆项
|
||||
self.memory_graph.G.nodes[topic]['memory_items'] = memory_items
|
||||
print(f"完成记忆合并,当前记忆数量: {len(memory_items)}")
|
||||
|
||||
logger.debug(f"完成记忆合并,当前记忆数量: {len(memory_items)}")
|
||||
|
||||
async def operation_merge_memory(self, percentage=0.1):
|
||||
"""
|
||||
随机检查一定比例的节点,对内容数量超过100的节点进行记忆合并
|
||||
@@ -468,7 +604,7 @@ class Hippocampus:
|
||||
check_count = max(1, int(len(all_nodes) * percentage))
|
||||
# 随机选择节点
|
||||
nodes_to_check = random.sample(all_nodes, check_count)
|
||||
|
||||
|
||||
merged_nodes = []
|
||||
for node in nodes_to_check:
|
||||
# 获取节点的内容条数
|
||||
@@ -476,26 +612,26 @@ class Hippocampus:
|
||||
if not isinstance(memory_items, list):
|
||||
memory_items = [memory_items] if memory_items else []
|
||||
content_count = len(memory_items)
|
||||
|
||||
|
||||
# 如果内容数量超过100,进行合并
|
||||
if content_count > 100:
|
||||
print(f"\n检查节点: {node}, 当前记忆数量: {content_count}")
|
||||
logger.debug(f"检查节点: {node}, 当前记忆数量: {content_count}")
|
||||
await self.merge_memory(node)
|
||||
merged_nodes.append(node)
|
||||
|
||||
|
||||
# 同步到数据库
|
||||
if merged_nodes:
|
||||
self.sync_memory_to_db()
|
||||
print(f"\n完成记忆合并操作,共处理 {len(merged_nodes)} 个节点")
|
||||
logger.debug(f"完成记忆合并操作,共处理 {len(merged_nodes)} 个节点")
|
||||
else:
|
||||
print("\n本次检查没有需要合并的节点")
|
||||
logger.debug("本次检查没有需要合并的节点")
|
||||
|
||||
def find_topic_llm(self, text, topic_num):
    """Build the prompt asking the model for ``topic_num`` key concepts.

    Args:
        text: Source text the concepts are drawn from.
        topic_num: Number of concepts requested.

    Returns:
        str: The prompt; it asks for a comma-separated list with no numbering.
    """
    return f'这是一段文字:{text}。请你从这段话中总结出{topic_num}个关键的概念,可以是名词,动词,或者特定人物,帮我列出来,用逗号,隔开,尽可能精简。只需要列举{topic_num}个话题就好,不要有序号,不要告诉我其他内容。'
|
||||
|
||||
def topic_what(self, text, topic, time_info):
    """Build the prompt asking the model to summarise ``topic`` from ``text``.

    Args:
        text: Source text to summarise.
        topic: Concept the summary should focus on.
        time_info: Description of when the text was written; inserted right
            before the text.

    Returns:
        str: The prompt; it asks for a single natural sentence.
    """
    return f'这是一段文字,{time_info}:{text}。我想让你基于这段文字来概括"{topic}"这个概念,帮我总结成一句自然的话,可以包含时间和人物,以及具体的观点。只输出这句话就好'
|
||||
|
||||
async def _identify_topics(self, text: str) -> list:
|
||||
@@ -509,11 +645,12 @@ class Hippocampus:
|
||||
"""
|
||||
topics_response = await self.llm_topic_judge.generate_response(self.find_topic_llm(text, 5))
|
||||
# print(f"话题: {topics_response[0]}")
|
||||
topics = [topic.strip() for topic in topics_response[0].replace(",", ",").replace("、", ",").replace(" ", ",").split(",") if topic.strip()]
|
||||
topics = [topic.strip() for topic in
|
||||
topics_response[0].replace(",", ",").replace("、", ",").replace(" ", ",").split(",") if topic.strip()]
|
||||
# print(f"话题: {topics}")
|
||||
|
||||
|
||||
return topics
|
||||
|
||||
|
||||
def _find_similar_topics(self, topics: list, similarity_threshold: float = 0.4, debug_info: str = "") -> list:
|
||||
"""查找与给定主题相似的记忆主题
|
||||
|
||||
@@ -527,16 +664,16 @@ class Hippocampus:
|
||||
"""
|
||||
all_memory_topics = self.get_all_node_names()
|
||||
all_similar_topics = []
|
||||
|
||||
|
||||
# 计算每个识别出的主题与记忆主题的相似度
|
||||
for topic in topics:
|
||||
if debug_info:
|
||||
# print(f"\033[1;32m[{debug_info}]\033[0m 正在思考有没有见过: {topic}")
|
||||
pass
|
||||
|
||||
|
||||
topic_vector = text_to_vector(topic)
|
||||
has_similar_topic = False
|
||||
|
||||
|
||||
for memory_topic in all_memory_topics:
|
||||
memory_vector = text_to_vector(memory_topic)
|
||||
# 获取所有唯一词
|
||||
@@ -546,20 +683,20 @@ class Hippocampus:
|
||||
v2 = [memory_vector.get(word, 0) for word in all_words]
|
||||
# 计算相似度
|
||||
similarity = cosine_similarity(v1, v2)
|
||||
|
||||
|
||||
if similarity >= similarity_threshold:
|
||||
has_similar_topic = True
|
||||
if debug_info:
|
||||
# print(f"\033[1;32m[{debug_info}]\033[0m 找到相似主题: {topic} -> {memory_topic} (相似度: {similarity:.2f})")
|
||||
pass
|
||||
all_similar_topics.append((memory_topic, similarity))
|
||||
|
||||
|
||||
if not has_similar_topic and debug_info:
|
||||
# print(f"\033[1;31m[{debug_info}]\033[0m 没有见过: {topic} ,呃呃")
|
||||
pass
|
||||
|
||||
|
||||
return all_similar_topics
|
||||
|
||||
|
||||
def _get_top_topics(self, similar_topics: list, max_topics: int = 5) -> list:
|
||||
"""获取相似度最高的主题
|
||||
|
||||
@@ -572,36 +709,36 @@ class Hippocampus:
|
||||
"""
|
||||
seen_topics = set()
|
||||
top_topics = []
|
||||
|
||||
|
||||
for topic, score in sorted(similar_topics, key=lambda x: x[1], reverse=True):
|
||||
if topic not in seen_topics and len(top_topics) < max_topics:
|
||||
seen_topics.add(topic)
|
||||
top_topics.append((topic, score))
|
||||
|
||||
|
||||
return top_topics
|
||||
|
||||
async def memory_activate_value(self, text: str, max_topics: int = 5, similarity_threshold: float = 0.3) -> int:
|
||||
"""计算输入文本对记忆的激活程度"""
|
||||
print(f"\033[1;32m[记忆激活]\033[0m 识别主题: {await self._identify_topics(text)}")
|
||||
|
||||
logger.info(f"识别主题: {await self._identify_topics(text)}")
|
||||
|
||||
# 识别主题
|
||||
identified_topics = await self._identify_topics(text)
|
||||
if not identified_topics:
|
||||
return 0
|
||||
|
||||
|
||||
# 查找相似主题
|
||||
all_similar_topics = self._find_similar_topics(
|
||||
identified_topics,
|
||||
identified_topics,
|
||||
similarity_threshold=similarity_threshold,
|
||||
debug_info="记忆激活"
|
||||
)
|
||||
|
||||
|
||||
if not all_similar_topics:
|
||||
return 0
|
||||
|
||||
|
||||
# 获取最相关的主题
|
||||
top_topics = self._get_top_topics(all_similar_topics, max_topics)
|
||||
|
||||
|
||||
# 如果只找到一个主题,进行惩罚
|
||||
if len(top_topics) == 1:
|
||||
topic, score = top_topics[0]
|
||||
@@ -611,15 +748,16 @@ class Hippocampus:
|
||||
memory_items = [memory_items] if memory_items else []
|
||||
content_count = len(memory_items)
|
||||
penalty = 1.0 / (1 + math.log(content_count + 1))
|
||||
|
||||
|
||||
activation = int(score * 50 * penalty)
|
||||
print(f"\033[1;32m[记忆激活]\033[0m 单主题「{topic}」- 相似度: {score:.3f}, 内容数: {content_count}, 激活值: {activation}")
|
||||
logger.info(
|
||||
f"[记忆激活]单主题「{topic}」- 相似度: {score:.3f}, 内容数: {content_count}, 激活值: {activation}")
|
||||
return activation
|
||||
|
||||
|
||||
# 计算关键词匹配率,同时考虑内容数量
|
||||
matched_topics = set()
|
||||
topic_similarities = {}
|
||||
|
||||
|
||||
for memory_topic, similarity in top_topics:
|
||||
# 计算内容数量惩罚
|
||||
memory_items = self.memory_graph.G.nodes[memory_topic].get('memory_items', [])
|
||||
@@ -627,7 +765,7 @@ class Hippocampus:
|
||||
memory_items = [memory_items] if memory_items else []
|
||||
content_count = len(memory_items)
|
||||
penalty = 1.0 / (1 + math.log(content_count + 1))
|
||||
|
||||
|
||||
# 对每个记忆主题,检查它与哪些输入主题相似
|
||||
for input_topic in identified_topics:
|
||||
topic_vector = text_to_vector(input_topic)
|
||||
@@ -640,33 +778,36 @@ class Hippocampus:
|
||||
matched_topics.add(input_topic)
|
||||
adjusted_sim = sim * penalty
|
||||
topic_similarities[input_topic] = max(topic_similarities.get(input_topic, 0), adjusted_sim)
|
||||
print(f"\033[1;32m[记忆激活]\033[0m 主题「{input_topic}」-> 「{memory_topic}」(内容数: {content_count}, 相似度: {adjusted_sim:.3f})")
|
||||
|
||||
logger.info(
|
||||
f"[记忆激活]主题「{input_topic}」-> 「{memory_topic}」(内容数: {content_count}, 相似度: {adjusted_sim:.3f})")
|
||||
|
||||
# 计算主题匹配率和平均相似度
|
||||
topic_match = len(matched_topics) / len(identified_topics)
|
||||
average_similarities = sum(topic_similarities.values()) / len(topic_similarities) if topic_similarities else 0
|
||||
|
||||
|
||||
# 计算最终激活值
|
||||
activation = int((topic_match + average_similarities) / 2 * 100)
|
||||
print(f"\033[1;32m[记忆激活]\033[0m 匹配率: {topic_match:.3f}, 平均相似度: {average_similarities:.3f}, 激活值: {activation}")
|
||||
|
||||
logger.info(
|
||||
f"[记忆激活]匹配率: {topic_match:.3f}, 平均相似度: {average_similarities:.3f}, 激活值: {activation}")
|
||||
|
||||
return activation
|
||||
|
||||
async def get_relevant_memories(self, text: str, max_topics: int = 5, similarity_threshold: float = 0.4, max_memory_num: int = 5) -> list:
|
||||
async def get_relevant_memories(self, text: str, max_topics: int = 5, similarity_threshold: float = 0.4,
|
||||
max_memory_num: int = 5) -> list:
|
||||
"""根据输入文本获取相关的记忆内容"""
|
||||
# 识别主题
|
||||
identified_topics = await self._identify_topics(text)
|
||||
|
||||
|
||||
# 查找相似主题
|
||||
all_similar_topics = self._find_similar_topics(
|
||||
identified_topics,
|
||||
identified_topics,
|
||||
similarity_threshold=similarity_threshold,
|
||||
debug_info="记忆检索"
|
||||
)
|
||||
|
||||
|
||||
# 获取最相关的主题
|
||||
relevant_topics = self._get_top_topics(all_similar_topics, max_topics)
|
||||
|
||||
|
||||
# 获取相关记忆内容
|
||||
relevant_memories = []
|
||||
for topic, score in relevant_topics:
|
||||
@@ -674,8 +815,8 @@ class Hippocampus:
|
||||
first_layer, _ = self.memory_graph.get_related_item(topic, depth=1)
|
||||
if first_layer:
|
||||
# 如果记忆条数超过限制,随机选择指定数量的记忆
|
||||
if len(first_layer) > max_memory_num/2:
|
||||
first_layer = random.sample(first_layer, max_memory_num//2)
|
||||
if len(first_layer) > max_memory_num / 2:
|
||||
first_layer = random.sample(first_layer, max_memory_num // 2)
|
||||
# 为每条记忆添加来源主题和相似度信息
|
||||
for memory in first_layer:
|
||||
relevant_memories.append({
|
||||
@@ -683,20 +824,20 @@ class Hippocampus:
|
||||
'similarity': score,
|
||||
'content': memory
|
||||
})
|
||||
|
||||
|
||||
# 如果记忆数量超过5个,随机选择5个
|
||||
# 按相似度排序
|
||||
relevant_memories.sort(key=lambda x: x['similarity'], reverse=True)
|
||||
|
||||
|
||||
if len(relevant_memories) > max_memory_num:
|
||||
relevant_memories = random.sample(relevant_memories, max_memory_num)
|
||||
|
||||
|
||||
return relevant_memories
|
||||
|
||||
|
||||
def segment_text(text):
    """Split ``text`` into a list of tokens with ``jieba.cut``."""
    return list(jieba.cut(text))
|
||||
|
||||
|
||||
from nonebot import get_driver
|
||||
@@ -707,19 +848,19 @@ config = driver.config
|
||||
start_time = time.time()
|
||||
|
||||
Database.initialize(
|
||||
host= config.MONGODB_HOST,
|
||||
port= config.MONGODB_PORT,
|
||||
db_name= config.DATABASE_NAME,
|
||||
username= config.MONGODB_USERNAME,
|
||||
password= config.MONGODB_PASSWORD,
|
||||
host=config.MONGODB_HOST,
|
||||
port=config.MONGODB_PORT,
|
||||
db_name=config.DATABASE_NAME,
|
||||
username=config.MONGODB_USERNAME,
|
||||
password=config.MONGODB_PASSWORD,
|
||||
auth_source=config.MONGODB_AUTH_SOURCE
|
||||
)
|
||||
#创建记忆图
|
||||
# 创建记忆图
|
||||
memory_graph = Memory_graph()
|
||||
#创建海马体
|
||||
# 创建海马体
|
||||
hippocampus = Hippocampus(memory_graph)
|
||||
#从数据库加载记忆图
|
||||
# 从数据库加载记忆图
|
||||
hippocampus.sync_memory_from_db()
|
||||
|
||||
end_time = time.time()
|
||||
print(f"\033[32m[加载海马体耗时: {end_time - start_time:.2f} 秒]\033[0m")
|
||||
logger.success(f"加载海马体耗时: {end_time - start_time:.2f} 秒")
|
||||
|
||||
@@ -13,6 +13,7 @@ import networkx as nx
|
||||
import pymongo
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
import jieba
|
||||
|
||||
# from chat.config import global_config
|
||||
sys.path.append("C:/GitHub/MaiMBot") # 添加项目根目录到 Python 路径
|
||||
@@ -86,23 +87,26 @@ def calculate_information_content(text):
|
||||
return entropy
|
||||
|
||||
def get_cloest_chat_from_db(db, length: int, timestamp: str):
|
||||
"""从数据库中获取最接近指定时间戳的聊天记录,并记录读取次数"""
|
||||
chat_text = ''
|
||||
"""从数据库中获取最接近指定时间戳的聊天记录,并记录读取次数
|
||||
|
||||
Returns:
|
||||
list: 消息记录字典列表,每个字典包含消息内容和时间信息
|
||||
"""
|
||||
chat_records = []
|
||||
closest_record = db.db.messages.find_one({"time": {"$lte": timestamp}}, sort=[('time', -1)])
|
||||
|
||||
if closest_record and closest_record.get('memorized', 0) < 4:
|
||||
closest_time = closest_record['time']
|
||||
group_id = closest_record['group_id'] # 获取groupid
|
||||
group_id = closest_record['group_id']
|
||||
# 获取该时间戳之后的length条消息,且groupid相同
|
||||
chat_records = list(db.db.messages.find(
|
||||
records = list(db.db.messages.find(
|
||||
{"time": {"$gt": closest_time}, "group_id": group_id}
|
||||
).sort('time', 1).limit(length))
|
||||
|
||||
# 更新每条消息的memorized属性
|
||||
for record in chat_records:
|
||||
# 检查当前记录的memorized值
|
||||
for record in records:
|
||||
current_memorized = record.get('memorized', 0)
|
||||
if current_memorized > 3:
|
||||
if current_memorized > 3:
|
||||
print("消息已读取3次,跳过")
|
||||
return ''
|
||||
|
||||
@@ -112,11 +116,14 @@ def get_cloest_chat_from_db(db, length: int, timestamp: str):
|
||||
{"$set": {"memorized": current_memorized + 1}}
|
||||
)
|
||||
|
||||
chat_text += record["detailed_plain_text"]
|
||||
# 添加到记录列表中
|
||||
chat_records.append({
|
||||
'text': record["detailed_plain_text"],
|
||||
'time': record["time"],
|
||||
'group_id': record["group_id"]
|
||||
})
|
||||
|
||||
return chat_text
|
||||
print("消息已读取3次,跳过")
|
||||
return ''
|
||||
return chat_records
|
||||
|
||||
class Memory_graph:
|
||||
def __init__(self):
|
||||
@@ -205,22 +212,34 @@ class Hippocampus:
|
||||
self.llm_model_summary = LLMModel(model_name="Qwen/Qwen2.5-32B-Instruct")
|
||||
|
||||
def get_memory_sample(self, chat_size=20, time_frequency: dict = None):
    """Sample chat snippets from three windows of the recent past.

    For every draw a random timestamp inside the window is chosen and
    ``get_cloest_chat_from_db`` is asked for the messages around it; empty
    results are dropped.

    Args:
        chat_size: Forwarded as ``length`` to ``get_cloest_chat_from_db``.
        time_frequency: Number of draws per window, keyed by ``'near'``
            (1 second to 4 hours ago), ``'mid'`` (4 to 24 hours ago) and
            ``'far'`` (1 to 7 days ago). Defaults to
            ``{'near': 2, 'mid': 4, 'far': 3}``. A missing key means no
            draws for that window.

    Returns:
        list: The non-empty lookup results in draw order (near, mid, far).
        Per the original docstring each element is a list of message-record
        dicts.
    """
    # Build the default per call so no dict object is shared between calls.
    if time_frequency is None:
        time_frequency = {'near': 2, 'mid': 4, 'far': 3}

    current_timestamp = datetime.datetime.now().timestamp()

    # (key, minimum age in seconds, maximum age in seconds)
    windows = (
        ('near', 1, 3600 * 4),
        ('mid', 3600 * 4, 3600 * 24),
        ('far', 3600 * 24, 3600 * 24 * 7),
    )

    chat_samples = []
    for key, min_age, max_age in windows:
        # ``or 0`` keeps a missing key from turning into ``range(None)``.
        for _ in range(time_frequency.get(key) or 0):
            random_time = current_timestamp - random.randint(min_age, max_age)
            messages = get_cloest_chat_from_db(db=self.memory_graph.db, length=chat_size, timestamp=random_time)
            if messages:
                chat_samples.append(messages)

    return chat_samples
|
||||
|
||||
def calculate_topic_num(self,text, compress_rate):
|
||||
"""计算文本的话题数量"""
|
||||
@@ -231,16 +250,49 @@ class Hippocampus:
|
||||
print(f"topic_by_length: {topic_by_length}, topic_by_information_content: {topic_by_information_content}, topic_num: {topic_num}")
|
||||
return topic_num
|
||||
|
||||
async def memory_compress(self, input_text, compress_rate=0.1):
|
||||
async def memory_compress(self, messages: list, compress_rate=0.1):
|
||||
"""压缩消息记录为记忆
|
||||
|
||||
Args:
|
||||
messages: 消息记录字典列表,每个字典包含text和time字段
|
||||
compress_rate: 压缩率
|
||||
|
||||
Returns:
|
||||
set: (话题, 记忆) 元组集合
|
||||
"""
|
||||
if not messages:
|
||||
return set()
|
||||
|
||||
# 合并消息文本,同时保留时间信息
|
||||
input_text = ""
|
||||
time_info = ""
|
||||
# 计算最早和最晚时间
|
||||
earliest_time = min(msg['time'] for msg in messages)
|
||||
latest_time = max(msg['time'] for msg in messages)
|
||||
|
||||
earliest_dt = datetime.datetime.fromtimestamp(earliest_time)
|
||||
latest_dt = datetime.datetime.fromtimestamp(latest_time)
|
||||
|
||||
# 如果是同一年
|
||||
if earliest_dt.year == latest_dt.year:
|
||||
earliest_str = earliest_dt.strftime("%m-%d %H:%M:%S")
|
||||
latest_str = latest_dt.strftime("%m-%d %H:%M:%S")
|
||||
time_info += f"是在{earliest_dt.year}年,{earliest_str} 到 {latest_str} 的对话:\n"
|
||||
else:
|
||||
earliest_str = earliest_dt.strftime("%Y-%m-%d %H:%M:%S")
|
||||
latest_str = latest_dt.strftime("%Y-%m-%d %H:%M:%S")
|
||||
time_info += f"是从 {earliest_str} 到 {latest_str} 的对话:\n"
|
||||
|
||||
for msg in messages:
|
||||
input_text += f"{msg['text']}\n"
|
||||
|
||||
print(input_text)
|
||||
|
||||
topic_num = self.calculate_topic_num(input_text, compress_rate)
|
||||
topics_response = self.llm_model_get_topic.generate_response(self.find_topic_llm(input_text, topic_num))
|
||||
# 修改话题处理逻辑
|
||||
# 定义需要过滤的关键词
|
||||
filter_keywords = ['表情包', '图片', '回复', '聊天记录']
|
||||
|
||||
# 过滤topics
|
||||
filter_keywords = ['表情包', '图片', '回复', '聊天记录']
|
||||
topics = [topic.strip() for topic in topics_response[0].replace(",", ",").replace("、", ",").replace(" ", ",").split(",") if topic.strip()]
|
||||
filtered_topics = [topic for topic in topics if not any(keyword in topic for keyword in filter_keywords)]
|
||||
|
||||
@@ -250,7 +302,7 @@ class Hippocampus:
|
||||
# 创建所有话题的请求任务
|
||||
tasks = []
|
||||
for topic in filtered_topics:
|
||||
topic_what_prompt = self.topic_what(input_text, topic)
|
||||
topic_what_prompt = self.topic_what(input_text, topic , time_info)
|
||||
# 创建异步任务
|
||||
task = self.llm_model_small.generate_response_async(topic_what_prompt)
|
||||
tasks.append((topic.strip(), task))
|
||||
@@ -267,37 +319,35 @@ class Hippocampus:
|
||||
async def operation_build_memory(self, chat_size=12):
|
||||
# 最近消息获取频率
|
||||
time_frequency = {'near': 3, 'mid': 8, 'far': 5}
|
||||
memory_sample = self.get_memory_sample(chat_size, time_frequency)
|
||||
memory_samples = self.get_memory_sample(chat_size, time_frequency)
|
||||
|
||||
all_topics = [] # 用于存储所有话题
|
||||
|
||||
for i, input_text in enumerate(memory_sample, 1):
|
||||
for i, messages in enumerate(memory_samples, 1):
|
||||
# 加载进度可视化
|
||||
all_topics = []
|
||||
progress = (i / len(memory_sample)) * 100
|
||||
progress = (i / len(memory_samples)) * 100
|
||||
bar_length = 30
|
||||
filled_length = int(bar_length * i // len(memory_sample))
|
||||
filled_length = int(bar_length * i // len(memory_samples))
|
||||
bar = '█' * filled_length + '-' * (bar_length - filled_length)
|
||||
print(f"\n进度: [{bar}] {progress:.1f}% ({i}/{len(memory_sample)})")
|
||||
print(f"\n进度: [{bar}] {progress:.1f}% ({i}/{len(memory_samples)})")
|
||||
|
||||
# 生成压缩后记忆 ,表现为 (话题,记忆) 的元组
|
||||
compressed_memory = set()
|
||||
# 生成压缩后记忆
|
||||
compress_rate = 0.1
|
||||
compressed_memory = await self.memory_compress(input_text, compress_rate)
|
||||
compressed_memory = await self.memory_compress(messages, compress_rate)
|
||||
print(f"\033[1;33m压缩后记忆数量\033[0m: {len(compressed_memory)}")
|
||||
|
||||
# 将记忆加入到图谱中
|
||||
for topic, memory in compressed_memory:
|
||||
print(f"\033[1;32m添加节点\033[0m: {topic}")
|
||||
self.memory_graph.add_dot(topic, memory)
|
||||
all_topics.append(topic) # 收集所有话题
|
||||
all_topics.append(topic)
|
||||
|
||||
# 连接相关话题
|
||||
for i in range(len(all_topics)):
|
||||
for j in range(i + 1, len(all_topics)):
|
||||
print(f"\033[1;32m连接节点\033[0m: {all_topics[i]} 和 {all_topics[j]}")
|
||||
self.memory_graph.connect_dot(all_topics[i], all_topics[j])
|
||||
|
||||
|
||||
|
||||
|
||||
self.sync_memory_to_db()
|
||||
|
||||
@@ -375,7 +425,7 @@ class Hippocampus:
|
||||
|
||||
if concept not in db_nodes_dict:
|
||||
# 数据库中缺少的节点,添加
|
||||
logger.info(f"添加新节点: {concept}")
|
||||
# logger.info(f"添加新节点: {concept}")
|
||||
node_data = {
|
||||
'concept': concept,
|
||||
'memory_items': memory_items,
|
||||
@@ -389,7 +439,7 @@ class Hippocampus:
|
||||
|
||||
# 如果特征值不同,则更新节点
|
||||
if db_hash != memory_hash:
|
||||
logger.info(f"更新节点内容: {concept}")
|
||||
# logger.info(f"更新节点内容: {concept}")
|
||||
self.memory_graph.db.db.graph_data.nodes.update_one(
|
||||
{'concept': concept},
|
||||
{'$set': {
|
||||
@@ -402,7 +452,7 @@ class Hippocampus:
|
||||
memory_concepts = set(node[0] for node in memory_nodes)
|
||||
for db_node in db_nodes:
|
||||
if db_node['concept'] not in memory_concepts:
|
||||
logger.info(f"删除多余节点: {db_node['concept']}")
|
||||
# logger.info(f"删除多余节点: {db_node['concept']}")
|
||||
self.memory_graph.db.db.graph_data.nodes.delete_one({'concept': db_node['concept']})
|
||||
|
||||
# 处理边的信息
|
||||
@@ -460,9 +510,10 @@ class Hippocampus:
|
||||
prompt = f'这是一段文字:{text}。请你从这段话中总结出{topic_num}个关键的概念,可以是名词,动词,或者特定人物,帮我列出来,用逗号,隔开,尽可能精简。只需要列举{topic_num}个话题就好,不要有序号,不要告诉我其他内容。'
|
||||
return prompt
|
||||
|
||||
def topic_what(self, text, topic, time_info):
    """Build the prompt that asks the LLM to summarise one topic of a text.

    Args:
        text: The passage the summary must be based on.
        topic: The concept to summarise; quoted inside the prompt.
        time_info: A description of when the text was written, placed
            directly before the passage.

    Returns:
        str: A Chinese prompt asking for a single natural sentence about
        ``topic`` that may mention time, people and concrete opinions.
    """
    prompt = f'这是一段文字,{time_info}:{text}。我想让你基于这段文字来概括"{topic}"这个概念,帮我总结成一句自然的话,可以包含时间和人物,以及具体的观点。只输出这句话就好'
    return prompt
|
||||
|
||||
def remove_node_from_db(self, topic):
|
||||
@@ -597,7 +648,7 @@ class Hippocampus:
|
||||
print(f"选择的记忆:\n{merged_text}")
|
||||
|
||||
# 使用memory_compress生成新的压缩记忆
|
||||
compressed_memories = await self.memory_compress(merged_text, 0.1)
|
||||
compressed_memories = await self.memory_compress(selected_memories, 0.1)
|
||||
|
||||
# 从原记忆列表中移除被选中的记忆
|
||||
for memory in selected_memories:
|
||||
@@ -647,6 +698,164 @@ class Hippocampus:
|
||||
else:
|
||||
print("\n本次检查没有需要合并的节点")
|
||||
|
||||
async def _identify_topics(self, text: str) -> list:
|
||||
"""从文本中识别可能的主题"""
|
||||
topics_response = self.llm_model_get_topic.generate_response(self.find_topic_llm(text, 5))
|
||||
topics = [topic.strip() for topic in topics_response[0].replace(",", ",").replace("、", ",").replace(" ", ",").split(",") if topic.strip()]
|
||||
return topics
|
||||
|
||||
def _find_similar_topics(self, topics: list, similarity_threshold: float = 0.4, debug_info: str = "") -> list:
|
||||
"""查找与给定主题相似的记忆主题"""
|
||||
all_memory_topics = list(self.memory_graph.G.nodes())
|
||||
all_similar_topics = []
|
||||
|
||||
for topic in topics:
|
||||
if debug_info:
|
||||
pass
|
||||
|
||||
topic_vector = text_to_vector(topic)
|
||||
has_similar_topic = False
|
||||
|
||||
for memory_topic in all_memory_topics:
|
||||
memory_vector = text_to_vector(memory_topic)
|
||||
all_words = set(topic_vector.keys()) | set(memory_vector.keys())
|
||||
v1 = [topic_vector.get(word, 0) for word in all_words]
|
||||
v2 = [memory_vector.get(word, 0) for word in all_words]
|
||||
similarity = cosine_similarity(v1, v2)
|
||||
|
||||
if similarity >= similarity_threshold:
|
||||
has_similar_topic = True
|
||||
all_similar_topics.append((memory_topic, similarity))
|
||||
|
||||
return all_similar_topics
|
||||
|
||||
def _get_top_topics(self, similar_topics: list, max_topics: int = 5) -> list:
|
||||
"""获取相似度最高的主题"""
|
||||
seen_topics = set()
|
||||
top_topics = []
|
||||
|
||||
for topic, score in sorted(similar_topics, key=lambda x: x[1], reverse=True):
|
||||
if topic not in seen_topics and len(top_topics) < max_topics:
|
||||
seen_topics.add(topic)
|
||||
top_topics.append((topic, score))
|
||||
|
||||
return top_topics
|
||||
|
||||
async def memory_activate_value(self, text: str, max_topics: int = 5, similarity_threshold: float = 0.3) -> int:
    """Compute how strongly ``text`` activates the stored memories.

    Topics are identified in ``text``, matched against the memory graph, and
    scored. Nodes holding many memory items are penalised by
    ``1 / (1 + log(count + 1))``.

    Args:
        text: The input text.
        max_topics: Maximum number of memory topics to consider.
        similarity_threshold: Minimum cosine similarity for a match.

    Returns:
        int: The activation value. It is 0 when no topic is identified or
        none matches; with a single matching memory topic it is
        ``score * 50 * penalty``; otherwise it is the mean of the topic match
        rate and the average penalised similarity, times 100.
    """

    def content_count_of(memory_topic):
        # Number of memory items on a node; a scalar counts as one item.
        memory_items = self.memory_graph.G.nodes[memory_topic].get('memory_items', [])
        if not isinstance(memory_items, list):
            memory_items = [memory_items] if memory_items else []
        return len(memory_items)

    # Identify the topics once: the same result is logged and used below, so
    # the topic model is not queried twice per call.
    identified_topics = await self._identify_topics(text)
    logger.info(f"[记忆激活]识别主题: {identified_topics}")
    if not identified_topics:
        return 0

    all_similar_topics = self._find_similar_topics(
        identified_topics,
        similarity_threshold=similarity_threshold,
        debug_info="记忆激活"
    )
    if not all_similar_topics:
        return 0

    top_topics = self._get_top_topics(all_similar_topics, max_topics)

    # A single matching topic is scored on a halved scale (50 instead of 100).
    if len(top_topics) == 1:
        topic, score = top_topics[0]
        content_count = content_count_of(topic)
        penalty = 1.0 / (1 + math.log(content_count + 1))

        activation = int(score * 50 * penalty)
        print(f"\033[1;32m[记忆激活]\033[0m 单主题「{topic}」- 相似度: {score:.3f}, 内容数: {content_count}, 激活值: {activation}")
        return activation

    matched_topics = set()
    topic_similarities = {}

    # The input-topic vectors do not depend on the memory topic.
    input_vectors = [(input_topic, text_to_vector(input_topic)) for input_topic in identified_topics]

    for memory_topic, _ in top_topics:
        content_count = content_count_of(memory_topic)
        penalty = 1.0 / (1 + math.log(content_count + 1))
        memory_vector = text_to_vector(memory_topic)

        for input_topic, topic_vector in input_vectors:
            all_words = set(topic_vector.keys()) | set(memory_vector.keys())
            v1 = [topic_vector.get(word, 0) for word in all_words]
            v2 = [memory_vector.get(word, 0) for word in all_words]
            sim = cosine_similarity(v1, v2)
            if sim >= similarity_threshold:
                matched_topics.add(input_topic)
                adjusted_sim = sim * penalty
                # Keep the best penalised similarity seen for each input topic.
                topic_similarities[input_topic] = max(topic_similarities.get(input_topic, 0), adjusted_sim)
                print(f"\033[1;32m[记忆激活]\033[0m 主题「{input_topic}」-> 「{memory_topic}」(内容数: {content_count}, 相似度: {adjusted_sim:.3f})")

    topic_match = len(matched_topics) / len(identified_topics)
    average_similarities = sum(topic_similarities.values()) / len(topic_similarities) if topic_similarities else 0

    activation = int((topic_match + average_similarities) / 2 * 100)
    print(f"\033[1;32m[记忆激活]\033[0m 匹配率: {topic_match:.3f}, 平均相似度: {average_similarities:.3f}, 激活值: {activation}")

    return activation
|
||||
|
||||
async def get_relevant_memories(self, text: str, max_topics: int = 5, similarity_threshold: float = 0.4, max_memory_num: int = 5) -> list:
    """Collect memory contents related to ``text``.

    Args:
        text: The input text.
        max_topics: Maximum number of memory topics to draw memories from.
        similarity_threshold: Minimum cosine similarity for a topic match.
        max_memory_num: Maximum number of memories returned. A topic that
            holds more than ``max_memory_num / 2`` memories is randomly
            reduced to ``max_memory_num // 2`` of them.

    Returns:
        list: Dicts with the keys ``'topic'``, ``'similarity'`` and
        ``'content'``, sorted by similarity in descending order. If more than
        ``max_memory_num`` memories are collected, a random subset of that
        size is returned.
    """
    identified_topics = await self._identify_topics(text)

    all_similar_topics = self._find_similar_topics(
        identified_topics,
        similarity_threshold=similarity_threshold,
        debug_info="记忆检索"
    )

    relevant_topics = self._get_top_topics(all_similar_topics, max_topics)

    relevant_memories = []
    for topic, score in relevant_topics:
        first_layer, _ = self.memory_graph.get_related_item(topic, depth=1)
        if not first_layer:
            continue
        if len(first_layer) > max_memory_num / 2:
            first_layer = random.sample(first_layer, max_memory_num // 2)
        relevant_memories.extend(
            {'topic': topic, 'similarity': score, 'content': memory}
            for memory in first_layer
        )

    # Sample first, then sort: random.sample returns items in selection
    # order, so sorting beforehand would be undone whenever sampling happens.
    if len(relevant_memories) > max_memory_num:
        relevant_memories = random.sample(relevant_memories, max_memory_num)
    relevant_memories.sort(key=lambda x: x['similarity'], reverse=True)

    return relevant_memories
|
||||
|
||||
def segment_text(text):
    """Split ``text`` into a list of tokens with ``jieba.cut``."""
    return list(jieba.cut(text))
|
||||
|
||||
def text_to_vector(text):
    """Convert ``text`` into a term-frequency vector.

    Args:
        text: The text to tokenise with ``segment_text``.

    Returns:
        dict: Maps each token to the number of times it occurs in ``text``.
    """
    vector = {}
    for word in segment_text(text):
        vector[word] = vector.get(word, 0) + 1
    return vector
|
||||
|
||||
def cosine_similarity(v1, v2):
    """Compute the cosine similarity of two numeric vectors.

    Args:
        v1: First vector, as a sequence of numbers.
        v2: Second vector; components are paired with ``v1`` by position.

    Returns:
        float: ``dot(v1, v2) / (|v1| * |v2|)``, or ``0.0`` when either vector
        has zero length (the similarity is undefined there).
    """
    dot_product = sum(a * b for a, b in zip(v1, v2))
    norm1 = math.sqrt(sum(a * a for a in v1))
    norm2 = math.sqrt(sum(b * b for b in v2))
    if norm1 == 0 or norm2 == 0:
        return 0.0
    return dot_product / (norm1 * norm2)
|
||||
|
||||
def visualize_graph_lite(memory_graph: Memory_graph, color_by_memory: bool = False):
|
||||
# 设置中文字体
|
||||
@@ -735,7 +944,7 @@ async def main():
|
||||
db = Database.get_instance()
|
||||
start_time = time.time()
|
||||
|
||||
test_pare = {'do_build_memory':True,'do_forget_topic':False,'do_visualize_graph':True,'do_query':False,'do_merge_memory':False}
|
||||
test_pare = {'do_build_memory':False,'do_forget_topic':False,'do_visualize_graph':True,'do_query':False,'do_merge_memory':False}
|
||||
|
||||
# 创建记忆图
|
||||
memory_graph = Memory_graph()
|
||||
|
||||
1208
src/plugins/memory_system/memory_test1.py
Normal file
1208
src/plugins/memory_system/memory_test1.py
Normal file
File diff suppressed because it is too large
Load Diff
@@ -24,14 +24,15 @@ class LLM_request:
|
||||
self.api_key = getattr(config, model["key"])
|
||||
self.base_url = getattr(config, model["base_url"])
|
||||
except AttributeError as e:
|
||||
logger.error(f"原始 model dict 信息:{model}")
|
||||
logger.error(f"配置错误:找不到对应的配置项 - {str(e)}")
|
||||
raise ValueError(f"配置错误:找不到对应的配置项 - {str(e)}") from e
|
||||
self.model_name = model["name"]
|
||||
self.params = kwargs
|
||||
|
||||
|
||||
self.pri_in = model.get("pri_in", 0)
|
||||
self.pri_out = model.get("pri_out", 0)
|
||||
|
||||
|
||||
# 获取数据库实例
|
||||
self.db = Database.get_instance()
|
||||
self._init_database()
|
||||
@@ -44,12 +45,12 @@ class LLM_request:
|
||||
self.db.db.llm_usage.create_index([("model_name", 1)])
|
||||
self.db.db.llm_usage.create_index([("user_id", 1)])
|
||||
self.db.db.llm_usage.create_index([("request_type", 1)])
|
||||
except Exception as e:
|
||||
logger.error(f"创建数据库索引失败: {e}")
|
||||
except Exception:
|
||||
logger.error("创建数据库索引失败")
|
||||
|
||||
def _record_usage(self, prompt_tokens: int, completion_tokens: int, total_tokens: int,
|
||||
user_id: str = "system", request_type: str = "chat",
|
||||
endpoint: str = "/chat/completions"):
|
||||
def _record_usage(self, prompt_tokens: int, completion_tokens: int, total_tokens: int,
|
||||
user_id: str = "system", request_type: str = "chat",
|
||||
endpoint: str = "/chat/completions"):
|
||||
"""记录模型使用情况到数据库
|
||||
Args:
|
||||
prompt_tokens: 输入token数
|
||||
@@ -79,8 +80,8 @@ class LLM_request:
|
||||
f"提示词: {prompt_tokens}, 完成: {completion_tokens}, "
|
||||
f"总计: {total_tokens}"
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"记录token使用情况失败: {e}")
|
||||
except Exception:
|
||||
logger.error("记录token使用情况失败")
|
||||
|
||||
def _calculate_cost(self, prompt_tokens: int, completion_tokens: int) -> float:
|
||||
"""计算API调用成本
|
||||
@@ -140,12 +141,12 @@ class LLM_request:
|
||||
}
|
||||
|
||||
api_url = f"{self.base_url.rstrip('/')}/{endpoint.lstrip('/')}"
|
||||
#判断是否为流式
|
||||
# 判断是否为流式
|
||||
stream_mode = self.params.get("stream", False)
|
||||
if self.params.get("stream", False) is True:
|
||||
logger.info(f"进入流式输出模式,发送请求到URL: {api_url}")
|
||||
logger.debug(f"进入流式输出模式,发送请求到URL: {api_url}")
|
||||
else:
|
||||
logger.info(f"发送请求到URL: {api_url}")
|
||||
logger.debug(f"发送请求到URL: {api_url}")
|
||||
logger.info(f"使用模型: {self.model_name}")
|
||||
|
||||
# 构建请求体
|
||||
@@ -158,7 +159,7 @@ class LLM_request:
|
||||
try:
|
||||
# 使用上下文管理器处理会话
|
||||
headers = await self._build_headers()
|
||||
#似乎是openai流式必须要的东西,不过阿里云的qwq-plus加了这个没有影响
|
||||
# 似乎是openai流式必须要的东西,不过阿里云的qwq-plus加了这个没有影响
|
||||
if stream_mode:
|
||||
headers["Accept"] = "text/event-stream"
|
||||
|
||||
@@ -182,11 +183,33 @@ class LLM_request:
|
||||
continue
|
||||
elif response.status in policy["abort_codes"]:
|
||||
logger.error(f"错误码: {response.status} - {error_code_mapping.get(response.status)}")
|
||||
if response.status == 403:
|
||||
# 尝试降级Pro模型
|
||||
if self.model_name.startswith(
|
||||
"Pro/") and self.base_url == "https://api.siliconflow.cn/v1/":
|
||||
old_model_name = self.model_name
|
||||
self.model_name = self.model_name[4:] # 移除"Pro/"前缀
|
||||
logger.warning(f"检测到403错误,模型从 {old_model_name} 降级为 {self.model_name}")
|
||||
|
||||
# 对全局配置进行更新
|
||||
if hasattr(global_config, 'llm_normal') and global_config.llm_normal.get(
|
||||
'name') == old_model_name:
|
||||
global_config.llm_normal['name'] = self.model_name
|
||||
logger.warning("已将全局配置中的 llm_normal 模型降级")
|
||||
|
||||
# 更新payload中的模型名
|
||||
if payload and 'model' in payload:
|
||||
payload['model'] = self.model_name
|
||||
|
||||
# 重新尝试请求
|
||||
retry -= 1 # 不计入重试次数
|
||||
continue
|
||||
|
||||
raise RuntimeError(f"请求被拒绝: {error_code_mapping.get(response.status)}")
|
||||
|
||||
|
||||
response.raise_for_status()
|
||||
|
||||
#将流式输出转化为非流式输出
|
||||
|
||||
# 将流式输出转化为非流式输出
|
||||
if stream_mode:
|
||||
accumulated_content = ""
|
||||
async for line_bytes in response.content:
|
||||
@@ -204,8 +227,8 @@ class LLM_request:
|
||||
if delta_content is None:
|
||||
delta_content = ""
|
||||
accumulated_content += delta_content
|
||||
except Exception as e:
|
||||
logger.error(f"解析流式输出错误: {e}")
|
||||
except Exception:
|
||||
logger.exception("解析流式输出错")
|
||||
content = accumulated_content
|
||||
reasoning_content = ""
|
||||
think_match = re.search(r'<think>(.*?)</think>', content, re.DOTALL)
|
||||
@@ -213,12 +236,15 @@ class LLM_request:
|
||||
reasoning_content = think_match.group(1).strip()
|
||||
content = re.sub(r'<think>.*?</think>', '', content, flags=re.DOTALL).strip()
|
||||
# 构造一个伪result以便调用自定义响应处理器或默认处理器
|
||||
result = {"choices": [{"message": {"content": content, "reasoning_content": reasoning_content}}]}
|
||||
return response_handler(result) if response_handler else self._default_response_handler(result, user_id, request_type, endpoint)
|
||||
result = {
|
||||
"choices": [{"message": {"content": content, "reasoning_content": reasoning_content}}]}
|
||||
return response_handler(result) if response_handler else self._default_response_handler(
|
||||
result, user_id, request_type, endpoint)
|
||||
else:
|
||||
result = await response.json()
|
||||
# 使用自定义处理器或默认处理
|
||||
return response_handler(result) if response_handler else self._default_response_handler(result, user_id, request_type, endpoint)
|
||||
return response_handler(result) if response_handler else self._default_response_handler(
|
||||
result, user_id, request_type, endpoint)
|
||||
|
||||
except Exception as e:
|
||||
if retry < policy["max_retries"] - 1:
|
||||
@@ -232,8 +258,8 @@ class LLM_request:
|
||||
|
||||
logger.error("达到最大重试次数,请求仍然失败")
|
||||
raise RuntimeError("达到最大重试次数,API请求仍然失败")
|
||||
|
||||
async def _transform_parameters(self, params: dict) ->dict:
|
||||
|
||||
async def _transform_parameters(self, params: dict) -> dict:
|
||||
"""
|
||||
根据模型名称转换参数:
|
||||
- 对于需要转换的OpenAI CoT系列模型(例如 "o3-mini"),删除 'temprature' 参数,
|
||||
@@ -242,7 +268,8 @@ class LLM_request:
|
||||
# 复制一份参数,避免直接修改原始数据
|
||||
new_params = dict(params)
|
||||
# 定义需要转换的模型列表
|
||||
models_needing_transformation = ["o3-mini", "o1-mini", "o1-preview", "o1-2024-12-17", "o1-preview-2024-09-12", "o3-mini-2025-01-31", "o1-mini-2024-09-12"]
|
||||
models_needing_transformation = ["o3-mini", "o1-mini", "o1-preview", "o1-2024-12-17", "o1-preview-2024-09-12",
|
||||
"o3-mini-2025-01-31", "o1-mini-2024-09-12"]
|
||||
if self.model_name.lower() in models_needing_transformation:
|
||||
# 删除 'temperature' 参数(如果存在)
|
||||
new_params.pop("temperature", None)
|
||||
@@ -278,13 +305,13 @@ class LLM_request:
|
||||
**params_copy
|
||||
}
|
||||
# 如果 payload 中依然存在 max_tokens 且需要转换,在这里进行再次检查
|
||||
if self.model_name.lower() in ["o3-mini", "o1-mini", "o1-preview", "o1-2024-12-17", "o1-preview-2024-09-12", "o3-mini-2025-01-31", "o1-mini-2024-09-12"] and "max_tokens" in payload:
|
||||
if self.model_name.lower() in ["o3-mini", "o1-mini", "o1-preview", "o1-2024-12-17", "o1-preview-2024-09-12",
|
||||
"o3-mini-2025-01-31", "o1-mini-2024-09-12"] and "max_tokens" in payload:
|
||||
payload["max_completion_tokens"] = payload.pop("max_tokens")
|
||||
return payload
|
||||
|
||||
|
||||
def _default_response_handler(self, result: dict, user_id: str = "system",
|
||||
request_type: str = "chat", endpoint: str = "/chat/completions") -> Tuple:
|
||||
def _default_response_handler(self, result: dict, user_id: str = "system",
|
||||
request_type: str = "chat", endpoint: str = "/chat/completions") -> Tuple:
|
||||
"""默认响应解析"""
|
||||
if "choices" in result and result["choices"]:
|
||||
message = result["choices"][0]["message"]
|
||||
@@ -329,15 +356,15 @@ class LLM_request:
|
||||
"""构建请求头"""
|
||||
if no_key:
|
||||
return {
|
||||
"Authorization": f"Bearer **********",
|
||||
"Authorization": "Bearer **********",
|
||||
"Content-Type": "application/json"
|
||||
}
|
||||
else:
|
||||
return {
|
||||
"Authorization": f"Bearer {self.api_key}",
|
||||
"Content-Type": "application/json"
|
||||
}
|
||||
# 防止小朋友们截图自己的key
|
||||
}
|
||||
# 防止小朋友们截图自己的key
|
||||
|
||||
async def generate_response(self, prompt: str) -> Tuple[str, str]:
|
||||
"""根据输入的提示生成模型的异步响应"""
|
||||
@@ -384,6 +411,7 @@ class LLM_request:
|
||||
Returns:
|
||||
list: embedding向量,如果失败则返回None
|
||||
"""
|
||||
|
||||
def embedding_handler(result):
|
||||
"""处理响应"""
|
||||
if "data" in result and len(result["data"]) > 0:
|
||||
|
||||
@@ -4,7 +4,7 @@ import time
|
||||
from dataclasses import dataclass
|
||||
|
||||
from ..chat.config import global_config
|
||||
|
||||
from loguru import logger
|
||||
|
||||
@dataclass
|
||||
class MoodState:
|
||||
@@ -51,11 +51,11 @@ class MoodManager:
|
||||
# 情绪词映射表 (valence, arousal)
|
||||
self.emotion_map = {
|
||||
'happy': (0.8, 0.6), # 高愉悦度,中等唤醒度
|
||||
'angry': (-0.7, 0.8), # 负愉悦度,高唤醒度
|
||||
'angry': (-0.7, 0.7), # 负愉悦度,高唤醒度
|
||||
'sad': (-0.6, 0.3), # 负愉悦度,低唤醒度
|
||||
'surprised': (0.4, 0.9), # 中等愉悦度,高唤醒度
|
||||
'surprised': (0.4, 0.8), # 中等愉悦度,高唤醒度
|
||||
'disgusted': (-0.8, 0.5), # 高负愉悦度,中等唤醒度
|
||||
'fearful': (-0.7, 0.7), # 负愉悦度,高唤醒度
|
||||
'fearful': (-0.7, 0.6), # 负愉悦度,高唤醒度
|
||||
'neutral': (0.0, 0.5), # 中性愉悦度,中等唤醒度
|
||||
}
|
||||
|
||||
@@ -64,15 +64,20 @@ class MoodManager:
|
||||
# 第一象限:高唤醒,正愉悦
|
||||
(0.5, 0.7): "兴奋",
|
||||
(0.3, 0.8): "快乐",
|
||||
(0.2, 0.65): "满足",
|
||||
# 第二象限:高唤醒,负愉悦
|
||||
(-0.5, 0.7): "愤怒",
|
||||
(-0.3, 0.8): "焦虑",
|
||||
(-0.2, 0.65): "烦躁",
|
||||
# 第三象限:低唤醒,负愉悦
|
||||
(-0.5, 0.3): "悲伤",
|
||||
(-0.3, 0.2): "疲倦",
|
||||
(-0.3, 0.35): "疲倦",
|
||||
(-0.4, 0.15): "疲倦",
|
||||
# 第四象限:低唤醒,正愉悦
|
||||
(0.5, 0.3): "放松",
|
||||
(0.3, 0.2): "平静"
|
||||
(0.2, 0.45): "平静",
|
||||
(0.3, 0.4): "安宁",
|
||||
(0.5, 0.3): "放松"
|
||||
|
||||
}
|
||||
|
||||
@classmethod
|
||||
@@ -119,9 +124,13 @@ class MoodManager:
|
||||
current_time = time.time()
|
||||
time_diff = current_time - self.last_update
|
||||
|
||||
# 应用衰减公式
|
||||
self.current_mood.valence *= math.pow(1 - self.decay_rate_valence, time_diff)
|
||||
self.current_mood.arousal *= math.pow(1 - self.decay_rate_arousal, time_diff)
|
||||
# Valence 向中性(0)回归
|
||||
valence_target = 0.0
|
||||
self.current_mood.valence = valence_target + (self.current_mood.valence - valence_target) * math.exp(-self.decay_rate_valence * time_diff)
|
||||
|
||||
# Arousal 向中性(0.5)回归
|
||||
arousal_target = 0.5
|
||||
self.current_mood.arousal = arousal_target + (self.current_mood.arousal - arousal_target) * math.exp(-self.decay_rate_arousal * time_diff)
|
||||
|
||||
# 确保值在合理范围内
|
||||
self.current_mood.valence = max(-1.0, min(1.0, self.current_mood.valence))
|
||||
@@ -201,7 +210,7 @@ class MoodManager:
|
||||
|
||||
def print_mood_status(self) -> None:
|
||||
"""打印当前情绪状态"""
|
||||
print(f"\033[1;35m[情绪状态]\033[0m 愉悦度: {self.current_mood.valence:.2f}, "
|
||||
logger.info(f"[情绪状态]愉悦度: {self.current_mood.valence:.2f}, "
|
||||
f"唤醒度: {self.current_mood.arousal:.2f}, "
|
||||
f"心情: {self.current_mood.text}")
|
||||
|
||||
|
||||
@@ -13,21 +13,21 @@ from ..models.utils_model import LLM_request
|
||||
driver = get_driver()
|
||||
config = driver.config
|
||||
|
||||
|
||||
Database.initialize(
|
||||
host= config.MONGODB_HOST,
|
||||
port= int(config.MONGODB_PORT),
|
||||
db_name= config.DATABASE_NAME,
|
||||
username= config.MONGODB_USERNAME,
|
||||
password= config.MONGODB_PASSWORD,
|
||||
auth_source=config.MONGODB_AUTH_SOURCE
|
||||
)
|
||||
host=config.MONGODB_HOST,
|
||||
port=int(config.MONGODB_PORT),
|
||||
db_name=config.DATABASE_NAME,
|
||||
username=config.MONGODB_USERNAME,
|
||||
password=config.MONGODB_PASSWORD,
|
||||
auth_source=config.MONGODB_AUTH_SOURCE
|
||||
)
|
||||
|
||||
|
||||
class ScheduleGenerator:
|
||||
def __init__(self):
|
||||
#根据global_config.llm_normal这一字典配置指定模型
|
||||
# 根据global_config.llm_normal这一字典配置指定模型
|
||||
# self.llm_scheduler = LLMModel(model = global_config.llm_normal,temperature=0.9)
|
||||
self.llm_scheduler = LLM_request(model = global_config.llm_normal,temperature=0.9)
|
||||
self.llm_scheduler = LLM_request(model=global_config.llm_normal, temperature=0.9)
|
||||
self.db = Database.get_instance()
|
||||
self.today_schedule_text = ""
|
||||
self.today_schedule = {}
|
||||
@@ -35,39 +35,41 @@ class ScheduleGenerator:
|
||||
self.tomorrow_schedule = {}
|
||||
self.yesterday_schedule_text = ""
|
||||
self.yesterday_schedule = {}
|
||||
|
||||
|
||||
async def initialize(self):
|
||||
today = datetime.datetime.now()
|
||||
tomorrow = datetime.datetime.now() + datetime.timedelta(days=1)
|
||||
yesterday = datetime.datetime.now() - datetime.timedelta(days=1)
|
||||
|
||||
|
||||
self.today_schedule_text, self.today_schedule = await self.generate_daily_schedule(target_date=today)
|
||||
self.tomorrow_schedule_text, self.tomorrow_schedule = await self.generate_daily_schedule(target_date=tomorrow,read_only=True)
|
||||
self.yesterday_schedule_text, self.yesterday_schedule = await self.generate_daily_schedule(target_date=yesterday,read_only=True)
|
||||
|
||||
async def generate_daily_schedule(self, target_date: datetime.datetime = None,read_only:bool = False) -> Dict[str, str]:
|
||||
|
||||
self.tomorrow_schedule_text, self.tomorrow_schedule = await self.generate_daily_schedule(target_date=tomorrow,
|
||||
read_only=True)
|
||||
self.yesterday_schedule_text, self.yesterday_schedule = await self.generate_daily_schedule(
|
||||
target_date=yesterday, read_only=True)
|
||||
|
||||
async def generate_daily_schedule(self, target_date: datetime.datetime = None, read_only: bool = False) -> Dict[
|
||||
str, str]:
|
||||
|
||||
date_str = target_date.strftime("%Y-%m-%d")
|
||||
weekday = target_date.strftime("%A")
|
||||
|
||||
|
||||
schedule_text = str
|
||||
|
||||
|
||||
existing_schedule = self.db.db.schedule.find_one({"date": date_str})
|
||||
if existing_schedule:
|
||||
print(f"{date_str}的日程已存在:")
|
||||
logger.debug(f"{date_str}的日程已存在:")
|
||||
schedule_text = existing_schedule["schedule"]
|
||||
# print(self.schedule_text)
|
||||
|
||||
elif read_only == False:
|
||||
print(f"{date_str}的日程不存在,准备生成新的日程。")
|
||||
prompt = f"""我是{global_config.BOT_NICKNAME},{global_config.PROMPT_SCHEDULE_GEN},请为我生成{date_str}({weekday})的日程安排,包括:"""+\
|
||||
"""
|
||||
elif not read_only:
|
||||
logger.debug(f"{date_str}的日程不存在,准备生成新的日程。")
|
||||
prompt = f"""我是{global_config.BOT_NICKNAME},{global_config.PROMPT_SCHEDULE_GEN},请为我生成{date_str}({weekday})的日程安排,包括:""" + \
|
||||
"""
|
||||
1. 早上的学习和工作安排
|
||||
2. 下午的活动和任务
|
||||
3. 晚上的计划和休息时间
|
||||
请按照时间顺序列出具体时间点和对应的活动,用一个时间点而不是时间段来表示时间,用JSON格式返回日程表,仅返回内容,不要返回注释,时间采用24小时制,格式为{"时间": "活动","时间": "活动",...}。"""
|
||||
|
||||
请按照时间顺序列出具体时间点和对应的活动,用一个时间点而不是时间段来表示时间,用JSON格式返回日程表,仅返回内容,不要返回注释,不要添加任何markdown或代码块样式,时间采用24小时制,格式为{"时间": "活动","时间": "活动",...}。"""
|
||||
|
||||
try:
|
||||
schedule_text, _ = await self.llm_scheduler.generate_response(prompt)
|
||||
self.db.db.schedule.insert_one({"date": date_str, "schedule": schedule_text})
|
||||
@@ -76,36 +78,35 @@ class ScheduleGenerator:
|
||||
schedule_text = "生成日程时出错了"
|
||||
# print(self.schedule_text)
|
||||
else:
|
||||
print(f"{date_str}的日程不存在。")
|
||||
logger.debug(f"{date_str}的日程不存在。")
|
||||
schedule_text = "忘了"
|
||||
|
||||
return schedule_text,None
|
||||
|
||||
return schedule_text, None
|
||||
|
||||
schedule_form = self._parse_schedule(schedule_text)
|
||||
return schedule_text,schedule_form
|
||||
|
||||
return schedule_text, schedule_form
|
||||
|
||||
def _parse_schedule(self, schedule_text: str) -> Union[bool, Dict[str, str]]:
|
||||
"""解析日程文本,转换为时间和活动的字典"""
|
||||
try:
|
||||
try:
|
||||
schedule_dict = json.loads(schedule_text)
|
||||
return schedule_dict
|
||||
except json.JSONDecodeError as e:
|
||||
print(schedule_text)
|
||||
print(f"解析日程失败: {str(e)}")
|
||||
except json.JSONDecodeError:
|
||||
logger.exception("解析日程失败: {}".format(schedule_text))
|
||||
return False
|
||||
|
||||
|
||||
def _parse_time(self, time_str: str) -> str:
|
||||
"""解析时间字符串,转换为时间"""
|
||||
return datetime.datetime.strptime(time_str, "%H:%M")
|
||||
|
||||
|
||||
def get_current_task(self) -> str:
|
||||
"""获取当前时间应该进行的任务"""
|
||||
current_time = datetime.datetime.now().strftime("%H:%M")
|
||||
|
||||
|
||||
# 找到最接近当前时间的任务
|
||||
closest_time = None
|
||||
min_diff = float('inf')
|
||||
|
||||
|
||||
# 检查今天的日程
|
||||
if not self.today_schedule:
|
||||
return "摸鱼"
|
||||
@@ -114,7 +115,7 @@ class ScheduleGenerator:
|
||||
if closest_time is None or diff < min_diff:
|
||||
closest_time = time_str
|
||||
min_diff = diff
|
||||
|
||||
|
||||
# 检查昨天的日程中的晚间任务
|
||||
if self.yesterday_schedule:
|
||||
for time_str in self.yesterday_schedule.keys():
|
||||
@@ -125,17 +126,17 @@ class ScheduleGenerator:
|
||||
closest_time = time_str
|
||||
min_diff = diff
|
||||
return closest_time, self.yesterday_schedule[closest_time]
|
||||
|
||||
|
||||
if closest_time:
|
||||
return closest_time, self.today_schedule[closest_time]
|
||||
return "摸鱼"
|
||||
|
||||
|
||||
def _time_diff(self, time1: str, time2: str) -> int:
|
||||
"""计算两个时间字符串之间的分钟差"""
|
||||
if time1=="24:00":
|
||||
time1="23:59"
|
||||
if time2=="24:00":
|
||||
time2="23:59"
|
||||
if time1 == "24:00":
|
||||
time1 = "23:59"
|
||||
if time2 == "24:00":
|
||||
time2 = "23:59"
|
||||
t1 = datetime.datetime.strptime(time1, "%H:%M")
|
||||
t2 = datetime.datetime.strptime(time2, "%H:%M")
|
||||
diff = int((t2 - t1).total_seconds() / 60)
|
||||
@@ -146,17 +147,18 @@ class ScheduleGenerator:
|
||||
diff -= 1440 # 减一天的分钟
|
||||
# print(f"时间1[{time1}]: 时间2[{time2}],差值[{diff}]分钟")
|
||||
return diff
|
||||
|
||||
|
||||
def print_schedule(self):
|
||||
"""打印完整的日程安排"""
|
||||
if not self._parse_schedule(self.today_schedule_text):
|
||||
print("今日日程有误,将在下次运行时重新生成")
|
||||
logger.warning("今日日程有误,将在下次运行时重新生成")
|
||||
self.db.db.schedule.delete_one({"date": datetime.datetime.now().strftime("%Y-%m-%d")})
|
||||
else:
|
||||
print("\n=== 今日日程安排 ===")
|
||||
logger.info("=== 今日日程安排 ===")
|
||||
for time_str, activity in self.today_schedule.items():
|
||||
print(f"时间[{time_str}]: 活动[{activity}]")
|
||||
print("==================\n")
|
||||
logger.info(f"时间[{time_str}]: 活动[{activity}]")
|
||||
logger.info("==================")
|
||||
|
||||
|
||||
# def main():
|
||||
# # 使用示例
|
||||
@@ -165,7 +167,7 @@ class ScheduleGenerator:
|
||||
# scheduler.print_schedule()
|
||||
# print("\n当前任务:")
|
||||
# print(scheduler.get_current_task())
|
||||
|
||||
|
||||
# print("昨天日程:")
|
||||
# print(scheduler.yesterday_schedule)
|
||||
# print("今天日程:")
|
||||
@@ -175,5 +177,5 @@ class ScheduleGenerator:
|
||||
|
||||
# if __name__ == "__main__":
|
||||
# main()
|
||||
|
||||
|
||||
bot_schedule = ScheduleGenerator()
|
||||
|
||||
@@ -3,6 +3,7 @@ import time
|
||||
from collections import defaultdict
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Any, Dict
|
||||
from loguru import logger
|
||||
|
||||
from ...common.database import Database
|
||||
|
||||
@@ -153,8 +154,8 @@ class LLMStatistics:
|
||||
try:
|
||||
all_stats = self._collect_all_statistics()
|
||||
self._save_statistics(all_stats)
|
||||
except Exception as e:
|
||||
print(f"\033[1;31m[错误]\033[0m 统计数据处理失败: {e}")
|
||||
except Exception:
|
||||
logger.exception("统计数据处理失败")
|
||||
|
||||
# 等待1分钟
|
||||
for _ in range(60):
|
||||
|
||||
@@ -284,10 +284,13 @@ class ChineseTypoGenerator:
|
||||
|
||||
返回:
|
||||
typo_sentence: 包含错别字的句子
|
||||
typo_info: 错别字信息列表
|
||||
correction_suggestion: 随机选择的一个纠正建议,返回正确的字/词
|
||||
"""
|
||||
result = []
|
||||
typo_info = []
|
||||
word_typos = [] # 记录词语错误对(错词,正确词)
|
||||
char_typos = [] # 记录单字错误对(错字,正确字)
|
||||
current_pos = 0
|
||||
|
||||
# 分词
|
||||
words = self._segment_sentence(sentence)
|
||||
@@ -296,6 +299,7 @@ class ChineseTypoGenerator:
|
||||
# 如果是标点符号或空格,直接添加
|
||||
if all(not self._is_chinese_char(c) for c in word):
|
||||
result.append(word)
|
||||
current_pos += len(word)
|
||||
continue
|
||||
|
||||
# 获取词语的拼音
|
||||
@@ -316,6 +320,8 @@ class ChineseTypoGenerator:
|
||||
' '.join(word_pinyin),
|
||||
' '.join(self._get_word_pinyin(typo_word)),
|
||||
orig_freq, typo_freq))
|
||||
word_typos.append((typo_word, word)) # 记录(错词,正确词)对
|
||||
current_pos += len(typo_word)
|
||||
continue
|
||||
|
||||
# 如果不进行整词替换,则进行单字替换
|
||||
@@ -333,11 +339,15 @@ class ChineseTypoGenerator:
|
||||
result.append(typo_char)
|
||||
typo_py = pinyin(typo_char, style=Style.TONE3)[0][0]
|
||||
typo_info.append((char, typo_char, py, typo_py, orig_freq, typo_freq))
|
||||
char_typos.append((typo_char, char)) # 记录(错字,正确字)对
|
||||
current_pos += 1
|
||||
continue
|
||||
result.append(char)
|
||||
current_pos += 1
|
||||
else:
|
||||
# 处理多字词的单字替换
|
||||
word_result = []
|
||||
word_start_pos = current_pos
|
||||
for i, (char, py) in enumerate(zip(word, word_pinyin)):
|
||||
# 词中的字替换概率降低
|
||||
word_error_rate = self.error_rate * (0.7 ** (len(word) - 1))
|
||||
@@ -353,11 +363,24 @@ class ChineseTypoGenerator:
|
||||
word_result.append(typo_char)
|
||||
typo_py = pinyin(typo_char, style=Style.TONE3)[0][0]
|
||||
typo_info.append((char, typo_char, py, typo_py, orig_freq, typo_freq))
|
||||
char_typos.append((typo_char, char)) # 记录(错字,正确字)对
|
||||
continue
|
||||
word_result.append(char)
|
||||
result.append(''.join(word_result))
|
||||
current_pos += len(word)
|
||||
|
||||
return ''.join(result), typo_info
|
||||
# 优先从词语错误中选择,如果没有则从单字错误中选择
|
||||
correction_suggestion = None
|
||||
# 50%概率返回纠正建议
|
||||
if random.random() < 0.5:
|
||||
if word_typos:
|
||||
wrong_word, correct_word = random.choice(word_typos)
|
||||
correction_suggestion = correct_word
|
||||
elif char_typos:
|
||||
wrong_char, correct_char = random.choice(char_typos)
|
||||
correction_suggestion = correct_char
|
||||
|
||||
return ''.join(result), correction_suggestion
|
||||
|
||||
def format_typo_info(self, typo_info):
|
||||
"""
|
||||
@@ -419,16 +442,16 @@ def main():
|
||||
|
||||
# 创建包含错别字的句子
|
||||
start_time = time.time()
|
||||
typo_sentence, typo_info = typo_generator.create_typo_sentence(sentence)
|
||||
typo_sentence, correction_suggestion = typo_generator.create_typo_sentence(sentence)
|
||||
|
||||
# 打印结果
|
||||
print("\n原句:", sentence)
|
||||
print("错字版:", typo_sentence)
|
||||
|
||||
# 打印错别字信息
|
||||
if typo_info:
|
||||
print("\n错别字信息:")
|
||||
print(typo_generator.format_typo_info(typo_info))
|
||||
# 打印纠正建议
|
||||
if correction_suggestion:
|
||||
print("\n随机纠正建议:")
|
||||
print(f"应该改为:{correction_suggestion}")
|
||||
|
||||
# 计算并打印总耗时
|
||||
end_time = time.time()
|
||||
|
||||
Reference in New Issue
Block a user