Merge remote-tracking branch 'upstream/debug' into feat_regix

Rikki
2025-03-11 05:05:07 +08:00
parent cd96644a4c
commit a54ca8ce18
35 changed files with 1746 additions and 341 deletions


@@ -121,9 +121,9 @@ async def build_memory_task():
@scheduler.scheduled_job("interval", seconds=global_config.forget_memory_interval, id="forget_memory")
async def forget_memory_task():
"""每30秒执行一次记忆构建"""
# print("\033[1;32m[记忆遗忘]\033[0m 开始遗忘记忆...")
# await hippocampus.operation_forget_topic(percentage=0.1)
# print("\033[1;32m[记忆遗忘]\033[0m 记忆遗忘完成")
print("\033[1;32m[记忆遗忘]\033[0m 开始遗忘记忆...")
await hippocampus.operation_forget_topic(percentage=0.1)
print("\033[1;32m[记忆遗忘]\033[0m 记忆遗忘完成")
@scheduler.scheduled_job("interval", seconds=global_config.build_memory_interval + 10, id="merge_memory")


@@ -138,7 +138,7 @@ class ChatBot:
# If the thinking message cannot be found, return immediately
if not thinking_message:
logger.warning(f"Corresponding thinking message not found; it may have timed out and been removed")
logger.warning("Corresponding thinking message not found; it may have timed out and been removed")
return
# Record when thinking started, to avoid too long a gap between thinking and replying
@@ -187,7 +187,7 @@ class ChatBot:
# Check whether the emoji was <not found>
if emoji_raw != None:
emoji_path, discription = emoji_raw
emoji_path, description = emoji_raw
emoji_cq = CQCode.create_emoji_cq(emoji_path)
@@ -203,7 +203,7 @@ class ChatBot:
raw_message=emoji_cq,
plain_text=emoji_cq,
processed_plain_text=emoji_cq,
detailed_plain_text=discription,
detailed_plain_text=description,
user_nickname=global_config.BOT_NICKNAME,
group_name=message.group_name,
time=bot_response_time,
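The emoji reply above is wrapped into a CQ code before being attached to the outgoing message. Assuming create_emoji_cq simply formats an OneBot-style image segment (a guess; the real helper may add more fields), it would look roughly like:

# Hypothetical sketch of CQCode.create_emoji_cq; the exact fields are an assumption.
def create_emoji_cq(emoji_path: str) -> str:
    # CQ codes are bracketed key=value segments understood by OneBot clients
    return f"[CQ:image,file=file:///{emoji_path}]"

print(create_emoji_cq("data/emoji/happy.png"))
# -> [CQ:image,file=file:///data/emoji/happy.png]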


@@ -1,6 +1,6 @@
import os
from dataclasses import dataclass, field
from typing import Dict, Optional
from typing import Dict, List, Optional
import tomli
from loguru import logger
@@ -12,10 +12,12 @@ from packaging.specifiers import SpecifierSet, InvalidSpecifier
@dataclass
class BotConfig:
"""机器人配置类"""
INNER_VERSION: Version = None
BOT_QQ: Optional[int] = 1
BOT_NICKNAME: Optional[str] = None
BOT_ALIAS_NAMES: List[str] = field(default_factory=list) # aliases the bot can be addressed by
# Message-processing settings
MIN_TEXT_LENGTH: int = 2 # minimum text length to process
@@ -82,23 +84,25 @@ class BotConfig:
PROMPT_PERSONALITY = [
"used to be a female college student majoring in geology, now studying psychology and neuroscience; you browse Tieba",
"a female college student with black hair; you browse Xiaohongshu",
"a female college student who browses Bilibili and is into ACG culture"
"a female college student who browses Bilibili and is into ACG culture",
]
PROMPT_SCHEDULE_GEN="a female college student who used to study geology and now studies psychology and neuroscience; likes browsing QQ, Tieba, Zhihu and Xiaohongshu"
PERSONALITY_1: float = 0.6 # probability of personality 1
PERSONALITY_2: float = 0.3 # probability of personality 2
PERSONALITY_3: float = 0.1 # probability of personality 3
memory_ban_words: list = field(default_factory=lambda: ['表情包', '图片', '回复', '聊天记录']) # default for the newly added config option
PROMPT_SCHEDULE_GEN = "a female college student who used to study geology and now studies psychology and neuroscience; likes browsing QQ, Tieba, Zhihu and Xiaohongshu"
PERSONALITY_1: float = 0.6 # probability of personality 1
PERSONALITY_2: float = 0.3 # probability of personality 2
PERSONALITY_3: float = 0.1 # probability of personality 3
memory_ban_words: list = field(
default_factory=lambda: ["表情包", "图片", "回复", "聊天记录"]
) # default for the newly added config option
@staticmethod
def get_config_dir() -> str:
"""获取配置文件目录"""
current_dir = os.path.dirname(os.path.abspath(__file__))
root_dir = os.path.abspath(os.path.join(current_dir, '..', '..', '..'))
config_dir = os.path.join(root_dir, 'config')
root_dir = os.path.abspath(os.path.join(current_dir, "..", "..", ".."))
config_dir = os.path.join(root_dir, "config")
if not os.path.exists(config_dir):
os.makedirs(config_dir)
return config_dir
@@ -109,35 +113,32 @@ class BotConfig:
Args:
value[str]: version specifier expression (string)
Returns:
SpecifierSet
SpecifierSet
"""
try:
converted = SpecifierSet(value)
except InvalidSpecifier as e:
logger.error(
f"{value} section uses an invalid version specifier expression\n",
"Please read https://semver.org/lang/zh-CN/ and fix the code"
)
except InvalidSpecifier:
logger.error(f"{value} section uses an invalid version specifier expression\n", "Please read https://semver.org/lang/zh-CN/ and fix the code")
exit(1)
return converted
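The version gating used throughout this class relies on packaging's Version and SpecifierSet; the containment test reads naturally:

from packaging.specifiers import SpecifierSet
from packaging.version import Version

# inner.version from the config is parsed into a Version, then tested
# against per-section support ranges exactly like this:
ver = Version("0.0.5")
print(ver in SpecifierSet(">=0.0.2"))  # True  -> the section is processed
print(ver in SpecifierSet("<=0.0.0"))  # False -> the legacy branch is skipped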
@classmethod
def get_config_version(cls, toml: dict) -> Version:
"""提取配置文件的 SpecifierSet 版本数据
"""提取配置文件的 SpecifierSet 版本数据
Args:
toml[dict]: 输入的配置文件字典
Returns:
Version
Version
"""
if 'inner' in toml:
if "inner" in toml:
try:
config_version: str = toml["inner"]["version"]
except KeyError as e:
logger.error(f"配置文件中 inner 段 不存在, 这是错误的配置文件")
raise KeyError(f"配置文件中 inner 段 不存在 {e}, 这是错误的配置文件")
logger.error("配置文件中 inner 段 不存在, 这是错误的配置文件")
raise KeyError(f"配置文件中 inner 段 不存在 {e}, 这是错误的配置文件") from e
else:
toml["inner"] = {"version": "0.0.0"}
config_version = toml["inner"]["version"]
@@ -150,7 +151,7 @@ class BotConfig:
"请阅读 https://semver.org/lang/zh-CN/ 修改配置,并参考本项目指定的模板进行修改\n"
"本项目在不同的版本下有不同的模板,请注意识别"
)
raise InvalidVersion("配置文件中 inner段 的 version 键是错误的版本描述\n")
raise InvalidVersion("配置文件中 inner段 的 version 键是错误的版本描述\n") from e
return ver
@@ -160,26 +161,26 @@ class BotConfig:
config = cls()
def personality(parent: dict):
personality_config = parent['personality']
personality = personality_config.get('prompt_personality')
personality_config = parent["personality"]
personality = personality_config.get("prompt_personality")
if len(personality) >= 2:
logger.debug(f"载入自定义人格:{personality}")
config.PROMPT_PERSONALITY = personality_config.get('prompt_personality', config.PROMPT_PERSONALITY)
config.PROMPT_PERSONALITY = personality_config.get("prompt_personality", config.PROMPT_PERSONALITY)
logger.info(f"载入自定义日程prompt:{personality_config.get('prompt_schedule', config.PROMPT_SCHEDULE_GEN)}")
config.PROMPT_SCHEDULE_GEN = personality_config.get('prompt_schedule', config.PROMPT_SCHEDULE_GEN)
config.PROMPT_SCHEDULE_GEN = personality_config.get("prompt_schedule", config.PROMPT_SCHEDULE_GEN)
if config.INNER_VERSION in SpecifierSet(">=0.0.2"):
config.PERSONALITY_1 = personality_config.get('personality_1_probability', config.PERSONALITY_1)
config.PERSONALITY_2 = personality_config.get('personality_2_probability', config.PERSONALITY_2)
config.PERSONALITY_3 = personality_config.get('personality_3_probability', config.PERSONALITY_3)
config.PERSONALITY_1 = personality_config.get("personality_1_probability", config.PERSONALITY_1)
config.PERSONALITY_2 = personality_config.get("personality_2_probability", config.PERSONALITY_2)
config.PERSONALITY_3 = personality_config.get("personality_3_probability", config.PERSONALITY_3)
def emoji(parent: dict):
emoji_config = parent["emoji"]
config.EMOJI_CHECK_INTERVAL = emoji_config.get("check_interval", config.EMOJI_CHECK_INTERVAL)
config.EMOJI_REGISTER_INTERVAL = emoji_config.get("register_interval", config.EMOJI_REGISTER_INTERVAL)
config.EMOJI_CHECK_PROMPT = emoji_config.get('check_prompt', config.EMOJI_CHECK_PROMPT)
config.EMOJI_SAVE = emoji_config.get('auto_save', config.EMOJI_SAVE)
config.EMOJI_CHECK = emoji_config.get('enable_check', config.EMOJI_CHECK)
config.EMOJI_CHECK_PROMPT = emoji_config.get("check_prompt", config.EMOJI_CHECK_PROMPT)
config.EMOJI_SAVE = emoji_config.get("auto_save", config.EMOJI_SAVE)
config.EMOJI_CHECK = emoji_config.get("enable_check", config.EMOJI_CHECK)
def cq_code(parent: dict):
cq_code_config = parent["cq_code"]
@@ -192,12 +193,16 @@ class BotConfig:
config.BOT_QQ = int(bot_qq)
config.BOT_NICKNAME = bot_config.get("nickname", config.BOT_NICKNAME)
if config.INNER_VERSION in SpecifierSet(">=0.0.5"):
config.BOT_ALIAS_NAMES = bot_config.get("alias_names", config.BOT_ALIAS_NAMES)
def response(parent: dict):
response_config = parent["response"]
config.MODEL_R1_PROBABILITY = response_config.get("model_r1_probability", config.MODEL_R1_PROBABILITY)
config.MODEL_V3_PROBABILITY = response_config.get("model_v3_probability", config.MODEL_V3_PROBABILITY)
config.MODEL_R1_DISTILL_PROBABILITY = response_config.get("model_r1_distill_probability",
config.MODEL_R1_DISTILL_PROBABILITY)
config.MODEL_R1_DISTILL_PROBABILITY = response_config.get(
"model_r1_distill_probability", config.MODEL_R1_DISTILL_PROBABILITY
)
config.max_response_length = response_config.get("max_response_length", config.max_response_length)
def model(parent: dict):
@@ -214,7 +219,7 @@ class BotConfig:
"llm_emotion_judge",
"vlm",
"embedding",
"moderation"
"moderation",
]
for item in config_list:
@@ -223,13 +228,7 @@ class BotConfig:
# example base_url: SILICONFLOW_BASE_URL
# example key: SILICONFLOW_KEY
cfg_target = {
"name": "",
"base_url": "",
"key": "",
"pri_in": 0,
"pri_out": 0
}
cfg_target = {"name": "", "base_url": "", "key": "", "pri_in": 0, "pri_out": 0}
if config.INNER_VERSION in SpecifierSet("<=0.0.0"):
cfg_target = cfg_item
@@ -248,7 +247,7 @@ class BotConfig:
cfg_target[i] = cfg_item[i]
except KeyError as e:
logger.error(f"{item} 中的必要字段不存在,请检查")
raise KeyError(f"{item} 中的必要字段 {e} 不存在,请检查")
raise KeyError(f"{item} 中的必要字段 {e} 不存在,请检查") from e
provider = cfg_item.get("provider")
if provider is None:
@@ -273,10 +272,12 @@ class BotConfig:
if config.INNER_VERSION in SpecifierSet(">=0.0.2"):
config.thinking_timeout = msg_config.get("thinking_timeout", config.thinking_timeout)
config.response_willing_amplifier = msg_config.get("response_willing_amplifier",
config.response_willing_amplifier)
config.response_interested_rate_amplifier = msg_config.get("response_interested_rate_amplifier",
config.response_interested_rate_amplifier)
config.response_willing_amplifier = msg_config.get(
"response_willing_amplifier", config.response_willing_amplifier
)
config.response_interested_rate_amplifier = msg_config.get(
"response_interested_rate_amplifier", config.response_interested_rate_amplifier
)
config.down_frequency_rate = msg_config.get("down_frequency_rate", config.down_frequency_rate)
if config.INNER_VERSION in SpecifierSet(">=0.0.5"):
@@ -286,7 +287,7 @@ class BotConfig:
memory_config = parent["memory"]
config.build_memory_interval = memory_config.get("build_memory_interval", config.build_memory_interval)
config.forget_memory_interval = memory_config.get("forget_memory_interval", config.forget_memory_interval)
# Only process the newly added config options when version >= 0.0.4
if config.INNER_VERSION in SpecifierSet(">=0.0.4"):
config.memory_ban_words = set(memory_config.get("memory_ban_words", []))
@@ -307,10 +308,12 @@ class BotConfig:
config.chinese_typo_enable = chinese_typo_config.get("enable", config.chinese_typo_enable)
config.chinese_typo_error_rate = chinese_typo_config.get("error_rate", config.chinese_typo_error_rate)
config.chinese_typo_min_freq = chinese_typo_config.get("min_freq", config.chinese_typo_min_freq)
config.chinese_typo_tone_error_rate = chinese_typo_config.get("tone_error_rate",
config.chinese_typo_tone_error_rate)
config.chinese_typo_word_replace_rate = chinese_typo_config.get("word_replace_rate",
config.chinese_typo_word_replace_rate)
config.chinese_typo_tone_error_rate = chinese_typo_config.get(
"tone_error_rate", config.chinese_typo_tone_error_rate
)
config.chinese_typo_word_replace_rate = chinese_typo_config.get(
"word_replace_rate", config.chinese_typo_word_replace_rate
)
def groups(parent: dict):
groups_config = parent["groups"]
@@ -329,61 +332,19 @@ class BotConfig:
# e.g. "notice": "personality will be removed after 1.3.2": users on a supported version can still
# run the program normally, but they will see this custom notice
include_configs = {
"personality": {
"func": personality,
"support": ">=0.0.0"
},
"emoji": {
"func": emoji,
"support": ">=0.0.0"
},
"cq_code": {
"func": cq_code,
"support": ">=0.0.0"
},
"bot": {
"func": bot,
"support": ">=0.0.0"
},
"response": {
"func": response,
"support": ">=0.0.0"
},
"model": {
"func": model,
"support": ">=0.0.0"
},
"message": {
"func": message,
"support": ">=0.0.0"
},
"memory": {
"func": memory,
"support": ">=0.0.0",
"necessary": False
},
"mood": {
"func": mood,
"support": ">=0.0.0"
},
"keywords_reaction": {
"func": keywords_reaction,
"support": ">=0.0.2",
"necessary": False
},
"chinese_typo": {
"func": chinese_typo,
"support": ">=0.0.3",
"necessary": False
},
"groups": {
"func": groups,
"support": ">=0.0.0"
},
"others": {
"func": others,
"support": ">=0.0.0"
}
"personality": {"func": personality, "support": ">=0.0.0"},
"emoji": {"func": emoji, "support": ">=0.0.0"},
"cq_code": {"func": cq_code, "support": ">=0.0.0"},
"bot": {"func": bot, "support": ">=0.0.0"},
"response": {"func": response, "support": ">=0.0.0"},
"model": {"func": model, "support": ">=0.0.0"},
"message": {"func": message, "support": ">=0.0.0"},
"memory": {"func": memory, "support": ">=0.0.0", "necessary": False},
"mood": {"func": mood, "support": ">=0.0.0"},
"keywords_reaction": {"func": keywords_reaction, "support": ">=0.0.2", "necessary": False},
"chinese_typo": {"func": chinese_typo, "support": ">=0.0.3", "necessary": False},
"groups": {"func": groups, "support": ">=0.0.0"},
"others": {"func": others, "support": ">=0.0.0"},
}
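This table drives a small dispatch loop: each section name maps to a loader function plus a supported version range, with optional notice and necessary keys. A stripped-down sketch of that mechanism (the section names and sample config here are made up for illustration):

from packaging.specifiers import SpecifierSet
from packaging.version import Version

inner_version = Version("0.0.5")
toml_dict = {"memory": {"build_memory_interval": 600}}

include_configs = {
    "memory": {"func": lambda parent: print("memory:", parent["memory"]),
               "support": SpecifierSet(">=0.0.0"), "necessary": False},
    "mood": {"func": lambda parent: None, "support": SpecifierSet(">=0.0.0"), "necessary": False},
}

for key, entry in include_configs.items():
    if key in toml_dict:
        if inner_version in entry["support"]:
            if "notice" in entry:
                print(entry["notice"])  # warn but keep going
            entry["func"](toml_dict)
    elif entry.get("necessary") is False:
        pass  # optional section, fine to omit
    else:
        raise KeyError(f"required config section missing: {key}")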
# Modify in place: convert string version specifiers into version objects
@@ -395,7 +356,7 @@ class BotConfig:
with open(config_path, "rb") as f:
try:
toml_dict = tomli.load(f)
except(tomli.TOMLDecodeError) as e:
except tomli.TOMLDecodeError as e:
logger.critical(f"bot_config.toml is malformed; check line {e.lineno}, column {e.colno}: {e.msg}")
exit(1)
@@ -410,7 +371,7 @@ class BotConfig:
# Check whether the config file version is within the supported range
if config.INNER_VERSION in group_specifierset:
# If the version is supported, check whether a notice exists
if 'notice' in include_configs[key]:
if "notice" in include_configs[key]:
logger.warning(include_configs[key]["notice"])
include_configs[key]["func"](toml_dict)
@@ -424,7 +385,7 @@ class BotConfig:
raise InvalidVersion(f"当前程序仅支持以下版本范围: {group_specifierset}")
# 如果 necessary 项目存在,而且显式声明是 False进入特殊处理
elif "necessary" in include_configs[key] and include_configs[key].get("necessary") == False:
elif "necessary" in include_configs[key] and include_configs[key].get("necessary") is False:
# 通过 pass 处理的项虽然直接忽略也是可以的,但是为了不增加理解困难,依然需要在这里显式处理
if key == "keywords_reaction":
pass


@@ -155,8 +155,8 @@ class CQCode:
logger.error(f"最终请求失败: {str(e)}")
time.sleep(1.5 ** retry) # 指数退避
except Exception as e:
logger.exception(f"[未知错误]")
except Exception:
logger.exception("[未知错误]")
return None
return None
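The retry loop above sleeps 1.5 ** retry seconds between attempts, i.e. classic exponential backoff. The bare pattern, with an illustrative request callable:

import time

def fetch_with_backoff(request, max_retries: int = 3):
    # Retry a flaky call, waiting 1.5 ** retry seconds between attempts
    for retry in range(max_retries):
        try:
            return request()
        except Exception as e:
            if retry == max_retries - 1:
                print(f"final request failed: {e}")
            else:
                time.sleep(1.5 ** retry)  # 1s, then 1.5s, then 2.25s, ...
    return None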
@@ -281,7 +281,7 @@ class CQCode:
logger.debug(f"合并后的转发消息: {combined_messages}")
return f"[转发消息:\n{combined_messages}]"
except Exception as e:
except Exception:
logger.exception("处理转发消息失败")
return '[转发消息]'


@@ -51,8 +51,8 @@ class EmojiManager:
self._initialized = True
# Run an integrity check once at startup
self.check_emoji_file_integrity()
except Exception as e:
logger.exception(f"Failed to initialize emoji manager")
except Exception:
logger.exception("Failed to initialize emoji manager")
def _ensure_db(self):
"""Ensure the database is initialized"""
@@ -87,8 +87,8 @@ class EmojiManager:
{'_id': emoji_id},
{'$inc': {'usage_count': 1}}
)
except Exception as e:
logger.exception(f"Failed to record emoji usage")
except Exception:
logger.exception("Failed to record emoji usage")
async def get_emoji_for_text(self, text: str) -> Optional[str]:
"""根据文本内容获取相关表情包
@@ -117,7 +117,7 @@ class EmojiManager:
try:
# Fetch all emojis
all_emojis = list(self.db.db.emoji.find({}, {'_id': 1, 'path': 1, 'embedding': 1, 'discription': 1}))
all_emojis = list(self.db.db.emoji.find({}, {'_id': 1, 'path': 1, 'embedding': 1, 'description': 1}))
if not all_emojis:
logger.warning("There are no emojis in the database")
@@ -160,9 +160,9 @@ class EmojiManager:
{'$inc': {'usage_count': 1}}
)
logger.success(
f"Found a matching emoji: {selected_emoji.get('discription', 'no description')} (similarity: {similarity:.4f})")
f"Found a matching emoji: {selected_emoji.get('description', 'no description')} (similarity: {similarity:.4f})")
# Tweak the wording a bit, otherwise hallucinations creep in; the description already says it is an emoji
return selected_emoji['path'], "[ %s ]" % selected_emoji.get('discription', 'no description')
return selected_emoji['path'], "[ %s ]" % selected_emoji.get('description', 'no description')
except Exception as search_error:
logger.error(f"搜索表情包失败: {str(search_error)}")
@@ -174,7 +174,7 @@ class EmojiManager:
logger.error(f"获取表情包失败: {str(e)}")
return None
async def _get_emoji_discription(self, image_base64: str) -> str:
async def _get_emoji_description(self, image_base64: str) -> str:
"""获取表情包的标签"""
try:
prompt = '这是一个表情包,使用中文简洁的描述一下表情包的内容和表情包所表达的情感'
@@ -203,7 +203,7 @@ class EmojiManager:
try:
prompt = f'This is the message {global_config.BOT_NICKNAME} is about to send:\n{text}\nTo pair it with an emoji, output what emotion the emoji should express and what feeling it should give; not too brief, not too long. Do not output any analysis of the message content; output only the adjective part of \"a certain kind of feeling\".'
content, _ = await self.llm_emotion_judge.generate_response_async(prompt)
content, _ = await self.llm_emotion_judge.generate_response_async(prompt, temperature=1.5)
logger.info(f"Output description: {content}")
return content
@@ -236,36 +236,36 @@ class EmojiManager:
continue
# Get the emoji's description
discription = await self._get_emoji_discription(image_base64)
description = await self._get_emoji_description(image_base64)
if global_config.EMOJI_CHECK:
check = await self._check_emoji(image_base64)
if '是' not in check:
os.remove(image_path)
logger.info(f"描述: {discription}")
logger.info(f"描述: {description}")
logger.info(f"其不满足过滤规则,被剔除 {check}")
continue
logger.info(f"check通过 {check}")
if discription is not None:
embedding = await get_embedding(discription)
if description is not None:
embedding = await get_embedding(description)
# Prepare the database record
emoji_record = {
'filename': filename,
'path': image_path,
'embedding': embedding,
'discription': discription,
'description': description,
'timestamp': int(time.time())
}
# Save to the database
self.db.db['emoji'].insert_one(emoji_record)
logger.success(f"Registered new emoji: {filename}")
logger.info(f"Description: {discription}")
logger.info(f"Description: {description}")
else:
logger.warning(f"Skipped emoji: {filename}")
except Exception as e:
logger.exception(f"Failed to scan emojis")
except Exception:
logger.exception("Failed to scan emojis")
async def _periodic_scan(self, interval_MINS: int = 10):
"""Periodically scan for new emojis"""


@@ -94,7 +94,7 @@ class ResponseGenerator:
try:
content, reasoning_content = await model.generate_response(prompt)
except Exception:
logger.exception(f"生成回复时出错")
logger.exception("生成回复时出错")
return None
# 保存到数据库
@@ -146,7 +146,7 @@ class ResponseGenerator:
return ["neutral"]
except Exception:
logger.exception(f"获取情感标签时出错")
logger.exception("获取情感标签时出错")
return ["neutral"]
async def _process_response(self, content: str) -> Tuple[List[str], List[str]]:


@@ -61,7 +61,7 @@ class Message_Sender:
auto_escape=auto_escape
)
logger.debug(f"发送消息{message}成功")
except Exception as e:
except Exception:
logger.exception(f"发送消息{message}失败")
@@ -120,7 +120,7 @@ class MessageContainer:
return True
return False
except Exception:
logger.exception(f"移除消息时发生错误")
logger.exception("移除消息时发生错误")
return False
def has_messages(self) -> bool:
@@ -214,7 +214,7 @@ class MessageManager:
if not container.remove_message(msg):
logger.warning("尝试删除不存在的消息")
except Exception:
logger.exception(f"处理超时消息时发生错误")
logger.exception("处理超时消息时发生错误")
continue
async def start_processor(self):


@@ -131,18 +131,19 @@ class PromptBuilder:
probability_1 = global_config.PERSONALITY_1
probability_2 = global_config.PERSONALITY_2
probability_3 = global_config.PERSONALITY_3
prompt_personality = ''
prompt_personality = f'{activate_prompt}Your online name is {global_config.BOT_NICKNAME}, and you also go by many aliases: {"/".join(global_config.BOT_ALIAS_NAMES)}'
personality_choice = random.random()
if personality_choice < probability_1: # personality 1
prompt_personality = f'''{activate_prompt}Your online name is {global_config.BOT_NICKNAME}. {personality[0]}, and you are browsing a QQ group. {promt_info_prompt},
prompt_personality += f'''{personality[0]}, and you are browsing a QQ group. {promt_info_prompt},
Now give an everyday, conversational reply; keep it plain and as brief as you can. {keywords_reaction_prompt}
Mind the group's conversation, don't deliberately play up your academic background, don't reply too methodically; you may show personality.'''
elif personality_choice < probability_1 + probability_2: # personality 2
prompt_personality = f'''{activate_prompt}Your online name is {global_config.BOT_NICKNAME}. {personality[1]}, and you are browsing a QQ group. {promt_info_prompt},
prompt_personality += f'''{personality[1]}, and you are browsing a QQ group. {promt_info_prompt},
Now give an everyday, conversational reply; voice your own views rather than just agreeing, and keep it brief. {keywords_reaction_prompt}
Express your own views and opinions. You may show personality.'''
else: # personality 3
prompt_personality = f'''{activate_prompt}Your online name is {global_config.BOT_NICKNAME}. {personality[2]}, and you are browsing a QQ group. {promt_info_prompt},
prompt_personality += f'''{personality[2]}, and you are browsing a QQ group. {promt_info_prompt},
Now give an everyday, conversational reply; voice your own views rather than just agreeing, and keep it brief. {keywords_reaction_prompt}
Express your own views and opinions. You may show personality.'''
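The branch above is a cumulative-probability pick: one random.random() draw is compared against running sums of the three personality weights. The same idea in isolation, using the default weights from BotConfig:

import random

def pick_personality(p1: float = 0.6, p2: float = 0.3) -> int:
    # One uniform draw against cumulative thresholds:
    # [0, p1) -> 1, [p1, p1 + p2) -> 2, the rest -> 3
    r = random.random()
    if r < p1:
        return 1
    elif r < p1 + p2:
        return 2
    return 3

counts = {1: 0, 2: 0, 3: 0}
for _ in range(10_000):
    counts[pick_personality()] += 1
print(counts)  # roughly 6000 / 3000 / 1000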


@@ -45,6 +45,6 @@ class MessageStorage:
self.db.db.messages.insert_one(message_data)
except Exception:
logger.exception(f"存储消息失败")
logger.exception("存储消息失败")
# 如果需要其他存储相关的函数,可以在这里添加


@@ -53,19 +53,13 @@ def db_message_to_str(message_dict: Dict) -> str:
return result
def is_mentioned_bot_in_message(message: Message) -> bool:
"""检查消息是否提到了机器人"""
keywords = [global_config.BOT_NICKNAME]
for keyword in keywords:
if keyword in message.processed_plain_text:
return True
return False
def is_mentioned_bot_in_txt(message: str) -> bool:
"""检查消息是否提到了机器人"""
keywords = [global_config.BOT_NICKNAME]
for keyword in keywords:
if global_config.BOT_NICKNAME is None:
return True
if global_config.BOT_NICKNAME in message:
return True
for keyword in global_config.BOT_ALIAS_NAMES:
if keyword in message:
return True
return False


@@ -79,7 +79,7 @@ class KnowledgeLibrary:
content = f.read()
# Split the content into 600-character segments
segments = [content[i:i+600] for i in range(0, len(content), 600)]
segments = [content[i:i+600] for i in range(0, len(content), 300)]
# Process each segment
for segment in segments:
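Changing the range step from 600 to 300 while keeping the 600-character slice turns disjoint chunks into 50%-overlapping windows, so a fact that straddles a chunk boundary still appears whole in some segment:

content = "x" * 1500  # stand-in for a knowledge file

disjoint = [content[i:i+600] for i in range(0, len(content), 600)]  # old: step 600
overlap = [content[i:i+600] for i in range(0, len(content), 300)]   # new: step 300

print([len(s) for s in disjoint])  # [600, 600, 300]
print([len(s) for s in overlap])   # [600, 600, 600, 600, 300]; neighbors share 300 chars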


@@ -25,26 +25,46 @@ class Memory_graph:
self.db = Database.get_instance()
def connect_dot(self, concept1, concept2):
# If the edge already exists, increase its strength
# Avoid self-loops
if concept1 == concept2:
return
current_time = datetime.datetime.now().timestamp()
# If the edge already exists, increase its strength
if self.G.has_edge(concept1, concept2):
self.G[concept1][concept2]['strength'] = self.G[concept1][concept2].get('strength', 1) + 1
# Update the last-modified time
self.G[concept1][concept2]['last_modified'] = current_time
else:
# If this is a new edge, initialize strength to 1
self.G.add_edge(concept1, concept2, strength=1)
# If this is a new edge, initialize strength to 1
self.G.add_edge(concept1, concept2,
strength=1,
created_time=current_time, # add creation time
last_modified=current_time) # add last-modified time
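connect_dot is an upsert on a networkx graph: bump strength and last_modified when the edge exists, otherwise create it carrying both timestamps. Stripped of the class, the pattern is:

import datetime
import networkx as nx

G = nx.Graph()

def connect(a: str, b: str) -> None:
    if a == b:
        return  # no self-loops
    now = datetime.datetime.now().timestamp()
    if G.has_edge(a, b):
        G[a][b]['strength'] = G[a][b].get('strength', 1) + 1
        G[a][b]['last_modified'] = now
    else:
        G.add_edge(a, b, strength=1, created_time=now, last_modified=now)

connect("geology", "psychology")
connect("geology", "psychology")
print(G["geology"]["psychology"]["strength"])  # 2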
def add_dot(self, concept, memory):
current_time = datetime.datetime.now().timestamp()
if concept in self.G:
# If the node already exists, append the new memory to the existing list
if 'memory_items' in self.G.nodes[concept]:
if not isinstance(self.G.nodes[concept]['memory_items'], list):
# If it is not a list yet, convert it into one
self.G.nodes[concept]['memory_items'] = [self.G.nodes[concept]['memory_items']]
self.G.nodes[concept]['memory_items'].append(memory)
# Update the last-modified time
self.G.nodes[concept]['last_modified'] = current_time
else:
self.G.nodes[concept]['memory_items'] = [memory]
# The node exists but has no memory_items, so this is its first memory; set created_time
if 'created_time' not in self.G.nodes[concept]:
self.G.nodes[concept]['created_time'] = current_time
self.G.nodes[concept]['last_modified'] = current_time
else:
# If this is a new node, create a new memory list
self.G.add_node(concept, memory_items=[memory])
# If this is a new node, create a new memory list
self.G.add_node(concept,
memory_items=[memory],
created_time=current_time, # add creation time
last_modified=current_time) # add last-modified time
def get_dot(self, concept):
# Check whether the node exists in the graph
@@ -191,15 +211,11 @@ class Hippocampus:
async def memory_compress(self, messages: list, compress_rate=0.1):
"""压缩消息记录为记忆
Args:
messages: 消息记录字典列表每个字典包含text和time字段
compress_rate: 压缩率
Returns:
set: (话题, 记忆) 元组集合
tuple: (压缩记忆集合, 相似主题字典)
"""
if not messages:
return set()
return set(), {}
# 合并消息文本,同时保留时间信息
input_text = ""
@@ -246,12 +262,33 @@ class Hippocampus:
# Wait for all tasks to finish
compressed_memory = set()
similar_topics_dict = {} # stores the list of similar topics for each topic
for topic, task in tasks:
response = await task
if response:
compressed_memory.add((topic, response[0]))
# For each topic, look for similar topics that already exist
existing_topics = list(self.memory_graph.G.nodes())
similar_topics = []
for existing_topic in existing_topics:
topic_words = set(jieba.cut(topic))
existing_words = set(jieba.cut(existing_topic))
all_words = topic_words | existing_words
v1 = [1 if word in topic_words else 0 for word in all_words]
v2 = [1 if word in existing_words else 0 for word in all_words]
similarity = cosine_similarity(v1, v2)
if similarity >= 0.6:
similar_topics.append((existing_topic, similarity))
similar_topics.sort(key=lambda x: x[1], reverse=True)
similar_topics = similar_topics[:5]
similar_topics_dict[topic] = similar_topics
return compressed_memory
return compressed_memory, similar_topics_dict
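The similarity test here is cosine over binary bag-of-words vectors: both topics are segmented with jieba, the union of the two word sets forms the vocabulary, and each topic becomes a 0/1 vector over it. A self-contained version, with a plain-Python cosine standing in for the project's helper:

import math
import jieba

def cosine_similarity(v1, v2):
    dot = sum(a * b for a, b in zip(v1, v2))
    norm = math.sqrt(sum(a * a for a in v1)) * math.sqrt(sum(b * b for b in v2))
    return dot / norm if norm else 0.0

def topic_similarity(topic: str, existing_topic: str) -> float:
    topic_words = set(jieba.cut(topic))
    existing_words = set(jieba.cut(existing_topic))
    all_words = topic_words | existing_words
    v1 = [1 if w in topic_words else 0 for w in all_words]
    v2 = [1 if w in existing_words else 0 for w in all_words]
    return cosine_similarity(v1, v2)

# Topics sharing most of their words score near 1 and get linked at >= 0.6
print(topic_similarity("心理学研究", "心理学实验"))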
def calculate_topic_num(self, text, compress_rate):
"""计算文本的话题数量"""
@@ -265,33 +302,40 @@ class Hippocampus:
return topic_num
async def operation_build_memory(self, chat_size=20):
# Sampling frequency for recent messages
time_frequency = {'near': 2, 'mid': 4, 'far': 2}
memory_sample = self.get_memory_sample(chat_size, time_frequency)
for i, input_text in enumerate(memory_sample, 1):
# Progress visualization
time_frequency = {'near': 3, 'mid': 8, 'far': 5}
memory_samples = self.get_memory_sample(chat_size, time_frequency)
for i, messages in enumerate(memory_samples, 1):
all_topics = []
progress = (i / len(memory_sample)) * 100
# Progress visualization
progress = (i / len(memory_samples)) * 100
bar_length = 30
filled_length = int(bar_length * i // len(memory_sample))
filled_length = int(bar_length * i // len(memory_samples))
bar = '█' * filled_length + '-' * (bar_length - filled_length)
logger.debug(f"Progress: [{bar}] {progress:.1f}% ({i}/{len(memory_sample)})")
logger.debug(f"Progress: [{bar}] {progress:.1f}% ({i}/{len(memory_samples)})")
# Generate compressed memories, represented as (topic, memory) tuples
compressed_memory = set()
compress_rate = 0.1
compressed_memory = await self.memory_compress(input_text, compress_rate)
logger.info(f"Compressed memory count: {len(compressed_memory)}")
# Add the memories to the graph
compressed_memory, similar_topics_dict = await self.memory_compress(messages, compress_rate)
logger.info(f"Compressed memory count: {len(compressed_memory)}, familiar topics: {len(similar_topics_dict)}")
for topic, memory in compressed_memory:
logger.info(f"Adding node: {topic}")
self.memory_graph.add_dot(topic, memory)
all_topics.append(topic) # collect all topics
all_topics.append(topic)
# Link similar topics that already exist
if topic in similar_topics_dict:
similar_topics = similar_topics_dict[topic]
for similar_topic, similarity in similar_topics:
if topic != similar_topic:
strength = int(similarity * 10)
logger.info(f"Linking similar nodes: {topic} -> {similar_topic} (strength: {strength})")
self.memory_graph.G.add_edge(topic, similar_topic, strength=strength)
# Link related topics from the same batch
for i in range(len(all_topics)):
for j in range(i + 1, len(all_topics)):
logger.info(f"Linking nodes: {all_topics[i]} -> {all_topics[j]}")
logger.info(f"Linking same-batch nodes: {all_topics[i]} -> {all_topics[j]}")
self.memory_graph.connect_dot(all_topics[i], all_topics[j])
self.sync_memory_to_db()
@@ -302,7 +346,7 @@ class Hippocampus:
db_nodes = list(self.memory_graph.db.db.graph_data.nodes.find())
memory_nodes = list(self.memory_graph.G.nodes(data=True))
# Convert the database nodes into dict format for easy lookup
# Convert the database nodes into dict format, for easy lookup
db_nodes_dict = {node['concept']: node for node in db_nodes}
# Check and update nodes
@@ -313,13 +357,19 @@ class Hippocampus:
# Compute the hash of the in-memory node
memory_hash = self.calculate_node_hash(concept, memory_items)
# Get the time information
created_time = data.get('created_time', datetime.datetime.now().timestamp())
last_modified = data.get('last_modified', datetime.datetime.now().timestamp())
if concept not in db_nodes_dict:
# Node missing from the database; add it
# Node missing from the database, add it
node_data = {
'concept': concept,
'memory_items': memory_items,
'hash': memory_hash
'hash': memory_hash,
'created_time': created_time,
'last_modified': last_modified
}
self.memory_graph.db.db.graph_data.nodes.insert_one(node_data)
else:
@@ -327,25 +377,21 @@ class Hippocampus:
db_node = db_nodes_dict[concept]
db_hash = db_node.get('hash', None)
# If the hashes differ update the node
# If the hashes differ, update the node
if db_hash != memory_hash:
self.memory_graph.db.db.graph_data.nodes.update_one(
{'concept': concept},
{'$set': {
'memory_items': memory_items,
'hash': memory_hash
'hash': memory_hash,
'created_time': created_time,
'last_modified': last_modified
}}
)
# Check for and delete extra nodes in the database
memory_concepts = set(node[0] for node in memory_nodes)
for db_node in db_nodes:
if db_node['concept'] not in memory_concepts:
self.memory_graph.db.db.graph_data.nodes.delete_one({'concept': db_node['concept']})
# Handle edge information
db_edges = list(self.memory_graph.db.db.graph_data.edges.find())
memory_edges = list(self.memory_graph.G.edges())
memory_edges = list(self.memory_graph.G.edges(data=True))
# Build a dict of edge hashes
db_edge_dict = {}
@@ -357,10 +403,14 @@ class Hippocampus:
}
# Check and update edges
for source, target in memory_edges:
for source, target, data in memory_edges:
edge_hash = self.calculate_edge_hash(source, target)
edge_key = (source, target)
strength = self.memory_graph.G[source][target].get('strength', 1)
strength = data.get('strength', 1)
# Get the edge's time information
created_time = data.get('created_time', datetime.datetime.now().timestamp())
last_modified = data.get('last_modified', datetime.datetime.now().timestamp())
if edge_key not in db_edge_dict:
# Add the new edge
@@ -368,7 +418,9 @@ class Hippocampus:
'source': source,
'target': target,
'strength': strength,
'hash': edge_hash
'hash': edge_hash,
'created_time': created_time,
'last_modified': last_modified
}
self.memory_graph.db.db.graph_data.edges.insert_one(edge_data)
else:
@@ -378,20 +430,12 @@ class Hippocampus:
{'source': source, 'target': target},
{'$set': {
'hash': edge_hash,
'strength': strength
'strength': strength,
'created_time': created_time,
'last_modified': last_modified
}}
)
# Delete extra edges
memory_edge_set = set(memory_edges)
for edge_key in db_edge_dict:
if edge_key not in memory_edge_set:
source, target = edge_key
self.memory_graph.db.db.graph_data.edges.delete_one({
'source': source,
'target': target
})
def sync_memory_from_db(self):
"""从数据库同步数据到内存中的图结构"""
# 清空当前图
@@ -405,61 +449,107 @@ class Hippocampus:
# Ensure memory_items is a list
if not isinstance(memory_items, list):
memory_items = [memory_items] if memory_items else []
# Get the time information
created_time = node.get('created_time', datetime.datetime.now().timestamp())
last_modified = node.get('last_modified', datetime.datetime.now().timestamp())
# Add the node to the graph
self.memory_graph.G.add_node(concept, memory_items=memory_items)
self.memory_graph.G.add_node(concept,
memory_items=memory_items,
created_time=created_time,
last_modified=last_modified)
# Load all edges from the database
edges = self.memory_graph.db.db.graph_data.edges.find()
for edge in edges:
source = edge['source']
target = edge['target']
strength = edge.get('strength', 1) # get strength default 1
strength = edge.get('strength', 1) # get strength, default 1
# Get the time information
created_time = edge.get('created_time', datetime.datetime.now().timestamp())
last_modified = edge.get('last_modified', datetime.datetime.now().timestamp())
# Only add the edge when both the source and target nodes exist
if source in self.memory_graph.G and target in self.memory_graph.G:
self.memory_graph.G.add_edge(source, target, strength=strength)
self.memory_graph.G.add_edge(source, target,
strength=strength,
created_time=created_time,
last_modified=last_modified)
async def operation_forget_topic(self, percentage=0.1):
"""随机选择图中一定比例的节点进行检查根据条件决定是否遗忘"""
# 获取所有节点
"""随机选择图中一定比例的节点和边进行检查,根据时间条件决定是否遗忘"""
all_nodes = list(self.memory_graph.G.nodes())
# 计算要检查的节点数量
check_count = max(1, int(len(all_nodes) * percentage))
# 随机选择节点
nodes_to_check = random.sample(all_nodes, check_count)
forgotten_nodes = []
all_edges = list(self.memory_graph.G.edges())
check_nodes_count = max(1, int(len(all_nodes) * percentage))
check_edges_count = max(1, int(len(all_edges) * percentage))
nodes_to_check = random.sample(all_nodes, check_nodes_count)
edges_to_check = random.sample(all_edges, check_edges_count)
edge_changes = {'weakened': 0, 'removed': 0}
node_changes = {'reduced': 0, 'removed': 0}
current_time = datetime.datetime.now().timestamp()
# Check edges and forget
logger.info("Starting edge check...")
for source, target in edges_to_check:
edge_data = self.memory_graph.G[source][target]
last_modified = edge_data.get('last_modified')
# print(source,target)
# print(f"float(last_modified):{float(last_modified)}" )
# print(f"current_time:{current_time}")
# print(f"current_time - last_modified:{current_time - last_modified}")
if current_time - last_modified > 3600*24: # test
current_strength = edge_data.get('strength', 1)
new_strength = current_strength - 1
if new_strength <= 0:
self.memory_graph.G.remove_edge(source, target)
edge_changes['removed'] += 1
logger.info(f"\033[1;31m[连接移除]\033[0m {source} - {target}")
else:
edge_data['strength'] = new_strength
edge_data['last_modified'] = current_time
edge_changes['weakened'] += 1
logger.info(f"\033[1;34m[连接减弱]\033[0m {source} - {target} (强度: {current_strength} -> {new_strength})")
# 检查并遗忘话题
logger.info("开始检查节点...")
for node in nodes_to_check:
# Get the node's number of connections
connections = self.memory_graph.G.degree(node)
# Get the node's number of memory items
memory_items = self.memory_graph.G.nodes[node].get('memory_items', [])
if not isinstance(memory_items, list):
memory_items = [memory_items] if memory_items else []
content_count = len(memory_items)
# Check connection strength
weak_connections = True
if connections > 1: # only check strength when there is more than one connection
for neighbor in self.memory_graph.G.neighbors(node):
strength = self.memory_graph.G[node][neighbor].get('strength', 1)
if strength > 2:
weak_connections = False
break
# If the forgetting conditions are met
if (connections <= 1 and weak_connections) or content_count <= 2:
removed_item = self.memory_graph.forget_topic(node)
if removed_item:
forgotten_nodes.append((node, removed_item))
logger.debug(f"遗忘节点 {node} 的记忆: {removed_item}")
# 同步到数据库
if forgotten_nodes:
node_data = self.memory_graph.G.nodes[node]
last_modified = node_data.get('last_modified', current_time)
if current_time - last_modified > 3600*24: # test
memory_items = node_data.get('memory_items', [])
if not isinstance(memory_items, list):
memory_items = [memory_items] if memory_items else []
if memory_items:
current_count = len(memory_items)
removed_item = random.choice(memory_items)
memory_items.remove(removed_item)
if memory_items:
self.memory_graph.G.nodes[node]['memory_items'] = memory_items
self.memory_graph.G.nodes[node]['last_modified'] = current_time
node_changes['reduced'] += 1
logger.info(f"\033[1;33m[记忆减少]\033[0m {node} (记忆数量: {current_count} -> {len(memory_items)})")
else:
self.memory_graph.G.remove_node(node)
node_changes['removed'] += 1
logger.info(f"\033[1;31m[节点移除]\033[0m {node}")
if any(count > 0 for count in edge_changes.values()) or any(count > 0 for count in node_changes.values()):
self.sync_memory_to_db()
logger.debug(f"完成遗忘操作,共遗忘 {len(forgotten_nodes)} 个节点的记忆")
logger.info("\n遗忘操作统计:")
logger.info(f"连接变化: {edge_changes['weakened']} 个减弱, {edge_changes['removed']} 个移除")
logger.info(f"节点变化: {node_changes['reduced']} 个减少记忆, {node_changes['removed']} 个移除")
else:
logger.debug("本次检查没有节点满足遗忘条件")
logger.info("\n本次检查没有节点或连接满足遗忘条件")
async def merge_memory(self, topic):
"""
@@ -486,7 +576,7 @@ class Hippocampus:
logger.debug(f"选择的记忆:\n{merged_text}")
# 使用memory_compress生成新的压缩记忆
compressed_memories = await self.memory_compress(selected_memories, 0.1)
compressed_memories, _ = await self.memory_compress(selected_memories, 0.1)
# 从原记忆列表中移除被选中的记忆
for memory in selected_memories:

File diff suppressed because it is too large


@@ -44,8 +44,8 @@ class LLM_request:
self.db.db.llm_usage.create_index([("model_name", 1)])
self.db.db.llm_usage.create_index([("user_id", 1)])
self.db.db.llm_usage.create_index([("request_type", 1)])
except Exception as e:
logger.error(f"Failed to create database indexes")
except Exception:
logger.error("Failed to create database indexes")
def _record_usage(self, prompt_tokens: int, completion_tokens: int, total_tokens: int,
user_id: str = "system", request_type: str = "chat",
@@ -80,7 +80,7 @@ class LLM_request:
f"总计: {total_tokens}"
)
except Exception:
logger.error(f"记录token使用情况失败")
logger.error("记录token使用情况失败")
def _calculate_cost(self, prompt_tokens: int, completion_tokens: int) -> float:
"""计算API调用成本
@@ -194,7 +194,7 @@ class LLM_request:
if hasattr(global_config, 'llm_normal') and global_config.llm_normal.get(
'name') == old_model_name:
global_config.llm_normal['name'] = self.model_name
logger.warning(f"已将全局配置中的 llm_normal 模型降级")
logger.warning("已将全局配置中的 llm_normal 模型降级")
# 更新payload中的模型名
if payload and 'model' in payload:
@@ -227,7 +227,7 @@ class LLM_request:
delta_content = ""
accumulated_content += delta_content
except Exception:
logger.exception(f"解析流式输出错")
logger.exception("解析流式输出错")
content = accumulated_content
reasoning_content = ""
think_match = re.search(r'<think>(.*?)</think>', content, re.DOTALL)
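The streaming path accumulates deltas into one string, then pulls any chain-of-thought out of <think> tags with a DOTALL regex so the dot can cross newlines. A compact demonstration of that extraction:

import re

content = "<think>user sounds tired,\nkeep it short</think>好的,早点休息~"

think_match = re.search(r'<think>(.*?)</think>', content, re.DOTALL)
reasoning_content = think_match.group(1) if think_match else ""
reply = re.sub(r'<think>.*?</think>', '', content, flags=re.DOTALL).strip()

print(reasoning_content)  # user sounds tired, / keep it short
print(reply)              # 好的,早点休息~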
@@ -355,7 +355,7 @@ class LLM_request:
"""构建请求头"""
if no_key:
return {
"Authorization": f"Bearer **********",
"Authorization": "Bearer **********",
"Content-Type": "application/json"
}
else:


@@ -68,7 +68,7 @@ class ScheduleGenerator:
1. Morning study and work arrangements
2. Afternoon activities and tasks
3. Evening plans and rest time
List concrete time points and their activities in chronological order; use a single time point rather than a time range; return the schedule as JSON, content only, with no comments; use 24-hour times, in the format {"时间": "活动","时间": "活动",...}."""
List concrete time points and their activities in chronological order; use a single time point rather than a time range; return the schedule as JSON, content only, with no comments and no markdown or code-block styling; use 24-hour times, in the format {"时间": "活动","时间": "活动",...}."""
try:
schedule_text, _ = await self.llm_scheduler.generate_response(prompt)
@@ -91,7 +91,7 @@ class ScheduleGenerator:
try:
schedule_dict = json.loads(schedule_text)
return schedule_dict
except json.JSONDecodeError as e:
except json.JSONDecodeError:
logger.exception("解析日程失败: {}".format(schedule_text))
return False
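The added instruction ("no markdown or code-block styling") exists because models often wrap JSON in ``` fences, which makes json.loads throw. A defensive parse that tolerates a fenced reply anyway (a convenience sketch, not what the project code does):

import json
import re

def parse_schedule(schedule_text: str):
    # Strip an optional ```json ... ``` wrapper before parsing
    fenced = re.match(r"```(?:json)?\s*(.*?)\s*```$", schedule_text.strip(), re.DOTALL)
    if fenced:
        schedule_text = fenced.group(1)
    try:
        return json.loads(schedule_text)
    except json.JSONDecodeError:
        return False

print(parse_schedule('```json\n{"08:00": "上课"}\n```'))  # {'08:00': '上课'}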


@@ -155,7 +155,7 @@ class LLMStatistics:
all_stats = self._collect_all_statistics()
self._save_statistics(all_stats)
except Exception:
logger.exception(f"统计数据处理失败")
logger.exception("统计数据处理失败")
# 等待1分钟
for _ in range(60):