diff --git a/bot.py b/bot.py
index 88c07939b..30714e846 100644
--- a/bot.py
+++ b/bot.py
@@ -204,8 +204,8 @@ def check_eula():
eula_confirmed = True
eula_updated = False
if eula_new_hash == os.getenv("EULA_AGREE"):
- eula_confirmed = True
- eula_updated = False
+ eula_confirmed = True
+ eula_updated = False
# 检查隐私条款确认文件是否存在
if privacy_confirm_file.exists():
@@ -214,14 +214,16 @@ def check_eula():
if privacy_new_hash == confirmed_content:
privacy_confirmed = True
privacy_updated = False
- if privacy_new_hash == os.getenv("PRIVACY_AGREE"):
- privacy_confirmed = True
- privacy_updated = False
+ if privacy_new_hash == os.getenv("PRIVACY_AGREE"):
+ privacy_confirmed = True
+ privacy_updated = False
# 如果EULA或隐私条款有更新,提示用户重新确认
if eula_updated or privacy_updated:
print("EULA或隐私条款内容已更新,请在阅读后重新确认,继续运行视为同意更新后的以上两款协议")
- print(f'输入"同意"或"confirmed"或设置环境变量"EULA_AGREE={eula_new_hash}"和"PRIVACY_AGREE={privacy_new_hash}"继续运行')
+ print(
+ f'输入"同意"或"confirmed"或设置环境变量"EULA_AGREE={eula_new_hash}"和"PRIVACY_AGREE={privacy_new_hash}"继续运行'
+ )
while True:
user_input = input().strip().lower()
if user_input in ["同意", "confirmed"]:
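For reference, `check_eula()` boils down to a hash comparison: recompute a digest of the current document, then accept either a previously cached digest or a matching environment variable. A minimal sketch of that pattern follows; the digest algorithm (MD5 here) and file layout are assumptions, since the diff does not show how `eula_new_hash` is computed:

```python
import hashlib
import os
from pathlib import Path

def needs_reconfirm(doc_path: Path, confirm_path: Path, env_var: str) -> bool:
    """若文档内容有更新且尚未重新确认,返回 True。"""
    text = doc_path.read_text(encoding="utf-8")
    new_hash = hashlib.md5(text.encode("utf-8")).hexdigest()  # 假设使用MD5摘要
    if confirm_path.exists() and confirm_path.read_text().strip() == new_hash:
        return False  # 缓存的哈希与当前内容一致,无需重新确认
    if os.getenv(env_var) == new_hash:
        return False  # 通过环境变量(如 EULA_AGREE)确认
    return True  # 内容有更新,需要用户重新确认
```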
diff --git a/docs/installation_cute.md b/docs/installation_cute.md
index ca97f18e9..5eb5dfdcd 100644
--- a/docs/installation_cute.md
+++ b/docs/installation_cute.md
@@ -147,9 +147,7 @@ enable_check = false # 是否要检查表情包是不是合适的喵
check_prompt = "符合公序良俗" # 检查表情包的标准呢
[others]
-enable_advance_output = true # 是否要显示更多的运行信息呢
enable_kuuki_read = true # 让机器人能够"察言观色"喵
-enable_debug_output = false # 是否启用调试输出喵
enable_friend_chat = false # 是否启用好友聊天喵
[groups]
diff --git a/docs/installation_standard.md b/docs/installation_standard.md
index dcbbf0c99..a2e60f22a 100644
--- a/docs/installation_standard.md
+++ b/docs/installation_standard.md
@@ -115,9 +115,7 @@ talk_frequency_down = [] # 降低回复频率的群号
ban_user_id = [] # 禁止回复的用户QQ号
[others]
-enable_advance_output = true # 是否启用高级输出
enable_kuuki_read = true # 是否启用读空气功能
-enable_debug_output = false # 是否启用调试输出
enable_friend_chat = false # 是否启用好友聊天
# 模型配置
diff --git a/docs/linux_deploy_guide_for_beginners.md b/docs/linux_deploy_guide_for_beginners.md
index ece0a3334..1f1b0899f 100644
--- a/docs/linux_deploy_guide_for_beginners.md
+++ b/docs/linux_deploy_guide_for_beginners.md
@@ -320,7 +320,7 @@ sudo systemctl enable bot.service # 启动bot服务
sudo systemctl status bot.service # 检查bot服务状态
```
-```python
+```bash
python bot.py # 运行麦麦
```
diff --git a/requirements.txt b/requirements.txt
index 1e9e5ff25..0dfd75148 100644
Binary files a/requirements.txt and b/requirements.txt differ
diff --git a/src/common/logger.py b/src/common/logger.py
index f0b2dfe5c..91f1a1da0 100644
--- a/src/common/logger.py
+++ b/src/common/logger.py
@@ -31,9 +31,10 @@ _handler_registry: Dict[str, List[int]] = {}
current_file_path = Path(__file__).resolve()
LOG_ROOT = "logs"
-ENABLE_ADVANCE_OUTPUT = False
+SIMPLE_OUTPUT = os.getenv("SIMPLE_OUTPUT", "false").lower() in ("true", "1", "yes")
+print(f"SIMPLE_OUTPUT: {SIMPLE_OUTPUT}")
-if ENABLE_ADVANCE_OUTPUT:
+if not SIMPLE_OUTPUT:
# 默认全局配置
DEFAULT_CONFIG = {
# 日志级别配置
@@ -85,7 +86,6 @@ MEMORY_STYLE_CONFIG = {
},
}
-# 海马体日志样式配置
SENDER_STYLE_CONFIG = {
"advanced": {
"console_format": (
@@ -152,17 +152,17 @@ CHAT_STYLE_CONFIG = {
"file_format": ("{time:YYYY-MM-DD HH:mm:ss} | {level: <8} | {extra[module]: <15} | 见闻 | {message}"),
},
"simple": {
- "console_format": ("{time:MM-DD HH:mm} | 见闻 | {message}"),
+ "console_format": ("{time:MM-DD HH:mm} | 见闻 | {message}"), # noqa: E501
"file_format": ("{time:YYYY-MM-DD HH:mm:ss} | {level: <8} | {extra[module]: <15} | 见闻 | {message}"),
},
}
-# 根据ENABLE_ADVANCE_OUTPUT选择配置
-MEMORY_STYLE_CONFIG = MEMORY_STYLE_CONFIG["advanced"] if ENABLE_ADVANCE_OUTPUT else MEMORY_STYLE_CONFIG["simple"]
-TOPIC_STYLE_CONFIG = TOPIC_STYLE_CONFIG["advanced"] if ENABLE_ADVANCE_OUTPUT else TOPIC_STYLE_CONFIG["simple"]
-SENDER_STYLE_CONFIG = SENDER_STYLE_CONFIG["advanced"] if ENABLE_ADVANCE_OUTPUT else SENDER_STYLE_CONFIG["simple"]
-LLM_STYLE_CONFIG = LLM_STYLE_CONFIG["advanced"] if ENABLE_ADVANCE_OUTPUT else LLM_STYLE_CONFIG["simple"]
-CHAT_STYLE_CONFIG = CHAT_STYLE_CONFIG["advanced"] if ENABLE_ADVANCE_OUTPUT else CHAT_STYLE_CONFIG["simple"]
+# 根据SIMPLE_OUTPUT选择配置
+MEMORY_STYLE_CONFIG = MEMORY_STYLE_CONFIG["simple"] if SIMPLE_OUTPUT else MEMORY_STYLE_CONFIG["advanced"]
+TOPIC_STYLE_CONFIG = TOPIC_STYLE_CONFIG["simple"] if SIMPLE_OUTPUT else TOPIC_STYLE_CONFIG["advanced"]
+SENDER_STYLE_CONFIG = SENDER_STYLE_CONFIG["simple"] if SIMPLE_OUTPUT else SENDER_STYLE_CONFIG["advanced"]
+LLM_STYLE_CONFIG = LLM_STYLE_CONFIG["simple"] if SIMPLE_OUTPUT else LLM_STYLE_CONFIG["advanced"]
+CHAT_STYLE_CONFIG = CHAT_STYLE_CONFIG["simple"] if SIMPLE_OUTPUT else CHAT_STYLE_CONFIG["advanced"]
def is_registered_module(record: dict) -> bool:
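One caveat on the new `SIMPLE_OUTPUT` switch: `os.getenv` returns a string, and any non-empty string (including `"false"`) is truthy, so the value must be parsed into a real boolean before it can drive `if not SIMPLE_OUTPUT:` and the `"simple" if SIMPLE_OUTPUT else "advanced"` selections above. A self-contained sketch of that parsing:

```python
import os

def env_flag(name: str, default: str = "false") -> bool:
    # bool("false") == True,所以不能直接对 os.getenv 的返回值做真值判断
    return os.getenv(name, default).strip().lower() in ("true", "1", "yes")

SIMPLE_OUTPUT = env_flag("SIMPLE_OUTPUT")
style = "simple" if SIMPLE_OUTPUT else "advanced"
print(f"SIMPLE_OUTPUT: {SIMPLE_OUTPUT} -> 使用 {style} 日志样式")
```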
diff --git a/src/plugins/chat/__init__.py b/src/plugins/chat/__init__.py
index a54f781a0..7edf91558 100644
--- a/src/plugins/chat/__init__.py
+++ b/src/plugins/chat/__init__.py
@@ -92,12 +92,13 @@ async def _(bot: Bot):
@msg_in.handle()
async def _(bot: Bot, event: MessageEvent, state: T_State):
- #处理合并转发消息
+ # 处理合并转发消息
if "forward" in event.message:
- await chat_bot.handle_forward_message(event , bot)
- else :
+ await chat_bot.handle_forward_message(event, bot)
+ else:
await chat_bot.handle_message(event, bot)
+
@notice_matcher.handle()
async def _(bot: Bot, event: NoticeEvent, state: T_State):
logger.debug(f"收到通知:{event}")
@@ -110,7 +111,7 @@ async def build_memory_task():
"""每build_memory_interval秒执行一次记忆构建"""
logger.debug("[记忆构建]------------------------------------开始构建记忆--------------------------------------")
start_time = time.time()
- await hippocampus.operation_build_memory(chat_size=20)
+ await hippocampus.operation_build_memory()
end_time = time.time()
logger.success(
f"[记忆构建]--------------------------记忆构建完成:耗时: {end_time - start_time:.2f} "
diff --git a/src/plugins/chat/bot.py b/src/plugins/chat/bot.py
index d30940f97..38450f903 100644
--- a/src/plugins/chat/bot.py
+++ b/src/plugins/chat/bot.py
@@ -154,7 +154,7 @@ class ChatBot:
)
# 开始思考的时间点
thinking_time_point = round(time.time(), 2)
- logger.info(f"开始思考的时间点: {thinking_time_point}")
+ # logger.debug(f"开始思考的时间点: {thinking_time_point}")
think_id = "mt" + str(thinking_time_point)
thinking_message = MessageThinking(
message_id=think_id,
@@ -418,13 +418,12 @@ class ChatBot:
# 用户屏蔽,不区分私聊/群聊
if event.user_id in global_config.ban_user_id:
return
-
+
if isinstance(event, GroupMessageEvent):
if event.group_id:
if event.group_id not in global_config.talk_allowed_groups:
return
-
# 获取合并转发消息的详细信息
forward_info = await bot.get_forward_msg(message_id=event.message_id)
messages = forward_info["messages"]
@@ -434,17 +433,17 @@ class ChatBot:
for node in messages:
# 提取发送者昵称
nickname = node["sender"].get("nickname", "未知用户")
-
+
# 递归处理消息内容
- message_content = await self.process_message_segments(node["message"],layer=0)
-
+ message_content = await self.process_message_segments(node["message"], layer=0)
+
# 拼接为【昵称】+ 内容
processed_messages.append(f"【{nickname}】{message_content}")
# 组合所有消息
combined_message = "\n".join(processed_messages)
combined_message = f"合并转发消息内容:\n{combined_message}"
-
+
# 构建用户信息(使用转发消息的发送者)
user_info = UserInfo(
user_id=event.user_id,
@@ -456,11 +455,7 @@ class ChatBot:
# 构建群聊信息(如果是群聊)
group_info = None
if isinstance(event, GroupMessageEvent):
- group_info = GroupInfo(
- group_id=event.group_id,
- group_name=None,
- platform="qq"
- )
+ group_info = GroupInfo(group_id=event.group_id, group_name=None, platform="qq")
# 创建消息对象
message_cq = MessageRecvCQ(
@@ -475,19 +470,19 @@ class ChatBot:
# 进入标准消息处理流程
await self.message_process(message_cq)
- async def process_message_segments(self, segments: list,layer:int) -> str:
+ async def process_message_segments(self, segments: list, layer: int) -> str:
"""递归处理消息段"""
parts = []
for seg in segments:
- part = await self.process_segment(seg,layer+1)
+ part = await self.process_segment(seg, layer + 1)
parts.append(part)
return "".join(parts)
- async def process_segment(self, seg: dict , layer:int) -> str:
+ async def process_segment(self, seg: dict, layer: int) -> str:
"""处理单个消息段"""
seg_type = seg["type"]
- if layer > 3 :
- #防止有那种100层转发消息炸飞麦麦
+ if layer > 3:
+ # 防止有那种100层转发消息炸飞麦麦
return "【转发消息】"
if seg_type == "text":
return seg["data"]["text"]
@@ -504,13 +499,14 @@ class ChatBot:
nested_messages.append("合并转发消息内容:")
for node in nested_nodes:
nickname = node["sender"].get("nickname", "未知用户")
- content = await self.process_message_segments(node["message"],layer=layer)
+ content = await self.process_message_segments(node["message"], layer=layer)
# nested_messages.append('-' * layer)
nested_messages.append(f"{'--' * layer}【{nickname}】{content}")
# nested_messages.append(f"{'--' * layer}合并转发第【{layer}】层结束")
return "\n".join(nested_messages)
else:
return f"[{seg_type}]"
-
+
+
# 创建全局ChatBot实例
chat_bot = ChatBot()
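The `layer` argument threaded through `process_message_segments` / `process_segment` caps recursion depth so a deeply nested merge-forward chain collapses to a placeholder after three levels. A stripped-down synchronous sketch of the same traversal; it assumes nested forward nodes are inlined under `data["content"]`, whereas the real handler fetches them asynchronously through the bot API:

```python
MAX_LAYER = 3  # 超过3层的嵌套转发直接折叠,防止超深转发拖垮处理流程

def process_segments(segments: list, layer: int) -> str:
    # 每个消息段进入下一层
    return "".join(process_segment(seg, layer + 1) for seg in segments)

def process_segment(seg: dict, layer: int) -> str:
    if layer > MAX_LAYER:
        return "【转发消息】"
    if seg["type"] == "text":
        return seg["data"]["text"]
    if seg["type"] == "forward":
        # 假设嵌套节点内联在 data["content"] 中(真实代码中需异步拉取)
        lines = ["合并转发消息内容:"]
        for node in seg["data"].get("content", []):
            nickname = node["sender"].get("nickname", "未知用户")
            content = process_segments(node["message"], layer)
            lines.append(f"{'--' * layer}【{nickname}】{content}")
        return "\n".join(lines)
    return f"[{seg['type']}]"
```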
diff --git a/src/plugins/chat/config.py b/src/plugins/chat/config.py
index ce30b280b..17b3cfece 100644
--- a/src/plugins/chat/config.py
+++ b/src/plugins/chat/config.py
@@ -68,9 +68,9 @@ class BotConfig:
MODEL_V3_PROBABILITY: float = 0.1 # V3模型概率
MODEL_R1_DISTILL_PROBABILITY: float = 0.1 # R1蒸馏模型概率
- enable_advance_output: bool = False # 是否启用高级输出
+ # enable_advance_output: bool = False # 是否启用高级输出
enable_kuuki_read: bool = True # 是否启用读空气功能
- enable_debug_output: bool = False # 是否启用调试输出
+ # enable_debug_output: bool = False # 是否启用调试输出
enable_friend_chat: bool = False # 是否启用好友聊天
mood_update_interval: float = 1.0 # 情绪更新间隔 单位秒
@@ -106,6 +106,11 @@ class BotConfig:
memory_forget_time: int = 24 # 记忆遗忘时间(小时)
memory_forget_percentage: float = 0.01 # 记忆遗忘比例
memory_compress_rate: float = 0.1 # 记忆压缩率
+ build_memory_sample_num: int = 10 # 记忆构建采样数量
+ build_memory_sample_length: int = 20 # 记忆构建采样长度
+ memory_build_distribution: list = field(
+        default_factory=lambda: [4, 2, 0.6, 24, 8, 0.4]
+ ) # 记忆构建分布,参数:分布1均值,标准差,权重,分布2均值,标准差,权重
memory_ban_words: list = field(
default_factory=lambda: ["表情包", "图片", "回复", "聊天记录"]
) # 添加新的配置项默认值
@@ -315,6 +320,20 @@ class BotConfig:
"memory_forget_percentage", config.memory_forget_percentage
)
config.memory_compress_rate = memory_config.get("memory_compress_rate", config.memory_compress_rate)
+ if config.INNER_VERSION in SpecifierSet(">=0.0.11"):
+ config.memory_build_distribution = memory_config.get(
+ "memory_build_distribution",
+ config.memory_build_distribution
+ )
+ config.build_memory_sample_num = memory_config.get(
+ "build_memory_sample_num",
+ config.build_memory_sample_num
+ )
+ config.build_memory_sample_length = memory_config.get(
+ "build_memory_sample_length",
+ config.build_memory_sample_length
+ )
+
def remote(parent: dict):
remote_config = parent["remote"]
@@ -351,10 +370,10 @@ class BotConfig:
def others(parent: dict):
others_config = parent["others"]
- config.enable_advance_output = others_config.get("enable_advance_output", config.enable_advance_output)
+ # config.enable_advance_output = others_config.get("enable_advance_output", config.enable_advance_output)
config.enable_kuuki_read = others_config.get("enable_kuuki_read", config.enable_kuuki_read)
if config.INNER_VERSION in SpecifierSet(">=0.0.7"):
- config.enable_debug_output = others_config.get("enable_debug_output", config.enable_debug_output)
+ # config.enable_debug_output = others_config.get("enable_debug_output", config.enable_debug_output)
config.enable_friend_chat = others_config.get("enable_friend_chat", config.enable_friend_chat)
# 版本表达式:>=1.0.0,<2.0.0
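The six numbers in `memory_build_distribution` read as `(mean1, std1, weight1, mean2, std2, weight2)`: a two-component Gaussian mixture over "hours before now", with the default `[4, 2, 0.6, 24, 8, 0.4]` biasing sampling toward recent chat while still revisiting day-old history. A rough numpy sketch of how such a mixture turns into sample timestamps (the real implementation is `MemoryBuildScheduler` in `sample_distribution.py`; this only illustrates what the parameters mean):

```python
import time
import numpy as np

def sample_timestamps(dist: list, n: int) -> list:
    """dist = [均值1, 标准差1, 权重1, 均值2, 标准差2, 权重2],单位:小时。"""
    mean1, std1, w1, mean2, std2, w2 = dist
    p1 = w1 / (w1 + w2)  # 权重归一化
    now = time.time()
    pick_first = np.random.random(n) < p1  # 每个样本先按权重选择一个分布
    hours_ago = np.where(
        pick_first,
        np.random.normal(mean1, std1, n),
        np.random.normal(mean2, std2, n),
    )
    hours_ago = np.clip(hours_ago, 0, None)  # 不允许采到未来的时间点
    return [now - h * 3600 for h in hours_ago]

# 默认配置下生成10个采样时间戳
timestamps = sample_timestamps([4, 2, 0.6, 24, 8, 0.4], 10)
```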
diff --git a/src/plugins/chat/message_sender.py b/src/plugins/chat/message_sender.py
index 741cc2889..d79e9e7ab 100644
--- a/src/plugins/chat/message_sender.py
+++ b/src/plugins/chat/message_sender.py
@@ -220,7 +220,7 @@ class MessageManager:
message_timeout = container.get_timeout_messages()
if message_timeout:
- logger.warning(f"发现{len(message_timeout)}条超时消息")
+ logger.debug(f"发现{len(message_timeout)}条超时消息")
for msg in message_timeout:
if msg == message_earliest:
continue
diff --git a/src/plugins/chat/prompt_builder.py b/src/plugins/chat/prompt_builder.py
index 379aa4624..4ef8b6283 100644
--- a/src/plugins/chat/prompt_builder.py
+++ b/src/plugins/chat/prompt_builder.py
@@ -141,21 +141,21 @@ class PromptBuilder:
logger.debug(f"知识检索耗时: {(end_time - start_time):.3f}秒")
prompt = f"""
-今天是{current_date},现在是{current_time},你今天的日程是:\
-``\n
-{bot_schedule.today_schedule}\n
-``\n
-{prompt_info}\n
-{memory_prompt}\n
-{chat_target}\n
-{chat_talking_prompt}\n
-现在"{sender_name}"说的:\n
-``\n
-{message_txt}\n
-``\n
+今天是{current_date},现在是{current_time},你今天的日程是:
+``
+{bot_schedule.today_schedule}
+``
+{prompt_info}
+{memory_prompt}
+{chat_target}
+{chat_talking_prompt}
+现在"{sender_name}"说的:
+``
+{message_txt}
+``
引起了你的注意,{relation_prompt_all}{mood_prompt}\n
``
-你的网名叫{global_config.BOT_NICKNAME},{prompt_personality}。
+你的网名叫{global_config.BOT_NICKNAME},有人也叫你{"/".join(global_config.BOT_ALIAS_NAMES)},{prompt_personality}。
正在{bot_schedule_now_activity}的你同时也在一边{chat_target_2},现在请你读读之前的聊天记录,然后给出日常且口语化的回复,平淡一些,
尽量简短一些。{keywords_reaction_prompt}请注意把握聊天内容,不要刻意突出自身学科背景,不要回复的太有条理,可以有个性。
{prompt_ger}
diff --git a/src/plugins/chat/utils.py b/src/plugins/chat/utils.py
index 8b728ee4d..1563ea526 100644
--- a/src/plugins/chat/utils.py
+++ b/src/plugins/chat/utils.py
@@ -76,18 +76,11 @@ def calculate_information_content(text):
def get_closest_chat_from_db(length: int, timestamp: str):
- """从数据库中获取最接近指定时间戳的聊天记录
-
- Args:
- length: 要获取的消息数量
- timestamp: 时间戳
-
- Returns:
- list: 消息记录列表,每个记录包含时间和文本信息
- """
+ # print(f"获取最接近指定时间戳的聊天记录,长度: {length}, 时间戳: {timestamp}")
+ # print(f"当前时间: {timestamp},转换后时间: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(timestamp))}")
chat_records = []
closest_record = db.messages.find_one({"time": {"$lte": timestamp}}, sort=[("time", -1)])
-
+ # print(f"最接近的记录: {closest_record}")
if closest_record:
closest_time = closest_record["time"]
chat_id = closest_record["chat_id"] # 获取chat_id
@@ -102,7 +95,9 @@ def get_closest_chat_from_db(length: int, timestamp: str):
.sort("time", 1)
.limit(length)
)
-
+ # print(f"获取到的记录: {chat_records}")
+    # length = len(chat_records)  # 仅供下方调试输出使用
+ # print(f"获取到的记录长度: {length}")
# 转换记录格式
formatted_records = []
for record in chat_records:
diff --git a/src/plugins/chat/utils_image.py b/src/plugins/chat/utils_image.py
index ea0c160eb..521795024 100644
--- a/src/plugins/chat/utils_image.py
+++ b/src/plugins/chat/utils_image.py
@@ -112,7 +112,7 @@ class ImageManager:
# 查询缓存的描述
cached_description = self._get_description_from_db(image_hash, "emoji")
if cached_description:
- logger.info(f"缓存表情包描述: {cached_description}")
+ logger.debug(f"缓存表情包描述: {cached_description}")
return f"[表情包:{cached_description}]"
# 调用AI获取描述
diff --git a/src/plugins/memory_system/memory.py b/src/plugins/memory_system/memory.py
index 07a7fb2ee..f5012c828 100644
--- a/src/plugins/memory_system/memory.py
+++ b/src/plugins/memory_system/memory.py
@@ -18,6 +18,7 @@ from ..chat.utils import (
)
from ..models.utils_model import LLM_request
from src.common.logger import get_module_logger, LogConfig, MEMORY_STYLE_CONFIG
+from src.plugins.memory_system.sample_distribution import MemoryBuildScheduler
# 定义日志配置
memory_config = LogConfig(
@@ -25,6 +26,11 @@ memory_config = LogConfig(
console_format=MEMORY_STYLE_CONFIG["console_format"],
file_format=MEMORY_STYLE_CONFIG["file_format"],
)
+# print(f"memory_config: {memory_config}")
+# print(f"MEMORY_STYLE_CONFIG: {MEMORY_STYLE_CONFIG}")
+# print(f"MEMORY_STYLE_CONFIG['console_format']: {MEMORY_STYLE_CONFIG['console_format']}")
+# print(f"MEMORY_STYLE_CONFIG['file_format']: {MEMORY_STYLE_CONFIG['file_format']}")
+
logger = get_module_logger("memory_system", config=memory_config)
@@ -195,25 +201,17 @@ class Hippocampus:
return hash(f"{nodes[0]}:{nodes[1]}")
def random_get_msg_snippet(self, target_timestamp: float, chat_size: int, max_memorized_time_per_msg: int) -> list:
- """随机抽取一段时间内的消息片段
- Args:
- - target_timestamp: 目标时间戳
- - chat_size: 抽取的消息数量
- - max_memorized_time_per_msg: 每条消息的最大记忆次数
-
- Returns:
- - list: 抽取出的消息记录列表
-
- """
try_count = 0
- # 最多尝试三次抽取
+        # 最多尝试3次抽取
while try_count < 3:
messages = get_closest_chat_from_db(length=chat_size, timestamp=target_timestamp)
if messages:
+ # print(f"抽取到的消息: {messages}")
# 检查messages是否均没有达到记忆次数限制
for message in messages:
if message["memorized_times"] >= max_memorized_time_per_msg:
messages = None
+ # print(f"抽取到的消息提取次数达到限制,跳过")
break
if messages:
# 成功抽取短期消息样本
@@ -224,63 +222,48 @@ class Hippocampus:
)
return messages
try_count += 1
- # 三次尝试均失败
return None
- def get_memory_sample(self, chat_size=20, time_frequency=None):
- """获取记忆样本
-
- Returns:
- list: 消息记录列表,每个元素是一个消息记录字典列表
- """
+ def get_memory_sample(self):
# 硬编码:每条消息最大记忆次数
# 如有需求可写入global_config
- if time_frequency is None:
- time_frequency = {"near": 2, "mid": 4, "far": 3}
max_memorized_time_per_msg = 3
- current_timestamp = datetime.datetime.now().timestamp()
+ # 创建双峰分布的记忆调度器
+ scheduler = MemoryBuildScheduler(
+ n_hours1=global_config.memory_build_distribution[0], # 第一个分布均值(4小时前)
+ std_hours1=global_config.memory_build_distribution[1], # 第一个分布标准差
+ weight1=global_config.memory_build_distribution[2], # 第一个分布权重 60%
+ n_hours2=global_config.memory_build_distribution[3], # 第二个分布均值(24小时前)
+ std_hours2=global_config.memory_build_distribution[4], # 第二个分布标准差
+ weight2=global_config.memory_build_distribution[5], # 第二个分布权重 40%
+            total_samples=global_config.build_memory_sample_num  # 生成的采样时间点总数(默认10个)
+ )
+
+ # 生成时间戳数组
+ timestamps = scheduler.get_timestamp_array()
+ # logger.debug(f"生成的时间戳数组: {timestamps}")
+ # print(f"生成的时间戳数组: {timestamps}")
+ # print(f"时间戳的实际时间: {[time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(ts)) for ts in timestamps]}")
+ logger.info(f"回忆往事: {[time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(ts)) for ts in timestamps]}")
chat_samples = []
-
- # 短期:1h 中期:4h 长期:24h
- logger.debug("正在抽取短期消息样本")
- for i in range(time_frequency.get("near")):
- random_time = current_timestamp - random.randint(1, 3600)
- messages = self.random_get_msg_snippet(random_time, chat_size, max_memorized_time_per_msg)
+ for timestamp in timestamps:
+ messages = self.random_get_msg_snippet(
+ timestamp,
+ global_config.build_memory_sample_length,
+ max_memorized_time_per_msg
+ )
if messages:
- logger.debug(f"成功抽取短期消息样本{len(messages)}条")
+ time_diff = (datetime.datetime.now().timestamp() - timestamp) / 3600
+ logger.debug(f"成功抽取 {time_diff:.1f} 小时前的消息样本,共{len(messages)}条")
+ # print(f"成功抽取 {time_diff:.1f} 小时前的消息样本,共{len(messages)}条")
chat_samples.append(messages)
else:
- logger.warning(f"第{i}次短期消息样本抽取失败")
-
- logger.debug("正在抽取中期消息样本")
- for i in range(time_frequency.get("mid")):
- random_time = current_timestamp - random.randint(3600, 3600 * 4)
- messages = self.random_get_msg_snippet(random_time, chat_size, max_memorized_time_per_msg)
- if messages:
- logger.debug(f"成功抽取中期消息样本{len(messages)}条")
- chat_samples.append(messages)
- else:
- logger.warning(f"第{i}次中期消息样本抽取失败")
-
- logger.debug("正在抽取长期消息样本")
- for i in range(time_frequency.get("far")):
- random_time = current_timestamp - random.randint(3600 * 4, 3600 * 24)
- messages = self.random_get_msg_snippet(random_time, chat_size, max_memorized_time_per_msg)
- if messages:
- logger.debug(f"成功抽取长期消息样本{len(messages)}条")
- chat_samples.append(messages)
- else:
- logger.warning(f"第{i}次长期消息样本抽取失败")
+ logger.debug(f"时间戳 {timestamp} 的消息样本抽取失败")
return chat_samples
async def memory_compress(self, messages: list, compress_rate=0.1):
- """压缩消息记录为记忆
-
- Returns:
- tuple: (压缩记忆集合, 相似主题字典)
- """
if not messages:
return set(), {}
@@ -313,15 +296,23 @@ class Hippocampus:
topics_response = await self.llm_topic_judge.generate_response(self.find_topic_llm(input_text, topic_num))
# 过滤topics
+ # 从配置文件获取需要过滤的关键词列表
filter_keywords = global_config.memory_ban_words
+
+ # 将topics_response[0]中的中文逗号、顿号、空格都替换成英文逗号
+ # 然后按逗号分割成列表,并去除每个topic前后的空白字符
topics = [
topic.strip()
for topic in topics_response[0].replace(",", ",").replace("、", ",").replace(" ", ",").split(",")
if topic.strip()
]
+
+ # 过滤掉包含禁用关键词的topic
+ # any()检查topic中是否包含任何一个filter_keywords中的关键词
+ # 只保留不包含禁用关键词的topic
filtered_topics = [topic for topic in topics if not any(keyword in topic for keyword in filter_keywords)]
- logger.info(f"过滤后话题: {filtered_topics}")
+ logger.debug(f"过滤后话题: {filtered_topics}")
# 创建所有话题的请求任务
tasks = []
@@ -331,31 +322,42 @@ class Hippocampus:
tasks.append((topic.strip(), task))
# 等待所有任务完成
- compressed_memory = set()
+ # 初始化压缩后的记忆集合和相似主题字典
+ compressed_memory = set() # 存储压缩后的(主题,内容)元组
similar_topics_dict = {} # 存储每个话题的相似主题列表
+
+ # 遍历每个主题及其对应的LLM任务
for topic, task in tasks:
response = await task
if response:
+ # 将主题和LLM生成的内容添加到压缩记忆中
compressed_memory.add((topic, response[0]))
- # 为每个话题查找相似的已存在主题
+
+ # 为当前主题寻找相似的已存在主题
existing_topics = list(self.memory_graph.G.nodes())
similar_topics = []
+ # 计算当前主题与每个已存在主题的相似度
for existing_topic in existing_topics:
+ # 使用jieba分词,将主题转换为词集合
topic_words = set(jieba.cut(topic))
existing_words = set(jieba.cut(existing_topic))
- all_words = topic_words | existing_words
- v1 = [1 if word in topic_words else 0 for word in all_words]
- v2 = [1 if word in existing_words else 0 for word in all_words]
+ # 构建词向量用于计算余弦相似度
+ all_words = topic_words | existing_words # 所有不重复的词
+ v1 = [1 if word in topic_words else 0 for word in all_words] # 当前主题的词向量
+ v2 = [1 if word in existing_words else 0 for word in all_words] # 已存在主题的词向量
+ # 计算余弦相似度
similarity = cosine_similarity(v1, v2)
- if similarity >= 0.6:
+ # 如果相似度超过阈值,添加到相似主题列表
+ if similarity >= 0.7:
similar_topics.append((existing_topic, similarity))
+ # 按相似度降序排序,只保留前3个最相似的主题
similar_topics.sort(key=lambda x: x[1], reverse=True)
- similar_topics = similar_topics[:5]
+ similar_topics = similar_topics[:3]
similar_topics_dict[topic] = similar_topics
return compressed_memory, similar_topics_dict
@@ -372,10 +374,10 @@ class Hippocampus:
)
return topic_num
- async def operation_build_memory(self, chat_size=20):
- time_frequency = {"near": 1, "mid": 4, "far": 4}
- memory_samples = self.get_memory_sample(chat_size, time_frequency)
-
+ async def operation_build_memory(self):
+ memory_samples = self.get_memory_sample()
+ all_added_nodes = []
+ all_added_edges = []
for i, messages in enumerate(memory_samples, 1):
all_topics = []
# 加载进度可视化
@@ -387,12 +389,13 @@ class Hippocampus:
compress_rate = global_config.memory_compress_rate
compressed_memory, similar_topics_dict = await self.memory_compress(messages, compress_rate)
- logger.info(f"压缩后记忆数量: {len(compressed_memory)},似曾相识的话题: {len(similar_topics_dict)}")
+            logger.debug(f"压缩后记忆: {compressed_memory},似曾相识的话题: {similar_topics_dict}")
current_time = datetime.datetime.now().timestamp()
-
+ logger.debug(f"添加节点: {', '.join(topic for topic, _ in compressed_memory)}")
+ all_added_nodes.extend(topic for topic, _ in compressed_memory)
+
for topic, memory in compressed_memory:
- logger.info(f"添加节点: {topic}")
self.memory_graph.add_dot(topic, memory)
all_topics.append(topic)
@@ -402,7 +405,8 @@ class Hippocampus:
for similar_topic, similarity in similar_topics:
if topic != similar_topic:
strength = int(similarity * 10)
- logger.info(f"连接相似节点: {topic} 和 {similar_topic} (强度: {strength})")
+ logger.debug(f"连接相似节点: {topic} 和 {similar_topic} (强度: {strength})")
+ all_added_edges.append(f"{topic}-{similar_topic}")
self.memory_graph.G.add_edge(
topic,
similar_topic,
@@ -414,9 +418,13 @@ class Hippocampus:
# 连接同批次的相关话题
for i in range(len(all_topics)):
for j in range(i + 1, len(all_topics)):
- logger.info(f"连接同批次节点: {all_topics[i]} 和 {all_topics[j]}")
+ logger.debug(f"连接同批次节点: {all_topics[i]} 和 {all_topics[j]}")
+ all_added_edges.append(f"{all_topics[i]}-{all_topics[j]}")
self.memory_graph.connect_dot(all_topics[i], all_topics[j])
+ logger.success(f"更新记忆: {', '.join(all_added_nodes)}")
+ logger.success(f"强化连接: {', '.join(all_added_edges)}")
self.sync_memory_to_db()
def sync_memory_to_db(self):
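The similarity gate in `memory_compress` (threshold raised from 0.6 to 0.7, candidates trimmed from the top 5 to the top 3) scores topic pairs by cosine similarity over binary jieba word vectors. A self-contained version of that computation, matching the logic above:

```python
import math
import jieba

def topic_similarity(a: str, b: str) -> float:
    """对两个主题做jieba分词,按0/1词向量计算余弦相似度。"""
    words_a, words_b = set(jieba.cut(a)), set(jieba.cut(b))
    vocab = words_a | words_b
    v1 = [1 if w in words_a else 0 for w in vocab]
    v2 = [1 if w in words_b else 0 for w in vocab]
    dot = sum(x * y for x, y in zip(v1, v2))
    norm1 = math.sqrt(sum(x * x for x in v1))
    norm2 = math.sqrt(sum(y * y for y in v2))
    return dot / (norm1 * norm2) if norm1 and norm2 else 0.0

def most_similar(topic: str, existing: list, threshold: float = 0.7, top_k: int = 3):
    """返回与topic相似度达到阈值的已有主题,按相似度降序取前top_k个。"""
    scored = [(t, topic_similarity(topic, t)) for t in existing]
    scored = [(t, s) for t, s in scored if s >= threshold]
    scored.sort(key=lambda x: x[1], reverse=True)
    return scored[:top_k]
```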
diff --git a/src/plugins/memory_system/memory_manual_build.py b/src/plugins/memory_system/memory_manual_build.py
index 0bf276ddd..b575f455e 100644
--- a/src/plugins/memory_system/memory_manual_build.py
+++ b/src/plugins/memory_system/memory_manual_build.py
@@ -7,11 +7,9 @@ import sys
import time
from collections import Counter
from pathlib import Path
-
import matplotlib.pyplot as plt
import networkx as nx
from dotenv import load_dotenv
-from src.common.logger import get_module_logger
import jieba
# from chat.config import global_config
@@ -19,6 +17,7 @@ import jieba
root_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../.."))
sys.path.append(root_path)
+from src.common.logger import get_module_logger # noqa: E402
from src.common.database import db # noqa E402
from src.plugins.memory_system.offline_llm import LLMModel # noqa E402
diff --git a/src/plugins/memory_system/memory_test1.py b/src/plugins/memory_system/memory_test1.py
deleted file mode 100644
index df4f892d0..000000000
--- a/src/plugins/memory_system/memory_test1.py
+++ /dev/null
@@ -1,1185 +0,0 @@
-# -*- coding: utf-8 -*-
-import datetime
-import math
-import random
-import sys
-import time
-from collections import Counter
-from pathlib import Path
-
-import matplotlib.pyplot as plt
-import networkx as nx
-from dotenv import load_dotenv
-from src.common.logger import get_module_logger
-import jieba
-
-logger = get_module_logger("mem_test")
-
-"""
-该理论认为,当两个或多个事物在形态上具有相似性时,
-它们在记忆中会形成关联。
-例如,梨和苹果在形状和都是水果这一属性上有相似性,
-所以当我们看到梨时,很容易通过形态学联想记忆联想到苹果。
-这种相似性联想有助于我们对新事物进行分类和理解,
-当遇到一个新的类似水果时,
-我们可以通过与已有的水果记忆进行相似性匹配,
-来推测它的一些特征。
-
-
-
-时空关联性联想:
-除了相似性联想,MAM 还强调时空关联性联想。
-如果两个事物在时间或空间上经常同时出现,它们也会在记忆中形成关联。
-比如,每次在公园里看到花的时候,都能听到鸟儿的叫声,
-那么花和鸟儿叫声的形态特征(花的视觉形态和鸟叫的听觉形态)就会在记忆中形成关联,
-以后听到鸟叫可能就会联想到公园里的花。
-
-"""
-
-# from chat.config import global_config
-sys.path.append("C:/GitHub/MaiMBot") # 添加项目根目录到 Python 路径
-from src.common.database import db # noqa E402
-from src.plugins.memory_system.offline_llm import LLMModel # noqa E402
-
-# 获取当前文件的目录
-current_dir = Path(__file__).resolve().parent
-# 获取项目根目录(上三层目录)
-project_root = current_dir.parent.parent.parent
-# env.dev文件路径
-env_path = project_root / ".env.dev"
-
-# 加载环境变量
-if env_path.exists():
- logger.info(f"从 {env_path} 加载环境变量")
- load_dotenv(env_path)
-else:
- logger.warning(f"未找到环境变量文件: {env_path}")
- logger.info("将使用默认配置")
-
-
-def calculate_information_content(text):
- """计算文本的信息量(熵)"""
- char_count = Counter(text)
- total_chars = len(text)
-
- entropy = 0
- for count in char_count.values():
- probability = count / total_chars
- entropy -= probability * math.log2(probability)
-
- return entropy
-
-
-def get_closest_chat_from_db(length: int, timestamp: str):
- """从数据库中获取最接近指定时间戳的聊天记录,并记录读取次数
-
- Returns:
- list: 消息记录字典列表,每个字典包含消息内容和时间信息
- """
- chat_records = []
- closest_record = db.messages.find_one({"time": {"$lte": timestamp}}, sort=[("time", -1)])
-
- if closest_record and closest_record.get("memorized", 0) < 4:
- closest_time = closest_record["time"]
- group_id = closest_record["group_id"]
- # 获取该时间戳之后的length条消息,且groupid相同
- records = list(
- db.messages.find({"time": {"$gt": closest_time}, "group_id": group_id}).sort("time", 1).limit(length)
- )
-
- # 更新每条消息的memorized属性
- for record in records:
- current_memorized = record.get("memorized", 0)
- if current_memorized > 3:
- print("消息已读取3次,跳过")
- return ""
-
- # 更新memorized值
- db.messages.update_one({"_id": record["_id"]}, {"$set": {"memorized": current_memorized + 1}})
-
- # 添加到记录列表中
- chat_records.append(
- {"text": record["detailed_plain_text"], "time": record["time"], "group_id": record["group_id"]}
- )
-
- return chat_records
-
-
-class Memory_cortex:
- def __init__(self, memory_graph: "Memory_graph"):
- self.memory_graph = memory_graph
-
- def sync_memory_from_db(self):
- """
- 从数据库同步数据到内存中的图结构
- 将清空当前内存中的图,并从数据库重新加载所有节点和边
- """
- # 清空当前图
- self.memory_graph.G.clear()
-
- # 获取当前时间作为默认时间
- default_time = datetime.datetime.now().timestamp()
-
- # 从数据库加载所有节点
- nodes = db.graph_data.nodes.find()
- for node in nodes:
- concept = node["concept"]
- memory_items = node.get("memory_items", [])
- # 确保memory_items是列表
- if not isinstance(memory_items, list):
- memory_items = [memory_items] if memory_items else []
-
- # 获取时间属性,如果不存在则使用默认时间
- created_time = node.get("created_time")
- last_modified = node.get("last_modified")
-
- # 如果时间属性不存在,则更新数据库
- if created_time is None or last_modified is None:
- created_time = default_time
- last_modified = default_time
- # 更新数据库中的节点
- db.graph_data.nodes.update_one(
- {"concept": concept}, {"$set": {"created_time": created_time, "last_modified": last_modified}}
- )
- logger.info(f"为节点 {concept} 添加默认时间属性")
-
- # 添加节点到图中,包含时间属性
- self.memory_graph.G.add_node(
- concept, memory_items=memory_items, created_time=created_time, last_modified=last_modified
- )
-
- # 从数据库加载所有边
- edges = db.graph_data.edges.find()
- for edge in edges:
- source = edge["source"]
- target = edge["target"]
-
- # 只有当源节点和目标节点都存在时才添加边
- if source in self.memory_graph.G and target in self.memory_graph.G:
- # 获取时间属性,如果不存在则使用默认时间
- created_time = edge.get("created_time")
- last_modified = edge.get("last_modified")
-
- # 如果时间属性不存在,则更新数据库
- if created_time is None or last_modified is None:
- created_time = default_time
- last_modified = default_time
- # 更新数据库中的边
- db.graph_data.edges.update_one(
- {"source": source, "target": target},
- {"$set": {"created_time": created_time, "last_modified": last_modified}},
- )
- logger.info(f"为边 {source} - {target} 添加默认时间属性")
-
- self.memory_graph.G.add_edge(
- source,
- target,
- strength=edge.get("strength", 1),
- created_time=created_time,
- last_modified=last_modified,
- )
-
- logger.success("从数据库同步记忆图谱完成")
-
- def calculate_node_hash(self, concept, memory_items):
- """
- 计算节点的特征值
- """
- if not isinstance(memory_items, list):
- memory_items = [memory_items] if memory_items else []
- # 将记忆项排序以确保相同内容生成相同的哈希值
- sorted_items = sorted(memory_items)
- # 组合概念和记忆项生成特征值
- content = f"{concept}:{'|'.join(sorted_items)}"
- return hash(content)
-
- def calculate_edge_hash(self, source, target):
- """
- 计算边的特征值
- """
- # 对源节点和目标节点排序以确保相同的边生成相同的哈希值
- nodes = sorted([source, target])
- return hash(f"{nodes[0]}:{nodes[1]}")
-
- def sync_memory_to_db(self):
- """
- 检查并同步内存中的图结构与数据库
- 使用特征值(哈希值)快速判断是否需要更新
- """
- current_time = datetime.datetime.now().timestamp()
-
- # 获取数据库中所有节点和内存中所有节点
- db_nodes = list(db.graph_data.nodes.find())
- memory_nodes = list(self.memory_graph.G.nodes(data=True))
-
- # 转换数据库节点为字典格式,方便查找
- db_nodes_dict = {node["concept"]: node for node in db_nodes}
-
- # 检查并更新节点
- for concept, data in memory_nodes:
- memory_items = data.get("memory_items", [])
- if not isinstance(memory_items, list):
- memory_items = [memory_items] if memory_items else []
-
- # 计算内存中节点的特征值
- memory_hash = self.calculate_node_hash(concept, memory_items)
-
- if concept not in db_nodes_dict:
- # 数据库中缺少的节点,添加
- node_data = {
- "concept": concept,
- "memory_items": memory_items,
- "hash": memory_hash,
- "created_time": data.get("created_time", current_time),
- "last_modified": data.get("last_modified", current_time),
- }
- db.graph_data.nodes.insert_one(node_data)
- else:
- # 获取数据库中节点的特征值
- db_node = db_nodes_dict[concept]
- db_hash = db_node.get("hash", None)
-
- # 如果特征值不同,则更新节点
- if db_hash != memory_hash:
- db.graph_data.nodes.update_one(
- {"concept": concept},
- {"$set": {"memory_items": memory_items, "hash": memory_hash, "last_modified": current_time}},
- )
-
- # 检查并删除数据库中多余的节点
- memory_concepts = set(node[0] for node in memory_nodes)
- for db_node in db_nodes:
- if db_node["concept"] not in memory_concepts:
- db.graph_data.nodes.delete_one({"concept": db_node["concept"]})
-
- # 处理边的信息
- db_edges = list(db.graph_data.edges.find())
- memory_edges = list(self.memory_graph.G.edges(data=True))
-
- # 创建边的哈希值字典
- db_edge_dict = {}
- for edge in db_edges:
- edge_hash = self.calculate_edge_hash(edge["source"], edge["target"])
- db_edge_dict[(edge["source"], edge["target"])] = {"hash": edge_hash, "strength": edge.get("strength", 1)}
-
- # 检查并更新边
- for source, target, data in memory_edges:
- edge_hash = self.calculate_edge_hash(source, target)
- edge_key = (source, target)
- strength = data.get("strength", 1)
-
- if edge_key not in db_edge_dict:
- # 添加新边
- edge_data = {
- "source": source,
- "target": target,
- "strength": strength,
- "hash": edge_hash,
- "created_time": data.get("created_time", current_time),
- "last_modified": data.get("last_modified", current_time),
- }
- db.graph_data.edges.insert_one(edge_data)
- else:
- # 检查边的特征值是否变化
- if db_edge_dict[edge_key]["hash"] != edge_hash:
- db.graph_data.edges.update_one(
- {"source": source, "target": target},
- {"$set": {"hash": edge_hash, "strength": strength, "last_modified": current_time}},
- )
-
- # 删除多余的边
- memory_edge_set = set((source, target) for source, target, _ in memory_edges)
- for edge_key in db_edge_dict:
- if edge_key not in memory_edge_set:
- source, target = edge_key
- db.graph_data.edges.delete_one({"source": source, "target": target})
-
- logger.success("完成记忆图谱与数据库的差异同步")
-
- def remove_node_from_db(self, topic):
- """
- 从数据库中删除指定节点及其相关的边
-
- Args:
- topic: 要删除的节点概念
- """
- # 删除节点
- db.graph_data.nodes.delete_one({"concept": topic})
- # 删除所有涉及该节点的边
- db.graph_data.edges.delete_many({"$or": [{"source": topic}, {"target": topic}]})
-
-
-class Memory_graph:
- def __init__(self):
- self.G = nx.Graph() # 使用 networkx 的图结构
-
- def connect_dot(self, concept1, concept2):
- # 避免自连接
- if concept1 == concept2:
- return
-
- current_time = datetime.datetime.now().timestamp()
-
- # 如果边已存在,增加 strength
- if self.G.has_edge(concept1, concept2):
- self.G[concept1][concept2]["strength"] = self.G[concept1][concept2].get("strength", 1) + 1
- # 更新最后修改时间
- self.G[concept1][concept2]["last_modified"] = current_time
- else:
- # 如果是新边,初始化 strength 为 1
- self.G.add_edge(concept1, concept2, strength=1, created_time=current_time, last_modified=current_time)
-
- def add_dot(self, concept, memory):
- current_time = datetime.datetime.now().timestamp()
-
- if concept in self.G:
- # 如果节点已存在,将新记忆添加到现有列表中
- if "memory_items" in self.G.nodes[concept]:
- if not isinstance(self.G.nodes[concept]["memory_items"], list):
- # 如果当前不是列表,将其转换为列表
- self.G.nodes[concept]["memory_items"] = [self.G.nodes[concept]["memory_items"]]
- self.G.nodes[concept]["memory_items"].append(memory)
- # 更新最后修改时间
- self.G.nodes[concept]["last_modified"] = current_time
- else:
- self.G.nodes[concept]["memory_items"] = [memory]
- self.G.nodes[concept]["last_modified"] = current_time
- else:
- # 如果是新节点,创建新的记忆列表
- self.G.add_node(concept, memory_items=[memory], created_time=current_time, last_modified=current_time)
-
- def get_dot(self, concept):
- # 检查节点是否存在于图中
- if concept in self.G:
- # 从图中获取节点数据
- node_data = self.G.nodes[concept]
- return concept, node_data
- return None
-
- def get_related_item(self, topic, depth=1):
- if topic not in self.G:
- return [], []
-
- first_layer_items = []
- second_layer_items = []
-
- # 获取相邻节点
- neighbors = list(self.G.neighbors(topic))
-
- # 获取当前节点的记忆项
- node_data = self.get_dot(topic)
- if node_data:
- concept, data = node_data
- if "memory_items" in data:
- memory_items = data["memory_items"]
- if isinstance(memory_items, list):
- first_layer_items.extend(memory_items)
- else:
- first_layer_items.append(memory_items)
-
- # 只在depth=2时获取第二层记忆
- if depth >= 2:
- # 获取相邻节点的记忆项
- for neighbor in neighbors:
- node_data = self.get_dot(neighbor)
- if node_data:
- concept, data = node_data
- if "memory_items" in data:
- memory_items = data["memory_items"]
- if isinstance(memory_items, list):
- second_layer_items.extend(memory_items)
- else:
- second_layer_items.append(memory_items)
-
- return first_layer_items, second_layer_items
-
- @property
- def dots(self):
- # 返回所有节点对应的 Memory_dot 对象
- return [self.get_dot(node) for node in self.G.nodes()]
-
-
-# 海马体
-class Hippocampus:
- def __init__(self, memory_graph: Memory_graph):
- self.memory_graph = memory_graph
- self.memory_cortex = Memory_cortex(memory_graph)
- self.llm_model = LLMModel()
- self.llm_model_small = LLMModel(model_name="deepseek-ai/DeepSeek-V2.5")
- self.llm_model_get_topic = LLMModel(model_name="Pro/Qwen/Qwen2.5-7B-Instruct")
- self.llm_model_summary = LLMModel(model_name="Qwen/Qwen2.5-32B-Instruct")
-
- def get_memory_sample(self, chat_size=20, time_frequency=None):
- """获取记忆样本
-
- Returns:
- list: 消息记录列表,每个元素是一个消息记录字典列表
- """
- if time_frequency is None:
- time_frequency = {"near": 2, "mid": 4, "far": 3}
- current_timestamp = datetime.datetime.now().timestamp()
- chat_samples = []
-
- # 短期:1h 中期:4h 长期:24h
- for _ in range(time_frequency.get("near")):
- random_time = current_timestamp - random.randint(1, 3600 * 4)
- messages = get_closest_chat_from_db(length=chat_size, timestamp=random_time)
- if messages:
- chat_samples.append(messages)
-
- for _ in range(time_frequency.get("mid")):
- random_time = current_timestamp - random.randint(3600 * 4, 3600 * 24)
- messages = get_closest_chat_from_db(length=chat_size, timestamp=random_time)
- if messages:
- chat_samples.append(messages)
-
- for _ in range(time_frequency.get("far")):
- random_time = current_timestamp - random.randint(3600 * 24, 3600 * 24 * 7)
- messages = get_closest_chat_from_db(length=chat_size, timestamp=random_time)
- if messages:
- chat_samples.append(messages)
-
- return chat_samples
-
- def calculate_topic_num(self, text, compress_rate):
- """计算文本的话题数量"""
- information_content = calculate_information_content(text)
- topic_by_length = text.count("\n") * compress_rate
- topic_by_information_content = max(1, min(5, int((information_content - 3) * 2)))
- topic_num = int((topic_by_length + topic_by_information_content) / 2)
- print(
- f"topic_by_length: {topic_by_length}, topic_by_information_content: {topic_by_information_content}, "
- f"topic_num: {topic_num}"
- )
- return topic_num
-
- async def memory_compress(self, messages: list, compress_rate=0.1):
- """压缩消息记录为记忆
-
- Args:
- messages: 消息记录字典列表,每个字典包含text和time字段
- compress_rate: 压缩率
-
- Returns:
- tuple: (压缩记忆集合, 相似主题字典)
- - 压缩记忆集合: set of (话题, 记忆) 元组
- - 相似主题字典: dict of {话题: [(相似主题, 相似度), ...]}
- """
- if not messages:
- return set(), {}
-
- # 合并消息文本,同时保留时间信息
- input_text = ""
- time_info = ""
- # 计算最早和最晚时间
- earliest_time = min(msg["time"] for msg in messages)
- latest_time = max(msg["time"] for msg in messages)
-
- earliest_dt = datetime.datetime.fromtimestamp(earliest_time)
- latest_dt = datetime.datetime.fromtimestamp(latest_time)
-
- # 如果是同一年
- if earliest_dt.year == latest_dt.year:
- earliest_str = earliest_dt.strftime("%m-%d %H:%M:%S")
- latest_str = latest_dt.strftime("%m-%d %H:%M:%S")
- time_info += f"是在{earliest_dt.year}年,{earliest_str} 到 {latest_str} 的对话:\n"
- else:
- earliest_str = earliest_dt.strftime("%Y-%m-%d %H:%M:%S")
- latest_str = latest_dt.strftime("%Y-%m-%d %H:%M:%S")
- time_info += f"是从 {earliest_str} 到 {latest_str} 的对话:\n"
-
- for msg in messages:
- input_text += f"{msg['text']}\n"
-
- print(input_text)
-
- topic_num = self.calculate_topic_num(input_text, compress_rate)
- topics_response = self.llm_model_get_topic.generate_response(self.find_topic_llm(input_text, topic_num))
-
- # 过滤topics
- filter_keywords = ["表情包", "图片", "回复", "聊天记录"]
- topics = [
- topic.strip()
- for topic in topics_response[0].replace(",", ",").replace("、", ",").replace(" ", ",").split(",")
- if topic.strip()
- ]
- filtered_topics = [topic for topic in topics if not any(keyword in topic for keyword in filter_keywords)]
-
- print(f"过滤后话题: {filtered_topics}")
-
- # 为每个话题查找相似的已存在主题
- print("\n检查相似主题:")
- similar_topics_dict = {} # 存储每个话题的相似主题列表
-
- for topic in filtered_topics:
- # 获取所有现有节点
- existing_topics = list(self.memory_graph.G.nodes())
- similar_topics = []
-
- # 对每个现有节点计算相似度
- for existing_topic in existing_topics:
- # 使用jieba分词并计算余弦相似度
- topic_words = set(jieba.cut(topic))
- existing_words = set(jieba.cut(existing_topic))
-
- # 计算词向量
- all_words = topic_words | existing_words
- v1 = [1 if word in topic_words else 0 for word in all_words]
- v2 = [1 if word in existing_words else 0 for word in all_words]
-
- # 计算余弦相似度
- similarity = cosine_similarity(v1, v2)
-
- # 如果相似度超过阈值,添加到结果中
- if similarity >= 0.6: # 设置相似度阈值
- similar_topics.append((existing_topic, similarity))
-
- # 按相似度降序排序
- similar_topics.sort(key=lambda x: x[1], reverse=True)
- # 只保留前5个最相似的主题
- similar_topics = similar_topics[:5]
-
- # 存储到字典中
- similar_topics_dict[topic] = similar_topics
-
- # 输出结果
- if similar_topics:
- print(f"\n主题「{topic}」的相似主题:")
- for similar_topic, score in similar_topics:
- print(f"- {similar_topic} (相似度: {score:.3f})")
- else:
- print(f"\n主题「{topic}」没有找到相似主题")
-
- # 创建所有话题的请求任务
- tasks = []
- for topic in filtered_topics:
- topic_what_prompt = self.topic_what(input_text, topic, time_info)
- # 创建异步任务
- task = self.llm_model_small.generate_response_async(topic_what_prompt)
- tasks.append((topic.strip(), task))
-
- # 等待所有任务完成
- compressed_memory = set()
- for topic, task in tasks:
- response = await task
- if response:
- compressed_memory.add((topic, response[0]))
-
- return compressed_memory, similar_topics_dict
-
- async def operation_build_memory(self, chat_size=12):
- # 最近消息获取频率
- time_frequency = {"near": 3, "mid": 8, "far": 5}
- memory_samples = self.get_memory_sample(chat_size, time_frequency)
-
- all_topics = [] # 用于存储所有话题
-
- for i, messages in enumerate(memory_samples, 1):
- # 加载进度可视化
- all_topics = []
- progress = (i / len(memory_samples)) * 100
- bar_length = 30
- filled_length = int(bar_length * i // len(memory_samples))
- bar = "█" * filled_length + "-" * (bar_length - filled_length)
- print(f"\n进度: [{bar}] {progress:.1f}% ({i}/{len(memory_samples)})")
-
- # 生成压缩后记忆
- compress_rate = 0.1
- compressed_memory, similar_topics_dict = await self.memory_compress(messages, compress_rate)
- print(
- f"\033[1;33m压缩后记忆数量\033[0m: {len(compressed_memory)},似曾相识的话题: {len(similar_topics_dict)}"
- )
-
- # 将记忆加入到图谱中
- for topic, memory in compressed_memory:
- print(f"\033[1;32m添加节点\033[0m: {topic}")
- self.memory_graph.add_dot(topic, memory)
- all_topics.append(topic)
-
- # 连接相似的已存在主题
- if topic in similar_topics_dict:
- similar_topics = similar_topics_dict[topic]
- for similar_topic, similarity in similar_topics:
- # 避免自连接
- if topic != similar_topic:
- # 根据相似度设置连接强度
- strength = int(similarity * 10) # 将0.3-1.0的相似度映射到3-10的强度
- print(f"\033[1;36m连接相似节点\033[0m: {topic} 和 {similar_topic} (强度: {strength})")
- # 使用相似度作为初始连接强度
- self.memory_graph.G.add_edge(topic, similar_topic, strength=strength)
-
- # 连接同批次的相关话题
- for i in range(len(all_topics)):
- for j in range(i + 1, len(all_topics)):
- print(f"\033[1;32m连接同批次节点\033[0m: {all_topics[i]} 和 {all_topics[j]}")
- self.memory_graph.connect_dot(all_topics[i], all_topics[j])
-
- self.memory_cortex.sync_memory_to_db()
-
- def forget_connection(self, source, target):
- """
- 检查并可能遗忘一个连接
-
- Args:
- source: 连接的源节点
- target: 连接的目标节点
-
- Returns:
- tuple: (是否有变化, 变化类型, 变化详情)
- 变化类型: 0-无变化, 1-强度减少, 2-连接移除
- """
- current_time = datetime.datetime.now().timestamp()
- # 获取边的属性
- edge_data = self.memory_graph.G[source][target]
- last_modified = edge_data.get("last_modified", current_time)
-
- # 如果连接超过7天未更新
- if current_time - last_modified > 6000: # test
- # 获取当前强度
- current_strength = edge_data.get("strength", 1)
- # 减少连接强度
- new_strength = current_strength - 1
- edge_data["strength"] = new_strength
- edge_data["last_modified"] = current_time
-
- # 如果强度降为0,移除连接
- if new_strength <= 0:
- self.memory_graph.G.remove_edge(source, target)
- return True, 2, f"移除连接: {source} - {target} (强度降至0)"
- else:
- return True, 1, f"减弱连接: {source} - {target} (强度: {current_strength} -> {new_strength})"
-
- return False, 0, ""
-
- def forget_topic(self, topic):
- """
- 检查并可能遗忘一个话题的记忆
-
- Args:
- topic: 要检查的话题
-
- Returns:
- tuple: (是否有变化, 变化类型, 变化详情)
- 变化类型: 0-无变化, 1-记忆减少, 2-节点移除
- """
- current_time = datetime.datetime.now().timestamp()
- # 获取节点的最后修改时间
- node_data = self.memory_graph.G.nodes[topic]
- last_modified = node_data.get("last_modified", current_time)
-
- # 如果话题超过7天未更新
- if current_time - last_modified > 3000: # test
- memory_items = node_data.get("memory_items", [])
- if not isinstance(memory_items, list):
- memory_items = [memory_items] if memory_items else []
-
- if memory_items:
- # 获取当前记忆数量
- current_count = len(memory_items)
- # 随机选择一条记忆删除
- removed_item = random.choice(memory_items)
- memory_items.remove(removed_item)
-
- if memory_items:
- # 更新节点的记忆项和最后修改时间
- self.memory_graph.G.nodes[topic]["memory_items"] = memory_items
- self.memory_graph.G.nodes[topic]["last_modified"] = current_time
- return (
- True,
- 1,
- f"减少记忆: {topic} (记忆数量: {current_count} -> "
- f"{len(memory_items)})\n被移除的记忆: {removed_item}",
- )
- else:
- # 如果没有记忆了,删除节点及其所有连接
- self.memory_graph.G.remove_node(topic)
- return True, 2, f"移除节点: {topic} (无剩余记忆)\n最后一条记忆: {removed_item}"
-
- return False, 0, ""
-
- async def operation_forget_topic(self, percentage=0.1):
- """
- 随机选择图中一定比例的节点和边进行检查,根据时间条件决定是否遗忘
-
- Args:
- percentage: 要检查的节点和边的比例,默认为0.1(10%)
- """
- # 获取所有节点和边
- all_nodes = list(self.memory_graph.G.nodes())
- all_edges = list(self.memory_graph.G.edges())
-
- # 计算要检查的数量
- check_nodes_count = max(1, int(len(all_nodes) * percentage))
- check_edges_count = max(1, int(len(all_edges) * percentage))
-
- # 随机选择要检查的节点和边
- nodes_to_check = random.sample(all_nodes, check_nodes_count)
- edges_to_check = random.sample(all_edges, check_edges_count)
-
- # 用于统计不同类型的变化
- edge_changes = {"weakened": 0, "removed": 0}
- node_changes = {"reduced": 0, "removed": 0}
-
- # 检查并遗忘连接
- print("\n开始检查连接...")
- for source, target in edges_to_check:
- changed, change_type, details = self.forget_connection(source, target)
- if changed:
- if change_type == 1:
- edge_changes["weakened"] += 1
- logger.info(f"\033[1;34m[连接减弱]\033[0m {details}")
- elif change_type == 2:
- edge_changes["removed"] += 1
- logger.info(f"\033[1;31m[连接移除]\033[0m {details}")
-
- # 检查并遗忘话题
- print("\n开始检查节点...")
- for node in nodes_to_check:
- changed, change_type, details = self.forget_topic(node)
- if changed:
- if change_type == 1:
- node_changes["reduced"] += 1
- logger.info(f"\033[1;33m[记忆减少]\033[0m {details}")
- elif change_type == 2:
- node_changes["removed"] += 1
- logger.info(f"\033[1;31m[节点移除]\033[0m {details}")
-
- # 同步到数据库
- if any(count > 0 for count in edge_changes.values()) or any(count > 0 for count in node_changes.values()):
- self.memory_cortex.sync_memory_to_db()
- print("\n遗忘操作统计:")
- print(f"连接变化: {edge_changes['weakened']} 个减弱, {edge_changes['removed']} 个移除")
- print(f"节点变化: {node_changes['reduced']} 个减少记忆, {node_changes['removed']} 个移除")
- else:
- print("\n本次检查没有节点或连接满足遗忘条件")
-
- async def merge_memory(self, topic):
- """
- 对指定话题的记忆进行合并压缩
-
- Args:
- topic: 要合并的话题节点
- """
- # 获取节点的记忆项
- memory_items = self.memory_graph.G.nodes[topic].get("memory_items", [])
- if not isinstance(memory_items, list):
- memory_items = [memory_items] if memory_items else []
-
- # 如果记忆项不足,直接返回
- if len(memory_items) < 10:
- return
-
- # 随机选择10条记忆
- selected_memories = random.sample(memory_items, 10)
-
- # 拼接成文本
- merged_text = "\n".join(selected_memories)
- print(f"\n[合并记忆] 话题: {topic}")
- print(f"选择的记忆:\n{merged_text}")
-
- # 使用memory_compress生成新的压缩记忆
- compressed_memories, _ = await self.memory_compress(selected_memories, 0.1)
-
- # 从原记忆列表中移除被选中的记忆
- for memory in selected_memories:
- memory_items.remove(memory)
-
- # 添加新的压缩记忆
- for _, compressed_memory in compressed_memories:
- memory_items.append(compressed_memory)
- print(f"添加压缩记忆: {compressed_memory}")
-
- # 更新节点的记忆项
- self.memory_graph.G.nodes[topic]["memory_items"] = memory_items
- print(f"完成记忆合并,当前记忆数量: {len(memory_items)}")
-
- async def operation_merge_memory(self, percentage=0.1):
- """
- 随机检查一定比例的节点,对内容数量超过100的节点进行记忆合并
-
- Args:
- percentage: 要检查的节点比例,默认为0.1(10%)
- """
- # 获取所有节点
- all_nodes = list(self.memory_graph.G.nodes())
- # 计算要检查的节点数量
- check_count = max(1, int(len(all_nodes) * percentage))
- # 随机选择节点
- nodes_to_check = random.sample(all_nodes, check_count)
-
- merged_nodes = []
- for node in nodes_to_check:
- # 获取节点的内容条数
- memory_items = self.memory_graph.G.nodes[node].get("memory_items", [])
- if not isinstance(memory_items, list):
- memory_items = [memory_items] if memory_items else []
- content_count = len(memory_items)
-
- # 如果内容数量超过100,进行合并
- if content_count > 100:
- print(f"\n检查节点: {node}, 当前记忆数量: {content_count}")
- await self.merge_memory(node)
- merged_nodes.append(node)
-
- # 同步到数据库
- if merged_nodes:
- self.memory_cortex.sync_memory_to_db()
- print(f"\n完成记忆合并操作,共处理 {len(merged_nodes)} 个节点")
- else:
- print("\n本次检查没有需要合并的节点")
-
- async def _identify_topics(self, text: str) -> list:
- """从文本中识别可能的主题"""
- topics_response = self.llm_model_get_topic.generate_response(self.find_topic_llm(text, 5))
- topics = [
- topic.strip()
- for topic in topics_response[0].replace(",", ",").replace("、", ",").replace(" ", ",").split(",")
- if topic.strip()
- ]
- return topics
-
- def _find_similar_topics(self, topics: list, similarity_threshold: float = 0.4, debug_info: str = "") -> list:
- """查找与给定主题相似的记忆主题"""
- all_memory_topics = list(self.memory_graph.G.nodes())
- all_similar_topics = []
-
- for topic in topics:
- if debug_info:
- pass
-
- topic_vector = text_to_vector(topic)
-
- for memory_topic in all_memory_topics:
- memory_vector = text_to_vector(memory_topic)
- all_words = set(topic_vector.keys()) | set(memory_vector.keys())
- v1 = [topic_vector.get(word, 0) for word in all_words]
- v2 = [memory_vector.get(word, 0) for word in all_words]
- similarity = cosine_similarity(v1, v2)
-
- if similarity >= similarity_threshold:
- all_similar_topics.append((memory_topic, similarity))
-
- return all_similar_topics
-
- def _get_top_topics(self, similar_topics: list, max_topics: int = 5) -> list:
- """获取相似度最高的主题"""
- seen_topics = set()
- top_topics = []
-
- for topic, score in sorted(similar_topics, key=lambda x: x[1], reverse=True):
- if topic not in seen_topics and len(top_topics) < max_topics:
- seen_topics.add(topic)
- top_topics.append((topic, score))
-
- return top_topics
-
- async def memory_activate_value(self, text: str, max_topics: int = 5, similarity_threshold: float = 0.3) -> int:
- """计算输入文本对记忆的激活程度"""
- logger.info(f"[记忆激活]识别主题: {await self._identify_topics(text)}")
-
- identified_topics = await self._identify_topics(text)
- if not identified_topics:
- return 0
-
- all_similar_topics = self._find_similar_topics(
- identified_topics, similarity_threshold=similarity_threshold, debug_info="记忆激活"
- )
-
- if not all_similar_topics:
- return 0
-
- top_topics = self._get_top_topics(all_similar_topics, max_topics)
-
- if len(top_topics) == 1:
- topic, score = top_topics[0]
- memory_items = self.memory_graph.G.nodes[topic].get("memory_items", [])
- if not isinstance(memory_items, list):
- memory_items = [memory_items] if memory_items else []
- content_count = len(memory_items)
- penalty = 1.0 / (1 + math.log(content_count + 1))
-
- activation = int(score * 50 * penalty)
- print(
- f"\033[1;32m[记忆激活]\033[0m 单主题「{topic}」- 相似度: {score:.3f}, 内容数: {content_count}, "
- f"激活值: {activation}"
- )
- return activation
-
- matched_topics = set()
- topic_similarities = {}
-
- for memory_topic, _similarity in top_topics:
- memory_items = self.memory_graph.G.nodes[memory_topic].get("memory_items", [])
- if not isinstance(memory_items, list):
- memory_items = [memory_items] if memory_items else []
- content_count = len(memory_items)
- penalty = 1.0 / (1 + math.log(content_count + 1))
-
- for input_topic in identified_topics:
- topic_vector = text_to_vector(input_topic)
- memory_vector = text_to_vector(memory_topic)
- all_words = set(topic_vector.keys()) | set(memory_vector.keys())
- v1 = [topic_vector.get(word, 0) for word in all_words]
- v2 = [memory_vector.get(word, 0) for word in all_words]
- sim = cosine_similarity(v1, v2)
- if sim >= similarity_threshold:
- matched_topics.add(input_topic)
- adjusted_sim = sim * penalty
- topic_similarities[input_topic] = max(topic_similarities.get(input_topic, 0), adjusted_sim)
- print(
- f"\033[1;32m[记忆激活]\033[0m 主题「{input_topic}」-> "
- f"「{memory_topic}」(内容数: {content_count}, "
- f"相似度: {adjusted_sim:.3f})"
- )
-
- topic_match = len(matched_topics) / len(identified_topics)
- average_similarities = sum(topic_similarities.values()) / len(topic_similarities) if topic_similarities else 0
-
- activation = int((topic_match + average_similarities) / 2 * 100)
- print(
- f"\033[1;32m[记忆激活]\033[0m 匹配率: {topic_match:.3f}, 平均相似度: {average_similarities:.3f}, "
- f"激活值: {activation}"
- )
-
- return activation
-
- async def get_relevant_memories(
- self, text: str, max_topics: int = 5, similarity_threshold: float = 0.4, max_memory_num: int = 5
- ) -> list:
- """根据输入文本获取相关的记忆内容"""
- identified_topics = await self._identify_topics(text)
-
- all_similar_topics = self._find_similar_topics(
- identified_topics, similarity_threshold=similarity_threshold, debug_info="记忆检索"
- )
-
- relevant_topics = self._get_top_topics(all_similar_topics, max_topics)
-
- relevant_memories = []
- for topic, score in relevant_topics:
- first_layer, _ = self.memory_graph.get_related_item(topic, depth=1)
- if first_layer:
- if len(first_layer) > max_memory_num / 2:
- first_layer = random.sample(first_layer, max_memory_num // 2)
- for memory in first_layer:
- relevant_memories.append({"topic": topic, "similarity": score, "content": memory})
-
- relevant_memories.sort(key=lambda x: x["similarity"], reverse=True)
-
- if len(relevant_memories) > max_memory_num:
- relevant_memories = random.sample(relevant_memories, max_memory_num)
-
- return relevant_memories
-
- def find_topic_llm(self, text, topic_num):
- prompt = (
- f"这是一段文字:{text}。请你从这段话中总结出{topic_num}个关键的概念,可以是名词,动词,或者特定人物,帮我列出来,"
- f"用逗号,隔开,尽可能精简。只需要列举{topic_num}个话题就好,不要有序号,不要告诉我其他内容。"
- )
- return prompt
-
- def topic_what(self, text, topic, time_info):
- prompt = (
- f'这是一段文字,{time_info}:{text}。我想让你基于这段文字来概括"{topic}"这个概念,帮我总结成一句自然的话,'
- f"可以包含时间和人物,以及具体的观点。只输出这句话就好"
- )
- return prompt
-
-
-def segment_text(text):
- """使用jieba进行文本分词"""
- seg_text = list(jieba.cut(text))
- return seg_text
-
-
-def text_to_vector(text):
- """将文本转换为词频向量"""
- words = segment_text(text)
- vector = {}
- for word in words:
- vector[word] = vector.get(word, 0) + 1
- return vector
-
-
-def cosine_similarity(v1, v2):
- """计算两个向量的余弦相似度"""
- dot_product = sum(a * b for a, b in zip(v1, v2))
- norm1 = math.sqrt(sum(a * a for a in v1))
- norm2 = math.sqrt(sum(b * b for b in v2))
- if norm1 == 0 or norm2 == 0:
- return 0
- return dot_product / (norm1 * norm2)
-
-
-def visualize_graph_lite(memory_graph: Memory_graph, color_by_memory: bool = False):
- # 设置中文字体
- plt.rcParams["font.sans-serif"] = ["SimHei"] # 用来正常显示中文标签
- plt.rcParams["axes.unicode_minus"] = False # 用来正常显示负号
-
- G = memory_graph.G
-
- # 创建一个新图用于可视化
- H = G.copy()
-
- # 过滤掉内容数量小于2的节点
- nodes_to_remove = []
- for node in H.nodes():
- memory_items = H.nodes[node].get("memory_items", [])
- memory_count = len(memory_items) if isinstance(memory_items, list) else (1 if memory_items else 0)
- if memory_count < 2:
- nodes_to_remove.append(node)
-
- H.remove_nodes_from(nodes_to_remove)
-
- # 如果没有符合条件的节点,直接返回
- if len(H.nodes()) == 0:
- print("没有找到内容数量大于等于2的节点")
- return
-
- # 计算节点大小和颜色
- node_colors = []
- node_sizes = []
- nodes = list(H.nodes())
-
- # 获取最大记忆数用于归一化节点大小
- max_memories = 1
- for node in nodes:
- memory_items = H.nodes[node].get("memory_items", [])
- memory_count = len(memory_items) if isinstance(memory_items, list) else (1 if memory_items else 0)
- max_memories = max(max_memories, memory_count)
-
- # 计算每个节点的大小和颜色
- for node in nodes:
- # 计算节点大小(基于记忆数量)
- memory_items = H.nodes[node].get("memory_items", [])
- memory_count = len(memory_items) if isinstance(memory_items, list) else (1 if memory_items else 0)
- # 使用指数函数使变化更明显
- ratio = memory_count / max_memories
- size = 400 + 2000 * (ratio**2) # 增大节点大小
- node_sizes.append(size)
-
- # 计算节点颜色(基于连接数)
- degree = H.degree(node)
- if degree >= 30:
- node_colors.append((1.0, 0, 0)) # 亮红色 (#FF0000)
- else:
- # 将1-10映射到0-1的范围
- color_ratio = (degree - 1) / 29.0 if degree > 1 else 0
- # 使用蓝到红的渐变
- red = min(0.9, color_ratio)
- blue = max(0.0, 1.0 - color_ratio)
- node_colors.append((red, 0, blue))
-
- # 绘制图形
- plt.figure(figsize=(16, 12)) # 减小图形尺寸
- pos = nx.spring_layout(
- H,
- k=1, # 调整节点间斥力
- iterations=100, # 增加迭代次数
- scale=1.5, # 减小布局尺寸
- weight="strength",
- ) # 使用边的strength属性作为权重
-
- nx.draw(
- H,
- pos,
- with_labels=True,
- node_color=node_colors,
- node_size=node_sizes,
- font_size=12, # 保持增大的字体大小
- font_family="SimHei",
- font_weight="bold",
- edge_color="gray",
- width=1.5,
- ) # 统一的边宽度
-
- title = """记忆图谱可视化(仅显示内容≥2的节点)
-节点大小表示记忆数量
-节点颜色:蓝(弱连接)到红(强连接)渐变,边的透明度表示连接强度
-连接强度越大的节点距离越近"""
- plt.title(title, fontsize=16, fontfamily="SimHei")
- plt.show()
-
-
-async def main():
- # 初始化数据库
- logger.info("正在初始化数据库连接...")
- start_time = time.time()
-
- test_pare = {
- "do_build_memory": True,
- "do_forget_topic": False,
- "do_visualize_graph": True,
- "do_query": False,
- "do_merge_memory": False,
- }
-
- # 创建记忆图
- memory_graph = Memory_graph()
-
- # 创建海马体
- hippocampus = Hippocampus(memory_graph)
-
- # 从数据库同步数据
- hippocampus.memory_cortex.sync_memory_from_db()
-
- end_time = time.time()
- logger.info(f"\033[32m[加载海马体耗时: {end_time - start_time:.2f} 秒]\033[0m")
-
- # 构建记忆
- if test_pare["do_build_memory"]:
- logger.info("开始构建记忆...")
- chat_size = 20
- await hippocampus.operation_build_memory(chat_size=chat_size)
-
- end_time = time.time()
- logger.info(
- f"\033[32m[构建记忆耗时: {end_time - start_time:.2f} 秒,chat_size={chat_size},chat_count = 16]\033[0m"
- )
-
- if test_pare["do_forget_topic"]:
- logger.info("开始遗忘记忆...")
- await hippocampus.operation_forget_topic(percentage=0.01)
-
- end_time = time.time()
- logger.info(f"\033[32m[遗忘记忆耗时: {end_time - start_time:.2f} 秒]\033[0m")
-
- if test_pare["do_merge_memory"]:
- logger.info("开始合并记忆...")
- await hippocampus.operation_merge_memory(percentage=0.1)
-
- end_time = time.time()
- logger.info(f"\033[32m[合并记忆耗时: {end_time - start_time:.2f} 秒]\033[0m")
-
- if test_pare["do_visualize_graph"]:
- # 展示优化后的图形
- logger.info("生成记忆图谱可视化...")
- print("\n生成优化后的记忆图谱:")
- visualize_graph_lite(memory_graph)
-
- if test_pare["do_query"]:
- # 交互式查询
- while True:
- query = input("\n请输入新的查询概念(输入'退出'以结束):")
- if query.lower() == "退出":
- break
-
- items_list = memory_graph.get_related_item(query)
- if items_list:
- first_layer, second_layer = items_list
- if first_layer:
- print("\n直接相关的记忆:")
- for item in first_layer:
- print(f"- {item}")
- if second_layer:
- print("\n间接相关的记忆:")
- for item in second_layer:
- print(f"- {item}")
- else:
- print("未找到相关记忆。")
-
-
-if __name__ == "__main__":
- import asyncio
-
- asyncio.run(main())
diff --git a/src/plugins/memory_system/sample_distribution.py b/src/plugins/memory_system/sample_distribution.py
new file mode 100644
index 000000000..dbe4b88a4
--- /dev/null
+++ b/src/plugins/memory_system/sample_distribution.py
@@ -0,0 +1,176 @@
+import numpy as np
+from scipy import stats
+from datetime import datetime, timedelta
+
+class DistributionVisualizer:
+ def __init__(self, mean=0, std=1, skewness=0, sample_size=10):
+ """
+ 初始化分布可视化器
+
+ 参数:
+ mean (float): 期望均值
+ std (float): 标准差
+ skewness (float): 偏度
+ sample_size (int): 样本大小
+ """
+ self.mean = mean
+ self.std = std
+ self.skewness = skewness
+ self.sample_size = sample_size
+ self.samples = None
+
+ def generate_samples(self):
+ """生成具有指定参数的样本"""
+ if self.skewness == 0:
+ # 对于无偏度的情况,直接使用正态分布
+ self.samples = np.random.normal(loc=self.mean, scale=self.std, size=self.sample_size)
+ else:
+ # 使用 scipy.stats 生成具有偏度的分布
+ self.samples = stats.skewnorm.rvs(a=self.skewness,
+ loc=self.mean,
+ scale=self.std,
+ size=self.sample_size)
+
+ def get_weighted_samples(self):
+ """获取加权后的样本数列"""
+ if self.samples is None:
+ self.generate_samples()
+ # 将样本值乘以样本大小
+ return self.samples * self.sample_size
+
+ def get_statistics(self):
+ """获取分布的统计信息"""
+ if self.samples is None:
+ self.generate_samples()
+
+ return {
+ "均值": np.mean(self.samples),
+ "标准差": np.std(self.samples),
+ "实际偏度": stats.skew(self.samples)
+ }
+
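+# 用法示意(仅作参考的草图):单独检验 DistributionVisualizer 的偏态采样
+# viz = DistributionVisualizer(mean=0, std=1, skewness=4, sample_size=100)
+# viz.generate_samples()
+# print(viz.get_statistics())  # skewness > 0 时,"实际偏度"应明显为正
+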
+class MemoryBuildScheduler:
+ def __init__(self,
+ n_hours1, std_hours1, weight1,
+ n_hours2, std_hours2, weight2,
+ total_samples=50):
+ """
+ 初始化记忆构建调度器
+
+ 参数:
+ n_hours1 (float): 第一个分布的均值(距离现在的小时数)
+ std_hours1 (float): 第一个分布的标准差(小时)
+ weight1 (float): 第一个分布的权重
+ n_hours2 (float): 第二个分布的均值(距离现在的小时数)
+ std_hours2 (float): 第二个分布的标准差(小时)
+ weight2 (float): 第二个分布的权重
+ total_samples (int): 要生成的总时间点数量
+ """
+ # 归一化权重
+ total_weight = weight1 + weight2
+ self.weight1 = weight1 / total_weight
+ self.weight2 = weight2 / total_weight
+
+ self.n_hours1 = n_hours1
+ self.std_hours1 = std_hours1
+ self.n_hours2 = n_hours2
+ self.std_hours2 = std_hours2
+ self.total_samples = total_samples
+ self.base_time = datetime.now()
+
+ def generate_time_samples(self):
+ """生成混合分布的时间采样点"""
+ # 根据权重计算每个分布的样本数
+ samples1 = int(self.total_samples * self.weight1)
+ samples2 = self.total_samples - samples1
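+        # 例:total_samples=50、weight1=0.7 时,samples1 = 35,samples2 = 15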
+
+ # 生成两个正态分布的小时偏移
+ hours_offset1 = np.random.normal(
+ loc=self.n_hours1,
+ scale=self.std_hours1,
+ size=samples1
+ )
+
+ hours_offset2 = np.random.normal(
+ loc=self.n_hours2,
+ scale=self.std_hours2,
+ size=samples2
+ )
+
+ # 合并两个分布的偏移
+ hours_offset = np.concatenate([hours_offset1, hours_offset2])
+
+ # 将偏移转换为实际时间戳(使用绝对值确保时间点在过去)
+ timestamps = [self.base_time - timedelta(hours=abs(offset)) for offset in hours_offset]
+
+ # 按时间排序(从最早到最近)
+ return sorted(timestamps)
+
+ def get_timestamp_array(self):
+ """返回时间戳数组"""
+ timestamps = self.generate_time_samples()
+ return [int(t.timestamp()) for t in timestamps]
+
+def print_time_samples(timestamps, show_distribution=True):
+ """打印时间样本和分布信息"""
+ print(f"\n生成的{len(timestamps)}个时间点分布:")
+ print("序号".ljust(5), "时间戳".ljust(25), "距现在(小时)")
+ print("-" * 50)
+
+ now = datetime.now()
+ time_diffs = []
+
+ for i, timestamp in enumerate(timestamps, 1):
+ hours_diff = (now - timestamp).total_seconds() / 3600
+ time_diffs.append(hours_diff)
+ print(f"{str(i).ljust(5)} {timestamp.strftime('%Y-%m-%d %H:%M:%S').ljust(25)} {hours_diff:.2f}")
+
+ # 打印统计信息
+ print("\n统计信息:")
+ print(f"平均时间偏移:{np.mean(time_diffs):.2f}小时")
+ print(f"标准差:{np.std(time_diffs):.2f}小时")
+ print(f"最早时间:{min(timestamps).strftime('%Y-%m-%d %H:%M:%S')} ({max(time_diffs):.2f}小时前)")
+ print(f"最近时间:{max(timestamps).strftime('%Y-%m-%d %H:%M:%S')} ({min(time_diffs):.2f}小时前)")
+
+ if show_distribution:
+ # 计算时间分布的直方图
+ hist, bins = np.histogram(time_diffs, bins=40)
+ print("\n时间分布(每个*代表一个时间点):")
+ for i in range(len(hist)):
+ if hist[i] > 0:
+ print(f"{bins[i]:6.1f}-{bins[i+1]:6.1f}小时: {'*' * int(hist[i])}")
+
+# 使用示例
+if __name__ == "__main__":
+ # 创建一个双峰分布的记忆调度器
+ scheduler = MemoryBuildScheduler(
+ n_hours1=12, # 第一个分布均值(12小时前)
+ std_hours1=8, # 第一个分布标准差
+ weight1=0.7, # 第一个分布权重 70%
+ n_hours2=36, # 第二个分布均值(36小时前)
+ std_hours2=24, # 第二个分布标准差
+ weight2=0.3, # 第二个分布权重 30%
+ total_samples=50 # 总共生成50个时间点
+ )
+
+ # 生成时间分布
+ timestamps = scheduler.generate_time_samples()
+
+ # 打印结果,包含分布可视化
+ print_time_samples(timestamps, show_distribution=True)
+
+ # 打印时间戳数组
+ timestamp_array = scheduler.get_timestamp_array()
+ print("\n时间戳数组(Unix时间戳):")
+ print("[", end="")
+ for i, ts in enumerate(timestamp_array):
+ if i > 0:
+ print(", ", end="")
+ print(ts, end="")
+ print("]")
\ No newline at end of file
diff --git a/src/plugins/personality/big5_test.py b/src/plugins/personality/big5_test.py
index e77dfbc4f..c66e6ec4e 100644
--- a/src/plugins/personality/big5_test.py
+++ b/src/plugins/personality/big5_test.py
@@ -4,10 +4,9 @@
# from .questionnaire import PERSONALITY_QUESTIONS, FACTOR_DESCRIPTIONS
import os
-import random
import sys
from pathlib import Path
-from src.plugins.personality.questionnaire import PERSONALITY_QUESTIONS,FACTOR_DESCRIPTIONS
+import random
current_dir = Path(__file__).resolve().parent
project_root = current_dir.parent.parent.parent
@@ -16,14 +15,14 @@ env_path = project_root / ".env.prod"
root_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../.."))
sys.path.append(root_path)
-
+from src.plugins.personality.questionnaire import PERSONALITY_QUESTIONS, FACTOR_DESCRIPTIONS # noqa: E402
class BigFiveTest:
def __init__(self):
self.questions = PERSONALITY_QUESTIONS
self.factors = FACTOR_DESCRIPTIONS
-
+
def run_test(self):
"""运行测试并收集答案"""
print("\n欢迎参加中国大五人格测试!")
@@ -35,17 +34,17 @@ class BigFiveTest:
print("5 = 比较符合")
print("6 = 完全符合")
print("\n请认真阅读每个描述,选择最符合您实际情况的选项。\n")
-
+
# 创建题目序号到题目的映射
- questions_map = {q['id']: q for q in self.questions}
-
+ questions_map = {q["id"]: q for q in self.questions}
+
# 获取所有题目ID并随机打乱顺序
question_ids = list(questions_map.keys())
random.shuffle(question_ids)
-
+
answers = {}
total_questions = len(question_ids)
-
+
for i, question_id in enumerate(question_ids, 1):
question = questions_map[question_id]
while True:
@@ -59,52 +58,43 @@ class BigFiveTest:
print("请输入1-6之间的数字!")
except ValueError:
print("请输入有效的数字!")
-
+
return self.calculate_scores(answers)
-
+
def calculate_scores(self, answers):
"""计算各维度得分"""
results = {}
- factor_questions = {
- "外向性": [],
- "神经质": [],
- "严谨性": [],
- "开放性": [],
- "宜人性": []
- }
-
+ factor_questions = {"外向性": [], "神经质": [], "严谨性": [], "开放性": [], "宜人性": []}
+
# 将题目按因子分类
for q in self.questions:
- factor_questions[q['factor']].append(q)
-
+ factor_questions[q["factor"]].append(q)
+
# 计算每个维度的得分
for factor, questions in factor_questions.items():
total_score = 0
for q in questions:
- score = answers[q['id']]
+ score = answers[q["id"]]
# 处理反向计分题目
- if q['reverse_scoring']:
+ if q["reverse_scoring"]:
score = 7 - score # 6分量表反向计分为7减原始分
total_score += score
-
+
# 计算平均分
avg_score = round(total_score / len(questions), 2)
- results[factor] = {
- "得分": avg_score,
- "题目数": len(questions),
- "总分": total_score
- }
-
+ results[factor] = {"得分": avg_score, "题目数": len(questions), "总分": total_score}
+
return results
def get_factor_description(self, factor):
"""获取因子的详细描述"""
return self.factors[factor]
+
def main():
test = BigFiveTest()
results = test.run_test()
-
+
print("\n测试结果:")
print("=" * 50)
for factor, data in results.items():
@@ -112,9 +102,10 @@ def main():
print(f"平均分: {data['得分']} (总分: {data['总分']}, 题目数: {data['题目数']})")
print("-" * 30)
description = test.get_factor_description(factor)
- print("维度说明:", description['description'][:100] + "...")
- print("\n特征词:", ", ".join(description['trait_words']))
+ print("维度说明:", description["description"][:100] + "...")
+ print("\n特征词:", ", ".join(description["trait_words"]))
print("=" * 50)
-
+
+
if __name__ == "__main__":
main()
diff --git a/src/plugins/personality/combined_test.py b/src/plugins/personality/combined_test.py
index 96ca3736a..b08fb458a 100644
--- a/src/plugins/personality/combined_test.py
+++ b/src/plugins/personality/combined_test.py
@@ -1,14 +1,11 @@
+from typing import Dict
import json
import os
-import random
+from pathlib import Path
import sys
from datetime import datetime
-from pathlib import Path
-from typing import Dict
+import random
from scipy import stats # 添加scipy导入用于t检验
-from src.plugins.personality.big5_test import BigFiveTest
-from src.plugins.personality.renqingziji import PersonalityEvaluator_direct
-from src.plugins.personality.questionnaire import FACTOR_DESCRIPTIONS, PERSONALITY_QUESTIONS
current_dir = Path(__file__).resolve().parent
project_root = current_dir.parent.parent.parent
@@ -17,6 +14,9 @@ env_path = project_root / ".env.prod"
root_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../.."))
sys.path.append(root_path)
+from src.plugins.personality.big5_test import BigFiveTest # noqa: E402
+from src.plugins.personality.renqingziji import PersonalityEvaluator_direct # noqa: E402
+from src.plugins.personality.questionnaire import FACTOR_DESCRIPTIONS, PERSONALITY_QUESTIONS # noqa: E402
class CombinedPersonalityTest:
@@ -24,7 +24,7 @@ class CombinedPersonalityTest:
self.big5_test = BigFiveTest()
self.scenario_test = PersonalityEvaluator_direct()
self.dimensions = ["开放性", "严谨性", "外向性", "宜人性", "神经质"]
-
+
def run_combined_test(self):
"""运行组合测试"""
print("\n=== 人格特征综合评估系统 ===")
@@ -33,12 +33,12 @@ class CombinedPersonalityTest:
print("2. 情景反应测评(15个场景)")
print("\n两种测评完成后,将对比分析结果的异同。")
input("\n准备好开始第一部分(问卷测评)了吗?按回车继续...")
-
+
# 运行问卷测试
print("\n=== 第一部分:问卷测评 ===")
print("本部分采用六级评分,请根据每个描述与您的符合程度进行打分:")
print("1 = 完全不符合")
- print("2 = 比较不符合")
+ print("2 = 比较不符合")
print("3 = 有点不符合")
print("4 = 有点符合")
print("5 = 比较符合")
@@ -48,42 +48,39 @@ class CombinedPersonalityTest:
print("2. 根据您想要扮演的角色特征来回答")
print("\n无论选择哪种方式,请保持一致并认真回答每个问题。")
input("\n按回车开始答题...")
-
+
questionnaire_results = self.run_questionnaire()
-
+
# 转换问卷结果格式以便比较
- questionnaire_scores = {
- factor: data["得分"]
- for factor, data in questionnaire_results.items()
- }
-
+ questionnaire_scores = {factor: data["得分"] for factor, data in questionnaire_results.items()}
+
# 运行情景测试
print("\n=== 第二部分:情景反应测评 ===")
print("接下来,您将面对一系列具体场景,请描述您在每个场景中可能的反应。")
print("每个场景都会评估不同的人格维度,共15个场景。")
print("您可以选择提供自己的真实反应,也可以选择扮演一个您创作的角色来回答。")
input("\n准备好开始了吗?按回车继续...")
-
+
scenario_results = self.run_scenario_test()
-
+
# 比较和展示结果
self.compare_and_display_results(questionnaire_scores, scenario_results)
-
+
# 保存结果
self.save_results(questionnaire_scores, scenario_results)
def run_questionnaire(self):
"""运行问卷测试部分"""
# 创建题目序号到题目的映射
- questions_map = {q['id']: q for q in PERSONALITY_QUESTIONS}
-
+ questions_map = {q["id"]: q for q in PERSONALITY_QUESTIONS}
+
# 获取所有题目ID并随机打乱顺序
question_ids = list(questions_map.keys())
random.shuffle(question_ids)
-
+
answers = {}
total_questions = len(question_ids)
-
+
for i, question_id in enumerate(question_ids, 1):
question = questions_map[question_id]
while True:
@@ -98,48 +95,38 @@ class CombinedPersonalityTest:
print("请输入1-6之间的数字!")
except ValueError:
print("请输入有效的数字!")
-
+
# 每10题显示一次进度
if i % 10 == 0:
- print(f"\n已完成 {i}/{total_questions} 题 ({int(i/total_questions*100)}%)")
-
+ print(f"\n已完成 {i}/{total_questions} 题 ({int(i / total_questions * 100)}%)")
+
return self.calculate_questionnaire_scores(answers)
-
+
def calculate_questionnaire_scores(self, answers):
"""计算问卷测试的维度得分"""
results = {}
- factor_questions = {
- "外向性": [],
- "神经质": [],
- "严谨性": [],
- "开放性": [],
- "宜人性": []
- }
-
+ factor_questions = {"外向性": [], "神经质": [], "严谨性": [], "开放性": [], "宜人性": []}
+
# 将题目按因子分类
for q in PERSONALITY_QUESTIONS:
- factor_questions[q['factor']].append(q)
-
+ factor_questions[q["factor"]].append(q)
+
# 计算每个维度的得分
for factor, questions in factor_questions.items():
total_score = 0
for q in questions:
- score = answers[q['id']]
+ score = answers[q["id"]]
# 处理反向计分题目
- if q['reverse_scoring']:
+ if q["reverse_scoring"]:
score = 7 - score # 6分量表反向计分为7减原始分
total_score += score
-
+
# 计算平均分
avg_score = round(total_score / len(questions), 2)
- results[factor] = {
- "得分": avg_score,
- "题目数": len(questions),
- "总分": total_score
- }
-
+ results[factor] = {"得分": avg_score, "题目数": len(questions), "总分": total_score}
+
return results
-
+
def run_scenario_test(self):
"""运行情景测试部分"""
final_scores = {"开放性": 0, "严谨性": 0, "外向性": 0, "宜人性": 0, "神经质": 0}
@@ -161,11 +148,7 @@ class CombinedPersonalityTest:
continue
print("\n正在评估您的描述...")
- scores = self.scenario_test.evaluate_response(
- scenario_data["场景"],
- response,
- scenario_data["评估维度"]
- )
+ scores = self.scenario_test.evaluate_response(scenario_data["场景"], response, scenario_data["评估维度"])
# 更新分数
for dimension, score in scores.items():
@@ -179,7 +162,7 @@ class CombinedPersonalityTest:
# 每5个场景显示一次总进度
if i % 5 == 0:
- print(f"\n已完成 {i}/{len(scenarios)} 个场景 ({int(i/len(scenarios)*100)}%)")
+ print(f"\n已完成 {i}/{len(scenarios)} 个场景 ({int(i / len(scenarios) * 100)}%)")
if i < len(scenarios):
input("\n按回车继续下一个场景...")
@@ -187,11 +170,8 @@ class CombinedPersonalityTest:
# 计算平均分
for dimension in final_scores:
if dimension_counts[dimension] > 0:
- final_scores[dimension] = round(
- final_scores[dimension] / dimension_counts[dimension],
- 2
- )
-
+ final_scores[dimension] = round(final_scores[dimension] / dimension_counts[dimension], 2)
+
return final_scores
def compare_and_display_results(self, questionnaire_scores: Dict, scenario_scores: Dict):
@@ -200,39 +180,44 @@
print("\n" + "=" * 60)
print(f"{'维度':<8} {'问卷得分':>10} {'情景得分':>10} {'差异':>10} {'差异程度':>10}")
print("-" * 60)
-
+
# 收集每个维度的得分用于统计分析
questionnaire_values = []
scenario_values = []
diffs = []
-
+
for dimension in self.dimensions:
q_score = questionnaire_scores[dimension]
s_score = scenario_scores[dimension]
diff = round(abs(q_score - s_score), 2)
-
+
questionnaire_values.append(q_score)
scenario_values.append(s_score)
diffs.append(diff)
-
+
# 计算差异程度
diff_level = "低" if diff < 0.5 else "中" if diff < 1.0 else "高"
print(f"{dimension:<8} {q_score:>10.2f} {s_score:>10.2f} {diff:>10.2f} {diff_level:>10}")
-
+
print("=" * 60)
-
+
# 计算整体统计指标
mean_diff = sum(diffs) / len(diffs)
std_diff = (sum((x - mean_diff) ** 2 for x in diffs) / (len(diffs) - 1)) ** 0.5
-
+
# 计算效应量 (Cohen's d)
- pooled_std = ((sum((x - sum(questionnaire_values)/len(questionnaire_values))**2 for x in questionnaire_values) +
- sum((x - sum(scenario_values)/len(scenario_values))**2 for x in scenario_values)) /
- (2 * len(self.dimensions) - 2)) ** 0.5
-
+ pooled_std = (
+ (
+ sum((x - sum(questionnaire_values) / len(questionnaire_values)) ** 2 for x in questionnaire_values)
+ + sum((x - sum(scenario_values) / len(scenario_values)) ** 2 for x in scenario_values)
+ )
+ / (2 * len(self.dimensions) - 2)
+ ) ** 0.5
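+        # 合并标准差(说明):sqrt((SS1 + SS2) / (n1 + n2 - 2)),其中 SS 为各组离差平方和,n1 = n2 = 维度数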
+
if pooled_std != 0:
cohens_d = abs(mean_diff / pooled_std)
-
+
# 解释效应量
if cohens_d < 0.2:
effect_size = "微小"
@@ -242,7 +226,7 @@ class CombinedPersonalityTest:
effect_size = "中等"
else:
effect_size = "大"
-
+
# 对所有维度进行整体t检验
t_stat, p_value = stats.ttest_rel(questionnaire_values, scenario_values)
print("\n整体统计分析:")
@@ -252,31 +236,33 @@ class CombinedPersonalityTest:
print(f"效应量大小: {effect_size}")
print(f"t统计量: {t_stat:.3f}")
print(f"p值: {p_value:.3f}")
-
+
if p_value < 0.05:
print("结论: 两种测评方法的结果存在显著差异 (p < 0.05)")
else:
print("结论: 两种测评方法的结果无显著差异 (p >= 0.05)")
-
+
print("\n维度说明:")
for dimension in self.dimensions:
print(f"\n{dimension}:")
desc = FACTOR_DESCRIPTIONS[dimension]
print(f"定义:{desc['description']}")
print(f"特征词:{', '.join(desc['trait_words'])}")
-
+
# 分析显著差异
significant_diffs = []
for dimension in self.dimensions:
diff = abs(questionnaire_scores[dimension] - scenario_scores[dimension])
if diff >= 1.0: # 差异大于等于1分视为显著
- significant_diffs.append({
- "dimension": dimension,
- "diff": diff,
- "questionnaire": questionnaire_scores[dimension],
- "scenario": scenario_scores[dimension]
- })
-
+ significant_diffs.append(
+ {
+ "dimension": dimension,
+ "diff": diff,
+ "questionnaire": questionnaire_scores[dimension],
+ "scenario": scenario_scores[dimension],
+ }
+ )
+
if significant_diffs:
print("\n\n显著差异分析:")
print("-" * 40)
@@ -285,9 +271,9 @@ class CombinedPersonalityTest:
print(f"问卷得分:{diff['questionnaire']:.2f}")
print(f"情景得分:{diff['scenario']:.2f}")
print(f"差异值:{diff['diff']:.2f}")
-
+
# 分析可能的原因
- if diff['questionnaire'] > diff['scenario']:
+ if diff["questionnaire"] > diff["scenario"]:
print("可能原因:在问卷中的自我评价较高,但在具体情景中的表现较为保守。")
else:
print("可能原因:在具体情景中表现出更多该维度特征,而在问卷自评时较为保守。")
@@ -298,38 +284,37 @@ class CombinedPersonalityTest:
"测试时间": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
"问卷测评结果": questionnaire_scores,
"情景测评结果": scenario_scores,
- "维度说明": FACTOR_DESCRIPTIONS
+ "维度说明": FACTOR_DESCRIPTIONS,
}
-
+
# 确保目录存在
os.makedirs("results", exist_ok=True)
-
+
# 生成带时间戳的文件名
filename = f"results/personality_combined_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
-
+
# 保存到文件
with open(filename, "w", encoding="utf-8") as f:
json.dump(results, f, ensure_ascii=False, indent=2)
-
+
print(f"\n完整的测评结果已保存到:{filename}")
+
def load_existing_results():
"""检查并加载已有的测试结果"""
results_dir = "results"
if not os.path.exists(results_dir):
return None
-
+
# 获取所有personality_combined开头的文件
- result_files = [f for f in os.listdir(results_dir)
- if f.startswith("personality_combined_") and f.endswith(".json")]
-
+ result_files = [f for f in os.listdir(results_dir) if f.startswith("personality_combined_") and f.endswith(".json")]
+
if not result_files:
return None
-
+
# 按文件修改时间排序,获取最新的结果文件
- latest_file = max(result_files,
- key=lambda f: os.path.getmtime(os.path.join(results_dir, f)))
-
+ latest_file = max(result_files, key=lambda f: os.path.getmtime(os.path.join(results_dir, f)))
+
print(f"\n发现已有的测试结果:{latest_file}")
try:
with open(os.path.join(results_dir, latest_file), "r", encoding="utf-8") as f:
@@ -339,24 +324,26 @@ def load_existing_results():
print(f"读取结果文件时出错:{str(e)}")
return None
+
def main():
test = CombinedPersonalityTest()
-
+
# 检查是否存在已有结果
existing_results = load_existing_results()
-
+
if existing_results:
print("\n=== 使用已有测试结果进行分析 ===")
print(f"测试时间:{existing_results['测试时间']}")
-
+
questionnaire_scores = existing_results["问卷测评结果"]
scenario_scores = existing_results["情景测评结果"]
-
+
# 直接进行结果对比分析
test.compare_and_display_results(questionnaire_scores, scenario_scores)
else:
print("\n未找到已有的测试结果,开始新的测试...")
test.run_combined_test()
+
if __name__ == "__main__":
- main()
\ No newline at end of file
+ main()
diff --git a/src/plugins/personality/questionnaire.py b/src/plugins/personality/questionnaire.py
index 3e1a7897e..8e965061d 100644
--- a/src/plugins/personality/questionnaire.py
+++ b/src/plugins/personality/questionnaire.py
@@ -1,7 +1,9 @@
-# 人格测试问卷题目 王孟成, 戴晓阳, & 姚树桥. (2011). 中国大五人格问卷的初步编制Ⅲ:简式版的制定及信效度检验.
-# 中国临床心理学杂志, 19(04), Article 04.
-# 王孟成, 戴晓阳, & 姚树桥. (2010). 中国大五人格问卷的初步编制Ⅰ:理论框架与信度分析.
-# 中国临床心理学杂志, 18(05), Article 05.
+# 人格测试问卷题目
+# 王孟成, 戴晓阳, & 姚树桥. (2011).
+# 中国大五人格问卷的初步编制Ⅲ:简式版的制定及信效度检验. 中国临床心理学杂志, 19(04), Article 04.
+
+# 王孟成, 戴晓阳, & 姚树桥. (2010).
+# 中国大五人格问卷的初步编制Ⅰ:理论框架与信度分析. 中国临床心理学杂志, 18(05), Article 05.
PERSONALITY_QUESTIONS = [
# 神经质维度 (F1)
@@ -9,168 +11,132 @@ PERSONALITY_QUESTIONS = [
{"id": 2, "content": "我常感到害怕", "factor": "神经质", "reverse_scoring": False},
{"id": 3, "content": "有时我觉得自己一无是处", "factor": "神经质", "reverse_scoring": False},
{"id": 4, "content": "我很少感到忧郁或沮丧", "factor": "神经质", "reverse_scoring": True},
- {"id": 5, "content": "别人一句漫不经心的话,我常会联系在自己身上",
- "factor": "神经质", "reverse_scoring": False},
- {"id": 6, "content": "在面对压力时,我有种快要崩溃的感觉",
- "factor": "神经质", "reverse_scoring": False},
- {"id": 7, "content": "我常担忧一些无关紧要的事情",
- "factor": "神经质", "reverse_scoring": False},
- {"id": 8, "content": "我常常感到内心不踏实",
- "factor": "神经质", "reverse_scoring": False},
-
+ {"id": 5, "content": "别人一句漫不经心的话,我常会联系在自己身上", "factor": "神经质", "reverse_scoring": False},
+ {"id": 6, "content": "在面对压力时,我有种快要崩溃的感觉", "factor": "神经质", "reverse_scoring": False},
+ {"id": 7, "content": "我常担忧一些无关紧要的事情", "factor": "神经质", "reverse_scoring": False},
+ {"id": 8, "content": "我常常感到内心不踏实", "factor": "神经质", "reverse_scoring": False},
# 严谨性维度 (F2)
- {"id": 9, "content": "在工作上,我常只求能应付过去便可",
- "factor": "严谨性", "reverse_scoring": True},
- {"id": 10, "content": "一旦确定了目标,我会坚持努力地实现它",
- "factor": "严谨性", "reverse_scoring": False},
- {"id": 11, "content": "我常常是仔细考虑之后才做出决定",
- "factor": "严谨性", "reverse_scoring": False},
- {"id": 12, "content": "别人认为我是个慎重的人",
- "factor": "严谨性", "reverse_scoring": False},
- {"id": 13, "content": "做事讲究逻辑和条理是我的一个特点",
- "factor": "严谨性", "reverse_scoring": False},
- {"id": 14, "content": "我喜欢一开头就把事情计划好",
- "factor": "严谨性", "reverse_scoring": False},
- {"id": 15, "content": "我工作或学习很勤奋",
- "factor": "严谨性", "reverse_scoring": False},
- {"id": 16, "content": "我是个倾尽全力做事的人",
- "factor": "严谨性", "reverse_scoring": False},
-
+ {"id": 9, "content": "在工作上,我常只求能应付过去便可", "factor": "严谨性", "reverse_scoring": True},
+ {"id": 10, "content": "一旦确定了目标,我会坚持努力地实现它", "factor": "严谨性", "reverse_scoring": False},
+ {"id": 11, "content": "我常常是仔细考虑之后才做出决定", "factor": "严谨性", "reverse_scoring": False},
+ {"id": 12, "content": "别人认为我是个慎重的人", "factor": "严谨性", "reverse_scoring": False},
+ {"id": 13, "content": "做事讲究逻辑和条理是我的一个特点", "factor": "严谨性", "reverse_scoring": False},
+ {"id": 14, "content": "我喜欢一开头就把事情计划好", "factor": "严谨性", "reverse_scoring": False},
+ {"id": 15, "content": "我工作或学习很勤奋", "factor": "严谨性", "reverse_scoring": False},
+ {"id": 16, "content": "我是个倾尽全力做事的人", "factor": "严谨性", "reverse_scoring": False},
# 宜人性维度 (F3)
- {"id": 17, "content": "尽管人类社会存在着一些阴暗的东西(如战争、罪恶、欺诈),"
- "我仍然相信人性总的来说是善良的", "factor": "宜人性", "reverse_scoring": False},
- {"id": 18, "content": "我觉得大部分人基本上是心怀善意的",
- "factor": "宜人性", "reverse_scoring": False},
- {"id": 19, "content": "虽然社会上有骗子,但我觉得大部分人还是可信的",
- "factor": "宜人性", "reverse_scoring": False},
- {"id": 20, "content": "我不太关心别人是否受到不公正的待遇",
- "factor": "宜人性", "reverse_scoring": True},
- {"id": 21, "content": "我时常觉得别人的痛苦与我无关",
- "factor": "宜人性", "reverse_scoring": True},
- {"id": 22, "content": "我常为那些遭遇不幸的人感到难过",
- "factor": "宜人性", "reverse_scoring": False},
- {"id": 23, "content": "我是那种只照顾好自己,不替别人担忧的人",
- "factor": "宜人性", "reverse_scoring": True},
- {"id": 24, "content": "当别人向我诉说不幸时,我常感到难过",
- "factor": "宜人性", "reverse_scoring": False},
-
+ {
+ "id": 17,
+ "content": "尽管人类社会存在着一些阴暗的东西(如战争、罪恶、欺诈),我仍然相信人性总的来说是善良的",
+ "factor": "宜人性",
+ "reverse_scoring": False,
+ },
+ {"id": 18, "content": "我觉得大部分人基本上是心怀善意的", "factor": "宜人性", "reverse_scoring": False},
+ {"id": 19, "content": "虽然社会上有骗子,但我觉得大部分人还是可信的", "factor": "宜人性", "reverse_scoring": False},
+ {"id": 20, "content": "我不太关心别人是否受到不公正的待遇", "factor": "宜人性", "reverse_scoring": True},
+ {"id": 21, "content": "我时常觉得别人的痛苦与我无关", "factor": "宜人性", "reverse_scoring": True},
+ {"id": 22, "content": "我常为那些遭遇不幸的人感到难过", "factor": "宜人性", "reverse_scoring": False},
+ {"id": 23, "content": "我是那种只照顾好自己,不替别人担忧的人", "factor": "宜人性", "reverse_scoring": True},
+ {"id": 24, "content": "当别人向我诉说不幸时,我常感到难过", "factor": "宜人性", "reverse_scoring": False},
# 开放性维度 (F4)
- {"id": 25, "content": "我的想象力相当丰富",
- "factor": "开放性", "reverse_scoring": False},
- {"id": 26, "content": "我头脑中经常充满生动的画面",
- "factor": "开放性", "reverse_scoring": False},
- {"id": 27, "content": "我对许多事情有着很强的好奇心",
- "factor": "开放性", "reverse_scoring": False},
- {"id": 28, "content": "我喜欢冒险",
- "factor": "开放性", "reverse_scoring": False},
- {"id": 29, "content": "我是个勇于冒险,突破常规的人",
- "factor": "开放性", "reverse_scoring": False},
- {"id": 30, "content": "我身上具有别人没有的冒险精神",
- "factor": "开放性", "reverse_scoring": False},
- {"id": 31, "content": "我渴望学习一些新东西,即使它们与我的日常生活无关",
- "factor": "开放性", "reverse_scoring": False},
- {"id": 32, "content": "我很愿意也很容易接受那些新事物、新观点、新想法",
- "factor": "开放性", "reverse_scoring": False},
-
+ {"id": 25, "content": "我的想象力相当丰富", "factor": "开放性", "reverse_scoring": False},
+ {"id": 26, "content": "我头脑中经常充满生动的画面", "factor": "开放性", "reverse_scoring": False},
+ {"id": 27, "content": "我对许多事情有着很强的好奇心", "factor": "开放性", "reverse_scoring": False},
+ {"id": 28, "content": "我喜欢冒险", "factor": "开放性", "reverse_scoring": False},
+ {"id": 29, "content": "我是个勇于冒险,突破常规的人", "factor": "开放性", "reverse_scoring": False},
+ {"id": 30, "content": "我身上具有别人没有的冒险精神", "factor": "开放性", "reverse_scoring": False},
+ {
+ "id": 31,
+ "content": "我渴望学习一些新东西,即使它们与我的日常生活无关",
+ "factor": "开放性",
+ "reverse_scoring": False,
+ },
+ {
+ "id": 32,
+ "content": "我很愿意也很容易接受那些新事物、新观点、新想法",
+ "factor": "开放性",
+ "reverse_scoring": False,
+ },
# 外向性维度 (F5)
- {"id": 33, "content": "我喜欢参加社交与娱乐聚会",
- "factor": "外向性", "reverse_scoring": False},
- {"id": 34, "content": "我对人多的聚会感到乏味",
- "factor": "外向性", "reverse_scoring": True},
- {"id": 35, "content": "我尽量避免参加人多的聚会和嘈杂的环境",
- "factor": "外向性", "reverse_scoring": True},
- {"id": 36, "content": "在热闹的聚会上,我常常表现主动并尽情玩耍",
- "factor": "外向性", "reverse_scoring": False},
- {"id": 37, "content": "有我在的场合一般不会冷场",
- "factor": "外向性", "reverse_scoring": False},
- {"id": 38, "content": "我希望成为领导者而不是被领导者",
- "factor": "外向性", "reverse_scoring": False},
- {"id": 39, "content": "在一个团体中,我希望处于领导地位",
- "factor": "外向性", "reverse_scoring": False},
- {"id": 40, "content": "别人多认为我是一个热情和友好的人",
- "factor": "外向性", "reverse_scoring": False}
+ {"id": 33, "content": "我喜欢参加社交与娱乐聚会", "factor": "外向性", "reverse_scoring": False},
+ {"id": 34, "content": "我对人多的聚会感到乏味", "factor": "外向性", "reverse_scoring": True},
+ {"id": 35, "content": "我尽量避免参加人多的聚会和嘈杂的环境", "factor": "外向性", "reverse_scoring": True},
+ {"id": 36, "content": "在热闹的聚会上,我常常表现主动并尽情玩耍", "factor": "外向性", "reverse_scoring": False},
+ {"id": 37, "content": "有我在的场合一般不会冷场", "factor": "外向性", "reverse_scoring": False},
+ {"id": 38, "content": "我希望成为领导者而不是被领导者", "factor": "外向性", "reverse_scoring": False},
+ {"id": 39, "content": "在一个团体中,我希望处于领导地位", "factor": "外向性", "reverse_scoring": False},
+ {"id": 40, "content": "别人多认为我是一个热情和友好的人", "factor": "外向性", "reverse_scoring": False},
]
# 因子维度说明
FACTOR_DESCRIPTIONS = {
"外向性": {
- "description": (
- "反映个体神经系统的强弱和动力特征。外向性主要表现为个体在人际交往和社交活动中的倾向性,"
- "包括对社交活动的兴趣、对人群的态度、社交互动中的主动程度以及在群体中的影响力。"
- "高分者倾向于积极参与社交活动,乐于与人交往,善于表达自我,并往往在群体中发挥领导作用;"
- "低分者则倾向于独处,不喜欢热闹的社交场合,表现出内向、安静的特征。"
- ),
+ "description": "反映个体神经系统的强弱和动力特征。外向性主要表现为个体在人际交往和社交活动中的倾向性,"
+ "包括对社交活动的兴趣、"
+ "对人群的态度、社交互动中的主动程度以及在群体中的影响力。高分者倾向于积极参与社交活动,乐于与人交往,善于表达自我,"
+ "并往往在群体中发挥领导作用;低分者则倾向于独处,不喜欢热闹的社交场合,表现出内向、安静的特征。",
"trait_words": ["热情", "活力", "社交", "主动"],
"subfactors": {
"合群性": "个体愿意与他人聚在一起,即接近人群的倾向;高分表现乐群、好交际,低分表现封闭、独处",
"热情": "个体对待别人时所表现出的态度;高分表现热情好客,低分表现冷淡",
"支配性": "个体喜欢指使、操纵他人,倾向于领导别人的特点;高分表现好强、发号施令,低分表现顺从、低调",
- "活跃": "个体精力充沛,活跃、主动性等特点;高分表现活跃,低分表现安静"
- }
+ "活跃": "个体精力充沛,活跃、主动性等特点;高分表现活跃,低分表现安静",
+ },
},
"神经质": {
- "description": (
- "反映个体情绪的状态和体验内心苦恼的倾向性。这个维度主要关注个体在面对压力、挫折和"
- "日常生活挑战时的情绪稳定性和适应能力。它包含了对焦虑、抑郁、愤怒等负面情绪的敏感程度,"
- "以及个体对这些情绪的调节和控制能力。高分者容易体验负面情绪,对压力较为敏感,情绪波动较大;"
- "低分者则表现出较强的情绪稳定性,能够较好地应对压力和挫折。"
- ),
+ "description": "反映个体情绪的状态和体验内心苦恼的倾向性。这个维度主要关注个体在面对压力、"
+ "挫折和日常生活挑战时的情绪稳定性和适应能力。它包含了对焦虑、抑郁、愤怒等负面情绪的敏感程度,"
+ "以及个体对这些情绪的调节和控制能力。高分者容易体验负面情绪,对压力较为敏感,情绪波动较大;"
+ "低分者则表现出较强的情绪稳定性,能够较好地应对压力和挫折。",
"trait_words": ["稳定", "沉着", "从容", "坚韧"],
"subfactors": {
"焦虑": "个体体验焦虑感的个体差异;高分表现坐立不安,低分表现平静",
"抑郁": "个体体验抑郁情感的个体差异;高分表现郁郁寡欢,低分表现平静",
- "敏感多疑": "个体常常关注自己的内心活动,行为和过于意识人对自己的看法、评价;"
- "高分表现敏感多疑,低分表现淡定、自信",
+ "敏感多疑": "个体常常关注自己的内心活动,行为和过于意识人对自己的看法、评价;高分表现敏感多疑,"
+ "低分表现淡定、自信",
"脆弱性": "个体在危机或困难面前无力、脆弱的特点;高分表现无能、易受伤、逃避,低分表现坚强",
- "愤怒-敌意": "个体准备体验愤怒,及相关情绪的状态;高分表现暴躁易怒,低分表现平静"
- }
+ "愤怒-敌意": "个体准备体验愤怒,及相关情绪的状态;高分表现暴躁易怒,低分表现平静",
+ },
},
"严谨性": {
- "description": (
- "反映个体在目标导向行为上的组织、坚持和动机特征。这个维度体现了个体在工作、学习等"
- "目标性活动中的自我约束和行为管理能力。它涉及到个体的责任感、自律性、计划性、条理性以及"
- "完成任务的态度。高分者往往表现出强烈的责任心、良好的组织能力、谨慎的决策风格和持续的"
- "努力精神;低分者则可能表现出随意性强、缺乏规划、做事马虎或易放弃的特点。"
- ),
+ "description": "反映个体在目标导向行为上的组织、坚持和动机特征。这个维度体现了个体在工作、"
+ "学习等目标性活动中的自我约束和行为管理能力。它涉及到个体的责任感、自律性、计划性、条理性以及完成任务的态度。"
+ "高分者往往表现出强烈的责任心、良好的组织能力、谨慎的决策风格和持续的努力精神;低分者则可能表现出随意性强、"
+ "缺乏规划、做事马虎或易放弃的特点。",
"trait_words": ["负责", "自律", "条理", "勤奋"],
"subfactors": {
- "责任心": "个体对待任务和他人认真负责,以及对自己承诺的信守;"
- "高分表现有责任心、负责任,低分表现推卸责任、逃避处罚",
+ "责任心": "个体对待任务和他人认真负责,以及对自己承诺的信守;高分表现有责任心、负责任,"
+ "低分表现推卸责任、逃避处罚",
"自我控制": "个体约束自己的能力,及自始至终的坚持性;高分表现自制、有毅力,低分表现冲动、无毅力",
"审慎性": "个体在采取具体行动前的心理状态;高分表现谨慎、小心,低分表现鲁莽、草率",
"条理性": "个体处理事务和工作的秩序,条理和逻辑性;高分表现整洁、有秩序,低分表现混乱、遗漏",
- "勤奋": "个体工作和学习的努力程度及为达到目标而表现出的进取精神;高分表现勤奋、刻苦,低分表现懒散"
- }
+ "勤奋": "个体工作和学习的努力程度及为达到目标而表现出的进取精神;高分表现勤奋、刻苦,低分表现懒散",
+ },
},
"开放性": {
- "description": (
- "反映个体对新异事物、新观念和新经验的接受程度,以及在思维和行为方面的创新倾向。"
- "这个维度体现了个体在认知和体验方面的广度、深度和灵活性。它包括对艺术的欣赏能力、"
- "对知识的求知欲、想象力的丰富程度,以及对冒险和创新的态度。高分者往往具有丰富的想象力、"
- "广泛的兴趣、开放的思维方式和创新的倾向;低分者则倾向于保守、传统,喜欢熟悉和常规的事物。"
- ),
+ "description": "反映个体对新异事物、新观念和新经验的接受程度,以及在思维和行为方面的创新倾向。"
+ "这个维度体现了个体在认知和体验方面的广度、深度和灵活性。它包括对艺术的欣赏能力、对知识的求知欲、想象力的丰富程度,"
+ "以及对冒险和创新的态度。高分者往往具有丰富的想象力、广泛的兴趣、开放的思维方式和创新的倾向;低分者则倾向于保守、"
+ "传统,喜欢熟悉和常规的事物。",
"trait_words": ["创新", "好奇", "艺术", "冒险"],
"subfactors": {
"幻想": "个体富于幻想和想象的水平;高分表现想象力丰富,低分表现想象力匮乏",
"审美": "个体对于艺术和美的敏感与热爱程度;高分表现富有艺术气息,低分表现一般对艺术不敏感",
"好奇心": "个体对未知事物的态度;高分表现兴趣广泛、好奇心浓,低分表现兴趣少、无好奇心",
"冒险精神": "个体愿意尝试有风险活动的个体差异;高分表现好冒险,低分表现保守",
- "价值观念": "个体对新事物、新观念、怪异想法的态度;高分表现开放、坦然接受新事物,低分则相反"
- }
+ "价值观念": "个体对新事物、新观念、怪异想法的态度;高分表现开放、坦然接受新事物,低分则相反",
+ },
},
"宜人性": {
- "description": (
- "反映个体在人际关系中的亲和倾向,体现了对他人的关心、同情和合作意愿。这个维度主要"
- "关注个体与他人互动时的态度和行为特征,包括对他人的信任程度、同理心水平、助人意愿以及"
- "在人际冲突中的处理方式。高分者通常表现出友善、富有同情心、乐于助人的特质,善于与他人"
- "建立和谐关系;低分者则可能表现出较少的人际关注,在社交互动中更注重自身利益,较少考虑"
- "他人感受。"
- ),
+ "description": "反映个体在人际关系中的亲和倾向,体现了对他人的关心、同情和合作意愿。"
+ "这个维度主要关注个体与他人互动时的态度和行为特征,包括对他人的信任程度、同理心水平、"
+ "助人意愿以及在人际冲突中的处理方式。高分者通常表现出友善、富有同情心、乐于助人的特质,善于与他人建立和谐关系;"
+ "低分者则可能表现出较少的人际关注,在社交互动中更注重自身利益,较少考虑他人感受。",
"trait_words": ["友善", "同理", "信任", "合作"],
"subfactors": {
"信任": "个体对他人和/或他人言论的相信程度;高分表现信任他人,低分表现怀疑",
"体贴": "个体对别人的兴趣和需要的关注程度;高分表现体贴、温存,低分表现冷漠、不在乎",
- "同情": "个体对处于不利地位的人或物的态度;高分表现富有同情心,低分表现冷漠"
- }
- }
-}
\ No newline at end of file
+ "同情": "个体对处于不利地位的人或物的态度;高分表现富有同情心,低分表现冷漠",
+ },
+ },
+}
diff --git a/src/plugins/personality/renqingziji.py b/src/plugins/personality/renqingziji.py
index 5431f4e68..4b1fb3b69 100644
--- a/src/plugins/personality/renqingziji.py
+++ b/src/plugins/personality/renqingziji.py
@@ -1,29 +1,23 @@
-'''
+"""
The definition of artificial personality in this paper follows the dispositional para-digm and adapts a definition of
personality developed for humans [17]:
-Personality for a human is the "whole and organisation of relatively stable tendencies and patterns of experience and
-behaviour within one person (distinguishing it from other persons)".
-This definition is modified for artificial personality:
-Artificial personality describes the relatively stable tendencies
-and patterns of behav-iour of an AI-based machine that
-can be designed by developers and designers via different modalities, such as language, creating the impression
-of individuality of a humanized social agent when users interact with the machine.'''
+Personality for a human is the "whole and organisation of relatively stable tendencies and patterns of experience and
+behaviour within one person (distinguishing it from other persons)". This definition is modified for artificial
+personality:
+Artificial personality describes the relatively stable tendencies and patterns of behaviour of an AI-based machine that
+can be designed by developers and designers via different modalities, such as language, creating the impression
+of individuality of a humanized social agent when users interact with the machine."""
+from typing import Dict, List
import json
import os
-import sys
from pathlib import Path
-from typing import Dict, List
-
from dotenv import load_dotenv
+import sys
-from src.plugins.personality.offline_llm import LLMModel
-from src.plugins.personality.questionnaire import FACTOR_DESCRIPTIONS
-from src.plugins.personality.scene import get_scene_by_factor, PERSONALITY_SCENES
-
-'''
+"""
第一种方案:基于情景评估的人格测定
-'''
+"""
current_dir = Path(__file__).resolve().parent
project_root = current_dir.parent.parent.parent
env_path = project_root / ".env.prod"
@@ -31,6 +25,9 @@ env_path = project_root / ".env.prod"
root_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../.."))
sys.path.append(root_path)
+from src.plugins.personality.scene import get_scene_by_factor, PERSONALITY_SCENES # noqa: E402
+from src.plugins.personality.questionnaire import FACTOR_DESCRIPTIONS # noqa: E402
+from src.plugins.personality.offline_llm import LLMModel # noqa: E402
# 加载环境变量
if env_path.exists():
@@ -45,32 +42,31 @@ class PersonalityEvaluator_direct:
def __init__(self):
self.personality_traits = {"开放性": 0, "严谨性": 0, "外向性": 0, "宜人性": 0, "神经质": 0}
self.scenarios = []
-
+
# 为每个人格特质获取对应的场景
for trait in PERSONALITY_SCENES:
scenes = get_scene_by_factor(trait)
if not scenes:
continue
-
+
# 从每个维度选择3个场景
import random
+
scene_keys = list(scenes.keys())
selected_scenes = random.sample(scene_keys, min(3, len(scene_keys)))
-
+
for scene_key in selected_scenes:
scene = scenes[scene_key]
-
+
# 为每个场景添加评估维度
# 主维度是当前特质,次维度随机选择一个其他特质
other_traits = [t for t in PERSONALITY_SCENES if t != trait]
secondary_trait = random.choice(other_traits)
-
- self.scenarios.append({
- "场景": scene["scenario"],
- "评估维度": [trait, secondary_trait],
- "场景编号": scene_key
- })
-
+
+ self.scenarios.append(
+ {"场景": scene["scenario"], "评估维度": [trait, secondary_trait], "场景编号": scene_key}
+ )
+
self.llm = LLMModel()
def evaluate_response(self, scenario: str, response: str, dimensions: List[str]) -> Dict[str, float]:
@@ -83,9 +79,9 @@ class PersonalityEvaluator_direct:
desc = FACTOR_DESCRIPTIONS.get(dim, "")
if desc:
dimension_descriptions.append(f"- {dim}:{desc}")
-
+
dimensions_text = "\n".join(dimension_descriptions)
-
+
prompt = f"""请根据以下场景和用户描述,评估用户在大五人格模型中的相关维度得分(1-6分)。
场景描述:
@@ -183,11 +179,7 @@ def main():
print(f"测试场景数:{dimension_counts[trait]}")
# 保存结果
- result = {
- "final_scores": final_scores,
- "dimension_counts": dimension_counts,
- "scenarios": evaluator.scenarios
- }
+ result = {"final_scores": final_scores, "dimension_counts": dimension_counts, "scenarios": evaluator.scenarios}
# 确保目录存在
os.makedirs("results", exist_ok=True)
diff --git a/src/plugins/personality/scene.py b/src/plugins/personality/scene.py
index 9bf3b4ec1..0ce094a36 100644
--- a/src/plugins/personality/scene.py
+++ b/src/plugins/personality/scene.py
@@ -8,7 +8,7 @@ PERSONALITY_SCENES = {
同事:「嗨!你是新来的同事吧?我是市场部的小林。」
同事看起来很友善,还主动介绍说:「待会午饭时间,我们部门有几个人准备一起去楼下新开的餐厅,你要一起来吗?可以认识一下其他同事。」""",
- "explanation": "这个场景通过职场社交情境,观察个体对于新环境、新社交圈的态度和反应倾向。"
+ "explanation": "这个场景通过职场社交情境,观察个体对于新环境、新社交圈的态度和反应倾向。",
},
"场景2": {
"scenario": """在大学班级群里,班长发起了一个组织班级联谊活动的投票:
@@ -16,7 +16,7 @@ PERSONALITY_SCENES = {
班长:「大家好!下周末我们准备举办一次班级联谊活动,地点在学校附近的KTV。想请大家报名参加,也欢迎大家邀请其他班级的同学!」
已经有几个同学在群里积极响应,有人@你问你要不要一起参加。""",
- "explanation": "通过班级活动场景,观察个体对群体社交活动的参与意愿。"
+ "explanation": "通过班级活动场景,观察个体对群体社交活动的参与意愿。",
},
"场景3": {
"scenario": """你在社交平台上发布了一条动态,收到了很多陌生网友的评论和私信:
@@ -24,13 +24,14 @@ PERSONALITY_SCENES = {
网友A:「你说的这个观点很有意思!想和你多交流一下。」
网友B:「我也对这个话题很感兴趣,要不要建个群一起讨论?」""",
- "explanation": "通过网络社交场景,观察个体对线上社交的态度。"
+ "explanation": "通过网络社交场景,观察个体对线上社交的态度。",
},
"场景4": {
"scenario": """你暗恋的对象今天主动来找你:
-对方:「那个...我最近在准备一个演讲比赛,听说你口才很好。能不能请你帮我看看演讲稿,顺便给我一些建议?如果你有时间的话,可以一起吃个饭聊聊。」""",
- "explanation": "通过恋爱情境,观察个体在面对心仪对象时的社交表现。"
+对方:「那个...我最近在准备一个演讲比赛,听说你口才很好。能不能请你帮我看看演讲稿,顺便给我一些建议?"""
+ """如果你有时间的话,可以一起吃个饭聊聊。」""",
+ "explanation": "通过恋爱情境,观察个体在面对心仪对象时的社交表现。",
},
"场景5": {
"scenario": """在一次线下读书会上,主持人突然点名让你分享读后感:
@@ -38,19 +39,18 @@ PERSONALITY_SCENES = {
主持人:「听说你对这本书很有见解,能不能和大家分享一下你的想法?」
现场有二十多个陌生的读书爱好者,都期待地看着你。""",
- "explanation": "通过即兴发言场景,观察个体的社交表现欲和公众表达能力。"
- }
+ "explanation": "通过即兴发言场景,观察个体的社交表现欲和公众表达能力。",
+ },
},
-
"神经质": {
"场景1": {
- "scenario": """你正在准备一个重要的项目演示,这关系到你的晋升机会。就在演示前30分钟
-,你收到了主管发来的消息:
+ "scenario": """你正在准备一个重要的项目演示,这关系到你的晋升机会。"""
+ """就在演示前30分钟,你收到了主管发来的消息:
+
主管:「临时有个变动,CEO也会来听你的演示。他对这个项目特别感兴趣。」
-正当你准备回复时,主管又发来一条:「对了,能不能把演示时间压缩到15分钟?CEO下午还有其他安排。
-你之前准备的是30分钟的版本对吧?」""",
- "explanation": "这个场景通过突发的压力情境,观察个体在面对计划外变化时的情绪反应和调节能力。"
+正当你准备回复时,主管又发来一条:「对了,能不能把演示时间压缩到15分钟?CEO下午还有其他安排。你之前准备的是30分钟的版本对吧?」""",
+ "explanation": "这个场景通过突发的压力情境,观察个体在面对计划外变化时的情绪反应和调节能力。",
},
"场景2": {
"scenario": """期末考试前一天晚上,你收到了好朋友发来的消息:
@@ -58,7 +58,7 @@ PERSONALITY_SCENES = {
好朋友:「不好意思这么晚打扰你...我看你平时成绩很好,能不能帮我解答几个问题?我真的很担心明天的考试。」
你看了看时间,已经是晚上11点,而你原本计划的复习还没完成。""",
- "explanation": "通过考试压力场景,观察个体在时间紧张时的情绪管理。"
+ "explanation": "通过考试压力场景,观察个体在时间紧张时的情绪管理。",
},
"场景3": {
"scenario": """你在社交媒体上发表的一个观点引发了争议,有不少人开始批评你:
@@ -68,7 +68,7 @@ PERSONALITY_SCENES = {
网友B:「建议楼主先去补补课再来发言。」
评论区里的负面评论越来越多,还有人开始人身攻击。""",
- "explanation": "通过网络争议场景,观察个体面对批评时的心理承受能力。"
+ "explanation": "通过网络争议场景,观察个体面对批评时的心理承受能力。",
},
"场景4": {
"scenario": """你和恋人约好今天一起看电影,但在约定时间前半小时,对方发来消息:
@@ -78,7 +78,7 @@ PERSONALITY_SCENES = {
二十分钟后,对方又发来消息:「可能要再等等,抱歉!」
电影快要开始了,但对方还是没有出现。""",
- "explanation": "通过恋爱情境,观察个体对不确定性的忍耐程度。"
+ "explanation": "通过恋爱情境,观察个体对不确定性的忍耐程度。",
},
"场景5": {
"scenario": """在一次重要的小组展示中,你的组员在演示途中突然卡壳了:
@@ -86,10 +86,9 @@ PERSONALITY_SCENES = {
组员小声对你说:「我忘词了,接下来的部分是什么来着...」
台下的老师和同学都在等待,气氛有些尴尬。""",
- "explanation": "通过公开场合的突发状况,观察个体的应急反应和压力处理能力。"
- }
+ "explanation": "通过公开场合的突发状况,观察个体的应急反应和压力处理能力。",
+ },
},
-
"严谨性": {
"场景1": {
"scenario": """你是团队的项目负责人,刚刚接手了一个为期两个月的重要项目。在第一次团队会议上:
@@ -99,7 +98,7 @@ PERSONALITY_SCENES = {
小张:「要不要先列个时间表?不过感觉太详细的计划也没必要,点到为止就行。」
小李:「客户那边说如果能提前完成有奖励,我觉得我们可以先做快一点的部分。」""",
- "explanation": "这个场景通过项目管理情境,体现个体在工作方法、计划性和责任心方面的特征。"
+ "explanation": "这个场景通过项目管理情境,体现个体在工作方法、计划性和责任心方面的特征。",
},
"场景2": {
"scenario": """期末小组作业,组长让大家分工完成一份研究报告。在截止日期前三天:
@@ -109,7 +108,7 @@ PERSONALITY_SCENES = {
组员B:「我这边可能还要一天才能完成,最近太忙了。」
组员C发来一份没有任何引用出处、可能存在抄袭的内容:「我写完了,你们看看怎么样?」""",
- "explanation": "通过学习场景,观察个体对学术规范和质量要求的重视程度。"
+ "explanation": "通过学习场景,观察个体对学术规范和质量要求的重视程度。",
},
"场景3": {
"scenario": """你在一个兴趣小组的群聊中,大家正在讨论举办一次线下活动:
@@ -119,7 +118,7 @@ PERSONALITY_SCENES = {
成员B:「对啊,随意一点挺好的。」
成员C:「人来了自然就热闹了。」""",
- "explanation": "通过活动组织场景,观察个体对活动计划的态度。"
+ "explanation": "通过活动组织场景,观察个体对活动计划的态度。",
},
"场景4": {
"scenario": """你和恋人计划一起去旅游,对方说:
@@ -127,7 +126,7 @@ PERSONALITY_SCENES = {
恋人:「我们就随心而行吧!订个目的地,其他的到了再说,这样更有意思。」
距离出发还有一周时间,但机票、住宿和具体行程都还没有确定。""",
- "explanation": "通过旅行规划场景,观察个体的计划性和对不确定性的接受程度。"
+ "explanation": "通过旅行规划场景,观察个体的计划性和对不确定性的接受程度。",
},
"场景5": {
"scenario": """在一个重要的团队项目中,你发现一个同事的工作存在明显错误:
@@ -135,20 +134,19 @@ PERSONALITY_SCENES = {
同事:「差不多就行了,反正领导也看不出来。」
这个错误可能不会立即造成问题,但长期来看可能会影响项目质量。""",
- "explanation": "通过工作质量场景,观察个体对细节和标准的坚持程度。"
- }
+ "explanation": "通过工作质量场景,观察个体对细节和标准的坚持程度。",
+ },
},
-
"开放性": {
"场景1": {
"scenario": """周末下午,你的好友小美兴致勃勃地给你打电话:
-小美:「我刚发现一个特别有意思的沉浸式艺术展!不是传统那种挂画的展览,而是把整个空间都变成了艺术品。观众要穿特制的服装,
-还要带上VR眼镜,好像还有AI实时互动!」
+小美:「我刚发现一个特别有意思的沉浸式艺术展!不是传统那种挂画的展览,而是把整个空间都变成了艺术品。"""
+ """观众要穿特制的服装,还要带上VR眼镜,好像还有AI实时互动!」
-小美继续说:「虽然票价不便宜,但听说体验很独特。网上评价两极分化,有人说是前所未有的艺术革新,
-也有人说是哗众取宠。要不要周末一起去体验一下?」""",
- "explanation": "这个场景通过新型艺术体验,反映个体对创新事物的接受程度和尝试意愿。"
+小美继续说:「虽然票价不便宜,但听说体验很独特。网上评价两极分化,有人说是前所未有的艺术革新,也有人说是哗众取宠。"""
+ """要不要周末一起去体验一下?」""",
+ "explanation": "这个场景通过新型艺术体验,反映个体对创新事物的接受程度和尝试意愿。",
},
"场景2": {
"scenario": """在一节创意写作课上,老师提出了一个特别的作业:
@@ -156,16 +154,16 @@ PERSONALITY_SCENES = {
老师:「下周的作业是用AI写作工具协助创作一篇小说。你们可以自由探索如何与AI合作,打破传统写作方式。」
班上随即展开了激烈讨论,有人认为这是对创作的亵渎,也有人对这种新形式感到兴奋。""",
- "explanation": "通过新技术应用场景,观察个体对创新学习方式的态度。"
+ "explanation": "通过新技术应用场景,观察个体对创新学习方式的态度。",
},
"场景3": {
"scenario": """在社交媒体上,你看到一个朋友分享了一种新的生活方式:
-「最近我在尝试'数字游牧'生活,就是一边远程工作一边环游世界。没有固定住所,住青旅或短租,认识来自世界各地的朋友。
-虽然有时会很不稳定,但这种自由的生活方式真的很棒!」
+「最近我在尝试'数字游牧'生活,就是一边远程工作一边环游世界。"""
+ """没有固定住所,住青旅或短租,认识来自世界各地的朋友。虽然有时会很不稳定,但这种自由的生活方式真的很棒!」
评论区里争论不断,有人向往这种生活,也有人觉得太冒险。""",
- "explanation": "通过另类生活方式,观察个体对非传统选择的态度。"
+ "explanation": "通过另类生活方式,观察个体对非传统选择的态度。",
},
"场景4": {
"scenario": """你的恋人突然提出了一个想法:
@@ -173,7 +171,7 @@ PERSONALITY_SCENES = {
恋人:「我们要不要尝试一下开放式关系?就是在保持彼此关系的同时,也允许和其他人发展感情。现在国外很多年轻人都这样。」
这个提议让你感到意外,你之前从未考虑过这种可能性。""",
- "explanation": "通过感情观念场景,观察个体对非传统关系模式的接受度。"
+ "explanation": "通过感情观念场景,观察个体对非传统关系模式的接受度。",
},
"场景5": {
"scenario": """在一次朋友聚会上,大家正在讨论未来职业规划:
@@ -183,10 +181,9 @@ PERSONALITY_SCENES = {
朋友B:「我想去学习生物科技,准备转行做人造肉研发。」
朋友C:「我在考虑加入一个区块链创业项目,虽然风险很大。」""",
- "explanation": "通过职业选择场景,观察个体对新兴领域的探索意愿。"
- }
+ "explanation": "通过职业选择场景,观察个体对新兴领域的探索意愿。",
+ },
},
-
"宜人性": {
"场景1": {
"scenario": """在回家的公交车上,你遇到这样一幕:
@@ -198,7 +195,7 @@ PERSONALITY_SCENES = {
年轻人B:「现在的老年人真是...我看她包里还有菜,肯定是去菜市场买完菜回来的,这么多人都不知道叫子女开车接送。」
就在这时,老奶奶一个趔趄,差点摔倒。她扶住了扶手,但包里的东西洒了一些出来。""",
- "explanation": "这个场景通过公共场合的助人情境,体现个体的同理心和对他人需求的关注程度。"
+ "explanation": "这个场景通过公共场合的助人情境,体现个体的同理心和对他人需求的关注程度。",
},
"场景2": {
"scenario": """在班级群里,有同学发起为生病住院的同学捐款:
@@ -208,7 +205,7 @@ PERSONALITY_SCENES = {
同学B:「我觉得这是他家里的事,我们不方便参与吧。」
同学C:「但是都是同学一场,帮帮忙也是应该的。」""",
- "explanation": "通过同学互助场景,观察个体的助人意愿和同理心。"
+ "explanation": "通过同学互助场景,观察个体的助人意愿和同理心。",
},
"场景3": {
"scenario": """在一个网络讨论组里,有人发布了求助信息:
@@ -219,7 +216,7 @@ PERSONALITY_SCENES = {
「生活本来就是这样,想开点!」
「你这样子太消极了,要积极面对。」
「谁还没点烦心事啊,过段时间就好了。」""",
- "explanation": "通过网络互助场景,观察个体的共情能力和安慰方式。"
+ "explanation": "通过网络互助场景,观察个体的共情能力和安慰方式。",
},
"场景4": {
"scenario": """你的恋人向你倾诉工作压力:
@@ -227,7 +224,7 @@ PERSONALITY_SCENES = {
恋人:「最近工作真的好累,感觉快坚持不下去了...」
但今天你也遇到了很多烦心事,心情也不太好。""",
- "explanation": "通过感情关系场景,观察个体在自身状态不佳时的关怀能力。"
+ "explanation": "通过感情关系场景,观察个体在自身状态不佳时的关怀能力。",
},
"场景5": {
"scenario": """在一次团队项目中,新来的同事小王因为经验不足,造成了一个严重的错误。在部门会议上:
@@ -235,27 +232,29 @@ PERSONALITY_SCENES = {
主管:「这个错误造成了很大的损失,是谁负责的这部分?」
小王看起来很紧张,欲言又止。你知道是他造成的错误,同时你也是这个项目的共同负责人。""",
- "explanation": "通过职场情境,观察个体在面对他人过错时的态度和处理方式。"
- }
- }
+ "explanation": "通过职场情境,观察个体在面对他人过错时的态度和处理方式。",
+ },
+ },
}
+
def get_scene_by_factor(factor: str) -> Dict:
"""
根据人格因子获取对应的情景测试
-
+
Args:
factor (str): 人格因子名称
-
+
Returns:
Dict: 包含情景描述的字典
"""
return PERSONALITY_SCENES.get(factor, None)
+
def get_all_scenes() -> Dict:
"""
获取所有情景测试
-
+
Returns:
Dict: 所有情景测试的字典
"""
diff --git a/src/plugins/schedule/offline_llm.py b/src/plugins/schedule/offline_llm.py
new file mode 100644
index 000000000..e4dc23f93
--- /dev/null
+++ b/src/plugins/schedule/offline_llm.py
@@ -0,0 +1,129 @@
+import asyncio
+import os
+import time
+from typing import Tuple
+
+import aiohttp
+import requests
+from src.common.logger import get_module_logger
+
+logger = get_module_logger("offline_llm")
+
+
+class LLMModel:
+ def __init__(self, model_name="deepseek-ai/DeepSeek-V3", **kwargs):
+ self.model_name = model_name
+ self.params = kwargs
+ self.api_key = os.getenv("SILICONFLOW_KEY")
+ self.base_url = os.getenv("SILICONFLOW_BASE_URL")
+
+ if not self.api_key or not self.base_url:
+ raise ValueError("环境变量未正确加载:SILICONFLOW_KEY 或 SILICONFLOW_BASE_URL 未设置")
+
+ logger.info(f"API URL: {self.base_url}") # 使用 logger 记录 base_url
+
+    def generate_response(self, prompt: str) -> Tuple[str, str]:
+ """根据输入的提示生成模型的响应"""
+ headers = {"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json"}
+
+ # 构建请求体
+ data = {
+ "model": self.model_name,
+ "messages": [{"role": "user", "content": prompt}],
+ "temperature": 0.5,
+ **self.params,
+ }
+
+ # 发送请求到完整的 chat/completions 端点
+ api_url = f"{self.base_url.rstrip('/')}/chat/completions"
+ logger.info(f"Request URL: {api_url}") # 记录请求的 URL
+
+ max_retries = 3
+ base_wait_time = 15 # 基础等待时间(秒)
+
+ for retry in range(max_retries):
+ try:
+ response = requests.post(api_url, headers=headers, json=data)
+
+ if response.status_code == 429:
+ wait_time = base_wait_time * (2**retry) # 指数退避
+ logger.warning(f"遇到请求限制(429),等待{wait_time}秒后重试...")
+ time.sleep(wait_time)
+ continue
+
+ response.raise_for_status() # 检查其他响应状态
+
+ result = response.json()
+ if "choices" in result and len(result["choices"]) > 0:
+ content = result["choices"][0]["message"]["content"]
+ reasoning_content = result["choices"][0]["message"].get("reasoning_content", "")
+ return content, reasoning_content
+ return "没有返回结果", ""
+
+ except Exception as e:
+ if retry < max_retries - 1: # 如果还有重试机会
+ wait_time = base_wait_time * (2**retry)
+ logger.error(f"[回复]请求失败,等待{wait_time}秒后重试... 错误: {str(e)}")
+ time.sleep(wait_time)
+ else:
+ logger.error(f"请求失败: {str(e)}")
+ return f"请求失败: {str(e)}", ""
+
+ logger.error("达到最大重试次数,请求仍然失败")
+ return "达到最大重试次数,请求仍然失败", ""
+
+    async def generate_response_async(self, prompt: str) -> Tuple[str, str]:
+ """异步方式根据输入的提示生成模型的响应"""
+ headers = {"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json"}
+
+ # 构建请求体
+ data = {
+ "model": self.model_name,
+ "messages": [{"role": "user", "content": prompt}],
+ "temperature": 0.5,
+ **self.params,
+ }
+
+ # 发送请求到完整的 chat/completions 端点
+ api_url = f"{self.base_url.rstrip('/')}/chat/completions"
+ logger.info(f"Request URL: {api_url}") # 记录请求的 URL
+
+ max_retries = 3
+ base_wait_time = 15
+
+ async with aiohttp.ClientSession() as session:
+ for retry in range(max_retries):
+ try:
+ async with session.post(api_url, headers=headers, json=data) as response:
+ if response.status == 429:
+ wait_time = base_wait_time * (2**retry) # 指数退避
+ logger.warning(f"遇到请求限制(429),等待{wait_time}秒后重试...")
+ await asyncio.sleep(wait_time)
+ continue
+
+ response.raise_for_status() # 检查其他响应状态
+
+ result = await response.json()
+ if "choices" in result and len(result["choices"]) > 0:
+ content = result["choices"][0]["message"]["content"]
+ reasoning_content = result["choices"][0]["message"].get("reasoning_content", "")
+ return content, reasoning_content
+ return "没有返回结果", ""
+
+ except Exception as e:
+ if retry < max_retries - 1: # 如果还有重试机会
+ wait_time = base_wait_time * (2**retry)
+ logger.error(f"[回复]请求失败,等待{wait_time}秒后重试... 错误: {str(e)}")
+ await asyncio.sleep(wait_time)
+ else:
+ logger.error(f"请求失败: {str(e)}")
+ return f"请求失败: {str(e)}", ""
+
+ logger.error("达到最大重试次数,请求仍然失败")
+ return "达到最大重试次数,请求仍然失败", ""
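+
+
+# 用法示意(仅作参考的草图;假设环境变量 SILICONFLOW_KEY 与 SILICONFLOW_BASE_URL 已正确配置):
+# llm = LLMModel()
+# content, reasoning = llm.generate_response("你好,请简单介绍一下自己")
+# print(content, reasoning)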
diff --git a/src/plugins/schedule/schedule_generator copy.py b/src/plugins/schedule/schedule_generator copy.py
new file mode 100644
index 000000000..eff0a08d6
--- /dev/null
+++ b/src/plugins/schedule/schedule_generator copy.py
@@ -0,0 +1,195 @@
+import datetime
+import json
+import re
+import os
+import sys
+from typing import Dict, Tuple, Union
+
+
+# 添加项目根目录到 Python 路径
+root_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../.."))
+sys.path.append(root_path)
+
+from src.common.database import db # noqa: E402
+from src.common.logger import get_module_logger # noqa: E402
+from src.plugins.schedule.offline_llm import LLMModel # noqa: E402
+from src.plugins.chat.config import global_config # noqa: E402
+
+logger = get_module_logger("scheduler")
+
+
+class ScheduleGenerator:
+ enable_output: bool = True
+
+ def __init__(self):
+ # 使用离线LLM模型
+ self.llm_scheduler = LLMModel(model_name="Pro/deepseek-ai/DeepSeek-V3", temperature=0.9)
+ self.today_schedule_text = ""
+ self.today_schedule = {}
+ self.tomorrow_schedule_text = ""
+ self.tomorrow_schedule = {}
+ self.yesterday_schedule_text = ""
+ self.yesterday_schedule = {}
+
+ async def initialize(self):
+ today = datetime.datetime.now()
+ tomorrow = datetime.datetime.now() + datetime.timedelta(days=1)
+ yesterday = datetime.datetime.now() - datetime.timedelta(days=1)
+
+ self.today_schedule_text, self.today_schedule = await self.generate_daily_schedule(target_date=today)
+ self.tomorrow_schedule_text, self.tomorrow_schedule = await self.generate_daily_schedule(
+ target_date=tomorrow, read_only=True
+ )
+ self.yesterday_schedule_text, self.yesterday_schedule = await self.generate_daily_schedule(
+ target_date=yesterday, read_only=True
+ )
+
+ async def generate_daily_schedule(
+ self, target_date: datetime.datetime = None, read_only: bool = False
+    ) -> Tuple[str, Union[Dict[str, str], None]]:
+ date_str = target_date.strftime("%Y-%m-%d")
+ weekday = target_date.strftime("%A")
+
+        schedule_text = ""
+
+ existing_schedule = db.schedule.find_one({"date": date_str})
+ if existing_schedule:
+ if self.enable_output:
+ logger.debug(f"{date_str}的日程已存在:")
+ schedule_text = existing_schedule["schedule"]
+ # print(self.schedule_text)
+
+ elif not read_only:
+ logger.debug(f"{date_str}的日程不存在,准备生成新的日程。")
+ prompt = (
+ f"""我是{global_config.BOT_NICKNAME},{global_config.PROMPT_SCHEDULE_GEN},请为我生成{date_str}({weekday})的日程安排,包括:"""
+ + """
+ 1. 早上的学习和工作安排
+ 2. 下午的活动和任务
+ 3. 晚上的计划和休息时间
+ 请按照时间顺序列出具体时间点和对应的活动,用一个时间点而不是时间段来表示时间,用JSON格式返回日程表,
+ 仅返回内容,不要返回注释,不要添加任何markdown或代码块样式,时间采用24小时制,
+ 格式为{"时间": "活动","时间": "活动",...}。"""
+ )
+
+ try:
+ schedule_text, _ = self.llm_scheduler.generate_response(prompt)
+ db.schedule.insert_one({"date": date_str, "schedule": schedule_text})
+ self.enable_output = True
+ except Exception as e:
+ logger.error(f"生成日程失败: {str(e)}")
+ schedule_text = "生成日程时出错了"
+ # print(self.schedule_text)
+ else:
+ if self.enable_output:
+ logger.debug(f"{date_str}的日程不存在。")
+ schedule_text = "忘了"
+
+ return schedule_text, None
+
+ schedule_form = self._parse_schedule(schedule_text)
+ return schedule_text, schedule_form
+
+ def _parse_schedule(self, schedule_text: str) -> Union[bool, Dict[str, str]]:
+ """解析日程文本,转换为时间和活动的字典"""
+ try:
+ reg = r"\{(.|\r|\n)+\}"
+ matched = re.search(reg, schedule_text)[0]
+ schedule_dict = json.loads(matched)
+ return schedule_dict
+ except json.JSONDecodeError:
+ logger.exception("解析日程失败: {}".format(schedule_text))
+ return False
+
+    def _parse_time(self, time_str: str) -> datetime.datetime:
+ """解析时间字符串,转换为时间"""
+ return datetime.datetime.strptime(time_str, "%H:%M")
+
+    def get_current_task(self) -> Union[str, Tuple[str, str]]:
+ """获取当前时间应该进行的任务"""
+ current_time = datetime.datetime.now().strftime("%H:%M")
+
+ # 找到最接近当前时间的任务
+ closest_time = None
+ min_diff = float("inf")
+
+ # 检查今天的日程
+ if not self.today_schedule:
+ return "摸鱼"
+ for time_str in self.today_schedule.keys():
+ diff = abs(self._time_diff(current_time, time_str))
+ if closest_time is None or diff < min_diff:
+ closest_time = time_str
+ min_diff = diff
+
+ # 检查昨天的日程中的晚间任务
+ if self.yesterday_schedule:
+ for time_str in self.yesterday_schedule.keys():
+ if time_str >= "20:00": # 只考虑晚上8点之后的任务
+ # 计算与昨天这个时间点的差异(需要加24小时)
+ diff = abs(self._time_diff(current_time, time_str))
+ if diff < min_diff:
+ closest_time = time_str
+ min_diff = diff
+            if closest_time in self.yesterday_schedule:
+                return closest_time, self.yesterday_schedule[closest_time]
+
+ if closest_time:
+ return closest_time, self.today_schedule[closest_time]
+ return "摸鱼"
+
+ def _time_diff(self, time1: str, time2: str) -> int:
+ """计算两个时间字符串之间的分钟差"""
+ if time1 == "24:00":
+ time1 = "23:59"
+ if time2 == "24:00":
+ time2 = "23:59"
+ t1 = datetime.datetime.strptime(time1, "%H:%M")
+ t2 = datetime.datetime.strptime(time2, "%H:%M")
+ diff = int((t2 - t1).total_seconds() / 60)
+ # 考虑时间的循环性
+ if diff < -720:
+ diff += 1440 # 加一天的分钟
+ elif diff > 720:
+ diff -= 1440 # 减一天的分钟
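+        # 数值示例(补充说明):time1="23:30"、time2="00:10" 时原始差为 -1400 分钟,
+        # 小于 -720,加 1440 后得 40 分钟,即按跨天取最短间隔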
+ # print(f"时间1[{time1}]: 时间2[{time2}],差值[{diff}]分钟")
+ return diff
+
+ def print_schedule(self):
+ """打印完整的日程安排"""
+ if not self._parse_schedule(self.today_schedule_text):
+ logger.warning("今日日程有误,将在下次运行时重新生成")
+ db.schedule.delete_one({"date": datetime.datetime.now().strftime("%Y-%m-%d")})
+ else:
+ logger.info("=== 今日日程安排 ===")
+ for time_str, activity in self.today_schedule.items():
+ logger.info(f"时间[{time_str}]: 活动[{activity}]")
+ logger.info("==================")
+ self.enable_output = False
+
+
+async def main():
+ # 使用示例
+ scheduler = ScheduleGenerator()
+ await scheduler.initialize()
+ scheduler.print_schedule()
+ print("\n当前任务:")
+    print(scheduler.get_current_task())
+
+ print("昨天日程:")
+ print(scheduler.yesterday_schedule)
+ print("今天日程:")
+ print(scheduler.today_schedule)
+ print("明天日程:")
+ print(scheduler.tomorrow_schedule)
+
+# 当作为组件导入时使用的实例
+bot_schedule = ScheduleGenerator()
+
+if __name__ == "__main__":
+ import asyncio
+ # 当直接运行此文件时执行
+ asyncio.run(main())
diff --git a/src/plugins/schedule/schedule_generator.py b/src/plugins/schedule/schedule_generator.py
index 11db6664d..d58211215 100644
--- a/src/plugins/schedule/schedule_generator.py
+++ b/src/plugins/schedule/schedule_generator.py
@@ -5,8 +5,9 @@ from typing import Dict, Union
from nonebot import get_driver
-from src.plugins.chat.config import global_config
+# 导入机器人全局配置
+from src.plugins.chat.config import global_config
from ...common.database import db # 使用正确的导入语法
from ..models.utils_model import LLM_request
from src.common.logger import get_module_logger
@@ -165,24 +166,5 @@ class ScheduleGenerator:
logger.info(f"时间[{time_str}]: 活动[{activity}]")
logger.info("==================")
self.enable_output = False
-
-
-# def main():
-# # 使用示例
-# scheduler = ScheduleGenerator()
-# # new_schedule = scheduler.generate_daily_schedule()
-# scheduler.print_schedule()
-# print("\n当前任务:")
-# print(scheduler.get_current_task())
-
-# print("昨天日程:")
-# print(scheduler.yesterday_schedule)
-# print("今天日程:")
-# print(scheduler.today_schedule)
-# print("明天日程:")
-# print(scheduler.tomorrow_schedule)
-
-# if __name__ == "__main__":
-# main()
-
+# 当作为组件导入时使用的实例
bot_schedule = ScheduleGenerator()
diff --git a/template.env b/template.env
index 6791c5842..934a331d0 100644
--- a/template.env
+++ b/template.env
@@ -1,8 +1,6 @@
HOST=127.0.0.1
PORT=8080
-ENABLE_ADVANCE_OUTPUT=false
-
# 插件配置
PLUGINS=["src2.plugins.chat"]
@@ -31,6 +29,7 @@ CHAT_ANY_WHERE_KEY=
SILICONFLOW_KEY=
# 定义日志相关配置
+SIMPLE_OUTPUT=true # 精简控制台输出格式
CONSOLE_LOG_LEVEL=INFO # 自定义日志的默认控制台输出日志级别
FILE_LOG_LEVEL=DEBUG # 自定义日志的默认文件输出日志级别
DEFAULT_CONSOLE_LOG_LEVEL=SUCCESS # 原生日志的控制台输出日志级别(nonebot就是这一类)
diff --git a/template/bot_config_template.toml b/template/bot_config_template.toml
index ec2b5fbd4..e5cf1df86 100644
--- a/template/bot_config_template.toml
+++ b/template/bot_config_template.toml
@@ -1,5 +1,5 @@
[inner]
-version = "0.0.10"
+version = "0.0.11"
#以下是给开发人员阅读的,一般用户不需要阅读
#如果你想要修改配置文件,请在修改后将version的值进行变更
@@ -66,12 +66,17 @@ model_r1_distill_probability = 0.1 # 麦麦回答时选择次要回复模型3
max_response_length = 1024 # 麦麦回答的最大token数
[willing]
-willing_mode = "classical"
-# willing_mode = "dynamic"
-# willing_mode = "custom"
+willing_mode = "classical" # 回复意愿模式 经典模式
+# willing_mode = "dynamic" # 动态模式(可能不兼容)
+# willing_mode = "custom" # 自定义模式(可自行调整
[memory]
build_memory_interval = 2000 # 记忆构建间隔 单位秒 间隔越低,麦麦学习越多,但是冗余信息也会增多
+build_memory_distribution = [4,2,0.6,24,8,0.4] # 记忆构建分布,参数:分布1均值,标准差,权重,分布2均值,标准差,权重
+build_memory_sample_num = 10 # 采样数量,数值越高记忆采样次数越多
+build_memory_sample_length = 20 # 采样长度,数值越高一段记忆内容越丰富
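+# 示例说明:[4,2,0.6,24,8,0.4] 即约 60% 的采样点取自均值 4 小时、标准差 2 小时的正态分布,
+# 其余约 40% 取自均值 24 小时、标准差 8 小时的正态分布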
memory_compress_rate = 0.1 # 记忆压缩率 控制记忆精简程度 建议保持默认,调高可以获得更多信息,但是冗余信息也会增多
forget_memory_interval = 1000 # 记忆遗忘间隔 单位秒 间隔越低,麦麦遗忘越频繁,记忆更精简,但更难学习
@@ -109,9 +112,7 @@ tone_error_rate=0.2 # 声调错误概率
word_replace_rate=0.006 # 整词替换概率
[others]
-enable_advance_output = false # 是否启用高级输出
enable_kuuki_read = true # 是否启用读空气功能
-enable_debug_output = false # 是否启用调试输出
enable_friend_chat = false # 是否启用好友聊天
[groups]
@@ -120,9 +121,9 @@ talk_allowed = [
123,
] #可以回复消息的群
talk_frequency_down = [] #降低回复频率的群
-ban_user_id = [] #禁止回复消息的QQ号
+ban_user_id = [] #禁止回复和读取消息的QQ号
-[remote] #测试功能,发送统计信息,主要是看全球有多少只麦麦
+[remote] #发送统计信息,主要是看全球有多少只麦麦
enable = true
diff --git a/webui.py b/webui.py
index b598df7c0..60ffa4805 100644
--- a/webui.py
+++ b/webui.py
@@ -4,11 +4,14 @@ import toml
import signal
import sys
import requests
+
try:
from src.common.logger import get_module_logger
+
logger = get_module_logger("webui")
except ImportError:
from loguru import logger
+
# 检查并创建日志目录
log_dir = "logs/webui"
if not os.path.exists(log_dir):
@@ -24,11 +27,13 @@ import ast
from packaging import version
from decimal import Decimal
+
def signal_handler(signum, frame):
"""处理 Ctrl+C 信号"""
logger.info("收到终止信号,正在关闭 Gradio 服务器...")
sys.exit(0)
+
# 注册信号处理器
signal.signal(signal.SIGINT, signal_handler)
@@ -44,10 +49,10 @@ if not os.path.exists(".env.prod"):
raise FileNotFoundError("环境配置文件 .env.prod 不存在,请检查配置文件路径")
config_data = toml.load("config/bot_config.toml")
-#Add support for legacy config files
+# Add support for legacy config files
LEGACY_CONFIG_VERSION = version.parse("0.0.1")
-#Add the minimum supported version
+# Add the minimum supported version
MIN_SUPPORT_VERSION = version.parse("0.0.8")
MIN_SUPPORT_MAIMAI_VERSION = version.parse("0.5.13")
@@ -66,7 +71,7 @@ else:
HAVE_ONLINE_STATUS_VERSION = version.parse("0.0.9")
-#Define the available willing-mode options
+# Define the available willing-mode options
WILLING_MODE_CHOICES = [
"classical",
"dynamic",
@@ -74,11 +79,10 @@ WILLING_MODE_CHOICES = [
]
-
-
-#Add the WebUI config file version
+# Add the WebUI config file version
WEBUI_VERSION = version.parse("0.0.9")
+
# ==============================================
# env config file reading section
def parse_env_config(config_file):
@@ -204,7 +208,7 @@ MODEL_PROVIDER_LIST = parse_model_providers(env_config_data)
# end of env reading/saving section
# ==============================================
-#Get the number of MaiMai instances online
+# Get the number of MaiMai instances online
def get_online_maimbot(url="http://hyybuth.xyz:10058/api/clients/details", timeout=10):
@@ -331,19 +335,19 @@ def format_list_to_str(lst):
# env save function
def save_trigger(
- server_address,
- server_port,
- final_result_list,
- t_mongodb_host,
- t_mongodb_port,
- t_mongodb_database_name,
- t_console_log_level,
- t_file_log_level,
- t_default_console_log_level,
- t_default_file_log_level,
- t_api_provider,
- t_api_base_url,
- t_api_key,
+ server_address,
+ server_port,
+ final_result_list,
+ t_mongodb_host,
+ t_mongodb_port,
+ t_mongodb_database_name,
+ t_console_log_level,
+ t_file_log_level,
+ t_default_console_log_level,
+ t_default_file_log_level,
+ t_api_provider,
+ t_api_base_url,
+ t_api_key,
):
final_result_lists = format_list_to_str(final_result_list)
env_config_data["env_HOST"] = server_address
@@ -412,12 +416,12 @@ def save_bot_config(t_qqbot_qq, t_nickname, t_nickname_final_result):
# Watch slider value changes, make sure the total does not exceed 1, and show a warning
def adjust_personality_greater_probabilities(
- t_personality_1_probability, t_personality_2_probability, t_personality_3_probability
+ t_personality_1_probability, t_personality_2_probability, t_personality_3_probability
):
total = (
- Decimal(str(t_personality_1_probability))
- + Decimal(str(t_personality_2_probability))
- + Decimal(str(t_personality_3_probability))
+ Decimal(str(t_personality_1_probability))
+ + Decimal(str(t_personality_2_probability))
+ + Decimal(str(t_personality_3_probability))
)
if total > Decimal("1.0"):
warning_message = (
@@ -428,12 +432,12 @@ def adjust_personality_greater_probabilities(
def adjust_personality_less_probabilities(
- t_personality_1_probability, t_personality_2_probability, t_personality_3_probability
+ t_personality_1_probability, t_personality_2_probability, t_personality_3_probability
):
total = (
- Decimal(str(t_personality_1_probability))
- + Decimal(str(t_personality_2_probability))
- + Decimal(str(t_personality_3_probability))
+ Decimal(str(t_personality_1_probability))
+ + Decimal(str(t_personality_2_probability))
+ + Decimal(str(t_personality_3_probability))
)
if total < Decimal("1.0"):
warning_message = (
@@ -445,9 +449,7 @@ def adjust_personality_less_probabilities(
def adjust_model_greater_probabilities(t_model_1_probability, t_model_2_probability, t_model_3_probability):
total = (
- Decimal(str(t_model_1_probability)) +
- Decimal(str(t_model_2_probability)) +
- Decimal(str(t_model_3_probability))
+ Decimal(str(t_model_1_probability)) + Decimal(str(t_model_2_probability)) + Decimal(str(t_model_3_probability))
)
if total > Decimal("1.0"):
warning_message = (
@@ -459,9 +461,7 @@ def adjust_model_greater_probabilities(t_model_1_probability, t_model_2_probabil
def adjust_model_less_probabilities(t_model_1_probability, t_model_2_probability, t_model_3_probability):
total = (
- Decimal(str(t_model_1_probability))
- + Decimal(str(t_model_2_probability))
- + Decimal(str(t_model_3_probability))
+ Decimal(str(t_model_1_probability)) + Decimal(str(t_model_2_probability)) + Decimal(str(t_model_3_probability))
)
if total < Decimal("1.0"):
warning_message = (
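These checks route every slider value through `Decimal(str(...))` before summing. A short, self-contained illustration of why raw float sums would misfire (pure-Python behavior, independent of this codebase):

```python
from decimal import Decimal

# Binary floats accumulate rounding error, so a "sum exceeds the limit"
# check can trigger even for values that should sum exactly:
print(0.1 + 0.2 > 0.3)  # True, purely a float artifact

# str() captures the exact decimal literal, so Decimal sums stay exact:
total = Decimal(str(0.2)) + Decimal(str(0.4)) + Decimal(str(0.4))
print(total == Decimal("1.0"))  # True: the comparison is exact
```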
@@ -474,13 +474,13 @@ def adjust_model_less_probabilities(t_model_1_probability, t_model_2_probability
# ==============================================
# personality save function
def save_personality_config(
- t_prompt_personality_1,
- t_prompt_personality_2,
- t_prompt_personality_3,
- t_prompt_schedule,
- t_personality_1_probability,
- t_personality_2_probability,
- t_personality_3_probability,
+ t_prompt_personality_1,
+ t_prompt_personality_2,
+ t_prompt_personality_3,
+ t_prompt_schedule,
+ t_personality_1_probability,
+ t_personality_2_probability,
+ t_personality_3_probability,
):
# Save the personality prompts
config_data["personality"]["prompt_personality"][0] = t_prompt_personality_1
@@ -501,20 +501,20 @@ def save_personality_config(
def save_message_and_emoji_config(
- t_min_text_length,
- t_max_context_size,
- t_emoji_chance,
- t_thinking_timeout,
- t_response_willing_amplifier,
- t_response_interested_rate_amplifier,
- t_down_frequency_rate,
- t_ban_words_final_result,
- t_ban_msgs_regex_final_result,
- t_check_interval,
- t_register_interval,
- t_auto_save,
- t_enable_check,
- t_check_prompt,
+ t_min_text_length,
+ t_max_context_size,
+ t_emoji_chance,
+ t_thinking_timeout,
+ t_response_willing_amplifier,
+ t_response_interested_rate_amplifier,
+ t_down_frequency_rate,
+ t_ban_words_final_result,
+ t_ban_msgs_regex_final_result,
+ t_check_interval,
+ t_register_interval,
+ t_auto_save,
+ t_enable_check,
+ t_check_prompt,
):
config_data["message"]["min_text_length"] = t_min_text_length
config_data["message"]["max_context_size"] = t_max_context_size
@@ -536,27 +536,27 @@ def save_message_and_emoji_config(
def save_response_model_config(
- t_willing_mode,
- t_model_r1_probability,
- t_model_r2_probability,
- t_model_r3_probability,
- t_max_response_length,
- t_model1_name,
- t_model1_provider,
- t_model1_pri_in,
- t_model1_pri_out,
- t_model2_name,
- t_model2_provider,
- t_model3_name,
- t_model3_provider,
- t_emotion_model_name,
- t_emotion_model_provider,
- t_topic_judge_model_name,
- t_topic_judge_model_provider,
- t_summary_by_topic_model_name,
- t_summary_by_topic_model_provider,
- t_vlm_model_name,
- t_vlm_model_provider,
+ t_willing_mode,
+ t_model_r1_probability,
+ t_model_r2_probability,
+ t_model_r3_probability,
+ t_max_response_length,
+ t_model1_name,
+ t_model1_provider,
+ t_model1_pri_in,
+ t_model1_pri_out,
+ t_model2_name,
+ t_model2_provider,
+ t_model3_name,
+ t_model3_provider,
+ t_emotion_model_name,
+ t_emotion_model_provider,
+ t_topic_judge_model_name,
+ t_topic_judge_model_provider,
+ t_summary_by_topic_model_name,
+ t_summary_by_topic_model_provider,
+ t_vlm_model_name,
+ t_vlm_model_provider,
):
if PARSED_CONFIG_VERSION >= version.parse("0.0.10"):
config_data["willing"]["willing_mode"] = t_willing_mode
@@ -586,15 +586,15 @@ def save_response_model_config(
def save_memory_mood_config(
- t_build_memory_interval,
- t_memory_compress_rate,
- t_forget_memory_interval,
- t_memory_forget_time,
- t_memory_forget_percentage,
- t_memory_ban_words_final_result,
- t_mood_update_interval,
- t_mood_decay_rate,
- t_mood_intensity_factor,
+ t_build_memory_interval,
+ t_memory_compress_rate,
+ t_forget_memory_interval,
+ t_memory_forget_time,
+ t_memory_forget_percentage,
+ t_memory_ban_words_final_result,
+ t_mood_update_interval,
+ t_mood_decay_rate,
+ t_mood_intensity_factor,
):
config_data["memory"]["build_memory_interval"] = t_build_memory_interval
config_data["memory"]["memory_compress_rate"] = t_memory_compress_rate
@@ -611,17 +611,17 @@ def save_memory_mood_config(
def save_other_config(
- t_keywords_reaction_enabled,
- t_enable_advance_output,
- t_enable_kuuki_read,
- t_enable_debug_output,
- t_enable_friend_chat,
- t_chinese_typo_enabled,
- t_error_rate,
- t_min_freq,
- t_tone_error_rate,
- t_word_replace_rate,
- t_remote_status,
+ t_keywords_reaction_enabled,
+ t_enable_advance_output,
+ t_enable_kuuki_read,
+ t_enable_debug_output,
+ t_enable_friend_chat,
+ t_chinese_typo_enabled,
+ t_error_rate,
+ t_min_freq,
+ t_tone_error_rate,
+ t_word_replace_rate,
+ t_remote_status,
):
config_data["keywords_reaction"]["enable"] = t_keywords_reaction_enabled
config_data["others"]["enable_advance_output"] = t_enable_advance_output
@@ -641,9 +641,9 @@ def save_other_config(
def save_group_config(
- t_talk_allowed_final_result,
- t_talk_frequency_down_final_result,
- t_ban_user_id_final_result,
+ t_talk_allowed_final_result,
+ t_talk_frequency_down_final_result,
+ t_ban_user_id_final_result,
):
config_data["groups"]["talk_allowed"] = t_talk_allowed_final_result
config_data["groups"]["talk_frequency_down"] = t_talk_frequency_down_final_result
@@ -1212,10 +1212,10 @@ with gr.Blocks(title="MaimBot Config File Editor") as app:
willing_mode = gr.Dropdown(
choices=WILLING_MODE_CHOICES,
value=config_data["willing"]["willing_mode"],
- label="回复意愿模式"
+ label="回复意愿模式",
)
else:
- willing_mode = gr.Textbox(visible=False,value="disabled")
+ willing_mode = gr.Textbox(visible=False, value="disabled")
with gr.Row():
model_r1_probability = gr.Slider(
minimum=0,