secret 神秘小测验加强版

This commit is contained in:
SengokuCola
2025-03-23 00:01:26 +08:00
parent 20a06df9e5
commit 4b6a315b8e
5 changed files with 704 additions and 3 deletions

View File

@@ -23,7 +23,6 @@ import jieba
root_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../.."))
sys.path.append(root_path)
from src.common.logger import get_module_logger # noqa: E402
from src.common.database import db # noqa E402
from src.plugins.memory_system.offline_llm import LLMModel # noqa E402

View File

@@ -0,0 +1,351 @@
"""
基于聊天记录的人格特征分析系统
"""
from typing import Dict, List
import json
import os
from pathlib import Path
from dotenv import load_dotenv
import sys
import random
from collections import defaultdict
import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime
import matplotlib.font_manager as fm
current_dir = Path(__file__).resolve().parent
project_root = current_dir.parent.parent.parent
env_path = project_root / ".env.prod"
root_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../.."))
sys.path.append(root_path)
from src.plugins.personality.scene import get_scene_by_factor, PERSONALITY_SCENES # noqa: E402
from src.plugins.personality.questionnaire import FACTOR_DESCRIPTIONS # noqa: E402
from src.plugins.personality.offline_llm import LLMModel # noqa: E402
from src.plugins.personality.who_r_u import MessageAnalyzer # noqa: E402
# Load environment variables from the project's ``.env.prod`` file when it
# exists; otherwise announce that built-in defaults will be used.
if not env_path.exists():
    print(f"未找到环境变量文件: {env_path}")
    print("将使用默认配置")
else:
    print(f"{env_path} 加载环境变量")
    load_dotenv(env_path)
class ChatBasedPersonalityEvaluator:
    """Estimate a user's Big Five personality scores from chat history.

    Random chat excerpts around the user's messages are scored by an LLM on
    each trait (1-6, or 0 for "cannot judge"). Valid scores are averaged per
    trait, written to ``results/personality_result_<qq_id>.json`` and plotted
    as a cumulative-average trend chart under ``results/plots/``.
    """

    # Substrings of a font family name taken as a sign that the font can
    # render Chinese text.
    _CJK_FONT_MARKERS = ("SC", "CJK", "Hei", "YaHei", "Song", "黑", "宋", "微软")

    def __init__(self):
        self.personality_traits = {"开放性": 0, "严谨性": 0, "外向性": 0, "宜人性": 0, "神经质": 0}
        self.scenarios = []
        self.message_analyzer = MessageAnalyzer()
        self.llm = LLMModel()
        # Per-trait list of raw scores (zeros included), in evaluation order.
        self.trait_scores_history = defaultdict(list)

        # Pick up to three scenes per trait; each scene is tagged with its
        # primary trait and one randomly chosen secondary trait.
        for trait in PERSONALITY_SCENES:
            scenes = get_scene_by_factor(trait)
            if not scenes:
                continue
            scene_keys = list(scenes.keys())
            selected_scenes = random.sample(scene_keys, min(3, len(scene_keys)))
            for scene_key in selected_scenes:
                scene = scenes[scene_key]
                other_traits = [t for t in PERSONALITY_SCENES if t != trait]
                secondary_trait = random.choice(other_traits)
                self.scenarios.append({
                    "场景": scene["scenario"],
                    "评估维度": [trait, secondary_trait],
                    "场景编号": scene_key
                })

    def analyze_chat_context(self, messages: List[Dict]) -> str:
        """Render messages as ``"<nickname>: <text>\\n"`` lines.

        Messages with no text are skipped. A missing nickname is rendered as
        ``未知用户``.
        """
        lines = []
        for msg in messages:
            nickname = msg.get('user_info', {}).get('user_nickname', '未知用户')
            content = msg.get('processed_plain_text', msg.get('detailed_plain_text', ''))
            if content:
                lines.append(f"{nickname}: {content}\n")
        return "".join(lines)

    def evaluate_chat_response(
            self, user_nickname: str, chat_context: str, dimensions: List[str] = None) -> Dict[str, float]:
        """Ask the LLM to score ``user_nickname`` on each requested dimension.

        Args:
            user_nickname: Nickname of the user being evaluated, as it appears
                in ``chat_context``.
            chat_context: Formatted chat excerpt shown to the model.
            dimensions: Traits to score. ``None`` or empty means all five
                traits in ``self.personality_traits``.

        Returns:
            A dict with exactly one entry per requested dimension, clamped to
            0-6. A dimension is 0 when the model returned 0, omitted it, gave
            an unparsable value, or the call failed.
        """
        if not dimensions:
            dimensions = list(self.personality_traits.keys())

        dimension_descriptions = []
        for dim in dimensions:
            desc = FACTOR_DESCRIPTIONS.get(dim, "")
            if desc:
                dimension_descriptions.append(f"- {dim}{desc}")
        dimensions_text = "\n".join(dimension_descriptions)
        # One `"<trait>": 分数` line per requested dimension, so the requested
        # output format always matches the dimensions being scored.
        score_template = ",\n".join(f'"{dim}": 分数' for dim in dimensions)

        prompt = f"""请根据以下聊天记录,评估"{user_nickname}"在大五人格模型中的维度得分1-6分
聊天记录:
{chat_context}
需要评估的维度说明:
{dimensions_text}
请按照以下格式输出评估结果,注意,你的评价对象是"{user_nickname}"仅输出JSON格式
{{
{score_template}
}}
评分标准:
1 = 非常不符合该维度特征
2 = 比较不符合该维度特征
3 = 有点不符合该维度特征
4 = 有点符合该维度特征
5 = 比较符合该维度特征
6 = 非常符合该维度特征
如果你觉得某个维度没有相关信息或者无法判断请输出0分
请根据聊天记录的内容和语气结合维度说明进行评分。如果维度可以评分确保分数在1-6之间。如果没有体现请输出0分"""

        try:
            ai_response, _ = self.llm.generate_response(prompt)
        except Exception as e:
            # Best-effort boundary: any failure of the LLM call yields
            # "no information" for this excerpt instead of aborting the run.
            print(f"评估过程出错:{e}")
            return {dim: 0 for dim in dimensions}
        return self._parse_scores(ai_response, dimensions)

    @staticmethod
    def _parse_scores(ai_response: str, dimensions: List[str]) -> Dict[str, float]:
        """Extract clamped 0-6 scores for ``dimensions`` from an LLM reply.

        Keys outside ``dimensions`` are ignored so stray model output cannot
        create new traits downstream. Missing or unparsable entries become 0.
        """
        defaults = {dim: 0 for dim in dimensions}
        try:
            start_idx = ai_response.find("{")
            end_idx = ai_response.rfind("}") + 1
            if start_idx == -1 or end_idx == 0:
                print("AI响应格式不正确使用默认评分")
                return defaults
            scores = json.loads(ai_response[start_idx:end_idx])
            if not isinstance(scores, dict):
                raise ValueError("scores JSON is not an object")
        except (AttributeError, TypeError, ValueError) as e:
            print(f"评估过程出错:{e}")
            return defaults

        parsed = {}
        for dim in dimensions:
            try:
                parsed[dim] = max(0, min(6, float(scores.get(dim, 0))))
            except (TypeError, ValueError):
                parsed[dim] = 0
        return parsed

    def evaluate_user_personality(self, qq_id: str, num_samples: int = 10, context_length: int = 5) -> Dict:
        """Evaluate a user's personality from random excerpts of their chats.

        Args:
            qq_id: The user's QQ number.
            num_samples: Number of chat excerpts to analyse.
            context_length: Context length passed to the message analyzer for
                each excerpt.

        Returns:
            The result dict (also written to
            ``results/personality_result_<qq_id>.json``), or
            ``{"error": ...}`` when no messages were found for the user.
        """
        chat_contexts, user_nickname = self.message_analyzer.get_user_random_contexts(
            qq_id, num_messages=num_samples, context_length=context_length)
        if not chat_contexts:
            return {"error": f"没有找到QQ号 {qq_id} 的消息记录"}

        final_scores = defaultdict(float)
        dimension_counts = defaultdict(int)
        chat_samples = []
        # Start from a clean history so repeated calls do not mix users.
        self.trait_scores_history.clear()

        for chat_context in chat_contexts:
            scores = self.evaluate_chat_response(user_nickname, chat_context)
            chat_samples.append({
                "聊天内容": chat_context,
                "评估维度": list(self.personality_traits.keys()),
                "评分": scores
            })
            for dimension, score in scores.items():
                # Only scores above 0 count toward the average ...
                if score > 0:
                    final_scores[dimension] += score
                    dimension_counts[dimension] += 1
                # ... but every score is kept in the history; the plotting and
                # reporting code skip the zeros themselves.
                self.trait_scores_history[dimension].append(score)

        average_scores = {}
        for dimension in self.personality_traits:
            if dimension_counts[dimension] > 0:
                average_scores[dimension] = round(final_scores[dimension] / dimension_counts[dimension], 2)
            else:
                # No valid score at all for this trait.
                average_scores[dimension] = 0

        self._generate_trend_plot(qq_id, user_nickname)

        result = {
            "用户QQ": qq_id,
            "用户昵称": user_nickname,
            "样本数量": len(chat_samples),
            "人格特征评分": average_scores,
            "维度评估次数": dict(dimension_counts),
            "详细样本": chat_samples,
            "特质得分历史": dict(self.trait_scores_history)
        }

        os.makedirs("results", exist_ok=True)
        result_file = f"results/personality_result_{qq_id}.json"
        with open(result_file, "w", encoding="utf-8") as f:
            json.dump(result, f, ensure_ascii=False, indent=2)
        return result

    @classmethod
    def _find_chinese_fonts(cls) -> List[str]:
        """Return names of installed fonts whose name contains a CJK marker."""
        names = []
        for font in fm.fontManager.ttflist:
            name = getattr(font, "name", None)
            if isinstance(name, str) and any(marker in name for marker in cls._CJK_FONT_MARKERS):
                names.append(name)
        # A family is listed once per style/weight; keep first occurrences only.
        return list(dict.fromkeys(names))

    @staticmethod
    def _cumulative_averages(scores: List[float]) -> List[float]:
        """Return the running mean of the positive entries of ``scores``.

        A non-positive score repeats the previous mean. Non-positive scores
        that come before the first valid score produce no point at all.
        """
        averages = []
        total = 0
        valid_count = 0
        for score in scores:
            if score > 0:
                total += score
                valid_count += 1
                averages.append(total / valid_count)
            elif averages:
                averages.append(averages[-1])
        return averages

    def _generate_trend_plot(self, qq_id: str, user_nickname: str):
        """Plot each trait's cumulative average score and save it as a PNG.

        The image is written to
        ``results/plots/personality_trend_<qq_id>_<timestamp>.png``.
        """
        chinese_fonts = self._find_chinese_fonts()
        if chinese_fonts:
            plt.rcParams['font.sans-serif'] = chinese_fonts + ['SimHei', 'Microsoft YaHei', 'Arial Unicode MS']
        else:
            # No Chinese-capable font found: romanise the nickname so that at
            # least the user's name in the title stays readable.
            try:
                from pypinyin import lazy_pinyin
                user_nickname = ''.join(lazy_pinyin(user_nickname))
            except ImportError:
                user_nickname = "User"
        plt.rcParams['axes.unicode_minus'] = False  # render the minus sign correctly

        plt.figure(figsize=(12, 6))
        try:
            plt.style.use('bmh')
            colors = {
                "开放性": "#FF9999",
                "严谨性": "#66B2FF",
                "外向性": "#99FF99",
                "宜人性": "#FFCC99",
                "神经质": "#FF99CC"
            }

            cumulative_averages = {}
            for trait, scores in self.trait_scores_history.items():
                averages = self._cumulative_averages(scores)
                if averages:
                    cumulative_averages[trait] = averages

            for trait, averages in cumulative_averages.items():
                x = range(1, len(averages) + 1)
                plt.plot(x, averages, 'o-', label=trait, color=colors.get(trait), linewidth=2, markersize=8)
                # A straight-line fit needs at least two points; fitting one
                # point is rank-deficient and only yields a warning.
                if len(averages) > 1:
                    z = np.polyfit(x, averages, 1)
                    p = np.poly1d(z)
                    plt.plot(x, p(x), '--', color=colors.get(trait), alpha=0.5)

            plt.title(f"{user_nickname} 的人格特质累计平均分变化趋势", fontsize=14, pad=20)
            plt.xlabel("评估次数", fontsize=12)
            plt.ylabel("累计平均分", fontsize=12)
            plt.grid(True, linestyle='--', alpha=0.7)
            plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
            plt.ylim(0, 7)
            plt.tight_layout()

            os.makedirs("results/plots", exist_ok=True)
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            plot_file = f"results/plots/personality_trend_{qq_id}_{timestamp}.png"
            plt.savefig(plot_file, dpi=300, bbox_inches='tight')
        finally:
            # Release the figure even if plotting or saving fails.
            plt.close()
def analyze_user_personality(qq_id: str, num_samples: int = 10, context_length: int = 5) -> str:
    """Run a chat-based personality evaluation and format it as a text report.

    Args:
        qq_id: The user's QQ number.
        num_samples: Number of chat excerpts to analyse.
        context_length: Context length for each excerpt.

    Returns:
        The formatted report, or the evaluator's error message when the
        evaluation result carries an ``"error"`` entry.
    """
    result = ChatBasedPersonalityEvaluator().evaluate_user_personality(qq_id, num_samples, context_length)
    if "error" in result:
        return result["error"]

    counts = result["维度评估次数"]
    history = result["特质得分历史"]
    parts = [
        f"QQ号 {qq_id} ({result['用户昵称']}) 的人格特征分析结果:\n",
        "=" * 50 + "\n\n",
        "人格特征评分:\n",
    ]

    for trait, score in result["人格特征评分"].items():
        times = counts.get(trait, 0)
        if score == 0:
            parts.append(f"{trait}: 数据不足,无法判断 (评估次数: {times})\n")
        else:
            parts.append(f"{trait}: {score}/6 (评估次数: {times})\n")

        # A trend line is reported only when the trait has more than one
        # recorded score and more than one of them is non-zero.
        recorded = history.get(trait, [])
        if len(recorded) <= 1:
            continue
        valid = [s for s in recorded if s != 0]
        if len(valid) <= 1:
            continue

        slope = np.polyfit(range(len(valid)), valid, 1)[0]
        if abs(slope) < 0.1:
            trend_desc = "保持稳定"
        elif slope > 0:
            trend_desc = "呈上升趋势"
        else:
            trend_desc = "呈下降趋势"
        parts.append(f" 变化趋势: {trend_desc} (斜率: {slope:.2f})\n")

    parts.append(f"\n分析样本数量:{result['样本数量']}\n")
    parts.append(f"结果已保存至results/personality_result_{qq_id}.json\n")
    parts.append("变化趋势图已保存至results/plots/目录\n")
    return "".join(parts)
if __name__ == "__main__":
    # Manual smoke test: analyse one hard-coded QQ account and print the report.
    test_qq = "1026294844"
    report = analyze_user_personality(test_qq, num_samples=30, context_length=30)
    print(report)

View File

@@ -0,0 +1,196 @@
"""
The definition of artificial personality in this paper follows the dispositional paradigm and adapts a definition of
personality developed for humans [17]:
Personality for a human is the "whole and organisation of relatively stable tendencies and patterns of experience and
behaviour within one person (distinguishing it from other persons)". This definition is modified for artificial
personality:
Artificial personality describes the relatively stable tendencies and patterns of behaviour of an AI-based machine that
can be designed by developers and designers via different modalities, such as language, creating the impression
of individuality of a humanized social agent when users interact with the machine."""
from typing import Dict, List
import json
import os
from pathlib import Path
from dotenv import load_dotenv
import sys
"""
第一种方案:基于情景评估的人格测定
"""
current_dir = Path(__file__).resolve().parent
project_root = current_dir.parent.parent.parent
env_path = project_root / ".env.prod"
root_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../.."))
sys.path.append(root_path)
from src.plugins.personality.scene import get_scene_by_factor, PERSONALITY_SCENES # noqa: E402
from src.plugins.personality.questionnaire import FACTOR_DESCRIPTIONS # noqa: E402
from src.plugins.personality.offline_llm import LLMModel # noqa: E402
# Load environment variables from the project's ``.env.prod`` file when it
# exists; otherwise announce that built-in defaults will be used.
if not env_path.exists():
    print(f"未找到环境变量文件: {env_path}")
    print("将使用默认配置")
else:
    print(f"{env_path} 加载环境变量")
    load_dotenv(env_path)
class PersonalityEvaluator_direct:
    """Scenario-based personality evaluator.

    Builds a questionnaire of up to three scenes per trait and uses an LLM to
    score a free-text reaction to a scene on the scene's dimensions (1-6).
    """

    # Neutral score used when the model's answer is missing or unusable.
    _DEFAULT_SCORE = 3.5

    def __init__(self):
        # Imported here because this module has no top-level ``random``
        # import; done once instead of on every loop iteration.
        import random

        self.personality_traits = {"开放性": 0, "严谨性": 0, "外向性": 0, "宜人性": 0, "神经质": 0}
        self.scenarios = []

        for trait in PERSONALITY_SCENES:
            scenes = get_scene_by_factor(trait)
            if not scenes:
                continue
            # Choose up to three scenes for this trait.
            scene_keys = list(scenes.keys())
            selected_scenes = random.sample(scene_keys, min(3, len(scene_keys)))
            for scene_key in selected_scenes:
                scene = scenes[scene_key]
                # The primary dimension is the current trait; the secondary
                # one is a randomly chosen other trait.
                other_traits = [t for t in PERSONALITY_SCENES if t != trait]
                secondary_trait = random.choice(other_traits)
                self.scenarios.append(
                    {"场景": scene["scenario"], "评估维度": [trait, secondary_trait], "场景编号": scene_key}
                )

        self.llm = LLMModel()

    def evaluate_response(self, scenario: str, response: str, dimensions: List[str]) -> Dict[str, float]:
        """Ask the LLM to score ``response`` to ``scenario`` on ``dimensions``.

        Args:
            scenario: Scene description shown to the user.
            response: The user's described reaction.
            dimensions: Traits to score; any number of entries is accepted.

        Returns:
            A dict with exactly one entry per requested dimension, clamped to
            1-6. A dimension gets the neutral default 3.5 when the model
            omitted it, gave an unparsable value, or the call failed.
        """
        dimension_descriptions = []
        for dim in dimensions:
            desc = FACTOR_DESCRIPTIONS.get(dim, "")
            if desc:
                dimension_descriptions.append(f"- {dim}{desc}")
        dimensions_text = "\n".join(dimension_descriptions)
        # One `"<trait>": 分数` line per dimension. Building this from the
        # list avoids indexing dimensions[0] / dimensions[1], which raised
        # IndexError for shorter lists and ignored any extra entries.
        score_template = ",\n".join(f'"{dim}": 分数' for dim in dimensions)

        prompt = f"""请根据以下场景和用户描述评估用户在大五人格模型中的相关维度得分1-6分
场景描述:
{scenario}
用户回应:
{response}
需要评估的维度说明:
{dimensions_text}
请按照以下格式输出评估结果仅输出JSON格式
{{
{score_template}
}}
评分标准:
1 = 非常不符合该维度特征
2 = 比较不符合该维度特征
3 = 有点不符合该维度特征
4 = 有点符合该维度特征
5 = 比较符合该维度特征
6 = 非常符合该维度特征
请根据用户的回应结合场景和维度说明进行评分。确保分数在1-6之间并给出合理的评估。"""

        try:
            ai_response, _ = self.llm.generate_response(prompt)
        except Exception as e:
            # Best-effort boundary: a failed LLM call falls back to neutral
            # scores instead of aborting the questionnaire.
            print(f"评估过程出错:{e}")
            return {dim: self._DEFAULT_SCORE for dim in dimensions}
        return self._parse_scores(ai_response, dimensions)

    @staticmethod
    def _parse_scores(ai_response: str, dimensions: List[str]) -> Dict[str, float]:
        """Extract clamped 1-6 scores for ``dimensions`` from an LLM reply.

        Keys outside ``dimensions`` are dropped so callers never see traits
        they did not ask for. Missing or unparsable entries become 3.5.
        """
        default = PersonalityEvaluator_direct._DEFAULT_SCORE
        defaults = {dim: default for dim in dimensions}
        try:
            # Take the outermost {...} span of the reply as the JSON payload.
            start_idx = ai_response.find("{")
            end_idx = ai_response.rfind("}") + 1
            if start_idx == -1 or end_idx == 0:
                print("AI响应格式不正确使用默认评分")
                return defaults
            scores = json.loads(ai_response[start_idx:end_idx])
            if not isinstance(scores, dict):
                raise ValueError("scores JSON is not an object")
        except (AttributeError, TypeError, ValueError) as e:
            print(f"评估过程出错:{e}")
            return defaults

        parsed = {}
        for dim in dimensions:
            try:
                parsed[dim] = max(1, min(6, float(scores[dim])))
            except (KeyError, TypeError, ValueError):
                parsed[dim] = default
        return parsed
def main():
    """Run the interactive scenario questionnaire on the console.

    For every scenario the user types a reaction, the evaluator scores it,
    and the scores are accumulated per trait. Per-trait averages are printed
    at the end and written to ``results/personality_result.json``.
    """
    print("欢迎使用人格形象创建程序!")
    print("接下来您将面对一系列场景共15个。请根据您想要创建的角色形象描述在该场景下可能的反应。")
    print("每个场景都会评估不同的人格维度,最终得出完整的人格特征评估。")
    print("评分标准1=非常不符合2=比较不符合3=有点不符合4=有点符合5=比较符合6=非常符合")
    print("\n准备好了吗?按回车键开始...")
    input()

    evaluator = PersonalityEvaluator_direct()
    final_scores = {"开放性": 0, "严谨性": 0, "外向性": 0, "宜人性": 0, "神经质": 0}
    dimension_counts = {trait: 0 for trait in final_scores}
    total_scenarios = len(evaluator.scenarios)

    for i, scenario_data in enumerate(evaluator.scenarios, 1):
        print(f"\n场景 {i}/{total_scenarios} - {scenario_data['场景编号']}:")
        print("-" * 50)
        print(scenario_data["场景"])
        print("\n请描述您的角色在这种情况下会如何反应:")
        response = input().strip()
        if not response:
            # An empty answer skips this scenario entirely.
            print("反应描述不能为空!")
            continue

        print("\n正在评估您的描述...")
        scores = evaluator.evaluate_response(scenario_data["场景"], response, scenario_data["评估维度"])
        # The scores come from LLM output, so keys outside the five tracked
        # traits may appear; accumulating them blindly raised KeyError.
        tracked_scores = {dim: score for dim, score in scores.items() if dim in final_scores}

        for dimension, score in tracked_scores.items():
            final_scores[dimension] += score
            dimension_counts[dimension] += 1

        print("\n当前评估结果:")
        print("-" * 30)
        for dimension, score in tracked_scores.items():
            print(f"{dimension}: {score}/6")

        if i < total_scenarios:
            print("\n按回车键继续下一个场景...")
            input()

    # Turn the accumulated sums into averages; traits never scored stay 0.
    for dimension in final_scores:
        if dimension_counts[dimension] > 0:
            final_scores[dimension] = round(final_scores[dimension] / dimension_counts[dimension], 2)

    print("\n最终人格特征评估结果:")
    print("-" * 30)
    for trait, score in final_scores.items():
        print(f"{trait}: {score}/6")
        print(f"测试场景数:{dimension_counts[trait]}")

    result = {"final_scores": final_scores, "dimension_counts": dimension_counts, "scenarios": evaluator.scenarios}
    os.makedirs("results", exist_ok=True)
    with open("results/personality_result.json", "w", encoding="utf-8") as f:
        json.dump(result, f, ensure_ascii=False, indent=2)
    print("\n结果已保存到 results/personality_result.json")


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,155 @@
import random
import os
import sys
from pathlib import Path
import datetime
from typing import List, Dict, Optional
current_dir = Path(__file__).resolve().parent
project_root = current_dir.parent.parent.parent
env_path = project_root / ".env.prod"
root_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../.."))
sys.path.append(root_path)
from src.common.database import db # noqa: E402
class MessageAnalyzer:
    """Read chat messages from the ``messages`` collection and format them."""

    def __init__(self):
        self.messages_collection = db["messages"]

    def get_message_context(self, message_id: int, context_length: int = 5) -> Optional[List[Dict]]:
        """Return the messages surrounding ``message_id`` in its chat stream.

        Args:
            message_id: ID of the target message.
            context_length: Number of messages to include on each side, so at
                most ``2 * context_length + 1`` messages are returned.

        Returns:
            The slice of the stream around the target, ordered by ``time``
            ascending, or ``None`` when the message or its stream id cannot
            be found.
        """
        target_message = self.messages_collection.find_one({"message_id": message_id})
        if not target_message:
            return None

        stream_id = target_message.get('chat_info', {}).get('stream_id')
        if not stream_id:
            return None

        # Every message of the same stream, oldest first.
        stream_messages = list(self.messages_collection.find({
            "chat_info.stream_id": stream_id
        }).sort("time", 1))

        target_index = next(
            (i for i, msg in enumerate(stream_messages) if msg.get('message_id') == message_id),
            None,
        )
        if target_index is None:
            return None

        start_index = max(0, target_index - context_length)
        end_index = min(len(stream_messages), target_index + context_length + 1)
        return stream_messages[start_index:end_index]

    def format_messages(self, messages: List[Dict], target_message_id: Optional[int] = None) -> str:
        """Format messages as ``[time] nickname: text`` lines.

        Args:
            messages: Messages to format, in display order.
            target_message_id: If given, the matching message is prefixed with
                ``→`` and followed by a dashed separator line.

        Returns:
            The formatted text, or ``没有消息记录`` for an empty list.
        """
        if not messages:
            return "没有消息记录"

        lines = []
        for msg in messages:
            msg_time = datetime.datetime.fromtimestamp(int(msg['time'])).strftime("%Y-%m-%d %H:%M:%S")
            message_text = msg.get('processed_plain_text', msg.get('detailed_plain_text', '无消息内容'))
            nickname = msg.get('user_info', {}).get('user_nickname', '未知用户')

            # Compare against None explicitly so that an id of 0 still matches.
            is_target = target_message_id is not None and msg['message_id'] == target_message_id
            # The marker has the same width as the plain prefix so the
            # timestamps stay aligned.
            prefix = "→" if is_target else " "
            lines.append(f"{prefix}[{msg_time}] {nickname}: {message_text}\n")
            if is_target:
                lines.append(" " + "-" * 50 + "\n")
        return "".join(lines)

    @staticmethod
    def _resolve_nickname(message: Dict) -> str:
        """Return the sender nickname stored on ``message``.

        The message's own ``user_info`` is preferred, since that is the field
        the user query filters on and the one ``format_messages`` prints.
        ``chat_info.user_info`` is kept as a fallback.
        """
        own_nickname = message.get('user_info', {}).get('user_nickname')
        if own_nickname:
            return own_nickname
        return message.get('chat_info', {}).get('user_info', {}).get('user_nickname', '未知用户')

    def get_user_random_contexts(
            self, qq_id: str, num_messages: int = 10, context_length: int = 5) -> tuple[List[str], str]:  # noqa: E501
        """Pick random messages of a user and return each with its context.

        Args:
            qq_id: The user's QQ number (a numeric string).
            num_messages: Maximum number of random messages to pick.
            context_length: Messages of context on each side of a pick.

        Returns:
            ``(contexts, nickname)`` where ``contexts`` holds one formatted
            text block per picked message, ordered by message time.
            ``([], "")`` when ``qq_id`` is empty or not numeric, or when the
            user has no messages.
        """
        if not qq_id:
            return [], ""
        try:
            user_id = int(qq_id)
        except (TypeError, ValueError):
            # A non-numeric id cannot match any stored user.
            return [], ""

        all_messages = list(self.messages_collection.find({"user_info.user_id": user_id}))
        if not all_messages:
            return [], ""

        user_nickname = self._resolve_nickname(all_messages[0])

        selected_messages = random.sample(all_messages, min(num_messages, len(all_messages)))
        selected_messages.sort(key=lambda x: int(x['time']))

        context_list = []
        for msg in selected_messages:
            message_id = msg['message_id']
            context_messages = self.get_message_context(message_id, context_length)
            if context_messages:
                context_list.append(self.format_messages(context_messages, message_id))
        return context_list, user_nickname
if __name__ == "__main__":
    # Manual smoke test: print five random messages of one hard-coded QQ
    # account, each with three messages of context on either side.
    test_qq = "1026294844"
    print(f"测试QQ号: {test_qq}")
    print("-" * 50)

    contexts, nickname = MessageAnalyzer().get_user_random_contexts(test_qq, num_messages=5, context_length=3)
    print(f"用户昵称: {nickname}\n")

    total = len(contexts)
    for index, context in enumerate(contexts, start=1):
        print(f"\n随机消息 {index}/{total}:")
        print("-" * 30)
        print(context)
        print("=" * 50)

View File

@@ -41,8 +41,8 @@ class WillingManager:
interested_rate = interested_rate * config.response_interested_rate_amplifier
if interested_rate > 0.5:
current_willing += interested_rate - 0.5
if interested_rate > 0.4:
current_willing += interested_rate - 0.3
if is_mentioned_bot and current_willing < 1.0:
current_willing += 1