fix:移除无用功能

This commit is contained in:
SengokuCola
2025-04-16 20:43:46 +08:00
parent 3a1d0c6235
commit 8d88bc475c
8 changed files with 0 additions and 1430 deletions

View File

@@ -1,111 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# from .questionnaire import PERSONALITY_QUESTIONS, FACTOR_DESCRIPTIONS
import os
import sys
from pathlib import Path
import random
current_dir = Path(__file__).resolve().parent
project_root = current_dir.parent.parent.parent
env_path = project_root / ".env"
root_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../.."))
sys.path.append(root_path)
from src.plugins.personality.questionnaire import PERSONALITY_QUESTIONS, FACTOR_DESCRIPTIONS # noqa: E402
class BigFiveTest:
def __init__(self):
self.questions = PERSONALITY_QUESTIONS
self.factors = FACTOR_DESCRIPTIONS
def run_test(self):
"""运行测试并收集答案"""
print("\n欢迎参加中国大五人格测试!")
print("\n本测试采用六级评分,请根据每个描述与您的符合程度进行打分:")
print("1 = 完全不符合")
print("2 = 比较不符合")
print("3 = 有点不符合")
print("4 = 有点符合")
print("5 = 比较符合")
print("6 = 完全符合")
print("\n请认真阅读每个描述,选择最符合您实际情况的选项。\n")
# 创建题目序号到题目的映射
questions_map = {q["id"]: q for q in self.questions}
# 获取所有题目ID并随机打乱顺序
question_ids = list(questions_map.keys())
random.shuffle(question_ids)
answers = {}
total_questions = len(question_ids)
for i, question_id in enumerate(question_ids, 1):
question = questions_map[question_id]
while True:
try:
print(f"\n[{i}/{total_questions}] {question['content']}")
score = int(input("您的评分1-6: "))
if 1 <= score <= 6:
answers[question_id] = score
break
else:
print("请输入1-6之间的数字")
except ValueError:
print("请输入有效的数字!")
return self.calculate_scores(answers)
def calculate_scores(self, answers):
"""计算各维度得分"""
results = {}
factor_questions = {"外向性": [], "神经质": [], "严谨性": [], "开放性": [], "宜人性": []}
# 将题目按因子分类
for q in self.questions:
factor_questions[q["factor"]].append(q)
# 计算每个维度的得分
for factor, questions in factor_questions.items():
total_score = 0
for q in questions:
score = answers[q["id"]]
# 处理反向计分题目
if q["reverse_scoring"]:
score = 7 - score # 6分量表反向计分为7减原始分
total_score += score
# 计算平均分
avg_score = round(total_score / len(questions), 2)
results[factor] = {"得分": avg_score, "题目数": len(questions), "总分": total_score}
return results
def get_factor_description(self, factor):
"""获取因子的详细描述"""
return self.factors[factor]
def main():
test = BigFiveTest()
results = test.run_test()
print("\n测试结果:")
print("=" * 50)
for factor, data in results.items():
print(f"\n{factor}:")
print(f"平均分: {data['得分']} (总分: {data['总分']}, 题目数: {data['题目数']})")
print("-" * 30)
description = test.get_factor_description(factor)
print("维度说明:", description["description"][:100] + "...")
print("\n特征词:", ", ".join(description["trait_words"]))
print("=" * 50)
if __name__ == "__main__":
main()

View File

@@ -1,353 +0,0 @@
"""
基于聊天记录的人格特征分析系统
"""
from typing import Dict, List
import json
import os
from pathlib import Path
from dotenv import load_dotenv
import sys
import random
from collections import defaultdict
import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime
import matplotlib.font_manager as fm
current_dir = Path(__file__).resolve().parent
project_root = current_dir.parent.parent.parent
env_path = project_root / ".env"
root_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../.."))
sys.path.append(root_path)
from src.plugins.personality.scene import get_scene_by_factor, PERSONALITY_SCENES # noqa: E402
from src.plugins.personality.questionnaire import FACTOR_DESCRIPTIONS # noqa: E402
from src.plugins.personality.offline_llm import LLMModel # noqa: E402
from src.plugins.personality.who_r_u import MessageAnalyzer # noqa: E402
# 加载环境变量
if env_path.exists():
print(f"{env_path} 加载环境变量")
load_dotenv(env_path)
else:
print(f"未找到环境变量文件: {env_path}")
print("将使用默认配置")
class ChatBasedPersonalityEvaluator:
def __init__(self):
self.personality_traits = {"开放性": 0, "严谨性": 0, "外向性": 0, "宜人性": 0, "神经质": 0}
self.scenarios = []
self.message_analyzer = MessageAnalyzer()
self.llm = LLMModel()
self.trait_scores_history = defaultdict(list) # 记录每个特质的得分历史
# 为每个人格特质获取对应的场景
for trait in PERSONALITY_SCENES:
scenes = get_scene_by_factor(trait)
if not scenes:
continue
scene_keys = list(scenes.keys())
selected_scenes = random.sample(scene_keys, min(3, len(scene_keys)))
for scene_key in selected_scenes:
scene = scenes[scene_key]
other_traits = [t for t in PERSONALITY_SCENES if t != trait]
secondary_trait = random.choice(other_traits)
self.scenarios.append(
{"场景": scene["scenario"], "评估维度": [trait, secondary_trait], "场景编号": scene_key}
)
def analyze_chat_context(self, messages: List[Dict]) -> str:
"""
分析一组消息的上下文,生成场景描述
"""
context = ""
for msg in messages:
nickname = msg.get("user_info", {}).get("user_nickname", "未知用户")
content = msg.get("processed_plain_text", msg.get("detailed_plain_text", ""))
if content:
context += f"{nickname}: {content}\n"
return context
def evaluate_chat_response(
self, user_nickname: str, chat_context: str, dimensions: List[str] = None
) -> Dict[str, float]:
"""
评估聊天内容在各个人格维度上的得分
"""
# 使用所有维度进行评估
dimensions = list(self.personality_traits.keys())
dimension_descriptions = []
for dim in dimensions:
desc = FACTOR_DESCRIPTIONS.get(dim, "")
if desc:
dimension_descriptions.append(f"- {dim}{desc}")
dimensions_text = "\n".join(dimension_descriptions)
prompt = f"""请根据以下聊天记录,评估"{user_nickname}"在大五人格模型中的维度得分1-6分
聊天记录:
{chat_context}
需要评估的维度说明:
{dimensions_text}
请按照以下格式输出评估结果,注意,你的评价对象是"{user_nickname}"仅输出JSON格式
{{
"开放性": 分数,
"严谨性": 分数,
"外向性": 分数,
"宜人性": 分数,
"神经质": 分数
}}
评分标准:
1 = 非常不符合该维度特征
2 = 比较不符合该维度特征
3 = 有点不符合该维度特征
4 = 有点符合该维度特征
5 = 比较符合该维度特征
6 = 非常符合该维度特征
如果你觉得某个维度没有相关信息或者无法判断请输出0分
请根据聊天记录的内容和语气结合维度说明进行评分。如果维度可以评分确保分数在1-6之间。如果没有体现请输出0分"""
try:
ai_response, _ = self.llm.generate_response(prompt)
start_idx = ai_response.find("{")
end_idx = ai_response.rfind("}") + 1
if start_idx != -1 and end_idx != 0:
json_str = ai_response[start_idx:end_idx]
scores = json.loads(json_str)
return {k: max(0, min(6, float(v))) for k, v in scores.items()}
else:
print("AI响应格式不正确使用默认评分")
return {dim: 0 for dim in dimensions}
except Exception as e:
print(f"评估过程出错:{str(e)}")
return {dim: 0 for dim in dimensions}
def evaluate_user_personality(self, qq_id: str, num_samples: int = 10, context_length: int = 5) -> Dict:
"""
基于用户的聊天记录评估人格特征
Args:
qq_id (str): 用户QQ号
num_samples (int): 要分析的聊天片段数量
context_length (int): 每个聊天片段的上下文长度
Returns:
Dict: 评估结果
"""
# 获取用户的随机消息及其上下文
chat_contexts, user_nickname = self.message_analyzer.get_user_random_contexts(
qq_id, num_messages=num_samples, context_length=context_length
)
if not chat_contexts:
return {"error": f"没有找到QQ号 {qq_id} 的消息记录"}
# 初始化评分
final_scores = defaultdict(float)
dimension_counts = defaultdict(int)
chat_samples = []
# 清空历史记录
self.trait_scores_history.clear()
# 分析每个聊天上下文
for chat_context in chat_contexts:
# 评估这段聊天内容的所有维度
scores = self.evaluate_chat_response(user_nickname, chat_context)
# 记录样本
chat_samples.append(
{"聊天内容": chat_context, "评估维度": list(self.personality_traits.keys()), "评分": scores}
)
# 更新总分和历史记录
for dimension, score in scores.items():
if score > 0: # 只统计大于0的有效分数
final_scores[dimension] += score
dimension_counts[dimension] += 1
self.trait_scores_history[dimension].append(score)
# 计算平均分
average_scores = {}
for dimension in self.personality_traits:
if dimension_counts[dimension] > 0:
average_scores[dimension] = round(final_scores[dimension] / dimension_counts[dimension], 2)
else:
average_scores[dimension] = 0 # 如果没有有效分数返回0
# 生成趋势图
self._generate_trend_plot(qq_id, user_nickname)
result = {
"用户QQ": qq_id,
"用户昵称": user_nickname,
"样本数量": len(chat_samples),
"人格特征评分": average_scores,
"维度评估次数": dict(dimension_counts),
"详细样本": chat_samples,
"特质得分历史": {k: v for k, v in self.trait_scores_history.items()},
}
# 保存结果
os.makedirs("results", exist_ok=True)
result_file = f"results/personality_result_{qq_id}.json"
with open(result_file, "w", encoding="utf-8") as f:
json.dump(result, f, ensure_ascii=False, indent=2)
return result
def _generate_trend_plot(self, qq_id: str, user_nickname: str):
"""
生成人格特质累计平均分变化趋势图
"""
# 查找系统中可用的中文字体
chinese_fonts = []
for f in fm.fontManager.ttflist:
try:
if "" in f.name or "SC" in f.name or "" in f.name or "" in f.name or "微软" in f.name:
chinese_fonts.append(f.name)
except Exception:
continue
if chinese_fonts:
plt.rcParams["font.sans-serif"] = chinese_fonts + ["SimHei", "Microsoft YaHei", "Arial Unicode MS"]
else:
# 如果没有找到中文字体,使用默认字体,并将中文昵称转换为拼音或英文
try:
from pypinyin import lazy_pinyin
user_nickname = "".join(lazy_pinyin(user_nickname))
except ImportError:
user_nickname = "User" # 如果无法转换为拼音,使用默认英文
plt.rcParams["axes.unicode_minus"] = False # 解决负号显示问题
plt.figure(figsize=(12, 6))
plt.style.use("bmh") # 使用内置的bmh样式它有类似seaborn的美观效果
colors = {
"开放性": "#FF9999",
"严谨性": "#66B2FF",
"外向性": "#99FF99",
"宜人性": "#FFCC99",
"神经质": "#FF99CC",
}
# 计算每个维度在每个时间点的累计平均分
cumulative_averages = {}
for trait, scores in self.trait_scores_history.items():
if not scores:
continue
averages = []
total = 0
valid_count = 0
for score in scores:
if score > 0: # 只计算大于0的有效分数
total += score
valid_count += 1
if valid_count > 0:
averages.append(total / valid_count)
else:
# 如果当前分数无效,使用前一个有效的平均分
if averages:
averages.append(averages[-1])
else:
continue # 跳过无效分数
if averages: # 只有在有有效分数的情况下才添加到累计平均中
cumulative_averages[trait] = averages
# 绘制每个维度的累计平均分变化趋势
for trait, averages in cumulative_averages.items():
x = range(1, len(averages) + 1)
plt.plot(x, averages, "o-", label=trait, color=colors.get(trait), linewidth=2, markersize=8)
# 添加趋势线
z = np.polyfit(x, averages, 1)
p = np.poly1d(z)
plt.plot(x, p(x), "--", color=colors.get(trait), alpha=0.5)
plt.title(f"{user_nickname} 的人格特质累计平均分变化趋势", fontsize=14, pad=20)
plt.xlabel("评估次数", fontsize=12)
plt.ylabel("累计平均分", fontsize=12)
plt.grid(True, linestyle="--", alpha=0.7)
plt.legend(loc="center left", bbox_to_anchor=(1, 0.5))
plt.ylim(0, 7)
plt.tight_layout()
# 保存图表
os.makedirs("results/plots", exist_ok=True)
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
plot_file = f"results/plots/personality_trend_{qq_id}_{timestamp}.png"
plt.savefig(plot_file, dpi=300, bbox_inches="tight")
plt.close()
def analyze_user_personality(qq_id: str, num_samples: int = 10, context_length: int = 5) -> str:
"""
分析用户人格特征的便捷函数
Args:
qq_id (str): 用户QQ号
num_samples (int): 要分析的聊天片段数量
context_length (int): 每个聊天片段的上下文长度
Returns:
str: 格式化的分析结果
"""
evaluator = ChatBasedPersonalityEvaluator()
result = evaluator.evaluate_user_personality(qq_id, num_samples, context_length)
if "error" in result:
return result["error"]
# 格式化输出
output = f"QQ号 {qq_id} ({result['用户昵称']}) 的人格特征分析结果:\n"
output += "=" * 50 + "\n\n"
output += "人格特征评分:\n"
for trait, score in result["人格特征评分"].items():
if score == 0:
output += f"{trait}: 数据不足,无法判断 (评估次数: {result['维度评估次数'].get(trait, 0)})\n"
else:
output += f"{trait}: {score}/6 (评估次数: {result['维度评估次数'].get(trait, 0)})\n"
# 添加变化趋势描述
if trait in result["特质得分历史"] and len(result["特质得分历史"][trait]) > 1:
scores = [s for s in result["特质得分历史"][trait] if s != 0] # 过滤掉无效分数
if len(scores) > 1: # 确保有足够的有效分数计算趋势
trend = np.polyfit(range(len(scores)), scores, 1)[0]
if abs(trend) < 0.1:
trend_desc = "保持稳定"
elif trend > 0:
trend_desc = "呈上升趋势"
else:
trend_desc = "呈下降趋势"
output += f" 变化趋势: {trend_desc} (斜率: {trend:.2f})\n"
output += f"\n分析样本数量:{result['样本数量']}\n"
output += f"结果已保存至results/personality_result_{qq_id}.json\n"
output += "变化趋势图已保存至results/plots/目录\n"
return output
if __name__ == "__main__":
# 测试代码
# test_qq = "" # 替换为要测试的QQ号
# print(analyze_user_personality(test_qq, num_samples=30, context_length=20))
# test_qq = ""
# print(analyze_user_personality(test_qq, num_samples=30, context_length=20))
test_qq = "1026294844"
print(analyze_user_personality(test_qq, num_samples=30, context_length=30))

View File

@@ -1,349 +0,0 @@
from typing import Dict
import json
import os
from pathlib import Path
import sys
from datetime import datetime
import random
from scipy import stats # 添加scipy导入用于t检验
current_dir = Path(__file__).resolve().parent
project_root = current_dir.parent.parent.parent
env_path = project_root / ".env"
root_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../.."))
sys.path.append(root_path)
from src.plugins.personality.big5_test import BigFiveTest # noqa: E402
from src.plugins.personality.renqingziji import PersonalityEvaluator_direct # noqa: E402
from src.plugins.personality.questionnaire import FACTOR_DESCRIPTIONS, PERSONALITY_QUESTIONS # noqa: E402
class CombinedPersonalityTest:
def __init__(self):
self.big5_test = BigFiveTest()
self.scenario_test = PersonalityEvaluator_direct()
self.dimensions = ["开放性", "严谨性", "外向性", "宜人性", "神经质"]
def run_combined_test(self):
"""运行组合测试"""
print("\n=== 人格特征综合评估系统 ===")
print("\n本测试将通过两种方式评估人格特征:")
print("1. 传统问卷测评约40题")
print("2. 情景反应测评15个场景")
print("\n两种测评完成后,将对比分析结果的异同。")
input("\n准备好开始第一部分(问卷测评)了吗?按回车继续...")
# 运行问卷测试
print("\n=== 第一部分:问卷测评 ===")
print("本部分采用六级评分,请根据每个描述与您的符合程度进行打分:")
print("1 = 完全不符合")
print("2 = 比较不符合")
print("3 = 有点不符合")
print("4 = 有点符合")
print("5 = 比较符合")
print("6 = 完全符合")
print("\n重要提示:您可以选择以下两种方式之一来回答问题:")
print("1. 根据您自身的真实情况来回答")
print("2. 根据您想要扮演的角色特征来回答")
print("\n无论选择哪种方式,请保持一致并认真回答每个问题。")
input("\n按回车开始答题...")
questionnaire_results = self.run_questionnaire()
# 转换问卷结果格式以便比较
questionnaire_scores = {factor: data["得分"] for factor, data in questionnaire_results.items()}
# 运行情景测试
print("\n=== 第二部分:情景反应测评 ===")
print("接下来,您将面对一系列具体场景,请描述您在每个场景中可能的反应。")
print("每个场景都会评估不同的人格维度共15个场景。")
print("您可以选择提供自己的真实反应,也可以选择扮演一个您创作的角色来回答。")
input("\n准备好开始了吗?按回车继续...")
scenario_results = self.run_scenario_test()
# 比较和展示结果
self.compare_and_display_results(questionnaire_scores, scenario_results)
# 保存结果
self.save_results(questionnaire_scores, scenario_results)
def run_questionnaire(self):
"""运行问卷测试部分"""
# 创建题目序号到题目的映射
questions_map = {q["id"]: q for q in PERSONALITY_QUESTIONS}
# 获取所有题目ID并随机打乱顺序
question_ids = list(questions_map.keys())
random.shuffle(question_ids)
answers = {}
total_questions = len(question_ids)
for i, question_id in enumerate(question_ids, 1):
question = questions_map[question_id]
while True:
try:
print(f"\n问题 [{i}/{total_questions}]")
print(f"{question['content']}")
score = int(input("您的评分1-6: "))
if 1 <= score <= 6:
answers[question_id] = score
break
else:
print("请输入1-6之间的数字")
except ValueError:
print("请输入有效的数字!")
# 每10题显示一次进度
if i % 10 == 0:
print(f"\n已完成 {i}/{total_questions} 题 ({int(i / total_questions * 100)}%)")
return self.calculate_questionnaire_scores(answers)
def calculate_questionnaire_scores(self, answers):
"""计算问卷测试的维度得分"""
results = {}
factor_questions = {"外向性": [], "神经质": [], "严谨性": [], "开放性": [], "宜人性": []}
# 将题目按因子分类
for q in PERSONALITY_QUESTIONS:
factor_questions[q["factor"]].append(q)
# 计算每个维度的得分
for factor, questions in factor_questions.items():
total_score = 0
for q in questions:
score = answers[q["id"]]
# 处理反向计分题目
if q["reverse_scoring"]:
score = 7 - score # 6分量表反向计分为7减原始分
total_score += score
# 计算平均分
avg_score = round(total_score / len(questions), 2)
results[factor] = {"得分": avg_score, "题目数": len(questions), "总分": total_score}
return results
def run_scenario_test(self):
"""运行情景测试部分"""
final_scores = {"开放性": 0, "严谨性": 0, "外向性": 0, "宜人性": 0, "神经质": 0}
dimension_counts = {trait: 0 for trait in final_scores.keys()}
# 随机打乱场景顺序
scenarios = self.scenario_test.scenarios.copy()
random.shuffle(scenarios)
for i, scenario_data in enumerate(scenarios, 1):
print(f"\n场景 [{i}/{len(scenarios)}] - {scenario_data['场景编号']}")
print("-" * 50)
print(scenario_data["场景"])
print("\n请描述您在这种情况下会如何反应:")
response = input().strip()
if not response:
print("反应描述不能为空!")
continue
print("\n正在评估您的描述...")
scores = self.scenario_test.evaluate_response(scenario_data["场景"], response, scenario_data["评估维度"])
# 更新分数
for dimension, score in scores.items():
final_scores[dimension] += score
dimension_counts[dimension] += 1
# print("\n当前场景评估结果")
# print("-" * 30)
# for dimension, score in scores.items():
# print(f"{dimension}: {score}/6")
# 每5个场景显示一次总进度
if i % 5 == 0:
print(f"\n已完成 {i}/{len(scenarios)} 个场景 ({int(i / len(scenarios) * 100)}%)")
if i < len(scenarios):
input("\n按回车继续下一个场景...")
# 计算平均分
for dimension in final_scores:
if dimension_counts[dimension] > 0:
final_scores[dimension] = round(final_scores[dimension] / dimension_counts[dimension], 2)
return final_scores
def compare_and_display_results(self, questionnaire_scores: Dict, scenario_scores: Dict):
"""比较和展示两种测试的结果"""
print("\n=== 测评结果对比分析 ===")
print("\n" + "=" * 60)
print(f"{'维度':<8} {'问卷得分':>10} {'情景得分':>10} {'差异':>10} {'差异程度':>10}")
print("-" * 60)
# 收集每个维度的得分用于统计分析
questionnaire_values = []
scenario_values = []
diffs = []
for dimension in self.dimensions:
q_score = questionnaire_scores[dimension]
s_score = scenario_scores[dimension]
diff = round(abs(q_score - s_score), 2)
questionnaire_values.append(q_score)
scenario_values.append(s_score)
diffs.append(diff)
# 计算差异程度
diff_level = "" if diff < 0.5 else "" if diff < 1.0 else ""
print(f"{dimension:<8} {q_score:>10.2f} {s_score:>10.2f} {diff:>10.2f} {diff_level:>10}")
print("=" * 60)
# 计算整体统计指标
mean_diff = sum(diffs) / len(diffs)
std_diff = (sum((x - mean_diff) ** 2 for x in diffs) / (len(diffs) - 1)) ** 0.5
# 计算效应量 (Cohen's d)
pooled_std = (
(
sum((x - sum(questionnaire_values) / len(questionnaire_values)) ** 2 for x in questionnaire_values)
+ sum((x - sum(scenario_values) / len(scenario_values)) ** 2 for x in scenario_values)
)
/ (2 * len(self.dimensions) - 2)
) ** 0.5
if pooled_std != 0:
cohens_d = abs(mean_diff / pooled_std)
# 解释效应量
if cohens_d < 0.2:
effect_size = "微小"
elif cohens_d < 0.5:
effect_size = ""
elif cohens_d < 0.8:
effect_size = "中等"
else:
effect_size = ""
# 对所有维度进行整体t检验
t_stat, p_value = stats.ttest_rel(questionnaire_values, scenario_values)
print("\n整体统计分析:")
print(f"平均差异: {mean_diff:.3f}")
print(f"差异标准差: {std_diff:.3f}")
print(f"效应量(Cohen's d): {cohens_d:.3f}")
print(f"效应量大小: {effect_size}")
print(f"t统计量: {t_stat:.3f}")
print(f"p值: {p_value:.3f}")
if p_value < 0.05:
print("结论: 两种测评方法的结果存在显著差异 (p < 0.05)")
else:
print("结论: 两种测评方法的结果无显著差异 (p >= 0.05)")
print("\n维度说明:")
for dimension in self.dimensions:
print(f"\n{dimension}:")
desc = FACTOR_DESCRIPTIONS[dimension]
print(f"定义:{desc['description']}")
print(f"特征词:{', '.join(desc['trait_words'])}")
# 分析显著差异
significant_diffs = []
for dimension in self.dimensions:
diff = abs(questionnaire_scores[dimension] - scenario_scores[dimension])
if diff >= 1.0: # 差异大于等于1分视为显著
significant_diffs.append(
{
"dimension": dimension,
"diff": diff,
"questionnaire": questionnaire_scores[dimension],
"scenario": scenario_scores[dimension],
}
)
if significant_diffs:
print("\n\n显著差异分析:")
print("-" * 40)
for diff in significant_diffs:
print(f"\n{diff['dimension']}维度的测评结果存在显著差异:")
print(f"问卷得分:{diff['questionnaire']:.2f}")
print(f"情景得分:{diff['scenario']:.2f}")
print(f"差异值:{diff['diff']:.2f}")
# 分析可能的原因
if diff["questionnaire"] > diff["scenario"]:
print("可能原因:在问卷中的自我评价较高,但在具体情景中的表现较为保守。")
else:
print("可能原因:在具体情景中表现出更多该维度特征,而在问卷自评时较为保守。")
def save_results(self, questionnaire_scores: Dict, scenario_scores: Dict):
"""保存测试结果"""
results = {
"测试时间": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
"问卷测评结果": questionnaire_scores,
"情景测评结果": scenario_scores,
"维度说明": FACTOR_DESCRIPTIONS,
}
# 确保目录存在
os.makedirs("results", exist_ok=True)
# 生成带时间戳的文件名
filename = f"results/personality_combined_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
# 保存到文件
with open(filename, "w", encoding="utf-8") as f:
json.dump(results, f, ensure_ascii=False, indent=2)
print(f"\n完整的测评结果已保存到:{filename}")
def load_existing_results():
"""检查并加载已有的测试结果"""
results_dir = "results"
if not os.path.exists(results_dir):
return None
# 获取所有personality_combined开头的文件
result_files = [f for f in os.listdir(results_dir) if f.startswith("personality_combined_") and f.endswith(".json")]
if not result_files:
return None
# 按文件修改时间排序,获取最新的结果文件
latest_file = max(result_files, key=lambda f: os.path.getmtime(os.path.join(results_dir, f)))
print(f"\n发现已有的测试结果:{latest_file}")
try:
with open(os.path.join(results_dir, latest_file), "r", encoding="utf-8") as f:
results = json.load(f)
return results
except Exception as e:
print(f"读取结果文件时出错:{str(e)}")
return None
def main():
test = CombinedPersonalityTest()
# 检查是否存在已有结果
existing_results = load_existing_results()
if existing_results:
print("\n=== 使用已有测试结果进行分析 ===")
print(f"测试时间:{existing_results['测试时间']}")
questionnaire_scores = existing_results["问卷测评结果"]
scenario_scores = existing_results["情景测评结果"]
# 直接进行结果对比分析
test.compare_and_display_results(questionnaire_scores, scenario_scores)
else:
print("\n未找到已有的测试结果,开始新的测试...")
test.run_combined_test()
if __name__ == "__main__":
main()

View File

@@ -1,123 +0,0 @@
import asyncio
import os
import time
from typing import Tuple, Union
import aiohttp
import requests
from src.common.logger import get_module_logger
logger = get_module_logger("offline_llm")
class LLMModel:
def __init__(self, model_name="Pro/deepseek-ai/DeepSeek-V3", **kwargs):
self.model_name = model_name
self.params = kwargs
self.api_key = os.getenv("SILICONFLOW_KEY")
self.base_url = os.getenv("SILICONFLOW_BASE_URL")
if not self.api_key or not self.base_url:
raise ValueError("环境变量未正确加载SILICONFLOW_KEY 或 SILICONFLOW_BASE_URL 未设置")
logger.info(f"API URL: {self.base_url}") # 使用 logger 记录 base_url
def generate_response(self, prompt: str) -> Union[str, Tuple[str, str]]:
"""根据输入的提示生成模型的响应"""
headers = {"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json"}
# 构建请求体
data = {
"model": self.model_name,
"messages": [{"role": "user", "content": prompt}],
"temperature": 0.5,
**self.params,
}
# 发送请求到完整的 chat/completions 端点
api_url = f"{self.base_url.rstrip('/')}/chat/completions"
logger.info(f"Request URL: {api_url}") # 记录请求的 URL
max_retries = 3
base_wait_time = 15 # 基础等待时间(秒)
for retry in range(max_retries):
try:
response = requests.post(api_url, headers=headers, json=data)
if response.status_code == 429:
wait_time = base_wait_time * (2**retry) # 指数退避
logger.warning(f"遇到请求限制(429),等待{wait_time}秒后重试...")
time.sleep(wait_time)
continue
response.raise_for_status() # 检查其他响应状态
result = response.json()
if "choices" in result and len(result["choices"]) > 0:
content = result["choices"][0]["message"]["content"]
reasoning_content = result["choices"][0]["message"].get("reasoning_content", "")
return content, reasoning_content
return "没有返回结果", ""
except Exception as e:
if retry < max_retries - 1: # 如果还有重试机会
wait_time = base_wait_time * (2**retry)
logger.error(f"[回复]请求失败,等待{wait_time}秒后重试... 错误: {str(e)}")
time.sleep(wait_time)
else:
logger.error(f"请求失败: {str(e)}")
return f"请求失败: {str(e)}", ""
logger.error("达到最大重试次数,请求仍然失败")
return "达到最大重试次数,请求仍然失败", ""
async def generate_response_async(self, prompt: str) -> Union[str, Tuple[str, str]]:
"""异步方式根据输入的提示生成模型的响应"""
headers = {"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json"}
# 构建请求体
data = {
"model": self.model_name,
"messages": [{"role": "user", "content": prompt}],
"temperature": 0.5,
**self.params,
}
# 发送请求到完整的 chat/completions 端点
api_url = f"{self.base_url.rstrip('/')}/chat/completions"
logger.info(f"Request URL: {api_url}") # 记录请求的 URL
max_retries = 3
base_wait_time = 15
async with aiohttp.ClientSession() as session:
for retry in range(max_retries):
try:
async with session.post(api_url, headers=headers, json=data) as response:
if response.status == 429:
wait_time = base_wait_time * (2**retry) # 指数退避
logger.warning(f"遇到请求限制(429),等待{wait_time}秒后重试...")
await asyncio.sleep(wait_time)
continue
response.raise_for_status() # 检查其他响应状态
result = await response.json()
if "choices" in result and len(result["choices"]) > 0:
content = result["choices"][0]["message"]["content"]
reasoning_content = result["choices"][0]["message"].get("reasoning_content", "")
return content, reasoning_content
return "没有返回结果", ""
except Exception as e:
if retry < max_retries - 1: # 如果还有重试机会
wait_time = base_wait_time * (2**retry)
logger.error(f"[回复]请求失败,等待{wait_time}秒后重试... 错误: {str(e)}")
await asyncio.sleep(wait_time)
else:
logger.error(f"请求失败: {str(e)}")
return f"请求失败: {str(e)}", ""
logger.error("达到最大重试次数,请求仍然失败")
return "达到最大重试次数,请求仍然失败", ""

View File

@@ -1,142 +0,0 @@
# 人格测试问卷题目
# 王孟成, 戴晓阳, & 姚树桥. (2011).
# 中国大五人格问卷的初步编制Ⅲ:简式版的制定及信效度检验. 中国临床心理学杂志, 19(04), Article 04.
# 王孟成, 戴晓阳, & 姚树桥. (2010).
# 中国大五人格问卷的初步编制Ⅰ:理论框架与信度分析. 中国临床心理学杂志, 18(05), Article 05.
PERSONALITY_QUESTIONS = [
# 神经质维度 (F1)
{"id": 1, "content": "我常担心有什么不好的事情要发生", "factor": "神经质", "reverse_scoring": False},
{"id": 2, "content": "我常感到害怕", "factor": "神经质", "reverse_scoring": False},
{"id": 3, "content": "有时我觉得自己一无是处", "factor": "神经质", "reverse_scoring": False},
{"id": 4, "content": "我很少感到忧郁或沮丧", "factor": "神经质", "reverse_scoring": True},
{"id": 5, "content": "别人一句漫不经心的话,我常会联系在自己身上", "factor": "神经质", "reverse_scoring": False},
{"id": 6, "content": "在面对压力时,我有种快要崩溃的感觉", "factor": "神经质", "reverse_scoring": False},
{"id": 7, "content": "我常担忧一些无关紧要的事情", "factor": "神经质", "reverse_scoring": False},
{"id": 8, "content": "我常常感到内心不踏实", "factor": "神经质", "reverse_scoring": False},
# 严谨性维度 (F2)
{"id": 9, "content": "在工作上,我常只求能应付过去便可", "factor": "严谨性", "reverse_scoring": True},
{"id": 10, "content": "一旦确定了目标,我会坚持努力地实现它", "factor": "严谨性", "reverse_scoring": False},
{"id": 11, "content": "我常常是仔细考虑之后才做出决定", "factor": "严谨性", "reverse_scoring": False},
{"id": 12, "content": "别人认为我是个慎重的人", "factor": "严谨性", "reverse_scoring": False},
{"id": 13, "content": "做事讲究逻辑和条理是我的一个特点", "factor": "严谨性", "reverse_scoring": False},
{"id": 14, "content": "我喜欢一开头就把事情计划好", "factor": "严谨性", "reverse_scoring": False},
{"id": 15, "content": "我工作或学习很勤奋", "factor": "严谨性", "reverse_scoring": False},
{"id": 16, "content": "我是个倾尽全力做事的人", "factor": "严谨性", "reverse_scoring": False},
# 宜人性维度 (F3)
{
"id": 17,
"content": "尽管人类社会存在着一些阴暗的东西(如战争、罪恶、欺诈),我仍然相信人性总的来说是善良的",
"factor": "宜人性",
"reverse_scoring": False,
},
{"id": 18, "content": "我觉得大部分人基本上是心怀善意的", "factor": "宜人性", "reverse_scoring": False},
{"id": 19, "content": "虽然社会上有骗子,但我觉得大部分人还是可信的", "factor": "宜人性", "reverse_scoring": False},
{"id": 20, "content": "我不太关心别人是否受到不公正的待遇", "factor": "宜人性", "reverse_scoring": True},
{"id": 21, "content": "我时常觉得别人的痛苦与我无关", "factor": "宜人性", "reverse_scoring": True},
{"id": 22, "content": "我常为那些遭遇不幸的人感到难过", "factor": "宜人性", "reverse_scoring": False},
{"id": 23, "content": "我是那种只照顾好自己,不替别人担忧的人", "factor": "宜人性", "reverse_scoring": True},
{"id": 24, "content": "当别人向我诉说不幸时,我常感到难过", "factor": "宜人性", "reverse_scoring": False},
# 开放性维度 (F4)
{"id": 25, "content": "我的想象力相当丰富", "factor": "开放性", "reverse_scoring": False},
{"id": 26, "content": "我头脑中经常充满生动的画面", "factor": "开放性", "reverse_scoring": False},
{"id": 27, "content": "我对许多事情有着很强的好奇心", "factor": "开放性", "reverse_scoring": False},
{"id": 28, "content": "我喜欢冒险", "factor": "开放性", "reverse_scoring": False},
{"id": 29, "content": "我是个勇于冒险,突破常规的人", "factor": "开放性", "reverse_scoring": False},
{"id": 30, "content": "我身上具有别人没有的冒险精神", "factor": "开放性", "reverse_scoring": False},
{
"id": 31,
"content": "我渴望学习一些新东西,即使它们与我的日常生活无关",
"factor": "开放性",
"reverse_scoring": False,
},
{
"id": 32,
"content": "我很愿意也很容易接受那些新事物、新观点、新想法",
"factor": "开放性",
"reverse_scoring": False,
},
# 外向性维度 (F5)
{"id": 33, "content": "我喜欢参加社交与娱乐聚会", "factor": "外向性", "reverse_scoring": False},
{"id": 34, "content": "我对人多的聚会感到乏味", "factor": "外向性", "reverse_scoring": True},
{"id": 35, "content": "我尽量避免参加人多的聚会和嘈杂的环境", "factor": "外向性", "reverse_scoring": True},
{"id": 36, "content": "在热闹的聚会上,我常常表现主动并尽情玩耍", "factor": "外向性", "reverse_scoring": False},
{"id": 37, "content": "有我在的场合一般不会冷场", "factor": "外向性", "reverse_scoring": False},
{"id": 38, "content": "我希望成为领导者而不是被领导者", "factor": "外向性", "reverse_scoring": False},
{"id": 39, "content": "在一个团体中,我希望处于领导地位", "factor": "外向性", "reverse_scoring": False},
{"id": 40, "content": "别人多认为我是一个热情和友好的人", "factor": "外向性", "reverse_scoring": False},
]
# 因子维度说明
FACTOR_DESCRIPTIONS = {
"外向性": {
"description": "反映个体神经系统的强弱和动力特征。外向性主要表现为个体在人际交往和社交活动中的倾向性,"
"包括对社交活动的兴趣、"
"对人群的态度、社交互动中的主动程度以及在群体中的影响力。高分者倾向于积极参与社交活动,乐于与人交往,善于表达自我,"
"并往往在群体中发挥领导作用;低分者则倾向于独处,不喜欢热闹的社交场合,表现出内向、安静的特征。",
"trait_words": ["热情", "活力", "社交", "主动"],
"subfactors": {
"合群性": "个体愿意与他人聚在一起,即接近人群的倾向;高分表现乐群、好交际,低分表现封闭、独处",
"热情": "个体对待别人时所表现出的态度;高分表现热情好客,低分表现冷淡",
"支配性": "个体喜欢指使、操纵他人,倾向于领导别人的特点;高分表现好强、发号施令,低分表现顺从、低调",
"活跃": "个体精力充沛,活跃、主动性等特点;高分表现活跃,低分表现安静",
},
},
"神经质": {
"description": "反映个体情绪的状态和体验内心苦恼的倾向性。这个维度主要关注个体在面对压力、"
"挫折和日常生活挑战时的情绪稳定性和适应能力。它包含了对焦虑、抑郁、愤怒等负面情绪的敏感程度,"
"以及个体对这些情绪的调节和控制能力。高分者容易体验负面情绪,对压力较为敏感,情绪波动较大;"
"低分者则表现出较强的情绪稳定性,能够较好地应对压力和挫折。",
"trait_words": ["稳定", "沉着", "从容", "坚韧"],
"subfactors": {
"焦虑": "个体体验焦虑感的个体差异;高分表现坐立不安,低分表现平静",
"抑郁": "个体体验抑郁情感的个体差异;高分表现郁郁寡欢,低分表现平静",
"敏感多疑": "个体常常关注自己的内心活动,行为和过于意识人对自己的看法、评价;高分表现敏感多疑,"
"低分表现淡定、自信",
"脆弱性": "个体在危机或困难面前无力、脆弱的特点;高分表现无能、易受伤、逃避,低分表现坚强",
"愤怒-敌意": "个体准备体验愤怒,及相关情绪的状态;高分表现暴躁易怒,低分表现平静",
},
},
"严谨性": {
"description": "反映个体在目标导向行为上的组织、坚持和动机特征。这个维度体现了个体在工作、"
"学习等目标性活动中的自我约束和行为管理能力。它涉及到个体的责任感、自律性、计划性、条理性以及完成任务的态度。"
"高分者往往表现出强烈的责任心、良好的组织能力、谨慎的决策风格和持续的努力精神;低分者则可能表现出随意性强、"
"缺乏规划、做事马虎或易放弃的特点。",
"trait_words": ["负责", "自律", "条理", "勤奋"],
"subfactors": {
"责任心": "个体对待任务和他人认真负责,以及对自己承诺的信守;高分表现有责任心、负责任,"
"低分表现推卸责任、逃避处罚",
"自我控制": "个体约束自己的能力,及自始至终的坚持性;高分表现自制、有毅力,低分表现冲动、无毅力",
"审慎性": "个体在采取具体行动前的心理状态;高分表现谨慎、小心,低分表现鲁莽、草率",
"条理性": "个体处理事务和工作的秩序,条理和逻辑性;高分表现整洁、有秩序,低分表现混乱、遗漏",
"勤奋": "个体工作和学习的努力程度及为达到目标而表现出的进取精神;高分表现勤奋、刻苦,低分表现懒散",
},
},
"开放性": {
"description": "反映个体对新异事物、新观念和新经验的接受程度,以及在思维和行为方面的创新倾向。"
"这个维度体现了个体在认知和体验方面的广度、深度和灵活性。它包括对艺术的欣赏能力、对知识的求知欲、想象力的丰富程度,"
"以及对冒险和创新的态度。高分者往往具有丰富的想象力、广泛的兴趣、开放的思维方式和创新的倾向;低分者则倾向于保守、"
"传统,喜欢熟悉和常规的事物。",
"trait_words": ["创新", "好奇", "艺术", "冒险"],
"subfactors": {
"幻想": "个体富于幻想和想象的水平;高分表现想象力丰富,低分表现想象力匮乏",
"审美": "个体对于艺术和美的敏感与热爱程度;高分表现富有艺术气息,低分表现一般对艺术不敏感",
"好奇心": "个体对未知事物的态度;高分表现兴趣广泛、好奇心浓,低分表现兴趣少、无好奇心",
"冒险精神": "个体愿意尝试有风险活动的个体差异;高分表现好冒险,低分表现保守",
"价值观念": "个体对新事物、新观念、怪异想法的态度;高分表现开放、坦然接受新事物,低分则相反",
},
},
"宜人性": {
"description": "反映个体在人际关系中的亲和倾向,体现了对他人的关心、同情和合作意愿。"
"这个维度主要关注个体与他人互动时的态度和行为特征,包括对他人的信任程度、同理心水平、"
"助人意愿以及在人际冲突中的处理方式。高分者通常表现出友善、富有同情心、乐于助人的特质,善于与他人建立和谐关系;"
"低分者则可能表现出较少的人际关注,在社交互动中更注重自身利益,较少考虑他人感受。",
"trait_words": ["友善", "同理", "信任", "合作"],
"subfactors": {
"信任": "个体对他人和/或他人言论的相信程度;高分表现信任他人,低分表现怀疑",
"体贴": "个体对别人的兴趣和需要的关注程度;高分表现体贴、温存,低分表现冷漠、不在乎",
"同情": "个体对处于不利地位的人或物的态度;高分表现富有同情心,低分表现冷漠",
},
},
}

View File

@@ -1,195 +0,0 @@
"""
The definition of artificial personality in this paper follows the dispositional para-digm and adapts a definition of
personality developed for humans [17]:
Personality for a human is the "whole and organisation of relatively stable tendencies and patterns of experience and
behaviour within one person (distinguishing it from other persons)". This definition is modified for artificial
personality:
Artificial personality describes the relatively stable tendencies and patterns of behav-iour of an AI-based machine that
can be designed by developers and designers via different modalities, such as language, creating the impression
of individuality of a humanized social agent when users interact with the machine."""
from typing import Dict, List
import json
import os
from pathlib import Path
from dotenv import load_dotenv
import sys
"""
第一种方案:基于情景评估的人格测定
"""
current_dir = Path(__file__).resolve().parent
project_root = current_dir.parent.parent.parent
env_path = project_root / ".env"
root_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../.."))
sys.path.append(root_path)
from src.plugins.personality.scene import get_scene_by_factor, PERSONALITY_SCENES # noqa: E402
from src.plugins.personality.questionnaire import FACTOR_DESCRIPTIONS # noqa: E402
from src.plugins.personality.offline_llm import LLMModel # noqa: E402
# 加载环境变量
if env_path.exists():
print(f"{env_path} 加载环境变量")
load_dotenv(env_path)
else:
print(f"未找到环境变量文件: {env_path}")
print("将使用默认配置")
class PersonalityEvaluatorDirect:
def __init__(self):
self.personality_traits = {"开放性": 0, "严谨性": 0, "外向性": 0, "宜人性": 0, "神经质": 0}
self.scenarios = []
# 为每个人格特质获取对应的场景
for trait in PERSONALITY_SCENES:
scenes = get_scene_by_factor(trait)
if not scenes:
continue
# 从每个维度选择3个场景
import random
scene_keys = list(scenes.keys())
selected_scenes = random.sample(scene_keys, min(3, len(scene_keys)))
for scene_key in selected_scenes:
scene = scenes[scene_key]
# 为每个场景添加评估维度
# 主维度是当前特质,次维度随机选择一个其他特质
other_traits = [t for t in PERSONALITY_SCENES if t != trait]
secondary_trait = random.choice(other_traits)
self.scenarios.append(
{"场景": scene["scenario"], "评估维度": [trait, secondary_trait], "场景编号": scene_key}
)
self.llm = LLMModel()
def evaluate_response(self, scenario: str, response: str, dimensions: List[str]) -> Dict[str, float]:
"""
使用 DeepSeek AI 评估用户对特定场景的反应
"""
# 构建维度描述
dimension_descriptions = []
for dim in dimensions:
desc = FACTOR_DESCRIPTIONS.get(dim, "")
if desc:
dimension_descriptions.append(f"- {dim}{desc}")
dimensions_text = "\n".join(dimension_descriptions)
prompt = f"""请根据以下场景和用户描述评估用户在大五人格模型中的相关维度得分1-6分
场景描述:
{scenario}
用户回应:
{response}
需要评估的维度说明:
{dimensions_text}
请按照以下格式输出评估结果仅输出JSON格式
{{
"{dimensions[0]}": 分数,
"{dimensions[1]}": 分数
}}
评分标准:
1 = 非常不符合该维度特征
2 = 比较不符合该维度特征
3 = 有点不符合该维度特征
4 = 有点符合该维度特征
5 = 比较符合该维度特征
6 = 非常符合该维度特征
请根据用户的回应结合场景和维度说明进行评分。确保分数在1-6之间并给出合理的评估。"""
try:
ai_response, _ = self.llm.generate_response(prompt)
# 尝试从AI响应中提取JSON部分
start_idx = ai_response.find("{")
end_idx = ai_response.rfind("}") + 1
if start_idx != -1 and end_idx != 0:
json_str = ai_response[start_idx:end_idx]
scores = json.loads(json_str)
# 确保所有分数在1-6之间
return {k: max(1, min(6, float(v))) for k, v in scores.items()}
else:
print("AI响应格式不正确使用默认评分")
return {dim: 3.5 for dim in dimensions}
except Exception as e:
print(f"评估过程出错:{str(e)}")
return {dim: 3.5 for dim in dimensions}
def main():
print("欢迎使用人格形象创建程序!")
print("接下来您将面对一系列场景共15个。请根据您想要创建的角色形象描述在该场景下可能的反应。")
print("每个场景都会评估不同的人格维度,最终得出完整的人格特征评估。")
print("评分标准1=非常不符合2=比较不符合3=有点不符合4=有点符合5=比较符合6=非常符合")
print("\n准备好了吗?按回车键开始...")
input()
evaluator = PersonalityEvaluatorDirect()
final_scores = {"开放性": 0, "严谨性": 0, "外向性": 0, "宜人性": 0, "神经质": 0}
dimension_counts = {trait: 0 for trait in final_scores.keys()}
for i, scenario_data in enumerate(evaluator.scenarios, 1):
print(f"\n场景 {i}/{len(evaluator.scenarios)} - {scenario_data['场景编号']}:")
print("-" * 50)
print(scenario_data["场景"])
print("\n请描述您的角色在这种情况下会如何反应:")
response = input().strip()
if not response:
print("反应描述不能为空!")
continue
print("\n正在评估您的描述...")
scores = evaluator.evaluate_response(scenario_data["场景"], response, scenario_data["评估维度"])
# 更新最终分数
for dimension, score in scores.items():
final_scores[dimension] += score
dimension_counts[dimension] += 1
print("\n当前评估结果:")
print("-" * 30)
for dimension, score in scores.items():
print(f"{dimension}: {score}/6")
if i < len(evaluator.scenarios):
print("\n按回车键继续下一个场景...")
input()
# 计算平均分
for dimension in final_scores:
if dimension_counts[dimension] > 0:
final_scores[dimension] = round(final_scores[dimension] / dimension_counts[dimension], 2)
print("\n最终人格特征评估结果:")
print("-" * 30)
for trait, score in final_scores.items():
print(f"{trait}: {score}/6")
print(f"测试场景数:{dimension_counts[trait]}")
# 保存结果
result = {"final_scores": final_scores, "dimension_counts": dimension_counts, "scenarios": evaluator.scenarios}
# 确保目录存在
os.makedirs("results", exist_ok=True)
# 保存到文件
with open("results/personality_result.json", "w", encoding="utf-8") as f:
json.dump(result, f, ensure_ascii=False, indent=2)
print("\n结果已保存到 results/personality_result.json")
if __name__ == "__main__":
main()

View File

@@ -1,156 +0,0 @@
import random
import os
import sys
from pathlib import Path
import datetime
from typing import List, Dict, Optional
current_dir = Path(__file__).resolve().parent
project_root = current_dir.parent.parent.parent
env_path = project_root / ".env"
root_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../.."))
sys.path.append(root_path)
from src.common.database import db # noqa: E402
class MessageAnalyzer:
def __init__(self):
self.messages_collection = db["messages"]
def get_message_context(self, message_id: int, context_length: int = 5) -> Optional[List[Dict]]:
"""
获取指定消息ID的上下文消息列表
Args:
message_id (int): 消息ID
context_length (int): 上下文长度(单侧,总长度为 2*context_length + 1
Returns:
Optional[List[Dict]]: 消息列表如果未找到则返回None
"""
# 从数据库获取指定消息
target_message = self.messages_collection.find_one({"message_id": message_id})
if not target_message:
return None
# 获取该消息的stream_id
stream_id = target_message.get("chat_info", {}).get("stream_id")
if not stream_id:
return None
# 获取同一stream_id的所有消息
stream_messages = list(self.messages_collection.find({"chat_info.stream_id": stream_id}).sort("time", 1))
# 找到目标消息在列表中的位置
target_index = None
for i, msg in enumerate(stream_messages):
if msg["message_id"] == message_id:
target_index = i
break
if target_index is None:
return None
# 获取目标消息前后的消息
start_index = max(0, target_index - context_length)
end_index = min(len(stream_messages), target_index + context_length + 1)
return stream_messages[start_index:end_index]
def format_messages(self, messages: List[Dict], target_message_id: Optional[int] = None) -> str:
"""
格式化消息列表为可读字符串
Args:
messages (List[Dict]): 消息列表
target_message_id (Optional[int]): 目标消息ID用于标记
Returns:
str: 格式化的消息字符串
"""
if not messages:
return "没有消息记录"
reply = ""
for msg in messages:
# 消息时间
msg_time = datetime.datetime.fromtimestamp(int(msg["time"])).strftime("%Y-%m-%d %H:%M:%S")
# 获取消息内容
message_text = msg.get("processed_plain_text", msg.get("detailed_plain_text", "无消息内容"))
nickname = msg.get("user_info", {}).get("user_nickname", "未知用户")
# 标记当前消息
is_target = "" if target_message_id and msg["message_id"] == target_message_id else " "
reply += f"{is_target}[{msg_time}] {nickname}: {message_text}\n"
if target_message_id and msg["message_id"] == target_message_id:
reply += " " + "-" * 50 + "\n"
return reply
def get_user_random_contexts(
self, qq_id: str, num_messages: int = 10, context_length: int = 5
) -> tuple[List[str], str]: # noqa: E501
"""
获取用户的随机消息及其上下文
Args:
qq_id (str): QQ号
num_messages (int): 要获取的随机消息数量
context_length (int): 每条消息的上下文长度(单侧)
Returns:
tuple[List[str], str]: (每个消息上下文的格式化字符串列表, 用户昵称)
"""
if not qq_id:
return [], ""
# 获取用户所有消息
all_messages = list(self.messages_collection.find({"user_info.user_id": int(qq_id)}))
if not all_messages:
return [], ""
# 获取用户昵称
user_nickname = all_messages[0].get("chat_info", {}).get("user_info", {}).get("user_nickname", "未知用户")
# 随机选择指定数量的消息
selected_messages = random.sample(all_messages, min(num_messages, len(all_messages)))
# 按时间排序
selected_messages.sort(key=lambda x: int(x["time"]))
# 存储所有上下文消息
context_list = []
# 获取每条消息的上下文
for msg in selected_messages:
message_id = msg["message_id"]
# 获取消息上下文
context_messages = self.get_message_context(message_id, context_length)
if context_messages:
formatted_context = self.format_messages(context_messages, message_id)
context_list.append(formatted_context)
return context_list, user_nickname
if __name__ == "__main__":
# 测试代码
analyzer = MessageAnalyzer()
test_qq = "1026294844" # 替换为要测试的QQ号
print(f"测试QQ号: {test_qq}")
print("-" * 50)
# 获取5条消息每条消息前后各3条上下文
contexts, nickname = analyzer.get_user_random_contexts(test_qq, num_messages=5, context_length=3)
print(f"用户昵称: {nickname}\n")
# 打印每个上下文
for i, context in enumerate(contexts, 1):
print(f"\n随机消息 {i}/{len(contexts)}:")
print("-" * 30)
print(context)
print("=" * 50)

View File

@@ -1 +0,0 @@
那是以后会用到的妙妙小工具.jpg