update: update scripts
@@ -48,7 +48,7 @@ def load_group_data(group_dir):
     """加载单个群组的数据"""
     json_path = Path(group_dir) / "expressions.json"
     if not json_path.exists():
-        return [], [], []
+        return [], [], [], 0

     with open(json_path, "r", encoding="utf-8") as f:
         data = json.load(f)
@@ -56,6 +56,7 @@ def load_group_data(group_dir):
     situations = []
     styles = []
     combined = []
+    total_count = sum(item["count"] for item in data)

     for item in data:
         count = item["count"]
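For context, this is the shape of data load_group_data expects and what the new total_count adds up. The field names come from the code above; the sample values below are invented for illustration:

```python
# Hypothetical contents of one group's expressions.json
# (field names taken from load_group_data above; the values are made up).
sample = [
    {"situation": "someone asks for help", "style": "short, direct answer", "count": 12},
    {"situation": "friendly teasing", "style": "exaggerated reaction", "count": 3},
]

# Mirrors the new total_count line: 12 + 3 = 15, so this hypothetical group
# would later be dropped by the `total_count >= 50` filter.
total_count = sum(item["count"] for item in sample)
print(total_count)  # 15
```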
@@ -63,41 +64,46 @@ def load_group_data(group_dir):
         styles.extend([item["style"]] * count)
         combined.extend([f"{item['situation']} {item['style']}"] * count)

-    return situations, styles, combined
+    return situations, styles, combined, total_count


 def analyze_group_similarity():
     # 获取所有群组目录
     base_dir = Path("data/expression/learnt_style")
     group_dirs = [d for d in base_dir.iterdir() if d.is_dir()]
-    group_ids = [d.name for d in group_dirs]

-    # 获取群组名称
-    group_names = [get_group_name(group_id) for group_id in group_ids]
-
-    # 加载所有群组的数据
-    group_situations = []
-    group_styles = []
-    group_combined = []
+    # 加载所有群组的数据并过滤
+    valid_groups = []
+    valid_names = []
+    valid_situations = []
+    valid_styles = []
+    valid_combined = []

     for d in group_dirs:
-        situations, styles, combined = load_group_data(d)
-        group_situations.append(" ".join(situations))
-        group_styles.append(" ".join(styles))
-        group_combined.append(" ".join(combined))
+        situations, styles, combined, total_count = load_group_data(d)
+        if total_count >= 50:  # 只保留数据量大于等于50的群组
+            valid_groups.append(d)
+            valid_names.append(get_group_name(d.name))
+            valid_situations.append(" ".join(situations))
+            valid_styles.append(" ".join(styles))
+            valid_combined.append(" ".join(combined))

+    if not valid_groups:
+        print("没有找到数据量大于等于50的群组")
+        return
+
     # 创建TF-IDF向量化器
     vectorizer = TfidfVectorizer()

     # 计算三种相似度矩阵
-    situation_matrix = cosine_similarity(vectorizer.fit_transform(group_situations))
-    style_matrix = cosine_similarity(vectorizer.fit_transform(group_styles))
-    combined_matrix = cosine_similarity(vectorizer.fit_transform(group_combined))
+    situation_matrix = cosine_similarity(vectorizer.fit_transform(valid_situations))
+    style_matrix = cosine_similarity(vectorizer.fit_transform(valid_styles))
+    combined_matrix = cosine_similarity(vectorizer.fit_transform(valid_combined))

     # 对相似度矩阵进行对数变换
-    log_situation_matrix = np.log1p(situation_matrix)
-    log_style_matrix = np.log1p(style_matrix)
-    log_combined_matrix = np.log1p(combined_matrix)
+    log_situation_matrix = np.log10(situation_matrix * 100 + 1) * 10 / np.log10(4)
+    log_style_matrix = np.log10(style_matrix * 100 + 1) * 10 / np.log10(4)
+    log_combined_matrix = np.log10(combined_matrix * 100 + 1) * 10 / np.log10(4)

     # 创建一个大图,包含三个子图
     plt.figure(figsize=(45, 12))
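The new scaling is effectively 10 * log4(100 * similarity + 1), so the plotted numbers behave like a compressed percentage: a raw cosine similarity of 0 stays at 0 and 1.0 tops out around 33.3, which lines up with the heatmaps below switching to vmax=30 and one decimal place. A quick standalone sanity check (not part of the commit):

```python
import numpy as np

def rescale(sim):
    """Same transform as the new log_*_matrix lines: 10 * log4(100 * sim + 1)."""
    return np.log10(sim * 100 + 1) * 10 / np.log10(4)

for sim in (0.0, 0.01, 0.03, 0.15, 0.63, 1.0):
    print(f"{sim:.2f} -> {rescale(sim):.1f}")
# 0.00 -> 0.0
# 0.01 -> 5.0
# 0.03 -> 10.0
# 0.15 -> 20.0
# 0.63 -> 30.0   (roughly the new vmax)
# 1.00 -> 33.3   (upper bound of the scale)
```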
@@ -106,45 +112,45 @@ def analyze_group_similarity():
     plt.subplot(1, 3, 1)
     sns.heatmap(
         log_situation_matrix,
-        xticklabels=group_names,
-        yticklabels=group_names,
+        xticklabels=valid_names,
+        yticklabels=valid_names,
         cmap="YlOrRd",
         annot=True,
-        fmt=".2f",
+        fmt=".1f",
         vmin=0,
-        vmax=np.log1p(0.2),
+        vmax=30,
     )
-    plt.title("群组场景相似度热力图 (对数变换)")
+    plt.title("群组场景相似度热力图 (对数百分比)")
     plt.xticks(rotation=45, ha="right")

     # 表达方式相似度热力图
     plt.subplot(1, 3, 2)
     sns.heatmap(
         log_style_matrix,
-        xticklabels=group_names,
-        yticklabels=group_names,
+        xticklabels=valid_names,
+        yticklabels=valid_names,
         cmap="YlOrRd",
         annot=True,
-        fmt=".2f",
+        fmt=".1f",
         vmin=0,
-        vmax=np.log1p(0.2),
+        vmax=30,
     )
-    plt.title("群组表达方式相似度热力图 (对数变换)")
+    plt.title("群组表达方式相似度热力图 (对数百分比)")
     plt.xticks(rotation=45, ha="right")

     # 组合相似度热力图
     plt.subplot(1, 3, 3)
     sns.heatmap(
         log_combined_matrix,
-        xticklabels=group_names,
-        yticklabels=group_names,
+        xticklabels=valid_names,
+        yticklabels=valid_names,
         cmap="YlOrRd",
         annot=True,
-        fmt=".2f",
+        fmt=".1f",
         vmin=0,
-        vmax=np.log1p(0.2),
+        vmax=30,
     )
-    plt.title("群组场景+表达方式相似度热力图 (对数变换)")
+    plt.title("群组场景+表达方式相似度热力图 (对数百分比)")
     plt.xticks(rotation=45, ha="right")

     plt.tight_layout()
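To preview how the rescaled matrices render with fmt=".1f" and the 0-30 colour range without real group data, here is a minimal standalone sketch; the toy similarity matrix and placeholder group names are invented:

```python
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Toy 3x3 cosine-similarity matrix, rescaled the same way as in the script above.
sim = np.array([[1.00, 0.08, 0.02],
                [0.08, 1.00, 0.15],
                [0.02, 0.15, 1.00]])
log_sim = np.log10(sim * 100 + 1) * 10 / np.log10(4)

names = ["group_a", "group_b", "group_c"]  # placeholder labels
sns.heatmap(log_sim, xticklabels=names, yticklabels=names,
            cmap="YlOrRd", annot=True, fmt=".1f", vmin=0, vmax=30)
plt.title("rescaled similarity (toy data)")
plt.tight_layout()
plt.savefig("toy_heatmap.png")
```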
@@ -156,18 +162,18 @@ def analyze_group_similarity():
         f.write("群组相似度详情\n")
         f.write("=" * 50 + "\n\n")

-        for i in range(len(group_ids)):
-            for j in range(i + 1, len(group_ids)):
-                if log_combined_matrix[i][j] > np.log1p(0.05):
-                    f.write(f"群组1: {group_names[i]}\n")
-                    f.write(f"群组2: {group_names[j]}\n")
+        for i in range(len(valid_names)):
+            for j in range(i + 1, len(valid_names)):
+                if log_combined_matrix[i][j] > 50:
+                    f.write(f"群组1: {valid_names[i]}\n")
+                    f.write(f"群组2: {valid_names[j]}\n")
                     f.write(f"场景相似度: {situation_matrix[i][j]:.4f}\n")
                     f.write(f"表达方式相似度: {style_matrix[i][j]:.4f}\n")
                     f.write(f"组合相似度: {combined_matrix[i][j]:.4f}\n")

                     # 获取两个群组的数据
-                    situations1, styles1, _ = load_group_data(group_dirs[i])
-                    situations2, styles2, _ = load_group_data(group_dirs[j])
+                    situations1, styles1, _, _ = load_group_data(valid_groups[i])
+                    situations2, styles2, _, _ = load_group_data(valid_groups[j])

                     # 找出共同的场景
                     common_situations = set(situations1) & set(situations2)
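One thing worth double-checking when reusing this report loop: the largest value the rescaled matrix can hold is about 33.3 (a raw cosine similarity of 1.0), so the `> 50` cutoff above can never be satisfied. If the intent was something like the old 5% raw-similarity threshold, the equivalent cutoff on the new scale would be roughly 12.9:

```python
import numpy as np

def rescale(sim):
    # Same transform the script now applies before plotting and reporting.
    return np.log10(sim * 100 + 1) * 10 / np.log10(4)

print(round(rescale(1.0), 1))   # 33.3, the largest value the scale can produce
print(round(rescale(0.05), 1))  # 12.9, where a 5% raw-similarity cutoff would sit
```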
@@ -187,10 +187,6 @@ class ActionPlanner(BasePlanner):
             prompt = f"{prompt}"
             llm_content, (reasoning_content, _) = await self.planner_llm.generate_response_async(prompt=prompt)

-            # logger.info(
-            #     f"{self.log_prefix}规划器Prompt:\n{prompt}\n\nLLM 原始响应: {llm_content}'"
-            # )
-
             logger.debug(f"{self.log_prefix}LLM 原始理由响应: {reasoning_content}")
         except Exception as req_e:
             logger.error(f"{self.log_prefix}LLM 请求执行失败: {req_e}")
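For readers outside the codebase, the call pattern this hunk keeps is: await the planner model, log the reasoning text only at debug level, and fall back quietly on request errors. Pulled out of the class it looks roughly like the sketch below; the (content, (reasoning, _)) return shape is taken from the lines above, while the stub class and everything else are illustrative stand-ins:

```python
import asyncio
import logging

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger("planner_demo")

class FakePlannerLLM:
    """Stand-in for self.planner_llm; mimics the (content, (reasoning, _)) return shape above."""
    async def generate_response_async(self, prompt: str):
        return "no_action", ("nothing in the chat needs a reply", None)

async def plan(planner_llm, prompt: str, log_prefix: str = "[demo] "):
    llm_content = None
    try:
        llm_content, (reasoning_content, _) = await planner_llm.generate_response_async(prompt=prompt)
        logger.debug(f"{log_prefix}raw reasoning: {reasoning_content}")
    except Exception as req_e:
        logger.error(f"{log_prefix}LLM request failed: {req_e}")
    return llm_content

print(asyncio.run(plan(FakePlannerLLM(), "pick the next action")))
```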
@@ -115,19 +115,19 @@ content_filtration = false # 是否启用表情包过滤,只有符合该要
 filtration_prompt = "符合公序良俗" # 表情包过滤要求,只有符合该要求的表情包才会被保存

 [memory]
-memory_build_interval = 2000 # 记忆构建间隔 单位秒 间隔越低,麦麦学习越多,但是冗余信息也会增多
+memory_build_interval = 1000 # 记忆构建间隔 单位秒 间隔越低,麦麦学习越多,但是冗余信息也会增多
 memory_build_distribution = [6.0, 3.0, 0.6, 32.0, 12.0, 0.4] # 记忆构建分布,参数:分布1均值,标准差,权重,分布2均值,标准差,权重
-memory_build_sample_num = 6 # 采样数量,数值越高记忆采样次数越多
-memory_build_sample_length = 40 # 采样长度,数值越高一段记忆内容越丰富
+memory_build_sample_num = 4 # 采样数量,数值越高记忆采样次数越多
+memory_build_sample_length = 30 # 采样长度,数值越高一段记忆内容越丰富
 memory_compress_rate = 0.1 # 记忆压缩率 控制记忆精简程度 建议保持默认,调高可以获得更多信息,但是冗余信息也会增多

 forget_memory_interval = 1000 # 记忆遗忘间隔 单位秒 间隔越低,麦麦遗忘越频繁,记忆更精简,但更难学习
 memory_forget_time = 24 #多长时间后的记忆会被遗忘 单位小时
 memory_forget_percentage = 0.01 # 记忆遗忘比例 控制记忆遗忘程度 越大遗忘越多 建议保持默认

-consolidate_memory_interval = 2000 # 记忆整合间隔 单位秒 间隔越低,麦麦整合越频繁,记忆更精简
+consolidate_memory_interval = 1000 # 记忆整合间隔 单位秒 间隔越低,麦麦整合越频繁,记忆更精简
 consolidation_similarity_threshold = 0.7 # 相似度阈值
-consolidation_check_percentage = 0.01 # 检查节点比例
+consolidation_check_percentage = 0.05 # 检查节点比例

 #不希望记忆的词,已经记忆的不会受到影响,需要手动清理
 memory_ban_words = [ "表情包", "图片", "回复", "聊天记录" ]
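memory_build_distribution reads as two weighted normal distributions (mean, standard deviation, weight for each). How the bot consumes these six numbers is not shown in this diff, so the sampler below is only an illustrative sketch of such a mixture:

```python
import random

# [mean1, std1, weight1, mean2, std2, weight2], copied from the config above
memory_build_distribution = [6.0, 3.0, 0.6, 32.0, 12.0, 0.4]

def sample_from_mixture(dist):
    """Pick one of the two normals by weight, then draw a single value from it."""
    m1, s1, w1, m2, s2, w2 = dist
    mean, std = (m1, s1) if random.random() < w1 / (w1 + w2) else (m2, s2)
    return max(0.0, random.gauss(mean, std))

# Five illustrative draws, clustered around the two means (6.0 and 32.0).
print([round(sample_from_mixture(memory_build_distribution), 1) for _ in range(5)])
```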