fix:更新群表达方式可视化脚本,更新数据库迁移脚本
This commit is contained in:
@@ -42,21 +42,26 @@ def get_group_name(stream_id):
|
|||||||
return f"{platform}-{stream_id[:8]}"
|
return f"{platform}-{stream_id[:8]}"
|
||||||
return stream_id
|
return stream_id
|
||||||
|
|
||||||
def load_group_expressions(group_dir):
|
def load_group_data(group_dir):
|
||||||
"""加载单个群组的表达方式数据"""
|
"""加载单个群组的数据"""
|
||||||
json_path = Path(group_dir) / "expressions.json"
|
json_path = Path(group_dir) / "expressions.json"
|
||||||
if not json_path.exists():
|
if not json_path.exists():
|
||||||
return []
|
return [], [], []
|
||||||
|
|
||||||
with open(json_path, 'r', encoding='utf-8') as f:
|
with open(json_path, 'r', encoding='utf-8') as f:
|
||||||
data = json.load(f)
|
data = json.load(f)
|
||||||
|
|
||||||
# 将所有表达方式合并成一个文本
|
situations = []
|
||||||
all_expressions = []
|
styles = []
|
||||||
for item in data:
|
combined = []
|
||||||
all_expressions.extend([item['style']] * item['count'])
|
|
||||||
|
|
||||||
return ' '.join(all_expressions)
|
for item in data:
|
||||||
|
count = item['count']
|
||||||
|
situations.extend([item['situation']] * count)
|
||||||
|
styles.extend([item['style']] * count)
|
||||||
|
combined.extend([f"{item['situation']} {item['style']}"] * count)
|
||||||
|
|
||||||
|
return situations, styles, combined
|
||||||
|
|
||||||
def analyze_group_similarity():
|
def analyze_group_similarity():
|
||||||
# 获取所有群组目录
|
# 获取所有群组目录
|
||||||
@@ -67,70 +72,109 @@ def analyze_group_similarity():
|
|||||||
# 获取群组名称
|
# 获取群组名称
|
||||||
group_names = [get_group_name(group_id) for group_id in group_ids]
|
group_names = [get_group_name(group_id) for group_id in group_ids]
|
||||||
|
|
||||||
# 加载所有群组的表达方式
|
# 加载所有群组的数据
|
||||||
group_texts = [load_group_expressions(d) for d in group_dirs]
|
group_situations = []
|
||||||
|
group_styles = []
|
||||||
|
group_combined = []
|
||||||
|
|
||||||
# 使用TF-IDF向量化文本
|
for d in group_dirs:
|
||||||
|
situations, styles, combined = load_group_data(d)
|
||||||
|
group_situations.append(' '.join(situations))
|
||||||
|
group_styles.append(' '.join(styles))
|
||||||
|
group_combined.append(' '.join(combined))
|
||||||
|
|
||||||
|
# 创建TF-IDF向量化器
|
||||||
vectorizer = TfidfVectorizer()
|
vectorizer = TfidfVectorizer()
|
||||||
tfidf_matrix = vectorizer.fit_transform(group_texts)
|
|
||||||
|
|
||||||
# 计算余弦相似度
|
# 计算三种相似度矩阵
|
||||||
similarity_matrix = cosine_similarity(tfidf_matrix)
|
situation_matrix = cosine_similarity(vectorizer.fit_transform(group_situations))
|
||||||
|
style_matrix = cosine_similarity(vectorizer.fit_transform(group_styles))
|
||||||
|
combined_matrix = cosine_similarity(vectorizer.fit_transform(group_combined))
|
||||||
|
|
||||||
# 对相似度矩阵进行对数变换
|
# 对相似度矩阵进行对数变换
|
||||||
log_similarity_matrix = np.log1p(similarity_matrix)
|
log_situation_matrix = np.log1p(situation_matrix)
|
||||||
|
log_style_matrix = np.log1p(style_matrix)
|
||||||
|
log_combined_matrix = np.log1p(combined_matrix)
|
||||||
|
|
||||||
# 创建热力图
|
# 创建一个大图,包含三个子图
|
||||||
plt.figure(figsize=(15, 12))
|
plt.figure(figsize=(45, 12))
|
||||||
sns.heatmap(log_similarity_matrix,
|
|
||||||
|
# 场景相似度热力图
|
||||||
|
plt.subplot(1, 3, 1)
|
||||||
|
sns.heatmap(log_situation_matrix,
|
||||||
xticklabels=group_names,
|
xticklabels=group_names,
|
||||||
yticklabels=group_names,
|
yticklabels=group_names,
|
||||||
cmap='YlOrRd',
|
cmap='YlOrRd',
|
||||||
annot=True,
|
annot=True,
|
||||||
fmt='.2f',
|
fmt='.2f',
|
||||||
vmin=0,
|
vmin=0,
|
||||||
vmax=np.log1p(0.2)) # 调整最大值以匹配对数变换
|
vmax=np.log1p(0.2))
|
||||||
|
plt.title('群组场景相似度热力图 (对数变换)')
|
||||||
|
plt.xticks(rotation=45, ha='right')
|
||||||
|
|
||||||
|
# 表达方式相似度热力图
|
||||||
|
plt.subplot(1, 3, 2)
|
||||||
|
sns.heatmap(log_style_matrix,
|
||||||
|
xticklabels=group_names,
|
||||||
|
yticklabels=group_names,
|
||||||
|
cmap='YlOrRd',
|
||||||
|
annot=True,
|
||||||
|
fmt='.2f',
|
||||||
|
vmin=0,
|
||||||
|
vmax=np.log1p(0.2))
|
||||||
plt.title('群组表达方式相似度热力图 (对数变换)')
|
plt.title('群组表达方式相似度热力图 (对数变换)')
|
||||||
plt.xticks(rotation=45, ha='right')
|
plt.xticks(rotation=45, ha='right')
|
||||||
|
|
||||||
|
# 组合相似度热力图
|
||||||
|
plt.subplot(1, 3, 3)
|
||||||
|
sns.heatmap(log_combined_matrix,
|
||||||
|
xticklabels=group_names,
|
||||||
|
yticklabels=group_names,
|
||||||
|
cmap='YlOrRd',
|
||||||
|
annot=True,
|
||||||
|
fmt='.2f',
|
||||||
|
vmin=0,
|
||||||
|
vmax=np.log1p(0.2))
|
||||||
|
plt.title('群组场景+表达方式相似度热力图 (对数变换)')
|
||||||
|
plt.xticks(rotation=45, ha='right')
|
||||||
|
|
||||||
plt.tight_layout()
|
plt.tight_layout()
|
||||||
plt.savefig(SCRIPT_DIR / 'group_similarity_heatmap.png', dpi=300, bbox_inches='tight')
|
plt.savefig(SCRIPT_DIR / 'group_similarity_heatmaps.png', dpi=300, bbox_inches='tight')
|
||||||
plt.close()
|
plt.close()
|
||||||
|
|
||||||
# 创建网络图
|
# 保存匹配详情到文本文件
|
||||||
G = nx.Graph()
|
with open(SCRIPT_DIR / 'group_similarity_details.txt', 'w', encoding='utf-8') as f:
|
||||||
|
f.write('群组相似度详情\n')
|
||||||
|
f.write('=' * 50 + '\n\n')
|
||||||
|
|
||||||
# 添加节点
|
|
||||||
for group_id, group_name in zip(group_ids, group_names):
|
|
||||||
G.add_node(group_id, label=group_name)
|
|
||||||
|
|
||||||
# 添加边(使用对数变换后的相似度)
|
|
||||||
for i in range(len(group_ids)):
|
for i in range(len(group_ids)):
|
||||||
for j in range(i+1, len(group_ids)):
|
for j in range(i+1, len(group_ids)):
|
||||||
if log_similarity_matrix[i][j] > np.log1p(0.05): # 调整阈值
|
if log_combined_matrix[i][j] > np.log1p(0.05):
|
||||||
G.add_edge(group_ids[i], group_ids[j],
|
f.write(f'群组1: {group_names[i]}\n')
|
||||||
weight=log_similarity_matrix[i][j])
|
f.write(f'群组2: {group_names[j]}\n')
|
||||||
|
f.write(f'场景相似度: {situation_matrix[i][j]:.4f}\n')
|
||||||
|
f.write(f'表达方式相似度: {style_matrix[i][j]:.4f}\n')
|
||||||
|
f.write(f'组合相似度: {combined_matrix[i][j]:.4f}\n')
|
||||||
|
|
||||||
# 绘制网络图
|
# 获取两个群组的数据
|
||||||
plt.figure(figsize=(20, 20))
|
situations1, styles1, _ = load_group_data(group_dirs[i])
|
||||||
pos = nx.spring_layout(G, k=1, iterations=50)
|
situations2, styles2, _ = load_group_data(group_dirs[j])
|
||||||
|
|
||||||
# 绘制节点
|
# 找出共同的场景
|
||||||
nx.draw_networkx_nodes(G, pos, node_size=20000, node_color='lightblue', alpha=0.8)
|
common_situations = set(situations1) & set(situations2)
|
||||||
|
if common_situations:
|
||||||
|
f.write('\n共同场景:\n')
|
||||||
|
for situation in common_situations:
|
||||||
|
f.write(f'- {situation}\n')
|
||||||
|
|
||||||
# 绘制边
|
# 找出共同的表达方式
|
||||||
edges = G.edges()
|
common_styles = set(styles1) & set(styles2)
|
||||||
weights = [G[u][v]['weight'] * 40 for u, v in edges] # 增加线条粗细系数
|
if common_styles:
|
||||||
nx.draw_networkx_edges(G, pos, width=weights, alpha=0.6, edge_color='gray')
|
f.write('\n共同表达方式:\n')
|
||||||
|
for style in common_styles:
|
||||||
|
f.write(f'- {style}\n')
|
||||||
|
|
||||||
# 添加标签
|
f.write('\n' + '-' * 50 + '\n\n')
|
||||||
labels = {node: G.nodes[node]['label'] for node in G.nodes()}
|
|
||||||
nx.draw_networkx_labels(G, pos, labels, font_size=20, font_weight='bold')
|
|
||||||
|
|
||||||
plt.title('群组表达方式相似度网络图\n(连线粗细表示对数变换后的相似度)')
|
|
||||||
plt.axis('off')
|
|
||||||
plt.tight_layout()
|
|
||||||
plt.savefig(SCRIPT_DIR / 'group_similarity_network.png', dpi=300, bbox_inches='tight')
|
|
||||||
plt.close()
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
analyze_group_similarity()
|
analyze_group_similarity()
|
||||||
|
|||||||
@@ -182,25 +182,6 @@ class MongoToSQLiteMigrator:
|
|||||||
enable_validation=False, # 禁用数据验证
|
enable_validation=False, # 禁用数据验证
|
||||||
unique_fields=["stream_id"],
|
unique_fields=["stream_id"],
|
||||||
),
|
),
|
||||||
# LLM使用记录迁移配置
|
|
||||||
MigrationConfig(
|
|
||||||
mongo_collection="llm_usage",
|
|
||||||
target_model=LLMUsage,
|
|
||||||
field_mapping={
|
|
||||||
"model_name": "model_name",
|
|
||||||
"user_id": "user_id",
|
|
||||||
"request_type": "request_type",
|
|
||||||
"endpoint": "endpoint",
|
|
||||||
"prompt_tokens": "prompt_tokens",
|
|
||||||
"completion_tokens": "completion_tokens",
|
|
||||||
"total_tokens": "total_tokens",
|
|
||||||
"cost": "cost",
|
|
||||||
"status": "status",
|
|
||||||
"timestamp": "timestamp",
|
|
||||||
},
|
|
||||||
enable_validation=True, # 禁用数据验证"
|
|
||||||
unique_fields=["user_id", "prompt_tokens", "completion_tokens", "total_tokens", "cost"], # 组合唯一性
|
|
||||||
),
|
|
||||||
# 消息迁移配置
|
# 消息迁移配置
|
||||||
MigrationConfig(
|
MigrationConfig(
|
||||||
mongo_collection="messages",
|
mongo_collection="messages",
|
||||||
|
|||||||
@@ -22,7 +22,7 @@ class MuteAction(PluginAction):
|
|||||||
"当有人发了擦边,或者色情内容时使用",
|
"当有人发了擦边,或者色情内容时使用",
|
||||||
"当有人要求禁言自己时使用",
|
"当有人要求禁言自己时使用",
|
||||||
]
|
]
|
||||||
default = True # 默认动作,是否手动添加到使用集
|
default = False # 默认动作,是否手动添加到使用集
|
||||||
associated_types = ["command", "text"]
|
associated_types = ["command", "text"]
|
||||||
# associated_types = ["text"]
|
# associated_types = ["text"]
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user