better:normal模式表达提取和切换优化

This commit is contained in:
SengokuCola
2025-06-21 18:52:15 +08:00
parent 611e47c14d
commit 5996cd4376
4 changed files with 50 additions and 125 deletions

View File

@@ -285,75 +285,6 @@ class ExpressionSelectorProcessor(BaseProcessor):
logger.error(f"{self.log_prefix} LLM处理表达方式选择时出错: {e}")
return []
async def _select_suitable_expressions_random(self, chat_info: str) -> List[Dict[str, str]]:
    """Pick expressions without the LLM (the original replyer logic).

    Three pools are fetched for ``self.subheartflow_id`` and sampled
    independently:

    * style pool       -- up to 3 entries chosen by ``_find_similar_expressions``
    * grammar pool     -- up to 2 entries, weighted by each entry's ``count``
    * personality pool -- 1 entry chosen uniformly at random

    Args:
        chat_info: Chat text used to rank the style pool.

    Returns:
        Shallow copies of the chosen entries, each with an added ``"type"``
        key (``"style"``, ``"grammar"`` or ``"personality"``). Entries that
        are not dicts or lack ``"situation"``/``"style"`` are dropped.
    """
    learner = get_expression_learner()
    style_pool, grammar_pool, personality_pool = await learner.get_expression_by_chat_id(
        self.subheartflow_id
    )

    picked: List[Dict[str, str]] = []

    def _collect(candidates, kind: str) -> None:
        # Keep only well-formed entries; copy so the learner's data is not mutated.
        for candidate in candidates:
            if not isinstance(candidate, dict):
                continue
            if "situation" not in candidate or "style" not in candidate:
                continue
            tagged = candidate.copy()
            tagged["type"] = kind
            picked.append(tagged)

    # 1. Style expressions: similarity match, 3 entries.
    if style_pool:
        _collect(self._find_similar_expressions(chat_info, style_pool, 3), "style")

    # 2. Grammar expressions: weighted random sample, 2 entries.
    if grammar_pool:
        counts = [entry.get("count", 1) for entry in grammar_pool]
        _collect(weighted_sample_no_replacement(grammar_pool, counts, 2), "grammar")

    # 3. Personality expressions: one uniformly random entry.
    if personality_pool:
        _collect([random.choice(personality_pool)], "personality")

    logger.info(f"{self.log_prefix} 随机模式选择了{len(picked)}个表达方式")
    return picked
def _find_similar_expressions(self, input_text: str, expressions: List[Dict], top_k: int = 3) -> List[Dict]:
"""使用简单的文本匹配找出相似的表达方式简化版避免依赖sklearn"""
if not expressions or not input_text:
return random.sample(expressions, min(top_k, len(expressions))) if expressions else []
# 简单的关键词匹配
scored_expressions = []
input_words = set(input_text.lower().split())
for expr in expressions:
situation = expr.get("situation", "").lower()
situation_words = set(situation.split())
# 计算交集大小作为相似度
similarity = len(input_words & situation_words)
scored_expressions.append((similarity, expr))
# 按相似度排序
scored_expressions.sort(key=lambda x: x[0], reverse=True)
# 如果没有匹配的,随机选择
if all(score == 0 for score, _ in scored_expressions):
return random.sample(expressions, min(top_k, len(expressions)))
# 返回top_k个最相似的
return [expr for _, expr in scored_expressions[:top_k]]
# NOTE(review): init_prompt is defined outside this excerpt; this call appears to run
# once when the file is loaded -- confirm what it registers before moving it.
init_prompt()

View File

@@ -710,35 +710,5 @@ def weighted_sample_no_replacement(items, weights, k) -> list:
return selected
def find_similar_expressions(input_text: str, expressions: List[Dict], top_k: int = 3) -> List[Dict]:
    """Return the expressions whose ``situation`` is most similar to ``input_text``.

    Similarity is the cosine similarity between TF-IDF vectors fitted on the
    candidate situations together with the input text.

    Args:
        input_text: Text to compare against.
        expressions: Candidate dicts; each must have a ``"situation"`` string.
        top_k: Maximum number of expressions to return.

    Returns:
        At most ``top_k`` expressions, most similar first. Candidates with a
        similarity of 0 are never returned. ``[]`` when ``expressions`` is
        empty or when none of the texts yields a usable token.

    Raises:
        KeyError: If a candidate has no ``"situation"`` key.
    """
    if not expressions:
        return []

    # The query goes last so it shares one vocabulary/IDF fit with the candidates.
    corpus = [expr["situation"] for expr in expressions]
    corpus.append(input_text)

    try:
        tfidf_matrix = TfidfVectorizer().fit_transform(corpus)
    except ValueError:
        # scikit-learn raises ValueError for an empty vocabulary (e.g. every text
        # is empty or has only single-character tokens); nothing can be similar.
        return []

    # Compare only the query row with the candidate rows. The full pairwise
    # matrix is N x N, but only its last row was ever used.
    scores = cosine_similarity(tfidf_matrix[-1:], tfidf_matrix[:-1])[0]

    # Indices of the best scores, highest first.
    ranked = np.argsort(scores)[::-1][:top_k]

    # Keep only candidates that actually share something with the input.
    return [expressions[idx] for idx in ranked if scores[idx] > 0]
# NOTE(review): init_prompt is defined outside this excerpt; this call appears to run
# once when the file is loaded -- confirm what it registers before moving it.
init_prompt()