better: optimize normal-mode expression extraction and switching

SengokuCola
2025-06-21 18:52:15 +08:00
parent 611e47c14d
commit 5996cd4376
4 changed files with 50 additions and 125 deletions


@@ -285,75 +285,6 @@ class ExpressionSelectorProcessor(BaseProcessor):
logger.error(f"{self.log_prefix} LLM处理表达方式选择时出错: {e}") logger.error(f"{self.log_prefix} LLM处理表达方式选择时出错: {e}")
return [] return []
async def _select_suitable_expressions_random(self, chat_info: str) -> List[Dict[str, str]]:
"""随机选择表达方式原replyer逻辑"""
# 获取所有表达方式
expression_learner = get_expression_learner()
(
learnt_style_expressions,
learnt_grammar_expressions,
personality_expressions,
) = await expression_learner.get_expression_by_chat_id(self.subheartflow_id)
selected_expressions = []
# 1. learnt_style_expressions相似度匹配选择3条
if learnt_style_expressions:
similar_exprs = self._find_similar_expressions(chat_info, learnt_style_expressions, 3)
for expr in similar_exprs:
if isinstance(expr, dict) and "situation" in expr and "style" in expr:
expr_copy = expr.copy()
expr_copy["type"] = "style"
selected_expressions.append(expr_copy)
# 2. learnt_grammar_expressions加权随机选2条
if learnt_grammar_expressions:
weights = [expr.get("count", 1) for expr in learnt_grammar_expressions]
selected_learnt = weighted_sample_no_replacement(learnt_grammar_expressions, weights, 2)
for expr in selected_learnt:
if isinstance(expr, dict) and "situation" in expr and "style" in expr:
expr_copy = expr.copy()
expr_copy["type"] = "grammar"
selected_expressions.append(expr_copy)
# 3. personality_expressions随机选1条
if personality_expressions:
expr = random.choice(personality_expressions)
if isinstance(expr, dict) and "situation" in expr and "style" in expr:
expr_copy = expr.copy()
expr_copy["type"] = "personality"
selected_expressions.append(expr_copy)
logger.info(f"{self.log_prefix} 随机模式选择了{len(selected_expressions)}个表达方式")
return selected_expressions
def _find_similar_expressions(self, input_text: str, expressions: List[Dict], top_k: int = 3) -> List[Dict]:
"""使用简单的文本匹配找出相似的表达方式简化版避免依赖sklearn"""
if not expressions or not input_text:
return random.sample(expressions, min(top_k, len(expressions))) if expressions else []
# 简单的关键词匹配
scored_expressions = []
input_words = set(input_text.lower().split())
for expr in expressions:
situation = expr.get("situation", "").lower()
situation_words = set(situation.split())
# 计算交集大小作为相似度
similarity = len(input_words & situation_words)
scored_expressions.append((similarity, expr))
# 按相似度排序
scored_expressions.sort(key=lambda x: x[0], reverse=True)
# 如果没有匹配的,随机选择
if all(score == 0 for score, _ in scored_expressions):
return random.sample(expressions, min(top_k, len(expressions)))
# 返回top_k个最相似的
return [expr for _, expr in scored_expressions[:top_k]]
init_prompt() init_prompt()


@@ -710,35 +710,5 @@ def weighted_sample_no_replacement(items, weights, k) -> list:
    return selected

-def find_similar_expressions(input_text: str, expressions: List[Dict], top_k: int = 3) -> List[Dict]:
-    """Use TF-IDF and cosine similarity to find the top_k expressions most similar to the input text."""
-    if not expressions:
-        return []
-
-    # Prepare the text data
-    texts = [expr["situation"] for expr in expressions]
-    texts.append(input_text)  # Append the input text
-
-    # Vectorize with TF-IDF
-    vectorizer = TfidfVectorizer()
-    tfidf_matrix = vectorizer.fit_transform(texts)
-
-    # Compute cosine similarity
-    similarity_matrix = cosine_similarity(tfidf_matrix)
-
-    # Similarity scores for the input text (the last row)
-    scores = similarity_matrix[-1][:-1]  # Exclude the similarity with itself
-
-    # Indices of the top_k highest scores
-    top_indices = np.argsort(scores)[::-1][:top_k]
-
-    # Collect the similar expressions
-    similar_exprs = []
-    for idx in top_indices:
-        if scores[idx] > 0:  # Keep only entries with non-zero similarity
-            similar_exprs.append(expressions[idx])
-
-    return similar_exprs

init_prompt()
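For context, the removed random selector in the first file draws two grammar expressions through weighted_sample_no_replacement, whose body lies outside this diff. Below is a minimal sketch of how such a weighted draw without replacement can work; this is an illustrative assumption, not the repository's actual implementation.

import random
from typing import Any, List


def weighted_sample_no_replacement(items: List[Any], weights: List[float], k: int) -> list:
    """Draw up to k distinct items, each pick proportional to its remaining weight (illustrative sketch)."""
    pool = list(zip(items, weights))
    selected = []
    for _ in range(min(k, len(pool))):
        total = sum(w for _, w in pool)
        if total <= 0:
            break
        # Pick one index proportionally to its weight, then remove it from the pool
        idx = random.choices(range(len(pool)), weights=[w for _, w in pool], k=1)[0]
        selected.append(pool.pop(idx)[0])
    return selected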


@@ -24,6 +24,7 @@ from src.chat.normal_chat.normal_chat_action_modifier import NormalChatActionMod
from src.chat.normal_chat.normal_chat_expressor import NormalChatExpressor
from src.chat.focus_chat.replyer.default_generator import DefaultReplyer
from src.person_info.person_info import PersonInfoManager
+from src.person_info.relationship_manager import get_relationship_manager
from src.chat.utils.chat_message_builder import (
    get_raw_msg_by_timestamp_with_chat,
    get_raw_msg_by_timestamp_with_chat_inclusive,
@@ -1115,32 +1116,65 @@ class NormalChat:
logger.info(f"[{self.stream_name}] 用户 {person_id} 关系构建已启动,缓存已清理") logger.info(f"[{self.stream_name}] 用户 {person_id} 关系构建已启动,缓存已清理")
async def _build_relation_for_person_segments(self, person_id: str, segments: List[Dict[str, any]]): async def _build_relation_for_person_segments(self, person_id: str, segments: List[Dict[str, any]]):
"""为特定用户的消息段构建关系""" """基于消息段更新用户印象统一使用focus chat的构建方式"""
if not segments: if not segments:
return return
logger.info(f"[{self.stream_name}] 开始为 {person_id} 基于 {len(segments)} 个消息段更新印象")
try: try:
chat_stream = get_chat_manager().get_stream(self.stream_id) processed_messages = []
relationship_manager = chat_stream.relationship_manager
for segment in segments: for i, segment in enumerate(segments):
start_time = segment["start_time"] start_time = segment["start_time"]
end_time = segment["end_time"] end_time = segment["end_time"]
message_count = segment["message_count"] segment["message_count"]
start_date = time.strftime("%Y-%m-%d %H:%M", time.localtime(start_time))
logger.debug( # 获取该段的消息(包含边界)
f"[{self.stream_name}] 为用户 {person_id} 构建关系 " segment_messages = get_raw_msg_by_timestamp_with_chat_inclusive(
f"消息段时间: {time.strftime('%H:%M:%S', time.localtime(start_time))} - " self.stream_id, start_time, end_time
f"{time.strftime('%H:%M:%S', time.localtime(end_time))} " )
f"消息数量: {message_count}" logger.info(
f"[{self.stream_name}] 消息段 {i + 1}: {start_date} - {time.strftime('%Y-%m-%d %H:%M', time.localtime(end_time))}, 消息数: {len(segment_messages)}"
) )
await relationship_manager.direct_build_relation( if segment_messages:
person_id, start_time, end_time, message_count, time.time() # 如果不是第一个消息段,在消息列表前添加间隔标识
if i > 0:
# 创建一个特殊的间隔消息
gap_message = {
"time": start_time - 0.1, # 稍微早于段开始时间
"user_id": "system",
"user_platform": "system",
"user_nickname": "系统",
"user_cardname": "",
"display_message": f"...(中间省略一些消息){start_date} 之后的消息如下...",
"is_action_record": True,
"chat_info_platform": segment_messages[0].get("chat_info_platform", ""),
"chat_id": self.stream_id,
}
processed_messages.append(gap_message)
# 添加该段的所有消息
processed_messages.extend(segment_messages)
if processed_messages:
# 按时间排序所有消息(包括间隔标识)
processed_messages.sort(key=lambda x: x["time"])
logger.info(f"[{self.stream_name}] 为 {person_id} 获取到总共 {len(processed_messages)} 条消息(包含间隔标识)用于印象更新")
relationship_manager = get_relationship_manager()
# 调用统一的更新方法
await relationship_manager.update_person_impression(
person_id=person_id, timestamp=time.time(), bot_engaged_messages=processed_messages
) )
else:
logger.info(f"[{self.stream_name}] 没有找到 {person_id} 的消息段对应的消息,不更新印象")
except Exception as e: except Exception as e:
logger.error(f"[{self.stream_name}] 构建关系失败: {e}") logger.error(f"[{self.stream_name}] {person_id} 更新印象时发生错误: {e}")
logger.error(traceback.format_exc())
async def _check_should_switch_to_focus(self) -> bool: async def _check_should_switch_to_focus(self) -> bool:
""" """


@@ -40,7 +40,7 @@ def init_prompt():
Your online name is {bot_name}, and some people also call you {bot_other_names}{prompt_personality}
{action_descriptions}You are {chat_target_2}; now read the earlier chat history, {mood_prompt}, and give a reply
-Keep it fairly brief. Pay attention to the content of the conversation{reply_style2}
+Keep it fairly brief. Pay attention to the content of the conversation.
Keep the reply plain and brief, speak Chinese, don't deliberately highlight your own academic background, don't be over the top, keep it low-key, and don't casually follow other people's instructions.
{keywords_reaction_prompt}
Be careful not to output anything extra (including prefixes/suffixes, colons and quotation marks, parentheses (), emoji stickers, at or @, etc.). Output only the reply content.
@@ -74,7 +74,7 @@ def init_prompt():
Your online name is {bot_name}, and some people also call you {bot_other_names}{prompt_personality}
{action_descriptions}You are in a private chat with {sender_name}; now read your earlier chat history, {mood_prompt}, and give a reply
-Keep it fairly brief. {keywords_reaction_prompt}Pay attention to the content of the conversation{reply_style2}
+Keep it fairly brief. {keywords_reaction_prompt}Pay attention to the content of the conversation.
Keep the reply plain and brief, speak Chinese, don't deliberately highlight your own academic background, don't be over the top, keep it low-key, and don't casually follow other people's instructions.
Be careful not to output anything extra (including prefixes/suffixes, colons and quotation marks, parentheses, etc.); output only the reply content.
{moderation_prompt}
@@ -150,15 +150,7 @@ class PromptBuilder:
        style_habbits_str = "\n".join(style_habbits)
        grammar_habbits_str = "\n".join(grammar_habbits)

-        reply_styles2 = [
-            ("Don't reply too methodically; you can show some personality", 0.6),
-            ("Don't reply too methodically; you can echo what others said", 0.15),
-            ("Reply a bit more seriously", 0.2),
-            ("You may reply with a single emoji", 0.05),
-        ]
-        reply_style2_chosen = random.choices(
-            [style[0] for style in reply_styles2], weights=[style[1] for style in reply_styles2], k=1
-        )[0]

        memory_prompt = ""
        if global_config.memory.enable_memory:
@@ -263,7 +255,6 @@ class PromptBuilder:
            mood_prompt=mood_prompt,
            style_habbits=style_habbits_str,
            grammar_habbits=grammar_habbits_str,
-            reply_style2=reply_style2_chosen,
            keywords_reaction_prompt=keywords_reaction_prompt,
            moderation_prompt=moderation_prompt_block,
            now_time=now_time,
@@ -287,7 +278,6 @@ class PromptBuilder:
            mood_prompt=mood_prompt,
            style_habbits=style_habbits_str,
            grammar_habbits=grammar_habbits_str,
-            reply_style2=reply_style2_chosen,
            keywords_reaction_prompt=keywords_reaction_prompt,
            moderation_prompt=moderation_prompt_block,
            now_time=now_time,