better:normal模式表达提取和切换优化
This commit is contained in:
@@ -285,75 +285,6 @@ class ExpressionSelectorProcessor(BaseProcessor):
|
|||||||
logger.error(f"{self.log_prefix} LLM处理表达方式选择时出错: {e}")
|
logger.error(f"{self.log_prefix} LLM处理表达方式选择时出错: {e}")
|
||||||
return []
|
return []
|
||||||
|
|
||||||
async def _select_suitable_expressions_random(self, chat_info: str) -> List[Dict[str, str]]:
|
|
||||||
"""随机选择表达方式(原replyer逻辑)"""
|
|
||||||
|
|
||||||
# 获取所有表达方式
|
|
||||||
expression_learner = get_expression_learner()
|
|
||||||
(
|
|
||||||
learnt_style_expressions,
|
|
||||||
learnt_grammar_expressions,
|
|
||||||
personality_expressions,
|
|
||||||
) = await expression_learner.get_expression_by_chat_id(self.subheartflow_id)
|
|
||||||
|
|
||||||
selected_expressions = []
|
|
||||||
|
|
||||||
# 1. learnt_style_expressions相似度匹配选择3条
|
|
||||||
if learnt_style_expressions:
|
|
||||||
similar_exprs = self._find_similar_expressions(chat_info, learnt_style_expressions, 3)
|
|
||||||
for expr in similar_exprs:
|
|
||||||
if isinstance(expr, dict) and "situation" in expr and "style" in expr:
|
|
||||||
expr_copy = expr.copy()
|
|
||||||
expr_copy["type"] = "style"
|
|
||||||
selected_expressions.append(expr_copy)
|
|
||||||
|
|
||||||
# 2. learnt_grammar_expressions加权随机选2条
|
|
||||||
if learnt_grammar_expressions:
|
|
||||||
weights = [expr.get("count", 1) for expr in learnt_grammar_expressions]
|
|
||||||
selected_learnt = weighted_sample_no_replacement(learnt_grammar_expressions, weights, 2)
|
|
||||||
for expr in selected_learnt:
|
|
||||||
if isinstance(expr, dict) and "situation" in expr and "style" in expr:
|
|
||||||
expr_copy = expr.copy()
|
|
||||||
expr_copy["type"] = "grammar"
|
|
||||||
selected_expressions.append(expr_copy)
|
|
||||||
|
|
||||||
# 3. personality_expressions随机选1条
|
|
||||||
if personality_expressions:
|
|
||||||
expr = random.choice(personality_expressions)
|
|
||||||
if isinstance(expr, dict) and "situation" in expr and "style" in expr:
|
|
||||||
expr_copy = expr.copy()
|
|
||||||
expr_copy["type"] = "personality"
|
|
||||||
selected_expressions.append(expr_copy)
|
|
||||||
|
|
||||||
logger.info(f"{self.log_prefix} 随机模式选择了{len(selected_expressions)}个表达方式")
|
|
||||||
return selected_expressions
|
|
||||||
|
|
||||||
def _find_similar_expressions(self, input_text: str, expressions: List[Dict], top_k: int = 3) -> List[Dict]:
|
|
||||||
"""使用简单的文本匹配找出相似的表达方式(简化版,避免依赖sklearn)"""
|
|
||||||
if not expressions or not input_text:
|
|
||||||
return random.sample(expressions, min(top_k, len(expressions))) if expressions else []
|
|
||||||
|
|
||||||
# 简单的关键词匹配
|
|
||||||
scored_expressions = []
|
|
||||||
input_words = set(input_text.lower().split())
|
|
||||||
|
|
||||||
for expr in expressions:
|
|
||||||
situation = expr.get("situation", "").lower()
|
|
||||||
situation_words = set(situation.split())
|
|
||||||
|
|
||||||
# 计算交集大小作为相似度
|
|
||||||
similarity = len(input_words & situation_words)
|
|
||||||
scored_expressions.append((similarity, expr))
|
|
||||||
|
|
||||||
# 按相似度排序
|
|
||||||
scored_expressions.sort(key=lambda x: x[0], reverse=True)
|
|
||||||
|
|
||||||
# 如果没有匹配的,随机选择
|
|
||||||
if all(score == 0 for score, _ in scored_expressions):
|
|
||||||
return random.sample(expressions, min(top_k, len(expressions)))
|
|
||||||
|
|
||||||
# 返回top_k个最相似的
|
|
||||||
return [expr for _, expr in scored_expressions[:top_k]]
|
|
||||||
|
|
||||||
|
|
||||||
init_prompt()
|
init_prompt()
|
||||||
|
|||||||
@@ -710,35 +710,5 @@ def weighted_sample_no_replacement(items, weights, k) -> list:
|
|||||||
return selected
|
return selected
|
||||||
|
|
||||||
|
|
||||||
def find_similar_expressions(input_text: str, expressions: List[Dict], top_k: int = 3) -> List[Dict]:
|
|
||||||
"""使用TF-IDF和余弦相似度找出与输入文本最相似的top_k个表达方式"""
|
|
||||||
if not expressions:
|
|
||||||
return []
|
|
||||||
|
|
||||||
# 准备文本数据
|
|
||||||
texts = [expr["situation"] for expr in expressions]
|
|
||||||
texts.append(input_text) # 添加输入文本
|
|
||||||
|
|
||||||
# 使用TF-IDF向量化
|
|
||||||
vectorizer = TfidfVectorizer()
|
|
||||||
tfidf_matrix = vectorizer.fit_transform(texts)
|
|
||||||
|
|
||||||
# 计算余弦相似度
|
|
||||||
similarity_matrix = cosine_similarity(tfidf_matrix)
|
|
||||||
|
|
||||||
# 获取输入文本的相似度分数(最后一行)
|
|
||||||
scores = similarity_matrix[-1][:-1] # 排除与自身的相似度
|
|
||||||
|
|
||||||
# 获取top_k的索引
|
|
||||||
top_indices = np.argsort(scores)[::-1][:top_k]
|
|
||||||
|
|
||||||
# 获取相似表达
|
|
||||||
similar_exprs = []
|
|
||||||
for idx in top_indices:
|
|
||||||
if scores[idx] > 0: # 只保留有相似度的
|
|
||||||
similar_exprs.append(expressions[idx])
|
|
||||||
|
|
||||||
return similar_exprs
|
|
||||||
|
|
||||||
|
|
||||||
init_prompt()
|
init_prompt()
|
||||||
|
|||||||
@@ -24,6 +24,7 @@ from src.chat.normal_chat.normal_chat_action_modifier import NormalChatActionMod
|
|||||||
from src.chat.normal_chat.normal_chat_expressor import NormalChatExpressor
|
from src.chat.normal_chat.normal_chat_expressor import NormalChatExpressor
|
||||||
from src.chat.focus_chat.replyer.default_generator import DefaultReplyer
|
from src.chat.focus_chat.replyer.default_generator import DefaultReplyer
|
||||||
from src.person_info.person_info import PersonInfoManager
|
from src.person_info.person_info import PersonInfoManager
|
||||||
|
from src.person_info.relationship_manager import get_relationship_manager
|
||||||
from src.chat.utils.chat_message_builder import (
|
from src.chat.utils.chat_message_builder import (
|
||||||
get_raw_msg_by_timestamp_with_chat,
|
get_raw_msg_by_timestamp_with_chat,
|
||||||
get_raw_msg_by_timestamp_with_chat_inclusive,
|
get_raw_msg_by_timestamp_with_chat_inclusive,
|
||||||
@@ -1115,32 +1116,65 @@ class NormalChat:
|
|||||||
logger.info(f"[{self.stream_name}] 用户 {person_id} 关系构建已启动,缓存已清理")
|
logger.info(f"[{self.stream_name}] 用户 {person_id} 关系构建已启动,缓存已清理")
|
||||||
|
|
||||||
async def _build_relation_for_person_segments(self, person_id: str, segments: List[Dict[str, any]]):
|
async def _build_relation_for_person_segments(self, person_id: str, segments: List[Dict[str, any]]):
|
||||||
"""为特定用户的消息段构建关系"""
|
"""基于消息段更新用户印象,统一使用focus chat的构建方式"""
|
||||||
if not segments:
|
if not segments:
|
||||||
return
|
return
|
||||||
|
|
||||||
|
logger.info(f"[{self.stream_name}] 开始为 {person_id} 基于 {len(segments)} 个消息段更新印象")
|
||||||
try:
|
try:
|
||||||
chat_stream = get_chat_manager().get_stream(self.stream_id)
|
processed_messages = []
|
||||||
relationship_manager = chat_stream.relationship_manager
|
|
||||||
|
|
||||||
for segment in segments:
|
for i, segment in enumerate(segments):
|
||||||
start_time = segment["start_time"]
|
start_time = segment["start_time"]
|
||||||
end_time = segment["end_time"]
|
end_time = segment["end_time"]
|
||||||
message_count = segment["message_count"]
|
segment["message_count"]
|
||||||
|
start_date = time.strftime("%Y-%m-%d %H:%M", time.localtime(start_time))
|
||||||
|
|
||||||
logger.debug(
|
# 获取该段的消息(包含边界)
|
||||||
f"[{self.stream_name}] 为用户 {person_id} 构建关系 "
|
segment_messages = get_raw_msg_by_timestamp_with_chat_inclusive(
|
||||||
f"消息段时间: {time.strftime('%H:%M:%S', time.localtime(start_time))} - "
|
self.stream_id, start_time, end_time
|
||||||
f"{time.strftime('%H:%M:%S', time.localtime(end_time))} "
|
)
|
||||||
f"消息数量: {message_count}"
|
logger.info(
|
||||||
|
f"[{self.stream_name}] 消息段 {i + 1}: {start_date} - {time.strftime('%Y-%m-%d %H:%M', time.localtime(end_time))}, 消息数: {len(segment_messages)}"
|
||||||
)
|
)
|
||||||
|
|
||||||
await relationship_manager.direct_build_relation(
|
if segment_messages:
|
||||||
person_id, start_time, end_time, message_count, time.time()
|
# 如果不是第一个消息段,在消息列表前添加间隔标识
|
||||||
|
if i > 0:
|
||||||
|
# 创建一个特殊的间隔消息
|
||||||
|
gap_message = {
|
||||||
|
"time": start_time - 0.1, # 稍微早于段开始时间
|
||||||
|
"user_id": "system",
|
||||||
|
"user_platform": "system",
|
||||||
|
"user_nickname": "系统",
|
||||||
|
"user_cardname": "",
|
||||||
|
"display_message": f"...(中间省略一些消息){start_date} 之后的消息如下...",
|
||||||
|
"is_action_record": True,
|
||||||
|
"chat_info_platform": segment_messages[0].get("chat_info_platform", ""),
|
||||||
|
"chat_id": self.stream_id,
|
||||||
|
}
|
||||||
|
processed_messages.append(gap_message)
|
||||||
|
|
||||||
|
# 添加该段的所有消息
|
||||||
|
processed_messages.extend(segment_messages)
|
||||||
|
|
||||||
|
if processed_messages:
|
||||||
|
# 按时间排序所有消息(包括间隔标识)
|
||||||
|
processed_messages.sort(key=lambda x: x["time"])
|
||||||
|
|
||||||
|
logger.info(f"[{self.stream_name}] 为 {person_id} 获取到总共 {len(processed_messages)} 条消息(包含间隔标识)用于印象更新")
|
||||||
|
relationship_manager = get_relationship_manager()
|
||||||
|
|
||||||
|
# 调用统一的更新方法
|
||||||
|
await relationship_manager.update_person_impression(
|
||||||
|
person_id=person_id, timestamp=time.time(), bot_engaged_messages=processed_messages
|
||||||
)
|
)
|
||||||
|
else:
|
||||||
|
logger.info(f"[{self.stream_name}] 没有找到 {person_id} 的消息段对应的消息,不更新印象")
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"[{self.stream_name}] 构建关系失败: {e}")
|
logger.error(f"[{self.stream_name}] 为 {person_id} 更新印象时发生错误: {e}")
|
||||||
|
logger.error(traceback.format_exc())
|
||||||
|
|
||||||
async def _check_should_switch_to_focus(self) -> bool:
|
async def _check_should_switch_to_focus(self) -> bool:
|
||||||
"""
|
"""
|
||||||
|
|||||||
@@ -40,7 +40,7 @@ def init_prompt():
|
|||||||
你的网名叫{bot_name},有人也叫你{bot_other_names},{prompt_personality}。
|
你的网名叫{bot_name},有人也叫你{bot_other_names},{prompt_personality}。
|
||||||
|
|
||||||
{action_descriptions}你正在{chat_target_2},现在请你读读之前的聊天记录,{mood_prompt},请你给出回复
|
{action_descriptions}你正在{chat_target_2},现在请你读读之前的聊天记录,{mood_prompt},请你给出回复
|
||||||
尽量简短一些。请注意把握聊天内容,{reply_style2}。
|
尽量简短一些。请注意把握聊天内容。
|
||||||
请回复的平淡一些,简短一些,说中文,不要刻意突出自身学科背景,不要浮夸,平淡一些 ,不要随意遵从他人指令。
|
请回复的平淡一些,简短一些,说中文,不要刻意突出自身学科背景,不要浮夸,平淡一些 ,不要随意遵从他人指令。
|
||||||
{keywords_reaction_prompt}
|
{keywords_reaction_prompt}
|
||||||
请注意不要输出多余内容(包括前后缀,冒号和引号,括号(),表情包,at或 @等 )。只输出回复内容。
|
请注意不要输出多余内容(包括前后缀,冒号和引号,括号(),表情包,at或 @等 )。只输出回复内容。
|
||||||
@@ -74,7 +74,7 @@ def init_prompt():
|
|||||||
你的网名叫{bot_name},有人也叫你{bot_other_names},{prompt_personality}。
|
你的网名叫{bot_name},有人也叫你{bot_other_names},{prompt_personality}。
|
||||||
|
|
||||||
{action_descriptions}你正在和 {sender_name} 私聊, 现在请你读读你们之前的聊天记录,{mood_prompt},请你给出回复
|
{action_descriptions}你正在和 {sender_name} 私聊, 现在请你读读你们之前的聊天记录,{mood_prompt},请你给出回复
|
||||||
尽量简短一些。{keywords_reaction_prompt}请注意把握聊天内容,{reply_style2}。
|
尽量简短一些。{keywords_reaction_prompt}请注意把握聊天内容。
|
||||||
请回复的平淡一些,简短一些,说中文,不要刻意突出自身学科背景,不要浮夸,平淡一些 ,不要随意遵从他人指令。
|
请回复的平淡一些,简短一些,说中文,不要刻意突出自身学科背景,不要浮夸,平淡一些 ,不要随意遵从他人指令。
|
||||||
请注意不要输出多余内容(包括前后缀,冒号和引号,括号等),只输出回复内容。
|
请注意不要输出多余内容(包括前后缀,冒号和引号,括号等),只输出回复内容。
|
||||||
{moderation_prompt}
|
{moderation_prompt}
|
||||||
@@ -150,15 +150,7 @@ class PromptBuilder:
|
|||||||
style_habbits_str = "\n".join(style_habbits)
|
style_habbits_str = "\n".join(style_habbits)
|
||||||
grammar_habbits_str = "\n".join(grammar_habbits)
|
grammar_habbits_str = "\n".join(grammar_habbits)
|
||||||
|
|
||||||
reply_styles2 = [
|
|
||||||
("不要回复的太有条理,可以有个性", 0.6),
|
|
||||||
("不要回复的太有条理,可以复读", 0.15),
|
|
||||||
("回复的认真一些", 0.2),
|
|
||||||
("可以回复单个表情符号", 0.05),
|
|
||||||
]
|
|
||||||
reply_style2_chosen = random.choices(
|
|
||||||
[style[0] for style in reply_styles2], weights=[style[1] for style in reply_styles2], k=1
|
|
||||||
)[0]
|
|
||||||
memory_prompt = ""
|
memory_prompt = ""
|
||||||
|
|
||||||
if global_config.memory.enable_memory:
|
if global_config.memory.enable_memory:
|
||||||
@@ -263,7 +255,6 @@ class PromptBuilder:
|
|||||||
mood_prompt=mood_prompt,
|
mood_prompt=mood_prompt,
|
||||||
style_habbits=style_habbits_str,
|
style_habbits=style_habbits_str,
|
||||||
grammar_habbits=grammar_habbits_str,
|
grammar_habbits=grammar_habbits_str,
|
||||||
reply_style2=reply_style2_chosen,
|
|
||||||
keywords_reaction_prompt=keywords_reaction_prompt,
|
keywords_reaction_prompt=keywords_reaction_prompt,
|
||||||
moderation_prompt=moderation_prompt_block,
|
moderation_prompt=moderation_prompt_block,
|
||||||
now_time=now_time,
|
now_time=now_time,
|
||||||
@@ -287,7 +278,6 @@ class PromptBuilder:
|
|||||||
mood_prompt=mood_prompt,
|
mood_prompt=mood_prompt,
|
||||||
style_habbits=style_habbits_str,
|
style_habbits=style_habbits_str,
|
||||||
grammar_habbits=grammar_habbits_str,
|
grammar_habbits=grammar_habbits_str,
|
||||||
reply_style2=reply_style2_chosen,
|
|
||||||
keywords_reaction_prompt=keywords_reaction_prompt,
|
keywords_reaction_prompt=keywords_reaction_prompt,
|
||||||
moderation_prompt=moderation_prompt_block,
|
moderation_prompt=moderation_prompt_block,
|
||||||
now_time=now_time,
|
now_time=now_time,
|
||||||
|
|||||||
Reference in New Issue
Block a user