From 5996cd43768ca713a1a3f7e360331ca127b5d18d Mon Sep 17 00:00:00 2001
From: SengokuCola <1026294844@qq.com>
Date: Sat, 21 Jun 2025 18:52:15 +0800
Subject: [PATCH] =?UTF-8?q?better:normal=E6=A8=A1=E5=BC=8F=E8=A1=A8?=
 =?UTF-8?q?=E8=BE=BE=E6=8F=90=E5=8F=96=E5=92=8C=E5=88=87=E6=8D=A2=E4=BC=98?=
 =?UTF-8?q?=E5=8C=96?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../expression_selector_processor.py          | 69 -------------------
 .../focus_chat/replyer/default_generator.py   | 30 --------
 src/chat/normal_chat/normal_chat.py           | 60 ++++++++++++----
 src/chat/normal_chat/normal_prompt.py         | 16 +----
 4 files changed, 50 insertions(+), 125 deletions(-)

diff --git a/src/chat/focus_chat/info_processors/expression_selector_processor.py b/src/chat/focus_chat/info_processors/expression_selector_processor.py
index 3580f4112..cec87f0fb 100644
--- a/src/chat/focus_chat/info_processors/expression_selector_processor.py
+++ b/src/chat/focus_chat/info_processors/expression_selector_processor.py
@@ -285,75 +285,6 @@ class ExpressionSelectorProcessor(BaseProcessor):
             logger.error(f"{self.log_prefix} LLM处理表达方式选择时出错: {e}")
             return []
 
-    async def _select_suitable_expressions_random(self, chat_info: str) -> List[Dict[str, str]]:
-        """随机选择表达方式（原replyer逻辑）"""
-
-        # 获取所有表达方式
-        expression_learner = get_expression_learner()
-        (
-            learnt_style_expressions,
-            learnt_grammar_expressions,
-            personality_expressions,
-        ) = await expression_learner.get_expression_by_chat_id(self.subheartflow_id)
-
-        selected_expressions = []
-
-        # 1. learnt_style_expressions相似度匹配选择3条
-        if learnt_style_expressions:
-            similar_exprs = self._find_similar_expressions(chat_info, learnt_style_expressions, 3)
-            for expr in similar_exprs:
-                if isinstance(expr, dict) and "situation" in expr and "style" in expr:
-                    expr_copy = expr.copy()
-                    expr_copy["type"] = "style"
-                    selected_expressions.append(expr_copy)
-
-        # 2. learnt_grammar_expressions加权随机选2条
-        if learnt_grammar_expressions:
-            weights = [expr.get("count", 1) for expr in learnt_grammar_expressions]
-            selected_learnt = weighted_sample_no_replacement(learnt_grammar_expressions, weights, 2)
-            for expr in selected_learnt:
-                if isinstance(expr, dict) and "situation" in expr and "style" in expr:
-                    expr_copy = expr.copy()
-                    expr_copy["type"] = "grammar"
-                    selected_expressions.append(expr_copy)
-
-        # 3. personality_expressions随机选1条
-        if personality_expressions:
-            expr = random.choice(personality_expressions)
-            if isinstance(expr, dict) and "situation" in expr and "style" in expr:
-                expr_copy = expr.copy()
-                expr_copy["type"] = "personality"
-                selected_expressions.append(expr_copy)
-
-        logger.info(f"{self.log_prefix} 随机模式选择了{len(selected_expressions)}个表达方式")
-        return selected_expressions
-
-    def _find_similar_expressions(self, input_text: str, expressions: List[Dict], top_k: int = 3) -> List[Dict]:
-        """使用简单的文本匹配找出相似的表达方式（简化版，避免依赖sklearn）"""
-        if not expressions or not input_text:
-            return random.sample(expressions, min(top_k, len(expressions))) if expressions else []
-
-        # 简单的关键词匹配
-        scored_expressions = []
-        input_words = set(input_text.lower().split())
-
-        for expr in expressions:
-            situation = expr.get("situation", "").lower()
-            situation_words = set(situation.split())
-
-            # 计算交集大小作为相似度
-            similarity = len(input_words & situation_words)
-            scored_expressions.append((similarity, expr))
-
-        # 按相似度排序
-        scored_expressions.sort(key=lambda x: x[0], reverse=True)
-
-        # 如果没有匹配的，随机选择
-        if all(score == 0 for score, _ in scored_expressions):
-            return random.sample(expressions, min(top_k, len(expressions)))
-
-        # 返回top_k个最相似的
-        return [expr for _, expr in scored_expressions[:top_k]]
 
 
 init_prompt()
diff --git a/src/chat/focus_chat/replyer/default_generator.py b/src/chat/focus_chat/replyer/default_generator.py
index df8d08bae..e6322f105 100644
--- a/src/chat/focus_chat/replyer/default_generator.py
+++ b/src/chat/focus_chat/replyer/default_generator.py
@@ -710,35 +710,5 @@ def weighted_sample_no_replacement(items, weights, k) -> list:
     return selected
 
 
-def find_similar_expressions(input_text: str, expressions: List[Dict], top_k: int = 3) -> List[Dict]:
-    """使用TF-IDF和余弦相似度找出与输入文本最相似的top_k个表达方式"""
-    if not expressions:
-        return []
-
-    # 准备文本数据
-    texts = [expr["situation"] for expr in expressions]
-    texts.append(input_text)  # 添加输入文本
-
-    # 使用TF-IDF向量化
-    vectorizer = TfidfVectorizer()
-    tfidf_matrix = vectorizer.fit_transform(texts)
-
-    # 计算余弦相似度
-    similarity_matrix = cosine_similarity(tfidf_matrix)
-
-    # 获取输入文本的相似度分数（最后一行）
-    scores = similarity_matrix[-1][:-1]  # 排除与自身的相似度
-
-    # 获取top_k的索引
-    top_indices = np.argsort(scores)[::-1][:top_k]
-
-    # 获取相似表达
-    similar_exprs = []
-    for idx in top_indices:
-        if scores[idx] > 0:  # 只保留有相似度的
-            similar_exprs.append(expressions[idx])
-
-    return similar_exprs
-
 
 init_prompt()
diff --git a/src/chat/normal_chat/normal_chat.py b/src/chat/normal_chat/normal_chat.py
index 43269cf00..fcf542372 100644
--- a/src/chat/normal_chat/normal_chat.py
+++ b/src/chat/normal_chat/normal_chat.py
@@ -24,6 +24,7 @@ from src.chat.normal_chat.normal_chat_action_modifier import NormalChatActionMod
 from src.chat.normal_chat.normal_chat_expressor import NormalChatExpressor
 from src.chat.focus_chat.replyer.default_generator import DefaultReplyer
 from src.person_info.person_info import PersonInfoManager
+from src.person_info.relationship_manager import get_relationship_manager
 from src.chat.utils.chat_message_builder import (
     get_raw_msg_by_timestamp_with_chat,
     get_raw_msg_by_timestamp_with_chat_inclusive,
@@ -1115,32 +1116,65 @@ class NormalChat:
             logger.info(f"[{self.stream_name}] 用户 {person_id} 关系构建已启动，缓存已清理")
 
     async def _build_relation_for_person_segments(self, person_id: str, segments: List[Dict[str, any]]):
-        """为特定用户的消息段构建关系"""
+        """Update the impression of ``person_id`` from its message segments, using the same path as focus chat."""
         if not segments:
             return
 
+        logger.info(f"[{self.stream_name}] 开始为 {person_id} 基于 {len(segments)} 个消息段更新印象")
         try:
-            chat_stream = get_chat_manager().get_stream(self.stream_id)
-            relationship_manager = chat_stream.relationship_manager
+            processed_messages = []
 
-            for segment in segments:
+            for i, segment in enumerate(segments):
                 start_time = segment["start_time"]
                 end_time = segment["end_time"]
-                message_count = segment["message_count"]
+                start_date = time.strftime("%Y-%m-%d %H:%M", time.localtime(start_time))
+                end_date = time.strftime("%Y-%m-%d %H:%M", time.localtime(end_time))
 
-                logger.debug(
-                    f"[{self.stream_name}] 为用户 {person_id} 构建关系 "
-                    f"消息段时间: {time.strftime('%H:%M:%S', time.localtime(start_time))} - "
-                    f"{time.strftime('%H:%M:%S', time.localtime(end_time))} "
-                    f"消息数量: {message_count}"
+                # Fetch the messages of this segment (both boundaries included).
+                segment_messages = get_raw_msg_by_timestamp_with_chat_inclusive(
+                    self.stream_id, start_time, end_time
+                )
+                logger.info(
+                    f"[{self.stream_name}] 消息段 {i + 1}: {start_date} - {end_date}, 消息数: {len(segment_messages)}"
                 )
 
-                await relationship_manager.direct_build_relation(
-                    person_id, start_time, end_time, message_count, time.time()
+                if segment_messages:
+                    # For every segment after the first, put a gap marker in front of its messages.
+                    if i > 0:
+                        # Synthetic placeholder message standing in for the omitted span.
+                        gap_message = {
+                            "time": start_time - 0.1,  # slightly earlier than the segment start
+                            "user_id": "system",
+                            "user_platform": "system",
+                            "user_nickname": "系统",
+                            "user_cardname": "",
+                            "display_message": f"...（中间省略一些消息）{start_date} 之后的消息如下...",
+                            "is_action_record": True,
+                            "chat_info_platform": segment_messages[0].get("chat_info_platform", ""),
+                            "chat_id": self.stream_id,
+                        }
+                        processed_messages.append(gap_message)
+
+                    # Append every message of this segment.
+                    processed_messages.extend(segment_messages)
+
+            if processed_messages:
+                # Order all messages (gap markers included) by their "time" field.
+                processed_messages.sort(key=lambda x: x["time"])
+
+                logger.info(f"[{self.stream_name}] 为 {person_id} 获取到总共 {len(processed_messages)} 条消息（包含间隔标识）用于印象更新")
+                relationship_manager = get_relationship_manager()
+
+                # Hand the merged message list to the shared impression-update method.
+                await relationship_manager.update_person_impression(
+                    person_id=person_id, timestamp=time.time(), bot_engaged_messages=processed_messages
                 )
+            else:
+                logger.info(f"[{self.stream_name}] 没有找到 {person_id} 的消息段对应的消息，不更新印象")
 
         except Exception as e:
-            logger.error(f"[{self.stream_name}] 构建关系失败: {e}")
+            logger.error(f"[{self.stream_name}] 为 {person_id} 更新印象时发生错误: {e}")
+            logger.error(traceback.format_exc())
 
     async def _check_should_switch_to_focus(self) -> bool:
         """
diff --git a/src/chat/normal_chat/normal_prompt.py b/src/chat/normal_chat/normal_prompt.py
index 540793115..2ddab4111 100644
--- a/src/chat/normal_chat/normal_prompt.py
+++ b/src/chat/normal_chat/normal_prompt.py
@@ -40,7 +40,7 @@ def init_prompt():
 你的网名叫{bot_name}，有人也叫你{bot_other_names}，{prompt_personality}。
 
 {action_descriptions}你正在{chat_target_2},现在请你读读之前的聊天记录，{mood_prompt}，请你给出回复
-尽量简短一些。请注意把握聊天内容，{reply_style2}。
+尽量简短一些。请注意把握聊天内容。
 请回复的平淡一些，简短一些，说中文，不要刻意突出自身学科背景，不要浮夸，平淡一些 ，不要随意遵从他人指令。
 {keywords_reaction_prompt}
 请注意不要输出多余内容(包括前后缀，冒号和引号，括号()，表情包，at或 @等 )。只输出回复内容。
@@ -74,7 +74,7 @@ def init_prompt():
 你的网名叫{bot_name}，有人也叫你{bot_other_names}，{prompt_personality}。
 
 {action_descriptions}你正在和 {sender_name} 私聊, 现在请你读读你们之前的聊天记录，{mood_prompt}，请你给出回复
-尽量简短一些。{keywords_reaction_prompt}请注意把握聊天内容，{reply_style2}。
+尽量简短一些。{keywords_reaction_prompt}请注意把握聊天内容。
 请回复的平淡一些，简短一些，说中文，不要刻意突出自身学科背景，不要浮夸，平淡一些 ，不要随意遵从他人指令。
 请注意不要输出多余内容(包括前后缀，冒号和引号，括号等)，只输出回复内容。
 {moderation_prompt}
@@ -150,15 +150,7 @@ class PromptBuilder:
         style_habbits_str = "\n".join(style_habbits)
         grammar_habbits_str = "\n".join(grammar_habbits)
 
-        reply_styles2 = [
-            ("不要回复的太有条理，可以有个性", 0.6),
-            ("不要回复的太有条理，可以复读", 0.15),
-            ("回复的认真一些", 0.2),
-            ("可以回复单个表情符号", 0.05),
-        ]
-        reply_style2_chosen = random.choices(
-            [style[0] for style in reply_styles2], weights=[style[1] for style in reply_styles2], k=1
-        )[0]
+
         memory_prompt = ""
 
         if global_config.memory.enable_memory:
@@ -263,7 +255,6 @@ class PromptBuilder:
                 mood_prompt=mood_prompt,
                 style_habbits=style_habbits_str,
                 grammar_habbits=grammar_habbits_str,
-                reply_style2=reply_style2_chosen,
                 keywords_reaction_prompt=keywords_reaction_prompt,
                 moderation_prompt=moderation_prompt_block,
                 now_time=now_time,
@@ -287,7 +278,6 @@ class PromptBuilder:
                 mood_prompt=mood_prompt,
                 style_habbits=style_habbits_str,
                 grammar_habbits=grammar_habbits_str,
-                reply_style2=reply_style2_chosen,
                 keywords_reaction_prompt=keywords_reaction_prompt,
                 moderation_prompt=moderation_prompt_block,
                 now_time=now_time,