async def get_expression_by_chat_id(self, chat_id: str) -> Tuple[List[Dict[str, str]], List[Dict[str, str]]]:
    """Load the stored expressions for one chat plus the shared personality ones.

    Two JSON files are read, both relative to the current working directory:

    * ``data/expression/learnt/{chat_id}/expressions.json``
    * ``data/expression/personality/expressions.json``

    Args:
        chat_id: Identifier of the chat; it is converted with ``str()`` before
            being used as a directory name.

    Returns:
        A ``(learnt_expressions, personality_expressions)`` tuple. Each element
        is whatever the corresponding JSON file decodes to, or an empty list
        when that file does not exist.

    Raises:
        json.JSONDecodeError: If an existing file does not contain valid JSON.
    """

    def _load(path: str) -> List[Dict[str, str]]:
        # EAFP: open directly instead of checking os.path.exists() first, so a
        # file that disappears between the check and the open cannot raise.
        try:
            with open(path, "r", encoding="utf-8") as f:
                return json.load(f)
        except FileNotFoundError:
            return []

    expression_root = os.path.join("data", "expression")
    learnt_file = os.path.join(expression_root, "learnt", str(chat_id), "expressions.json")
    personality_file = os.path.join(expression_root, "personality", "expressions.json")
    return _load(learnt_file), _load(personality_file)
[] + for _, situation, style in expressions: + result.append({"situation": situation, "style": style, "count": 100}) + # 超过50条时随机删除多余的,只保留50条 + if len(result) > 50: + remove_count = len(result) - 50 + remove_indices = set(random.sample(range(len(result)), remove_count)) + result = [item for idx, item in enumerate(result) if idx not in remove_indices] + with open(file_path, "w", encoding="utf-8") as f: + json.dump(result, f, ensure_ascii=False, indent=2) + logger.info(f"已写入{len(result)}条表达到{file_path}") + init_prompt() diff --git a/src/plugins/heartFC_chat/heartflow_prompt_builder.py b/src/plugins/heartFC_chat/heartflow_prompt_builder.py index ccb3109c4..00912e7f2 100644 --- a/src/plugins/heartFC_chat/heartflow_prompt_builder.py +++ b/src/plugins/heartFC_chat/heartflow_prompt_builder.py @@ -246,14 +246,22 @@ async def _build_prompt_focus( structured_info_prompt = "" # 从/data/expression/对应chat_id/expressions.json中读取表达方式 - expressions = await expression_learner.get_expression_by_chat_id(chat_stream.stream_id) - if expressions: - language_habits = [] - for expr in expressions: - # print(f"expr: {expr}") + learnt_expressions, personality_expressions = await expression_learner.get_expression_by_chat_id(chat_stream.stream_id) + language_habits = [] + # 1. learnt_expressions加权随机选5条 + if learnt_expressions: + weights = [expr["count"] for expr in learnt_expressions] + selected_learnt = weighted_sample_no_replacement(learnt_expressions, weights, 5) + for expr in selected_learnt: if isinstance(expr, dict) and "situation" in expr and "style" in expr: language_habits.append(f"当{expr['situation']}时,使用 {expr['style']}") - else: + # 2. personality_expressions随机选1条 + if personality_expressions: + expr = random.choice(personality_expressions) + if isinstance(expr, dict) and "situation" in expr and "style" in expr: + language_habits.append(f"当{expr['situation']}时,使用 {expr['style']}") + # 3. 
def weighted_sample_no_replacement(items, weights, k):
    """Draw up to ``k`` distinct elements, favouring those with larger weights.

    One element is drawn at a time with probability proportional to its weight
    among the elements still in the pool, then removed from the pool, so no
    element is returned twice.

    Args:
        items: Candidate elements. They do not need to be hashable.
        weights: One weight per element, paired with ``items`` by position
            (extra entries on either side are ignored, as ``zip`` does).
            Weights that are not positive finite numbers are treated as 0.
        k: Number of elements to draw.

    Returns:
        A list of ``min(k, n)`` distinct elements in draw order, where ``n`` is
        the number of (item, weight) pairs; empty when ``k <= 0`` or the pool
        is empty. Zero-weight elements are only returned once every
        positively weighted element has been taken; among themselves they are
        chosen uniformly.
    """
    infinity = float("inf")
    pool_items = []
    pool_weights = []
    for item, weight in zip(items, weights):
        pool_items.append(item)
        # NaN fails the `> 0` comparison, so it is clamped together with
        # negative and infinite weights; random.choices rejects all of those.
        usable = weight > 0 and weight != infinity
        pool_weights.append(weight if usable else 0)

    selected = []
    for _ in range(min(k, len(pool_items))):
        if any(pool_weights):
            idx = random.choices(range(len(pool_items)), weights=pool_weights)[0]
        else:
            # Nothing left with a positive weight: fall back to a uniform pick
            # so the caller still receives the promised number of elements.
            idx = random.randrange(len(pool_items))
        selected.append(pool_items.pop(idx))
        pool_weights.pop(idx)
    return selected