feat:根据人格生成言语风格

This commit is contained in:
SengokuCola
2025-05-12 23:50:12 +08:00
parent a19211e03b
commit bb043afaa2
4 changed files with 130 additions and 19 deletions

View File

@@ -108,6 +108,9 @@ class MainSystem:
) )
logger.success("个体特征初始化成功") logger.success("个体特征初始化成功")
# 初始化表达方式
await expression_learner.extract_and_store_personality_expressions()
try: try:
# 启动全局消息管理器 (负责消息发送/排队) # 启动全局消息管理器 (负责消息发送/排队)
await message_manager.start() await message_manager.start()

View File

@@ -33,10 +33,32 @@ def init_prompt() -> None:
"想说明某个观点,但懒得明说",使用"懂的都懂" "想说明某个观点,但懒得明说",使用"懂的都懂"
"想搞笑的表现高深的感觉",使用"文言文句式" "想搞笑的表现高深的感觉",使用"文言文句式"
注意不要总结你自己的发言
现在请你概括 现在请你概括
""" """
Prompt(learn_expression_prompt, "learn_expression_prompt") Prompt(learn_expression_prompt, "learn_expression_prompt")
personality_expression_prompt = """
{personality}
请从以上人设中总结出这个角色可能的语言风格
思考回复语法,长度和情感
思考有没有特殊的梗,一并总结成语言风格
总结成如下格式的规律,总结的内容要详细,但具有概括性:
"xxx"时,可以"xxx", xxx不超过10个字
例如:
"表示十分惊叹"时,使用"我嘞个xxxx"
"表示讽刺的赞同,不想讲道理"时,使用"对对对"
"想表达某个观点,但不想明说",使用"反讽的句式"
"想说明某个观点,但懒得明说",使用"懂的都懂"
"想搞笑的表现高深的感觉",使用"文言文句式"
现在请你概括
"""
Prompt(personality_expression_prompt, "personality_expression_prompt")
peronality = "情绪敏感,有时候有些搞怪幽默, 是一个女大学生,现在在读大二,你会刷贴吧"
class ExpressionLearner: class ExpressionLearner:
def __init__(self) -> None: def __init__(self) -> None:
@@ -47,14 +69,22 @@ class ExpressionLearner:
request_type="response_heartflow", request_type="response_heartflow",
) )
async def get_expression_by_chat_id(self, chat_id: str) -> List[Dict[str, str]]: async def get_expression_by_chat_id(self, chat_id: str) -> Tuple[List[Dict[str, str]], List[Dict[str, str]]]:
"""从/data/expression/对应chat_id/expressions.json中读取表达方式""" """
file_path: str = os.path.join("data", "expression", str(chat_id), "expressions.json") 读取/data/expression/learnt/{chat_id}/expressions.json和/data/expression/personality/expressions.json
if not os.path.exists(file_path): 返回(learnt_expressions, personality_expressions)
return [] """
with open(file_path, "r", encoding="utf-8") as f: learnt_file = os.path.join("data", "expression", "learnt", str(chat_id), "expressions.json")
expressions: List[dict] = json.load(f) personality_file = os.path.join("data", "expression", "personality", "expressions.json")
return expressions learnt_expressions = []
personality_expressions = []
if os.path.exists(learnt_file):
with open(learnt_file, "r", encoding="utf-8") as f:
learnt_expressions = json.load(f)
if os.path.exists(personality_file):
with open(personality_file, "r", encoding="utf-8") as f:
personality_expressions = json.load(f)
return learnt_expressions, personality_expressions
def is_similar(self, s1: str, s2: str) -> bool: def is_similar(self, s1: str, s2: str) -> bool:
""" """
@@ -85,7 +115,7 @@ class ExpressionLearner:
chat_dict[chat_id].append({"situation": situation, "style": style}) chat_dict[chat_id].append({"situation": situation, "style": style})
# 存储到/data/expression/对应chat_id/expressions.json # 存储到/data/expression/对应chat_id/expressions.json
for chat_id, expr_list in chat_dict.items(): for chat_id, expr_list in chat_dict.items():
dir_path = os.path.join("data", "expression", str(chat_id)) dir_path = os.path.join("data", "expression", "learnt", str(chat_id))
os.makedirs(dir_path, exist_ok=True) os.makedirs(dir_path, exist_ok=True)
file_path = os.path.join(dir_path, "expressions.json") file_path = os.path.join(dir_path, "expressions.json")
# 若已存在,先读出合并 # 若已存在,先读出合并
@@ -188,6 +218,38 @@ class ExpressionLearner:
expressions.append((chat_id, situation, style)) expressions.append((chat_id, situation, style))
return expressions return expressions
async def extract_and_store_personality_expressions(self):
    """
    Generate expression habits from the persona text and save them to disk.

    Makes sure the data/expression/personality directory exists, formats the
    "personality_expression_prompt" template with the module-level
    ``peronality`` string, sends it to ``self.express_learn_model`` and parses
    the reply with ``self.parse_expression_response``. Every parsed entry is
    stored with count=100; when more than 50 entries come back, randomly chosen
    ones are dropped until 50 remain. The result overwrites expressions.json.
    """
    target_dir = os.path.join("data", "expression", "personality")
    os.makedirs(target_dir, exist_ok=True)
    target_path = os.path.join(target_dir, "expressions.json")

    # Build the prompt from the persona description.
    prompt = await global_prompt_manager.format_prompt(
        "personality_expression_prompt",
        personality=peronality,
    )
    logger.info(f"个性表达方式提取prompt: {prompt}")
    response, _ = await self.express_learn_model.generate_response_async(prompt)
    logger.info(f"个性表达方式提取response: {response}")

    # "personality" is passed where a chat id is normally expected.
    parsed = self.parse_expression_response(response, "personality")
    entries = [
        {"situation": situation, "style": style, "count": 100}
        for _, situation, style in parsed
    ]

    # Cap at 50 entries: pick the surplus positions at random and drop them,
    # keeping the survivors in their original order.
    surplus = len(entries) - 50
    if surplus > 0:
        dropped = set(random.sample(range(len(entries)), surplus))
        entries = [
            entry for position, entry in enumerate(entries) if position not in dropped
        ]

    with open(target_path, "w", encoding="utf-8") as handle:
        json.dump(entries, handle, ensure_ascii=False, indent=2)
    logger.info(f"已写入{len(entries)}条表达到{target_path}")
init_prompt() init_prompt()

View File

@@ -246,14 +246,22 @@ async def _build_prompt_focus(
structured_info_prompt = "" structured_info_prompt = ""
# 从/data/expression/对应chat_id/expressions.json中读取表达方式 # 从/data/expression/对应chat_id/expressions.json中读取表达方式
expressions = await expression_learner.get_expression_by_chat_id(chat_stream.stream_id) learnt_expressions, personality_expressions = await expression_learner.get_expression_by_chat_id(chat_stream.stream_id)
if expressions: language_habits = []
language_habits = [] # 1. learnt_expressions加权随机选5条
for expr in expressions: if learnt_expressions:
# print(f"expr: {expr}") weights = [expr["count"] for expr in learnt_expressions]
selected_learnt = weighted_sample_no_replacement(learnt_expressions, weights, 5)
for expr in selected_learnt:
if isinstance(expr, dict) and "situation" in expr and "style" in expr: if isinstance(expr, dict) and "situation" in expr and "style" in expr:
language_habits.append(f"{expr['situation']}时,使用 {expr['style']}") language_habits.append(f"{expr['situation']}时,使用 {expr['style']}")
else: # 2. personality_expressions随机选1条
if personality_expressions:
expr = random.choice(personality_expressions)
if isinstance(expr, dict) and "situation" in expr and "style" in expr:
language_habits.append(f"{expr['situation']}时,使用 {expr['style']}")
# 3. 如果都没有,给默认
if not language_habits:
language_habits = [ language_habits = [
"当表示惊叹时,使用 我嘞个xxxx", "当表示惊叹时,使用 我嘞个xxxx",
"当表示惊讶无语是,使用 不是?", "当表示惊讶无语是,使用 不是?",
@@ -264,8 +272,8 @@ async def _build_prompt_focus(
"当你想表达某个观点,但不想明说,可以使用反讽", "当你想表达某个观点,但不想明说,可以使用反讽",
"当你想说明某个观点,但懒得明说,使用 懂的都懂", "当你想说明某个观点,但懒得明说,使用 懂的都懂",
] ]
selected = random.sample(language_habits, 8) language_habits = random.sample(language_habits, 6)
language_habits_str = "\n".join(selected) language_habits_str = "\n".join(language_habits)
logger.debug("开始构建 focus prompt") logger.debug("开始构建 focus prompt")
@@ -855,5 +863,37 @@ class PromptBuilder:
return "[构建 Planner Prompt 时出错]" return "[构建 Planner Prompt 时出错]"
def weighted_sample_no_replacement(items, weights, k):
    """
    Draw up to k elements at random, weighted, without replacement.

    Args:
        items: list of candidate elements.
        weights: weight of each element; same length as ``items`` and
            expected to be positive.
        k: number of elements to draw.

    Returns:
        A list of the drawn elements, in draw order. At most
        ``min(k, len(items))`` elements are returned.

    Approach:
        Each round picks one element from the remaining pool with probability
        proportional to its weight, then removes it from the pool. A larger
        weight therefore means a higher chance of being picked, and no element
        can be picked twice.
    """
    remaining = [(candidate, weight) for candidate, weight in zip(items, weights)]
    picks = []
    for _ in range(min(k, len(remaining))):
        # One uniform draw over the total remaining weight per round.
        threshold = random.uniform(0, sum(weight for _, weight in remaining))
        running = 0
        hit = None
        # The first entry whose cumulative weight reaches the draw wins.
        for position, (_, weight) in enumerate(remaining):
            running += weight
            if running >= threshold:
                hit = position
                break
        if hit is not None:
            picks.append(remaining.pop(hit)[0])
    return picks
init_prompt() init_prompt()
prompt_builder = PromptBuilder() prompt_builder = PromptBuilder()

View File

@@ -248,7 +248,13 @@ async def _build_readable_messages_internal(
new_content += content[last_end:] new_content += content[last_end:]
content = new_content content = new_content
message_details_raw.append((timestamp, person_name, content)) target_str = "这是QQ的一个功能用于提及某人但没那么明显"
if target_str in content:
if random.random() < 0.6:
content = content.replace(target_str, "")
if content != "":
message_details_raw.append((timestamp, person_name, content))
if not message_details_raw: if not message_details_raw:
return "", [] return "", []