feat:根据人格生成言语风格
This commit is contained in:
@@ -108,6 +108,9 @@ class MainSystem:
|
||||
)
|
||||
logger.success("个体特征初始化成功")
|
||||
|
||||
# 初始化表达方式
|
||||
await expression_learner.extract_and_store_personality_expressions()
|
||||
|
||||
try:
|
||||
# 启动全局消息管理器 (负责消息发送/排队)
|
||||
await message_manager.start()
|
||||
|
||||
@@ -33,10 +33,32 @@ def init_prompt() -> None:
|
||||
当"想说明某个观点,但懒得明说",使用"懂的都懂"
|
||||
当"想搞笑的表现高深的感觉",使用"文言文句式"
|
||||
|
||||
注意不要总结你自己的发言
|
||||
现在请你概括
|
||||
"""
|
||||
Prompt(learn_expression_prompt, "learn_expression_prompt")
|
||||
|
||||
personality_expression_prompt = """
|
||||
{personality}
|
||||
|
||||
请从以上人设中总结出这个角色可能的语言风格
|
||||
思考回复语法,长度和情感
|
||||
思考有没有特殊的梗,一并总结成语言风格
|
||||
总结成如下格式的规律,总结的内容要详细,但具有概括性:
|
||||
当"xxx"时,可以"xxx", xxx不超过10个字
|
||||
|
||||
例如:
|
||||
当"表示十分惊叹"时,使用"我嘞个xxxx"
|
||||
当"表示讽刺的赞同,不想讲道理"时,使用"对对对"
|
||||
当"想表达某个观点,但不想明说",使用"反讽的句式"
|
||||
当"想说明某个观点,但懒得明说",使用"懂的都懂"
|
||||
当"想搞笑的表现高深的感觉",使用"文言文句式"
|
||||
|
||||
现在请你概括
|
||||
"""
|
||||
Prompt(personality_expression_prompt, "personality_expression_prompt")
|
||||
|
||||
peronality = "情绪敏感,有时候有些搞怪幽默, 是一个女大学生,现在在读大二,你会刷贴吧"
|
||||
|
||||
class ExpressionLearner:
|
||||
def __init__(self) -> None:
|
||||
@@ -47,14 +69,22 @@ class ExpressionLearner:
|
||||
request_type="response_heartflow",
|
||||
)
|
||||
|
||||
async def get_expression_by_chat_id(self, chat_id: str) -> List[Dict[str, str]]:
|
||||
"""从/data/expression/对应chat_id/expressions.json中读取表达方式"""
|
||||
file_path: str = os.path.join("data", "expression", str(chat_id), "expressions.json")
|
||||
if not os.path.exists(file_path):
|
||||
return []
|
||||
with open(file_path, "r", encoding="utf-8") as f:
|
||||
expressions: List[dict] = json.load(f)
|
||||
return expressions
|
||||
async def get_expression_by_chat_id(self, chat_id: str) -> Tuple[List[Dict[str, str]], List[Dict[str, str]]]:
    """Load the stored expressions that apply to one chat.

    Two JSON files are consulted, both relative to the current working
    directory:

    * ``data/expression/learnt/<chat_id>/expressions.json``
    * ``data/expression/personality/expressions.json``

    Args:
        chat_id: Identifier of the chat; converted with ``str()`` to form the
            directory name of the learnt-expressions file.

    Returns:
        A ``(learnt_expressions, personality_expressions)`` pair. Each side is
        whatever ``json.load`` produced for its file, or ``[]`` when that file
        does not exist.
    """
    base_dir = os.path.join("data", "expression")
    candidate_paths = (
        os.path.join(base_dir, "learnt", str(chat_id), "expressions.json"),
        os.path.join(base_dir, "personality", "expressions.json"),
    )

    loaded = []
    for path in candidate_paths:
        # A missing file is not an error: that side simply has no expressions yet.
        if not os.path.exists(path):
            loaded.append([])
            continue
        with open(path, "r", encoding="utf-8") as fh:
            loaded.append(json.load(fh))

    learnt_expressions, personality_expressions = loaded
    return learnt_expressions, personality_expressions
|
||||
|
||||
def is_similar(self, s1: str, s2: str) -> bool:
|
||||
"""
|
||||
@@ -85,7 +115,7 @@ class ExpressionLearner:
|
||||
chat_dict[chat_id].append({"situation": situation, "style": style})
|
||||
# 存储到/data/expression/对应chat_id/expressions.json
|
||||
for chat_id, expr_list in chat_dict.items():
|
||||
dir_path = os.path.join("data", "expression", str(chat_id))
|
||||
dir_path = os.path.join("data", "expression", "learnt", str(chat_id))
|
||||
os.makedirs(dir_path, exist_ok=True)
|
||||
file_path = os.path.join(dir_path, "expressions.json")
|
||||
# 若已存在,先读出合并
|
||||
@@ -188,6 +218,38 @@ class ExpressionLearner:
|
||||
expressions.append((chat_id, situation, style))
|
||||
return expressions
|
||||
|
||||
async def extract_and_store_personality_expressions(self):
    """Derive expression styles from the persona text and persist them.

    Steps:
      1. Ensure ``data/expression/personality`` exists.
      2. Format the ``personality_expression_prompt`` template with the
         module-level ``peronality`` string and send it to
         ``self.express_learn_model``.
      3. Parse the reply with ``self.parse_expression_response`` (called with
         ``"personality"`` as the chat id) and turn every parsed entry into a
         ``{"situation", "style", "count"}`` dict whose ``count`` is 100.
      4. If more than 50 entries were produced, discard randomly chosen ones
         until 50 remain, keeping the surviving entries in their parsed order.
      5. Overwrite ``expressions.json`` in that directory with the result.
    """
    personality_dir = os.path.join("data", "expression", "personality")
    os.makedirs(personality_dir, exist_ok=True)
    file_path = os.path.join(personality_dir, "expressions.json")

    # Build the prompt and query the model.
    prompt = await global_prompt_manager.format_prompt(
        "personality_expression_prompt",
        personality=peronality,
    )
    logger.info(f"个性表达方式提取prompt: {prompt}")
    response, _ = await self.express_learn_model.generate_response_async(prompt)
    logger.info(f"个性表达方式提取response: {response}")

    # The first element of each parsed tuple is ignored here; only the
    # situation/style pair is stored.
    parsed = self.parse_expression_response(response, "personality")
    result = [
        {"situation": situation, "style": style, "count": 100}
        for _, situation, style in parsed
    ]

    # Cap the stored list at 50 entries by dropping a random subset.
    max_entries = 50
    surplus = len(result) - max_entries
    if surplus > 0:
        dropped = set(random.sample(range(len(result)), surplus))
        result = [entry for position, entry in enumerate(result) if position not in dropped]

    with open(file_path, "w", encoding="utf-8") as f:
        json.dump(result, f, ensure_ascii=False, indent=2)
    logger.info(f"已写入{len(result)}条表达到{file_path}")
|
||||
|
||||
|
||||
init_prompt()
|
||||
|
||||
|
||||
@@ -246,14 +246,22 @@ async def _build_prompt_focus(
|
||||
structured_info_prompt = ""
|
||||
|
||||
# 从/data/expression/对应chat_id/expressions.json中读取表达方式
|
||||
expressions = await expression_learner.get_expression_by_chat_id(chat_stream.stream_id)
|
||||
if expressions:
|
||||
learnt_expressions, personality_expressions = await expression_learner.get_expression_by_chat_id(chat_stream.stream_id)
|
||||
language_habits = []
|
||||
for expr in expressions:
|
||||
# print(f"expr: {expr}")
|
||||
# 1. learnt_expressions加权随机选5条
|
||||
if learnt_expressions:
|
||||
weights = [expr["count"] for expr in learnt_expressions]
|
||||
selected_learnt = weighted_sample_no_replacement(learnt_expressions, weights, 5)
|
||||
for expr in selected_learnt:
|
||||
if isinstance(expr, dict) and "situation" in expr and "style" in expr:
|
||||
language_habits.append(f"当{expr['situation']}时,使用 {expr['style']}")
|
||||
else:
|
||||
# 2. personality_expressions随机选1条
|
||||
if personality_expressions:
|
||||
expr = random.choice(personality_expressions)
|
||||
if isinstance(expr, dict) and "situation" in expr and "style" in expr:
|
||||
language_habits.append(f"当{expr['situation']}时,使用 {expr['style']}")
|
||||
# 3. 如果都没有,给默认
|
||||
if not language_habits:
|
||||
language_habits = [
|
||||
"当表示惊叹时,使用 我嘞个xxxx",
|
||||
"当表示惊讶无语是,使用 不是?",
|
||||
@@ -264,8 +272,8 @@ async def _build_prompt_focus(
|
||||
"当你想表达某个观点,但不想明说,可以使用反讽",
|
||||
"当你想说明某个观点,但懒得明说,使用 懂的都懂",
|
||||
]
|
||||
selected = random.sample(language_habits, 8)
|
||||
language_habits_str = "\n".join(selected)
|
||||
language_habits = random.sample(language_habits, 6)
|
||||
language_habits_str = "\n".join(language_habits)
|
||||
|
||||
logger.debug("开始构建 focus prompt")
|
||||
|
||||
@@ -855,5 +863,37 @@ class PromptBuilder:
|
||||
return "[构建 Planner Prompt 时出错]"
|
||||
|
||||
|
||||
def weighted_sample_no_replacement(items, weights, k):
    """Draw up to ``k`` distinct elements from ``items``, biased by ``weights``.

    Each round picks one element from the remaining pool with probability
    proportional to its weight, removes it from the pool, and repeats. A larger
    weight therefore means a higher chance of being chosen, and no element is
    returned twice.

    Args:
        items: Candidate elements.
        weights: One weight per element, expected to be positive. If the two
            sequences differ in length, the extra entries of the longer one
            are ignored (``zip`` semantics).
        k: Number of elements requested.

    Returns:
        A list of ``min(k, number of candidates)`` elements in the order they
        were drawn; empty when ``k`` is not positive or there are no candidates.
    """
    pool = list(zip(items, weights))
    selected = []
    for _ in range(min(k, len(pool))):
        total = sum(weight for _, weight in pool)
        threshold = random.uniform(0, total)

        # Default to the last entry. With float weights the running sum below
        # can end up marginally smaller than ``total`` (``sum`` may use a more
        # accurate summation), and ``random.uniform`` may return its upper
        # bound, so the scan is not guaranteed to cross ``threshold``. Without
        # this default such a round would silently select nothing and the
        # result would come back short.
        chosen = len(pool) - 1
        cumulative = 0
        for idx, (_, weight) in enumerate(pool):
            cumulative += weight
            if cumulative >= threshold:
                chosen = idx
                break

        selected.append(pool.pop(chosen)[0])
    return selected
|
||||
|
||||
|
||||
init_prompt()
|
||||
prompt_builder = PromptBuilder()
|
||||
|
||||
@@ -248,6 +248,12 @@ async def _build_readable_messages_internal(
|
||||
new_content += content[last_end:]
|
||||
content = new_content
|
||||
|
||||
target_str = "这是QQ的一个功能,用于提及某人,但没那么明显"
|
||||
if target_str in content:
|
||||
if random.random() < 0.6:
|
||||
content = content.replace(target_str, "")
|
||||
|
||||
if content != "":
|
||||
message_details_raw.append((timestamp, person_name, content))
|
||||
|
||||
if not message_details_raw:
|
||||
|
||||
Reference in New Issue
Block a user