feat:优化表达方式学习,太久没学的会抛弃,提供检查脚本
This commit is contained in:
@@ -12,6 +12,8 @@ import json
|
||||
|
||||
|
||||
MAX_EXPRESSION_COUNT = 300
|
||||
DECAY_DAYS = 30 # 30天衰减到0.01
|
||||
DECAY_MIN = 0.01 # 最小衰减值
|
||||
|
||||
logger = get_logger("expressor")
|
||||
|
||||
@@ -30,9 +32,10 @@ def init_prompt() -> None:
|
||||
当"xxx"时,可以"xxx", xxx不超过10个字
|
||||
|
||||
例如:
|
||||
当"表示十分惊叹"时,使用"我嘞个xxxx"
|
||||
当"表示十分惊叹,有些意外"时,使用"我嘞个xxxx"
|
||||
当"表示讽刺的赞同,不想讲道理"时,使用"对对对"
|
||||
当"想说明某个观点,但懒得明说",使用"懂的都懂"
|
||||
当"想说明某个观点,但懒得明说,或者不便明说",使用"懂的都懂"
|
||||
当"表示意外的夸赞,略带戏谑意味"时,使用"这么强!"
|
||||
|
||||
注意不要总结你自己(SELF)的发言
|
||||
现在请你概括
|
||||
@@ -109,16 +112,62 @@ class ExpressionLearner:
|
||||
"""
|
||||
学习并存储表达方式,分别学习语言风格和句法特点
|
||||
"""
|
||||
learnt_style: Optional[List[Tuple[str, str, str]]] = await self.learn_and_store(type="style", num=15)
|
||||
if not learnt_style:
|
||||
return []
|
||||
for i in range(3):
|
||||
learnt_style: Optional[List[Tuple[str, str, str]]] = await self.learn_and_store(type="style", num=15)
|
||||
if not learnt_style:
|
||||
return []
|
||||
|
||||
learnt_grammar: Optional[List[Tuple[str, str, str]]] = await self.learn_and_store(type="grammar", num=15)
|
||||
if not learnt_grammar:
|
||||
return []
|
||||
for i in range(1):
|
||||
learnt_grammar: Optional[List[Tuple[str, str, str]]] = await self.learn_and_store(type="grammar", num=15)
|
||||
if not learnt_grammar:
|
||||
return []
|
||||
|
||||
return learnt_style, learnt_grammar
|
||||
|
||||
def calculate_decay_factor(self, time_diff_days: float) -> float:
    """Return the decay multiplier for an expression of the given age.

    The curve is anchored at three points:

    * age 0 days            -> DECAY_MIN
    * age 7 days (the peak) -> 1.0
    * age DECAY_DAYS days   -> DECAY_MIN

    A single parabola with its vertex at day 7 is symmetric around that
    day, so it cannot pass through both end points (it would already be
    back at DECAY_MIN on day 14).  Each side of the peak therefore gets
    its own quadratic branch in vertex form ``y = a * (x - h) ** 2 + k``.

    Args:
        time_diff_days: Days elapsed since the expression was last active.

    Returns:
        A factor in ``[DECAY_MIN, 1.0]``.  Ages ``<= 0`` or
        ``>= DECAY_DAYS`` always yield ``DECAY_MIN``.
    """
    if time_diff_days <= 0 or time_diff_days >= DECAY_DAYS:
        return DECAY_MIN

    peak_day = 7.0  # vertex x coordinate (h)
    peak_value = 1.0  # vertex y coordinate (k)

    # Horizontal distance from the peak to the end point of the branch the
    # age falls on.  If DECAY_DAYS <= peak_day, the guard above guarantees
    # the age is below the peak, so the right-hand span is never used and
    # no division by zero can occur.
    if time_diff_days <= peak_day:
        span = peak_day
    else:
        span = DECAY_DAYS - peak_day

    # Solve DECAY_MIN = a * span ** 2 + peak_value for a.
    a = (DECAY_MIN - peak_value) / span**2

    decay = a * (time_diff_days - peak_day) ** 2 + peak_value
    # Clamp to guard against floating point drift at the branch ends.
    return max(DECAY_MIN, min(1.0, decay))
|
||||
|
||||
def apply_decay_to_expressions(self, expressions: List[Dict[str, Any]], current_time: float) -> List[Dict[str, Any]]:
    """Scale every expression's ``count`` by its age-based decay factor.

    Each dict in ``expressions`` is updated in place: its ``count``
    (default 1 when missing) is multiplied by
    ``self.calculate_decay_factor(age_in_days)``, where the age is derived
    from ``last_active_time``.  An entry without ``last_active_time`` is
    treated as if it was active at ``current_time`` (age zero).

    Args:
        expressions: Expression records to decay.
        current_time: Reference timestamp in seconds.

    Returns:
        A new list holding the same dict objects whose decayed ``count``
        is still strictly positive.

    NOTE(review): an entry is only dropped when the decay factor makes its
    count non-positive -- confirm that calculate_decay_factor can actually
    produce such a factor, otherwise nothing is ever removed here.
    """
    seconds_per_day = 24 * 3600
    survivors = []
    for entry in expressions:
        # Missing timestamps fall back to "now", i.e. an age of zero days.
        last_seen = entry.get("last_active_time", current_time)
        age_days = (current_time - last_seen) / seconds_per_day

        factor = self.calculate_decay_factor(age_days)
        entry["count"] = entry.get("count", 1) * factor

        if entry["count"] > 0:
            survivors.append(entry)

    return survivors
|
||||
|
||||
async def learn_and_store(self, type: str, num: int = 10) -> List[Tuple[str, str, str]]:
|
||||
"""
|
||||
选择从当前到最近1小时内的随机num条消息,然后学习这些消息的表达方式
|
||||
@@ -130,7 +179,7 @@ class ExpressionLearner:
|
||||
type_str = "句法特点"
|
||||
else:
|
||||
raise ValueError(f"Invalid type: {type}")
|
||||
# logger.info(f"开始学习{type_str}...")
|
||||
|
||||
res = await self.learn_expression(type, num)
|
||||
|
||||
if res is None:
|
||||
@@ -146,7 +195,6 @@ class ExpressionLearner:
|
||||
for _chat_id, situation, style in learnt_expressions:
|
||||
learnt_expressions_str += f"{situation}->{style}\n"
|
||||
logger.info(f"在 {group_name} 学习到{type_str}:\n{learnt_expressions_str}")
|
||||
# learnt_expressions: List[(chat_id, situation, style)]
|
||||
|
||||
if not learnt_expressions:
|
||||
logger.info(f"没有学习到{type_str}")
|
||||
@@ -158,29 +206,27 @@ class ExpressionLearner:
|
||||
if chat_id not in chat_dict:
|
||||
chat_dict[chat_id] = []
|
||||
chat_dict[chat_id].append({"situation": situation, "style": style})
|
||||
|
||||
current_time = time.time()
|
||||
|
||||
# 存储到/data/expression/对应chat_id/expressions.json
|
||||
for chat_id, expr_list in chat_dict.items():
|
||||
dir_path = os.path.join("data", "expression", f"learnt_{type}", str(chat_id))
|
||||
os.makedirs(dir_path, exist_ok=True)
|
||||
file_path = os.path.join(dir_path, "expressions.json")
|
||||
|
||||
# 若已存在,先读出合并
|
||||
old_data: List[Dict[str, Any]] = []
|
||||
if os.path.exists(file_path):
|
||||
old_data: List[Dict[str, str, str]] = []
|
||||
try:
|
||||
with open(file_path, "r", encoding="utf-8") as f:
|
||||
old_data = json.load(f)
|
||||
except Exception:
|
||||
old_data = []
|
||||
else:
|
||||
old_data = []
|
||||
# 超过最大数量时,20%概率移除count=1的项
|
||||
if len(old_data) >= MAX_EXPRESSION_COUNT:
|
||||
new_old_data = []
|
||||
for item in old_data:
|
||||
if item.get("count", 1) == 1 and random.random() < 0.2:
|
||||
continue # 20%概率移除
|
||||
new_old_data.append(item)
|
||||
old_data = new_old_data
|
||||
|
||||
# 应用衰减
|
||||
old_data = self.apply_decay_to_expressions(old_data, current_time)
|
||||
|
||||
# 合并逻辑
|
||||
for new_expr in expr_list:
|
||||
found = False
|
||||
@@ -194,12 +240,16 @@ class ExpressionLearner:
|
||||
old_expr["situation"] = new_expr["situation"]
|
||||
old_expr["style"] = new_expr["style"]
|
||||
old_expr["count"] = old_expr.get("count", 1) + 1
|
||||
old_expr["last_active_time"] = current_time
|
||||
break
|
||||
if not found:
|
||||
new_expr["count"] = 1
|
||||
new_expr["last_active_time"] = current_time
|
||||
old_data.append(new_expr)
|
||||
|
||||
with open(file_path, "w", encoding="utf-8") as f:
|
||||
json.dump(old_data, f, ensure_ascii=False, indent=2)
|
||||
|
||||
return learnt_expressions
|
||||
|
||||
async def learn_expression(self, type: str, num: int = 10) -> Optional[Tuple[List[Tuple[str, str, str]], str]]:
|
||||
|
||||
@@ -49,7 +49,7 @@ class RelationshipProcessor(BaseProcessor):
|
||||
self.llm_model = LLMRequest(
|
||||
model=global_config.model.relation,
|
||||
max_tokens=800,
|
||||
request_type="focus.processor.self_identify",
|
||||
request_type="relation",
|
||||
)
|
||||
|
||||
name = chat_manager.get_stream_name(self.subheartflow_id)
|
||||
|
||||
@@ -7,15 +7,18 @@ from typing import List, Tuple
|
||||
import os
|
||||
import json
|
||||
from datetime import datetime
|
||||
from src.individuality.individuality import individuality
|
||||
|
||||
logger = get_logger("expressor")
|
||||
|
||||
|
||||
def init_prompt() -> None:
|
||||
personality_expression_prompt = """
|
||||
{personality}
|
||||
你的人物设定:{personality}
|
||||
|
||||
请从以上人设中总结出这个角色可能的语言风格,你必须严格根据人设引申,不要输出例子
|
||||
你说话的表达方式:{expression_style}
|
||||
|
||||
请从以上表达方式中总结出这个角色可能的语言风格,你必须严格根据人设引申,不要输出例子
|
||||
思考回复的特殊内容和情感
|
||||
思考有没有特殊的梗,一并总结成语言风格
|
||||
总结成如下格式的规律,总结的内容要详细,但具有概括性:
|
||||
@@ -80,19 +83,27 @@ class PersonalityExpression:
|
||||
"""
|
||||
检查data/expression/personality目录,不存在则创建。
|
||||
用personality变量作为chat_str,调用LLM生成表达风格,解析后count=100,存储到expressions.json。
|
||||
如果expression_style发生变化,则删除旧的expressions.json并重置计数。
|
||||
如果expression_style、personality或identity发生变化,则删除旧的expressions.json并重置计数。
|
||||
对于相同的expression_style,最多计算self.max_calculations次。
|
||||
"""
|
||||
os.makedirs(os.path.dirname(self.expressions_file_path), exist_ok=True)
|
||||
|
||||
current_style_text = global_config.expression.expression_style
|
||||
current_personality = individuality.get_personality_prompt(x_person=2, level=2)
|
||||
current_identity = individuality.get_identity_prompt(x_person=2, level=2)
|
||||
|
||||
meta_data = self._read_meta_data()
|
||||
|
||||
last_style_text = meta_data.get("last_style_text")
|
||||
last_personality = meta_data.get("last_personality")
|
||||
last_identity = meta_data.get("last_identity")
|
||||
count = meta_data.get("count", 0)
|
||||
|
||||
if current_style_text != last_style_text:
|
||||
logger.info(f"表达风格已从 '{last_style_text}' 变为 '{current_style_text}'。重置计数。")
|
||||
# 检查是否有任何变化
|
||||
if (current_style_text != last_style_text or
|
||||
current_personality != last_personality or
|
||||
current_identity != last_identity):
|
||||
logger.info(f"检测到变化:\n风格: '{last_style_text}' -> '{current_style_text}'\n人格: '{last_personality}' -> '{current_personality}'\n身份: '{last_identity}' -> '{current_identity}'")
|
||||
count = 0
|
||||
if os.path.exists(self.expressions_file_path):
|
||||
try:
|
||||
@@ -102,11 +113,13 @@ class PersonalityExpression:
|
||||
logger.error(f"删除旧的表达文件 {self.expressions_file_path} 失败: {e}")
|
||||
|
||||
if count >= self.max_calculations:
|
||||
logger.debug(f"对于风格 '{current_style_text}' 已达到最大计算次数 ({self.max_calculations})。跳过提取。")
|
||||
# 即使跳过,也更新元数据以反映当前风格已被识别且计数已满
|
||||
logger.debug(f"对于当前配置已达到最大计算次数 ({self.max_calculations})。跳过提取。")
|
||||
# 即使跳过,也更新元数据以反映当前配置已被识别且计数已满
|
||||
self._write_meta_data(
|
||||
{
|
||||
"last_style_text": current_style_text,
|
||||
"last_personality": current_personality,
|
||||
"last_identity": current_identity,
|
||||
"count": count,
|
||||
"last_update_time": meta_data.get("last_update_time"),
|
||||
}
|
||||
@@ -116,18 +129,20 @@ class PersonalityExpression:
|
||||
# 构建prompt
|
||||
prompt = await global_prompt_manager.format_prompt(
|
||||
"personality_expression_prompt",
|
||||
personality=current_style_text,
|
||||
personality=current_personality,
|
||||
expression_style=current_style_text,
|
||||
)
|
||||
# logger.info(f"个性表达方式提取prompt: {prompt}")
|
||||
|
||||
try:
|
||||
response, _ = await self.express_learn_model.generate_response_async(prompt)
|
||||
except Exception as e:
|
||||
logger.error(f"个性表达方式提取失败: {e}")
|
||||
# 如果提取失败,保存当前的风格和未增加的计数
|
||||
# 如果提取失败,保存当前的配置和未增加的计数
|
||||
self._write_meta_data(
|
||||
{
|
||||
"last_style_text": current_style_text,
|
||||
"last_personality": current_personality,
|
||||
"last_identity": current_identity,
|
||||
"count": count,
|
||||
"last_update_time": meta_data.get("last_update_time"),
|
||||
}
|
||||
@@ -135,7 +150,6 @@ class PersonalityExpression:
|
||||
return
|
||||
|
||||
logger.info(f"个性表达方式提取response: {response}")
|
||||
# chat_id用personality
|
||||
|
||||
# 转为dict并count=100
|
||||
if response != "":
|
||||
@@ -183,9 +197,15 @@ class PersonalityExpression:
|
||||
count += 1
|
||||
current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
||||
self._write_meta_data(
|
||||
{"last_style_text": current_style_text, "count": count, "last_update_time": current_time}
|
||||
{
|
||||
"last_style_text": current_style_text,
|
||||
"last_personality": current_personality,
|
||||
"last_identity": current_identity,
|
||||
"count": count,
|
||||
"last_update_time": current_time
|
||||
}
|
||||
)
|
||||
logger.info(f"成功处理。风格 '{current_style_text}' 的计数现在是 {count},最后更新时间:{current_time}。")
|
||||
logger.info(f"成功处理。当前配置的计数现在是 {count},最后更新时间:{current_time}。")
|
||||
else:
|
||||
logger.warning(f"个性表达方式提取失败,模型返回空内容: {response}")
|
||||
|
||||
|
||||
@@ -17,12 +17,12 @@ class ImpressionUpdateTask(AsyncTask):
|
||||
super().__init__(
|
||||
task_name="impression_update",
|
||||
wait_before_start=5, # 启动后等待10秒
|
||||
run_interval=10, # 每1分钟运行一次
|
||||
run_interval=20, # 每1分钟运行一次
|
||||
)
|
||||
|
||||
async def run(self):
|
||||
try:
|
||||
if random.random() < 0.5:
|
||||
if random.random() < 0.1:
|
||||
# 获取最近10分钟的消息
|
||||
current_time = int(time.time())
|
||||
start_time = current_time - 6000 # 10分钟前
|
||||
@@ -30,7 +30,7 @@ class ImpressionUpdateTask(AsyncTask):
|
||||
else:
|
||||
now = int(time.time())
|
||||
# 30天前的时间戳
|
||||
month_ago = now - 30 * 24 * 60 * 60
|
||||
month_ago = now - 90 * 24 * 60 * 60
|
||||
# 随机选择一个小时的起点
|
||||
random_start = random.randint(month_ago, now - 3600)
|
||||
start_time = random_start
|
||||
|
||||
@@ -228,7 +228,7 @@ class RelationshipManager:
|
||||
readable_messages = build_readable_messages(
|
||||
messages=user_messages,
|
||||
replace_bot_name=True,
|
||||
timestamp_mode="relative",
|
||||
timestamp_mode="normal",
|
||||
truncate=False)
|
||||
|
||||
|
||||
@@ -263,7 +263,8 @@ class RelationshipManager:
|
||||
|
||||
new_impression, _ = await self.relationship_llm.generate_response_async(prompt=prompt)
|
||||
|
||||
logger.debug(f"new_impression: {new_impression}")
|
||||
logger.info(f"prompt: {prompt}")
|
||||
logger.info(f"new_impression: {new_impression}")
|
||||
|
||||
prompt_json = f"""
|
||||
你的名字是{global_config.bot.nickname},别名是{alias_str}。
|
||||
@@ -274,8 +275,8 @@ class RelationshipManager:
|
||||
|
||||
请用json格式总结对{person_name}(昵称:{nickname})的印象,要求:
|
||||
1.总结出这个人的最核心的性格,可能在这段话里看不出,总结不出来的话,就输出空字符串
|
||||
2.尝试猜测这个人的性别,如果看不出来,就输出空字符串
|
||||
3.尝试猜测自己与这个人的关系,你与ta的交互,还可以思考是积极还是消极,以及具体内容
|
||||
2.尝试猜测这个人的性别
|
||||
3.尝试猜测自己与这个人的关系,你与ta的交互,思考是积极还是消极,以及具体内容
|
||||
4.尝试猜测这个人的身份,比如职业,兴趣爱好,生活状态等
|
||||
5.尝试总结你与他之间是否有一些独特的梗,如果有,就输出梗的内容,如果没有,就输出空字符串
|
||||
|
||||
|
||||
Reference in New Issue
Block a user