feat: optimize expression learning, discard expressions that have not been reinforced for too long, and provide an inspection script

scripts/analyze_expressions.py | 203 lines added (new file)

@@ -0,0 +1,203 @@
import os
import json
import time
import re
from datetime import datetime
from typing import Dict, List, Any
import pandas as pd
from pathlib import Path
import sqlite3


def clean_group_name(name: str) -> str:
    """Clean a group name, keeping only Chinese and English characters."""
    cleaned = re.sub(r'[^\u4e00-\u9fa5a-zA-Z]', '', name)
    # Fall back to the current date if nothing remains after cleaning
    if not cleaned:
        cleaned = datetime.now().strftime("%Y%m%d")
    return cleaned


def get_group_name(stream_id: str) -> str:
    """Look up the group name for a stream from the database."""
    conn = sqlite3.connect("data/maibot.db")
    cursor = conn.cursor()

    cursor.execute(
        """
        SELECT group_name, user_nickname, platform
        FROM chat_streams
        WHERE stream_id = ?
        """,
        (stream_id,),
    )

    result = cursor.fetchone()
    conn.close()

    if result:
        group_name, user_nickname, platform = result
        if group_name:
            return clean_group_name(group_name)
        if user_nickname:
            return clean_group_name(user_nickname)
        if platform:
            return clean_group_name(f"{platform}{stream_id[:8]}")
    return stream_id


def load_expressions(chat_id: str) -> tuple[List[Dict[str, Any]], List[Dict[str, Any]], List[Dict[str, Any]]]:
    """Load the expressions learned for a given chat."""
    learnt_style_file = os.path.join("data", "expression", "learnt_style", str(chat_id), "expressions.json")
    learnt_grammar_file = os.path.join("data", "expression", "learnt_grammar", str(chat_id), "expressions.json")
    personality_file = os.path.join("data", "expression", "personality", "expressions.json")

    style_expressions = []
    grammar_expressions = []
    personality_expressions = []

    if os.path.exists(learnt_style_file):
        with open(learnt_style_file, "r", encoding="utf-8") as f:
            style_expressions = json.load(f)

    if os.path.exists(learnt_grammar_file):
        with open(learnt_grammar_file, "r", encoding="utf-8") as f:
            grammar_expressions = json.load(f)

    if os.path.exists(personality_file):
        with open(personality_file, "r", encoding="utf-8") as f:
            personality_expressions = json.load(f)

    return style_expressions, grammar_expressions, personality_expressions


def format_time(timestamp: float) -> str:
    """Format a timestamp as a human-readable string."""
    return datetime.fromtimestamp(timestamp).strftime("%Y-%m-%d %H:%M:%S")


def write_expressions(f, expressions: List[Dict[str, Any]], title: str):
    """Write a list of expressions to a report file."""
    if not expressions:
        f.write(f"{title}:暂无数据\n")
        f.write("-" * 40 + "\n")
        return

    f.write(f"{title}:\n")
    for expr in expressions:
        count = expr.get("count", 0)
        last_active = expr.get("last_active_time", time.time())
        f.write(f"场景: {expr['situation']}\n")
        f.write(f"表达: {expr['style']}\n")
        f.write(f"计数: {count:.2f}\n")
        f.write(f"最后活跃: {format_time(last_active)}\n")
        f.write("-" * 40 + "\n")


def write_group_report(group_file: str, group_name: str, chat_id: str, style_exprs: List[Dict[str, Any]], grammar_exprs: List[Dict[str, Any]]):
    """Write the detailed report for a single group."""
    with open(group_file, "w", encoding="utf-8") as gf:
        gf.write(f"群组: {group_name} (ID: {chat_id})\n")
        gf.write("=" * 80 + "\n\n")

        # Language-style section
        gf.write("【语言风格】\n")
        gf.write("=" * 40 + "\n")
        write_expressions(gf, style_exprs, "语言风格")
        gf.write("\n")

        # Grammar / sentence-pattern section
        gf.write("【句法特点】\n")
        gf.write("=" * 40 + "\n")
        write_expressions(gf, grammar_exprs, "句法特点")


def analyze_expressions():
    """Analyze the learned expressions of every group."""
    # Collect all chat IDs
    style_dir = os.path.join("data", "expression", "learnt_style")
    chat_ids = [d for d in os.listdir(style_dir) if os.path.isdir(os.path.join(style_dir, d))]

    # Create the output directories
    output_dir = "data/expression_analysis"
    personality_dir = os.path.join(output_dir, "personality")
    os.makedirs(output_dir, exist_ok=True)
    os.makedirs(personality_dir, exist_ok=True)

    # Timestamp shared by all report file names
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    # Summary report
    summary_file = os.path.join(output_dir, f"summary_{timestamp}.txt")
    with open(summary_file, "w", encoding="utf-8") as f:
        f.write(f"表达方式分析报告 - 生成时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
        f.write("=" * 80 + "\n\n")

        # Handle personality expressions first
        personality_exprs = []
        personality_file = os.path.join("data", "expression", "personality", "expressions.json")
        if os.path.exists(personality_file):
            with open(personality_file, "r", encoding="utf-8") as pf:
                personality_exprs = json.load(pf)

        # Remember the total before truncating
        total_personality = len(personality_exprs)

        # Sort by count and keep the top 20
        personality_exprs.sort(key=lambda x: x.get("count", 0), reverse=True)
        personality_exprs = personality_exprs[:20]

        # Detailed personality report
        personality_report = os.path.join(personality_dir, f"expressions_{timestamp}.txt")
        with open(personality_report, "w", encoding="utf-8") as pf:
            pf.write("【人格表达方式】\n")
            pf.write("=" * 40 + "\n")
            write_expressions(pf, personality_exprs, "人格表达")

        # Personality section of the summary
        f.write("【人格表达方式】\n")
        f.write("=" * 40 + "\n")
        f.write(f"人格表达总数: {total_personality} (显示前20条)\n")
        f.write(f"详细报告: {personality_report}\n")
        f.write("-" * 40 + "\n\n")

        # Per-group expressions
        f.write("【群组表达方式】\n")
        f.write("=" * 40 + "\n\n")

        for chat_id in chat_ids:
            style_exprs, grammar_exprs, _ = load_expressions(chat_id)

            # Remember totals before truncating
            total_style = len(style_exprs)
            total_grammar = len(grammar_exprs)

            # Sort each list by count
            style_exprs.sort(key=lambda x: x.get("count", 0), reverse=True)
            grammar_exprs.sort(key=lambda x: x.get("count", 0), reverse=True)

            # Keep only the top 20
            style_exprs = style_exprs[:20]
            grammar_exprs = grammar_exprs[:20]

            # Resolve the group name
            group_name = get_group_name(chat_id)

            # Create the group subdirectory (using the cleaned name)
            safe_group_name = clean_group_name(group_name)
            group_dir = os.path.join(output_dir, f"{safe_group_name}_{chat_id}")
            os.makedirs(group_dir, exist_ok=True)

            # Detailed group report
            group_file = os.path.join(group_dir, f"expressions_{timestamp}.txt")
            write_group_report(group_file, group_name, chat_id, style_exprs, grammar_exprs)

            # Group section of the summary
            f.write(f"群组: {group_name} (ID: {chat_id})\n")
            f.write("-" * 40 + "\n")
            f.write(f"语言风格总数: {total_style} (显示前20条)\n")
            f.write(f"句法特点总数: {total_grammar} (显示前20条)\n")
            f.write(f"详细报告: {group_file}\n")
            f.write("-" * 40 + "\n\n")

    print("分析报告已生成:")
    print(f"总报告: {summary_file}")
    print(f"人格表达报告: {personality_report}")
    print(f"各群组详细报告位于: {output_dir}")


if __name__ == "__main__":
    analyze_expressions()
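For orientation, each record in the expressions.json files that the script summarizes is a small dict. The field names below come from the code above; the values are purely illustrative:

    # Hypothetical record from data/expression/learnt_style/<chat_id>/expressions.json
    sample_expression = {
        "situation": "表示十分惊叹,有些意外",  # situation in which the expression fits
        "style": "我嘞个xxxx",                # the learned wording
        "count": 3.42,                        # usage weight; decays between learning passes
        "last_active_time": 1718000000.0,     # unix timestamp of the last reinforcement
    }

The script is run from the repository root with python scripts/analyze_expressions.py; it expects data/maibot.db and the data/expression directories that the learner creates.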
@@ -12,6 +12,8 @@ import json

 MAX_EXPRESSION_COUNT = 300
+DECAY_DAYS = 30  # decays to 0.01 over 30 days
+DECAY_MIN = 0.01  # minimum decay factor

 logger = get_logger("expressor")
@@ -30,9 +32,10 @@ def init_prompt() -> None:
 当"xxx"时,可以"xxx", xxx不超过10个字

 例如:
-当"表示十分惊叹"时,使用"我嘞个xxxx"
+当"表示十分惊叹,有些意外"时,使用"我嘞个xxxx"
 当"表示讽刺的赞同,不想讲道理"时,使用"对对对"
-当"想说明某个观点,但懒得明说",使用"懂的都懂"
+当"想说明某个观点,但懒得明说,或者不便明说",使用"懂的都懂"
+当"表示意外的夸赞,略带戏谑意味"时,使用"这么强!"

 注意不要总结你自己(SELF)的发言
 现在请你概括
@@ -109,16 +112,62 @@ class ExpressionLearner:
         """
         Learn and store expressions; language style and grammar patterns are learned separately.
         """
+        for i in range(3):
             learnt_style: Optional[List[Tuple[str, str, str]]] = await self.learn_and_store(type="style", num=15)
             if not learnt_style:
                 return []

+        for i in range(1):
             learnt_grammar: Optional[List[Tuple[str, str, str]]] = await self.learn_and_store(type="grammar", num=15)
             if not learnt_grammar:
                 return []

         return learnt_style, learnt_grammar

+    def calculate_decay_factor(self, time_diff_days: float) -> float:
+        """
+        Compute the decay factor.
+        The factor is 0.01 at a time difference of 0 days and at 30 days or more,
+        and 1.0 at 7 days; a quadratic curve interpolates in between.
+        """
+        if time_diff_days <= 0 or time_diff_days >= DECAY_DAYS:
+            return DECAY_MIN
+
+        # Quadratic interpolation in vertex form y = a(x - h)^2 + k, with the vertex at (7 days, 1.0)
+        h = 7.0  # vertex x coordinate (days)
+        k = 1.0  # vertex y coordinate (peak factor)
+
+        # Choose a so that y = 0.01 at x = 0: 0.01 = a * (0 - 7)^2 + 1, hence a = -0.99 / 49.
+        # For larger time differences the curve drops below the floor and is clamped to DECAY_MIN.
+        a = -0.99 / 49
+
+        # Evaluate the curve and clamp it into [DECAY_MIN, 1.0]
+        decay = a * (time_diff_days - h) ** 2 + k
+        return max(DECAY_MIN, min(1.0, decay))
+
+    def apply_decay_to_expressions(self, expressions: List[Dict[str, Any]], current_time: float) -> List[Dict[str, Any]]:
+        """
+        Apply decay to a list of expressions.
+        Returns the decayed list, dropping entries whose count falls to zero or below.
+        """
+        result = []
+        for expr in expressions:
+            last_active = expr.get("last_active_time", current_time)
+            time_diff_days = (current_time - last_active) / (24 * 3600)  # seconds -> days
+
+            decay_factor = self.calculate_decay_factor(time_diff_days)
+            expr["count"] = expr.get("count", 1) * decay_factor
+
+            if expr["count"] > 0:
+                result.append(expr)
+
+        return result
+
     async def learn_and_store(self, type: str, num: int = 10) -> List[Tuple[str, str, str]]:
         """
         Pick num random messages from within the last hour and learn expression patterns from them.
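As a quick sanity check on the curve that calculate_decay_factor implements (a sketch, not part of the commit), the factor can be tabulated for a few ages:

    # Sketch: evaluate the decay curve used above at a few ages (in days)
    a, h, k = -0.99 / 49, 7.0, 1.0
    for days in (0, 3.5, 7, 14, 21, 30):
        raw = a * (days - h) ** 2 + k
        print(f"{days:>4} days -> factor {max(0.01, min(1.0, raw)):.2f}")
    # 0 days -> 0.01, 3.5 days -> 0.75, 7 days -> 1.00, 14 days -> 0.01
    # Beyond 14 days the quadratic is already below the floor, so the 0.01 clamp
    # (and the >= DECAY_DAYS guard) takes over well before the 30-day mark.

In other words, an expression reinforced about a week ago keeps its full count, while one reinforced very recently or more than two weeks ago is cut back sharply, down to the 0.01 floor.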
@@ -130,7 +179,7 @@ class ExpressionLearner:
             type_str = "句法特点"
         else:
             raise ValueError(f"Invalid type: {type}")
-        # logger.info(f"开始学习{type_str}...")
+
         res = await self.learn_expression(type, num)

         if res is None:
@@ -146,7 +195,6 @@ class ExpressionLearner:
         for _chat_id, situation, style in learnt_expressions:
             learnt_expressions_str += f"{situation}->{style}\n"
         logger.info(f"在 {group_name} 学习到{type_str}:\n{learnt_expressions_str}")
-        # learnt_expressions: List[(chat_id, situation, style)]

         if not learnt_expressions:
             logger.info(f"没有学习到{type_str}")
@@ -158,29 +206,27 @@ class ExpressionLearner:
             if chat_id not in chat_dict:
                 chat_dict[chat_id] = []
             chat_dict[chat_id].append({"situation": situation, "style": style})

+        current_time = time.time()
+
         # Store to data/expression/learnt_<type>/<chat_id>/expressions.json
         for chat_id, expr_list in chat_dict.items():
             dir_path = os.path.join("data", "expression", f"learnt_{type}", str(chat_id))
             os.makedirs(dir_path, exist_ok=True)
             file_path = os.path.join(dir_path, "expressions.json")

             # If the file already exists, read it first and merge
+            old_data: List[Dict[str, Any]] = []
             if os.path.exists(file_path):
-                old_data: List[Dict[str, str, str]] = []
                 try:
                     with open(file_path, "r", encoding="utf-8") as f:
                         old_data = json.load(f)
                 except Exception:
                     old_data = []
-            else:
-                old_data = []
-            # When over the size cap, drop count==1 items with 20% probability
-            if len(old_data) >= MAX_EXPRESSION_COUNT:
-                new_old_data = []
-                for item in old_data:
-                    if item.get("count", 1) == 1 and random.random() < 0.2:
-                        continue  # dropped with 20% probability
-                    new_old_data.append(item)
-                old_data = new_old_data
+
+            # Apply time-based decay to the stored expressions
+            old_data = self.apply_decay_to_expressions(old_data, current_time)
+
             # Merge logic
             for new_expr in expr_list:
                 found = False
@@ -194,12 +240,16 @@ class ExpressionLearner:
                     old_expr["situation"] = new_expr["situation"]
                     old_expr["style"] = new_expr["style"]
                     old_expr["count"] = old_expr.get("count", 1) + 1
+                    old_expr["last_active_time"] = current_time
                     break
             if not found:
                 new_expr["count"] = 1
+                new_expr["last_active_time"] = current_time
                 old_data.append(new_expr)

             with open(file_path, "w", encoding="utf-8") as f:
                 json.dump(old_data, f, ensure_ascii=False, indent=2)

         return learnt_expressions

     async def learn_expression(self, type: str, num: int = 10) -> Optional[Tuple[List[Tuple[str, str, str]], str]]:
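Taken together, the ExpressionLearner changes mean each chat's expressions.json is now maintained per learning pass roughly as follows. This is a condensed sketch under assumptions, not code from the commit: refresh_expressions and decay_factor are hypothetical names, and the exact criterion for matching a stored expression against a newly learnt one sits in context lines this diff does not show (plain equality is used below purely for illustration).

    import json, os, time

    def refresh_expressions(file_path, newly_learnt, decay_factor):
        """Sketch: decay the stored expressions, then merge the newly learnt ones."""
        old = []
        if os.path.exists(file_path):
            with open(file_path, "r", encoding="utf-8") as f:
                old = json.load(f)

        now = time.time()
        # Decay by time since the last reinforcement; entries that drop to 0 are discarded
        kept = []
        for e in old:
            age_days = (now - e.get("last_active_time", now)) / 86400
            e["count"] = e.get("count", 1) * decay_factor(age_days)
            if e["count"] > 0:
                kept.append(e)

        # Merge: reinforce a matching entry (count += 1, refresh last_active_time),
        # otherwise append the new expression with count = 1
        for new in newly_learnt:
            for e in kept:
                if e["situation"] == new["situation"] and e["style"] == new["style"]:
                    e["count"] = e.get("count", 1) + 1
                    e["last_active_time"] = now
                    break
            else:
                kept.append({**new, "count": 1, "last_active_time": now})

        with open(file_path, "w", encoding="utf-8") as f:
            json.dump(kept, f, ensure_ascii=False, indent=2)

Note that the previous size-based pruning (randomly dropping count==1 entries once MAX_EXPRESSION_COUNT was exceeded) is removed in this commit and replaced by the time-based decay above.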
@@ -49,7 +49,7 @@ class RelationshipProcessor(BaseProcessor):
         self.llm_model = LLMRequest(
             model=global_config.model.relation,
             max_tokens=800,
-            request_type="focus.processor.self_identify",
+            request_type="relation",
         )

         name = chat_manager.get_stream_name(self.subheartflow_id)
@@ -7,15 +7,18 @@ from typing import List, Tuple
 import os
 import json
 from datetime import datetime
+from src.individuality.individuality import individuality

 logger = get_logger("expressor")


 def init_prompt() -> None:
     personality_expression_prompt = """
-{personality}
+你的人物设定:{personality}

-请从以上人设中总结出这个角色可能的语言风格,你必须严格根据人设引申,不要输出例子
+你说话的表达方式:{expression_style}
+
+请从以上表达方式中总结出这个角色可能的语言风格,你必须严格根据人设引申,不要输出例子
 思考回复的特殊内容和情感
 思考有没有特殊的梗,一并总结成语言风格
 总结成如下格式的规律,总结的内容要详细,但具有概括性:
@@ -80,19 +83,27 @@ class PersonalityExpression:
         """
         Check the data/expression/personality directory and create it if missing.
         Use the personality variable as chat_str, have the LLM generate an expression style, parse it with count=100, and store it in expressions.json.
-        If expression_style changes, delete the old expressions.json and reset the count.
+        If expression_style, personality, or identity changes, delete the old expressions.json and reset the count.
         For the same expression_style, compute at most self.max_calculations times.
         """
         os.makedirs(os.path.dirname(self.expressions_file_path), exist_ok=True)

         current_style_text = global_config.expression.expression_style
+        current_personality = individuality.get_personality_prompt(x_person=2, level=2)
+        current_identity = individuality.get_identity_prompt(x_person=2, level=2)

         meta_data = self._read_meta_data()

         last_style_text = meta_data.get("last_style_text")
+        last_personality = meta_data.get("last_personality")
+        last_identity = meta_data.get("last_identity")
         count = meta_data.get("count", 0)

-        if current_style_text != last_style_text:
-            logger.info(f"表达风格已从 '{last_style_text}' 变为 '{current_style_text}'。重置计数。")
+        # Check whether anything has changed
+        if (current_style_text != last_style_text or
+            current_personality != last_personality or
+            current_identity != last_identity):
+            logger.info(f"检测到变化:\n风格: '{last_style_text}' -> '{current_style_text}'\n人格: '{last_personality}' -> '{current_personality}'\n身份: '{last_identity}' -> '{current_identity}'")
             count = 0
             if os.path.exists(self.expressions_file_path):
                 try:
@@ -102,11 +113,13 @@ class PersonalityExpression:
                     logger.error(f"删除旧的表达文件 {self.expressions_file_path} 失败: {e}")

         if count >= self.max_calculations:
-            logger.debug(f"对于风格 '{current_style_text}' 已达到最大计算次数 ({self.max_calculations})。跳过提取。")
-            # Even when skipping, update the metadata so the current style is recorded with a full count
+            logger.debug(f"对于当前配置已达到最大计算次数 ({self.max_calculations})。跳过提取。")
+            # Even when skipping, update the metadata so the current configuration is recorded with a full count
             self._write_meta_data(
                 {
                     "last_style_text": current_style_text,
+                    "last_personality": current_personality,
+                    "last_identity": current_identity,
                     "count": count,
                     "last_update_time": meta_data.get("last_update_time"),
                 }
@@ -116,18 +129,20 @@ class PersonalityExpression:
         # Build the prompt
         prompt = await global_prompt_manager.format_prompt(
             "personality_expression_prompt",
-            personality=current_style_text,
+            personality=current_personality,
+            expression_style=current_style_text,
         )
-        # logger.info(f"个性表达方式提取prompt: {prompt}")

         try:
             response, _ = await self.express_learn_model.generate_response_async(prompt)
         except Exception as e:
             logger.error(f"个性表达方式提取失败: {e}")
-            # On extraction failure, keep the current style and the unincremented count
+            # On extraction failure, keep the current configuration and the unincremented count
             self._write_meta_data(
                 {
                     "last_style_text": current_style_text,
+                    "last_personality": current_personality,
+                    "last_identity": current_identity,
                     "count": count,
                     "last_update_time": meta_data.get("last_update_time"),
                 }
@@ -135,7 +150,6 @@ class PersonalityExpression:
             return

         logger.info(f"个性表达方式提取response: {response}")
-        # chat_id uses "personality"

         # Convert to a dict and set count=100
         if response != "":
@@ -183,9 +197,15 @@ class PersonalityExpression:
             count += 1
             current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
             self._write_meta_data(
-                {"last_style_text": current_style_text, "count": count, "last_update_time": current_time}
+                {
+                    "last_style_text": current_style_text,
+                    "last_personality": current_personality,
+                    "last_identity": current_identity,
+                    "count": count,
+                    "last_update_time": current_time,
+                }
             )
-            logger.info(f"成功处理。风格 '{current_style_text}' 的计数现在是 {count},最后更新时间:{current_time}。")
+            logger.info(f"成功处理。当前配置的计数现在是 {count},最后更新时间:{current_time}。")
         else:
             logger.warning(f"个性表达方式提取失败,模型返回空内容: {response}")
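With these changes, the metadata that PersonalityExpression persists between runs carries two extra fields. The shape is roughly the following (field names from the diff above, values purely illustrative):

    # Illustrative meta_data written by _write_meta_data
    meta_data = {
        "last_style_text": "...",   # global_config.expression.expression_style
        "last_personality": "...",  # individuality.get_personality_prompt(x_person=2, level=2)
        "last_identity": "...",     # individuality.get_identity_prompt(x_person=2, level=2)
        "count": 3,                 # extraction runs performed for this configuration
        "last_update_time": "2024-01-01 00:00:00",
    }

A change in any of the first three fields resets count to 0 and deletes the old expressions.json, so the personality expressions are regenerated whenever the persona, identity, or expression style is edited.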
@@ -17,12 +17,12 @@ class ImpressionUpdateTask(AsyncTask):
         super().__init__(
             task_name="impression_update",
             wait_before_start=5,  # wait 5 seconds after startup
-            run_interval=10,  # run every 10 seconds
+            run_interval=20,  # run every 20 seconds
         )

     async def run(self):
         try:
-            if random.random() < 0.5:
+            if random.random() < 0.1:
                 # Sample from the most recent messages
                 current_time = int(time.time())
                 start_time = current_time - 6000  # 6000 seconds (100 minutes) ago
@@ -30,7 +30,7 @@ class ImpressionUpdateTask(AsyncTask):
             else:
                 now = int(time.time())
                 # Earliest timestamp to sample from
-                month_ago = now - 30 * 24 * 60 * 60
+                month_ago = now - 90 * 24 * 60 * 60
                 # Randomly pick the start of a one-hour window
                 random_start = random.randint(month_ago, now - 3600)
                 start_time = random_start
@@ -228,7 +228,7 @@ class RelationshipManager:
         readable_messages = build_readable_messages(
             messages=user_messages,
             replace_bot_name=True,
-            timestamp_mode="relative",
+            timestamp_mode="normal",
             truncate=False)

@@ -263,7 +263,8 @@ class RelationshipManager:

         new_impression, _ = await self.relationship_llm.generate_response_async(prompt=prompt)

-        logger.debug(f"new_impression: {new_impression}")
+        logger.info(f"prompt: {prompt}")
+        logger.info(f"new_impression: {new_impression}")

         prompt_json = f"""
 你的名字是{global_config.bot.nickname},别名是{alias_str}。
@@ -274,8 +275,8 @@ class RelationshipManager:

 请用json格式总结对{person_name}(昵称:{nickname})的印象,要求:
 1.总结出这个人的最核心的性格,可能在这段话里看不出,总结不出来的话,就输出空字符串
-2.尝试猜测这个人的性别,如果看不出来,就输出空字符串
-3.尝试猜测自己与这个人的关系,你与ta的交互,还可以思考是积极还是消极,以及具体内容
+2.尝试猜测这个人的性别
+3.尝试猜测自己与这个人的关系,你与ta的交互,思考是积极还是消极,以及具体内容
 4.尝试猜测这个人的身份,比如职业,兴趣爱好,生活状态等
 5.尝试总结你与他之间是否有一些独特的梗,如果有,就输出梗的内容,如果没有,就输出空字符串