rebase 清理

This commit is contained in:
Windpicker-owo
2025-11-19 23:45:47 +08:00
parent 829bc9b4bc
commit 40709d95de
60 changed files with 465 additions and 10066 deletions

View File

@@ -399,13 +399,21 @@ class ExpressionLearner:
# sourcery skip: use-join
"""
学习并存储表达方式
type: "style" or "grammar"
"""
if type == "style":
type_str = "语言风格"
elif type == "grammar":
type_str = "句法特点"
else:
raise ValueError(f"Invalid type: {type}")
# 检查是否允许在此聊天流中学习(在函数最前面检查)
if not self.can_learn_for_chat():
logger.debug(f"聊天流 {self.chat_name} 不允许学习表达,跳过学习")
return []
res = await self.learn_expression(num)
res = await self.learn_expression(type, num)
if res is None:
return []
@@ -421,10 +429,10 @@ class ExpressionLearner:
learnt_expressions_str = ""
for _chat_id, situation, style in learnt_expressions:
learnt_expressions_str += f"{situation}->{style}\n"
logger.info(f"{group_name} 学习到表达风格:\n{learnt_expressions_str}")
logger.info(f"{group_name} 学习到{type_str}:\n{learnt_expressions_str}")
if not learnt_expressions:
logger.info("没有学习到表达风格")
logger.info(f"没有学习到{type_str}")
return []
# 按chat_id分组
@@ -572,10 +580,16 @@ class ExpressionLearner:
"""从指定聊天流学习表达方式
Args:
num: 学习数量
type: "style" or "grammar"
"""
type_str = "语言风格"
prompt = "learn_style_prompt"
if type == "style":
type_str = "语言风格"
prompt = "learn_style_prompt"
elif type == "grammar":
type_str = "句法特点"
prompt = "learn_grammar_prompt"
else:
raise ValueError(f"Invalid type: {type}")
current_time = time.time()
@@ -766,11 +780,9 @@ class ExpressionLearnerManager:
"""
自动将/data/expression/learnt_style 和 learnt_grammar 下所有expressions.json迁移到数据库。
迁移完成后在/data/expression/done.done写入标记文件存在则跳过。
然后检查done.done2如果没有就删除所有grammar表达并创建该标记文件。
"""
base_dir = os.path.join("data", "expression")
done_flag = os.path.join(base_dir, "done.done")
done_flag2 = os.path.join(base_dir, "done.done2")
# 确保基础目录存在
try:
@@ -805,36 +817,27 @@ class ExpressionLearnerManager:
expr_file = os.path.join(type_dir, chat_id, "expressions.json")
if not os.path.exists(expr_file):
continue
try:
async with aiofiles.open(expr_file, encoding="utf-8") as f:
content = await f.read()
expressions = orjson.loads(content)
for chat_id in chat_ids:
expr_file = os.path.join(type_dir, chat_id, "expressions.json")
if not os.path.exists(expr_file):
if not isinstance(expressions, list):
logger.warning(f"表达方式文件格式错误,跳过: {expr_file}")
continue
try:
with open(expr_file, "r", encoding="utf-8") as f:
expressions = json.load(f)
if not isinstance(expressions, list):
logger.warning(f"表达方式文件格式错误,跳过: {expr_file}")
for expr in expressions:
if not isinstance(expr, dict):
continue
for expr in expressions:
if not isinstance(expr, dict):
continue
situation = expr.get("situation")
style_val = expr.get("style")
count = expr.get("count", 1)
last_active_time = expr.get("last_active_time", time.time())
situation = expr.get("situation")
style_val = expr.get("style")
count = expr.get("count", 1)
last_active_time = expr.get("last_active_time", time.time())
if not situation or not style_val:
logger.warning(f"表达方式缺少必要字段,跳过: {expr}")
continue
if not situation or not style_val:
logger.warning(f"表达方式缺少必要字段,跳过: {expr}")
continue
# 查重同chat_id+type+situation+style
async with get_db_session() as session:
@@ -913,40 +916,5 @@ class ExpressionLearnerManager:
except Exception as e:
logger.error(f"迁移老数据创建日期失败: {e}")
def delete_all_grammar_expressions(self) -> int:
"""
检查expression库中所有type为"grammar"的表达并全部删除
Returns:
int: 删除的grammar表达数量
"""
try:
# 查询所有type为"grammar"的表达
grammar_expressions = Expression.select().where(Expression.type == "grammar")
grammar_count = grammar_expressions.count()
if grammar_count == 0:
logger.info("expression库中没有找到grammar类型的表达")
return 0
logger.info(f"找到 {grammar_count} 个grammar类型的表达开始删除...")
# 删除所有grammar类型的表达
deleted_count = 0
for expr in grammar_expressions:
try:
expr.delete_instance()
deleted_count += 1
except Exception as e:
logger.error(f"删除grammar表达失败: {e}")
continue
logger.info(f"成功删除 {deleted_count} 个grammar类型的表达")
return deleted_count
except Exception as e:
logger.error(f"删除grammar表达过程中发生错误: {e}")
return 0
expression_learner_manager = ExpressionLearnerManager()

View File

@@ -32,7 +32,7 @@ def init_prompt():
以下是可选的表达情境:
{all_situations}
请你分析聊天内容的语境、情绪、话题类型,从上述情境中选择最适合当前聊天情境的,最多{max_num}个情境。
请你分析聊天内容的语境、情绪、话题类型,从上述情境中选择最适合当前聊天情境的{min_num}-{max_num}个情境。
考虑因素包括:
1. 聊天的情绪氛围(轻松、严肃、幽默等)
2. 话题类型(日常、技术、游戏、情感等)
@@ -42,7 +42,7 @@ def init_prompt():
请以JSON格式输出只需要输出选中的情境编号
例如:
{{
"selected_situations": [2, 3, 5, 7, 19]
"selected_situations": [2, 3, 5, 7, 19, 22, 25, 38, 39, 45, 48, 64]
}}
请严格按照JSON格式输出不要包含其他内容
@@ -544,24 +544,34 @@ class ExpressionSelector:
# 检查是否允许在此聊天流中使用表达
if not self.can_use_expression_for_chat(chat_id):
logger.debug(f"聊天流 {chat_id} 不允许使用表达,返回空列表")
return [], []
return []
# 1. 获取35个随机表达方式现在按权重抽取
style_exprs, grammar_exprs = await self.get_random_expressions(chat_id, 30, 0.5, 0.5)
# 2. 构建所有表达方式的索引和情境列表
all_expressions: List[Dict[str, Any]] = []
all_situations: List[str] = []
all_expressions = []
all_situations = []
# 添加style表达方式
for expr in style_exprs:
expr = expr.copy()
all_expressions.append(expr)
all_situations.append(f"{len(all_expressions)}.当 {expr['situation']} 时,使用 {expr['style']}")
if isinstance(expr, dict) and "situation" in expr and "style" in expr:
expr_with_type = expr.copy()
expr_with_type["type"] = "style"
all_expressions.append(expr_with_type)
all_situations.append(f"{len(all_expressions)}.{expr['situation']}")
# 添加grammar表达方式
for expr in grammar_exprs:
if isinstance(expr, dict) and "situation" in expr and "style" in expr:
expr_with_type = expr.copy()
expr_with_type["type"] = "grammar"
all_expressions.append(expr_with_type)
all_situations.append(f"{len(all_expressions)}.{expr['situation']}")
if not all_expressions:
logger.warning("没有找到可用的表达方式")
return [], []
return []
all_situations_str = "\n".join(all_situations)
@@ -577,11 +587,14 @@ class ExpressionSelector:
bot_name=global_config.bot.nickname,
chat_observe_info=chat_info,
all_situations=all_situations_str,
min_num=min_num,
max_num=max_num,
target_message=target_message_str,
target_message_extra_block=target_message_extra_block,
)
# print(prompt)
# 4. 调用LLM
try:
# start_time = time.time()
@@ -589,7 +602,7 @@ class ExpressionSelector:
if not content:
logger.warning("LLM返回空结果")
return [], []
return []
# 5. 解析结果
result = repair_json(content)
@@ -599,17 +612,15 @@ class ExpressionSelector:
if not isinstance(result, dict) or "selected_situations" not in result:
logger.error("LLM返回格式错误")
logger.info(f"LLM返回结果: \n{content}")
return [], []
return []
selected_indices = result["selected_situations"]
# 根据索引获取完整的表达方式
valid_expressions: List[Dict[str, Any]] = []
selected_ids = []
valid_expressions = []
for idx in selected_indices:
if isinstance(idx, int) and 1 <= idx <= len(all_expressions):
expression = all_expressions[idx - 1] # 索引从1开始
selected_ids.append(expression["id"])
valid_expressions.append(expression)
# 对选中的所有表达方式一次性更新count数
@@ -617,7 +628,7 @@ class ExpressionSelector:
asyncio.create_task(self.update_expressions_count_batch(valid_expressions, 0.006)) # noqa: RUF006
# logger.info(f"LLM从{len(all_expressions)}个情境中选择了{len(valid_expressions)}个")
return valid_expressions, selected_ids
return valid_expressions
except Exception as e:
logger.error(f"LLM处理表达方式选择时出错: {e}")