rebase 清理
This commit is contained in:
@@ -399,13 +399,21 @@ class ExpressionLearner:
|
||||
# sourcery skip: use-join
|
||||
"""
|
||||
学习并存储表达方式
|
||||
type: "style" or "grammar"
|
||||
"""
|
||||
if type == "style":
|
||||
type_str = "语言风格"
|
||||
elif type == "grammar":
|
||||
type_str = "句法特点"
|
||||
else:
|
||||
raise ValueError(f"Invalid type: {type}")
|
||||
|
||||
# 检查是否允许在此聊天流中学习(在函数最前面检查)
|
||||
if not self.can_learn_for_chat():
|
||||
logger.debug(f"聊天流 {self.chat_name} 不允许学习表达,跳过学习")
|
||||
return []
|
||||
|
||||
res = await self.learn_expression(num)
|
||||
res = await self.learn_expression(type, num)
|
||||
|
||||
if res is None:
|
||||
return []
|
||||
@@ -421,10 +429,10 @@ class ExpressionLearner:
|
||||
learnt_expressions_str = ""
|
||||
for _chat_id, situation, style in learnt_expressions:
|
||||
learnt_expressions_str += f"{situation}->{style}\n"
|
||||
logger.info(f"在 {group_name} 学习到表达风格:\n{learnt_expressions_str}")
|
||||
logger.info(f"在 {group_name} 学习到{type_str}:\n{learnt_expressions_str}")
|
||||
|
||||
if not learnt_expressions:
|
||||
logger.info("没有学习到表达风格")
|
||||
logger.info(f"没有学习到{type_str}")
|
||||
return []
|
||||
|
||||
# 按chat_id分组
|
||||
@@ -572,10 +580,16 @@ class ExpressionLearner:
|
||||
"""从指定聊天流学习表达方式
|
||||
|
||||
Args:
|
||||
num: 学习数量
|
||||
type: "style" or "grammar"
|
||||
"""
|
||||
type_str = "语言风格"
|
||||
prompt = "learn_style_prompt"
|
||||
if type == "style":
|
||||
type_str = "语言风格"
|
||||
prompt = "learn_style_prompt"
|
||||
elif type == "grammar":
|
||||
type_str = "句法特点"
|
||||
prompt = "learn_grammar_prompt"
|
||||
else:
|
||||
raise ValueError(f"Invalid type: {type}")
|
||||
|
||||
current_time = time.time()
|
||||
|
||||
@@ -766,11 +780,9 @@ class ExpressionLearnerManager:
|
||||
"""
|
||||
自动将/data/expression/learnt_style 和 learnt_grammar 下所有expressions.json迁移到数据库。
|
||||
迁移完成后在/data/expression/done.done写入标记文件,存在则跳过。
|
||||
然后检查done.done2,如果没有就删除所有grammar表达并创建该标记文件。
|
||||
"""
|
||||
base_dir = os.path.join("data", "expression")
|
||||
done_flag = os.path.join(base_dir, "done.done")
|
||||
done_flag2 = os.path.join(base_dir, "done.done2")
|
||||
|
||||
# 确保基础目录存在
|
||||
try:
|
||||
@@ -805,36 +817,27 @@ class ExpressionLearnerManager:
|
||||
expr_file = os.path.join(type_dir, chat_id, "expressions.json")
|
||||
if not os.path.exists(expr_file):
|
||||
continue
|
||||
|
||||
try:
|
||||
async with aiofiles.open(expr_file, encoding="utf-8") as f:
|
||||
content = await f.read()
|
||||
expressions = orjson.loads(content)
|
||||
|
||||
for chat_id in chat_ids:
|
||||
expr_file = os.path.join(type_dir, chat_id, "expressions.json")
|
||||
if not os.path.exists(expr_file):
|
||||
if not isinstance(expressions, list):
|
||||
logger.warning(f"表达方式文件格式错误,跳过: {expr_file}")
|
||||
continue
|
||||
try:
|
||||
with open(expr_file, "r", encoding="utf-8") as f:
|
||||
expressions = json.load(f)
|
||||
|
||||
if not isinstance(expressions, list):
|
||||
logger.warning(f"表达方式文件格式错误,跳过: {expr_file}")
|
||||
for expr in expressions:
|
||||
if not isinstance(expr, dict):
|
||||
continue
|
||||
|
||||
for expr in expressions:
|
||||
if not isinstance(expr, dict):
|
||||
continue
|
||||
situation = expr.get("situation")
|
||||
style_val = expr.get("style")
|
||||
count = expr.get("count", 1)
|
||||
last_active_time = expr.get("last_active_time", time.time())
|
||||
|
||||
situation = expr.get("situation")
|
||||
style_val = expr.get("style")
|
||||
count = expr.get("count", 1)
|
||||
last_active_time = expr.get("last_active_time", time.time())
|
||||
|
||||
if not situation or not style_val:
|
||||
logger.warning(f"表达方式缺少必要字段,跳过: {expr}")
|
||||
continue
|
||||
if not situation or not style_val:
|
||||
logger.warning(f"表达方式缺少必要字段,跳过: {expr}")
|
||||
continue
|
||||
|
||||
# 查重:同chat_id+type+situation+style
|
||||
async with get_db_session() as session:
|
||||
@@ -913,40 +916,5 @@ class ExpressionLearnerManager:
|
||||
except Exception as e:
|
||||
logger.error(f"迁移老数据创建日期失败: {e}")
|
||||
|
||||
def delete_all_grammar_expressions(self) -> int:
|
||||
"""
|
||||
检查expression库中所有type为"grammar"的表达并全部删除
|
||||
|
||||
Returns:
|
||||
int: 删除的grammar表达数量
|
||||
"""
|
||||
try:
|
||||
# 查询所有type为"grammar"的表达
|
||||
grammar_expressions = Expression.select().where(Expression.type == "grammar")
|
||||
grammar_count = grammar_expressions.count()
|
||||
|
||||
if grammar_count == 0:
|
||||
logger.info("expression库中没有找到grammar类型的表达")
|
||||
return 0
|
||||
|
||||
logger.info(f"找到 {grammar_count} 个grammar类型的表达,开始删除...")
|
||||
|
||||
# 删除所有grammar类型的表达
|
||||
deleted_count = 0
|
||||
for expr in grammar_expressions:
|
||||
try:
|
||||
expr.delete_instance()
|
||||
deleted_count += 1
|
||||
except Exception as e:
|
||||
logger.error(f"删除grammar表达失败: {e}")
|
||||
continue
|
||||
|
||||
logger.info(f"成功删除 {deleted_count} 个grammar类型的表达")
|
||||
return deleted_count
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"删除grammar表达过程中发生错误: {e}")
|
||||
return 0
|
||||
|
||||
|
||||
expression_learner_manager = ExpressionLearnerManager()
|
||||
|
||||
@@ -32,7 +32,7 @@ def init_prompt():
|
||||
以下是可选的表达情境:
|
||||
{all_situations}
|
||||
|
||||
请你分析聊天内容的语境、情绪、话题类型,从上述情境中选择最适合当前聊天情境的,最多{max_num}个情境。
|
||||
请你分析聊天内容的语境、情绪、话题类型,从上述情境中选择最适合当前聊天情境的{min_num}-{max_num}个情境。
|
||||
考虑因素包括:
|
||||
1. 聊天的情绪氛围(轻松、严肃、幽默等)
|
||||
2. 话题类型(日常、技术、游戏、情感等)
|
||||
@@ -42,7 +42,7 @@ def init_prompt():
|
||||
请以JSON格式输出,只需要输出选中的情境编号:
|
||||
例如:
|
||||
{{
|
||||
"selected_situations": [2, 3, 5, 7, 19]
|
||||
"selected_situations": [2, 3, 5, 7, 19, 22, 25, 38, 39, 45, 48, 64]
|
||||
}}
|
||||
|
||||
请严格按照JSON格式输出,不要包含其他内容:
|
||||
@@ -544,24 +544,34 @@ class ExpressionSelector:
|
||||
# 检查是否允许在此聊天流中使用表达
|
||||
if not self.can_use_expression_for_chat(chat_id):
|
||||
logger.debug(f"聊天流 {chat_id} 不允许使用表达,返回空列表")
|
||||
return [], []
|
||||
return []
|
||||
|
||||
# 1. 获取35个随机表达方式(现在按权重抽取)
|
||||
style_exprs, grammar_exprs = await self.get_random_expressions(chat_id, 30, 0.5, 0.5)
|
||||
|
||||
# 2. 构建所有表达方式的索引和情境列表
|
||||
all_expressions: List[Dict[str, Any]] = []
|
||||
all_situations: List[str] = []
|
||||
all_expressions = []
|
||||
all_situations = []
|
||||
|
||||
# 添加style表达方式
|
||||
for expr in style_exprs:
|
||||
expr = expr.copy()
|
||||
all_expressions.append(expr)
|
||||
all_situations.append(f"{len(all_expressions)}.当 {expr['situation']} 时,使用 {expr['style']}")
|
||||
if isinstance(expr, dict) and "situation" in expr and "style" in expr:
|
||||
expr_with_type = expr.copy()
|
||||
expr_with_type["type"] = "style"
|
||||
all_expressions.append(expr_with_type)
|
||||
all_situations.append(f"{len(all_expressions)}.{expr['situation']}")
|
||||
|
||||
# 添加grammar表达方式
|
||||
for expr in grammar_exprs:
|
||||
if isinstance(expr, dict) and "situation" in expr and "style" in expr:
|
||||
expr_with_type = expr.copy()
|
||||
expr_with_type["type"] = "grammar"
|
||||
all_expressions.append(expr_with_type)
|
||||
all_situations.append(f"{len(all_expressions)}.{expr['situation']}")
|
||||
|
||||
if not all_expressions:
|
||||
logger.warning("没有找到可用的表达方式")
|
||||
return [], []
|
||||
return []
|
||||
|
||||
all_situations_str = "\n".join(all_situations)
|
||||
|
||||
@@ -577,11 +587,14 @@ class ExpressionSelector:
|
||||
bot_name=global_config.bot.nickname,
|
||||
chat_observe_info=chat_info,
|
||||
all_situations=all_situations_str,
|
||||
min_num=min_num,
|
||||
max_num=max_num,
|
||||
target_message=target_message_str,
|
||||
target_message_extra_block=target_message_extra_block,
|
||||
)
|
||||
|
||||
# print(prompt)
|
||||
|
||||
# 4. 调用LLM
|
||||
try:
|
||||
# start_time = time.time()
|
||||
@@ -589,7 +602,7 @@ class ExpressionSelector:
|
||||
|
||||
if not content:
|
||||
logger.warning("LLM返回空结果")
|
||||
return [], []
|
||||
return []
|
||||
|
||||
# 5. 解析结果
|
||||
result = repair_json(content)
|
||||
@@ -599,17 +612,15 @@ class ExpressionSelector:
|
||||
if not isinstance(result, dict) or "selected_situations" not in result:
|
||||
logger.error("LLM返回格式错误")
|
||||
logger.info(f"LLM返回结果: \n{content}")
|
||||
return [], []
|
||||
return []
|
||||
|
||||
selected_indices = result["selected_situations"]
|
||||
|
||||
# 根据索引获取完整的表达方式
|
||||
valid_expressions: List[Dict[str, Any]] = []
|
||||
selected_ids = []
|
||||
valid_expressions = []
|
||||
for idx in selected_indices:
|
||||
if isinstance(idx, int) and 1 <= idx <= len(all_expressions):
|
||||
expression = all_expressions[idx - 1] # 索引从1开始
|
||||
selected_ids.append(expression["id"])
|
||||
valid_expressions.append(expression)
|
||||
|
||||
# 对选中的所有表达方式,一次性更新count数
|
||||
@@ -617,7 +628,7 @@ class ExpressionSelector:
|
||||
asyncio.create_task(self.update_expressions_count_batch(valid_expressions, 0.006)) # noqa: RUF006
|
||||
|
||||
# logger.info(f"LLM从{len(all_expressions)}个情境中选择了{len(valid_expressions)}个")
|
||||
return valid_expressions, selected_ids
|
||||
return valid_expressions
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"LLM处理表达方式选择时出错: {e}")
|
||||
|
||||
Reference in New Issue
Block a user