feat(chat): auto-recall and correct messages sent with typos

Introduces a new chat interaction mechanism: after the bot sends a message containing a deliberate typo, it recalls that message after a short delay and sends the corrected version. The feature is meant to mimic real human typing behavior and make interactions feel more playful and human-like.

Main changes:
- **Typo correction flow**:
  - `ResponseHandler` now recognizes messages that carry a deliberate typo and, after sending them, spawns an async task to handle the follow-up correction.
  - Added a `handle_typo_correction` method that waits a random 2-4 seconds, recalls the original message via the new `recall_message` API, and re-sends the corrected content (see the sketch after this list).
- **API extension**:
  - `send_api` gains a `recall_message` function that asks the platform adapter to withdraw a sent message.
  - The return value of `send_response` changes from a single `reply_text` string to the tuple `(reply_text, sent_messages)`, so that information about the sent messages (including their IDs and types) is passed back to the caller.
- **Data structure changes**:
  - The return type of `process_llm_response` changes from `list[str]` to `list[dict[str, str]]` to support richer response entries, such as the `typo` type that carries the original text, the typo'd text, and the correction (an illustrative example follows the diff below).
- **Cleanup and refactoring**:
  - `ChineseTypoGenerator` received extensive cleanup, added comments, and logic simplification, making the code clearer and easier to maintain.
  - Fixed several type annotations and potential None-dereference issues, improving overall robustness.
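The handler and API changes above live in files not shown in the diff below. The following is a minimal sketch of how the pieces might fit together; only `handle_typo_correction`, `recall_message`, the `(reply_text, sent_messages)` tuple, and the 2-4 second delay come from this commit, while every other name and signature is an assumption made for illustration:

```python
import asyncio
import random
from typing import Any

# Stand-ins for the real send_api helpers; their actual module path, names, and
# signatures are not shown in this diff and are assumed here for illustration.
async def send_text(chat_id: str, text: str) -> tuple[str, list[dict[str, Any]]]:
    """Send a message and return (reply_text, sent_messages), mirroring the new
    send_response contract where each sent message carries its ID and type."""
    raise NotImplementedError


async def recall_message(chat_id: str, message_id: str) -> bool:
    """Ask the platform adapter to withdraw a previously sent message."""
    raise NotImplementedError


async def handle_typo_correction(
    chat_id: str, sent_messages: list[dict[str, Any]], corrected_text: str
) -> None:
    """Recall the typo'd message after a short delay and send the corrected version."""
    # Random 2-4 second delay, so the recall looks like a human noticing the mistake.
    await asyncio.sleep(random.uniform(2.0, 4.0))
    for msg in sent_messages:
        # Withdraw the message that contained the typo, using the ID returned on send.
        await recall_message(chat_id, msg["id"])
    # Re-send the corrected content.
    await send_text(chat_id, corrected_text)


async def send_segments(chat_id: str, segments: list[dict[str, str]]) -> None:
    """Consume the list[dict[str, str]] now produced by process_llm_response."""
    for seg in segments:
        if seg["type"] == "typo":
            # Send the sentence with the injected typo and keep the sent-message info.
            _, sent_messages = await send_text(chat_id, seg["typo"])
            # Run the correction as a background task so later replies are not blocked.
            asyncio.create_task(
                handle_typo_correction(chat_id, sent_messages, seg["original"])
            )
        else:
            await send_text(chat_id, seg["content"])
```

The key points are that the new tuple returned by `send_response` is what gives `handle_typo_correction` the message IDs it needs for the recall, and that the correction runs in its own task so the delay does not hold up the rest of the reply.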
Author: minecraft1024a
Date: 2025-09-06 15:44:52 +08:00
Committed by: Windpicker-owo
Parent: 7f723e9ff6
Commit: 89fad16e0e
6 changed files with 313 additions and 202 deletions


@@ -293,9 +293,11 @@ def random_remove_punctuation(text: str) -> str:
     return result


-def process_llm_response(text: str, enable_splitter: bool = True, enable_chinese_typo: bool = True) -> list[str]:
+def process_llm_response(
+    text: str, enable_splitter: bool = True, enable_chinese_typo: bool = True
+) -> list[dict[str, str]]:
     if not global_config.response_post_process.enable_response_post_process:
-        return [text]
+        return [{"type": "text", "content": text}]

     # Protect kaomoji first
     if global_config.response_splitter.enable_kaomoji_protection:
@@ -311,7 +313,7 @@ def process_llm_response(text: str, enable_splitter: bool = True, enable_chinese
     cleaned_text = pattern.sub("", protected_text)
     if cleaned_text == "":
-        return ["呃呃"]
+        return [{"type": "text", "content": "呃呃"}]

     logger.debug(f"{text}去除括号处理后的文本: {cleaned_text}")
@@ -321,7 +323,7 @@ def process_llm_response(text: str, enable_splitter: bool = True, enable_chinese
     # If the text is mostly Chinese, apply length filtering
     if get_western_ratio(cleaned_text) < 0.1 and len(cleaned_text) > max_length:
         logger.warning(f"回复过长 ({len(cleaned_text)} 字符),返回默认回复")
-        return ["懒得说"]
+        return [{"type": "text", "content": "懒得说"}]

     typo_generator = ChineseTypoGenerator(
         error_rate=global_config.chinese_typo.error_rate,
@@ -338,16 +340,24 @@ def process_llm_response(text: str, enable_splitter: bool = True, enable_chinese
     sentences = []
     for sentence in split_sentences:
         if global_config.chinese_typo.enable and enable_chinese_typo:
-            typoed_text, typo_corrections = typo_generator.create_typo_sentence(sentence)
-            sentences.append(typoed_text)
+            original_sentence, typo_sentence, typo_corrections = typo_generator.create_typo_sentence(sentence)
             if typo_corrections:
-                sentences.append(typo_corrections)
+                sentences.append(
+                    {
+                        "type": "typo",
+                        "original": original_sentence,
+                        "typo": typo_sentence,
+                        "correction": typo_corrections,
+                    }
+                )
+            else:
+                sentences.append({"type": "text", "content": sentence})
         else:
-            sentences.append(sentence)
+            sentences.append({"type": "text", "content": sentence})

     if len(sentences) > max_sentence_num:
         logger.warning(f"分割后消息数量过多 ({len(sentences)} 条),返回默认回复")
-        return [f"{global_config.bot.nickname}不知道哦"]
+        return [{"type": "text", "content": f"{global_config.bot.nickname}不知道哦"}]

     # if extracted_contents:
     #     for content in extracted_contents:
@@ -355,7 +365,20 @@ def process_llm_response(text: str, enable_splitter: bool = True, enable_chinese
     # After all sentences are processed, restore placeholders in the list
     if global_config.response_splitter.enable_kaomoji_protection:
-        sentences = recover_kaomoji(sentences, kaomoji_mapping)
+        # Elements of sentences may be dicts or plain strings, so handle them separately
+        recovered_sentences = []
+        for s in sentences:
+            if isinstance(s, dict) and s.get("type") == "typo":
+                s["original"] = recover_kaomoji([s["original"]], kaomoji_mapping)
+                s["typo"] = recover_kaomoji([s["typo"]], kaomoji_mapping)
+                s["correction"] = recover_kaomoji([s["correction"]], kaomoji_mapping)
+                recovered_sentences.append(s)
+            elif isinstance(s, dict) and s.get("type") == "text":
+                s["content"] = recover_kaomoji([s["content"]], kaomoji_mapping)
+                recovered_sentences.append(s)
+            else:
+                recovered_sentences.append(recover_kaomoji([s], kaomoji_mapping))
+        sentences = recovered_sentences

     return sentences
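For reference, this is roughly the shape the function now returns. The sample below uses made-up values, including the format of the correction info, which comes from `ChineseTypoGenerator.create_typo_sentence` and is not shown in this hunk:

```python
segments = process_llm_response("今天天气不错,适合出门")
# Possible result when a typo is injected into the first sentence (values illustrative):
# [
#     {
#         "type": "typo",
#         "original": "今天天气不错",  # clean sentence, available for the corrected resend
#         "typo": "今天天汽不错",      # what actually gets sent first
#         "correction": "汽->气",      # correction info from the typo generator
#     },
#     {"type": "text", "content": "适合出门"},
# ]
# Sentences without an injected typo come back as {"type": "text", "content": ...} entries.
```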