Fixed memory flooding; added a new-and-improved typo generator; added memory filtering
SengokuCola
2025-03-07 00:09:36 +08:00
parent 21d1a69b6e
commit 8ef00ee571
8 changed files with 883 additions and 547 deletions

@@ -29,16 +29,6 @@ config = driver.config
class EmojiManager:
_instance = None
EMOJI_DIR = "data/emoji" # emoji image storage directory
EMOTION_KEYWORDS = {
'happy': ['开心', '快乐', '高兴', '欢喜', '喜悦', '兴奋', '愉快'],
'angry': ['生气', '愤怒', '恼火', '不爽', '火大', '气愤', '恼怒', '发火', '不满'],
'sad': ['伤心', '难过', '悲伤', '痛苦', '忧伤', '悲痛', '哀伤', '委屈', '失落'],
'surprised': ['惊讶', '震惊', '吃惊', '意外', '诧异', '惊奇', '惊喜', '不敢相信', '目瞪口呆'],
'disgusted': ['恶心', '讨厌', '厌恶', '反感', '嫌弃', '嫌恶', '憎恶', '不喜欢'],
'fearful': ['害怕', '恐惧', '惊恐', '担心', '惊吓', '惊慌', '畏惧', '胆怯'],
'neutral': ['普通', '一般', '还行', '正常', '平静', '平淡', '一般般', '凑合', '还好', '就这样']
}
def __new__(cls):
if cls._instance is None:
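The deleted EMOTION_KEYWORDS table was a plain bag-of-keywords lookup. For context, this is roughly how such a table gets consumed; the matcher itself is not in this diff, so the function name detect_emotion and the hit-counting rule below are assumptions, not the project's actual code:

```python
# Hypothetical consumer of the removed EMOTION_KEYWORDS table, for illustration
# only: score each emotion by counting keyword hits in the message text.
def detect_emotion(text: str, keywords: dict[str, list[str]]) -> str:
    scores = {emotion: sum(word in text for word in words)
              for emotion, words in keywords.items()}
    best = max(scores, key=scores.get)
    # fall back to neutral when no keyword matched at all
    return best if scores[best] > 0 else 'neutral'
```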

@@ -84,7 +84,8 @@ class PromptBuilder:
relevant_memories = await hippocampus.get_relevant_memories(
text=message_txt,
max_topics=5,
similarity_threshold=0.4
similarity_threshold=0.4,
max_memory_num=5
)
if relevant_memories:
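The new max_memory_num argument caps how many retrieved memories reach the prompt, which lines up with the memory filtering mentioned in the commit message. The internals of get_relevant_memories are not part of this diff; a minimal sketch of the filtering step the two parameters imply might look like this (all names other than the two parameters are made up):

```python
# Assumed shape of the filtering inside get_relevant_memories: drop weak
# matches, rank the rest by similarity, and cap the count. Illustrative only.
def filter_memories(scored_memories, similarity_threshold=0.4, max_memory_num=5):
    # scored_memories: list of (memory_text, similarity_score) pairs
    kept = [(m, s) for m, s in scored_memories if s >= similarity_threshold]
    kept.sort(key=lambda pair: pair[1], reverse=True)
    return [m for m, _ in kept[:max_memory_num]]
```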

@@ -13,6 +13,7 @@ from nonebot import get_driver
from ..models.utils_model import LLM_request
import aiohttp
import jieba
from ..utils.typo_generator import ChineseTypoGenerator
driver = get_driver()
config = driver.config
@@ -285,75 +286,6 @@ def split_into_sentences_w_remove_punctuation(text: str) -> List[str]:
print(f"Processed sentences: {sentences_done}")
return sentences_done
# Common typo mappings: character -> plausible mistyped replacements
TYPO_DICT = {
'的': '地得',
'了': '咯啦勒',
'吗': '嘛麻',
'吧': '八把罢',
'我': '沃窝喔',
'你': '泥尼拟',
'他': '它她塔祂',
'啊': '阿哇',
'呢': '呐捏',
'都': '豆读毒',
'会': '回汇',
'去': '趣取曲',
'做': '作坐',
'想': '相像',
'看': '砍堪刊',
'好': '号毫豪',
'过': '锅果裹',
'为': '位未',
'什': '甚深伸',
'么': '末麽嘛',
# ... roughly forty more single-character mappings, garbled in this view
}
def random_remove_punctuation(text: str) -> str:
"""随机处理标点符号,模拟人类打字习惯
@@ -381,17 +313,6 @@ def random_remove_punctuation(text: str) -> str:
result += char
return result
def add_typos(text: str) -> str:
TYPO_RATE = 0.02 # probability that a character becomes a typo (2%)
result = ""
for char in text:
if char in TYPO_DICT and random.random() < TYPO_RATE:
# pick one of the possible typo characters at random
typos = TYPO_DICT[char]
result += random.choice(typos)
else:
result += char
return result
def process_llm_response(text: str) -> List[str]:
# processed_response = process_text_with_typos(content)
@@ -399,7 +320,14 @@ def process_llm_response(text: str) -> List[str]:
print(f"Reply too long ({len(text)} chars), returning default reply")
return ['懒得说']
# handle long messages
sentences = split_into_sentences_w_remove_punctuation(add_typos(text))
typo_generator = ChineseTypoGenerator(
error_rate=0.03,
min_freq=7,
tone_error_rate=0.2,
word_replace_rate=0.02
)
typoed_text = typo_generator.create_typo_sentence(text)[0]
sentences = split_into_sentences_w_remove_punctuation(typoed_text)
# check whether splitting produced too many messages (more than 4)
if len(sentences) > 4:
print(f"Too many messages after splitting ({len(sentences)}), returning default reply")
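Unlike the removed add_typos, which swapped single characters from a fixed table at a flat 2% rate, ChineseTypoGenerator exposes separate knobs for the overall error rate, a character-frequency floor (min_freq), tone mistakes, and whole-word replacement. Its implementation lives in ..utils.typo_generator and is not shown in this diff; the sketch below only guesses at the shape of create_typo_sentence from its call site (the [0] suggests it returns a tuple). The homophone tables are toy stand-ins, and the min_freq / word_replace_rate logic is omitted:

```python
import random

# Toy approximation of ChineseTypoGenerator.create_typo_sentence -- NOT the
# real implementation. Tables and control flow here are assumptions.
HOMOPHONES = {'的': '地得', '做': '作坐', '他': '它她'}  # same-sound swaps
TONE_SLIPS = {'吗': '嘛', '吧': '把', '呢': '呐'}        # near-tone swaps

def create_typo_sentence(text: str,
                         error_rate: float = 0.03,
                         tone_error_rate: float = 0.2) -> tuple[str, int]:
    out, typo_count = [], 0
    for ch in text:
        if random.random() < error_rate:
            # a fraction of errors are tone slips, the rest homophone swaps
            if ch in TONE_SLIPS and random.random() < tone_error_rate:
                out.append(random.choice(TONE_SLIPS[ch]))
                typo_count += 1
                continue
            if ch in HOMOPHONES:
                out.append(random.choice(HOMOPHONES[ch]))
                typo_count += 1
                continue
        out.append(ch)
    return ''.join(out), typo_count
```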