better 新增了分割器，表情惩罚系数的自定义

2025-03-25 22:27:38 +08:00
parent c220f4c79e
commit 51990391fd
5 changed files with 40 additions and 196 deletions
--- a/config/bot_config_test.toml
+++ b/config/bot_config_test.toml
@@ -1,179 +0,0 @@
 [inner]
 version = "0.0.10"
 [mai_version]
 version = "0.6.0"
 version-fix = "snapshot-1"
 #以下是给开发人员阅读的，一般用户不需要阅读
 #如果你想要修改配置文件，请在修改后将version的值进行变更
 #如果新增项目，请在BotConfig类下新增相应的变量
 #1.如果你修改的是[]层级项目，例如你新增了 [memory],那么请在config.py的 load_config函数中的include_configs字典中新增"内容":{
 #"func":memory,
 #"support":">=0.0.0",  #新的版本号
 #"necessary":False      #是否必须
 #}
 #2.如果你修改的是[]下的项目，例如你新增了[memory]下的 memory_ban_words ,那么请在config.py的 load_config函数中的 memory函数下新增版本判断:
            # if config.INNER_VERSION in SpecifierSet(">=0.0.2"):
            #     config.memory_ban_words = set(memory_config.get("memory_ban_words", []))
 [bot]
 qq = 2814567326
 nickname = "麦麦"
 alias_names = ['牢麦', '麦叠', '哈基麦']
 [personality]
 prompt_personality = ['曾经是一个学习地质的女大学生，现在学习心理学和脑科学，你会刷贴吧', '是一个女大学生，你有黑色头发，你会刷小红书', '是一个女大学生，你会刷b站，对ACG文化感兴趣']
 personality_1_probability = 0.7 # 第一种人格出现概率
 personality_2_probability = 0.1 # 第二种人格出现概率
 personality_3_probability = 0.2 # 第三种人格出现概率，请确保三个概率相加等于1
 prompt_schedule = "一个曾经学习地质,现在学习心理学和脑科学的女大学生，喜欢刷qq，贴吧，知乎和小红书"
 [message]
 min_text_length = 2 # 与麦麦聊天时麦麦只会回答文本大于等于此数的消息
 max_context_size = 10 # 麦麦获得的上文数量
 emoji_chance = 0.2 # 麦麦使用表情包的概率
 thinking_timeout = 100 # 麦麦思考时间
 response_willing_amplifier = 1 # 麦麦回复意愿放大系数，一般为1
 response_interested_rate_amplifier = 1 # 麦麦回复兴趣度放大系数,听到记忆里的内容时放大系数
 down_frequency_rate = 2 # 降低回复频率的群组回复意愿降低系数
 ban_words = []
 ban_msgs_regex = []
 [emoji]
 check_interval = 120 # 检查表情包的时间间隔
 register_interval = 10 # 注册表情包的时间间隔
 auto_save = true  # 自动偷表情包
 enable_check = false  # 是否启用表情包过滤
 check_prompt = "符合公序良俗" # 表情包过滤要求
 [cq_code]
 enable_pic_translate = false
 [response]
 model_r1_probability = 0.5 # 麦麦回答时选择主要回复模型1 模型的概率
 model_v3_probability = 0.5 # 麦麦回答时选择次要回复模型2 模型的概率
 model_r1_distill_probability = 0 # 麦麦回答时选择次要回复模型3 模型的概率
 max_response_length = 1024 # 麦麦回答的最大token数
 [willing]
 willing_mode = "classical" # 回复意愿模式 经典模式
 # willing_mode = "dynamic" # 动态模式(可能不兼容)
 # willing_mode = "custom" # 自定义模式（可自行调整
 [memory]
 build_memory_interval = 3000 # 记忆构建间隔 单位秒   间隔越低，麦麦学习越多，但是冗余信息也会增多
 build_memory_distribution = [4, 4, 0.6, 48, 36, 0.4] # 记忆构建分布，参数：分布1均值，标准差，权重，分布2均值，标准差，权重
 build_memory_sample_num = 10 # 采样数量，数值越高记忆采样次数越多
 build_memory_sample_length = 30 # 采样长度，数值越高一段记忆内容越丰富
 memory_compress_rate = 0.1 # 记忆压缩率 控制记忆精简程度 建议保持默认,调高可以获得更多信息，但是冗余信息也会增多
 forget_memory_interval = 300 # 记忆遗忘间隔 单位秒   间隔越低，麦麦遗忘越频繁，记忆更精简，但更难学习
 memory_forget_time = 24 #多长时间后的记忆会被遗忘 单位小时 
 memory_forget_percentage = 0.005 # 记忆遗忘比例 控制记忆遗忘程度 越大遗忘越多 建议保持默认
 memory_ban_words = ['表情包', '图片', '回复', '聊天记录']
 [mood]
 mood_update_interval = 1.0 # 情绪更新间隔 单位秒
 mood_decay_rate = 0.95 # 情绪衰减率
 mood_intensity_factor = 1.0 # 情绪强度因子
 [keywords_reaction] # 针对某个关键词作出反应
 enable = true  # 关键词反应功能的总开关
 [[keywords_reaction.rules]]
 enable = true
 keywords = [ "人机", "bot", "机器", "入机", "robot", "机器人",]
 reaction = "有人说你是人机或者机器人，否定这一事实，攻击或反问他。注意，只有认为你是机器人才回复，否则不要否认"
 [[keywords_reaction.rules]]
 enable = false
 keywords = [ "测试关键词回复", "test", "",]
 reaction = "回答“测试成功”"
 [chinese_typo]
 enable = true # 是否启用中文错别字生成器
 error_rate=0.01 # 单字替换概率
 min_freq=7 # 最小字频阈值
 tone_error_rate=0.3 # 声调错误概率
 word_replace_rate=0.01 # 整词替换概率
 [others]
 enable_kuuki_read = true # 是否启用读空气功能
 enable_friend_chat = true # 是否启用好友聊天
 [groups]
 talk_allowed = [571780722,1022489779,534940728, 192194125, 851345375, 739044565, 766798517, 1030993430, 435591861, 708847644, 591693379, 571780722, 1028699246, 571780722, 1015816696]  #可以回复消息的群
 talk_frequency_down = [1022489779, 571780722]  #降低回复频率的群
 ban_user_id = [3488737411, 2732836727, 3878664193, 3799953254]  #禁止回复和读取消息的QQ号
 [remote] #发送统计信息，主要是看全球有多少只麦麦
 enable = true
 #下面的模型若使用硅基流动则不需要更改，使用ds官方则改成.env.prod自定义的宏，使用自定义模型则选择定位相似的模型自己填写
 #推理模型
 [model.llm_reasoning] #回复模型1 主要回复模型
 # name = "Pro/deepseek-ai/DeepSeek-R1"
 name = "Qwen/QwQ-32B"
 provider = "SILICONFLOW"
 pri_in = 1.0 #模型的输入价格（非必填，可以记录消耗）
 pri_out = 4.0 #模型的输出价格（非必填，可以记录消耗）
 [model.llm_reasoning_minor] #回复模型3 次要回复模型
 name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"
 provider = "SILICONFLOW"
 pri_in = 1.26 #模型的输入价格（非必填，可以记录消耗）
 pri_out = 1.26 #模型的输出价格（非必填，可以记录消耗）
 #非推理模型
 [model.llm_normal] #V3 回复模型2 次要回复模型
 name = "Qwen/Qwen2.5-32B-Instruct"
 provider = "SILICONFLOW"
 pri_in = 1.26 #模型的输入价格（非必填，可以记录消耗）
 pri_out = 1.26 #模型的输出价格（非必填，可以记录消耗）
 [model.llm_emotion_judge] #表情包判断 
 name = "Qwen/Qwen2.5-14B-Instruct"
 provider = "SILICONFLOW"
 pri_in = 0.7
 pri_out = 0.7
 [model.llm_topic_judge] #记忆主题判断：建议使用qwen2.5 7b
 name = "Pro/Qwen/Qwen2.5-7B-Instruct"
 # name = "Qwen/Qwen2-1.5B-Instruct"
 provider = "SILICONFLOW"
 pri_in = 0.35
 pri_out = 0.35
 [model.llm_summary_by_topic] #概括模型，建议使用qwen2.5 32b 及以上
 name = "Qwen/Qwen2.5-32B-Instruct"
 provider = "SILICONFLOW"
 pri_in = 1.26
 pri_out = 1.26
 [model.moderation] #内容审核，开发中
 name = ""
 provider = "SILICONFLOW"
 pri_in = 1.0
 pri_out = 2.0
 # 识图模型
 [model.vlm] #图像识别 
 name = "Pro/Qwen/Qwen2.5-VL-7B-Instruct"
 provider = "SILICONFLOW"
 pri_in = 0.35
 pri_out = 0.35
 #嵌入模型
 [model.embedding] #嵌入
 name = "BAAI/bge-m3"
 provider = "SILICONFLOW"
--- a/src/plugins/chat/config.py
+++ b/src/plugins/chat/config.py
@@ -57,6 +57,7 @@ class BotConfig:
    response_willing_amplifier: float = 1.0  # 回复意愿放大系数
    response_interested_rate_amplifier: float = 1.0  # 回复兴趣度放大系数
    down_frequency_rate: float = 3  # 降低回复频率的群组回复意愿降低系数
    emoji_response_penalty: float = 0.0  # 表情包回复惩罚系数：收到表情包时与回复意愿相乘，0 表示不回复单个表情包
    # response
    MODEL_R1_PROBABILITY: float = 0.8  # R1模型概率
@@ -101,6 +102,11 @@ class BotConfig:
    chinese_typo_min_freq = 7  # 最小字频阈值
    chinese_typo_tone_error_rate = 0.2  # 声调错误概率
    chinese_typo_word_replace_rate = 0.02  # 整词替换概率
    #response_spliter
    enable_response_spliter = True  # 是否启用回复分割器
    response_max_length = 100 # 回复允许的最大长度
    response_max_sentence_num = 3 # 回复允许的最大句子数    
    # remote
    remote_enable: bool = True  # 是否启用远程控制
@@ -242,7 +248,8 @@ class BotConfig:
                config.response_willing_amplifier = willing_config.get("response_willing_amplifier", config.response_willing_amplifier)
                config.response_interested_rate_amplifier = willing_config.get("response_interested_rate_amplifier", config.response_interested_rate_amplifier)
                config.down_frequency_rate = willing_config.get("down_frequency_rate", config.down_frequency_rate)
-
+                config.emoji_response_penalty = willing_config.get("emoji_response_penalty", config.emoji_response_penalty)
        def model(parent: dict):
            # 加载模型配置
            model_config: dict = parent["model"]
@@ -378,6 +385,12 @@ class BotConfig:
            config.chinese_typo_word_replace_rate = chinese_typo_config.get(
                "word_replace_rate", config.chinese_typo_word_replace_rate
            )
        def response_spliter(parent: dict):
            """Copy the ``[response_spliter]`` table of the parsed config onto ``config``.

            Args:
                parent: The parsed configuration mapping; it must contain a
                    ``"response_spliter"`` key (a ``KeyError`` propagates otherwise).

            Any option missing from the table keeps the value ``config`` already holds.
            """
            section = parent["response_spliter"]
            for option in (
                "enable_response_spliter",
                "response_max_length",
                "response_max_sentence_num",
            ):
                # Fall back to the current attribute value when the key is absent.
                setattr(config, option, section.get(option, getattr(config, option)))
        def groups(parent: dict):
            groups_config = parent["groups"]
@@ -409,6 +422,7 @@ class BotConfig:
            "remote": {"func": remote, "support": ">=0.0.10", "necessary": False},
            "keywords_reaction": {"func": keywords_reaction, "support": ">=0.0.2", "necessary": False},
            "chinese_typo": {"func": chinese_typo, "support": ">=0.0.3", "necessary": False},
            "response_spliter": {"func": response_spliter, "support": ">=0.0.11", "necessary": False},
            "experimental": {"func": experimental, "support": ">=0.0.11", "necessary": False},
        }
--- a/src/plugins/chat/utils.py
+++ b/src/plugins/chat/utils.py
@@ -244,21 +244,17 @@ def split_into_sentences_w_remove_punctuation(text: str) -> List[str]:
        List[str]: 分割后的句子列表
    """
    len_text = len(text)
-    if len_text < 5:
+    if len_text < 4:
        if random.random() < 0.01:
            return list(text)  # 如果文本很短且触发随机条件,直接按字符分割
        else:
            return [text]
    if len_text < 12:
-        split_strength = 0.3
+        split_strength = 0.2
    elif len_text < 32:
-        split_strength = 0.7
+        split_strength = 0.6
    else:
-        split_strength = 0.9
+        split_strength = 0.7
    # 先移除换行符
    # print(f"split_strength: {split_strength}")
    # print(f"处理前的文本: {text}")
    # 检查是否为西文字符段落
    if not is_western_paragraph(text):
@@ -348,7 +344,7 @@ def random_remove_punctuation(text: str) -> str:
    for i, char in enumerate(text):
        if char == "。" and i == text_len - 1:  # 结尾的句号
-            if random.random() > 0.4:  # 80%概率删除结尾句号
+            if random.random() > 0.1:  # 90%概率删除结尾句号
                continue
        elif char == "，":
            rand = random.random()
@@ -364,10 +360,12 @@ def random_remove_punctuation(text: str) -> str:
 def process_llm_response(text: str) -> List[str]:
    # processed_response = process_text_with_typos(content)
    # 对西文字符段落的回复长度设置为汉字字符的两倍
-    if len(text) > 100 and not is_western_paragraph(text) :
+    max_length = global_config.response_max_length
    max_sentence_num = global_config.response_max_sentence_num
    if len(text) > max_length and not is_western_paragraph(text) :
        logger.warning(f"回复过长 ({len(text)} 字符)，返回默认回复")
        return ["懒得说"]
-    elif len(text) > 200 :
+    elif len(text) > max_length * 2 :
        logger.warning(f"回复过长 ({len(text)} 字符)，返回默认回复")
        return ["懒得说"]
    # 处理长消息
@@ -377,7 +375,10 @@ def process_llm_response(text: str) -> List[str]:
        tone_error_rate=global_config.chinese_typo_tone_error_rate,
        word_replace_rate=global_config.chinese_typo_word_replace_rate,
    )
-    split_sentences = split_into_sentences_w_remove_punctuation(text)
+    if global_config.enable_response_spliter:
        split_sentences = split_into_sentences_w_remove_punctuation(text)
    else:
        split_sentences = [text]
    sentences = []
    for sentence in split_sentences:
        if global_config.chinese_typo_enable:
@@ -389,14 +390,14 @@ def process_llm_response(text: str) -> List[str]:
            sentences.append(sentence)
    # 检查分割后的消息数量是否过多（超过 response_max_sentence_num 条）
-    if len(sentences) > 3:
+    if len(sentences) > max_sentence_num:
        logger.warning(f"分割后消息数量过多 ({len(sentences)} 条)，返回默认回复")
        return [f"{global_config.BOT_NICKNAME}不知道哦"]
    return sentences
-def calculate_typing_time(input_string: str, chinese_time: float = 0.4, english_time: float = 0.2) -> float:
+def calculate_typing_time(input_string: str, chinese_time: float = 0.2, english_time: float = 0.1) -> float:
    """
    计算输入字符串所需的时间，中文和英文字符有不同的输入时间
        input_string (str): 输入的字符串
--- a/src/plugins/willing/mode_classical.py
+++ b/src/plugins/willing/mode_classical.py
@@ -1,6 +1,7 @@
 import asyncio
 from typing import Dict
 from ..chat.chat_stream import ChatStream
 from ..chat.config import global_config
 class WillingManager:
@@ -51,7 +52,7 @@ class WillingManager:
            current_willing += 0.05
        if is_emoji:
-            current_willing *= 0.2
+            current_willing *= global_config.emoji_response_penalty
        self.chat_reply_willing[chat_id] = min(current_willing, 3.0)
--- a/template/bot_config_template.toml
+++ b/template/bot_config_template.toml
@@ -67,6 +67,7 @@ willing_mode = "classical" # 回复意愿模式 经典模式
 response_willing_amplifier = 1 # 麦麦回复意愿放大系数，一般为1
 response_interested_rate_amplifier = 1 # 麦麦回复兴趣度放大系数,听到记忆里的内容时放大系数
 down_frequency_rate = 3 # 降低回复频率的群组回复意愿降低系数 除法
 emoji_response_penalty = 0.1 # 表情包回复惩罚系数：收到单个表情包时回复意愿乘以该值，数值越小越不容易回复，设为0则不回复单个表情包
 [response]
 model_r1_probability = 0.8 # 麦麦回答时选择主要回复模型1 模型的概率
@@ -105,7 +106,7 @@ enable = true  # 关键词反应功能的总开关
 [[keywords_reaction.rules]] # 如果想要新增多个关键词，直接复制本条，修改keywords和reaction即可
 enable = true # 是否启用此条（为了人类在未来AI战争能更好地识别AI（bushi），默认开启）
-keywords = ["人机", "bot", "机器", "入机", "robot", "机器人"] # 会触发反应的关键词
+keywords = ["人机", "bot", "机器", "入机", "robot", "机器人","ai","AI"] # 会触发反应的关键词
 reaction = "有人说你是人机或者机器人，否定这一事实，攻击或反问他。注意，只有认为你是机器人才回复，否则不要否认" # 触发之后添加的提示词
 [[keywords_reaction.rules]] # 就像这样复制
@@ -120,6 +121,12 @@ min_freq=9 # 最小字频阈值
 tone_error_rate=0.1 # 声调错误概率
 word_replace_rate=0.006 # 整词替换概率
 [response_spliter]
 enable_response_spliter = true # 是否启用回复分割器
 response_max_length = 100 # 回复允许的最大长度（字符数，西文段落按两倍计算），超出则返回默认回复
 response_max_sentence_num = 4 # 分割后允许的最大消息条数，超出则返回默认回复
 [remote] #发送统计信息，主要是看全球有多少只麦麦
 enable = true