better: added a response splitter and a customizable emoji-response penalty coefficient
@@ -1,179 +0,0 @@
[inner]
version = "0.0.10"

[mai_version]
version = "0.6.0"
version-fix = "snapshot-1"

# The notes below are for developers; ordinary users do not need to read them.
# If you modify this config file, change the value of version after your edit.
# If you add a new item, also add the corresponding variable to the BotConfig class.
# 1. If you add a new top-level [] section, e.g. a new [memory], add an entry for it to the include_configs dict in the load_config function of config.py:
#    "memory": {
#        "func": memory,
#        "support": ">=0.0.0",  # the new version number
#        "necessary": False     # whether the section is required
#    }
# 2. If you add a key under an existing section, e.g. memory_ban_words under [memory], add a version check inside the memory function of load_config in config.py:
#    if config.INNER_VERSION in SpecifierSet(">=0.0.2"):
#        config.memory_ban_words = set(memory_config.get("memory_ban_words", []))
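# Illustrative sketch (not from this repository): assuming load_config reads the TOML into a
# dict and checks "support" with packaging's SpecifierSet as shown above, the wiring could
# look roughly like this. parse_memory and toml_data are placeholder names, and config is
# the BotConfig instance being populated.
#
#     from packaging.specifiers import SpecifierSet
#
#     def parse_memory(parent: dict):
#         memory_config = parent["memory"]
#         if config.INNER_VERSION in SpecifierSet(">=0.0.2"):
#             config.memory_ban_words = set(memory_config.get("memory_ban_words", []))
#
#     include_configs = {
#         "memory": {"func": parse_memory, "support": ">=0.0.0", "necessary": False},
#     }
#     for name, item in include_configs.items():
#         if name in toml_data and config.INNER_VERSION in SpecifierSet(item["support"]):
#             item["func"](toml_data)
#         elif item["necessary"]:
#             raise KeyError(f"required config section missing: {name}")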

[bot]
qq = 2814567326
nickname = "麦麦"
alias_names = ['牢麦', '麦叠', '哈基麦']

[personality]
prompt_personality = ['曾经是一个学习地质的女大学生,现在学习心理学和脑科学,你会刷贴吧', '是一个女大学生,你有黑色头发,你会刷小红书', '是一个女大学生,你会刷b站,对ACG文化感兴趣']
personality_1_probability = 0.7 # probability that the first personality is used
personality_2_probability = 0.1 # probability that the second personality is used
personality_3_probability = 0.2 # probability that the third personality is used; make sure the three probabilities add up to 1
prompt_schedule = "一个曾经学习地质,现在学习心理学和脑科学的女大学生,喜欢刷qq,贴吧,知乎和小红书"

[message]
min_text_length = 2 # 麦麦 only answers messages whose text length is at least this value
max_context_size = 10 # number of context messages 麦麦 receives
emoji_chance = 0.2 # probability that 麦麦 uses an emoji sticker
thinking_timeout = 100 # how long 麦麦 is allowed to think

response_willing_amplifier = 1 # amplifier for 麦麦's willingness to reply; usually 1
response_interested_rate_amplifier = 1 # amplifier for 麦麦's interest rate; boosts replies when the message matches something in memory
down_frequency_rate = 2 # coefficient by which reply willingness is reduced in down-frequency groups
ban_words = []

ban_msgs_regex = []

[emoji]
check_interval = 120 # interval between emoji checks
register_interval = 10 # interval between emoji registrations
auto_save = true # automatically save (steal) emoji stickers
enable_check = false # whether to enable emoji filtering
check_prompt = "符合公序良俗" # filtering requirement for emoji stickers

[cq_code]
enable_pic_translate = false

[response]
model_r1_probability = 0.5 # probability of choosing primary reply model 1 when 麦麦 answers
model_v3_probability = 0.5 # probability of choosing secondary reply model 2 when 麦麦 answers
model_r1_distill_probability = 0 # probability of choosing secondary reply model 3 when 麦麦 answers
max_response_length = 1024 # maximum number of tokens in 麦麦's reply
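# Illustrative sketch (not from this repository): one way the three probabilities above could
# drive model selection, assuming they sum to 1. The returned names correspond to the
# [model.*] tables defined further down; cfg is a placeholder dict holding the values.
#
#     import random
#
#     def pick_reply_model(cfg: dict) -> str:
#         r = random.random()
#         if r < cfg["model_r1_probability"]:
#             return "llm_reasoning"        # reply model 1
#         if r < cfg["model_r1_probability"] + cfg["model_v3_probability"]:
#             return "llm_normal"           # reply model 2
#         return "llm_reasoning_minor"      # reply model 3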

[willing]
willing_mode = "classical" # reply-willingness mode: classical
# willing_mode = "dynamic" # dynamic mode (may be incompatible)
# willing_mode = "custom" # custom mode (adjustable by yourself)

[memory]
build_memory_interval = 3000 # memory-building interval in seconds; a lower interval means 麦麦 learns more, but also accumulates more redundant information
build_memory_distribution = [4, 4, 0.6, 48, 36, 0.4] # memory-building distribution; parameters: mean, standard deviation and weight of distribution 1, then mean, standard deviation and weight of distribution 2 (see the sketch after this section)
build_memory_sample_num = 10 # number of samples; higher values mean more memory-sampling passes
build_memory_sample_length = 30 # sample length; higher values make each memory segment richer
memory_compress_rate = 0.1 # memory compression rate; controls how aggressively memories are condensed; keep the default unless you want more detail at the cost of more redundancy

forget_memory_interval = 300 # memory-forgetting interval in seconds; a lower interval means 麦麦 forgets more often, keeping memory leaner but learning less
memory_forget_time = 24 # how old a memory must be before it can be forgotten, in hours
memory_forget_percentage = 0.005 # fraction of memories forgotten each pass; larger values forget more; keep the default


memory_ban_words = ['表情包', '图片', '回复', '聊天记录']
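# Illustrative sketch (not from this repository): assuming the six values above are
# (mean1, std1, weight1, mean2, std2, weight2) of a two-component Gaussian mixture, one
# sample could be drawn like this; how the project actually consumes the samples is not
# shown in this diff.
#
#     import random
#
#     def sample_build_memory_distribution(dist=(4, 4, 0.6, 48, 36, 0.4)) -> float:
#         mean1, std1, w1, mean2, std2, w2 = dist
#         if random.random() < w1 / (w1 + w2):
#             return random.gauss(mean1, std1)
#         return random.gauss(mean2, std2)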

[mood]
mood_update_interval = 1.0 # mood update interval in seconds
mood_decay_rate = 0.95 # mood decay rate
mood_intensity_factor = 1.0 # mood intensity factor

[keywords_reaction] # react to specific keywords (see the sketch after this section)
enable = true # master switch for the keyword-reaction feature

[[keywords_reaction.rules]]
enable = true
keywords = [ "人机", "bot", "机器", "入机", "robot", "机器人",]
reaction = "有人说你是人机或者机器人,否定这一事实,攻击或反问他。注意,只有认为你是机器人才回复,否则不要否认"

[[keywords_reaction.rules]]
enable = false
keywords = [ "测试关键词回复", "test", "",]
reaction = "回答“测试成功”"
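# Illustrative sketch (not from this repository): one plausible way such rules could be
# applied when building the prompt; the function and variable names are placeholders.
#
#     def apply_keyword_reactions(message: str, rules: list[dict]) -> list[str]:
#         extra_prompts = []
#         for rule in rules:
#             if not rule.get("enable", False):
#                 continue
#             if any(kw and kw in message for kw in rule.get("keywords", [])):
#                 extra_prompts.append(rule["reaction"])
#         return extra_prompts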

[chinese_typo]
enable = true # whether to enable the Chinese typo generator
error_rate=0.01 # per-character replacement probability
min_freq=7 # minimum character-frequency threshold
tone_error_rate=0.3 # tone-error probability
word_replace_rate=0.01 # whole-word replacement probability

[others]
enable_kuuki_read = true # whether to enable the "reading the air" feature
enable_friend_chat = true # whether to enable chatting with friends

[groups]
talk_allowed = [571780722,1022489779,534940728, 192194125, 851345375, 739044565, 766798517, 1030993430, 435591861, 708847644, 591693379, 571780722, 1028699246, 571780722, 1015816696] # groups in which replies are allowed
talk_frequency_down = [1022489779, 571780722] # groups where the reply frequency is lowered
ban_user_id = [3488737411, 2732836727, 3878664193, 3799953254] # QQ IDs whose messages are never read or replied to

[remote] # send usage statistics, mainly to count how many 麦麦 instances exist worldwide
enable = true

# If you use SiliconFlow, the models below need no changes; if you use the official DeepSeek API, change them to the custom macros defined in .env.prod; if you use a custom model, pick the model with the most similar role and fill in your own values.

# reasoning models

[model.llm_reasoning] # reply model 1, the primary reply model
# name = "Pro/deepseek-ai/DeepSeek-R1"
name = "Qwen/QwQ-32B"
provider = "SILICONFLOW"
pri_in = 1.0 # input price of the model (optional; used to track spending)
pri_out = 4.0 # output price of the model (optional; used to track spending)

[model.llm_reasoning_minor] # reply model 3, a secondary reply model
name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"
provider = "SILICONFLOW"
pri_in = 1.26 # input price of the model (optional; used to track spending)
pri_out = 1.26 # output price of the model (optional; used to track spending)

# non-reasoning models

[model.llm_normal] # V3; reply model 2, a secondary reply model
name = "Qwen/Qwen2.5-32B-Instruct"
provider = "SILICONFLOW"
pri_in = 1.26 # input price of the model (optional; used to track spending)
pri_out = 1.26 # output price of the model (optional; used to track spending)

[model.llm_emotion_judge] # emoji judgement
name = "Qwen/Qwen2.5-14B-Instruct"
provider = "SILICONFLOW"
pri_in = 0.7
pri_out = 0.7

[model.llm_topic_judge] # memory-topic judgement: Qwen2.5 7B is recommended
name = "Pro/Qwen/Qwen2.5-7B-Instruct"
# name = "Qwen/Qwen2-1.5B-Instruct"
provider = "SILICONFLOW"
pri_in = 0.35
pri_out = 0.35

[model.llm_summary_by_topic] # summarization model: Qwen2.5 32B or larger is recommended
name = "Qwen/Qwen2.5-32B-Instruct"
provider = "SILICONFLOW"
pri_in = 1.26
pri_out = 1.26

[model.moderation] # content moderation, still in development
name = ""
provider = "SILICONFLOW"
pri_in = 1.0
pri_out = 2.0

# vision model

[model.vlm] # image recognition
name = "Pro/Qwen/Qwen2.5-VL-7B-Instruct"
provider = "SILICONFLOW"
pri_in = 0.35
pri_out = 0.35

# embedding model

[model.embedding] # embedding
name = "BAAI/bge-m3"
provider = "SILICONFLOW"
@@ -57,6 +57,7 @@ class BotConfig:
    response_willing_amplifier: float = 1.0  # reply-willingness amplifier
    response_interested_rate_amplifier: float = 1.0  # reply interest-rate amplifier
    down_frequency_rate: float = 3  # willingness reduction coefficient for down-frequency groups
+    emoji_response_penalty: float = 0.0  # penalty applied to replies to emoji messages

    # response
    MODEL_R1_PROBABILITY: float = 0.8  # probability of the R1 model
@@ -102,6 +103,11 @@ class BotConfig:
    chinese_typo_tone_error_rate = 0.2  # tone-error probability
    chinese_typo_word_replace_rate = 0.02  # whole-word replacement probability

+    #response_spliter
+    enable_response_spliter = True  # whether to enable the response splitter
+    response_max_length = 100  # maximum allowed reply length
+    response_max_sentence_num = 3  # maximum allowed number of sentences in a reply

    # remote
    remote_enable: bool = True  # whether to enable remote statistics reporting
@@ -242,6 +248,7 @@ class BotConfig:
            config.response_willing_amplifier = willing_config.get("response_willing_amplifier", config.response_willing_amplifier)
            config.response_interested_rate_amplifier = willing_config.get("response_interested_rate_amplifier", config.response_interested_rate_amplifier)
            config.down_frequency_rate = willing_config.get("down_frequency_rate", config.down_frequency_rate)
+            config.emoji_response_penalty = willing_config.get("emoji_response_penalty", config.emoji_response_penalty)

        def model(parent: dict):
            # load the model configuration
@@ -379,6 +386,12 @@ class BotConfig:
                "word_replace_rate", config.chinese_typo_word_replace_rate
            )

+        def response_spliter(parent: dict):
+            response_spliter_config = parent["response_spliter"]
+            config.enable_response_spliter = response_spliter_config.get("enable_response_spliter", config.enable_response_spliter)
+            config.response_max_length = response_spliter_config.get("response_max_length", config.response_max_length)
+            config.response_max_sentence_num = response_spliter_config.get("response_max_sentence_num", config.response_max_sentence_num)

        def groups(parent: dict):
            groups_config = parent["groups"]
            config.talk_allowed_groups = set(groups_config.get("talk_allowed", []))
@@ -409,6 +422,7 @@ class BotConfig:
            "remote": {"func": remote, "support": ">=0.0.10", "necessary": False},
            "keywords_reaction": {"func": keywords_reaction, "support": ">=0.0.2", "necessary": False},
            "chinese_typo": {"func": chinese_typo, "support": ">=0.0.3", "necessary": False},
+            "response_spliter": {"func": response_spliter, "support": ">=0.0.11", "necessary": False},
            "experimental": {"func": experimental, "support": ">=0.0.11", "necessary": False},
        }

@@ -244,21 +244,17 @@ def split_into_sentences_w_remove_punctuation(text: str) -> List[str]:
        List[str]: the list of sentences after splitting
    """
    len_text = len(text)
-    if len_text < 5:
+    if len_text < 4:
        if random.random() < 0.01:
            return list(text)  # if the text is very short and the random check fires, split it into individual characters
        else:
            return [text]
    if len_text < 12:
-        split_strength = 0.3
+        split_strength = 0.2
    elif len_text < 32:
-        split_strength = 0.7
+        split_strength = 0.6
    else:
        split_strength = 0.9
    # strip newline characters first
    # print(f"split_strength: {split_strength}")

    # print(f"处理前的文本: {text}")
-    split_strength = 0.7

    # check whether the text is a Western-character paragraph
    if not is_western_paragraph(text):
@@ -348,7 +344,7 @@ def random_remove_punctuation(text: str) -> str:

    for i, char in enumerate(text):
        if char == "。" and i == text_len - 1:  # full stop at the very end
-            if random.random() > 0.4:  # delete the trailing full stop with 80% probability
+            if random.random() > 0.1:  # delete the trailing full stop with 90% probability
                continue
        elif char == ",":
            rand = random.random()
@@ -364,10 +360,12 @@ def random_remove_punctuation(text: str) -> str:
def process_llm_response(text: str) -> List[str]:
    # processed_response = process_text_with_typos(content)
    # for Western-character paragraphs, the allowed reply length is twice that for Chinese characters
-    if len(text) > 100 and not is_western_paragraph(text) :
+    max_length = global_config.response_max_length
+    max_sentence_num = global_config.response_max_sentence_num
+    if len(text) > max_length and not is_western_paragraph(text) :
        logger.warning(f"回复过长 ({len(text)} 字符),返回默认回复")
        return ["懒得说"]
-    elif len(text) > 200 :
+    elif len(text) > max_length * 2 :
        logger.warning(f"回复过长 ({len(text)} 字符),返回默认回复")
        return ["懒得说"]
    # handle long messages
@@ -377,7 +375,10 @@ def process_llm_response(text: str) -> List[str]:
        tone_error_rate=global_config.chinese_typo_tone_error_rate,
        word_replace_rate=global_config.chinese_typo_word_replace_rate,
    )
-    split_sentences = split_into_sentences_w_remove_punctuation(text)
+    if global_config.enable_response_spliter:
+        split_sentences = split_into_sentences_w_remove_punctuation(text)
+    else:
+        split_sentences = [text]
    sentences = []
    for sentence in split_sentences:
        if global_config.chinese_typo_enable:
@@ -389,14 +390,14 @@ def process_llm_response(text: str) -> List[str]:
            sentences.append(sentence)
    # check whether splitting produced too many messages (more than 3)

-    if len(sentences) > 3:
+    if len(sentences) > max_sentence_num:
        logger.warning(f"分割后消息数量过多 ({len(sentences)} 条),返回默认回复")
        return [f"{global_config.BOT_NICKNAME}不知道哦"]

    return sentences


-def calculate_typing_time(input_string: str, chinese_time: float = 0.4, english_time: float = 0.2) -> float:
+def calculate_typing_time(input_string: str, chinese_time: float = 0.2, english_time: float = 0.1) -> float:
    """
    Estimate how long it takes to type the input string; Chinese and English characters take different amounts of time.
        input_string (str): the input string
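The body of calculate_typing_time is not shown in this diff. As a rough sketch of what a per-character timing function like this might look like (an assumption, not the project's actual implementation), charging chinese_time seconds per CJK character and english_time seconds for everything else:

    def calculate_typing_time(input_string: str, chinese_time: float = 0.2, english_time: float = 0.1) -> float:
        total = 0.0
        for ch in input_string:
            # very rough CJK check: the main CJK Unified Ideographs block
            if "\u4e00" <= ch <= "\u9fff":
                total += chinese_time
            else:
                total += english_time
        return total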
@@ -1,6 +1,7 @@
import asyncio
from typing import Dict
from ..chat.chat_stream import ChatStream
+from ..chat.config import global_config


class WillingManager:
@@ -51,7 +52,7 @@ class WillingManager:
            current_willing += 0.05

        if is_emoji:
-            current_willing *= 0.2
+            current_willing *= global_config.emoji_response_penalty

        self.chat_reply_willing[chat_id] = min(current_willing, 3.0)

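A quick worked example of the new penalty (values taken from the template below; not code from this diff):

    # with emoji_response_penalty = 0.1:
    current_willing = 1.0
    current_willing *= 0.1  # an emoji-only message drops willingness from 1.0 to 0.1
    # with emoji_response_penalty = 0.0 the willingness becomes 0, i.e. never reply to a lone emoji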
@@ -67,6 +67,7 @@ willing_mode = "classical" # reply-willingness mode: classical
response_willing_amplifier = 1 # amplifier for 麦麦's willingness to reply; usually 1
response_interested_rate_amplifier = 1 # amplifier for 麦麦's interest rate; boosts replies when the message matches something in memory
down_frequency_rate = 3 # coefficient by which reply willingness is divided in down-frequency groups
+emoji_response_penalty = 0.1 # emoji-reply penalty coefficient; set it to 0 to never reply to a lone emoji sticker; lower values reduce replies to stand-alone emoji messages

[response]
model_r1_probability = 0.8 # probability of choosing primary reply model 1 when 麦麦 answers
@@ -105,7 +106,7 @@ enable = true # master switch for the keyword-reaction feature

[[keywords_reaction.rules]] # to add more keywords, just copy this block and change keywords and reaction
enable = true # whether this rule is enabled (so humans can better identify AIs in the future AI war (just kidding); on by default)
-keywords = ["人机", "bot", "机器", "入机", "robot", "机器人"] # keywords that trigger the reaction
+keywords = ["人机", "bot", "机器", "入机", "robot", "机器人","ai","AI"] # keywords that trigger the reaction
reaction = "有人说你是人机或者机器人,否定这一事实,攻击或反问他。注意,只有认为你是机器人才回复,否则不要否认" # prompt appended when the rule triggers

[[keywords_reaction.rules]] # copy it just like this
@@ -120,6 +121,12 @@ min_freq=9 # minimum character-frequency threshold
tone_error_rate=0.1 # tone-error probability
word_replace_rate=0.006 # whole-word replacement probability

+[response_spliter]
+enable_response_spliter = true # whether to enable the response splitter
+response_max_length = 100 # maximum allowed reply length
+response_max_sentence_num = 4 # maximum allowed number of sentences in a reply


[remote] # send usage statistics, mainly to count how many 麦麦 instances exist worldwide
enable = true