better: added a response splitter and a customizable emoji-response penalty coefficient
@@ -1,179 +0,0 @@
[inner]
version = "0.0.10"

[mai_version]
version = "0.6.0"
version-fix = "snapshot-1"

# The notes below are for developers; ordinary users do not need to read them.
# If you modify this config file, change the value of version after your edit.
# If you add a new item, also add the corresponding variable to the BotConfig class.
# 1. If you add a new top-level [] section, e.g. a new [memory], add an entry for it to the include_configs dict in the load_config function of config.py:
#    "memory": {
#        "func": memory,
#        "support": ">=0.0.0",  # the new version number
#        "necessary": False     # whether the section is required
#    }
# 2. If you add a key under an existing section, e.g. memory_ban_words under [memory], add a version check inside the memory function of load_config in config.py:
#    if config.INNER_VERSION in SpecifierSet(">=0.0.2"):
#        config.memory_ban_words = set(memory_config.get("memory_ban_words", []))
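# Illustrative sketch (not from this repository): assuming load_config reads the TOML into a
# dict and checks "support" with packaging's SpecifierSet as shown above, the wiring could
# look roughly like this. parse_memory and toml_data are placeholder names, and config is
# the BotConfig instance being populated.
#
#     from packaging.specifiers import SpecifierSet
#
#     def parse_memory(parent: dict):
#         memory_config = parent["memory"]
#         if config.INNER_VERSION in SpecifierSet(">=0.0.2"):
#             config.memory_ban_words = set(memory_config.get("memory_ban_words", []))
#
#     include_configs = {
#         "memory": {"func": parse_memory, "support": ">=0.0.0", "necessary": False},
#     }
#     for name, item in include_configs.items():
#         if name in toml_data and config.INNER_VERSION in SpecifierSet(item["support"]):
#             item["func"](toml_data)
#         elif item["necessary"]:
#             raise KeyError(f"required config section missing: {name}")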

[bot]
qq = 2814567326
nickname = "麦麦"
alias_names = ['牢麦', '麦叠', '哈基麦']

[personality]
prompt_personality = ['曾经是一个学习地质的女大学生,现在学习心理学和脑科学,你会刷贴吧', '是一个女大学生,你有黑色头发,你会刷小红书', '是一个女大学生,你会刷b站,对ACG文化感兴趣']
personality_1_probability = 0.7 # probability that the first personality is used
personality_2_probability = 0.1 # probability that the second personality is used
personality_3_probability = 0.2 # probability that the third personality is used; make sure the three probabilities add up to 1
prompt_schedule = "一个曾经学习地质,现在学习心理学和脑科学的女大学生,喜欢刷qq,贴吧,知乎和小红书"

[message]
min_text_length = 2 # 麦麦 only answers messages whose text length is at least this value
max_context_size = 10 # number of context messages 麦麦 receives
emoji_chance = 0.2 # probability that 麦麦 uses an emoji sticker
thinking_timeout = 100 # how long 麦麦 is allowed to think

response_willing_amplifier = 1 # amplifier for 麦麦's willingness to reply; usually 1
response_interested_rate_amplifier = 1 # amplifier for 麦麦's interest rate; boosts replies when the message matches something in memory
down_frequency_rate = 2 # coefficient by which reply willingness is reduced in down-frequency groups
ban_words = []

ban_msgs_regex = []

[emoji]
check_interval = 120 # interval between emoji checks
register_interval = 10 # interval between emoji registrations
auto_save = true # automatically save (steal) emoji stickers
enable_check = false # whether to enable emoji filtering
check_prompt = "符合公序良俗" # filtering requirement for emoji stickers

[cq_code]
enable_pic_translate = false

[response]
model_r1_probability = 0.5 # probability of choosing primary reply model 1 when 麦麦 answers
model_v3_probability = 0.5 # probability of choosing secondary reply model 2 when 麦麦 answers
model_r1_distill_probability = 0 # probability of choosing secondary reply model 3 when 麦麦 answers
max_response_length = 1024 # maximum number of tokens in 麦麦's reply
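# Illustrative sketch (not from this repository): one way the three probabilities above could
# drive model selection, assuming they sum to 1. The returned names correspond to the
# [model.*] tables defined further down; cfg is a placeholder dict holding the values.
#
#     import random
#
#     def pick_reply_model(cfg: dict) -> str:
#         r = random.random()
#         if r < cfg["model_r1_probability"]:
#             return "llm_reasoning"        # reply model 1
#         if r < cfg["model_r1_probability"] + cfg["model_v3_probability"]:
#             return "llm_normal"           # reply model 2
#         return "llm_reasoning_minor"      # reply model 3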

[willing]
willing_mode = "classical" # reply-willingness mode: classical
# willing_mode = "dynamic" # dynamic mode (may be incompatible)
# willing_mode = "custom" # custom mode (adjustable by yourself)

[memory]
build_memory_interval = 3000 # memory-building interval in seconds; a lower interval means 麦麦 learns more, but also accumulates more redundant information
build_memory_distribution = [4, 4, 0.6, 48, 36, 0.4] # memory-building distribution; parameters: mean, standard deviation and weight of distribution 1, then mean, standard deviation and weight of distribution 2 (see the sketch after this section)
build_memory_sample_num = 10 # number of samples; higher values mean more memory-sampling passes
build_memory_sample_length = 30 # sample length; higher values make each memory segment richer
memory_compress_rate = 0.1 # memory compression rate; controls how aggressively memories are condensed; keep the default unless you want more detail at the cost of more redundancy

forget_memory_interval = 300 # memory-forgetting interval in seconds; a lower interval means 麦麦 forgets more often, keeping memory leaner but learning less
memory_forget_time = 24 # how old a memory must be before it can be forgotten, in hours
memory_forget_percentage = 0.005 # fraction of memories forgotten each pass; larger values forget more; keep the default


memory_ban_words = ['表情包', '图片', '回复', '聊天记录']
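# Illustrative sketch (not from this repository): assuming the six values above are
# (mean1, std1, weight1, mean2, std2, weight2) of a two-component Gaussian mixture, one
# sample could be drawn like this; how the project actually consumes the samples is not
# shown in this diff.
#
#     import random
#
#     def sample_build_memory_distribution(dist=(4, 4, 0.6, 48, 36, 0.4)) -> float:
#         mean1, std1, w1, mean2, std2, w2 = dist
#         if random.random() < w1 / (w1 + w2):
#             return random.gauss(mean1, std1)
#         return random.gauss(mean2, std2)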

[mood]
mood_update_interval = 1.0 # mood update interval in seconds
mood_decay_rate = 0.95 # mood decay rate
mood_intensity_factor = 1.0 # mood intensity factor

[keywords_reaction] # react to specific keywords (see the sketch after this section)
enable = true # master switch for the keyword-reaction feature

[[keywords_reaction.rules]]
enable = true
keywords = [ "人机", "bot", "机器", "入机", "robot", "机器人",]
reaction = "有人说你是人机或者机器人,否定这一事实,攻击或反问他。注意,只有认为你是机器人才回复,否则不要否认"

[[keywords_reaction.rules]]
enable = false
keywords = [ "测试关键词回复", "test", "",]
reaction = "回答“测试成功”"
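# Illustrative sketch (not from this repository): one plausible way such rules could be
# applied when building the prompt; the function and variable names are placeholders.
#
#     def apply_keyword_reactions(message: str, rules: list[dict]) -> list[str]:
#         extra_prompts = []
#         for rule in rules:
#             if not rule.get("enable", False):
#                 continue
#             if any(kw and kw in message for kw in rule.get("keywords", [])):
#                 extra_prompts.append(rule["reaction"])
#         return extra_prompts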

[chinese_typo]
enable = true # whether to enable the Chinese typo generator
error_rate=0.01 # per-character replacement probability
min_freq=7 # minimum character-frequency threshold
tone_error_rate=0.3 # tone-error probability
word_replace_rate=0.01 # whole-word replacement probability

[others]
enable_kuuki_read = true # whether to enable the "reading the air" feature
enable_friend_chat = true # whether to enable chatting with friends

[groups]
talk_allowed = [571780722,1022489779,534940728, 192194125, 851345375, 739044565, 766798517, 1030993430, 435591861, 708847644, 591693379, 571780722, 1028699246, 571780722, 1015816696] # groups in which replies are allowed
talk_frequency_down = [1022489779, 571780722] # groups where the reply frequency is lowered
ban_user_id = [3488737411, 2732836727, 3878664193, 3799953254] # QQ IDs whose messages are never read or replied to

[remote] # send usage statistics, mainly to count how many 麦麦 instances exist worldwide
enable = true

# If you use SiliconFlow, the models below need no changes; if you use the official DeepSeek API, change them to the custom macros defined in .env.prod; if you use a custom model, pick the model with the most similar role and fill in your own values.

# reasoning models

[model.llm_reasoning] # reply model 1, the primary reply model
# name = "Pro/deepseek-ai/DeepSeek-R1"
name = "Qwen/QwQ-32B"
provider = "SILICONFLOW"
pri_in = 1.0 # input price of the model (optional; used to track spending)
pri_out = 4.0 # output price of the model (optional; used to track spending)

[model.llm_reasoning_minor] # reply model 3, a secondary reply model
name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"
provider = "SILICONFLOW"
pri_in = 1.26 # input price of the model (optional; used to track spending)
pri_out = 1.26 # output price of the model (optional; used to track spending)

# non-reasoning models

[model.llm_normal] # V3; reply model 2, a secondary reply model
name = "Qwen/Qwen2.5-32B-Instruct"
provider = "SILICONFLOW"
pri_in = 1.26 # input price of the model (optional; used to track spending)
pri_out = 1.26 # output price of the model (optional; used to track spending)

[model.llm_emotion_judge] # emoji judgement
name = "Qwen/Qwen2.5-14B-Instruct"
provider = "SILICONFLOW"
pri_in = 0.7
pri_out = 0.7

[model.llm_topic_judge] # memory-topic judgement: Qwen2.5 7B is recommended
name = "Pro/Qwen/Qwen2.5-7B-Instruct"
# name = "Qwen/Qwen2-1.5B-Instruct"
provider = "SILICONFLOW"
pri_in = 0.35
pri_out = 0.35

[model.llm_summary_by_topic] # summarization model: Qwen2.5 32B or larger is recommended
name = "Qwen/Qwen2.5-32B-Instruct"
provider = "SILICONFLOW"
pri_in = 1.26
pri_out = 1.26

[model.moderation] # content moderation, still in development
name = ""
provider = "SILICONFLOW"
pri_in = 1.0
pri_out = 2.0

# vision model

[model.vlm] # image recognition
name = "Pro/Qwen/Qwen2.5-VL-7B-Instruct"
provider = "SILICONFLOW"
pri_in = 0.35
pri_out = 0.35

# embedding model

[model.embedding] # embedding
name = "BAAI/bge-m3"
provider = "SILICONFLOW"
@@ -57,6 +57,7 @@ class BotConfig:
    response_willing_amplifier: float = 1.0  # reply-willingness amplifier
    response_interested_rate_amplifier: float = 1.0  # reply interest-rate amplifier
    down_frequency_rate: float = 3  # willingness reduction coefficient for down-frequency groups
+    emoji_response_penalty: float = 0.0  # penalty applied to replies to emoji messages

    # response
    MODEL_R1_PROBABILITY: float = 0.8  # probability of the R1 model
@@ -102,6 +103,11 @@ class BotConfig:
    chinese_typo_tone_error_rate = 0.2  # tone-error probability
    chinese_typo_word_replace_rate = 0.02  # whole-word replacement probability

+    #response_spliter
+    enable_response_spliter = True  # whether to enable the response splitter
+    response_max_length = 100  # maximum allowed reply length
+    response_max_sentence_num = 3  # maximum allowed number of sentences in a reply

    # remote
    remote_enable: bool = True  # whether to enable remote statistics reporting
@@ -242,6 +248,7 @@ class BotConfig:
            config.response_willing_amplifier = willing_config.get("response_willing_amplifier", config.response_willing_amplifier)
            config.response_interested_rate_amplifier = willing_config.get("response_interested_rate_amplifier", config.response_interested_rate_amplifier)
            config.down_frequency_rate = willing_config.get("down_frequency_rate", config.down_frequency_rate)
+            config.emoji_response_penalty = willing_config.get("emoji_response_penalty", config.emoji_response_penalty)

        def model(parent: dict):
            # load the model configuration
@@ -379,6 +386,12 @@ class BotConfig:
                "word_replace_rate", config.chinese_typo_word_replace_rate
            )

+        def response_spliter(parent: dict):
+            response_spliter_config = parent["response_spliter"]
+            config.enable_response_spliter = response_spliter_config.get("enable_response_spliter", config.enable_response_spliter)
+            config.response_max_length = response_spliter_config.get("response_max_length", config.response_max_length)
+            config.response_max_sentence_num = response_spliter_config.get("response_max_sentence_num", config.response_max_sentence_num)

        def groups(parent: dict):
            groups_config = parent["groups"]
            config.talk_allowed_groups = set(groups_config.get("talk_allowed", []))
@@ -409,6 +422,7 @@ class BotConfig:
            "remote": {"func": remote, "support": ">=0.0.10", "necessary": False},
            "keywords_reaction": {"func": keywords_reaction, "support": ">=0.0.2", "necessary": False},
            "chinese_typo": {"func": chinese_typo, "support": ">=0.0.3", "necessary": False},
+            "response_spliter": {"func": response_spliter, "support": ">=0.0.11", "necessary": False},
            "experimental": {"func": experimental, "support": ">=0.0.11", "necessary": False},
        }

@@ -244,21 +244,17 @@ def split_into_sentences_w_remove_punctuation(text: str) -> List[str]:
        List[str]: the list of sentences after splitting
    """
    len_text = len(text)
-    if len_text < 5:
+    if len_text < 4:
        if random.random() < 0.01:
            return list(text)  # if the text is very short and the random check fires, split it into individual characters
        else:
            return [text]
    if len_text < 12:
-        split_strength = 0.3
+        split_strength = 0.2
    elif len_text < 32:
-        split_strength = 0.7
+        split_strength = 0.6
    else:
        split_strength = 0.9
    # strip newline characters first
    # print(f"split_strength: {split_strength}")

    # print(f"处理前的文本: {text}")
-    split_strength = 0.7

    # check whether the text is a Western-character paragraph
    if not is_western_paragraph(text):
@@ -348,7 +344,7 @@ def random_remove_punctuation(text: str) -> str:

    for i, char in enumerate(text):
        if char == "。" and i == text_len - 1:  # full stop at the very end
-            if random.random() > 0.4:  # delete the trailing full stop with 80% probability
+            if random.random() > 0.1:  # delete the trailing full stop with 90% probability
                continue
        elif char == ",":
            rand = random.random()
@@ -364,10 +360,12 @@ def random_remove_punctuation(text: str) -> str:
def process_llm_response(text: str) -> List[str]:
    # processed_response = process_text_with_typos(content)
    # for Western-character paragraphs, the allowed reply length is twice that for Chinese characters
-    if len(text) > 100 and not is_western_paragraph(text) :
+    max_length = global_config.response_max_length
+    max_sentence_num = global_config.response_max_sentence_num
+    if len(text) > max_length and not is_western_paragraph(text) :
        logger.warning(f"回复过长 ({len(text)} 字符),返回默认回复")
        return ["懒得说"]
-    elif len(text) > 200 :
+    elif len(text) > max_length * 2 :
        logger.warning(f"回复过长 ({len(text)} 字符),返回默认回复")
        return ["懒得说"]
    # handle long messages
@@ -377,7 +375,10 @@ def process_llm_response(text: str) -> List[str]:
        tone_error_rate=global_config.chinese_typo_tone_error_rate,
        word_replace_rate=global_config.chinese_typo_word_replace_rate,
    )
-    split_sentences = split_into_sentences_w_remove_punctuation(text)
+    if global_config.enable_response_spliter:
+        split_sentences = split_into_sentences_w_remove_punctuation(text)
+    else:
+        split_sentences = [text]
    sentences = []
    for sentence in split_sentences:
        if global_config.chinese_typo_enable:
@@ -389,14 +390,14 @@ def process_llm_response(text: str) -> List[str]:
            sentences.append(sentence)
    # check whether splitting produced too many messages (more than 3)

-    if len(sentences) > 3:
+    if len(sentences) > max_sentence_num:
        logger.warning(f"分割后消息数量过多 ({len(sentences)} 条),返回默认回复")
        return [f"{global_config.BOT_NICKNAME}不知道哦"]

    return sentences


-def calculate_typing_time(input_string: str, chinese_time: float = 0.4, english_time: float = 0.2) -> float:
+def calculate_typing_time(input_string: str, chinese_time: float = 0.2, english_time: float = 0.1) -> float:
    """
    Estimate how long it takes to type the input string; Chinese and English characters take different amounts of time.
        input_string (str): the input string
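The body of calculate_typing_time is not shown in this diff. As a rough sketch of what a per-character timing function like this might look like (an assumption, not the project's actual implementation), charging chinese_time seconds per CJK character and english_time seconds for everything else:

    def calculate_typing_time(input_string: str, chinese_time: float = 0.2, english_time: float = 0.1) -> float:
        total = 0.0
        for ch in input_string:
            # very rough CJK check: the main CJK Unified Ideographs block
            if "\u4e00" <= ch <= "\u9fff":
                total += chinese_time
            else:
                total += english_time
        return total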
@@ -1,6 +1,7 @@
import asyncio
from typing import Dict
from ..chat.chat_stream import ChatStream
+from ..chat.config import global_config


class WillingManager:
@@ -51,7 +52,7 @@ class WillingManager:
            current_willing += 0.05

        if is_emoji:
-            current_willing *= 0.2
+            current_willing *= global_config.emoji_response_penalty

        self.chat_reply_willing[chat_id] = min(current_willing, 3.0)

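A quick worked example of the new penalty (values taken from the template below; not code from this diff):

    # with emoji_response_penalty = 0.1:
    current_willing = 1.0
    current_willing *= 0.1  # an emoji-only message drops willingness from 1.0 to 0.1
    # with emoji_response_penalty = 0.0 the willingness becomes 0, i.e. never reply to a lone emoji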
@@ -67,6 +67,7 @@ willing_mode = "classical" # reply-willingness mode: classical
response_willing_amplifier = 1 # amplifier for 麦麦's willingness to reply; usually 1
response_interested_rate_amplifier = 1 # amplifier for 麦麦's interest rate; boosts replies when the message matches something in memory
down_frequency_rate = 3 # coefficient by which reply willingness is divided in down-frequency groups
+emoji_response_penalty = 0.1 # emoji-reply penalty coefficient; set it to 0 to never reply to a lone emoji sticker; lower values reduce replies to stand-alone emoji messages

[response]
model_r1_probability = 0.8 # probability of choosing primary reply model 1 when 麦麦 answers
@@ -105,7 +106,7 @@ enable = true # master switch for the keyword-reaction feature

[[keywords_reaction.rules]] # to add more keywords, just copy this block and change keywords and reaction
enable = true # whether this rule is enabled (so humans can better identify AIs in the future AI war (just kidding); on by default)
-keywords = ["人机", "bot", "机器", "入机", "robot", "机器人"] # keywords that trigger the reaction
+keywords = ["人机", "bot", "机器", "入机", "robot", "机器人","ai","AI"] # keywords that trigger the reaction
reaction = "有人说你是人机或者机器人,否定这一事实,攻击或反问他。注意,只有认为你是机器人才回复,否则不要否认" # prompt appended when the rule triggers

[[keywords_reaction.rules]] # copy it just like this
@@ -120,6 +121,12 @@ min_freq=9 # minimum character-frequency threshold
tone_error_rate=0.1 # tone-error probability
word_replace_rate=0.006 # whole-word replacement probability

+[response_spliter]
+enable_response_spliter = true # whether to enable the response splitter
+response_max_length = 100 # maximum allowed reply length
+response_max_sentence_num = 4 # maximum allowed number of sentences in a reply


[remote] # send usage statistics, mainly to count how many 麦麦 instances exist worldwide
enable = true