Merge branch 'debug' of github.com:ChangingSelf/MaiMBot into debug

This commit is contained in:
ChangingSelf
2025-03-09 11:35:47 +08:00
9 changed files with 652 additions and 237 deletions

View File

@@ -4,11 +4,15 @@ from typing import Dict, Optional
import tomli
from loguru import logger
from packaging import version
from packaging.version import Version, InvalidVersion
from packaging.specifiers import SpecifierSet,InvalidSpecifier
@dataclass
class BotConfig:
"""机器人配置类"""
INNER_VERSION: Version = None
BOT_QQ: Optional[int] = 1
BOT_NICKNAME: Optional[str] = None
@@ -93,12 +97,284 @@ class BotConfig:
if not os.path.exists(config_dir):
os.makedirs(config_dir)
return config_dir
@classmethod
def convert_to_specifierset(cls, value: str) -> SpecifierSet:
"""将 字符串 版本表达式转换成 SpecifierSet
Args:
value[str]: 版本表达式(字符串)
Returns:
SpecifierSet
"""
try:
converted = SpecifierSet(value)
except InvalidSpecifier as e:
logger.error(
f"{value} 分类使用了错误的版本约束表达式\n",
"请阅读 https://semver.org/lang/zh-CN/ 修改代码"
)
exit(1)
return converted
@classmethod
def get_config_version(cls, toml: dict) -> Version:
"""提取配置文件的 SpecifierSet 版本数据
Args:
toml[dict]: 输入的配置文件字典
Returns:
Version
"""
if 'inner' in toml:
try:
config_version : str = toml["inner"]["version"]
except KeyError as e:
logger.error(f"配置文件中 inner 段 不存在 {e}, 这是错误的配置文件")
raise KeyError(f"配置文件中 inner 段 不存在 {e}, 这是错误的配置文件")
else:
toml["inner"] = { "version": "0.0.0" }
config_version = toml["inner"]["version"]
try:
ver = version.parse(config_version)
except InvalidVersion as e:
logger.error(
"配置文件中 inner段 的 version 键是错误的版本描述\n"
"请阅读 https://semver.org/lang/zh-CN/ 修改配置,并参考本项目指定的模板进行修改\n"
"本项目在不同的版本下有不同的模板,请注意识别"
)
raise InvalidVersion("配置文件中 inner段 的 version 键是错误的版本描述\n")
return ver
@classmethod
def load_config(cls, config_path: str = None) -> "BotConfig":
"""从TOML配置文件加载配置"""
config = cls()
def personality(parent: dict):
personality_config=parent['personality']
personality=personality_config.get('prompt_personality')
if len(personality) >= 2:
logger.info(f"载入自定义人格:{personality}")
config.PROMPT_PERSONALITY=personality_config.get('prompt_personality',config.PROMPT_PERSONALITY)
logger.info(f"载入自定义日程prompt:{personality_config.get('prompt_schedule',config.PROMPT_SCHEDULE_GEN)}")
config.PROMPT_SCHEDULE_GEN=personality_config.get('prompt_schedule',config.PROMPT_SCHEDULE_GEN)
if config.INNER_VERSION in SpecifierSet(">=0.0.2"):
config.PERSONALITY_1=personality_config.get('personality_1_probability',config.PERSONALITY_1)
config.PERSONALITY_2=personality_config.get('personality_2_probability',config.PERSONALITY_2)
config.PERSONALITY_3=personality_config.get('personality_3_probability',config.PERSONALITY_3)
def emoji(parent: dict):
emoji_config = parent["emoji"]
config.EMOJI_CHECK_INTERVAL = emoji_config.get("check_interval", config.EMOJI_CHECK_INTERVAL)
config.EMOJI_REGISTER_INTERVAL = emoji_config.get("register_interval", config.EMOJI_REGISTER_INTERVAL)
config.EMOJI_CHECK_PROMPT = emoji_config.get('check_prompt',config.EMOJI_CHECK_PROMPT)
config.EMOJI_SAVE = emoji_config.get('auto_save',config.EMOJI_SAVE)
config.EMOJI_CHECK = emoji_config.get('enable_check',config.EMOJI_CHECK)
def cq_code(parent: dict):
cq_code_config = parent["cq_code"]
config.ENABLE_PIC_TRANSLATE = cq_code_config.get("enable_pic_translate", config.ENABLE_PIC_TRANSLATE)
def bot(parent: dict):
# 机器人基础配置
bot_config = parent["bot"]
bot_qq = bot_config.get("qq")
config.BOT_QQ = int(bot_qq)
config.BOT_NICKNAME = bot_config.get("nickname", config.BOT_NICKNAME)
def response(parent: dict):
response_config = parent["response"]
config.MODEL_R1_PROBABILITY = response_config.get("model_r1_probability", config.MODEL_R1_PROBABILITY)
config.MODEL_V3_PROBABILITY = response_config.get("model_v3_probability", config.MODEL_V3_PROBABILITY)
config.MODEL_R1_DISTILL_PROBABILITY = response_config.get("model_r1_distill_probability", config.MODEL_R1_DISTILL_PROBABILITY)
config.max_response_length = response_config.get("max_response_length", config.max_response_length)
def model(parent: dict):
# 加载模型配置
model_config:dict = parent["model"]
config_list = [
"llm_reasoning",
"llm_reasoning_minor",
"llm_normal",
"llm_normal_minor",
"llm_topic_judge",
"llm_summary_by_topic",
"llm_emotion_judge",
"vlm",
"embedding",
"moderation"
]
for item in config_list:
if item in model_config:
cfg_item:dict = model_config[item]
# base_url 的例子: SILICONFLOW_BASE_URL
# key 的例子: SILICONFLOW_KEY
cfg_target = {
"name" : "",
"base_url" : "",
"key" : "",
"pri_in" : 0,
"pri_out" : 0
}
if config.INNER_VERSION in SpecifierSet("<=0.0.0"):
cfg_target = cfg_item
elif config.INNER_VERSION in SpecifierSet(">=0.0.1"):
stable_item = ["name","pri_in","pri_out"]
pricing_item = ["pri_in","pri_out"]
# 从配置中原始拷贝稳定字段
for i in stable_item:
# 如果 字段 属于计费项 且获取不到,那默认值是 0
if i in pricing_item and i not in cfg_item:
cfg_target[i] = 0
else:
# 没有特殊情况则原样复制
try:
cfg_target[i] = cfg_item[i]
except KeyError as e:
logger.error(f"{item} 中的必要字段 {e} 不存在,请检查")
raise KeyError(f"{item} 中的必要字段 {e} 不存在,请检查")
provider = cfg_item.get("provider")
if provider == None:
logger.error(f"provider 字段在模型配置 {item} 中不存在,请检查")
raise KeyError(f"provider 字段在模型配置 {item} 中不存在,请检查")
cfg_target["base_url"] = f"{provider}_BASE_URL"
cfg_target["key"] = f"{provider}_KEY"
# 如果 列表中的项目在 model_config 中,利用反射来设置对应项目
setattr(config,item,cfg_target)
else:
logger.error(f"模型 {item} 在config中不存在请检查")
raise KeyError(f"模型 {item} 在config中不存在请检查")
def message(parent: dict):
msg_config = parent["message"]
config.MIN_TEXT_LENGTH = msg_config.get("min_text_length", config.MIN_TEXT_LENGTH)
config.MAX_CONTEXT_SIZE = msg_config.get("max_context_size", config.MAX_CONTEXT_SIZE)
config.emoji_chance = msg_config.get("emoji_chance", config.emoji_chance)
config.ban_words=msg_config.get("ban_words",config.ban_words)
if config.INNER_VERSION in SpecifierSet(">=0.0.2"):
config.thinking_timeout = msg_config.get("thinking_timeout", config.thinking_timeout)
config.response_willing_amplifier = msg_config.get("response_willing_amplifier", config.response_willing_amplifier)
config.response_interested_rate_amplifier = msg_config.get("response_interested_rate_amplifier", config.response_interested_rate_amplifier)
config.down_frequency_rate = msg_config.get("down_frequency_rate", config.down_frequency_rate)
def memory(parent: dict):
memory_config = parent["memory"]
config.build_memory_interval = memory_config.get("build_memory_interval", config.build_memory_interval)
config.forget_memory_interval = memory_config.get("forget_memory_interval", config.forget_memory_interval)
def mood(parent: dict):
mood_config = parent["mood"]
config.mood_update_interval = mood_config.get("mood_update_interval", config.mood_update_interval)
config.mood_decay_rate = mood_config.get("mood_decay_rate", config.mood_decay_rate)
config.mood_intensity_factor = mood_config.get("mood_intensity_factor", config.mood_intensity_factor)
def keywords_reaction(parent: dict):
keywords_reaction_config = parent["keywords_reaction"]
if keywords_reaction_config.get("enable", False):
config.keywords_reaction_rules = keywords_reaction_config.get("rules", config.keywords_reaction_rules)
def chinese_typo(parent: dict):
chinese_typo_config = parent["chinese_typo"]
config.chinese_typo_enable = chinese_typo_config.get("enable", config.chinese_typo_enable)
config.chinese_typo_error_rate = chinese_typo_config.get("error_rate", config.chinese_typo_error_rate)
config.chinese_typo_min_freq = chinese_typo_config.get("min_freq", config.chinese_typo_min_freq)
config.chinese_typo_tone_error_rate = chinese_typo_config.get("tone_error_rate", config.chinese_typo_tone_error_rate)
config.chinese_typo_word_replace_rate = chinese_typo_config.get("word_replace_rate", config.chinese_typo_word_replace_rate)
def groups(parent: dict):
groups_config = parent["groups"]
config.talk_allowed_groups = set(groups_config.get("talk_allowed", []))
config.talk_frequency_down_groups = set(groups_config.get("talk_frequency_down", []))
config.ban_user_id = set(groups_config.get("ban_user_id", []))
def others(parent: dict):
others_config = parent["others"]
config.enable_advance_output = others_config.get("enable_advance_output", config.enable_advance_output)
config.enable_kuuki_read = others_config.get("enable_kuuki_read", config.enable_kuuki_read)
# 版本表达式:>=1.0.0,<2.0.0
# 允许字段func: method, support: str, notice: str, necessary: bool
# 如果使用 notice 字段,在该组配置加载时,会展示该字段对用户的警示
# 例如:"notice": "personality 将在 1.3.2 后被移除",那么在有效版本中的用户就会虽然可以
# 正常执行程序,但是会看到这条自定义提示
include_configs = {
"personality": {
"func": personality,
"support": ">=0.0.0"
},
"emoji": {
"func": emoji,
"support": ">=0.0.0"
},
"cq_code": {
"func": cq_code,
"support": ">=0.0.0"
},
"bot": {
"func": bot,
"support": ">=0.0.0"
},
"response": {
"func": response,
"support": ">=0.0.0"
},
"model": {
"func": model,
"support": ">=0.0.0"
},
"message": {
"func": message,
"support": ">=0.0.0"
},
"memory": {
"func": memory,
"support": ">=0.0.0"
},
"mood": {
"func": mood,
"support": ">=0.0.0"
},
"keywords_reaction": {
"func": keywords_reaction,
"support": ">=0.0.2",
"necessary": False
},
"chinese_typo": {
"func": chinese_typo,
"support": ">=0.0.3",
"necessary": False
},
"groups": {
"func": groups,
"support": ">=0.0.0"
},
"others": {
"func": others,
"support": ">=0.0.0"
}
}
# 原地修改,将 字符串版本表达式 转换成 版本对象
for key in include_configs:
item_support = include_configs[key]["support"]
include_configs[key]["support"] = cls.convert_to_specifierset(item_support)
if os.path.exists(config_path):
with open(config_path, "rb") as f:
try:
@@ -106,145 +382,60 @@ class BotConfig:
except(tomli.TOMLDecodeError) as e:
logger.critical(f"配置文件bot_config.toml填写有误请检查第{e.lineno}行第{e.colno}处:{e.msg}")
exit(1)
if 'personality' in toml_dict:
personality_config=toml_dict['personality']
personality=personality_config.get('prompt_personality')
if len(personality) >= 2:
logger.info(f"载入自定义人格:{personality}")
config.PROMPT_PERSONALITY=personality_config.get('prompt_personality',config.PROMPT_PERSONALITY)
logger.info(f"载入自定义日程prompt:{personality_config.get('prompt_schedule',config.PROMPT_SCHEDULE_GEN)}")
config.PROMPT_SCHEDULE_GEN=personality_config.get('prompt_schedule',config.PROMPT_SCHEDULE_GEN)
config.PERSONALITY_1=personality_config.get('personality_1_probability',config.PERSONALITY_1)
config.PERSONALITY_2=personality_config.get('personality_2_probability',config.PERSONALITY_2)
config.PERSONALITY_3=personality_config.get('personality_3_probability',config.PERSONALITY_3)
# 获取配置文件版本
config.INNER_VERSION = cls.get_config_version(toml_dict)
if "emoji" in toml_dict:
emoji_config = toml_dict["emoji"]
config.EMOJI_CHECK_INTERVAL = emoji_config.get("check_interval", config.EMOJI_CHECK_INTERVAL)
config.EMOJI_REGISTER_INTERVAL = emoji_config.get("register_interval", config.EMOJI_REGISTER_INTERVAL)
config.EMOJI_CHECK_PROMPT = emoji_config.get('check_prompt',config.EMOJI_CHECK_PROMPT)
config.EMOJI_SAVE = emoji_config.get('auto_save',config.EMOJI_SAVE)
config.EMOJI_CHECK = emoji_config.get('enable_check',config.EMOJI_CHECK)
if "cq_code" in toml_dict:
cq_code_config = toml_dict["cq_code"]
config.ENABLE_PIC_TRANSLATE = cq_code_config.get("enable_pic_translate", config.ENABLE_PIC_TRANSLATE)
# 机器人基础配置
if "bot" in toml_dict:
bot_config = toml_dict["bot"]
bot_qq = bot_config.get("qq")
config.BOT_QQ = int(bot_qq)
config.BOT_NICKNAME = bot_config.get("nickname", config.BOT_NICKNAME)
if "response" in toml_dict:
response_config = toml_dict["response"]
config.MODEL_R1_PROBABILITY = response_config.get("model_r1_probability", config.MODEL_R1_PROBABILITY)
config.MODEL_V3_PROBABILITY = response_config.get("model_v3_probability", config.MODEL_V3_PROBABILITY)
config.MODEL_R1_DISTILL_PROBABILITY = response_config.get("model_r1_distill_probability", config.MODEL_R1_DISTILL_PROBABILITY)
config.max_response_length = response_config.get("max_response_length", config.max_response_length)
# 加载模型配置
if "model" in toml_dict:
model_config = toml_dict["model"]
if "llm_reasoning" in model_config:
config.llm_reasoning = model_config["llm_reasoning"]
if "llm_reasoning_minor" in model_config:
config.llm_reasoning_minor = model_config["llm_reasoning_minor"]
if "llm_normal" in model_config:
config.llm_normal = model_config["llm_normal"]
if "llm_normal_minor" in model_config:
config.llm_normal_minor = model_config["llm_normal_minor"]
# 如果在配置中找到了需要的项,调用对应项的闭包函数处理
for key in include_configs:
if key in toml_dict:
group_specifierset: SpecifierSet = include_configs[key]["support"]
# 检查配置文件版本是否在支持范围内
if config.INNER_VERSION in group_specifierset:
# 如果版本在支持范围内,检查是否存在通知
if 'notice' in include_configs[key]:
logger.warning(include_configs[key]["notice"])
include_configs[key]["func"](toml_dict)
else:
# 如果版本不在支持范围内,崩溃并提示用户
logger.error(
f"配置文件中的 '{key}' 字段的版本 ({config.INNER_VERSION}) 不在支持范围内。\n"
f"当前程序仅支持以下版本范围: {group_specifierset}"
)
raise InvalidVersion(f"当前程序仅支持以下版本范围: {group_specifierset}")
if "llm_topic_judge" in model_config:
config.llm_topic_judge = model_config["llm_topic_judge"]
if "llm_summary_by_topic" in model_config:
config.llm_summary_by_topic = model_config["llm_summary_by_topic"]
if "llm_emotion_judge" in model_config:
config.llm_emotion_judge = model_config["llm_emotion_judge"]
if "vlm" in model_config:
config.vlm = model_config["vlm"]
# 如果 necessary 项目存在,而且显式声明是 False进入特殊处理
elif "necessary" in include_configs[key] and include_configs[key].get("necessary") == False:
# 通过 pass 处理的项虽然直接忽略也是可以的,但是为了不增加理解困难,依然需要在这里显式处理
if key == "keywords_reaction":
pass
if "embedding" in model_config:
config.embedding = model_config["embedding"]
if "moderation" in model_config:
config.moderation = model_config["moderation"]
# 消息配置
if "message" in toml_dict:
msg_config = toml_dict["message"]
config.MIN_TEXT_LENGTH = msg_config.get("min_text_length", config.MIN_TEXT_LENGTH)
config.MAX_CONTEXT_SIZE = msg_config.get("max_context_size", config.MAX_CONTEXT_SIZE)
config.emoji_chance = msg_config.get("emoji_chance", config.emoji_chance)
config.ban_words=msg_config.get("ban_words",config.ban_words)
config.thinking_timeout = msg_config.get("thinking_timeout", config.thinking_timeout)
config.response_willing_amplifier = msg_config.get("response_willing_amplifier", config.response_willing_amplifier)
config.response_interested_rate_amplifier = msg_config.get("response_interested_rate_amplifier", config.response_interested_rate_amplifier)
config.down_frequency_rate = msg_config.get("down_frequency_rate", config.down_frequency_rate)
else:
# 如果用户根本没有需要的配置项,提示缺少配置
logger.error(f"配置文件中缺少必需的字段: '{key}'")
raise KeyError(f"配置文件中缺少必需的字段: '{key}'")
if "memory" in toml_dict:
memory_config = toml_dict["memory"]
config.build_memory_interval = memory_config.get("build_memory_interval", config.build_memory_interval)
config.forget_memory_interval = memory_config.get("forget_memory_interval", config.forget_memory_interval)
if "mood" in toml_dict:
mood_config = toml_dict["mood"]
config.mood_update_interval = mood_config.get("mood_update_interval", config.mood_update_interval)
config.mood_decay_rate = mood_config.get("mood_decay_rate", config.mood_decay_rate)
config.mood_intensity_factor = mood_config.get("mood_intensity_factor", config.mood_intensity_factor)
# print(toml_dict)
if "keywords_reaction" in toml_dict:
# 读取关键词回复配置
keywords_reaction_config = toml_dict["keywords_reaction"]
if keywords_reaction_config.get("enable", False):
config.keywords_reaction_rules = keywords_reaction_config.get("rules", config.keywords_reaction_rules)
if "chinese_typo_generator" in toml_dict:
# 读取中文错别字生成器配置
chinese_typo_generator_config = toml_dict["chinese_typo_generator"]
config.chinese_typo_enable = chinese_typo_generator_config.get("enable", config.chinese_typo_enable)
config.chinese_typo_error_rate = chinese_typo_generator_config.get("error_rate", config.chinese_typo_error_rate)
config.chinese_typo_min_freq = chinese_typo_generator_config.get("min_freq", config.chinese_typo_min_freq)
config.chinese_typo_tone_error_rate = chinese_typo_generator_config.get("tone_error_rate", config.chinese_typo_tone_error_rate)
config.chinese_typo_word_replace_rate = chinese_typo_generator_config.get("word_replace_rate", config.chinese_typo_word_replace_rate)
# 群组配置
if "groups" in toml_dict:
groups_config = toml_dict["groups"]
config.talk_allowed_groups = set(groups_config.get("talk_allowed", []))
config.talk_frequency_down_groups = set(groups_config.get("talk_frequency_down", []))
config.ban_user_id = set(groups_config.get("ban_user_id", []))
if "others" in toml_dict:
others_config = toml_dict["others"]
config.enable_advance_output = others_config.get("enable_advance_output", config.enable_advance_output)
config.enable_kuuki_read = others_config.get("enable_kuuki_read", config.enable_kuuki_read)
logger.success(f"成功加载配置文件: {config_path}")
logger.success(f"成功加载配置文件: {config_path}")
return config
# 获取配置文件路径
bot_config_floder_path = BotConfig.get_config_dir()
print(f"正在品鉴配置文件目录: {bot_config_floder_path}")
bot_config_path = os.path.join(bot_config_floder_path, "bot_config.toml")
if os.path.exists(bot_config_path):
# 如果开发环境配置文件不存在,则使用默认配置文件
print(f"异常的新鲜,异常的美味: {bot_config_path}")
logger.info("使用bot配置文件")
else:
logger.info("没有找到美味")
# 配置文件不存在
logger.error("配置文件不存在,请检查路径: {bot_config_path}")
raise FileNotFoundError(f"配置文件不存在: {bot_config_path}")
global_config = BotConfig.load_config(config_path=bot_config_path)

View File

@@ -12,6 +12,7 @@ from ..models.utils_model import LLM_request
from ..utils.typo_generator import ChineseTypoGenerator
from .config import global_config
from .message import Message
from ..moods.moods import MoodManager
driver = get_driver()
config = driver.config
@@ -326,43 +327,68 @@ def random_remove_punctuation(text: str) -> str:
def process_llm_response(text: str) -> List[str]:
# processed_response = process_text_with_typos(content)
if len(text) > 300:
if len(text) > 200:
print(f"回复过长 ({len(text)} 字符),返回默认回复")
return ['懒得说']
# 处理长消息
if global_config.chinese_typo_enable:
typo_generator = ChineseTypoGenerator(
error_rate=global_config.chinese_typo_error_rate,
min_freq=global_config.chinese_typo_min_freq,
tone_error_rate=global_config.chinese_typo_tone_error_rate,
word_replace_rate=global_config.chinese_typo_word_replace_rate
)
typoed_text = typo_generator.create_typo_sentence(text)[0]
else:
typoed_text = text
sentences = split_into_sentences_w_remove_punctuation(typoed_text)
typo_generator = ChineseTypoGenerator(
error_rate=global_config.chinese_typo_error_rate,
min_freq=global_config.chinese_typo_min_freq,
tone_error_rate=global_config.chinese_typo_tone_error_rate,
word_replace_rate=global_config.chinese_typo_word_replace_rate
)
split_sentences = split_into_sentences_w_remove_punctuation(text)
sentences = []
for sentence in split_sentences:
if global_config.chinese_typo_enable:
typoed_text, typo_corrections = typo_generator.create_typo_sentence(sentence)
sentences.append(typoed_text)
if typo_corrections:
sentences.append(typo_corrections)
else:
sentences.append(sentence)
# 检查分割后的消息数量是否过多超过3条
if len(sentences) > 4:
if len(sentences) > 5:
print(f"分割后消息数量过多 ({len(sentences)} 条),返回默认回复")
return [f'{global_config.BOT_NICKNAME}不知道哦']
return sentences
def calculate_typing_time(input_string: str, chinese_time: float = 0.2, english_time: float = 0.1) -> float:
def calculate_typing_time(input_string: str, chinese_time: float = 0.4, english_time: float = 0.2) -> float:
"""
计算输入字符串所需的时间,中文和英文字符有不同的输入时间
input_string (str): 输入的字符串
chinese_time (float): 中文字符的输入时间默认为0.3
english_time (float): 英文字符的输入时间默认为0.15
chinese_time (float): 中文字符的输入时间默认为0.2
english_time (float): 英文字符的输入时间默认为0.1秒
特殊情况:
- 如果只有一个中文字符将使用3倍的中文输入时间
- 在所有输入结束后额外加上回车时间0.3秒
"""
mood_manager = MoodManager.get_instance()
# 将0-1的唤醒度映射到-1到1
mood_arousal = mood_manager.current_mood.arousal
# 映射到0.5到2倍的速度系数
typing_speed_multiplier = 1.5 ** mood_arousal # 唤醒度为1时速度翻倍,为-1时速度减半
chinese_time *= 1/typing_speed_multiplier
english_time *= 1/typing_speed_multiplier
# 计算中文字符数
chinese_chars = sum(1 for char in input_string if '\u4e00' <= char <= '\u9fff')
# 如果只有一个中文字符使用3倍时间
if chinese_chars == 1 and len(input_string.strip()) == 1:
return chinese_time * 3 + 0.3 # 加上回车时间
# 正常计算所有字符的输入时间
total_time = 0.0
for char in input_string:
if '\u4e00' <= char <= '\u9fff': # 判断是否为中文字符
total_time += chinese_time
else: # 其他字符(如英文)
total_time += english_time
return total_time
return total_time + 0.3 # 加上回车时间
def cosine_similarity(v1, v2):

View File

@@ -23,6 +23,7 @@ class LLM_request:
self.api_key = getattr(config, model["key"])
self.base_url = getattr(config, model["base_url"])
except AttributeError as e:
logger.error(f"原始 model dict 信息:{model}")
logger.error(f"配置错误:找不到对应的配置项 - {str(e)}")
raise ValueError(f"配置错误:找不到对应的配置项 - {str(e)}") from e
self.model_name = model["name"]
@@ -181,6 +182,13 @@ class LLM_request:
continue
elif response.status in policy["abort_codes"]:
logger.error(f"错误码: {response.status} - {error_code_mapping.get(response.status)}")
if response.status == 403 :
if global_config.llm_normal == "Pro/deepseek-ai/DeepSeek-V3":
logger.error("可能是没有给硅基流动充钱普通模型自动退化至非Pro模型反应速度可能会变慢")
global_config.llm_normal = "deepseek-ai/DeepSeek-V3"
if global_config.llm_reasoning == "Pro/deepseek-ai/DeepSeek-R1":
logger.error("可能是没有给硅基流动充钱推理模型自动退化至非Pro模型反应速度可能会变慢")
global_config.llm_reasoning = "deepseek-ai/DeepSeek-R1"
raise RuntimeError(f"请求被拒绝: {error_code_mapping.get(response.status)}")
response.raise_for_status()

View File

@@ -284,10 +284,13 @@ class ChineseTypoGenerator:
返回:
typo_sentence: 包含错别字的句子
typo_info: 错别字信息列表
correction_suggestion: 随机选择的一个纠正建议,返回正确的字/词
"""
result = []
typo_info = []
word_typos = [] # 记录词语错误对(错词,正确词)
char_typos = [] # 记录单字错误对(错字,正确字)
current_pos = 0
# 分词
words = self._segment_sentence(sentence)
@@ -296,6 +299,7 @@ class ChineseTypoGenerator:
# 如果是标点符号或空格,直接添加
if all(not self._is_chinese_char(c) for c in word):
result.append(word)
current_pos += len(word)
continue
# 获取词语的拼音
@@ -316,6 +320,8 @@ class ChineseTypoGenerator:
' '.join(word_pinyin),
' '.join(self._get_word_pinyin(typo_word)),
orig_freq, typo_freq))
word_typos.append((typo_word, word)) # 记录(错词,正确词)对
current_pos += len(typo_word)
continue
# 如果不进行整词替换,则进行单字替换
@@ -333,11 +339,15 @@ class ChineseTypoGenerator:
result.append(typo_char)
typo_py = pinyin(typo_char, style=Style.TONE3)[0][0]
typo_info.append((char, typo_char, py, typo_py, orig_freq, typo_freq))
char_typos.append((typo_char, char)) # 记录(错字,正确字)对
current_pos += 1
continue
result.append(char)
current_pos += 1
else:
# 处理多字词的单字替换
word_result = []
word_start_pos = current_pos
for i, (char, py) in enumerate(zip(word, word_pinyin)):
# 词中的字替换概率降低
word_error_rate = self.error_rate * (0.7 ** (len(word) - 1))
@@ -353,11 +363,24 @@ class ChineseTypoGenerator:
word_result.append(typo_char)
typo_py = pinyin(typo_char, style=Style.TONE3)[0][0]
typo_info.append((char, typo_char, py, typo_py, orig_freq, typo_freq))
char_typos.append((typo_char, char)) # 记录(错字,正确字)对
continue
word_result.append(char)
result.append(''.join(word_result))
current_pos += len(word)
return ''.join(result), typo_info
# 优先从词语错误中选择,如果没有则从单字错误中选择
correction_suggestion = None
# 50%概率返回纠正建议
if random.random() < 0.5:
if word_typos:
wrong_word, correct_word = random.choice(word_typos)
correction_suggestion = correct_word
elif char_typos:
wrong_char, correct_char = random.choice(char_typos)
correction_suggestion = correct_char
return ''.join(result), correction_suggestion
def format_typo_info(self, typo_info):
"""
@@ -419,16 +442,16 @@ def main():
# 创建包含错别字的句子
start_time = time.time()
typo_sentence, typo_info = typo_generator.create_typo_sentence(sentence)
typo_sentence, correction_suggestion = typo_generator.create_typo_sentence(sentence)
# 打印结果
print("\n原句:", sentence)
print("错字版:", typo_sentence)
# 打印错别字信息
if typo_info:
print("\n错别字信息")
print(typo_generator.format_typo_info(typo_info))
# 打印纠正建议
if correction_suggestion:
print("\n随机纠正建议")
print(f"应该改为:{correction_suggestion}")
# 计算并打印总耗时
end_time = time.time()