This commit is contained in:
minecraft1024a
2025-10-25 09:30:23 +08:00
49 changed files with 991 additions and 540 deletions

View File

@@ -60,7 +60,7 @@ class ChatterPlanFilter:
prompt, used_message_id_list = await self._build_prompt(plan)
plan.llm_prompt = prompt
if global_config.debug.show_prompt:
logger.info(f"规划器原始提示词:{prompt}") #叫你不要改你耳朵聋吗😡😡😡😡😡
logger.debug(f"规划器原始提示词:{prompt}")
llm_content, _ = await self.planner_llm.generate_response_async(prompt=prompt)
@@ -158,7 +158,7 @@ class ChatterPlanFilter:
if global_config.planning_system.schedule_enable:
if activity_info := schedule_manager.get_current_activity():
activity = activity_info.get("activity", "未知活动")
schedule_block = f"你当前正在进行“{activity}”。(此为你的当前状态,仅供参考。除非被直接询问,否则不要在对话中主动提及。)"
schedule_block = f'你当前正在进行“{activity}”。(此为你的当前状态,仅供参考。除非被直接询问,否则不要在对话中主动提及。)'
mood_block = ""
# 需要情绪模块打开才能获得情绪,否则会引发报错

View File

@@ -9,7 +9,7 @@ from src.chat.utils.utils import get_chat_type_and_target_info
from src.common.data_models.database_data_model import DatabaseMessages
from src.common.data_models.info_data_model import Plan, TargetPersonInfo
from src.config.config import global_config
from src.plugin_system.base.component_types import ActionInfo, ChatMode, ChatType
from src.plugin_system.base.component_types import ActionInfo, ChatMode, ChatType, ComponentType
from src.plugin_system.core.component_registry import component_registry

View File

@@ -271,7 +271,7 @@ class EmojiAction(BaseAction):
# 我们假设LLM返回的是精炼描述的一部分或全部
matched_emoji = None
best_match_score = 0
for item in all_emojis_data:
refined_info = extract_refined_info(item[1])
# 计算一个简单的匹配分数
@@ -280,16 +280,16 @@ class EmojiAction(BaseAction):
score += 2 # 包含匹配
if refined_info.lower() in chosen_description.lower():
score += 2 # 包含匹配
# 关键词匹配加分
chosen_keywords = re.findall(r"\w+", chosen_description.lower())
item_keywords = re.findall(r"\[(.*?)\]", refined_info)
chosen_keywords = re.findall(r'\w+', chosen_description.lower())
item_keywords = re.findall(r'\[(.*?)\]', refined_info)
if item_keywords:
item_keywords_set = {k.strip().lower() for k in item_keywords[0].split(",")}
item_keywords_set = {k.strip().lower() for k in item_keywords[0].split(',')}
for kw in chosen_keywords:
if kw in item_keywords_set:
score += 1
if score > best_match_score:
best_match_score = score
matched_emoji = item

View File

@@ -162,6 +162,16 @@ class MessageHandler:
)
logger.debug(f"原始消息内容: {raw_message.get('message', [])}")
# 检查是否包含@或video消息段
message_segments = raw_message.get("message", [])
if message_segments:
for i, seg in enumerate(message_segments):
seg_type = seg.get("type")
if seg_type in ["at", "video"]:
logger.info(f"检测到 {seg_type.upper()} 消息段 [{i}]: {seg}")
elif seg_type not in ["text", "face", "image"]:
logger.warning(f"检测到特殊消息段 [{i}]: type={seg_type}, data={seg.get('data', {})}")
message_type: str = raw_message.get("message_type")
message_id: int = raw_message.get("message_id")
# message_time: int = raw_message.get("time")

View File

@@ -237,6 +237,7 @@ class SendHandler:
target_id = str(target_id)
if target_id == "notice":
return payload
logger.info(target_id if isinstance(target_id, str) else "")
new_payload = self.build_payload(
payload,
await self.handle_reply_message(target_id if isinstance(target_id, str) else "", user_info),
@@ -321,7 +322,7 @@ class SendHandler:
# 如果没有获取到被回复者的ID则直接返回不进行@
if not replied_user_id:
logger.warning(f"无法获取消息 {id} 的发送者信息,跳过 @")
logger.debug(f"最终返回的回复段: {reply_seg}")
logger.info(f"最终返回的回复段: {reply_seg}")
return reply_seg
# 根据概率决定是否艾特用户
@@ -339,7 +340,7 @@ class SendHandler:
logger.info(f"最终返回的回复段: {reply_seg}")
return reply_seg
logger.debug(f"最终返回的回复段: {reply_seg}")
logger.info(f"最终返回的回复段: {reply_seg}")
return reply_seg
def handle_text_message(self, message: str) -> dict:

View File

@@ -6,6 +6,7 @@ from datetime import datetime
from maim_message import UserInfo
from src.chat.message_manager.sleep_system.state_manager import SleepState, sleep_state_manager
from src.chat.message_receive.chat_stream import get_chat_manager
from src.common.logger import get_logger
from src.config.config import global_config
@@ -38,6 +39,10 @@ class ColdStartTask(AsyncTask):
await asyncio.sleep(30) # 延迟以确保所有服务和聊天流已从数据库加载完毕
try:
current_state = sleep_state_manager.get_current_state()
if current_state == SleepState.SLEEPING:
logger.info("bot正在睡觉,跳过本次任务")
return
logger.info("【冷启动】开始扫描白名单,唤醒沉睡的聊天流...")
# 【修复】增加对私聊总开关的判断
@@ -147,6 +152,10 @@ class ProactiveThinkingTask(AsyncTask):
# 计算下一次检查前的休眠时间
next_interval = self._get_next_interval()
try:
current_state = sleep_state_manager.get_current_state()
if current_state == SleepState.SLEEPING:
logger.info("bot正在睡觉,跳过本次任务")
return
logger.debug(f"【日常唤醒】下一次检查将在 {next_interval:.2f} 秒后进行。")
await asyncio.sleep(next_interval)

View File

@@ -1,6 +1,5 @@
from src.plugin_system.base.plugin_metadata import PluginMetadata
# 定义插件元数据
__plugin_meta__ = PluginMetadata(
name="MoFox-Bot工具箱",
description="一个集合多种实用功能的插件,旨在提升聊天体验和效率。",
@@ -12,6 +11,4 @@ __plugin_meta__ = PluginMetadata(
keywords=["emoji", "reaction", "like", "表情", "回应", "点赞"],
categories=["Chat", "Integration"],
extra={"is_built_in": "true", "plugin_type": "functional"},
dependencies=[],
python_dependencies=["httpx", "Pillow"],
)

View File

@@ -13,6 +13,5 @@ __plugin_meta__ = PluginMetadata(
extra={
"is_built_in": False,
"plugin_type": "tools",
},
python_dependencies = ["aiohttp", "soundfile", "pedalboard"]
}
)

View File

@@ -2,33 +2,107 @@
TTS 语音合成 Action
"""
import toml
from pathlib import Path
from src.common.logger import get_logger
from src.plugin_system.apis import generator_api
from src.plugin_system.base.base_action import BaseAction, ChatMode
from src.plugin_system.base.base_action import ActionActivationType, BaseAction, ChatMode
from ..services.manager import get_service
logger = get_logger("tts_voice_plugin.action")
def _get_available_styles() -> list[str]:
    """Return the TTS style names declared in the plugin's config file.

    Looks for ``config/plugins/tts_voice_plugin/config.toml`` six directory
    levels above this file and collects every non-empty string
    ``style_name`` found in the ``tts_styles`` list.

    Returns:
        The collected style names, or ``["default"]`` when the config file
        is missing, ``tts_styles`` is not a list, no valid name is present,
        or any error occurs while loading.
    """
    fallback = ["default"]
    try:
        # Resolve the location from this file rather than the working
        # directory so the lookup works no matter where the bot is started.
        # Bot/src/plugins/built_in/tts_voice_plugin/actions -> Bot
        project_root = Path(__file__).resolve()
        for _ in range(6):
            project_root = project_root.parent
        toml_path = project_root / "config" / "plugins" / "tts_voice_plugin" / "config.toml"

        if not toml_path.is_file():
            logger.warning("在 tts_action 中未找到 tts_voice_plugin 的配置文件,无法动态加载风格列表。")
            return fallback

        parsed = toml.loads(toml_path.read_text(encoding="utf-8"))
        entries = parsed.get("tts_styles", [])
        if not isinstance(entries, list):
            return fallback

        # Keep only dict entries whose style_name is a non-empty string;
        # the explicit isinstance checks also keep static type checkers happy.
        collected: list[str] = [
            name
            for entry in entries
            if isinstance(entry, dict)
            and isinstance(name := entry.get("style_name"), str)
            and name
        ]
        return collected or fallback
    except Exception as e:
        # Any failure falls back to the default style list.
        logger.error(f"动态加载TTS风格列表时出错: {e}", exc_info=True)
        return fallback
# Run the loader before the class definition to obtain the style list;
# STYLE_OPTIONS_DESC is the same list rendered as a quoted, comma-separated string.
AVAILABLE_STYLES = _get_available_styles()
STYLE_OPTIONS_DESC = ", ".join(f"'{s}'" for s in AVAILABLE_STYLES)
class TTSVoiceAction(BaseAction):
"""
通过关键词或规划器自动触发 TTS 语音合成
"""
action_name = "tts_voice_action"
action_description = "使用GPT-SoVITS将文本转换为语音并发送"
action_description = "将你生成好的文本转换为语音并发送。你必须提供要转换的文本。"
mode_enable = ChatMode.ALL
parallel_action = False
action_parameters = {
"text": {
"type": "string",
"description": "需要转换为语音并发送的完整、自然、适合口语的文本内容。",
"required": True
},
"voice_style": {
"type": "string",
"description": f"语音的风格。可用选项: [{STYLE_OPTIONS_DESC}]。请根据对话的情感和上下文选择一个最合适的风格。如果未提供,将使用默认风格。",
"required": False
},
"text_language": {
"type": "string",
"description": (
"指定用于合成的语言模式,请务必根据文本内容选择最精确、范围最小的选项以获得最佳效果。"
"可用选项说明:\n"
"- 'zh': 中文与英文混合 (最优选)\n"
"- 'ja': 日文与英文混合 (最优选)\n"
"- 'yue': 粤语与英文混合 (最优选)\n"
"- 'ko': 韩文与英文混合 (最优选)\n"
"- 'en': 纯英文\n"
"- 'all_zh': 纯中文\n"
"- 'all_ja': 纯日文\n"
"- 'all_yue': 纯粤语\n"
"- 'all_ko': 纯韩文\n"
"- 'auto': 多语种混合自动识别 (备用选项,当前两种语言时优先使用上面的精确选项)\n"
"- 'auto_yue': 多语种混合自动识别(包含粤语)(备用选项)"
),
"required": False
}
}
action_require = [
"在调用此动作时,你必须在 'text' 参数中提供要合成语音的完整回复内容。这是强制性的。",
"当用户明确请求使用语音进行回复时,例如‘发个语音听听’、‘用语音说’等。",
"当对话内容适合用语音表达,例如讲故事、念诗、撒嬌或进行角色扮演时。",
"在表达特殊情感(如安慰、鼓励、庆祝)的场景下,可以主动使用语音来增强感染力。",
"不要在日常的、简短的问答或闲聊中频繁使用语音,避免打扰用户。",
"文本内容必须是纯粹的对话,不能包含任何括号或方括号括起来的动作、表情、或场景描述(例如,不要出现 '(笑)''[歪头]'",
"必须使用标准、完整的标点符号(如逗号、句号、问号)来进行自然的断句,以确保语音停顿自然,避免生成一长串没有停顿的文本"
"提供的 'text' 内容必须是纯粹的对话,不能包含任何括号或方括号括起来的动作、表情、或场景描述(例如,不要出现 '(笑)''[歪头]'",
"【**铁则**】为了确保语音停顿自然,'text' 参数中的所有断句【必须使用且仅能使用以下标准标点符号:''''''''。严禁使用 '''...' 或其他任何非标准符号来分隔句子,否则将导致语音合成失败"
]
def __init__(self, *args, **kwargs):
@@ -65,7 +139,7 @@ class TTSVoiceAction(BaseAction):
):
logger.info(f"{self.log_prefix} LLM 判断激活成功")
return True
logger.debug(f"{self.log_prefix} 所有激活条件均未满足,不激活")
return False
@@ -80,16 +154,23 @@ class TTSVoiceAction(BaseAction):
initial_text = self.action_data.get("text", "").strip()
voice_style = self.action_data.get("voice_style", "default")
logger.info(f"{self.log_prefix} 接收到规划器的初步文本: '{initial_text[:70]}...'")
# 新增:从决策模型获取指定的语言模式
text_language = self.action_data.get("text_language") # 如果模型没给,就是 None
logger.info(f"{self.log_prefix} 接收到规划器初步文本: '{initial_text[:70]}...', 指定风格: {voice_style}, 指定语言: {text_language}")
# 1. 请求主回复模型生成高质量文本
text = await self._generate_final_text(initial_text)
# 1. 使用规划器提供的文本
text = initial_text
if not text:
logger.warning(f"{self.log_prefix} 最终生成的文本为空,静默处理。")
return False, "最终生成的文本为空"
logger.warning(f"{self.log_prefix} 规划器提供的文本为空,静默处理。")
return False, "规划器提供的文本为空"
# 2. 调用 TTSService 生成语音
audio_b64 = await self.tts_service.generate_voice(text, voice_style)
logger.info(f"{self.log_prefix} 使用最终文本进行语音合成: '{text[:70]}...'")
audio_b64 = await self.tts_service.generate_voice(
text=text,
style_hint=voice_style,
language_hint=text_language # 新增:将决策模型指定的语言传递给服务
)
if audio_b64:
await self.send_custom(message_type="voice", content=audio_b64)
@@ -115,33 +196,3 @@ class TTSVoiceAction(BaseAction):
)
return False, f"语音合成出错: {e!s}"
async def _generate_final_text(self, initial_text: str) -> str:
    """Ask the main reply model to produce or polish the text to be spoken.

    Passes ``initial_text`` and a TTS-specific instruction to
    ``generator_api.rewrite_reply``.

    Args:
        initial_text: Draft text supplied by the planner; may be empty.

    Returns:
        The concatenated, stripped reply when the rewrite succeeds;
        otherwise ``initial_text`` if it is non-empty. When neither is
        available, or any exception is raised, the error is logged and an
        empty string is returned.
    """
    try:
        tts_instruction = (
            "这是一个为语音消息TTS生成文本的特殊任务。"
            "请基于规划器提供的初步文本,结合对话历史和自己的人设,将它优化成一句自然、富有感情、适合用语音说出的话。"
            "最终指令:请务-必确保文本听起来像真实的、自然的口语对话,而不是书面语。"
        )

        logger.info(f"{self.log_prefix} 请求主回复模型(replyer)全新生成TTS文本...")
        rewrite_request = {"raw_reply": initial_text, "reason": tts_instruction}
        ok, segments, _ = await generator_api.rewrite_reply(
            chat_stream=self.chat_stream,
            reply_data=rewrite_request,
            request_type="replyer",
        )

        if not (ok and segments):
            if not initial_text:
                # Raised on purpose so the handler below logs it and returns "".
                raise Exception("主模型未能生成回复,且规划器也未提供兜底文本。")
            logger.warning(f"{self.log_prefix} 主模型生成失败,使用规划器原始文本作为兜底。")
            return initial_text

        # Tuple segments contribute their second element; anything else is
        # stringified as a whole.
        pieces = []
        for seg in segments:
            if isinstance(seg, tuple):
                pieces.append(str(seg[1]))
            else:
                pieces.append(str(seg))
        final_text = "".join(pieces).strip()
        logger.info(f"{self.log_prefix} 成功生成高质量TTS文本: {final_text}")
        return final_text
    except Exception as e:
        logger.error(f"{self.log_prefix} 生成高质量回复内容时失败: {e}", exc_info=True)
        return ""

View File

@@ -30,6 +30,7 @@ class TTSVoicePlugin(BasePlugin):
plugin_author = "Kilo Code & 靚仔"
config_file_name = "config.toml"
dependencies = []
python_dependencies = ["aiohttp", "soundfile", "pedalboard"]
permission_nodes: list[PermissionNodeField] = [
PermissionNodeField(node_name="command.use", description="是否可以使用 /tts 命令"),

View File

@@ -80,21 +80,34 @@ class TTSService:
"prompt_language": style_cfg.get("prompt_language", "zh"),
"gpt_weights": style_cfg.get("gpt_weights", default_gpt_weights),
"sovits_weights": style_cfg.get("sovits_weights", default_sovits_weights),
"speed_factor": style_cfg.get("speed_factor"), # 读取独立的语速配置
"speed_factor": style_cfg.get("speed_factor"),
"text_language": style_cfg.get("text_language", "auto"), # 新增:读取文本语言模式
}
return styles
# ... [其他方法保持不变] ...
def _detect_language(self, text: str) -> str:
chinese_chars = len(re.findall(r"[\u4e00-\u9fff]", text))
english_chars = len(re.findall(r"[a-zA-Z]", text))
def _determine_final_language(self, text: str, mode: str) -> str:
"""根据配置的语言策略和文本内容决定最终发送给API的语言代码"""
# 如果策略是具体的语言(如 all_zh, ja直接使用
if mode not in ["auto", "auto_yue"]:
return mode
# 对于 auto 和 auto_yue 策略,进行内容检测
# 优先检测粤语
if mode == "auto_yue":
cantonese_keywords = ["", "", "", "", "", "", "", "", ""]
if any(keyword in text for keyword in cantonese_keywords):
logger.info("在 auto_yue 模式下检测到粤语关键词,最终语言: yue")
return "yue"
# 检测日语(简单启发式规则)
japanese_chars = len(re.findall(r"[\u3040-\u309f\u30a0-\u30ff]", text))
total_chars = chinese_chars + english_chars + japanese_chars
if total_chars == 0: return "zh"
if chinese_chars / total_chars > 0.3: return "zh"
elif japanese_chars / total_chars > 0.3: return "ja"
elif english_chars / total_chars > 0.8: return "en"
else: return "zh"
if japanese_chars > 5 and japanese_chars > len(re.findall(r"[\u4e00-\u9fff]", text)) * 0.5:
logger.info("检测到日语字符,最终语言: ja")
return "ja"
# 默认回退到中文
logger.info(f"{mode} 模式下未检测到特定语言,默认回退到: zh")
return "zh"
def _clean_text_for_tts(self, text: str) -> str:
# 1. 基本清理
@@ -259,7 +272,7 @@ class TTSService:
logger.error(f"应用空间效果时出错: {e}", exc_info=True)
return audio_data # 如果出错,返回原始音频
async def generate_voice(self, text: str, style_hint: str = "default") -> str | None:
async def generate_voice(self, text: str, style_hint: str = "default", language_hint: str | None = None) -> str | None:
self._load_config()
if not self.tts_styles:
@@ -282,11 +295,21 @@ class TTSService:
clean_text = self._clean_text_for_tts(text)
if not clean_text: return None
text_language = self._detect_language(clean_text)
logger.info(f"开始TTS语音合成文本{clean_text[:50]}..., 风格:{style}")
# 语言决策流程:
# 1. 优先使用决策模型直接指定的 language_hint (最高优先级)
if language_hint:
final_language = language_hint
logger.info(f"使用决策模型指定的语言: {final_language}")
else:
# 2. 如果模型未指定,则使用风格配置的 language_policy
language_policy = server_config.get("text_language", "auto")
final_language = self._determine_final_language(clean_text, language_policy)
logger.info(f"决策模型未指定语言,使用策略 '{language_policy}' -> 最终语言: {final_language}")
logger.info(f"开始TTS语音合成文本{clean_text[:50]}..., 风格:{style}, 最终语言: {final_language}")
audio_data = await self._call_tts_api(
server_config=server_config, text=clean_text, text_language=text_language,
server_config=server_config, text=clean_text, text_language=final_language,
refer_wav_path=server_config.get("refer_wav_path"),
prompt_text=server_config.get("prompt_text"),
prompt_language=server_config.get("prompt_language"),

View File

@@ -1,4 +1,3 @@
from src.plugin_system.base.component_types import PythonDependency
from src.plugin_system.base.plugin_metadata import PluginMetadata
__plugin_meta__ = PluginMetadata(
@@ -14,26 +13,4 @@ __plugin_meta__ = PluginMetadata(
extra={
"is_built_in": True,
},
# Python包依赖列表
python_dependencies = [
PythonDependency(package_name="asyncddgs", description="异步DuckDuckGo搜索库", optional=False),
PythonDependency(
package_name="exa_py",
description="Exa搜索API客户端库",
optional=True, # 如果没有API密钥这个是可选的
),
PythonDependency(
package_name="tavily",
install_name="tavily-python", # 安装时使用这个名称
description="Tavily搜索API客户端库",
optional=True, # 如果没有API密钥这个是可选的
),
PythonDependency(
package_name="httpx",
version=">=0.20.0",
install_name="httpx[socks]", # 安装时使用这个名称(包含可选依赖)
description="支持SOCKS代理的HTTP客户端库",
optional=False,
),
]
)

View File

@@ -3,7 +3,7 @@ Base search engine interface
"""
from abc import ABC, abstractmethod
from typing import Any
from typing import Any, Optional
class BaseSearchEngine(ABC):
@@ -24,7 +24,7 @@ class BaseSearchEngine(ABC):
"""
pass
async def read_url(self, url: str) -> str | None:
async def read_url(self, url: str) -> Optional[str]:
"""
读取URL内容如果引擎不支持则返回None
"""

View File

@@ -2,7 +2,7 @@
Metaso Search Engine (Chat Completions Mode)
"""
import json
from typing import Any
from typing import Any, List
import httpx
@@ -27,7 +27,7 @@ class MetasoClient:
"Content-Type": "application/json",
}
async def search(self, query: str, **kwargs) -> list[dict[str, Any]]:
async def search(self, query: str, **kwargs) -> List[dict[str, Any]]:
"""Perform a search using the Metaso Chat Completions API."""
payload = {"model": "fast", "stream": True, "messages": [{"role": "user", "content": query}]}
search_url = f"{self.base_url}/chat/completions"

View File

@@ -5,7 +5,7 @@ Web Search Tool Plugin
"""
from src.common.logger import get_logger
from src.plugin_system import BasePlugin, ComponentInfo, ConfigField, register_plugin
from src.plugin_system import BasePlugin, ComponentInfo, ConfigField, PythonDependency, register_plugin
from src.plugin_system.apis import config_api
from .tools.url_parser import URLParserTool
@@ -42,9 +42,9 @@ class WEBSEARCHPLUGIN(BasePlugin):
from .engines.bing_engine import BingSearchEngine
from .engines.ddg_engine import DDGSearchEngine
from .engines.exa_engine import ExaSearchEngine
from .engines.metaso_engine import MetasoSearchEngine
from .engines.searxng_engine import SearXNGSearchEngine
from .engines.tavily_engine import TavilySearchEngine
from .engines.metaso_engine import MetasoSearchEngine
# 实例化所有搜索引擎这会触发API密钥管理器的初始化
exa_engine = ExaSearchEngine()
@@ -53,7 +53,7 @@ class WEBSEARCHPLUGIN(BasePlugin):
bing_engine = BingSearchEngine()
searxng_engine = SearXNGSearchEngine()
metaso_engine = MetasoSearchEngine()
# 报告每个引擎的状态
engines_status = {
"Exa": exa_engine.is_available(),
@@ -74,6 +74,29 @@ class WEBSEARCHPLUGIN(BasePlugin):
except Exception as e:
logger.error(f"❌ 搜索引擎初始化失败: {e}", exc_info=True)
# Python package dependencies declared by this plugin
python_dependencies: list[PythonDependency] = [  # noqa: RUF012
    PythonDependency(package_name="asyncddgs", description="异步DuckDuckGo搜索库", optional=False),
    PythonDependency(
        package_name="exa_py",
        description="Exa搜索API客户端库",
        optional=True,  # optional when no API key is available
    ),
    PythonDependency(
        package_name="tavily",
        install_name="tavily-python",  # name to use at install time
        description="Tavily搜索API客户端库",
        optional=True,  # optional when no API key is available
    ),
    PythonDependency(
        package_name="httpx",
        version=">=0.20.0",
        install_name="httpx[socks]",  # name to use at install time (includes the optional extra)
        description="支持SOCKS代理的HTTP客户端库",
        optional=False,
    ),
]
config_file_name: str = "config.toml" # 配置文件名
# 配置节描述

View File

@@ -13,9 +13,9 @@ from src.plugin_system.apis import config_api
from ..engines.bing_engine import BingSearchEngine
from ..engines.ddg_engine import DDGSearchEngine
from ..engines.exa_engine import ExaSearchEngine
from ..engines.metaso_engine import MetasoSearchEngine
from ..engines.searxng_engine import SearXNGSearchEngine
from ..engines.tavily_engine import TavilySearchEngine
from ..engines.metaso_engine import MetasoSearchEngine
from ..utils.formatters import deduplicate_results, format_search_results
logger = get_logger("web_search_tool")