<Thinking>

我需要分析这个git diff,并生成一个与之前完全不同的commit message。

让我先分析变更内容:

1. **主要变更**:
   - 在 `anti_injector.py` 中添加了新的 `auto` 处理模式
   - 添加了命令跳过列表功能 (`command_skip_list`)
   - 重构了反注入管理命令,从独立插件移动到核心动作插件
   - 更新了配置文件以支持新功能
   - 在插件管理器中添加了跳过列表刷新逻辑

2. **之前的commit message重点**:
   - 强调了"advanced anti-injection capabilities"
   - 重点描述了"intelligent 'auto' mode"
   - 提到了"command skip list"
   - 强调了"dynamically assesses threats"
   - 提到了"refactored and integrated into core actions plugin"

3. **我需要完全不同的角度**:
   - 可以从架构重构的角度
   - 可以从用户体验改进的角度
   - 可以从系统集成的角度
   - 可以从性能优化的角度

让我从**系统架构优化和插件整合**的角度来写这个commit message,重点强调:
- 插件系统的重构和整合
- 管理命令的迁移和统一
- 跳过机制的实现
- 自动化决策系统的引入

这样就能避免使用之前message中的关键词和结构。
</Thinking>

refactor(plugins): 重构反注入插件架构并优化命令管理体系

将反注入管理功能从独立插件迁移至核心动作模块,实现更好的系统集成。新增智能跳过机制,允许已注册的插件命令绕过检测流程,有效减少误判。同时引入自适应处理策略,系统可根据威胁评估结果自主选择最适当的响应方式。

插件管理器现已集成自动刷新功能,确保跳过列表与插件状态保持同步。配置系统扩展支持多种跳过模式和自定义规则,提升了整体可维护性和用户体验。
ps:谢谢雅诺狐姐姐投喂的提交喵^ω^
This commit is contained in:
tt-P607
2025-08-18 21:49:17 +08:00
parent fb02896359
commit 15ae0ea609
11 changed files with 1461 additions and 144 deletions

View File

@@ -18,6 +18,12 @@ from .anti_injector import AntiPromptInjector, get_anti_injector, initialize_ant
from .config import DetectionResult
from .detector import PromptInjectionDetector
from .shield import MessageShield
from .command_skip_list import (
initialize_skip_list,
should_skip_injection_detection,
refresh_plugin_commands,
get_skip_patterns_info
)
__all__ = [
"AntiPromptInjector",
@@ -25,7 +31,11 @@ __all__ = [
"initialize_anti_injector",
"DetectionResult",
"PromptInjectionDetector",
"MessageShield"
"MessageShield",
"initialize_skip_list",
"should_skip_injection_detection",
"refresh_plugin_commands",
"get_skip_patterns_info"
]

View File

@@ -22,6 +22,7 @@ from src.chat.message_receive.message import MessageRecv
from .config import DetectionResult, ProcessResult
from .detector import PromptInjectionDetector
from .shield import MessageShield
from .command_skip_list import should_skip_injection_detection, initialize_skip_list
# 数据库相关导入
from src.common.database.sqlalchemy_models import BanUser, AntiInjectionStats, get_db_session
@@ -38,6 +39,9 @@ class AntiPromptInjector:
self.detector = PromptInjectionDetector()
self.shield = MessageShield()
# 初始化跳过列表
initialize_skip_list()
async def _get_or_create_stats(self):
"""获取或创建统计记录"""
try:
@@ -73,7 +77,7 @@ class AntiPromptInjector:
continue
elif key == 'last_processing_time':
# 直接设置最后处理时间
stats.last_processing_time = value
stats.last_process_time = value
continue
elif hasattr(stats, key):
if key in ['total_messages', 'detected_injections',
@@ -127,10 +131,17 @@ class AntiPromptInjector:
if whitelist_result is not None:
return ProcessResult.ALLOWED, None, whitelist_result[2]
# 4. 内容检测
# 4. 命令跳过列表检测
message_text = self._extract_text_content(message)
should_skip, skip_reason = should_skip_injection_detection(message_text)
if should_skip:
logger.debug(f"消息匹配跳过列表,跳过反注入检测: {skip_reason}")
return ProcessResult.ALLOWED, None, f"命令跳过检测 - {skip_reason}"
# 5. 内容检测
detection_result = await self.detector.detect(message.processed_plain_text)
# 5. 处理检测结果
# 6. 处理检测结果
if detection_result.is_injection:
await self._update_stats(detected_injections=1)
@@ -163,8 +174,34 @@ class AntiPromptInjector:
else:
# 置信度不高,允许通过
return ProcessResult.ALLOWED, None, "检测到轻微可疑内容,已允许通过"
elif self.config.process_mode == "auto":
# 自动模式:根据威胁等级自动选择处理方式
auto_action = self._determine_auto_action(detection_result)
if auto_action == "block":
# 高威胁:直接丢弃
await self._update_stats(blocked_messages=1)
return ProcessResult.BLOCKED_INJECTION, None, f"自动模式:检测到高威胁内容,消息已拒绝 (置信度: {detection_result.confidence:.2f})"
elif auto_action == "shield":
# 中等威胁:加盾处理
await self._update_stats(shielded_messages=1)
shielded_content = self.shield.create_shielded_message(
message.processed_plain_text,
detection_result.confidence
)
summary = self.shield.create_safety_summary(detection_result.confidence, detection_result.matched_patterns)
return ProcessResult.SHIELDED, shielded_content, f"自动模式:检测到中等威胁已加盾处理: {summary}"
else: # auto_action == "allow"
# 低威胁:允许通过
return ProcessResult.ALLOWED, None, "自动模式:检测到轻微可疑内容,已允许通过"
# 6. 正常消息
# 7. 正常消息
return ProcessResult.ALLOWED, None, "消息检查通过"
except Exception as e:
@@ -267,6 +304,87 @@ class AntiPromptInjector:
return True, None, "用户白名单"
return None
def _determine_auto_action(self, detection_result: DetectionResult) -> str:
    """Choose the auto-mode action for a detection result.

    The decision starts from the confidence score and may then be escalated
    (never relaxed) according to how many matched patterns contain
    high-risk or medium-risk keywords.

    Args:
        detection_result: Result whose ``confidence``, ``matched_patterns``
            and ``detection_method`` attributes are read.

    Returns:
        ``"block"`` (drop the message), ``"shield"`` (wrap it) or
        ``"allow"`` (let it through).
    """
    confidence = detection_result.confidence
    # Tolerate a result that carries no pattern list at all.
    matched_patterns = detection_result.matched_patterns or []

    # At or above this confidence the message is dropped outright.
    HIGH_THREAT_THRESHOLD = 0.85
    # At or above this confidence the message is shielded.
    MEDIUM_THREAT_THRESHOLD = 0.5

    # Baseline decision from confidence alone.
    if confidence >= HIGH_THREAT_THRESHOLD:
        base_action = "block"
    elif confidence >= MEDIUM_THREAT_THRESHOLD:
        base_action = "shield"
    else:
        base_action = "allow"

    # Keywords looked for (as substrings) inside each matched pattern.
    high_risk_patterns = (
        'system', '系统', 'admin', '管理', 'root', 'sudo',
        'exec', '执行', 'command', '命令', 'shell', '终端',
        'forget', '忘记', 'ignore', '忽略', 'override', '覆盖',
        'roleplay', '扮演', 'pretend', '伪装', 'assume', '假设',
        'reveal', '揭示', 'dump', '转储', 'extract', '提取',
        'secret', '秘密', 'confidential', '机密', 'private', '私有',
    )
    medium_risk_patterns = (
        '角色', '身份', '模式', 'mode', '权限', 'privilege',
        '规则', 'rule', '限制', 'restriction', '安全', 'safety',
    )

    # Each matched pattern is counted at most once: as high-risk if it
    # contains any high-risk keyword, otherwise as medium-risk if it
    # contains any medium-risk keyword.
    high_risk_count = 0
    medium_risk_count = 0
    for pattern in matched_patterns:
        pattern_lower = pattern.lower()
        if any(keyword in pattern_lower for keyword in high_risk_patterns):
            high_risk_count += 1
        elif any(keyword in pattern_lower for keyword in medium_risk_patterns):
            medium_risk_count += 1

    # Escalate the baseline according to the risk-keyword counts.
    if high_risk_count >= 2:
        # Several high-risk patterns: move up one level.
        if base_action == "allow":
            base_action = "shield"
        elif base_action == "shield":
            base_action = "block"
    elif high_risk_count >= 1:
        # A single high-risk pattern: escalate only with some confidence.
        if base_action == "allow" and confidence > 0.3:
            base_action = "shield"
    elif medium_risk_count >= 3:
        # Many medium-risk patterns.
        if base_action == "allow" and confidence > 0.2:
            base_action = "shield"

    # Very confident LLM verdicts are always blocked. With the thresholds
    # above, confidence > 0.9 already yields "block"; this stays as an
    # explicit safeguard should HIGH_THREAT_THRESHOLD be retuned.
    if detection_result.detection_method == "llm" and confidence > 0.9:
        base_action = "block"

    logger.debug(f"自动模式决策: 置信度={confidence:.3f}, 高风险模式={high_risk_count}, "
                 f"中风险模式={medium_risk_count}, 决策={base_action}")
    return base_action
async def _detect_injection(self, message: MessageRecv) -> DetectionResult:
"""检测提示词注入"""
@@ -318,9 +436,9 @@ class AntiPromptInjector:
# 宽松模式:消息加盾
if self.shield.is_shield_needed(detection_result.confidence, detection_result.matched_patterns):
original_text = message.processed_plain_text
shielded_text = self.shield.shield_message(
shielded_text = self.shield.create_shielded_message(
original_text,
detection_result.matched_patterns
detection_result.confidence
)
logger.info(f"宽松模式:消息已加盾 (置信度: {detection_result.confidence:.2f})")
@@ -328,8 +446,6 @@ class AntiPromptInjector:
# 创建处理摘要
summary = self.shield.create_safety_summary(
len(original_text),
len(shielded_text),
detection_result.confidence,
detection_result.matched_patterns
)
@@ -339,6 +455,39 @@ class AntiPromptInjector:
# 置信度不够,允许通过
return True, None, f"置信度不足,允许通过 - {detection_result.reason}"
elif self.config.process_mode == "auto":
# 自动模式:根据威胁等级自动选择处理方式
auto_action = self._determine_auto_action(detection_result)
if auto_action == "block":
# 高威胁:直接丢弃
logger.warning(f"自动模式:丢弃高威胁消息 (置信度: {detection_result.confidence:.2f})")
await self._update_stats(blocked_messages=1)
return False, None, f"自动模式阻止 - {detection_result.reason}"
elif auto_action == "shield":
# 中等威胁:加盾处理
original_text = message.processed_plain_text
shielded_text = self.shield.create_shielded_message(
original_text,
detection_result.confidence
)
logger.info(f"自动模式:消息已加盾 (置信度: {detection_result.confidence:.2f})")
await self._update_stats(shielded_messages=1)
# 创建处理摘要
summary = self.shield.create_safety_summary(
detection_result.confidence,
detection_result.matched_patterns
)
return True, shielded_text, f"自动模式加盾 - {summary}"
else: # auto_action == "allow"
# 低威胁:允许通过
return True, None, f"自动模式允许通过 - {detection_result.reason}"
# 默认允许通过
return True, None, "默认允许通过"
@@ -394,7 +543,7 @@ class AntiPromptInjector:
"shielded_messages": stats.shielded_messages or 0,
"detection_rate": f"{detection_rate:.2f}%",
"average_processing_time": f"{avg_processing_time:.3f}s",
"last_processing_time": f"{stats.last_processing_time:.3f}s" if stats.last_processing_time else "0.000s",
"last_processing_time": f"{stats.last_process_time:.3f}s" if stats.last_process_time else "0.000s",
"error_count": stats.error_count or 0
}
except Exception as e:

View File

@@ -0,0 +1,289 @@
# -*- coding: utf-8 -*-
"""
命令跳过列表模块
本模块负责管理反注入系统的命令跳过列表,自动收集插件注册的命令
并提供检查机制来跳过对合法命令的反注入检测。
"""
import re
from typing import Set, List, Pattern, Optional, Dict
from dataclasses import dataclass
from src.common.logger import get_logger
from src.config.config import global_config
logger = get_logger("anti_injector.skip_list")
@dataclass
class SkipPattern:
    """One skip rule: the raw regex, its compiled form, and where it came from."""

    # Raw pattern string exactly as it was registered.
    pattern: str
    # Regular expression object compiled from ``pattern``.
    compiled_pattern: Pattern[str]
    # Origin of the pattern, e.g. "plugin", "manual" or "system".
    source: str
    # Human-readable description of the pattern; empty when none was given.
    description: str = ""
class CommandSkipListManager:
    """Registry of regex patterns whose matching messages skip injection detection.

    Every pattern is stored as a ``SkipPattern`` tagged with its source:
    ``"system"`` (built-in commands), ``"plugin"`` (collected from registered
    command components), ``"manual"`` (``manual_skip_patterns`` in the config)
    or ``"temporary"`` (added at runtime and never persisted).
    """

    def __init__(self):
        """Bind the anti-injection config section and start with an empty registry."""
        self.config = global_config.anti_prompt_injection
        # Keyed by "<source>:<pattern>" so the same pattern is stored once per source.
        self._skip_patterns: Dict[str, SkipPattern] = {}
        self._plugin_command_patterns: Set[str] = set()
        self._is_initialized = False

    def initialize(self):
        """Populate the skip list from system, plugin and manual sources.

        Does nothing once a previous call has completed. When the skip list
        is disabled in the config, the registry is left empty and the manager
        stays uninitialized, so ``should_skip_detection`` keeps answering
        ``(False, None)``.
        """
        if self._is_initialized:
            return

        logger.info("初始化反注入命令跳过列表...")

        # Start from a clean slate.
        self._skip_patterns.clear()
        self._plugin_command_patterns.clear()

        if not self.config.enable_command_skip_list:
            logger.info("命令跳过列表已禁用")
            return

        # Built-in system commands.
        if self.config.skip_system_commands:
            self._add_system_command_patterns()

        # Commands registered by plugins.
        if self.config.auto_collect_plugin_commands:
            self._collect_plugin_commands()

        # Patterns listed explicitly in the config.
        self._add_manual_patterns()

        self._is_initialized = True
        logger.info(f"跳过列表初始化完成,共收集 {len(self._skip_patterns)} 个模式")

    def _add_system_command_patterns(self):
        """Register the patterns for the built-in system commands."""
        system_patterns = [
            (r"^/pm\b", "/pm 插件管理命令"),
            (r"^/反注入统计\b", "反注入统计查询命令"),
            # The doubled "^" is redundant but matches the same strings as a single one.
            (r"^^/反注入跳过列表(?:\s+(.+))?$", "反注入列表管理命令"),
        ]

        for pattern_str, description in system_patterns:
            self._add_skip_pattern(pattern_str, "system", description)

    def _collect_plugin_commands(self):
        """Collect skip patterns from every registered plugin command component.

        A component with a non-empty ``command_pattern`` contributes that
        pattern; otherwise two patterns are generated from the command name
        (with and without a leading ``/``). Any failure is logged as a warning
        and swallowed: collection is best-effort.
        """
        try:
            # Imported lazily, at call time rather than module import time.
            from src.plugin_system.apis import component_manage_api
            from src.plugin_system.base.component_types import ComponentType

            # All registered command components.
            command_components = component_manage_api.get_components_info_by_type(ComponentType.COMMAND)

            if not command_components:
                logger.debug("没有找到注册的命令组件(插件可能还未加载)")
                return

            collected_count = 0
            for command_name, command_info in command_components.items():
                # Prefer the command's own match pattern.
                if hasattr(command_info, 'command_pattern') and command_info.command_pattern:
                    pattern = command_info.command_pattern
                    description = f"插件命令: {command_name}"

                    if self._add_skip_pattern(pattern, "plugin", description):
                        self._plugin_command_patterns.add(pattern)
                        collected_count += 1
                        logger.debug(f"收集插件命令模式: {pattern} ({command_name})")

                # No explicit pattern: derive basic ones from the command name.
                elif command_name:
                    basic_patterns = [
                        f"^/{re.escape(command_name)}\\b",  # /command_name
                        f"^{re.escape(command_name)}\\b",  # command_name
                    ]

                    for pattern in basic_patterns:
                        description = f"插件命令: {command_name} (自动生成)"
                        if self._add_skip_pattern(pattern, "plugin", description):
                            self._plugin_command_patterns.add(pattern)
                            collected_count += 1

            if collected_count > 0:
                logger.info(f"自动收集了 {collected_count} 个插件命令模式")
            else:
                logger.debug("当前没有收集到插件命令模式(插件可能还未加载)")

        except Exception as e:
            logger.warning(f"自动收集插件命令时出错: {e}")

    def _add_manual_patterns(self):
        """Register the patterns listed in ``config.manual_skip_patterns``.

        Blank entries are ignored; entries that are not strings are skipped
        with a warning instead of aborting initialization.
        """
        for pattern_str in self.config.manual_skip_patterns or []:
            if not isinstance(pattern_str, str):
                logger.warning(f"忽略非字符串的手动跳过模式: {pattern_str!r}")
                continue
            stripped = pattern_str.strip()
            if stripped:
                self._add_skip_pattern(stripped, "manual", "手动配置的跳过模式")

    def _add_skip_pattern(self, pattern_str: str, source: str, description: str = "") -> bool:
        """Compile a pattern and store it in the registry.

        Args:
            pattern_str: Regular expression source.
            source: Origin tag stored on the pattern.
            description: Human-readable description.

        Returns:
            True when the pattern compiled and was stored, False otherwise.
        """
        try:
            compiled_pattern = re.compile(pattern_str, re.IGNORECASE | re.DOTALL)
        except (re.error, TypeError, ValueError) as e:
            # TypeError/ValueError cover a non-string or an already compiled
            # pattern, so one bad entry cannot abort the caller's whole loop.
            logger.error(f"无效的正则表达式模式 '{pattern_str}': {e}")
            return False

        # Keyed by source and pattern text, so re-adding overwrites rather than duplicates.
        pattern_key = f"{source}:{pattern_str}"
        self._skip_patterns[pattern_key] = SkipPattern(
            pattern=pattern_str,
            compiled_pattern=compiled_pattern,
            source=source,
            description=description,
        )
        return True

    def should_skip_detection(self, message_text: str) -> tuple[bool, Optional[str]]:
        """Tell whether a message matches the skip list.

        Args:
            message_text: Message text to check; ``None`` or blank text never matches.

        Returns:
            ``(True, description)`` for the first matching pattern, otherwise
            ``(False, None)``. Always ``(False, None)`` when the skip list is
            disabled or not yet initialized.
        """
        if not self.config.enable_command_skip_list or not self._is_initialized:
            return False, None

        # Guard before strip(): a message without text must not raise here.
        if not message_text:
            return False, None

        message_text = message_text.strip()
        if not message_text:
            return False, None

        # NOTE(review): patterns are applied with search(), so an unanchored
        # plugin/manual pattern matches anywhere in the message and exempts
        # that message from detection -- confirm such patterns are anchored.
        for skip_pattern in self._skip_patterns.values():
            try:
                if skip_pattern.compiled_pattern.search(message_text):
                    logger.debug(f"消息匹配跳过模式: {skip_pattern.pattern} ({skip_pattern.description})")
                    return True, skip_pattern.description
            except Exception as e:
                logger.warning(f"检查跳过模式时出错 '{skip_pattern.pattern}': {e}")

        return False, None

    def refresh_plugin_commands(self):
        """Drop all plugin-sourced patterns and collect them again.

        Does nothing when ``auto_collect_plugin_commands`` is off. Patterns
        from other sources are left untouched.
        """
        if not self.config.auto_collect_plugin_commands:
            return

        logger.info("刷新插件命令跳过列表...")

        # Collect the keys first so the dict is not mutated while iterating.
        old_plugin_patterns = [
            key for key, pattern in self._skip_patterns.items()
            if pattern.source == "plugin"
        ]
        for key in old_plugin_patterns:
            del self._skip_patterns[key]

        self._plugin_command_patterns.clear()

        self._collect_plugin_commands()

        logger.info(f"插件命令跳过列表已刷新,当前共有 {len(self._skip_patterns)} 个模式")

    def get_skip_patterns_info(self) -> Dict[str, List[Dict[str, str]]]:
        """Describe the registered patterns, grouped by source.

        Returns:
            A dict that always has the keys ``"system"``, ``"plugin"`` and
            ``"manual"``; any other source present in the registry (such as
            ``"temporary"``) gets its own key. Each value is a list of
            ``{"pattern": ..., "description": ...}`` dicts.
        """
        result: Dict[str, List[Dict[str, str]]] = {"system": [], "plugin": [], "manual": []}

        for skip_pattern in self._skip_patterns.values():
            # setdefault keeps sources outside the three defaults visible too.
            result.setdefault(skip_pattern.source, []).append({
                "pattern": skip_pattern.pattern,
                "description": skip_pattern.description,
            })

        return result

    def add_temporary_skip_pattern(self, pattern: str, description: str = "") -> bool:
        """Add a runtime-only skip pattern (not written back to the config).

        Args:
            pattern: Regular expression source.
            description: Human-readable description; a default is used when empty.

        Returns:
            True when the pattern compiled and was stored, False otherwise.
        """
        return self._add_skip_pattern(pattern, "temporary", description or "临时跳过模式")

    def remove_temporary_patterns(self):
        """Remove every pattern whose source is ``"temporary"``."""
        temp_patterns = [
            key for key, pattern in self._skip_patterns.items()
            if pattern.source == "temporary"
        ]
        for key in temp_patterns:
            del self._skip_patterns[key]

        logger.info(f"已移除 {len(temp_patterns)} 个临时跳过模式")
# Global skip-list manager instance, created once when this module is imported.
skip_list_manager = CommandSkipListManager()
def initialize_skip_list() -> None:
    """Initialize the skip list by calling ``initialize()`` on the global manager."""
    skip_list_manager.initialize()
def should_skip_injection_detection(message_text: str) -> tuple[bool, Optional[str]]:
    """Check whether a message should skip anti-injection detection.

    Returns whatever the global manager's ``should_skip_detection`` returns
    for ``message_text``.
    """
    return skip_list_manager.should_skip_detection(message_text)
def refresh_plugin_commands() -> None:
    """Refresh the plugin command list by calling ``refresh_plugin_commands()`` on the global manager."""
    skip_list_manager.refresh_plugin_commands()
def get_skip_patterns_info() -> Dict[str, List[Dict[str, str]]]:
    """Return the skip-pattern information reported by the global manager."""
    return skip_list_manager.get_skip_patterns_info()