From 4686327d8d2dd0ff2dd49e7214c384b5e6f36beb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=9B=85=E8=AF=BA=E7=8B=90?= <212194964+foxcyber907@users.noreply.github.com>
Date: Sun, 5 Oct 2025 19:56:15 +0800
Subject: [PATCH] =?UTF-8?q?=E6=B7=BB=E5=8A=A0SearXNG=E5=BC=95=E6=93=8E?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Reviewer notes (this section is not part of the commit message):
 * Line breaks were restored from a copy of this patch whose newlines had
   been collapsed. The indentation of the context lines in plugin.py and
   tools/web_search.py is inferred; apply with --ignore-whitespace if those
   hunks are rejected.
 * searxng_engine.py: tokens are now paired with their instance before blank
   instances are dropped, malformed "results" payloads no longer fail the
   whole instance, "day" and "year" time ranges are forwarded, and comments
   and docstrings are in English.

 .../web_search_tool/engines/searxng_engine.py | 176 ++++++++++++++++++
 .../built_in/web_search_tool/plugin.py        |   3 +
 .../web_search_tool/tools/web_search.py       |   2 +
 3 files changed, 181 insertions(+)
 create mode 100644 src/plugins/built_in/web_search_tool/engines/searxng_engine.py

diff --git a/src/plugins/built_in/web_search_tool/engines/searxng_engine.py b/src/plugins/built_in/web_search_tool/engines/searxng_engine.py
new file mode 100644
index 000000000..e539b9227
--- /dev/null
+++ b/src/plugins/built_in/web_search_tool/engines/searxng_engine.py
@@ -0,0 +1,176 @@
+"""
+SearXNG search engine implementation.
+
+Reference: https://docs.searxng.org/dev/search_api.html (public JSON search API)
+"""
+
+from __future__ import annotations
+
+import asyncio
+import functools
+from typing import Any, List
+
+import httpx
+
+from src.common.logger import get_logger
+from src.plugin_system.apis import config_api
+
+from .base import BaseSearchEngine
+
+logger = get_logger("searxng_engine")
+
+
+class SearXNGSearchEngine(BaseSearchEngine):
+    """SearXNG meta search engine.
+
+    Enabled by listing one or more public or self-hosted SearXNG instances in
+    the [web_search] section of the main bot_config.toml:
+
+        searxng_instances = ["https://searxng.example.com"]
+        # Optional: tokens for instances that require one, in the same order
+        # as searxng_instances.
+        searxng_api_keys = ["token1", "token2"]
+    """
+
+    def __init__(self):
+        """Load the configuration and create the shared HTTP client."""
+        self._load_config()
+        self._client = httpx.AsyncClient(timeout=httpx.Timeout(10.0, connect=5.0))
+
+    def _load_config(self):
+        """Read instance URLs and optional tokens from the global config.
+
+        Sets self.instances (base URLs, stripped, without a trailing slash) and
+        self.api_keys (same length; None where no usable token is configured).
+        """
+        instances = config_api.get_global_config("web_search.searxng_instances", None)
+        if not isinstance(instances, list):
+            instances = []
+
+        api_keys = config_api.get_global_config("web_search.searxng_api_keys", None)
+        if not isinstance(api_keys, list):
+            api_keys = []
+
+        self.instances: List[str] = []
+        self.api_keys: List[str | None] = []
+        for idx, raw_url in enumerate(instances):
+            if not isinstance(raw_url, str) or not raw_url.strip():
+                continue
+            # Pair the token with its instance by configured position before
+            # blank entries are dropped, so skipping one cannot shift the tokens.
+            raw_key = api_keys[idx] if idx < len(api_keys) else None
+            token = raw_key.strip() if isinstance(raw_key, str) and raw_key.strip() else None
+            self.instances.append(raw_url.strip().rstrip("/"))
+            self.api_keys.append(token)
+
+        logger.debug(
+            f"SearXNG 引擎配置: instances={self.instances}, api_keys={'yes' if any(self.api_keys) else 'no'}"
+        )
+
+    def is_available(self) -> bool:
+        """Return True when at least one instance is configured."""
+        return bool(self.instances)
+
+    async def search(self, args: dict[str, Any]) -> list[dict[str, Any]]:
+        """Query the instances in order until enough results are collected.
+
+        Args:
+            args: Search arguments. "query" is required; "num_results"
+                (default 3) and "time_range" (default "any") are optional.
+
+        Returns:
+            At most num_results dicts with the keys "title", "url", "snippet"
+            and "provider"; empty when no instance is configured or all fail.
+        """
+        if not self.is_available():
+            return []
+
+        query = args["query"]
+        num_results = args.get("num_results", 3)
+        time_range = args.get("time_range", "any")
+
+        # Forward only the values the time_range parameter accepts; anything
+        # else (such as "any") means no time filter.
+        searx_time = time_range if time_range in ("day", "week", "month", "year") else None
+
+        # Try the instances in order; a failing instance is logged and skipped.
+        results: list[dict[str, Any]] = []
+        for idx, base_url in enumerate(self.instances):
+            token = self.api_keys[idx] if idx < len(self.api_keys) else None
+            try:
+                instance_results = await self._search_one_instance(base_url, query, num_results, searx_time, token)
+            except Exception as e:  # noqa: BLE001
+                logger.warning(f"SearXNG 实例 {base_url} 调用失败: {e}")
+                continue
+            results.extend(instance_results)
+            if len(results) >= num_results:
+                break
+
+        # Trim to the requested number of results.
+        return results[:num_results]
+
+    async def _search_one_instance(
+        self, base_url: str, query: str, num_results: int, searx_time: str | None, api_key: str | None
+    ) -> list[dict[str, Any]]:
+        """Run one search against a single instance and normalize the results.
+
+        Raises:
+            RuntimeError: If the request fails, the response has an error
+                status, or the body is not valid JSON.
+        """
+        # Build the URL and query parameters.
+        url = f"{base_url}/search"
+        params = {
+            "q": query,
+            "format": "json",
+            "categories": "general",  # could later be taken from args
+            "language": "zh-CN",
+            "safesearch": 1,
+        }
+        if searx_time:
+            params["time_range"] = searx_time
+
+        headers = {}
+        if api_key:
+            # NOTE(review): how a token has to be sent depends on the instance
+            # setup (Authorization vs. X-Token); this uses Authorization.
+            headers["Authorization"] = f"Token {api_key}"
+
+        # httpx.AsyncClient is natively async, so the request is awaited directly.
+        try:
+            resp = await self._client.get(url, params=params, headers=headers)
+            resp.raise_for_status()
+        except Exception as e:  # noqa: BLE001
+            raise RuntimeError(f"请求失败: {e}") from e
+
+        try:
+            data = resp.json()
+        except Exception as e:  # noqa: BLE001
+            raise RuntimeError(f"解析 JSON 失败: {e}") from e
+
+        raw_results = data.get("results") if isinstance(data, dict) else None
+        if not isinstance(raw_results, list):
+            # A missing, null or malformed "results" field means no results.
+            return []
+
+        parsed: list[dict[str, Any]] = []
+        for item in raw_results:
+            if len(parsed) >= num_results:  # per-instance cap
+                break
+            if not isinstance(item, dict):
+                continue  # skip malformed entries instead of failing the instance
+            title = item.get("title") or item.get("url") or "无标题"
+            url_item = item.get("url") or item.get("link") or ""
+            snippet = str(item.get("content") or item.get("snippet") or "")
+            snippet = (snippet[:300] + "...") if len(snippet) > 300 else snippet
+            parsed.append({"title": title, "url": url_item, "snippet": snippet, "provider": "SearXNG"})
+
+        return parsed
+
+    async def __aenter__(self):
+        """Return the engine itself for use in an async with block."""
+        return self
+
+    async def __aexit__(self, exc_type, exc, tb):  # noqa: D401
+        """Close the underlying HTTP client."""
+        await self._client.aclose()
diff --git a/src/plugins/built_in/web_search_tool/plugin.py b/src/plugins/built_in/web_search_tool/plugin.py
index 2b85104bc..f8a8c785d 100644
--- a/src/plugins/built_in/web_search_tool/plugin.py
+++ b/src/plugins/built_in/web_search_tool/plugin.py
@@ -42,12 +42,14 @@ class WEBSEARCHPLUGIN(BasePlugin):
             from .engines.ddg_engine import DDGSearchEngine
             from .engines.exa_engine import ExaSearchEngine
             from .engines.tavily_engine import TavilySearchEngine
+            from .engines.searxng_engine import SearXNGSearchEngine
 
             # 实例化所有搜索引擎,这会触发API密钥管理器的初始化
             exa_engine = ExaSearchEngine()
             tavily_engine = TavilySearchEngine()
             ddg_engine = DDGSearchEngine()
             bing_engine = BingSearchEngine()
+            searxng_engine = SearXNGSearchEngine()
 
             # 报告每个引擎的状态
             engines_status = {
@@ -55,6 +57,7 @@ class WEBSEARCHPLUGIN(BasePlugin):
                 "Tavily": tavily_engine.is_available(),
                 "DuckDuckGo": ddg_engine.is_available(),
                 "Bing": bing_engine.is_available(),
+                "SearXNG": searxng_engine.is_available(),
             }
 
             available_engines = [name for name, available in engines_status.items() if available]
diff --git a/src/plugins/built_in/web_search_tool/tools/web_search.py b/src/plugins/built_in/web_search_tool/tools/web_search.py
index 9dcafc9a5..47fd7946c 100644
--- a/src/plugins/built_in/web_search_tool/tools/web_search.py
+++ b/src/plugins/built_in/web_search_tool/tools/web_search.py
@@ -14,6 +14,7 @@ from ..engines.bing_engine import BingSearchEngine
 from ..engines.ddg_engine import DDGSearchEngine
 from ..engines.exa_engine import ExaSearchEngine
 from ..engines.tavily_engine import TavilySearchEngine
+from ..engines.searxng_engine import SearXNGSearchEngine
 from ..utils.formatters import deduplicate_results, format_search_results
 
 logger = get_logger("web_search_tool")
@@ -49,6 +50,7 @@ class WebSurfingTool(BaseTool):
             "tavily": TavilySearchEngine(),
             "ddg": DDGSearchEngine(),
             "bing": BingSearchEngine(),
+            "searxng": SearXNGSearchEngine(),
         }
 
     async def execute(self, function_args: dict[str, Any]) -> dict[str, Any]: