添加SearXNG引擎
This commit is contained in:
145
src/plugins/built_in/web_search_tool/engines/searxng_engine.py
Normal file
145
src/plugins/built_in/web_search_tool/engines/searxng_engine.py
Normal file
@@ -0,0 +1,145 @@
|
||||
"""
|
||||
SearXNG search engine implementation
|
||||
|
||||
参考: https://docs.searxng.org/dev/search_api.html (公开JSON接口说明)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import functools
|
||||
from typing import Any, List
|
||||
|
||||
import httpx
|
||||
|
||||
from src.common.logger import get_logger
|
||||
from src.plugin_system.apis import config_api
|
||||
|
||||
from .base import BaseSearchEngine
|
||||
|
||||
logger = get_logger("searxng_engine")
|
||||
|
||||
|
||||
class SearXNGSearchEngine(BaseSearchEngine):
|
||||
"""SearXNG 元搜索引擎实现
|
||||
|
||||
通过在配置中提供一个或多个公开 / 自建 SearXNG 实例来使用。
|
||||
|
||||
配置项(位于主配置 bot_config.toml 的 [web_search] 部分):
|
||||
searxng_instances = ["https://searxng.example.com"]
|
||||
# 可选: 若实例启用 token 验证,可在 searxng_api_keys 中提供对应 token (顺序与实例列表一致)
|
||||
searxng_api_keys = ["token1", "token2"]
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
self._load_config()
|
||||
self._client = httpx.AsyncClient(timeout=httpx.Timeout(10.0, connect=5.0))
|
||||
|
||||
def _load_config(self):
|
||||
instances = config_api.get_global_config("web_search.searxng_instances", None)
|
||||
if isinstance(instances, list):
|
||||
# 过滤空值
|
||||
self.instances: List[str] = [u.rstrip("/") for u in instances if isinstance(u, str) and u.strip()]
|
||||
else:
|
||||
self.instances = []
|
||||
|
||||
api_keys = config_api.get_global_config("web_search.searxng_api_keys", None)
|
||||
if isinstance(api_keys, list):
|
||||
self.api_keys: List[str | None] = [k.strip() if isinstance(k, str) and k.strip() else None for k in api_keys]
|
||||
else:
|
||||
self.api_keys = []
|
||||
|
||||
# 与实例列表对齐(若 keys 少则补 None)
|
||||
if self.api_keys and len(self.api_keys) < len(self.instances):
|
||||
self.api_keys.extend([None] * (len(self.instances) - len(self.api_keys)))
|
||||
|
||||
logger.debug(
|
||||
f"SearXNG 引擎配置: instances={self.instances}, api_keys={'yes' if any(self.api_keys) else 'no'}"
|
||||
)
|
||||
|
||||
def is_available(self) -> bool:
|
||||
return bool(self.instances)
|
||||
|
||||
async def search(self, args: dict[str, Any]) -> list[dict[str, Any]]:
|
||||
if not self.is_available():
|
||||
return []
|
||||
|
||||
query = args["query"]
|
||||
num_results = args.get("num_results", 3)
|
||||
time_range = args.get("time_range", "any")
|
||||
|
||||
# SearXNG 的时间范围参数: day / week / month / year
|
||||
searx_time = None
|
||||
if time_range == "week":
|
||||
searx_time = "week"
|
||||
elif time_range == "month":
|
||||
searx_time = "month"
|
||||
|
||||
# 轮询实例:简单使用循环尝试,直到获得结果或全部失败
|
||||
results: list[dict[str, Any]] = []
|
||||
for idx, base_url in enumerate(self.instances):
|
||||
token = self.api_keys[idx] if idx < len(self.api_keys) else None
|
||||
try:
|
||||
instance_results = await self._search_one_instance(base_url, query, num_results, searx_time, token)
|
||||
if instance_results:
|
||||
results.extend(instance_results)
|
||||
if len(results) >= num_results:
|
||||
break
|
||||
except Exception as e: # noqa: BLE001
|
||||
logger.warning(f"SearXNG 实例 {base_url} 调用失败: {e}")
|
||||
continue
|
||||
|
||||
# 截断到需要的数量
|
||||
return results[:num_results]
|
||||
|
||||
async def _search_one_instance(
|
||||
self, base_url: str, query: str, num_results: int, searx_time: str | None, api_key: str | None
|
||||
) -> list[dict[str, Any]]:
|
||||
# 构造 URL & 参数
|
||||
url = f"{base_url}/search"
|
||||
params = {
|
||||
"q": query,
|
||||
"format": "json",
|
||||
"categories": "general", # 可扩展: 允许从 args 传 categories
|
||||
"language": "zh-CN",
|
||||
"safesearch": 1,
|
||||
}
|
||||
if searx_time:
|
||||
params["time_range"] = searx_time
|
||||
|
||||
headers = {}
|
||||
if api_key:
|
||||
# SearXNG 可通过 Authorization 或 X-Token (取决于实例配置),尝试常见方案
|
||||
headers["Authorization"] = f"Token {api_key}"
|
||||
|
||||
# 在线程池中运行同步请求(httpx.AsyncClient 直接 await 即可,这里直接调用)
|
||||
try:
|
||||
resp = await self._client.get(url, params=params, headers=headers)
|
||||
resp.raise_for_status()
|
||||
except Exception as e: # noqa: BLE001
|
||||
raise RuntimeError(f"请求失败: {e}") from e
|
||||
|
||||
try:
|
||||
data = resp.json()
|
||||
except Exception as e: # noqa: BLE001
|
||||
raise RuntimeError(f"解析 JSON 失败: {e}") from e
|
||||
|
||||
raw_results = data.get("results", []) if isinstance(data, dict) else []
|
||||
|
||||
parsed: list[dict[str, Any]] = []
|
||||
for item in raw_results:
|
||||
title = item.get("title") or item.get("url", "无标题")
|
||||
url_item = item.get("url") or item.get("link", "")
|
||||
snippet = item.get("content") or item.get("snippet") or ""
|
||||
snippet = (snippet[:300] + "...") if len(snippet) > 300 else snippet
|
||||
parsed.append({"title": title, "url": url_item, "snippet": snippet, "provider": "SearXNG"})
|
||||
if len(parsed) >= num_results: # 单实例限量
|
||||
break
|
||||
|
||||
return parsed
|
||||
|
||||
async def __aenter__(self):
|
||||
return self
|
||||
|
||||
async def __aexit__(self, exc_type, exc, tb): # noqa: D401
|
||||
await self._client.aclose()
|
||||
@@ -42,12 +42,14 @@ class WEBSEARCHPLUGIN(BasePlugin):
|
||||
from .engines.ddg_engine import DDGSearchEngine
|
||||
from .engines.exa_engine import ExaSearchEngine
|
||||
from .engines.tavily_engine import TavilySearchEngine
|
||||
from .engines.searxng_engine import SearXNGSearchEngine
|
||||
|
||||
# 实例化所有搜索引擎,这会触发API密钥管理器的初始化
|
||||
exa_engine = ExaSearchEngine()
|
||||
tavily_engine = TavilySearchEngine()
|
||||
ddg_engine = DDGSearchEngine()
|
||||
bing_engine = BingSearchEngine()
|
||||
searxng_engine = SearXNGSearchEngine()
|
||||
|
||||
# 报告每个引擎的状态
|
||||
engines_status = {
|
||||
@@ -55,6 +57,7 @@ class WEBSEARCHPLUGIN(BasePlugin):
|
||||
"Tavily": tavily_engine.is_available(),
|
||||
"DuckDuckGo": ddg_engine.is_available(),
|
||||
"Bing": bing_engine.is_available(),
|
||||
"SearXNG": searxng_engine.is_available(),
|
||||
}
|
||||
|
||||
available_engines = [name for name, available in engines_status.items() if available]
|
||||
|
||||
@@ -14,6 +14,7 @@ from ..engines.bing_engine import BingSearchEngine
|
||||
from ..engines.ddg_engine import DDGSearchEngine
|
||||
from ..engines.exa_engine import ExaSearchEngine
|
||||
from ..engines.tavily_engine import TavilySearchEngine
|
||||
from ..engines.searxng_engine import SearXNGSearchEngine
|
||||
from ..utils.formatters import deduplicate_results, format_search_results
|
||||
|
||||
logger = get_logger("web_search_tool")
|
||||
@@ -49,6 +50,7 @@ class WebSurfingTool(BaseTool):
|
||||
"tavily": TavilySearchEngine(),
|
||||
"ddg": DDGSearchEngine(),
|
||||
"bing": BingSearchEngine(),
|
||||
"searxng": SearXNGSearchEngine(),
|
||||
}
|
||||
|
||||
async def execute(self, function_args: dict[str, Any]) -> dict[str, Any]:
|
||||
|
||||
Reference in New Issue
Block a user