feat(web_search): 添加答案模式支持,优化Exa搜索引擎的结果处理
This commit is contained in:
@@ -39,7 +39,7 @@ class ExaSearchEngine(BaseSearchEngine):
|
||||
return self.api_manager.is_available()
|
||||
|
||||
async def search(self, args: dict[str, Any]) -> list[dict[str, Any]]:
|
||||
"""执行Exa搜索"""
|
||||
"""执行优化的Exa搜索(使用answer模式)"""
|
||||
if not self.is_available():
|
||||
return []
|
||||
|
||||
@@ -47,7 +47,16 @@ class ExaSearchEngine(BaseSearchEngine):
|
||||
num_results = args.get("num_results", 3)
|
||||
time_range = args.get("time_range", "any")
|
||||
|
||||
exa_args = {"num_results": num_results, "text": True, "highlights": True}
|
||||
# 优化的搜索参数 - 更注重答案质量
|
||||
exa_args = {
|
||||
"num_results": num_results,
|
||||
"text": True,
|
||||
"highlights": True,
|
||||
"summary": True, # 启用自动摘要
|
||||
"include_text": True, # 包含全文内容
|
||||
}
|
||||
|
||||
# 时间范围过滤
|
||||
if time_range != "any":
|
||||
today = datetime.now()
|
||||
start_date = today - timedelta(days=7 if time_range == "week" else 30)
|
||||
@@ -61,18 +70,89 @@ class ExaSearchEngine(BaseSearchEngine):
|
||||
return []
|
||||
|
||||
loop = asyncio.get_running_loop()
|
||||
# 使用search_and_contents获取完整内容,优化为answer模式
|
||||
func = functools.partial(exa_client.search_and_contents, query, **exa_args)
|
||||
search_response = await loop.run_in_executor(None, func)
|
||||
|
||||
return [
|
||||
{
|
||||
# 优化结果处理 - 更注重答案质量
|
||||
results = []
|
||||
for res in search_response.results:
|
||||
# 获取最佳内容片段
|
||||
highlights = getattr(res, "highlights", [])
|
||||
summary = getattr(res, "summary", "")
|
||||
text = getattr(res, "text", "")
|
||||
|
||||
# 智能内容选择:摘要 > 高亮 > 文本开头
|
||||
if summary and len(summary) > 50:
|
||||
snippet = summary.strip()
|
||||
elif highlights:
|
||||
snippet = " ".join(highlights).strip()
|
||||
elif text:
|
||||
snippet = text[:300] + "..." if len(text) > 300 else text
|
||||
else:
|
||||
snippet = "内容获取失败"
|
||||
|
||||
# 只保留有意义的摘要
|
||||
if len(snippet) < 30:
|
||||
snippet = text[:200] + "..." if text and len(text) > 200 else snippet
|
||||
|
||||
results.append({
|
||||
"title": res.title,
|
||||
"url": res.url,
|
||||
"snippet": " ".join(getattr(res, "highlights", [])) or (getattr(res, "text", "")[:250] + "..."),
|
||||
"snippet": snippet,
|
||||
"provider": "Exa",
|
||||
}
|
||||
for res in search_response.results
|
||||
]
|
||||
"answer_focused": True, # 标记为答案导向的搜索
|
||||
})
|
||||
|
||||
return results
|
||||
except Exception as e:
|
||||
logger.error(f"Exa 搜索失败: {e}")
|
||||
logger.error(f"Exa answer模式搜索失败: {e}")
|
||||
return []
|
||||
|
||||
async def answer_search(self, args: dict[str, Any]) -> list[dict[str, Any]]:
    """Run a condensed Exa "quick answer" search.

    Requests at most two results with summaries and highlights (no full
    text) and keeps only the results that yield a usable answer snippet.

    Args:
        args: Search arguments. ``query`` is required; ``num_results`` is
            optional and is capped at 2.

    Returns:
        A list of dicts with the keys ``title``, ``url``, ``snippet``
        (at most 400 characters plus a trailing "..."), ``provider``
        (``"Exa-Answer"``) and ``answer_mode`` (always ``True``). An empty
        list is returned when the engine is unavailable, when no client
        can be obtained, or when the search raises.

    Raises:
        KeyError: If ``args`` has no ``query`` entry (looked up before the
            ``try`` block, so it is not swallowed).
    """
    if not self.is_available():
        return []

    query = args["query"]
    # Cap the result count: this mode favors a few focused answers.
    num_results = min(args.get("num_results", 2), 2)

    # Lean request: skip full text, ask only for highlights and a summary.
    exa_args = {
        "num_results": num_results,
        "text": False,
        "highlights": True,
        "summary": True,
    }

    try:
        exa_client = self.api_manager.get_next_client()
        if not exa_client:
            return []

        # Hand the client call to the loop's default executor so the
        # coroutine awaits it instead of calling it inline.
        loop = asyncio.get_running_loop()
        func = functools.partial(exa_client.search_and_contents, query, **exa_args)
        search_response = await loop.run_in_executor(None, func)

        results = []
        for res in search_response.results:
            # getattr defaults only cover a *missing* attribute; an attribute
            # that is present but None would break len()/join() below and,
            # via the broad except, discard every result. Normalize instead.
            summary = getattr(res, "summary", None) or ""
            highlights = getattr(res, "highlights", None) or []

            # Prefer the summary when it is long enough, else the highlights.
            if len(summary) > 30:
                answer_text = summary.strip()
            else:
                answer_text = " ".join(highlights).strip()

            # Drop results whose answer text is too short to be useful.
            if len(answer_text) <= 20:
                continue

            if len(answer_text) > 400:
                snippet = answer_text[:400] + "..."
            else:
                snippet = answer_text

            results.append(
                {
                    "title": res.title,
                    "url": res.url,
                    "snippet": snippet,
                    "provider": "Exa-Answer",
                    "answer_mode": True,  # marks a pure answer-mode result
                }
            )

        return results

    except Exception as e:
        # Best-effort boundary: callers expect an empty list on failure.
        logger.error(f"Exa快速答案搜索失败: {e}")
        return []
|
||||
|
||||
@@ -41,6 +41,13 @@ class WebSurfingTool(BaseTool):
|
||||
False,
|
||||
["any", "week", "month"],
|
||||
),
|
||||
(
|
||||
"answer_mode",
|
||||
ToolParamType.BOOLEAN,
|
||||
"是否启用答案模式(仅适用于Exa搜索引擎)。启用后将返回更精简、直接的答案,减少冗余信息。默认为False。",
|
||||
False,
|
||||
None,
|
||||
),
|
||||
] # type: ignore
|
||||
|
||||
def __init__(self, plugin_config=None, chat_stream=None):
|
||||
@@ -97,13 +104,19 @@ class WebSurfingTool(BaseTool):
|
||||
) -> dict[str, Any]:
|
||||
"""并行搜索策略:同时使用所有启用的搜索引擎"""
|
||||
search_tasks = []
|
||||
answer_mode = function_args.get("answer_mode", False)
|
||||
|
||||
for engine_name in enabled_engines:
|
||||
engine = self.engines.get(engine_name)
|
||||
if engine and engine.is_available():
|
||||
custom_args = function_args.copy()
|
||||
custom_args["num_results"] = custom_args.get("num_results", 5)
|
||||
search_tasks.append(engine.search(custom_args))
|
||||
|
||||
# 如果启用了answer模式且是Exa引擎,使用answer_search方法
|
||||
if answer_mode and engine_name == "exa" and hasattr(engine, 'answer_search'):
|
||||
search_tasks.append(engine.answer_search(custom_args))
|
||||
else:
|
||||
search_tasks.append(engine.search(custom_args))
|
||||
|
||||
if not search_tasks:
|
||||
|
||||
@@ -137,17 +150,23 @@ class WebSurfingTool(BaseTool):
|
||||
self, function_args: dict[str, Any], enabled_engines: list[str]
|
||||
) -> dict[str, Any]:
|
||||
"""回退搜索策略:按顺序尝试搜索引擎,失败则尝试下一个"""
|
||||
answer_mode = function_args.get("answer_mode", False)
|
||||
|
||||
for engine_name in enabled_engines:
|
||||
engine = self.engines.get(engine_name)
|
||||
if not engine or not engine.is_available():
|
||||
|
||||
continue
|
||||
|
||||
try:
|
||||
custom_args = function_args.copy()
|
||||
custom_args["num_results"] = custom_args.get("num_results", 5)
|
||||
|
||||
results = await engine.search(custom_args)
|
||||
# 如果启用了answer模式且是Exa引擎,使用answer_search方法
|
||||
if answer_mode and engine_name == "exa" and hasattr(engine, 'answer_search'):
|
||||
logger.info("使用Exa答案模式进行搜索(fallback策略)")
|
||||
results = await engine.answer_search(custom_args)
|
||||
else:
|
||||
results = await engine.search(custom_args)
|
||||
|
||||
if results: # 如果有结果,直接返回
|
||||
formatted_content = format_search_results(results)
|
||||
@@ -164,22 +183,30 @@ class WebSurfingTool(BaseTool):
|
||||
|
||||
async def _execute_single_search(self, function_args: dict[str, Any], enabled_engines: list[str]) -> dict[str, Any]:
|
||||
"""单一搜索策略:只使用第一个可用的搜索引擎"""
|
||||
answer_mode = function_args.get("answer_mode", False)
|
||||
|
||||
for engine_name in enabled_engines:
|
||||
engine = self.engines.get(engine_name)
|
||||
if not engine or not engine.is_available():
|
||||
|
||||
continue
|
||||
|
||||
try:
|
||||
custom_args = function_args.copy()
|
||||
custom_args["num_results"] = custom_args.get("num_results", 5)
|
||||
|
||||
results = await engine.search(custom_args)
|
||||
formatted_content = format_search_results(results)
|
||||
return {
|
||||
"type": "web_search_result",
|
||||
"content": formatted_content,
|
||||
}
|
||||
# 如果启用了answer模式且是Exa引擎,使用answer_search方法
|
||||
if answer_mode and engine_name == "exa" and hasattr(engine, 'answer_search'):
|
||||
logger.info("使用Exa答案模式进行搜索")
|
||||
results = await engine.answer_search(custom_args)
|
||||
else:
|
||||
results = await engine.search(custom_args)
|
||||
|
||||
if results:
|
||||
formatted_content = format_search_results(results)
|
||||
return {
|
||||
"type": "web_search_result",
|
||||
"content": formatted_content,
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"{engine_name} 搜索失败: {e}")
|
||||
|
||||
Reference in New Issue
Block a user