diff --git a/plugins/hello_world_plugin/plugin.py b/plugins/hello_world_plugin/plugin.py index cab135c09..4ff01879d 100644 --- a/plugins/hello_world_plugin/plugin.py +++ b/plugins/hello_world_plugin/plugin.py @@ -19,14 +19,10 @@ class CompareNumbersTool(BaseTool): name = "compare_numbers" description = "使用工具 比较两个数的大小,返回较大的数" - parameters = { - "type": "object", - "properties": { - "num1": {"type": "number", "description": "第一个数字"}, - "num2": {"type": "number", "description": "第二个数字"}, - }, - "required": ["num1", "num2"], - } + parameters = [ + ("num1", "number", "第一个数字", True), + ("num2", "number", "第二个数字", True), + ] async def execute(self, function_args: dict[str, Any]) -> dict[str, Any]: """执行比较两个数的大小 diff --git a/src/chat/knowledge/ie_process.py b/src/chat/knowledge/ie_process.py index 16d4e0804..340a678db 100644 --- a/src/chat/knowledge/ie_process.py +++ b/src/chat/knowledge/ie_process.py @@ -8,12 +8,15 @@ from . import prompt_template from .knowledge_lib import INVALID_ENTITY from src.llm_models.utils_model import LLMRequest from json_repair import repair_json + + def _extract_json_from_text(text: str): + # sourcery skip: assign-if-exp, extract-method """从文本中提取JSON数据的高容错方法""" if text is None: logger.error("输入文本为None") return [] - + try: fixed_json = repair_json(text) if isinstance(fixed_json, str): @@ -24,7 +27,7 @@ def _extract_json_from_text(text: str): # 如果是列表,直接返回 if isinstance(parsed_json, list): return parsed_json - + # 如果是字典且只有一个项目,可能包装了列表 if isinstance(parsed_json, dict): # 如果字典只有一个键,并且值是列表,返回那个列表 @@ -33,7 +36,7 @@ def _extract_json_from_text(text: str): if isinstance(value, list): return value return parsed_json - + # 其他情况,尝试转换为列表 logger.warning(f"解析的JSON不是预期格式: {type(parsed_json)}, 内容: {parsed_json}") return [] @@ -42,44 +45,40 @@ def _extract_json_from_text(text: str): logger.error(f"JSON提取失败: {e}, 原始文本: {text[:100] if text else 'None'}...") return [] + def _entity_extract(llm_req: LLMRequest, paragraph: str) -> List[str]: + # sourcery skip: reintroduce-else, swap-if-else-branches, use-named-expression """对段落进行实体提取,返回提取出的实体列表(JSON格式)""" entity_extract_context = prompt_template.build_entity_extract_context(paragraph) - + # 使用 asyncio.run 来运行异步方法 try: # 如果当前已有事件循环在运行,使用它 loop = asyncio.get_running_loop() - future = asyncio.run_coroutine_threadsafe( - llm_req.generate_response_async(entity_extract_context), loop - ) - response, (reasoning_content, model_name) = future.result() + future = asyncio.run_coroutine_threadsafe(llm_req.generate_response_async(entity_extract_context), loop) + response, _ = future.result() except RuntimeError: # 如果没有运行中的事件循环,直接使用 asyncio.run - response, (reasoning_content, model_name) = asyncio.run( - llm_req.generate_response_async(entity_extract_context) - ) + response, _ = asyncio.run(llm_req.generate_response_async(entity_extract_context)) # 添加调试日志 logger.debug(f"LLM返回的原始响应: {response}") - + entity_extract_result = _extract_json_from_text(response) - + # 检查返回的是否为有效的实体列表 if not isinstance(entity_extract_result, list): - # 如果不是列表,可能是字典格式,尝试从中提取列表 - if isinstance(entity_extract_result, dict): - # 尝试常见的键名 - for key in ['entities', 'result', 'data', 'items']: - if key in entity_extract_result and isinstance(entity_extract_result[key], list): - entity_extract_result = entity_extract_result[key] - break - else: - # 如果找不到合适的列表,抛出异常 - raise Exception(f"实体提取结果格式错误,期望列表但得到: {type(entity_extract_result)}") + if not isinstance(entity_extract_result, dict): + raise ValueError(f"实体提取结果格式错误,期望列表但得到: {type(entity_extract_result)}") + + # 尝试常见的键名 + for key in ["entities", "result", "data", "items"]: + if key in entity_extract_result and isinstance(entity_extract_result[key], list): + entity_extract_result = entity_extract_result[key] + break else: - raise Exception(f"实体提取结果格式错误,期望列表但得到: {type(entity_extract_result)}") - + # 如果找不到合适的列表,抛出异常 + raise ValueError(f"实体提取结果格式错误,期望列表但得到: {type(entity_extract_result)}") # 过滤无效实体 entity_extract_result = [ entity @@ -87,8 +86,8 @@ def _entity_extract(llm_req: LLMRequest, paragraph: str) -> List[str]: if (entity is not None) and (entity != "") and (entity not in INVALID_ENTITY) ] - if len(entity_extract_result) == 0: - raise Exception("实体提取结果为空") + if not entity_extract_result: + raise ValueError("实体提取结果为空") return entity_extract_result @@ -98,45 +97,44 @@ def _rdf_triple_extract(llm_req: LLMRequest, paragraph: str, entities: list) -> rdf_extract_context = prompt_template.build_rdf_triple_extract_context( paragraph, entities=json.dumps(entities, ensure_ascii=False) ) - + # 使用 asyncio.run 来运行异步方法 try: # 如果当前已有事件循环在运行,使用它 loop = asyncio.get_running_loop() - future = asyncio.run_coroutine_threadsafe( - llm_req.generate_response_async(rdf_extract_context), loop - ) - response, (reasoning_content, model_name) = future.result() + future = asyncio.run_coroutine_threadsafe(llm_req.generate_response_async(rdf_extract_context), loop) + response, _ = future.result() except RuntimeError: # 如果没有运行中的事件循环,直接使用 asyncio.run - response, (reasoning_content, model_name) = asyncio.run( - llm_req.generate_response_async(rdf_extract_context) - ) + response, _ = asyncio.run(llm_req.generate_response_async(rdf_extract_context)) # 添加调试日志 logger.debug(f"RDF LLM返回的原始响应: {response}") - + rdf_triple_result = _extract_json_from_text(response) - + # 检查返回的是否为有效的三元组列表 if not isinstance(rdf_triple_result, list): - # 如果不是列表,可能是字典格式,尝试从中提取列表 - if isinstance(rdf_triple_result, dict): - # 尝试常见的键名 - for key in ['triples', 'result', 'data', 'items']: - if key in rdf_triple_result and isinstance(rdf_triple_result[key], list): - rdf_triple_result = rdf_triple_result[key] - break - else: - # 如果找不到合适的列表,抛出异常 - raise Exception(f"RDF三元组提取结果格式错误,期望列表但得到: {type(rdf_triple_result)}") + if not isinstance(rdf_triple_result, dict): + raise ValueError(f"RDF三元组提取结果格式错误,期望列表但得到: {type(rdf_triple_result)}") + + # 尝试常见的键名 + for key in ["triples", "result", "data", "items"]: + if key in rdf_triple_result and isinstance(rdf_triple_result[key], list): + rdf_triple_result = rdf_triple_result[key] + break else: - raise Exception(f"RDF三元组提取结果格式错误,期望列表但得到: {type(rdf_triple_result)}") - + # 如果找不到合适的列表,抛出异常 + raise ValueError(f"RDF三元组提取结果格式错误,期望列表但得到: {type(rdf_triple_result)}") # 验证三元组格式 for triple in rdf_triple_result: - if not isinstance(triple, list) or len(triple) != 3 or (triple[0] is None or triple[1] is None or triple[2] is None) or "" in triple: - raise Exception("RDF提取结果格式错误") + if ( + not isinstance(triple, list) + or len(triple) != 3 + or (triple[0] is None or triple[1] is None or triple[2] is None) + or "" in triple + ): + raise ValueError("RDF提取结果格式错误") return rdf_triple_result diff --git a/src/chat/knowledge/kg_manager.py b/src/chat/knowledge/kg_manager.py index 083a741d6..c2172312f 100644 --- a/src/chat/knowledge/kg_manager.py +++ b/src/chat/knowledge/kg_manager.py @@ -162,7 +162,7 @@ class KGManager: ent_hash_list = list(ent_hash_list) synonym_hash_set = set() - synonym_result = dict() + synonym_result = {} # rich 进度条 total = len(ent_hash_list) diff --git a/src/chat/knowledge/qa_manager.py b/src/chat/knowledge/qa_manager.py index c83683b79..678aa4190 100644 --- a/src/chat/knowledge/qa_manager.py +++ b/src/chat/knowledge/qa_manager.py @@ -5,13 +5,15 @@ from .global_logger import logger # from . import prompt_template from .embedding_store import EmbeddingManager + # from .llm_client import LLMClient from .kg_manager import KGManager + # from .lpmmconfig import global_config from .utils.dyn_topk import dyn_select_top_k from src.llm_models.utils_model import LLMRequest from src.chat.utils.utils import get_embedding -from src.config.config import global_config +from src.config.config import global_config, model_config MAX_KNOWLEDGE_LENGTH = 10000 # 最大知识长度 @@ -21,15 +23,10 @@ class QAManager: self, embed_manager: EmbeddingManager, kg_manager: KGManager, - ): self.embed_manager = embed_manager self.kg_manager = kg_manager - # TODO: API-Adapter修改标记 - self.qa_model = LLMRequest( - model=global_config.model.lpmm_qa, - request_type="lpmm.qa" - ) + self.qa_model = LLMRequest(model_set=model_config.model_task_config.lpmm_qa, request_type="lpmm.qa") async def process_query(self, question: str) -> Tuple[List[Tuple[str, float, float]], Optional[Dict[str, float]]]: """处理查询"""