From c24bb70291b276d5e471ec554188708c86e7c14c Mon Sep 17 00:00:00 2001 From: pine Date: Tue, 11 Mar 2025 18:51:28 +0800 Subject: [PATCH] =?UTF-8?q?fix:=20=E6=B5=81=E5=BC=8F=E8=BE=93=E5=87=BA?= =?UTF-8?q?=E6=A8=A1=E5=BC=8F=E5=A2=9E=E5=8A=A0=E7=BB=93=E6=9D=9F=E5=88=A4?= =?UTF-8?q?=E6=96=AD=E4=B8=8Etoken=E7=94=A8=E9=87=8F=E8=AE=B0=E5=BD=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/plugins/models/utils_model.py | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/src/plugins/models/utils_model.py b/src/plugins/models/utils_model.py index e9d11f339..461f542d1 100644 --- a/src/plugins/models/utils_model.py +++ b/src/plugins/models/utils_model.py @@ -216,6 +216,7 @@ class LLM_request: # 将流式输出转化为非流式输出 if stream_mode: + flag_delta_content_finished = False accumulated_content = "" async for line_bytes in response.content: line = line_bytes.decode("utf-8").strip() @@ -227,13 +228,25 @@ class LLM_request: break try: chunk = json.loads(data_str) - delta = chunk["choices"][0]["delta"] - delta_content = delta.get("content") - if delta_content is None: - delta_content = "" - accumulated_content += delta_content + if flag_delta_content_finished: + usage = chunk.get("usage", None) # 获取token用量 + else: + delta = chunk["choices"][0]["delta"] + delta_content = delta.get("content") + if delta_content is None: + delta_content = "" + accumulated_content += delta_content + # 检测流式输出文本是否结束 + finish_reason = chunk["choices"][0]["finish_reason"] + if finish_reason == "stop": + usage = chunk.get("usage", None) + if usage: + break + # 部分平台在文本输出结束前不会返回token用量,此时需要再获取一次chunk + flag_delta_content_finished = True + except Exception: - logger.exception("解析流式输出错") + logger.exception("解析流式输出错误") content = accumulated_content reasoning_content = "" think_match = re.search(r'<think>(.*?)</think>', content, re.DOTALL) @@ -242,7 +255,7 @@ class LLM_request: content = re.sub(r'<think>.*?</think>', '', content, flags=re.DOTALL).strip() # 
构造一个伪result以便调用自定义响应处理器或默认处理器 result = { - "choices": [{"message": {"content": content, "reasoning_content": reasoning_content}}]} + "choices": [{"message": {"content": content, "reasoning_content": reasoning_content}}], "usage": usage} return response_handler(result) if response_handler else self._default_response_handler( result, user_id, request_type, endpoint) else: