From c24bb70291b276d5e471ec554188708c86e7c14c Mon Sep 17 00:00:00 2001 From: pine Date: Tue, 11 Mar 2025 18:51:28 +0800 Subject: [PATCH] =?UTF-8?q?fix:=20=E6=B5=81=E5=BC=8F=E8=BE=93=E5=87=BA?= =?UTF-8?q?=E6=A8=A1=E5=BC=8F=E5=A2=9E=E5=8A=A0=E7=BB=93=E6=9D=9F=E5=88=A4?= =?UTF-8?q?=E6=96=AD=E4=B8=8Etoken=E7=94=A8=E9=87=8F=E8=AE=B0=E5=BD=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/plugins/models/utils_model.py | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/src/plugins/models/utils_model.py b/src/plugins/models/utils_model.py index e9d11f339..461f542d1 100644 --- a/src/plugins/models/utils_model.py +++ b/src/plugins/models/utils_model.py @@ -216,6 +216,7 @@ class LLM_request: # 将流式输出转化为非流式输出 if stream_mode: + flag_delta_content_finished = False accumulated_content = "" async for line_bytes in response.content: line = line_bytes.decode("utf-8").strip() @@ -227,13 +228,25 @@ class LLM_request: break try: chunk = json.loads(data_str) - delta = chunk["choices"][0]["delta"] - delta_content = delta.get("content") - if delta_content is None: - delta_content = "" - accumulated_content += delta_content + if flag_delta_content_finished: + usage = chunk.get("usage", None) # 获取token用量 + else: + delta = chunk["choices"][0]["delta"] + delta_content = delta.get("content") + if delta_content is None: + delta_content = "" + accumulated_content += delta_content + # 检测流式输出文本是否结束 + finish_reason = chunk["choices"][0]["finish_reason"] + if finish_reason == "stop": + usage = chunk.get("usage", None) + if usage: + break + # 部分平台在文本输出结束前不会返回token用量,此时需要再获取一次chunk + flag_delta_content_finished = True + except Exception: - logger.exception("解析流式输出错") + logger.exception("解析流式输出错误") content = accumulated_content reasoning_content = "" think_match = re.search(r'<think>(.*?)</think>', content, re.DOTALL) @@ -242,7 +255,7 @@ class LLM_request: content = re.sub(r'<think>.*?</think>', '', content, flags=re.DOTALL).strip() # 
构造一个伪result以便调用自定义响应处理器或默认处理器 result = { - "choices": [{"message": {"content": content, "reasoning_content": reasoning_content}}]} + "choices": [{"message": {"content": content, "reasoning_content": reasoning_content}}], "usage": usage} return response_handler(result) if response_handler else self._default_response_handler( result, user_id, request_type, endpoint) else: