feat: llm统计现已记录模型反应时间

This commit is contained in:
SengokuCola
2025-08-11 21:51:59 +08:00
parent 849928a8f3
commit 268b428e8f
13 changed files with 117 additions and 29 deletions

View File

@@ -155,7 +155,7 @@ class LLMUsageRecorder:
logger.error(f"创建 LLMUsage 表失败: {str(e)}")
def record_usage_to_database(
-        self, model_info: ModelInfo, model_usage: UsageRecord, user_id: str, request_type: str, endpoint: str
+        self, model_info: ModelInfo, model_usage: UsageRecord, user_id: str, request_type: str, endpoint: str, time_cost: float = 0.0
):
input_cost = (model_usage.prompt_tokens / 1000000) * model_info.price_in
output_cost = (model_usage.completion_tokens / 1000000) * model_info.price_out
@@ -164,6 +164,8 @@ class LLMUsageRecorder:
# 使用 Peewee 模型创建记录
LLMUsage.create(
model_name=model_info.model_identifier,
+            model_assign_name=model_info.name,
+            model_api_provider=model_info.api_provider,
user_id=user_id,
request_type=request_type,
endpoint=endpoint,
@@ -171,6 +173,7 @@ class LLMUsageRecorder:
completion_tokens=model_usage.completion_tokens or 0,
total_tokens=model_usage.total_tokens or 0,
cost=total_cost or 0.0,
+            time_cost = round(time_cost or 0.0, 3),
status="success",
timestamp=datetime.now(), # Peewee 会处理 DateTimeField
)

View File

@@ -71,6 +71,7 @@ class LLMRequest:
(Tuple[str, str, str, Optional[List[ToolCall]]]): 响应内容、推理内容、模型名称、工具调用列表
"""
# 模型选择
+        start_time = time.time()
model_info, api_provider, client = self._select_model()
# 请求体构建
@@ -105,6 +106,7 @@ class LLMRequest:
user_id="system",
request_type=self.request_type,
endpoint="/chat/completions",
+            time_cost=time.time() - start_time,
)
return content, (reasoning_content, model_info.name, tool_calls)
@@ -149,8 +151,6 @@ class LLMRequest:
# 请求体构建
start_time = time.time()
message_builder = MessageBuilder()
message_builder.add_text_content(prompt)
messages = [message_builder.build()]
@@ -190,6 +190,7 @@ class LLMRequest:
user_id="system",
request_type=self.request_type,
endpoint="/chat/completions",
+            time_cost=time.time() - start_time,
)
if not content:
@@ -208,6 +209,7 @@ class LLMRequest:
(Tuple[List[float], str]): (嵌入向量,使用的模型名称)
"""
# 无需构建消息体,直接使用输入文本
+        start_time = time.time()
model_info, api_provider, client = self._select_model()
# 请求并处理返回值
@@ -228,6 +230,7 @@ class LLMRequest:
user_id="system",
request_type=self.request_type,
endpoint="/embeddings",
+            time_cost=time.time() - start_time,
)
if not embedding: