llm统计记录模型反应时间
This commit is contained in:
@@ -78,6 +78,18 @@ COST_BY_MODULE = "costs_by_module"
|
|||||||
ONLINE_TIME = "online_time"
|
ONLINE_TIME = "online_time"
|
||||||
TOTAL_MSG_CNT = "total_messages"
|
TOTAL_MSG_CNT = "total_messages"
|
||||||
MSG_CNT_BY_CHAT = "messages_by_chat"
|
MSG_CNT_BY_CHAT = "messages_by_chat"
|
||||||
|
TIME_COST_BY_TYPE = "time_costs_by_type"
|
||||||
|
TIME_COST_BY_USER = "time_costs_by_user"
|
||||||
|
TIME_COST_BY_MODEL = "time_costs_by_model"
|
||||||
|
TIME_COST_BY_MODULE = "time_costs_by_module"
|
||||||
|
AVG_TIME_COST_BY_TYPE = "avg_time_costs_by_type"
|
||||||
|
AVG_TIME_COST_BY_USER = "avg_time_costs_by_user"
|
||||||
|
AVG_TIME_COST_BY_MODEL = "avg_time_costs_by_model"
|
||||||
|
AVG_TIME_COST_BY_MODULE = "avg_time_costs_by_module"
|
||||||
|
STD_TIME_COST_BY_TYPE = "std_time_costs_by_type"
|
||||||
|
STD_TIME_COST_BY_USER = "std_time_costs_by_user"
|
||||||
|
STD_TIME_COST_BY_MODEL = "std_time_costs_by_model"
|
||||||
|
STD_TIME_COST_BY_MODULE = "std_time_costs_by_module"
|
||||||
|
|
||||||
|
|
||||||
class OnlineTimeRecordTask(AsyncTask):
|
class OnlineTimeRecordTask(AsyncTask):
|
||||||
@@ -338,6 +350,18 @@ class StatisticOutputTask(AsyncTask):
|
|||||||
COST_BY_USER: defaultdict(float),
|
COST_BY_USER: defaultdict(float),
|
||||||
COST_BY_MODEL: defaultdict(float),
|
COST_BY_MODEL: defaultdict(float),
|
||||||
COST_BY_MODULE: defaultdict(float),
|
COST_BY_MODULE: defaultdict(float),
|
||||||
|
TIME_COST_BY_TYPE: defaultdict(list),
|
||||||
|
TIME_COST_BY_USER: defaultdict(list),
|
||||||
|
TIME_COST_BY_MODEL: defaultdict(list),
|
||||||
|
TIME_COST_BY_MODULE: defaultdict(list),
|
||||||
|
AVG_TIME_COST_BY_TYPE: defaultdict(float),
|
||||||
|
AVG_TIME_COST_BY_USER: defaultdict(float),
|
||||||
|
AVG_TIME_COST_BY_MODEL: defaultdict(float),
|
||||||
|
AVG_TIME_COST_BY_MODULE: defaultdict(float),
|
||||||
|
STD_TIME_COST_BY_TYPE: defaultdict(float),
|
||||||
|
STD_TIME_COST_BY_USER: defaultdict(float),
|
||||||
|
STD_TIME_COST_BY_MODEL: defaultdict(float),
|
||||||
|
STD_TIME_COST_BY_MODULE: defaultdict(float),
|
||||||
}
|
}
|
||||||
for period_key, _ in collect_period
|
for period_key, _ in collect_period
|
||||||
}
|
}
|
||||||
@@ -394,7 +418,40 @@ class StatisticOutputTask(AsyncTask):
|
|||||||
stats[period_key][COST_BY_USER][user_id] += cost
|
stats[period_key][COST_BY_USER][user_id] += cost
|
||||||
stats[period_key][COST_BY_MODEL][model_name] += cost
|
stats[period_key][COST_BY_MODEL][model_name] += cost
|
||||||
stats[period_key][COST_BY_MODULE][module_name] += cost
|
stats[period_key][COST_BY_MODULE][module_name] += cost
|
||||||
|
|
||||||
|
# 收集time_cost数据
|
||||||
|
time_cost = record.time_cost or 0.0
|
||||||
|
if time_cost > 0: # 只记录有效的time_cost
|
||||||
|
stats[period_key][TIME_COST_BY_TYPE][request_type].append(time_cost)
|
||||||
|
stats[period_key][TIME_COST_BY_USER][user_id].append(time_cost)
|
||||||
|
stats[period_key][TIME_COST_BY_MODEL][model_name].append(time_cost)
|
||||||
|
stats[period_key][TIME_COST_BY_MODULE][module_name].append(time_cost)
|
||||||
break
|
break
|
||||||
|
|
||||||
|
# 计算平均耗时和标准差
|
||||||
|
for period_key in stats:
|
||||||
|
for category in [REQ_CNT_BY_TYPE, REQ_CNT_BY_USER, REQ_CNT_BY_MODEL, REQ_CNT_BY_MODULE]:
|
||||||
|
time_cost_key = f"time_costs_by_{category.split('_')[-1]}"
|
||||||
|
avg_key = f"avg_time_costs_by_{category.split('_')[-1]}"
|
||||||
|
std_key = f"std_time_costs_by_{category.split('_')[-1]}"
|
||||||
|
|
||||||
|
for item_name in stats[period_key][category]:
|
||||||
|
time_costs = stats[period_key][time_cost_key].get(item_name, [])
|
||||||
|
if time_costs:
|
||||||
|
# 计算平均耗时
|
||||||
|
avg_time_cost = sum(time_costs) / len(time_costs)
|
||||||
|
stats[period_key][avg_key][item_name] = round(avg_time_cost, 3)
|
||||||
|
|
||||||
|
# 计算标准差
|
||||||
|
if len(time_costs) > 1:
|
||||||
|
variance = sum((x - avg_time_cost) ** 2 for x in time_costs) / len(time_costs)
|
||||||
|
std_time_cost = variance ** 0.5
|
||||||
|
stats[period_key][std_key][item_name] = round(std_time_cost, 3)
|
||||||
|
else:
|
||||||
|
stats[period_key][std_key][item_name] = 0.0
|
||||||
|
else:
|
||||||
|
stats[period_key][avg_key][item_name] = 0.0
|
||||||
|
stats[period_key][std_key][item_name] = 0.0
|
||||||
return stats
|
return stats
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@@ -626,11 +683,10 @@ class StatisticOutputTask(AsyncTask):
|
|||||||
"""
|
"""
|
||||||
if stats[TOTAL_REQ_CNT] <= 0:
|
if stats[TOTAL_REQ_CNT] <= 0:
|
||||||
return ""
|
return ""
|
||||||
data_fmt = "{:<32} {:>10} {:>12} {:>12} {:>12} {:>9.4f}¥"
|
data_fmt = "{:<32} {:>10} {:>12} {:>12} {:>12} {:>9.4f}¥ {:>10} {:>10}"
|
||||||
|
|
||||||
output = [
|
output = [
|
||||||
"按模型分类统计:",
|
" 模型名称 调用次数 输入Token 输出Token Token总量 累计花费 平均耗时(秒) 标准差(秒)",
|
||||||
" 模型名称 调用次数 输入Token 输出Token Token总量 累计花费",
|
|
||||||
]
|
]
|
||||||
for model_name, count in sorted(stats[REQ_CNT_BY_MODEL].items()):
|
for model_name, count in sorted(stats[REQ_CNT_BY_MODEL].items()):
|
||||||
name = f"{model_name[:29]}..." if len(model_name) > 32 else model_name
|
name = f"{model_name[:29]}..." if len(model_name) > 32 else model_name
|
||||||
@@ -638,7 +694,9 @@ class StatisticOutputTask(AsyncTask):
|
|||||||
out_tokens = stats[OUT_TOK_BY_MODEL][model_name]
|
out_tokens = stats[OUT_TOK_BY_MODEL][model_name]
|
||||||
tokens = stats[TOTAL_TOK_BY_MODEL][model_name]
|
tokens = stats[TOTAL_TOK_BY_MODEL][model_name]
|
||||||
cost = stats[COST_BY_MODEL][model_name]
|
cost = stats[COST_BY_MODEL][model_name]
|
||||||
output.append(data_fmt.format(name, count, in_tokens, out_tokens, tokens, cost))
|
avg_time_cost = stats[AVG_TIME_COST_BY_MODEL][model_name]
|
||||||
|
std_time_cost = stats[STD_TIME_COST_BY_MODEL][model_name]
|
||||||
|
output.append(data_fmt.format(name, count, in_tokens, out_tokens, tokens, cost, avg_time_cost, std_time_cost))
|
||||||
|
|
||||||
output.append("")
|
output.append("")
|
||||||
return "\n".join(output)
|
return "\n".join(output)
|
||||||
@@ -723,6 +781,8 @@ class StatisticOutputTask(AsyncTask):
|
|||||||
f"<td>{stat_data[OUT_TOK_BY_MODEL][model_name]}</td>"
|
f"<td>{stat_data[OUT_TOK_BY_MODEL][model_name]}</td>"
|
||||||
f"<td>{stat_data[TOTAL_TOK_BY_MODEL][model_name]}</td>"
|
f"<td>{stat_data[TOTAL_TOK_BY_MODEL][model_name]}</td>"
|
||||||
f"<td>{stat_data[COST_BY_MODEL][model_name]:.4f} ¥</td>"
|
f"<td>{stat_data[COST_BY_MODEL][model_name]:.4f} ¥</td>"
|
||||||
|
f"<td>{stat_data[AVG_TIME_COST_BY_MODEL][model_name]:.3f} 秒</td>"
|
||||||
|
f"<td>{stat_data[STD_TIME_COST_BY_MODEL][model_name]:.3f} 秒</td>"
|
||||||
f"</tr>"
|
f"</tr>"
|
||||||
for model_name, count in sorted(stat_data[REQ_CNT_BY_MODEL].items())
|
for model_name, count in sorted(stat_data[REQ_CNT_BY_MODEL].items())
|
||||||
]
|
]
|
||||||
@@ -737,6 +797,8 @@ class StatisticOutputTask(AsyncTask):
|
|||||||
f"<td>{stat_data[OUT_TOK_BY_TYPE][req_type]}</td>"
|
f"<td>{stat_data[OUT_TOK_BY_TYPE][req_type]}</td>"
|
||||||
f"<td>{stat_data[TOTAL_TOK_BY_TYPE][req_type]}</td>"
|
f"<td>{stat_data[TOTAL_TOK_BY_TYPE][req_type]}</td>"
|
||||||
f"<td>{stat_data[COST_BY_TYPE][req_type]:.4f} ¥</td>"
|
f"<td>{stat_data[COST_BY_TYPE][req_type]:.4f} ¥</td>"
|
||||||
|
f"<td>{stat_data[AVG_TIME_COST_BY_TYPE][req_type]:.3f} 秒</td>"
|
||||||
|
f"<td>{stat_data[STD_TIME_COST_BY_TYPE][req_type]:.3f} 秒</td>"
|
||||||
f"</tr>"
|
f"</tr>"
|
||||||
for req_type, count in sorted(stat_data[REQ_CNT_BY_TYPE].items())
|
for req_type, count in sorted(stat_data[REQ_CNT_BY_TYPE].items())
|
||||||
]
|
]
|
||||||
@@ -751,6 +813,8 @@ class StatisticOutputTask(AsyncTask):
|
|||||||
f"<td>{stat_data[OUT_TOK_BY_MODULE][module_name]}</td>"
|
f"<td>{stat_data[OUT_TOK_BY_MODULE][module_name]}</td>"
|
||||||
f"<td>{stat_data[TOTAL_TOK_BY_MODULE][module_name]}</td>"
|
f"<td>{stat_data[TOTAL_TOK_BY_MODULE][module_name]}</td>"
|
||||||
f"<td>{stat_data[COST_BY_MODULE][module_name]:.4f} ¥</td>"
|
f"<td>{stat_data[COST_BY_MODULE][module_name]:.4f} ¥</td>"
|
||||||
|
f"<td>{stat_data[AVG_TIME_COST_BY_MODULE][module_name]:.3f} 秒</td>"
|
||||||
|
f"<td>{stat_data[STD_TIME_COST_BY_MODULE][module_name]:.3f} 秒</td>"
|
||||||
f"</tr>"
|
f"</tr>"
|
||||||
for module_name, count in sorted(stat_data[REQ_CNT_BY_MODULE].items())
|
for module_name, count in sorted(stat_data[REQ_CNT_BY_MODULE].items())
|
||||||
]
|
]
|
||||||
@@ -777,7 +841,7 @@ class StatisticOutputTask(AsyncTask):
|
|||||||
|
|
||||||
<h2>按模型分类统计</h2>
|
<h2>按模型分类统计</h2>
|
||||||
<table>
|
<table>
|
||||||
<thead><tr><th>模型名称</th><th>调用次数</th><th>输入Token</th><th>输出Token</th><th>Token总量</th><th>累计花费</th></tr></thead>
|
<thead><tr><th>模型名称</th><th>调用次数</th><th>输入Token</th><th>输出Token</th><th>Token总量</th><th>累计花费</th><th>平均耗时(秒)</th><th>标准差(秒)</th></tr></thead>
|
||||||
<tbody>
|
<tbody>
|
||||||
{model_rows}
|
{model_rows}
|
||||||
</tbody>
|
</tbody>
|
||||||
@@ -786,7 +850,7 @@ class StatisticOutputTask(AsyncTask):
|
|||||||
<h2>按模块分类统计</h2>
|
<h2>按模块分类统计</h2>
|
||||||
<table>
|
<table>
|
||||||
<thead>
|
<thead>
|
||||||
<tr><th>模块名称</th><th>调用次数</th><th>输入Token</th><th>输出Token</th><th>Token总量</th><th>累计花费</th></tr>
|
<tr><th>模块名称</th><th>调用次数</th><th>输入Token</th><th>输出Token</th><th>Token总量</th><th>累计花费</th><th>平均耗时(秒)</th><th>标准差(秒)</th></tr>
|
||||||
</thead>
|
</thead>
|
||||||
<tbody>
|
<tbody>
|
||||||
{module_rows}
|
{module_rows}
|
||||||
@@ -796,7 +860,7 @@ class StatisticOutputTask(AsyncTask):
|
|||||||
<h2>按请求类型分类统计</h2>
|
<h2>按请求类型分类统计</h2>
|
||||||
<table>
|
<table>
|
||||||
<thead>
|
<thead>
|
||||||
<tr><th>请求类型</th><th>调用次数</th><th>输入Token</th><th>输出Token</th><th>Token总量</th><th>累计花费</th></tr>
|
<tr><th>请求类型</th><th>调用次数</th><th>输入Token</th><th>输出Token</th><th>Token总量</th><th>累计花费</th><th>平均耗时(秒)</th><th>标准差(秒)</th></tr>
|
||||||
</thead>
|
</thead>
|
||||||
<tbody>
|
<tbody>
|
||||||
{type_rows}
|
{type_rows}
|
||||||
|
|||||||
@@ -29,6 +29,8 @@ def get_string_field(max_length=255, **kwargs):
|
|||||||
return String(max_length, **kwargs)
|
return String(max_length, **kwargs)
|
||||||
else:
|
else:
|
||||||
return Text(**kwargs)
|
return Text(**kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class SessionProxy:
|
class SessionProxy:
|
||||||
"""线程安全的Session代理类,自动管理session生命周期"""
|
"""线程安全的Session代理类,自动管理session生命周期"""
|
||||||
@@ -155,11 +157,14 @@ class LLMUsage(Base):
|
|||||||
|
|
||||||
id = Column(Integer, primary_key=True, autoincrement=True)
|
id = Column(Integer, primary_key=True, autoincrement=True)
|
||||||
model_name = Column(get_string_field(100), nullable=False, index=True)
|
model_name = Column(get_string_field(100), nullable=False, index=True)
|
||||||
|
model_assign_name = Column(get_string_field(100), index=True) # 添加索引
|
||||||
|
model_api_provider = Column(get_string_field(100), index=True) # 添加索引
|
||||||
user_id = Column(get_string_field(50), nullable=False, index=True)
|
user_id = Column(get_string_field(50), nullable=False, index=True)
|
||||||
request_type = Column(get_string_field(50), nullable=False, index=True)
|
request_type = Column(get_string_field(50), nullable=False, index=True)
|
||||||
endpoint = Column(Text, nullable=False)
|
endpoint = Column(Text, nullable=False)
|
||||||
prompt_tokens = Column(Integer, nullable=False)
|
prompt_tokens = Column(Integer, nullable=False)
|
||||||
completion_tokens = Column(Integer, nullable=False)
|
completion_tokens = Column(Integer, nullable=False)
|
||||||
|
time_cost = Column(Float, nullable=True)
|
||||||
total_tokens = Column(Integer, nullable=False)
|
total_tokens = Column(Integer, nullable=False)
|
||||||
cost = Column(Float, nullable=False)
|
cost = Column(Float, nullable=False)
|
||||||
status = Column(Text, nullable=False)
|
status = Column(Text, nullable=False)
|
||||||
@@ -167,6 +172,9 @@ class LLMUsage(Base):
|
|||||||
|
|
||||||
__table_args__ = (
|
__table_args__ = (
|
||||||
Index('idx_llmusage_model_name', 'model_name'),
|
Index('idx_llmusage_model_name', 'model_name'),
|
||||||
|
Index('idx_llmusage_model_assign_name', 'model_assign_name'),
|
||||||
|
Index('idx_llmusage_model_api_provider', 'model_api_provider'),
|
||||||
|
Index('idx_llmusage_time_cost', 'time_cost'),
|
||||||
Index('idx_llmusage_user_id', 'user_id'),
|
Index('idx_llmusage_user_id', 'user_id'),
|
||||||
Index('idx_llmusage_request_type', 'request_type'),
|
Index('idx_llmusage_request_type', 'request_type'),
|
||||||
Index('idx_llmusage_timestamp', 'timestamp'),
|
Index('idx_llmusage_timestamp', 'timestamp'),
|
||||||
|
|||||||
@@ -147,7 +147,7 @@ class LLMUsageRecorder:
|
|||||||
|
|
||||||
|
|
||||||
def record_usage_to_database(
|
def record_usage_to_database(
|
||||||
self, model_info: ModelInfo, model_usage: UsageRecord, user_id: str, request_type: str, endpoint: str
|
self, model_info: ModelInfo, model_usage: UsageRecord, user_id: str, request_type: str, endpoint: str, time_cost: float = 0.0
|
||||||
):
|
):
|
||||||
input_cost = (model_usage.prompt_tokens / 1000000) * model_info.price_in
|
input_cost = (model_usage.prompt_tokens / 1000000) * model_info.price_in
|
||||||
output_cost = (model_usage.completion_tokens / 1000000) * model_info.price_out
|
output_cost = (model_usage.completion_tokens / 1000000) * model_info.price_out
|
||||||
@@ -160,6 +160,8 @@ class LLMUsageRecorder:
|
|||||||
|
|
||||||
usage_record = LLMUsage(
|
usage_record = LLMUsage(
|
||||||
model_name=model_info.model_identifier,
|
model_name=model_info.model_identifier,
|
||||||
|
model_assign_name=model_info.name,
|
||||||
|
model_api_provider=model_info.api_provider,
|
||||||
user_id=user_id,
|
user_id=user_id,
|
||||||
request_type=request_type,
|
request_type=request_type,
|
||||||
endpoint=endpoint,
|
endpoint=endpoint,
|
||||||
@@ -167,6 +169,7 @@ class LLMUsageRecorder:
|
|||||||
completion_tokens=model_usage.completion_tokens or 0,
|
completion_tokens=model_usage.completion_tokens or 0,
|
||||||
total_tokens=model_usage.total_tokens or 0,
|
total_tokens=model_usage.total_tokens or 0,
|
||||||
cost=total_cost or 0.0,
|
cost=total_cost or 0.0,
|
||||||
|
time_cost = round(time_cost or 0.0, 3),
|
||||||
status="success",
|
status="success",
|
||||||
timestamp=datetime.now(), # SQLAlchemy 会处理 DateTime 字段
|
timestamp=datetime.now(), # SQLAlchemy 会处理 DateTime 字段
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -115,6 +115,7 @@ class LLMRequest:
|
|||||||
normalized_format = _normalize_image_format(image_format)
|
normalized_format = _normalize_image_format(image_format)
|
||||||
|
|
||||||
# 模型选择
|
# 模型选择
|
||||||
|
start_time = time.time()
|
||||||
model_info, api_provider, client = self._select_model()
|
model_info, api_provider, client = self._select_model()
|
||||||
|
|
||||||
# 请求体构建
|
# 请求体构建
|
||||||
@@ -147,6 +148,7 @@ class LLMRequest:
|
|||||||
model_info=model_info,
|
model_info=model_info,
|
||||||
model_usage=usage,
|
model_usage=usage,
|
||||||
user_id="system",
|
user_id="system",
|
||||||
|
time_cost=time.time() - start_time,
|
||||||
request_type=self.request_type,
|
request_type=self.request_type,
|
||||||
endpoint="/chat/completions",
|
endpoint="/chat/completions",
|
||||||
)
|
)
|
||||||
@@ -240,6 +242,7 @@ class LLMRequest:
|
|||||||
) -> Tuple[str, Tuple[str, str, Optional[List[ToolCall]]]]:
|
) -> Tuple[str, Tuple[str, str, Optional[List[ToolCall]]]]:
|
||||||
"""执行单次请求"""
|
"""执行单次请求"""
|
||||||
# 模型选择和请求准备
|
# 模型选择和请求准备
|
||||||
|
start_time = time.time()
|
||||||
model_info, api_provider, client = self._select_model()
|
model_info, api_provider, client = self._select_model()
|
||||||
processed_prompt = self._apply_content_obfuscation(prompt, api_provider)
|
processed_prompt = self._apply_content_obfuscation(prompt, api_provider)
|
||||||
|
|
||||||
@@ -293,6 +296,7 @@ class LLMRequest:
|
|||||||
llm_usage_recorder.record_usage_to_database(
|
llm_usage_recorder.record_usage_to_database(
|
||||||
model_info=model_info,
|
model_info=model_info,
|
||||||
model_usage=usage,
|
model_usage=usage,
|
||||||
|
time_cost=time.time() - start_time,
|
||||||
user_id="system",
|
user_id="system",
|
||||||
request_type=self.request_type,
|
request_type=self.request_type,
|
||||||
endpoint="/chat/completions",
|
endpoint="/chat/completions",
|
||||||
@@ -331,6 +335,7 @@ class LLMRequest:
|
|||||||
(Tuple[List[float], str]): (嵌入向量,使用的模型名称)
|
(Tuple[List[float], str]): (嵌入向量,使用的模型名称)
|
||||||
"""
|
"""
|
||||||
# 无需构建消息体,直接使用输入文本
|
# 无需构建消息体,直接使用输入文本
|
||||||
|
start_time = time.time()
|
||||||
model_info, api_provider, client = self._select_model()
|
model_info, api_provider, client = self._select_model()
|
||||||
|
|
||||||
# 请求并处理返回值
|
# 请求并处理返回值
|
||||||
@@ -347,6 +352,7 @@ class LLMRequest:
|
|||||||
if usage := response.usage:
|
if usage := response.usage:
|
||||||
llm_usage_recorder.record_usage_to_database(
|
llm_usage_recorder.record_usage_to_database(
|
||||||
model_info=model_info,
|
model_info=model_info,
|
||||||
|
time_cost=time.time() - start_time,
|
||||||
model_usage=usage,
|
model_usage=usage,
|
||||||
user_id="system",
|
user_id="system",
|
||||||
request_type=self.request_type,
|
request_type=self.request_type,
|
||||||
|
|||||||
Reference in New Issue
Block a user