llm统计记录模型反应时间
This commit is contained in:
@@ -90,6 +90,18 @@ STD_TIME_COST_BY_MODULE = "std_time_costs_by_module"
|
||||
ONLINE_TIME = "online_time"
|
||||
TOTAL_MSG_CNT = "total_messages"
|
||||
MSG_CNT_BY_CHAT = "messages_by_chat"
|
||||
TIME_COST_BY_TYPE = "time_costs_by_type"
|
||||
TIME_COST_BY_USER = "time_costs_by_user"
|
||||
TIME_COST_BY_MODEL = "time_costs_by_model"
|
||||
TIME_COST_BY_MODULE = "time_costs_by_module"
|
||||
AVG_TIME_COST_BY_TYPE = "avg_time_costs_by_type"
|
||||
AVG_TIME_COST_BY_USER = "avg_time_costs_by_user"
|
||||
AVG_TIME_COST_BY_MODEL = "avg_time_costs_by_model"
|
||||
AVG_TIME_COST_BY_MODULE = "avg_time_costs_by_module"
|
||||
STD_TIME_COST_BY_TYPE = "std_time_costs_by_type"
|
||||
STD_TIME_COST_BY_USER = "std_time_costs_by_user"
|
||||
STD_TIME_COST_BY_MODEL = "std_time_costs_by_model"
|
||||
STD_TIME_COST_BY_MODULE = "std_time_costs_by_module"
|
||||
|
||||
|
||||
class OnlineTimeRecordTask(AsyncTask):
|
||||
@@ -428,7 +440,7 @@ class StatisticOutputTask(AsyncTask):
|
||||
stats[period_key][TIME_COST_BY_MODULE][module_name].append(time_cost)
|
||||
break
|
||||
|
||||
# 计算平均耗时和标准差
|
||||
# 计算平均耗时和标准差
|
||||
for period_key in stats:
|
||||
for category in [REQ_CNT_BY_TYPE, REQ_CNT_BY_USER, REQ_CNT_BY_MODEL, REQ_CNT_BY_MODULE]:
|
||||
time_cost_key = f"time_costs_by_{category.split('_')[-1]}"
|
||||
@@ -452,7 +464,6 @@ class StatisticOutputTask(AsyncTask):
|
||||
else:
|
||||
stats[period_key][avg_key][item_name] = 0.0
|
||||
stats[period_key][std_key][item_name] = 0.0
|
||||
|
||||
return stats
|
||||
|
||||
@staticmethod
|
||||
@@ -687,7 +698,6 @@ class StatisticOutputTask(AsyncTask):
|
||||
data_fmt = "{:<32} {:>10} {:>12} {:>12} {:>12} {:>9.4f}¥ {:>10} {:>10}"
|
||||
|
||||
output = [
|
||||
"按模型分类统计:",
|
||||
" 模型名称 调用次数 输入Token 输出Token Token总量 累计花费 平均耗时(秒) 标准差(秒)",
|
||||
]
|
||||
for model_name, count in sorted(stats[REQ_CNT_BY_MODEL].items()):
|
||||
@@ -843,7 +853,7 @@ class StatisticOutputTask(AsyncTask):
|
||||
|
||||
<h2>按模型分类统计</h2>
|
||||
<table>
|
||||
<thead><tr><th>模型名称</th><th>调用次数</th><th>输入Token</th><th>输出Token</th><th>Token总量</th><th>累计花费</th><th>平均耗时(秒)</th><th>标准差(秒)</th></tr></thead>
|
||||
<tr><th>模块名称</th><th>调用次数</th><th>输入Token</th><th>输出Token</th><th>Token总量</th><th>累计花费</th><th>平均耗时(秒)</th><th>标准差(秒)</th></tr>
|
||||
<tbody>
|
||||
{model_rows}
|
||||
</tbody>
|
||||
|
||||
@@ -30,6 +30,8 @@ def get_string_field(max_length=255, **kwargs):
|
||||
else:
|
||||
return Text(**kwargs)
|
||||
|
||||
|
||||
|
||||
class SessionProxy:
|
||||
"""线程安全的Session代理类,自动管理session生命周期"""
|
||||
|
||||
@@ -155,11 +157,14 @@ class LLMUsage(Base):
|
||||
|
||||
id = Column(Integer, primary_key=True, autoincrement=True)
|
||||
model_name = Column(get_string_field(100), nullable=False, index=True)
|
||||
model_assign_name = Column(get_string_field(100), index=True) # 添加索引
|
||||
model_api_provider = Column(get_string_field(100), index=True) # 添加索引
|
||||
user_id = Column(get_string_field(50), nullable=False, index=True)
|
||||
request_type = Column(get_string_field(50), nullable=False, index=True)
|
||||
endpoint = Column(Text, nullable=False)
|
||||
prompt_tokens = Column(Integer, nullable=False)
|
||||
completion_tokens = Column(Integer, nullable=False)
|
||||
time_cost = Column(Float, nullable=True)
|
||||
total_tokens = Column(Integer, nullable=False)
|
||||
cost = Column(Float, nullable=False)
|
||||
status = Column(Text, nullable=False)
|
||||
@@ -167,6 +172,9 @@ class LLMUsage(Base):
|
||||
|
||||
__table_args__ = (
|
||||
Index('idx_llmusage_model_name', 'model_name'),
|
||||
Index('idx_llmusage_model_assign_name', 'model_assign_name'),
|
||||
Index('idx_llmusage_model_api_provider', 'model_api_provider'),
|
||||
Index('idx_llmusage_time_cost', 'time_cost'),
|
||||
Index('idx_llmusage_user_id', 'user_id'),
|
||||
Index('idx_llmusage_request_type', 'request_type'),
|
||||
Index('idx_llmusage_timestamp', 'timestamp'),
|
||||
|
||||
@@ -148,6 +148,7 @@ class LLMRequest:
|
||||
model_info=model_info,
|
||||
model_usage=usage,
|
||||
user_id="system",
|
||||
time_cost=time.time() - start_time,
|
||||
request_type=self.request_type,
|
||||
endpoint="/chat/completions",
|
||||
time_cost=time.time() - start_time,
|
||||
@@ -242,6 +243,7 @@ class LLMRequest:
|
||||
) -> Tuple[str, Tuple[str, str, Optional[List[ToolCall]]]]:
|
||||
"""执行单次请求"""
|
||||
# 模型选择和请求准备
|
||||
start_time = time.time()
|
||||
model_info, api_provider, client = self._select_model()
|
||||
processed_prompt = self._apply_content_obfuscation(prompt, api_provider)
|
||||
|
||||
@@ -295,6 +297,7 @@ class LLMRequest:
|
||||
llm_usage_recorder.record_usage_to_database(
|
||||
model_info=model_info,
|
||||
model_usage=usage,
|
||||
time_cost=time.time() - start_time,
|
||||
user_id="system",
|
||||
request_type=self.request_type,
|
||||
endpoint="/chat/completions",
|
||||
@@ -350,6 +353,7 @@ class LLMRequest:
|
||||
if usage := response.usage:
|
||||
llm_usage_recorder.record_usage_to_database(
|
||||
model_info=model_info,
|
||||
time_cost=time.time() - start_time,
|
||||
model_usage=usage,
|
||||
user_id="system",
|
||||
request_type=self.request_type,
|
||||
|
||||
Reference in New Issue
Block a user