fix(statistic): 增强统计数据处理的健壮性

此前,当某些统计项(如模型调用、消息数)在特定周期内未产生数据时,统计处理逻辑会因直接访问字典键而引发 `KeyError`,导致统计任务失败。

本次提交通过以下方式解决了该问题:
- 在访问统计字典时,全面使用 `dict.get()` 并提供默认值,避免因键不存在而崩溃。
- 使用 `setdefault` 来确保在计算平均耗时等指标前,相关的数据结构已被初始化。
- 简化并重构了历史总览数据的合并逻辑,使其在处理不完整数据时更加稳健。
This commit is contained in:
minecraft1024a
2025-11-13 19:47:44 +08:00
parent 38ec114c81
commit 718584a7da

View File

@@ -417,7 +417,12 @@ class StatisticOutputTask(AsyncTask):
avg_key = f"AVG_TIME_COST_BY_{items.upper()}" avg_key = f"AVG_TIME_COST_BY_{items.upper()}"
std_key = f"STD_TIME_COST_BY_{items.upper()}" std_key = f"STD_TIME_COST_BY_{items.upper()}"
for item_name in period_stats[category_key]: # Ensure the stat dicts exist before trying to access them, making the process more robust.
period_stats.setdefault(time_cost_key, defaultdict(list))
period_stats.setdefault(avg_key, defaultdict(float))
period_stats.setdefault(std_key, defaultdict(float))
for item_name in period_stats.get(category_key, {}):
time_costs = period_stats[time_cost_key].get(item_name, []) time_costs = period_stats[time_cost_key].get(item_name, [])
if time_costs: if time_costs:
avg_time = sum(time_costs) / len(time_costs) avg_time = sum(time_costs) / len(time_costs)
@@ -614,37 +619,31 @@ class StatisticOutputTask(AsyncTask):
# 统计数据合并 # 统计数据合并
# 合并三类统计数据 # 合并三类统计数据
for period_key, _ in stat_start_timestamp: for period_key, _ in stat_start_timestamp:
stat[period_key].update(model_req_stat[period_key]) stat[period_key].update(model_req_stat.get(period_key, {}))
stat[period_key].update(online_time_stat[period_key]) stat[period_key].update(online_time_stat.get(period_key, {}))
stat[period_key].update(message_count_stat[period_key]) stat[period_key].update(message_count_stat.get(period_key, {}))
if last_all_time_stat: if last_all_time_stat:
# 若存在上次完整统计数据,则将其与当前统计数据合并 # 若存在上次完整统计数据,则将其与当前统计数据合并
for key, val in last_all_time_stat.items(): for key, val in last_all_time_stat.items():
# 确保当前统计数据中存在该key # If a key from old stats is not in the current period's stats, it means no new data was generated.
# In this case, we carry over the old data.
if key not in stat["all_time"]: if key not in stat["all_time"]:
stat["all_time"][key] = val
continue continue
# If the key exists in both, we merge.
if isinstance(val, dict): if isinstance(val, dict):
# 是字典类型,则进行合并 # It's a dictionary-like object (e.g., COST_BY_MODEL, TIME_COST_BY_TYPE)
current_dict = stat["all_time"][key]
for sub_key, sub_val in val.items(): for sub_key, sub_val in val.items():
# 普通的数值或字典合并 if sub_key in current_dict:
if sub_key in stat["all_time"][key]: # For lists (like TIME_COST), this extends. For numbers, this adds.
# 检查是否为嵌套的字典类型(如版本统计) current_dict[sub_key] += sub_val
if isinstance(sub_val, dict) and isinstance(stat["all_time"][key][sub_key], dict):
# 合并嵌套字典
for nested_key, nested_val in sub_val.items():
if nested_key in stat["all_time"][key][sub_key]:
stat["all_time"][key][sub_key][nested_key] += nested_val
else:
stat["all_time"][key][sub_key][nested_key] = nested_val
else:
# 普通数值累加
stat["all_time"][key][sub_key] += sub_val
else: else:
stat["all_time"][key][sub_key] = sub_val current_dict[sub_key] = sub_val
else: else:
# 直接合并 # It's a simple value (e.g., TOTAL_COST)
stat["all_time"][key] += val stat["all_time"][key] += val
# 更新上次完整统计数据的时间戳 # 更新上次完整统计数据的时间戳
@@ -686,10 +685,10 @@ class StatisticOutputTask(AsyncTask):
""" """
output = [ output = [
f"总在线时间: {_format_online_time(stats[ONLINE_TIME])}", f"总在线时间: {_format_online_time(stats.get(ONLINE_TIME, 0))}",
f"总消息数: {stats[TOTAL_MSG_CNT]}", f"总消息数: {stats.get(TOTAL_MSG_CNT, 0)}",
f"总请求数: {stats[TOTAL_REQ_CNT]}", f"总请求数: {stats.get(TOTAL_REQ_CNT, 0)}",
f"总花费: {stats[TOTAL_COST]:.4f}¥", f"总花费: {stats.get(TOTAL_COST, 0.0):.4f}¥",
"", "",
] ]
@@ -700,21 +699,21 @@ class StatisticOutputTask(AsyncTask):
""" """
格式化按模型分类的统计数据 格式化按模型分类的统计数据
""" """
if stats[TOTAL_REQ_CNT] <= 0: if stats.get(TOTAL_REQ_CNT, 0) <= 0:
return "" return ""
data_fmt = "{:<32} {:>10} {:>12} {:>12} {:>12} {:>9.4f}¥ {:>10} {:>10}" data_fmt = "{:<32} {:>10} {:>12} {:>12} {:>12} {:>9.4f}¥ {:>10} {:>10}"
output = [ output = [
" 模型名称 调用次数 输入Token 输出Token Token总量 累计花费 平均耗时(秒) 标准差(秒)", " 模型名称 调用次数 输入Token 输出Token Token总量 累计花费 平均耗时(秒) 标准差(秒)",
] ]
for model_name, count in sorted(stats[REQ_CNT_BY_MODEL].items()): for model_name, count in sorted(stats.get(REQ_CNT_BY_MODEL, {}).items()):
name = f"{model_name[:29]}..." if len(model_name) > 32 else model_name name = f"{model_name[:29]}..." if len(model_name) > 32 else model_name
in_tokens = stats[IN_TOK_BY_MODEL][model_name] in_tokens = stats.get(IN_TOK_BY_MODEL, {}).get(model_name, 0)
out_tokens = stats[OUT_TOK_BY_MODEL][model_name] out_tokens = stats.get(OUT_TOK_BY_MODEL, {}).get(model_name, 0)
tokens = stats[TOTAL_TOK_BY_MODEL][model_name] tokens = stats.get(TOTAL_TOK_BY_MODEL, {}).get(model_name, 0)
cost = stats[COST_BY_MODEL][model_name] cost = stats.get(COST_BY_MODEL, {}).get(model_name, 0.0)
avg_time_cost = stats[AVG_TIME_COST_BY_MODEL][model_name] avg_time_cost = stats.get(AVG_TIME_COST_BY_MODEL, {}).get(model_name, 0.0)
std_time_cost = stats[STD_TIME_COST_BY_MODEL][model_name] std_time_cost = stats.get(STD_TIME_COST_BY_MODEL, {}).get(model_name, 0.0)
output.append( output.append(
data_fmt.format(name, count, in_tokens, out_tokens, tokens, cost, avg_time_cost, std_time_cost) data_fmt.format(name, count, in_tokens, out_tokens, tokens, cost, avg_time_cost, std_time_cost)
) )
@@ -726,12 +725,12 @@ class StatisticOutputTask(AsyncTask):
""" """
格式化聊天统计数据 格式化聊天统计数据
""" """
if stats[TOTAL_MSG_CNT] <= 0: if stats.get(TOTAL_MSG_CNT, 0) <= 0:
return "" return ""
output = ["聊天消息统计:", " 联系人/群组名称 消息数量"] output = ["聊天消息统计:", " 联系人/群组名称 消息数量"]
output.extend( output.extend(
f"{self.name_mapping.get(chat_id, (chat_id, 0))[0][:32]:<32} {count:>10}" f"{self.name_mapping.get(chat_id, (chat_id, 0))[0][:32]:<32} {count:>10}"
for chat_id, count in sorted(stats[MSG_CNT_BY_CHAT].items()) for chat_id, count in sorted(stats.get(MSG_CNT_BY_CHAT, {}).items())
) )
output.append("") output.append("")
return "\n".join(output) return "\n".join(output)