From 6056ba47cc828e1b0ac6210b9793598ba629dbfb Mon Sep 17 00:00:00 2001 From: Oct-autumn Date: Fri, 9 May 2025 15:36:07 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E7=BB=9F=E8=AE=A1=E7=94=9F=E6=88=90HTM?= =?UTF-8?q?L=E6=A0=BC=E5=BC=8F=E7=9A=84=E7=BB=9F=E8=AE=A1=E6=8A=A5?= =?UTF-8?q?=E5=91=8A?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 2 +- src/plugins/utils/statistic.py | 427 ++++++++++++++++++++++++--------- 2 files changed, 312 insertions(+), 117 deletions(-) diff --git a/.gitignore b/.gitignore index 9e1b96811..e8a931078 100644 --- a/.gitignore +++ b/.gitignore @@ -43,7 +43,7 @@ src/plugins/utils/statistic.py __pycache__/ *.py[cod] *$py.class -llm_statistics.txt +maibot_statistics.html mongodb napcat run_dev.bat diff --git a/src/plugins/utils/statistic.py b/src/plugins/utils/statistic.py index 66bdf279f..4c11ba3d8 100644 --- a/src/plugins/utils/statistic.py +++ b/src/plugins/utils/statistic.py @@ -96,12 +96,37 @@ class OnlineTimeRecordTask(AsyncTask): logger.exception("在线时间记录失败") +def _format_online_time(online_seconds: int) -> str: + """ + 格式化在线时间 + :param online_seconds: 在线时间(秒) + :return: 格式化后的在线时间字符串 + """ + total_oneline_time = timedelta(seconds=online_seconds) + + days = total_oneline_time.days + hours = total_oneline_time.seconds // 3600 + minutes = (total_oneline_time.seconds // 60) % 60 + seconds = total_oneline_time.seconds % 60 + if days > 0: + # 如果在线时间超过1天,则格式化为“X天X小时X分钟” + total_oneline_time_str = f"{total_oneline_time.days}天{hours}小时{minutes}分钟{seconds}秒" + elif hours > 0: + # 如果在线时间超过1小时,则格式化为“X小时X分钟X秒” + total_oneline_time_str = f"{hours}小时{minutes}分钟{seconds}秒" + else: + # 其他情况格式化为“X分钟X秒” + total_oneline_time_str = f"{minutes}分钟{seconds}秒" + + return total_oneline_time_str + + class StatisticOutputTask(AsyncTask): """统计输出任务""" SEP_LINE = "-" * 84 - def __init__(self, record_file_path: str = "llm_statistics.txt"): + def __init__(self, record_file_path: str = "maibot_statistics.html"): # 延迟300秒启动,运行间隔300秒 super().__init__(task_name="Statistics Data Output Task", wait_before_start=0, run_interval=300) @@ -126,10 +151,10 @@ class StatisticOutputTask(AsyncTask): local_storage["deploy_time"] = now.timestamp() self.stat_period: List[Tuple[str, timedelta, str]] = [ - ("all_time", now - deploy_time, "自部署以来的"), - ("last_7_days", timedelta(days=7), "最近7天的"), - ("last_24_hours", timedelta(days=1), "最近24小时的"), - ("last_hour", timedelta(hours=1), "最近1小时的"), + ("all_time", now - deploy_time, "自部署以来"), # 必须保留“all_time” + ("last_7_days", timedelta(days=7), "最近7天"), + ("last_24_hours", timedelta(days=1), "最近24小时"), + ("last_hour", timedelta(hours=1), "最近1小时"), ] """ 统计时间段 [(统计名称, 统计时间段, 统计描述), ...] @@ -158,52 +183,6 @@ class StatisticOutputTask(AsyncTask): logger.info("\n" + "\n".join(output)) - def _statistic_file_output(self, stats: Dict[str, Any], now: datetime): - """ - 输出统计数据到文件 - """ - output = [f"MaiBot运行统计报告 (统计截止时间:{now.strftime('%Y-%m-%d %H:%M:%S')})", ""] - - def _format_stat_data(title: str, stats_: Dict[str, Any]) -> str: - """ - 格式化统计数据 - """ - return "\n".join( - [ - self.SEP_LINE, - f" {title}", - self.SEP_LINE, - self._format_total_stat(stats_), - "", - self._format_model_classified_stat(stats_), - "", - self._format_req_type_classified_stat(stats_), - "", - self._format_user_classified_stat(stats_), - "", - self._format_chat_stat(stats_), - ] - ) - - for period_key, period_interval, period_desc in self.stat_period: - if period_key in stats: - start_time = ( - datetime.fromtimestamp(local_storage["deploy_time"]) - if period_key == "all_time" - else now - period_interval - ) - # 统计数据存在 - output.append( - _format_stat_data( - f"{period_desc}统计数据 " - f"(统计时段:{start_time.strftime('%Y-%m-%d %H:%M:%S')} ~ {now.strftime('%Y-%m-%d %H:%M:%S')})", - stats[period_key], - ) - ) - - with open(self.record_file_path, "w", encoding="utf-8") as f: - f.write("\n\n".join(output)) - async def run(self): try: now = datetime.now() @@ -212,8 +191,8 @@ class StatisticOutputTask(AsyncTask): # 输出统计数据到控制台 self._statistic_console_output(stats, now) - # 输出统计数据到文件 - self._statistic_file_output(stats, now) + # 输出统计数据到html文件 + self._generate_html_report(stats, now) except Exception as e: logger.exception(f"输出统计数据过程中发生异常,错误信息:{e}") @@ -340,10 +319,10 @@ class StatisticOutputTask(AsyncTask): start_timestamp: datetime = record.get("start_timestamp") if start_timestamp < _period_start: # 如果开始时间在查询边界之前,则使用开始时间 - stats[period_key][ONLINE_TIME] += (end_timestamp - _period_start).total_seconds() / 60 + stats[period_key][ONLINE_TIME] += (end_timestamp - _period_start).total_seconds() else: # 否则,使用开始时间 - stats[period_key][ONLINE_TIME] += (end_timestamp - start_timestamp).total_seconds() / 60 + stats[period_key][ONLINE_TIME] += (end_timestamp - start_timestamp).total_seconds() break # 取消更早时间段的判断 return stats @@ -460,8 +439,9 @@ class StatisticOutputTask(AsyncTask): """ 格式化总统计数据 """ + output = [ - f"总在线时间: {stats[ONLINE_TIME]:.1f}分钟", + f"总在线时间: {_format_online_time(stats[ONLINE_TIME])}", f"总消息数: {stats[TOTAL_MSG_CNT]}", f"总请求数: {stats[TOTAL_REQ_CNT]}", f"总花费: {stats[TOTAL_COST]:.4f}¥", @@ -495,66 +475,6 @@ class StatisticOutputTask(AsyncTask): else: return "" - @staticmethod - def _format_req_type_classified_stat(stats: Dict[str, Any]) -> str: - """ - 格式化按请求类型分类的统计数据 - """ - if stats[TOTAL_REQ_CNT] > 0: - # 按请求类型统计 - data_fmt = "{:<32} {:>10} {:>12} {:>12} {:>12} {:>9.4f}¥" - - output = [ - "按请求类型分类统计:", - " 请求类型 调用次数 输入Token 输出Token Token总量 累计花费", - ] - for req_type, count in sorted(stats[REQ_CNT_BY_TYPE].items()): - name = req_type[:29] + "..." if len(req_type) > 32 else req_type - in_tokens = stats[IN_TOK_BY_TYPE][req_type] - out_tokens = stats[OUT_TOK_BY_TYPE][req_type] - tokens = stats[TOTAL_TOK_BY_TYPE][req_type] - cost = stats[COST_BY_TYPE][req_type] - output.append(data_fmt.format(name, count, in_tokens, out_tokens, tokens, cost)) - - output.append("") - return "\n".join(output) - else: - return "" - - @staticmethod - def _format_user_classified_stat(stats: Dict[str, Any]) -> str: - """ - 格式化按用户分类的统计数据 - """ - if stats[TOTAL_REQ_CNT] > 0: - # 修正用户统计列宽 - data_fmt = "{:<32} {:>10} {:>12} {:>12} {:>12} {:>9.4f}¥" - - output = [ - "按用户分类统计:", - " 用户名称 调用次数 输入Token 输出Token Token总量 累计花费", - ] - for user_id, count in sorted(stats[REQ_CNT_BY_USER].items()): - in_tokens = stats[IN_TOK_BY_USER][user_id] - out_tokens = stats[OUT_TOK_BY_USER][user_id] - tokens = stats[TOTAL_TOK_BY_USER][user_id] - cost = stats[COST_BY_USER][user_id] - output.append( - data_fmt.format( - user_id[:22], # 不再添加省略号,保持原始ID - count, - in_tokens, - out_tokens, - tokens, - cost, - ) - ) - - output.append("") - return "\n".join(output) - else: - return "" - def _format_chat_stat(self, stats: Dict[str, Any]) -> str: """ 格式化聊天统计数据 @@ -568,3 +488,278 @@ class StatisticOutputTask(AsyncTask): return "\n".join(output) else: return "" + + def _generate_html_report(self, stat: dict[str, Any], now: datetime): + """ + 生成HTML格式的统计报告 + :param stat: 统计数据 + :param now: 基准当前时间 + :return: HTML格式的统计报告 + """ + + tab_list = [ + f'' + for period in self.stat_period + ] + + def _format_stat_data(stat_data: dict[str, Any], div_id: str, start_time: datetime) -> str: + """ + 格式化一个时间段的统计数据到html div块 + :param stat_data: 统计数据 + :param div_id: div的ID + :param start_time: 统计时间段开始时间 + """ + # format总在线时间 + + # 生成HTML + return f""" +
+

+ 统计时段: + {start_time.strftime("%Y-%m-%d %H:%M:%S")} ~ {now.strftime("%Y-%m-%d %H:%M:%S")} +

+

总在线时间: {_format_online_time(stat_data[ONLINE_TIME])}

+

总消息数: {stat_data[TOTAL_MSG_CNT]}

+

总请求数: {stat_data[TOTAL_REQ_CNT]}

+

总花费: {stat_data[TOTAL_COST]:.4f} ¥

+ +

按模型分类统计

+ + + + { + "\n".join( + [ + f"" + f"" + f"" + f"" + f"" + f"" + f"" + f"" + for model_name, count in sorted(stat_data[REQ_CNT_BY_MODEL].items()) + ] + ) + } + +
模型名称调用次数输入Token输出TokenToken总量累计花费
{model_name}{count}{stat_data[IN_TOK_BY_MODEL][model_name]}{stat_data[OUT_TOK_BY_MODEL][model_name]}{stat_data[TOTAL_TOK_BY_MODEL][model_name]}{stat_data[COST_BY_MODEL][model_name]:.4f} ¥
+ +

按请求类型分类统计

+ + + + + + { + "\n".join( + [ + f"" + f"" + f"" + f"" + f"" + f"" + f"" + f"" + for req_type, count in sorted(stat_data[REQ_CNT_BY_TYPE].items()) + ] + ) + } + +
请求类型调用次数输入Token输出TokenToken总量累计花费
{req_type}{count}{stat_data[IN_TOK_BY_TYPE][req_type]}{stat_data[OUT_TOK_BY_TYPE][req_type]}{stat_data[TOTAL_TOK_BY_TYPE][req_type]}{stat_data[COST_BY_TYPE][req_type]:.4f} ¥
+ +

按用户分类统计

+ + + + + + { + "\n".join( + [ + f"" + f"" + f"" + f"" + f"" + f"" + f"" + f"" + for user_id, count in sorted(stat_data[REQ_CNT_BY_USER].items()) + ] + ) + } + +
用户名称调用次数输入Token输出TokenToken总量累计花费
{user_id}{count}{stat_data[IN_TOK_BY_USER][user_id]}{stat_data[OUT_TOK_BY_USER][user_id]}{stat_data[TOTAL_TOK_BY_USER][user_id]}{stat_data[COST_BY_USER][user_id]:.4f} ¥
+ +

聊天消息统计

+ + + + + + { + "\n".join( + [ + f"" + for chat_id, count in sorted(stat_data[MSG_CNT_BY_CHAT].items()) + ] + ) + } + +
联系人/群组名称消息数量
{self.name_mapping[chat_id][0]}{count}
+
+ """ + + tab_content_list = [ + _format_stat_data(stat[period[0]], period[0], now - period[1]) + for period in self.stat_period + if period[0] != "all_time" + ] + + tab_content_list.append( + _format_stat_data(stat["all_time"], "all_time", datetime.fromtimestamp(local_storage["deploy_time"])) + ) + + html_template = ( + """ + + + + + + MaiBot运行统计报告 + + + +""" + + f""" +
+

MaiBot运行统计报告

+

统计截止时间: {now.strftime("%Y-%m-%d %H:%M:%S")}

+ +
+ {"\n".join(tab_list)} +
+ + {"\n".join(tab_content_list)} +
+""" + + """ + + + + """ + ) + + with open(self.record_file_path, "w", encoding="utf-8") as f: + f.write(html_template)