diff --git a/.gitignore b/.gitignore index 9e1b96811..e8a931078 100644 --- a/.gitignore +++ b/.gitignore @@ -43,7 +43,7 @@ src/plugins/utils/statistic.py __pycache__/ *.py[cod] *$py.class -llm_statistics.txt +maibot_statistics.html mongodb napcat run_dev.bat diff --git a/src/plugins/utils/statistic.py b/src/plugins/utils/statistic.py index 66bdf279f..4c11ba3d8 100644 --- a/src/plugins/utils/statistic.py +++ b/src/plugins/utils/statistic.py @@ -96,12 +96,37 @@ class OnlineTimeRecordTask(AsyncTask): logger.exception("在线时间记录失败") +def _format_online_time(online_seconds: int) -> str: + """ + 格式化在线时间 + :param online_seconds: 在线时间(秒) + :return: 格式化后的在线时间字符串 + """ + total_oneline_time = timedelta(seconds=online_seconds) + + days = total_oneline_time.days + hours = total_oneline_time.seconds // 3600 + minutes = (total_oneline_time.seconds // 60) % 60 + seconds = total_oneline_time.seconds % 60 + if days > 0: + # 如果在线时间超过1天,则格式化为“X天X小时X分钟” + total_oneline_time_str = f"{total_oneline_time.days}天{hours}小时{minutes}分钟{seconds}秒" + elif hours > 0: + # 如果在线时间超过1小时,则格式化为“X小时X分钟X秒” + total_oneline_time_str = f"{hours}小时{minutes}分钟{seconds}秒" + else: + # 其他情况格式化为“X分钟X秒” + total_oneline_time_str = f"{minutes}分钟{seconds}秒" + + return total_oneline_time_str + + class StatisticOutputTask(AsyncTask): """统计输出任务""" SEP_LINE = "-" * 84 - def __init__(self, record_file_path: str = "llm_statistics.txt"): + def __init__(self, record_file_path: str = "maibot_statistics.html"): # 延迟300秒启动,运行间隔300秒 super().__init__(task_name="Statistics Data Output Task", wait_before_start=0, run_interval=300) @@ -126,10 +151,10 @@ class StatisticOutputTask(AsyncTask): local_storage["deploy_time"] = now.timestamp() self.stat_period: List[Tuple[str, timedelta, str]] = [ - ("all_time", now - deploy_time, "自部署以来的"), - ("last_7_days", timedelta(days=7), "最近7天的"), - ("last_24_hours", timedelta(days=1), "最近24小时的"), - ("last_hour", timedelta(hours=1), "最近1小时的"), + ("all_time", now - deploy_time, "自部署以来"), # 必须保留“all_time” + ("last_7_days", timedelta(days=7), "最近7天"), + ("last_24_hours", timedelta(days=1), "最近24小时"), + ("last_hour", timedelta(hours=1), "最近1小时"), ] """ 统计时间段 [(统计名称, 统计时间段, 统计描述), ...] @@ -158,52 +183,6 @@ class StatisticOutputTask(AsyncTask): logger.info("\n" + "\n".join(output)) - def _statistic_file_output(self, stats: Dict[str, Any], now: datetime): - """ - 输出统计数据到文件 - """ - output = [f"MaiBot运行统计报告 (统计截止时间:{now.strftime('%Y-%m-%d %H:%M:%S')})", ""] - - def _format_stat_data(title: str, stats_: Dict[str, Any]) -> str: - """ - 格式化统计数据 - """ - return "\n".join( - [ - self.SEP_LINE, - f" {title}", - self.SEP_LINE, - self._format_total_stat(stats_), - "", - self._format_model_classified_stat(stats_), - "", - self._format_req_type_classified_stat(stats_), - "", - self._format_user_classified_stat(stats_), - "", - self._format_chat_stat(stats_), - ] - ) - - for period_key, period_interval, period_desc in self.stat_period: - if period_key in stats: - start_time = ( - datetime.fromtimestamp(local_storage["deploy_time"]) - if period_key == "all_time" - else now - period_interval - ) - # 统计数据存在 - output.append( - _format_stat_data( - f"{period_desc}统计数据 " - f"(统计时段:{start_time.strftime('%Y-%m-%d %H:%M:%S')} ~ {now.strftime('%Y-%m-%d %H:%M:%S')})", - stats[period_key], - ) - ) - - with open(self.record_file_path, "w", encoding="utf-8") as f: - f.write("\n\n".join(output)) - async def run(self): try: now = datetime.now() @@ -212,8 +191,8 @@ class StatisticOutputTask(AsyncTask): # 输出统计数据到控制台 self._statistic_console_output(stats, now) - # 输出统计数据到文件 - self._statistic_file_output(stats, now) + # 输出统计数据到html文件 + self._generate_html_report(stats, now) except Exception as e: logger.exception(f"输出统计数据过程中发生异常,错误信息:{e}") @@ -340,10 +319,10 @@ class StatisticOutputTask(AsyncTask): start_timestamp: datetime = record.get("start_timestamp") if start_timestamp < _period_start: # 如果开始时间在查询边界之前,则使用开始时间 - stats[period_key][ONLINE_TIME] += (end_timestamp - _period_start).total_seconds() / 60 + stats[period_key][ONLINE_TIME] += (end_timestamp - _period_start).total_seconds() else: # 否则,使用开始时间 - stats[period_key][ONLINE_TIME] += (end_timestamp - start_timestamp).total_seconds() / 60 + stats[period_key][ONLINE_TIME] += (end_timestamp - start_timestamp).total_seconds() break # 取消更早时间段的判断 return stats @@ -460,8 +439,9 @@ class StatisticOutputTask(AsyncTask): """ 格式化总统计数据 """ + output = [ - f"总在线时间: {stats[ONLINE_TIME]:.1f}分钟", + f"总在线时间: {_format_online_time(stats[ONLINE_TIME])}", f"总消息数: {stats[TOTAL_MSG_CNT]}", f"总请求数: {stats[TOTAL_REQ_CNT]}", f"总花费: {stats[TOTAL_COST]:.4f}¥", @@ -495,66 +475,6 @@ class StatisticOutputTask(AsyncTask): else: return "" - @staticmethod - def _format_req_type_classified_stat(stats: Dict[str, Any]) -> str: - """ - 格式化按请求类型分类的统计数据 - """ - if stats[TOTAL_REQ_CNT] > 0: - # 按请求类型统计 - data_fmt = "{:<32} {:>10} {:>12} {:>12} {:>12} {:>9.4f}¥" - - output = [ - "按请求类型分类统计:", - " 请求类型 调用次数 输入Token 输出Token Token总量 累计花费", - ] - for req_type, count in sorted(stats[REQ_CNT_BY_TYPE].items()): - name = req_type[:29] + "..." if len(req_type) > 32 else req_type - in_tokens = stats[IN_TOK_BY_TYPE][req_type] - out_tokens = stats[OUT_TOK_BY_TYPE][req_type] - tokens = stats[TOTAL_TOK_BY_TYPE][req_type] - cost = stats[COST_BY_TYPE][req_type] - output.append(data_fmt.format(name, count, in_tokens, out_tokens, tokens, cost)) - - output.append("") - return "\n".join(output) - else: - return "" - - @staticmethod - def _format_user_classified_stat(stats: Dict[str, Any]) -> str: - """ - 格式化按用户分类的统计数据 - """ - if stats[TOTAL_REQ_CNT] > 0: - # 修正用户统计列宽 - data_fmt = "{:<32} {:>10} {:>12} {:>12} {:>12} {:>9.4f}¥" - - output = [ - "按用户分类统计:", - " 用户名称 调用次数 输入Token 输出Token Token总量 累计花费", - ] - for user_id, count in sorted(stats[REQ_CNT_BY_USER].items()): - in_tokens = stats[IN_TOK_BY_USER][user_id] - out_tokens = stats[OUT_TOK_BY_USER][user_id] - tokens = stats[TOTAL_TOK_BY_USER][user_id] - cost = stats[COST_BY_USER][user_id] - output.append( - data_fmt.format( - user_id[:22], # 不再添加省略号,保持原始ID - count, - in_tokens, - out_tokens, - tokens, - cost, - ) - ) - - output.append("") - return "\n".join(output) - else: - return "" - def _format_chat_stat(self, stats: Dict[str, Any]) -> str: """ 格式化聊天统计数据 @@ -568,3 +488,278 @@ class StatisticOutputTask(AsyncTask): return "\n".join(output) else: return "" + + def _generate_html_report(self, stat: dict[str, Any], now: datetime): + """ + 生成HTML格式的统计报告 + :param stat: 统计数据 + :param now: 基准当前时间 + :return: HTML格式的统计报告 + """ + + tab_list = [ + f'' + for period in self.stat_period + ] + + def _format_stat_data(stat_data: dict[str, Any], div_id: str, start_time: datetime) -> str: + """ + 格式化一个时间段的统计数据到html div块 + :param stat_data: 统计数据 + :param div_id: div的ID + :param start_time: 统计时间段开始时间 + """ + # format总在线时间 + + # 生成HTML + return f""" +
+ 统计时段: + {start_time.strftime("%Y-%m-%d %H:%M:%S")} ~ {now.strftime("%Y-%m-%d %H:%M:%S")} +
+总在线时间: {_format_online_time(stat_data[ONLINE_TIME])}
+总消息数: {stat_data[TOTAL_MSG_CNT]}
+总请求数: {stat_data[TOTAL_REQ_CNT]}
+总花费: {stat_data[TOTAL_COST]:.4f} ¥
+ +| 模型名称 | 调用次数 | 输入Token | 输出Token | Token总量 | 累计花费 |
|---|---|---|---|---|---|
| {model_name} | " + f"{count} | " + f"{stat_data[IN_TOK_BY_MODEL][model_name]} | " + f"{stat_data[OUT_TOK_BY_MODEL][model_name]} | " + f"{stat_data[TOTAL_TOK_BY_MODEL][model_name]} | " + f"{stat_data[COST_BY_MODEL][model_name]:.4f} ¥ | " + f"
| 请求类型 | 调用次数 | 输入Token | 输出Token | Token总量 | 累计花费 |
|---|---|---|---|---|---|
| {req_type} | " + f"{count} | " + f"{stat_data[IN_TOK_BY_TYPE][req_type]} | " + f"{stat_data[OUT_TOK_BY_TYPE][req_type]} | " + f"{stat_data[TOTAL_TOK_BY_TYPE][req_type]} | " + f"{stat_data[COST_BY_TYPE][req_type]:.4f} ¥ | " + f"
| 用户名称 | 调用次数 | 输入Token | 输出Token | Token总量 | 累计花费 |
|---|---|---|---|---|---|
| {user_id} | " + f"{count} | " + f"{stat_data[IN_TOK_BY_USER][user_id]} | " + f"{stat_data[OUT_TOK_BY_USER][user_id]} | " + f"{stat_data[TOTAL_TOK_BY_USER][user_id]} | " + f"{stat_data[COST_BY_USER][user_id]:.4f} ¥ | " + f"
| 联系人/群组名称 | 消息数量 |
|---|---|
| {self.name_mapping[chat_id][0]} | {count} |
统计截止时间: {now.strftime("%Y-%m-%d %H:%M:%S")}
+ +