Merge branch 'refactor' of https://github.com/SengokuCola/MaiMBot into refactor

This commit is contained in:
tcmofashi
2025-04-01 03:15:23 +08:00
18 changed files with 388 additions and 213 deletions

View File

@@ -47,6 +47,39 @@ class ChatBot:
if not self._started:
self._started = True
async def _create_thinking_message(self, message, chat, userinfo, messageinfo):
"""创建思考消息
Args:
message: 接收到的消息
chat: 聊天流对象
userinfo: 用户信息对象
messageinfo: 消息信息对象
Returns:
str: thinking_id
"""
bot_user_info = UserInfo(
user_id=global_config.BOT_QQ,
user_nickname=global_config.BOT_NICKNAME,
platform=messageinfo.platform,
)
thinking_time_point = round(time.time(), 2)
thinking_id = "mt" + str(thinking_time_point)
thinking_message = MessageThinking(
message_id=thinking_id,
chat_stream=chat,
bot_user_info=bot_user_info,
reply=message,
thinking_start_time=thinking_time_point,
)
message_manager.add_message(thinking_message)
willing_manager.change_reply_willing_sent(chat)
return thinking_id
async def message_process(self, message_data: str) -> None:
"""处理转化后的统一格式消息
1. 过滤消息
@@ -56,6 +89,8 @@ class ChatBot:
5. 更新关系
6. 更新情绪
"""
timing_results = {} # 用于收集所有计时结果
response_set = None # 初始化response_set变量
message = MessageRecv(message_data)
groupinfo = message.message_info.group_info
@@ -75,10 +110,7 @@ class ChatBot:
# 创建 心流与chat的观察
heartflow.create_subheartflow(chat.stream_id)
timer1 = time.time()
await message.process()
timer2 = time.time()
logger.debug(f"2消息处理时间: {timer2 - timer1}")
# 过滤词/正则表达式过滤
if self._check_ban_words(message.processed_plain_text, chat, userinfo) or self._check_ban_regex(
@@ -94,7 +126,7 @@ class ChatBot:
message.processed_plain_text, fast_retrieval=True
)
timer2 = time.time()
logger.debug(f"3记忆激活时间: {timer2 - timer1}")
timing_results["记忆激活"] = timer2 - timer1
is_mentioned = is_mentioned_bot_in_message(message)
@@ -118,7 +150,7 @@ class ChatBot:
sender_id=str(message.message_info.user_info.user_id),
)
timer2 = time.time()
logger.debug(f"4计算意愿激活时间: {timer2 - timer1}")
timing_results["意愿激活"] = timer2 - timer1
# 神秘的消息流数据结构处理
if chat.group_info:
@@ -138,12 +170,30 @@ class ChatBot:
if "maimcore_reply_probability_gain" in message.message_info.additional_config.keys():
reply_probability += message.message_info.additional_config["maimcore_reply_probability_gain"]
do_reply = False
# 开始组织语言
if random() < reply_probability:
do_reply = True
timer1 = time.time()
response_set, thinking_id = await self._generate_response_from_message(message, chat, userinfo, messageinfo)
thinking_id = await self._create_thinking_message(message, chat, userinfo, messageinfo)
timer2 = time.time()
logger.info(f"5生成回复时间: {timer2 - timer1}")
timing_results["创建思考消息"] = timer2 - timer1
timer1 = time.time()
await heartflow.get_subheartflow(chat.stream_id).do_observe()
timer2 = time.time()
timing_results["观察"] = timer2 - timer1
timer1 = time.time()
await heartflow.get_subheartflow(chat.stream_id).do_thinking_before_reply(message.processed_plain_text)
timer2 = time.time()
timing_results["思考前脑内状态"] = timer2 - timer1
timer1 = time.time()
response_set = await self.gpt.generate_response(message)
timer2 = time.time()
timing_results["生成回复"] = timer2 - timer1
if not response_set:
logger.info("为什么生成回复失败?")
@@ -153,56 +203,25 @@ class ChatBot:
timer1 = time.time()
await self._send_response_messages(message, chat, response_set, thinking_id)
timer2 = time.time()
logger.info(f"7发送消息时间: {timer2 - timer1}")
timing_results["发送消息"] = timer2 - timer1
# 处理表情包
timer1 = time.time()
await self._handle_emoji(message, chat, response_set)
timer2 = time.time()
logger.debug(f"8处理表情包时间: {timer2 - timer1}")
timing_results["处理表情包"] = timer2 - timer1
timer1 = time.time()
await self._update_using_response(message, response_set)
timer2 = time.time()
logger.info(f"6更新htfl时间: {timer2 - timer1}")
timing_results["更新心流"] = timer2 - timer1
# 更新情绪和关系
# await self._update_emotion_and_relationship(message, chat, response_set)
async def _generate_response_from_message(self, message, chat, userinfo, messageinfo):
"""生成回复内容
Args:
message: 接收到的消息
chat: 聊天流对象
userinfo: 用户信息对象
messageinfo: 消息信息对象
Returns:
tuple: (response, raw_content) 回复内容和原始内容
"""
bot_user_info = UserInfo(
user_id=global_config.BOT_QQ,
user_nickname=global_config.BOT_NICKNAME,
platform=messageinfo.platform,
)
thinking_time_point = round(time.time(), 2)
thinking_id = "mt" + str(thinking_time_point)
thinking_message = MessageThinking(
message_id=thinking_id,
chat_stream=chat,
bot_user_info=bot_user_info,
reply=message,
thinking_start_time=thinking_time_point,
)
message_manager.add_message(thinking_message)
willing_manager.change_reply_willing_sent(chat)
response_set = await self.gpt.generate_response(message)
return response_set, thinking_id
# 在最后统一输出所有计时结果
if do_reply:
timing_str = " | ".join([f"{step}: {duration:.2f}" for step, duration in timing_results.items()])
trigger_msg = message.processed_plain_text
response_msg = " ".join(response_set) if response_set else "无回复"
logger.info(f"触发消息: {trigger_msg[:20]}... | 生成消息: {response_msg[:20]}... | 性能计时: {timing_str}")
async def _update_using_response(self, message, response_set):
# 更新心流状态
@@ -213,7 +232,7 @@ class ChatBot:
stream_id, limit=global_config.MAX_CONTEXT_SIZE, combine=True
)
await heartflow.get_subheartflow(stream_id).do_after_reply(response_set, chat_talking_prompt)
await heartflow.get_subheartflow(stream_id).do_thinking_after_reply(response_set, chat_talking_prompt)
async def _send_response_messages(self, message, chat, response_set, thinking_id):
container = message_manager.get_container(chat.stream_id)

View File

@@ -1,4 +1,3 @@
import random
import time
from typing import List, Optional, Tuple, Union
@@ -30,7 +29,7 @@ class ResponseGenerator:
request_type="response",
)
self.model_normal = LLM_request(
model=global_config.llm_normal, temperature=0.7, max_tokens=3000, request_type="response"
model=global_config.llm_normal, temperature=0.8, max_tokens=256, request_type="response"
)
self.model_sum = LLM_request(
@@ -42,20 +41,26 @@ class ResponseGenerator:
async def generate_response(self, message: MessageThinking) -> Optional[Union[str, List[str]]]:
"""根据当前模型类型选择对应的生成函数"""
# 从global_config中获取模型概率值并选择模型
if random.random() < global_config.MODEL_R1_PROBABILITY:
self.current_model_type = "深深地"
current_model = self.model_reasoning
else:
self.current_model_type = "浅浅的"
current_model = self.model_normal
# if random.random() < global_config.MODEL_R1_PROBABILITY:
# self.current_model_type = "深深地"
# current_model = self.model_reasoning
# else:
# self.current_model_type = "浅浅的"
# current_model = self.model_normal
# logger.info(
# f"{self.current_model_type}思考:{message.processed_plain_text[:30] + '...' if len(message.processed_plain_text) > 30 else message.processed_plain_text}"
# ) # noqa: E501
logger.info(
f"{self.current_model_type}思考:{message.processed_plain_text[:30] + '...' if len(message.processed_plain_text) > 30 else message.processed_plain_text}"
) # noqa: E501
f"思考:{message.processed_plain_text[:30] + '...' if len(message.processed_plain_text) > 30 else message.processed_plain_text}"
)
current_model = self.model_normal
model_response = await self._generate_response_with_model(message, current_model)
print(f"raw_content: {model_response}")
# print(f"raw_content: {model_response}")
if model_response:
logger.info(f"{global_config.BOT_NICKNAME}的回复是:{model_response}")
@@ -126,8 +131,6 @@ class ResponseGenerator:
"user": sender_name,
"message": message.processed_plain_text,
"model": self.current_model_name,
# 'reasoning_check': reasoning_content_check,
# 'response_check': content_check,
"reasoning": reasoning_content,
"response": content,
"prompt": prompt,

View File

@@ -188,11 +188,11 @@ class MessageManager:
# print(message_earliest.is_head)
# print(message_earliest.update_thinking_time())
# print(message_earliest.is_private_message())
# thinking_time = message_earliest.update_thinking_time()
# print(thinking_time)
thinking_time = message_earliest.update_thinking_time()
print(thinking_time)
if (
message_earliest.is_head
and message_earliest.update_thinking_time() > 50
and message_earliest.update_thinking_time() > 18
and not message_earliest.is_private_message() # 避免在私聊时插入reply
):
logger.debug(f"设置回复消息{message_earliest.processed_plain_text}")
@@ -215,11 +215,11 @@ class MessageManager:
try:
# print(msg.is_head)
# print(msg.update_thinking_time())
print(msg.update_thinking_time())
# print(msg.is_private_message())
if (
msg.is_head
and msg.update_thinking_time() > 50
and msg.update_thinking_time() > 18
and not msg.is_private_message() # 避免在私聊时插入reply
):
logger.debug(f"设置回复消息{msg.processed_plain_text}")

View File

@@ -24,27 +24,9 @@ class PromptBuilder:
async def _build_prompt(
self, chat_stream, message_txt: str, sender_name: str = "某人", stream_id: Optional[int] = None
) -> tuple[str, str]:
# 关系(载入当前聊天记录里部分人的关系)
# who_chat_in_group = [chat_stream]
# who_chat_in_group += get_recent_group_speaker(
# stream_id,
# (chat_stream.user_info.user_id, chat_stream.user_info.platform),
# limit=global_config.MAX_CONTEXT_SIZE,
# )
# outer_world_info = outer_world.outer_world_info
current_mind_info = heartflow.get_subheartflow(stream_id).current_mind
# relation_prompt = ""
# for person in who_chat_in_group:
# relation_prompt += relationship_manager.build_relationship_info(person)
# relation_prompt_all = (
# f"{relation_prompt}关系等级越大,关系越好,请分析聊天记录,"
# f"根据你和说话者{sender_name}的关系和态度进行回复,明确你的立场和情感。"
# )
# 开始构建prompt
# 心情
@@ -71,25 +53,6 @@ class PromptBuilder:
chat_talking_prompt = chat_talking_prompt
# print(f"\033[1;34m[调试]\033[0m 已从数据库获取群 {group_id} 的消息记录:{chat_talking_prompt}")
# 使用新的记忆获取方法
memory_prompt = ""
start_time = time.time()
# 调用 hippocampus 的 get_relevant_memories 方法
relevant_memories = await HippocampusManager.get_instance().get_memory_from_text(
text=message_txt, max_memory_num=3, max_memory_length=2, max_depth=2, fast_retrieval=False
)
memory_str = ""
for _topic, memories in relevant_memories:
memory_str += f"{memories}\n"
if relevant_memories:
# 格式化记忆内容
memory_prompt = f"你回忆起:\n{memory_str}\n"
end_time = time.time()
logger.info(f"回忆耗时: {(end_time - start_time):.3f}")
# 类型
if chat_in_group:
chat_target = "你正在qq群里聊天下面是群里在聊的内容"
@@ -146,19 +109,18 @@ class PromptBuilder:
涉及政治敏感以及违法违规的内容请规避。"""
logger.info("开始构建prompt")
prompt = f"""
{prompt_info}
{memory_prompt}
你刚刚脑子里在想:
{current_mind_info}
{chat_target}
{chat_talking_prompt}
现在"{sender_name}"说的:{message_txt}。引起了你的注意,{mood_prompt}\n
你刚刚脑子里在想:
{current_mind_info}
现在"{sender_name}"说的:{message_txt}。引起了你的注意,你想要在群里发言发言或者回复这条消息。\n
你的网名叫{global_config.BOT_NICKNAME},有人也叫你{"/".join(global_config.BOT_ALIAS_NAMES)}{prompt_personality}
你正在{chat_target_2},现在请你读读之前的聊天记录,然后给出日常且口语化的回复,平淡一些,
尽量简短一些。{keywords_reaction_prompt}请注意把握聊天内容,不要回复的太有条理,可以有个性。{prompt_ger}
请回复的平淡一些,简短一些,说中文,不要刻意突出自身学科背景,
请回复的平淡一些,简短一些,说中文,不要刻意突出自身学科背景,尽量不要说你说过的话
请注意不要输出多余内容(包括前后缀,冒号和引号,括号,表情等),只输出回复内容。
{moderation_prompt}不要输出多余内容(包括前后缀冒号和引号括号表情包at或 @等 )。"""

View File

@@ -32,7 +32,7 @@ class ImageManager:
self._ensure_description_collection()
self._ensure_image_dir()
self._initialized = True
self._llm = LLM_request(model=global_config.vlm, temperature=0.4, max_tokens=1000, request_type="image")
self._llm = LLM_request(model=global_config.vlm, temperature=0.4, max_tokens=300, request_type="image")
def _ensure_image_dir(self):
"""确保图像存储目录存在"""
@@ -171,7 +171,7 @@ class ImageManager:
# 调用AI获取描述
prompt = (
"请用中文描述这张图片的内容。如果有文字,请把文字都描述出来。并尝试猜测这个图片的含义。最多200个字。"
"请用中文描述这张图片的内容。如果有文字,请把文字都描述出来。并尝试猜测这个图片的含义。最多100个字。"
)
description, _ = await self._llm.generate_response_for_image(prompt, image_base64, image_format)

View File

@@ -25,7 +25,7 @@ logger = get_module_logger("config", config=config_config)
#考虑到实际上配置文件中的mai_version是不会自动更新的,所以采用硬编码
mai_version_main = "0.6.0"
mai_version_fix = "mmc-2"
mai_version_fix = "mmc-3"
mai_version = f"{mai_version_main}-{mai_version_fix}"
def update_config():
@@ -231,7 +231,7 @@ class BotConfig:
# 模型配置
llm_reasoning: Dict[str, str] = field(default_factory=lambda: {})
llm_reasoning_minor: Dict[str, str] = field(default_factory=lambda: {})
# llm_reasoning_minor: Dict[str, str] = field(default_factory=lambda: {})
llm_normal: Dict[str, str] = field(default_factory=lambda: {})
llm_topic_judge: Dict[str, str] = field(default_factory=lambda: {})
llm_summary_by_topic: Dict[str, str] = field(default_factory=lambda: {})
@@ -370,9 +370,9 @@ class BotConfig:
response_config = parent["response"]
config.MODEL_R1_PROBABILITY = response_config.get("model_r1_probability", config.MODEL_R1_PROBABILITY)
config.MODEL_V3_PROBABILITY = response_config.get("model_v3_probability", config.MODEL_V3_PROBABILITY)
config.MODEL_R1_DISTILL_PROBABILITY = response_config.get(
"model_r1_distill_probability", config.MODEL_R1_DISTILL_PROBABILITY
)
# config.MODEL_R1_DISTILL_PROBABILITY = response_config.get(
# "model_r1_distill_probability", config.MODEL_R1_DISTILL_PROBABILITY
# )
config.max_response_length = response_config.get("max_response_length", config.max_response_length)
def willing(parent: dict):
@@ -397,7 +397,7 @@ class BotConfig:
config_list = [
"llm_reasoning",
"llm_reasoning_minor",
# "llm_reasoning_minor",
"llm_normal",
"llm_topic_judge",
"llm_summary_by_topic",

View File

@@ -697,6 +697,11 @@ class ParahippocampalGyrus:
start_time = time.time()
logger.info("[遗忘] 开始检查数据库...")
# 验证百分比参数
if not 0 <= percentage <= 1:
logger.warning(f"[遗忘] 无效的遗忘百分比: {percentage}, 使用默认值 0.005")
percentage = 0.005
all_nodes = list(self.memory_graph.G.nodes())
all_edges = list(self.memory_graph.G.edges())
@@ -704,11 +709,21 @@ class ParahippocampalGyrus:
logger.info("[遗忘] 记忆图为空,无需进行遗忘操作")
return
check_nodes_count = max(1, int(len(all_nodes) * percentage))
check_edges_count = max(1, int(len(all_edges) * percentage))
# 确保至少检查1个节点和边且不超过总数
check_nodes_count = max(1, min(len(all_nodes), int(len(all_nodes) * percentage)))
check_edges_count = max(1, min(len(all_edges), int(len(all_edges) * percentage)))
nodes_to_check = random.sample(all_nodes, check_nodes_count)
edges_to_check = random.sample(all_edges, check_edges_count)
# 只有在有足够的节点和边时才进行采样
if len(all_nodes) >= check_nodes_count and len(all_edges) >= check_edges_count:
try:
nodes_to_check = random.sample(all_nodes, check_nodes_count)
edges_to_check = random.sample(all_edges, check_edges_count)
except ValueError as e:
logger.error(f"[遗忘] 采样错误: {str(e)}")
return
else:
logger.info("[遗忘] 没有足够的节点或边进行遗忘操作")
return
# 使用列表存储变化信息
edge_changes = {

View File

@@ -58,8 +58,18 @@ class MemoryBuildScheduler:
weight2 (float): 第二个分布的权重
total_samples (int): 要生成的总时间点数量
"""
# 验证参数
if total_samples <= 0:
raise ValueError("total_samples 必须大于0")
if weight1 < 0 or weight2 < 0:
raise ValueError("权重必须为非负数")
if std_hours1 < 0 or std_hours2 < 0:
raise ValueError("标准差必须为非负数")
# 归一化权重
total_weight = weight1 + weight2
if total_weight == 0:
raise ValueError("权重总和不能为0")
self.weight1 = weight1 / total_weight
self.weight2 = weight2 / total_weight
@@ -73,12 +83,11 @@ class MemoryBuildScheduler:
def generate_time_samples(self):
"""生成混合分布的时间采样点"""
# 根据权重计算每个分布的样本数
samples1 = int(self.total_samples * self.weight1)
samples2 = self.total_samples - samples1
samples1 = max(1, int(self.total_samples * self.weight1))
samples2 = max(1, self.total_samples - samples1) # 确保 samples2 至少为1
# 生成两个正态分布的小时偏移
hours_offset1 = np.random.normal(loc=self.n_hours1, scale=self.std_hours1, size=samples1)
hours_offset2 = np.random.normal(loc=self.n_hours2, scale=self.std_hours2, size=samples2)
# 合并两个分布的偏移

View File

@@ -285,39 +285,46 @@ class LLM_request:
usage = None # 初始化usage变量避免未定义错误
async for line_bytes in response.content:
line = line_bytes.decode("utf-8").strip()
if not line:
continue
if line.startswith("data:"):
data_str = line[5:].strip()
if data_str == "[DONE]":
break
try:
chunk = json.loads(data_str)
if flag_delta_content_finished:
chunk_usage = chunk.get("usage", None)
if chunk_usage:
usage = chunk_usage # 获取token用量
else:
delta = chunk["choices"][0]["delta"]
delta_content = delta.get("content")
if delta_content is None:
delta_content = ""
accumulated_content += delta_content
# 检测流式输出文本是否结束
finish_reason = chunk["choices"][0].get("finish_reason")
if delta.get("reasoning_content", None):
reasoning_content += delta["reasoning_content"]
if finish_reason == "stop":
try:
line = line_bytes.decode("utf-8").strip()
if not line:
continue
if line.startswith("data:"):
data_str = line[5:].strip()
if data_str == "[DONE]":
break
try:
chunk = json.loads(data_str)
if flag_delta_content_finished:
chunk_usage = chunk.get("usage", None)
if chunk_usage:
usage = chunk_usage
break
# 部分平台在文本输出结束前不会返回token用量此时需要再获取一次chunk
flag_delta_content_finished = True
usage = chunk_usage # 获取token用量
else:
delta = chunk["choices"][0]["delta"]
delta_content = delta.get("content")
if delta_content is None:
delta_content = ""
accumulated_content += delta_content
# 检测流式输出文本是否结束
finish_reason = chunk["choices"][0].get("finish_reason")
if delta.get("reasoning_content", None):
reasoning_content += delta["reasoning_content"]
if finish_reason == "stop":
chunk_usage = chunk.get("usage", None)
if chunk_usage:
usage = chunk_usage
break
# 部分平台在文本输出结束前不会返回token用量此时需要再获取一次chunk
flag_delta_content_finished = True
except Exception as e:
logger.exception(f"解析流式输出错误: {str(e)}")
except Exception as e:
logger.exception(f"解析流式输出错误: {str(e)}")
except GeneratorExit:
logger.warning("流式输出被中断")
break
except Exception as e:
logger.error(f"处理流式输出时发生错误: {str(e)}")
break
content = accumulated_content
think_match = re.search(r"<think>(.*?)</think>", content, re.DOTALL)
if think_match:

View File

@@ -176,21 +176,27 @@ class ScheduleGenerator:
logger.warning(f"未找到{today_str}的日程记录")
async def move_doing(self, mind_thinking: str = ""):
current_time = datetime.datetime.now()
if mind_thinking:
doing_prompt = self.construct_doing_prompt(current_time, mind_thinking)
else:
doing_prompt = self.construct_doing_prompt(current_time)
try:
current_time = datetime.datetime.now()
if mind_thinking:
doing_prompt = self.construct_doing_prompt(current_time, mind_thinking)
else:
doing_prompt = self.construct_doing_prompt(current_time)
# print(doing_prompt)
doing_response, _ = await self.llm_scheduler_doing.generate_response_async(doing_prompt)
self.today_done_list.append((current_time, doing_response))
doing_response, _ = await self.llm_scheduler_doing.generate_response_async(doing_prompt)
self.today_done_list.append((current_time, doing_response))
await self.update_today_done_list()
await self.update_today_done_list()
logger.info(f"当前活动: {doing_response}")
logger.info(f"当前活动: {doing_response}")
return doing_response
return doing_response
except GeneratorExit:
logger.warning("日程生成被中断")
return "日程生成被中断"
except Exception as e:
logger.error(f"生成日程时发生错误: {str(e)}")
return "生成日程时发生错误"
async def get_task_from_time_to_time(self, start_time: str, end_time: str):
"""获取指定时间范围内的任务列表

View File

@@ -20,6 +20,7 @@ class LLMStatistics:
self.output_file = output_file
self.running = False
self.stats_thread = None
self.console_thread = None
self._init_database()
def _init_database(self):
@@ -32,15 +33,22 @@ class LLMStatistics:
"""启动统计线程"""
if not self.running:
self.running = True
# 启动文件统计线程
self.stats_thread = threading.Thread(target=self._stats_loop)
self.stats_thread.daemon = True
self.stats_thread.start()
# 启动控制台输出线程
self.console_thread = threading.Thread(target=self._console_output_loop)
self.console_thread.daemon = True
self.console_thread.start()
def stop(self):
"""停止统计线程"""
self.running = False
if self.stats_thread:
self.stats_thread.join()
if self.console_thread:
self.console_thread.join()
def _record_online_time(self):
"""记录在线时间"""
@@ -126,10 +134,19 @@ class LLMStatistics:
messages_cursor = db.messages.find({"time": {"$gte": start_time.timestamp()}})
for doc in messages_cursor:
stats["total_messages"] += 1
user_id = str(doc.get("user_info", {}).get("user_id", "unknown"))
chat_id = str(doc.get("chat_id", "unknown"))
stats["messages_by_user"][user_id] += 1
stats["messages_by_chat"][chat_id] += 1
# user_id = str(doc.get("user_info", {}).get("user_id", "unknown"))
chat_info = doc.get("chat_info", {})
user_info = doc.get("user_info", {})
group_info = chat_info.get("group_info") if chat_info else {}
# print(f"group_info: {group_info}")
group_name = "unknown"
if group_info:
group_name = group_info["group_name"]
if user_info and group_name == "unknown":
group_name = user_info["user_nickname"]
# print(f"group_name: {group_name}")
stats["messages_by_user"][user_id] += 1
stats["messages_by_chat"][group_name] += 1
return stats
@@ -201,17 +218,74 @@ class LLMStatistics:
)
output.append("")
# 添加消息统计
output.append("消息统计:")
output.append(("用户ID 消息数量"))
for user_id, count in sorted(stats["messages_by_user"].items()):
output.append(f"{user_id[:32]:<32} {count:>10}")
# 添加聊天统计
output.append("群组统计:")
output.append(("群组名称 消息数量"))
for group_name, count in sorted(stats["messages_by_chat"].items()):
output.append(f"{group_name[:32]:<32} {count:>10}")
return "\n".join(output)
def _format_stats_section_lite(self, stats: Dict[str, Any], title: str) -> str:
"""格式化统计部分的输出"""
output = []
output.append("\n" + "-" * 84)
output.append(f"{title}")
output.append("-" * 84)
# output.append(f"总请求数: {stats['total_requests']}")
if stats["total_requests"] > 0:
# output.append(f"总Token数: {stats['total_tokens']}")
output.append(f"总花费: {stats['total_cost']:.4f}¥")
# output.append(f"在线时间: {stats['online_time_minutes']}分钟")
output.append(f"总消息数: {stats['total_messages']}\n")
data_fmt = "{:<32} {:>10} {:>14} {:>13.4f} ¥"
# 按模型统计
output.append("按模型统计:")
output.append(("模型名称 调用次数 Token总量 累计花费"))
for model_name, count in sorted(stats["requests_by_model"].items()):
tokens = stats["tokens_by_model"][model_name]
cost = stats["costs_by_model"][model_name]
output.append(
data_fmt.format(model_name[:32] + ".." if len(model_name) > 32 else model_name, count, tokens, cost)
)
output.append("")
output.append("聊天统计:")
output.append(("聊天ID 消息数量"))
for chat_id, count in sorted(stats["messages_by_chat"].items()):
output.append(f"{chat_id[:32]:<32} {count:>10}")
# 按请求类型统计
# output.append("按请求类型统计:")
# output.append(("模型名称 调用次数 Token总量 累计花费"))
# for req_type, count in sorted(stats["requests_by_type"].items()):
# tokens = stats["tokens_by_type"][req_type]
# cost = stats["costs_by_type"][req_type]
# output.append(
# data_fmt.format(req_type[:22] + ".." if len(req_type) > 24 else req_type, count, tokens, cost)
# )
# output.append("")
# 修正用户统计列宽
# output.append("按用户统计:")
# output.append(("用户ID 调用次数 Token总量 累计花费"))
# for user_id, count in sorted(stats["requests_by_user"].items()):
# tokens = stats["tokens_by_user"][user_id]
# cost = stats["costs_by_user"][user_id]
# output.append(
# data_fmt.format(
# user_id[:22], # 不再添加省略号保持原始ID
# count,
# tokens,
# cost,
# )
# )
# output.append("")
# 添加聊天统计
output.append("群组统计:")
output.append(("群组名称 消息数量"))
for group_name, count in sorted(stats["messages_by_chat"].items()):
output.append(f"{group_name[:32]:<32} {count:>10}")
return "\n".join(output)
@@ -237,8 +311,30 @@ class LLMStatistics:
with open(self.output_file, "w", encoding="utf-8") as f:
f.write("\n".join(output))
def _console_output_loop(self):
"""控制台输出循环每5分钟输出一次最近1小时的统计"""
while self.running:
# 等待5分钟
for _ in range(300): # 5分钟 = 300秒
if not self.running:
break
time.sleep(1)
try:
# 收集最近1小时的统计数据
now = datetime.now()
hour_stats = self._collect_statistics_for_period(now - timedelta(hours=1))
# 使用logger输出
stats_output = self._format_stats_section_lite(hour_stats, "最近1小时统计详细信息见根目录文件llm_statistics.txt")
logger.info("\n" + stats_output + "\n" + "=" * 50)
except Exception:
logger.exception("控制台统计数据输出失败")
def _stats_loop(self):
"""统计循环,每1分钟运行一次"""
"""统计循环,每5分钟运行一次"""
while self.running:
try:
# 记录在线时间
@@ -250,7 +346,7 @@ class LLMStatistics:
logger.exception("统计数据处理失败")
# 等待5分钟
for _ in range(30): # 5分钟 = 300秒
for _ in range(300): # 5分钟 = 300秒
if not self.running:
break
time.sleep(1)