better:优化prompt,修改buffer行为(更严格判定降低延迟,不丢弃图片前文本)

This commit is contained in:
SengokuCola
2025-04-25 23:05:46 +08:00
parent 5ed676e404
commit e17e47bcaf
6 changed files with 185 additions and 145 deletions

View File

@@ -53,7 +53,7 @@ person_info_default = {
# "impression" : None,
# "gender" : Unkown,
"konw_time": 0,
"msg_interval": 3000,
"msg_interval": 2000,
"msg_interval_list": [],
} # 个人信息的各项与默认值在此定义,以下处理会自动创建/补全每一项
@@ -384,18 +384,21 @@ class PersonInfoManager:
if delta > 0:
time_interval.append(delta)
time_interval = [t for t in time_interval if 500 <= t <= 8000]
if len(time_interval) >= 30:
time_interval = [t for t in time_interval if 200 <= t <= 8000]
# --- 修改后的逻辑 ---
# 数据量检查 (至少需要 30 条有效间隔,并且足够进行头尾截断)
if len(time_interval) >= 30 + 10: # 至少30条有效+头尾各5条
time_interval.sort()
# 画图(log)
# 画图(log) - 这部分保留
msg_interval_map = True
log_dir = Path("logs/person_info")
log_dir.mkdir(parents=True, exist_ok=True)
plt.figure(figsize=(10, 6))
time_series = pd.Series(time_interval)
plt.hist(time_series, bins=50, density=True, alpha=0.4, color="pink", label="Histogram")
time_series.plot(kind="kde", color="mediumpurple", linewidth=1, label="Density")
# 使用截断前的数据画图,更能反映原始分布
time_series_original = pd.Series(time_interval)
plt.hist(time_series_original, bins=50, density=True, alpha=0.4, color="pink", label="Histogram (Original Filtered)")
time_series_original.plot(kind="kde", color="mediumpurple", linewidth=1, label="Density (Original Filtered)")
plt.grid(True, alpha=0.2)
plt.xlim(0, 8000)
plt.title(f"Message Interval Distribution (User: {person_id[:8]}...)")
@@ -405,15 +408,22 @@ class PersonInfoManager:
img_path = log_dir / f"interval_distribution_{person_id[:8]}.png"
plt.savefig(img_path)
plt.close()
# 画图
# 画图结束
q25, q75 = np.percentile(time_interval, [25, 75])
iqr = q75 - q25
filtered = [x for x in time_interval if (q25 - 1.5 * iqr) <= x <= (q75 + 1.5 * iqr)]
# 去掉头尾各 5 个数据点
trimmed_interval = time_interval[5:-5]
msg_interval = int(round(np.percentile(filtered, 80)))
await self.update_one_field(person_id, "msg_interval", msg_interval)
logger.trace(f"用户{person_id}的msg_interval已经被更新为{msg_interval}")
# 计算截断后数据的 37% 分位数
if trimmed_interval: # 确保截断后列表不为空
msg_interval = int(round(np.percentile(trimmed_interval, 37)))
# 更新数据库
await self.update_one_field(person_id, "msg_interval", msg_interval)
logger.trace(f"用户{person_id}的msg_interval通过头尾截断和37分位数更新为{msg_interval}")
else:
logger.trace(f"用户{person_id}截断后数据为空无法计算msg_interval")
else:
logger.trace(f"用户{person_id}有效消息间隔数量 ({len(time_interval)}) 不足进行推断 (需要至少 {30+10} 条)")
# --- 修改结束 ---
except Exception as e:
logger.trace(f"用户{person_id}消息间隔计算失败: {type(e).__name__}: {str(e)}")
continue