feat: update the memory system

This commit is contained in:
SengokuCola
2025-08-13 23:17:28 +08:00
parent 3962fc601f
commit fed0c0fd04
10 changed files with 732 additions and 406 deletions


@@ -4,14 +4,14 @@ import math
import random
import time
import re
import json
import jieba
import networkx as nx
import numpy as np
-from itertools import combinations
-from typing import List, Tuple, Coroutine, Any, Set
+from typing import List, Tuple, Set, Coroutine, Any
from collections import Counter
+from itertools import combinations
from rich.traceback import install
from src.llm_models.utils_model import LLMRequest
@@ -25,6 +25,15 @@ from src.chat.utils.chat_message_builder import (
    get_raw_msg_by_timestamp_with_chat,
)  # import build_readable_messages
from src.chat.utils.utils import translate_timestamp_to_human_readable
+# Add a cosine_similarity helper
+def cosine_similarity(v1, v2):
+    """Compute the cosine similarity of two vectors."""
+    dot_product = np.dot(v1, v2)
+    norm1 = np.linalg.norm(v1)
+    norm2 = np.linalg.norm(v2)
+    if norm1 == 0 or norm2 == 0:
+        return 0
+    return dot_product / (norm1 * norm2)
install(extra_lines=3)
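
For reference, a minimal sketch of how this helper behaves on the binary bag-of-words vectors used later in this file (the vocabulary and vectors are illustrative, not from the repository):

import numpy as np

# Presence vectors over a shared vocabulary ["a", "b", "c"]:
v1 = [1, 1, 0]  # a text containing the words "a" and "b"
v2 = [1, 0, 1]  # a text containing the words "a" and "c"
dot = np.dot(v1, v2)                                   # 1 shared word
cos = dot / (np.linalg.norm(v1) * np.linalg.norm(v2))  # 1 / (sqrt(2) * sqrt(2))
print(cos)  # 0.5
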
@@ -44,19 +53,18 @@ def calculate_information_content(text):
    return entropy
-def cosine_similarity(v1, v2):  # sourcery skip: assign-if-exp, reintroduce-else
-    """Compute the cosine similarity of two vectors."""
-    dot_product = np.dot(v1, v2)
-    norm1 = np.linalg.norm(v1)
-    norm2 = np.linalg.norm(v2)
-    if norm1 == 0 or norm2 == 0:
-        return 0
-    return dot_product / (norm1 * norm2)
logger = get_logger("memory")
class MemoryGraph:
    def __init__(self):
        self.G = nx.Graph()  # use networkx's graph structure
@@ -83,26 +91,46 @@ class MemoryGraph:
            last_modified=current_time,
        )  # record the last-modified time
-    def add_dot(self, concept, memory):
+    async def add_dot(self, concept, memory, hippocampus_instance=None):
        current_time = datetime.datetime.now().timestamp()
        if concept in self.G:
            if "memory_items" in self.G.nodes[concept]:
-                if not isinstance(self.G.nodes[concept]["memory_items"], list):
-                    self.G.nodes[concept]["memory_items"] = [self.G.nodes[concept]["memory_items"]]
-                self.G.nodes[concept]["memory_items"].append(memory)
+                # The existing memory item is already a str
+                existing_memory = self.G.nodes[concept]["memory_items"]
+                # If there is existing memory, use the LLM to integrate old and new memories
+                if existing_memory and hippocampus_instance and hippocampus_instance.model_small:
+                    try:
+                        integrated_memory = await self._integrate_memories_with_llm(
+                            existing_memory, str(memory), hippocampus_instance.model_small
+                        )
+                        self.G.nodes[concept]["memory_items"] = integrated_memory
+                        # Integration succeeded; increase the weight
+                        current_weight = self.G.nodes[concept].get("weight", 0.0)
+                        self.G.nodes[concept]["weight"] = current_weight + 1.0
+                        logger.debug(f"Node {concept} memory integrated; weight raised to {current_weight + 1.0}")
+                    except Exception as e:
+                        logger.error(f"LLM memory integration failed: {e}")
+                        # Fall back to simple concatenation
+                        new_memory_str = f"{existing_memory} | {memory}"
+                        self.G.nodes[concept]["memory_items"] = new_memory_str
+                else:
+                    new_memory_str = str(memory)
+                    self.G.nodes[concept]["memory_items"] = new_memory_str
            else:
-                self.G.nodes[concept]["memory_items"] = [memory]
+                self.G.nodes[concept]["memory_items"] = str(memory)
            # If the node exists but had no memory_items, this is its first memory; set created_time
            if "created_time" not in self.G.nodes[concept]:
                self.G.nodes[concept]["created_time"] = current_time
            # Update the last-modified time
            self.G.nodes[concept]["last_modified"] = current_time
        else:
-            # For a new node, create a new memory list
+            # For a new node, create a new memory string
            self.G.add_node(
                concept,
-                memory_items=[memory],
+                memory_items=str(memory),
                weight=1.0,  # new nodes start with weight 1.0
                created_time=current_time,  # set creation time
                last_modified=current_time,
            )  # set last-modified time
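
Because add_dot is now a coroutine, call sites must await it. A minimal usage sketch, assuming live MemoryGraph and Hippocampus instances (the names here are illustrative, not from this diff):

import asyncio

async def store(graph, hippocampus):
    # Passing the hippocampus enables LLM-based integration of old and new
    # memories; passing None falls back to plain string storage.
    await graph.add_dot("coffee", "Bob bought coffee beans today", hippocampus)

# asyncio.run(store(memory_graph, hippocampus))
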
@@ -127,9 +155,8 @@ class MemoryGraph:
            concept, data = node_data
            if "memory_items" in data:
                memory_items = data["memory_items"]
-                if isinstance(memory_items, list):
-                    first_layer_items.extend(memory_items)
-                else:
+                # Use the complete memory content directly
+                if memory_items:
                    first_layer_items.append(memory_items)
        # Only fetch second-layer memories when depth == 2
@@ -140,12 +167,57 @@ class MemoryGraph:
            concept, data = node_data
            if "memory_items" in data:
                memory_items = data["memory_items"]
-                if isinstance(memory_items, list):
-                    second_layer_items.extend(memory_items)
-                else:
+                # Use the complete memory content directly
+                if memory_items:
                    second_layer_items.append(memory_items)
        return first_layer_items, second_layer_items
+    async def _integrate_memories_with_llm(self, existing_memory: str, new_memory: str, llm_model: LLMRequest) -> str:
+        """
+        Use the LLM to integrate old and new memory content.
+        Args:
+            existing_memory: existing memory content (a string; may contain several memories)
+            new_memory: new memory content
+            llm_model: LLM model instance
+        Returns:
+            str: the integrated memory content
+        """
+        try:
+            # Build the integration prompt
+            integration_prompt = f"""You are a memory-integration expert. Merge the old and new memories below into one more complete, more accurate memory.
+Old memory:
+{existing_memory}
+New memory:
+{new_memory}
+Requirements:
+1. Keep the important information and remove duplicates
+2. If the old and new memories conflict, reconcile the contradictions sensibly
+3. Merge related information into a more complete description
+4. Keep the language concise and accurate
+5. Return only the integrated memory, without any explanation
+Integrated memory:"""
+            # Call the LLM to integrate
+            content, (reasoning_content, model_name, tool_calls) = await llm_model.generate_response_async(integration_prompt)
+            if content and content.strip():
+                integrated_content = content.strip()
+                logger.debug(f"LLM memory integration succeeded, model: {model_name}")
+                return integrated_content
+            else:
+                logger.warning("LLM returned an empty integration result; falling back to simple concatenation")
+                return f"{existing_memory} | {new_memory}"
+        except Exception as e:
+            logger.error(f"Error during LLM memory integration: {e}")
+            return f"{existing_memory} | {new_memory}"
    @property
    def dots(self):
@@ -164,26 +236,19 @@ class MemoryGraph:
if "memory_items" in node_data:
memory_items = node_data["memory_items"]
# 确保memory_items是列表
if not isinstance(memory_items, list):
memory_items = [memory_items] if memory_items else []
# 如果有记忆项可以删除
# 既然每个节点现在是一个完整的记忆内容,直接删除整个节点
if memory_items:
# 随机选择一个记忆项删除
removed_item = random.choice(memory_items)
memory_items.remove(removed_item)
# 更新节点的记忆项
if memory_items:
self.G.nodes[topic]["memory_items"] = memory_items
else:
# 如果没有记忆项了,删除整个节点
self.G.remove_node(topic)
return removed_item
return None
# 删除整个节点
self.G.remove_node(topic)
return f"删除了节点 {topic} 的完整记忆: {memory_items[:50]}..." if len(memory_items) > 50 else f"删除了节点 {topic} 的完整记忆: {memory_items}"
else:
# 如果没有记忆项,删除该节点
self.G.remove_node(topic)
return None
else:
# 如果没有memory_items字段,删除节点
self.G.remove_node(topic)
return None
# Hippocampus
@@ -205,15 +270,46 @@ class Hippocampus:
    def get_all_node_names(self) -> list:
        """Return the names of all nodes in the memory graph."""
        return list(self.memory_graph.G.nodes())
+    def calculate_weighted_activation(self, current_activation: float, edge_strength: int, target_node: str) -> float:
+        """
+        Compute an activation value that takes node weight into account.
+        Args:
+            current_activation: current activation value
+            edge_strength: strength of the edge
+            target_node: name of the target node
+        Returns:
+            float: the computed activation value
+        """
+        # Base activation
+        base_activation = current_activation - (1 / edge_strength)
+        if base_activation <= 0:
+            return 0.0
+        # Weight of the target node
+        if target_node in self.memory_graph.G:
+            node_data = self.memory_graph.G.nodes[target_node]
+            node_weight = node_data.get("weight", 1.0)
+            # Weight bonus: each integration adds 10% activation, capped at +200%
+            weight_multiplier = 1.0 + min((node_weight - 1.0) * 0.1, 2.0)
+            return base_activation * weight_multiplier
+        else:
+            return base_activation
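
A worked example of the formula above, with illustrative numbers: current_activation = 1.0 and edge_strength = 2 give a base of 1.0 - 1/2 = 0.5; a node weight of 5.0 gives a multiplier of 1.0 + min(0.4, 2.0) = 1.4, hence roughly 0.7. A standalone sketch:

def weighted_activation(current_activation: float, edge_strength: int, node_weight: float = 1.0) -> float:
    # Mirrors calculate_weighted_activation: decay by 1/edge_strength, then a
    # 10%-per-integration weight bonus capped at +200%.
    base = current_activation - (1 / edge_strength)
    if base <= 0:
        return 0.0
    return base * (1.0 + min((node_weight - 1.0) * 0.1, 2.0))

print(weighted_activation(1.0, 2, 5.0))   # ~0.7
print(weighted_activation(1.0, 2, 50.0))  # 1.5, multiplier capped at 3.0
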
    @staticmethod
    def calculate_node_hash(concept, memory_items) -> int:
        """Compute a node's feature hash."""
-        if not isinstance(memory_items, list):
-            memory_items = [memory_items] if memory_items else []
+        # memory_items is already a str; split it on the separator
+        if memory_items:
+            unique_items = {item.strip() for item in memory_items.split(" | ") if item.strip()}
+        else:
+            unique_items = set()
-        # Use a set to deduplicate and avoid sorting
-        unique_items = {str(item) for item in memory_items}
        # Use a frozenset to keep the ordering consistent
        content = f"{concept}:{frozenset(unique_items)}"
        return hash(content)
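
One caveat worth noting: the frozenset makes the hash insensitive to item order, but str() of a set is an implementation detail and the built-in hash() is salted per process, so the value is only stable within a single run. A sketch of a fully deterministic variant (an alternative, not what this commit does) would sort the items and use a stable digest:

import hashlib

def stable_node_hash(concept: str, memory_items: str) -> str:
    # Deterministic across runs: sorted items plus sha256 instead of
    # set repr plus the per-process-salted built-in hash().
    items = sorted({i.strip() for i in (memory_items or "").split(" | ") if i.strip()})
    return hashlib.sha256(f"{concept}:{items}".encode("utf-8")).hexdigest()

print(stable_node_hash("coffee", "b | a") == stable_node_hash("coffee", "a | b"))  # True
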
@@ -234,7 +330,7 @@ class Hippocampus:
        topic_num_str = topic_num
        prompt = (
-            f"Here is a passage of text:\n{text}\n\nPlease summarize at most {topic_num_str} key concepts from it. They can be nouns, verbs, or specific people. List them for me, "
+            f"Here is a passage of text:\n{text}\n\nPlease summarize at most {topic_num_str} key concepts from it. Each must be some kind of concept, such as a person, thing, object, concept, event, or place. List them for me, "
            f"separated by commas and wrapped in <>, for example <topic1>,<topic2>... Keep them as concise as possible. List at most {topic_num} topics, with no numbering, and do not tell me anything else. "
            f"If you cannot find any topic or there is no obvious topic, return <none>."
        )
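
The prompt asks the model to wrap each topic in angle brackets. A hypothetical parsing step (the actual extraction code lives outside this hunk) could look like:

import re

def parse_topics(response: str) -> list:
    # Pull "<topic>" tokens out of the reply; "<none>" means no topics found.
    topics = [t.strip() for t in re.findall(r"<([^<>]+)>", response)]
    return [] if topics == ["none"] else topics

print(parse_topics("<weather>,<Bob>"))  # ['weather', 'Bob']
print(parse_topics("<none>"))           # []
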
@@ -245,8 +341,8 @@ class Hippocampus:
        # sourcery skip: inline-immediately-returned-variable
        # The time_info parameter is no longer needed
        prompt = (
-            f'Here is a passage of text:\n{text}\n\nBased on this text, summarize the concept "{topic}" for me in a natural sentence, '
-            f"including the concept's definition, content, and knowledge; this information must come from the text and nothing may be added.\nPlease include the time and the people involved. Output only that sentence."
+            f'Here is a passage of text:\n{text}\n\nBased on this text, summarize the concept "{topic}" for me in natural sentences, '
+            f"including the concept's definition, content, knowledge, time, and people; this information must come from the text and nothing may be added.\nOutput only a few natural sentences."
        )
        return prompt
@@ -271,9 +367,9 @@ class Hippocampus:
            max_depth (int, optional): memory retrieval depth, default 2. 1 fetches only directly related memories; 2 also fetches indirectly related ones.
        Returns:
-            list: a list of memories; each element is a tuple (topic, memory_items, similarity)
+            list: a list of memories; each element is a tuple (topic, memory_content, similarity)
                - topic: str, the memory topic
-                - memory_items: list, the list of memory items under that topic
+                - memory_content: str, the complete memory content under that topic
                - similarity: float, similarity to the keyword
        """
        if not keyword:
@@ -297,11 +393,10 @@ class Hippocampus:
            # If the similarity exceeds the threshold, fetch that node's memories
            if similarity >= 0.3:  # this threshold is tunable
                node_data = self.memory_graph.G.nodes[node]
-                memory_items = node_data.get("memory_items", [])
-                if not isinstance(memory_items, list):
-                    memory_items = [memory_items] if memory_items else []
-                memories.append((node, memory_items, similarity))
+                memory_items = node_data.get("memory_items", "")
+                # Use the complete memory content directly
+                if memory_items:
+                    memories.append((node, memory_items, similarity))
        # Sort by similarity in descending order
        memories.sort(key=lambda x: x[2], reverse=True)
@@ -378,10 +473,9 @@ class Hippocampus:
                If False, use the LLM to extract keywords; slower but more accurate.
        Returns:
-            list: a list of memories; each element is a tuple (topic, memory_items, similarity)
+            list: a list of memories; each element is a tuple (topic, memory_content)
                - topic: str, the memory topic
-                - memory_items: list, the list of memory items under that topic
-                - similarity: float, similarity to the text
+                - memory_content: str, the complete memory content under that topic
        """
        keywords = await self.get_keywords_from_text(text)
@@ -478,31 +572,22 @@ class Hippocampus:
        for node, activation in remember_map.items():
            logger.debug(f"Processing node '{node}' (activation: {activation:.2f}):")
            node_data = self.memory_graph.G.nodes[node]
-            memory_items = node_data.get("memory_items", [])
-            if not isinstance(memory_items, list):
-                memory_items = [memory_items] if memory_items else []
+            memory_items = node_data.get("memory_items", "")
+            # Use the complete memory content directly
            if memory_items:
-                logger.debug(f"Node contains {len(memory_items)} memories")
-                # Compute each memory's similarity to the input text
-                memory_similarities = []
-                for memory in memory_items:
-                    # Similarity to the input text
-                    memory_words = set(jieba.cut(memory))
-                    text_words = set(jieba.cut(text))
-                    all_words = memory_words | text_words
+                logger.debug("Node contains a complete memory")
+                # Compute the memory's similarity to the input text
+                memory_words = set(jieba.cut(memory_items))
+                text_words = set(jieba.cut(text))
+                all_words = memory_words | text_words
                if all_words:
                    # Compute the similarity (unused here, kept for logical consistency)
                    v1 = [1 if word in memory_words else 0 for word in all_words]
                    v2 = [1 if word in text_words else 0 for word in all_words]
-                    similarity = cosine_similarity(v1, v2)
-                    memory_similarities.append((memory, similarity))
-                # Sort by similarity
-                memory_similarities.sort(key=lambda x: x[1], reverse=True)
-                # Take the best-matching memories
-                top_memories = memory_similarities[:max_memory_length]
-                # Add them to the results
-                all_memories.extend((node, [memory], similarity) for memory, similarity in top_memories)
+                    _ = cosine_similarity(v1, v2)  # computed but unused; assigned to _
+                # Add the complete memory to the results
+                all_memories.append((node, memory_items, activation))
            else:
                logger.info("Node has no memories")
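
A self-contained sketch of the bag-of-words similarity computed in this loop (jieba segments the text; the sample sentences are illustrative):

import jieba
import numpy as np

def text_similarity(a: str, b: str) -> float:
    # Binary word-presence vectors over the union vocabulary, then cosine.
    wa, wb = set(jieba.cut(a)), set(jieba.cut(b))
    vocab = wa | wb
    if not vocab:
        return 0.0
    v1 = np.array([1 if w in wa else 0 for w in vocab])
    v2 = np.array([1 if w in wb else 0 for w in vocab])
    denom = float(np.linalg.norm(v1) * np.linalg.norm(v2))
    return float(v1 @ v2) / denom if denom else 0.0

print(text_similarity("今天天气很热", "今天天气不错"))  # overlap on 今天 / 天气
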
@@ -511,7 +596,8 @@ class Hippocampus:
        seen_memories = set()
        unique_memories = []
        for topic, memory_items, activation_value in all_memories:
-            memory = memory_items[0]  # each topic has only one memory
+            # memory_items is now a complete string
+            memory = memory_items if memory_items else ""
            if memory not in seen_memories:
                seen_memories.add(memory)
                unique_memories.append((topic, memory_items, activation_value))
@@ -522,7 +608,8 @@ class Hippocampus:
        # Convert to (keyword, memory) format
        result = []
        for topic, memory_items, _ in unique_memories:
-            memory = memory_items[0]  # each topic has only one memory
+            # memory_items is now a complete string
+            memory = memory_items if memory_items else ""
            result.append((topic, memory))
            logger.debug(f"Selected memory: {memory} (from node: {topic})")
@@ -544,10 +631,9 @@ class Hippocampus:
            max_depth (int, optional): memory retrieval depth, default 3. Larger values search more broadly and can reach more indirectly related memories, at the cost of speed.
        Returns:
-            list: a list of memories; each element is a tuple (topic, memory_items, similarity)
+            list: a list of memories; each element is a tuple (topic, memory_content)
                - topic: str, the memory topic
-                - memory_items: list, the list of memory items under that topic
-                - similarity: float, similarity to the text
+                - memory_content: str, the complete memory content under that topic
        """
        if not keywords:
            return []
@@ -642,31 +728,22 @@ class Hippocampus:
        for node, activation in remember_map.items():
            logger.debug(f"Processing node '{node}' (activation: {activation:.2f}):")
            node_data = self.memory_graph.G.nodes[node]
-            memory_items = node_data.get("memory_items", [])
-            if not isinstance(memory_items, list):
-                memory_items = [memory_items] if memory_items else []
+            memory_items = node_data.get("memory_items", "")
+            # Use the complete memory content directly
            if memory_items:
-                logger.debug(f"Node contains {len(memory_items)} memories")
-                # Compute each memory's similarity to the input text
-                memory_similarities = []
-                for memory in memory_items:
-                    # Similarity to the input text
-                    memory_words = set(jieba.cut(memory))
-                    text_words = set(keywords)
-                    all_words = memory_words | text_words
+                logger.debug("Node contains a complete memory")
+                # Compute the memory's similarity to the keywords
+                memory_words = set(jieba.cut(memory_items))
+                text_words = set(keywords)
+                all_words = memory_words | text_words
                if all_words:
                    # Compute the similarity (unused here, kept for logical consistency)
                    v1 = [1 if word in memory_words else 0 for word in all_words]
                    v2 = [1 if word in text_words else 0 for word in all_words]
-                    similarity = cosine_similarity(v1, v2)
-                    memory_similarities.append((memory, similarity))
-                # Sort by similarity
-                memory_similarities.sort(key=lambda x: x[1], reverse=True)
-                # Take the best-matching memories
-                top_memories = memory_similarities[:max_memory_length]
-                # Add them to the results
-                all_memories.extend((node, [memory], similarity) for memory, similarity in top_memories)
+                    _ = cosine_similarity(v1, v2)  # computed but unused; assigned to _
+                # Add the complete memory to the results
+                all_memories.append((node, memory_items, activation))
            else:
                logger.info("Node has no memories")
@@ -675,7 +752,8 @@ class Hippocampus:
        seen_memories = set()
        unique_memories = []
        for topic, memory_items, activation_value in all_memories:
-            memory = memory_items[0]  # each topic has only one memory
+            # memory_items is now a complete string
+            memory = memory_items if memory_items else ""
            if memory not in seen_memories:
                seen_memories.add(memory)
                unique_memories.append((topic, memory_items, activation_value))
@@ -686,7 +764,8 @@ class Hippocampus:
        # Convert to (keyword, memory) format
        result = []
        for topic, memory_items, _ in unique_memories:
-            memory = memory_items[0]  # each topic has only one memory
+            # memory_items is now a complete string
+            memory = memory_items if memory_items else ""
            result.append((topic, memory))
            logger.debug(f"Selected memory: {memory} (from node: {topic})")
@@ -894,11 +973,10 @@ class EntorhinalCortex:
                self.memory_graph.G.remove_node(concept)
                continue
-            memory_items = data.get("memory_items", [])
-            if not isinstance(memory_items, list):
-                memory_items = [memory_items] if memory_items else []
-            if not memory_items:
+            memory_items = data.get("memory_items", "")
+            # Check the string directly; no need to split it into a list
+            if not memory_items or memory_items.strip() == "":
                self.memory_graph.G.remove_node(concept)
                continue
@@ -907,21 +985,19 @@ class EntorhinalCortex:
            created_time = data.get("created_time", current_time)
            last_modified = data.get("last_modified", current_time)
-            # Convert memory_items to a JSON string
-            try:
-                memory_items = [str(item) for item in memory_items]
-                memory_items_json = json.dumps(memory_items, ensure_ascii=False)
-                if not memory_items_json:
-                    continue
-            except Exception:
-                self.memory_graph.G.remove_node(concept)
+            # memory_items is stored directly as a string; no JSON serialization needed
+            if not memory_items:
                continue
+            # Weight attribute
+            weight = data.get("weight", 1.0)
            if concept not in db_nodes:
                nodes_to_create.append(
                    {
                        "concept": concept,
-                        "memory_items": memory_items_json,
+                        "memory_items": memory_items,
+                        "weight": weight,
                        "hash": memory_hash,
                        "created_time": created_time,
                        "last_modified": last_modified,
@@ -933,7 +1009,8 @@ class EntorhinalCortex:
                nodes_to_update.append(
                    {
                        "concept": concept,
-                        "memory_items": memory_items_json,
+                        "memory_items": memory_items,
+                        "weight": weight,
                        "hash": memory_hash,
                        "last_modified": last_modified,
                    }
@@ -1031,8 +1108,8 @@ class EntorhinalCortex:
            GraphEdges.delete().where((GraphEdges.source == source) & (GraphEdges.target == target)).execute()
        end_time = time.time()
-        logger.info(f"[Sync] total time: {end_time - start_time:.2f}s")
-        logger.info(f"[Sync] synced {len(memory_nodes)} nodes and {len(memory_edges)} edges")
+        logger.info(f"[Database] sync complete, total time: {end_time - start_time:.2f}s")
+        logger.info(f"[Database] synced {len(nodes_to_create) + len(nodes_to_update)} nodes and {len(edges_to_create) + len(edges_to_update)} edges")
    async def resync_memory_to_db(self):
        """Clear the database and re-sync all memory data."""
@@ -1054,27 +1131,43 @@ class EntorhinalCortex:
        # Prepare node data in bulk
        nodes_data = []
        for concept, data in memory_nodes:
-            memory_items = data.get("memory_items", [])
-            if not isinstance(memory_items, list):
-                memory_items = [memory_items] if memory_items else []
-            try:
-                memory_items = [str(item) for item in memory_items]
-                if memory_items_json := json.dumps(memory_items, ensure_ascii=False):
-                    nodes_data.append(
-                        {
-                            "concept": concept,
-                            "memory_items": memory_items_json,
-                            "hash": self.hippocampus.calculate_node_hash(concept, memory_items),
-                            "created_time": data.get("created_time", current_time),
-                            "last_modified": data.get("last_modified", current_time),
-                        }
-                    )
-            except Exception as e:
-                logger.error(f"Error while preparing data for node {concept}: {e}")
+            memory_items = data.get("memory_items", "")
+            # Check the string directly; no need to split it into a list
+            if not memory_items or memory_items.strip() == "":
+                self.memory_graph.G.remove_node(concept)
+                continue
+            # Compute the in-memory node's feature hash
+            memory_hash = self.hippocampus.calculate_node_hash(concept, memory_items)
+            created_time = data.get("created_time", current_time)
+            last_modified = data.get("last_modified", current_time)
+            # memory_items is stored directly as a string; no JSON serialization needed
+            if not memory_items:
                continue
+            # Weight attribute
+            weight = data.get("weight", 1.0)
+            nodes_data.append(
+                {
+                    "concept": concept,
+                    "memory_items": memory_items,
+                    "weight": weight,
+                    "hash": memory_hash,
+                    "created_time": created_time,
+                    "last_modified": last_modified,
+                }
+            )
+        # Insert nodes in batches
+        if nodes_data:
+            batch_size = 100
+            for i in range(0, len(nodes_data), batch_size):
+                batch = nodes_data[i : i + batch_size]
+                GraphNodes.insert_many(batch).execute()
        # Prepare edge data in bulk
        edges_data = []
        for source, target, data in memory_edges:
@@ -1093,27 +1186,12 @@ class EntorhinalCortex:
logger.error(f"准备边 {source}-{target} 数据时发生错误: {e}")
continue
# 使用事务批量写入节点
node_start = time.time()
if nodes_data:
batch_size = 500 # 增加批量大小
with GraphNodes._meta.database.atomic(): # type: ignore
for i in range(0, len(nodes_data), batch_size):
batch = nodes_data[i : i + batch_size]
GraphNodes.insert_many(batch).execute()
node_end = time.time()
logger.info(f"[数据库] 写入 {len(nodes_data)} 个节点耗时: {node_end - node_start:.2f}")
# 使用事务批量写入边
edge_start = time.time()
# 批量插入边
if edges_data:
batch_size = 500 # 增加批量大小
with GraphEdges._meta.database.atomic(): # type: ignore
for i in range(0, len(edges_data), batch_size):
batch = edges_data[i : i + batch_size]
GraphEdges.insert_many(batch).execute()
edge_end = time.time()
logger.info(f"[数据库] 写入 {len(edges_data)} 条边耗时: {edge_end - edge_start:.2f}")
batch_size = 100
for i in range(0, len(edges_data), batch_size):
batch = edges_data[i : i + batch_size]
GraphEdges.insert_many(batch).execute()
end_time = time.time()
logger.info(f"[数据库] 重新同步完成,总耗时: {end_time - start_time:.2f}")
@@ -1126,19 +1204,30 @@ class EntorhinalCortex:
        # Clear the current graph
        self.memory_graph.G.clear()
+        # Load statistics
+        total_nodes = 0
+        loaded_nodes = 0
+        skipped_nodes = 0
        # Load all nodes from the database
        nodes = list(GraphNodes.select())
+        total_nodes = len(nodes)
        for node in nodes:
            concept = node.concept
            try:
-                memory_items = json.loads(node.memory_items)
-                if not isinstance(memory_items, list):
-                    memory_items = [memory_items] if memory_items else []
+                # Handle an empty string or None
+                if not node.memory_items or node.memory_items.strip() == "":
+                    logger.warning(f"Node {concept} has empty memory_items, skipping")
+                    skipped_nodes += 1
+                    continue
+                # Use memory_items directly
+                memory_items = node.memory_items.strip()
                # Check whether the time fields exist
                if not node.created_time or not node.last_modified:
                    need_update = True
                    # Update the node in the database
                    update_data = {}
                    if not node.created_time:
@@ -1146,18 +1235,24 @@ class EntorhinalCortex:
                    if not node.last_modified:
                        update_data["last_modified"] = current_time
-                    GraphNodes.update(**update_data).where(GraphNodes.concept == concept).execute()
+                    if update_data:
+                        GraphNodes.update(**update_data).where(GraphNodes.concept == concept).execute()
                # Time info (fall back to the current time if missing)
                created_time = node.created_time or current_time
                last_modified = node.last_modified or current_time
+                # Weight attribute
+                weight = node.weight if hasattr(node, 'weight') and node.weight is not None else 1.0
                # Add the node to the graph
                self.memory_graph.G.add_node(
-                    concept, memory_items=memory_items, created_time=created_time, last_modified=last_modified
+                    concept, memory_items=memory_items, weight=weight, created_time=created_time, last_modified=last_modified
                )
+                loaded_nodes += 1
            except Exception as e:
                logger.error(f"Error while loading node {concept}: {e}")
+                skipped_nodes += 1
                continue
        # Load all edges from the database
@@ -1193,6 +1288,9 @@ class EntorhinalCortex:
        if need_update:
            logger.info("[Database] filled in missing time fields")
+        # Log load statistics
+        logger.info(f"[Database] memory load complete: {total_nodes} nodes total, {loaded_nodes} loaded, {skipped_nodes} skipped")
# Responsible for integrating, forgetting, and merging memories
@@ -1338,7 +1436,7 @@ class ParahippocampalGyrus:
        all_added_nodes.extend(topic for topic, _ in compressed_memory)
        for topic, memory in compressed_memory:
-            self.memory_graph.add_dot(topic, memory)
+            await self.memory_graph.add_dot(topic, memory, self.hippocampus)
            all_topics.append(topic)
            if topic in similar_topics_dict:
@@ -1458,12 +1556,9 @@ class ParahippocampalGyrus:
            node_data = self.memory_graph.G.nodes[node]
            # First fetch the memory items
-            memory_items = node_data.get("memory_items", [])
-            if not isinstance(memory_items, list):
-                memory_items = [memory_items] if memory_items else []
-            # New: check whether the node is empty
-            if not memory_items:
+            memory_items = node_data.get("memory_items", "")
+            # Check directly whether the memory content is empty
+            if not memory_items or memory_items.strip() == "":
                try:
                    self.memory_graph.G.remove_node(node)
                    node_changes["removed"].append(f"{node} (empty node)")  # marked as removed-empty
@@ -1474,31 +1569,24 @@ class ParahippocampalGyrus:
            # --- If the node is not empty, run the original inactivity check and removal logic ---
            last_modified = node_data.get("last_modified", current_time)
-            # Condition 1: check whether unmodified for a long time (over 24 hours)
-            if current_time - last_modified > 3600 * 24 and memory_items:
-                current_count = len(memory_items)
-                # Only pick randomly if the list is non-empty
-                if current_count > 0:
-                    removed_item = random.choice(memory_items)
-                    try:
-                        memory_items.remove(removed_item)
-                        # Condition 3: check whether memory_items became empty after removal
-                        if memory_items:  # still non-empty after removal
-                            # self.memory_graph.G.nodes[node]["memory_items"] = memory_items  # mutating the list is enough
-                            self.memory_graph.G.nodes[node]["last_modified"] = current_time  # refresh the modified time
-                            node_changes["reduced"].append(f"{node} (count: {current_count} -> {len(memory_items)})")
-                        else:  # empty after removal
-                            # Try to remove the node, handling possible errors
-                            try:
-                                self.memory_graph.G.remove_node(node)
-                                node_changes["removed"].append(f"{node} (forgotten empty)")  # marked as forgotten-empty
-                                logger.debug(f"[Forget] node {node} was emptied by removing its last item.")
-                            except nx.NetworkXError as e:
-                                logger.warning(f"[Forget] error while removing node {node} (it may already be gone): {e}")
-                    except ValueError:
-                        # This should not happen in theory, since removed_item came from memory_items
-                        logger.warning(f"[Forget] tried to remove nonexistent item '{removed_item[:30]}...' from node '{node}'")
+            node_weight = node_data.get("weight", 1.0)
+            # Condition 1: check whether unmodified for a long time (using the configured forget time)
+            time_threshold = 3600 * global_config.memory.memory_forget_time
+            # Adjust the forgetting threshold by weight: the higher the weight, the longer until forgotten
+            # Weight 1 uses the default threshold; higher weights raise it, making the node harder to forget
+            adjusted_threshold = time_threshold * node_weight
+            if current_time - last_modified > adjusted_threshold and memory_items:
+                # Each node is now one complete memory, so delete the whole node
+                try:
+                    self.memory_graph.G.remove_node(node)
+                    node_changes["removed"].append(f"{node} (long unmodified, weight {node_weight:.1f})")
+                    logger.debug(f"[Forget] removed long-unmodified node: {node} (weight: {node_weight:.1f})")
+                except nx.NetworkXError as e:
+                    logger.warning(f"[Forget] error while removing node {node} (it may already be gone): {e}")
+                continue
        node_check_end = time.time()
        logger.info(f"[Forget] node check took {node_check_end - node_check_start:.2f}s")
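
A worked example of the weight-adjusted forgetting threshold (memory_forget_time is assumed to be 24 hours here purely for illustration):

def is_forgotten(idle_seconds: float, forget_hours: float, node_weight: float) -> bool:
    # A node is forgotten once its idle time exceeds the base threshold
    # scaled by its weight: more heavily consolidated nodes survive longer.
    return idle_seconds > 3600 * forget_hours * node_weight

print(is_forgotten(30 * 3600, 24, 1.0))  # True: 30h idle vs. a 24h threshold
print(is_forgotten(30 * 3600, 24, 2.0))  # False: weight 2.0 raises it to 48h
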
@@ -1537,118 +1625,7 @@ class ParahippocampalGyrus:
        end_time = time.time()
        logger.info(f"[Forget] total time: {end_time - start_time:.2f}s")
-    async def operation_consolidate_memory(self):
-        """Consolidate memories: merge similar memory items within a node."""
-        start_time = time.time()
-        percentage = global_config.memory.consolidate_memory_percentage
-        similarity_threshold = global_config.memory.consolidation_similarity_threshold
-        logger.info(f"[Consolidate] checking memory nodes... check ratio: {percentage:.2%}, merge threshold: {similarity_threshold}")
-        # Collect all nodes with at least 2 memory items
-        eligible_nodes = []
-        for node, data in self.memory_graph.G.nodes(data=True):
-            memory_items = data.get("memory_items", [])
-            if isinstance(memory_items, list) and len(memory_items) >= 2:
-                eligible_nodes.append(node)
-        if not eligible_nodes:
-            logger.info("[Consolidate] no nodes with multiple memory items found; nothing to consolidate.")
-            return
-        # Compute how many nodes to check
-        check_nodes_count = max(1, min(len(eligible_nodes), int(len(eligible_nodes) * percentage)))
-        # Randomly sample nodes to check
-        try:
-            nodes_to_check = random.sample(eligible_nodes, check_nodes_count)
-        except ValueError as e:
-            logger.error(f"[Consolidate] error while sampling nodes: {e}")
-            return
-        logger.info(f"[Consolidate] will check {len(nodes_to_check)} / {len(eligible_nodes)} eligible nodes.")
-        merged_count = 0
-        nodes_modified = set()
-        current_timestamp = datetime.datetime.now().timestamp()
-        for node in nodes_to_check:
-            node_data = self.memory_graph.G.nodes[node]
-            memory_items = node_data.get("memory_items", [])
-            if not isinstance(memory_items, list) or len(memory_items) < 2:
-                continue  # double check; in theory unreachable
-            items_copy = list(memory_items)  # copy for safe iteration and mutation
-            # Iterate over all pairs of memory items
-            for item1, item2 in combinations(items_copy, 2):
-                # Make sure item1 and item2 are still in the original list (an earlier merge may have removed them)
-                if item1 not in memory_items or item2 not in memory_items:
-                    continue
-                similarity = self._calculate_item_similarity(item1, item2)
-                if similarity >= similarity_threshold:
-                    logger.debug(f"[Consolidate] similar items found in node '{node}' (similarity: {similarity:.2f}):")
-                    logger.debug(f"  - '{item1}'")
-                    logger.debug(f"  - '{item2}'")
-                    # Compare information content
-                    info1 = calculate_information_content(item1)
-                    info2 = calculate_information_content(item2)
-                    if info1 >= info2:
-                        item_to_keep = item1
-                        item_to_remove = item2
-                    else:
-                        item_to_keep = item2
-                        item_to_remove = item1
-                    # Remove the lower-information item from the original list
-                    try:
-                        memory_items.remove(item_to_remove)
-                        logger.info(
-                            f"[Consolidate] merged memories in node '{node}', kept: '{item_to_keep[:60]}...', removed: '{item_to_remove[:60]}...'"
-                        )
-                        merged_count += 1
-                        nodes_modified.add(node)
-                        node_data["last_modified"] = current_timestamp  # refresh the modified time
-                        _merged_in_this_node = True
-                        break  # merge only one pair per node per check
-                    except ValueError:
-                        # Skip items that were already removed (e.g. kept in an earlier iteration)
-                        logger.warning(
-                            f"[Consolidate] tried to remove nonexistent item '{item_to_remove[:30]}...' from node '{node}'; it may already have been merged."
-                        )
-                        continue
-            # # If merges happened in this node, update the node data (unsafe: would drop other attributes)
-            # if merged_in_this_node:
-            #     self.memory_graph.G.nodes[node]["memory_items"] = memory_items
-        if merged_count > 0:
-            logger.info(f"[Consolidate] merged {merged_count} pairs of similar memory items across {len(nodes_modified)} nodes.")
-            sync_start = time.time()
-            logger.info("[Consolidate] syncing changes to the database...")
-            # Use resync to handle deletions and additions more safely
-            await self.hippocampus.entorhinal_cortex.resync_memory_to_db()
-            sync_end = time.time()
-            logger.info(f"[Consolidate] database sync took {sync_end - sync_start:.2f}s")
-        else:
-            logger.info("[Consolidate] no memory items needed merging this time.")
-        end_time = time.time()
-        logger.info(f"[Consolidate] consolidation check finished, total time: {end_time - start_time:.2f}s")
-    @staticmethod
-    def _calculate_item_similarity(item1: str, item2: str) -> float:
-        """Compute the cosine similarity of two memory-item texts."""
-        words1 = set(jieba.cut(item1))
-        words2 = set(jieba.cut(item2))
-        all_words = words1 | words2
-        if not all_words:
-            return 0.0
-        v1 = [1 if word in words1 else 0 for word in all_words]
-        v2 = [1 if word in words2 else 0 for word in all_words]
-        return cosine_similarity(v1, v2)
class HippocampusManager:
@@ -1698,13 +1675,7 @@ class HippocampusManager:
            raise RuntimeError("HippocampusManager is not initialized; call initialize() first")
        return await self._hippocampus.parahippocampal_gyrus.operation_forget_topic(percentage)
-    async def consolidate_memory(self):
-        """Public interface for consolidating memories."""
-        if not self._initialized:
-            raise RuntimeError("HippocampusManager is not initialized; call initialize() first")
-        # Note: operation_consolidate_memory currently reads the config directly, so the percentage parameter has no effect for now
-        # To control the ratio externally, operation_consolidate_memory would need to change
-        return await self._hippocampus.parahippocampal_gyrus.operation_consolidate_memory()
    async def get_memory_from_text(
        self,
@@ -1768,3 +1739,5 @@ class HippocampusManager:
# Create the global instance
hippocampus_manager = HippocampusManager()