feat: update memory system
@@ -4,14 +4,14 @@ import math
 import random
 import time
 import re
 import json
 import jieba
 import networkx as nx
 import numpy as np

-from itertools import combinations
-from typing import List, Tuple, Coroutine, Any, Set
+from typing import List, Tuple, Set, Coroutine, Any
+from collections import Counter
+from itertools import combinations


 from rich.traceback import install

 from src.llm_models.utils_model import LLMRequest
@@ -25,6 +25,15 @@ from src.chat.utils.chat_message_builder import (
     get_raw_msg_by_timestamp_with_chat,
 )  # import build_readable_messages
 from src.chat.utils.utils import translate_timestamp_to_human_readable

+# add a cosine_similarity helper
+def cosine_similarity(v1, v2):
+    """Compute the cosine similarity of two vectors."""
+    dot_product = np.dot(v1, v2)
+    norm1 = np.linalg.norm(v1)
+    norm2 = np.linalg.norm(v2)
+    if norm1 == 0 or norm2 == 0:
+        return 0
+    return dot_product / (norm1 * norm2)

 install(extra_lines=3)
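The helper added above works on plain numeric vectors; the retrieval code later in this diff feeds it binary bag-of-words vectors built from jieba tokens. A minimal, self-contained sketch of that usage (the sample strings are made up):

    import jieba
    import numpy as np

    def cosine_similarity(v1, v2):
        """Compute the cosine similarity of two vectors."""
        dot_product = np.dot(v1, v2)
        norm1 = np.linalg.norm(v1)
        norm2 = np.linalg.norm(v2)
        if norm1 == 0 or norm2 == 0:
            return 0
        return dot_product / (norm1 * norm2)

    # build binary bag-of-words vectors over the union vocabulary, as the retrieval code does
    words1 = set(jieba.cut("今天天气很好"))
    words2 = set(jieba.cut("今天天气不错"))
    all_words = words1 | words2
    v1 = [1 if w in words1 else 0 for w in all_words]
    v2 = [1 if w in words2 else 0 for w in all_words]
    print(cosine_similarity(v1, v2))  # a value in [0, 1]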
@@ -44,19 +53,18 @@ def calculate_information_content(text):
     return entropy


 def cosine_similarity(v1, v2):  # sourcery skip: assign-if-exp, reintroduce-else
     """Compute the cosine similarity of two vectors."""
     dot_product = np.dot(v1, v2)
     norm1 = np.linalg.norm(v1)
     norm2 = np.linalg.norm(v2)
     if norm1 == 0 or norm2 == 0:
         return 0
     return dot_product / (norm1 * norm2)

-
 logger = get_logger("memory")


 class MemoryGraph:
     def __init__(self):
         self.G = nx.Graph()  # use a networkx graph structure
@@ -83,26 +91,46 @@ class MemoryGraph:
             last_modified=current_time,
         )  # add last-modified time

-    def add_dot(self, concept, memory):
+    async def add_dot(self, concept, memory, hippocampus_instance=None):
         current_time = datetime.datetime.now().timestamp()

         if concept in self.G:
             if "memory_items" in self.G.nodes[concept]:
-                if not isinstance(self.G.nodes[concept]["memory_items"], list):
-                    self.G.nodes[concept]["memory_items"] = [self.G.nodes[concept]["memory_items"]]
-                self.G.nodes[concept]["memory_items"].append(memory)
+                # the existing memory items (already stored as a str)
+                existing_memory = self.G.nodes[concept]["memory_items"]
+
+                # if there is existing memory, merge old and new with the LLM
+                if existing_memory and hippocampus_instance and hippocampus_instance.model_small:
+                    try:
+                        integrated_memory = await self._integrate_memories_with_llm(
+                            existing_memory, str(memory), hippocampus_instance.model_small
+                        )
+                        self.G.nodes[concept]["memory_items"] = integrated_memory
+                        # integration succeeded, so increase the weight
+                        current_weight = self.G.nodes[concept].get("weight", 0.0)
+                        self.G.nodes[concept]["weight"] = current_weight + 1.0
+                        logger.debug(f"节点 {concept} 记忆整合成功,权重增加到 {current_weight + 1.0}")
+                    except Exception as e:
+                        logger.error(f"LLM整合记忆失败: {e}")
+                        # fall back to simple concatenation
+                        new_memory_str = f"{existing_memory} | {memory}"
+                        self.G.nodes[concept]["memory_items"] = new_memory_str
+                else:
+                    new_memory_str = str(memory)
+                    self.G.nodes[concept]["memory_items"] = new_memory_str
             else:
-                self.G.nodes[concept]["memory_items"] = [memory]
+                self.G.nodes[concept]["memory_items"] = str(memory)
             # if the node exists without memory_items, this is its first memory, so set created_time
             if "created_time" not in self.G.nodes[concept]:
                 self.G.nodes[concept]["created_time"] = current_time
             # update the last-modified time
             self.G.nodes[concept]["last_modified"] = current_time
         else:
-            # for a new node, create a new memory list
+            # for a new node, create a new memory string
             self.G.add_node(
                 concept,
-                memory_items=[memory],
+                memory_items=str(memory),
+                weight=1.0,  # new nodes start with weight 1.0
                 created_time=current_time,  # add creation time
                 last_modified=current_time,
             )  # add last-modified time
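Since add_dot is now a coroutine, every call site has to await it (a hunk near the end of this diff updates ParahippocampalGyrus accordingly). A hypothetical sketch of driving it from synchronous code, assuming a MemoryGraph instance and an initialized Hippocampus are already wired up:

    import asyncio

    # hypothetical wiring: graph and hippocampus come from the surrounding system
    async def store(graph, hippocampus):
        # merges with any existing memory on the node and bumps its weight on success
        await graph.add_dot("天气", "今天下午群友讨论了天气", hippocampus)

    # from synchronous code, run the coroutine on an event loop:
    # asyncio.run(store(memory_graph, hippocampus))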
@@ -127,9 +155,8 @@ class MemoryGraph:
             concept, data = node_data
             if "memory_items" in data:
                 memory_items = data["memory_items"]
-                if isinstance(memory_items, list):
-                    first_layer_items.extend(memory_items)
-                else:
+                # use the full memory content directly
+                if memory_items:
                     first_layer_items.append(memory_items)

         # fetch second-layer memories only when depth=2
@@ -140,12 +167,57 @@ class MemoryGraph:
             concept, data = node_data
             if "memory_items" in data:
                 memory_items = data["memory_items"]
-                if isinstance(memory_items, list):
-                    second_layer_items.extend(memory_items)
-                else:
+                # use the full memory content directly
+                if memory_items:
                     second_layer_items.append(memory_items)

         return first_layer_items, second_layer_items

+    async def _integrate_memories_with_llm(self, existing_memory: str, new_memory: str, llm_model: LLMRequest) -> str:
+        """
+        Merge old and new memory content with an LLM.
+
+        Args:
+            existing_memory: existing memory content (a string, possibly holding several memories)
+            new_memory: the new memory content
+            llm_model: the LLM model instance
+
+        Returns:
+            str: the merged memory content
+        """
+        try:
+            # build the integration prompt
+            integration_prompt = f"""你是一个记忆整合专家。请将以下的旧记忆和新记忆整合成一条更完整、更准确的记忆内容。
+
+旧记忆内容:
+{existing_memory}
+
+新记忆内容:
+{new_memory}
+
+整合要求:
+1. 保留重要信息,去除重复内容
+2. 如果新旧记忆有冲突,合理整合矛盾的地方
+3. 将相关信息合并,形成更完整的描述
+4. 保持语言简洁、准确
+5. 只返回整合后的记忆内容,不要添加任何解释
+
+整合后的记忆:"""
+
+            # call the LLM to do the merge
+            content, (reasoning_content, model_name, tool_calls) = await llm_model.generate_response_async(integration_prompt)
+
+            if content and content.strip():
+                integrated_content = content.strip()
+                logger.debug(f"LLM记忆整合成功,模型: {model_name}")
+                return integrated_content
+            else:
+                logger.warning("LLM返回的整合结果为空,使用默认连接方式")
+                return f"{existing_memory} | {new_memory}"
+
+        except Exception as e:
+            logger.error(f"LLM记忆整合过程中出错: {e}")
+            return f"{existing_memory} | {new_memory}"
+
     @property
     def dots(self):
@@ -164,26 +236,19 @@ class MemoryGraph:
         if "memory_items" in node_data:
             memory_items = node_data["memory_items"]

-            # ensure memory_items is a list
-            if not isinstance(memory_items, list):
-                memory_items = [memory_items] if memory_items else []
-
-            # if there are memory items that can be deleted
+            # each node now holds one complete memory, so delete the whole node
             if memory_items:
-                # randomly pick one memory item to delete
-                removed_item = random.choice(memory_items)
-                memory_items.remove(removed_item)
-
-                # update the node's memory items
-                if memory_items:
-                    self.G.nodes[topic]["memory_items"] = memory_items
-                else:
-                    # if no memory items remain, delete the whole node
-                    self.G.remove_node(topic)
-
-                return removed_item
-
-            return None
+                # delete the whole node
+                self.G.remove_node(topic)
+                return f"删除了节点 {topic} 的完整记忆: {memory_items[:50]}..." if len(memory_items) > 50 else f"删除了节点 {topic} 的完整记忆: {memory_items}"
+            else:
+                # no memory items, so delete the node
+                self.G.remove_node(topic)
+                return None
+        else:
+            # no memory_items field, so delete the node
+            self.G.remove_node(topic)
+            return None
@@ -205,15 +270,46 @@ class Hippocampus:
     def get_all_node_names(self) -> list:
         """Return the names of all nodes in the memory graph."""
         return list(self.memory_graph.G.nodes())

+    def calculate_weighted_activation(self, current_activation: float, edge_strength: int, target_node: str) -> float:
+        """
+        Compute an activation value that takes node weight into account.
+
+        Args:
+            current_activation: the current activation value
+            edge_strength: the strength of the edge
+            target_node: the name of the target node
+
+        Returns:
+            float: the resulting activation value
+        """
+        # base activation
+        base_activation = current_activation - (1 / edge_strength)
+
+        if base_activation <= 0:
+            return 0.0
+
+        # fetch the target node's weight
+        if target_node in self.memory_graph.G:
+            node_data = self.memory_graph.G.nodes[target_node]
+            node_weight = node_data.get("weight", 1.0)
+
+            # weight bonus: each integration adds 10% activation, capped at +200%
+            weight_multiplier = 1.0 + min((node_weight - 1.0) * 0.1, 2.0)
+
+            return base_activation * weight_multiplier
+        else:
+            return base_activation
+
     @staticmethod
     def calculate_node_hash(concept, memory_items) -> int:
         """Compute a node's feature hash."""
-        if not isinstance(memory_items, list):
-            memory_items = [memory_items] if memory_items else []
+        # memory_items is already a str, so split it on the separator
+        if memory_items:
+            unique_items = {item.strip() for item in memory_items.split(" | ") if item.strip()}
+        else:
+            unique_items = set()

-        # use a set to deduplicate and avoid sorting
-        unique_items = {str(item) for item in memory_items}
         # use a frozenset so the hash is order-independent
         content = f"{concept}:{frozenset(unique_items)}"
         return hash(content)
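The weight bonus above is easy to sanity-check by hand. A small sketch of the same formula with a few illustrative weights (the sample values are hypothetical; each successful integration adds +1.0 to a node's weight):

    def weight_multiplier(node_weight: float) -> float:
        # +10% activation per unit of weight above 1.0, capped at +200%
        return 1.0 + min((node_weight - 1.0) * 0.1, 2.0)

    print(weight_multiplier(1.0))   # 1.0 -> no bonus for a fresh node
    print(weight_multiplier(5.0))   # 1.4 -> four integrations, +40%
    print(weight_multiplier(50.0))  # 3.0 -> cap reached at +200%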
@@ -234,7 +330,7 @@ class Hippocampus:
             topic_num_str = topic_num

         prompt = (
-            f"这是一段文字:\n{text}\n\n请你从这段话中总结出最多{topic_num_str}个关键的概念,可以是名词,动词,或者特定人物,帮我列出来,"
+            f"这是一段文字:\n{text}\n\n请你从这段话中总结出最多{topic_num_str}个关键的概念,必须是某种概念,比如人,事,物,概念,事件,地点 等等,帮我列出来,"
             f"将主题用逗号隔开,并加上<>,例如<主题1>,<主题2>......尽可能精简。只需要列举最多{topic_num}个话题就好,不要有序号,不要告诉我其他内容。"
             f"如果确定找不出主题或者没有明显主题,返回<none>。"
         )
@@ -245,8 +341,8 @@ class Hippocampus:
         # sourcery skip: inline-immediately-returned-variable
         # the time_info parameter is no longer needed
         prompt = (
-            f'这是一段文字:\n{text}\n\n我想让你基于这段文字来概括"{topic}"这个概念,帮我总结成一句自然的话,'
-            f"要求包含对这个概念的定义,内容,知识,但是这些信息必须来自这段文字,不能添加信息。\n,请包含时间和人物。只输出这句话就好"
+            f'这是一段文字:\n{text}\n\n我想让你基于这段文字来概括"{topic}"这个概念,帮我总结成几句自然的话,'
+            f"要求包含对这个概念的定义,内容,知识,时间和人物,这些信息必须来自这段文字,不能添加信息。\n只输出几句自然的话就好"
         )
         return prompt
@@ -271,9 +367,9 @@ class Hippocampus:
             max_depth (int, optional): memory retrieval depth, default 2. 1 fetches only directly related memories; 2 also fetches indirectly related ones.

         Returns:
-            list: memory list, where each element is a tuple (topic, memory_items, similarity)
+            list: memory list, where each element is a tuple (topic, memory_content, similarity)
                 - topic: str, the memory topic
-                - memory_items: list, the memory items under that topic
+                - memory_content: str, the full memory content under that topic
                 - similarity: float, similarity to the keyword
         """
         if not keyword:
@@ -297,11 +393,10 @@ class Hippocampus:
             # if the similarity exceeds the threshold, fetch that node's memories
             if similarity >= 0.3:  # this threshold is tunable
                 node_data = self.memory_graph.G.nodes[node]
-                memory_items = node_data.get("memory_items", [])
-                if not isinstance(memory_items, list):
-                    memory_items = [memory_items] if memory_items else []
-
-                memories.append((node, memory_items, similarity))
+                memory_items = node_data.get("memory_items", "")
+                # use the full memory content directly
+                if memory_items:
+                    memories.append((node, memory_items, similarity))

         # sort by similarity, descending
         memories.sort(key=lambda x: x[2], reverse=True)
@@ -378,10 +473,9 @@ class Hippocampus:
                 If False, extract keywords with the LLM: slower but more accurate.

         Returns:
-            list: memory list, where each element is a tuple (topic, memory_items, similarity)
+            list: memory list, where each element is a tuple (topic, memory_content)
                 - topic: str, the memory topic
-                - memory_items: list, the memory items under that topic
-                - similarity: float, similarity to the text
+                - memory_content: str, the full memory content under that topic
         """
         keywords = await self.get_keywords_from_text(text)
@@ -478,31 +572,22 @@ class Hippocampus:
         for node, activation in remember_map.items():
             logger.debug(f"处理节点 '{node}' (激活值: {activation:.2f}):")
             node_data = self.memory_graph.G.nodes[node]
-            memory_items = node_data.get("memory_items", [])
-            if not isinstance(memory_items, list):
-                memory_items = [memory_items] if memory_items else []
-
+            memory_items = node_data.get("memory_items", "")
+            # use the full memory content directly
             if memory_items:
-                logger.debug(f"节点包含 {len(memory_items)} 条记忆")
-                # compute each memory's similarity to the input text
-                memory_similarities = []
-                for memory in memory_items:
-                    # similarity to the input text
-                    memory_words = set(jieba.cut(memory))
-                    text_words = set(jieba.cut(text))
-                    all_words = memory_words | text_words
-                    if all_words:
-                        v1 = [1 if word in memory_words else 0 for word in all_words]
-                        v2 = [1 if word in text_words else 0 for word in all_words]
-                        similarity = cosine_similarity(v1, v2)
-                        memory_similarities.append((memory, similarity))
-
-                # sort by similarity
-                memory_similarities.sort(key=lambda x: x[1], reverse=True)
-                # take the best-matching memories
-                top_memories = memory_similarities[:max_memory_length]
-
-                # add them to the results
-                all_memories.extend((node, [memory], similarity) for memory, similarity in top_memories)
+                logger.debug("节点包含完整记忆")
+                # compute the similarity between the memory and the input text
+                memory_words = set(jieba.cut(memory_items))
+                text_words = set(jieba.cut(text))
+                all_words = memory_words | text_words
+                if all_words:
+                    # computed for logical consistency even though the value is unused here
+                    v1 = [1 if word in memory_words else 0 for word in all_words]
+                    v2 = [1 if word in text_words else 0 for word in all_words]
+                    _ = cosine_similarity(v1, v2)  # computed but unused; assigned to _
+
+                # add the full memory to the results
+                all_memories.append((node, memory_items, activation))
             else:
                 logger.info("节点没有记忆")
@@ -511,7 +596,8 @@ class Hippocampus:
         seen_memories = set()
         unique_memories = []
         for topic, memory_items, activation_value in all_memories:
-            memory = memory_items[0]  # each topic carries only one memory
+            # memory_items is now a full string
+            memory = memory_items if memory_items else ""
             if memory not in seen_memories:
                 seen_memories.add(memory)
                 unique_memories.append((topic, memory_items, activation_value))
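With memory_items now a single string per node, the dedup step above compares whole memory contents across topics rather than first list elements. A small illustrative sketch of that step (the data is made up):

    # each entry is (topic, memory_content, activation) in the new string format
    all_memories = [
        ("天气", "今天下午群友讨论了天气", 0.8),
        ("下午", "今天下午群友讨论了天气", 0.6),  # same content reached via another topic
    ]

    seen_memories, unique_memories = set(), []
    for topic, memory_items, activation_value in all_memories:
        memory = memory_items if memory_items else ""
        if memory not in seen_memories:
            seen_memories.add(memory)
            unique_memories.append((topic, memory_items, activation_value))

    print(unique_memories)  # only the first tuple survives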
@@ -522,7 +608,8 @@ class Hippocampus:
         # convert to (keyword, memory) format
         result = []
         for topic, memory_items, _ in unique_memories:
-            memory = memory_items[0]  # each topic carries only one memory
+            # memory_items is now a full string
+            memory = memory_items if memory_items else ""
             result.append((topic, memory))
             logger.debug(f"选中记忆: {memory} (来自节点: {topic})")
@@ -544,10 +631,9 @@ class Hippocampus:
             max_depth (int, optional): memory retrieval depth, default 3. Larger values widen the search and surface more indirectly related memories, at the cost of speed.

         Returns:
-            list: memory list, where each element is a tuple (topic, memory_items, similarity)
+            list: memory list, where each element is a tuple (topic, memory_content)
                 - topic: str, the memory topic
-                - memory_items: list, the memory items under that topic
-                - similarity: float, similarity to the text
+                - memory_content: str, the full memory content under that topic
         """
         if not keywords:
             return []
@@ -642,31 +728,22 @@ class Hippocampus:
         for node, activation in remember_map.items():
             logger.debug(f"处理节点 '{node}' (激活值: {activation:.2f}):")
             node_data = self.memory_graph.G.nodes[node]
-            memory_items = node_data.get("memory_items", [])
-            if not isinstance(memory_items, list):
-                memory_items = [memory_items] if memory_items else []
-
+            memory_items = node_data.get("memory_items", "")
+            # use the full memory content directly
             if memory_items:
-                logger.debug(f"节点包含 {len(memory_items)} 条记忆")
-                # compute each memory's similarity to the input text
-                memory_similarities = []
-                for memory in memory_items:
-                    # similarity to the input text
-                    memory_words = set(jieba.cut(memory))
-                    text_words = set(keywords)
-                    all_words = memory_words | text_words
-                    if all_words:
-                        v1 = [1 if word in memory_words else 0 for word in all_words]
-                        v2 = [1 if word in text_words else 0 for word in all_words]
-                        similarity = cosine_similarity(v1, v2)
-                        memory_similarities.append((memory, similarity))
-
-                # sort by similarity
-                memory_similarities.sort(key=lambda x: x[1], reverse=True)
-                # take the best-matching memories
-                top_memories = memory_similarities[:max_memory_length]
-
-                # add them to the results
-                all_memories.extend((node, [memory], similarity) for memory, similarity in top_memories)
+                logger.debug("节点包含完整记忆")
+                # compute the similarity between the memory and the keywords
+                memory_words = set(jieba.cut(memory_items))
+                text_words = set(keywords)
+                all_words = memory_words | text_words
+                if all_words:
+                    # computed for logical consistency even though the value is unused here
+                    v1 = [1 if word in memory_words else 0 for word in all_words]
+                    v2 = [1 if word in text_words else 0 for word in all_words]
+                    _ = cosine_similarity(v1, v2)  # computed but unused; assigned to _
+
+                # add the full memory to the results
+                all_memories.append((node, memory_items, activation))
             else:
                 logger.info("节点没有记忆")
@@ -675,7 +752,8 @@ class Hippocampus:
         seen_memories = set()
         unique_memories = []
         for topic, memory_items, activation_value in all_memories:
-            memory = memory_items[0]  # each topic carries only one memory
+            # memory_items is now a full string
+            memory = memory_items if memory_items else ""
             if memory not in seen_memories:
                 seen_memories.add(memory)
                 unique_memories.append((topic, memory_items, activation_value))
@@ -686,7 +764,8 @@ class Hippocampus:
         # convert to (keyword, memory) format
         result = []
         for topic, memory_items, _ in unique_memories:
-            memory = memory_items[0]  # each topic carries only one memory
+            # memory_items is now a full string
+            memory = memory_items if memory_items else ""
             result.append((topic, memory))
             logger.debug(f"选中记忆: {memory} (来自节点: {topic})")
@@ -894,11 +973,10 @@ class EntorhinalCortex:
                 self.memory_graph.G.remove_node(concept)
                 continue

-            memory_items = data.get("memory_items", [])
-            if not isinstance(memory_items, list):
-                memory_items = [memory_items] if memory_items else []
-
-            if not memory_items:
+            memory_items = data.get("memory_items", "")
+
+            # check the string directly; no need to split it into a list
+            if not memory_items or memory_items.strip() == "":
                 self.memory_graph.G.remove_node(concept)
                 continue
@@ -907,21 +985,19 @@ class EntorhinalCortex:
             created_time = data.get("created_time", current_time)
             last_modified = data.get("last_modified", current_time)

-            # convert memory_items to a JSON string
-            try:
-                memory_items = [str(item) for item in memory_items]
-                memory_items_json = json.dumps(memory_items, ensure_ascii=False)
-                if not memory_items_json:
-                    continue
-            except Exception:
-                self.memory_graph.G.remove_node(concept)
+            # memory_items is stored directly as a string; no JSON serialization needed
+            if not memory_items:
                 continue

+            # fetch the weight attribute
+            weight = data.get("weight", 1.0)
+
             if concept not in db_nodes:
                 nodes_to_create.append(
                     {
                         "concept": concept,
-                        "memory_items": memory_items_json,
+                        "memory_items": memory_items,
+                        "weight": weight,
                         "hash": memory_hash,
                         "created_time": created_time,
                         "last_modified": last_modified,
@@ -933,7 +1009,8 @@ class EntorhinalCortex:
                 nodes_to_update.append(
                     {
                         "concept": concept,
-                        "memory_items": memory_items_json,
+                        "memory_items": memory_items,
+                        "weight": weight,
                         "hash": memory_hash,
                         "last_modified": last_modified,
                     }
@@ -1031,8 +1108,8 @@ class EntorhinalCortex:
                 GraphEdges.delete().where((GraphEdges.source == source) & (GraphEdges.target == target)).execute()

         end_time = time.time()
-        logger.info(f"[同步] 总耗时: {end_time - start_time:.2f}秒")
-        logger.info(f"[同步] 同步了 {len(memory_nodes)} 个节点和 {len(memory_edges)} 条边")
+        logger.info(f"[数据库] 同步完成,总耗时: {end_time - start_time:.2f}秒")
+        logger.info(f"[数据库] 同步了 {len(nodes_to_create) + len(nodes_to_update)} 个节点和 {len(edges_to_create) + len(edges_to_update)} 条边")

     async def resync_memory_to_db(self):
         """Clear the database and resync all memory data."""
@@ -1054,27 +1131,43 @@ class EntorhinalCortex:
         # batch-prepare the node data
         nodes_data = []
         for concept, data in memory_nodes:
-            memory_items = data.get("memory_items", [])
-            if not isinstance(memory_items, list):
-                memory_items = [memory_items] if memory_items else []
-
-            try:
-                memory_items = [str(item) for item in memory_items]
-                if memory_items_json := json.dumps(memory_items, ensure_ascii=False):
-                    nodes_data.append(
-                        {
-                            "concept": concept,
-                            "memory_items": memory_items_json,
-                            "hash": self.hippocampus.calculate_node_hash(concept, memory_items),
-                            "created_time": data.get("created_time", current_time),
-                            "last_modified": data.get("last_modified", current_time),
-                        }
-                    )
-
-            except Exception as e:
-                logger.error(f"准备节点 {concept} 数据时发生错误: {e}")
+            memory_items = data.get("memory_items", "")
+
+            # check the string directly; no need to split it into a list
+            if not memory_items or memory_items.strip() == "":
+                self.memory_graph.G.remove_node(concept)
+                continue
+
+            # compute the in-memory node's feature hash
+            memory_hash = self.hippocampus.calculate_node_hash(concept, memory_items)
+            created_time = data.get("created_time", current_time)
+            last_modified = data.get("last_modified", current_time)
+
+            # memory_items is stored directly as a string; no JSON serialization needed
+            if not memory_items:
+                continue
+
+            # fetch the weight attribute
+            weight = data.get("weight", 1.0)
+
+            nodes_data.append(
+                {
+                    "concept": concept,
+                    "memory_items": memory_items,
+                    "weight": weight,
+                    "hash": memory_hash,
+                    "created_time": created_time,
+                    "last_modified": last_modified,
+                }
+            )
+
+        # batch-insert the nodes
+        if nodes_data:
+            batch_size = 100
+            for i in range(0, len(nodes_data), batch_size):
+                batch = nodes_data[i : i + batch_size]
+                GraphNodes.insert_many(batch).execute()

         # batch-prepare the edge data
         edges_data = []
         for source, target, data in memory_edges:
@@ -1093,27 +1186,12 @@ class EntorhinalCortex:
                 logger.error(f"准备边 {source}-{target} 数据时发生错误: {e}")
                 continue

-        # batch-write the nodes inside a transaction
-        node_start = time.time()
-        if nodes_data:
-            batch_size = 500  # larger batch size
-            with GraphNodes._meta.database.atomic():  # type: ignore
-                for i in range(0, len(nodes_data), batch_size):
-                    batch = nodes_data[i : i + batch_size]
-                    GraphNodes.insert_many(batch).execute()
-        node_end = time.time()
-        logger.info(f"[数据库] 写入 {len(nodes_data)} 个节点耗时: {node_end - node_start:.2f}秒")
-
-        # batch-write the edges inside a transaction
-        edge_start = time.time()
+        # batch-insert the edges
         if edges_data:
-            batch_size = 500  # larger batch size
-            with GraphEdges._meta.database.atomic():  # type: ignore
-                for i in range(0, len(edges_data), batch_size):
-                    batch = edges_data[i : i + batch_size]
-                    GraphEdges.insert_many(batch).execute()
-        edge_end = time.time()
-        logger.info(f"[数据库] 写入 {len(edges_data)} 条边耗时: {edge_end - edge_start:.2f}秒")
+            batch_size = 100
+            for i in range(0, len(edges_data), batch_size):
+                batch = edges_data[i : i + batch_size]
+                GraphEdges.insert_many(batch).execute()

         end_time = time.time()
         logger.info(f"[数据库] 重新同步完成,总耗时: {end_time - start_time:.2f}秒")
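The resync path now inserts plain 100-row chunks instead of 500-row chunks wrapped in a transaction. A minimal sketch of the chunked-insert pattern with peewee, assuming a SQLite database and a hypothetical Node model standing in for GraphNodes:

    from peewee import Model, SqliteDatabase, TextField

    db = SqliteDatabase("memory.db")

    class Node(Model):  # hypothetical stand-in for GraphNodes
        concept = TextField()
        memory_items = TextField()

        class Meta:
            database = db

    def insert_in_chunks(rows: list, batch_size: int = 100) -> None:
        # insert_many builds one multi-row INSERT per batch: far fewer round trips than row-by-row
        for i in range(0, len(rows), batch_size):
            Node.insert_many(rows[i : i + batch_size]).execute()

    db.connect()
    db.create_tables([Node])
    insert_in_chunks([{"concept": f"c{i}", "memory_items": "..."} for i in range(250)])

Wrapping each batch in db.atomic(), as the removed code did, would make batches transactional; the new code trades that guarantee for simplicity.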
@@ -1126,19 +1204,30 @@ class EntorhinalCortex:

         # clear the current graph
         self.memory_graph.G.clear()

+        # track loading statistics
+        total_nodes = 0
+        loaded_nodes = 0
+        skipped_nodes = 0
+
         # load all nodes from the database
         nodes = list(GraphNodes.select())
+        total_nodes = len(nodes)
+
         for node in nodes:
             concept = node.concept
             try:
-                memory_items = json.loads(node.memory_items)
-                if not isinstance(memory_items, list):
-                    memory_items = [memory_items] if memory_items else []
+                # handle empty strings or None
+                if not node.memory_items or node.memory_items.strip() == "":
+                    logger.warning(f"节点 {concept} 的memory_items为空,跳过")
+                    skipped_nodes += 1
+                    continue
+
+                # use memory_items directly
+                memory_items = node.memory_items.strip()

                 # check whether the time fields exist
                 if not node.created_time or not node.last_modified:
                     need_update = True
                     # update the node in the database
                     update_data = {}
                     if not node.created_time:
@@ -1146,18 +1235,24 @@ class EntorhinalCortex:
                     if not node.last_modified:
                         update_data["last_modified"] = current_time

-                    GraphNodes.update(**update_data).where(GraphNodes.concept == concept).execute()
+                    if update_data:
+                        GraphNodes.update(**update_data).where(GraphNodes.concept == concept).execute()

                 # fetch the time info (fall back to the current time if missing)
                 created_time = node.created_time or current_time
                 last_modified = node.last_modified or current_time

+                # fetch the weight attribute
+                weight = node.weight if hasattr(node, 'weight') and node.weight is not None else 1.0
+
                 # add the node to the graph
                 self.memory_graph.G.add_node(
-                    concept, memory_items=memory_items, created_time=created_time, last_modified=last_modified
+                    concept, memory_items=memory_items, weight=weight, created_time=created_time, last_modified=last_modified
                 )
+                loaded_nodes += 1
             except Exception as e:
                 logger.error(f"加载节点 {concept} 时发生错误: {e}")
+                skipped_nodes += 1
                 continue

         # load all edges from the database
@@ -1193,6 +1288,9 @@ class EntorhinalCortex:

         if need_update:
             logger.info("[数据库] 已为缺失的时间字段进行补充")

+        # log the loading statistics
+        logger.info(f"[数据库] 记忆加载完成: 总计 {total_nodes} 个节点, 成功加载 {loaded_nodes} 个, 跳过 {skipped_nodes} 个")


 # handles consolidation, forgetting, and merging of memories
@@ -1338,7 +1436,7 @@ class ParahippocampalGyrus:
             all_added_nodes.extend(topic for topic, _ in compressed_memory)

             for topic, memory in compressed_memory:
-                self.memory_graph.add_dot(topic, memory)
+                await self.memory_graph.add_dot(topic, memory, self.hippocampus)
                 all_topics.append(topic)

                 if topic in similar_topics_dict:
@@ -1458,12 +1556,9 @@ class ParahippocampalGyrus:
             node_data = self.memory_graph.G.nodes[node]

-            # first fetch the memory items
-            memory_items = node_data.get("memory_items", [])
-            if not isinstance(memory_items, list):
-                memory_items = [memory_items] if memory_items else []
-
-            # new: check whether the node is empty
-            if not memory_items:
+            memory_items = node_data.get("memory_items", "")
+            # check directly whether the memory content is empty
+            if not memory_items or memory_items.strip() == "":
                 try:
                     self.memory_graph.G.remove_node(node)
                     node_changes["removed"].append(f"{node}(空节点)")  # mark as removed-empty
@@ -1474,31 +1569,24 @@ class ParahippocampalGyrus:

             # --- if the node is not empty, run the old inactivity check and removal logic ---
             last_modified = node_data.get("last_modified", current_time)
-            # condition 1: check for prolonged inactivity (more than 24 hours)
-            if current_time - last_modified > 3600 * 24 and memory_items:
-                current_count = len(memory_items)
-                # only sample if the list is non-empty
-                if current_count > 0:
-                    removed_item = random.choice(memory_items)
-                    try:
-                        memory_items.remove(removed_item)
-
-                        # condition 3: check whether memory_items became empty after removal
-                        if memory_items:  # the list is still non-empty
-                            # self.memory_graph.G.nodes[node]["memory_items"] = memory_items  # mutating the list is enough
-                            self.memory_graph.G.nodes[node]["last_modified"] = current_time  # refresh the modification time
-                            node_changes["reduced"].append(f"{node} (数量: {current_count} -> {len(memory_items)})")
-                        else:  # the list became empty
-                            # try to remove the node, handling possible errors
-                            try:
-                                self.memory_graph.G.remove_node(node)
-                                node_changes["removed"].append(f"{node}(遗忘清空)")  # mark as forgotten-empty
-                                logger.debug(f"[遗忘] 节点 {node} 因移除最后一项而被清空。")
-                            except nx.NetworkXError as e:
-                                logger.warning(f"[遗忘] 尝试移除节点 {node} 时发生错误(可能已被移除):{e}")
-                    except ValueError:
-                        # this should never happen, since removed_item comes from memory_items
-                        logger.warning(f"[遗忘] 尝试从节点 '{node}' 移除不存在的项目 '{removed_item[:30]}...'")
+            node_weight = node_data.get("weight", 1.0)
+
+            # condition 1: check for prolonged inactivity (using the configured forget time)
+            time_threshold = 3600 * global_config.memory.memory_forget_time
+
+            # scale the forgetting threshold by weight: the heavier the node, the longer before it is forgotten
+            # weight 1 uses the default threshold; higher weights raise it (harder to forget)
+            adjusted_threshold = time_threshold * node_weight
+
+            if current_time - last_modified > adjusted_threshold and memory_items:
+                # each node is now one complete memory, so delete the whole node
+                try:
+                    self.memory_graph.G.remove_node(node)
+                    node_changes["removed"].append(f"{node}(长时间未修改,权重{node_weight:.1f})")
+                    logger.debug(f"[遗忘] 移除了长时间未修改的节点: {node} (权重: {node_weight:.1f})")
+                except nx.NetworkXError as e:
+                    logger.warning(f"[遗忘] 移除节点 {node} 时发生错误(可能已被移除): {e}")
                 continue
         node_check_end = time.time()
         logger.info(f"[遗忘] 节点检查耗时: {node_check_end - node_check_start:.2f}秒")
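The forgetting threshold now scales linearly with node weight, so frequently integrated memories survive longer. A quick numeric sketch, assuming a configured memory_forget_time of 24 hours (this config value is illustrative):

    MEMORY_FORGET_TIME_HOURS = 24  # illustrative stand-in for global_config.memory.memory_forget_time

    def adjusted_threshold_seconds(node_weight: float) -> float:
        # higher weight -> longer inactivity required before the node is forgotten
        return 3600 * MEMORY_FORGET_TIME_HOURS * node_weight

    print(adjusted_threshold_seconds(1.0) / 3600)  # 24.0 hours for a fresh node
    print(adjusted_threshold_seconds(3.0) / 3600)  # 72.0 hours after two successful integrations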
@@ -1537,118 +1625,7 @@ class ParahippocampalGyrus:
         end_time = time.time()
         logger.info(f"[遗忘] 总耗时: {end_time - start_time:.2f}秒")

-    async def operation_consolidate_memory(self):
-        """Consolidate memory: merge similar memory items within a node."""
-        start_time = time.time()
-        percentage = global_config.memory.consolidate_memory_percentage
-        similarity_threshold = global_config.memory.consolidation_similarity_threshold
-        logger.info(f"[整合] 开始检查记忆节点... 检查比例: {percentage:.2%}, 合并阈值: {similarity_threshold}")
-
-        # collect all nodes with at least 2 memory items
-        eligible_nodes = []
-        for node, data in self.memory_graph.G.nodes(data=True):
-            memory_items = data.get("memory_items", [])
-            if isinstance(memory_items, list) and len(memory_items) >= 2:
-                eligible_nodes.append(node)
-
-        if not eligible_nodes:
-            logger.info("[整合] 没有找到包含多个记忆项的节点,无需整合。")
-            return
-
-        # compute how many nodes to check
-        check_nodes_count = max(1, min(len(eligible_nodes), int(len(eligible_nodes) * percentage)))
-
-        # randomly sample nodes to check
-        try:
-            nodes_to_check = random.sample(eligible_nodes, check_nodes_count)
-        except ValueError as e:
-            logger.error(f"[整合] 抽样节点时出错: {e}")
-            return
-
-        logger.info(f"[整合] 将检查 {len(nodes_to_check)} / {len(eligible_nodes)} 个符合条件的节点。")
-
-        merged_count = 0
-        nodes_modified = set()
-        current_timestamp = datetime.datetime.now().timestamp()
-
-        for node in nodes_to_check:
-            node_data = self.memory_graph.G.nodes[node]
-            memory_items = node_data.get("memory_items", [])
-            if not isinstance(memory_items, list) or len(memory_items) < 2:
-                continue  # double check; in theory this never triggers
-
-            items_copy = list(memory_items)  # copy for safe iteration and mutation
-
-            # iterate over all pairs of memory items
-            for item1, item2 in combinations(items_copy, 2):
-                # make sure item1 and item2 are still in the original list (an earlier merge may have removed them)
-                if item1 not in memory_items or item2 not in memory_items:
-                    continue
-
-                similarity = self._calculate_item_similarity(item1, item2)
-
-                if similarity >= similarity_threshold:
-                    logger.debug(f"[整合] 节点 '{node}' 中发现相似项 (相似度: {similarity:.2f}):")
-                    logger.debug(f"  - '{item1}'")
-                    logger.debug(f"  - '{item2}'")
-
-                    # compare information content
-                    info1 = calculate_information_content(item1)
-                    info2 = calculate_information_content(item2)
-
-                    if info1 >= info2:
-                        item_to_keep = item1
-                        item_to_remove = item2
-                    else:
-                        item_to_keep = item2
-                        item_to_remove = item1
-
-                    # remove the lower-information item from the original list
-                    try:
-                        memory_items.remove(item_to_remove)
-                        logger.info(
-                            f"[整合] 已合并节点 '{node}' 中的记忆,保留: '{item_to_keep[:60]}...', 移除: '{item_to_remove[:60]}...'"
-                        )
-                        merged_count += 1
-                        nodes_modified.add(node)
-                        node_data["last_modified"] = current_timestamp  # refresh the modification time
-                        _merged_in_this_node = True
-                        break  # merge only one pair per node per check
-                    except ValueError:
-                        # the item may already be gone (e.g. kept as item_to_keep earlier); skip
-                        logger.warning(
-                            f"[整合] 尝试移除节点 '{node}' 中不存在的项 '{item_to_remove[:30]}...',可能已被合并。"
-                        )
-                        continue
-            # # if a merge happened in this node, rewrite the node data (unsafe: would drop other attributes)
-            # if merged_in_this_node:
-            #     self.memory_graph.G.nodes[node]["memory_items"] = memory_items
-
-        if merged_count > 0:
-            logger.info(f"[整合] 共合并了 {merged_count} 对相似记忆项,分布在 {len(nodes_modified)} 个节点中。")
-            sync_start = time.time()
-            logger.info("[整合] 开始将变更同步到数据库...")
-            # use resync to handle deletions and additions more safely
-            await self.hippocampus.entorhinal_cortex.resync_memory_to_db()
-            sync_end = time.time()
-            logger.info(f"[整合] 数据库同步耗时: {sync_end - sync_start:.2f}秒")
-        else:
-            logger.info("[整合] 本次检查未发现需要合并的记忆项。")
-
-        end_time = time.time()
-        logger.info(f"[整合] 整合检查完成,总耗时: {end_time - start_time:.2f}秒")
-
-    @staticmethod
-    def _calculate_item_similarity(item1: str, item2: str) -> float:
-        """Compute the cosine similarity of two memory-item texts."""
-        words1 = set(jieba.cut(item1))
-        words2 = set(jieba.cut(item2))
-        all_words = words1 | words2
-        if not all_words:
-            return 0.0
-        v1 = [1 if word in words1 else 0 for word in all_words]
-        v2 = [1 if word in words2 else 0 for word in all_words]
-        return cosine_similarity(v1, v2)


 class HippocampusManager:
@@ -1698,13 +1675,7 @@ class HippocampusManager:
             raise RuntimeError("HippocampusManager 尚未初始化,请先调用 initialize 方法")
         return await self._hippocampus.parahippocampal_gyrus.operation_forget_topic(percentage)

-    async def consolidate_memory(self):
-        """Public interface for memory consolidation."""
-        if not self._initialized:
-            raise RuntimeError("HippocampusManager 尚未初始化,请先调用 initialize 方法")
-        # note: operation_consolidate_memory currently reads the config directly, so the percentage parameter has no effect
-        # to control the ratio externally, operation_consolidate_memory would need changes
-        return await self._hippocampus.parahippocampal_gyrus.operation_consolidate_memory()
-
     async def get_memory_from_text(
         self,
@@ -1768,3 +1739,5 @@ class HippocampusManager:

 # create the global instance
 hippocampus_manager = HippocampusManager()
+
+