feat: update the memory system

This commit is contained in:
SengokuCola
2025-08-13 23:17:28 +08:00
parent 3962fc601f
commit fed0c0fd04
10 changed files with 732 additions and 406 deletions


@@ -4,14 +4,14 @@ import math
import random
import time
import re
import json
import jieba
import networkx as nx
import numpy as np
-from itertools import combinations
-from typing import List, Tuple, Coroutine, Any, Set
+from typing import List, Tuple, Set, Coroutine, Any
from collections import Counter
+from itertools import combinations
from rich.traceback import install
from src.llm_models.utils_model import LLMRequest
@@ -25,6 +25,15 @@ from src.chat.utils.chat_message_builder import (
    get_raw_msg_by_timestamp_with_chat,
)  # import build_readable_messages
from src.chat.utils.utils import translate_timestamp_to_human_readable
+# Add a cosine_similarity helper
+def cosine_similarity(v1, v2):
+    """Compute the cosine similarity of two vectors."""
+    dot_product = np.dot(v1, v2)
+    norm1 = np.linalg.norm(v1)
+    norm2 = np.linalg.norm(v2)
+    if norm1 == 0 or norm2 == 0:
+        return 0
+    return dot_product / (norm1 * norm2)
install(extra_lines=3)
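
For reference, a minimal sketch of how this helper behaves on the binary bag-of-words vectors used later in this file (the vocabulary and vectors are illustrative, not from the repository):

import numpy as np

# Presence vectors over a shared vocabulary ["a", "b", "c"]:
v1 = [1, 1, 0]  # a text containing the words "a" and "b"
v2 = [1, 0, 1]  # a text containing the words "a" and "c"
dot = np.dot(v1, v2)                                   # 1 shared word
cos = dot / (np.linalg.norm(v1) * np.linalg.norm(v2))  # 1 / (sqrt(2) * sqrt(2))
print(cos)  # 0.5
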
@@ -44,19 +53,18 @@ def calculate_information_content(text):
    return entropy
-def cosine_similarity(v1, v2):  # sourcery skip: assign-if-exp, reintroduce-else
-    """Compute the cosine similarity of two vectors."""
-    dot_product = np.dot(v1, v2)
-    norm1 = np.linalg.norm(v1)
-    norm2 = np.linalg.norm(v2)
-    if norm1 == 0 or norm2 == 0:
-        return 0
-    return dot_product / (norm1 * norm2)
logger = get_logger("memory")
class MemoryGraph:
    def __init__(self):
        self.G = nx.Graph()  # use networkx's graph structure
@@ -83,26 +91,46 @@ class MemoryGraph:
            last_modified=current_time,
        )  # record the last-modified time
-    def add_dot(self, concept, memory):
+    async def add_dot(self, concept, memory, hippocampus_instance=None):
        current_time = datetime.datetime.now().timestamp()
        if concept in self.G:
            if "memory_items" in self.G.nodes[concept]:
-                if not isinstance(self.G.nodes[concept]["memory_items"], list):
-                    self.G.nodes[concept]["memory_items"] = [self.G.nodes[concept]["memory_items"]]
-                self.G.nodes[concept]["memory_items"].append(memory)
+                # The existing memory item is already a str
+                existing_memory = self.G.nodes[concept]["memory_items"]
+                # If there is existing memory, use the LLM to integrate old and new memories
+                if existing_memory and hippocampus_instance and hippocampus_instance.model_small:
+                    try:
+                        integrated_memory = await self._integrate_memories_with_llm(
+                            existing_memory, str(memory), hippocampus_instance.model_small
+                        )
+                        self.G.nodes[concept]["memory_items"] = integrated_memory
+                        # Integration succeeded; increase the weight
+                        current_weight = self.G.nodes[concept].get("weight", 0.0)
+                        self.G.nodes[concept]["weight"] = current_weight + 1.0
+                        logger.debug(f"Node {concept} memory integrated; weight raised to {current_weight + 1.0}")
+                    except Exception as e:
+                        logger.error(f"LLM memory integration failed: {e}")
+                        # Fall back to simple concatenation
+                        new_memory_str = f"{existing_memory} | {memory}"
+                        self.G.nodes[concept]["memory_items"] = new_memory_str
+                else:
+                    new_memory_str = str(memory)
+                    self.G.nodes[concept]["memory_items"] = new_memory_str
            else:
-                self.G.nodes[concept]["memory_items"] = [memory]
+                self.G.nodes[concept]["memory_items"] = str(memory)
            # If the node exists but had no memory_items, this is its first memory; set created_time
            if "created_time" not in self.G.nodes[concept]:
                self.G.nodes[concept]["created_time"] = current_time
            # Update the last-modified time
            self.G.nodes[concept]["last_modified"] = current_time
        else:
-            # For a new node, create a new memory list
+            # For a new node, create a new memory string
            self.G.add_node(
                concept,
-                memory_items=[memory],
+                memory_items=str(memory),
                weight=1.0,  # new nodes start with weight 1.0
                created_time=current_time,  # set creation time
                last_modified=current_time,
            )  # set last-modified time
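
Because add_dot is now a coroutine, call sites must await it. A minimal usage sketch, assuming live MemoryGraph and Hippocampus instances (the names here are illustrative, not from this diff):

import asyncio

async def store(graph, hippocampus):
    # Passing the hippocampus enables LLM-based integration of old and new
    # memories; passing None falls back to plain string storage.
    await graph.add_dot("coffee", "Bob bought coffee beans today", hippocampus)

# asyncio.run(store(memory_graph, hippocampus))
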
@@ -127,9 +155,8 @@ class MemoryGraph:
            concept, data = node_data
            if "memory_items" in data:
                memory_items = data["memory_items"]
-                if isinstance(memory_items, list):
-                    first_layer_items.extend(memory_items)
-                else:
+                # Use the complete memory content directly
+                if memory_items:
                    first_layer_items.append(memory_items)
        # Only fetch second-layer memories when depth == 2
@@ -140,12 +167,57 @@ class MemoryGraph:
            concept, data = node_data
            if "memory_items" in data:
                memory_items = data["memory_items"]
-                if isinstance(memory_items, list):
-                    second_layer_items.extend(memory_items)
-                else:
+                # Use the complete memory content directly
+                if memory_items:
                    second_layer_items.append(memory_items)
        return first_layer_items, second_layer_items
+    async def _integrate_memories_with_llm(self, existing_memory: str, new_memory: str, llm_model: LLMRequest) -> str:
+        """
+        Use the LLM to integrate old and new memory content.
+        Args:
+            existing_memory: existing memory content (a string; may contain several memories)
+            new_memory: new memory content
+            llm_model: LLM model instance
+        Returns:
+            str: the integrated memory content
+        """
+        try:
+            # Build the integration prompt
+            integration_prompt = f"""You are a memory-integration expert. Merge the old and new memories below into one more complete, more accurate memory.
+Old memory:
+{existing_memory}
+New memory:
+{new_memory}
+Requirements:
+1. Keep the important information and remove duplicates
+2. If the old and new memories conflict, reconcile the contradictions sensibly
+3. Merge related information into a more complete description
+4. Keep the language concise and accurate
+5. Return only the integrated memory, without any explanation
+Integrated memory:"""
+            # Call the LLM to integrate
+            content, (reasoning_content, model_name, tool_calls) = await llm_model.generate_response_async(integration_prompt)
+            if content and content.strip():
+                integrated_content = content.strip()
+                logger.debug(f"LLM memory integration succeeded, model: {model_name}")
+                return integrated_content
+            else:
+                logger.warning("LLM returned an empty integration result; falling back to simple concatenation")
+                return f"{existing_memory} | {new_memory}"
+        except Exception as e:
+            logger.error(f"Error during LLM memory integration: {e}")
+            return f"{existing_memory} | {new_memory}"
    @property
    def dots(self):
@@ -164,26 +236,19 @@ class MemoryGraph:
if "memory_items" in node_data:
memory_items = node_data["memory_items"]
# 确保memory_items是列表
if not isinstance(memory_items, list):
memory_items = [memory_items] if memory_items else []
# 如果有记忆项可以删除
# 既然每个节点现在是一个完整的记忆内容,直接删除整个节点
if memory_items:
# 随机选择一个记忆项删除
removed_item = random.choice(memory_items)
memory_items.remove(removed_item)
# 更新节点的记忆项
if memory_items:
self.G.nodes[topic]["memory_items"] = memory_items
else:
# 如果没有记忆项了,删除整个节点
self.G.remove_node(topic)
return removed_item
return None
# 删除整个节点
self.G.remove_node(topic)
return f"删除了节点 {topic} 的完整记忆: {memory_items[:50]}..." if len(memory_items) > 50 else f"删除了节点 {topic} 的完整记忆: {memory_items}"
else:
# 如果没有记忆项,删除该节点
self.G.remove_node(topic)
return None
else:
# 如果没有memory_items字段,删除节点
self.G.remove_node(topic)
return None
# Hippocampus
@@ -205,15 +270,46 @@ class Hippocampus:
    def get_all_node_names(self) -> list:
        """Return the names of all nodes in the memory graph."""
        return list(self.memory_graph.G.nodes())
+    def calculate_weighted_activation(self, current_activation: float, edge_strength: int, target_node: str) -> float:
+        """
+        Compute an activation value that takes node weight into account.
+        Args:
+            current_activation: current activation value
+            edge_strength: strength of the edge
+            target_node: name of the target node
+        Returns:
+            float: the computed activation value
+        """
+        # Base activation
+        base_activation = current_activation - (1 / edge_strength)
+        if base_activation <= 0:
+            return 0.0
+        # Weight of the target node
+        if target_node in self.memory_graph.G:
+            node_data = self.memory_graph.G.nodes[target_node]
+            node_weight = node_data.get("weight", 1.0)
+            # Weight bonus: each integration adds 10% activation, capped at +200%
+            weight_multiplier = 1.0 + min((node_weight - 1.0) * 0.1, 2.0)
+            return base_activation * weight_multiplier
+        else:
+            return base_activation
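
A worked example of the formula above, with illustrative numbers: current_activation = 1.0 and edge_strength = 2 give a base of 1.0 - 1/2 = 0.5; a node weight of 5.0 gives a multiplier of 1.0 + min(0.4, 2.0) = 1.4, hence roughly 0.7. A standalone sketch:

def weighted_activation(current_activation: float, edge_strength: int, node_weight: float = 1.0) -> float:
    # Mirrors calculate_weighted_activation: decay by 1/edge_strength, then a
    # 10%-per-integration weight bonus capped at +200%.
    base = current_activation - (1 / edge_strength)
    if base <= 0:
        return 0.0
    return base * (1.0 + min((node_weight - 1.0) * 0.1, 2.0))

print(weighted_activation(1.0, 2, 5.0))   # ~0.7
print(weighted_activation(1.0, 2, 50.0))  # 1.5, multiplier capped at 3.0
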
    @staticmethod
    def calculate_node_hash(concept, memory_items) -> int:
        """Compute a node's feature hash."""
-        if not isinstance(memory_items, list):
-            memory_items = [memory_items] if memory_items else []
+        # memory_items is already a str; split it on the separator
+        if memory_items:
+            unique_items = {item.strip() for item in memory_items.split(" | ") if item.strip()}
+        else:
+            unique_items = set()
-        # Use a set to deduplicate and avoid sorting
-        unique_items = {str(item) for item in memory_items}
        # Use a frozenset to keep the ordering consistent
        content = f"{concept}:{frozenset(unique_items)}"
        return hash(content)
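
One caveat worth noting: the frozenset makes the hash insensitive to item order, but str() of a set is an implementation detail and the built-in hash() is salted per process, so the value is only stable within a single run. A sketch of a fully deterministic variant (an alternative, not what this commit does) would sort the items and use a stable digest:

import hashlib

def stable_node_hash(concept: str, memory_items: str) -> str:
    # Deterministic across runs: sorted items plus sha256 instead of
    # set repr plus the per-process-salted built-in hash().
    items = sorted({i.strip() for i in (memory_items or "").split(" | ") if i.strip()})
    return hashlib.sha256(f"{concept}:{items}".encode("utf-8")).hexdigest()

print(stable_node_hash("coffee", "b | a") == stable_node_hash("coffee", "a | b"))  # True
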
@@ -234,7 +330,7 @@ class Hippocampus:
        topic_num_str = topic_num
        prompt = (
-            f"Here is a passage of text:\n{text}\n\nPlease summarize at most {topic_num_str} key concepts from it. They can be nouns, verbs, or specific people. List them for me, "
+            f"Here is a passage of text:\n{text}\n\nPlease summarize at most {topic_num_str} key concepts from it. Each must be some kind of concept, such as a person, thing, object, concept, event, or place. List them for me, "
            f"separated by commas and wrapped in <>, for example <topic1>,<topic2>... Keep them as concise as possible. List at most {topic_num} topics, with no numbering, and do not tell me anything else. "
            f"If you cannot find any topic or there is no obvious topic, return <none>."
        )
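
The prompt asks the model to wrap each topic in angle brackets. A hypothetical parsing step (the actual extraction code lives outside this hunk) could look like:

import re

def parse_topics(response: str) -> list:
    # Pull "<topic>" tokens out of the reply; "<none>" means no topics found.
    topics = [t.strip() for t in re.findall(r"<([^<>]+)>", response)]
    return [] if topics == ["none"] else topics

print(parse_topics("<weather>,<Bob>"))  # ['weather', 'Bob']
print(parse_topics("<none>"))           # []
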
@@ -245,8 +341,8 @@ class Hippocampus:
        # sourcery skip: inline-immediately-returned-variable
        # The time_info parameter is no longer needed
        prompt = (
-            f'Here is a passage of text:\n{text}\n\nBased on this text, summarize the concept "{topic}" for me in a natural sentence, '
-            f"including the concept's definition, content, and knowledge; this information must come from the text and nothing may be added.\nPlease include the time and the people involved. Output only that sentence."
+            f'Here is a passage of text:\n{text}\n\nBased on this text, summarize the concept "{topic}" for me in natural sentences, '
+            f"including the concept's definition, content, knowledge, time, and people; this information must come from the text and nothing may be added.\nOutput only a few natural sentences."
        )
        return prompt
@@ -271,9 +367,9 @@ class Hippocampus:
            max_depth (int, optional): memory retrieval depth, default 2. 1 fetches only directly related memories; 2 also fetches indirectly related ones.
        Returns:
-            list: a list of memories; each element is a tuple (topic, memory_items, similarity)
+            list: a list of memories; each element is a tuple (topic, memory_content, similarity)
                - topic: str, the memory topic
-                - memory_items: list, the list of memory items under that topic
+                - memory_content: str, the complete memory content under that topic
                - similarity: float, similarity to the keyword
        """
        if not keyword:
@@ -297,11 +393,10 @@ class Hippocampus:
            # If the similarity exceeds the threshold, fetch that node's memories
            if similarity >= 0.3:  # this threshold is tunable
                node_data = self.memory_graph.G.nodes[node]
-                memory_items = node_data.get("memory_items", [])
-                if not isinstance(memory_items, list):
-                    memory_items = [memory_items] if memory_items else []
-                memories.append((node, memory_items, similarity))
+                memory_items = node_data.get("memory_items", "")
+                # Use the complete memory content directly
+                if memory_items:
+                    memories.append((node, memory_items, similarity))
        # Sort by similarity in descending order
        memories.sort(key=lambda x: x[2], reverse=True)
@@ -378,10 +473,9 @@ class Hippocampus:
                If False, use the LLM to extract keywords; slower but more accurate.
        Returns:
-            list: a list of memories; each element is a tuple (topic, memory_items, similarity)
+            list: a list of memories; each element is a tuple (topic, memory_content)
                - topic: str, the memory topic
-                - memory_items: list, the list of memory items under that topic
-                - similarity: float, similarity to the text
+                - memory_content: str, the complete memory content under that topic
        """
        keywords = await self.get_keywords_from_text(text)
@@ -478,31 +572,22 @@ class Hippocampus:
        for node, activation in remember_map.items():
            logger.debug(f"Processing node '{node}' (activation: {activation:.2f}):")
            node_data = self.memory_graph.G.nodes[node]
-            memory_items = node_data.get("memory_items", [])
-            if not isinstance(memory_items, list):
-                memory_items = [memory_items] if memory_items else []
+            memory_items = node_data.get("memory_items", "")
+            # Use the complete memory content directly
            if memory_items:
-                logger.debug(f"Node contains {len(memory_items)} memories")
-                # Compute each memory's similarity to the input text
-                memory_similarities = []
-                for memory in memory_items:
-                    # Similarity to the input text
-                    memory_words = set(jieba.cut(memory))
-                    text_words = set(jieba.cut(text))
-                    all_words = memory_words | text_words
+                logger.debug("Node contains a complete memory")
+                # Compute the memory's similarity to the input text
+                memory_words = set(jieba.cut(memory_items))
+                text_words = set(jieba.cut(text))
+                all_words = memory_words | text_words
                if all_words:
                    # Compute the similarity (unused here, kept for logical consistency)
                    v1 = [1 if word in memory_words else 0 for word in all_words]
                    v2 = [1 if word in text_words else 0 for word in all_words]
-                    similarity = cosine_similarity(v1, v2)
-                    memory_similarities.append((memory, similarity))
-                # Sort by similarity
-                memory_similarities.sort(key=lambda x: x[1], reverse=True)
-                # Take the best-matching memories
-                top_memories = memory_similarities[:max_memory_length]
-                # Add them to the results
-                all_memories.extend((node, [memory], similarity) for memory, similarity in top_memories)
+                    _ = cosine_similarity(v1, v2)  # computed but unused; assigned to _
+                # Add the complete memory to the results
+                all_memories.append((node, memory_items, activation))
            else:
                logger.info("Node has no memories")
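
A self-contained sketch of the bag-of-words similarity computed in this loop (jieba segments the text; the sample sentences are illustrative):

import jieba
import numpy as np

def text_similarity(a: str, b: str) -> float:
    # Binary word-presence vectors over the union vocabulary, then cosine.
    wa, wb = set(jieba.cut(a)), set(jieba.cut(b))
    vocab = wa | wb
    if not vocab:
        return 0.0
    v1 = np.array([1 if w in wa else 0 for w in vocab])
    v2 = np.array([1 if w in wb else 0 for w in vocab])
    denom = float(np.linalg.norm(v1) * np.linalg.norm(v2))
    return float(v1 @ v2) / denom if denom else 0.0

print(text_similarity("今天天气很热", "今天天气不错"))  # overlap on 今天 / 天气
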
@@ -511,7 +596,8 @@ class Hippocampus:
        seen_memories = set()
        unique_memories = []
        for topic, memory_items, activation_value in all_memories:
-            memory = memory_items[0]  # each topic has only one memory
+            # memory_items is now a complete string
+            memory = memory_items if memory_items else ""
            if memory not in seen_memories:
                seen_memories.add(memory)
                unique_memories.append((topic, memory_items, activation_value))
@@ -522,7 +608,8 @@ class Hippocampus:
        # Convert to (keyword, memory) format
        result = []
        for topic, memory_items, _ in unique_memories:
-            memory = memory_items[0]  # each topic has only one memory
+            # memory_items is now a complete string
+            memory = memory_items if memory_items else ""
            result.append((topic, memory))
            logger.debug(f"Selected memory: {memory} (from node: {topic})")
@@ -544,10 +631,9 @@ class Hippocampus:
            max_depth (int, optional): memory retrieval depth, default 3. Larger values search more broadly and can reach more indirectly related memories, at the cost of speed.
        Returns:
-            list: a list of memories; each element is a tuple (topic, memory_items, similarity)
+            list: a list of memories; each element is a tuple (topic, memory_content)
                - topic: str, the memory topic
-                - memory_items: list, the list of memory items under that topic
-                - similarity: float, similarity to the text
+                - memory_content: str, the complete memory content under that topic
        """
        if not keywords:
            return []
@@ -642,31 +728,22 @@ class Hippocampus:
        for node, activation in remember_map.items():
            logger.debug(f"Processing node '{node}' (activation: {activation:.2f}):")
            node_data = self.memory_graph.G.nodes[node]
-            memory_items = node_data.get("memory_items", [])
-            if not isinstance(memory_items, list):
-                memory_items = [memory_items] if memory_items else []
+            memory_items = node_data.get("memory_items", "")
+            # Use the complete memory content directly
            if memory_items:
-                logger.debug(f"Node contains {len(memory_items)} memories")
-                # Compute each memory's similarity to the input text
-                memory_similarities = []
-                for memory in memory_items:
-                    # Similarity to the input text
-                    memory_words = set(jieba.cut(memory))
-                    text_words = set(keywords)
-                    all_words = memory_words | text_words
+                logger.debug("Node contains a complete memory")
+                # Compute the memory's similarity to the keywords
+                memory_words = set(jieba.cut(memory_items))
+                text_words = set(keywords)
+                all_words = memory_words | text_words
                if all_words:
                    # Compute the similarity (unused here, kept for logical consistency)
                    v1 = [1 if word in memory_words else 0 for word in all_words]
                    v2 = [1 if word in text_words else 0 for word in all_words]
-                    similarity = cosine_similarity(v1, v2)
-                    memory_similarities.append((memory, similarity))
-                # Sort by similarity
-                memory_similarities.sort(key=lambda x: x[1], reverse=True)
-                # Take the best-matching memories
-                top_memories = memory_similarities[:max_memory_length]
-                # Add them to the results
-                all_memories.extend((node, [memory], similarity) for memory, similarity in top_memories)
+                    _ = cosine_similarity(v1, v2)  # computed but unused; assigned to _
+                # Add the complete memory to the results
+                all_memories.append((node, memory_items, activation))
            else:
                logger.info("Node has no memories")
@@ -675,7 +752,8 @@ class Hippocampus:
        seen_memories = set()
        unique_memories = []
        for topic, memory_items, activation_value in all_memories:
-            memory = memory_items[0]  # each topic has only one memory
+            # memory_items is now a complete string
+            memory = memory_items if memory_items else ""
            if memory not in seen_memories:
                seen_memories.add(memory)
                unique_memories.append((topic, memory_items, activation_value))
@@ -686,7 +764,8 @@ class Hippocampus:
        # Convert to (keyword, memory) format
        result = []
        for topic, memory_items, _ in unique_memories:
-            memory = memory_items[0]  # each topic has only one memory
+            # memory_items is now a complete string
+            memory = memory_items if memory_items else ""
            result.append((topic, memory))
            logger.debug(f"Selected memory: {memory} (from node: {topic})")
@@ -894,11 +973,10 @@ class EntorhinalCortex:
                self.memory_graph.G.remove_node(concept)
                continue
-            memory_items = data.get("memory_items", [])
-            if not isinstance(memory_items, list):
-                memory_items = [memory_items] if memory_items else []
-            if not memory_items:
+            memory_items = data.get("memory_items", "")
+            # Check the string directly; no need to split it into a list
+            if not memory_items or memory_items.strip() == "":
                self.memory_graph.G.remove_node(concept)
                continue
@@ -907,21 +985,19 @@ class EntorhinalCortex:
            created_time = data.get("created_time", current_time)
            last_modified = data.get("last_modified", current_time)
-            # Convert memory_items to a JSON string
-            try:
-                memory_items = [str(item) for item in memory_items]
-                memory_items_json = json.dumps(memory_items, ensure_ascii=False)
-                if not memory_items_json:
-                    continue
-            except Exception:
-                self.memory_graph.G.remove_node(concept)
+            # memory_items is stored directly as a string; no JSON serialization needed
+            if not memory_items:
                continue
+            # Weight attribute
+            weight = data.get("weight", 1.0)
            if concept not in db_nodes:
                nodes_to_create.append(
                    {
                        "concept": concept,
-                        "memory_items": memory_items_json,
+                        "memory_items": memory_items,
+                        "weight": weight,
                        "hash": memory_hash,
                        "created_time": created_time,
                        "last_modified": last_modified,
@@ -933,7 +1009,8 @@ class EntorhinalCortex:
                nodes_to_update.append(
                    {
                        "concept": concept,
-                        "memory_items": memory_items_json,
+                        "memory_items": memory_items,
+                        "weight": weight,
                        "hash": memory_hash,
                        "last_modified": last_modified,
                    }
@@ -1031,8 +1108,8 @@ class EntorhinalCortex:
            GraphEdges.delete().where((GraphEdges.source == source) & (GraphEdges.target == target)).execute()
        end_time = time.time()
-        logger.info(f"[Sync] total time: {end_time - start_time:.2f}s")
-        logger.info(f"[Sync] synced {len(memory_nodes)} nodes and {len(memory_edges)} edges")
+        logger.info(f"[Database] sync complete, total time: {end_time - start_time:.2f}s")
+        logger.info(f"[Database] synced {len(nodes_to_create) + len(nodes_to_update)} nodes and {len(edges_to_create) + len(edges_to_update)} edges")
    async def resync_memory_to_db(self):
        """Clear the database and re-sync all memory data."""
@@ -1054,27 +1131,43 @@ class EntorhinalCortex:
        # Prepare node data in bulk
        nodes_data = []
        for concept, data in memory_nodes:
-            memory_items = data.get("memory_items", [])
-            if not isinstance(memory_items, list):
-                memory_items = [memory_items] if memory_items else []
-            try:
-                memory_items = [str(item) for item in memory_items]
-                if memory_items_json := json.dumps(memory_items, ensure_ascii=False):
-                    nodes_data.append(
-                        {
-                            "concept": concept,
-                            "memory_items": memory_items_json,
-                            "hash": self.hippocampus.calculate_node_hash(concept, memory_items),
-                            "created_time": data.get("created_time", current_time),
-                            "last_modified": data.get("last_modified", current_time),
-                        }
-                    )
-            except Exception as e:
-                logger.error(f"Error while preparing data for node {concept}: {e}")
+            memory_items = data.get("memory_items", "")
+            # Check the string directly; no need to split it into a list
+            if not memory_items or memory_items.strip() == "":
+                self.memory_graph.G.remove_node(concept)
+                continue
+            # Compute the in-memory node's feature hash
+            memory_hash = self.hippocampus.calculate_node_hash(concept, memory_items)
+            created_time = data.get("created_time", current_time)
+            last_modified = data.get("last_modified", current_time)
+            # memory_items is stored directly as a string; no JSON serialization needed
+            if not memory_items:
                continue
+            # Weight attribute
+            weight = data.get("weight", 1.0)
+            nodes_data.append(
+                {
+                    "concept": concept,
+                    "memory_items": memory_items,
+                    "weight": weight,
+                    "hash": memory_hash,
+                    "created_time": created_time,
+                    "last_modified": last_modified,
+                }
+            )
+        # Insert nodes in batches
+        if nodes_data:
+            batch_size = 100
+            for i in range(0, len(nodes_data), batch_size):
+                batch = nodes_data[i : i + batch_size]
+                GraphNodes.insert_many(batch).execute()
        # Prepare edge data in bulk
        edges_data = []
        for source, target, data in memory_edges:
@@ -1093,27 +1186,12 @@ class EntorhinalCortex:
logger.error(f"准备边 {source}-{target} 数据时发生错误: {e}")
continue
# 使用事务批量写入节点
node_start = time.time()
if nodes_data:
batch_size = 500 # 增加批量大小
with GraphNodes._meta.database.atomic(): # type: ignore
for i in range(0, len(nodes_data), batch_size):
batch = nodes_data[i : i + batch_size]
GraphNodes.insert_many(batch).execute()
node_end = time.time()
logger.info(f"[数据库] 写入 {len(nodes_data)} 个节点耗时: {node_end - node_start:.2f}")
# 使用事务批量写入边
edge_start = time.time()
# 批量插入边
if edges_data:
batch_size = 500 # 增加批量大小
with GraphEdges._meta.database.atomic(): # type: ignore
for i in range(0, len(edges_data), batch_size):
batch = edges_data[i : i + batch_size]
GraphEdges.insert_many(batch).execute()
edge_end = time.time()
logger.info(f"[数据库] 写入 {len(edges_data)} 条边耗时: {edge_end - edge_start:.2f}")
batch_size = 100
for i in range(0, len(edges_data), batch_size):
batch = edges_data[i : i + batch_size]
GraphEdges.insert_many(batch).execute()
end_time = time.time()
logger.info(f"[数据库] 重新同步完成,总耗时: {end_time - start_time:.2f}")
@@ -1126,19 +1204,30 @@ class EntorhinalCortex:
        # Clear the current graph
        self.memory_graph.G.clear()
+        # Load statistics
+        total_nodes = 0
+        loaded_nodes = 0
+        skipped_nodes = 0
        # Load all nodes from the database
        nodes = list(GraphNodes.select())
+        total_nodes = len(nodes)
        for node in nodes:
            concept = node.concept
            try:
-                memory_items = json.loads(node.memory_items)
-                if not isinstance(memory_items, list):
-                    memory_items = [memory_items] if memory_items else []
+                # Handle an empty string or None
+                if not node.memory_items or node.memory_items.strip() == "":
+                    logger.warning(f"Node {concept} has empty memory_items, skipping")
+                    skipped_nodes += 1
+                    continue
+                # Use memory_items directly
+                memory_items = node.memory_items.strip()
                # Check whether the time fields exist
                if not node.created_time or not node.last_modified:
                    need_update = True
                    # Update the node in the database
                    update_data = {}
                    if not node.created_time:
@@ -1146,18 +1235,24 @@ class EntorhinalCortex:
                    if not node.last_modified:
                        update_data["last_modified"] = current_time
-                    GraphNodes.update(**update_data).where(GraphNodes.concept == concept).execute()
+                    if update_data:
+                        GraphNodes.update(**update_data).where(GraphNodes.concept == concept).execute()
                # Time info (fall back to the current time if missing)
                created_time = node.created_time or current_time
                last_modified = node.last_modified or current_time
+                # Weight attribute
+                weight = node.weight if hasattr(node, 'weight') and node.weight is not None else 1.0
                # Add the node to the graph
                self.memory_graph.G.add_node(
-                    concept, memory_items=memory_items, created_time=created_time, last_modified=last_modified
+                    concept, memory_items=memory_items, weight=weight, created_time=created_time, last_modified=last_modified
                )
+                loaded_nodes += 1
            except Exception as e:
                logger.error(f"Error while loading node {concept}: {e}")
+                skipped_nodes += 1
                continue
        # Load all edges from the database
@@ -1193,6 +1288,9 @@ class EntorhinalCortex:
        if need_update:
            logger.info("[Database] filled in missing time fields")
+        # Log load statistics
+        logger.info(f"[Database] memory load complete: {total_nodes} nodes total, {loaded_nodes} loaded, {skipped_nodes} skipped")
# Responsible for integrating, forgetting, and merging memories
@@ -1338,7 +1436,7 @@ class ParahippocampalGyrus:
        all_added_nodes.extend(topic for topic, _ in compressed_memory)
        for topic, memory in compressed_memory:
-            self.memory_graph.add_dot(topic, memory)
+            await self.memory_graph.add_dot(topic, memory, self.hippocampus)
            all_topics.append(topic)
            if topic in similar_topics_dict:
@@ -1458,12 +1556,9 @@ class ParahippocampalGyrus:
            node_data = self.memory_graph.G.nodes[node]
            # First fetch the memory items
-            memory_items = node_data.get("memory_items", [])
-            if not isinstance(memory_items, list):
-                memory_items = [memory_items] if memory_items else []
-            # New: check whether the node is empty
-            if not memory_items:
+            memory_items = node_data.get("memory_items", "")
+            # Check directly whether the memory content is empty
+            if not memory_items or memory_items.strip() == "":
                try:
                    self.memory_graph.G.remove_node(node)
                    node_changes["removed"].append(f"{node} (empty node)")  # marked as removed-empty
@@ -1474,31 +1569,24 @@ class ParahippocampalGyrus:
            # --- If the node is not empty, run the original inactivity check and removal logic ---
            last_modified = node_data.get("last_modified", current_time)
-            # Condition 1: check whether unmodified for a long time (over 24 hours)
-            if current_time - last_modified > 3600 * 24 and memory_items:
-                current_count = len(memory_items)
-                # Only pick randomly if the list is non-empty
-                if current_count > 0:
-                    removed_item = random.choice(memory_items)
-                    try:
-                        memory_items.remove(removed_item)
-                        # Condition 3: check whether memory_items became empty after removal
-                        if memory_items:  # still non-empty after removal
-                            # self.memory_graph.G.nodes[node]["memory_items"] = memory_items  # mutating the list is enough
-                            self.memory_graph.G.nodes[node]["last_modified"] = current_time  # refresh the modified time
-                            node_changes["reduced"].append(f"{node} (count: {current_count} -> {len(memory_items)})")
-                        else:  # empty after removal
-                            # Try to remove the node, handling possible errors
-                            try:
-                                self.memory_graph.G.remove_node(node)
-                                node_changes["removed"].append(f"{node} (forgotten empty)")  # marked as forgotten-empty
-                                logger.debug(f"[Forget] node {node} was emptied by removing its last item.")
-                            except nx.NetworkXError as e:
-                                logger.warning(f"[Forget] error while removing node {node} (it may already be gone): {e}")
-                    except ValueError:
-                        # This should not happen in theory, since removed_item came from memory_items
-                        logger.warning(f"[Forget] tried to remove nonexistent item '{removed_item[:30]}...' from node '{node}'")
+            node_weight = node_data.get("weight", 1.0)
+            # Condition 1: check whether unmodified for a long time (using the configured forget time)
+            time_threshold = 3600 * global_config.memory.memory_forget_time
+            # Adjust the forgetting threshold by weight: the higher the weight, the longer until forgotten
+            # Weight 1 uses the default threshold; higher weights raise it, making the node harder to forget
+            adjusted_threshold = time_threshold * node_weight
+            if current_time - last_modified > adjusted_threshold and memory_items:
+                # Each node is now one complete memory, so delete the whole node
+                try:
+                    self.memory_graph.G.remove_node(node)
+                    node_changes["removed"].append(f"{node} (long unmodified, weight {node_weight:.1f})")
+                    logger.debug(f"[Forget] removed long-unmodified node: {node} (weight: {node_weight:.1f})")
+                except nx.NetworkXError as e:
+                    logger.warning(f"[Forget] error while removing node {node} (it may already be gone): {e}")
+                continue
        node_check_end = time.time()
        logger.info(f"[Forget] node check took {node_check_end - node_check_start:.2f}s")
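
A worked example of the weight-adjusted forgetting threshold (memory_forget_time is assumed to be 24 hours here purely for illustration):

def is_forgotten(idle_seconds: float, forget_hours: float, node_weight: float) -> bool:
    # A node is forgotten once its idle time exceeds the base threshold
    # scaled by its weight: more heavily consolidated nodes survive longer.
    return idle_seconds > 3600 * forget_hours * node_weight

print(is_forgotten(30 * 3600, 24, 1.0))  # True: 30h idle vs. a 24h threshold
print(is_forgotten(30 * 3600, 24, 2.0))  # False: weight 2.0 raises it to 48h
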
@@ -1537,118 +1625,7 @@ class ParahippocampalGyrus:
        end_time = time.time()
        logger.info(f"[Forget] total time: {end_time - start_time:.2f}s")
-    async def operation_consolidate_memory(self):
-        """Consolidate memories: merge similar memory items within a node."""
-        start_time = time.time()
-        percentage = global_config.memory.consolidate_memory_percentage
-        similarity_threshold = global_config.memory.consolidation_similarity_threshold
-        logger.info(f"[Consolidate] checking memory nodes... check ratio: {percentage:.2%}, merge threshold: {similarity_threshold}")
-        # Collect all nodes with at least 2 memory items
-        eligible_nodes = []
-        for node, data in self.memory_graph.G.nodes(data=True):
-            memory_items = data.get("memory_items", [])
-            if isinstance(memory_items, list) and len(memory_items) >= 2:
-                eligible_nodes.append(node)
-        if not eligible_nodes:
-            logger.info("[Consolidate] no nodes with multiple memory items found; nothing to consolidate.")
-            return
-        # Compute how many nodes to check
-        check_nodes_count = max(1, min(len(eligible_nodes), int(len(eligible_nodes) * percentage)))
-        # Randomly sample nodes to check
-        try:
-            nodes_to_check = random.sample(eligible_nodes, check_nodes_count)
-        except ValueError as e:
-            logger.error(f"[Consolidate] error while sampling nodes: {e}")
-            return
-        logger.info(f"[Consolidate] will check {len(nodes_to_check)} / {len(eligible_nodes)} eligible nodes.")
-        merged_count = 0
-        nodes_modified = set()
-        current_timestamp = datetime.datetime.now().timestamp()
-        for node in nodes_to_check:
-            node_data = self.memory_graph.G.nodes[node]
-            memory_items = node_data.get("memory_items", [])
-            if not isinstance(memory_items, list) or len(memory_items) < 2:
-                continue  # double check; in theory unreachable
-            items_copy = list(memory_items)  # copy for safe iteration and mutation
-            # Iterate over all pairs of memory items
-            for item1, item2 in combinations(items_copy, 2):
-                # Make sure item1 and item2 are still in the original list (an earlier merge may have removed them)
-                if item1 not in memory_items or item2 not in memory_items:
-                    continue
-                similarity = self._calculate_item_similarity(item1, item2)
-                if similarity >= similarity_threshold:
-                    logger.debug(f"[Consolidate] similar items found in node '{node}' (similarity: {similarity:.2f}):")
-                    logger.debug(f"  - '{item1}'")
-                    logger.debug(f"  - '{item2}'")
-                    # Compare information content
-                    info1 = calculate_information_content(item1)
-                    info2 = calculate_information_content(item2)
-                    if info1 >= info2:
-                        item_to_keep = item1
-                        item_to_remove = item2
-                    else:
-                        item_to_keep = item2
-                        item_to_remove = item1
-                    # Remove the lower-information item from the original list
-                    try:
-                        memory_items.remove(item_to_remove)
-                        logger.info(
-                            f"[Consolidate] merged memories in node '{node}', kept: '{item_to_keep[:60]}...', removed: '{item_to_remove[:60]}...'"
-                        )
-                        merged_count += 1
-                        nodes_modified.add(node)
-                        node_data["last_modified"] = current_timestamp  # refresh the modified time
-                        _merged_in_this_node = True
-                        break  # merge only one pair per node per check
-                    except ValueError:
-                        # Skip items that were already removed (e.g. kept in an earlier iteration)
-                        logger.warning(
-                            f"[Consolidate] tried to remove nonexistent item '{item_to_remove[:30]}...' from node '{node}'; it may already have been merged."
-                        )
-                        continue
-            # # If merges happened in this node, update the node data (unsafe: would drop other attributes)
-            # if merged_in_this_node:
-            #     self.memory_graph.G.nodes[node]["memory_items"] = memory_items
-        if merged_count > 0:
-            logger.info(f"[Consolidate] merged {merged_count} pairs of similar memory items across {len(nodes_modified)} nodes.")
-            sync_start = time.time()
-            logger.info("[Consolidate] syncing changes to the database...")
-            # Use resync to handle deletions and additions more safely
-            await self.hippocampus.entorhinal_cortex.resync_memory_to_db()
-            sync_end = time.time()
-            logger.info(f"[Consolidate] database sync took {sync_end - sync_start:.2f}s")
-        else:
-            logger.info("[Consolidate] no memory items needed merging this time.")
-        end_time = time.time()
-        logger.info(f"[Consolidate] consolidation check finished, total time: {end_time - start_time:.2f}s")
-    @staticmethod
-    def _calculate_item_similarity(item1: str, item2: str) -> float:
-        """Compute the cosine similarity of two memory-item texts."""
-        words1 = set(jieba.cut(item1))
-        words2 = set(jieba.cut(item2))
-        all_words = words1 | words2
-        if not all_words:
-            return 0.0
-        v1 = [1 if word in words1 else 0 for word in all_words]
-        v2 = [1 if word in words2 else 0 for word in all_words]
-        return cosine_similarity(v1, v2)
class HippocampusManager:
@@ -1698,13 +1675,7 @@ class HippocampusManager:
            raise RuntimeError("HippocampusManager is not initialized; call initialize() first")
        return await self._hippocampus.parahippocampal_gyrus.operation_forget_topic(percentage)
-    async def consolidate_memory(self):
-        """Public interface for consolidating memories."""
-        if not self._initialized:
-            raise RuntimeError("HippocampusManager is not initialized; call initialize() first")
-        # Note: operation_consolidate_memory currently reads the config directly, so the percentage parameter has no effect for now
-        # To control the ratio externally, operation_consolidate_memory would need to change
-        return await self._hippocampus.parahippocampal_gyrus.operation_consolidate_memory()
    async def get_memory_from_text(
        self,
@@ -1768,3 +1739,5 @@ class HippocampusManager:
# Create the global instance
hippocampus_manager = HippocampusManager()