记忆系统接入关键词,重新启动自主发言功能
This commit is contained in:
SengokuCola
2025-03-06 14:27:22 +08:00
parent 8bca235ecf
commit eaa711ada7
10 changed files with 520 additions and 112 deletions

View File

@@ -11,7 +11,7 @@ from ..chat.config import global_config
from ...common.database import Database # 使用正确的导入语法
from ..models.utils_model import LLM_request
import math
from ..chat.utils import calculate_information_content, get_cloest_chat_from_db
from ..chat.utils import calculate_information_content, get_cloest_chat_from_db ,find_similar_topics,text_to_vector,cosine_similarity
@@ -135,6 +135,14 @@ class Hippocampus:
self.llm_model_get_topic = LLM_request(model = global_config.llm_normal_minor,temperature=0.5)
self.llm_model_summary = LLM_request(model = global_config.llm_normal,temperature=0.5)
def get_all_node_names(self) -> list:
"""获取记忆图中所有节点的名字列表
Returns:
list: 包含所有节点名字的列表
"""
return list(self.memory_graph.G.nodes())
def calculate_node_hash(self, concept, memory_items):
"""计算节点的特征值"""
if not isinstance(memory_items, list):
@@ -483,6 +491,193 @@ class Hippocampus:
prompt = f'这是一段文字:{text}。我想让你基于这段文字来概括"{topic}"这个概念,帮我总结成一句自然的话,可以包含时间和人物,以及具体的观点。只输出这句话就好'
return prompt
async def _identify_topics(self, text: str) -> list:
"""从文本中识别可能的主题
Args:
text: 输入文本
Returns:
list: 识别出的主题列表
"""
topics_response = await self.llm_model_get_topic.generate_response(self.find_topic_llm(text, 5))
print(f"话题: {topics_response[0]}")
topics = [topic.strip() for topic in topics_response[0].replace("", ",").replace("", ",").replace(" ", ",").split(",") if topic.strip()]
print(f"话题: {topics}")
return topics
def _find_similar_topics(self, topics: list, similarity_threshold: float = 0.4, debug_info: str = "") -> list:
"""查找与给定主题相似的记忆主题
Args:
topics: 主题列表
similarity_threshold: 相似度阈值
debug_info: 调试信息前缀
Returns:
list: (主题, 相似度) 元组列表
"""
all_memory_topics = self.get_all_node_names()
all_similar_topics = []
# 计算每个识别出的主题与记忆主题的相似度
for topic in topics:
if debug_info:
print(f"\033[1;32m[{debug_info}]\033[0m 正在思考有没有见过: {topic}")
topic_vector = text_to_vector(topic)
has_similar_topic = False
for memory_topic in all_memory_topics:
memory_vector = text_to_vector(memory_topic)
# 获取所有唯一词
all_words = set(topic_vector.keys()) | set(memory_vector.keys())
# 构建向量
v1 = [topic_vector.get(word, 0) for word in all_words]
v2 = [memory_vector.get(word, 0) for word in all_words]
# 计算相似度
similarity = cosine_similarity(v1, v2)
if similarity >= similarity_threshold:
has_similar_topic = True
if debug_info:
print(f"\033[1;32m[{debug_info}]\033[0m 找到相似主题: {topic} -> {memory_topic} (相似度: {similarity:.2f})")
all_similar_topics.append((memory_topic, similarity))
if not has_similar_topic and debug_info:
print(f"\033[1;31m[{debug_info}]\033[0m 没有见过: {topic} ,呃呃")
return all_similar_topics
def _get_top_topics(self, similar_topics: list, max_topics: int = 5) -> list:
"""获取相似度最高的主题
Args:
similar_topics: (主题, 相似度) 元组列表
max_topics: 最大主题数量
Returns:
list: (主题, 相似度) 元组列表
"""
seen_topics = set()
top_topics = []
for topic, score in sorted(similar_topics, key=lambda x: x[1], reverse=True):
if topic not in seen_topics and len(top_topics) < max_topics:
seen_topics.add(topic)
top_topics.append((topic, score))
return top_topics
async def memory_activate_value(self, text: str, max_topics: int = 5, similarity_threshold: float = 0.3) -> int:
"""计算输入文本对记忆的激活程度"""
print(f"\033[1;32m[记忆激活]\033[0m 开始计算文本的记忆激活度: {text}")
# 识别主题
identified_topics = await self._identify_topics(text)
print(f"\033[1;32m[记忆激活]\033[0m 识别出的主题: {identified_topics}")
if not identified_topics:
print(f"\033[1;32m[记忆激活]\033[0m 未识别出主题,返回0")
return 0
# 查找相似主题
all_similar_topics = self._find_similar_topics(
identified_topics,
similarity_threshold=similarity_threshold,
debug_info="记忆激活"
)
if not all_similar_topics:
print(f"\033[1;32m[记忆激活]\033[0m 未找到相似主题,返回0")
return 0
# 获取最相关的主题
top_topics = self._get_top_topics(all_similar_topics, max_topics)
# 如果只找到一个主题,进行惩罚
if len(top_topics) == 1:
topic, score = top_topics[0]
activation = int(score * 50) # 单主题情况下,直接用相似度*50作为激活值
print(f"\033[1;32m[记忆激活]\033[0m 只找到一个主题,进行惩罚:")
print(f"\033[1;32m[记忆激活]\033[0m - 主题: {topic}")
print(f"\033[1;32m[记忆激活]\033[0m - 相似度: {score:.3f}")
print(f"\033[1;32m[记忆激活]\033[0m - 最终激活值: {activation}")
return activation
# 计算关键词匹配率
matched_topics = set()
topic_similarities = {}
print(f"\033[1;32m[记忆激活]\033[0m 计算关键词匹配情况:")
for memory_topic, similarity in top_topics:
# 对每个记忆主题,检查它与哪些输入主题相似
for input_topic in identified_topics:
topic_vector = text_to_vector(input_topic)
memory_vector = text_to_vector(memory_topic)
all_words = set(topic_vector.keys()) | set(memory_vector.keys())
v1 = [topic_vector.get(word, 0) for word in all_words]
v2 = [memory_vector.get(word, 0) for word in all_words]
sim = cosine_similarity(v1, v2)
if sim >= similarity_threshold:
matched_topics.add(input_topic)
topic_similarities[input_topic] = max(topic_similarities.get(input_topic, 0), sim)
print(f"\033[1;32m[记忆激活]\033[0m - 输入主题「{input_topic}」匹配到记忆「{memory_topic}」, 相似度: {sim:.3f}")
# 计算主题匹配率
topic_match = len(matched_topics) / len(identified_topics)
print(f"\033[1;32m[记忆激活]\033[0m 主题匹配率:")
print(f"\033[1;32m[记忆激活]\033[0m - 匹配主题数: {len(matched_topics)}")
print(f"\033[1;32m[记忆激活]\033[0m - 总主题数: {len(identified_topics)}")
print(f"\033[1;32m[记忆激活]\033[0m - 匹配率: {topic_match:.3f}")
# 计算匹配主题的平均相似度
average_similarities = sum(topic_similarities.values()) / len(topic_similarities) if topic_similarities else 0
print(f"\033[1;32m[记忆激活]\033[0m 平均相似度:")
print(f"\033[1;32m[记忆激活]\033[0m - 各主题相似度: {[f'{k}:{v:.3f}' for k,v in topic_similarities.items()]}")
print(f"\033[1;32m[记忆激活]\033[0m - 平均相似度: {average_similarities:.3f}")
# 计算最终激活值
activation = (topic_match + average_similarities) / 2 * 100
print(f"\033[1;32m[记忆激活]\033[0m 最终激活值: {int(activation)}")
return int(activation)
async def get_relevant_memories(self, text: str, max_topics: int = 5, similarity_threshold: float = 0.4) -> list:
"""根据输入文本获取相关的记忆内容"""
# 识别主题
identified_topics = await self._identify_topics(text)
# 查找相似主题
all_similar_topics = self._find_similar_topics(
identified_topics,
similarity_threshold=similarity_threshold,
debug_info="记忆检索"
)
# 获取最相关的主题
relevant_topics = self._get_top_topics(all_similar_topics, max_topics)
# 获取相关记忆内容
relevant_memories = []
for topic, score in relevant_topics:
# 获取该主题的记忆内容
first_layer, _ = self.memory_graph.get_related_item(topic, depth=1)
if first_layer:
# 为每条记忆添加来源主题和相似度信息
for memory in first_layer:
relevant_memories.append({
'topic': topic,
'similarity': score,
'content': memory
})
# 按相似度排序
relevant_memories.sort(key=lambda x: x['similarity'], reverse=True)
return relevant_memories
def segment_text(text):
seg_text = list(jieba.cut(text))