commit 208e629faf
parent 7ebac142ea
Author: UnCLAS-Prommer
Date:   2025-07-25 17:41:38 +08:00

2 changed files with 17 additions and 17 deletions

View File

@@ -224,13 +224,14 @@ class Hippocampus:
        return hash((source, target))

    @staticmethod
-    def find_topic_llm(text:str, topic_num:int|list[int]):
+    def find_topic_llm(text: str, topic_num: int | list[int]):
+        # sourcery skip: inline-immediately-returned-variable
        topic_num_str = ""
        if isinstance(topic_num, list):
            topic_num_str = f"{topic_num[0]}-{topic_num[1]}"
        else:
            topic_num_str = topic_num
        prompt = (
            f"这是一段文字:\n{text}\n\n请你从这段话中总结出最多{topic_num_str}个关键的概念,可以是名词,动词,或者特定人物,帮我列出来,"
            f"将主题用逗号隔开,并加上<>,例如<主题1>,<主题2>......尽可能精简。只需要列举最多{topic_num}个话题就好,不要有序号,不要告诉我其他内容。"
        )
@@ -304,10 +305,10 @@ class Hippocampus:
        # Sort by similarity in descending order
        memories.sort(key=lambda x: x[2], reverse=True)
        return memories

    async def get_keywords_from_text(self, text: str) -> list:
        """Extract keywords from the text.

        Args:
            text (str): the input text
            fast_retrieval (bool, optional): whether to use fast retrieval. Defaults to False.
@@ -319,7 +320,7 @@ class Hippocampus:
        # Extract keywords with the LLM - topic_num is tuned to the observed text-length distribution
        text_length = len(text)
-        topic_num:str|list[int] = None
+        topic_num: int | list[int] = 0
        if text_length <= 5:
            words = jieba.cut(text)
            keywords = [word for word in words if len(word) > 1]
@@ -327,17 +328,16 @@ class Hippocampus:
            logger.info(f"提取关键词: {keywords}")
            return keywords
        elif text_length <= 10:
-            topic_num = [1,3]  # 6-10 chars: 1 keyword (27.18% of texts)
+            topic_num = [1, 3]  # 6-10 chars: 1 keyword (27.18% of texts)
        elif text_length <= 20:
-            topic_num = [2,4]  # 11-20 chars: 2 keywords (22.76% of texts)
+            topic_num = [2, 4]  # 11-20 chars: 2 keywords (22.76% of texts)
        elif text_length <= 30:
-            topic_num = [3,5]  # 21-30 chars: 3 keywords (10.33% of texts)
+            topic_num = [3, 5]  # 21-30 chars: 3 keywords (10.33% of texts)
        elif text_length <= 50:
-            topic_num = [4,5]  # 31-50 chars: 4 keywords (9.79% of texts)
+            topic_num = [4, 5]  # 31-50 chars: 4 keywords (9.79% of texts)
        else:
            topic_num = 5  # 51+ chars: 5 keywords (all remaining longer texts)

        topics_response, (reasoning_content, model_name) = await self.model_summary.generate_response_async(
            self.find_topic_llm(text, topic_num)
        )
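The branches above map text length to a topic count or an inclusive range, falling back to jieba segmentation for very short inputs. A minimal standalone sketch of that heuristic; the function name is illustrative, the buckets are copied from the diff:

# Illustrative sketch of the length buckets: short texts skip the LLM,
# medium texts ask for a range of topics, long texts ask for a fixed 5.
def pick_topic_num(text: str) -> int | list[int] | None:
    length = len(text)
    if length <= 5:
        return None          # handled by jieba word segmentation instead
    if length <= 10:
        return [1, 3]
    if length <= 20:
        return [2, 4]
    if length <= 30:
        return [3, 5]
    if length <= 50:
        return [4, 5]
    return 5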
@@ -352,11 +352,10 @@ class Hippocampus:
            for keyword in ",".join(keywords).replace("，", ",").replace("、", ",").replace(" ", ",").split(",")
            if keyword.strip()
        ]
        logger.info(f"提取关键词: {keywords}")

-        return keywords
+        return keywords

    async def get_memory_from_text(
        self,
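The comprehension above joins the LLM reply, maps full-width commas, enumeration commas, and spaces to ASCII commas, then splits and strips. A small self-contained sketch of that clean-up step, assuming the reply has already been reduced to a list of raw topic strings; the function name is illustrative:

# Illustrative sketch of the keyword clean-up: normalize Chinese punctuation
# and spaces to ASCII commas, split, and drop empty entries.
def clean_keywords(raw_keywords: list[str]) -> list[str]:
    joined = ",".join(raw_keywords)
    normalized = joined.replace("，", ",").replace("、", ",").replace(" ", ",")
    return [kw.strip() for kw in normalized.split(",") if kw.strip()]

# e.g. clean_keywords(["天气，散步", "心情 好"]) -> ["天气", "散步", "心情", "好"]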
@@ -1310,6 +1309,7 @@ class ParahippocampalGyrus:
        return compressed_memory, similar_topics_dict

    async def operation_build_memory(self):
+        # sourcery skip: merge-list-appends-into-extend
        logger.info("------------------------------------开始构建记忆--------------------------------------")
        start_time = time.time()
        memory_samples = self.hippocampus.entorhinal_cortex.get_memory_sample()

View File

@@ -36,7 +36,7 @@ def compare_dicts(new, old, path=None, new_comments=None, old_comments=None, log
            continue
        if key not in old:
            comment = get_key_comment(new, key)
-            logs.append(f"新增: {'.'.join(path + [str(key)])} 注释: {comment if comment else ''}")
+            logs.append(f"新增: {'.'.join(path + [str(key)])} 注释: {comment or ''}")
        elif isinstance(new[key], (dict, Table)) and isinstance(old.get(key), (dict, Table)):
            compare_dicts(new[key], old[key], path + [str(key)], new_comments, old_comments, logs)
    # Removed entries
@@ -45,7 +45,7 @@ def compare_dicts(new, old, path=None, new_comments=None, old_comments=None, log
            continue
        if key not in new:
            comment = get_key_comment(old, key)
-            logs.append(f"删减: {'.'.join(path + [str(key)])} 注释: {comment if comment else ''}")
+            logs.append(f"删减: {'.'.join(path + [str(key)])} 注释: {comment or ''}")
    return logs
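Both hunks in this file swap "comment if comment else ''" for the shorter "comment or ''"; the two forms agree whenever the comment is None or an empty string. A minimal sketch of the log-line pattern with that idiom; the function name and the example key path are hypothetical:

# Illustrative sketch: "comment or ''" falls back to "" exactly like the
# conditional expression it replaces when comment is None or empty.
def format_change(kind: str, path: list[str], key: str, comment: str | None) -> str:
    return f"{kind}: {'.'.join(path + [str(key)])} 注释: {comment or ''}"

# format_change("新增", ["bot", "memory"], "build_interval", None)
# -> "新增: bot.memory.build_interval 注释: "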