typing
This commit is contained in:
@@ -224,13 +224,14 @@ class Hippocampus:
|
|||||||
return hash((source, target))
|
return hash((source, target))
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def find_topic_llm(text:str, topic_num:int|list[int]):
|
def find_topic_llm(text: str, topic_num: int | list[int]):
|
||||||
|
# sourcery skip: inline-immediately-returned-variable
|
||||||
topic_num_str = ""
|
topic_num_str = ""
|
||||||
if isinstance(topic_num, list):
|
if isinstance(topic_num, list):
|
||||||
topic_num_str = f"{topic_num[0]}-{topic_num[1]}"
|
topic_num_str = f"{topic_num[0]}-{topic_num[1]}"
|
||||||
else:
|
else:
|
||||||
topic_num_str = topic_num
|
topic_num_str = topic_num
|
||||||
|
|
||||||
prompt = (
|
prompt = (
|
||||||
f"这是一段文字:\n{text}\n\n请你从这段话中总结出最多{topic_num_str}个关键的概念,可以是名词,动词,或者特定人物,帮我列出来,"
|
f"这是一段文字:\n{text}\n\n请你从这段话中总结出最多{topic_num_str}个关键的概念,可以是名词,动词,或者特定人物,帮我列出来,"
|
||||||
f"将主题用逗号隔开,并加上<>,例如<主题1>,<主题2>......尽可能精简。只需要列举最多{topic_num}个话题就好,不要有序号,不要告诉我其他内容。"
|
f"将主题用逗号隔开,并加上<>,例如<主题1>,<主题2>......尽可能精简。只需要列举最多{topic_num}个话题就好,不要有序号,不要告诉我其他内容。"
|
||||||
@@ -304,10 +305,10 @@ class Hippocampus:
|
|||||||
# 按相似度降序排序
|
# 按相似度降序排序
|
||||||
memories.sort(key=lambda x: x[2], reverse=True)
|
memories.sort(key=lambda x: x[2], reverse=True)
|
||||||
return memories
|
return memories
|
||||||
|
|
||||||
async def get_keywords_from_text(self, text: str) -> list:
|
async def get_keywords_from_text(self, text: str) -> list:
|
||||||
"""从文本中提取关键词。
|
"""从文本中提取关键词。
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
text (str): 输入文本
|
text (str): 输入文本
|
||||||
fast_retrieval (bool, optional): 是否使用快速检索。默认为False。
|
fast_retrieval (bool, optional): 是否使用快速检索。默认为False。
|
||||||
@@ -319,7 +320,7 @@ class Hippocampus:
|
|||||||
|
|
||||||
# 使用LLM提取关键词 - 根据详细文本长度分布优化topic_num计算
|
# 使用LLM提取关键词 - 根据详细文本长度分布优化topic_num计算
|
||||||
text_length = len(text)
|
text_length = len(text)
|
||||||
topic_num:str|list[int] = None
|
topic_num: int | list[int] = 0
|
||||||
if text_length <= 5:
|
if text_length <= 5:
|
||||||
words = jieba.cut(text)
|
words = jieba.cut(text)
|
||||||
keywords = [word for word in words if len(word) > 1]
|
keywords = [word for word in words if len(word) > 1]
|
||||||
@@ -327,17 +328,16 @@ class Hippocampus:
|
|||||||
logger.info(f"提取关键词: {keywords}")
|
logger.info(f"提取关键词: {keywords}")
|
||||||
return keywords
|
return keywords
|
||||||
elif text_length <= 10:
|
elif text_length <= 10:
|
||||||
topic_num = [1,3] # 6-10字符: 1个关键词 (27.18%的文本)
|
topic_num = [1, 3] # 6-10字符: 1个关键词 (27.18%的文本)
|
||||||
elif text_length <= 20:
|
elif text_length <= 20:
|
||||||
topic_num = [2,4] # 11-20字符: 2个关键词 (22.76%的文本)
|
topic_num = [2, 4] # 11-20字符: 2个关键词 (22.76%的文本)
|
||||||
elif text_length <= 30:
|
elif text_length <= 30:
|
||||||
topic_num = [3,5] # 21-30字符: 3个关键词 (10.33%的文本)
|
topic_num = [3, 5] # 21-30字符: 3个关键词 (10.33%的文本)
|
||||||
elif text_length <= 50:
|
elif text_length <= 50:
|
||||||
topic_num = [4,5] # 31-50字符: 4个关键词 (9.79%的文本)
|
topic_num = [4, 5] # 31-50字符: 4个关键词 (9.79%的文本)
|
||||||
else:
|
else:
|
||||||
topic_num = 5 # 51+字符: 5个关键词 (其余长文本)
|
topic_num = 5 # 51+字符: 5个关键词 (其余长文本)
|
||||||
|
|
||||||
|
|
||||||
topics_response, (reasoning_content, model_name) = await self.model_summary.generate_response_async(
|
topics_response, (reasoning_content, model_name) = await self.model_summary.generate_response_async(
|
||||||
self.find_topic_llm(text, topic_num)
|
self.find_topic_llm(text, topic_num)
|
||||||
)
|
)
|
||||||
@@ -352,11 +352,10 @@ class Hippocampus:
|
|||||||
for keyword in ",".join(keywords).replace(",", ",").replace("、", ",").replace(" ", ",").split(",")
|
for keyword in ",".join(keywords).replace(",", ",").replace("、", ",").replace(" ", ",").split(",")
|
||||||
if keyword.strip()
|
if keyword.strip()
|
||||||
]
|
]
|
||||||
|
|
||||||
logger.info(f"提取关键词: {keywords}")
|
logger.info(f"提取关键词: {keywords}")
|
||||||
|
|
||||||
return keywords
|
return keywords
|
||||||
|
|
||||||
|
|
||||||
async def get_memory_from_text(
|
async def get_memory_from_text(
|
||||||
self,
|
self,
|
||||||
@@ -1310,6 +1309,7 @@ class ParahippocampalGyrus:
|
|||||||
return compressed_memory, similar_topics_dict
|
return compressed_memory, similar_topics_dict
|
||||||
|
|
||||||
async def operation_build_memory(self):
|
async def operation_build_memory(self):
|
||||||
|
# sourcery skip: merge-list-appends-into-extend
|
||||||
logger.info("------------------------------------开始构建记忆--------------------------------------")
|
logger.info("------------------------------------开始构建记忆--------------------------------------")
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
memory_samples = self.hippocampus.entorhinal_cortex.get_memory_sample()
|
memory_samples = self.hippocampus.entorhinal_cortex.get_memory_sample()
|
||||||
|
|||||||
@@ -36,7 +36,7 @@ def compare_dicts(new, old, path=None, new_comments=None, old_comments=None, log
|
|||||||
continue
|
continue
|
||||||
if key not in old:
|
if key not in old:
|
||||||
comment = get_key_comment(new, key)
|
comment = get_key_comment(new, key)
|
||||||
logs.append(f"新增: {'.'.join(path + [str(key)])} 注释: {comment if comment else '无'}")
|
logs.append(f"新增: {'.'.join(path + [str(key)])} 注释: {comment or '无'}")
|
||||||
elif isinstance(new[key], (dict, Table)) and isinstance(old.get(key), (dict, Table)):
|
elif isinstance(new[key], (dict, Table)) and isinstance(old.get(key), (dict, Table)):
|
||||||
compare_dicts(new[key], old[key], path + [str(key)], new_comments, old_comments, logs)
|
compare_dicts(new[key], old[key], path + [str(key)], new_comments, old_comments, logs)
|
||||||
# 删减项
|
# 删减项
|
||||||
@@ -45,7 +45,7 @@ def compare_dicts(new, old, path=None, new_comments=None, old_comments=None, log
|
|||||||
continue
|
continue
|
||||||
if key not in new:
|
if key not in new:
|
||||||
comment = get_key_comment(old, key)
|
comment = get_key_comment(old, key)
|
||||||
logs.append(f"删减: {'.'.join(path + [str(key)])} 注释: {comment if comment else '无'}")
|
logs.append(f"删减: {'.'.join(path + [str(key)])} 注释: {comment or '无'}")
|
||||||
return logs
|
return logs
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user