feat:测试性的新辅助记忆系统
This commit is contained in:
@@ -535,7 +535,7 @@ class HeartFChatting:
|
||||
new_message_count = message_api.count_new_messages(
|
||||
chat_id=self.chat_stream.stream_id, start_time=thinking_start_time, end_time=current_time
|
||||
)
|
||||
platform = message_data.get("platform", "")
|
||||
platform = message_data.get("user_platform", "")
|
||||
user_id = message_data.get("user_id", "")
|
||||
reply_to_platform_id = f"{platform}:{user_id}"
|
||||
|
||||
|
||||
258
src/chat/memory_system/instant_memory.py
Normal file
258
src/chat/memory_system/instant_memory.py
Normal file
@@ -0,0 +1,258 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
import time
|
||||
import re
|
||||
import json
|
||||
import ast
|
||||
from json_repair import repair_json
|
||||
from src.llm_models.utils_model import LLMRequest
|
||||
from src.common.logger import get_logger
|
||||
import traceback
|
||||
|
||||
from src.config.config import global_config
|
||||
from src.common.database.database_model import Memory # Peewee Models导入
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
class MemoryItem:
|
||||
def __init__(self,memory_id:str,chat_id:str,memory_text:str,keywords:list[str]):
|
||||
self.memory_id = memory_id
|
||||
self.chat_id = chat_id
|
||||
self.memory_text:str = memory_text
|
||||
self.keywords:list[str] = keywords
|
||||
self.create_time:float = time.time()
|
||||
self.last_view_time:float = time.time()
|
||||
|
||||
class MemoryManager:
|
||||
def __init__(self):
|
||||
# self.memory_items:list[MemoryItem] = []
|
||||
pass
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
class InstantMemory:
|
||||
def __init__(self,chat_id):
|
||||
self.chat_id = chat_id
|
||||
self.last_view_time = time.time()
|
||||
self.summary_model = LLMRequest(
|
||||
model=global_config.model.memory,
|
||||
temperature=0.5,
|
||||
request_type="memory.summary",
|
||||
)
|
||||
|
||||
async def if_need_build(self,text):
|
||||
prompt = f"""
|
||||
请判断以下内容中是否有值得记忆的信息,如果有,请输出1,否则输出0
|
||||
{text}
|
||||
请只输出1或0就好
|
||||
"""
|
||||
|
||||
try:
|
||||
response,_ = await self.summary_model.generate_response_async(prompt)
|
||||
print(prompt)
|
||||
print(response)
|
||||
|
||||
|
||||
if "1" in response:
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
except Exception as e:
|
||||
logger.error(f"判断是否需要记忆出现错误:{str(e)} {traceback.format_exc()}")
|
||||
return False
|
||||
|
||||
async def build_memory(self,text):
|
||||
prompt = f"""
|
||||
以下内容中存在值得记忆的信息,请你从中总结出一段值得记忆的信息,并输出
|
||||
{text}
|
||||
请以json格式输出一段概括的记忆内容和关键词
|
||||
{{
|
||||
"memory_text": "记忆内容",
|
||||
"keywords": "关键词,用/划分"
|
||||
}}
|
||||
"""
|
||||
try:
|
||||
response,_ = await self.summary_model.generate_response_async(prompt)
|
||||
print(prompt)
|
||||
print(response)
|
||||
if not response:
|
||||
return None
|
||||
try:
|
||||
repaired = repair_json(response)
|
||||
result = json.loads(repaired)
|
||||
memory_text = result.get('memory_text', '')
|
||||
keywords = result.get('keywords', '')
|
||||
if isinstance(keywords, str):
|
||||
keywords_list = [k.strip() for k in keywords.split('/') if k.strip()]
|
||||
elif isinstance(keywords, list):
|
||||
keywords_list = keywords
|
||||
else:
|
||||
keywords_list = []
|
||||
return {'memory_text': memory_text, 'keywords': keywords_list}
|
||||
except Exception as parse_e:
|
||||
logger.error(f"解析记忆json失败:{str(parse_e)} {traceback.format_exc()}")
|
||||
return None
|
||||
except Exception as e:
|
||||
logger.error(f"构建记忆出现错误:{str(e)} {traceback.format_exc()}")
|
||||
return None
|
||||
|
||||
|
||||
async def create_and_store_memory(self,text):
|
||||
if_need = await self.if_need_build(text)
|
||||
if if_need:
|
||||
logger.info(f"需要记忆:{text}")
|
||||
memory = await self.build_memory(text)
|
||||
if memory and memory.get('memory_text'):
|
||||
memory_id = f"{self.chat_id}_{time.time()}"
|
||||
memory_item = MemoryItem(
|
||||
memory_id=memory_id,
|
||||
chat_id=self.chat_id,
|
||||
memory_text=memory['memory_text'],
|
||||
keywords=memory.get('keywords', [])
|
||||
)
|
||||
await self.store_memory(memory_item)
|
||||
else:
|
||||
logger.info(f"不需要记忆:{text}")
|
||||
|
||||
async def store_memory(self,memory_item:MemoryItem):
|
||||
memory = Memory(
|
||||
memory_id=memory_item.memory_id,
|
||||
chat_id=memory_item.chat_id,
|
||||
memory_text=memory_item.memory_text,
|
||||
keywords=memory_item.keywords,
|
||||
create_time=memory_item.create_time,
|
||||
last_view_time=memory_item.last_view_time
|
||||
)
|
||||
memory.save()
|
||||
|
||||
async def get_memory(self,target:str):
|
||||
from json_repair import repair_json
|
||||
prompt = f"""
|
||||
请根据以下发言内容,判断是否需要提取记忆
|
||||
{target}
|
||||
请用json格式输出,包含以下字段:
|
||||
其中,time的要求是:
|
||||
可以选择具体日期时间,格式为YYYY-MM-DD HH:MM:SS,或者大致时间,格式为YYYY-MM-DD
|
||||
可以选择相对时间,例如:今天,昨天,前天,5天前,1个月前
|
||||
可以选择留空进行模糊搜索
|
||||
{{
|
||||
"need_memory": 1,
|
||||
"keywords": "希望获取的记忆关键词,用/划分",
|
||||
"time": "希望获取的记忆大致时间"
|
||||
}}
|
||||
请只输出json格式,不要输出其他多余内容
|
||||
"""
|
||||
try:
|
||||
response,_ = await self.summary_model.generate_response_async(prompt)
|
||||
print(prompt)
|
||||
print(response)
|
||||
if not response:
|
||||
return None
|
||||
try:
|
||||
repaired = repair_json(response)
|
||||
result = json.loads(repaired)
|
||||
# 解析keywords
|
||||
keywords = result.get('keywords', '')
|
||||
if isinstance(keywords, str):
|
||||
keywords_list = [k.strip() for k in keywords.split('/') if k.strip()]
|
||||
elif isinstance(keywords, list):
|
||||
keywords_list = keywords
|
||||
else:
|
||||
keywords_list = []
|
||||
# 解析time为时间段
|
||||
time_str = result.get('time', '').strip()
|
||||
start_time, end_time = self._parse_time_range(time_str)
|
||||
logger.info(f"start_time: {start_time}, end_time: {end_time}")
|
||||
# 检索包含关键词的记忆
|
||||
memories_set = set()
|
||||
if start_time and end_time:
|
||||
start_ts = start_time.timestamp()
|
||||
end_ts = end_time.timestamp()
|
||||
query = Memory.select().where(
|
||||
(Memory.chat_id == self.chat_id) &
|
||||
(Memory.create_time >= start_ts) &
|
||||
(Memory.create_time < end_ts)
|
||||
)
|
||||
else:
|
||||
query = Memory.select().where(Memory.chat_id == self.chat_id)
|
||||
|
||||
|
||||
for mem in query:
|
||||
#对每条记忆
|
||||
mem_keywords = mem.keywords or []
|
||||
parsed = ast.literal_eval(mem_keywords)
|
||||
if isinstance(parsed, list):
|
||||
mem_keywords = [str(k).strip() for k in parsed if str(k).strip()]
|
||||
else:
|
||||
mem_keywords = []
|
||||
# logger.info(f"mem_keywords: {mem_keywords}")
|
||||
# logger.info(f"keywords_list: {keywords_list}")
|
||||
for kw in keywords_list:
|
||||
# logger.info(f"kw: {kw}")
|
||||
# logger.info(f"kw in mem_keywords: {kw in mem_keywords}")
|
||||
if kw in mem_keywords:
|
||||
# logger.info(f"mem.memory_text: {mem.memory_text}")
|
||||
memories_set.add(mem.memory_text)
|
||||
break
|
||||
return list(memories_set)
|
||||
except Exception as parse_e:
|
||||
logger.error(f"解析记忆json失败:{str(parse_e)} {traceback.format_exc()}")
|
||||
return None
|
||||
except Exception as e:
|
||||
logger.error(f"获取记忆出现错误:{str(e)} {traceback.format_exc()}")
|
||||
return None
|
||||
|
||||
def _parse_time_range(self, time_str):
|
||||
"""
|
||||
支持解析如下格式:
|
||||
- 具体日期时间:YYYY-MM-DD HH:MM:SS
|
||||
- 具体日期:YYYY-MM-DD
|
||||
- 相对时间:今天,昨天,前天,N天前,N个月前
|
||||
- 空字符串:返回(None, None)
|
||||
"""
|
||||
from datetime import datetime, timedelta
|
||||
now = datetime.now()
|
||||
if not time_str:
|
||||
return 0, now
|
||||
time_str = time_str.strip()
|
||||
# 具体日期时间
|
||||
try:
|
||||
dt = datetime.strptime(time_str, "%Y-%m-%d %H:%M:%S")
|
||||
return dt, dt + timedelta(hours=1)
|
||||
except Exception:
|
||||
pass
|
||||
# 具体日期
|
||||
try:
|
||||
dt = datetime.strptime(time_str, "%Y-%m-%d")
|
||||
return dt, dt + timedelta(days=1)
|
||||
except Exception:
|
||||
pass
|
||||
# 相对时间
|
||||
if time_str == "今天":
|
||||
start = now.replace(hour=0, minute=0, second=0, microsecond=0)
|
||||
end = start + timedelta(days=1)
|
||||
return start, end
|
||||
if time_str == "昨天":
|
||||
start = (now - timedelta(days=1)).replace(hour=0, minute=0, second=0, microsecond=0)
|
||||
end = start + timedelta(days=1)
|
||||
return start, end
|
||||
if time_str == "前天":
|
||||
start = (now - timedelta(days=2)).replace(hour=0, minute=0, second=0, microsecond=0)
|
||||
end = start + timedelta(days=1)
|
||||
return start, end
|
||||
m = re.match(r"(\d+)天前", time_str)
|
||||
if m:
|
||||
days = int(m.group(1))
|
||||
start = (now - timedelta(days=days)).replace(hour=0, minute=0, second=0, microsecond=0)
|
||||
end = start + timedelta(days=1)
|
||||
return start, end
|
||||
m = re.match(r"(\d+)个月前", time_str)
|
||||
if m:
|
||||
months = int(m.group(1))
|
||||
# 近似每月30天
|
||||
start = (now - timedelta(days=months*30)).replace(hour=0, minute=0, second=0, microsecond=0)
|
||||
end = start + timedelta(days=1)
|
||||
return start, end
|
||||
# 其他无法解析
|
||||
return 0, now
|
||||
@@ -21,6 +21,7 @@ from src.chat.utils.chat_message_builder import build_readable_messages, get_raw
|
||||
from src.chat.express.expression_selector import expression_selector
|
||||
from src.chat.knowledge.knowledge_lib import qa_manager
|
||||
from src.chat.memory_system.memory_activator import MemoryActivator
|
||||
from src.chat.memory_system.instant_memory import InstantMemory
|
||||
from src.mood.mood_manager import mood_manager
|
||||
from src.person_info.relationship_fetcher import relationship_fetcher_manager
|
||||
from src.person_info.person_info import get_person_info_manager
|
||||
@@ -159,6 +160,7 @@ class DefaultReplyer:
|
||||
|
||||
self.heart_fc_sender = HeartFCSender()
|
||||
self.memory_activator = MemoryActivator()
|
||||
self.instant_memory = InstantMemory(chat_id=self.chat_stream.stream_id)
|
||||
self.tool_executor = ToolExecutor(chat_id=self.chat_stream.stream_id, enable_cache=True, cache_ttl=3)
|
||||
|
||||
def _select_weighted_model_config(self) -> Dict[str, Any]:
|
||||
@@ -369,12 +371,20 @@ class DefaultReplyer:
|
||||
target_message=target, chat_history_prompt=chat_history
|
||||
)
|
||||
|
||||
if global_config.memory.enable_instant_memory:
|
||||
asyncio.create_task(self.instant_memory.create_and_store_memory(chat_history))
|
||||
|
||||
instant_memory = await self.instant_memory.get_memory(target)
|
||||
logger.info(f"即时记忆:{instant_memory}")
|
||||
|
||||
if not running_memories:
|
||||
return ""
|
||||
|
||||
memory_str = "以下是当前在聊天中,你回忆起的记忆:\n"
|
||||
for running_memory in running_memories:
|
||||
memory_str += f"- {running_memory['content']}\n"
|
||||
|
||||
memory_str += f"- {instant_memory}\n"
|
||||
return memory_str
|
||||
|
||||
async def build_tool_info(self, chat_history, reply_data: Optional[Dict], enable_tool: bool = True):
|
||||
@@ -510,9 +520,8 @@ class DefaultReplyer:
|
||||
background_dialogue_prompt_str = build_readable_messages(
|
||||
latest_25_msgs,
|
||||
replace_bot_name=True,
|
||||
merge_messages=True,
|
||||
timestamp_mode="normal_no_YMD",
|
||||
show_pic=False,
|
||||
truncate=True,
|
||||
)
|
||||
background_dialogue_prompt = f"这是其他用户的发言:\n{background_dialogue_prompt_str}"
|
||||
|
||||
|
||||
@@ -204,7 +204,7 @@ class ImageManager:
|
||||
|
||||
# 调用AI获取描述
|
||||
image_format = Image.open(io.BytesIO(image_bytes)).format.lower() # type: ignore
|
||||
prompt = "请用中文描述这张图片的内容。如果有文字,请把文字都描述出来,请留意其主题,直观感受,输出为一段平文本,最多50字"
|
||||
prompt = global_config.custom_prompt.image_prompt
|
||||
description, _ = await self._llm.generate_response_for_image(prompt, image_base64, image_format)
|
||||
|
||||
if description is None:
|
||||
@@ -484,7 +484,7 @@ class ImageManager:
|
||||
image_format = Image.open(io.BytesIO(image_bytes)).format.lower() # type: ignore
|
||||
|
||||
# 构建prompt
|
||||
prompt = """请用中文描述这张图片的内容。如果有文字,请把文字描述概括出来,请留意其主题,直观感受,输出为一段平文本,最多30字,请注意不要分点,就输出一段文本"""
|
||||
prompt = global_config.custom_prompt.image_prompt
|
||||
|
||||
# 获取VLM描述
|
||||
description, _ = await self._llm.generate_response_for_image(prompt, image_base64, image_format)
|
||||
|
||||
@@ -267,6 +267,16 @@ class PersonInfo(BaseModel):
|
||||
# database = db # 继承自 BaseModel
|
||||
table_name = "person_info"
|
||||
|
||||
class Memory(BaseModel):
|
||||
memory_id = TextField(index=True)
|
||||
chat_id = TextField(null=True)
|
||||
memory_text = TextField(null=True)
|
||||
keywords = TextField(null=True)
|
||||
create_time = FloatField(null=True)
|
||||
last_view_time = FloatField(null=True)
|
||||
|
||||
class Meta:
|
||||
table_name = "memory"
|
||||
|
||||
class Knowledges(BaseModel):
|
||||
"""
|
||||
@@ -370,6 +380,7 @@ def create_tables():
|
||||
RecalledMessages, # 添加新模型
|
||||
GraphNodes, # 添加图节点表
|
||||
GraphEdges, # 添加图边表
|
||||
Memory,
|
||||
ActionRecords, # 添加 ActionRecords 到初始化列表
|
||||
]
|
||||
)
|
||||
@@ -391,6 +402,7 @@ def initialize_database():
|
||||
OnlineTime,
|
||||
PersonInfo,
|
||||
Knowledges,
|
||||
Memory,
|
||||
ThinkingLog,
|
||||
RecalledMessages,
|
||||
GraphNodes,
|
||||
|
||||
@@ -32,6 +32,7 @@ from src.config.official_configs import (
|
||||
RelationshipConfig,
|
||||
ToolConfig,
|
||||
DebugConfig,
|
||||
CustomPromptConfig,
|
||||
)
|
||||
|
||||
install(extra_lines=3)
|
||||
@@ -47,7 +48,7 @@ TEMPLATE_DIR = os.path.join(PROJECT_ROOT, "template")
|
||||
|
||||
# 考虑到,实际上配置文件中的mai_version是不会自动更新的,所以采用硬编码
|
||||
# 对该字段的更新,请严格参照语义化版本规范:https://semver.org/lang/zh-CN/
|
||||
MMC_VERSION = "0.9.0-snapshot.1"
|
||||
MMC_VERSION = "0.9.0-snapshot.2"
|
||||
|
||||
|
||||
def update_config():
|
||||
@@ -162,7 +163,7 @@ class Config(ConfigBase):
|
||||
lpmm_knowledge: LPMMKnowledgeConfig
|
||||
tool: ToolConfig
|
||||
debug: DebugConfig
|
||||
|
||||
custom_prompt: CustomPromptConfig
|
||||
|
||||
def load_config(config_path: str) -> Config:
|
||||
"""
|
||||
|
||||
@@ -386,6 +386,9 @@ class MemoryConfig(ConfigBase):
|
||||
memory_ban_words: list[str] = field(default_factory=lambda: ["表情包", "图片", "回复", "聊天记录"])
|
||||
"""不允许记忆的词列表"""
|
||||
|
||||
enable_instant_memory: bool = True
|
||||
"""是否启用即时记忆"""
|
||||
|
||||
|
||||
@dataclass
|
||||
class MoodConfig(ConfigBase):
|
||||
@@ -450,6 +453,13 @@ class KeywordReactionConfig(ConfigBase):
|
||||
if not isinstance(rule, KeywordRuleConfig):
|
||||
raise ValueError(f"规则必须是KeywordRuleConfig类型,而不是{type(rule).__name__}")
|
||||
|
||||
@dataclass
|
||||
class CustomPromptConfig(ConfigBase):
|
||||
"""自定义提示词配置类"""
|
||||
|
||||
image_prompt: str = ""
|
||||
"""图片提示词"""
|
||||
|
||||
|
||||
@dataclass
|
||||
class ResponsePostProcessConfig(ConfigBase):
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
[inner]
|
||||
version = "4.2.0"
|
||||
version = "4.3.0"
|
||||
|
||||
#----以下是给开发人员阅读的,如果你只是部署了麦麦,不需要阅读----
|
||||
#如果你想要修改配置文件,请在修改后将version的值进行变更
|
||||
@@ -138,6 +138,8 @@ consolidate_memory_interval = 1000 # 记忆整合间隔 单位秒 间隔越低
|
||||
consolidation_similarity_threshold = 0.7 # 相似度阈值
|
||||
consolidation_check_percentage = 0.05 # 检查节点比例
|
||||
|
||||
enable_instant_memory = true # 是否启用即时记忆
|
||||
|
||||
#不希望记忆的词,已经记忆的不会受到影响,需要手动清理
|
||||
memory_ban_words = [ "表情包", "图片", "回复", "聊天记录" ]
|
||||
|
||||
@@ -178,6 +180,12 @@ regex_rules = [
|
||||
{ regex = ["^(?P<n>\\S{1,20})是这样的$"], reaction = "请按照以下模板造句:[n]是这样的,xx只要xx就可以,可是[n]要考虑的事情就很多了,比如什么时候xx,什么时候xx,什么时候xx。(请自由发挥替换xx部分,只需保持句式结构,同时表达一种将[n]过度重视的反讽意味)" }
|
||||
]
|
||||
|
||||
# 可以自定义部分提示词
|
||||
[custom_prompt]
|
||||
image_prompt = "请用中文描述这张图片的内容。如果有文字,请把文字描述概括出来,请留意其主题,直观感受,输出为一段平文本,最多30字,请注意不要分点,就输出一段文本"
|
||||
|
||||
|
||||
|
||||
[response_post_process]
|
||||
enable_response_post_process = true # 是否启用回复后处理,包括错别字生成器,回复分割器
|
||||
|
||||
|
||||
Reference in New Issue
Block a user