diff --git a/src/chat/focus_chat/heartFC_chat.py b/src/chat/focus_chat/heartFC_chat.py index e36823e9e..c6a81bb8e 100644 --- a/src/chat/focus_chat/heartFC_chat.py +++ b/src/chat/focus_chat/heartFC_chat.py @@ -535,7 +535,7 @@ class HeartFChatting: new_message_count = message_api.count_new_messages( chat_id=self.chat_stream.stream_id, start_time=thinking_start_time, end_time=current_time ) - platform = message_data.get("platform", "") + platform = message_data.get("user_platform", "") user_id = message_data.get("user_id", "") reply_to_platform_id = f"{platform}:{user_id}" diff --git a/src/chat/memory_system/instant_memory.py b/src/chat/memory_system/instant_memory.py new file mode 100644 index 000000000..5b38bbb0b --- /dev/null +++ b/src/chat/memory_system/instant_memory.py @@ -0,0 +1,258 @@ +# -*- coding: utf-8 -*- +import time +import re +import json +import ast +from json_repair import repair_json +from src.llm_models.utils_model import LLMRequest +from src.common.logger import get_logger +import traceback + +from src.config.config import global_config +from src.common.database.database_model import Memory # Peewee Models导入 + +logger = get_logger(__name__) + +class MemoryItem: + def __init__(self,memory_id:str,chat_id:str,memory_text:str,keywords:list[str]): + self.memory_id = memory_id + self.chat_id = chat_id + self.memory_text:str = memory_text + self.keywords:list[str] = keywords + self.create_time:float = time.time() + self.last_view_time:float = time.time() + +class MemoryManager: + def __init__(self): + # self.memory_items:list[MemoryItem] = [] + pass + + + + + +class InstantMemory: + def __init__(self,chat_id): + self.chat_id = chat_id + self.last_view_time = time.time() + self.summary_model = LLMRequest( + model=global_config.model.memory, + temperature=0.5, + request_type="memory.summary", + ) + + async def if_need_build(self,text): + prompt = f""" +请判断以下内容中是否有值得记忆的信息,如果有,请输出1,否则输出0 +{text} +请只输出1或0就好 + """ + + try: + response,_ = await self.summary_model.generate_response_async(prompt) + print(prompt) + print(response) + + + if "1" in response: + return True + else: + return False + except Exception as e: + logger.error(f"判断是否需要记忆出现错误:{str(e)} {traceback.format_exc()}") + return False + + async def build_memory(self,text): + prompt = f""" + 以下内容中存在值得记忆的信息,请你从中总结出一段值得记忆的信息,并输出 + {text} + 请以json格式输出一段概括的记忆内容和关键词 + {{ + "memory_text": "记忆内容", + "keywords": "关键词,用/划分" + }} + """ + try: + response,_ = await self.summary_model.generate_response_async(prompt) + print(prompt) + print(response) + if not response: + return None + try: + repaired = repair_json(response) + result = json.loads(repaired) + memory_text = result.get('memory_text', '') + keywords = result.get('keywords', '') + if isinstance(keywords, str): + keywords_list = [k.strip() for k in keywords.split('/') if k.strip()] + elif isinstance(keywords, list): + keywords_list = keywords + else: + keywords_list = [] + return {'memory_text': memory_text, 'keywords': keywords_list} + except Exception as parse_e: + logger.error(f"解析记忆json失败:{str(parse_e)} {traceback.format_exc()}") + return None + except Exception as e: + logger.error(f"构建记忆出现错误:{str(e)} {traceback.format_exc()}") + return None + + + async def create_and_store_memory(self,text): + if_need = await self.if_need_build(text) + if if_need: + logger.info(f"需要记忆:{text}") + memory = await self.build_memory(text) + if memory and memory.get('memory_text'): + memory_id = f"{self.chat_id}_{time.time()}" + memory_item = MemoryItem( + memory_id=memory_id, + chat_id=self.chat_id, + memory_text=memory['memory_text'], + keywords=memory.get('keywords', []) + ) + await self.store_memory(memory_item) + else: + logger.info(f"不需要记忆:{text}") + + async def store_memory(self,memory_item:MemoryItem): + memory = Memory( + memory_id=memory_item.memory_id, + chat_id=memory_item.chat_id, + memory_text=memory_item.memory_text, + keywords=memory_item.keywords, + create_time=memory_item.create_time, + last_view_time=memory_item.last_view_time + ) + memory.save() + + async def get_memory(self,target:str): + from json_repair import repair_json + prompt = f""" + 请根据以下发言内容,判断是否需要提取记忆 + {target} + 请用json格式输出,包含以下字段: + 其中,time的要求是: + 可以选择具体日期时间,格式为YYYY-MM-DD HH:MM:SS,或者大致时间,格式为YYYY-MM-DD + 可以选择相对时间,例如:今天,昨天,前天,5天前,1个月前 + 可以选择留空进行模糊搜索 + {{ + "need_memory": 1, + "keywords": "希望获取的记忆关键词,用/划分", + "time": "希望获取的记忆大致时间" + }} + 请只输出json格式,不要输出其他多余内容 + """ + try: + response,_ = await self.summary_model.generate_response_async(prompt) + print(prompt) + print(response) + if not response: + return None + try: + repaired = repair_json(response) + result = json.loads(repaired) + # 解析keywords + keywords = result.get('keywords', '') + if isinstance(keywords, str): + keywords_list = [k.strip() for k in keywords.split('/') if k.strip()] + elif isinstance(keywords, list): + keywords_list = keywords + else: + keywords_list = [] + # 解析time为时间段 + time_str = result.get('time', '').strip() + start_time, end_time = self._parse_time_range(time_str) + logger.info(f"start_time: {start_time}, end_time: {end_time}") + # 检索包含关键词的记忆 + memories_set = set() + if start_time and end_time: + start_ts = start_time.timestamp() + end_ts = end_time.timestamp() + query = Memory.select().where( + (Memory.chat_id == self.chat_id) & + (Memory.create_time >= start_ts) & + (Memory.create_time < end_ts) + ) + else: + query = Memory.select().where(Memory.chat_id == self.chat_id) + + + for mem in query: + #对每条记忆 + mem_keywords = mem.keywords or [] + parsed = ast.literal_eval(mem_keywords) + if isinstance(parsed, list): + mem_keywords = [str(k).strip() for k in parsed if str(k).strip()] + else: + mem_keywords = [] + # logger.info(f"mem_keywords: {mem_keywords}") + # logger.info(f"keywords_list: {keywords_list}") + for kw in keywords_list: + # logger.info(f"kw: {kw}") + # logger.info(f"kw in mem_keywords: {kw in mem_keywords}") + if kw in mem_keywords: + # logger.info(f"mem.memory_text: {mem.memory_text}") + memories_set.add(mem.memory_text) + break + return list(memories_set) + except Exception as parse_e: + logger.error(f"解析记忆json失败:{str(parse_e)} {traceback.format_exc()}") + return None + except Exception as e: + logger.error(f"获取记忆出现错误:{str(e)} {traceback.format_exc()}") + return None + + def _parse_time_range(self, time_str): + """ + 支持解析如下格式: + - 具体日期时间:YYYY-MM-DD HH:MM:SS + - 具体日期:YYYY-MM-DD + - 相对时间:今天,昨天,前天,N天前,N个月前 + - 空字符串:返回(None, None) + """ + from datetime import datetime, timedelta + now = datetime.now() + if not time_str: + return 0, now + time_str = time_str.strip() + # 具体日期时间 + try: + dt = datetime.strptime(time_str, "%Y-%m-%d %H:%M:%S") + return dt, dt + timedelta(hours=1) + except Exception: + pass + # 具体日期 + try: + dt = datetime.strptime(time_str, "%Y-%m-%d") + return dt, dt + timedelta(days=1) + except Exception: + pass + # 相对时间 + if time_str == "今天": + start = now.replace(hour=0, minute=0, second=0, microsecond=0) + end = start + timedelta(days=1) + return start, end + if time_str == "昨天": + start = (now - timedelta(days=1)).replace(hour=0, minute=0, second=0, microsecond=0) + end = start + timedelta(days=1) + return start, end + if time_str == "前天": + start = (now - timedelta(days=2)).replace(hour=0, minute=0, second=0, microsecond=0) + end = start + timedelta(days=1) + return start, end + m = re.match(r"(\d+)天前", time_str) + if m: + days = int(m.group(1)) + start = (now - timedelta(days=days)).replace(hour=0, minute=0, second=0, microsecond=0) + end = start + timedelta(days=1) + return start, end + m = re.match(r"(\d+)个月前", time_str) + if m: + months = int(m.group(1)) + # 近似每月30天 + start = (now - timedelta(days=months*30)).replace(hour=0, minute=0, second=0, microsecond=0) + end = start + timedelta(days=1) + return start, end + # 其他无法解析 + return 0, now \ No newline at end of file diff --git a/src/chat/replyer/default_generator.py b/src/chat/replyer/default_generator.py index 6091268b4..2dd889a93 100644 --- a/src/chat/replyer/default_generator.py +++ b/src/chat/replyer/default_generator.py @@ -21,6 +21,7 @@ from src.chat.utils.chat_message_builder import build_readable_messages, get_raw from src.chat.express.expression_selector import expression_selector from src.chat.knowledge.knowledge_lib import qa_manager from src.chat.memory_system.memory_activator import MemoryActivator +from src.chat.memory_system.instant_memory import InstantMemory from src.mood.mood_manager import mood_manager from src.person_info.relationship_fetcher import relationship_fetcher_manager from src.person_info.person_info import get_person_info_manager @@ -159,6 +160,7 @@ class DefaultReplyer: self.heart_fc_sender = HeartFCSender() self.memory_activator = MemoryActivator() + self.instant_memory = InstantMemory(chat_id=self.chat_stream.stream_id) self.tool_executor = ToolExecutor(chat_id=self.chat_stream.stream_id, enable_cache=True, cache_ttl=3) def _select_weighted_model_config(self) -> Dict[str, Any]: @@ -368,13 +370,21 @@ class DefaultReplyer: running_memories = await self.memory_activator.activate_memory_with_chat_history( target_message=target, chat_history_prompt=chat_history ) + + if global_config.memory.enable_instant_memory: + asyncio.create_task(self.instant_memory.create_and_store_memory(chat_history)) + instant_memory = await self.instant_memory.get_memory(target) + logger.info(f"即时记忆:{instant_memory}") + if not running_memories: return "" memory_str = "以下是当前在聊天中,你回忆起的记忆:\n" for running_memory in running_memories: memory_str += f"- {running_memory['content']}\n" + + memory_str += f"- {instant_memory}\n" return memory_str async def build_tool_info(self, chat_history, reply_data: Optional[Dict], enable_tool: bool = True): @@ -510,9 +520,8 @@ class DefaultReplyer: background_dialogue_prompt_str = build_readable_messages( latest_25_msgs, replace_bot_name=True, - merge_messages=True, timestamp_mode="normal_no_YMD", - show_pic=False, + truncate=True, ) background_dialogue_prompt = f"这是其他用户的发言:\n{background_dialogue_prompt_str}" diff --git a/src/chat/utils/utils_image.py b/src/chat/utils/utils_image.py index 4b7dc3730..0ab5559cb 100644 --- a/src/chat/utils/utils_image.py +++ b/src/chat/utils/utils_image.py @@ -204,7 +204,7 @@ class ImageManager: # 调用AI获取描述 image_format = Image.open(io.BytesIO(image_bytes)).format.lower() # type: ignore - prompt = "请用中文描述这张图片的内容。如果有文字,请把文字都描述出来,请留意其主题,直观感受,输出为一段平文本,最多50字" + prompt = global_config.custom_prompt.image_prompt description, _ = await self._llm.generate_response_for_image(prompt, image_base64, image_format) if description is None: @@ -484,7 +484,7 @@ class ImageManager: image_format = Image.open(io.BytesIO(image_bytes)).format.lower() # type: ignore # 构建prompt - prompt = """请用中文描述这张图片的内容。如果有文字,请把文字描述概括出来,请留意其主题,直观感受,输出为一段平文本,最多30字,请注意不要分点,就输出一段文本""" + prompt = global_config.custom_prompt.image_prompt # 获取VLM描述 description, _ = await self._llm.generate_response_for_image(prompt, image_base64, image_format) diff --git a/src/common/database/database_model.py b/src/common/database/database_model.py index 140bb305c..c846defa0 100644 --- a/src/common/database/database_model.py +++ b/src/common/database/database_model.py @@ -267,6 +267,16 @@ class PersonInfo(BaseModel): # database = db # 继承自 BaseModel table_name = "person_info" +class Memory(BaseModel): + memory_id = TextField(index=True) + chat_id = TextField(null=True) + memory_text = TextField(null=True) + keywords = TextField(null=True) + create_time = FloatField(null=True) + last_view_time = FloatField(null=True) + + class Meta: + table_name = "memory" class Knowledges(BaseModel): """ @@ -370,6 +380,7 @@ def create_tables(): RecalledMessages, # 添加新模型 GraphNodes, # 添加图节点表 GraphEdges, # 添加图边表 + Memory, ActionRecords, # 添加 ActionRecords 到初始化列表 ] ) @@ -391,6 +402,7 @@ def initialize_database(): OnlineTime, PersonInfo, Knowledges, + Memory, ThinkingLog, RecalledMessages, GraphNodes, diff --git a/src/config/config.py b/src/config/config.py index d40679b71..2bf3e7c2e 100644 --- a/src/config/config.py +++ b/src/config/config.py @@ -32,6 +32,7 @@ from src.config.official_configs import ( RelationshipConfig, ToolConfig, DebugConfig, + CustomPromptConfig, ) install(extra_lines=3) @@ -47,7 +48,7 @@ TEMPLATE_DIR = os.path.join(PROJECT_ROOT, "template") # 考虑到,实际上配置文件中的mai_version是不会自动更新的,所以采用硬编码 # 对该字段的更新,请严格参照语义化版本规范:https://semver.org/lang/zh-CN/ -MMC_VERSION = "0.9.0-snapshot.1" +MMC_VERSION = "0.9.0-snapshot.2" def update_config(): @@ -162,7 +163,7 @@ class Config(ConfigBase): lpmm_knowledge: LPMMKnowledgeConfig tool: ToolConfig debug: DebugConfig - + custom_prompt: CustomPromptConfig def load_config(config_path: str) -> Config: """ diff --git a/src/config/official_configs.py b/src/config/official_configs.py index 25bef7e89..67b314f7f 100644 --- a/src/config/official_configs.py +++ b/src/config/official_configs.py @@ -385,6 +385,9 @@ class MemoryConfig(ConfigBase): memory_ban_words: list[str] = field(default_factory=lambda: ["表情包", "图片", "回复", "聊天记录"]) """不允许记忆的词列表""" + + enable_instant_memory: bool = True + """是否启用即时记忆""" @dataclass @@ -450,6 +453,13 @@ class KeywordReactionConfig(ConfigBase): if not isinstance(rule, KeywordRuleConfig): raise ValueError(f"规则必须是KeywordRuleConfig类型,而不是{type(rule).__name__}") +@dataclass +class CustomPromptConfig(ConfigBase): + """自定义提示词配置类""" + + image_prompt: str = "" + """图片提示词""" + @dataclass class ResponsePostProcessConfig(ConfigBase): diff --git a/template/bot_config_template.toml b/template/bot_config_template.toml index a139e3aa5..e5a898551 100644 --- a/template/bot_config_template.toml +++ b/template/bot_config_template.toml @@ -1,5 +1,5 @@ [inner] -version = "4.2.0" +version = "4.3.0" #----以下是给开发人员阅读的,如果你只是部署了麦麦,不需要阅读---- #如果你想要修改配置文件,请在修改后将version的值进行变更 @@ -138,6 +138,8 @@ consolidate_memory_interval = 1000 # 记忆整合间隔 单位秒 间隔越低 consolidation_similarity_threshold = 0.7 # 相似度阈值 consolidation_check_percentage = 0.05 # 检查节点比例 +enable_instant_memory = true # 是否启用即时记忆 + #不希望记忆的词,已经记忆的不会受到影响,需要手动清理 memory_ban_words = [ "表情包", "图片", "回复", "聊天记录" ] @@ -178,6 +180,12 @@ regex_rules = [ { regex = ["^(?P\\S{1,20})是这样的$"], reaction = "请按照以下模板造句:[n]是这样的,xx只要xx就可以,可是[n]要考虑的事情就很多了,比如什么时候xx,什么时候xx,什么时候xx。(请自由发挥替换xx部分,只需保持句式结构,同时表达一种将[n]过度重视的反讽意味)" } ] +# 可以自定义部分提示词 +[custom_prompt] +image_prompt = "请用中文描述这张图片的内容。如果有文字,请把文字描述概括出来,请留意其主题,直观感受,输出为一段平文本,最多30字,请注意不要分点,就输出一段文本" + + + [response_post_process] enable_response_post_process = true # 是否启用回复后处理,包括错别字生成器,回复分割器