Merge branch 'dev' of https://github.com/MaiM-with-u/MaiBot into dev
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
from ..moods.moods import MoodManager # 导入情绪管理器
|
||||
from ..config.config import global_config
|
||||
from ...config.config import global_config
|
||||
from .message import MessageRecv
|
||||
from ..PFC.pfc_manager import PFCManager
|
||||
from .chat_stream import chat_manager
|
||||
|
||||
@@ -10,7 +10,7 @@ from PIL import Image
|
||||
import io
|
||||
|
||||
from ...common.database import db
|
||||
from ..config.config import global_config
|
||||
from ...config.config import global_config
|
||||
from ..chat.utils import get_embedding
|
||||
from ..chat.utils_image import ImageManager, image_path_to_base64
|
||||
from ..models.utils_model import LLMRequest
|
||||
|
||||
@@ -3,13 +3,13 @@ from src.common.logger import get_module_logger
|
||||
import asyncio
|
||||
from dataclasses import dataclass, field
|
||||
from .message import MessageRecv
|
||||
from ..message.message_base import BaseMessageInfo, GroupInfo
|
||||
from ..message.message_base import BaseMessageInfo, GroupInfo, Seg
|
||||
import hashlib
|
||||
from typing import Dict
|
||||
from collections import OrderedDict
|
||||
import random
|
||||
import time
|
||||
from ..config.config import global_config
|
||||
from ...config.config import global_config
|
||||
|
||||
logger = get_module_logger("message_buffer")
|
||||
|
||||
@@ -130,22 +130,40 @@ class MessageBuffer:
|
||||
keep_msgs = OrderedDict()
|
||||
combined_text = []
|
||||
found = False
|
||||
type = "text"
|
||||
type = "seglist"
|
||||
is_update = True
|
||||
for msg_id, msg in self.buffer_pool[person_id_].items():
|
||||
if msg_id == message.message_info.message_id:
|
||||
found = True
|
||||
type = msg.message.message_segment.type
|
||||
if msg.message.message_segment.type != "seglist":
|
||||
type = msg.message.message_segment.type
|
||||
else:
|
||||
if (
|
||||
isinstance(msg.message.message_segment.data, list)
|
||||
and all(isinstance(x, Seg) for x in msg.message.message_segment.data)
|
||||
and len(msg.message.message_segment.data) == 1
|
||||
):
|
||||
type = msg.message.message_segment.data[0].type
|
||||
combined_text.append(msg.message.processed_plain_text)
|
||||
continue
|
||||
if found:
|
||||
keep_msgs[msg_id] = msg
|
||||
elif msg.result == "F":
|
||||
# 收集F消息的文本内容
|
||||
F_type = "seglist"
|
||||
if msg.message.message_segment.type != "seglist":
|
||||
F_type = msg.message.message_segment.type
|
||||
else:
|
||||
if (
|
||||
isinstance(msg.message.message_segment.data, list)
|
||||
and all(isinstance(x, Seg) for x in msg.message.message_segment.data)
|
||||
and len(msg.message.message_segment.data) == 1
|
||||
):
|
||||
F_type = msg.message.message_segment.data[0].type
|
||||
if hasattr(msg.message, "processed_plain_text") and msg.message.processed_plain_text:
|
||||
if msg.message.message_segment.type == "text":
|
||||
if F_type == "text":
|
||||
combined_text.append(msg.message.processed_plain_text)
|
||||
elif msg.message.message_segment.type != "text":
|
||||
elif F_type != "text":
|
||||
is_update = False
|
||||
elif msg.result == "U":
|
||||
logger.debug(f"异常未处理信息id: {msg.message.message_info.message_id}")
|
||||
|
||||
@@ -8,7 +8,7 @@ from ..message.api import global_api
|
||||
from .message import MessageSending, MessageThinking, MessageSet
|
||||
|
||||
from ..storage.storage import MessageStorage
|
||||
from ..config.config import global_config
|
||||
from ...config.config import global_config
|
||||
from .utils import truncate_message, calculate_typing_time, count_messages_between
|
||||
|
||||
from src.common.logger import LogConfig, SENDER_STYLE_CONFIG
|
||||
|
||||
@@ -10,7 +10,7 @@ from src.common.logger import get_module_logger
|
||||
|
||||
from ..models.utils_model import LLMRequest
|
||||
from ..utils.typo_generator import ChineseTypoGenerator
|
||||
from ..config.config import global_config
|
||||
from ...config.config import global_config
|
||||
from .message import MessageRecv, Message
|
||||
from ..message.message_base import UserInfo
|
||||
from .chat_stream import ChatStream
|
||||
@@ -338,11 +338,21 @@ def random_remove_punctuation(text: str) -> str:
|
||||
|
||||
|
||||
def process_llm_response(text: str) -> List[str]:
|
||||
# 先保护颜文字
|
||||
protected_text, kaomoji_mapping = protect_kaomoji(text)
|
||||
logger.debug(f"保护颜文字后的文本: {protected_text}")
|
||||
# 提取被 () 或 [] 包裹的内容
|
||||
pattern = re.compile(r"[(\[].*?[\)\]")
|
||||
_extracted_contents = pattern.findall(text)
|
||||
pattern = re.compile(r"[\(\[\(].*?[\)\]\)]")
|
||||
# _extracted_contents = pattern.findall(text)
|
||||
_extracted_contents = pattern.findall(protected_text) # 在保护后的文本上查找
|
||||
|
||||
# 去除 () 和 [] 及其包裹的内容
|
||||
cleaned_text = pattern.sub("", text)
|
||||
# cleaned_text = pattern.sub("", text)
|
||||
cleaned_text = pattern.sub("", protected_text)
|
||||
|
||||
if cleaned_text == "":
|
||||
return ["呃呃"]
|
||||
|
||||
logger.debug(f"{text}去除括号处理后的文本: {cleaned_text}")
|
||||
|
||||
# 对清理后的文本进行进一步处理
|
||||
@@ -382,6 +392,8 @@ def process_llm_response(text: str) -> List[str]:
|
||||
return [f"{global_config.BOT_NICKNAME}不知道哦"]
|
||||
|
||||
# sentences.extend(extracted_contents)
|
||||
# 在所有句子处理完毕后,对包含占位符的列表进行恢复
|
||||
sentences = recover_kaomoji(sentences, kaomoji_mapping)
|
||||
|
||||
return sentences
|
||||
|
||||
@@ -508,8 +520,7 @@ def protect_kaomoji(sentence):
|
||||
r"]"
|
||||
r")"
|
||||
r"|"
|
||||
r"([▼▽・ᴥω・﹏^><≧≦ ̄`´∀ヮДд︿﹀へ。゚╥╯╰︶︹•⁄]{2,15"
|
||||
r"}"
|
||||
r"([▼▽・ᴥω・﹏^><≧≦ ̄`´∀ヮДд︿﹀へ。゚╥╯╰︶︹•⁄]{2,15})"
|
||||
)
|
||||
|
||||
kaomoji_matches = kaomoji_pattern.findall(sentence)
|
||||
@@ -706,12 +717,30 @@ def parse_text_timestamps(text: str, mode: str = "normal") -> str:
|
||||
# normal模式: 直接转换所有时间戳
|
||||
if mode == "normal":
|
||||
result_text = text
|
||||
|
||||
# 将时间戳转换为可读格式并记录相同格式的时间戳
|
||||
timestamp_readable_map = {}
|
||||
readable_time_used = set()
|
||||
|
||||
for match in matches:
|
||||
timestamp = float(match.group(1))
|
||||
readable_time = translate_timestamp_to_human_readable(timestamp, "normal")
|
||||
# 由于替换会改变文本长度,需要使用正则替换而非直接替换
|
||||
pattern_instance = re.escape(match.group(0))
|
||||
result_text = re.sub(pattern_instance, readable_time, result_text, count=1)
|
||||
timestamp_readable_map[match.group(0)] = (timestamp, readable_time)
|
||||
|
||||
# 按时间戳排序
|
||||
sorted_timestamps = sorted(timestamp_readable_map.items(), key=lambda x: x[1][0])
|
||||
|
||||
# 执行替换,相同格式的只保留最早的
|
||||
for ts_str, (_, readable) in sorted_timestamps:
|
||||
pattern_instance = re.escape(ts_str)
|
||||
if readable in readable_time_used:
|
||||
# 如果这个可读时间已经使用过,替换为空字符串
|
||||
result_text = re.sub(pattern_instance, "", result_text, count=1)
|
||||
else:
|
||||
# 否则替换为可读时间并记录
|
||||
result_text = re.sub(pattern_instance, readable, result_text, count=1)
|
||||
readable_time_used.add(readable)
|
||||
|
||||
return result_text
|
||||
else:
|
||||
# lite模式: 按5秒间隔划分并选择性转换
|
||||
@@ -770,15 +799,30 @@ def parse_text_timestamps(text: str, mode: str = "normal") -> str:
|
||||
pattern_instance = re.escape(match.group(0))
|
||||
result_text = re.sub(pattern_instance, "", result_text, count=1)
|
||||
|
||||
# 按照时间戳原始顺序排序,避免替换时位置错误
|
||||
to_convert.sort(key=lambda x: x[1].start())
|
||||
# 按照时间戳升序排序
|
||||
to_convert.sort(key=lambda x: x[0])
|
||||
|
||||
# 将时间戳转换为可读时间并记录哪些可读时间已经使用过
|
||||
converted_timestamps = []
|
||||
readable_time_used = set()
|
||||
|
||||
# 执行替换
|
||||
# 由于替换会改变文本长度,从后向前替换
|
||||
to_convert.reverse()
|
||||
for ts, match in to_convert:
|
||||
readable_time = translate_timestamp_to_human_readable(ts, "relative")
|
||||
converted_timestamps.append((ts, match, readable_time))
|
||||
|
||||
# 按照时间戳原始顺序排序,避免替换时位置错误
|
||||
converted_timestamps.sort(key=lambda x: x[1].start())
|
||||
|
||||
# 从后向前替换,避免位置改变
|
||||
converted_timestamps.reverse()
|
||||
for match, readable_time in converted_timestamps:
|
||||
pattern_instance = re.escape(match.group(0))
|
||||
result_text = re.sub(pattern_instance, readable_time, result_text, count=1)
|
||||
if readable_time in readable_time_used:
|
||||
# 如果相同格式的时间已存在,替换为空字符串
|
||||
result_text = re.sub(pattern_instance, "", result_text, count=1)
|
||||
else:
|
||||
# 否则替换为可读时间并记录
|
||||
result_text = re.sub(pattern_instance, readable_time, result_text, count=1)
|
||||
readable_time_used.add(readable_time)
|
||||
|
||||
return result_text
|
||||
|
||||
@@ -8,7 +8,7 @@ import io
|
||||
|
||||
|
||||
from ...common.database import db
|
||||
from ..config.config import global_config
|
||||
from ...config.config import global_config
|
||||
from ..models.utils_model import LLMRequest
|
||||
|
||||
from src.common.logger import get_module_logger
|
||||
|
||||
Reference in New Issue
Block a user