优化代码格式和异常处理

- 修复异常处理链,使用from语法保留原始异常
- 格式化代码以符合项目规范
- 优化导入模块的顺序

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
春河晴
2025-03-19 20:27:34 +09:00
parent a829dfdb77
commit fdc098d0db
52 changed files with 3156 additions and 2778 deletions

View File

@@ -13,7 +13,7 @@ from src.common.logger import get_module_logger
from ..models.utils_model import LLM_request
from ..utils.typo_generator import ChineseTypoGenerator
from .config import global_config
from .message import MessageRecv,Message
from .message import MessageRecv, Message
from .message_base import UserInfo
from .chat_stream import ChatStream
from ..moods.moods import MoodManager
@@ -25,14 +25,16 @@ config = driver.config
logger = get_module_logger("chat_utils")
def db_message_to_str(message_dict: Dict) -> str:
logger.debug(f"message_dict: {message_dict}")
time_str = time.strftime("%m-%d %H:%M:%S", time.localtime(message_dict["time"]))
try:
name = "[(%s)%s]%s" % (
message_dict['user_id'], message_dict.get("user_nickname", ""), message_dict.get("user_cardname", ""))
except:
message_dict["user_id"],
message_dict.get("user_nickname", ""),
message_dict.get("user_cardname", ""),
)
except Exception:
name = message_dict.get("user_nickname", "") or f"用户{message_dict['user_id']}"
content = message_dict.get("processed_plain_text", "")
result = f"[{time_str}] {name}: {content}\n"
@@ -55,18 +57,11 @@ def is_mentioned_bot_in_message(message: MessageRecv) -> bool:
async def get_embedding(text):
"""获取文本的embedding向量"""
llm = LLM_request(model=global_config.embedding,request_type = 'embedding')
llm = LLM_request(model=global_config.embedding, request_type="embedding")
# return llm.get_embedding_sync(text)
return await llm.get_embedding(text)
def cosine_similarity(v1, v2):
dot_product = np.dot(v1, v2)
norm1 = np.linalg.norm(v1)
norm2 = np.linalg.norm(v2)
return dot_product / (norm1 * norm2)
def calculate_information_content(text):
"""计算文本的信息量(熵)"""
char_count = Counter(text)
@@ -82,60 +77,70 @@ def calculate_information_content(text):
def get_closest_chat_from_db(length: int, timestamp: str):
"""从数据库中获取最接近指定时间戳的聊天记录
Args:
length: 要获取的消息数量
timestamp: 时间戳
Returns:
list: 消息记录列表,每个记录包含时间和文本信息
"""
chat_records = []
closest_record = db.messages.find_one({"time": {"$lte": timestamp}}, sort=[('time', -1)])
if closest_record:
closest_time = closest_record['time']
chat_id = closest_record['chat_id'] # 获取chat_id
closest_record = db.messages.find_one({"time": {"$lte": timestamp}}, sort=[("time", -1)])
if closest_record:
closest_time = closest_record["time"]
chat_id = closest_record["chat_id"] # 获取chat_id
# 获取该时间戳之后的length条消息保持相同的chat_id
chat_records = list(db.messages.find(
{
"time": {"$gt": closest_time},
"chat_id": chat_id # 添加chat_id过滤
}
).sort('time', 1).limit(length))
chat_records = list(
db.messages.find(
{
"time": {"$gt": closest_time},
"chat_id": chat_id, # 添加chat_id过滤
}
)
.sort("time", 1)
.limit(length)
)
# 转换记录格式
formatted_records = []
for record in chat_records:
# 兼容行为,前向兼容老数据
formatted_records.append({
'_id': record["_id"],
'time': record["time"],
'chat_id': record["chat_id"],
'detailed_plain_text': record.get("detailed_plain_text", ""), # 添加文本内容
'memorized_times': record.get("memorized_times", 0) # 添加记忆次数
})
formatted_records.append(
{
"_id": record["_id"],
"time": record["time"],
"chat_id": record["chat_id"],
"detailed_plain_text": record.get("detailed_plain_text", ""), # 添加文本内容
"memorized_times": record.get("memorized_times", 0), # 添加记忆次数
}
)
return formatted_records
return []
async def get_recent_group_messages(chat_id:str, limit: int = 12) -> list:
async def get_recent_group_messages(chat_id: str, limit: int = 12) -> list:
"""从数据库获取群组最近的消息记录
Args:
group_id: 群组ID
limit: 获取消息数量默认12条
Returns:
list: Message对象列表按时间正序排列
"""
# 从数据库获取最近消息
recent_messages = list(db.messages.find(
{"chat_id": chat_id},
).sort("time", -1).limit(limit))
recent_messages = list(
db.messages.find(
{"chat_id": chat_id},
)
.sort("time", -1)
.limit(limit)
)
if not recent_messages:
return []
@@ -144,17 +149,17 @@ async def get_recent_group_messages(chat_id:str, limit: int = 12) -> list:
message_objects = []
for msg_data in recent_messages:
try:
chat_info=msg_data.get("chat_info",{})
chat_stream=ChatStream.from_dict(chat_info)
user_info=msg_data.get("user_info",{})
user_info=UserInfo.from_dict(user_info)
chat_info = msg_data.get("chat_info", {})
chat_stream = ChatStream.from_dict(chat_info)
user_info = msg_data.get("user_info", {})
user_info = UserInfo.from_dict(user_info)
msg = Message(
message_id=msg_data["message_id"],
chat_stream=chat_stream,
time=msg_data["time"],
user_info=user_info,
processed_plain_text=msg_data.get("processed_text", ""),
detailed_plain_text=msg_data.get("detailed_plain_text", "")
detailed_plain_text=msg_data.get("detailed_plain_text", ""),
)
message_objects.append(msg)
except KeyError:
@@ -167,22 +172,26 @@ async def get_recent_group_messages(chat_id:str, limit: int = 12) -> list:
def get_recent_group_detailed_plain_text(chat_stream_id: int, limit: int = 12, combine=False):
recent_messages = list(db.messages.find(
{"chat_id": chat_stream_id},
{
"time": 1, # 返回时间字段
"chat_id":1,
"chat_info":1,
"user_info": 1,
"message_id": 1, # 返回消息ID字段
"detailed_plain_text": 1 # 返回处理后的文本字段
}
).sort("time", -1).limit(limit))
recent_messages = list(
db.messages.find(
{"chat_id": chat_stream_id},
{
"time": 1, # 返回时间字段
"chat_id": 1,
"chat_info": 1,
"user_info": 1,
"message_id": 1, # 返回消息ID字段
"detailed_plain_text": 1, # 返回处理后的文本字段
},
)
.sort("time", -1)
.limit(limit)
)
if not recent_messages:
return []
message_detailed_plain_text = ''
message_detailed_plain_text = ""
message_detailed_plain_text_list = []
# 反转消息列表,使最新的消息在最后
@@ -200,13 +209,17 @@ def get_recent_group_detailed_plain_text(chat_stream_id: int, limit: int = 12, c
def get_recent_group_speaker(chat_stream_id: int, sender, limit: int = 12) -> list:
# 获取当前群聊记录内发言的人
recent_messages = list(db.messages.find(
{"chat_id": chat_stream_id},
{
"chat_info": 1,
"user_info": 1,
}
).sort("time", -1).limit(limit))
recent_messages = list(
db.messages.find(
{"chat_id": chat_stream_id},
{
"chat_info": 1,
"user_info": 1,
},
)
.sort("time", -1)
.limit(limit)
)
if not recent_messages:
return []
@@ -216,11 +229,12 @@ def get_recent_group_speaker(chat_stream_id: int, sender, limit: int = 12) -> li
duplicate_removal = []
for msg_db_data in recent_messages:
user_info = UserInfo.from_dict(msg_db_data["user_info"])
if (user_info.user_id, user_info.platform) != sender \
and (user_info.user_id, user_info.platform) != (global_config.BOT_QQ, "qq") \
and (user_info.user_id, user_info.platform) not in duplicate_removal \
and len(duplicate_removal) < 5: # 排除重复排除消息发送者排除bot(此处bot的平台强制为了qq可能需要更改),限制加载的关系数目
if (
(user_info.user_id, user_info.platform) != sender
and (user_info.user_id, user_info.platform) != (global_config.BOT_QQ, "qq")
and (user_info.user_id, user_info.platform) not in duplicate_removal
and len(duplicate_removal) < 5
): # 排除重复排除消息发送者排除bot(此处bot的平台强制为了qq可能需要更改),限制加载的关系数目
duplicate_removal.append((user_info.user_id, user_info.platform))
chat_info = msg_db_data.get("chat_info", {})
who_chat_in_group.append(ChatStream.from_dict(chat_info))
@@ -252,45 +266,45 @@ def split_into_sentences_w_remove_punctuation(text: str) -> List[str]:
# print(f"处理前的文本: {text}")
# 统一将英文逗号转换为中文逗号
text = text.replace(',', '')
text = text.replace('\n', ' ')
text = text.replace(",", "")
text = text.replace("\n", " ")
text, mapping = protect_kaomoji(text)
# print(f"处理前的文本: {text}")
text_no_1 = ''
text_no_1 = ""
for letter in text:
# print(f"当前字符: {letter}")
if letter in ['!', '', '?', '']:
if letter in ["!", "", "?", ""]:
# print(f"当前字符: {letter}, 随机数: {random.random()}")
if random.random() < split_strength:
letter = ''
if letter in ['', '']:
letter = ""
if letter in ["", ""]:
# print(f"当前字符: {letter}, 随机数: {random.random()}")
if random.random() < 1 - split_strength:
letter = ''
letter = ""
text_no_1 += letter
# 对每个逗号单独判断是否分割
sentences = [text_no_1]
new_sentences = []
for sentence in sentences:
parts = sentence.split('')
parts = sentence.split("")
current_sentence = parts[0]
for part in parts[1:]:
if random.random() < split_strength:
new_sentences.append(current_sentence.strip())
current_sentence = part
else:
current_sentence += '' + part
current_sentence += "" + part
# 处理空格分割
space_parts = current_sentence.split(' ')
space_parts = current_sentence.split(" ")
current_sentence = space_parts[0]
for part in space_parts[1:]:
if random.random() < split_strength:
new_sentences.append(current_sentence.strip())
current_sentence = part
else:
current_sentence += ' ' + part
current_sentence += " " + part
new_sentences.append(current_sentence.strip())
sentences = [s for s in new_sentences if s] # 移除空字符串
sentences = recover_kaomoji(sentences, mapping)
@@ -298,11 +312,11 @@ def split_into_sentences_w_remove_punctuation(text: str) -> List[str]:
# print(f"分割后的句子: {sentences}")
sentences_done = []
for sentence in sentences:
sentence = sentence.rstrip(',')
sentence = sentence.rstrip(",")
if random.random() < split_strength * 0.5:
sentence = sentence.replace('', '').replace(',', '')
sentence = sentence.replace("", "").replace(",", "")
elif random.random() < split_strength:
sentence = sentence.replace('', ' ').replace(',', ' ')
sentence = sentence.replace("", " ").replace(",", " ")
sentences_done.append(sentence)
logger.info(f"处理后的句子: {sentences_done}")
@@ -311,26 +325,26 @@ def split_into_sentences_w_remove_punctuation(text: str) -> List[str]:
def random_remove_punctuation(text: str) -> str:
"""随机处理标点符号,模拟人类打字习惯
Args:
text: 要处理的文本
Returns:
str: 处理后的文本
"""
result = ''
result = ""
text_len = len(text)
for i, char in enumerate(text):
if char == '' and i == text_len - 1: # 结尾的句号
if char == "" and i == text_len - 1: # 结尾的句号
if random.random() > 0.4: # 80%概率删除结尾句号
continue
elif char == '':
elif char == "":
rand = random.random()
if rand < 0.25: # 5%概率删除逗号
continue
elif rand < 0.25: # 20%概率把逗号变成空格
result += ' '
result += " "
continue
result += char
return result
@@ -340,13 +354,13 @@ def process_llm_response(text: str) -> List[str]:
# processed_response = process_text_with_typos(content)
if len(text) > 100:
logger.warning(f"回复过长 ({len(text)} 字符),返回默认回复")
return ['懒得说']
return ["懒得说"]
# 处理长消息
typo_generator = ChineseTypoGenerator(
error_rate=global_config.chinese_typo_error_rate,
min_freq=global_config.chinese_typo_min_freq,
tone_error_rate=global_config.chinese_typo_tone_error_rate,
word_replace_rate=global_config.chinese_typo_word_replace_rate
word_replace_rate=global_config.chinese_typo_word_replace_rate,
)
split_sentences = split_into_sentences_w_remove_punctuation(text)
sentences = []
@@ -362,7 +376,7 @@ def process_llm_response(text: str) -> List[str]:
if len(sentences) > 3:
logger.warning(f"分割后消息数量过多 ({len(sentences)} 条),返回默认回复")
return [f'{global_config.BOT_NICKNAME}不知道哦']
return [f"{global_config.BOT_NICKNAME}不知道哦"]
return sentences
@@ -373,7 +387,7 @@ def calculate_typing_time(input_string: str, chinese_time: float = 0.4, english_
input_string (str): 输入的字符串
chinese_time (float): 中文字符的输入时间默认为0.2秒
english_time (float): 英文字符的输入时间默认为0.1秒
特殊情况:
- 如果只有一个中文字符将使用3倍的中文输入时间
- 在所有输入结束后额外加上回车时间0.3秒
@@ -382,11 +396,11 @@ def calculate_typing_time(input_string: str, chinese_time: float = 0.4, english_
# 将0-1的唤醒度映射到-1到1
mood_arousal = mood_manager.current_mood.arousal
# 映射到0.5到2倍的速度系数
typing_speed_multiplier = 1.5 ** mood_arousal # 唤醒度为1时速度翻倍,为-1时速度减半
typing_speed_multiplier = 1.5**mood_arousal # 唤醒度为1时速度翻倍,为-1时速度减半
chinese_time *= 1 / typing_speed_multiplier
english_time *= 1 / typing_speed_multiplier
# 计算中文字符数
chinese_chars = sum(1 for char in input_string if '\u4e00' <= char <= '\u9fff')
chinese_chars = sum(1 for char in input_string if "\u4e00" <= char <= "\u9fff")
# 如果只有一个中文字符使用3倍时间
if chinese_chars == 1 and len(input_string.strip()) == 1:
@@ -395,7 +409,7 @@ def calculate_typing_time(input_string: str, chinese_time: float = 0.4, english_
# 正常计算所有字符的输入时间
total_time = 0.0
for char in input_string:
if '\u4e00' <= char <= '\u9fff': # 判断是否为中文字符
if "\u4e00" <= char <= "\u9fff": # 判断是否为中文字符
total_time += chinese_time
else: # 其他字符(如英文)
total_time += english_time
@@ -451,7 +465,7 @@ def truncate_message(message: str, max_length=20) -> str:
def protect_kaomoji(sentence):
""""
""" "
识别并保护句子中的颜文字(含括号与无括号),将其替换为占位符,
并返回替换后的句子和占位符到颜文字的映射表。
Args:
@@ -460,17 +474,17 @@ def protect_kaomoji(sentence):
tuple: (处理后的句子, {占位符: 颜文字})
"""
kaomoji_pattern = re.compile(
r'('
r'[\(\[(【]' # 左括号
r'[^()\[\]()【】]*?' # 非括号字符(惰性匹配)
r'[^\u4e00-\u9fa5a-zA-Z0-9\s]' # 非中文、非英文、非数字、非空格字符(必须包含至少一个)
r'[^()\[\]()【】]*?' # 非括号字符(惰性匹配)
r'[\)\])】]' # 右括号
r')'
r'|'
r'('
r'[▼▽・ᴥω・﹏^><≧≦ ̄`´∀ヮДд︿﹀へ。゚╥╯╰︶︹•⁄]{2,15}'
r')'
r"("
r"[\(\[(【]" # 左括号
r"[^()\[\]()【】]*?" # 非括号字符(惰性匹配)
r"[^\u4e00-\u9fa5a-zA-Z0-9\s]" # 非中文、非英文、非数字、非空格字符(必须包含至少一个)
r"[^()\[\]()【】]*?" # 非括号字符(惰性匹配)
r"[\)\])】]" # 右括号
r")"
r"|"
r"("
r"[▼▽・ᴥω・﹏^><≧≦ ̄`´∀ヮДд︿﹀へ。゚╥╯╰︶︹•⁄]{2,15}"
r")"
)
kaomoji_matches = kaomoji_pattern.findall(sentence)
@@ -478,7 +492,7 @@ def protect_kaomoji(sentence):
for idx, match in enumerate(kaomoji_matches):
kaomoji = match[0] if match[0] else match[1]
placeholder = f'__KAOMOJI_{idx}__'
placeholder = f"__KAOMOJI_{idx}__"
sentence = sentence.replace(kaomoji, placeholder, 1)
placeholder_to_kaomoji[placeholder] = kaomoji
@@ -499,4 +513,4 @@ def recover_kaomoji(sentences, placeholder_to_kaomoji):
for placeholder, kaomoji in placeholder_to_kaomoji.items():
sentence = sentence.replace(placeholder, kaomoji)
recovered_sentences.append(sentence)
return recovered_sentences
return recovered_sentences