v0.2修改了一些东西

使概率配置生效
将一些模块解耦合
将组信息管理器合并到关系管理器,添加了可以全局调用的接口
精简了llm生成器的代码
精简了message代码
重写了回复后处理
This commit is contained in:
SengokuCola
2025-02-28 00:28:34 +08:00
parent 6938a97941
commit 870be0a426
14 changed files with 333 additions and 653 deletions

View File

@@ -5,6 +5,8 @@ from .message import Message
import requests
import numpy as np
from .config import llm_config
import re
def combine_messages(messages: List[Message]) -> str:
"""将消息列表组合成格式化的字符串
@@ -115,52 +117,85 @@ def get_recent_group_messages(db, group_id: int, limit: int = 12) -> list:
message_objects.reverse()
return message_objects
# NOTE(review): this span is unified-diff residue — lines of the OLD
# split_into_sentences and the NEW split_into_sentences_w_remove_punctuation
# are interleaved without +/- markers, so it is not valid Python as shown.
# NOTE(review): fullwidth CJK punctuation literals (。 ! ? , 《 》 …) were
# stripped during extraction and now appear as '' — TODO recover from VCS.
def split_into_sentences(text: str) -> List[str]:
def split_into_sentences_w_remove_punctuation(text: str) -> List[str]:
"""Split text into sentences, keeping bracketed book-title spans intact.

Args:
    text: the text string to split
Returns:
    List[str]: the resulting sentence fragments
"""
# --- OLD implementation state (deleted by this commit) ---
delimiters = ['', '', '', ',', '', '', '!', '?', '\n']  # sentence delimiters; fullwidth marks lost in extraction
remove_chars = ['', ',']  # only these two comma kinds are stripped
sentences = []
current_sentence = ""
in_book_title = False  # tracks whether we are inside book-title marks
# --- NEW implementation (added by this commit) ---
len_text = len(text)
if len_text < 5:
# very short text: 1% chance to split it into single characters
if random.random() < 0.01:
return list(text)
else:
return [text]
# split_strength grows with length: longer replies are split more aggressively
if len_text < 12:
split_strength = 0.3
elif len_text < 32:
split_strength = 0.7
else:
split_strength = 0.9
# first remove newline characters
# print(f"split_strength: {split_strength}")
# (OLD) accumulate characters one at a time
for char in text:
current_sentence += char
# print(f"text before processing: {text}")
# normalize ASCII commas to fullwidth commas (target literal lost in extraction)
text = text.replace(',', '')
text = text.replace('\n', ' ')
# print(f"text after processing: {text}")
text_no_1 = ''
for letter in text:
# print(f"current char: {letter}")
# terminal punctuation is probabilistically rewritten (replacement literal lost)
if letter in ['!','','?','']:
# print(f"current char: {letter}, random: {random.random()}")
if random.random() < split_strength:
letter = ''
if letter in ['','']:
# print(f"current char: {letter}, random: {random.random()}")
if random.random() < 1 - split_strength:
letter = ''
text_no_1 += letter
# (OLD) track book-title marks
if char == '':
in_book_title = True
elif char == '':
in_book_title = False
# decide independently for each comma whether to split there
sentences = [text_no_1]
new_sentences = []
for sentence in sentences:
parts = sentence.split('')
current_sentence = parts[0]
for part in parts[1:]:
if random.random() < split_strength:
new_sentences.append(current_sentence.strip())
current_sentence = part
else:
current_sentence += '' + part
# also probabilistically split on spaces
space_parts = current_sentence.split(' ')
current_sentence = space_parts[0]
for part in space_parts[1:]:
if random.random() < split_strength:
new_sentences.append(current_sentence.strip())
current_sentence = part
else:
current_sentence += ' ' + part
new_sentences.append(current_sentence.strip())
sentences = [s for s in new_sentences if s]  # drop empty strings
# print(f"split sentences: {sentences}")
sentences_done = []
for sentence in sentences:
sentence = sentence.rstrip(',')
# with some probability remove (or space-replace) any remaining commas
if random.random() < split_strength*0.5:
sentence = sentence.replace('', '').replace(',', '')
elif random.random() < split_strength:
sentence = sentence.replace('', ' ').replace(',', ' ')
sentences_done.append(sentence)
# (OLD) split only at a delimiter and only when outside book-title marks
if char in delimiters and not in_book_title:
if current_sentence.strip():  # make sure it is not an empty string
# strip trailing comma only
clean_sentence = current_sentence
if clean_sentence[-1] in remove_chars:
clean_sentence = clean_sentence[:-1]
if clean_sentence.strip():
sentences.append(clean_sentence.strip())
current_sentence = ""
# (OLD) handle the final sentence
if current_sentence.strip():
# drop a trailing comma if present
if current_sentence[-1] in remove_chars:
current_sentence = current_sentence[:-1]
sentences.append(current_sentence.strip())
# (OLD) filter out empty strings
sentences = [s for s in sentences if s.strip()]
return sentences
print(f"处理后的句子: {sentences_done}")
return sentences_done
# 常见的错别字映射
TYPO_DICT = {
@@ -259,16 +294,7 @@ def random_remove_punctuation(text: str) -> str:
return result
def add_typos(text: str) -> str:
"""Randomly inject typos into the text.

Args:
    text: the text to process
Returns:
    str: the text with typos injected
"""
TYPO_RATE = 0.02  # probability that an eligible character is replaced (2%)
result = ""
for char in text:
if char in TYPO_DICT and random.random() < TYPO_RATE:
# NOTE(review): the substitution branch (presumably appending the mapped
# typo from TYPO_DICT, plus an `else:`) was elided by the diff hunk
# boundary on the next line — TODO confirm against the repository.
@@ -279,15 +305,33 @@ def add_typos(text: str) -> str:
result += char
return result
def process_llm_response(text: str) -> List[str]:
    """Post-process a raw LLM reply into a list of send-ready message strings.

    Injects typos, then splits the reply into sentence fragments. Overly long
    replies, or replies that split into too many fragments, are replaced by a
    short canned fallback so the bot does not flood the chat.

    Args:
        text: the raw reply text produced by the LLM
    Returns:
        List[str]: the messages to send, or a single-element fallback list
    """
    # NOTE(review): reconstructed from diff residue — the original span
    # interleaved the deleted process_text_with_typos with this function.
    # processed_response = process_text_with_typos(content)
    if len(text) > 200:
        # reply too long: refuse with a canned message
        print(f"回复过长 ({len(text)} 字符),返回默认回复")
        return ['懒得说']
    # inject typos, then split into sentence fragments
    sentences = split_into_sentences_w_remove_punctuation(add_typos(text))
    # more than 3 fragments would spam the chat: fall back
    if len(sentences) > 3:
        print(f"分割后消息数量过多 ({len(sentences)} 条),返回默认回复")
        return ['麦麦不知道哦']
    return sentences
def calculate_typing_time(input_string: str, chinese_time: float = 0.2, english_time: float = 0.1) -> float:
    """Estimate how long typing *input_string* would take, in seconds.

    CJK characters (U+4E00..U+9FFF) are assumed slower to type than other
    characters (e.g. ASCII letters), so each kind has its own per-char cost.

    Args:
        input_string: the string whose typing time is being estimated
        chinese_time: seconds per CJK character (default 0.2)
        english_time: seconds per non-CJK character (default 0.1)
    Returns:
        float: total estimated typing time in seconds
    """
    # NOTE(review): reconstructed from diff residue — stray lines of the
    # deleted process_text_with_typos were interleaved here and are removed.
    # The old docstring claimed defaults 0.3/0.15; the actual signature
    # defaults are 0.2/0.1, which is what is documented above.
    total_time = 0.0
    for char in input_string:
        if '\u4e00' <= char <= '\u9fff':  # CJK Unified Ideographs block
            total_time += chinese_time
        else:  # any other character (e.g. English letters, digits, punctuation)
            total_time += english_time
    return total_time