v0.2: changed a few things
Made the probability configuration actually take effect; decoupled some modules; merged the group info manager into the relationship manager and added a globally callable interface; streamlined the llm generator code; streamlined the message code; rewrote the reply post-processing.
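For orientation only, a minimal sketch of how the rewritten reply post-processing could be driven. The send loop below is an illustration, not part of this commit; it assumes process_llm_response and calculate_typing_time from the diff below are in scope:

    import time

    def send_reply(raw_reply: str, send) -> None:
        # split the raw LLM reply into short, chat-like messages
        # (typo injection and punctuation tweaks happen inside process_llm_response)
        for sentence in process_llm_response(raw_reply):
            # wait roughly as long as a human would need to type the piece, then send it
            time.sleep(calculate_typing_time(sentence))
            send(sentence)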
@@ -5,6 +5,8 @@ from .message import Message
 import requests
 import numpy as np
+from .config import llm_config
+import re


 def combine_messages(messages: List[Message]) -> str:
     """Combine a list of messages into a formatted string
@@ -115,52 +117,85 @@ def get_recent_group_messages(db, group_id: int, limit: int = 12) -> list:
     message_objects.reverse()
     return message_objects


-def split_into_sentences(text: str) -> List[str]:
+def split_into_sentences_w_remove_punctuation(text: str) -> List[str]:
     """Split the text into sentences while keeping content inside 《》 book-title marks intact

     Args:
         text: the text string to split

     Returns:
         List[str]: the list of resulting sentences
     """
-    delimiters = ['。', '!', ',', ',', '?', '…', '!', '?', '\n']  # newline is included as a delimiter
-    remove_chars = [',', ',']  # only these two comma forms are removed
-    sentences = []
-    current_sentence = ""
-    in_book_title = False  # whether we are inside book-title marks
+    len_text = len(text)
+    if len_text < 5:
+        if random.random() < 0.01:
+            return list(text)  # very short text that hits the random check is split into single characters
+        else:
+            return [text]
+    if len_text < 12:
+        split_strength = 0.3
+    elif len_text < 32:
+        split_strength = 0.7
+    else:
+        split_strength = 0.9
+    # strip newline characters first
+    # print(f"split_strength: {split_strength}")
-    for char in text:
-        current_sentence += char
+    # print(f"text before processing: {text}")
+
+    # normalize ASCII commas to Chinese commas
+    text = text.replace(',', ',')
+    text = text.replace('\n', ' ')
+
+    # print(f"text before processing: {text}")
+
+    text_no_1 = ''
+    for letter in text:
+        # print(f"current char: {letter}")
+        if letter in ['!', '!', '?', '?']:
+            # print(f"current char: {letter}, random: {random.random()}")
+            if random.random() < split_strength:
+                letter = ''
+        if letter in ['。', '…']:
+            # print(f"current char: {letter}, random: {random.random()}")
+            if random.random() < 1 - split_strength:
+                letter = ''
+        text_no_1 += letter
-        # check for book-title marks
-        if char == '《':
-            in_book_title = True
-        elif char == '》':
-            in_book_title = False
+    # decide independently for each comma whether to split there
+    sentences = [text_no_1]
+    new_sentences = []
+    for sentence in sentences:
+        parts = sentence.split(',')
+        current_sentence = parts[0]
+        for part in parts[1:]:
+            if random.random() < split_strength:
+                new_sentences.append(current_sentence.strip())
+                current_sentence = part
+            else:
+                current_sentence += ',' + part
+        # handle splitting on spaces
+        space_parts = current_sentence.split(' ')
+        current_sentence = space_parts[0]
+        for part in space_parts[1:]:
+            if random.random() < split_strength:
+                new_sentences.append(current_sentence.strip())
+                current_sentence = part
+            else:
+                current_sentence += ' ' + part
+        new_sentences.append(current_sentence.strip())
+    sentences = [s for s in new_sentences if s]  # remove empty strings
+
+    # print(f"sentences after splitting: {sentences}")
+    sentences_done = []
+    for sentence in sentences:
+        sentence = sentence.rstrip(',,')
+        if random.random() < split_strength * 0.5:
+            sentence = sentence.replace(',', '').replace(',', '')
+        elif random.random() < split_strength:
+            sentence = sentence.replace(',', ' ').replace(',', ' ')
+        sentences_done.append(sentence)
-        # only split on a delimiter when not inside book-title marks
-        if char in delimiters and not in_book_title:
-            if current_sentence.strip():  # make sure it is not an empty string
-                # only strip the trailing comma
-                clean_sentence = current_sentence
-                if clean_sentence[-1] in remove_chars:
-                    clean_sentence = clean_sentence[:-1]
-                if clean_sentence.strip():
-                    sentences.append(clean_sentence.strip())
-            current_sentence = ""
-
-    # handle the last sentence
-    if current_sentence.strip():
-        # if the last character is a comma, remove it
-        if current_sentence[-1] in remove_chars:
-            current_sentence = current_sentence[:-1]
-        sentences.append(current_sentence.strip())
-
-    # filter out empty strings
-    sentences = [s for s in sentences if s.strip()]
-
-    return sentences
print(f"处理后的句子: {sentences_done}")
|
||||
return sentences_done
|
||||
|
||||
# 常见的错别字映射
|
||||
TYPO_DICT = {
|
||||
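For a sense of what the new splitter does, here is a small usage sketch. It assumes it runs inside the module this diff touches (so split_into_sentences_w_remove_punctuation is in scope), and the output shown is only one possible run, since every split and punctuation removal is decided by random.random() against split_strength:

    text = "今天天气不错,我们去公园吧!顺便带上《三体》,路上可以看。"
    print(split_into_sentences_w_remove_punctuation(text))
    # One possible result: the '!' is dropped, both commas become split points,
    # and the trailing '。' happens to survive:
    # ['今天天气不错', '我们去公园吧顺便带上《三体》', '路上可以看。']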
@@ -259,16 +294,7 @@ def random_remove_punctuation(text: str) -> str:
     return result

 def add_typos(text: str) -> str:
-    """Randomly add typos to the text
-
-    Args:
-        text: the text to process
-
-    Returns:
-        str: the text with typos added
-    """
     TYPO_RATE = 0.02  # probability of producing a typo (2%)
-
     result = ""
     for char in text:
         if char in TYPO_DICT and random.random() < TYPO_RATE:
@@ -279,15 +305,33 @@ def add_typos(text: str) -> str:
             result += char
     return result

-def process_text_with_typos(text: str) -> str:
-    """Process the text: add typos and adjust punctuation
-
-    Args:
-        text: the text to process
-
-    Returns:
-        str: the processed text
-    """
-    if random.random() < 0.9:  # process with 90% probability
-        return random_remove_punctuation(add_typos(text))
-    return text
+def process_llm_response(text: str) -> List[str]:
+    # processed_response = process_text_with_typos(content)
+    if len(text) > 200:
+        print(f"回复过长 ({len(text)} 字符),返回默认回复")  # reply too long, fall back to a default reply
+        return ['懒得说']
+    # handle long messages
+    sentences = split_into_sentences_w_remove_punctuation(add_typos(text))
+    # check whether the split produced too many messages (more than 3)
+    if len(sentences) > 3:
+        print(f"分割后消息数量过多 ({len(sentences)} 条),返回默认回复")  # too many pieces after splitting, fall back
+        return ['麦麦不知道哦']
+
+    return sentences
+
+
+def calculate_typing_time(input_string: str, chinese_time: float = 0.2, english_time: float = 0.1) -> float:
+    """
+    Estimate the time needed to type the input string; Chinese and other characters take different amounts of time.
+
+    input_string (str): the input string
+    chinese_time (float): time per Chinese character, 0.2 s by default
+    english_time (float): time per other (e.g. English) character, 0.1 s by default
+    """
+    total_time = 0.0
+    for char in input_string:
+        if '\u4e00' <= char <= '\u9fff':  # check whether the character is Chinese
+            total_time += chinese_time
+        else:  # other characters (e.g. English)
+            total_time += english_time
+    return total_time
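As a quick sanity check of the typing-time estimate above, a small worked example (assumed to run in the same module, using the default 0.2 s / 0.1 s per-character times):

    # "你好" = 2 Chinese characters, "ok!" = 3 other characters
    # expected delay: 2 * 0.2 + 3 * 0.1 = 0.7 seconds
    delay = calculate_typing_time("你好ok!")
    print(round(delay, 2))  # 0.7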