improve 新款表情包系统

This commit is contained in:
SengokuCola
2025-03-09 00:36:58 +08:00
parent 42144ed943
commit 583c276c91
2 changed files with 64 additions and 15 deletions

View File

@@ -12,6 +12,7 @@ from ..models.utils_model import LLM_request
from ..utils.typo_generator import ChineseTypoGenerator
from .config import global_config
from .message import Message
from ..moods.moods import MoodManager
driver = get_driver()
config = driver.config
@@ -326,7 +327,7 @@ def random_remove_punctuation(text: str) -> str:
def process_llm_response(text: str) -> List[str]:
# processed_response = process_text_with_typos(content)
if len(text) > 300:
if len(text) > 200:
print(f"回复过长 ({len(text)} 字符),返回默认回复")
return ['懒得说']
# 处理长消息
@@ -336,30 +337,55 @@ def process_llm_response(text: str) -> List[str]:
tone_error_rate=0.2,
word_replace_rate=0.02
)
typoed_text = typo_generator.create_typo_sentence(text)[0]
sentences = split_into_sentences_w_remove_punctuation(typoed_text)
split_sentences = split_into_sentences_w_remove_punctuation(text)
sentences = []
for sentence in split_sentences:
typoed_text, typo_corrections = typo_generator.create_typo_sentence(sentence)
sentences.append(typoed_text)
if typo_corrections:
sentences.append(typo_corrections)
# 检查分割后的消息数量是否过多超过3条
if len(sentences) > 4:
if len(sentences) > 5:
print(f"分割后消息数量过多 ({len(sentences)} 条),返回默认回复")
return [f'{global_config.BOT_NICKNAME}不知道哦']
return sentences
def calculate_typing_time(input_string: str, chinese_time: float = 0.2, english_time: float = 0.1) -> float:
def calculate_typing_time(input_string: str, chinese_time: float = 0.4, english_time: float = 0.2) -> float:
"""
计算输入字符串所需的时间,中文和英文字符有不同的输入时间
input_string (str): 输入的字符串
chinese_time (float): 中文字符的输入时间默认为0.3
english_time (float): 英文字符的输入时间默认为0.15
chinese_time (float): 中文字符的输入时间默认为0.2
english_time (float): 英文字符的输入时间默认为0.1秒
特殊情况:
- 如果只有一个中文字符将使用3倍的中文输入时间
- 在所有输入结束后额外加上回车时间0.3秒
"""
mood_manager = MoodManager.get_instance()
# 将0-1的唤醒度映射到-1到1
mood_arousal = mood_manager.current_mood.arousal
# 映射到0.5到2倍的速度系数
typing_speed_multiplier = 1.5 ** mood_arousal # 唤醒度为1时速度翻倍,为-1时速度减半
chinese_time *= 1/typing_speed_multiplier
english_time *= 1/typing_speed_multiplier
# 计算中文字符数
chinese_chars = sum(1 for char in input_string if '\u4e00' <= char <= '\u9fff')
# 如果只有一个中文字符使用3倍时间
if chinese_chars == 1 and len(input_string.strip()) == 1:
return chinese_time * 3 + 0.3 # 加上回车时间
# 正常计算所有字符的输入时间
total_time = 0.0
for char in input_string:
if '\u4e00' <= char <= '\u9fff': # 判断是否为中文字符
total_time += chinese_time
else: # 其他字符(如英文)
total_time += english_time
return total_time
return total_time + 0.3 # 加上回车时间
def cosine_similarity(v1, v2):

View File

@@ -284,10 +284,13 @@ class ChineseTypoGenerator:
返回:
typo_sentence: 包含错别字的句子
typo_info: 错别字信息列表
correction_suggestion: 随机选择的一个纠正建议,返回正确的字/词
"""
result = []
typo_info = []
word_typos = [] # 记录词语错误对(错词,正确词)
char_typos = [] # 记录单字错误对(错字,正确字)
current_pos = 0
# 分词
words = self._segment_sentence(sentence)
@@ -296,6 +299,7 @@ class ChineseTypoGenerator:
# 如果是标点符号或空格,直接添加
if all(not self._is_chinese_char(c) for c in word):
result.append(word)
current_pos += len(word)
continue
# 获取词语的拼音
@@ -316,6 +320,8 @@ class ChineseTypoGenerator:
' '.join(word_pinyin),
' '.join(self._get_word_pinyin(typo_word)),
orig_freq, typo_freq))
word_typos.append((typo_word, word)) # 记录(错词,正确词)对
current_pos += len(typo_word)
continue
# 如果不进行整词替换,则进行单字替换
@@ -333,11 +339,15 @@ class ChineseTypoGenerator:
result.append(typo_char)
typo_py = pinyin(typo_char, style=Style.TONE3)[0][0]
typo_info.append((char, typo_char, py, typo_py, orig_freq, typo_freq))
char_typos.append((typo_char, char)) # 记录(错字,正确字)对
current_pos += 1
continue
result.append(char)
current_pos += 1
else:
# 处理多字词的单字替换
word_result = []
word_start_pos = current_pos
for i, (char, py) in enumerate(zip(word, word_pinyin)):
# 词中的字替换概率降低
word_error_rate = self.error_rate * (0.7 ** (len(word) - 1))
@@ -353,11 +363,24 @@ class ChineseTypoGenerator:
word_result.append(typo_char)
typo_py = pinyin(typo_char, style=Style.TONE3)[0][0]
typo_info.append((char, typo_char, py, typo_py, orig_freq, typo_freq))
char_typos.append((typo_char, char)) # 记录(错字,正确字)对
continue
word_result.append(char)
result.append(''.join(word_result))
current_pos += len(word)
return ''.join(result), typo_info
# 优先从词语错误中选择,如果没有则从单字错误中选择
correction_suggestion = None
# 50%概率返回纠正建议
if random.random() < 0.5:
if word_typos:
wrong_word, correct_word = random.choice(word_typos)
correction_suggestion = correct_word
elif char_typos:
wrong_char, correct_char = random.choice(char_typos)
correction_suggestion = correct_char
return ''.join(result), correction_suggestion
def format_typo_info(self, typo_info):
"""
@@ -419,16 +442,16 @@ def main():
# 创建包含错别字的句子
start_time = time.time()
typo_sentence, typo_info = typo_generator.create_typo_sentence(sentence)
typo_sentence, correction_suggestion = typo_generator.create_typo_sentence(sentence)
# 打印结果
print("\n原句:", sentence)
print("错字版:", typo_sentence)
# 打印错别字信息
if typo_info:
print("\n错别字信息")
print(typo_generator.format_typo_info(typo_info))
# 打印纠正建议
if correction_suggestion:
print("\n随机纠正建议")
print(f"应该改为:{correction_suggestion}")
# 计算并打印总耗时
end_time = time.time()