v0.2修改了一些东西

使概率配置生效
将一些模块解耦合
将组信息管理器合并到关系管理器,添加了可以全局调用的接口
精简了llm生成器的代码
精简了message代码
重写了回复后处理
This commit is contained in:
SengokuCola
2025-02-28 00:28:34 +08:00
parent 6938a97941
commit 870be0a426
14 changed files with 333 additions and 653 deletions

View File

@@ -5,6 +5,8 @@ from .message import Message
import requests
import numpy as np
from .config import llm_config
import re
def combine_messages(messages: List[Message]) -> str:
"""将消息列表组合成格式化的字符串
@@ -115,52 +117,85 @@ def get_recent_group_messages(db, group_id: int, limit: int = 12) -> list:
message_objects.reverse()
return message_objects
# NOTE(review): this span is unified-diff residue — lines of the OLD
# split_into_sentences and the NEW split_into_sentences_w_remove_punctuation
# are interleaved without +/- markers, so it is not valid Python as shown.
# NOTE(review): fullwidth CJK punctuation literals (。 ! ? , 《 》 …) were
# stripped during extraction and now appear as '' — TODO recover from VCS.
def split_into_sentences(text: str) -> List[str]:
def split_into_sentences_w_remove_punctuation(text: str) -> List[str]:
"""Split text into sentences, keeping bracketed book-title spans intact.

Args:
    text: the text string to split
Returns:
    List[str]: the resulting sentence fragments
"""
# --- OLD implementation state (deleted by this commit) ---
delimiters = ['', '', '', ',', '', '', '!', '?', '\n']  # sentence delimiters; fullwidth marks lost in extraction
remove_chars = ['', ',']  # only these two comma kinds are stripped
sentences = []
current_sentence = ""
in_book_title = False  # tracks whether we are inside book-title marks
# --- NEW implementation (added by this commit) ---
len_text = len(text)
if len_text < 5:
# very short text: 1% chance to split it into single characters
if random.random() < 0.01:
return list(text)
else:
return [text]
# split_strength grows with length: longer replies are split more aggressively
if len_text < 12:
split_strength = 0.3
elif len_text < 32:
split_strength = 0.7
else:
split_strength = 0.9
# first remove newline characters
# print(f"split_strength: {split_strength}")
# (OLD) accumulate characters one at a time
for char in text:
current_sentence += char
# print(f"text before processing: {text}")
# normalize ASCII commas to fullwidth commas (target literal lost in extraction)
text = text.replace(',', '')
text = text.replace('\n', ' ')
# print(f"text after processing: {text}")
text_no_1 = ''
for letter in text:
# print(f"current char: {letter}")
# terminal punctuation is probabilistically rewritten (replacement literal lost)
if letter in ['!','','?','']:
# print(f"current char: {letter}, random: {random.random()}")
if random.random() < split_strength:
letter = ''
if letter in ['','']:
# print(f"current char: {letter}, random: {random.random()}")
if random.random() < 1 - split_strength:
letter = ''
text_no_1 += letter
# (OLD) track book-title marks
if char == '':
in_book_title = True
elif char == '':
in_book_title = False
# decide independently for each comma whether to split there
sentences = [text_no_1]
new_sentences = []
for sentence in sentences:
parts = sentence.split('')
current_sentence = parts[0]
for part in parts[1:]:
if random.random() < split_strength:
new_sentences.append(current_sentence.strip())
current_sentence = part
else:
current_sentence += '' + part
# also probabilistically split on spaces
space_parts = current_sentence.split(' ')
current_sentence = space_parts[0]
for part in space_parts[1:]:
if random.random() < split_strength:
new_sentences.append(current_sentence.strip())
current_sentence = part
else:
current_sentence += ' ' + part
new_sentences.append(current_sentence.strip())
sentences = [s for s in new_sentences if s]  # drop empty strings
# print(f"split sentences: {sentences}")
sentences_done = []
for sentence in sentences:
sentence = sentence.rstrip(',')
# with some probability remove (or space-replace) any remaining commas
if random.random() < split_strength*0.5:
sentence = sentence.replace('', '').replace(',', '')
elif random.random() < split_strength:
sentence = sentence.replace('', ' ').replace(',', ' ')
sentences_done.append(sentence)
# (OLD) split only at a delimiter and only when outside book-title marks
if char in delimiters and not in_book_title:
if current_sentence.strip():  # make sure it is not an empty string
# strip trailing comma only
clean_sentence = current_sentence
if clean_sentence[-1] in remove_chars:
clean_sentence = clean_sentence[:-1]
if clean_sentence.strip():
sentences.append(clean_sentence.strip())
current_sentence = ""
# (OLD) handle the final sentence
if current_sentence.strip():
# drop a trailing comma if present
if current_sentence[-1] in remove_chars:
current_sentence = current_sentence[:-1]
sentences.append(current_sentence.strip())
# (OLD) filter out empty strings
sentences = [s for s in sentences if s.strip()]
return sentences
print(f"处理后的句子: {sentences_done}")
return sentences_done
# 常见的错别字映射
TYPO_DICT = {
@@ -259,16 +294,7 @@ def random_remove_punctuation(text: str) -> str:
return result
def add_typos(text: str) -> str:
"""Randomly inject typos into the text.

Args:
    text: the text to process
Returns:
    str: the text with typos injected
"""
TYPO_RATE = 0.02  # probability that an eligible character is replaced (2%)
result = ""
for char in text:
if char in TYPO_DICT and random.random() < TYPO_RATE:
# NOTE(review): the substitution branch (presumably appending the mapped
# typo from TYPO_DICT, plus an `else:`) was elided by the diff hunk
# boundary on the next line — TODO confirm against the repository.
@@ -279,15 +305,33 @@ def add_typos(text: str) -> str:
result += char
return result
def process_llm_response(text: str) -> List[str]:
    """Post-process a raw LLM reply into a list of send-ready message strings.

    Injects typos, then splits the reply into sentence fragments. Overly long
    replies, or replies that split into too many fragments, are replaced by a
    short canned fallback so the bot does not flood the chat.

    Args:
        text: the raw reply text produced by the LLM
    Returns:
        List[str]: the messages to send, or a single-element fallback list
    """
    # NOTE(review): reconstructed from diff residue — the original span
    # interleaved the deleted process_text_with_typos with this function.
    # processed_response = process_text_with_typos(content)
    if len(text) > 200:
        # reply too long: refuse with a canned message
        print(f"回复过长 ({len(text)} 字符),返回默认回复")
        return ['懒得说']
    # inject typos, then split into sentence fragments
    sentences = split_into_sentences_w_remove_punctuation(add_typos(text))
    # more than 3 fragments would spam the chat: fall back
    if len(sentences) > 3:
        print(f"分割后消息数量过多 ({len(sentences)} 条),返回默认回复")
        return ['麦麦不知道哦']
    return sentences
def calculate_typing_time(input_string: str, chinese_time: float = 0.2, english_time: float = 0.1) -> float:
    """Estimate how long typing *input_string* would take, in seconds.

    CJK characters (U+4E00..U+9FFF) are assumed slower to type than other
    characters (e.g. ASCII letters), so each kind has its own per-char cost.

    Args:
        input_string: the string whose typing time is being estimated
        chinese_time: seconds per CJK character (default 0.2)
        english_time: seconds per non-CJK character (default 0.1)
    Returns:
        float: total estimated typing time in seconds
    """
    # NOTE(review): reconstructed from diff residue — stray lines of the
    # deleted process_text_with_typos were interleaved here and are removed.
    # The old docstring claimed defaults 0.3/0.15; the actual signature
    # defaults are 0.2/0.1, which is what is documented above.
    total_time = 0.0
    for char in input_string:
        if '\u4e00' <= char <= '\u9fff':  # CJK Unified Ideographs block
            total_time += chinese_time
        else:  # any other character (e.g. English letters, digits, punctuation)
            total_time += english_time
    return total_time