更新文档

1
2025-03-03 22:27:11 +08:00
parent 7ef6b23632
commit ecc7c33932
9 changed files with 675 additions and 568 deletions
--- a/src/plugins/chat/topic_identifier.py
+++ b/src/plugins/chat/topic_identifier.py
@@ -4,19 +4,18 @@ from .message import Message
 import jieba
 from nonebot import get_driver
 from .config import global_config
+from snownlp import SnowNLP
+from ..models.utils_model import LLM_request

 driver = get_driver()
 config = driver.config  

 class TopicIdentifier:
    def __init__(self):
-        self.client = OpenAI(
-            api_key=config.siliconflow_key,
-            base_url=config.siliconflow_base_url
-        )
+        self.llm_client = LLM_request(model=global_config.llm_normal)
        
-    def identify_topic_llm(self, text: str) -> Optional[str]:
-        """识别消息主题"""
+    async def identify_topic_llm(self, text: str) -> Optional[List[str]]:
+        """识别消息主题，返回主题列表"""

        prompt = f"""判断这条消息的主题，如果没有明显主题请回复"无主题"，要求：
 1. 主题通常2-4个字，必须简短，要求精准概括，不要太具体。
@@ -24,33 +23,20 @@ class TopicIdentifier:

 消息内容：{text}"""

-        response = self.client.chat.completions.create(
-            model=global_config.SILICONFLOW_MODEL_V3,
-            messages=[{"role": "user", "content": prompt}],
-            temperature=0.8,
-            max_tokens=10
-        )
+        # 使用 LLM_request 类进行请求
+        topic, _ = await self.llm_client.generate_response(prompt)
        
-        if not response or not response.choices:
-            print(f"\033[1;31m[错误]\033[0m OpenAI API 返回为空")
+        if not topic:
+            print(f"\033[1;31m[错误]\033[0m LLM API 返回为空")
            return None
            
-        # 从 OpenAI API 响应中获取第一个选项的消息内容,并去除首尾空白字符
-        topic = response.choices[0].message.content.strip() if response.choices[0].message.content else None
-        
-        if topic == "无主题":
-            return None
-        else:
-            # print(f"[主题分析结果]{text[:20]}... : {topic}")
-            split_topic = self.parse_topic(topic)
-            return split_topic
-
-
-    def parse_topic(self, topic: str) -> List[str]:
-        """解析主题，返回主题列表"""
+        # 直接在这里处理主题解析
        if not topic or topic == "无主题":
-            return []
-        return [t.strip() for t in topic.split(",") if t.strip()]
+            return None
+            
+        # 解析主题字符串为列表
+        topic_list = [t.strip() for t in topic.split(",") if t.strip()]
+        return topic_list if topic_list else None

    def identify_topic_jieba(self, text: str) -> Optional[str]:
        """使用jieba识别主题"""
@@ -80,9 +66,12 @@ class TopicIdentifier:
        filtered_words = []
        for word in words:
            if word not in stop_words and not word.strip() in {
-                '。', '，', '、', '：', '；', '！', '？', '"', '"', ''', ''', 
-                '（', '）', '【', '】', '《', '》', '…', '—', '·', '、', '~', 
-                '～', '+', '=', '-'
+                '。', '，', '、', '：', '；', '！', '？', '"', '"', ''', ''',
+                '（', '）', '【', '】', '《', '》', '…', '—', '·', '、', '~',
+                '～', '+', '=', '-', '/', '\\', '|', '*', '#', '@', '$', '%',
+                '^', '&', '[', ']', '{', '}', '<', '>', '`', '_', '.', ',',
+                ';', ':', '\'', '"', '(', ')', '?', '!', '±', '×', '÷', '≠',
+                '≈', '∈', '∉', '⊆', '⊇', '⊂', '⊃', '∪', '∩', '∧', '∨'
            }:
                filtered_words.append(word)
        
@@ -97,4 +86,25 @@ class TopicIdentifier:
        
        return top_words if top_words else None

-topic_identifier = TopicIdentifier()
+    def identify_topic_snownlp(self, text: str) -> Optional[List[str]]:
+        """Identify the topics of a text using SnowNLP keyword extraction.
+        
+        Args:
+            text (str): The text whose topics should be identified.
+            
+        Returns:
+            Optional[List[str]]: Up to 3 SnowNLP keywords; None if text is blank, nothing is found, or SnowNLP raises.
+        """
+        if not text or len(text.strip()) == 0:
+            return None
+            
+        try:
+            s = SnowNLP(text)
+            # Request the top 3 keywords and use them as the topics
+            keywords = s.keywords(3)
+            return keywords if keywords else None
+        except Exception as e:
+            print(f"\033[1;31m[错误]\033[0m SnowNLP 处理失败: {str(e)}")
+            return None
+
+topic_identifier = TopicIdentifier()