Merge remote-tracking branch 'upstream/debug' into feature

2025-03-04 08:18:22 +08:00
parent 5db7bbda94 5f2b474c63
commit d3b299d267
34 changed files with 13732 additions and 413 deletions
--- a/src/plugins/chat/topic_identifier.py
+++ b/src/plugins/chat/topic_identifier.py
@@ -4,6 +4,8 @@ from .message import Message
 import jieba
 from nonebot import get_driver
 from .config import global_config
+from snownlp import SnowNLP
+from ..models.utils_model import LLM_request

 driver = get_driver()
 config = driver.config
@@ -11,12 +13,10 @@ config = driver.config

 class TopicIdentifier:
    def __init__(self):
-        self.client = OpenAI(
-            api_key=config.siliconflow_key, base_url=config.siliconflow_base_url
-        )
-
-    def identify_topic_llm(self, text: str) -> Optional[str]:
-        """识别消息主题"""
+        self.llm_client = LLM_request(model=global_config.llm_normal)
+        
+    async def identify_topic_llm(self, text: str) -> Optional[List[str]]:
+        """识别消息主题，返回主题列表"""

        prompt = f"""判断这条消息的主题，如果没有明显主题请回复"无主题"，要求：\
            1. 主题通常2-4个字，必须简短，要求精准概括，不要太具体。\
@@ -24,36 +24,20 @@ class TopicIdentifier:
            3. 这里是
            消息内容：{text}"""

-        response = self.client.chat.completions.create(
-            model=global_config.SILICONFLOW_MODEL_V3,
-            messages=[{"role": "user", "content": prompt}],
-            temperature=0.8,
-            max_tokens=10,
-        )
-
-        if not response or not response.choices:
-            print(f"\033[1;31m[错误]\033[0m OpenAI API 返回为空")
+        # 使用 LLM_request 类进行请求
+        topic, _ = await self.llm_client.generate_response(prompt)
+        
+        if not topic:
+            print(f"\033[1;31m[错误]\033[0m LLM API 返回为空")
            return None
-
-        # 从 OpenAI API 响应中获取第一个选项的消息内容,并去除首尾空白字符
-        topic = (
-            response.choices[0].message.content.strip()
-            if response.choices[0].message.content
-            else None
-        )
-
-        if topic == "无主题":
-            return None
-        else:
-            # print(f"[主题分析结果]{text[:20]}... : {topic}")
-            split_topic = self.parse_topic(topic)
-            return split_topic
-
-    def parse_topic(self, topic: str) -> List[str]:
-        """解析主题，返回主题列表"""
+            
+        # 直接在这里处理主题解析
        if not topic or topic == "无主题":
-            return []
-        return [t.strip() for t in topic.split(",") if t.strip()]
+            return None
+            
+        # 解析主题字符串为列表
+        topic_list = [t.strip() for t in topic.split(",") if t.strip()]
+        return topic_list if topic_list else None

    def identify_topic_jieba(self, text: str) -> Optional[str]:
        """使用jieba识别主题"""
@@ -239,33 +223,12 @@ class TopicIdentifier:
        filtered_words = []
        for word in words:
            if word not in stop_words and not word.strip() in {
-                "。",
-                "，",
-                "、",
-                "：",
-                "；",
-                "！",
-                "？",
-                '"',
-                '"',
-                """, """,
-                "（",
-                "）",
-                "【",
-                "】",
-                "《",
-                "》",
-                "…",
-                "—",
-                "·",
-                "、",
-                "~",
-                "～",
-                "+",
-                "=",
-                "-",
-                "[",
-                "]",
+                '。', '，', '、', '：', '；', '！', '？', '"', '"', ''', ''',
+                '（', '）', '【', '】', '《', '》', '…', '—', '·', '、', '~',
+                '～', '+', '=', '-', '/', '\\', '|', '*', '#', '@', '$', '%',
+                '^', '&', '[', ']', '{', '}', '<', '>', '`', '_', '.', ',',
+                ';', ':', '\'', '"', '(', ')', '?', '!', '±', '×', '÷', '≠',
+                '≈', '∈', '∉', '⊆', '⊇', '⊂', '⊃', '∪', '∩', '∧', '∨'
            }:
                filtered_words.append(word)

@@ -280,5 +243,25 @@ class TopicIdentifier:

        return top_words if top_words else None

+    def identify_topic_snownlp(self, text: str) -> Optional[List[str]]:
+        """使用 SnowNLP 进行主题识别
+        
+        Args:
+            text (str): 需要识别主题的文本
+            
+        Returns:
+            Optional[List[str]]: 返回识别出的主题关键词列表，如果无法识别则返回 None
+        """
+        if not text or len(text.strip()) == 0:
+            return None
+            
+        try:
+            s = SnowNLP(text)
+            # 提取前3个关键词作为主题
+            keywords = s.keywords(3)
+            return keywords if keywords else None
+        except Exception as e:
+            print(f"\033[1;31m[错误]\033[0m SnowNLP 处理失败: {str(e)}")
+            return None

 topic_identifier = TopicIdentifier()