fix: fix the mysterious formatter

tcmofashi
2025-03-04 10:19:43 +08:00
parent ae830ff1ec
commit 3be012f8bb
2 changed files with 76 additions and 277 deletions


@@ -8,8 +8,7 @@ from snownlp import SnowNLP
from ..models.utils_model import LLM_request
driver = get_driver()
config = driver.config
config = driver.config
class TopicIdentifier:
def __init__(self):
@@ -18,11 +17,11 @@ class TopicIdentifier:
async def identify_topic_llm(self, text: str) -> Optional[List[str]]:
"""识别消息主题,返回主题列表"""
prompt = f"""判断这条消息的主题,如果没有明显主题请回复"无主题",要求:\
1. 主题通常2-4个字必须简短要求精准概括不要太具体。\
2. 建议给出多个主题,之间用英文逗号分割。只输出主题本身就好,不要有前后缀。\
3. 这里是
消息内容:{text}"""
prompt = f"""判断这条消息的主题,如果没有明显主题请回复"无主题",要求:
1. 主题通常2-4个字必须简短要求精准概括不要太具体。
2. 建议给出多个主题,之间用英文逗号分割。只输出主题本身就好,不要有前后缀。
消息内容:{text}"""
# 使用 LLM_request 类进行请求
topic, _ = await self.llm_client.generate_response(prompt)
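The prompt above asks the model either to reply with several short topics separated by English commas or to answer "无主题" (no topic). A minimal sketch of how such a reply could be turned into the method's Optional[List[str]] result; the helper name parse_topic_reply and the exact reply format are assumptions for illustration, not code from this commit:

from typing import List, Optional

def parse_topic_reply(reply: str) -> Optional[List[str]]:
    # Hypothetical helper: treat "无主题" or an empty reply as "no topic found",
    # otherwise split the comma-separated answer into individual topics.
    if not reply or reply.strip() == "无主题":
        return None
    topics = [t.strip() for t in reply.split(",") if t.strip()]
    return topics or None

For example, parse_topic_reply("科技, 游戏") would give ["科技", "游戏"], while parse_topic_reply("无主题") gives None.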
@@ -44,181 +43,25 @@ class TopicIdentifier:
words = jieba.lcut(text)
# 去除停用词和标点符号
stop_words = {
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"因为",
"所以",
"如果",
"虽然",
"一个",
"",
"",
"",
"",
"",
"我们",
"你们",
"他们",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"什么",
"怎么",
"为什么",
"怎样",
"如何",
"什么样",
"这样",
"那样",
"这么",
"那么",
"多少",
"",
"",
"哪里",
"哪儿",
"什么时候",
"何时",
"为何",
"怎么办",
"怎么样",
"这些",
"那些",
"一些",
"一点",
"一下",
"一直",
"一定",
"一般",
"一样",
"一会儿",
"一边",
"一起",
'', '', '', '', '', '', '', '', '', '', '', '', '', '',
'因为', '所以', '如果', '虽然', '一个', '', '', '', '', '', '我们', '你们',
'他们', '', '', '', '', '', '', '', '', '', '', '', '', '',
'', '', '', '', '', '', '', '', '', '', '', '', '', '',
'', '', '什么', '怎么', '为什么', '怎样', '如何', '什么样', '这样', '那样', '这么',
'那么', '多少', '', '', '哪里', '哪儿', '什么时候', '何时', '为何', '怎么办',
'怎么样', '这些', '那些', '一些', '一点', '一下', '一直', '一定', '一般', '一样',
'一会儿', '一边', '一起',
# 添加更多量词
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
'', '', '', '', '', '', '', '', '', '', '', '', '',
'', '', '', '', '', '', '', '', '', '', '', '', '',
'', '', '', '', '', '', '', '', '', '', '', '', '',
# 添加更多介词
"",
"按照",
"",
"",
"",
"比如",
"",
"除了",
"",
"",
"对于",
"根据",
"关于",
"",
"",
"",
"",
"经过",
"",
"",
"",
"通过",
"",
"",
"",
"为了",
"围绕",
"",
"",
"由于",
"",
"",
"沿",
"沿着",
"",
"依照",
"",
"",
"因为",
"",
"",
"",
"",
"自从",
'', '按照', '', '', '', '比如', '', '除了', '', '', '对于',
'根据', '关于', '', '', '', '', '经过', '', '', '', '通过',
'', '', '', '为了', '围绕', '', '', '由于', '', '', '沿', '沿着',
'', '依照', '', '', '因为', '', '', '', '', '自从'
}
# 过滤掉停用词和标点符号,只保留名词和动词
filtered_words = []
for word in words:
@@ -231,16 +74,16 @@ class TopicIdentifier:
'', '', '', '', '', '', '', '', '', '', ''
}:
filtered_words.append(word)
# 统计词频
word_freq = {}
for word in filtered_words:
word_freq[word] = word_freq.get(word, 0) + 1
# 按词频排序取前3个
sorted_words = sorted(word_freq.items(), key=lambda x: x[1], reverse=True)
top_words = [word for word, freq in sorted_words[:3]]
return top_words if top_words else None
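The block above counts word frequencies by hand and keeps the three most frequent words as topics. An equivalent, more compact sketch using collections.Counter, assuming filtered_words is the list built by the filtering loop; this is an illustration, not the repository's code:

from collections import Counter

def top_topics(filtered_words, k=3):
    # Count occurrences and return the k most frequent words, or None if empty.
    if not filtered_words:
        return None
    return [word for word, _ in Counter(filtered_words).most_common(k)]

Counter.most_common returns entries in descending frequency order, matching the manual sorted(..., reverse=True)[:3] above except for how ties are ordered.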
def identify_topic_snownlp(self, text: str) -> Optional[List[str]]:
@@ -264,4 +107,4 @@ class TopicIdentifier:
print(f"\033[1;31m[错误]\033[0m SnowNLP 处理失败: {str(e)}")
return None
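For context, the SnowNLP fallback presumably relies on the library's built-in keyword extraction; a minimal usage sketch (the empty-input guard and the limit of 3 keywords are assumptions, not necessarily what identify_topic_snownlp does):

from snownlp import SnowNLP

def snownlp_keywords(text: str, limit: int = 3):
    # SnowNLP.keywords() extracts keywords via a TextRank-style ranking.
    if not text or not text.strip():
        return None
    keywords = SnowNLP(text).keywords(limit)
    return keywords or None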
topic_identifier = TopicIdentifier()
topic_identifier = TopicIdentifier()