refactor(nlp): 将 jieba 替换为 rjieba 进行中文分词

This commit is contained in:
Windpicker-owo
2025-10-31 21:41:02 +08:00
parent e46d9529f5
commit df22ff91cc
2 changed files with 11 additions and 11 deletions

View File

@@ -123,13 +123,13 @@ def extract_keywords(text: str, max_keywords: int = 10) -> list[str]:
return []
try:
import jieba.analyse
import rjieba.analyse
# 使用TF-IDF提取关键词
keywords = jieba.analyse.extract_tags(text, topK=max_keywords)
keywords = rjieba.analyse.extract_tags(text, topK=max_keywords)
return keywords
except ImportError:
logger.warning("jieba未安装无法提取关键词")
logger.warning("rjieba未安装无法提取关键词")
# 简单分词
words = text.split()
return words[:max_keywords]