refactor(deps): replace the jieba word-segmentation library with rjieba
@@ -9,7 +9,7 @@ import time
 from collections import defaultdict
 from pathlib import Path
 
-import jieba
+import rjieba
 import orjson
 from pypinyin import Style, pinyin
 
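For context, a minimal sketch of the call-site difference between the two libraries, under the assumption that rjieba (the Python binding of the Rust jieba-rs) exposes a cut() that returns a list of tokens directly, whereas jieba's cut() yields a generator; the sample sentence is purely illustrative.

    import rjieba

    # rjieba.cut is assumed to return a list of tokens directly, so no
    # list() wrapper or lcut alias is needed (unlike jieba, whose cut()
    # returns a generator and lcut() returns a list).
    tokens = rjieba.cut("我爱自然语言处理")
    print(tokens)  # e.g. ['我', '爱', '自然语言', '处理']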
@@ -56,9 +56,9 @@ class ChineseTypoGenerator:
 
         # Use the built-in word-frequency file
         char_freq = defaultdict(int)
-        dict_path = os.path.join(os.path.dirname(jieba.__file__), "dict.txt")
+        dict_path = os.path.join(os.path.dirname(rjieba.__file__), "dict.txt")
 
-        # Read jieba's dictionary file
+        # Read rjieba's dictionary file
         with open(dict_path, encoding="utf-8") as f:
             for line in f:
                 word, freq = line.strip().split()[:2]
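A runnable sketch of the frequency-loading step these hunk lines belong to, assuming a jieba-style dict.txt with one `word freq [pos-tag]` entry per line. The assumption is load-bearing: jieba ships dict.txt inside its package, but jieba-rs embeds its dictionary at build time, so a dict.txt next to rjieba.__file__ may not actually exist.

    import os
    from collections import defaultdict

    import rjieba

    # Aggregate per-character frequencies from a jieba-style dict.txt,
    # where each line reads "word freq [pos-tag]". Whether rjieba ships
    # a dict.txt alongside the module is an unverified assumption.
    dict_path = os.path.join(os.path.dirname(rjieba.__file__), "dict.txt")
    char_freq = defaultdict(int)
    with open(dict_path, encoding="utf-8") as f:
        for line in f:
            word, freq = line.strip().split()[:2]
            for char in word:
                char_freq[char] += int(freq)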
@@ -224,9 +224,9 @@ class ChineseTypoGenerator:
     @staticmethod
     def _segment_sentence(sentence):
         """
-        Segment with jieba; return a list of words
+        Segment with rjieba; return a list of words
         """
-        return list(jieba.cut(sentence))
+        return list(rjieba.cut(sentence))
 
     def _get_word_homophones(self, word):
         """
@@ -251,8 +251,8 @@ class ChineseTypoGenerator:
 
         all_combinations = itertools.product(*candidates)
 
-        # Load the jieba dictionary and word-frequency info
-        dict_path = os.path.join(os.path.dirname(jieba.__file__), "dict.txt")
+        # Load the rjieba dictionary and word-frequency info
+        dict_path = os.path.join(os.path.dirname(rjieba.__file__), "dict.txt")
         valid_words = {}  # use a dict to store words and their frequencies
         with open(dict_path, encoding="utf-8") as f:
             for line in f:

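The itertools.product(*candidates) and valid_words lines above hint at how typo candidates are ranked; a self-contained sketch under assumed data (the candidates structure and the frequencies below are hypothetical, not taken from the diff):

    import itertools

    # Hypothetical per-character homophone candidates and dictionary
    # frequencies, standing in for the real data built from dict.txt.
    candidates = [["天", "田"], ["气", "七"]]
    valid_words = {"天气": 100_000, "田七": 500}

    # Enumerate every combination, keep only real dictionary words,
    # and score them by frequency; the highest-frequency word wins.
    combos = ("".join(chars) for chars in itertools.product(*candidates))
    scored = {w: valid_words[w] for w in combos if w in valid_words}
    best = max(scored, key=scored.get)  # -> "天气"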
@@ -6,7 +6,7 @@ import time
 from collections import Counter
 from typing import Any
 
-import jieba
+import rjieba
 import numpy as np
 from maim_message import UserInfo
 
@@ -440,7 +440,7 @@ def cosine_similarity(v1, v2):
 def text_to_vector(text):
     """Convert text into a word-frequency vector"""
     # Segment the text
-    words = jieba.lcut(text)
+    words = rjieba.lcut(text)
     return Counter(words)
 
 
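The hunk header references cosine_similarity(v1, v2) but its body is not shown, so here is one plausible pairing with text_to_vector over Counter vectors; a sketch, not the file's actual implementation. It also calls rjieba.cut rather than the lcut used in the diff, on the assumption that rjieba may not provide an lcut alias and that cut already returns a list.

    import math
    from collections import Counter

    import rjieba

    def text_to_vector(text):
        """Convert text to a term-frequency vector (a Counter of tokens)."""
        return Counter(rjieba.cut(text))

    def cosine_similarity(v1, v2):
        """Cosine similarity between two Counter-based frequency vectors."""
        dot = sum(v1[w] * v2[w] for w in set(v1) & set(v2))
        norm1 = math.sqrt(sum(c * c for c in v1.values()))
        norm2 = math.sqrt(sum(c * c for c in v2.values()))
        return dot / (norm1 * norm2) if norm1 and norm2 else 0.0

    print(cosine_similarity(text_to_vector("今天天气很好"),
                            text_to_vector("今天天气不错")))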
@@ -226,9 +226,9 @@ class ImageManager:
         if emotion_result is None:
             logger.warning("LLM未能生成情感标签,使用详细描述的前几个词")
             # Fallback: extract keywords from the detailed description
-            import jieba
+            import rjieba
 
-            words = list(jieba.cut(detailed_description))
+            words = list(rjieba.cut(detailed_description))
             emotion_result = ",".join(words[:2]) if len(words) >= 2 else (words[0] if words else "表情")
 
             # Process the emotion result: keep the top 1-2 tags
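A standalone sketch of this fallback path: when no emotion tag comes back from the LLM, the first one or two segmented tokens of the detailed description are joined as a stopgap label. The input string is hypothetical; only the joining logic mirrors the diff.

    import rjieba

    # Hypothetical detailed description standing in for the LLM output.
    detailed_description = "一只开心地挥着手的卡通柴犬"

    # Mirror of the fallback in the hunk: take the first two tokens,
    # else one token, else the generic "表情" label if segmentation is empty.
    words = list(rjieba.cut(detailed_description))
    emotion_result = ",".join(words[:2]) if len(words) >= 2 else (words[0] if words else "表情")
    print(emotion_result)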