refactor(deps): replace the jieba segmentation library with rjieba
@@ -19,7 +19,7 @@ from src.chat.memory_system.memory_builder import MemoryBuilder, MemoryExtractio
from src.chat.memory_system.memory_chunk import MemoryChunk
from src.chat.memory_system.memory_fusion import MemoryFusionEngine
from src.chat.memory_system.memory_query_planner import MemoryQueryPlanner
-# Simplified memory sampling mode enum
+# Memory sampling mode enum
class MemorySamplingMode(Enum):
    """Memory sampling modes"""
    HIPPOCAMPUS = "hippocampus"  # Hippocampus mode: scheduled-task sampling
@@ -162,6 +162,7 @@ class MemorySystem:
    async def initialize(self):
        """Asynchronously initialize the memory system"""
        try:
            logger.info("Initializing memory system...")

            # Initialize the LLM model
            fallback_task = getattr(self.llm_model, "model_for_task", None) if self.llm_model else None
@@ -267,8 +268,11 @@ class MemorySystem:
                logger.warning(f"Hippocampus sampler initialization failed: {e}")
                self.hippocampus_sampler = None

            # Unified storage loads its data automatically; no extra loading needed
-            logger.info("✅ Simplified memory system initialization complete")
+            self.status = MemorySystemStatus.READY
+            logger.info("✅ Memory system initialization complete")

        except Exception as e:
            self.status = MemorySystemStatus.ERROR
@@ -546,16 +550,18 @@ class MemorySystem:
        return existing_candidates

    async def process_conversation_memory(self, context: dict[str, Any]) -> dict[str, Any]:
-        """Public entry point for processing conversation memory; supports three sampling modes: hippocampus, immediate, and all"""
+        """Public entry point for processing conversation memory; supports three sampling modes: hippocampus, precision, and adaptive"""
        start_time = time.time()

        try:
            context = dict(context or {})

            # Get the configured sampling mode
-            sampling_mode = getattr(global_config.memory, 'memory_sampling_mode', 'immediate')
+            sampling_mode = getattr(global_config.memory, 'memory_sampling_mode', 'precision')
            current_mode = MemorySamplingMode(sampling_mode)

+            context['__sampling_mode'] = current_mode.value
+            logger.debug(f"Using memory sampling mode: {current_mode.value}")

            # Process memory according to the sampling mode
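The renamed comment, the new docstring, and the new `'precision'` default together imply three mode values. Below is a minimal sketch of the enum this suggests; only `HIPPOCAMPUS` appears verbatim in the diff, so the `PRECISION` and `ADAPTIVE` member names and values are assumptions inferred from the docstring and the `getattr` default:

```python
from enum import Enum

class MemorySamplingMode(Enum):
    """Sketch of the memory sampling modes implied by this commit."""
    HIPPOCAMPUS = "hippocampus"  # from the diff: scheduled-task sampling
    PRECISION = "precision"      # assumed from the new getattr default 'precision'
    ADAPTIVE = "adaptive"        # assumed from the new docstring's "adaptive" mode

# The lookup path in the hunk above: an unset memory_sampling_mode now falls
# back to "precision", and the string round-trips through the enum constructor.
assert MemorySamplingMode("precision") is MemorySamplingMode.PRECISION
```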
@@ -991,7 +997,7 @@ class MemorySystem:
            from src.chat.message_receive.chat_stream import get_chat_manager

            chat_manager = get_chat_manager()
-            chat_stream = await chat_manager.get_stream(stream_id)
+            chat_stream = chat_manager.get_stream(stream_id)

            if not chat_stream or not hasattr(chat_stream, "context_manager"):
                logger.debug(f"No chat stream or context manager found for stream_id={stream_id}")
@@ -1105,7 +1111,7 @@ class MemorySystem:
            from src.chat.message_receive.chat_stream import get_chat_manager

            chat_manager = get_chat_manager()
-            chat_stream = await chat_manager.get_stream(stream_id)
+            chat_stream = chat_manager.get_stream(stream_id)
            if chat_stream and hasattr(chat_stream, "context_manager"):
                history_limit = self._determine_history_limit(context)
                messages = chat_stream.context_manager.get_messages(limit=history_limit, include_unread=True)
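Both hunks drop an `await`, which is only correct if `ChatManager.get_stream` is a plain synchronous method; awaiting its return value would otherwise raise a `TypeError` at runtime. A minimal sketch of the assumed shape (an illustrative stand-in, not the project's actual implementation):

```python
class ChatManager:
    """Illustrative stand-in for the real chat manager."""

    def __init__(self):
        self._streams: dict[str, object] = {}

    def get_stream(self, stream_id: str):
        # Synchronous dictionary lookup: `await chat_manager.get_stream(...)` on
        # this would fail with "object ... can't be used in 'await' expression".
        return self._streams.get(stream_id)
```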
@@ -9,7 +9,7 @@ import time
from collections import defaultdict
from pathlib import Path

-import jieba
+import rjieba
import orjson
from pypinyin import Style, pinyin
@@ -56,9 +56,9 @@ class ChineseTypoGenerator:

        # Use the built-in word-frequency file
        char_freq = defaultdict(int)
-        dict_path = os.path.join(os.path.dirname(jieba.__file__), "dict.txt")
+        dict_path = os.path.join(os.path.dirname(rjieba.__file__), "dict.txt")

-        # Read jieba's dictionary file
+        # Read rjieba's dictionary file
        with open(dict_path, encoding="utf-8") as f:
            for line in f:
                word, freq = line.strip().split()[:2]
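One thing worth verifying here: this keeps jieba's trick of reading `dict.txt` out of the package directory, which assumes rjieba also ships a plain-text dictionary next to its module. If it does not (rjieba wraps jieba-rs, which may embed its dictionary in the compiled binary), the `open()` fails at startup. A defensive sketch of that assumption:

```python
import os

import rjieba

# Assumption: rjieba bundles a jieba-style "dict.txt" beside its module. If the
# file is missing, fail loudly here instead of deep inside the frequency loop.
dict_path = os.path.join(os.path.dirname(rjieba.__file__), "dict.txt")
if not os.path.isfile(dict_path):
    raise FileNotFoundError(
        f"rjieba does not bundle dict.txt at {dict_path}; "
        "ship a word-frequency file with the project instead"
    )
```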
@@ -224,9 +224,9 @@ class ChineseTypoGenerator:
    @staticmethod
    def _segment_sentence(sentence):
        """
-        Segment a sentence with jieba and return the list of words
+        Segment a sentence with rjieba and return the list of words
        """
-        return list(jieba.cut(sentence))
+        return list(rjieba.cut(sentence))

    def _get_word_homophones(self, word):
        """
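A quick check of the drop-in behavior this relies on; it assumes `rjieba.cut` accepts a string and yields `str` tokens the way `jieba.cut` does (the sample sentence and segmentation below are illustrative):

```python
import rjieba

# Wrapping in list() preserves the jieba-compatible list-of-words return shape
# even if rjieba.cut already returns a list rather than a generator.
words = list(rjieba.cut("我来到北京清华大学"))
print(words)  # e.g. ['我', '来到', '北京', '清华大学'] — tokens may differ by version
```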
@@ -251,8 +251,8 @@ class ChineseTypoGenerator:

        all_combinations = itertools.product(*candidates)

-        # Get the jieba dictionary and word-frequency info
-        dict_path = os.path.join(os.path.dirname(jieba.__file__), "dict.txt")
+        # Get the rjieba dictionary and word-frequency info
+        dict_path = os.path.join(os.path.dirname(rjieba.__file__), "dict.txt")
        valid_words = {}  # Use a dict to store words together with their frequencies
        with open(dict_path, encoding="utf-8") as f:
            for line in f:
@@ -6,7 +6,7 @@ import time
from collections import Counter
from typing import Any

-import jieba
+import rjieba
import numpy as np
from maim_message import UserInfo
@@ -440,7 +440,7 @@ def cosine_similarity(v1, v2):
def text_to_vector(text):
    """Convert text into a word-frequency vector"""
    # Tokenize
-    words = jieba.lcut(text)
+    words = rjieba.lcut(text)
    return Counter(words)
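This is the one call in the commit that is not a pure rename: jieba exposes both `cut` and the list-returning shortcut `lcut`, and it is not guaranteed that rjieba provides an `lcut` alias. If it does not, this line raises `AttributeError` at runtime. A hedged version that works either way (the `hasattr` guard encodes that uncertainty):

```python
from collections import Counter

import rjieba

def text_to_vector(text):
    """Convert text into a word-frequency vector."""
    # Tokenize: prefer lcut if rjieba provides it, otherwise materialize cut().
    if hasattr(rjieba, "lcut"):
        words = rjieba.lcut(text)
    else:
        words = list(rjieba.cut(text))
    return Counter(words)
```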
@@ -226,9 +226,9 @@ class ImageManager:
            if emotion_result is None:
                logger.warning("LLM failed to generate emotion tags; using the first few words of the detailed description")
                # Fallback: extract keywords from the detailed description
-                import jieba
+                import rjieba

-                words = list(jieba.cut(detailed_description))
+                words = list(rjieba.cut(detailed_description))
                emotion_result = ",".join(words[:2]) if len(words) >= 2 else (words[0] if words else "表情")

            # Process the emotion result, keeping the top 1-2 most important tags
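For reference, a trace of the fallback expression with an illustrative input (the sample description and resulting tokens are made up):

```python
import rjieba

detailed_description = "一只开心地挥手的柴犬"  # illustrative input only
words = list(rjieba.cut(detailed_description))
# With two or more tokens the fallback joins the first two as the emotion label;
# with exactly one it uses that token; with none it falls back to "表情".
emotion_result = ",".join(words[:2]) if len(words) >= 2 else (words[0] if words else "表情")
```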
@@ -299,7 +299,7 @@ def load_log_config():  # sourcery skip: use-contextlib-suppress
            "peewee",
            "openai",
            "uvicorn",
-            "jieba",
+            "rjieba",
        ],
        "library_log_levels": {"aiohttp": "WARNING"},
    }
@@ -4,7 +4,7 @@ from datetime import datetime
from difflib import SequenceMatcher
from typing import Any

-import jieba
+import rjieba
import orjson
from json_repair import repair_json
from sklearn.feature_extraction.text import TfidfVectorizer
@@ -535,9 +535,9 @@ class RelationshipManager:
        s1 = str(s1)
        s2 = str(s2)

-        # 1. Tokenize with jieba
-        s1_words = " ".join(jieba.cut(s1))
-        s2_words = " ".join(jieba.cut(s2))
+        # 1. Tokenize with rjieba
+        s1_words = " ".join(rjieba.cut(s1))
+        s2_words = " ".join(rjieba.cut(s2))

        # 2. Put both sentences into a single list
        corpus = [s1_words, s2_words]
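The hunk cuts off before the TF-IDF step, but the numbered comments and the `TfidfVectorizer` import make the intent clear. A self-contained sketch of the full pattern; everything after the `corpus` line is an assumption about how the method continues:

```python
import rjieba
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def sentence_similarity(s1, s2) -> float:
    # 1. Tokenize with rjieba and rejoin on spaces so the vectorizer can split
    s1_words = " ".join(rjieba.cut(str(s1)))
    s2_words = " ".join(rjieba.cut(str(s2)))
    # 2. Put both sentences into a single list
    corpus = [s1_words, s2_words]
    # 3. Assumed continuation: TF-IDF vectors plus cosine similarity
    tfidf = TfidfVectorizer().fit_transform(corpus)
    return float(cosine_similarity(tfidf[0], tfidf[1])[0][0])
```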
@@ -184,8 +184,8 @@ cython_debug/
# PyPI configuration file
.pypirc

-# jieba
-jieba.cache
+# rjieba
+rjieba.cache

# .vscode
!.vscode/settings.json