fix: 修改 log,使说明更清楚
This commit is contained in:
@@ -23,10 +23,8 @@ def init_prompt():
|
||||
|
||||
你和别人的关系信息是,请从这些信息中提取出你和别人的关系的原文:
|
||||
{relation_prompt}
|
||||
请只从上面这些信息中提取出。
|
||||
请只从上面这些信息中提取出内容。
|
||||
|
||||
|
||||
现在是{time_now},你正在上网,和qq群里的网友们聊天,以下是正在进行的聊天内容:
|
||||
{chat_observe_info}
|
||||
|
||||
现在请你根据现有的信息,总结你和群里的人的关系
|
||||
|
||||
@@ -387,7 +387,10 @@ class NormalChat:
|
||||
if not response_set or (
|
||||
self.enable_planner and self.action_type not in ["no_action", "change_to_focus_chat"]
|
||||
):
|
||||
if not response_set:
|
||||
logger.info(f"[{self.stream_name}] 模型未生成回复内容")
|
||||
elif self.enable_planner and self.action_type not in ["no_action", "change_to_focus_chat"]:
|
||||
logger.info(f"[{self.stream_name}] 模型选择其他动作")
|
||||
# 如果模型未生成回复,移除思考消息
|
||||
container = await message_manager.get_container(self.stream_id) # 使用 self.stream_id
|
||||
for msg in container.messages[:]:
|
||||
@@ -443,7 +446,7 @@ class NormalChat:
|
||||
logger.warning(f"[{self.stream_name}] 没有设置切换到focus聊天模式的回调函数,无法执行切换")
|
||||
return
|
||||
else:
|
||||
# await self._check_switch_to_focus()
|
||||
await self._check_switch_to_focus()
|
||||
pass
|
||||
|
||||
info_catcher.done_catch()
|
||||
|
||||
@@ -57,7 +57,7 @@ class NormalChatGenerator:
|
||||
)
|
||||
|
||||
if model_response:
|
||||
logger.debug(f"{global_config.bot.nickname}的原始回复是:{model_response}")
|
||||
logger.debug(f"{global_config.bot.nickname}的备选回复是:{model_response}")
|
||||
model_response = process_llm_response(model_response)
|
||||
|
||||
return model_response
|
||||
|
||||
@@ -13,6 +13,9 @@ from json_repair import repair_json
|
||||
from datetime import datetime
|
||||
from difflib import SequenceMatcher
|
||||
import ast
|
||||
import jieba
|
||||
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||
from sklearn.metrics.pairwise import cosine_similarity
|
||||
|
||||
logger = get_logger("relation")
|
||||
|
||||
@@ -119,6 +122,8 @@ class RelationshipManager:
|
||||
person_id = person_info_manager.get_person_id(person[0], person[1])
|
||||
|
||||
person_name = await person_info_manager.get_value(person_id, "person_name")
|
||||
if not person_name or person_name == "none":
|
||||
return ""
|
||||
impression = await person_info_manager.get_value(person_id, "impression")
|
||||
interaction = await person_info_manager.get_value(person_id, "interaction")
|
||||
points = await person_info_manager.get_value(person_id, "points") or []
|
||||
@@ -324,8 +329,8 @@ class RelationshipManager:
|
||||
|
||||
# 在现有points中查找相似的点
|
||||
for i, existing_point in enumerate(current_points):
|
||||
similarity = SequenceMatcher(None, new_point[0], existing_point[0]).ratio()
|
||||
if similarity > 0.8:
|
||||
# 使用组合的相似度检查方法
|
||||
if self.check_similarity(new_point[0], existing_point[0]):
|
||||
similar_points.append(existing_point)
|
||||
similar_indices.append(i)
|
||||
|
||||
@@ -355,7 +360,7 @@ class RelationshipManager:
|
||||
current_points = points_list
|
||||
|
||||
# 如果points超过30条,按权重随机选择多余的条目移动到forgotten_points
|
||||
if len(current_points) > 5:
|
||||
if len(current_points) > 10:
|
||||
# 获取现有forgotten_points
|
||||
forgotten_points = await person_info_manager.get_value(person_id, "forgotten_points") or []
|
||||
if isinstance(forgotten_points, str):
|
||||
@@ -576,5 +581,56 @@ class RelationshipManager:
|
||||
self.logger.error(f"计算时间权重失败: {e}")
|
||||
return 0.5 # 发生错误时返回中等权重
|
||||
|
||||
def tfidf_similarity(self, s1, s2):
    """Compute the cosine similarity of two sentences via TF-IDF.

    Both sentences are tokenized with jieba, vectorized together so they
    share one vocabulary, and the cosine similarity between the two
    resulting TF-IDF vectors is returned as a float in [0, 1].

    Args:
        s1: First sentence.
        s2: Second sentence.

    Returns:
        float: Cosine similarity of the two TF-IDF vectors; 0.0 when a
        vocabulary cannot be built (empty or stop-word-only input).
    """
    # Tokenize each sentence into space-separated words for the vectorizer.
    corpus = [" ".join(jieba.cut(text)) for text in (s1, s2)]

    try:
        # Fitting on both sentences at once keeps the vocabularies aligned.
        tfidf_vectors = TfidfVectorizer().fit_transform(corpus)
    except ValueError:
        # TfidfVectorizer raises ValueError when the corpus yields an
        # empty vocabulary; treat that as "no similarity".
        return 0.0

    # cosine_similarity yields a 2x2 matrix; the off-diagonal entry is
    # the similarity between s1 and s2.
    return cosine_similarity(tfidf_vectors)[0, 1]
|
||||
|
||||
def sequence_similarity(self, s1, s2):
    """Compute character-level similarity of two sentences.

    Uses difflib.SequenceMatcher, which returns a ratio in [0, 1]
    based on the longest matching subsequences of characters.
    """
    matcher = SequenceMatcher(None, s1, s2)
    return matcher.ratio()
|
||||
|
||||
def check_similarity(self, text1, text2, tfidf_threshold=0.5, seq_threshold=0.6):
    """Decide whether two texts are similar using two independent metrics.

    The texts count as similar when EITHER the TF-IDF cosine similarity
    exceeds ``tfidf_threshold`` OR the SequenceMatcher ratio exceeds
    ``seq_threshold``. Both metrics are always evaluated.

    Args:
        text1: First text.
        text2: Second text.
        tfidf_threshold: Cut-off for the TF-IDF cosine similarity.
        seq_threshold: Cut-off for the SequenceMatcher ratio.

    Returns:
        bool: True when at least one metric is strictly above its threshold.
    """
    scores_vs_cutoffs = (
        (self.tfidf_similarity(text1, text2), tfidf_threshold),
        (self.sequence_similarity(text1, text2), seq_threshold),
    )
    return any(score > cutoff for score, cutoff in scores_vs_cutoffs)
|
||||
|
||||
|
||||
# Module-level singleton shared by importers of this module.
relationship_manager = RelationshipManager()
|
||||
|
||||
156
tests/test_sentence_similarity.py
Normal file
156
tests/test_sentence_similarity.py
Normal file
@@ -0,0 +1,156 @@
|
||||
import time
|
||||
import unittest
|
||||
import jieba
|
||||
from difflib import SequenceMatcher
|
||||
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||
from sklearn.metrics.pairwise import cosine_similarity
|
||||
|
||||
def tfidf_similarity(s1, s2):
    """Compute the cosine similarity of two sentences via TF-IDF.

    Both sentences are tokenized with jieba, vectorized together so they
    share one vocabulary, and the cosine similarity between the two
    resulting TF-IDF vectors is returned as a float in [0, 1].

    Returns 0.0 when no vocabulary can be built (e.g. empty or
    stop-word-only input).
    """
    # Tokenize each sentence into space-separated words for the vectorizer.
    corpus = [" ".join(jieba.cut(text)) for text in (s1, s2)]

    try:
        # Fitting on both sentences at once keeps the vocabularies aligned.
        tfidf_vectors = TfidfVectorizer().fit_transform(corpus)
    except ValueError:
        # TfidfVectorizer raises ValueError for an empty vocabulary.
        return 0.0

    # The off-diagonal entry of the 2x2 matrix is the s1-vs-s2 similarity.
    return cosine_similarity(tfidf_vectors)[0, 1]
|
||||
|
||||
def sequence_similarity(s1, s2):
    """Compute character-level similarity of two sentences.

    Uses difflib.SequenceMatcher, which returns a ratio in [0, 1]
    based on the longest matching subsequences of characters.
    """
    return SequenceMatcher(a=s1, b=s2, isjunk=None).ratio()
|
||||
|
||||
class TestSentenceSimilarity(unittest.TestCase):
    """Benchmark-style comparison of TF-IDF vs SequenceMatcher similarity.

    These tests print metrics rather than hard-asserting similarity
    decisions, because the thresholds are heuristic. BUGFIX: each case's
    ``expected_similar`` flag was previously collected but never used;
    it is now reported against the combined decision rule that mirrors
    RelationshipManager.check_similarity (tfidf > 0.5 or seq > 0.6).
    """

    # Thresholds mirror RelationshipManager.check_similarity defaults.
    TFIDF_THRESHOLD = 0.5
    SEQ_THRESHOLD = 0.6

    def test_similarity_comparison(self):
        """比较不同相似度计算方法的结果"""
        test_cases = [
            {
                "sentence1": "今天天气怎么样",
                "sentence2": "今天气候如何",
                "expected_similar": True
            },
            {
                "sentence1": "今天天气怎么样",
                "sentence2": "我今天要去吃麦当劳",
                "expected_similar": False
            },
            {
                "sentence1": "我今天要去吃麦当劳",
                "sentence2": "肯德基和麦当劳哪家好吃",
                "expected_similar": True
            },
            {
                "sentence1": "Vindemiatrix提到昨天三个无赖杀穿交界地",
                "sentence2": "Vindemiatrix昨天用三个无赖角色杀穿了游戏中的交界地",
                "expected_similar": True
            },
            {
                "sentence1": "tc_魔法士解释了之前templateinfo的with用法和现在的单独逻辑发送的区别",
                "sentence2": "tc_魔法士解释了templateinfo的用法,包括它是一个字典,key是prompt的名字,value是prompt的内容,格式是只支持大括号的fstring",
                "expected_similar": False
            },
            {
                "sentence1": "YXH_XianYu分享了一张舰娘街机游戏的图片,并提到'玩舰娘街机的董不懂'",
                "sentence2": "YXH_XianYu对街机游戏表现出兴趣,并分享了玩舰娘街机的经历",
                "expected_similar": True
            },
            {
                "sentence1": "YXH_XianYu在考虑入坑明日方舟,犹豫是否要从零开荒或使用初始号",
                "sentence2": "YXH_XianYu考虑入坑明日方舟,倾向于从零开荒或初始号开荒",
                "expected_similar": True
            },
            {
                "sentence1": "YXH_XianYu提到秋叶原好多人在玩maimai",
                "sentence2": "YXH_XianYu对学园偶像的付费石头机制表示惊讶",
                "expected_similar": False
            }
        ]

        print("\n相似度计算方法比较:")
        for i, case in enumerate(test_cases, 1):
            print(f"\n测试用例 {i}:")
            print(f"句子1: {case['sentence1']}")
            print(f"句子2: {case['sentence2']}")

            # TF-IDF similarity, timed.
            start_time = time.time()
            tfidf_sim = tfidf_similarity(case['sentence1'], case['sentence2'])
            tfidf_time = time.time() - start_time

            # SequenceMatcher similarity, timed.
            start_time = time.time()
            seq_sim = sequence_similarity(case['sentence1'], case['sentence2'])
            seq_time = time.time() - start_time

            print(f"TF-IDF相似度: {tfidf_sim:.4f} (耗时: {tfidf_time:.4f}秒)")
            print(f"SequenceMatcher相似度: {seq_sim:.4f} (耗时: {seq_time:.4f}秒)")

            # Report (non-fatally) whether the combined decision agrees with
            # the annotated expectation; a hard assert would make this
            # benchmark flaky since the thresholds are heuristic.
            decided_similar = (
                tfidf_sim > self.TFIDF_THRESHOLD or seq_sim > self.SEQ_THRESHOLD
            )
            agreement = "一致" if decided_similar == case["expected_similar"] else "不一致"
            print(f"预期相似: {case['expected_similar']}, 组合判定: {decided_similar} ({agreement})")

    def _pairwise_matrix(self, sentences, similarity_fn):
        """Build the full pairwise similarity matrix with *similarity_fn*."""
        return [
            [similarity_fn(a, b) for b in sentences]
            for a in sentences
        ]

    @staticmethod
    def _print_matrix(matrix):
        """Print a similarity matrix, one tab-separated row per line."""
        for row in matrix:
            for similarity in row:
                print(f"{similarity:.4f}", end="\t")
            print()

    def test_batch_processing(self):
        """测试批量处理性能"""
        sentences = [
            "人工智能正在改变世界",
            "AI技术发展迅速",
            "机器学习是人工智能的一个分支",
            "深度学习在图像识别领域取得了突破",
            "自然语言处理技术越来越成熟"
        ]

        print("\n批量处理测试:")

        # Time the TF-IDF pairwise matrix.
        start_time = time.time()
        tfidf_matrix = self._pairwise_matrix(sentences, tfidf_similarity)
        tfidf_time = time.time() - start_time

        # Time the SequenceMatcher pairwise matrix.
        start_time = time.time()
        seq_matrix = self._pairwise_matrix(sentences, sequence_similarity)
        seq_time = time.time() - start_time

        print(f"TF-IDF批量处理 {len(sentences)} 个句子耗时: {tfidf_time:.4f}秒")
        print(f"SequenceMatcher批量处理 {len(sentences)} 个句子耗时: {seq_time:.4f}秒")

        print("\nTF-IDF相似度矩阵:")
        self._print_matrix(tfidf_matrix)

        print("\nSequenceMatcher相似度矩阵:")
        self._print_matrix(seq_matrix)
|
||||
|
||||
# Allow running this test module directly (outside a pytest/CI runner).
if __name__ == '__main__':
    unittest.main(verbosity=2)
|
||||
Reference in New Issue
Block a user