fix:修改log,说明更清楚
This commit is contained in:
@@ -23,10 +23,8 @@ def init_prompt():
|
|||||||
|
|
||||||
你和别人的关系信息是,请从这些信息中提取出你和别人的关系的原文:
|
你和别人的关系信息是,请从这些信息中提取出你和别人的关系的原文:
|
||||||
{relation_prompt}
|
{relation_prompt}
|
||||||
请只从上面这些信息中提取出。
|
请只从上面这些信息中提取出内容。
|
||||||
|
|
||||||
|
|
||||||
现在是{time_now},你正在上网,和qq群里的网友们聊天,以下是正在进行的聊天内容:
|
|
||||||
{chat_observe_info}
|
{chat_observe_info}
|
||||||
|
|
||||||
现在请你根据现有的信息,总结你和群里的人的关系
|
现在请你根据现有的信息,总结你和群里的人的关系
|
||||||
|
|||||||
@@ -387,7 +387,10 @@ class NormalChat:
|
|||||||
if not response_set or (
|
if not response_set or (
|
||||||
self.enable_planner and self.action_type not in ["no_action", "change_to_focus_chat"]
|
self.enable_planner and self.action_type not in ["no_action", "change_to_focus_chat"]
|
||||||
):
|
):
|
||||||
|
if not response_set:
|
||||||
logger.info(f"[{self.stream_name}] 模型未生成回复内容")
|
logger.info(f"[{self.stream_name}] 模型未生成回复内容")
|
||||||
|
elif self.enable_planner and self.action_type not in ["no_action", "change_to_focus_chat"]:
|
||||||
|
logger.info(f"[{self.stream_name}] 模型选择其他动作")
|
||||||
# 如果模型未生成回复,移除思考消息
|
# 如果模型未生成回复,移除思考消息
|
||||||
container = await message_manager.get_container(self.stream_id) # 使用 self.stream_id
|
container = await message_manager.get_container(self.stream_id) # 使用 self.stream_id
|
||||||
for msg in container.messages[:]:
|
for msg in container.messages[:]:
|
||||||
@@ -443,7 +446,7 @@ class NormalChat:
|
|||||||
logger.warning(f"[{self.stream_name}] 没有设置切换到focus聊天模式的回调函数,无法执行切换")
|
logger.warning(f"[{self.stream_name}] 没有设置切换到focus聊天模式的回调函数,无法执行切换")
|
||||||
return
|
return
|
||||||
else:
|
else:
|
||||||
# await self._check_switch_to_focus()
|
await self._check_switch_to_focus()
|
||||||
pass
|
pass
|
||||||
|
|
||||||
info_catcher.done_catch()
|
info_catcher.done_catch()
|
||||||
|
|||||||
@@ -57,7 +57,7 @@ class NormalChatGenerator:
|
|||||||
)
|
)
|
||||||
|
|
||||||
if model_response:
|
if model_response:
|
||||||
logger.debug(f"{global_config.bot.nickname}的原始回复是:{model_response}")
|
logger.debug(f"{global_config.bot.nickname}的备选回复是:{model_response}")
|
||||||
model_response = process_llm_response(model_response)
|
model_response = process_llm_response(model_response)
|
||||||
|
|
||||||
return model_response
|
return model_response
|
||||||
|
|||||||
@@ -13,6 +13,9 @@ from json_repair import repair_json
|
|||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from difflib import SequenceMatcher
|
from difflib import SequenceMatcher
|
||||||
import ast
|
import ast
|
||||||
|
import jieba
|
||||||
|
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||||
|
from sklearn.metrics.pairwise import cosine_similarity
|
||||||
|
|
||||||
logger = get_logger("relation")
|
logger = get_logger("relation")
|
||||||
|
|
||||||
@@ -119,6 +122,8 @@ class RelationshipManager:
|
|||||||
person_id = person_info_manager.get_person_id(person[0], person[1])
|
person_id = person_info_manager.get_person_id(person[0], person[1])
|
||||||
|
|
||||||
person_name = await person_info_manager.get_value(person_id, "person_name")
|
person_name = await person_info_manager.get_value(person_id, "person_name")
|
||||||
|
if not person_name or person_name == "none":
|
||||||
|
return ""
|
||||||
impression = await person_info_manager.get_value(person_id, "impression")
|
impression = await person_info_manager.get_value(person_id, "impression")
|
||||||
interaction = await person_info_manager.get_value(person_id, "interaction")
|
interaction = await person_info_manager.get_value(person_id, "interaction")
|
||||||
points = await person_info_manager.get_value(person_id, "points") or []
|
points = await person_info_manager.get_value(person_id, "points") or []
|
||||||
@@ -324,8 +329,8 @@ class RelationshipManager:
|
|||||||
|
|
||||||
# 在现有points中查找相似的点
|
# 在现有points中查找相似的点
|
||||||
for i, existing_point in enumerate(current_points):
|
for i, existing_point in enumerate(current_points):
|
||||||
similarity = SequenceMatcher(None, new_point[0], existing_point[0]).ratio()
|
# 使用组合的相似度检查方法
|
||||||
if similarity > 0.8:
|
if self.check_similarity(new_point[0], existing_point[0]):
|
||||||
similar_points.append(existing_point)
|
similar_points.append(existing_point)
|
||||||
similar_indices.append(i)
|
similar_indices.append(i)
|
||||||
|
|
||||||
@@ -355,7 +360,7 @@ class RelationshipManager:
|
|||||||
current_points = points_list
|
current_points = points_list
|
||||||
|
|
||||||
# 如果points超过30条,按权重随机选择多余的条目移动到forgotten_points
|
# If there are more than 10 points, move the excess entries (chosen by weighted random selection) to forgotten_points
|
||||||
if len(current_points) > 5:
|
if len(current_points) > 10:
|
||||||
# 获取现有forgotten_points
|
# 获取现有forgotten_points
|
||||||
forgotten_points = await person_info_manager.get_value(person_id, "forgotten_points") or []
|
forgotten_points = await person_info_manager.get_value(person_id, "forgotten_points") or []
|
||||||
if isinstance(forgotten_points, str):
|
if isinstance(forgotten_points, str):
|
||||||
@@ -576,5 +581,56 @@ class RelationshipManager:
|
|||||||
self.logger.error(f"计算时间权重失败: {e}")
|
self.logger.error(f"计算时间权重失败: {e}")
|
||||||
return 0.5 # 发生错误时返回中等权重
|
return 0.5 # 发生错误时返回中等权重
|
||||||
|
|
||||||
|
def tfidf_similarity(self, s1, s2):
    """Return the TF-IDF cosine similarity between two sentences.

    Each sentence is segmented with jieba and re-joined with spaces; the
    two resulting strings are then vectorized together as a two-document
    corpus and compared with cosine similarity.

    Args:
        s1: First sentence.
        s2: Second sentence.

    Returns:
        The cosine similarity between the two TF-IDF vectors, or 0.0 when
        the vectorizer raises ValueError.
    """
    # Segment both sentences (s1 first, then s2) into space-separated words.
    segmented = [" ".join(jieba.cut(sentence)) for sentence in (s1, s2)]

    # NOTE(review): per the scikit-learn docs, TfidfVectorizer's default
    # token_pattern only keeps tokens of two or more characters, so
    # single-character words produced by jieba are presumably ignored here
    # -- confirm this is intended.
    try:
        weights = TfidfVectorizer().fit_transform(segmented)
    except ValueError:
        # May be raised when a sentence consists only of stop words or is empty.
        return 0.0

    # Entry [0, 1] of the pairwise matrix is the similarity of s1 and s2.
    return cosine_similarity(weights)[0, 1]
|
||||||
|
|
||||||
|
def sequence_similarity(self, s1, s2):
    """Return the difflib SequenceMatcher ratio of two sentences.

    Args:
        s1: First sentence.
        s2: Second sentence.

    Returns:
        The value of ``SequenceMatcher.ratio()`` for the pair, with no
        junk filter applied.
    """
    matcher = SequenceMatcher(a=s1, b=s2)
    return matcher.ratio()
|
||||||
|
|
||||||
|
def check_similarity(self, text1, text2, tfidf_threshold=0.5, seq_threshold=0.6):
    """Report whether two texts are similar under either similarity measure.

    The texts count as similar as soon as one of the two scores is strictly
    greater than its threshold.

    Args:
        text1: First text.
        text2: Second text.
        tfidf_threshold: Threshold for the TF-IDF similarity score.
        seq_threshold: Threshold for the SequenceMatcher similarity score.

    Returns:
        bool: True if either score exceeds its threshold, otherwise False.
    """
    # Evaluate the SequenceMatcher score first: when it already decides the
    # answer, the TF-IDF path (word segmentation plus vectorizer fitting)
    # does not need to run at all.
    if self.sequence_similarity(text1, text2) > seq_threshold:
        return True

    # Coerce to a plain bool so callers never receive a numpy boolean.
    return bool(self.tfidf_similarity(text1, text2) > tfidf_threshold)
|
||||||
|
|
||||||
|
|
||||||
relationship_manager = RelationshipManager()
|
relationship_manager = RelationshipManager()
|
||||||
|
|||||||
156
tests/test_sentence_similarity.py
Normal file
156
tests/test_sentence_similarity.py
Normal file
@@ -0,0 +1,156 @@
|
|||||||
|
import time
|
||||||
|
import unittest
|
||||||
|
import jieba
|
||||||
|
from difflib import SequenceMatcher
|
||||||
|
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||||
|
from sklearn.metrics.pairwise import cosine_similarity
|
||||||
|
|
||||||
|
def tfidf_similarity(s1, s2):
    """Return the TF-IDF cosine similarity between two sentences.

    Each sentence is segmented with jieba and re-joined with spaces; the
    two resulting strings are vectorized together as a two-document corpus
    and compared with cosine similarity.

    Args:
        s1: First sentence.
        s2: Second sentence.

    Returns:
        The cosine similarity between the two TF-IDF vectors, or 0.0 when
        the vectorizer raises ValueError.
    """
    # Segment both sentences (s1 first, then s2) into space-separated words.
    documents = [" ".join(jieba.cut(text)) for text in (s1, s2)]

    try:
        tfidf = TfidfVectorizer().fit_transform(documents)
    except ValueError:
        # May be raised when a sentence consists only of stop words or is empty.
        return 0.0

    # Entry [0, 1] of the pairwise matrix is the similarity of s1 and s2.
    pairwise = cosine_similarity(tfidf)
    return pairwise[0, 1]
|
||||||
|
|
||||||
|
def sequence_similarity(s1, s2):
    """Return the difflib SequenceMatcher ratio of two sentences.

    Args:
        s1: First sentence.
        s2: Second sentence.

    Returns:
        The value of ``SequenceMatcher.ratio()`` for the pair, with no
        junk filter applied.
    """
    matcher = SequenceMatcher(a=s1, b=s2)
    return matcher.ratio()
|
||||||
|
|
||||||
|
class TestSentenceSimilarity(unittest.TestCase):
    """Exploratory comparison of the TF-IDF and SequenceMatcher measures.

    Both tests only print scores and timings; neither makes assertions
    about the computed values.
    """

    @staticmethod
    def _timed(func, *args):
        """Call ``func(*args)`` and return ``(result, elapsed_seconds)``."""
        # perf_counter is the high-resolution clock intended for measuring
        # short durations; time.time() can be too coarse for these calls.
        start = time.perf_counter()
        result = func(*args)
        return result, time.perf_counter() - start

    @staticmethod
    def _pairwise_matrix(metric, sentences):
        """Return the full matrix of ``metric(a, b)`` over all sentence pairs."""
        return [[metric(a, b) for b in sentences] for a in sentences]

    @staticmethod
    def _print_matrix(matrix):
        """Print a matrix one row per line, values tab-separated to 4 decimals."""
        for row in matrix:
            for similarity in row:
                print(f"{similarity:.4f}", end="\t")
            print()

    def test_similarity_comparison(self):
        """Print both similarity scores, with timings, for each sentence pair."""
        # NOTE(review): "expected_similar" is recorded for every case but is
        # never compared against the computed scores.
        test_cases = [
            {
                "sentence1": "今天天气怎么样",
                "sentence2": "今天气候如何",
                "expected_similar": True,
            },
            {
                "sentence1": "今天天气怎么样",
                "sentence2": "我今天要去吃麦当劳",
                "expected_similar": False,
            },
            {
                "sentence1": "我今天要去吃麦当劳",
                "sentence2": "肯德基和麦当劳哪家好吃",
                "expected_similar": True,
            },
            {
                "sentence1": "Vindemiatrix提到昨天三个无赖杀穿交界地",
                "sentence2": "Vindemiatrix昨天用三个无赖角色杀穿了游戏中的交界地",
                "expected_similar": True,
            },
            {
                "sentence1": "tc_魔法士解释了之前templateinfo的with用法和现在的单独逻辑发送的区别",
                "sentence2": "tc_魔法士解释了templateinfo的用法,包括它是一个字典,key是prompt的名字,value是prompt的内容,格式是只支持大括号的fstring",
                "expected_similar": False,
            },
            {
                "sentence1": "YXH_XianYu分享了一张舰娘街机游戏的图片,并提到'玩舰娘街机的董不懂'",
                "sentence2": "YXH_XianYu对街机游戏表现出兴趣,并分享了玩舰娘街机的经历",
                "expected_similar": True,
            },
            {
                "sentence1": "YXH_XianYu在考虑入坑明日方舟,犹豫是否要从零开荒或使用初始号",
                "sentence2": "YXH_XianYu考虑入坑明日方舟,倾向于从零开荒或初始号开荒",
                "expected_similar": True,
            },
            {
                "sentence1": "YXH_XianYu提到秋叶原好多人在玩maimai",
                "sentence2": "YXH_XianYu对学园偶像的付费石头机制表示惊讶",
                "expected_similar": False,
            },
        ]

        print("\n相似度计算方法比较:")
        for i, case in enumerate(test_cases, 1):
            first, second = case["sentence1"], case["sentence2"]
            print(f"\n测试用例 {i}:")
            print(f"句子1: {first}")
            print(f"句子2: {second}")

            # Time the TF-IDF measure first, then SequenceMatcher.
            tfidf_sim, tfidf_time = self._timed(tfidf_similarity, first, second)
            seq_sim, seq_time = self._timed(sequence_similarity, first, second)

            print(f"TF-IDF相似度: {tfidf_sim:.4f} (耗时: {tfidf_time:.4f}秒)")
            print(f"SequenceMatcher相似度: {seq_sim:.4f} (耗时: {seq_time:.4f}秒)")

    def test_batch_processing(self):
        """Print timings and full pairwise matrices for both measures."""
        sentences = [
            "人工智能正在改变世界",
            "AI技术发展迅速",
            "机器学习是人工智能的一个分支",
            "深度学习在图像识别领域取得了突破",
            "自然语言处理技术越来越成熟",
        ]

        print("\n批量处理测试:")

        # Build the all-pairs matrix with each measure, timing each build.
        tfidf_matrix, tfidf_time = self._timed(
            self._pairwise_matrix, tfidf_similarity, sentences
        )
        seq_matrix, seq_time = self._timed(
            self._pairwise_matrix, sequence_similarity, sentences
        )

        print(f"TF-IDF批量处理 {len(sentences)} 个句子耗时: {tfidf_time:.4f}秒")
        print(f"SequenceMatcher批量处理 {len(sentences)} 个句子耗时: {seq_time:.4f}秒")

        print("\nTF-IDF相似度矩阵:")
        self._print_matrix(tfidf_matrix)

        print("\nSequenceMatcher相似度矩阵:")
        self._print_matrix(seq_matrix)
|
||||||
|
|
||||||
|
# Run the test cases with verbose output when this file is executed directly.
if __name__ == "__main__":
    unittest.main(verbosity=2)
|
||||||
Reference in New Issue
Block a user