fix: reword log messages so they explain themselves more clearly

SengokuCola
2025-06-07 22:09:42 +08:00
parent 1a713ed0d9
commit c6ffad2a84
5 changed files with 223 additions and 10 deletions

View File

@@ -23,10 +23,8 @@ def init_prompt():
 你和别人的关系信息是,请从这些信息中提取出你和别人的关系的原文:
 {relation_prompt}
-请只从上面这些信息中提取出。
-现在是{time_now}你正在上网和qq群里的网友们聊天以下是正在进行的聊天内容
+请只从上面这些信息中提取出内容
 {chat_observe_info}
 现在请你根据现有的信息,总结你和群里的人的关系
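The hunk above edits a prompt template. As a rough sketch of how its placeholders are presumably filled (the placeholder names come from the template itself; the formatting call is an assumption about surrounding code that this commit does not show):

    # Hypothetical sketch, not part of the commit: filling the template's placeholders.
    template = (
        "你和别人的关系信息是,请从这些信息中提取出你和别人的关系的原文:\n"
        "{relation_prompt}\n"
        "请只从上面这些信息中提取出内容\n"
        "{chat_observe_info}\n"
        "现在请你根据现有的信息,总结你和群里的人的关系"
    )
    prompt = template.format(
        relation_prompt="...",    # relationship records gathered elsewhere
        chat_observe_info="...",  # the chat transcript currently being observed
    )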

View File

@@ -387,7 +387,10 @@ class NormalChat:
         if not response_set or (
             self.enable_planner and self.action_type not in ["no_action", "change_to_focus_chat"]
         ):
-            logger.info(f"[{self.stream_name}] 模型未生成回复内容")
+            if not response_set:
+                logger.info(f"[{self.stream_name}] 模型未生成回复内容")
+            elif self.enable_planner and self.action_type not in ["no_action", "change_to_focus_chat"]:
+                logger.info(f"[{self.stream_name}] 模型选择其他动作")
             # if the model produced no reply, remove the thinking message
             container = await message_manager.get_container(self.stream_id)  # uses self.stream_id
             for msg in container.messages[:]:
@@ -443,7 +446,7 @@ class NormalChat:
logger.warning(f"[{self.stream_name}] 没有设置切换到focus聊天模式的回调函数无法执行切换") logger.warning(f"[{self.stream_name}] 没有设置切换到focus聊天模式的回调函数无法执行切换")
return return
else: else:
# await self._check_switch_to_focus() await self._check_switch_to_focus()
pass pass
info_catcher.done_catch() info_catcher.done_catch()

View File

@@ -57,7 +57,7 @@ class NormalChatGenerator:
             )
             if model_response:
-                logger.debug(f"{global_config.bot.nickname}原始回复是:{model_response}")
+                logger.debug(f"{global_config.bot.nickname}备选回复是:{model_response}")
                 model_response = process_llm_response(model_response)
                 return model_response

View File

@@ -13,6 +13,9 @@ from json_repair import repair_json
 from datetime import datetime
 from difflib import SequenceMatcher
 import ast
+import jieba
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.metrics.pairwise import cosine_similarity

 logger = get_logger("relation")
@@ -119,6 +122,8 @@ class RelationshipManager:
         person_id = person_info_manager.get_person_id(person[0], person[1])
         person_name = await person_info_manager.get_value(person_id, "person_name")
+        if not person_name or person_name == "none":
+            return ""
         impression = await person_info_manager.get_value(person_id, "impression")
         interaction = await person_info_manager.get_value(person_id, "interaction")
         points = await person_info_manager.get_value(person_id, "points") or []
@@ -324,8 +329,8 @@ class RelationshipManager:
             # look for similar points among the existing ones
             for i, existing_point in enumerate(current_points):
-                similarity = SequenceMatcher(None, new_point[0], existing_point[0]).ratio()
-                if similarity > 0.8:
+                # use the combined similarity check
+                if self.check_similarity(new_point[0], existing_point[0]):
                     similar_points.append(existing_point)
                     similar_indices.append(i)
@@ -355,7 +360,7 @@ class RelationshipManager:
             current_points = points_list

             # if there are too many points, move the surplus entries, chosen randomly by weight, to forgotten_points
-            if len(current_points) > 5:
+            if len(current_points) > 10:
                 # fetch the existing forgotten_points
                 forgotten_points = await person_info_manager.get_value(person_id, "forgotten_points") or []
                 if isinstance(forgotten_points, str):
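The weighted selection that actually moves entries into forgotten_points lies outside this hunk. A minimal sketch of the idea, assuming each point is a (text, weight, timestamp) tuple and that lower-weight points are likelier to be forgotten (both are assumptions; the commit does not show this code):

    import random

    def evict_surplus(points, cap=10):
        # Hypothetical sketch: trim `points` to `cap` entries, moving the rest
        # to a forgotten list; lower weight means a higher chance of eviction.
        forgotten = []
        while len(points) > cap:
            inv_weights = [1.0 / max(p[1], 1e-6) for p in points]
            victim = random.choices(range(len(points)), weights=inv_weights, k=1)[0]
            forgotten.append(points.pop(victim))
        return points, forgotten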
@@ -576,5 +581,56 @@ class RelationshipManager:
             self.logger.error(f"计算时间权重失败: {e}")
             return 0.5  # return a medium weight on error

+    def tfidf_similarity(self, s1, s2):
+        """Compute the similarity of two sentences with TF-IDF and cosine similarity."""
+        # 1. tokenize with jieba
+        s1_words = " ".join(jieba.cut(s1))
+        s2_words = " ".join(jieba.cut(s2))
+        # 2. put both sentences into one corpus
+        corpus = [s1_words, s2_words]
+        # 3. build and fit the TF-IDF vectorizer
+        try:
+            vectorizer = TfidfVectorizer()
+            tfidf_matrix = vectorizer.fit_transform(corpus)
+        except ValueError:
+            # can fail if a sentence is empty or consists only of stop words
+            return 0.0
+        # 4. compute the cosine similarity
+        similarity_matrix = cosine_similarity(tfidf_matrix)
+        # return the similarity between s1 and s2
+        return similarity_matrix[0, 1]
+
+    def sequence_similarity(self, s1, s2):
+        """Compute the similarity of two sentences with SequenceMatcher."""
+        return SequenceMatcher(None, s1, s2).ratio()
+
+    def check_similarity(self, text1, text2, tfidf_threshold=0.5, seq_threshold=0.6):
+        """Check text similarity with both methods; the texts count as similar
+        as soon as either method reaches its threshold.
+
+        Args:
+            text1: first text
+            text2: second text
+            tfidf_threshold: TF-IDF similarity threshold
+            seq_threshold: SequenceMatcher similarity threshold
+
+        Returns:
+            bool: True if either method reaches its threshold
+        """
+        # compute both similarities
+        tfidf_sim = self.tfidf_similarity(text1, text2)
+        seq_sim = self.sequence_similarity(text1, text2)
+        # similar if either method clears its threshold
+        return tfidf_sim > tfidf_threshold or seq_sim > seq_threshold
+
+
 relationship_manager = RelationshipManager()
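A quick usage sketch of the new combined check (assuming RelationshipManager is constructible with no arguments, as the module-level instance above suggests; the thresholds are the defaults from the diff):

    rm = RelationshipManager()
    # TF-IDF (> 0.5) catches shared jieba tokens; SequenceMatcher (> 0.6)
    # catches shared character runs. Either one marks the pair as similar,
    # which is what deduplicates near-identical relationship points above.
    duplicate = rm.check_similarity(
        "YXH_XianYu提到秋叶原好多人在玩maimai",
        "YXH_XianYu在秋叶原看到很多人玩maimai",  # hypothetical example text
    )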

View File

@@ -0,0 +1,156 @@
+import time
+import unittest
+
+import jieba
+from difflib import SequenceMatcher
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.metrics.pairwise import cosine_similarity
+
+
+def tfidf_similarity(s1, s2):
+    """Compute the similarity of two sentences with TF-IDF and cosine similarity."""
+    # 1. tokenize with jieba
+    s1_words = " ".join(jieba.cut(s1))
+    s2_words = " ".join(jieba.cut(s2))
+    # 2. put both sentences into one corpus
+    corpus = [s1_words, s2_words]
+    # 3. build and fit the TF-IDF vectorizer
+    try:
+        vectorizer = TfidfVectorizer()
+        tfidf_matrix = vectorizer.fit_transform(corpus)
+    except ValueError:
+        # can fail if a sentence is empty or consists only of stop words
+        return 0.0
+    # 4. compute the cosine similarity
+    similarity_matrix = cosine_similarity(tfidf_matrix)
+    # return the similarity between s1 and s2
+    return similarity_matrix[0, 1]
+
+
+def sequence_similarity(s1, s2):
+    """Compute the similarity of two sentences with SequenceMatcher."""
+    return SequenceMatcher(None, s1, s2).ratio()
+
+
+class TestSentenceSimilarity(unittest.TestCase):
+    def test_similarity_comparison(self):
+        """Compare the results of the different similarity methods."""
+        test_cases = [
+            {
+                "sentence1": "今天天气怎么样",
+                "sentence2": "今天气候如何",
+                "expected_similar": True
+            },
+            {
+                "sentence1": "今天天气怎么样",
+                "sentence2": "我今天要去吃麦当劳",
+                "expected_similar": False
+            },
+            {
+                "sentence1": "我今天要去吃麦当劳",
+                "sentence2": "肯德基和麦当劳哪家好吃",
+                "expected_similar": True
+            },
+            {
+                "sentence1": "Vindemiatrix提到昨天三个无赖杀穿交界地",
+                "sentence2": "Vindemiatrix昨天用三个无赖角色杀穿了游戏中的交界地",
+                "expected_similar": True
+            },
+            {
+                "sentence1": "tc_魔法士解释了之前templateinfo的with用法和现在的单独逻辑发送的区别",
+                "sentence2": "tc_魔法士解释了templateinfo的用法包括它是一个字典key是prompt的名字value是prompt的内容格式是只支持大括号的fstring",
+                "expected_similar": False
+            },
+            {
+                "sentence1": "YXH_XianYu分享了一张舰娘街机游戏的图片并提到'玩舰娘街机的董不懂'",
+                "sentence2": "YXH_XianYu对街机游戏表现出兴趣并分享了玩舰娘街机的经历",
+                "expected_similar": True
+            },
+            {
+                "sentence1": "YXH_XianYu在考虑入坑明日方舟犹豫是否要从零开荒或使用初始号",
+                "sentence2": "YXH_XianYu考虑入坑明日方舟倾向于从零开荒或初始号开荒",
+                "expected_similar": True
+            },
+            {
+                "sentence1": "YXH_XianYu提到秋叶原好多人在玩maimai",
+                "sentence2": "YXH_XianYu对学园偶像的付费石头机制表示惊讶",
+                "expected_similar": False
+            }
+        ]
+
+        print("\n相似度计算方法比较:")
+        for i, case in enumerate(test_cases, 1):
+            print(f"\n测试用例 {i}:")
+            print(f"句子1: {case['sentence1']}")
+            print(f"句子2: {case['sentence2']}")
+
+            # TF-IDF similarity
+            start_time = time.time()
+            tfidf_sim = tfidf_similarity(case['sentence1'], case['sentence2'])
+            tfidf_time = time.time() - start_time
+
+            # SequenceMatcher similarity
+            start_time = time.time()
+            seq_sim = sequence_similarity(case['sentence1'], case['sentence2'])
+            seq_time = time.time() - start_time
+
+            print(f"TF-IDF相似度: {tfidf_sim:.4f} (耗时: {tfidf_time:.4f}秒)")
+            print(f"SequenceMatcher相似度: {seq_sim:.4f} (耗时: {seq_time:.4f}秒)")
+
+    def test_batch_processing(self):
+        """Measure batch-processing performance."""
+        sentences = [
+            "人工智能正在改变世界",
+            "AI技术发展迅速",
+            "机器学习是人工智能的一个分支",
+            "深度学习在图像识别领域取得了突破",
+            "自然语言处理技术越来越成熟"
+        ]
+
+        print("\n批量处理测试:")
+
+        # TF-IDF batch processing
+        start_time = time.time()
+        tfidf_matrix = []
+        for i in range(len(sentences)):
+            row = []
+            for j in range(len(sentences)):
+                similarity = tfidf_similarity(sentences[i], sentences[j])
+                row.append(similarity)
+            tfidf_matrix.append(row)
+        tfidf_time = time.time() - start_time
+
+        # SequenceMatcher batch processing
+        start_time = time.time()
+        seq_matrix = []
+        for i in range(len(sentences)):
+            row = []
+            for j in range(len(sentences)):
+                similarity = sequence_similarity(sentences[i], sentences[j])
+                row.append(similarity)
+            seq_matrix.append(row)
+        seq_time = time.time() - start_time
+
+        print(f"TF-IDF批量处理 {len(sentences)} 个句子耗时: {tfidf_time:.4f}秒")
+        print(f"SequenceMatcher批量处理 {len(sentences)} 个句子耗时: {seq_time:.4f}秒")
+
+        # print the TF-IDF similarity matrix
+        print("\nTF-IDF相似度矩阵:")
+        for row in tfidf_matrix:
+            for similarity in row:
+                print(f"{similarity:.4f}", end="\t")
+            print()
+
+        # print the SequenceMatcher similarity matrix
+        print("\nSequenceMatcher相似度矩阵:")
+        for row in seq_matrix:
+            for similarity in row:
+                print(f"{similarity:.4f}", end="\t")
+            print()
+
+
+if __name__ == '__main__':
+    unittest.main(verbosity=2)
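Both test methods print scores and timings rather than asserting against expected_similar, so the file works as a comparison harness. A property-style check that could be added without pinning exact score values (hypothetical, not part of this commit):

    class TestSimilarityProperties(unittest.TestCase):
        def test_basic_properties(self):
            s1, s2 = "今天天气怎么样", "今天气候如何"
            # identical inputs score 1.0 under both metrics
            self.assertAlmostEqual(sequence_similarity(s1, s1), 1.0)
            self.assertAlmostEqual(tfidf_similarity(s1, s1), 1.0, places=6)
            # cosine similarity is symmetric in its arguments
            self.assertAlmostEqual(tfidf_similarity(s1, s2), tfidf_similarity(s2, s1), places=6)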