feat(interest): 增加机器人兴趣标签长度限制和截断处理
在机器人兴趣系统(BotInterestManager)中,为生成的兴趣标签增加了长度校验和处理逻辑。

- 在生成兴趣标签的 Prompt 中,明确要求每个标签的长度不超过4个字符。
- 在解析模型返回的兴趣标签时,增加了一个检查机制。如果标签长度超过10个字符,系统会记录一条警告日志,并自动将标签截断为前10个字符,以确保数据的规范性和后续处理的稳定性。
This commit is contained in:
committed by
Windpicker-owo
parent
97006301a3
commit
6026682a03
@@ -169,6 +169,7 @@ class BotInterestManager:
|
|||||||
2. 每个标签都有权重(0.1-1.0),表示对该兴趣的喜好程度
|
2. 每个标签都有权重(0.1-1.0),表示对该兴趣的喜好程度
|
||||||
3. 生成15-25个不等的标签
|
3. 生成15-25个不等的标签
|
||||||
4. 标签应该是具体的关键词,而不是抽象概念
|
4. 标签应该是具体的关键词,而不是抽象概念
|
||||||
|
5. 每个标签的长度不超过4个字符
|
||||||
|
|
||||||
请以JSON格式返回,格式如下:
|
请以JSON格式返回,格式如下:
|
||||||
{{
|
{{
|
||||||
@@ -207,6 +208,11 @@ class BotInterestManager:
|
|||||||
tag_name = tag_data.get("name", f"标签_{i}")
|
tag_name = tag_data.get("name", f"标签_{i}")
|
||||||
weight = tag_data.get("weight", 0.5)
|
weight = tag_data.get("weight", 0.5)
|
||||||
|
|
||||||
|
# 检查标签长度,如果过长则截断
|
||||||
|
if len(tag_name) > 10:
|
||||||
|
logger.warning(f"⚠️ 标签 '{tag_name}' 过长,将截断为10个字符")
|
||||||
|
tag_name = tag_name[:10]
|
||||||
|
|
||||||
tag = BotInterestTag(tag_name=tag_name, weight=weight)
|
tag = BotInterestTag(tag_name=tag_name, weight=weight)
|
||||||
bot_interests.interest_tags.append(tag)
|
bot_interests.interest_tags.append(tag)
|
||||||
|
|
||||||
@@ -355,6 +361,8 @@ class BotInterestManager:
|
|||||||
|
|
||||||
# 使用LLMRequest获取embedding
|
# 使用LLMRequest获取embedding
|
||||||
logger.debug(f"🔄 正在获取embedding: '{text[:30]}...'")
|
logger.debug(f"🔄 正在获取embedding: '{text[:30]}...'")
|
||||||
|
if not self.embedding_request:
|
||||||
|
raise RuntimeError("❌ Embedding客户端未初始化")
|
||||||
embedding, model_name = await self.embedding_request.get_embedding(text)
|
embedding, model_name = await self.embedding_request.get_embedding(text)
|
||||||
|
|
||||||
if embedding and len(embedding) > 0:
|
if embedding and len(embedding) > 0:
|
||||||
@@ -504,7 +512,7 @@ class BotInterestManager:
|
|||||||
)
|
)
|
||||||
|
|
||||||
# 添加直接关键词匹配奖励
|
# 添加直接关键词匹配奖励
|
||||||
keyword_bonus = self._calculate_keyword_match_bonus(keywords, result.matched_tags)
|
keyword_bonus = self._calculate_keyword_match_bonus(keywords or [], result.matched_tags)
|
||||||
logger.debug(f"🎯 关键词直接匹配奖励: {keyword_bonus}")
|
logger.debug(f"🎯 关键词直接匹配奖励: {keyword_bonus}")
|
||||||
|
|
||||||
# 应用关键词奖励到匹配分数
|
# 应用关键词奖励到匹配分数
|
||||||
@@ -616,17 +624,18 @@ class BotInterestManager:
|
|||||||
def _calculate_cosine_similarity(self, vec1: list[float], vec2: list[float]) -> float:
|
def _calculate_cosine_similarity(self, vec1: list[float], vec2: list[float]) -> float:
|
||||||
"""计算余弦相似度"""
|
"""计算余弦相似度"""
|
||||||
try:
|
try:
|
||||||
vec1 = np.array(vec1)
|
np_vec1 = np.array(vec1)
|
||||||
vec2 = np.array(vec2)
|
np_vec2 = np.array(vec2)
|
||||||
|
|
||||||
dot_product = np.dot(vec1, vec2)
|
dot_product = np.dot(np_vec1, np_vec2)
|
||||||
norm1 = np.linalg.norm(vec1)
|
norm1 = np.linalg.norm(np_vec1)
|
||||||
norm2 = np.linalg.norm(vec2)
|
norm2 = np.linalg.norm(np_vec2)
|
||||||
|
|
||||||
if norm1 == 0 or norm2 == 0:
|
if norm1 == 0 or norm2 == 0:
|
||||||
return 0.0
|
return 0.0
|
||||||
|
|
||||||
return dot_product / (norm1 * norm2)
|
similarity = dot_product / (norm1 * norm2)
|
||||||
|
return float(similarity)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"计算余弦相似度失败: {e}")
|
logger.error(f"计算余弦相似度失败: {e}")
|
||||||
@@ -758,7 +767,7 @@ class BotInterestManager:
|
|||||||
if existing_record:
|
if existing_record:
|
||||||
# 更新现有记录
|
# 更新现有记录
|
||||||
logger.info("🔄 更新现有的兴趣标签配置")
|
logger.info("🔄 更新现有的兴趣标签配置")
|
||||||
existing_record.interest_tags = json_data
|
existing_record.interest_tags = json_data.decode("utf-8")
|
||||||
existing_record.personality_description = interests.personality_description
|
existing_record.personality_description = interests.personality_description
|
||||||
existing_record.embedding_model = interests.embedding_model
|
existing_record.embedding_model = interests.embedding_model
|
||||||
existing_record.version = interests.version
|
existing_record.version = interests.version
|
||||||
@@ -772,7 +781,7 @@ class BotInterestManager:
|
|||||||
new_record = DBBotPersonalityInterests(
|
new_record = DBBotPersonalityInterests(
|
||||||
personality_id=interests.personality_id,
|
personality_id=interests.personality_id,
|
||||||
personality_description=interests.personality_description,
|
personality_description=interests.personality_description,
|
||||||
interest_tags=json_data,
|
interest_tags=json_data.decode("utf-8"),
|
||||||
embedding_model=interests.embedding_model,
|
embedding_model=interests.embedding_model,
|
||||||
version=interests.version,
|
version=interests.version,
|
||||||
last_updated=interests.last_updated,
|
last_updated=interests.last_updated,
|
||||||
|
|||||||
Reference in New Issue
Block a user