temp: 保存
This commit is contained in:
@@ -50,8 +50,8 @@ class BotConfig:
|
||||
|
||||
# 默认人设
|
||||
PROMPT_PERSONALITY=[
|
||||
"曾经是一个学习地质的女大学生,现在学习心理学和脑科学,你会刷贴吧,你正在浏览qq群",
|
||||
"是一个女大学生,你有黑色头发,你会刷小红书, 你正在浏览qq群"
|
||||
"曾经是一个学习地质的女大学生,现在学习心理学和脑科学,你会刷贴吧",
|
||||
"是一个女大学生,你有黑色头发,你会刷小红书"
|
||||
]
|
||||
PROMPT_SCHEDULE_GEN="一个曾经学习地质,现在学习心理学和脑科学的女大学生,喜欢刷qq,贴吧,知乎和小红书"
|
||||
|
||||
|
||||
@@ -23,65 +23,96 @@ class ResponseGenerator:
|
||||
def __init__(self):
    """Create the LLM clients and the database handle used to generate replies.

    Three ``LLM_request`` clients are built from the model names configured in
    ``global_config`` (``llm_reasoning``, ``llm_normal``, ``llm_reasoning_minor``),
    each exactly once and all with a temperature of 0.7.
    """
    self.model_r1 = LLM_request(model=global_config.llm_reasoning, temperature=0.7)
    self.model_v3 = LLM_request(model=global_config.llm_normal, temperature=0.7)
    self.model_r1_distill = LLM_request(
        model=global_config.llm_reasoning_minor, temperature=0.7
    )
    self.db = Database.get_instance()
    # Label of the model chosen for the most recent reply; defaults to R1.
    self.current_model_type = "r1"
|
||||
|
||||
async def generate_response(
    self, message: Message
) -> Optional[Union[str, List[str]]]:
    """Pick a model at random, generate a reply and update the sender's relationship.

    The model is chosen by comparing one random draw against
    ``global_config.MODEL_R1_PROBABILITY`` and ``MODEL_V3_PROBABILITY``; any
    remaining probability mass selects the distilled R1 model.

    Args:
        message: Incoming message to answer.

    Returns:
        ``(reply, emotion_tags)`` when a reply was produced, otherwise
        ``(None, [])``.
        NOTE(review): the return annotation does not describe this tuple; it is
        left untouched here to keep the signature stable.
    """
    # Choose the model from the probabilities configured in global_config.
    rand = random.random()
    r1_cutoff = global_config.MODEL_R1_PROBABILITY
    v3_cutoff = r1_cutoff + global_config.MODEL_V3_PROBABILITY
    if rand < r1_cutoff:
        self.current_model_type = "r1"
        current_model = self.model_r1
    elif rand < v3_cutoff:
        self.current_model_type = "v3"
        current_model = self.model_v3
    else:
        self.current_model_type = "r1_distill"
        current_model = self.model_r1_distill

    print(
        f"+++++++++++++++++{global_config.BOT_NICKNAME}{self.current_model_type}思考中+++++++++++++++++"
    )

    model_response = await self._generate_response_with_model(
        message, current_model
    )

    if model_response:
        print(f"{global_config.BOT_NICKNAME}的回复是:{model_response}")
        model_response, emotion = await self._process_response(model_response)
        if model_response:
            print(f"为 '{model_response}' 获取到的情感标签为:{emotion}")
            # Relationship value passed to the relationship manager per emotion label.
            valuedict = {
                "happy": 0.5,
                "angry": -1,
                "sad": -0.5,
                "surprised": 0.5,
                "disgusted": -1.5,
                "fearful": -0.25,
                "neutral": 0.25,
            }
            # The label comes from free-form model output, so it may be missing
            # or outside the known set; skip the update instead of raising.
            label = emotion[0] if emotion else None
            if label in valuedict:
                await relationship_manager.update_relationship_value(
                    message.user_id, relationship_value=valuedict[label]
                )

            return model_response, emotion
    return None, []
|
||||
|
||||
async def _generate_response_with_model(self, message: Message, model: LLM_request) -> Optional[str]:
|
||||
async def _generate_response_with_model(
|
||||
self, message: Message, model: LLM_request
|
||||
) -> Optional[str]:
|
||||
"""使用指定的模型生成回复"""
|
||||
sender_name = message.user_nickname or f"用户{message.user_id}"
|
||||
if message.user_cardname:
|
||||
sender_name=f"[({message.user_id}){message.user_nickname}]{message.user_cardname}"
|
||||
sender_name = (
|
||||
f"[({message.user_id}){message.user_nickname}]{message.user_cardname}"
|
||||
)
|
||||
|
||||
# 获取关系值
|
||||
relationship_value = relationship_manager.get_relationship(message.user_id).relationship_value if relationship_manager.get_relationship(message.user_id) else 0.0
|
||||
relationship_value = (
|
||||
relationship_manager.get_relationship(message.user_id).relationship_value
|
||||
if relationship_manager.get_relationship(message.user_id)
|
||||
else 0.0
|
||||
)
|
||||
if relationship_value != 0.0:
|
||||
print(f"\033[1;32m[关系管理]\033[0m 回复中_当前关系值: {relationship_value}")
|
||||
print(
|
||||
f"\033[1;32m[关系管理]\033[0m 回复中_当前关系值: {relationship_value}"
|
||||
)
|
||||
|
||||
# 构建prompt
|
||||
prompt, prompt_check = prompt_builder._build_prompt(
|
||||
message_txt=message.processed_plain_text,
|
||||
sender_name=sender_name,
|
||||
relationship_value=relationship_value,
|
||||
group_id=message.group_id
|
||||
group_id=message.group_id,
|
||||
)
|
||||
|
||||
# 读空气模块
|
||||
if global_config.enable_kuuki_read:
|
||||
content_check, reasoning_content_check = await self.model_v3.generate_response(prompt_check)
|
||||
content_check, reasoning_content_check = (
|
||||
await self.model_v3.generate_response(prompt_check)
|
||||
)
|
||||
print(f"\033[1;32m[读空气]\033[0m 读空气结果为{content_check}")
|
||||
if 'yes' not in content_check.lower() and random.random() < 0.3:
|
||||
if "yes" not in content_check.lower() and random.random() < 0.3:
|
||||
self._save_to_db(
|
||||
message=message,
|
||||
sender_name=sender_name,
|
||||
@@ -90,7 +121,7 @@ class ResponseGenerator:
|
||||
content="",
|
||||
content_check=content_check,
|
||||
reasoning_content="",
|
||||
reasoning_content_check=reasoning_content_check
|
||||
reasoning_content_check=reasoning_content_check,
|
||||
)
|
||||
return None
|
||||
|
||||
@@ -106,36 +137,49 @@ class ResponseGenerator:
|
||||
content=content,
|
||||
content_check=content_check if global_config.enable_kuuki_read else "",
|
||||
reasoning_content=reasoning_content,
|
||||
reasoning_content_check=reasoning_content_check if global_config.enable_kuuki_read else ""
|
||||
reasoning_content_check=(
|
||||
reasoning_content_check if global_config.enable_kuuki_read else ""
|
||||
),
|
||||
)
|
||||
|
||||
return content
|
||||
|
||||
def _save_to_db(
    self,
    message: Message,
    sender_name: str,
    prompt: str,
    prompt_check: str,
    content: str,
    content_check: str,
    reasoning_content: str,
    reasoning_content_check: str,
):
    """Insert one reasoning-log document describing a generation attempt.

    Args:
        message: Source message; its ``group_id`` and ``processed_plain_text``
            are stored.
        sender_name: Display name stored under ``user``.
        prompt: Prompt used for the reply.
        prompt_check: Prompt used for the "should I reply" check.
        content: Reply text.
        content_check: Answer of the reply check.
        reasoning_content: Reasoning text that accompanied the reply.
        reasoning_content_check: Reasoning text that accompanied the check.
    """
    # Exactly one insert per call, so every attempt is logged once.
    record = {
        "time": time.time(),
        "group_id": message.group_id,
        "user": sender_name,
        "message": message.processed_plain_text,
        "model": self.current_model_type,
        "reasoning_check": reasoning_content_check,
        "response_check": content_check,
        "reasoning": reasoning_content,
        "response": content,
        "prompt": prompt,
        "prompt_check": prompt_check,
    }
    self.db.db.reasoning_logs.insert_one(record)
|
||||
|
||||
async def _get_emotion_tags(self, content: str) -> List[str]:
|
||||
"""提取情感标签"""
|
||||
try:
|
||||
prompt = f'''请从以下内容中,从"happy,angry,sad,surprised,disgusted,fearful,neutral"中选出最匹配的1个情感标签并输出
|
||||
prompt = f"""请从以下内容中,从"happy,angry,sad,surprised,disgusted,fearful,neutral"中选出最匹配的1个情感标签并输出
|
||||
只输出标签就好,不要输出其他内容:
|
||||
内容:{content}
|
||||
输出:
|
||||
'''
|
||||
"""
|
||||
|
||||
content, _ = await self.model_v3.generate_response(prompt)
|
||||
return [content.strip()] if content else ["neutral"]
|
||||
@@ -153,3 +197,41 @@ class ResponseGenerator:
|
||||
processed_response = process_llm_response(content)
|
||||
|
||||
return processed_response, emotion_tags
|
||||
|
||||
|
||||
class InitiativeMessageGenerate:
    """Generate an unprompted group message in three LLM steps.

    The steps are: pick a topic, ask whether speaking about it is appropriate,
    then write the message. The first two use ``model_v3`` and the last uses
    ``model_r1``.
    """

    def __init__(self):
        """Create the database handle and the three LLM clients (temperature 0.7)."""
        self.db = Database.get_instance()
        self.model_r1 = LLM_request(model=global_config.llm_reasoning, temperature=0.7)
        self.model_v3 = LLM_request(model=global_config.llm_normal, temperature=0.7)
        self.model_r1_distill = LLM_request(
            model=global_config.llm_reasoning_minor, temperature=0.7
        )

    async def gen_response(self, message: Message):
        """Produce an initiative message for the group of ``message``.

        This is a coroutine because ``LLM_request.generate_response`` is awaited
        by every other caller in this module; calling it without ``await``
        yields a coroutine object that cannot be unpacked into
        ``(content, reasoning)``.

        Args:
            message: Message whose ``group_id`` selects the target group.

        Returns:
            The generated text, or ``None`` when no topic was chosen, the chosen
            topic is not one of the offered ones, or the check step did not
            answer "yes".
        """
        topic_select_prompt, dots_for_select, prompt_template = (
            prompt_builder._build_initiative_prompt_select(message.group_id)
        )
        content_select, reasoning = await self.model_v3.generate_response(
            topic_select_prompt
        )
        print(f"[DEBUG] {content_select} {reasoning}")
        if not content_select:
            return None

        topics_list = [dot[0] for dot in dots_for_select]
        # Tolerate surrounding whitespace in the model's answer.
        chosen_topic = content_select.strip()
        if chosen_topic not in topics_list:
            return None
        select_dot = dots_for_select[topics_list.index(chosen_topic)]
        # Each candidate is a (topic, node_data) pair. Both prompt builders index
        # the node data by key ('concept', 'memory_items'), so pass element 1.
        node_data = select_dot[1]

        prompt_check, memory = prompt_builder._build_initiative_prompt_check(
            node_data, prompt_template
        )
        content_check, reasoning_check = await self.model_v3.generate_response(
            prompt_check
        )
        print(f"[DEBUG] {content_check} {reasoning_check}")
        # An empty answer counts as "no".
        if not content_check or "yes" not in content_check.lower():
            return None

        prompt = prompt_builder._build_initiative_prompt(
            node_data, prompt_template, memory
        )
        content, reasoning = await self.model_r1.generate_response(prompt)
        print(f"[DEBUG] {content} {reasoning}")
        return content
|
||||
|
||||
@@ -151,12 +151,11 @@ class PromptBuilder:
|
||||
prompt_personality = ''
|
||||
personality_choice = random.random()
|
||||
if personality_choice < 4/6: # 第一种人格
|
||||
prompt_personality = f'''{activate_prompt}你的网名叫{global_config.BOT_NICKNAME},{personality[0]},{promt_info_prompt},
|
||||
prompt_personality = f'''{activate_prompt}你的网名叫{global_config.BOT_NICKNAME},{personality[0]}, 你正在浏览qq群,{promt_info_prompt},
|
||||
现在请你给出日常且口语化的回复,平淡一些,尽量简短一些。{is_bot_prompt}
|
||||
请注意把握群里的聊天内容,不要刻意突出自身学科背景,不要回复的太有条理,可以有个性。'''
|
||||
elif personality_choice < 1: # 第二种人格
|
||||
prompt_personality = f'''{activate_prompt}你的网名叫{global_config.BOT_NICKNAME},{personality[1]},{promt_info_prompt},
|
||||
|
||||
prompt_personality = f'''{activate_prompt}你的网名叫{global_config.BOT_NICKNAME},{personality[1]}, 你正在浏览qq群,{promt_info_prompt},
|
||||
现在请你给出日常且口语化的回复,请表现你自己的见解,不要一昧迎合,尽量简短一些。{is_bot_prompt}
|
||||
请你表达自己的见解和观点。可以有个性。'''
|
||||
|
||||
@@ -196,14 +195,66 @@ class PromptBuilder:
|
||||
prompt_personality_check = ''
|
||||
extra_check_info=f"请注意把握群里的聊天内容的基础上,综合群内的氛围,例如,和{global_config.BOT_NICKNAME}相关的话题要积极回复,如果是at自己的消息一定要回复,如果自己正在和别人聊天一定要回复,其他话题如果合适搭话也可以回复,如果认为应该回复请输出yes,否则输出no,请注意是决定是否需要回复,而不是编写回复内容,除了yes和no不要输出任何回复内容。"
|
||||
if personality_choice < 4/6: # 第一种人格
|
||||
prompt_personality_check = f'''你的网名叫{global_config.BOT_NICKNAME},{personality[0]},{promt_info_prompt} {activate_prompt_check} {extra_check_info}'''
|
||||
prompt_personality_check = f'''你的网名叫{global_config.BOT_NICKNAME},{personality[0]}, 你正在浏览qq群,{promt_info_prompt} {activate_prompt_check} {extra_check_info}'''
|
||||
elif personality_choice < 1: # 第二种人格
|
||||
prompt_personality_check = f'''你的网名叫{global_config.BOT_NICKNAME},{personality[1]},{promt_info_prompt} {activate_prompt_check} {extra_check_info}'''
|
||||
prompt_personality_check = f'''你的网名叫{global_config.BOT_NICKNAME},{personality[1]}, 你正在浏览qq群,{promt_info_prompt} {activate_prompt_check} {extra_check_info}'''
|
||||
|
||||
prompt_check_if_response=f"{prompt_info}\n{prompt_date}\n{chat_talking_prompt}\n{prompt_personality_check}"
|
||||
|
||||
return prompt,prompt_check_if_response
|
||||
|
||||
def _build_initiative_prompt_select(self,group_id):
    """Build the prompt that asks the model to choose a topic to speak about.

    Up to five memory-graph nodes holding more than three memory items are
    sampled as candidate topics.

    Args:
        group_id: Group whose recent chat text is fetched; a falsy value skips
            the fetch.

    Returns:
        A 3-tuple ``(prompt_initiative_select, nodes_for_select, prompt_regular)``:
        the topic-selection prompt, the sampled nodes, and the date/personality
        preamble without the selection request.
    """
    now = time.localtime()
    current_date = time.strftime("%Y-%m-%d", now)
    current_time = time.strftime("%H:%M:%S", now)
    _, bot_schedule_now_activity = bot_schedule.get_current_task()
    prompt_date = f'''今天是{current_date},现在是{current_time},你今天的日程是:\n{bot_schedule.today_schedule}\n你现在正在{bot_schedule_now_activity}\n'''

    # NOTE(review): the recent chat text is fetched but never added to either
    # returned prompt, although the personality text below refers to "the chat
    # above". Confirm whether it should be prepended.
    chat_talking_prompt = ''
    if group_id:
        chat_talking_prompt = get_recent_group_detailed_plain_text(self.db, group_id, limit=global_config.MAX_CONTEXT_SIZE,combine = True)
        chat_talking_prompt = f"以下是群里正在聊天的内容:\n{chat_talking_prompt}"

    # Candidate topics: random.sample needs a sequence (not a filter iterator)
    # and a sample size no larger than the population.
    eligible_nodes = [dot for dot in memory_graph.dots if len(dot[1]['memory_items']) > 3]
    nodes_for_select = random.sample(eligible_nodes, min(5, len(eligible_nodes)))
    topics = [node[0] for node in nodes_for_select]

    # Personality preamble: the first personality is used with probability 4/6.
    activate_prompt = "以上是群里正在进行的聊天。"
    personality = global_config.PROMPT_PERSONALITY
    persona = personality[0] if random.random() < 4/6 else personality[1]
    prompt_personality = f'''{activate_prompt}你的网名叫{global_config.BOT_NICKNAME},{persona}'''

    # Quote each topic individually and separate the topics with commas.
    topics_str = ','.join(f'"{topic}"' for topic in topics)
    prompt_for_select = f"你现在想在群里发言,回忆了一下,想到几个话题,分别是{topics_str},综合当前状态以及群内气氛,请你在其中选择一个合适的话题,注意只需要输出话题,除了话题什么也不要输出(双引号也不要输出)"

    prompt_initiative_select = f"{prompt_date}\n{prompt_personality}\n{prompt_for_select}"
    prompt_regular = f"{prompt_date}\n{prompt_personality}"

    return prompt_initiative_select,nodes_for_select,prompt_regular
|
||||
|
||||
def _build_initiative_prompt_check(self,selected_node,prompt_regular):
    """Build the prompt asking whether speaking about a topic is appropriate.

    Args:
        selected_node: Mapping with a ``'concept'`` entry (the topic) and a
            ``'memory_items'`` collection of strings.
        prompt_regular: Preamble placed in front of the question.

    Returns:
        ``(prompt_for_check, memory)`` where ``memory`` is the list of up to
        three randomly sampled memory items quoted in the prompt.
    """
    memory_items = list(selected_node['memory_items'])
    # Never ask for more items than exist; random.sample raises otherwise.
    memory = random.sample(memory_items, min(3, len(memory_items)))
    # Join outside the f-string: a backslash inside a replacement field is a
    # SyntaxError on Python versions before 3.12.
    memory_text = '\n'.join(memory)
    prompt_for_check = (
        f"{prompt_regular}你现在想在群里发言,回忆了一下,想到一个话题,是\"{selected_node['concept']}\","
        f"关于这个话题的记忆有\n{memory_text}\n,以这个作为主题发言合适吗?"
        "请在把握群里的聊天内容的基础上,综合群内的氛围,如果认为应该发言请输出yes,否则输出no,请注意是决定是否需要发言,而不是编写回复内容,除了yes和no不要输出任何回复内容。"
    )
    return prompt_for_check,memory
|
||||
|
||||
def _build_initiative_prompt(self,selected_node,prompt_regular,memory):
    """Build the prompt that asks the model to write the initiative message.

    Args:
        selected_node: Mapping with a ``'concept'`` entry naming the topic.
        prompt_regular: Preamble placed in front of the request.
        memory: Memory strings about the topic, listed one per line.

    Returns:
        The complete prompt string.
    """
    # Join outside the f-string: a backslash inside a replacement field is a
    # SyntaxError on Python versions before 3.12.
    memory_text = '\n'.join(memory)
    prompt_for_initiative = (
        f"{prompt_regular}你现在想在群里发言,回忆了一下,想到一个话题,是\"{selected_node['concept']}\","
        f"关于这个话题的记忆有\n{memory_text}\n,"
        "请在把握群里的聊天内容的基础上,综合群内的氛围,以日常且口语化的口吻,简短且随意一点进行发言,不要说的太有条理,可以有个性。记住不要输出多余内容(包括前后缀,冒号和引号,"
        "括号,表情等)"
    )
    return prompt_for_initiative
|
||||
|
||||
|
||||
def get_prompt_info(self,message:str,threshold:float):
|
||||
related_info = ''
|
||||
if len(message) > 10:
|
||||
|
||||
@@ -8,27 +8,27 @@ from .config import global_config
|
||||
driver = get_driver()
|
||||
config = driver.config
|
||||
|
||||
|
||||
class TopicIdentifier:
|
||||
def __init__(self):
    """Create the OpenAI-compatible client used to identify message topics.

    The API key and base URL are read from the driver config attributes
    ``siliconflow_key`` and ``siliconflow_base_url``; each keyword is passed
    exactly once.
    """
    self.client = OpenAI(
        api_key=config.siliconflow_key, base_url=config.siliconflow_base_url
    )
|
||||
|
||||
def identify_topic_llm(self, text: str) -> Optional[str]:
|
||||
"""识别消息主题"""
|
||||
|
||||
prompt = f"""判断这条消息的主题,如果没有明显主题请回复"无主题",要求:
|
||||
1. 主题通常2-4个字,必须简短,要求精准概括,不要太具体。
|
||||
2. 建议给出多个主题,之间用英文逗号分割。只输出主题本身就好,不要有前后缀。
|
||||
|
||||
消息内容:{text}"""
|
||||
prompt = f"""判断这条消息的主题,如果没有明显主题请回复"无主题",要求:\
|
||||
1. 主题通常2-4个字,必须简短,要求精准概括,不要太具体。\
|
||||
2. 建议给出多个主题,之间用英文逗号分割。只输出主题本身就好,不要有前后缀。\
|
||||
3. 这里是
|
||||
消息内容:{text}"""
|
||||
|
||||
response = self.client.chat.completions.create(
|
||||
model=global_config.SILICONFLOW_MODEL_V3,
|
||||
messages=[{"role": "user", "content": prompt}],
|
||||
temperature=0.8,
|
||||
max_tokens=10
|
||||
max_tokens=10,
|
||||
)
|
||||
|
||||
if not response or not response.choices:
|
||||
@@ -36,7 +36,11 @@ class TopicIdentifier:
|
||||
return None
|
||||
|
||||
# 从 OpenAI API 响应中获取第一个选项的消息内容,并去除首尾空白字符
|
||||
topic = response.choices[0].message.content.strip() if response.choices[0].message.content else None
|
||||
topic = (
|
||||
response.choices[0].message.content.strip()
|
||||
if response.choices[0].message.content
|
||||
else None
|
||||
)
|
||||
|
||||
if topic == "无主题":
|
||||
return None
|
||||
@@ -45,7 +49,6 @@ class TopicIdentifier:
|
||||
split_topic = self.parse_topic(topic)
|
||||
return split_topic
|
||||
|
||||
|
||||
def parse_topic(self, topic: str) -> List[str]:
|
||||
"""解析主题,返回主题列表"""
|
||||
if not topic or topic == "无主题":
|
||||
@@ -57,32 +60,212 @@ class TopicIdentifier:
|
||||
words = jieba.lcut(text)
|
||||
# 去除停用词和标点符号
|
||||
stop_words = {
|
||||
'的', '了', '和', '是', '就', '都', '而', '及', '与', '这', '那', '但', '然', '却',
|
||||
'因为', '所以', '如果', '虽然', '一个', '我', '你', '他', '她', '它', '我们', '你们',
|
||||
'他们', '在', '有', '个', '把', '被', '让', '给', '从', '向', '到', '又', '也', '很',
|
||||
'啊', '吧', '呢', '吗', '呀', '哦', '哈', '么', '嘛', '啦', '哎', '唉', '哇', '嗯',
|
||||
'哼', '哪', '什么', '怎么', '为什么', '怎样', '如何', '什么样', '这样', '那样', '这么',
|
||||
'那么', '多少', '几', '谁', '哪里', '哪儿', '什么时候', '何时', '为何', '怎么办',
|
||||
'怎么样', '这些', '那些', '一些', '一点', '一下', '一直', '一定', '一般', '一样',
|
||||
'一会儿', '一边', '一起',
|
||||
"的",
|
||||
"了",
|
||||
"和",
|
||||
"是",
|
||||
"就",
|
||||
"都",
|
||||
"而",
|
||||
"及",
|
||||
"与",
|
||||
"这",
|
||||
"那",
|
||||
"但",
|
||||
"然",
|
||||
"却",
|
||||
"因为",
|
||||
"所以",
|
||||
"如果",
|
||||
"虽然",
|
||||
"一个",
|
||||
"我",
|
||||
"你",
|
||||
"他",
|
||||
"她",
|
||||
"它",
|
||||
"我们",
|
||||
"你们",
|
||||
"他们",
|
||||
"在",
|
||||
"有",
|
||||
"个",
|
||||
"把",
|
||||
"被",
|
||||
"让",
|
||||
"给",
|
||||
"从",
|
||||
"向",
|
||||
"到",
|
||||
"又",
|
||||
"也",
|
||||
"很",
|
||||
"啊",
|
||||
"吧",
|
||||
"呢",
|
||||
"吗",
|
||||
"呀",
|
||||
"哦",
|
||||
"哈",
|
||||
"么",
|
||||
"嘛",
|
||||
"啦",
|
||||
"哎",
|
||||
"唉",
|
||||
"哇",
|
||||
"嗯",
|
||||
"哼",
|
||||
"哪",
|
||||
"什么",
|
||||
"怎么",
|
||||
"为什么",
|
||||
"怎样",
|
||||
"如何",
|
||||
"什么样",
|
||||
"这样",
|
||||
"那样",
|
||||
"这么",
|
||||
"那么",
|
||||
"多少",
|
||||
"几",
|
||||
"谁",
|
||||
"哪里",
|
||||
"哪儿",
|
||||
"什么时候",
|
||||
"何时",
|
||||
"为何",
|
||||
"怎么办",
|
||||
"怎么样",
|
||||
"这些",
|
||||
"那些",
|
||||
"一些",
|
||||
"一点",
|
||||
"一下",
|
||||
"一直",
|
||||
"一定",
|
||||
"一般",
|
||||
"一样",
|
||||
"一会儿",
|
||||
"一边",
|
||||
"一起",
|
||||
# 添加更多量词
|
||||
'个', '只', '条', '张', '片', '块', '本', '册', '页', '幅', '面', '篇', '份',
|
||||
'朵', '颗', '粒', '座', '幢', '栋', '间', '层', '家', '户', '位', '名', '群',
|
||||
'双', '对', '打', '副', '套', '批', '组', '串', '包', '箱', '袋', '瓶', '罐',
|
||||
"个",
|
||||
"只",
|
||||
"条",
|
||||
"张",
|
||||
"片",
|
||||
"块",
|
||||
"本",
|
||||
"册",
|
||||
"页",
|
||||
"幅",
|
||||
"面",
|
||||
"篇",
|
||||
"份",
|
||||
"朵",
|
||||
"颗",
|
||||
"粒",
|
||||
"座",
|
||||
"幢",
|
||||
"栋",
|
||||
"间",
|
||||
"层",
|
||||
"家",
|
||||
"户",
|
||||
"位",
|
||||
"名",
|
||||
"群",
|
||||
"双",
|
||||
"对",
|
||||
"打",
|
||||
"副",
|
||||
"套",
|
||||
"批",
|
||||
"组",
|
||||
"串",
|
||||
"包",
|
||||
"箱",
|
||||
"袋",
|
||||
"瓶",
|
||||
"罐",
|
||||
# 添加更多介词
|
||||
'按', '按照', '把', '被', '比', '比如', '除', '除了', '当', '对', '对于',
|
||||
'根据', '关于', '跟', '和', '将', '经', '经过', '靠', '连', '论', '通过',
|
||||
'同', '往', '为', '为了', '围绕', '于', '由', '由于', '与', '在', '沿', '沿着',
|
||||
'依', '依照', '以', '因', '因为', '用', '由', '与', '自', '自从'
|
||||
"按",
|
||||
"按照",
|
||||
"把",
|
||||
"被",
|
||||
"比",
|
||||
"比如",
|
||||
"除",
|
||||
"除了",
|
||||
"当",
|
||||
"对",
|
||||
"对于",
|
||||
"根据",
|
||||
"关于",
|
||||
"跟",
|
||||
"和",
|
||||
"将",
|
||||
"经",
|
||||
"经过",
|
||||
"靠",
|
||||
"连",
|
||||
"论",
|
||||
"通过",
|
||||
"同",
|
||||
"往",
|
||||
"为",
|
||||
"为了",
|
||||
"围绕",
|
||||
"于",
|
||||
"由",
|
||||
"由于",
|
||||
"与",
|
||||
"在",
|
||||
"沿",
|
||||
"沿着",
|
||||
"依",
|
||||
"依照",
|
||||
"以",
|
||||
"因",
|
||||
"因为",
|
||||
"用",
|
||||
"由",
|
||||
"与",
|
||||
"自",
|
||||
"自从",
|
||||
}
|
||||
|
||||
# 过滤掉停用词和标点符号,只保留名词和动词
|
||||
filtered_words = []
|
||||
for word in words:
|
||||
if word not in stop_words and not word.strip() in {
|
||||
'。', ',', '、', ':', ';', '!', '?', '"', '"', ''', ''',
|
||||
'(', ')', '【', '】', '《', '》', '…', '—', '·', '、', '~',
|
||||
'~', '+', '=', '-','[',']'
|
||||
"。",
|
||||
",",
|
||||
"、",
|
||||
":",
|
||||
";",
|
||||
"!",
|
||||
"?",
|
||||
'"',
|
||||
'"',
|
||||
""", """,
|
||||
"(",
|
||||
")",
|
||||
"【",
|
||||
"】",
|
||||
"《",
|
||||
"》",
|
||||
"…",
|
||||
"—",
|
||||
"·",
|
||||
"、",
|
||||
"~",
|
||||
"~",
|
||||
"+",
|
||||
"=",
|
||||
"-",
|
||||
"[",
|
||||
"]",
|
||||
}:
|
||||
filtered_words.append(word)
|
||||
|
||||
@@ -97,4 +280,5 @@ class TopicIdentifier:
|
||||
|
||||
return top_words if top_words else None
|
||||
|
||||
|
||||
topic_identifier = TopicIdentifier()
|
||||
Reference in New Issue
Block a user