diff --git a/src/chat/express/expression_selector.py b/src/chat/express/expression_selector.py index 83fdc128f..3f848e43f 100644 --- a/src/chat/express/expression_selector.py +++ b/src/chat/express/expression_selector.py @@ -36,11 +36,7 @@ def init_prompt(): 请以JSON格式输出,只需要输出选中的情境编号: 例如: {{ - "selected_situations": [2, 3, 5, 7, 19, 22, 25, 38, 39, 45, 48 , 64] -}} -例如: -{{ - "selected_situations": [1, 4, 7, 9, 23, 38, 44] + "selected_situations": [2, 3, 5, 7, 19, 22, 25, 38, 39, 45, 48, 64] }} 请严格按照JSON格式输出,不要包含其他内容: @@ -214,7 +210,7 @@ class ExpressionSelector: """使用LLM选择适合的表达方式""" # 1. 获取35个随机表达方式(现在按权重抽取) - style_exprs, grammar_exprs = self.get_random_expressions(chat_id, 50, 0.5, 0.5) + style_exprs, grammar_exprs = self.get_random_expressions(chat_id, 30, 0.5, 0.5) # 2. 构建所有表达方式的索引和情境列表 all_expressions = [] @@ -264,7 +260,10 @@ class ExpressionSelector: # 4. 调用LLM try: + + start_time = time.time() content, (reasoning_content, model_name, _) = await self.llm_model.generate_response_async(prompt=prompt) + logger.info(f"LLM请求时间: {model_name} {time.time() - start_time} \n{prompt}") # logger.info(f"模型名称: {model_name}") # logger.info(f"LLM返回结果: {content}") diff --git a/src/chat/memory_system/Hippocampus.py b/src/chat/memory_system/Hippocampus.py index fe3c25625..9e4005b97 100644 --- a/src/chat/memory_system/Hippocampus.py +++ b/src/chat/memory_system/Hippocampus.py @@ -190,7 +190,7 @@ class MemoryGraph: class Hippocampus: def __init__(self): self.memory_graph = MemoryGraph() - self.model_summary: LLMRequest = None # type: ignore + self.model_small: LLMRequest = None # type: ignore self.entorhinal_cortex: EntorhinalCortex = None # type: ignore self.parahippocampal_gyrus: ParahippocampalGyrus = None # type: ignore @@ -200,7 +200,7 @@ class Hippocampus: self.parahippocampal_gyrus = ParahippocampalGyrus(self) # 从数据库加载记忆图 self.entorhinal_cortex.sync_memory_from_db() - self.model_summary = LLMRequest(model_set=model_config.model_task_config.memory, 
request_type="memory.builder") + self.model_small = LLMRequest(model_set=model_config.model_task_config.utils_small, request_type="memory.small") def get_all_node_names(self) -> list: """获取记忆图中所有节点的名字列表""" @@ -340,7 +340,7 @@ class Hippocampus: else: topic_num = 5 # 51+字符: 5个关键词 (其余长文本) - topics_response, _ = await self.model_summary.generate_response_async(self.find_topic_llm(text, topic_num)) + topics_response, _ = await self.model_small.generate_response_async(self.find_topic_llm(text, topic_num)) # 提取关键词 keywords = re.findall(r"<([^>]+)>", topics_response) diff --git a/src/chat/memory_system/instant_memory.py b/src/chat/memory_system/instant_memory.py index a702a87ed..a6be80ef1 100644 --- a/src/chat/memory_system/instant_memory.py +++ b/src/chat/memory_system/instant_memory.py @@ -38,7 +38,7 @@ class InstantMemory: self.chat_id = chat_id self.last_view_time = time.time() self.summary_model = LLMRequest( - model_set=model_config.model_task_config.memory, + model_set=model_config.model_task_config.utils, request_type="memory.summary", ) diff --git a/src/config/api_ada_configs.py b/src/config/api_ada_configs.py index 9692aced3..0292f7238 100644 --- a/src/config/api_ada_configs.py +++ b/src/config/api_ada_configs.py @@ -105,9 +105,6 @@ class ModelTaskConfig(ConfigBase): replyer_2: TaskConfig """normal_chat次要回复模型配置""" - memory: TaskConfig - """记忆模型配置""" - emotion: TaskConfig """情绪模型配置""" diff --git a/src/llm_models/model_client/openai_client.py b/src/llm_models/model_client/openai_client.py index ad9cbf177..6fbf02467 100644 --- a/src/llm_models/model_client/openai_client.py +++ b/src/llm_models/model_client/openai_client.py @@ -1,6 +1,7 @@ import asyncio import io import json +import time import re import base64 from collections.abc import Iterable @@ -452,6 +453,7 @@ class OpenaiClient(BaseClient): resp, usage_record = await stream_response_handler(req_task.result(), interrupt_flag) else: # 发送请求并获取响应 + # start_time = time.time() req_task = asyncio.create_task( 
self.client.chat.completions.create( model=model_info.model_identifier, @@ -469,7 +471,9 @@ # 如果中断量存在且被设置,则取消任务并抛出异常 req_task.cancel() raise ReqAbortException("请求被外部信号中断") - await asyncio.sleep(0.5) # 等待0.5秒后再次检查任务&中断信号量状态 + await asyncio.sleep(0.1) # 等待0.1秒后再次检查任务&中断信号量状态 + + # logger.info(f"OpenAI请求时间: {model_info.model_identifier} {time.time() - start_time} \n{messages}") resp, usage_record = async_response_parser(req_task.result()) except APIConnectionError as e: diff --git a/src/llm_models/utils_model.py b/src/llm_models/utils_model.py index b7aa0a8b8..f3668eef3 100644 --- a/src/llm_models/utils_model.py +++ b/src/llm_models/utils_model.py @@ -1,6 +1,7 @@ import re import copy import asyncio +import time from enum import Enum from rich.traceback import install @@ -150,14 +151,22 @@ class LLMRequest: (Tuple[str, str, str, Optional[List[ToolCall]]]): 响应内容、推理内容、模型名称、工具调用列表 """ # 请求体构建 + start_time = time.time() + + + message_builder = MessageBuilder() message_builder.add_text_content(prompt) messages = [message_builder.build()] + tool_built = self._build_tool_options(tools) + # 模型选择 model_info, api_provider, client = self._select_model() - + # 请求并处理返回值 + logger.info(f"LLM选择耗时: {model_info.name} {time.time() - start_time}") + response = await self._execute_request( api_provider=api_provider, client=client, @@ -168,6 +177,8 @@ max_tokens=max_tokens, tool_options=tool_built, ) + + content = response.content reasoning_content = response.reasoning_content or "" tool_calls = response.tool_calls @@ -175,6 +186,7 @@ if not reasoning_content and content: content, extracted_reasoning = self._extract_reasoning(content) reasoning_content = extracted_reasoning + if usage := response.usage: llm_usage_recorder.record_usage_to_database( model_info=model_info, @@ -183,6 +195,7 @@ request_type=self.request_type, endpoint="/chat/completions", ) + if not content: if raise_when_empty: 
logger.warning("生成的响应为空") diff --git a/template/model_config_template.toml b/template/model_config_template.toml index 3dcff6f84..77993954a 100644 --- a/template/model_config_template.toml +++ b/template/model_config_template.toml @@ -1,5 +1,5 @@ [inner] -version = "1.1.1" +version = "1.2.0" # 配置文件版本号迭代规则同bot_config.toml @@ -132,11 +132,6 @@ model_list = ["siliconflow-deepseek-v3"] temperature = 0.3 max_tokens = 800 -[model_task_config.memory] # 记忆模型 -model_list = ["qwen3-30b"] -temperature = 0.7 -max_tokens = 800 - [model_task_config.vlm] # 图像识别模型 model_list = ["qwen2.5-vl-72b"] max_tokens = 800