diff --git a/src/llm_models/utils.py b/src/llm_models/utils.py
index ee20533ee..34949e968 100644
--- a/src/llm_models/utils.py
+++ b/src/llm_models/utils.py
@@ -145,7 +145,7 @@ class LLMUsageRecorder:
     LLM使用情况记录器(SQLAlchemy版本)
     """
 
-    def record_usage_to_database(
+    async def record_usage_to_database(
         self,
         model_info: ModelInfo,
         model_usage: UsageRecord,
diff --git a/src/llm_models/utils_model.py b/src/llm_models/utils_model.py
index 3ea94c1e1..cf2a7cb1c 100644
--- a/src/llm_models/utils_model.py
+++ b/src/llm_models/utils_model.py
@@ -891,7 +891,7 @@ class LLMRequest:
             max_tokens=self.model_for_task.max_tokens if max_tokens is None else max_tokens,
         )
 
-        self._record_usage(model_info, response.usage, time.time() - start_time, "/chat/completions")
+        await self._record_usage(model_info, response.usage, time.time() - start_time, "/chat/completions")
 
         if not response.content and not response.tool_calls:
             if raise_when_empty:
@@ -916,14 +916,14 @@ class LLMRequest:
             embedding_input=embedding_input
         )
 
-        self._record_usage(model_info, response.usage, time.time() - start_time, "/embeddings")
+        await self._record_usage(model_info, response.usage, time.time() - start_time, "/embeddings")
 
         if not response.embedding:
             raise RuntimeError("获取embedding失败")
 
         return response.embedding, model_info.name
 
-    def _record_usage(self, model_info: ModelInfo, usage: Optional[UsageRecord], time_cost: float, endpoint: str):
+    async def _record_usage(self, model_info: ModelInfo, usage: Optional[UsageRecord], time_cost: float, endpoint: str):
         """
         记录模型使用情况。