Merge afc branch into dev, prioritizing afc changes and migrating database async modifications from dev

This commit is contained in:
Windpicker-owo
2025-09-27 23:37:40 +08:00
138 changed files with 12183 additions and 5968 deletions

View File

@@ -145,9 +145,9 @@ class LLMUsageRecorder:
LLM使用情况记录器SQLAlchemy版本
"""
@staticmethod
async def record_usage_to_database(
model_info: ModelInfo,
self,
model_info: ModelInfo,
model_usage: UsageRecord,
user_id: str,
request_type: str,
@@ -161,7 +161,7 @@ class LLMUsageRecorder:
session = None
try:
# 使用 SQLAlchemy 会话创建记录
async with get_db_session() as session:
with get_db_session() as session:
usage_record = LLMUsage(
model_name=model_info.model_identifier,
model_assign_name=model_info.name,
@@ -179,7 +179,7 @@ class LLMUsageRecorder:
)
session.add(usage_record)
await session.commit()
session.commit()
logger.debug(
f"Token使用情况 - 模型: {model_usage.model_name}, "

View File

@@ -1,5 +1,4 @@
# -*- coding: utf-8 -*-
# -*- coding: utf-8 -*-
"""
@desc: 该模块封装了与大语言模型LLM交互的所有核心逻辑。
它被设计为一个高度容错和可扩展的系统,包含以下主要组件:
@@ -892,7 +891,7 @@ class LLMRequest:
max_tokens=self.model_for_task.max_tokens if max_tokens is None else max_tokens,
)
self._record_usage(model_info, response.usage, time.time() - start_time, "/chat/completions")
await self._record_usage(model_info, response.usage, time.time() - start_time, "/chat/completions")
if not response.content and not response.tool_calls:
if raise_when_empty:
@@ -917,14 +916,14 @@ class LLMRequest:
embedding_input=embedding_input
)
self._record_usage(model_info, response.usage, time.time() - start_time, "/embeddings")
await self._record_usage(model_info, response.usage, time.time() - start_time, "/embeddings")
if not response.embedding:
raise RuntimeError("获取embedding失败")
return response.embedding, model_info.name
def _record_usage(self, model_info: ModelInfo, usage: Optional[UsageRecord], time_cost: float, endpoint: str):
async def _record_usage(self, model_info: ModelInfo, usage: Optional[UsageRecord], time_cost: float, endpoint: str):
"""
记录模型使用情况。