Merge afc branch into dev, prioritizing afc changes and migrating database async modifications from dev

2025-09-27 23:37:40 +08:00
parent f9fbfe319f 80d34f3130
commit ff24bd8148
138 changed files with 12183 additions and 5968 deletions
--- a/src/llm_models/utils.py
+++ b/src/llm_models/utils.py
@@ -145,9 +145,9 @@ class LLMUsageRecorder:
    LLM使用情况记录器（SQLAlchemy版本）
    """

-    @staticmethod
    async def record_usage_to_database(
-            model_info: ModelInfo,
+        self,
+        model_info: ModelInfo,
        model_usage: UsageRecord,
        user_id: str,
        request_type: str,
@@ -161,7 +161,7 @@ class LLMUsageRecorder:
        session = None
        try:
            # 使用 SQLAlchemy 会话创建记录
-            async with get_db_session() as session:
+            with get_db_session() as session:
                usage_record = LLMUsage(
                    model_name=model_info.model_identifier,
                    model_assign_name=model_info.name,
@@ -179,7 +179,7 @@ class LLMUsageRecorder:
                )

                session.add(usage_record)
-                await session.commit()
+                session.commit()

            logger.debug(
                f"Token使用情况 - 模型: {model_usage.model_name}, "
--- a/src/llm_models/utils_model.py
+++ b/src/llm_models/utils_model.py
@@ -1,5 +1,4 @@
 # -*- coding: utf-8 -*-
-# -*- coding: utf-8 -*-
 """
@desc: 该模块封装了与大语言模型（LLM）交互的所有核心逻辑。
 它被设计为一个高度容错和可扩展的系统，包含以下主要组件：
@@ -892,7 +891,7 @@ class LLMRequest:
            max_tokens=self.model_for_task.max_tokens if max_tokens is None else max_tokens,
        )

-        self._record_usage(model_info, response.usage, time.time() - start_time, "/chat/completions")
+        await self._record_usage(model_info, response.usage, time.time() - start_time, "/chat/completions")

        if not response.content and not response.tool_calls:
            if raise_when_empty:
@@ -917,14 +916,14 @@ class LLMRequest:
            embedding_input=embedding_input
        )
        
-        self._record_usage(model_info, response.usage, time.time() - start_time, "/embeddings")
+        await self._record_usage(model_info, response.usage, time.time() - start_time, "/embeddings")
        
        if not response.embedding:
            raise RuntimeError("获取embedding失败")
        
        return response.embedding, model_info.name

-    def _record_usage(self, model_info: ModelInfo, usage: Optional[UsageRecord], time_cost: float, endpoint: str):
+    async def _record_usage(self, model_info: ModelInfo, usage: Optional[UsageRecord], time_cost: float, endpoint: str):
        """
        记录模型使用情况。