通过以下改动修复嵌入生成过程中的事件循环相关问题: - 在 EmbeddingStore._get_embedding 中,改为同步创建-使用-销毁的新事件循环模式,彻底避免嵌套事件循环问题 - 调整批量嵌入 _get_embeddings_batch_threaded,确保每个线程使用独立、短生命周期的事件循环 - 新增 force_new 参数,LLM 请求嵌入任务时强制创建新的客户端实例,减少跨循环对象复用 - 在 OpenAI 客户端的 embedding 调用处补充详细日志,方便排查网络连接异常 - get_embedding() 每次都重建 LLMRequest,降低实例在多个事件循环中穿梭的概率 此次改动虽然以同步风格“硬掰”异步接口,但对现有接口零破坏,确保了向量数据库及相关知识检索功能的稳定性。(另外,还把脚本文件夹移回来了)
51 lines
1.4 KiB
Bash
51 lines
1.4 KiB
Bash
#!/bin/bash

# ==============================================
# Environment Initialization
# ==============================================
# Resolves the project root from this script's location, verifies that the
# scripts directory exists there, puts the project root on PYTHONPATH and
# makes it the working directory.

# Step 1: Locate project root directory
# The project root is taken to be the parent of the directory that holds
# this script.
SCRIPTS_DIR="scripts"

# CDPATH is cleared for the lookup so that `cd` cannot resolve a relative
# directory name through it (and echo the resolved path into the captured
# output).
SCRIPT_DIR=$(CDPATH= cd -- "$(dirname -- "$0")" && pwd) || {
  echo "❌ Failed to resolve script directory from: $0" >&2
  exit 1
}
PROJECT_ROOT=$(CDPATH= cd -- "$SCRIPT_DIR/.." && pwd) || {
  echo "❌ Failed to resolve project root from: $SCRIPT_DIR" >&2
  exit 1
}

# Step 2: Verify scripts directory exists
if [[ ! -d "$PROJECT_ROOT/$SCRIPTS_DIR" ]]; then
  echo "❌ Error: scripts directory not found in project root" >&2
  echo "Current path: $PROJECT_ROOT" >&2
  exit 1
fi

# Step 3: Set up Python environment
# Append the inherited PYTHONPATH only when it is non-empty, so that no
# dangling ':' (an empty path entry) is produced.
export PYTHONPATH="$PROJECT_ROOT${PYTHONPATH:+:$PYTHONPATH}"
cd "$PROJECT_ROOT" || {
  echo "❌ Failed to cd to project root: $PROJECT_ROOT" >&2
  exit 1
}

# Debug info
echo "============================"
echo "Project Root: $PROJECT_ROOT"
echo "Python Path: $PYTHONPATH"
echo "Working Dir: $(pwd)"
echo "============================"

# ==============================================
# Python Script Execution
# ==============================================

#######################################
# Run one Python script from the scripts directory, aborting on failure.
# Globals:
#   SCRIPTS_DIR (read) - directory holding the scripts. It is used as given,
#                        so a relative value is resolved against the current
#                        working directory.
# Arguments:
#   $1 - file name of the script inside SCRIPTS_DIR
# Outputs:
#   A progress line on stdout; error messages on stderr.
# Returns:
#   0 when the script succeeds. On any failure (missing argument, missing
#   file, non-zero exit from python3) the shell is terminated with status 1
#   via `exit`, not `return`.
#######################################
run_python_script() {
  local script_name=${1:-}
  if [[ -z "$script_name" ]]; then
    echo "❌ run_python_script: missing script name" >&2
    exit 1
  fi

  # Fail early with a clear message instead of letting python3 report a
  # missing file.
  local script_path="$SCRIPTS_DIR/$script_name"
  if [[ ! -f "$script_path" ]]; then
    echo "❌ $script_name not found at $script_path" >&2
    exit 1
  fi

  echo "🔄 Running $script_name"
  if ! python3 "$script_path"; then
    echo "❌ $script_name failed" >&2
    exit 1
  fi
}

# Execute scripts in order
# run_python_script exits the shell with status 1 when a script fails, so a
# failed stage stops the run before the next stage starts and the success
# message below is only reached when every stage returned 0.
run_python_script "raw_data_preprocessor.py"
run_python_script "info_extraction.py"
run_python_script "import_openie.py"

echo "✅ All scripts completed successfully"