This commit is contained in:
晴猫
2025-05-01 16:06:13 +09:00
2 changed files with 58 additions and 7 deletions

51
scripts/run_lpmm.sh Normal file
View File

@@ -0,0 +1,51 @@
#!/bin/bash
# ==============================================
# Environment Initialization
# ==============================================
# Step 1: Locate project root directory.
# The project root is taken to be the parent of the directory that contains
# this script file.
SCRIPTS_DIR="scripts"
# BASH_SOURCE[0] names this file even when $0 does not. CDPATH is cleared for
# the lookup so `cd` cannot echo a directory into the captured output, and a
# failed lookup aborts instead of leaving an empty path behind.
SCRIPT_DIR=$(CDPATH='' cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd) || {
  echo "❌ Error: cannot resolve the directory containing this script" >&2
  exit 1
}
PROJECT_ROOT=$(CDPATH='' cd -- "$SCRIPT_DIR/.." && pwd) || {
  echo "❌ Error: cannot resolve project root from: $SCRIPT_DIR" >&2
  exit 1
}
# Step 2: Verify scripts directory exists
if [[ ! -d "$PROJECT_ROOT/$SCRIPTS_DIR" ]]; then
  echo "❌ Error: scripts directory not found in project root" >&2
  echo "Current path: $PROJECT_ROOT" >&2
  exit 1
fi
# Step 3: Set up Python environment.
# Append the previous PYTHONPATH only when it is non-empty: a trailing ':'
# would add an empty search-path entry, which Python interprets as the
# current directory.
export PYTHONPATH="$PROJECT_ROOT${PYTHONPATH:+:$PYTHONPATH}"
# The Python scripts are launched through a path relative to the project root,
# so the working directory must be switched before anything is run.
cd "$PROJECT_ROOT" || {
  echo "❌ Failed to cd to project root: $PROJECT_ROOT" >&2
  exit 1
}
# Debug info
echo "============================"
echo "Project Root: $PROJECT_ROOT"
echo "Python Path: $PYTHONPATH"
echo "Working Dir: $(pwd)"
echo "============================"
# ==============================================
# Python Script Execution
# ==============================================
#######################################
# Run one Python script from the scripts directory with python3.
# Globals:   SCRIPTS_DIR (read) - directory prefix of the script path
# Arguments: $1 - file name of the script to run
# Outputs:   progress line on stdout; failure notice on stderr
# Returns:   0 when python3 succeeds; otherwise exits the whole shell with 1
#######################################
run_python_script() {
  local target="$1"
  echo "🔄 Running $target"
  # Success path leaves early; anything past this line is the failure path.
  python3 "$SCRIPTS_DIR/$target" && return
  echo "$target failed" >&2
  exit 1
}
# Run each pipeline stage in the listed order; run_python_script is called
# once per entry, exactly as if the three calls were written out.
for pipeline_step in "raw_data_preprocessor.py" "info_extraction.py" "import_openie.py"; do
  run_python_script "$pipeline_step"
done
echo "✅ All scripts completed successfully"

View File

@@ -30,7 +30,7 @@ def get_raw_msg_by_timestamp(
filter_query = {"time": {"$gt": timestamp_start, "$lt": timestamp_end}} filter_query = {"time": {"$gt": timestamp_start, "$lt": timestamp_end}}
# 只有当 limit 为 0 时才应用外部 sort # 只有当 limit 为 0 时才应用外部 sort
sort_order = [("time", 1)] if limit == 0 else None sort_order = [("time", 1)] if limit == 0 else None
return find_messages(filter=filter_query, sort=sort_order, limit=limit, limit_mode=limit_mode) return find_messages(message_filter=filter_query, sort=sort_order, limit=limit, limit_mode=limit_mode)
def get_raw_msg_by_timestamp_with_chat( def get_raw_msg_by_timestamp_with_chat(
@@ -44,7 +44,7 @@ def get_raw_msg_by_timestamp_with_chat(
# 只有当 limit 为 0 时才应用外部 sort # 只有当 limit 为 0 时才应用外部 sort
sort_order = [("time", 1)] if limit == 0 else None sort_order = [("time", 1)] if limit == 0 else None
# 直接将 limit_mode 传递给 find_messages # 直接将 limit_mode 传递给 find_messages
return find_messages(filter=filter_query, sort=sort_order, limit=limit, limit_mode=limit_mode) return find_messages(message_filter=filter_query, sort=sort_order, limit=limit, limit_mode=limit_mode)
def get_raw_msg_by_timestamp_with_chat_users( def get_raw_msg_by_timestamp_with_chat_users(
@@ -66,7 +66,7 @@ def get_raw_msg_by_timestamp_with_chat_users(
} }
# 只有当 limit 为 0 时才应用外部 sort # 只有当 limit 为 0 时才应用外部 sort
sort_order = [("time", 1)] if limit == 0 else None sort_order = [("time", 1)] if limit == 0 else None
return find_messages(filter=filter_query, sort=sort_order, limit=limit, limit_mode=limit_mode) return find_messages(message_filter=filter_query, sort=sort_order, limit=limit, limit_mode=limit_mode)
def get_raw_msg_by_timestamp_with_users( def get_raw_msg_by_timestamp_with_users(
@@ -79,7 +79,7 @@ def get_raw_msg_by_timestamp_with_users(
filter_query = {"time": {"$gt": timestamp_start, "$lt": timestamp_end}, "user_id": {"$in": person_ids}} filter_query = {"time": {"$gt": timestamp_start, "$lt": timestamp_end}, "user_id": {"$in": person_ids}}
# 只有当 limit 为 0 时才应用外部 sort # 只有当 limit 为 0 时才应用外部 sort
sort_order = [("time", 1)] if limit == 0 else None sort_order = [("time", 1)] if limit == 0 else None
return find_messages(filter=filter_query, sort=sort_order, limit=limit, limit_mode=limit_mode) return find_messages(message_filter=filter_query, sort=sort_order, limit=limit, limit_mode=limit_mode)
def get_raw_msg_before_timestamp(timestamp: float, limit: int = 0) -> List[Dict[str, Any]]: def get_raw_msg_before_timestamp(timestamp: float, limit: int = 0) -> List[Dict[str, Any]]:
@@ -88,7 +88,7 @@ def get_raw_msg_before_timestamp(timestamp: float, limit: int = 0) -> List[Dict[
""" """
filter_query = {"time": {"$lt": timestamp}} filter_query = {"time": {"$lt": timestamp}}
sort_order = [("time", 1)] sort_order = [("time", 1)]
return find_messages(filter=filter_query, sort=sort_order, limit=limit) return find_messages(message_filter=filter_query, sort=sort_order, limit=limit)
def get_raw_msg_before_timestamp_with_chat(chat_id: str, timestamp: float, limit: int = 0) -> List[Dict[str, Any]]: def get_raw_msg_before_timestamp_with_chat(chat_id: str, timestamp: float, limit: int = 0) -> List[Dict[str, Any]]:
@@ -97,7 +97,7 @@ def get_raw_msg_before_timestamp_with_chat(chat_id: str, timestamp: float, limit
""" """
filter_query = {"chat_id": chat_id, "time": {"$lt": timestamp}} filter_query = {"chat_id": chat_id, "time": {"$lt": timestamp}}
sort_order = [("time", 1)] sort_order = [("time", 1)]
return find_messages(filter=filter_query, sort=sort_order, limit=limit) return find_messages(message_filter=filter_query, sort=sort_order, limit=limit)
def get_raw_msg_before_timestamp_with_users(timestamp: float, person_ids: list, limit: int = 0) -> List[Dict[str, Any]]: def get_raw_msg_before_timestamp_with_users(timestamp: float, person_ids: list, limit: int = 0) -> List[Dict[str, Any]]:
@@ -106,7 +106,7 @@ def get_raw_msg_before_timestamp_with_users(timestamp: float, person_ids: list,
""" """
filter_query = {"time": {"$lt": timestamp}, "user_id": {"$in": person_ids}} filter_query = {"time": {"$lt": timestamp}, "user_id": {"$in": person_ids}}
sort_order = [("time", 1)] sort_order = [("time", 1)]
return find_messages(filter=filter_query, sort=sort_order, limit=limit) return find_messages(message_filter=filter_query, sort=sort_order, limit=limit)
def num_new_messages_since(chat_id: str, timestamp_start: float = 0.0, timestamp_end: float = None) -> int: def num_new_messages_since(chat_id: str, timestamp_start: float = 0.0, timestamp_end: float = None) -> int: