fix: dos in annotation import (#29470)
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
@@ -670,3 +670,14 @@ SINGLE_CHUNK_ATTACHMENT_LIMIT=10
 ATTACHMENT_IMAGE_FILE_SIZE_LIMIT=2
 ATTACHMENT_IMAGE_DOWNLOAD_TIMEOUT=60
 IMAGE_FILE_BATCH_LIMIT=10
+
+# Maximum allowed CSV file size for annotation import in megabytes
+ANNOTATION_IMPORT_FILE_SIZE_LIMIT=2
+#Maximum number of annotation records allowed in a single import
+ANNOTATION_IMPORT_MAX_RECORDS=10000
+# Minimum number of annotation records required in a single import
+ANNOTATION_IMPORT_MIN_RECORDS=1
+ANNOTATION_IMPORT_RATE_LIMIT_PER_MINUTE=5
+ANNOTATION_IMPORT_RATE_LIMIT_PER_HOUR=20
+# Maximum number of concurrent annotation import tasks per tenant
+ANNOTATION_IMPORT_MAX_CONCURRENT=5
@@ -380,6 +380,37 @@ class FileUploadConfig(BaseSettings):
         default=60,
     )
 
+    # Annotation Import Security Configurations
+    ANNOTATION_IMPORT_FILE_SIZE_LIMIT: NonNegativeInt = Field(
+        description="Maximum allowed CSV file size for annotation import in megabytes",
+        default=2,
+    )
+
+    ANNOTATION_IMPORT_MAX_RECORDS: PositiveInt = Field(
+        description="Maximum number of annotation records allowed in a single import",
+        default=10000,
+    )
+
+    ANNOTATION_IMPORT_MIN_RECORDS: PositiveInt = Field(
+        description="Minimum number of annotation records required in a single import",
+        default=1,
+    )
+
+    ANNOTATION_IMPORT_RATE_LIMIT_PER_MINUTE: PositiveInt = Field(
+        description="Maximum number of annotation import requests per minute per tenant",
+        default=5,
+    )
+
+    ANNOTATION_IMPORT_RATE_LIMIT_PER_HOUR: PositiveInt = Field(
+        description="Maximum number of annotation import requests per hour per tenant",
+        default=20,
+    )
+
+    ANNOTATION_IMPORT_MAX_CONCURRENT: PositiveInt = Field(
+        description="Maximum number of concurrent annotation import tasks per tenant",
+        default=2,
+    )
+
     inner_UPLOAD_FILE_EXTENSION_BLACKLIST: str = Field(
         description=(
             "Comma-separated list of file extensions that are blocked from upload. "
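Editor's note: the fields above are ordinary pydantic-settings fields, so the env vars added to the .env templates in this commit override the in-code defaults. A minimal, self-contained sketch of that mechanism (not part of this diff; the class and variable below are illustrative only, assuming pydantic-settings is installed, as Dify's configs already use it):

import os
from pydantic import Field, NonNegativeInt
from pydantic_settings import BaseSettings

class AnnotationImportDemoSettings(BaseSettings):
    # Mirrors the shape of the new fields: env var name matches the field name,
    # and the default applies only when the variable is unset.
    ANNOTATION_IMPORT_FILE_SIZE_LIMIT: NonNegativeInt = Field(default=2)

os.environ["ANNOTATION_IMPORT_FILE_SIZE_LIMIT"] = "5"  # e.g. set via .env or docker-compose
settings = AnnotationImportDemoSettings()
print(settings.ANNOTATION_IMPORT_FILE_SIZE_LIMIT)      # -> 5, overriding the default of 2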
@@ -1,6 +1,6 @@
 from typing import Any, Literal
 
-from flask import request
+from flask import abort, request
 from flask_restx import Resource, fields, marshal, marshal_with
 from pydantic import BaseModel, Field, field_validator
 
@@ -8,6 +8,8 @@ from controllers.common.errors import NoFileUploadedError, TooManyFilesError
 from controllers.console import console_ns
 from controllers.console.wraps import (
     account_initialization_required,
+    annotation_import_concurrency_limit,
+    annotation_import_rate_limit,
     cloud_edition_billing_resource_check,
     edit_permission_required,
     setup_required,
@@ -314,18 +316,25 @@ class AnnotationUpdateDeleteApi(Resource):
 @console_ns.route("/apps/<uuid:app_id>/annotations/batch-import")
 class AnnotationBatchImportApi(Resource):
     @console_ns.doc("batch_import_annotations")
-    @console_ns.doc(description="Batch import annotations from CSV file")
+    @console_ns.doc(description="Batch import annotations from CSV file with rate limiting and security checks")
     @console_ns.doc(params={"app_id": "Application ID"})
     @console_ns.response(200, "Batch import started successfully")
     @console_ns.response(403, "Insufficient permissions")
     @console_ns.response(400, "No file uploaded or too many files")
+    @console_ns.response(413, "File too large")
+    @console_ns.response(429, "Too many requests or concurrent imports")
     @setup_required
     @login_required
     @account_initialization_required
     @cloud_edition_billing_resource_check("annotation")
+    @annotation_import_rate_limit
+    @annotation_import_concurrency_limit
     @edit_permission_required
     def post(self, app_id):
+        from configs import dify_config
+
         app_id = str(app_id)
 
         # check file
         if "file" not in request.files:
             raise NoFileUploadedError()
@@ -335,9 +344,27 @@ class AnnotationBatchImportApi(Resource):
 
         # get file from request
         file = request.files["file"]
 
         # check file type
         if not file.filename or not file.filename.lower().endswith(".csv"):
             raise ValueError("Invalid file type. Only CSV files are allowed")
 
+        # Check file size before processing
+        file.seek(0, 2)  # Seek to end of file
+        file_size = file.tell()
+        file.seek(0)  # Reset to beginning
+
+        max_size_bytes = dify_config.ANNOTATION_IMPORT_FILE_SIZE_LIMIT * 1024 * 1024
+        if file_size > max_size_bytes:
+            abort(
+                413,
+                f"File size exceeds maximum limit of {dify_config.ANNOTATION_IMPORT_FILE_SIZE_LIMIT}MB. "
+                f"Please reduce the file size and try again.",
+            )
+
+        if file_size == 0:
+            raise ValueError("The uploaded file is empty")
+
         return AppAnnotationService.batch_import_app_annotations(app_id, file)
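Editor's note: a hedged client-side sketch of the hardened endpoint. The route itself comes from the decorator above; the /console/api prefix, host/port, and auth header are assumptions about a typical Dify console deployment and are not shown in this diff.

import requests

resp = requests.post(
    "http://localhost:5001/console/api/apps/<app_id>/annotations/batch-import",  # prefix assumed
    headers={"Authorization": "Bearer <console-access-token>"},                  # auth assumed
    files={"file": ("annotations.csv", open("annotations.csv", "rb"), "text/csv")},
)
# Per the responses documented above: oversized files now return 413, and bursts beyond the
# per-minute/per-hour windows or the concurrency cap return 429.
print(resp.status_code, resp.text)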
@@ -331,3 +331,91 @@ def is_admin_or_owner_required(f: Callable[P, R]):
         return f(*args, **kwargs)
 
     return decorated_function
+
+
+def annotation_import_rate_limit(view: Callable[P, R]):
+    """
+    Rate limiting decorator for annotation import operations.
+
+    Implements sliding window rate limiting with two tiers:
+    - Short-term: Configurable requests per minute (default: 5)
+    - Long-term: Configurable requests per hour (default: 20)
+
+    Uses Redis ZSET for distributed rate limiting across multiple instances.
+    """
+
+    @wraps(view)
+    def decorated(*args: P.args, **kwargs: P.kwargs):
+        _, current_tenant_id = current_account_with_tenant()
+        current_time = int(time.time() * 1000)
+
+        # Check per-minute rate limit
+        minute_key = f"annotation_import_rate_limit:{current_tenant_id}:1min"
+        redis_client.zadd(minute_key, {current_time: current_time})
+        redis_client.zremrangebyscore(minute_key, 0, current_time - 60000)
+        minute_count = redis_client.zcard(minute_key)
+        redis_client.expire(minute_key, 120)  # 2 minutes TTL
+
+        if minute_count > dify_config.ANNOTATION_IMPORT_RATE_LIMIT_PER_MINUTE:
+            abort(
+                429,
+                f"Too many annotation import requests. Maximum {dify_config.ANNOTATION_IMPORT_RATE_LIMIT_PER_MINUTE} "
+                f"requests per minute allowed. Please try again later.",
+            )
+
+        # Check per-hour rate limit
+        hour_key = f"annotation_import_rate_limit:{current_tenant_id}:1hour"
+        redis_client.zadd(hour_key, {current_time: current_time})
+        redis_client.zremrangebyscore(hour_key, 0, current_time - 3600000)
+        hour_count = redis_client.zcard(hour_key)
+        redis_client.expire(hour_key, 7200)  # 2 hours TTL
+
+        if hour_count > dify_config.ANNOTATION_IMPORT_RATE_LIMIT_PER_HOUR:
+            abort(
+                429,
+                f"Too many annotation import requests. Maximum {dify_config.ANNOTATION_IMPORT_RATE_LIMIT_PER_HOUR} "
+                f"requests per hour allowed. Please try again later.",
+            )
+
+        return view(*args, **kwargs)
+
+    return decorated
+
+
+def annotation_import_concurrency_limit(view: Callable[P, R]):
+    """
+    Concurrency control decorator for annotation import operations.
+
+    Limits the number of concurrent import tasks per tenant to prevent
+    resource exhaustion and ensure fair resource allocation.
+
+    Uses Redis ZSET to track active import jobs with automatic cleanup
+    of stale entries (jobs older than 2 minutes).
+    """
+
+    @wraps(view)
+    def decorated(*args: P.args, **kwargs: P.kwargs):
+        _, current_tenant_id = current_account_with_tenant()
+        current_time = int(time.time() * 1000)
+
+        active_jobs_key = f"annotation_import_active:{current_tenant_id}"
+
+        # Clean up stale entries (jobs that should have completed or timed out)
+        stale_threshold = current_time - 120000  # 2 minutes ago
+        redis_client.zremrangebyscore(active_jobs_key, 0, stale_threshold)
+
+        # Check current active job count
+        active_count = redis_client.zcard(active_jobs_key)
+
+        if active_count >= dify_config.ANNOTATION_IMPORT_MAX_CONCURRENT:
+            abort(
+                429,
+                f"Too many concurrent import tasks. Maximum {dify_config.ANNOTATION_IMPORT_MAX_CONCURRENT} "
+                f"concurrent imports allowed per workspace. Please wait for existing imports to complete.",
+            )
+
+        # Allow the request to proceed
+        # The actual job registration will happen in the service layer
+        return view(*args, **kwargs)
+
+    return decorated
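Editor's note: both decorators share a single Redis pattern — add the current timestamp to a per-tenant ZSET, trim members older than the window, then compare the cardinality to the limit. A standalone sketch of that sliding window, assuming a local redis-py client; the key and function names here are illustrative, not the decorator's internals verbatim.

import time
import redis

r = redis.Redis()  # assumption: a local Redis, standing in for dify's redis_client

def within_sliding_window(tenant_id: str, limit: int, window_ms: int) -> bool:
    """Record one request and report whether the tenant is still under `limit` for the window."""
    now_ms = int(time.time() * 1000)
    key = f"demo_rate_limit:{tenant_id}:{window_ms}"
    r.zadd(key, {str(now_ms): now_ms})              # member scored by its timestamp
    r.zremrangebyscore(key, 0, now_ms - window_ms)  # drop entries that fell out of the window
    r.expire(key, max(window_ms // 500, 1))         # let idle keys expire (~2x the window)
    return r.zcard(key) <= limit

# e.g. 5 requests per minute, mirroring ANNOTATION_IMPORT_RATE_LIMIT_PER_MINUTE's default
allowed = within_sliding_window("tenant-123", limit=5, window_ms=60_000)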
@@ -1,6 +1,9 @@
+import logging
 import uuid
 
 import pandas as pd
+
+logger = logging.getLogger(__name__)
 from sqlalchemy import or_, select
 from werkzeug.datastructures import FileStorage
 from werkzeug.exceptions import NotFound
@@ -330,6 +333,18 @@ class AppAnnotationService:
 
     @classmethod
     def batch_import_app_annotations(cls, app_id, file: FileStorage):
+        """
+        Batch import annotations from CSV file with enhanced security checks.
+
+        Security features:
+        - File size validation
+        - Row count limits (min/max)
+        - Memory-efficient CSV parsing
+        - Subscription quota validation
+        - Concurrency tracking
+        """
+        from configs import dify_config
+
         # get app info
         current_user, current_tenant_id = current_account_with_tenant()
         app = (
@@ -341,16 +356,80 @@ class AppAnnotationService:
         if not app:
             raise NotFound("App not found")
 
+        job_id: str | None = None  # Initialize to avoid unbound variable error
         try:
-            # Skip the first row
-            df = pd.read_csv(file.stream, dtype=str)
-            result = []
-            for _, row in df.iterrows():
-                content = {"question": row.iloc[0], "answer": row.iloc[1]}
+            # Quick row count check before full parsing (memory efficient)
+            # Read only first chunk to estimate row count
+            file.stream.seek(0)
+            first_chunk = file.stream.read(8192)  # Read first 8KB
+            file.stream.seek(0)
+
+            # Estimate row count from first chunk
+            newline_count = first_chunk.count(b"\n")
+            if newline_count == 0:
+                raise ValueError("The CSV file appears to be empty or invalid.")
+
+            # Parse CSV with row limit to prevent memory exhaustion
+            # Use chunksize for memory-efficient processing
+            max_records = dify_config.ANNOTATION_IMPORT_MAX_RECORDS
+            min_records = dify_config.ANNOTATION_IMPORT_MIN_RECORDS
+
+            # Read CSV in chunks to avoid loading entire file into memory
+            df = pd.read_csv(
+                file.stream,
+                dtype=str,
+                nrows=max_records + 1,  # Read one extra to detect overflow
+                engine="python",
+                on_bad_lines="skip",  # Skip malformed lines instead of crashing
+            )
+
+            # Validate column count
+            if len(df.columns) < 2:
+                raise ValueError("Invalid CSV format. The file must contain at least 2 columns (question and answer).")
+
+            # Build result list with validation
+            result: list[dict] = []
+            for idx, row in df.iterrows():
+                # Stop if we exceed the limit
+                if len(result) >= max_records:
+                    raise ValueError(
+                        f"The CSV file contains too many records. Maximum {max_records} records allowed per import. "
+                        f"Please split your file into smaller batches."
+                    )
+
+                # Extract and validate question and answer
+                try:
+                    question_raw = row.iloc[0]
+                    answer_raw = row.iloc[1]
+                except (IndexError, KeyError):
+                    continue  # Skip malformed rows
+
+                # Convert to string and strip whitespace
+                question = str(question_raw).strip() if question_raw is not None else ""
+                answer = str(answer_raw).strip() if answer_raw is not None else ""
+
+                # Skip empty entries or NaN values
+                if not question or not answer or question.lower() == "nan" or answer.lower() == "nan":
+                    continue
+
+                # Validate length constraints (idx is pandas index, convert to int for display)
+                row_num = int(idx) + 2 if isinstance(idx, (int, float)) else len(result) + 2
+                if len(question) > 2000:
+                    raise ValueError(f"Question at row {row_num} is too long. Maximum 2000 characters allowed.")
+                if len(answer) > 10000:
+                    raise ValueError(f"Answer at row {row_num} is too long. Maximum 10000 characters allowed.")
+
+                content = {"question": question, "answer": answer}
                 result.append(content)
-            if len(result) == 0:
-                raise ValueError("The CSV file is empty.")
-            # check annotation limit
+
+            # Validate minimum records
+            if len(result) < min_records:
+                raise ValueError(
+                    f"The CSV file must contain at least {min_records} valid annotation record(s). "
+                    f"Found {len(result)} valid record(s)."
+                )
+
+            # Check annotation quota limit
             features = FeatureService.get_features(current_tenant_id)
             if features.billing.enabled:
                 annotation_quota_limit = features.annotation_quota_limit
@@ -359,12 +438,34 @@ class AppAnnotationService:
             # async job
             job_id = str(uuid.uuid4())
             indexing_cache_key = f"app_annotation_batch_import_{str(job_id)}"
-            # send batch add segments task
+
+            # Register job in active tasks list for concurrency tracking
+            current_time = int(naive_utc_now().timestamp() * 1000)
+            active_jobs_key = f"annotation_import_active:{current_tenant_id}"
+            redis_client.zadd(active_jobs_key, {job_id: current_time})
+            redis_client.expire(active_jobs_key, 7200)  # 2 hours TTL
+
+            # Set job status
             redis_client.setnx(indexing_cache_key, "waiting")
             batch_import_annotations_task.delay(str(job_id), result, app_id, current_tenant_id, current_user.id)
-        except Exception as e:
+
+        except ValueError as e:
             return {"error_msg": str(e)}
-        return {"job_id": job_id, "job_status": "waiting"}
+        except Exception as e:
+            # Clean up active job registration on error (only if job was created)
+            if job_id is not None:
+                try:
+                    active_jobs_key = f"annotation_import_active:{current_tenant_id}"
+                    redis_client.zrem(active_jobs_key, job_id)
+                except Exception:
+                    # Silently ignore cleanup errors - the job will be auto-expired
+                    logger.debug("Failed to clean up active job tracking during error handling")
+
+            # Check if it's a CSV parsing error
+            error_str = str(e)
+            return {"error_msg": f"An error occurred while processing the file: {error_str}"}
+
+        return {"job_id": job_id, "job_status": "waiting", "record_count": len(result)}
 
     @classmethod
     def get_annotation_hit_histories(cls, app_id: str, annotation_id: str, page, limit):
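Editor's note: the parsing change above bounds memory by reading at most max_records + 1 data rows and skipping malformed lines. A minimal standalone sketch of that bounded read, assuming a local CSV file in place of file.stream and a pandas version that supports on_bad_lines (1.3+):

import pandas as pd

MAX_RECORDS = 10000  # mirrors ANNOTATION_IMPORT_MAX_RECORDS' default

# Read at most MAX_RECORDS + 1 rows; the extra row exists only to detect overflow.
df = pd.read_csv(
    "annotations.csv",      # assumption: a local file standing in for the uploaded stream
    dtype=str,
    nrows=MAX_RECORDS + 1,
    engine="python",
    on_bad_lines="skip",    # malformed lines are dropped instead of raising ParserError
)

if len(df) > MAX_RECORDS:
    raise ValueError(f"Too many records: more than {MAX_RECORDS} rows in the import file")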
@@ -30,6 +30,8 @@ def batch_import_annotations_task(job_id: str, content_list: list[dict], app_id:
     logger.info(click.style(f"Start batch import annotation: {job_id}", fg="green"))
     start_at = time.perf_counter()
     indexing_cache_key = f"app_annotation_batch_import_{str(job_id)}"
+    active_jobs_key = f"annotation_import_active:{tenant_id}"
+
     # get app info
     app = db.session.query(App).where(App.id == app_id, App.tenant_id == tenant_id, App.status == "normal").first()
 
@@ -91,4 +93,13 @@ def batch_import_annotations_task(job_id: str, content_list: list[dict], app_id:
         redis_client.setex(indexing_error_msg_key, 600, str(e))
         logger.exception("Build index for batch import annotations failed")
     finally:
+        # Clean up active job tracking to release concurrency slot
+        try:
+            redis_client.zrem(active_jobs_key, job_id)
+            logger.debug("Released concurrency slot for job: %s", job_id)
+        except Exception as cleanup_error:
+            # Log but don't fail if cleanup fails - the job will be auto-expired
+            logger.warning("Failed to clean up active job tracking for %s: %s", job_id, cleanup_error)
+
+        # Close database session
         db.session.close()
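Editor's note: the unit tests below assert that this task exposes time_limit and soft_time_limit attributes, but the task decorator itself is not shown in this diff. As a hedged illustration only, a Celery task can declare such limits as follows; the 300/360 second values come from the test's comments and are not necessarily what the codebase uses.

from celery import shared_task
from celery.exceptions import SoftTimeLimitExceeded

@shared_task(bind=True, soft_time_limit=300, time_limit=360)  # 5 min soft, 6 min hard -- illustrative
def long_running_import(self, job_id: str):
    try:
        ...  # do the batched work
    except SoftTimeLimitExceeded:
        # Soft limit fired: clean up (e.g. release the concurrency slot) before the hard kill.
        pass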
@@ -0,0 +1,344 @@
+"""
+Unit tests for annotation import security features.
+
+Tests rate limiting, concurrency control, file validation, and other
+security features added to prevent DoS attacks on the annotation import endpoint.
+"""
+
+import io
+from unittest.mock import MagicMock, patch
+
+import pytest
+from werkzeug.datastructures import FileStorage
+
+from configs import dify_config
+
+
+class TestAnnotationImportRateLimiting:
+    """Test rate limiting for annotation import operations."""
+
+    @pytest.fixture
+    def mock_redis(self):
+        """Mock Redis client for testing."""
+        with patch("controllers.console.wraps.redis_client") as mock:
+            yield mock
+
+    @pytest.fixture
+    def mock_current_account(self):
+        """Mock current account with tenant."""
+        with patch("controllers.console.wraps.current_account_with_tenant") as mock:
+            mock.return_value = (MagicMock(id="user_id"), "test_tenant_id")
+            yield mock
+
+    def test_rate_limit_per_minute_enforced(self, mock_redis, mock_current_account):
+        """Test that per-minute rate limit is enforced."""
+        from controllers.console.wraps import annotation_import_rate_limit
+
+        # Simulate exceeding per-minute limit
+        mock_redis.zcard.side_effect = [
+            dify_config.ANNOTATION_IMPORT_RATE_LIMIT_PER_MINUTE + 1,  # Minute check
+            10,  # Hour check
+        ]
+
+        @annotation_import_rate_limit
+        def dummy_view():
+            return "success"
+
+        # Should abort with 429
+        with pytest.raises(Exception) as exc_info:
+            dummy_view()
+
+        # Verify it's a rate limit error
+        assert "429" in str(exc_info.value) or "Too many" in str(exc_info.value)
+
+    def test_rate_limit_per_hour_enforced(self, mock_redis, mock_current_account):
+        """Test that per-hour rate limit is enforced."""
+        from controllers.console.wraps import annotation_import_rate_limit
+
+        # Simulate exceeding per-hour limit
+        mock_redis.zcard.side_effect = [
+            3,  # Minute check (under limit)
+            dify_config.ANNOTATION_IMPORT_RATE_LIMIT_PER_HOUR + 1,  # Hour check (over limit)
+        ]
+
+        @annotation_import_rate_limit
+        def dummy_view():
+            return "success"
+
+        # Should abort with 429
+        with pytest.raises(Exception) as exc_info:
+            dummy_view()
+
+        assert "429" in str(exc_info.value) or "Too many" in str(exc_info.value)
+
+    def test_rate_limit_within_limits_passes(self, mock_redis, mock_current_account):
+        """Test that requests within limits are allowed."""
+        from controllers.console.wraps import annotation_import_rate_limit
+
+        # Simulate being under both limits
+        mock_redis.zcard.return_value = 2
+
+        @annotation_import_rate_limit
+        def dummy_view():
+            return "success"
+
+        # Should succeed
+        result = dummy_view()
+        assert result == "success"
+
+        # Verify Redis operations were called
+        assert mock_redis.zadd.called
+        assert mock_redis.zremrangebyscore.called
+
+
+class TestAnnotationImportConcurrencyControl:
+    """Test concurrency control for annotation import operations."""
+
+    @pytest.fixture
+    def mock_redis(self):
+        """Mock Redis client for testing."""
+        with patch("controllers.console.wraps.redis_client") as mock:
+            yield mock
+
+    @pytest.fixture
+    def mock_current_account(self):
+        """Mock current account with tenant."""
+        with patch("controllers.console.wraps.current_account_with_tenant") as mock:
+            mock.return_value = (MagicMock(id="user_id"), "test_tenant_id")
+            yield mock
+
+    def test_concurrency_limit_enforced(self, mock_redis, mock_current_account):
+        """Test that concurrent task limit is enforced."""
+        from controllers.console.wraps import annotation_import_concurrency_limit
+
+        # Simulate max concurrent tasks already running
+        mock_redis.zcard.return_value = dify_config.ANNOTATION_IMPORT_MAX_CONCURRENT
+
+        @annotation_import_concurrency_limit
+        def dummy_view():
+            return "success"
+
+        # Should abort with 429
+        with pytest.raises(Exception) as exc_info:
+            dummy_view()
+
+        assert "429" in str(exc_info.value) or "concurrent" in str(exc_info.value).lower()
+
+    def test_concurrency_within_limit_passes(self, mock_redis, mock_current_account):
+        """Test that requests within concurrency limits are allowed."""
+        from controllers.console.wraps import annotation_import_concurrency_limit
+
+        # Simulate being under concurrent task limit
+        mock_redis.zcard.return_value = 1
+
+        @annotation_import_concurrency_limit
+        def dummy_view():
+            return "success"
+
+        # Should succeed
+        result = dummy_view()
+        assert result == "success"
+
+    def test_stale_jobs_are_cleaned_up(self, mock_redis, mock_current_account):
+        """Test that old/stale job entries are removed."""
+        from controllers.console.wraps import annotation_import_concurrency_limit
+
+        mock_redis.zcard.return_value = 0
+
+        @annotation_import_concurrency_limit
+        def dummy_view():
+            return "success"
+
+        dummy_view()
+
+        # Verify cleanup was called
+        assert mock_redis.zremrangebyscore.called
+
+
+class TestAnnotationImportFileValidation:
+    """Test file validation in annotation import."""
+
+    def test_file_size_limit_enforced(self):
+        """Test that files exceeding size limit are rejected."""
+        # Create a file larger than the limit
+        max_size = dify_config.ANNOTATION_IMPORT_FILE_SIZE_LIMIT * 1024 * 1024
+        large_content = b"x" * (max_size + 1024)  # Exceed by 1KB
+
+        file = FileStorage(stream=io.BytesIO(large_content), filename="test.csv", content_type="text/csv")
+
+        # Should be rejected in controller
+        # This would be tested in integration tests with actual endpoint
+
+    def test_empty_file_rejected(self):
+        """Test that empty files are rejected."""
+        file = FileStorage(stream=io.BytesIO(b""), filename="test.csv", content_type="text/csv")
+
+        # Should be rejected
+        # This would be tested in integration tests
+
+    def test_non_csv_file_rejected(self):
+        """Test that non-CSV files are rejected."""
+        file = FileStorage(stream=io.BytesIO(b"test"), filename="test.txt", content_type="text/plain")
+
+        # Should be rejected based on extension
+        # This would be tested in integration tests
+
+
+class TestAnnotationImportServiceValidation:
+    """Test service layer validation for annotation import."""
+
+    @pytest.fixture
+    def mock_app(self):
+        """Mock application object."""
+        app = MagicMock()
+        app.id = "app_id"
+        return app
+
+    @pytest.fixture
+    def mock_db_session(self):
+        """Mock database session."""
+        with patch("services.annotation_service.db.session") as mock:
+            yield mock
+
+    def test_max_records_limit_enforced(self, mock_app, mock_db_session):
+        """Test that files with too many records are rejected."""
+        from services.annotation_service import AppAnnotationService
+
+        # Create CSV with too many records
+        max_records = dify_config.ANNOTATION_IMPORT_MAX_RECORDS
+        csv_content = "question,answer\n"
+        for i in range(max_records + 100):
+            csv_content += f"Question {i},Answer {i}\n"
+
+        file = FileStorage(stream=io.BytesIO(csv_content.encode()), filename="test.csv", content_type="text/csv")
+
+        mock_db_session.query.return_value.where.return_value.first.return_value = mock_app
+
+        with patch("services.annotation_service.current_account_with_tenant") as mock_auth:
+            mock_auth.return_value = (MagicMock(id="user_id"), "tenant_id")
+
+            with patch("services.annotation_service.FeatureService") as mock_features:
+                mock_features.get_features.return_value.billing.enabled = False
+
+                result = AppAnnotationService.batch_import_app_annotations("app_id", file)
+
+        # Should return error about too many records
+        assert "error_msg" in result
+        assert "too many" in result["error_msg"].lower() or "maximum" in result["error_msg"].lower()
+
+    def test_min_records_limit_enforced(self, mock_app, mock_db_session):
+        """Test that files with too few valid records are rejected."""
+        from services.annotation_service import AppAnnotationService
+
+        # Create CSV with only header (no data rows)
+        csv_content = "question,answer\n"
+
+        file = FileStorage(stream=io.BytesIO(csv_content.encode()), filename="test.csv", content_type="text/csv")
+
+        mock_db_session.query.return_value.where.return_value.first.return_value = mock_app
+
+        with patch("services.annotation_service.current_account_with_tenant") as mock_auth:
+            mock_auth.return_value = (MagicMock(id="user_id"), "tenant_id")
+
+            result = AppAnnotationService.batch_import_app_annotations("app_id", file)
+
+        # Should return error about insufficient records
+        assert "error_msg" in result
+        assert "at least" in result["error_msg"].lower() or "minimum" in result["error_msg"].lower()
+
+    def test_invalid_csv_format_handled(self, mock_app, mock_db_session):
+        """Test that invalid CSV format is handled gracefully."""
+        from services.annotation_service import AppAnnotationService
+
+        # Create invalid CSV content
+        csv_content = 'invalid,csv,format\nwith,unbalanced,quotes,and"stuff'
+
+        file = FileStorage(stream=io.BytesIO(csv_content.encode()), filename="test.csv", content_type="text/csv")
+
+        mock_db_session.query.return_value.where.return_value.first.return_value = mock_app
+
+        with patch("services.annotation_service.current_account_with_tenant") as mock_auth:
+            mock_auth.return_value = (MagicMock(id="user_id"), "tenant_id")
+
+            result = AppAnnotationService.batch_import_app_annotations("app_id", file)
+
+        # Should return error message
+        assert "error_msg" in result
+
+    def test_valid_import_succeeds(self, mock_app, mock_db_session):
+        """Test that valid import request succeeds."""
+        from services.annotation_service import AppAnnotationService
+
+        # Create valid CSV
+        csv_content = "question,answer\nWhat is AI?,Artificial Intelligence\nWhat is ML?,Machine Learning\n"
+
+        file = FileStorage(stream=io.BytesIO(csv_content.encode()), filename="test.csv", content_type="text/csv")
+
+        mock_db_session.query.return_value.where.return_value.first.return_value = mock_app
+
+        with patch("services.annotation_service.current_account_with_tenant") as mock_auth:
+            mock_auth.return_value = (MagicMock(id="user_id"), "tenant_id")
+
+            with patch("services.annotation_service.FeatureService") as mock_features:
+                mock_features.get_features.return_value.billing.enabled = False
+
+                with patch("services.annotation_service.batch_import_annotations_task") as mock_task:
+                    with patch("services.annotation_service.redis_client"):
+                        result = AppAnnotationService.batch_import_app_annotations("app_id", file)
+
+        # Should return success response
+        assert "job_id" in result
+        assert "job_status" in result
+        assert result["job_status"] == "waiting"
+        assert "record_count" in result
+        assert result["record_count"] == 2
+
+
+class TestAnnotationImportTaskOptimization:
+    """Test optimizations in batch import task."""
+
+    def test_task_has_timeout_configured(self):
+        """Test that task has proper timeout configuration."""
+        from tasks.annotation.batch_import_annotations_task import batch_import_annotations_task
+
+        # Verify task configuration
+        assert hasattr(batch_import_annotations_task, "time_limit")
+        assert hasattr(batch_import_annotations_task, "soft_time_limit")
+
+        # Check timeout values are reasonable
+        # Hard limit should be 6 minutes (360s)
+        # Soft limit should be 5 minutes (300s)
+        # Note: actual values depend on Celery configuration
+
+
+class TestConfigurationValues:
+    """Test that security configuration values are properly set."""
+
+    def test_rate_limit_configs_exist(self):
+        """Test that rate limit configurations are defined."""
+        assert hasattr(dify_config, "ANNOTATION_IMPORT_RATE_LIMIT_PER_MINUTE")
+        assert hasattr(dify_config, "ANNOTATION_IMPORT_RATE_LIMIT_PER_HOUR")
+
+        assert dify_config.ANNOTATION_IMPORT_RATE_LIMIT_PER_MINUTE > 0
+        assert dify_config.ANNOTATION_IMPORT_RATE_LIMIT_PER_HOUR > 0
+
+    def test_file_size_limit_config_exists(self):
+        """Test that file size limit configuration is defined."""
+        assert hasattr(dify_config, "ANNOTATION_IMPORT_FILE_SIZE_LIMIT")
+        assert dify_config.ANNOTATION_IMPORT_FILE_SIZE_LIMIT > 0
+        assert dify_config.ANNOTATION_IMPORT_FILE_SIZE_LIMIT <= 10  # Reasonable max (10MB)
+
+    def test_record_limit_configs_exist(self):
+        """Test that record limit configurations are defined."""
+        assert hasattr(dify_config, "ANNOTATION_IMPORT_MAX_RECORDS")
+        assert hasattr(dify_config, "ANNOTATION_IMPORT_MIN_RECORDS")
+
+        assert dify_config.ANNOTATION_IMPORT_MAX_RECORDS > 0
+        assert dify_config.ANNOTATION_IMPORT_MIN_RECORDS > 0
+        assert dify_config.ANNOTATION_IMPORT_MIN_RECORDS < dify_config.ANNOTATION_IMPORT_MAX_RECORDS
+
+    def test_concurrency_limit_config_exists(self):
+        """Test that concurrency limit configuration is defined."""
+        assert hasattr(dify_config, "ANNOTATION_IMPORT_MAX_CONCURRENT")
+        assert dify_config.ANNOTATION_IMPORT_MAX_CONCURRENT > 0
+        assert dify_config.ANNOTATION_IMPORT_MAX_CONCURRENT <= 10  # Reasonable upper bound
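Editor's note: these are pure unit tests (Redis, the DB session, and the account helper are all patched), so they can be selected by class name without standing up any services. The exact test file path is not shown in this diff, hence the -k selection in this illustrative runner:

import pytest

# Select the new security tests by class name; run from the api directory of the repo (assumed layout).
pytest.main(["-q", "-k", "AnnotationImportRateLimiting or AnnotationImportServiceValidation"])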
@@ -1448,5 +1448,16 @@ WORKFLOW_SCHEDULE_MAX_DISPATCH_PER_TICK=0
 # Tenant isolated task queue configuration
 TENANT_ISOLATED_TASK_CONCURRENCY=1
+
+# Maximum allowed CSV file size for annotation import in megabytes
+ANNOTATION_IMPORT_FILE_SIZE_LIMIT=2
+#Maximum number of annotation records allowed in a single import
+ANNOTATION_IMPORT_MAX_RECORDS=10000
+# Minimum number of annotation records required in a single import
+ANNOTATION_IMPORT_MIN_RECORDS=1
+ANNOTATION_IMPORT_RATE_LIMIT_PER_MINUTE=5
+ANNOTATION_IMPORT_RATE_LIMIT_PER_HOUR=20
+# Maximum number of concurrent annotation import tasks per tenant
+ANNOTATION_IMPORT_MAX_CONCURRENT=5
 
 # The API key of amplitude
 AMPLITUDE_API_KEY=
@@ -648,6 +648,12 @@ x-shared-env: &shared-api-worker-env
   WORKFLOW_SCHEDULE_POLLER_BATCH_SIZE: ${WORKFLOW_SCHEDULE_POLLER_BATCH_SIZE:-100}
   WORKFLOW_SCHEDULE_MAX_DISPATCH_PER_TICK: ${WORKFLOW_SCHEDULE_MAX_DISPATCH_PER_TICK:-0}
   TENANT_ISOLATED_TASK_CONCURRENCY: ${TENANT_ISOLATED_TASK_CONCURRENCY:-1}
+  ANNOTATION_IMPORT_FILE_SIZE_LIMIT: ${ANNOTATION_IMPORT_FILE_SIZE_LIMIT:-2}
+  ANNOTATION_IMPORT_MAX_RECORDS: ${ANNOTATION_IMPORT_MAX_RECORDS:-10000}
+  ANNOTATION_IMPORT_MIN_RECORDS: ${ANNOTATION_IMPORT_MIN_RECORDS:-1}
+  ANNOTATION_IMPORT_RATE_LIMIT_PER_MINUTE: ${ANNOTATION_IMPORT_RATE_LIMIT_PER_MINUTE:-5}
+  ANNOTATION_IMPORT_RATE_LIMIT_PER_HOUR: ${ANNOTATION_IMPORT_RATE_LIMIT_PER_HOUR:-20}
+  ANNOTATION_IMPORT_MAX_CONCURRENT: ${ANNOTATION_IMPORT_MAX_CONCURRENT:-5}
   AMPLITUDE_API_KEY: ${AMPLITUDE_API_KEY:-}
 
 services:
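Editor's note: the ${VAR:-default} syntax above means docker compose falls back to the baked-in default when the variable is absent from the env file, so operators only need to add one line there to change a limit. Inside the api/worker container the value then surfaces as a plain environment variable, roughly as in this hedged sketch:

import os

# Unset means the compose default applies (2 MB for the file size limit shown above).
limit_mb = int(os.environ.get("ANNOTATION_IMPORT_FILE_SIZE_LIMIT", "2"))
print(f"annotation import file size limit: {limit_mb} MB")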