fix: DoS in annotation import (#29470)

Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
zyssyz123
2025-12-15 15:22:04 +08:00
committed by GitHub
parent 714b443077
commit 724cd57dbf
9 changed files with 643 additions and 13 deletions

View File

@@ -670,3 +670,14 @@ SINGLE_CHUNK_ATTACHMENT_LIMIT=10
ATTACHMENT_IMAGE_FILE_SIZE_LIMIT=2
ATTACHMENT_IMAGE_DOWNLOAD_TIMEOUT=60
IMAGE_FILE_BATCH_LIMIT=10
# Maximum allowed CSV file size for annotation import in megabytes
ANNOTATION_IMPORT_FILE_SIZE_LIMIT=2
# Maximum number of annotation records allowed in a single import
ANNOTATION_IMPORT_MAX_RECORDS=10000
# Minimum number of annotation records required in a single import
ANNOTATION_IMPORT_MIN_RECORDS=1
ANNOTATION_IMPORT_RATE_LIMIT_PER_MINUTE=5
ANNOTATION_IMPORT_RATE_LIMIT_PER_HOUR=20
# Maximum number of concurrent annotation import tasks per tenant
ANNOTATION_IMPORT_MAX_CONCURRENT=5

View File

@@ -380,6 +380,37 @@ class FileUploadConfig(BaseSettings):
        default=60,
    )

    # Annotation Import Security Configurations
    ANNOTATION_IMPORT_FILE_SIZE_LIMIT: NonNegativeInt = Field(
        description="Maximum allowed CSV file size for annotation import in megabytes",
        default=2,
    )
    ANNOTATION_IMPORT_MAX_RECORDS: PositiveInt = Field(
        description="Maximum number of annotation records allowed in a single import",
        default=10000,
    )
    ANNOTATION_IMPORT_MIN_RECORDS: PositiveInt = Field(
        description="Minimum number of annotation records required in a single import",
        default=1,
    )
    ANNOTATION_IMPORT_RATE_LIMIT_PER_MINUTE: PositiveInt = Field(
        description="Maximum number of annotation import requests per minute per tenant",
        default=5,
    )
    ANNOTATION_IMPORT_RATE_LIMIT_PER_HOUR: PositiveInt = Field(
        description="Maximum number of annotation import requests per hour per tenant",
        default=20,
    )
    ANNOTATION_IMPORT_MAX_CONCURRENT: PositiveInt = Field(
        description="Maximum number of concurrent annotation import tasks per tenant",
        default=2,
    )

    inner_UPLOAD_FILE_EXTENSION_BLACKLIST: str = Field(
        description=(
            "Comma-separated list of file extensions that are blocked from upload. "

View File

@@ -1,6 +1,6 @@
from typing import Any, Literal

from flask import abort, request
from flask_restx import Resource, fields, marshal, marshal_with
from pydantic import BaseModel, Field, field_validator
@@ -8,6 +8,8 @@ from controllers.common.errors import NoFileUploadedError, TooManyFilesError
from controllers.console import console_ns
from controllers.console.wraps import (
    account_initialization_required,
    annotation_import_concurrency_limit,
    annotation_import_rate_limit,
    cloud_edition_billing_resource_check,
    edit_permission_required,
    setup_required,
@@ -314,18 +316,25 @@ class AnnotationUpdateDeleteApi(Resource):
@console_ns.route("/apps/<uuid:app_id>/annotations/batch-import")
class AnnotationBatchImportApi(Resource):
    @console_ns.doc("batch_import_annotations")
    @console_ns.doc(description="Batch import annotations from CSV file with rate limiting and security checks")
    @console_ns.doc(params={"app_id": "Application ID"})
    @console_ns.response(200, "Batch import started successfully")
    @console_ns.response(403, "Insufficient permissions")
    @console_ns.response(400, "No file uploaded or too many files")
    @console_ns.response(413, "File too large")
    @console_ns.response(429, "Too many requests or concurrent imports")
    @setup_required
    @login_required
    @account_initialization_required
    @cloud_edition_billing_resource_check("annotation")
    @annotation_import_rate_limit
    @annotation_import_concurrency_limit
    @edit_permission_required
    def post(self, app_id):
        from configs import dify_config

        app_id = str(app_id)
        # check file
        if "file" not in request.files:
            raise NoFileUploadedError()
@@ -335,9 +344,27 @@ class AnnotationBatchImportApi(Resource):
        # get file from request
        file = request.files["file"]
        # check file type
        if not file.filename or not file.filename.lower().endswith(".csv"):
            raise ValueError("Invalid file type. Only CSV files are allowed")

        # Check file size before processing
        file.seek(0, 2)  # Seek to end of file
        file_size = file.tell()
        file.seek(0)  # Reset to beginning
        max_size_bytes = dify_config.ANNOTATION_IMPORT_FILE_SIZE_LIMIT * 1024 * 1024
        if file_size > max_size_bytes:
            abort(
                413,
                f"File size exceeds maximum limit of {dify_config.ANNOTATION_IMPORT_FILE_SIZE_LIMIT}MB. "
                f"Please reduce the file size and try again.",
            )
        if file_size == 0:
            raise ValueError("The uploaded file is empty")

        return AppAnnotationService.batch_import_app_annotations(app_id, file)
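For illustration, the endpoint can be exercised as in the sketch below. The /console/api prefix and the Authorization header are deployment-specific assumptions, not part of this diff:

# Hedged usage sketch (illustrative host, app id, and token):
import requests

with open("annotations.csv", "rb") as f:
    resp = requests.post(
        "https://your-dify-host/console/api/apps/<app_id>/annotations/batch-import",  # assumed prefix
        headers={"Authorization": "Bearer <console-access-token>"},  # assumed auth scheme
        files={"file": ("annotations.csv", f, "text/csv")},
    )
# 200 -> {"job_id": ..., "job_status": "waiting", "record_count": ...}
# 413 -> file exceeds ANNOTATION_IMPORT_FILE_SIZE_LIMIT
# 429 -> rate limit or concurrency limit hit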

View File

@@ -331,3 +331,91 @@ def is_admin_or_owner_required(f: Callable[P, R]):
        return f(*args, **kwargs)

    return decorated_function

def annotation_import_rate_limit(view: Callable[P, R]):
    """
    Rate-limiting decorator for annotation import operations.

    Implements sliding-window rate limiting with two tiers:
    - Short-term: configurable requests per minute (default: 5)
    - Long-term: configurable requests per hour (default: 20)

    Uses Redis ZSETs for distributed rate limiting across multiple instances.
    """

    @wraps(view)
    def decorated(*args: P.args, **kwargs: P.kwargs):
        _, current_tenant_id = current_account_with_tenant()
        current_time = int(time.time() * 1000)

        # Check the per-minute rate limit
        minute_key = f"annotation_import_rate_limit:{current_tenant_id}:1min"
        redis_client.zadd(minute_key, {current_time: current_time})
        redis_client.zremrangebyscore(minute_key, 0, current_time - 60000)
        minute_count = redis_client.zcard(minute_key)
        redis_client.expire(minute_key, 120)  # 2 minutes TTL
        if minute_count > dify_config.ANNOTATION_IMPORT_RATE_LIMIT_PER_MINUTE:
            abort(
                429,
                f"Too many annotation import requests. Maximum {dify_config.ANNOTATION_IMPORT_RATE_LIMIT_PER_MINUTE} "
                f"requests per minute allowed. Please try again later.",
            )

        # Check the per-hour rate limit
        hour_key = f"annotation_import_rate_limit:{current_tenant_id}:1hour"
        redis_client.zadd(hour_key, {current_time: current_time})
        redis_client.zremrangebyscore(hour_key, 0, current_time - 3600000)
        hour_count = redis_client.zcard(hour_key)
        redis_client.expire(hour_key, 7200)  # 2 hours TTL
        if hour_count > dify_config.ANNOTATION_IMPORT_RATE_LIMIT_PER_HOUR:
            abort(
                429,
                f"Too many annotation import requests. Maximum {dify_config.ANNOTATION_IMPORT_RATE_LIMIT_PER_HOUR} "
                f"requests per hour allowed. Please try again later.",
            )

        return view(*args, **kwargs)

    return decorated
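The per-minute check above is a standard ZSET sliding window: each request is recorded as a member scored by its timestamp, members older than the window are evicted, and the surviving cardinality is the request count. A minimal standalone sketch of the same pattern, assuming a local Redis instance rather than Dify's shared redis_client:

import time

import redis

r = redis.Redis()  # assumed local instance, for illustration only

def within_minute_limit(tenant_id: str, limit: int = 5) -> bool:
    now_ms = int(time.time() * 1000)
    key = f"annotation_import_rate_limit:{tenant_id}:1min"
    r.zadd(key, {now_ms: now_ms})                # record this request as a member scored by time
    r.zremrangebyscore(key, 0, now_ms - 60_000)  # evict entries older than the 60s window
    r.expire(key, 120)                           # idle keys clean themselves up
    return r.zcard(key) <= limit                 # members remaining = requests in the window

Note that the member is the millisecond timestamp itself, so two requests landing in the same millisecond collapse into a single entry; the decorator above shares this property.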
def annotation_import_concurrency_limit(view: Callable[P, R]):
    """
    Concurrency control decorator for annotation import operations.

    Limits the number of concurrent import tasks per tenant to prevent
    resource exhaustion and ensure fair resource allocation.

    Uses a Redis ZSET to track active import jobs, with automatic cleanup
    of stale entries (jobs older than 2 minutes).
    """

    @wraps(view)
    def decorated(*args: P.args, **kwargs: P.kwargs):
        _, current_tenant_id = current_account_with_tenant()
        current_time = int(time.time() * 1000)
        active_jobs_key = f"annotation_import_active:{current_tenant_id}"

        # Clean up stale entries (jobs that should have completed or timed out)
        stale_threshold = current_time - 120000  # 2 minutes ago
        redis_client.zremrangebyscore(active_jobs_key, 0, stale_threshold)

        # Check the current active job count
        active_count = redis_client.zcard(active_jobs_key)
        if active_count >= dify_config.ANNOTATION_IMPORT_MAX_CONCURRENT:
            abort(
                429,
                f"Too many concurrent import tasks. Maximum {dify_config.ANNOTATION_IMPORT_MAX_CONCURRENT} "
                f"concurrent imports allowed per workspace. Please wait for existing imports to complete.",
            )

        # Allow the request to proceed;
        # the actual job registration happens in the service layer
        return view(*args, **kwargs)

    return decorated
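The decorator only checks capacity; the slot itself is claimed and released elsewhere, so the full lifecycle spans three layers. A condensed sketch using the same key scheme as above (values illustrative):

active_jobs_key = f"annotation_import_active:{current_tenant_id}"

# 1) decorator (here): prune stale entries, then reject if the tenant is at capacity
redis_client.zremrangebyscore(active_jobs_key, 0, current_time - 120000)
if redis_client.zcard(active_jobs_key) >= dify_config.ANNOTATION_IMPORT_MAX_CONCURRENT:
    abort(429, "Too many concurrent import tasks. ...")

# 2) service layer (below): claim a slot once the CSV has been validated
redis_client.zadd(active_jobs_key, {job_id: current_time})
redis_client.expire(active_jobs_key, 7200)

# 3) Celery task (below, in a finally block): release the slot
redis_client.zrem(active_jobs_key, job_id)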

View File

@@ -1,6 +1,9 @@
import logging
import uuid

import pandas as pd
from sqlalchemy import or_, select
from werkzeug.datastructures import FileStorage
from werkzeug.exceptions import NotFound

logger = logging.getLogger(__name__)
@@ -330,6 +333,18 @@ class AppAnnotationService:
    @classmethod
    def batch_import_app_annotations(cls, app_id, file: FileStorage):
        """
        Batch import annotations from a CSV file, with enhanced security checks.

        Security features:
        - File size validation
        - Row count limits (min/max)
        - Memory-efficient CSV parsing
        - Subscription quota validation
        - Concurrency tracking
        """
        from configs import dify_config

        # get app info
        current_user, current_tenant_id = current_account_with_tenant()
        app = (
@@ -341,16 +356,80 @@ class AppAnnotationService:
        if not app:
            raise NotFound("App not found")

        job_id: str | None = None  # Initialize to avoid an unbound variable error
        try:
            # Quick row-count check before full parsing (memory efficient):
            # read only the first chunk to estimate the row count
            file.stream.seek(0)
            first_chunk = file.stream.read(8192)  # Read the first 8KB
            file.stream.seek(0)

            newline_count = first_chunk.count(b"\n")
            if newline_count == 0:
                raise ValueError("The CSV file appears to be empty or invalid.")

            # Parse the CSV with a row limit to prevent memory exhaustion;
            # nrows caps how much of the file pandas will actually read
            max_records = dify_config.ANNOTATION_IMPORT_MAX_RECORDS
            min_records = dify_config.ANNOTATION_IMPORT_MIN_RECORDS
            df = pd.read_csv(
                file.stream,
                dtype=str,
                nrows=max_records + 1,  # Read one extra row to detect overflow
                engine="python",
                on_bad_lines="skip",  # Skip malformed lines instead of crashing
            )

            # Validate column count
            if len(df.columns) < 2:
                raise ValueError("Invalid CSV format. The file must contain at least 2 columns (question and answer).")

            # Build the result list with validation
            result: list[dict] = []
            for idx, row in df.iterrows():
                # Stop if we exceed the limit
                if len(result) >= max_records:
                    raise ValueError(
                        f"The CSV file contains too many records. Maximum {max_records} records allowed per import. "
                        f"Please split your file into smaller batches."
                    )

                # Extract and validate the question and answer
                try:
                    question_raw = row.iloc[0]
                    answer_raw = row.iloc[1]
                except (IndexError, KeyError):
                    continue  # Skip malformed rows

                # Convert to string and strip whitespace
                question = str(question_raw).strip() if question_raw is not None else ""
                answer = str(answer_raw).strip() if answer_raw is not None else ""

                # Skip empty entries or NaN values
                if not question or not answer or question.lower() == "nan" or answer.lower() == "nan":
                    continue

                # Validate length constraints (idx is a pandas index; convert to int for display)
                row_num = int(idx) + 2 if isinstance(idx, (int, float)) else len(result) + 2
                if len(question) > 2000:
                    raise ValueError(f"Question at row {row_num} is too long. Maximum 2000 characters allowed.")
                if len(answer) > 10000:
                    raise ValueError(f"Answer at row {row_num} is too long. Maximum 10000 characters allowed.")

                content = {"question": question, "answer": answer}
                result.append(content)

            # Validate minimum records
            if len(result) < min_records:
                raise ValueError(
                    f"The CSV file must contain at least {min_records} valid annotation record(s). "
                    f"Found {len(result)} valid record(s)."
                )

            # Check the annotation quota limit
            features = FeatureService.get_features(current_tenant_id)
            if features.billing.enabled:
                annotation_quota_limit = features.annotation_quota_limit
@@ -359,12 +438,34 @@ class AppAnnotationService:
            # async job
            job_id = str(uuid.uuid4())
            indexing_cache_key = f"app_annotation_batch_import_{str(job_id)}"

            # Register the job in the active-tasks list for concurrency tracking
            current_time = int(naive_utc_now().timestamp() * 1000)
            active_jobs_key = f"annotation_import_active:{current_tenant_id}"
            redis_client.zadd(active_jobs_key, {job_id: current_time})
            redis_client.expire(active_jobs_key, 7200)  # 2 hours TTL

            # Set the job status
            redis_client.setnx(indexing_cache_key, "waiting")
            batch_import_annotations_task.delay(str(job_id), result, app_id, current_tenant_id, current_user.id)
        except ValueError as e:
            return {"error_msg": str(e)}
        except Exception as e:
            # Clean up the active-job registration on error (only if the job was created)
            if job_id is not None:
                try:
                    active_jobs_key = f"annotation_import_active:{current_tenant_id}"
                    redis_client.zrem(active_jobs_key, job_id)
                except Exception:
                    # Silently ignore cleanup errors - the entry will be auto-expired
                    logger.debug("Failed to clean up active job tracking during error handling")
            error_str = str(e)
            return {"error_msg": f"An error occurred while processing the file: {error_str}"}

        return {"job_id": job_id, "job_status": "waiting", "record_count": len(result)}
    @classmethod
    def get_annotation_hit_histories(cls, app_id: str, annotation_id: str, page, limit):
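For reference, a minimal CSV that passes the validation above: a header row plus at least ANNOTATION_IMPORT_MIN_RECORDS data rows, with the question in the first column (at most 2000 characters) and the answer in the second (at most 10000 characters); additional columns are ignored. The rows below are illustrative:

question,answer
What is an annotation?,A stored question/answer pair used for matching replies.
What file types are accepted?,Only CSV files with at least two columns.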

View File

@@ -30,6 +30,8 @@ def batch_import_annotations_task(job_id: str, content_list: list[dict], app_id:
    logger.info(click.style(f"Start batch import annotation: {job_id}", fg="green"))
    start_at = time.perf_counter()
    indexing_cache_key = f"app_annotation_batch_import_{str(job_id)}"
    active_jobs_key = f"annotation_import_active:{tenant_id}"

    # get app info
    app = db.session.query(App).where(App.id == app_id, App.tenant_id == tenant_id, App.status == "normal").first()
@@ -91,4 +93,13 @@ def batch_import_annotations_task(job_id: str, content_list: list[dict], app_id:
        redis_client.setex(indexing_error_msg_key, 600, str(e))
        logger.exception("Build index for batch import annotations failed")
    finally:
        # Clean up active job tracking to release the concurrency slot
        try:
            redis_client.zrem(active_jobs_key, job_id)
            logger.debug("Released concurrency slot for job: %s", job_id)
        except Exception as cleanup_error:
            # Log but don't fail if cleanup fails - the entry will be auto-expired
            logger.warning("Failed to clean up active job tracking for %s: %s", job_id, cleanup_error)
        # Close the database session
        db.session.close()

View File

@@ -0,0 +1,344 @@
"""
Unit tests for annotation import security features.
Tests rate limiting, concurrency control, file validation, and other
security features added to prevent DoS attacks on the annotation import endpoint.
"""
import io
from unittest.mock import MagicMock, patch
import pytest
from werkzeug.datastructures import FileStorage
from configs import dify_config
class TestAnnotationImportRateLimiting:
"""Test rate limiting for annotation import operations."""
@pytest.fixture
def mock_redis(self):
"""Mock Redis client for testing."""
with patch("controllers.console.wraps.redis_client") as mock:
yield mock
@pytest.fixture
def mock_current_account(self):
"""Mock current account with tenant."""
with patch("controllers.console.wraps.current_account_with_tenant") as mock:
mock.return_value = (MagicMock(id="user_id"), "test_tenant_id")
yield mock
    def test_rate_limit_per_minute_enforced(self, mock_redis, mock_current_account):
        """Test that the per-minute rate limit is enforced."""
        from controllers.console.wraps import annotation_import_rate_limit

        # Simulate exceeding the per-minute limit
        mock_redis.zcard.side_effect = [
            dify_config.ANNOTATION_IMPORT_RATE_LIMIT_PER_MINUTE + 1,  # Minute check
            10,  # Hour check
        ]

        @annotation_import_rate_limit
        def dummy_view():
            return "success"

        # Should abort with 429
        with pytest.raises(Exception) as exc_info:
            dummy_view()
        # Verify it's a rate limit error
        assert "429" in str(exc_info.value) or "Too many" in str(exc_info.value)

    def test_rate_limit_per_hour_enforced(self, mock_redis, mock_current_account):
        """Test that the per-hour rate limit is enforced."""
        from controllers.console.wraps import annotation_import_rate_limit

        # Simulate exceeding the per-hour limit
        mock_redis.zcard.side_effect = [
            3,  # Minute check (under limit)
            dify_config.ANNOTATION_IMPORT_RATE_LIMIT_PER_HOUR + 1,  # Hour check (over limit)
        ]

        @annotation_import_rate_limit
        def dummy_view():
            return "success"

        # Should abort with 429
        with pytest.raises(Exception) as exc_info:
            dummy_view()
        assert "429" in str(exc_info.value) or "Too many" in str(exc_info.value)

    def test_rate_limit_within_limits_passes(self, mock_redis, mock_current_account):
        """Test that requests within limits are allowed."""
        from controllers.console.wraps import annotation_import_rate_limit

        # Simulate being under both limits
        mock_redis.zcard.return_value = 2

        @annotation_import_rate_limit
        def dummy_view():
            return "success"

        # Should succeed
        result = dummy_view()
        assert result == "success"
        # Verify Redis operations were called
        assert mock_redis.zadd.called
        assert mock_redis.zremrangebyscore.called

class TestAnnotationImportConcurrencyControl:
    """Test concurrency control for annotation import operations."""

    @pytest.fixture
    def mock_redis(self):
        """Mock Redis client for testing."""
        with patch("controllers.console.wraps.redis_client") as mock:
            yield mock

    @pytest.fixture
    def mock_current_account(self):
        """Mock current account with tenant."""
        with patch("controllers.console.wraps.current_account_with_tenant") as mock:
            mock.return_value = (MagicMock(id="user_id"), "test_tenant_id")
            yield mock

    def test_concurrency_limit_enforced(self, mock_redis, mock_current_account):
        """Test that the concurrent task limit is enforced."""
        from controllers.console.wraps import annotation_import_concurrency_limit

        # Simulate the maximum number of concurrent tasks already running
        mock_redis.zcard.return_value = dify_config.ANNOTATION_IMPORT_MAX_CONCURRENT

        @annotation_import_concurrency_limit
        def dummy_view():
            return "success"

        # Should abort with 429
        with pytest.raises(Exception) as exc_info:
            dummy_view()
        assert "429" in str(exc_info.value) or "concurrent" in str(exc_info.value).lower()

    def test_concurrency_within_limit_passes(self, mock_redis, mock_current_account):
        """Test that requests within concurrency limits are allowed."""
        from controllers.console.wraps import annotation_import_concurrency_limit

        # Simulate being under the concurrent task limit
        mock_redis.zcard.return_value = 1

        @annotation_import_concurrency_limit
        def dummy_view():
            return "success"

        # Should succeed
        result = dummy_view()
        assert result == "success"

    def test_stale_jobs_are_cleaned_up(self, mock_redis, mock_current_account):
        """Test that old/stale job entries are removed."""
        from controllers.console.wraps import annotation_import_concurrency_limit

        mock_redis.zcard.return_value = 0

        @annotation_import_concurrency_limit
        def dummy_view():
            return "success"

        dummy_view()
        # Verify cleanup was called
        assert mock_redis.zremrangebyscore.called

class TestAnnotationImportFileValidation:
    """Test file validation in annotation import."""

    def test_file_size_limit_enforced(self):
        """Test that files exceeding the size limit are rejected."""
        # Create a file larger than the limit
        max_size = dify_config.ANNOTATION_IMPORT_FILE_SIZE_LIMIT * 1024 * 1024
        large_content = b"x" * (max_size + 1024)  # Exceed the limit by 1KB
        file = FileStorage(stream=io.BytesIO(large_content), filename="test.csv", content_type="text/csv")
        # Should be rejected in the controller;
        # this is covered by integration tests against the actual endpoint

    def test_empty_file_rejected(self):
        """Test that empty files are rejected."""
        file = FileStorage(stream=io.BytesIO(b""), filename="test.csv", content_type="text/csv")
        # Should be rejected; covered by integration tests

    def test_non_csv_file_rejected(self):
        """Test that non-CSV files are rejected."""
        file = FileStorage(stream=io.BytesIO(b"test"), filename="test.txt", content_type="text/plain")
        # Should be rejected based on extension; covered by integration tests

class TestAnnotationImportServiceValidation:
    """Test service layer validation for annotation import."""

    @pytest.fixture
    def mock_app(self):
        """Mock application object."""
        app = MagicMock()
        app.id = "app_id"
        return app

    @pytest.fixture
    def mock_db_session(self):
        """Mock database session."""
        with patch("services.annotation_service.db.session") as mock:
            yield mock

    def test_max_records_limit_enforced(self, mock_app, mock_db_session):
        """Test that files with too many records are rejected."""
        from services.annotation_service import AppAnnotationService

        # Create a CSV with too many records
        max_records = dify_config.ANNOTATION_IMPORT_MAX_RECORDS
        csv_content = "question,answer\n"
        for i in range(max_records + 100):
            csv_content += f"Question {i},Answer {i}\n"
        file = FileStorage(stream=io.BytesIO(csv_content.encode()), filename="test.csv", content_type="text/csv")

        mock_db_session.query.return_value.where.return_value.first.return_value = mock_app

        with patch("services.annotation_service.current_account_with_tenant") as mock_auth:
            mock_auth.return_value = (MagicMock(id="user_id"), "tenant_id")
            with patch("services.annotation_service.FeatureService") as mock_features:
                mock_features.get_features.return_value.billing.enabled = False
                result = AppAnnotationService.batch_import_app_annotations("app_id", file)

        # Should return an error about too many records
        assert "error_msg" in result
        assert "too many" in result["error_msg"].lower() or "maximum" in result["error_msg"].lower()

    def test_min_records_limit_enforced(self, mock_app, mock_db_session):
        """Test that files with too few valid records are rejected."""
        from services.annotation_service import AppAnnotationService

        # Create a CSV with only a header (no data rows)
        csv_content = "question,answer\n"
        file = FileStorage(stream=io.BytesIO(csv_content.encode()), filename="test.csv", content_type="text/csv")

        mock_db_session.query.return_value.where.return_value.first.return_value = mock_app

        with patch("services.annotation_service.current_account_with_tenant") as mock_auth:
            mock_auth.return_value = (MagicMock(id="user_id"), "tenant_id")
            result = AppAnnotationService.batch_import_app_annotations("app_id", file)

        # Should return an error about insufficient records
        assert "error_msg" in result
        assert "at least" in result["error_msg"].lower() or "minimum" in result["error_msg"].lower()

    def test_invalid_csv_format_handled(self, mock_app, mock_db_session):
        """Test that an invalid CSV format is handled gracefully."""
        from services.annotation_service import AppAnnotationService

        # Create invalid CSV content
        csv_content = 'invalid,csv,format\nwith,unbalanced,quotes,and"stuff'
        file = FileStorage(stream=io.BytesIO(csv_content.encode()), filename="test.csv", content_type="text/csv")

        mock_db_session.query.return_value.where.return_value.first.return_value = mock_app

        with patch("services.annotation_service.current_account_with_tenant") as mock_auth:
            mock_auth.return_value = (MagicMock(id="user_id"), "tenant_id")
            result = AppAnnotationService.batch_import_app_annotations("app_id", file)

        # Should return an error message
        assert "error_msg" in result

    def test_valid_import_succeeds(self, mock_app, mock_db_session):
        """Test that a valid import request succeeds."""
        from services.annotation_service import AppAnnotationService

        # Create a valid CSV
        csv_content = "question,answer\nWhat is AI?,Artificial Intelligence\nWhat is ML?,Machine Learning\n"
        file = FileStorage(stream=io.BytesIO(csv_content.encode()), filename="test.csv", content_type="text/csv")

        mock_db_session.query.return_value.where.return_value.first.return_value = mock_app

        with patch("services.annotation_service.current_account_with_tenant") as mock_auth:
            mock_auth.return_value = (MagicMock(id="user_id"), "tenant_id")
            with patch("services.annotation_service.FeatureService") as mock_features:
                mock_features.get_features.return_value.billing.enabled = False
                with patch("services.annotation_service.batch_import_annotations_task"):
                    with patch("services.annotation_service.redis_client"):
                        result = AppAnnotationService.batch_import_app_annotations("app_id", file)

        # Should return a success response
        assert "job_id" in result
        assert "job_status" in result
        assert result["job_status"] == "waiting"
        assert "record_count" in result
        assert result["record_count"] == 2

class TestAnnotationImportTaskOptimization:
    """Test optimizations in the batch import task."""

    def test_task_has_timeout_configured(self):
        """Test that the task has a proper timeout configuration."""
        from tasks.annotation.batch_import_annotations_task import batch_import_annotations_task

        # Verify task configuration
        assert hasattr(batch_import_annotations_task, "time_limit")
        assert hasattr(batch_import_annotations_task, "soft_time_limit")
        # Check that the timeout values are reasonable:
        # the hard limit should be 6 minutes (360s) and the soft limit 5 minutes (300s).
        # Note: actual values depend on the Celery configuration.

class TestConfigurationValues:
    """Test that security configuration values are properly set."""

    def test_rate_limit_configs_exist(self):
        """Test that rate limit configurations are defined."""
        assert hasattr(dify_config, "ANNOTATION_IMPORT_RATE_LIMIT_PER_MINUTE")
        assert hasattr(dify_config, "ANNOTATION_IMPORT_RATE_LIMIT_PER_HOUR")
        assert dify_config.ANNOTATION_IMPORT_RATE_LIMIT_PER_MINUTE > 0
        assert dify_config.ANNOTATION_IMPORT_RATE_LIMIT_PER_HOUR > 0

    def test_file_size_limit_config_exists(self):
        """Test that the file size limit configuration is defined."""
        assert hasattr(dify_config, "ANNOTATION_IMPORT_FILE_SIZE_LIMIT")
        assert dify_config.ANNOTATION_IMPORT_FILE_SIZE_LIMIT > 0
        assert dify_config.ANNOTATION_IMPORT_FILE_SIZE_LIMIT <= 10  # Reasonable max (10MB)

    def test_record_limit_configs_exist(self):
        """Test that record limit configurations are defined."""
        assert hasattr(dify_config, "ANNOTATION_IMPORT_MAX_RECORDS")
        assert hasattr(dify_config, "ANNOTATION_IMPORT_MIN_RECORDS")
        assert dify_config.ANNOTATION_IMPORT_MAX_RECORDS > 0
        assert dify_config.ANNOTATION_IMPORT_MIN_RECORDS > 0
        assert dify_config.ANNOTATION_IMPORT_MIN_RECORDS < dify_config.ANNOTATION_IMPORT_MAX_RECORDS

    def test_concurrency_limit_config_exists(self):
        """Test that the concurrency limit configuration is defined."""
        assert hasattr(dify_config, "ANNOTATION_IMPORT_MAX_CONCURRENT")
        assert dify_config.ANNOTATION_IMPORT_MAX_CONCURRENT > 0
        assert dify_config.ANNOTATION_IMPORT_MAX_CONCURRENT <= 10  # Reasonable upper bound

View File

@@ -1448,5 +1448,16 @@ WORKFLOW_SCHEDULE_MAX_DISPATCH_PER_TICK=0
# Tenant isolated task queue configuration
TENANT_ISOLATED_TASK_CONCURRENCY=1
# Maximum allowed CSV file size for annotation import in megabytes
ANNOTATION_IMPORT_FILE_SIZE_LIMIT=2
# Maximum number of annotation records allowed in a single import
ANNOTATION_IMPORT_MAX_RECORDS=10000
# Minimum number of annotation records required in a single import
ANNOTATION_IMPORT_MIN_RECORDS=1
ANNOTATION_IMPORT_RATE_LIMIT_PER_MINUTE=5
ANNOTATION_IMPORT_RATE_LIMIT_PER_HOUR=20
# Maximum number of concurrent annotation import tasks per tenant
ANNOTATION_IMPORT_MAX_CONCURRENT=5
# The API key of amplitude
AMPLITUDE_API_KEY=

View File

@@ -648,6 +648,12 @@ x-shared-env: &shared-api-worker-env
  WORKFLOW_SCHEDULE_POLLER_BATCH_SIZE: ${WORKFLOW_SCHEDULE_POLLER_BATCH_SIZE:-100}
  WORKFLOW_SCHEDULE_MAX_DISPATCH_PER_TICK: ${WORKFLOW_SCHEDULE_MAX_DISPATCH_PER_TICK:-0}
  TENANT_ISOLATED_TASK_CONCURRENCY: ${TENANT_ISOLATED_TASK_CONCURRENCY:-1}
  ANNOTATION_IMPORT_FILE_SIZE_LIMIT: ${ANNOTATION_IMPORT_FILE_SIZE_LIMIT:-2}
  ANNOTATION_IMPORT_MAX_RECORDS: ${ANNOTATION_IMPORT_MAX_RECORDS:-10000}
  ANNOTATION_IMPORT_MIN_RECORDS: ${ANNOTATION_IMPORT_MIN_RECORDS:-1}
  ANNOTATION_IMPORT_RATE_LIMIT_PER_MINUTE: ${ANNOTATION_IMPORT_RATE_LIMIT_PER_MINUTE:-5}
  ANNOTATION_IMPORT_RATE_LIMIT_PER_HOUR: ${ANNOTATION_IMPORT_RATE_LIMIT_PER_HOUR:-20}
  ANNOTATION_IMPORT_MAX_CONCURRENT: ${ANNOTATION_IMPORT_MAX_CONCURRENT:-5}
  AMPLITUDE_API_KEY: ${AMPLITUDE_API_KEY:-}
services: