refactor(storage): replace storage proxy with ticket-based URL system

- Removed the storage proxy controller and its associated endpoints for file download and upload.
- Updated the file controller to use the new storage ticket service for generating download and upload URLs.
- Modified the file presign storage to fall back to ticket-based URLs instead of signed proxy URLs.
- Enhanced unit tests to validate the new ticket generation and retrieval logic.
This commit is contained in:
Harry
2026-01-29 23:39:24 +08:00
parent f52fb919d1
commit 6be800e14f
6 changed files with 365 additions and 247 deletions

View File

@@ -16,7 +16,7 @@ files_ns = Namespace("files", description="File operations", path="/")
from . import (
image_preview,
storage_proxy,
storage_files,
tool_files,
upload,
)
@@ -28,7 +28,7 @@ __all__ = [
"bp",
"files_ns",
"image_preview",
"storage_proxy",
"storage_files",
"tool_files",
"upload",
]

View File

@@ -0,0 +1,80 @@
"""Token-based file proxy controller for storage operations.
This controller handles file download and upload operations using opaque UUID tokens.
The token maps to the real storage key in Redis, so the actual storage path is never
exposed in the URL.
Routes:
GET /files/storage-files/{token} - Download a file
PUT /files/storage-files/{token} - Upload a file
The permitted operation (download/upload) is recorded in the ticket stored in Redis
and must match the HTTP method of the request. This ensures a download ticket cannot
be used for upload and vice versa.
"""
from urllib.parse import quote
from flask import Response, request
from flask_restx import Resource
from werkzeug.exceptions import Forbidden, NotFound, RequestEntityTooLarge
from controllers.files import files_ns
from extensions.ext_storage import storage
from services.storage_ticket_service import StorageTicketService
@files_ns.route("/storage-files/<string:token>")
class StorageFilesApi(Resource):
    """Handle file downloads and uploads addressed by an opaque ticket token."""

    def get(self, token: str):
        """Download the file referenced by a download ticket.

        Args:
            token: Token taken from the URL path.

        Returns:
            A streaming ``application/octet-stream`` response sent as an attachment.

        Raises:
            Forbidden: No ticket exists for the token, or the ticket's op is not "download".
            NotFound: The storage backend raised ``FileNotFoundError`` for the ticket's key.
        """
        ticket = StorageTicketService.get_ticket(token)
        if ticket is None:
            raise Forbidden("Invalid or expired token")
        if ticket.op != "download":
            raise Forbidden("This token is not valid for download")

        try:
            generator = storage.load_stream(ticket.storage_key)
        except FileNotFoundError:
            raise NotFound("File not found")

        # Prefer the filename recorded on the ticket; otherwise use the last
        # path segment of the storage key.
        filename = ticket.filename or ticket.storage_key.rsplit("/", 1)[-1]
        encoded_filename = quote(filename)

        return Response(
            generator,
            mimetype="application/octet-stream",
            direct_passthrough=True,
            headers={
                "Content-Disposition": f"attachment; filename*=UTF-8''{encoded_filename}",
            },
        )

    def put(self, token: str):
        """Store the request body under the key referenced by an upload ticket.

        Args:
            token: Token taken from the URL path.

        Returns:
            An empty 204 response once the content has been saved.

        Raises:
            Forbidden: No ticket exists for the token, or the ticket's op is not "upload".
            RequestEntityTooLarge: The declared or actual body size exceeds the ticket's max_bytes.
        """
        ticket = StorageTicketService.get_ticket(token)
        if ticket is None:
            raise Forbidden("Invalid or expired token")
        if ticket.op != "upload":
            raise Forbidden("This token is not valid for upload")

        # The limit may arrive as a numeric string after the ticket's serialization
        # round-trip; normalise it so the size comparisons below never mix int and str.
        max_bytes = int(ticket.max_bytes) if ticket.max_bytes is not None else None

        if max_bytes is not None:
            # Reject on the declared size first so an oversized body is not buffered.
            declared_length = request.content_length
            if declared_length is not None and declared_length > max_bytes:
                raise RequestEntityTooLarge(f"Upload exceeds maximum size of {max_bytes} bytes")

        content = request.get_data()

        # Content-Length can be absent, so the buffered size is still checked.
        if max_bytes is not None and len(content) > max_bytes:
            raise RequestEntityTooLarge(f"Upload exceeds maximum size of {max_bytes} bytes")

        storage.save(ticket.storage_key, content)
        return Response(status=204)

View File

@@ -1,102 +0,0 @@
"""Unified file proxy controller for storage operations.
This controller handles file download and upload operations when the underlying
storage backend doesn't support presigned URLs. It verifies signed proxy URLs
generated by FilePresignStorage and streams files to/from storage.
Endpoints:
GET /files/storage/{filename}/download - Download a file
PUT /files/storage/{filename}/upload - Upload a file
"""
from urllib.parse import quote, unquote
from flask import Response, request
from flask_restx import Resource
from pydantic import BaseModel, Field
from werkzeug.exceptions import Forbidden, NotFound
from controllers.files import files_ns
from extensions.ext_storage import storage
from extensions.storage.file_presign_storage import FilePresignStorage
DEFAULT_REF_TEMPLATE_SWAGGER_2_0 = "#/definitions/{model}"
class StorageProxyQuery(BaseModel):
    """Signature parameters read from the query string of a storage proxy URL."""

    # All three fields are required; they are passed to the signature check
    # performed by the download and upload endpoints.
    timestamp: str = Field(description="Unix timestamp used in the signature")
    nonce: str = Field(description="Random string for signature")
    sign: str = Field(description="HMAC signature")
# Register the query model's JSON schema on the files namespace under the model's
# class name, using the Swagger 2.0 "#/definitions/{model}" reference template.
files_ns.schema_model(
StorageProxyQuery.__name__,
StorageProxyQuery.model_json_schema(ref_template=DEFAULT_REF_TEMPLATE_SWAGGER_2_0),
)
@files_ns.route("/storage/<path:filename>/download")
class StorageFileDownloadApi(Resource):
    """Serve downloads requested through signed proxy URLs."""

    def get(self, filename: str):
        """Stream a stored file back to the client after checking the URL signature.

        Raises Forbidden when the signature check fails and NotFound when the
        storage backend raises FileNotFoundError for the requested key.
        """
        storage_key = unquote(filename)
        query = StorageProxyQuery.model_validate(request.args.to_dict(flat=True))

        signature_ok = FilePresignStorage.verify_signature(
            filename=storage_key,
            operation="download",
            timestamp=query.timestamp,
            nonce=query.nonce,
            sign=query.sign,
        )
        if not signature_ok:
            raise Forbidden("Invalid or expired download link")

        try:
            stream = storage.load_stream(storage_key)
        except FileNotFoundError:
            raise NotFound("File not found")

        # Only the last path segment is offered to the client as the download name.
        basename = storage_key.split("/")[-1]
        disposition = f"attachment; filename*=UTF-8''{quote(basename)}"

        return Response(
            stream,
            mimetype="application/octet-stream",
            direct_passthrough=True,
            headers={"Content-Disposition": disposition},
        )
@files_ns.route("/storage/<path:filename>/upload")
class StorageFileUploadApi(Resource):
    """Accept uploads requested through signed proxy URLs."""

    def put(self, filename: str):
        """Save the request body to storage after checking the URL signature.

        Raises Forbidden when the signature check fails; responds with an
        empty 204 once the content has been saved.
        """
        storage_key = unquote(filename)
        query = StorageProxyQuery.model_validate(request.args.to_dict(flat=True))

        signature_ok = FilePresignStorage.verify_signature(
            filename=storage_key,
            operation="upload",
            timestamp=query.timestamp,
            nonce=query.nonce,
            sign=query.sign,
        )
        if not signature_ok:
            raise Forbidden("Invalid or expired upload link")

        body = request.get_data()
        storage.save(storage_key, body)

        return Response(status=204)

View File

@@ -1,8 +1,8 @@
"""Storage wrapper that provides presigned URL support with fallback to signed proxy URLs.
"""Storage wrapper that provides presigned URL support with fallback to ticket-based URLs.
This is the unified presign wrapper for all storage operations. When the underlying
storage backend doesn't support presigned URLs (raises NotImplementedError), it falls
back to generating signed proxy URLs that route through Dify's file proxy endpoints.
back to generating ticket-based URLs that route through Dify's file proxy endpoints.
Usage:
from extensions.storage.file_presign_storage import FilePresignStorage
@@ -12,101 +12,45 @@ Usage:
download_url = presign_storage.get_download_url("path/to/file.txt", expires_in=3600)
upload_url = presign_storage.get_upload_url("path/to/file.txt", expires_in=3600)
The proxy URLs follow the format:
{FILES_URL}/files/storage/{encoded_filename}/(download|upload)?timestamp=...&nonce=...&sign=...
When the underlying storage doesn't support presigned URLs, the fallback URLs follow the format:
{FILES_URL}/files/storage-files/{token}
Signature format:
HMAC-SHA256(SECRET_KEY, "storage-file|{operation}|{filename}|{timestamp}|{nonce}")
The token is a UUID that maps to the real storage key in Redis.
"""
import base64
import hashlib
import hmac
import os
import time
import urllib.parse
from configs import dify_config
from extensions.storage.storage_wrapper import StorageWrapper
class FilePresignStorage(StorageWrapper):
"""Storage wrapper that provides presigned URL support with proxy fallback.
"""Storage wrapper that provides presigned URL support with ticket fallback.
If the wrapped storage supports presigned URLs, delegates to it.
Otherwise, generates signed proxy URLs for both download and upload operations.
Otherwise, generates ticket-based URLs for both download and upload operations.
"""
SIGNATURE_PREFIX = "storage-file"
def get_download_url(self, filename: str, expires_in: int = 3600) -> str:
"""Get a presigned download URL, falling back to proxy URL if not supported."""
"""Get a presigned download URL, falling back to ticket URL if not supported."""
try:
return self._storage.get_download_url(filename, expires_in)
except NotImplementedError:
return self._generate_signed_proxy_url(filename, "download", expires_in)
from services.storage_ticket_service import StorageTicketService
return StorageTicketService.create_download_url(filename, expires_in=expires_in)
def get_download_urls(self, filenames: list[str], expires_in: int = 3600) -> list[str]:
"""Get presigned download URLs for multiple files."""
try:
return self._storage.get_download_urls(filenames, expires_in)
except NotImplementedError:
return [self._generate_signed_proxy_url(f, "download", expires_in) for f in filenames]
from services.storage_ticket_service import StorageTicketService
return [StorageTicketService.create_download_url(f, expires_in=expires_in) for f in filenames]
def get_upload_url(self, filename: str, expires_in: int = 3600) -> str:
"""Get a presigned upload URL, falling back to proxy URL if not supported."""
"""Get a presigned upload URL, falling back to ticket URL if not supported."""
try:
return self._storage.get_upload_url(filename, expires_in)
except NotImplementedError:
return self._generate_signed_proxy_url(filename, "upload", expires_in)
from services.storage_ticket_service import StorageTicketService
def _generate_signed_proxy_url(self, filename: str, operation: str, expires_in: int = 3600) -> str:
"""Generate a signed proxy URL for file operations.
Args:
filename: The storage key/path
operation: Either "download" or "upload"
expires_in: URL validity duration in seconds
Returns:
Signed proxy URL string
"""
base_url = dify_config.FILES_URL
encoded_filename = urllib.parse.quote(filename, safe="")
url = f"{base_url}/files/storage/{encoded_filename}/{operation}"
timestamp = str(int(time.time()))
nonce = os.urandom(16).hex()
sign = self._create_signature(operation, filename, timestamp, nonce)
query = urllib.parse.urlencode({"timestamp": timestamp, "nonce": nonce, "sign": sign})
return f"{url}?{query}"
@classmethod
def _create_signature(cls, operation: str, filename: str, timestamp: str, nonce: str) -> str:
"""Create HMAC signature for the proxy URL."""
key = dify_config.SECRET_KEY.encode()
msg = f"{cls.SIGNATURE_PREFIX}|{operation}|{filename}|{timestamp}|{nonce}"
sign = hmac.new(key, msg.encode(), hashlib.sha256).digest()
return base64.urlsafe_b64encode(sign).decode()
@classmethod
def verify_signature(cls, *, operation: str, filename: str, timestamp: str, nonce: str, sign: str) -> bool:
"""Verify the signature of a proxy URL.
Args:
operation: The operation type ("download" or "upload")
filename: The storage key/path
timestamp: Unix timestamp string from the URL
nonce: Random nonce string from the URL
sign: Signature string from the URL
Returns:
True if signature is valid and not expired, False otherwise
"""
expected_sign = cls._create_signature(operation, filename, timestamp, nonce)
if not hmac.compare_digest(sign, expected_sign):
return False
current_time = int(time.time())
return current_time - int(timestamp) <= dify_config.FILES_ACCESS_TIMEOUT
return StorageTicketService.create_upload_url(filename, expires_in=expires_in)

View File

@@ -0,0 +1,159 @@
"""Storage ticket service for generating opaque download/upload URLs.
This service provides a ticket-based approach for file access. Instead of exposing
the real storage key in URLs, it generates a random UUID token and stores the mapping
in Redis with a TTL.
Usage:
from services.storage_ticket_service import StorageTicketService
# Generate a download ticket
url = StorageTicketService.create_download_url("path/to/file.txt", expires_in=300)
# Generate an upload ticket
url = StorageTicketService.create_upload_url("path/to/file.txt", expires_in=300, max_bytes=10*1024*1024)
URL format:
{FILES_URL}/files/storage-files/{token}
The token is validated by looking up the Redis key, which contains:
- op: "download" or "upload"
- storage_key: the real storage path
- max_bytes: (upload only) maximum allowed upload size
- filename: suggested filename for Content-Disposition header
"""
import json
import logging
from dataclasses import dataclass
from uuid import uuid4
from configs import dify_config
from extensions.ext_redis import redis_client
logger = logging.getLogger(__name__)
TICKET_KEY_PREFIX = "storage_files"
DEFAULT_DOWNLOAD_TTL = 300 # 5 minutes
DEFAULT_UPLOAD_TTL = 300 # 5 minutes
DEFAULT_MAX_UPLOAD_BYTES = 100 * 1024 * 1024 # 100MB
@dataclass
class StorageTicket:
"""Represents a storage access ticket."""
op: str # "download" or "upload"
storage_key: str
max_bytes: int | None = None # upload only
filename: str | None = None # suggested filename for download
def to_dict(self) -> dict:
data = {"op": self.op, "storage_key": self.storage_key}
if self.max_bytes is not None:
data["max_bytes"] = str(self.max_bytes)
if self.filename is not None:
data["filename"] = self.filename
return data
@classmethod
def from_dict(cls, data: dict) -> "StorageTicket":
return cls(
op=data["op"],
storage_key=data["storage_key"],
max_bytes=data.get("max_bytes"),
filename=data.get("filename"),
)
class StorageTicketService:
    """Issue storage access tickets and resolve tokens back into tickets."""

    @classmethod
    def create_download_url(
        cls,
        storage_key: str,
        *,
        expires_in: int = DEFAULT_DOWNLOAD_TTL,
        filename: str | None = None,
    ) -> str:
        """Store a download ticket and return the URL that carries its token.

        Args:
            storage_key: The real storage path
            expires_in: TTL in seconds for the stored ticket
            filename: Suggested download name; defaults to the last path
                segment of ``storage_key``

        Returns:
            Full URL with token
        """
        suggested_name = storage_key.rsplit("/", 1)[-1] if filename is None else filename
        download_ticket = StorageTicket(op="download", storage_key=storage_key, filename=suggested_name)
        return cls._build_url(cls._store_ticket(download_ticket, expires_in))

    @classmethod
    def create_upload_url(
        cls,
        storage_key: str,
        *,
        expires_in: int = DEFAULT_UPLOAD_TTL,
        max_bytes: int = DEFAULT_MAX_UPLOAD_BYTES,
    ) -> str:
        """Store an upload ticket and return the URL that carries its token.

        Args:
            storage_key: The real storage path
            expires_in: TTL in seconds for the stored ticket
            max_bytes: Maximum allowed upload size in bytes

        Returns:
            Full URL with token
        """
        upload_ticket = StorageTicket(op="upload", storage_key=storage_key, max_bytes=max_bytes)
        return cls._build_url(cls._store_ticket(upload_ticket, expires_in))

    @classmethod
    def get_ticket(cls, token: str) -> StorageTicket | None:
        """Look up the ticket stored for a token.

        Args:
            token: The token from the URL

        Returns:
            The stored StorageTicket, or None when no value exists for the token
            or when reading/decoding it raises (a warning is logged in that case)
        """
        redis_key = cls._ticket_key(token)
        try:
            raw = redis_client.get(redis_key)
            if raw is None:
                return None
            # The client may return bytes or str; decode bytes before parsing.
            payload = raw.decode("utf-8") if isinstance(raw, bytes) else raw
            return StorageTicket.from_dict(json.loads(payload))
        except Exception:
            logger.warning("Failed to retrieve storage ticket: %s", token, exc_info=True)
            return None

    @classmethod
    def _store_ticket(cls, ticket: StorageTicket, ttl: int) -> str:
        """Save the ticket as JSON under a fresh UUID token with the given TTL; return the token."""
        token = str(uuid4())
        redis_key = cls._ticket_key(token)
        redis_client.setex(redis_key, ttl, json.dumps(ticket.to_dict()))
        return token

    @classmethod
    def _ticket_key(cls, token: str) -> str:
        """Return the Redis key under which a token's ticket is stored."""
        return f"{TICKET_KEY_PREFIX}:{token}"

    @classmethod
    def _build_url(cls, token: str) -> str:
        """Return the storage-files URL for a token, rooted at the configured FILES_URL."""
        return f"{dify_config.FILES_URL}/files/storage-files/{token}"

View File

@@ -1,4 +1,4 @@
import time
from unittest.mock import MagicMock, patch
from uuid import uuid4
import pytest
@@ -6,7 +6,7 @@ import pytest
from configs import dify_config
from core.app_assets.storage import AppAssetStorage, AssetPath
from extensions.storage.base_storage import BaseStorage
from extensions.storage.file_presign_storage import FilePresignStorage
from services.storage_ticket_service import StorageTicket, StorageTicketService
class DummyStorage(BaseStorage):
@@ -70,96 +70,133 @@ def test_asset_path_validation():
AssetPath.draft(tenant_id=tenant_id, app_id=app_id, node_id="not-a-uuid")
def test_file_presign_signature_verification(monkeypatch: pytest.MonkeyPatch):
"""Test FilePresignStorage signature creation and verification."""
monkeypatch.setattr(dify_config, "SECRET_KEY", "test-secret-key", raising=False)
monkeypatch.setattr(dify_config, "FILES_ACCESS_TIMEOUT", 300, raising=False)
def test_storage_ticket_service(monkeypatch: pytest.MonkeyPatch):
"""Test StorageTicketService creates and retrieves tickets."""
monkeypatch.setattr(dify_config, "FILES_URL", "http://files.local", raising=False)
filename = "test/path/file.txt"
timestamp = str(int(time.time()))
nonce = "test-nonce"
mock_redis = MagicMock()
stored_data = {}
# Test download signature
sign = FilePresignStorage._create_signature("download", filename, timestamp, nonce)
assert FilePresignStorage.verify_signature(
filename=filename,
operation="download",
timestamp=timestamp,
nonce=nonce,
sign=sign,
)
def mock_setex(key, ttl, value):
stored_data[key] = value
# Test upload signature
upload_sign = FilePresignStorage._create_signature("upload", filename, timestamp, nonce)
assert FilePresignStorage.verify_signature(
filename=filename,
operation="upload",
timestamp=timestamp,
nonce=nonce,
sign=upload_sign,
)
def mock_get(key):
return stored_data.get(key)
# Test expired signature
expired_timestamp = str(int(time.time()) - 400)
expired_sign = FilePresignStorage._create_signature("download", filename, expired_timestamp, nonce)
assert not FilePresignStorage.verify_signature(
filename=filename,
operation="download",
timestamp=expired_timestamp,
nonce=nonce,
sign=expired_sign,
)
mock_redis.setex = mock_setex
mock_redis.get = mock_get
# Test wrong signature
assert not FilePresignStorage.verify_signature(
filename=filename,
operation="download",
timestamp=timestamp,
nonce=nonce,
sign="wrong-signature",
)
with patch("services.storage_ticket_service.redis_client", mock_redis):
# Test download URL creation
url = StorageTicketService.create_download_url("test/path/file.txt", expires_in=300, filename="file.txt")
assert url.startswith("http://files.local/files/storage-files/")
token = url.split("/")[-1]
# Verify ticket was stored
ticket = StorageTicketService.get_ticket(token)
assert ticket is not None
assert ticket.op == "download"
assert ticket.storage_key == "test/path/file.txt"
assert ticket.filename == "file.txt"
# Test upload URL creation
upload_url = StorageTicketService.create_upload_url("test/upload.txt", expires_in=300, max_bytes=1024)
upload_token = upload_url.split("/")[-1]
upload_ticket = StorageTicketService.get_ticket(upload_token)
assert upload_ticket is not None
assert upload_ticket.op == "upload"
assert upload_ticket.storage_key == "test/upload.txt"
assert upload_ticket.max_bytes == 1024
def test_signed_proxy_url_generation(monkeypatch: pytest.MonkeyPatch):
"""Test that AppAssetStorage generates correct proxy URLs when presign is not supported."""
def test_storage_ticket_not_found(monkeypatch: pytest.MonkeyPatch):
    """An unknown token must resolve to no ticket."""
    redis_stub = MagicMock()
    # A missing key is reported by the client as None.
    redis_stub.get.return_value = None

    with patch("services.storage_ticket_service.redis_client", redis_stub):
        missing = StorageTicketService.get_ticket("invalid-token")
        assert missing is None
def test_ticket_url_generation(monkeypatch: pytest.MonkeyPatch):
"""Test that AppAssetStorage generates correct ticket URLs when presign is not supported."""
tenant_id = str(uuid4())
app_id = str(uuid4())
resource_id = str(uuid4())
asset_path = AssetPath.draft(tenant_id, app_id, resource_id)
monkeypatch.setattr(dify_config, "SECRET_KEY", "test-secret-key", raising=False)
monkeypatch.setattr(dify_config, "FILES_ACCESS_TIMEOUT", 300, raising=False)
monkeypatch.setattr(dify_config, "FILES_URL", "http://files.local", raising=False)
storage = AppAssetStorage(DummyStorage(), redis_client=DummyRedis())
url = storage.get_download_url(asset_path, expires_in=120)
mock_redis = MagicMock()
mock_redis.setex = MagicMock()
# URL should be a proxy URL since DummyStorage doesn't support presign
storage_key = asset_path.get_storage_key()
assert url.startswith("http://files.local/files/storage/")
assert "/download?" in url
assert "timestamp=" in url
assert "nonce=" in url
assert "sign=" in url
with patch("services.storage_ticket_service.redis_client", mock_redis):
storage = AppAssetStorage(DummyStorage(), redis_client=DummyRedis())
url = storage.get_download_url(asset_path, expires_in=120)
# URL should be a ticket URL since DummyStorage doesn't support presign
assert url.startswith("http://files.local/files/storage-files/")
# Token should be a UUID
token = url.split("/")[-1]
assert len(token) == 36 # UUID format
def test_upload_url_generation(monkeypatch: pytest.MonkeyPatch):
"""Test that AppAssetStorage generates correct upload URLs."""
def test_upload_ticket_url_generation(monkeypatch: pytest.MonkeyPatch):
"""Test that AppAssetStorage generates correct upload ticket URLs."""
tenant_id = str(uuid4())
app_id = str(uuid4())
resource_id = str(uuid4())
asset_path = AssetPath.draft(tenant_id, app_id, resource_id)
monkeypatch.setattr(dify_config, "SECRET_KEY", "test-secret-key", raising=False)
monkeypatch.setattr(dify_config, "FILES_ACCESS_TIMEOUT", 300, raising=False)
monkeypatch.setattr(dify_config, "FILES_URL", "http://files.local", raising=False)
storage = AppAssetStorage(DummyStorage(), redis_client=DummyRedis())
url = storage.get_upload_url(asset_path, expires_in=120)
mock_redis = MagicMock()
mock_redis.setex = MagicMock()
# URL should be a proxy URL since DummyStorage doesn't support presign
assert url.startswith("http://files.local/files/storage/")
assert "/upload?" in url
assert "timestamp=" in url
assert "nonce=" in url
assert "sign=" in url
with patch("services.storage_ticket_service.redis_client", mock_redis):
storage = AppAssetStorage(DummyStorage(), redis_client=DummyRedis())
url = storage.get_upload_url(asset_path, expires_in=120)
# URL should be a ticket URL since DummyStorage doesn't support presign
assert url.startswith("http://files.local/files/storage-files/")
# Token should be a UUID
token = url.split("/")[-1]
assert len(token) == 36 # UUID format
def test_storage_ticket_dataclass():
    """Round-trip download and upload tickets through to_dict/from_dict."""
    # Download ticket: only the fields that were set appear in the dict.
    download_ticket = StorageTicket(op="download", storage_key="path/to/file.txt", filename="file.txt")
    serialized = download_ticket.to_dict()
    expected = {
        "op": "download",
        "storage_key": "path/to/file.txt",
        "filename": "file.txt",
    }
    assert serialized == expected

    roundtrip = StorageTicket.from_dict(serialized)
    assert roundtrip.op == download_ticket.op
    assert roundtrip.storage_key == download_ticket.storage_key
    assert roundtrip.filename == download_ticket.filename
    assert roundtrip.max_bytes is None

    # Test upload ticket with max_bytes: the limit is expected to stay an integer.
    upload_ticket = StorageTicket(op="upload", storage_key="path/to/upload.txt", max_bytes=1024)
    upload_serialized = upload_ticket.to_dict()
    assert upload_serialized["max_bytes"] == 1024

    upload_roundtrip = StorageTicket.from_dict(upload_serialized)
    assert upload_roundtrip.max_bytes == 1024