diff --git a/api/core/app/workflow/file_runtime.py b/api/core/app/workflow/file_runtime.py index 48eff44291..6f8166c067 100644 --- a/api/core/app/workflow/file_runtime.py +++ b/api/core/app/workflow/file_runtime.py @@ -27,8 +27,10 @@ if TYPE_CHECKING: class DifyWorkflowFileRuntime(WorkflowFileRuntimeProtocol): """Production runtime wiring for ``dify_graph.file``. - When a request-scoped file access scope is present, opaque file references are - re-validated against the database before URLs are signed or storage keys are used. + Opaque file references are resolved back to canonical database records before + URLs are signed or storage keys are used. When a request-scoped file access + scope is present, those lookups additionally enforce tenant and end-user + ownership filters. """ _file_access_controller: FileAccessControllerProtocol @@ -141,8 +143,6 @@ class DifyWorkflowFileRuntime(WorkflowFileRuntimeProtocol): parsed_reference = parse_file_reference(file.reference) if parsed_reference is None: raise ValueError("Missing file reference") - if parsed_reference.storage_key and self._file_access_controller.current_scope() is None: - return parsed_reference.storage_key record_id = parsed_reference.record_id with session_factory.create_session() as session: diff --git a/api/core/datasource/datasource_manager.py b/api/core/datasource/datasource_manager.py index ac0d45be03..6cb9bb868c 100644 --- a/api/core/datasource/datasource_manager.py +++ b/api/core/datasource/datasource_manager.py @@ -361,8 +361,9 @@ class DatasourceManager: type=FileType.CUSTOM, transfer_method=FileTransferMethod.LOCAL_FILE, remote_url=upload_file.source_url, - reference=build_file_reference(record_id=str(upload_file.id), storage_key=upload_file.key), + reference=build_file_reference(record_id=str(upload_file.id)), size=upload_file.size, + storage_key=upload_file.key, url=upload_file.source_url, ) return file_info diff --git a/api/core/rag/index_processor/processor/paragraph_index_processor.py b/api/core/rag/index_processor/processor/paragraph_index_processor.py index b952206310..571042aa76 100644 --- a/api/core/rag/index_processor/processor/paragraph_index_processor.py +++ b/api/core/rag/index_processor/processor/paragraph_index_processor.py @@ -614,9 +614,9 @@ class ParagraphIndexProcessor(BaseIndexProcessor): remote_url=upload_file.source_url, reference=build_file_reference( record_id=str(upload_file.id), - storage_key=upload_file.key, ), size=upload_file.size, + storage_key=upload_file.key, ) file_objects.append(file_obj) except Exception as e: diff --git a/api/core/rag/retrieval/dataset_retrieval.py b/api/core/rag/retrieval/dataset_retrieval.py index 4de09d09ba..638607b3ff 100644 --- a/api/core/rag/retrieval/dataset_retrieval.py +++ b/api/core/rag/retrieval/dataset_retrieval.py @@ -527,9 +527,9 @@ class DatasetRetrieval: remote_url=upload_file.source_url, reference=build_file_reference( record_id=str(upload_file.id), - storage_key=upload_file.key, ), size=upload_file.size, + storage_key=upload_file.key, url=sign_upload_file(upload_file.id, upload_file.extension), ) context_files.append(attachment_info) diff --git a/api/core/tools/tool_file_manager.py b/api/core/tools/tool_file_manager.py index fef899bb76..363584acf8 100644 --- a/api/core/tools/tool_file_manager.py +++ b/api/core/tools/tool_file_manager.py @@ -31,11 +31,12 @@ class ToolFileManager: type=get_file_type_by_mime_type(tool_file.mimetype), transfer_method=FileTransferMethod.TOOL_FILE, remote_url=tool_file.original_url, - reference=build_file_reference(record_id=str(tool_file.id), storage_key=tool_file.file_key), + reference=build_file_reference(record_id=str(tool_file.id)), filename=tool_file.name, extension=extension, mime_type=tool_file.mimetype, size=tool_file.size, + storage_key=tool_file.file_key, ) @staticmethod diff --git a/api/core/tools/workflow_as_tool/tool.py b/api/core/tools/workflow_as_tool/tool.py index 9230f7c915..22b8a5206e 100644 --- a/api/core/tools/workflow_as_tool/tool.py +++ b/api/core/tools/workflow_as_tool/tool.py @@ -24,6 +24,7 @@ from dify_graph.model_runtime.entities.llm_entities import LLMUsage, LLMUsageMet from factories.file_factory import build_from_mapping from models import Account, Tenant from models.model import App, EndUser +from models.utils.file_input_compat import build_file_from_stored_mapping from models.workflow import Workflow logger = logging.getLogger(__name__) @@ -292,10 +293,12 @@ class WorkflowTool(Tool): if file: try: file_var_list = [ - # Workflow-as-tool can receive stored file payloads - # from older runs that still include tenant_id. - File.model_validate({key: item for key, item in f.items() if key != "tenant_id"}) + build_file_from_stored_mapping( + file_mapping=cast(Mapping[str, Any], f), + tenant_id=str(self.runtime.tenant_id), + ) for f in file + if isinstance(f, Mapping) ] for file in file_var_list: file_dict: dict[str, str | None] = { @@ -306,6 +309,8 @@ class WorkflowTool(Tool): file_dict["tool_file_id"] = resolve_file_record_id(file.reference) elif file.transfer_method == FileTransferMethod.LOCAL_FILE: file_dict["upload_file_id"] = resolve_file_record_id(file.reference) + elif file.transfer_method == FileTransferMethod.DATASOURCE_FILE: + file_dict["datasource_file_id"] = resolve_file_record_id(file.reference) elif file.transfer_method == FileTransferMethod.REMOTE_URL: file_dict["url"] = file.generate_url() diff --git a/api/core/workflow/node_runtime.py b/api/core/workflow/node_runtime.py index c6e9032f72..2e0a3c8928 100644 --- a/api/core/workflow/node_runtime.py +++ b/api/core/workflow/node_runtime.py @@ -247,7 +247,7 @@ class DifyRetrieverAttachmentLoader(RetrieverAttachmentLoaderProtocol): "type": FileType.IMAGE, "transfer_method": FileTransferMethod.LOCAL_FILE, "remote_url": upload_file.source_url, - "reference": build_file_reference(record_id=str(upload_file.id), storage_key=upload_file.key), + "reference": build_file_reference(record_id=str(upload_file.id)), "size": upload_file.size, } ) diff --git a/api/dify_graph/file/models.py b/api/dify_graph/file/models.py index 0437b481d9..570921003d 100644 --- a/api/dify_graph/file/models.py +++ b/api/dify_graph/file/models.py @@ -48,7 +48,7 @@ class FileUploadConfig(BaseModel): def _parse_reference(reference: str | None) -> tuple[str | None, str | None]: - """Best-effort parser for legacy aliases backed by the opaque file reference.""" + """Best-effort parser for record references and historical storage-key payloads.""" if not reference: return None, None @@ -91,6 +91,8 @@ class File(BaseModel): # `remote_url` attribute must not be `None`. remote_url: str | None = None # remote url # Opaque workflow-layer reference for files resolved outside ``dify_graph``. + # New payloads only carry the backing record id; historical payloads may + # still include storage_key and must remain readable. reference: str | None = None filename: str | None = None extension: str | None = Field(default=None, description="File extension, should contain dot") diff --git a/api/factories/file_factory/builders.py b/api/factories/file_factory/builders.py index 91e44c8d8f..d2c60aebb7 100644 --- a/api/factories/file_factory/builders.py +++ b/api/factories/file_factory/builders.py @@ -155,8 +155,9 @@ def _build_from_local_file( type=file_type, transfer_method=transfer_method, remote_url=row.source_url, - reference=build_file_reference(record_id=str(row.id), storage_key=row.key), + reference=build_file_reference(record_id=str(row.id)), size=row.size, + storage_key=row.key, ) @@ -201,8 +202,9 @@ def _build_from_remote_url( type=file_type, transfer_method=transfer_method, remote_url=helpers.get_signed_file_url(upload_file_id=str(upload_file_id)), - reference=build_file_reference(record_id=str(upload_file.id), storage_key=upload_file.key), + reference=build_file_reference(record_id=str(upload_file.id)), size=upload_file.size, + storage_key=upload_file.key, ) url = mapping.get("url") or mapping.get("remote_url") @@ -264,10 +266,11 @@ def _build_from_tool_file( type=file_type, transfer_method=transfer_method, remote_url=tool_file.original_url, - reference=build_file_reference(record_id=str(tool_file.id), storage_key=tool_file.file_key), + reference=build_file_reference(record_id=str(tool_file.id)), extension=extension, mime_type=tool_file.mimetype, size=tool_file.size, + storage_key=tool_file.file_key, ) @@ -305,10 +308,11 @@ def _build_from_datasource_file( type=file_type, transfer_method=FileTransferMethod.TOOL_FILE, remote_url=datasource_file.source_url, - reference=build_file_reference(record_id=str(datasource_file.id), storage_key=datasource_file.key), + reference=build_file_reference(record_id=str(datasource_file.id)), extension=extension, mime_type=datasource_file.mime_type, size=datasource_file.size, + storage_key=datasource_file.key, url=datasource_file.source_url, ) diff --git a/api/factories/file_factory/storage_keys.py b/api/factories/file_factory/storage_keys.py index 8b6a3c25d2..17edf54278 100644 --- a/api/factories/file_factory/storage_keys.py +++ b/api/factories/file_factory/storage_keys.py @@ -51,9 +51,10 @@ class StorageKeyLoader: """Hydrate storage keys by loading their backing file rows in batches. The sequence shape is preserved. Each file is updated in place with a - canonical reference and storage key loaded from an authorized database - row. Tenant scoping is enforced by this loader's context rather than by - embedding tenant identity inside graph-layer ``File`` values. + canonical record reference and storage key loaded from an authorized + database row. Tenant scoping is enforced by this loader's context + rather than by embedding tenant identity or storage paths inside + graph-layer ``File`` values. For best performance, prefer batches smaller than 1000 files. """ @@ -93,7 +94,6 @@ class StorageKeyLoader: raise ValueError(f"Upload file not found for id: {model_id}") file.reference = build_file_reference( record_id=str(upload_file_row.id), - storage_key=upload_file_row.key, ) file.storage_key = upload_file_row.key elif file.transfer_method == FileTransferMethod.TOOL_FILE: @@ -102,6 +102,5 @@ class StorageKeyLoader: raise ValueError(f"Tool file not found for id: {model_id}") file.reference = build_file_reference( record_id=str(tool_file_row.id), - storage_key=tool_file_row.file_key, ) file.storage_key = tool_file_row.file_key diff --git a/api/models/utils/file_input_compat.py b/api/models/utils/file_input_compat.py index 2eb308b837..2c73dd1558 100644 --- a/api/models/utils/file_input_compat.py +++ b/api/models/utils/file_input_compat.py @@ -43,6 +43,50 @@ def resolve_file_mapping_tenant_id( return tenant_resolver() +def build_file_from_stored_mapping( + *, + file_mapping: Mapping[str, Any], + tenant_id: str, +) -> File: + """ + Canonicalize a persisted file payload against the current tenant context. + + Stored JSON rows can outlive file schema changes, so rebuild storage-backed + files through the workflow factory instead of trusting serialized metadata. + Pure external ``REMOTE_URL`` payloads without a backing upload row are + passed through because there is no server-owned record to rebind. + """ + + # NOTE: It's not the best way to implement this, but it's the only way to avoid circular import for now. + from factories import file_factory + + mapping = dict(file_mapping) + mapping.pop("tenant_id", None) + record_id = resolve_file_record_id(mapping) + transfer_method = FileTransferMethod.value_of(mapping["transfer_method"]) + + if transfer_method == FileTransferMethod.TOOL_FILE and record_id: + mapping["tool_file_id"] = record_id + elif transfer_method in [FileTransferMethod.LOCAL_FILE, FileTransferMethod.REMOTE_URL] and record_id: + mapping["upload_file_id"] = record_id + elif transfer_method == FileTransferMethod.DATASOURCE_FILE and record_id: + mapping["datasource_file_id"] = record_id + + if transfer_method == FileTransferMethod.REMOTE_URL and record_id is None: + remote_url = mapping.get("remote_url") + if not isinstance(remote_url, str) or not remote_url: + url = mapping.get("url") + if isinstance(url, str) and url: + mapping["remote_url"] = url + return File.model_validate(mapping) + + return file_factory.build_from_mapping( + mapping=mapping, + tenant_id=tenant_id, + access_controller=_get_file_access_controller(), + ) + + def build_file_from_input_mapping( *, file_mapping: Mapping[str, Any], @@ -57,20 +101,16 @@ def build_file_from_input_mapping( boundary, instead of pushing tenant data back into `dify_graph.file.File`. """ - # NOTE: It's not the best way to implement this, but it's the only way to avoid circular import for now. - from factories import file_factory + transfer_method = FileTransferMethod.value_of(file_mapping["transfer_method"]) + record_id = resolve_file_record_id(file_mapping) + if transfer_method == FileTransferMethod.REMOTE_URL and record_id is None: + return build_file_from_stored_mapping( + file_mapping=file_mapping, + tenant_id="", + ) - mapping = dict(file_mapping) - record_id = resolve_file_record_id(mapping) - - if mapping["transfer_method"] == FileTransferMethod.TOOL_FILE: - mapping["tool_file_id"] = record_id - elif mapping["transfer_method"] in [FileTransferMethod.LOCAL_FILE, FileTransferMethod.REMOTE_URL]: - mapping["upload_file_id"] = record_id - - tenant_id = resolve_file_mapping_tenant_id(file_mapping=mapping, tenant_resolver=tenant_resolver) - return file_factory.build_from_mapping( - mapping=mapping, + tenant_id = resolve_file_mapping_tenant_id(file_mapping=file_mapping, tenant_resolver=tenant_resolver) + return build_file_from_stored_mapping( + file_mapping=file_mapping, tenant_id=tenant_id, - access_controller=_get_file_access_controller(), ) diff --git a/api/models/workflow.py b/api/models/workflow.py index e0f4a1df88..889542a612 100644 --- a/api/models/workflow.py +++ b/api/models/workflow.py @@ -58,6 +58,7 @@ from .base import Base, DefaultFieldsMixin, TypeBase from .engine import db from .enums import CreatorUserRole, DraftVariableType, ExecutionOffLoadType, WorkflowRunTriggeredFrom from .types import EnumText, LongText, StringUUID +from .utils.file_input_compat import build_file_from_stored_mapping logger = logging.getLogger(__name__) @@ -65,6 +66,15 @@ SerializedWorkflowValue = dict[str, Any] SerializedWorkflowVariables = dict[str, SerializedWorkflowValue] +def _resolve_workflow_app_tenant_id(app_id: str) -> str: + from .model import App + + tenant_id = db.session.scalar(select(App.tenant_id).where(App.id == app_id)) + if not tenant_id: + raise ValueError(f"Unable to resolve tenant_id for app {app_id}") + return tenant_id + + class WorkflowContentDict(TypedDict): graph: Mapping[str, Any] features: dict[str, Any] @@ -1560,10 +1570,9 @@ class WorkflowDraftVariable(Base): def _loads_value(self) -> Segment: value = json.loads(self.value) - return self.build_segment_with_type(self.value_type, value) + return self.build_segment_from_serialized_value(self.value_type, value) - @staticmethod - def rebuild_file_types(value: Any): + def _rebuild_file_types(self, value: Any): # NOTE(QuantumGhost): Temporary workaround for structured data handling. # By this point, `output` has been converted to dict by # `WorkflowEntry.handle_special_values`, so we need to @@ -1577,8 +1586,59 @@ class WorkflowDraftVariable(Base): if isinstance(value, dict): if not maybe_file_object(value): return cast(Any, value) - # Older serialized File payloads may still carry the removed - # graph-level tenant_id field. Strip it at this deserialization edge. + tenant_id = _resolve_workflow_app_tenant_id(self.app_id) + return build_file_from_stored_mapping( + file_mapping=cast(dict[str, Any], value), + tenant_id=tenant_id, + ) + elif isinstance(value, list) and value: + value_list = cast(list[Any], value) + first: Any = value_list[0] + if not maybe_file_object(first): + return cast(Any, value) + tenant_id = _resolve_workflow_app_tenant_id(self.app_id) + file_list: list[File] = [] + for item in value_list: + file_list.append( + build_file_from_stored_mapping( + file_mapping=cast(dict[str, Any], item), + tenant_id=tenant_id, + ) + ) + return cast(Any, file_list) + else: + return cast(Any, value) + + def build_segment_from_serialized_value(self, segment_type: SegmentType, value: Any) -> Segment: + # Persisted draft variable rows may contain historical file payloads. + # Rebuild them through the file factory so tenant ownership, signed URLs, + # and storage-backed metadata come from canonical records instead of the + # serialized JSON blob. + if segment_type == SegmentType.FILE: + if isinstance(value, File): + return build_segment_with_type(segment_type, value) + elif isinstance(value, dict): + file = self._rebuild_file_types(value) + return build_segment_with_type(segment_type, file) + else: + raise TypeMismatchError(f"expected dict or File for FileSegment, got {type(value)}") + if segment_type == SegmentType.ARRAY_FILE: + if not isinstance(value, list): + raise TypeMismatchError(f"expected list for ArrayFileSegment, got {type(value)}") + file_list = self._rebuild_file_types(value) + return build_segment_with_type(segment_type=segment_type, value=file_list) + + return build_segment_with_type(segment_type=segment_type, value=value) + + @staticmethod + def rebuild_file_types(value: Any): + # Keep the class-level fallback for callers that only need lightweight + # structural reconstruction. Persisted draft-variable payloads should go + # through `build_segment_from_serialized_value()` so file metadata is + # rebuilt from canonical storage records. + if isinstance(value, dict): + if not maybe_file_object(value): + return cast(Any, value) normalized_file = dict(value) normalized_file.pop("tenant_id", None) return File.model_validate(normalized_file) @@ -1589,8 +1649,6 @@ class WorkflowDraftVariable(Base): return cast(Any, value) file_list: list[File] = [] for item in value_list: - # Keep the compatibility handling local to the payload rebuild - # path instead of weakening the File model itself. normalized_file = dict(cast(dict[str, Any], item)) normalized_file.pop("tenant_id", None) file_list.append(File.model_validate(normalized_file)) diff --git a/api/services/workflow_draft_variable_service.py b/api/services/workflow_draft_variable_service.py index 7d34cde645..4aea93204f 100644 --- a/api/services/workflow_draft_variable_service.py +++ b/api/services/workflow_draft_variable_service.py @@ -43,6 +43,7 @@ from libs.datetime_utils import naive_utc_now from libs.uuid_utils import uuidv7 from models import Account, App, Conversation from models.enums import DraftVariableType +from models.utils.file_input_compat import build_file_from_stored_mapping from models.workflow import Workflow, WorkflowDraftVariable, WorkflowDraftVariableFile, is_system_variable_editable from repositories.factory import DifyAPIRepositoryFactory from services.file_service import FileService @@ -185,7 +186,7 @@ class DraftVarLoader(VariableLoader): return (draft_var.node_id, draft_var.name), variable deserialized = json.loads(content) - segment = WorkflowDraftVariable.build_segment_with_type(variable_file.value_type, deserialized) + segment = draft_var.build_segment_from_serialized_value(variable_file.value_type, deserialized) variable = segment_to_variable( segment=segment, selector=draft_var.get_selector(), @@ -849,6 +850,12 @@ class DraftVariableSaver: self._user = user self._enclosing_node_id = enclosing_node_id + def _resolve_app_tenant_id(self) -> str: + tenant_id = self._session.scalar(select(App.tenant_id).where(App.id == self._app_id)) + if not tenant_id: + raise ValueError(f"Unable to resolve tenant_id for app {self._app_id}") + return tenant_id + def _create_dummy_output_variable(self): return WorkflowDraftVariable.new_node_variable( app_id=self._app_id, @@ -906,11 +913,13 @@ class DraftVariableSaver: if node_id == SYSTEM_VARIABLE_NODE_ID: if name == SystemVariableKey.FILES: # Here we know the type of variable must be `array[file]`, we - # just build files from the value. + # just rebuild files from the serialized payload. + tenant_id = self._resolve_app_tenant_id() files = [ - # Draft variable payloads may predate the removal of - # File.tenant_id, so normalize the raw dict first. - File.model_validate({key: item for key, item in v.items() if key != "tenant_id"}) + build_file_from_stored_mapping( + file_mapping=v, + tenant_id=tenant_id, + ) for v in value ] if files: diff --git a/api/tests/unit_tests/core/app/workflow/test_file_runtime.py b/api/tests/unit_tests/core/app/workflow/test_file_runtime.py index 4b87fece53..92c69b4383 100644 --- a/api/tests/unit_tests/core/app/workflow/test_file_runtime.py +++ b/api/tests/unit_tests/core/app/workflow/test_file_runtime.py @@ -168,25 +168,49 @@ def test_load_file_bytes_returns_bytes_and_rejects_non_bytes(monkeypatch: pytest runtime = _build_runtime() file = _build_file( transfer_method=FileTransferMethod.LOCAL_FILE, - reference=build_file_reference(record_id="upload-file-id", storage_key="storage-key"), + reference=build_file_reference(record_id="upload-file-id"), ) + session = MagicMock() + session.get.return_value = SimpleNamespace(key="canonical-storage-key") + + class _SessionContext: + def __enter__(self): + return session + + def __exit__(self, exc_type, exc, tb): + return False + + monkeypatch.setattr(file_runtime.session_factory, "create_session", lambda: _SessionContext()) monkeypatch.setattr(file_runtime.storage, "load", lambda *args, **kwargs: b"image-bytes") assert runtime.load_file_bytes(file=file) == b"image-bytes" + session.get.assert_called_with(UploadFile, "upload-file-id") monkeypatch.setattr(file_runtime.storage, "load", lambda *args, **kwargs: "not-bytes") with pytest.raises(ValueError, match="is not a bytes object"): runtime.load_file_bytes(file=file) -def test_resolve_storage_key_prefers_encoded_reference() -> None: +def test_resolve_storage_key_ignores_encoded_reference_when_unscoped(monkeypatch: pytest.MonkeyPatch) -> None: runtime = _build_runtime() file = _build_file( transfer_method=FileTransferMethod.LOCAL_FILE, - reference=build_file_reference(record_id="upload-file-id", storage_key="storage-key"), + reference=build_file_reference(record_id="upload-file-id", storage_key="tampered-storage-key"), ) + session = MagicMock() + session.get.return_value = SimpleNamespace(key="canonical-storage-key") - assert runtime._resolve_storage_key(file=file) == "storage-key" + class _SessionContext: + def __enter__(self): + return session + + def __exit__(self, exc_type, exc, tb): + return False + + monkeypatch.setattr(file_runtime.session_factory, "create_session", lambda: _SessionContext()) + + assert runtime._resolve_storage_key(file=file) == "canonical-storage-key" + session.get.assert_called_once_with(UploadFile, "upload-file-id") def test_resolve_storage_key_uses_canonical_record_when_scope_is_bound(monkeypatch: pytest.MonkeyPatch) -> None: diff --git a/api/tests/unit_tests/core/datasource/test_datasource_manager.py b/api/tests/unit_tests/core/datasource/test_datasource_manager.py index f0568c68e9..e01be9ed28 100644 --- a/api/tests/unit_tests/core/datasource/test_datasource_manager.py +++ b/api/tests/unit_tests/core/datasource/test_datasource_manager.py @@ -7,6 +7,7 @@ from contexts.wrapper import RecyclableContextVar from core.datasource.datasource_manager import DatasourceManager from core.datasource.entities.datasource_entities import DatasourceMessage, DatasourceProviderType from core.datasource.errors import DatasourceProviderNotFoundError +from core.workflow.file_reference import parse_file_reference from dify_graph.entities.workflow_node_execution import WorkflowNodeExecutionStatus from dify_graph.file import File from dify_graph.file.enums import FileTransferMethod, FileType @@ -660,6 +661,8 @@ def test_get_upload_file_by_id_builds_file(mocker): f = DatasourceManager.get_upload_file_by_id(file_id="fid", tenant_id="t1") assert f.related_id == "fid" assert f.extension == ".txt" + assert parse_file_reference(f.reference).storage_key is None + assert f.storage_key == "k" def test_get_upload_file_by_id_raises_when_missing(mocker): diff --git a/api/tests/unit_tests/core/tools/workflow_as_tool/test_tool.py b/api/tests/unit_tests/core/tools/workflow_as_tool/test_tool.py index cc00f79698..ff401ab358 100644 --- a/api/tests/unit_tests/core/tools/workflow_as_tool/test_tool.py +++ b/api/tests/unit_tests/core/tools/workflow_as_tool/test_tool.py @@ -24,7 +24,7 @@ from core.tools.entities.tool_entities import ( ) from core.tools.errors import ToolInvokeError from core.tools.workflow_as_tool.tool import WorkflowTool -from dify_graph.file import FILE_MODEL_IDENTITY +from dify_graph.file import FILE_MODEL_IDENTITY, FileTransferMethod, FileType class StubScalars: @@ -439,6 +439,32 @@ def _setup_transform_args_tool(monkeypatch: pytest.MonkeyPatch) -> WorkflowTool: def test_transform_args_valid_files(monkeypatch: pytest.MonkeyPatch): """Transform args into parameters and files payloads.""" tool = _setup_transform_args_tool(monkeypatch) + build_file_from_stored_mapping = MagicMock( + side_effect=[ + SimpleNamespace( + transfer_method=FileTransferMethod.TOOL_FILE, + type=FileType.IMAGE, + reference="tool-1", + generate_url=lambda: None, + ), + SimpleNamespace( + transfer_method=FileTransferMethod.LOCAL_FILE, + type=FileType.DOCUMENT, + reference="upload-1", + generate_url=lambda: None, + ), + SimpleNamespace( + transfer_method=FileTransferMethod.REMOTE_URL, + type=FileType.DOCUMENT, + reference=None, + generate_url=lambda: "https://example.com/a.pdf", + ), + ] + ) + monkeypatch.setattr( + "core.tools.workflow_as_tool.tool.build_file_from_stored_mapping", + build_file_from_stored_mapping, + ) params, files = tool._transform_args( { @@ -470,6 +496,8 @@ def test_transform_args_valid_files(monkeypatch: pytest.MonkeyPatch): assert any(file_item.get("tool_file_id") == "tool-1" for file_item in files) assert any(file_item.get("upload_file_id") == "upload-1" for file_item in files) assert any(file_item.get("url") == "https://example.com/a.pdf" for file_item in files) + assert build_file_from_stored_mapping.call_count == 3 + assert all(call.kwargs["tenant_id"] == "test_tool" for call in build_file_from_stored_mapping.call_args_list) def test_transform_args_invalid_files(monkeypatch: pytest.MonkeyPatch): diff --git a/api/tests/unit_tests/core/workflow/test_node_runtime.py b/api/tests/unit_tests/core/workflow/test_node_runtime.py index 5086c49cb2..5c7c7dc027 100644 --- a/api/tests/unit_tests/core/workflow/test_node_runtime.py +++ b/api/tests/unit_tests/core/workflow/test_node_runtime.py @@ -263,7 +263,7 @@ def test_dify_retriever_attachment_loader_builds_graph_files(monkeypatch: pytest assert mapping["id"] == "upload-file-id" assert mapping["transfer_method"] == FileTransferMethod.LOCAL_FILE assert mapping["type"] == FileType.IMAGE - assert parse_file_reference(mapping["reference"]).storage_key == "storage-key" + assert parse_file_reference(mapping["reference"]).storage_key is None def test_dify_tool_file_manager_resolves_conversation_id_for_tool_files(monkeypatch: pytest.MonkeyPatch) -> None: diff --git a/api/tests/unit_tests/factories/test_build_from_mapping.py b/api/tests/unit_tests/factories/test_build_from_mapping.py index c214c1c77e..2a759cca06 100644 --- a/api/tests/unit_tests/factories/test_build_from_mapping.py +++ b/api/tests/unit_tests/factories/test_build_from_mapping.py @@ -6,7 +6,7 @@ from httpx import Response from core.app.entities.app_invoke_entities import InvokeFrom, UserFrom from core.app.file_access import DatabaseFileAccessController, FileAccessScope, bind_file_access_scope -from core.workflow.file_reference import build_file_reference, resolve_file_record_id +from core.workflow.file_reference import build_file_reference, parse_file_reference, resolve_file_record_id from dify_graph.file import File, FileTransferMethod, FileType, FileUploadConfig from factories.file_factory.builders import build_from_mapping as _build_from_mapping from models import ToolFile, UploadFile @@ -109,6 +109,8 @@ def test_build_from_mapping_backward_compatibility(mock_upload_file): assert file.transfer_method == FileTransferMethod.LOCAL_FILE assert file.type == FileType.IMAGE assert resolve_file_record_id(file.reference) == TEST_UPLOAD_FILE_ID + assert parse_file_reference(file.reference).storage_key is None + assert file.storage_key == "test_key" def test_build_from_mapping_accepts_opaque_reference_for_local_file(mock_upload_file): @@ -139,6 +141,8 @@ def test_build_from_mapping_accepts_opaque_related_id_for_tool_file(mock_tool_fi assert file.transfer_method == FileTransferMethod.TOOL_FILE assert file.type == FileType.DOCUMENT assert resolve_file_record_id(file.reference) == TEST_TOOL_FILE_ID + assert parse_file_reference(file.reference).storage_key is None + assert file.storage_key == "tool_file.pdf" @pytest.mark.parametrize( diff --git a/api/tests/unit_tests/models/test_model.py b/api/tests/unit_tests/models/test_model.py index d48f3ef3ae..89cbc8a28f 100644 --- a/api/tests/unit_tests/models/test_model.py +++ b/api/tests/unit_tests/models/test_model.py @@ -211,3 +211,25 @@ def test_inputs_prefer_serialized_tenant_id_when_present( "tenant_id": "tenant-from-payload", "upload_file_id": "upload-1", } + + +@pytest.mark.parametrize("owner_cls", [Conversation, Message]) +def test_inputs_restore_external_remote_url_file_mappings(owner_cls: type[Conversation] | type[Message]) -> None: + owner = owner_cls(app_id="app-1") + owner.inputs = { + "file": { + "dify_model_identity": FILE_MODEL_IDENTITY, + "transfer_method": FileTransferMethod.REMOTE_URL, + "type": "document", + "url": "https://example.com/report.pdf", + "filename": "report.pdf", + "extension": ".pdf", + "mime_type": "application/pdf", + "size": 1, + } + } + + restored_file = owner.inputs["file"] + + assert restored_file.transfer_method == FileTransferMethod.REMOTE_URL + assert restored_file.remote_url == "https://example.com/report.pdf" diff --git a/api/tests/unit_tests/models/test_workflow.py b/api/tests/unit_tests/models/test_workflow.py index ab6b4dc3c3..93487c9252 100644 --- a/api/tests/unit_tests/models/test_workflow.py +++ b/api/tests/unit_tests/models/test_workflow.py @@ -5,6 +5,7 @@ from uuid import uuid4 from constants import HIDDEN_VALUE from core.helper import encrypter +from core.workflow.file_reference import build_file_reference from dify_graph.file.enums import FileTransferMethod, FileType from dify_graph.file.models import File from dify_graph.variables import FloatVariable, IntegerVariable, SecretVariable, StringVariable @@ -324,6 +325,43 @@ class TestWorkflowDraftVariableGetValue: # Verify the segments have the same type and the important fields match assert file_segment.value_type == retrieved_segment.value_type + def test_file_variable_rebuilds_storage_backed_payloads_with_app_tenant(self): + persisted_file = File( + id="test_file_id", + type=FileType.DOCUMENT, + transfer_method=FileTransferMethod.LOCAL_FILE, + reference=build_file_reference(record_id="upload-1", storage_key="legacy-storage-key"), + filename="test.txt", + extension=".txt", + mime_type="text/plain", + size=12, + ) + rebuilt_file = File( + id="test_file_id", + type=FileType.DOCUMENT, + transfer_method=FileTransferMethod.LOCAL_FILE, + reference=build_file_reference(record_id="upload-1"), + filename="test.txt", + extension=".txt", + mime_type="text/plain", + size=12, + storage_key="canonical-storage-key", + ) + draft_var = WorkflowDraftVariable() + draft_var.app_id = "app-1" + draft_var.set_value(build_segment(persisted_file)) + draft_var._WorkflowDraftVariable__value = None + + with ( + mock.patch("models.workflow._resolve_workflow_app_tenant_id", return_value="tenant-1"), + mock.patch("models.workflow.build_file_from_stored_mapping", return_value=rebuilt_file) as rebuild_file, + ): + retrieved_segment = draft_var.get_value() + + assert retrieved_segment.value == rebuilt_file + rebuild_file.assert_called_once() + assert rebuild_file.call_args.kwargs["tenant_id"] == "tenant-1" + def test_get_and_set_value(self): draft_var = WorkflowDraftVariable() int_var = IntegerSegment(value=1) diff --git a/api/tests/unit_tests/services/workflow/test_draft_var_loader_simple.py b/api/tests/unit_tests/services/workflow/test_draft_var_loader_simple.py index f3391d6380..60774669a6 100644 --- a/api/tests/unit_tests/services/workflow/test_draft_var_loader_simple.py +++ b/api/tests/unit_tests/services/workflow/test_draft_var_loader_simple.py @@ -6,6 +6,9 @@ from unittest.mock import Mock, patch import pytest from sqlalchemy import Engine +from core.workflow.file_reference import build_file_reference +from dify_graph.file.enums import FileTransferMethod, FileType +from dify_graph.file.models import File from dify_graph.variables.segments import ObjectSegment, StringSegment from dify_graph.variables.types import SegmentType from models.model import UploadFile @@ -54,25 +57,18 @@ class TestDraftVarLoaderSimple: with patch("services.workflow_draft_variable_service.storage") as mock_storage: mock_storage.load.return_value = test_content.encode() - with patch("factories.variable_factory.segment_to_variable") as mock_segment_to_variable: - mock_variable = Mock() - mock_variable.id = "draft-var-id" - mock_variable.name = "test_variable" - mock_variable.value = StringSegment(value=test_content) - mock_segment_to_variable.return_value = mock_variable + # Execute the method + selector_tuple, variable = draft_var_loader._load_offloaded_variable(draft_var) - # Execute the method - selector_tuple, variable = draft_var_loader._load_offloaded_variable(draft_var) + # Verify results + assert selector_tuple == ("test-node-id", "test_variable") + assert variable.id == "draft-var-id" + assert variable.name == "test_variable" + assert variable.description == "test description" + assert variable.value == test_content - # Verify results - assert selector_tuple == ("test-node-id", "test_variable") - assert variable.id == "draft-var-id" - assert variable.name == "test_variable" - assert variable.description == "test description" - assert variable.value == test_content - - # Verify storage was called correctly - mock_storage.load.assert_called_once_with("storage/key/test.txt") + # Verify storage was called correctly + mock_storage.load.assert_called_once_with("storage/key/test.txt") def test_load_offloaded_variable_object_type_unit(self, draft_var_loader): """Test _load_offloaded_variable with object type - isolated unit test.""" @@ -97,31 +93,22 @@ class TestDraftVarLoaderSimple: with patch("services.workflow_draft_variable_service.storage") as mock_storage: mock_storage.load.return_value = test_json_content.encode() + mock_segment = ObjectSegment(value=test_object) + draft_var.build_segment_from_serialized_value.return_value = mock_segment - with patch.object(WorkflowDraftVariable, "build_segment_with_type") as mock_build_segment: - mock_segment = ObjectSegment(value=test_object) - mock_build_segment.return_value = mock_segment + # Execute the method + selector_tuple, variable = draft_var_loader._load_offloaded_variable(draft_var) - with patch("factories.variable_factory.segment_to_variable") as mock_segment_to_variable: - mock_variable = Mock() - mock_variable.id = "draft-var-id" - mock_variable.name = "test_object" - mock_variable.value = mock_segment - mock_segment_to_variable.return_value = mock_variable + # Verify results + assert selector_tuple == ("test-node-id", "test_object") + assert variable.id == "draft-var-id" + assert variable.name == "test_object" + assert variable.description == "test description" + assert variable.value == test_object - # Execute the method - selector_tuple, variable = draft_var_loader._load_offloaded_variable(draft_var) - - # Verify results - assert selector_tuple == ("test-node-id", "test_object") - assert variable.id == "draft-var-id" - assert variable.name == "test_object" - assert variable.description == "test description" - assert variable.value == test_object - - # Verify method calls - mock_storage.load.assert_called_once_with("storage/key/test.json") - mock_build_segment.assert_called_once_with(SegmentType.OBJECT, test_object) + # Verify method calls + mock_storage.load.assert_called_once_with("storage/key/test.json") + draft_var.build_segment_from_serialized_value.assert_called_once_with(SegmentType.OBJECT, test_object) def test_load_offloaded_variable_missing_variable_file_unit(self, draft_var_loader): """Test that assertion error is raised when variable_file is None.""" @@ -176,32 +163,23 @@ class TestDraftVarLoaderSimple: with patch("services.workflow_draft_variable_service.storage") as mock_storage: mock_storage.load.return_value = test_json_content.encode() + from dify_graph.variables.segments import FloatSegment - with patch.object(WorkflowDraftVariable, "build_segment_with_type") as mock_build_segment: - from dify_graph.variables.segments import FloatSegment + mock_segment = FloatSegment(value=test_number) + draft_var.build_segment_from_serialized_value.return_value = mock_segment - mock_segment = FloatSegment(value=test_number) - mock_build_segment.return_value = mock_segment + # Execute the method + selector_tuple, variable = draft_var_loader._load_offloaded_variable(draft_var) - with patch("factories.variable_factory.segment_to_variable") as mock_segment_to_variable: - mock_variable = Mock() - mock_variable.id = "draft-var-id" - mock_variable.name = "test_number" - mock_variable.value = mock_segment - mock_segment_to_variable.return_value = mock_variable + # Verify results + assert selector_tuple == ("test-node-id", "test_number") + assert variable.id == "draft-var-id" + assert variable.name == "test_number" + assert variable.description == "test number description" - # Execute the method - selector_tuple, variable = draft_var_loader._load_offloaded_variable(draft_var) - - # Verify results - assert selector_tuple == ("test-node-id", "test_number") - assert variable.id == "draft-var-id" - assert variable.name == "test_number" - assert variable.description == "test number description" - - # Verify method calls - mock_storage.load.assert_called_once_with("storage/key/test_number.json") - mock_build_segment.assert_called_once_with(SegmentType.NUMBER, test_number) + # Verify method calls + mock_storage.load.assert_called_once_with("storage/key/test_number.json") + draft_var.build_segment_from_serialized_value.assert_called_once_with(SegmentType.NUMBER, test_number) def test_load_offloaded_variable_array_type_unit(self, draft_var_loader): """Test _load_offloaded_variable with array type - isolated unit test.""" @@ -226,32 +204,83 @@ class TestDraftVarLoaderSimple: with patch("services.workflow_draft_variable_service.storage") as mock_storage: mock_storage.load.return_value = test_json_content.encode() + from dify_graph.variables.segments import ArrayAnySegment - with patch.object(WorkflowDraftVariable, "build_segment_with_type") as mock_build_segment: - from dify_graph.variables.segments import ArrayAnySegment + mock_segment = ArrayAnySegment(value=test_array) + draft_var.build_segment_from_serialized_value.return_value = mock_segment - mock_segment = ArrayAnySegment(value=test_array) - mock_build_segment.return_value = mock_segment + # Execute the method + selector_tuple, variable = draft_var_loader._load_offloaded_variable(draft_var) - with patch("factories.variable_factory.segment_to_variable") as mock_segment_to_variable: - mock_variable = Mock() - mock_variable.id = "draft-var-id" - mock_variable.name = "test_array" - mock_variable.value = mock_segment - mock_segment_to_variable.return_value = mock_variable + # Verify results + assert selector_tuple == ("test-node-id", "test_array") + assert variable.id == "draft-var-id" + assert variable.name == "test_array" + assert variable.description == "test array description" - # Execute the method - selector_tuple, variable = draft_var_loader._load_offloaded_variable(draft_var) + # Verify method calls + mock_storage.load.assert_called_once_with("storage/key/test_array.json") + draft_var.build_segment_from_serialized_value.assert_called_once_with(SegmentType.ARRAY_ANY, test_array) - # Verify results - assert selector_tuple == ("test-node-id", "test_array") - assert variable.id == "draft-var-id" - assert variable.name == "test_array" - assert variable.description == "test array description" + def test_load_offloaded_variable_file_type_rebuilds_storage_backed_payload(self, draft_var_loader): + upload_file = Mock(spec=UploadFile) + upload_file.key = "storage/key/test_file.json" - # Verify method calls - mock_storage.load.assert_called_once_with("storage/key/test_array.json") - mock_build_segment.assert_called_once_with(SegmentType.ARRAY_ANY, test_array) + variable_file = Mock(spec=WorkflowDraftVariableFile) + variable_file.value_type = SegmentType.FILE + variable_file.upload_file = upload_file + + draft_var = WorkflowDraftVariable() + draft_var.id = "draft-var-id" + draft_var.app_id = "app-1" + draft_var.node_id = "test-node-id" + draft_var.name = "test_file" + draft_var.description = "test file description" + draft_var._set_selector(["test-node-id", "test_file"]) + draft_var.variable_file = variable_file + + persisted_file = File( + id="file-1", + type=FileType.DOCUMENT, + transfer_method=FileTransferMethod.LOCAL_FILE, + reference=build_file_reference(record_id="upload-1", storage_key="legacy-storage-key"), + filename="test.txt", + extension=".txt", + mime_type="text/plain", + size=12, + ) + rebuilt_file = File( + id="file-1", + type=FileType.DOCUMENT, + transfer_method=FileTransferMethod.LOCAL_FILE, + reference=build_file_reference(record_id="upload-1"), + filename="test.txt", + extension=".txt", + mime_type="text/plain", + size=12, + storage_key="canonical-storage-key", + ) + + raw_file = { + **persisted_file.model_dump(mode="json"), + "tenant_id": "legacy-tenant", + } + + with ( + patch("services.workflow_draft_variable_service.storage") as mock_storage, + patch("models.workflow._resolve_workflow_app_tenant_id", return_value="tenant-1"), + patch("models.workflow.build_file_from_stored_mapping", return_value=rebuilt_file) as rebuild_file, + ): + mock_storage.load.return_value = json.dumps(raw_file).encode() + + selector_tuple, variable = draft_var_loader._load_offloaded_variable(draft_var) + + assert selector_tuple == ("test-node-id", "test_file") + assert variable.id == "draft-var-id" + assert variable.name == "test_file" + assert variable.description == "test file description" + assert variable.value == rebuilt_file + rebuild_file.assert_called_once_with(file_mapping=raw_file, tenant_id="tenant-1") def test_load_variables_with_offloaded_variables_unit(self, draft_var_loader): """Test load_variables method with mix of regular and offloaded variables.""" diff --git a/api/tests/unit_tests/services/workflow/test_workflow_draft_variable_service.py b/api/tests/unit_tests/services/workflow/test_workflow_draft_variable_service.py index 65c2ab5c2d..930dc0d823 100644 --- a/api/tests/unit_tests/services/workflow/test_workflow_draft_variable_service.py +++ b/api/tests/unit_tests/services/workflow/test_workflow_draft_variable_service.py @@ -14,6 +14,8 @@ from core.workflow.variable_prefixes import ( SYSTEM_VARIABLE_NODE_ID, ) from dify_graph.enums import BuiltinNodeTypes +from dify_graph.file.enums import FileTransferMethod, FileType +from dify_graph.file.models import File from dify_graph.variables.segments import StringSegment from dify_graph.variables.types import SegmentType from libs.uuid_utils import uuidv7 @@ -131,6 +133,47 @@ class TestDraftVariableSaver: assert node_id == c.expected_node_id, fail_msg assert name == c.expected_name, fail_msg + def test_build_variables_from_start_mapping_rebuilds_system_files(self): + mock_session = MagicMock(spec=Session) + mock_user = MagicMock(spec=Account) + mock_user.id = str(uuid.uuid4()) + saver = DraftVariableSaver( + session=mock_session, + app_id=self._get_test_app_id(), + node_id="start", + node_type=BuiltinNodeTypes.START, + node_execution_id="exec-1", + user=mock_user, + ) + rebuilt_file = File( + id="file-1", + type=FileType.DOCUMENT, + transfer_method=FileTransferMethod.LOCAL_FILE, + reference="upload-1", + filename="test.txt", + extension=".txt", + mime_type="text/plain", + size=12, + storage_key="canonical-storage-key", + ) + raw_file = { + **rebuilt_file.model_dump(mode="json"), + "tenant_id": "legacy-tenant", + } + + with ( + patch.object(saver, "_resolve_app_tenant_id", return_value="tenant-1"), + patch( + "services.workflow_draft_variable_service.build_file_from_stored_mapping", + return_value=rebuilt_file, + ) as rebuild_file, + ): + draft_vars = saver._build_variables_from_start_mapping({"sys.files": [raw_file]}) + + sys_var = draft_vars[0] + assert sys_var.get_value().value[0] == rebuilt_file + rebuild_file.assert_called_once_with(file_mapping=raw_file, tenant_id="tenant-1") + @pytest.fixture def mock_session(self): """Mock SQLAlchemy session."""