From c56d650e8368261ccf85512d2ac02fc45c8aa806 Mon Sep 17 00:00:00 2001 From: WH-2099 Date: Wed, 11 Feb 2026 14:58:39 +0800 Subject: [PATCH] refactor(workflow-file): phase1 migrate workflow file deps with compatibility bridge Implement phase 1 of the file module migration by moving workflow-facing file primitives to core.workflow.file while keeping core.file as a temporary compatibility layer. What this commit changes - Add core.workflow.file package (constants/enums/models/helpers/file_manager/tool_file_parser). - Add protocol-based runtime binding in core.workflow.file.runtime and core.workflow.file.protocols. - Add application adapter core.app.workflow.file_runtime and bind runtime in extensions.ext_storage.init_app. - Bind runtime in tests via tests/conftest.py. - Migrate workflow-only imports from core.file.* to core.workflow.file.* across workflow runtime/nodes/entry/encoder and workflow node factory. - Update workflow unit tests to patch/import the new workflow file namespace. - Remove workflow-external-imports ignore_imports entries related to core.file from .importlinter. Compatibility guarantee for phase split - Keep core.file import path available in this phase by replacing core/file/*.py with forwarding bridge modules that re-export core.workflow.file symbols. - Preserve runtime behavior and isinstance(File) identity consistency while non-workflow modules are still on legacy import paths. Notes - This commit intentionally does not remove core.file. Full repository replacement and bridge removal are handled in phase 2. --- api/.importlinter | 29 --- api/core/app/workflow/file_runtime.py | 45 ++++ api/core/app/workflow/node_factory.py | 2 +- api/core/file/__init__.py | 16 +- api/core/file/constants.py | 15 +- api/core/file/enums.py | 64 +----- api/core/file/file_manager.py | 192 +----------------- api/core/file/helpers.py | 98 ++------- api/core/file/models.py | 170 +--------------- api/core/file/tool_file_parser.py | 18 +- api/core/workflow/file/__init__.py | 19 ++ api/core/workflow/file/constants.py | 11 + api/core/workflow/file/enums.py | 57 ++++++ api/core/workflow/file/file_manager.py | 143 +++++++++++++ api/core/workflow/file/helpers.py | 92 +++++++++ api/core/workflow/file/models.py | 178 ++++++++++++++++ api/core/workflow/file/protocols.py | 42 ++++ api/core/workflow/file/runtime.py | 57 ++++++ api/core/workflow/file/tool_file_parser.py | 9 + api/core/workflow/node_events/node.py | 2 +- api/core/workflow/nodes/agent/agent_node.py | 2 +- .../nodes/datasource/datasource_node.py | 4 +- .../workflow/nodes/document_extractor/node.py | 2 +- .../workflow/nodes/http_request/executor.py | 4 +- api/core/workflow/nodes/http_request/node.py | 4 +- .../knowledge_retrieval_node.py | 2 +- api/core/workflow/nodes/list_operator/node.py | 2 +- api/core/workflow/nodes/llm/file_saver.py | 2 +- api/core/workflow/nodes/llm/llm_utils.py | 2 +- api/core/workflow/nodes/llm/node.py | 4 +- api/core/workflow/nodes/loop/loop_node.py | 6 +- .../parameter_extractor_node.py | 2 +- api/core/workflow/nodes/protocols.py | 2 +- .../question_classifier_node.py | 2 +- api/core/workflow/nodes/tool/tool_node.py | 2 +- .../workflow/nodes/trigger_webhook/node.py | 2 +- api/core/workflow/runtime/variable_pool.py | 2 +- api/core/workflow/system_variable.py | 2 +- .../workflow/utils/condition/processor.py | 2 +- api/core/workflow/workflow_entry.py | 2 +- api/core/workflow/workflow_type_encoder.py | 2 +- api/extensions/ext_storage.py | 3 + api/tests/conftest.py | 8 + .../workflow/nodes/llm/test_file_saver.py | 2 +- .../core/workflow/nodes/llm/test_node.py | 2 +- .../core/workflow/nodes/llm/test_scenarios.py | 2 +- .../nodes/test_document_extractor_node.py | 4 +- .../core/workflow/nodes/test_if_else.py | 2 +- .../core/workflow/nodes/test_list_operator.py | 2 +- .../workflow/nodes/tool/test_tool_node.py | 2 +- .../nodes/webhook/test_webhook_node.py | 2 +- .../core/workflow/test_system_variable.py | 4 +- .../test_system_variable_read_only_view.py | 2 +- .../core/workflow/test_variable_pool.py | 2 +- .../core/workflow/test_workflow_entry.py | 4 +- 55 files changed, 783 insertions(+), 571 deletions(-) create mode 100644 api/core/app/workflow/file_runtime.py create mode 100644 api/core/workflow/file/__init__.py create mode 100644 api/core/workflow/file/constants.py create mode 100644 api/core/workflow/file/enums.py create mode 100644 api/core/workflow/file/file_manager.py create mode 100644 api/core/workflow/file/helpers.py create mode 100644 api/core/workflow/file/models.py create mode 100644 api/core/workflow/file/protocols.py create mode 100644 api/core/workflow/file/runtime.py create mode 100644 api/core/workflow/file/tool_file_parser.py create mode 100644 api/tests/conftest.py diff --git a/api/.importlinter b/api/.importlinter index e30f498ba9..5fe76ce4c8 100644 --- a/api/.importlinter +++ b/api/.importlinter @@ -115,18 +115,15 @@ ignore_imports = core.workflow.nodes.datasource.datasource_node -> models.tools core.workflow.nodes.datasource.datasource_node -> services.datasource_provider_service core.workflow.nodes.document_extractor.node -> configs - core.workflow.nodes.document_extractor.node -> core.file.file_manager core.workflow.nodes.document_extractor.node -> core.helper.ssrf_proxy core.workflow.nodes.http_request.entities -> configs core.workflow.nodes.http_request.executor -> configs - core.workflow.nodes.http_request.executor -> core.file.file_manager core.workflow.nodes.http_request.node -> configs core.workflow.nodes.http_request.node -> core.tools.tool_file_manager core.workflow.nodes.iteration.iteration_node -> core.app.workflow.node_factory core.workflow.nodes.knowledge_index.knowledge_index_node -> core.rag.index_processor.index_processor_factory core.workflow.nodes.llm.llm_utils -> configs core.workflow.nodes.llm.llm_utils -> core.app.entities.app_invoke_entities - core.workflow.nodes.llm.llm_utils -> core.file.models core.workflow.nodes.llm.llm_utils -> core.model_manager core.workflow.nodes.llm.llm_utils -> core.model_runtime.model_providers.__base.large_language_model core.workflow.nodes.llm.llm_utils -> models.model @@ -162,36 +159,10 @@ ignore_imports = core.workflow.nodes.llm.llm_utils -> core.entities.provider_entities core.workflow.nodes.parameter_extractor.parameter_extractor_node -> core.model_manager core.workflow.nodes.question_classifier.question_classifier_node -> core.model_manager - core.workflow.node_events.node -> core.file - core.workflow.nodes.agent.agent_node -> core.file - core.workflow.nodes.datasource.datasource_node -> core.file - core.workflow.nodes.datasource.datasource_node -> core.file.enums - core.workflow.nodes.document_extractor.node -> core.file - core.workflow.nodes.http_request.executor -> core.file.enums - core.workflow.nodes.http_request.node -> core.file - core.workflow.nodes.http_request.node -> core.file.file_manager - core.workflow.nodes.knowledge_retrieval.knowledge_retrieval_node -> core.file.models - core.workflow.nodes.list_operator.node -> core.file - core.workflow.nodes.llm.file_saver -> core.file core.workflow.nodes.llm.llm_utils -> core.variables.segments - core.workflow.nodes.llm.node -> core.file - core.workflow.nodes.llm.node -> core.file.file_manager - core.workflow.nodes.llm.node -> core.file.models core.workflow.nodes.loop.entities -> core.variables.types - core.workflow.nodes.parameter_extractor.parameter_extractor_node -> core.file - core.workflow.nodes.protocols -> core.file - core.workflow.nodes.question_classifier.question_classifier_node -> core.file.models - core.workflow.nodes.tool.tool_node -> core.file core.workflow.nodes.tool.tool_node -> core.tools.utils.message_transformer core.workflow.nodes.tool.tool_node -> models - core.workflow.nodes.trigger_webhook.node -> core.file - core.workflow.runtime.variable_pool -> core.file - core.workflow.runtime.variable_pool -> core.file.file_manager - core.workflow.system_variable -> core.file.models - core.workflow.utils.condition.processor -> core.file - core.workflow.utils.condition.processor -> core.file.file_manager - core.workflow.workflow_entry -> core.file.models - core.workflow.workflow_type_encoder -> core.file.models core.workflow.nodes.agent.agent_node -> models.model core.workflow.nodes.code.code_node -> core.helper.code_executor.code_node_provider core.workflow.nodes.code.code_node -> core.helper.code_executor.javascript.javascript_code_provider diff --git a/api/core/app/workflow/file_runtime.py b/api/core/app/workflow/file_runtime.py new file mode 100644 index 0000000000..b697bc9287 --- /dev/null +++ b/api/core/app/workflow/file_runtime.py @@ -0,0 +1,45 @@ +from __future__ import annotations + +from configs import dify_config +from core.helper.ssrf_proxy import ssrf_proxy +from core.tools.signature import sign_tool_file +from core.workflow.file.protocols import HttpResponseProtocol, WorkflowFileRuntimeProtocol +from core.workflow.file.runtime import set_workflow_file_runtime +from extensions.ext_storage import storage + + +class DifyWorkflowFileRuntime(WorkflowFileRuntimeProtocol): + """Production runtime wiring for ``core.workflow.file``.""" + + @property + def files_url(self) -> str: + return dify_config.FILES_URL + + @property + def internal_files_url(self) -> str | None: + return dify_config.INTERNAL_FILES_URL + + @property + def secret_key(self) -> str: + return dify_config.SECRET_KEY + + @property + def files_access_timeout(self) -> int: + return dify_config.FILES_ACCESS_TIMEOUT + + @property + def multimodal_send_format(self) -> str: + return dify_config.MULTIMODAL_SEND_FORMAT + + def http_get(self, url: str, *, follow_redirects: bool = True) -> HttpResponseProtocol: + return ssrf_proxy.get(url, follow_redirects=follow_redirects) + + def storage_load(self, path: str, *, stream: bool = False): + return storage.load(path, stream=stream) + + def sign_tool_file(self, *, tool_file_id: str, extension: str, for_external: bool = True) -> str: + return sign_tool_file(tool_file_id=tool_file_id, extension=extension, for_external=for_external) + + +def bind_dify_workflow_file_runtime() -> None: + set_workflow_file_runtime(DifyWorkflowFileRuntime()) diff --git a/api/core/app/workflow/node_factory.py b/api/core/app/workflow/node_factory.py index 18db750d28..bd58bcb6b0 100644 --- a/api/core/app/workflow/node_factory.py +++ b/api/core/app/workflow/node_factory.py @@ -4,7 +4,6 @@ from typing import TYPE_CHECKING, final from typing_extensions import override from configs import dify_config -from core.file.file_manager import file_manager from core.helper.code_executor.code_executor import CodeExecutor from core.helper.code_executor.code_node_provider import CodeNodeProvider from core.helper.ssrf_proxy import ssrf_proxy @@ -12,6 +11,7 @@ from core.rag.retrieval.dataset_retrieval import DatasetRetrieval from core.tools.tool_file_manager import ToolFileManager from core.workflow.entities.graph_config import NodeConfigDict from core.workflow.enums import NodeType +from core.workflow.file.file_manager import file_manager from core.workflow.graph.graph import NodeFactory from core.workflow.nodes.base.node import Node from core.workflow.nodes.code.code_node import CodeNode diff --git a/api/core/file/__init__.py b/api/core/file/__init__.py index 44749ebec3..f83c8df502 100644 --- a/api/core/file/__init__.py +++ b/api/core/file/__init__.py @@ -1,7 +1,17 @@ -from .constants import FILE_MODEL_IDENTITY -from .enums import ArrayFileAttribute, FileAttribute, FileBelongsTo, FileTransferMethod, FileType -from .models import ( +"""Compatibility bridge for legacy ``core.file`` imports. + +Phase 1 keeps this package as a forwarding layer while canonical file models and +helpers live under ``core.workflow.file``. +""" + +from core.workflow.file import ( + FILE_MODEL_IDENTITY, + ArrayFileAttribute, File, + FileAttribute, + FileBelongsTo, + FileTransferMethod, + FileType, FileUploadConfig, ImageConfig, ) diff --git a/api/core/file/constants.py b/api/core/file/constants.py index 0665ed7e0d..f7555f9c14 100644 --- a/api/core/file/constants.py +++ b/api/core/file/constants.py @@ -1,11 +1,8 @@ -from typing import Any +"""Compatibility bridge for legacy ``core.file.constants`` imports.""" -# TODO(QuantumGhost): Refactor variable type identification. Instead of directly -# comparing `dify_model_identity` with constants throughout the codebase, extract -# this logic into a dedicated function. This would encapsulate the implementation -# details of how different variable types are identified. -FILE_MODEL_IDENTITY = "__dify__file__" +from core.workflow.file.constants import FILE_MODEL_IDENTITY, maybe_file_object - -def maybe_file_object(o: Any) -> bool: - return isinstance(o, dict) and o.get("dify_model_identity") == FILE_MODEL_IDENTITY +__all__ = [ + "FILE_MODEL_IDENTITY", + "maybe_file_object", +] diff --git a/api/core/file/enums.py b/api/core/file/enums.py index 170eb4fc23..d27ae0b4d4 100644 --- a/api/core/file/enums.py +++ b/api/core/file/enums.py @@ -1,57 +1,11 @@ -from enum import StrEnum +"""Compatibility bridge for legacy ``core.file.enums`` imports.""" +from core.workflow.file.enums import ArrayFileAttribute, FileAttribute, FileBelongsTo, FileTransferMethod, FileType -class FileType(StrEnum): - IMAGE = "image" - DOCUMENT = "document" - AUDIO = "audio" - VIDEO = "video" - CUSTOM = "custom" - - @staticmethod - def value_of(value): - for member in FileType: - if member.value == value: - return member - raise ValueError(f"No matching enum found for value '{value}'") - - -class FileTransferMethod(StrEnum): - REMOTE_URL = "remote_url" - LOCAL_FILE = "local_file" - TOOL_FILE = "tool_file" - DATASOURCE_FILE = "datasource_file" - - @staticmethod - def value_of(value): - for member in FileTransferMethod: - if member.value == value: - return member - raise ValueError(f"No matching enum found for value '{value}'") - - -class FileBelongsTo(StrEnum): - USER = "user" - ASSISTANT = "assistant" - - @staticmethod - def value_of(value): - for member in FileBelongsTo: - if member.value == value: - return member - raise ValueError(f"No matching enum found for value '{value}'") - - -class FileAttribute(StrEnum): - TYPE = "type" - SIZE = "size" - NAME = "name" - MIME_TYPE = "mime_type" - TRANSFER_METHOD = "transfer_method" - URL = "url" - EXTENSION = "extension" - RELATED_ID = "related_id" - - -class ArrayFileAttribute(StrEnum): - LENGTH = "length" +__all__ = [ + "FileType", + "FileTransferMethod", + "FileBelongsTo", + "FileAttribute", + "ArrayFileAttribute", +] diff --git a/api/core/file/file_manager.py b/api/core/file/file_manager.py index 9945d7c1ab..c8c082ff2f 100644 --- a/api/core/file/file_manager.py +++ b/api/core/file/file_manager.py @@ -1,185 +1,11 @@ -import base64 -from collections.abc import Mapping +"""Compatibility bridge for legacy ``core.file.file_manager`` imports.""" -from configs import dify_config -from core.helper import ssrf_proxy -from core.model_runtime.entities import ( - AudioPromptMessageContent, - DocumentPromptMessageContent, - ImagePromptMessageContent, - TextPromptMessageContent, - VideoPromptMessageContent, -) -from core.model_runtime.entities.message_entities import PromptMessageContentUnionTypes -from core.tools.signature import sign_tool_file -from extensions.ext_storage import storage +from core.workflow.file.file_manager import FileManager, download, file_manager, get_attr, to_prompt_message_content -from . import helpers -from .enums import FileAttribute -from .models import File, FileTransferMethod, FileType - - -def get_attr(*, file: File, attr: FileAttribute): - match attr: - case FileAttribute.TYPE: - return file.type.value - case FileAttribute.SIZE: - return file.size - case FileAttribute.NAME: - return file.filename - case FileAttribute.MIME_TYPE: - return file.mime_type - case FileAttribute.TRANSFER_METHOD: - return file.transfer_method.value - case FileAttribute.URL: - return _to_url(file) - case FileAttribute.EXTENSION: - return file.extension - case FileAttribute.RELATED_ID: - return file.related_id - - -def to_prompt_message_content( - f: File, - /, - *, - image_detail_config: ImagePromptMessageContent.DETAIL | None = None, -) -> PromptMessageContentUnionTypes: - """ - Convert a file to prompt message content. - - This function converts files to their appropriate prompt message content types. - For supported file types (IMAGE, AUDIO, VIDEO, DOCUMENT), it creates the - corresponding message content with proper encoding/URL. - - For unsupported file types, instead of raising an error, it returns a - TextPromptMessageContent with a descriptive message about the file. - - Args: - f: The file to convert - image_detail_config: Optional detail configuration for image files - - Returns: - PromptMessageContentUnionTypes: The appropriate message content type - - Raises: - ValueError: If file extension or mime_type is missing - """ - if f.extension is None: - raise ValueError("Missing file extension") - if f.mime_type is None: - raise ValueError("Missing file mime_type") - - prompt_class_map: Mapping[FileType, type[PromptMessageContentUnionTypes]] = { - FileType.IMAGE: ImagePromptMessageContent, - FileType.AUDIO: AudioPromptMessageContent, - FileType.VIDEO: VideoPromptMessageContent, - FileType.DOCUMENT: DocumentPromptMessageContent, - } - - # Check if file type is supported - if f.type not in prompt_class_map: - # For unsupported file types, return a text description - return TextPromptMessageContent(data=f"[Unsupported file type: {f.filename} ({f.type.value})]") - - # Process supported file types - params = { - "base64_data": _get_encoded_string(f) if dify_config.MULTIMODAL_SEND_FORMAT == "base64" else "", - "url": _to_url(f) if dify_config.MULTIMODAL_SEND_FORMAT == "url" else "", - "format": f.extension.removeprefix("."), - "mime_type": f.mime_type, - "filename": f.filename or "", - } - if f.type == FileType.IMAGE: - params["detail"] = image_detail_config or ImagePromptMessageContent.DETAIL.LOW - - return prompt_class_map[f.type].model_validate(params) - - -def download(f: File, /): - if f.transfer_method in ( - FileTransferMethod.TOOL_FILE, - FileTransferMethod.LOCAL_FILE, - FileTransferMethod.DATASOURCE_FILE, - ): - return _download_file_content(f.storage_key) - elif f.transfer_method == FileTransferMethod.REMOTE_URL: - if f.remote_url is None: - raise ValueError("Missing file remote_url") - response = ssrf_proxy.get(f.remote_url, follow_redirects=True) - response.raise_for_status() - return response.content - raise ValueError(f"unsupported transfer method: {f.transfer_method}") - - -def _download_file_content(path: str, /): - """ - Download and return the contents of a file as bytes. - - This function loads the file from storage and ensures it's in bytes format. - - Args: - path (str): The path to the file in storage. - - Returns: - bytes: The contents of the file as a bytes object. - - Raises: - ValueError: If the loaded file is not a bytes object. - """ - data = storage.load(path, stream=False) - if not isinstance(data, bytes): - raise ValueError(f"file {path} is not a bytes object") - return data - - -def _get_encoded_string(f: File, /): - match f.transfer_method: - case FileTransferMethod.REMOTE_URL: - if f.remote_url is None: - raise ValueError("Missing file remote_url") - response = ssrf_proxy.get(f.remote_url, follow_redirects=True) - response.raise_for_status() - data = response.content - case FileTransferMethod.LOCAL_FILE: - data = _download_file_content(f.storage_key) - case FileTransferMethod.TOOL_FILE: - data = _download_file_content(f.storage_key) - case FileTransferMethod.DATASOURCE_FILE: - data = _download_file_content(f.storage_key) - - encoded_string = base64.b64encode(data).decode("utf-8") - return encoded_string - - -def _to_url(f: File, /): - if f.transfer_method == FileTransferMethod.REMOTE_URL: - if f.remote_url is None: - raise ValueError("Missing file remote_url") - return f.remote_url - elif f.transfer_method == FileTransferMethod.LOCAL_FILE: - if f.related_id is None: - raise ValueError("Missing file related_id") - return f.remote_url or helpers.get_signed_file_url(upload_file_id=f.related_id) - elif f.transfer_method == FileTransferMethod.TOOL_FILE: - # add sign url - if f.related_id is None or f.extension is None: - raise ValueError("Missing file related_id or extension") - return sign_tool_file(tool_file_id=f.related_id, extension=f.extension) - else: - raise ValueError(f"Unsupported transfer method: {f.transfer_method}") - - -class FileManager: - """ - Adapter exposing file manager helpers behind FileManagerProtocol. - - This is intentionally a thin wrapper over the existing module-level functions so callers can inject it - where a protocol-typed file manager is expected. - """ - - def download(self, f: File, /) -> bytes: - return download(f) - - -file_manager = FileManager() +__all__ = [ + "FileManager", + "download", + "file_manager", + "get_attr", + "to_prompt_message_content", +] diff --git a/api/core/file/helpers.py b/api/core/file/helpers.py index 2ac483673a..df3b84695a 100644 --- a/api/core/file/helpers.py +++ b/api/core/file/helpers.py @@ -1,83 +1,19 @@ -import base64 -import hashlib -import hmac -import os -import time -import urllib.parse +"""Compatibility bridge for legacy ``core.file.helpers`` imports.""" -from configs import dify_config +from core.workflow.file.helpers import ( + get_signed_file_url, + get_signed_file_url_for_plugin, + get_signed_tool_file_url, + verify_file_signature, + verify_image_signature, + verify_plugin_file_signature, +) - -def get_signed_file_url(upload_file_id: str, as_attachment=False, for_external: bool = True) -> str: - base_url = dify_config.FILES_URL if for_external else (dify_config.INTERNAL_FILES_URL or dify_config.FILES_URL) - url = f"{base_url}/files/{upload_file_id}/file-preview" - - timestamp = str(int(time.time())) - nonce = os.urandom(16).hex() - key = dify_config.SECRET_KEY.encode() - msg = f"file-preview|{upload_file_id}|{timestamp}|{nonce}" - sign = hmac.new(key, msg.encode(), hashlib.sha256).digest() - encoded_sign = base64.urlsafe_b64encode(sign).decode() - query = {"timestamp": timestamp, "nonce": nonce, "sign": encoded_sign} - if as_attachment: - query["as_attachment"] = "true" - query_string = urllib.parse.urlencode(query) - - return f"{url}?{query_string}" - - -def get_signed_file_url_for_plugin(filename: str, mimetype: str, tenant_id: str, user_id: str) -> str: - # Plugin access should use internal URL for Docker network communication - base_url = dify_config.INTERNAL_FILES_URL or dify_config.FILES_URL - url = f"{base_url}/files/upload/for-plugin" - timestamp = str(int(time.time())) - nonce = os.urandom(16).hex() - key = dify_config.SECRET_KEY.encode() - msg = f"upload|{filename}|{mimetype}|{tenant_id}|{user_id}|{timestamp}|{nonce}" - sign = hmac.new(key, msg.encode(), hashlib.sha256).digest() - encoded_sign = base64.urlsafe_b64encode(sign).decode() - return f"{url}?timestamp={timestamp}&nonce={nonce}&sign={encoded_sign}&user_id={user_id}&tenant_id={tenant_id}" - - -def verify_plugin_file_signature( - *, filename: str, mimetype: str, tenant_id: str, user_id: str, timestamp: str, nonce: str, sign: str -) -> bool: - data_to_sign = f"upload|{filename}|{mimetype}|{tenant_id}|{user_id}|{timestamp}|{nonce}" - secret_key = dify_config.SECRET_KEY.encode() - recalculated_sign = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest() - recalculated_encoded_sign = base64.urlsafe_b64encode(recalculated_sign).decode() - - # verify signature - if sign != recalculated_encoded_sign: - return False - - current_time = int(time.time()) - return current_time - int(timestamp) <= dify_config.FILES_ACCESS_TIMEOUT - - -def verify_image_signature(*, upload_file_id: str, timestamp: str, nonce: str, sign: str) -> bool: - data_to_sign = f"image-preview|{upload_file_id}|{timestamp}|{nonce}" - secret_key = dify_config.SECRET_KEY.encode() - recalculated_sign = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest() - recalculated_encoded_sign = base64.urlsafe_b64encode(recalculated_sign).decode() - - # verify signature - if sign != recalculated_encoded_sign: - return False - - current_time = int(time.time()) - return current_time - int(timestamp) <= dify_config.FILES_ACCESS_TIMEOUT - - -def verify_file_signature(*, upload_file_id: str, timestamp: str, nonce: str, sign: str) -> bool: - data_to_sign = f"file-preview|{upload_file_id}|{timestamp}|{nonce}" - secret_key = dify_config.SECRET_KEY.encode() - recalculated_sign = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest() - recalculated_encoded_sign = base64.urlsafe_b64encode(recalculated_sign).decode() - - # verify signature - if sign != recalculated_encoded_sign: - return False - - current_time = int(time.time()) - return current_time - int(timestamp) <= dify_config.FILES_ACCESS_TIMEOUT +__all__ = [ + "get_signed_file_url", + "get_signed_file_url_for_plugin", + "get_signed_tool_file_url", + "verify_file_signature", + "verify_image_signature", + "verify_plugin_file_signature", +] diff --git a/api/core/file/models.py b/api/core/file/models.py index 6324523b22..46f16ffda5 100644 --- a/api/core/file/models.py +++ b/api/core/file/models.py @@ -1,164 +1,10 @@ -from collections.abc import Mapping, Sequence -from typing import Any +"""Compatibility bridge for legacy ``core.file.models`` imports.""" -from pydantic import BaseModel, Field, model_validator +from core.workflow.file.models import File, FileUploadConfig, ImageConfig, sign_tool_file -from core.model_runtime.entities.message_entities import ImagePromptMessageContent -from core.tools.signature import sign_tool_file - -from . import helpers -from .constants import FILE_MODEL_IDENTITY -from .enums import FileTransferMethod, FileType - - -class ImageConfig(BaseModel): - """ - NOTE: This part of validation is deprecated, but still used in app features "Image Upload". - """ - - number_limits: int = 0 - transfer_methods: Sequence[FileTransferMethod] = Field(default_factory=list) - detail: ImagePromptMessageContent.DETAIL | None = None - - -class FileUploadConfig(BaseModel): - """ - File Upload Entity. - """ - - image_config: ImageConfig | None = None - allowed_file_types: Sequence[FileType] = Field(default_factory=list) - allowed_file_extensions: Sequence[str] = Field(default_factory=list) - allowed_file_upload_methods: Sequence[FileTransferMethod] = Field(default_factory=list) - number_limits: int = 0 - - -class File(BaseModel): - # NOTE: dify_model_identity is a special identifier used to distinguish between - # new and old data formats during serialization and deserialization. - dify_model_identity: str = FILE_MODEL_IDENTITY - - id: str | None = None # message file id - tenant_id: str - type: FileType - transfer_method: FileTransferMethod - # If `transfer_method` is `FileTransferMethod.remote_url`, the - # `remote_url` attribute must not be `None`. - remote_url: str | None = None # remote url - # If `transfer_method` is `FileTransferMethod.local_file` or - # `FileTransferMethod.tool_file`, the `related_id` attribute must not be `None`. - # - # It should be set to `ToolFile.id` when `transfer_method` is `tool_file`. - related_id: str | None = None - filename: str | None = None - extension: str | None = Field(default=None, description="File extension, should contain dot") - mime_type: str | None = None - size: int = -1 - - # Those properties are private, should not be exposed to the outside. - _storage_key: str - - def __init__( - self, - *, - id: str | None = None, - tenant_id: str, - type: FileType, - transfer_method: FileTransferMethod, - remote_url: str | None = None, - related_id: str | None = None, - filename: str | None = None, - extension: str | None = None, - mime_type: str | None = None, - size: int = -1, - storage_key: str | None = None, - dify_model_identity: str | None = FILE_MODEL_IDENTITY, - url: str | None = None, - # Legacy compatibility fields - explicitly handle known extra fields - tool_file_id: str | None = None, - upload_file_id: str | None = None, - datasource_file_id: str | None = None, - ): - super().__init__( - id=id, - tenant_id=tenant_id, - type=type, - transfer_method=transfer_method, - remote_url=remote_url, - related_id=related_id, - filename=filename, - extension=extension, - mime_type=mime_type, - size=size, - dify_model_identity=dify_model_identity, - url=url, - ) - self._storage_key = str(storage_key) - - def to_dict(self) -> Mapping[str, str | int | None]: - data = self.model_dump(mode="json") - return { - **data, - "url": self.generate_url(), - } - - @property - def markdown(self) -> str: - url = self.generate_url() - if self.type == FileType.IMAGE: - text = f"![{self.filename or ''}]({url})" - else: - text = f"[{self.filename or url}]({url})" - - return text - - def generate_url(self, for_external: bool = True) -> str | None: - if self.transfer_method == FileTransferMethod.REMOTE_URL: - return self.remote_url - elif self.transfer_method == FileTransferMethod.LOCAL_FILE: - if self.related_id is None: - raise ValueError("Missing file related_id") - return helpers.get_signed_file_url(upload_file_id=self.related_id, for_external=for_external) - elif self.transfer_method in [FileTransferMethod.TOOL_FILE, FileTransferMethod.DATASOURCE_FILE]: - assert self.related_id is not None - assert self.extension is not None - return sign_tool_file(tool_file_id=self.related_id, extension=self.extension, for_external=for_external) - return None - - def to_plugin_parameter(self) -> dict[str, Any]: - return { - "dify_model_identity": FILE_MODEL_IDENTITY, - "mime_type": self.mime_type, - "filename": self.filename, - "extension": self.extension, - "size": self.size, - "type": self.type, - "url": self.generate_url(for_external=False), - } - - @model_validator(mode="after") - def validate_after(self): - match self.transfer_method: - case FileTransferMethod.REMOTE_URL: - if not self.remote_url: - raise ValueError("Missing file url") - if not isinstance(self.remote_url, str) or not self.remote_url.startswith("http"): - raise ValueError("Invalid file url") - case FileTransferMethod.LOCAL_FILE: - if not self.related_id: - raise ValueError("Missing file related_id") - case FileTransferMethod.TOOL_FILE: - if not self.related_id: - raise ValueError("Missing file related_id") - case FileTransferMethod.DATASOURCE_FILE: - if not self.related_id: - raise ValueError("Missing file related_id") - return self - - @property - def storage_key(self) -> str: - return self._storage_key - - @storage_key.setter - def storage_key(self, value: str): - self._storage_key = value +__all__ = [ + "File", + "FileUploadConfig", + "ImageConfig", + "sign_tool_file", +] diff --git a/api/core/file/tool_file_parser.py b/api/core/file/tool_file_parser.py index 4c8e7282b8..a105e4e32e 100644 --- a/api/core/file/tool_file_parser.py +++ b/api/core/file/tool_file_parser.py @@ -1,12 +1,20 @@ +"""Compatibility bridge for legacy ``core.file.tool_file_parser`` imports.""" + from collections.abc import Callable -from typing import TYPE_CHECKING +from typing import Any -if TYPE_CHECKING: - from core.tools.tool_file_manager import ToolFileManager +from core.workflow.file import tool_file_parser as workflow_tool_file_parser -_tool_file_manager_factory: Callable[[], "ToolFileManager"] | None = None +_tool_file_manager_factory: Callable[[], Any] | None = None -def set_tool_file_manager_factory(factory: Callable[[], "ToolFileManager"]): +def set_tool_file_manager_factory(factory: Callable[[], Any]) -> None: global _tool_file_manager_factory _tool_file_manager_factory = factory + workflow_tool_file_parser.set_tool_file_manager_factory(factory) + + +__all__ = [ + "_tool_file_manager_factory", + "set_tool_file_manager_factory", +] diff --git a/api/core/workflow/file/__init__.py b/api/core/workflow/file/__init__.py new file mode 100644 index 0000000000..44749ebec3 --- /dev/null +++ b/api/core/workflow/file/__init__.py @@ -0,0 +1,19 @@ +from .constants import FILE_MODEL_IDENTITY +from .enums import ArrayFileAttribute, FileAttribute, FileBelongsTo, FileTransferMethod, FileType +from .models import ( + File, + FileUploadConfig, + ImageConfig, +) + +__all__ = [ + "FILE_MODEL_IDENTITY", + "ArrayFileAttribute", + "File", + "FileAttribute", + "FileBelongsTo", + "FileTransferMethod", + "FileType", + "FileUploadConfig", + "ImageConfig", +] diff --git a/api/core/workflow/file/constants.py b/api/core/workflow/file/constants.py new file mode 100644 index 0000000000..0665ed7e0d --- /dev/null +++ b/api/core/workflow/file/constants.py @@ -0,0 +1,11 @@ +from typing import Any + +# TODO(QuantumGhost): Refactor variable type identification. Instead of directly +# comparing `dify_model_identity` with constants throughout the codebase, extract +# this logic into a dedicated function. This would encapsulate the implementation +# details of how different variable types are identified. +FILE_MODEL_IDENTITY = "__dify__file__" + + +def maybe_file_object(o: Any) -> bool: + return isinstance(o, dict) and o.get("dify_model_identity") == FILE_MODEL_IDENTITY diff --git a/api/core/workflow/file/enums.py b/api/core/workflow/file/enums.py new file mode 100644 index 0000000000..170eb4fc23 --- /dev/null +++ b/api/core/workflow/file/enums.py @@ -0,0 +1,57 @@ +from enum import StrEnum + + +class FileType(StrEnum): + IMAGE = "image" + DOCUMENT = "document" + AUDIO = "audio" + VIDEO = "video" + CUSTOM = "custom" + + @staticmethod + def value_of(value): + for member in FileType: + if member.value == value: + return member + raise ValueError(f"No matching enum found for value '{value}'") + + +class FileTransferMethod(StrEnum): + REMOTE_URL = "remote_url" + LOCAL_FILE = "local_file" + TOOL_FILE = "tool_file" + DATASOURCE_FILE = "datasource_file" + + @staticmethod + def value_of(value): + for member in FileTransferMethod: + if member.value == value: + return member + raise ValueError(f"No matching enum found for value '{value}'") + + +class FileBelongsTo(StrEnum): + USER = "user" + ASSISTANT = "assistant" + + @staticmethod + def value_of(value): + for member in FileBelongsTo: + if member.value == value: + return member + raise ValueError(f"No matching enum found for value '{value}'") + + +class FileAttribute(StrEnum): + TYPE = "type" + SIZE = "size" + NAME = "name" + MIME_TYPE = "mime_type" + TRANSFER_METHOD = "transfer_method" + URL = "url" + EXTENSION = "extension" + RELATED_ID = "related_id" + + +class ArrayFileAttribute(StrEnum): + LENGTH = "length" diff --git a/api/core/workflow/file/file_manager.py b/api/core/workflow/file/file_manager.py new file mode 100644 index 0000000000..a7719400d9 --- /dev/null +++ b/api/core/workflow/file/file_manager.py @@ -0,0 +1,143 @@ +from __future__ import annotations + +import base64 +from collections.abc import Mapping + +from core.model_runtime.entities import ( + AudioPromptMessageContent, + DocumentPromptMessageContent, + ImagePromptMessageContent, + TextPromptMessageContent, + VideoPromptMessageContent, +) +from core.model_runtime.entities.message_entities import PromptMessageContentUnionTypes + +from . import helpers +from .enums import FileAttribute +from .models import File, FileTransferMethod, FileType +from .runtime import get_workflow_file_runtime + + +def get_attr(*, file: File, attr: FileAttribute): + match attr: + case FileAttribute.TYPE: + return file.type.value + case FileAttribute.SIZE: + return file.size + case FileAttribute.NAME: + return file.filename + case FileAttribute.MIME_TYPE: + return file.mime_type + case FileAttribute.TRANSFER_METHOD: + return file.transfer_method.value + case FileAttribute.URL: + return _to_url(file) + case FileAttribute.EXTENSION: + return file.extension + case FileAttribute.RELATED_ID: + return file.related_id + + +def to_prompt_message_content( + f: File, + /, + *, + image_detail_config: ImagePromptMessageContent.DETAIL | None = None, +) -> PromptMessageContentUnionTypes: + """Convert a file to prompt message content.""" + if f.extension is None: + raise ValueError("Missing file extension") + if f.mime_type is None: + raise ValueError("Missing file mime_type") + + prompt_class_map: Mapping[FileType, type[PromptMessageContentUnionTypes]] = { + FileType.IMAGE: ImagePromptMessageContent, + FileType.AUDIO: AudioPromptMessageContent, + FileType.VIDEO: VideoPromptMessageContent, + FileType.DOCUMENT: DocumentPromptMessageContent, + } + + if f.type not in prompt_class_map: + return TextPromptMessageContent(data=f"[Unsupported file type: {f.filename} ({f.type.value})]") + + send_format = get_workflow_file_runtime().multimodal_send_format + params = { + "base64_data": _get_encoded_string(f) if send_format == "base64" else "", + "url": _to_url(f) if send_format == "url" else "", + "format": f.extension.removeprefix("."), + "mime_type": f.mime_type, + "filename": f.filename or "", + } + if f.type == FileType.IMAGE: + params["detail"] = image_detail_config or ImagePromptMessageContent.DETAIL.LOW + + return prompt_class_map[f.type].model_validate(params) + + +def download(f: File, /) -> bytes: + if f.transfer_method in ( + FileTransferMethod.TOOL_FILE, + FileTransferMethod.LOCAL_FILE, + FileTransferMethod.DATASOURCE_FILE, + ): + return _download_file_content(f.storage_key) + elif f.transfer_method == FileTransferMethod.REMOTE_URL: + if f.remote_url is None: + raise ValueError("Missing file remote_url") + response = get_workflow_file_runtime().http_get(f.remote_url, follow_redirects=True) + response.raise_for_status() + return response.content + raise ValueError(f"unsupported transfer method: {f.transfer_method}") + + +def _download_file_content(path: str, /) -> bytes: + """Download and return a file from storage as bytes.""" + data = get_workflow_file_runtime().storage_load(path, stream=False) + if not isinstance(data, bytes): + raise ValueError(f"file {path} is not a bytes object") + return data + + +def _get_encoded_string(f: File, /) -> str: + match f.transfer_method: + case FileTransferMethod.REMOTE_URL: + if f.remote_url is None: + raise ValueError("Missing file remote_url") + response = get_workflow_file_runtime().http_get(f.remote_url, follow_redirects=True) + response.raise_for_status() + data = response.content + case FileTransferMethod.LOCAL_FILE: + data = _download_file_content(f.storage_key) + case FileTransferMethod.TOOL_FILE: + data = _download_file_content(f.storage_key) + case FileTransferMethod.DATASOURCE_FILE: + data = _download_file_content(f.storage_key) + + return base64.b64encode(data).decode("utf-8") + + +def _to_url(f: File, /): + if f.transfer_method == FileTransferMethod.REMOTE_URL: + if f.remote_url is None: + raise ValueError("Missing file remote_url") + return f.remote_url + elif f.transfer_method == FileTransferMethod.LOCAL_FILE: + if f.related_id is None: + raise ValueError("Missing file related_id") + return f.remote_url or helpers.get_signed_file_url(upload_file_id=f.related_id) + elif f.transfer_method == FileTransferMethod.TOOL_FILE: + if f.related_id is None or f.extension is None: + raise ValueError("Missing file related_id or extension") + return helpers.get_signed_tool_file_url(tool_file_id=f.related_id, extension=f.extension) + else: + raise ValueError(f"Unsupported transfer method: {f.transfer_method}") + + +class FileManager: + """Adapter exposing file manager helpers behind FileManagerProtocol.""" + + def download(self, f: File, /) -> bytes: + return download(f) + + +file_manager = FileManager() diff --git a/api/core/workflow/file/helpers.py b/api/core/workflow/file/helpers.py new file mode 100644 index 0000000000..310cb1310b --- /dev/null +++ b/api/core/workflow/file/helpers.py @@ -0,0 +1,92 @@ +from __future__ import annotations + +import base64 +import hashlib +import hmac +import os +import time +import urllib.parse + +from .runtime import get_workflow_file_runtime + + +def get_signed_file_url(upload_file_id: str, as_attachment: bool = False, for_external: bool = True) -> str: + runtime = get_workflow_file_runtime() + base_url = runtime.files_url if for_external else (runtime.internal_files_url or runtime.files_url) + url = f"{base_url}/files/{upload_file_id}/file-preview" + + timestamp = str(int(time.time())) + nonce = os.urandom(16).hex() + key = runtime.secret_key.encode() + msg = f"file-preview|{upload_file_id}|{timestamp}|{nonce}" + sign = hmac.new(key, msg.encode(), hashlib.sha256).digest() + encoded_sign = base64.urlsafe_b64encode(sign).decode() + query: dict[str, str] = {"timestamp": timestamp, "nonce": nonce, "sign": encoded_sign} + if as_attachment: + query["as_attachment"] = "true" + query_string = urllib.parse.urlencode(query) + + return f"{url}?{query_string}" + + +def get_signed_file_url_for_plugin(filename: str, mimetype: str, tenant_id: str, user_id: str) -> str: + runtime = get_workflow_file_runtime() + # Plugin access should use internal URL for Docker network communication. + base_url = runtime.internal_files_url or runtime.files_url + url = f"{base_url}/files/upload/for-plugin" + timestamp = str(int(time.time())) + nonce = os.urandom(16).hex() + key = runtime.secret_key.encode() + msg = f"upload|{filename}|{mimetype}|{tenant_id}|{user_id}|{timestamp}|{nonce}" + sign = hmac.new(key, msg.encode(), hashlib.sha256).digest() + encoded_sign = base64.urlsafe_b64encode(sign).decode() + return f"{url}?timestamp={timestamp}&nonce={nonce}&sign={encoded_sign}&user_id={user_id}&tenant_id={tenant_id}" + + +def get_signed_tool_file_url(tool_file_id: str, extension: str, for_external: bool = True) -> str: + runtime = get_workflow_file_runtime() + return runtime.sign_tool_file(tool_file_id=tool_file_id, extension=extension, for_external=for_external) + + +def verify_plugin_file_signature( + *, filename: str, mimetype: str, tenant_id: str, user_id: str, timestamp: str, nonce: str, sign: str +) -> bool: + runtime = get_workflow_file_runtime() + data_to_sign = f"upload|{filename}|{mimetype}|{tenant_id}|{user_id}|{timestamp}|{nonce}" + secret_key = runtime.secret_key.encode() + recalculated_sign = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest() + recalculated_encoded_sign = base64.urlsafe_b64encode(recalculated_sign).decode() + + if sign != recalculated_encoded_sign: + return False + + current_time = int(time.time()) + return current_time - int(timestamp) <= runtime.files_access_timeout + + +def verify_image_signature(*, upload_file_id: str, timestamp: str, nonce: str, sign: str) -> bool: + runtime = get_workflow_file_runtime() + data_to_sign = f"image-preview|{upload_file_id}|{timestamp}|{nonce}" + secret_key = runtime.secret_key.encode() + recalculated_sign = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest() + recalculated_encoded_sign = base64.urlsafe_b64encode(recalculated_sign).decode() + + if sign != recalculated_encoded_sign: + return False + + current_time = int(time.time()) + return current_time - int(timestamp) <= runtime.files_access_timeout + + +def verify_file_signature(*, upload_file_id: str, timestamp: str, nonce: str, sign: str) -> bool: + runtime = get_workflow_file_runtime() + data_to_sign = f"file-preview|{upload_file_id}|{timestamp}|{nonce}" + secret_key = runtime.secret_key.encode() + recalculated_sign = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest() + recalculated_encoded_sign = base64.urlsafe_b64encode(recalculated_sign).decode() + + if sign != recalculated_encoded_sign: + return False + + current_time = int(time.time()) + return current_time - int(timestamp) <= runtime.files_access_timeout diff --git a/api/core/workflow/file/models.py b/api/core/workflow/file/models.py new file mode 100644 index 0000000000..cd7d3edde8 --- /dev/null +++ b/api/core/workflow/file/models.py @@ -0,0 +1,178 @@ +from __future__ import annotations + +from collections.abc import Mapping, Sequence +from typing import Any + +from pydantic import BaseModel, Field, model_validator + +from core.model_runtime.entities.message_entities import ImagePromptMessageContent + +from . import helpers +from .constants import FILE_MODEL_IDENTITY +from .enums import FileTransferMethod, FileType + + +def sign_tool_file(*, tool_file_id: str, extension: str, for_external: bool = True) -> str: + """Compatibility shim for tests and legacy callers patching ``models.sign_tool_file``.""" + return helpers.get_signed_tool_file_url( + tool_file_id=tool_file_id, + extension=extension, + for_external=for_external, + ) + + +class ImageConfig(BaseModel): + """ + NOTE: This part of validation is deprecated, but still used in app features "Image Upload". + """ + + number_limits: int = 0 + transfer_methods: Sequence[FileTransferMethod] = Field(default_factory=list) + detail: ImagePromptMessageContent.DETAIL | None = None + + +class FileUploadConfig(BaseModel): + """ + File Upload Entity. + """ + + image_config: ImageConfig | None = None + allowed_file_types: Sequence[FileType] = Field(default_factory=list) + allowed_file_extensions: Sequence[str] = Field(default_factory=list) + allowed_file_upload_methods: Sequence[FileTransferMethod] = Field(default_factory=list) + number_limits: int = 0 + + +class File(BaseModel): + # NOTE: dify_model_identity is a special identifier used to distinguish between + # new and old data formats during serialization and deserialization. + dify_model_identity: str = FILE_MODEL_IDENTITY + + id: str | None = None # message file id + tenant_id: str + type: FileType + transfer_method: FileTransferMethod + # If `transfer_method` is `FileTransferMethod.remote_url`, the + # `remote_url` attribute must not be `None`. + remote_url: str | None = None # remote url + # If `transfer_method` is `FileTransferMethod.local_file` or + # `FileTransferMethod.tool_file`, the `related_id` attribute must not be `None`. + # + # It should be set to `ToolFile.id` when `transfer_method` is `tool_file`. + related_id: str | None = None + filename: str | None = None + extension: str | None = Field(default=None, description="File extension, should contain dot") + mime_type: str | None = None + size: int = -1 + + # Those properties are private, should not be exposed to the outside. + _storage_key: str + + def __init__( + self, + *, + id: str | None = None, + tenant_id: str, + type: FileType, + transfer_method: FileTransferMethod, + remote_url: str | None = None, + related_id: str | None = None, + filename: str | None = None, + extension: str | None = None, + mime_type: str | None = None, + size: int = -1, + storage_key: str | None = None, + dify_model_identity: str | None = FILE_MODEL_IDENTITY, + url: str | None = None, + # Legacy compatibility fields - explicitly handle known extra fields + tool_file_id: str | None = None, + upload_file_id: str | None = None, + datasource_file_id: str | None = None, + ): + super().__init__( + id=id, + tenant_id=tenant_id, + type=type, + transfer_method=transfer_method, + remote_url=remote_url, + related_id=related_id, + filename=filename, + extension=extension, + mime_type=mime_type, + size=size, + dify_model_identity=dify_model_identity, + url=url, + ) + self._storage_key = str(storage_key) + + def to_dict(self) -> Mapping[str, str | int | None]: + data = self.model_dump(mode="json") + return { + **data, + "url": self.generate_url(), + } + + @property + def markdown(self) -> str: + url = self.generate_url() + if self.type == FileType.IMAGE: + text = f"![{self.filename or ''}]({url})" + else: + text = f"[{self.filename or url}]({url})" + + return text + + def generate_url(self, for_external: bool = True) -> str | None: + if self.transfer_method == FileTransferMethod.REMOTE_URL: + return self.remote_url + elif self.transfer_method == FileTransferMethod.LOCAL_FILE: + if self.related_id is None: + raise ValueError("Missing file related_id") + return helpers.get_signed_file_url(upload_file_id=self.related_id, for_external=for_external) + elif self.transfer_method in [FileTransferMethod.TOOL_FILE, FileTransferMethod.DATASOURCE_FILE]: + assert self.related_id is not None + assert self.extension is not None + return sign_tool_file( + tool_file_id=self.related_id, + extension=self.extension, + for_external=for_external, + ) + return None + + def to_plugin_parameter(self) -> dict[str, Any]: + return { + "dify_model_identity": FILE_MODEL_IDENTITY, + "mime_type": self.mime_type, + "filename": self.filename, + "extension": self.extension, + "size": self.size, + "type": self.type, + "url": self.generate_url(for_external=False), + } + + @model_validator(mode="after") + def validate_after(self) -> File: + match self.transfer_method: + case FileTransferMethod.REMOTE_URL: + if not self.remote_url: + raise ValueError("Missing file url") + if not isinstance(self.remote_url, str) or not self.remote_url.startswith("http"): + raise ValueError("Invalid file url") + case FileTransferMethod.LOCAL_FILE: + if not self.related_id: + raise ValueError("Missing file related_id") + case FileTransferMethod.TOOL_FILE: + if not self.related_id: + raise ValueError("Missing file related_id") + case FileTransferMethod.DATASOURCE_FILE: + if not self.related_id: + raise ValueError("Missing file related_id") + return self + + @property + def storage_key(self) -> str: + return self._storage_key + + @storage_key.setter + def storage_key(self, value: str) -> None: + self._storage_key = value diff --git a/api/core/workflow/file/protocols.py b/api/core/workflow/file/protocols.py new file mode 100644 index 0000000000..a2af058ac5 --- /dev/null +++ b/api/core/workflow/file/protocols.py @@ -0,0 +1,42 @@ +from __future__ import annotations + +from collections.abc import Generator +from typing import Protocol + + +class HttpResponseProtocol(Protocol): + """Subset of response behavior needed by workflow file helpers.""" + + content: bytes + + def raise_for_status(self) -> None: ... + + +class WorkflowFileRuntimeProtocol(Protocol): + """Runtime dependencies required by ``core.workflow.file``. + + Implementations are expected to be provided by integration layers (for example, + ``core.app.workflow.file_runtime``) so the workflow package avoids importing + application infrastructure modules directly. + """ + + @property + def files_url(self) -> str: ... + + @property + def internal_files_url(self) -> str | None: ... + + @property + def secret_key(self) -> str: ... + + @property + def files_access_timeout(self) -> int: ... + + @property + def multimodal_send_format(self) -> str: ... + + def http_get(self, url: str, *, follow_redirects: bool = True) -> HttpResponseProtocol: ... + + def storage_load(self, path: str, *, stream: bool = False) -> bytes | Generator: ... + + def sign_tool_file(self, *, tool_file_id: str, extension: str, for_external: bool = True) -> str: ... diff --git a/api/core/workflow/file/runtime.py b/api/core/workflow/file/runtime.py new file mode 100644 index 0000000000..e41a393992 --- /dev/null +++ b/api/core/workflow/file/runtime.py @@ -0,0 +1,57 @@ +from __future__ import annotations + +from collections.abc import Generator + +from .protocols import HttpResponseProtocol, WorkflowFileRuntimeProtocol + + +class WorkflowFileRuntimeNotConfiguredError(RuntimeError): + """Raised when workflow file runtime dependencies were not configured.""" + + +class _UnconfiguredWorkflowFileRuntime(WorkflowFileRuntimeProtocol): + def _raise(self) -> None: + raise WorkflowFileRuntimeNotConfiguredError( + "workflow file runtime is not configured, call set_workflow_file_runtime(...) first" + ) + + @property + def files_url(self) -> str: + self._raise() + + @property + def internal_files_url(self) -> str | None: + self._raise() + + @property + def secret_key(self) -> str: + self._raise() + + @property + def files_access_timeout(self) -> int: + self._raise() + + @property + def multimodal_send_format(self) -> str: + self._raise() + + def http_get(self, url: str, *, follow_redirects: bool = True) -> HttpResponseProtocol: + self._raise() + + def storage_load(self, path: str, *, stream: bool = False) -> bytes | Generator: + self._raise() + + def sign_tool_file(self, *, tool_file_id: str, extension: str, for_external: bool = True) -> str: + self._raise() + + +_runtime: WorkflowFileRuntimeProtocol = _UnconfiguredWorkflowFileRuntime() + + +def set_workflow_file_runtime(runtime: WorkflowFileRuntimeProtocol) -> None: + global _runtime + _runtime = runtime + + +def get_workflow_file_runtime() -> WorkflowFileRuntimeProtocol: + return _runtime diff --git a/api/core/workflow/file/tool_file_parser.py b/api/core/workflow/file/tool_file_parser.py new file mode 100644 index 0000000000..2d7a3d43df --- /dev/null +++ b/api/core/workflow/file/tool_file_parser.py @@ -0,0 +1,9 @@ +from collections.abc import Callable +from typing import Any + +_tool_file_manager_factory: Callable[[], Any] | None = None + + +def set_tool_file_manager_factory(factory: Callable[[], Any]): + global _tool_file_manager_factory + _tool_file_manager_factory = factory diff --git a/api/core/workflow/node_events/node.py b/api/core/workflow/node_events/node.py index 9c76b7d7c2..2468bd0ac3 100644 --- a/api/core/workflow/node_events/node.py +++ b/api/core/workflow/node_events/node.py @@ -3,10 +3,10 @@ from datetime import datetime from pydantic import Field -from core.file import File from core.model_runtime.entities.llm_entities import LLMUsage from core.rag.entities.citation_metadata import RetrievalSourceMetadata from core.workflow.entities.pause_reason import PauseReason +from core.workflow.file import File from core.workflow.node_events import NodeRunResult from .base import NodeEventBase diff --git a/api/core/workflow/nodes/agent/agent_node.py b/api/core/workflow/nodes/agent/agent_node.py index e195aebe6d..5c39a67102 100644 --- a/api/core/workflow/nodes/agent/agent_node.py +++ b/api/core/workflow/nodes/agent/agent_node.py @@ -11,7 +11,6 @@ from sqlalchemy.orm import Session from core.agent.entities import AgentToolEntity from core.agent.plugin_entities import AgentStrategyParameter -from core.file import File, FileTransferMethod from core.memory.token_buffer_memory import TokenBufferMemory from core.model_manager import ModelInstance, ModelManager from core.model_runtime.entities.llm_entities import LLMUsage, LLMUsageMetadata @@ -33,6 +32,7 @@ from core.workflow.enums import ( WorkflowNodeExecutionMetadataKey, WorkflowNodeExecutionStatus, ) +from core.workflow.file import File, FileTransferMethod from core.workflow.node_events import ( AgentLogEvent, NodeEventBase, diff --git a/api/core/workflow/nodes/datasource/datasource_node.py b/api/core/workflow/nodes/datasource/datasource_node.py index a732a70417..80869ac7f7 100644 --- a/api/core/workflow/nodes/datasource/datasource_node.py +++ b/api/core/workflow/nodes/datasource/datasource_node.py @@ -14,13 +14,13 @@ from core.datasource.entities.datasource_entities import ( from core.datasource.online_document.online_document_plugin import OnlineDocumentDatasourcePlugin from core.datasource.online_drive.online_drive_plugin import OnlineDriveDatasourcePlugin from core.datasource.utils.message_transformer import DatasourceFileMessageTransformer -from core.file import File -from core.file.enums import FileTransferMethod, FileType from core.plugin.impl.exc import PluginDaemonClientSideError from core.variables.segments import ArrayAnySegment from core.variables.variables import ArrayAnyVariable from core.workflow.entities.workflow_node_execution import WorkflowNodeExecutionStatus from core.workflow.enums import NodeExecutionType, NodeType, SystemVariableKey +from core.workflow.file import File +from core.workflow.file.enums import FileTransferMethod, FileType from core.workflow.node_events import NodeRunResult, StreamChunkEvent, StreamCompletedEvent from core.workflow.nodes.base.node import Node from core.workflow.nodes.base.variable_template_parser import VariableTemplateParser diff --git a/api/core/workflow/nodes/document_extractor/node.py b/api/core/workflow/nodes/document_extractor/node.py index 14ebd1f9ae..0a14b81633 100644 --- a/api/core/workflow/nodes/document_extractor/node.py +++ b/api/core/workflow/nodes/document_extractor/node.py @@ -21,11 +21,11 @@ from docx.table import Table from docx.text.paragraph import Paragraph from configs import dify_config -from core.file import File, FileTransferMethod, file_manager from core.helper import ssrf_proxy from core.variables import ArrayFileSegment from core.variables.segments import ArrayStringSegment, FileSegment from core.workflow.enums import NodeType, WorkflowNodeExecutionStatus +from core.workflow.file import File, FileTransferMethod, file_manager from core.workflow.node_events import NodeRunResult from core.workflow.nodes.base.node import Node diff --git a/api/core/workflow/nodes/http_request/executor.py b/api/core/workflow/nodes/http_request/executor.py index 7de8216562..1e6e14482b 100644 --- a/api/core/workflow/nodes/http_request/executor.py +++ b/api/core/workflow/nodes/http_request/executor.py @@ -11,10 +11,10 @@ import httpx from json_repair import repair_json from configs import dify_config -from core.file.enums import FileTransferMethod -from core.file.file_manager import file_manager as default_file_manager from core.helper.ssrf_proxy import ssrf_proxy from core.variables.segments import ArrayFileSegment, FileSegment +from core.workflow.file.enums import FileTransferMethod +from core.workflow.file.file_manager import file_manager as default_file_manager from core.workflow.runtime import VariablePool from ..protocols import FileManagerProtocol, HttpClientProtocol diff --git a/api/core/workflow/nodes/http_request/node.py b/api/core/workflow/nodes/http_request/node.py index 480482375f..c9aca1b992 100644 --- a/api/core/workflow/nodes/http_request/node.py +++ b/api/core/workflow/nodes/http_request/node.py @@ -4,12 +4,12 @@ from collections.abc import Callable, Mapping, Sequence from typing import TYPE_CHECKING, Any from configs import dify_config -from core.file import File, FileTransferMethod -from core.file.file_manager import file_manager as default_file_manager from core.helper.ssrf_proxy import ssrf_proxy from core.tools.tool_file_manager import ToolFileManager from core.variables.segments import ArrayFileSegment from core.workflow.enums import NodeType, WorkflowNodeExecutionStatus +from core.workflow.file import File, FileTransferMethod +from core.workflow.file.file_manager import file_manager as default_file_manager from core.workflow.node_events import NodeRunResult from core.workflow.nodes.base import variable_template_parser from core.workflow.nodes.base.entities import VariableSelector diff --git a/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py b/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py index 65c2792355..b25c3a3d29 100644 --- a/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py +++ b/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py @@ -30,7 +30,7 @@ from .exc import ( ) if TYPE_CHECKING: - from core.file.models import File + from core.workflow.file.models import File from core.workflow.runtime import GraphRuntimeState logger = logging.getLogger(__name__) diff --git a/api/core/workflow/nodes/list_operator/node.py b/api/core/workflow/nodes/list_operator/node.py index 235f5b9c52..3978a79550 100644 --- a/api/core/workflow/nodes/list_operator/node.py +++ b/api/core/workflow/nodes/list_operator/node.py @@ -1,10 +1,10 @@ from collections.abc import Callable, Sequence from typing import Any, TypeAlias, TypeVar -from core.file import File from core.variables import ArrayFileSegment, ArrayNumberSegment, ArrayStringSegment from core.variables.segments import ArrayAnySegment, ArrayBooleanSegment, ArraySegment from core.workflow.enums import NodeType, WorkflowNodeExecutionStatus +from core.workflow.file import File from core.workflow.node_events import NodeRunResult from core.workflow.nodes.base.node import Node diff --git a/api/core/workflow/nodes/llm/file_saver.py b/api/core/workflow/nodes/llm/file_saver.py index 3f32fa894a..3c06ab7d81 100644 --- a/api/core/workflow/nodes/llm/file_saver.py +++ b/api/core/workflow/nodes/llm/file_saver.py @@ -4,10 +4,10 @@ import typing as tp from sqlalchemy import Engine from constants.mimetypes import DEFAULT_EXTENSION, DEFAULT_MIME_TYPE -from core.file import File, FileTransferMethod, FileType from core.helper import ssrf_proxy from core.tools.signature import sign_tool_file from core.tools.tool_file_manager import ToolFileManager +from core.workflow.file import File, FileTransferMethod, FileType from extensions.ext_database import db as global_db diff --git a/api/core/workflow/nodes/llm/llm_utils.py b/api/core/workflow/nodes/llm/llm_utils.py index 01e25cbf5c..78fad37659 100644 --- a/api/core/workflow/nodes/llm/llm_utils.py +++ b/api/core/workflow/nodes/llm/llm_utils.py @@ -7,7 +7,6 @@ from sqlalchemy.orm import Session from configs import dify_config from core.app.entities.app_invoke_entities import ModelConfigWithCredentialsEntity from core.entities.provider_entities import ProviderQuotaType, QuotaUnit -from core.file.models import File from core.memory.token_buffer_memory import TokenBufferMemory from core.model_manager import ModelInstance, ModelManager from core.model_runtime.entities.llm_entities import LLMUsage @@ -16,6 +15,7 @@ from core.model_runtime.model_providers.__base.large_language_model import Large from core.prompt.entities.advanced_prompt_entities import MemoryConfig from core.variables.segments import ArrayAnySegment, ArrayFileSegment, FileSegment, NoneSegment, StringSegment from core.workflow.enums import SystemVariableKey +from core.workflow.file.models import File from core.workflow.nodes.llm.entities import ModelConfig from core.workflow.runtime import VariablePool from extensions.ext_database import db diff --git a/api/core/workflow/nodes/llm/node.py b/api/core/workflow/nodes/llm/node.py index beccf79344..49ae5d16c7 100644 --- a/api/core/workflow/nodes/llm/node.py +++ b/api/core/workflow/nodes/llm/node.py @@ -12,7 +12,6 @@ from typing import TYPE_CHECKING, Any, Literal from sqlalchemy import select from core.app.entities.app_invoke_entities import ModelConfigWithCredentialsEntity -from core.file import File, FileTransferMethod, FileType, file_manager from core.helper.code_executor import CodeExecutor, CodeLanguage from core.llm_generator.output_parser.errors import OutputParserError from core.llm_generator.output_parser.structured_output import invoke_llm_with_structured_output @@ -65,6 +64,7 @@ from core.workflow.enums import ( WorkflowNodeExecutionMetadataKey, WorkflowNodeExecutionStatus, ) +from core.workflow.file import File, FileTransferMethod, FileType, file_manager from core.workflow.node_events import ( ModelInvokeCompletedEvent, NodeEventBase, @@ -101,7 +101,7 @@ from .exc import ( from .file_saver import FileSaverImpl, LLMFileSaver if TYPE_CHECKING: - from core.file.models import File + from core.workflow.file.models import File from core.workflow.runtime import GraphRuntimeState logger = logging.getLogger(__name__) diff --git a/api/core/workflow/nodes/loop/loop_node.py b/api/core/workflow/nodes/loop/loop_node.py index 84a9c29414..241a186a94 100644 --- a/api/core/workflow/nodes/loop/loop_node.py +++ b/api/core/workflow/nodes/loop/loop_node.py @@ -71,9 +71,9 @@ class LoopNode(LLMUsageTrackingMixin, Node[LoopNodeData]): if self.node_data.loop_variables: value_processor: dict[Literal["constant", "variable"], Callable[[LoopVariableData], Segment | None]] = { "constant": lambda var: self._get_segment_for_constant(var.var_type, var.value), - "variable": lambda var: self.graph_runtime_state.variable_pool.get(var.value) - if isinstance(var.value, list) - else None, + "variable": lambda var: ( + self.graph_runtime_state.variable_pool.get(var.value) if isinstance(var.value, list) else None + ), } for loop_variable in self.node_data.loop_variables: if loop_variable.value_type not in value_processor: diff --git a/api/core/workflow/nodes/parameter_extractor/parameter_extractor_node.py b/api/core/workflow/nodes/parameter_extractor/parameter_extractor_node.py index 08e0542d61..2f11a91b7e 100644 --- a/api/core/workflow/nodes/parameter_extractor/parameter_extractor_node.py +++ b/api/core/workflow/nodes/parameter_extractor/parameter_extractor_node.py @@ -6,7 +6,6 @@ from collections.abc import Mapping, Sequence from typing import Any, cast from core.app.entities.app_invoke_entities import ModelConfigWithCredentialsEntity -from core.file import File from core.memory.token_buffer_memory import TokenBufferMemory from core.model_manager import ModelInstance from core.model_runtime.entities import ImagePromptMessageContent @@ -28,6 +27,7 @@ from core.prompt.simple_prompt_transform import ModelMode from core.prompt.utils.prompt_message_util import PromptMessageUtil from core.variables.types import ArrayValidation, SegmentType from core.workflow.enums import NodeType, WorkflowNodeExecutionMetadataKey, WorkflowNodeExecutionStatus +from core.workflow.file import File from core.workflow.node_events import NodeRunResult from core.workflow.nodes.base import variable_template_parser from core.workflow.nodes.base.node import Node diff --git a/api/core/workflow/nodes/protocols.py b/api/core/workflow/nodes/protocols.py index 2ad39e0ab5..a1f3e20835 100644 --- a/api/core/workflow/nodes/protocols.py +++ b/api/core/workflow/nodes/protocols.py @@ -2,7 +2,7 @@ from typing import Any, Protocol import httpx -from core.file import File +from core.workflow.file import File class HttpClientProtocol(Protocol): diff --git a/api/core/workflow/nodes/question_classifier/question_classifier_node.py b/api/core/workflow/nodes/question_classifier/question_classifier_node.py index 4a3e8e56f8..6491e8e531 100644 --- a/api/core/workflow/nodes/question_classifier/question_classifier_node.py +++ b/api/core/workflow/nodes/question_classifier/question_classifier_node.py @@ -39,7 +39,7 @@ from .template_prompts import ( ) if TYPE_CHECKING: - from core.file.models import File + from core.workflow.file.models import File from core.workflow.runtime import GraphRuntimeState diff --git a/api/core/workflow/nodes/tool/tool_node.py b/api/core/workflow/nodes/tool/tool_node.py index 60d76db9b6..a7bf7d6642 100644 --- a/api/core/workflow/nodes/tool/tool_node.py +++ b/api/core/workflow/nodes/tool/tool_node.py @@ -5,7 +5,6 @@ from sqlalchemy import select from sqlalchemy.orm import Session from core.callback_handler.workflow_tool_callback_handler import DifyWorkflowCallbackHandler -from core.file import File, FileTransferMethod from core.model_runtime.entities.llm_entities import LLMUsage from core.tools.__base.tool import Tool from core.tools.entities.tool_entities import ToolInvokeMessage, ToolParameter @@ -20,6 +19,7 @@ from core.workflow.enums import ( WorkflowNodeExecutionMetadataKey, WorkflowNodeExecutionStatus, ) +from core.workflow.file import File, FileTransferMethod from core.workflow.node_events import NodeEventBase, NodeRunResult, StreamChunkEvent, StreamCompletedEvent from core.workflow.nodes.base.node import Node from core.workflow.nodes.base.variable_template_parser import VariableTemplateParser diff --git a/api/core/workflow/nodes/trigger_webhook/node.py b/api/core/workflow/nodes/trigger_webhook/node.py index ec8c4b8ee3..060afd6ae6 100644 --- a/api/core/workflow/nodes/trigger_webhook/node.py +++ b/api/core/workflow/nodes/trigger_webhook/node.py @@ -2,12 +2,12 @@ import logging from collections.abc import Mapping from typing import Any -from core.file import FileTransferMethod from core.variables.types import SegmentType from core.variables.variables import FileVariable from core.workflow.constants import SYSTEM_VARIABLE_NODE_ID from core.workflow.entities.workflow_node_execution import WorkflowNodeExecutionStatus from core.workflow.enums import NodeExecutionType, NodeType +from core.workflow.file import FileTransferMethod from core.workflow.node_events import NodeRunResult from core.workflow.nodes.base.node import Node from factories import file_factory diff --git a/api/core/workflow/runtime/variable_pool.py b/api/core/workflow/runtime/variable_pool.py index c4b077fa69..0ba9d8b3a8 100644 --- a/api/core/workflow/runtime/variable_pool.py +++ b/api/core/workflow/runtime/variable_pool.py @@ -8,7 +8,6 @@ from typing import Annotated, Any, Union, cast from pydantic import BaseModel, Field -from core.file import File, FileAttribute, file_manager from core.variables import Segment, SegmentGroup, VariableBase from core.variables.consts import SELECTORS_LENGTH from core.variables.segments import FileSegment, ObjectSegment @@ -19,6 +18,7 @@ from core.workflow.constants import ( RAG_PIPELINE_VARIABLE_NODE_ID, SYSTEM_VARIABLE_NODE_ID, ) +from core.workflow.file import File, FileAttribute, file_manager from core.workflow.system_variable import SystemVariable from factories import variable_factory diff --git a/api/core/workflow/system_variable.py b/api/core/workflow/system_variable.py index 6946e3e6ab..4144f79b8a 100644 --- a/api/core/workflow/system_variable.py +++ b/api/core/workflow/system_variable.py @@ -7,8 +7,8 @@ from uuid import uuid4 from pydantic import AliasChoices, BaseModel, ConfigDict, Field, model_validator -from core.file.models import File from core.workflow.enums import SystemVariableKey +from core.workflow.file.models import File class SystemVariable(BaseModel): diff --git a/api/core/workflow/utils/condition/processor.py b/api/core/workflow/utils/condition/processor.py index c6070b83b8..c3f25a4d62 100644 --- a/api/core/workflow/utils/condition/processor.py +++ b/api/core/workflow/utils/condition/processor.py @@ -2,9 +2,9 @@ import json from collections.abc import Mapping, Sequence from typing import Literal, NamedTuple -from core.file import FileAttribute, file_manager from core.variables import ArrayFileSegment from core.variables.segments import ArrayBooleanSegment, BooleanSegment +from core.workflow.file import FileAttribute, file_manager from core.workflow.runtime import VariablePool from .entities import Condition, SubCondition, SupportedComparisonOperator diff --git a/api/core/workflow/workflow_entry.py b/api/core/workflow/workflow_entry.py index 4b1845cda2..29ffb8027f 100644 --- a/api/core/workflow/workflow_entry.py +++ b/api/core/workflow/workflow_entry.py @@ -9,10 +9,10 @@ from core.app.apps.exc import GenerateTaskStoppedError from core.app.entities.app_invoke_entities import InvokeFrom from core.app.workflow.layers.observability import ObservabilityLayer from core.app.workflow.node_factory import DifyNodeFactory -from core.file.models import File from core.workflow.constants import ENVIRONMENT_VARIABLE_NODE_ID from core.workflow.entities import GraphInitParams from core.workflow.errors import WorkflowNodeRunFailedError +from core.workflow.file.models import File from core.workflow.graph import Graph from core.workflow.graph_engine import GraphEngine, GraphEngineConfig from core.workflow.graph_engine.command_channels import InMemoryChannel diff --git a/api/core/workflow/workflow_type_encoder.py b/api/core/workflow/workflow_type_encoder.py index f1f549e1f8..93c6a31960 100644 --- a/api/core/workflow/workflow_type_encoder.py +++ b/api/core/workflow/workflow_type_encoder.py @@ -4,8 +4,8 @@ from typing import Any, overload from pydantic import BaseModel -from core.file.models import File from core.variables import Segment +from core.workflow.file.models import File class WorkflowRuntimeTypeConverter: diff --git a/api/extensions/ext_storage.py b/api/extensions/ext_storage.py index 6df0879694..27b18d73d2 100644 --- a/api/extensions/ext_storage.py +++ b/api/extensions/ext_storage.py @@ -124,3 +124,6 @@ storage = Storage() def init_app(app: DifyApp): storage.init_app(app) + from core.app.workflow.file_runtime import bind_dify_workflow_file_runtime + + bind_dify_workflow_file_runtime() diff --git a/api/tests/conftest.py b/api/tests/conftest.py new file mode 100644 index 0000000000..e526685433 --- /dev/null +++ b/api/tests/conftest.py @@ -0,0 +1,8 @@ +import pytest + +from core.app.workflow.file_runtime import bind_dify_workflow_file_runtime + + +@pytest.fixture(autouse=True) +def _bind_workflow_file_runtime() -> None: + bind_dify_workflow_file_runtime() diff --git a/api/tests/unit_tests/core/workflow/nodes/llm/test_file_saver.py b/api/tests/unit_tests/core/workflow/nodes/llm/test_file_saver.py index 1e224d56a5..0677f1bb52 100644 --- a/api/tests/unit_tests/core/workflow/nodes/llm/test_file_saver.py +++ b/api/tests/unit_tests/core/workflow/nodes/llm/test_file_saver.py @@ -6,10 +6,10 @@ import httpx import pytest from sqlalchemy import Engine -from core.file import FileTransferMethod, FileType, models from core.helper import ssrf_proxy from core.tools import signature from core.tools.tool_file_manager import ToolFileManager +from core.workflow.file import FileTransferMethod, FileType, models from core.workflow.nodes.llm.file_saver import ( FileSaverImpl, _extract_content_type_and_extension, diff --git a/api/tests/unit_tests/core/workflow/nodes/llm/test_node.py b/api/tests/unit_tests/core/workflow/nodes/llm/test_node.py index 3d1b8b2f27..b0661f7d29 100644 --- a/api/tests/unit_tests/core/workflow/nodes/llm/test_node.py +++ b/api/tests/unit_tests/core/workflow/nodes/llm/test_node.py @@ -8,7 +8,6 @@ import pytest from core.app.entities.app_invoke_entities import InvokeFrom, ModelConfigWithCredentialsEntity from core.entities.provider_configuration import ProviderConfiguration, ProviderModelBundle from core.entities.provider_entities import CustomConfiguration, SystemConfiguration -from core.file import File, FileTransferMethod, FileType from core.model_runtime.entities.common_entities import I18nObject from core.model_runtime.entities.message_entities import ( ImagePromptMessageContent, @@ -21,6 +20,7 @@ from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, from core.model_runtime.model_providers.model_provider_factory import ModelProviderFactory from core.variables import ArrayAnySegment, ArrayFileSegment, NoneSegment from core.workflow.entities import GraphInitParams +from core.workflow.file import File, FileTransferMethod, FileType from core.workflow.nodes.llm import llm_utils from core.workflow.nodes.llm.entities import ( ContextConfig, diff --git a/api/tests/unit_tests/core/workflow/nodes/llm/test_scenarios.py b/api/tests/unit_tests/core/workflow/nodes/llm/test_scenarios.py index 21bb857353..ac0c1df9c5 100644 --- a/api/tests/unit_tests/core/workflow/nodes/llm/test_scenarios.py +++ b/api/tests/unit_tests/core/workflow/nodes/llm/test_scenarios.py @@ -2,9 +2,9 @@ from collections.abc import Mapping, Sequence from pydantic import BaseModel, Field -from core.file import File from core.model_runtime.entities.message_entities import PromptMessage from core.model_runtime.entities.model_entities import ModelFeature +from core.workflow.file import File from core.workflow.nodes.llm.entities import LLMNodeChatModelMessage diff --git a/api/tests/unit_tests/core/workflow/nodes/test_document_extractor_node.py b/api/tests/unit_tests/core/workflow/nodes/test_document_extractor_node.py index 088c60a337..669f36c100 100644 --- a/api/tests/unit_tests/core/workflow/nodes/test_document_extractor_node.py +++ b/api/tests/unit_tests/core/workflow/nodes/test_document_extractor_node.py @@ -6,12 +6,12 @@ import pytest from docx.oxml.text.paragraph import CT_P from core.app.entities.app_invoke_entities import InvokeFrom -from core.file import File, FileTransferMethod from core.variables import ArrayFileSegment from core.variables.segments import ArrayStringSegment from core.variables.variables import StringVariable from core.workflow.entities import GraphInitParams from core.workflow.enums import NodeType, WorkflowNodeExecutionStatus +from core.workflow.file import File, FileTransferMethod from core.workflow.node_events import NodeRunResult from core.workflow.nodes.document_extractor import DocumentExtractorNode, DocumentExtractorNodeData from core.workflow.nodes.document_extractor.node import ( @@ -146,7 +146,7 @@ def test_run_extract_text( mock_ssrf_proxy_get.return_value.content = file_content mock_ssrf_proxy_get.return_value.raise_for_status = Mock() - monkeypatch.setattr("core.file.file_manager.download", mock_download) + monkeypatch.setattr("core.workflow.file.file_manager.download", mock_download) monkeypatch.setattr("core.helper.ssrf_proxy.get", mock_ssrf_proxy_get) if mime_type == "application/pdf": diff --git a/api/tests/unit_tests/core/workflow/nodes/test_if_else.py b/api/tests/unit_tests/core/workflow/nodes/test_if_else.py index d700888c2f..930bdbda4a 100644 --- a/api/tests/unit_tests/core/workflow/nodes/test_if_else.py +++ b/api/tests/unit_tests/core/workflow/nodes/test_if_else.py @@ -6,10 +6,10 @@ import pytest from core.app.entities.app_invoke_entities import InvokeFrom from core.app.workflow.node_factory import DifyNodeFactory -from core.file import File, FileTransferMethod, FileType from core.variables import ArrayFileSegment from core.workflow.entities import GraphInitParams from core.workflow.enums import WorkflowNodeExecutionStatus +from core.workflow.file import File, FileTransferMethod, FileType from core.workflow.graph import Graph from core.workflow.nodes.if_else.entities import IfElseNodeData from core.workflow.nodes.if_else.if_else_node import IfElseNode diff --git a/api/tests/unit_tests/core/workflow/nodes/test_list_operator.py b/api/tests/unit_tests/core/workflow/nodes/test_list_operator.py index ff3eec0608..66ddc0d3c7 100644 --- a/api/tests/unit_tests/core/workflow/nodes/test_list_operator.py +++ b/api/tests/unit_tests/core/workflow/nodes/test_list_operator.py @@ -3,9 +3,9 @@ from unittest.mock import MagicMock import pytest from core.app.entities.app_invoke_entities import InvokeFrom -from core.file import File, FileTransferMethod, FileType from core.variables import ArrayFileSegment from core.workflow.enums import WorkflowNodeExecutionStatus +from core.workflow.file import File, FileTransferMethod, FileType from core.workflow.nodes.list_operator.entities import ( ExtractConfig, FilterBy, diff --git a/api/tests/unit_tests/core/workflow/nodes/tool/test_tool_node.py b/api/tests/unit_tests/core/workflow/nodes/tool/test_tool_node.py index 06927cddcf..526ff72c8c 100644 --- a/api/tests/unit_tests/core/workflow/nodes/tool/test_tool_node.py +++ b/api/tests/unit_tests/core/workflow/nodes/tool/test_tool_node.py @@ -8,12 +8,12 @@ from unittest.mock import MagicMock, patch import pytest -from core.file import File, FileTransferMethod, FileType from core.model_runtime.entities.llm_entities import LLMUsage from core.tools.entities.tool_entities import ToolInvokeMessage from core.tools.utils.message_transformer import ToolFileMessageTransformer from core.variables.segments import ArrayFileSegment from core.workflow.entities import GraphInitParams +from core.workflow.file import File, FileTransferMethod, FileType from core.workflow.node_events import StreamChunkEvent, StreamCompletedEvent from core.workflow.runtime import GraphRuntimeState, VariablePool from core.workflow.system_variable import SystemVariable diff --git a/api/tests/unit_tests/core/workflow/nodes/webhook/test_webhook_node.py b/api/tests/unit_tests/core/workflow/nodes/webhook/test_webhook_node.py index 3b5aedebca..8ceaad5cc9 100644 --- a/api/tests/unit_tests/core/workflow/nodes/webhook/test_webhook_node.py +++ b/api/tests/unit_tests/core/workflow/nodes/webhook/test_webhook_node.py @@ -3,10 +3,10 @@ from unittest.mock import patch import pytest from core.app.entities.app_invoke_entities import InvokeFrom -from core.file import File, FileTransferMethod, FileType from core.variables import FileVariable, StringVariable from core.workflow.entities.graph_init_params import GraphInitParams from core.workflow.entities.workflow_node_execution import WorkflowNodeExecutionStatus +from core.workflow.file import File, FileTransferMethod, FileType from core.workflow.nodes.trigger_webhook.entities import ( ContentType, Method, diff --git a/api/tests/unit_tests/core/workflow/test_system_variable.py b/api/tests/unit_tests/core/workflow/test_system_variable.py index f76e81ae55..93e7c9f68d 100644 --- a/api/tests/unit_tests/core/workflow/test_system_variable.py +++ b/api/tests/unit_tests/core/workflow/test_system_variable.py @@ -4,8 +4,8 @@ from typing import Any import pytest from pydantic import ValidationError -from core.file.enums import FileTransferMethod, FileType -from core.file.models import File +from core.workflow.file.enums import FileTransferMethod, FileType +from core.workflow.file.models import File from core.workflow.system_variable import SystemVariable # Test data constants for SystemVariable serialization tests diff --git a/api/tests/unit_tests/core/workflow/test_system_variable_read_only_view.py b/api/tests/unit_tests/core/workflow/test_system_variable_read_only_view.py index 57bc96fe71..743fecaed0 100644 --- a/api/tests/unit_tests/core/workflow/test_system_variable_read_only_view.py +++ b/api/tests/unit_tests/core/workflow/test_system_variable_read_only_view.py @@ -2,7 +2,7 @@ from typing import cast import pytest -from core.file.models import File, FileTransferMethod, FileType +from core.workflow.file.models import File, FileTransferMethod, FileType from core.workflow.system_variable import SystemVariable, SystemVariableReadOnlyView diff --git a/api/tests/unit_tests/core/workflow/test_variable_pool.py b/api/tests/unit_tests/core/workflow/test_variable_pool.py index b8869dbf1d..fb9a893d43 100644 --- a/api/tests/unit_tests/core/workflow/test_variable_pool.py +++ b/api/tests/unit_tests/core/workflow/test_variable_pool.py @@ -3,7 +3,6 @@ from collections import defaultdict import pytest -from core.file import File, FileTransferMethod, FileType from core.variables import FileSegment, StringSegment from core.variables.segments import ( ArrayAnySegment, @@ -27,6 +26,7 @@ from core.variables.variables import ( Variable, ) from core.workflow.constants import CONVERSATION_VARIABLE_NODE_ID, ENVIRONMENT_VARIABLE_NODE_ID, SYSTEM_VARIABLE_NODE_ID +from core.workflow.file import File, FileTransferMethod, FileType from core.workflow.runtime import VariablePool from core.workflow.system_variable import SystemVariable from factories.variable_factory import build_segment, segment_to_variable diff --git a/api/tests/unit_tests/core/workflow/test_workflow_entry.py b/api/tests/unit_tests/core/workflow/test_workflow_entry.py index 27ffa455d6..793b0d4eba 100644 --- a/api/tests/unit_tests/core/workflow/test_workflow_entry.py +++ b/api/tests/unit_tests/core/workflow/test_workflow_entry.py @@ -3,14 +3,14 @@ from types import SimpleNamespace import pytest from configs import dify_config -from core.file.enums import FileType -from core.file.models import File, FileTransferMethod from core.helper.code_executor.code_executor import CodeLanguage from core.variables.variables import StringVariable from core.workflow.constants import ( CONVERSATION_VARIABLE_NODE_ID, ENVIRONMENT_VARIABLE_NODE_ID, ) +from core.workflow.file.enums import FileType +from core.workflow.file.models import File, FileTransferMethod from core.workflow.nodes.code.code_node import CodeNode from core.workflow.nodes.code.limits import CodeNodeLimits from core.workflow.runtime import VariablePool