Files
dify/api/core/tools/tool_file_manager.py
copilot-swe-agent[bot] 215d3ed42d Merge remote-tracking branch 'origin/deploy/dev' into feat/evaluation
# Conflicts:
#	.vite-hooks/pre-commit
#	api/controllers/console/__init__.py
#	api/core/agent/base_agent_runner.py
#	api/core/app/app_config/easy_ui_based_app/model_config/converter.py
#	api/core/app/apps/agent_chat/app_runner.py
#	api/core/entities/provider_configuration.py
#	api/core/helper/moderation.py
#	api/core/model_manager.py
#	api/core/rag/embedding/cached_embedding.py
#	api/core/rag/retrieval/dataset_retrieval.py
#	api/core/rag/splitter/fixed_text_splitter.py
#	api/core/workflow/nodes/datasource/datasource_node.py
#	api/core/workflow/nodes/knowledge_index/knowledge_index_node.py
#	api/models/human_input.py
#	api/providers/trace/trace-tencent/src/dify_trace_tencent/span_builder.py
#	api/services/workflow_service.py
#	api/tasks/trigger_processing_tasks.py
#	api/tests/integration_tests/core/workflow/nodes/datasource/test_datasource_node_integration.py
#	api/tests/integration_tests/workflow/nodes/test_http.py
#	api/tests/integration_tests/workflow/nodes/test_parameter_extractor.py
#	api/tests/unit_tests/controllers/service_api/app/test_conversation.py
#	api/tests/unit_tests/core/prompt/test_agent_history_prompt_transform.py
#	api/tests/unit_tests/core/variables/test_segment.py
#	api/tests/unit_tests/core/workflow/graph_engine/test_mock_factory.py
#	api/tests/unit_tests/core/workflow/nodes/answer/test_answer.py
#	api/tests/unit_tests/core/workflow/nodes/datasource/test_datasource_node.py
#	api/tests/unit_tests/core/workflow/nodes/http_request/test_http_request_node.py
#	api/tests/unit_tests/core/workflow/nodes/human_input/test_email_delivery_config.py
#	api/tests/unit_tests/services/workflow/test_workflow_human_input_delivery.py
#	web/app/(commonLayout)/layout.tsx
#	web/app/components/app/configuration/dataset-config/params-config/weighted-score.tsx
#	web/app/components/app/configuration/debug/debug-with-multiple-model/debug-item.tsx
#	web/app/components/app/workflow-log/__tests__/list.spec.tsx
#	web/app/components/apps/__tests__/list.spec.tsx
#	web/app/components/apps/list.tsx
#	web/app/components/base/chat/chat-with-history/header/operation.tsx
#	web/app/components/base/chat/chat-with-history/sidebar/operation.tsx
#	web/app/components/header/account-setting/data-source-page-new/operator.tsx
#	web/app/components/header/account-setting/members-page/operation/index.tsx
#	web/app/components/plugins/marketplace/sort-dropdown/__tests__/index.spec.tsx
#	web/app/components/plugins/marketplace/sort-dropdown/index.tsx
#	web/app/components/plugins/plugin-page/plugin-tasks/index.tsx
#	web/app/components/workflow/header/__tests__/test-run-menu.spec.tsx
#	web/app/components/workflow/header/test-run-menu.tsx
#	web/app/components/workflow/nodes/_base/components/next-step/operator.tsx
#	web/app/components/workflow/nodes/_base/components/panel-operator/index.tsx
#	web/app/components/workflow/nodes/assigner/components/__tests__/operation-selector.spec.tsx
#	web/app/components/workflow/nodes/assigner/components/operation-selector.tsx
#	web/app/components/workflow/operator/__tests__/more-actions.spec.tsx
#	web/app/components/workflow/operator/zoom-in-out.tsx
#	web/app/components/workflow/panel/version-history-panel/context-menu/menu-item.tsx
#	web/app/components/workflow/selection-contextmenu.tsx
#	web/eslint-suppressions.json

Co-authored-by: FFXN <31929997+FFXN@users.noreply.github.com>
2026-04-20 07:03:29 +00:00

238 lines
7.9 KiB
Python

import base64
import hashlib
import hmac
import logging
import os
import time
from collections.abc import Generator
from mimetypes import guess_extension, guess_type
from uuid import uuid4
import httpx
from graphon.file import File, FileTransferMethod, get_file_type_by_mime_type
from sqlalchemy import select
from configs import dify_config
from core.db.session_factory import session_factory
from core.helper import ssrf_proxy
from core.workflow.file_reference import build_file_reference
from extensions.ext_storage import storage
from models.model import MessageFile
from models.tools import ToolFile
logger = logging.getLogger(__name__)
class ToolFileManager:
    """Create, sign, verify and load files produced by tools.

    File contents are written through ``storage`` under ``tools/{tenant_id}/``
    and one ``ToolFile`` row is persisted for every stored file.
    """

    @staticmethod
    def _build_graph_file_reference(tool_file: ToolFile) -> File:
        """
        build the graph ``File`` value for a tool file record

        :param tool_file: the persisted tool file record
        :return: a ``File`` using the ``TOOL_FILE`` transfer method
        """
        # Fall back to ".bin" when the MIME type maps to no known extension.
        extension = guess_extension(tool_file.mimetype) or ".bin"
        return File(
            file_type=get_file_type_by_mime_type(tool_file.mimetype),
            transfer_method=FileTransferMethod.TOOL_FILE,
            remote_url=tool_file.original_url,
            reference=build_file_reference(record_id=str(tool_file.id)),
            filename=tool_file.name,
            extension=extension,
            mime_type=tool_file.mimetype,
            size=tool_file.size,
            storage_key=tool_file.file_key,
        )

    @staticmethod
    def _compute_signature(file_id: str, timestamp: str, nonce: str) -> str:
        """
        compute the url-safe base64 HMAC-SHA256 signature of a file preview request

        Shared by ``sign_file`` and ``verify_file`` so that both sides always
        sign exactly the same payload layout.

        :param file_id: the id of the tool file
        :param timestamp: the unix timestamp (seconds) as a string
        :param nonce: the random nonce of the request
        :return: the encoded signature
        """
        data_to_sign = f"file-preview|{file_id}|{timestamp}|{nonce}"
        # An unset SECRET_KEY degrades to an empty key instead of raising.
        secret_key = dify_config.SECRET_KEY.encode() if dify_config.SECRET_KEY else b""
        digest = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest()
        return base64.urlsafe_b64encode(digest).decode()

    @staticmethod
    def sign_file(tool_file_id: str, extension: str) -> str:
        """
        sign file to get a temporary url for plugin access

        :param tool_file_id: the id of the tool file
        :param extension: the extension appended to the id in the url path
        :return: the preview url with ``timestamp``, ``nonce`` and ``sign`` query parameters
        """
        # Use internal URL for plugin/tool file access in Docker environments
        base_url = dify_config.INTERNAL_FILES_URL or dify_config.FILES_URL
        file_preview_url = f"{base_url}/files/tools/{tool_file_id}{extension}"

        timestamp = str(int(time.time()))
        nonce = os.urandom(16).hex()
        encoded_sign = ToolFileManager._compute_signature(tool_file_id, timestamp, nonce)

        return f"{file_preview_url}?timestamp={timestamp}&nonce={nonce}&sign={encoded_sign}"

    @staticmethod
    def verify_file(file_id: str, timestamp: str, nonce: str, sign: str) -> bool:
        """
        verify signature

        :param file_id: the id of the tool file
        :param timestamp: the ``timestamp`` query parameter of the signed url
        :param nonce: the ``nonce`` query parameter of the signed url
        :param sign: the ``sign`` query parameter of the signed url
        :return: True when the signature matches and the url is not older than
            ``dify_config.FILES_ACCESS_TIMEOUT`` seconds
        """
        expected_sign = ToolFileManager._compute_signature(file_id, timestamp, nonce)

        # Compare in constant time so response timing does not leak how much of
        # the signature matched. Bytes are compared because compare_digest()
        # rejects non-ASCII str; a non-str signature is simply a mismatch.
        if not isinstance(sign, str) or not hmac.compare_digest(sign.encode(), expected_sign.encode()):
            return False

        current_time = int(time.time())
        return current_time - int(timestamp) <= dify_config.FILES_ACCESS_TIMEOUT

    def create_file_by_raw(
        self,
        *,
        user_id: str,
        tenant_id: str,
        conversation_id: str | None,
        file_binary: bytes,
        mimetype: str,
        filename: str | None = None,
    ) -> ToolFile:
        """
        store raw bytes as a tool file

        :param user_id: the id of the user owning the file
        :param tenant_id: the id of the tenant, used in the storage path
        :param conversation_id: the id of the related conversation, if any
        :param file_binary: the content of the file
        :param mimetype: the mime type of the file
        :param filename: optional display name; the guessed extension is appended when it contains no "."
        :return: the persisted tool file record
        """
        extension = guess_extension(mimetype) or ".bin"
        unique_filename = f"{uuid4().hex}{extension}"

        # Without an explicit filename the generated unique name is shown.
        present_filename = unique_filename
        if filename is not None:
            # Only add the guessed extension when the name has none.
            present_filename = filename if "." in filename else f"{filename}{extension}"

        # The storage key always uses the generated name, never the display name.
        filepath = f"tools/{tenant_id}/{unique_filename}"
        storage.save(filepath, file_binary)

        with session_factory.create_session() as session:
            tool_file = ToolFile(
                user_id=user_id,
                tenant_id=tenant_id,
                conversation_id=conversation_id,
                file_key=filepath,
                mimetype=mimetype,
                name=present_filename,
                size=len(file_binary),
                original_url=None,
            )
            session.add(tool_file)
            session.commit()
            # Reload the attributes expired by the commit before the session closes.
            session.refresh(tool_file)

        return tool_file

    def create_file_by_url(
        self,
        user_id: str,
        tenant_id: str,
        file_url: str,
        conversation_id: str | None = None,
    ) -> ToolFile:
        """
        download a file from a url and store it as a tool file

        :param user_id: the id of the user owning the file
        :param tenant_id: the id of the tenant, used in the storage path
        :param file_url: the url to download from, kept as ``original_url``
        :param conversation_id: the id of the related conversation, if any
        :return: the persisted tool file record
        :raises ValueError: when the download times out; other errors raised by
            the request or by ``raise_for_status()`` propagate unchanged
        """
        # try to download the file through the SSRF proxy helper
        try:
            response = ssrf_proxy.get(file_url)
            response.raise_for_status()
            blob = response.content
        except httpx.TimeoutException as e:
            # Chain the cause so the underlying timeout stays in the traceback.
            raise ValueError(f"timeout when downloading file from {file_url}") from e

        # Prefer the type implied by the url, then the response header, then a generic default.
        mimetype = (
            guess_type(file_url)[0]
            or response.headers.get("Content-Type", "").split(";")[0].strip()
            or "application/octet-stream"
        )
        extension = guess_extension(mimetype) or ".bin"
        filename = f"{uuid4().hex}{extension}"
        filepath = f"tools/{tenant_id}/{filename}"
        storage.save(filepath, blob)

        with session_factory.create_session() as session:
            tool_file = ToolFile(
                user_id=user_id,
                tenant_id=tenant_id,
                conversation_id=conversation_id,
                file_key=filepath,
                mimetype=mimetype,
                original_url=file_url,
                name=filename,
                size=len(blob),
            )
            session.add(tool_file)
            session.commit()
            # Reload the attributes expired by the commit before the session closes.
            session.refresh(tool_file)

        return tool_file

    def get_file_binary(self, id: str) -> tuple[bytes, str] | None:
        """
        get file binary

        :param id: the id of the file
        :return: the binary of the file and its mime type, or None when no such tool file exists
        """
        with session_factory.create_session() as session:
            tool_file: ToolFile | None = session.scalar(select(ToolFile).where(ToolFile.id == id).limit(1))

        if not tool_file:
            return None

        blob = storage.load_once(tool_file.file_key)
        return blob, tool_file.mimetype

    def get_file_binary_by_message_file_id(self, id: str) -> tuple[bytes, str] | None:
        """
        get file binary of the tool file referenced by a message file

        :param id: the id of the message file
        :return: the binary of the file and its mime type, or None when the
            message file, its url, or the referenced tool file is missing
        """
        with session_factory.create_session() as session:
            message_file: MessageFile | None = session.scalar(select(MessageFile).where(MessageFile.id == id).limit(1))

            # Nothing to resolve: skip the tool file lookup instead of querying for a NULL id.
            if message_file is None or message_file.url is None:
                return None

            # The tool file id is the last path segment of the url, up to its first ".".
            tool_file_id = message_file.url.split("/")[-1].split(".")[0]

            tool_file: ToolFile | None = session.scalar(select(ToolFile).where(ToolFile.id == tool_file_id).limit(1))

        if not tool_file:
            return None

        blob = storage.load_once(tool_file.file_key)
        return blob, tool_file.mimetype

    def get_file_generator_by_tool_file_id(self, tool_file_id: str) -> tuple[Generator | None, File | None]:
        """
        get a content stream and the graph file reference of a tool file

        :param tool_file_id: the id of the tool file
        :return: the stream returned by ``storage.load_stream`` and the ``File``
            built for the record, or ``(None, None)`` when no such tool file exists
        """
        with session_factory.create_session() as session:
            tool_file: ToolFile | None = session.scalar(select(ToolFile).where(ToolFile.id == tool_file_id).limit(1))

        if not tool_file:
            return None, None

        stream = storage.load_stream(tool_file.file_key)
        return stream, self._build_graph_file_reference(tool_file)
# Register this module's manager with the tool_file_parser when the module is imported.
from graphon.file.tool_file_parser import set_tool_file_manager_factory


def _factory() -> ToolFileManager:
    """Return a new ``ToolFileManager`` instance on every call."""
    return ToolFileManager()


set_tool_file_manager_factory(_factory)