Merge branch 'main' into chore/ssrf-config

-LAN-
2025-09-30 15:55:43 +08:00
committed by GitHub
73 changed files with 833 additions and 1288 deletions

View File

@@ -1,4 +1,5 @@
import flask_restx
from typing import Any, cast
from flask import request
from flask_login import current_user
from flask_restx import Resource, fields, marshal, marshal_with, reqparse
@@ -31,12 +32,13 @@ from fields.dataset_fields import dataset_detail_fields, dataset_query_detail_fi
from fields.document_fields import document_status_fields
from libs.login import login_required
from models import ApiToken, Dataset, Document, DocumentSegment, UploadFile
from models.account import Account
from models.dataset import DatasetPermissionEnum
from models.provider_ids import ModelProviderID
from services.dataset_service import DatasetPermissionService, DatasetService, DocumentService
def _validate_name(name):
def _validate_name(name: str) -> str:
if not name or len(name) < 1 or len(name) > 40:
raise ValueError("Name must be between 1 to 40 characters.")
return name
@@ -92,7 +94,7 @@ class DatasetListApi(Resource):
for embedding_model in embedding_models:
model_names.append(f"{embedding_model.model}:{embedding_model.provider.provider}")
data = marshal(datasets, dataset_detail_fields)
data = cast(list[dict[str, Any]], marshal(datasets, dataset_detail_fields))
for item in data:
# convert embedding_model_provider to plugin standard format
if item["indexing_technique"] == "high_quality" and item["embedding_model_provider"]:
@@ -192,7 +194,7 @@ class DatasetListApi(Resource):
name=args["name"],
description=args["description"],
indexing_technique=args["indexing_technique"],
account=current_user,
account=cast(Account, current_user),
permission=DatasetPermissionEnum.ONLY_ME,
provider=args["provider"],
external_knowledge_api_id=args["external_knowledge_api_id"],
@@ -224,7 +226,7 @@ class DatasetApi(Resource):
DatasetService.check_dataset_permission(dataset, current_user)
except services.errors.account.NoPermissionError as e:
raise Forbidden(str(e))
data = marshal(dataset, dataset_detail_fields)
data = cast(dict[str, Any], marshal(dataset, dataset_detail_fields))
if dataset.indexing_technique == "high_quality":
if dataset.embedding_model_provider:
provider_id = ModelProviderID(dataset.embedding_model_provider)
@@ -369,7 +371,7 @@ class DatasetApi(Resource):
if dataset is None:
raise NotFound("Dataset not found.")
result_data = marshal(dataset, dataset_detail_fields)
result_data = cast(dict[str, Any], marshal(dataset, dataset_detail_fields))
tenant_id = current_user.current_tenant_id
if data.get("partial_member_list") and data.get("permission") == "partial_members":
@@ -688,7 +690,7 @@ class DatasetApiKeyApi(Resource):
)
if current_key_count >= self.max_keys:
flask_restx.abort(
api.abort(
400,
message=f"Cannot create more than {self.max_keys} API keys for this resource type.",
code="max_keys_exceeded",
@@ -733,7 +735,7 @@ class DatasetApiDeleteApi(Resource):
)
if key is None:
flask_restx.abort(404, message="API key not found")
api.abort(404, message="API key not found")
db.session.query(ApiToken).where(ApiToken.id == api_key_id).delete()
db.session.commit()

View File

@@ -55,6 +55,7 @@ from fields.document_fields import (
from libs.datetime_utils import naive_utc_now
from libs.login import login_required
from models import Dataset, DatasetProcessRule, Document, DocumentSegment, UploadFile
from models.account import Account
from models.dataset import DocumentPipelineExecutionLog
from services.dataset_service import DatasetService, DocumentService
from services.entities.knowledge_entities.knowledge_entities import KnowledgeConfig
@@ -418,7 +419,9 @@ class DatasetInitApi(Resource):
try:
dataset, documents, batch = DocumentService.save_document_without_dataset_id(
tenant_id=current_user.current_tenant_id, knowledge_config=knowledge_config, account=current_user
tenant_id=current_user.current_tenant_id,
knowledge_config=knowledge_config,
account=cast(Account, current_user),
)
except ProviderTokenNotInitError as ex:
raise ProviderNotInitializeError(ex.description)
@@ -452,7 +455,7 @@ class DocumentIndexingEstimateApi(DocumentResource):
raise DocumentAlreadyFinishedError()
data_process_rule = document.dataset_process_rule
data_process_rule_dict = data_process_rule.to_dict()
data_process_rule_dict = data_process_rule.to_dict() if data_process_rule else {}
response = {"tokens": 0, "total_price": 0, "currency": "USD", "total_segments": 0, "preview": []}
@@ -514,7 +517,7 @@ class DocumentBatchIndexingEstimateApi(DocumentResource):
if not documents:
return {"tokens": 0, "total_price": 0, "currency": "USD", "total_segments": 0, "preview": []}, 200
data_process_rule = documents[0].dataset_process_rule
data_process_rule_dict = data_process_rule.to_dict()
data_process_rule_dict = data_process_rule.to_dict() if data_process_rule else {}
extract_settings = []
for document in documents:
if document.indexing_status in {"completed", "error"}:
@@ -753,7 +756,7 @@ class DocumentApi(DocumentResource):
}
else:
dataset_process_rules = DatasetService.get_process_rules(dataset_id)
document_process_rules = document.dataset_process_rule.to_dict()
document_process_rules = document.dataset_process_rule.to_dict() if document.dataset_process_rule else {}
data_source_info = document.data_source_detail_dict
response = {
"id": document.id,
@@ -1073,7 +1076,9 @@ class DocumentRenameApi(DocumentResource):
if not current_user.is_dataset_editor:
raise Forbidden()
dataset = DatasetService.get_dataset(dataset_id)
DatasetService.check_dataset_operator_permission(current_user, dataset)
if not dataset:
raise NotFound("Dataset not found.")
DatasetService.check_dataset_operator_permission(cast(Account, current_user), dataset)
parser = reqparse.RequestParser()
parser.add_argument("name", type=str, required=True, nullable=False, location="json")
args = parser.parse_args()

View File

@@ -392,7 +392,12 @@ class DatasetDocumentSegmentBatchImportApi(Resource):
# send batch add segments task
redis_client.setnx(indexing_cache_key, "waiting")
batch_create_segment_to_index_task.delay(
str(job_id), upload_file_id, dataset_id, document_id, current_user.current_tenant_id, current_user.id
str(job_id),
upload_file_id,
dataset_id,
document_id,
current_user.current_tenant_id,
current_user.id,
)
except Exception as e:
return {"error": str(e)}, 500
@@ -468,7 +473,8 @@ class ChildChunkAddApi(Resource):
parser.add_argument("content", type=str, required=True, nullable=False, location="json")
args = parser.parse_args()
try:
child_chunk = SegmentService.create_child_chunk(args.get("content"), segment, document, dataset)
content = args["content"]
child_chunk = SegmentService.create_child_chunk(content, segment, document, dataset)
except ChildChunkIndexingServiceError as e:
raise ChildChunkIndexingError(str(e))
return {"data": marshal(child_chunk, child_chunk_fields)}, 200
@@ -557,7 +563,8 @@ class ChildChunkAddApi(Resource):
parser.add_argument("chunks", type=list, required=True, nullable=False, location="json")
args = parser.parse_args()
try:
chunks = [ChildChunkUpdateArgs(**chunk) for chunk in args.get("chunks")]
chunks_data = args["chunks"]
chunks = [ChildChunkUpdateArgs(**chunk) for chunk in chunks_data]
child_chunks = SegmentService.update_child_chunks(chunks, segment, document, dataset)
except ChildChunkIndexingServiceError as e:
raise ChildChunkIndexingError(str(e))
@@ -674,9 +681,8 @@ class ChildChunkUpdateApi(Resource):
parser.add_argument("content", type=str, required=True, nullable=False, location="json")
args = parser.parse_args()
try:
child_chunk = SegmentService.update_child_chunk(
args.get("content"), child_chunk, segment, document, dataset
)
content = args["content"]
child_chunk = SegmentService.update_child_chunk(content, child_chunk, segment, document, dataset)
except ChildChunkIndexingServiceError as e:
raise ChildChunkIndexingError(str(e))
return {"data": marshal(child_chunk, child_chunk_fields)}, 200

View File

@@ -1,3 +1,5 @@
from typing import cast
from flask import request
from flask_login import current_user
from flask_restx import Resource, fields, marshal, reqparse
@@ -9,13 +11,14 @@ from controllers.console.datasets.error import DatasetNameDuplicateError
from controllers.console.wraps import account_initialization_required, setup_required
from fields.dataset_fields import dataset_detail_fields
from libs.login import login_required
from models.account import Account
from services.dataset_service import DatasetService
from services.external_knowledge_service import ExternalDatasetService
from services.hit_testing_service import HitTestingService
from services.knowledge_service import ExternalDatasetTestService
def _validate_name(name):
def _validate_name(name: str) -> str:
if not name or len(name) < 1 or len(name) > 100:
raise ValueError("Name must be between 1 to 100 characters.")
return name
@@ -274,7 +277,7 @@ class ExternalKnowledgeHitTestingApi(Resource):
response = HitTestingService.external_retrieve(
dataset=dataset,
query=args["query"],
account=current_user,
account=cast(Account, current_user),
external_retrieval_model=args["external_retrieval_model"],
metadata_filtering_conditions=args["metadata_filtering_conditions"],
)

View File

@@ -1,10 +1,11 @@
import logging
from typing import cast
from flask_login import current_user
from flask_restx import marshal, reqparse
from werkzeug.exceptions import Forbidden, InternalServerError, NotFound
import services.dataset_service
import services
from controllers.console.app.error import (
CompletionRequestError,
ProviderModelCurrentlyNotSupportError,
@@ -20,6 +21,7 @@ from core.errors.error import (
)
from core.model_runtime.errors.invoke import InvokeError
from fields.hit_testing_fields import hit_testing_record_fields
from models.account import Account
from services.dataset_service import DatasetService
from services.hit_testing_service import HitTestingService
@@ -59,7 +61,7 @@ class DatasetsHitTestingBase:
response = HitTestingService.retrieve(
dataset=dataset,
query=args["query"],
account=current_user,
account=cast(Account, current_user),
retrieval_model=args["retrieval_model"],
external_retrieval_model=args["external_retrieval_model"],
limit=10,

View File

@@ -62,6 +62,7 @@ class DatasetMetadataApi(Resource):
parser = reqparse.RequestParser()
parser.add_argument("name", type=str, required=True, nullable=False, location="json")
args = parser.parse_args()
name = args["name"]
dataset_id_str = str(dataset_id)
metadata_id_str = str(metadata_id)
@@ -70,7 +71,7 @@ class DatasetMetadataApi(Resource):
raise NotFound("Dataset not found.")
DatasetService.check_dataset_permission(dataset, current_user)
metadata = MetadataService.update_metadata_name(dataset_id_str, metadata_id_str, args.get("name"))
metadata = MetadataService.update_metadata_name(dataset_id_str, metadata_id_str, name)
return metadata, 200
@setup_required

View File

@@ -20,13 +20,13 @@ from services.rag_pipeline.rag_pipeline import RagPipelineService
logger = logging.getLogger(__name__)
def _validate_name(name):
def _validate_name(name: str) -> str:
if not name or len(name) < 1 or len(name) > 40:
raise ValueError("Name must be between 1 to 40 characters.")
return name
def _validate_description_length(description):
def _validate_description_length(description: str) -> str:
if len(description) > 400:
raise ValueError("Description cannot exceed 400 characters.")
return description
@@ -76,7 +76,7 @@ class CustomizedPipelineTemplateApi(Resource):
)
parser.add_argument(
"description",
type=str,
type=_validate_description_length,
nullable=True,
required=False,
default="",
@@ -133,7 +133,7 @@ class PublishCustomizedPipelineTemplateApi(Resource):
)
parser.add_argument(
"description",
type=str,
type=_validate_description_length,
nullable=True,
required=False,
default="",

View File

@@ -1,5 +1,5 @@
from flask_login import current_user # type: ignore # type: ignore
from flask_restx import Resource, marshal, reqparse # type: ignore
from flask_login import current_user
from flask_restx import Resource, marshal, reqparse
from sqlalchemy.orm import Session
from werkzeug.exceptions import Forbidden
@@ -20,18 +20,6 @@ from services.entities.knowledge_entities.rag_pipeline_entities import IconInfo,
from services.rag_pipeline.rag_pipeline_dsl_service import RagPipelineDslService
def _validate_name(name):
if not name or len(name) < 1 or len(name) > 40:
raise ValueError("Name must be between 1 to 40 characters.")
return name
def _validate_description_length(description):
if len(description) > 400:
raise ValueError("Description cannot exceed 400 characters.")
return description
@console_ns.route("/rag/pipeline/dataset")
class CreateRagPipelineDatasetApi(Resource):
@setup_required

View File

@@ -1,5 +1,5 @@
import logging
from typing import Any, NoReturn
from typing import NoReturn
from flask import Response
from flask_restx import Resource, fields, inputs, marshal, marshal_with, reqparse
@@ -11,14 +11,12 @@ from controllers.console.app.error import (
DraftWorkflowNotExist,
)
from controllers.console.app.workflow_draft_variable import (
_WORKFLOW_DRAFT_VARIABLE_FIELDS,
_WORKFLOW_DRAFT_VARIABLE_WITHOUT_VALUE_FIELDS,
_WORKFLOW_DRAFT_VARIABLE_FIELDS, # type: ignore[private-usage]
_WORKFLOW_DRAFT_VARIABLE_WITHOUT_VALUE_FIELDS, # type: ignore[private-usage]
)
from controllers.console.datasets.wraps import get_rag_pipeline
from controllers.console.wraps import account_initialization_required, setup_required
from controllers.web.error import InvalidArgumentError, NotFoundError
from core.variables.segment_group import SegmentGroup
from core.variables.segments import ArrayFileSegment, FileSegment, Segment
from core.variables.types import SegmentType
from core.workflow.constants import CONVERSATION_VARIABLE_NODE_ID, SYSTEM_VARIABLE_NODE_ID
from extensions.ext_database import db
@@ -34,32 +32,6 @@ from services.workflow_draft_variable_service import WorkflowDraftVariableList,
logger = logging.getLogger(__name__)
def _convert_values_to_json_serializable_object(value: Segment) -> Any:
if isinstance(value, FileSegment):
return value.value.model_dump()
elif isinstance(value, ArrayFileSegment):
return [i.model_dump() for i in value.value]
elif isinstance(value, SegmentGroup):
return [_convert_values_to_json_serializable_object(i) for i in value.value]
else:
return value.value
def _serialize_var_value(variable: WorkflowDraftVariable) -> Any:
value = variable.get_value()
# create a copy of the value to avoid affecting the model cache.
value = value.model_copy(deep=True)
# Refresh the url signature before returning it to client.
if isinstance(value, FileSegment):
file = value.value
file.remote_url = file.generate_url()
elif isinstance(value, ArrayFileSegment):
files = value.value
for file in files:
file.remote_url = file.generate_url()
return _convert_values_to_json_serializable_object(value)
def _create_pagination_parser():
parser = reqparse.RequestParser()
parser.add_argument(
@@ -104,7 +76,7 @@ def _api_prerequisite(f):
@account_initialization_required
@get_rag_pipeline
def wrapper(*args, **kwargs):
if not isinstance(current_user, Account) or not current_user.is_editor:
if not isinstance(current_user, Account) or not current_user.has_edit_permission:
raise Forbidden()
return f(*args, **kwargs)

View File

@@ -3,7 +3,7 @@ from flask_login import current_user
from flask_restx import Resource, marshal_with, reqparse
from werkzeug.exceptions import Forbidden
from controllers.console import api
from controllers.console import console_ns
from controllers.console.wraps import account_initialization_required, setup_required
from fields.tag_fields import dataset_tag_fields
from libs.login import login_required
@@ -17,6 +17,7 @@ def _validate_name(name):
return name
@console_ns.route("/tags")
class TagListApi(Resource):
@setup_required
@login_required
@@ -52,6 +53,7 @@ class TagListApi(Resource):
return response, 200
@console_ns.route("/tags/<uuid:tag_id>")
class TagUpdateDeleteApi(Resource):
@setup_required
@login_required
@@ -89,6 +91,7 @@ class TagUpdateDeleteApi(Resource):
return 204
@console_ns.route("/tag-bindings/create")
class TagBindingCreateApi(Resource):
@setup_required
@login_required
@@ -114,6 +117,7 @@ class TagBindingCreateApi(Resource):
return {"result": "success"}, 200
@console_ns.route("/tag-bindings/remove")
class TagBindingDeleteApi(Resource):
@setup_required
@login_required
@@ -133,9 +137,3 @@ class TagBindingDeleteApi(Resource):
TagService.delete_tag_binding(args)
return {"result": "success"}, 200
api.add_resource(TagListApi, "/tags")
api.add_resource(TagUpdateDeleteApi, "/tags/<uuid:tag_id>")
api.add_resource(TagBindingCreateApi, "/tag-bindings/create")
api.add_resource(TagBindingDeleteApi, "/tag-bindings/remove")

View File

@@ -1,9 +1,11 @@
import logging
import queue
import time
from abc import abstractmethod
from enum import IntEnum, auto
from typing import Any
from redis.exceptions import RedisError
from sqlalchemy.orm import DeclarativeMeta
from configs import dify_config
@@ -18,6 +20,8 @@ from core.app.entities.queue_entities import (
)
from extensions.ext_redis import redis_client
logger = logging.getLogger(__name__)
class PublishFrom(IntEnum):
APPLICATION_MANAGER = auto()
@@ -35,9 +39,8 @@ class AppQueueManager:
self.invoke_from = invoke_from # Public accessor for invoke_from
user_prefix = "account" if self._invoke_from in {InvokeFrom.EXPLORE, InvokeFrom.DEBUGGER} else "end-user"
redis_client.setex(
AppQueueManager._generate_task_belong_cache_key(self._task_id), 1800, f"{user_prefix}-{self._user_id}"
)
self._task_belong_cache_key = AppQueueManager._generate_task_belong_cache_key(self._task_id)
redis_client.setex(self._task_belong_cache_key, 1800, f"{user_prefix}-{self._user_id}")
q: queue.Queue[WorkflowQueueMessage | MessageQueueMessage | None] = queue.Queue()
@@ -79,9 +82,21 @@ class AppQueueManager:
Stop listen to queue
:return:
"""
self._clear_task_belong_cache()
self._q.put(None)
def publish_error(self, e, pub_from: PublishFrom):
def _clear_task_belong_cache(self) -> None:
"""
Remove the task belong cache key once listening is finished.
"""
try:
redis_client.delete(self._task_belong_cache_key)
except RedisError:
logger.exception(
"Failed to clear task belong cache for task %s (key: %s)", self._task_id, self._task_belong_cache_key
)
def publish_error(self, e, pub_from: PublishFrom) -> None:
"""
Publish error
:param e: error

View File

@@ -74,7 +74,7 @@ class TextPromptMessageContent(PromptMessageContent):
Model class for text prompt message content.
"""
type: Literal[PromptMessageContentType.TEXT] = PromptMessageContentType.TEXT
type: Literal[PromptMessageContentType.TEXT] = PromptMessageContentType.TEXT # type: ignore
data: str
@@ -95,11 +95,11 @@ class MultiModalPromptMessageContent(PromptMessageContent):
class VideoPromptMessageContent(MultiModalPromptMessageContent):
type: Literal[PromptMessageContentType.VIDEO] = PromptMessageContentType.VIDEO
type: Literal[PromptMessageContentType.VIDEO] = PromptMessageContentType.VIDEO # type: ignore
class AudioPromptMessageContent(MultiModalPromptMessageContent):
type: Literal[PromptMessageContentType.AUDIO] = PromptMessageContentType.AUDIO
type: Literal[PromptMessageContentType.AUDIO] = PromptMessageContentType.AUDIO # type: ignore
class ImagePromptMessageContent(MultiModalPromptMessageContent):
@@ -111,12 +111,12 @@ class ImagePromptMessageContent(MultiModalPromptMessageContent):
LOW = auto()
HIGH = auto()
type: Literal[PromptMessageContentType.IMAGE] = PromptMessageContentType.IMAGE
type: Literal[PromptMessageContentType.IMAGE] = PromptMessageContentType.IMAGE # type: ignore
detail: DETAIL = DETAIL.LOW
class DocumentPromptMessageContent(MultiModalPromptMessageContent):
type: Literal[PromptMessageContentType.DOCUMENT] = PromptMessageContentType.DOCUMENT
type: Literal[PromptMessageContentType.DOCUMENT] = PromptMessageContentType.DOCUMENT # type: ignore
PromptMessageContentUnionTypes = Annotated[

View File

@@ -15,7 +15,7 @@ class GPT2Tokenizer:
use gpt2 tokenizer to get num tokens
"""
_tokenizer = GPT2Tokenizer.get_encoder()
tokens = _tokenizer.encode(text)
tokens = _tokenizer.encode(text) # type: ignore
return len(tokens)
@staticmethod

View File

@@ -196,15 +196,15 @@ def jsonable_encoder(
return encoder(obj)
try:
data = dict(obj)
data = dict(obj) # type: ignore
except Exception as e:
errors: list[Exception] = []
errors.append(e)
try:
data = vars(obj)
data = vars(obj) # type: ignore
except Exception as e:
errors.append(e)
raise ValueError(errors) from e
raise ValueError(str(errors)) from e
return jsonable_encoder(
data,
by_alias=by_alias,

View File

@@ -3,7 +3,8 @@ from dataclasses import dataclass
from typing import Any
from opentelemetry import trace as trace_api
from opentelemetry.sdk.trace import Event, Status, StatusCode
from opentelemetry.sdk.trace import Event
from opentelemetry.trace import Status, StatusCode
from pydantic import BaseModel, Field

View File

@@ -155,7 +155,10 @@ class OpsTraceManager:
if key in tracing_config:
if "*" in tracing_config[key]:
# If the key contains '*', retain the original value from the current config
new_config[key] = current_trace_config.get(key, tracing_config[key])
if current_trace_config:
new_config[key] = current_trace_config.get(key, tracing_config[key])
else:
new_config[key] = tracing_config[key]
else:
# Otherwise, encrypt the key
new_config[key] = encrypt_token(tenant_id, tracing_config[key])
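The added guard handles a missing current_trace_config while keeping the masking convention: a submitted value containing "*" means "retain the secret already stored". A minimal sketch of that merge rule, assuming hypothetical names (merge_masked_config, secret_keys) and leaving encryption out:

def merge_masked_config(
    submitted: dict[str, str], stored: dict[str, str] | None, secret_keys: set[str]
) -> dict[str, str]:
    merged = dict(submitted)
    for key in secret_keys:
        if key in submitted and "*" in submitted[key]:
            # Masked placeholder echoed back by the client: fall back to the stored value, if any.
            merged[key] = (stored or {}).get(key, submitted[key])
    return merged

# merge_masked_config({"api_key": "sk-****"}, None, {"api_key"}) no longer assumes a stored config exists.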

View File

@@ -62,7 +62,8 @@ class WeaveDataTrace(BaseTraceInstance):
self,
):
try:
project_url = f"https://wandb.ai/{self.weave_client._project_id()}"
project_identifier = f"{self.entity}/{self.project_name}" if self.entity else self.project_name
project_url = f"https://wandb.ai/{project_identifier}"
return project_url
except Exception as e:
logger.debug("Weave get run url failed: %s", str(e))
@@ -424,7 +425,23 @@ class WeaveDataTrace(BaseTraceInstance):
raise ValueError(f"Weave API check failed: {str(e)}")
def start_call(self, run_data: WeaveTraceModel, parent_run_id: str | None = None):
call = self.weave_client.create_call(op=run_data.op, inputs=run_data.inputs, attributes=run_data.attributes)
inputs = run_data.inputs
if inputs is None:
inputs = {}
elif not isinstance(inputs, dict):
inputs = {"inputs": str(inputs)}
attributes = run_data.attributes
if attributes is None:
attributes = {}
elif not isinstance(attributes, dict):
attributes = {"attributes": str(attributes)}
call = self.weave_client.create_call(
op=run_data.op,
inputs=inputs,
attributes=attributes,
)
self.calls[run_data.id] = call
if parent_run_id:
self.calls[run_data.id].parent_id = parent_run_id
@@ -432,6 +449,7 @@ class WeaveDataTrace(BaseTraceInstance):
def finish_call(self, run_data: WeaveTraceModel):
call = self.calls.get(run_data.id)
if call:
self.weave_client.finish_call(call=call, output=run_data.outputs, exception=run_data.exception)
exception = Exception(run_data.exception) if run_data.exception else None
self.weave_client.finish_call(call=call, output=run_data.outputs, exception=exception)
else:
raise ValueError(f"Call with id {run_data.id} not found")

View File

@@ -106,7 +106,9 @@ class RetrievalService:
if exceptions:
raise ValueError(";\n".join(exceptions))
# Deduplicate documents for hybrid search to avoid duplicate chunks
if retrieval_method == RetrievalMethod.HYBRID_SEARCH.value:
all_documents = cls._deduplicate_documents(all_documents)
data_post_processor = DataPostProcessor(
str(dataset.tenant_id), reranking_mode, reranking_model, weights, False
)
@@ -143,6 +145,40 @@ class RetrievalService:
)
return all_documents
@classmethod
def _deduplicate_documents(cls, documents: list[Document]) -> list[Document]:
"""Deduplicate documents based on doc_id to avoid duplicate chunks in hybrid search."""
if not documents:
return documents
unique_documents = []
seen_doc_ids = set()
for document in documents:
# For dify provider documents, use doc_id for deduplication
if document.provider == "dify" and document.metadata is not None and "doc_id" in document.metadata:
doc_id = document.metadata["doc_id"]
if doc_id not in seen_doc_ids:
seen_doc_ids.add(doc_id)
unique_documents.append(document)
# If duplicate, keep the one with higher score
elif "score" in document.metadata:
# Find existing document with same doc_id and compare scores
for i, existing_doc in enumerate(unique_documents):
if (
existing_doc.metadata
and existing_doc.metadata.get("doc_id") == doc_id
and existing_doc.metadata.get("score", 0) < document.metadata.get("score", 0)
):
unique_documents[i] = document
break
else:
# For non-dify documents, use content-based deduplication
if document not in unique_documents:
unique_documents.append(document)
return unique_documents
@classmethod
def _get_dataset(cls, dataset_id: str) -> Dataset | None:
with Session(db.engine) as session:
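As a reference for the deduplication rule above: keep one hit per doc_id and prefer the higher-scored copy. A self-contained sketch over plain dicts (dedupe_by_doc_id and the dict-shaped hits are illustrative, not the actual Document model):

def dedupe_by_doc_id(hits: list[dict]) -> list[dict]:
    best: dict[str, dict] = {}
    order: list[str] = []
    for hit in hits:
        doc_id = hit["doc_id"]
        if doc_id not in best:
            best[doc_id] = hit
            order.append(doc_id)
        elif hit.get("score", 0) > best[doc_id].get("score", 0):
            # Duplicate chunk: keep the higher-scored copy, preserving first-seen order.
            best[doc_id] = hit
    return [best[d] for d in order]

# dedupe_by_doc_id([{"doc_id": "a", "score": 0.4}, {"doc_id": "a", "score": 0.9}])
# -> [{"doc_id": "a", "score": 0.9}]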

View File

@@ -20,6 +20,7 @@ class ModelInvokeCompletedEvent(NodeEventBase):
usage: LLMUsage
finish_reason: str | None = None
reasoning_content: str | None = None
structured_output: dict | None = None
class RunRetryEvent(NodeEventBase):

View File

@@ -87,7 +87,7 @@ class Executor:
node_data.authorization.config.api_key
).text
self.url: str = node_data.url
self.url = node_data.url
self.method = node_data.method
self.auth = node_data.authorization
self.timeout = timeout
@@ -349,11 +349,10 @@ class Executor:
"timeout": (self.timeout.connect, self.timeout.read, self.timeout.write),
"ssl_verify": self.ssl_verify,
"follow_redirects": True,
"max_retries": self.max_retries,
}
# request_args = {k: v for k, v in request_args.items() if v is not None}
try:
response: httpx.Response = _METHOD_MAP[method_lc](**request_args)
response: httpx.Response = _METHOD_MAP[method_lc](**request_args, max_retries=self.max_retries)
except (ssrf_proxy.MaxRetriesExceededError, httpx.RequestError) as e:
raise HttpRequestNodeError(str(e)) from e
# FIXME: fix type ignore, this maybe httpx type issue

View File

@@ -165,6 +165,8 @@ class HttpRequestNode(Node):
body_type = typed_node_data.body.type
data = typed_node_data.body.data
match body_type:
case "none":
pass
case "binary":
if len(data) != 1:
raise RequestBodyError("invalid body data, should have only one item")

View File

@@ -83,7 +83,7 @@ class IfElseNode(Node):
else:
# TODO: Update database then remove this
# Fallback to old structure if cases are not defined
input_conditions, group_result, final_result = _should_not_use_old_function( # ty: ignore [deprecated]
input_conditions, group_result, final_result = _should_not_use_old_function( # pyright: ignore [reportDeprecated]
condition_processor=condition_processor,
variable_pool=self.graph_runtime_state.variable_pool,
conditions=self._node_data.conditions or [],

View File

@@ -136,6 +136,11 @@ class KnowledgeIndexNode(Node):
document = db.session.query(Document).filter_by(id=document_id.value).first()
if not document:
raise KnowledgeIndexNodeError(f"Document {document_id.value} not found.")
doc_id_value = document.id
ds_id_value = dataset.id
dataset_name_value = dataset.name
document_name_value = document.name
created_at_value = document.created_at
# chunk nodes by chunk size
indexing_start_at = time.perf_counter()
index_processor = IndexProcessorFactory(dataset.chunk_structure).init_index_processor()
@@ -161,16 +166,16 @@ class KnowledgeIndexNode(Node):
document.word_count = (
db.session.query(func.sum(DocumentSegment.word_count))
.where(
DocumentSegment.document_id == document.id,
DocumentSegment.dataset_id == dataset.id,
DocumentSegment.document_id == doc_id_value,
DocumentSegment.dataset_id == ds_id_value,
)
.scalar()
)
db.session.add(document)
# update document segment status
db.session.query(DocumentSegment).where(
DocumentSegment.document_id == document.id,
DocumentSegment.dataset_id == dataset.id,
DocumentSegment.document_id == doc_id_value,
DocumentSegment.dataset_id == ds_id_value,
).update(
{
DocumentSegment.status: "completed",
@@ -182,13 +187,13 @@ class KnowledgeIndexNode(Node):
db.session.commit()
return {
"dataset_id": dataset.id,
"dataset_name": dataset.name,
"dataset_id": ds_id_value,
"dataset_name": dataset_name_value,
"batch": batch.value,
"document_id": document.id,
"document_name": document.name,
"created_at": document.created_at.timestamp(),
"display_status": document.indexing_status,
"document_id": doc_id_value,
"document_name": document_name_value,
"created_at": created_at_value.timestamp(),
"display_status": "completed",
}
def _get_preview_output(self, chunk_structure: str, chunks: Any) -> Mapping[str, Any]:

View File

@@ -107,7 +107,7 @@ class KnowledgeRetrievalNode(Node):
graph_runtime_state=graph_runtime_state,
)
# LLM file outputs, used for MultiModal outputs.
self._file_outputs: list[File] = []
self._file_outputs = []
if llm_file_saver is None:
llm_file_saver = FileSaverImpl(

View File

@@ -161,6 +161,8 @@ class ListOperatorNode(Node):
elif isinstance(variable, ArrayFileSegment):
if isinstance(condition.value, str):
value = self.graph_runtime_state.variable_pool.convert_template(condition.value).text
elif isinstance(condition.value, bool):
raise ValueError(f"File filter expects a string value, got {type(condition.value)}")
else:
value = condition.value
filter_func = _get_file_filter_func(

View File

@@ -46,7 +46,7 @@ class LLMFileSaver(tp.Protocol):
dot (`.`). For example, `.py` and `.tar.gz` are both valid values, while `py`
and `tar.gz` are not.
"""
pass
raise NotImplementedError()
def save_remote_url(self, url: str, file_type: FileType) -> File:
"""save_remote_url saves the file from a remote url returned by LLM.
@@ -56,7 +56,7 @@ class LLMFileSaver(tp.Protocol):
:param url: the url of the file.
:param file_type: the file type of the file, check `FileType` enum for reference.
"""
pass
raise NotImplementedError()
EngineFactory: tp.TypeAlias = tp.Callable[[], Engine]

View File

@@ -23,6 +23,7 @@ from core.model_runtime.entities.llm_entities import (
LLMResult,
LLMResultChunk,
LLMResultChunkWithStructuredOutput,
LLMResultWithStructuredOutput,
LLMStructuredOutput,
LLMUsage,
)
@@ -127,7 +128,7 @@ class LLMNode(Node):
graph_runtime_state=graph_runtime_state,
)
# LLM file outputs, used for MultiModal outputs.
self._file_outputs: list[File] = []
self._file_outputs = []
if llm_file_saver is None:
llm_file_saver = FileSaverImpl(
@@ -165,6 +166,7 @@ class LLMNode(Node):
node_inputs: dict[str, Any] = {}
process_data: dict[str, Any] = {}
result_text = ""
clean_text = ""
usage = LLMUsage.empty_usage()
finish_reason = None
reasoning_content = None
@@ -278,6 +280,13 @@ class LLMNode(Node):
# Extract clean text from <think> tags
clean_text, _ = LLMNode._split_reasoning(result_text, self._node_data.reasoning_format)
# Process structured output if available from the event.
structured_output = (
LLMStructuredOutput(structured_output=event.structured_output)
if event.structured_output
else None
)
# deduct quota
llm_utils.deduct_llm_quota(tenant_id=self.tenant_id, model_instance=model_instance, usage=usage)
break
@@ -1048,7 +1057,7 @@ class LLMNode(Node):
@staticmethod
def handle_blocking_result(
*,
invoke_result: LLMResult,
invoke_result: LLMResult | LLMResultWithStructuredOutput,
saver: LLMFileSaver,
file_outputs: list["File"],
reasoning_format: Literal["separated", "tagged"] = "tagged",
@@ -1079,6 +1088,8 @@ class LLMNode(Node):
finish_reason=None,
# Reasoning content for workflow variables and downstream nodes
reasoning_content=reasoning_content,
# Pass structured output if enabled
structured_output=getattr(invoke_result, "structured_output", None),
)
@staticmethod

View File

@@ -179,6 +179,6 @@ CHAT_EXAMPLE = [
"required": ["food"],
},
},
"assistant": {"text": "I need to output a valid JSON object.", "json": {"result": "apple pie"}},
"assistant": {"text": "I need to output a valid JSON object.", "json": {"food": "apple pie"}},
},
]

View File

@@ -68,7 +68,7 @@ class QuestionClassifierNode(Node):
graph_runtime_state=graph_runtime_state,
)
# LLM file outputs, used for MultiModal outputs.
self._file_outputs: list[File] = []
self._file_outputs = []
if llm_file_saver is None:
llm_file_saver = FileSaverImpl(
@@ -111,9 +111,9 @@ class QuestionClassifierNode(Node):
query = variable.value if variable else None
variables = {"query": query}
# fetch model config
model_instance, model_config = LLMNode._fetch_model_config(
node_data_model=node_data.model,
model_instance, model_config = llm_utils.fetch_model_config(
tenant_id=self.tenant_id,
node_data_model=node_data.model,
)
# fetch memory
memory = llm_utils.fetch_memory(

View File

@@ -416,4 +416,8 @@ class WorkflowEntry:
# append variable and value to variable pool
if variable_node_id != ENVIRONMENT_VARIABLE_NODE_ID:
# In single run, the input_value is set as the LLM's structured output value within the variable_pool.
if len(variable_key_list) == 2 and variable_key_list[0] == "structured_output":
input_value = {variable_key_list[1]: input_value}
variable_key_list = variable_key_list[0:1]
variable_pool.add([variable_node_id] + variable_key_list, input_value)
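A worked example of the single-run remapping above, with illustrative values: an input addressed as llm_node.structured_output.answer is wrapped into a one-key dict and stored under the structured_output selector.

variable_node_id = "llm_node"
variable_key_list = ["structured_output", "answer"]
input_value = "42"
if len(variable_key_list) == 2 and variable_key_list[0] == "structured_output":
    input_value = {variable_key_list[1]: input_value}  # {"answer": "42"}
    variable_key_list = variable_key_list[0:1]  # ["structured_output"]
# The pool then receives: variable_pool.add(["llm_node", "structured_output"], {"answer": "42"})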

View File

@@ -6,10 +6,6 @@
"migrations/",
"core/rag",
"extensions",
"controllers/console/datasets",
"core/ops",
"core/model_runtime",
"core/workflow/nodes",
"core/app/app_config/easy_ui_based_app/dataset"
],
"typeCheckingMode": "strict",
@@ -38,4 +34,4 @@
"reportAttributeAccessIssue": "hint",
"pythonVersion": "3.11",
"pythonPlatform": "All"
}
}

View File

@@ -52,7 +52,8 @@ def check_upgradable_plugin_task():
strategy.include_plugins,
)
if batch_interval_time > 0.0001: # if lower than 1ms, skip
# Only sleep if batch_interval_time > 0.0001 AND current batch is not the last one
if batch_interval_time > 0.0001 and i + MAX_CONCURRENT_CHECK_TASKS < total_strategies:
time.sleep(batch_interval_time)
end_at = time.perf_counter()
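The tightened condition sleeps only between batches, never after the last one. The pattern in isolation, assuming hypothetical names (run_in_batches, process_batch, interval):

import time
from collections.abc import Callable, Sequence

def run_in_batches(
    items: Sequence, batch_size: int, interval: float, process_batch: Callable[[Sequence], None]
) -> None:
    total = len(items)
    for i in range(0, total, batch_size):
        process_batch(items[i : i + batch_size])
        # Sleep between batches only; there is nothing left to wait for after the final batch.
        if interval > 0.0001 and i + batch_size < total:
            time.sleep(interval)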

View File

@@ -93,7 +93,7 @@ logger = logging.getLogger(__name__)
class DatasetService:
@staticmethod
def get_datasets(page, per_page, tenant_id=None, user=None, search=None, tag_ids=None, include_all=False):
query = select(Dataset).where(Dataset.tenant_id == tenant_id).order_by(Dataset.created_at.desc())
query = select(Dataset).where(Dataset.tenant_id == tenant_id).order_by(Dataset.created_at.desc(), Dataset.id)
if user:
# get permitted dataset ids

View File

@@ -152,23 +152,20 @@ const Apps = ({
<div className={cn(
'mt-6 flex items-center justify-between px-12',
)}>
<>
<Category
list={categories}
value={currCategory}
onChange={setCurrCategory}
allCategoriesEn={allCategoriesEn}
/>
</>
<Category
list={categories}
value={currCategory}
onChange={setCurrCategory}
allCategoriesEn={allCategoriesEn}
/>
<Input
showLeftIcon
showClearIcon
wrapperClassName='w-[200px]'
wrapperClassName='w-[200px] self-start'
value={keywords}
onChange={e => handleKeywordsChange(e.target.value)}
onClear={() => handleKeywordsChange('')}
/>
</div>
<div className={cn(

View File

@@ -501,6 +501,7 @@ const translation = {
customModelCredentialsDeleteTip: 'Anmeldeinformationen werden verwendet und können nicht gelöscht werden',
},
parametersInvalidRemoved: 'Einige Parameter sind ungültig und wurden entfernt.',
installDataSourceProvider: 'Datenquellenanbieter installieren',
},
dataSource: {
add: 'Eine Datenquelle hinzufügen',

View File

@@ -944,6 +944,9 @@ const translation = {
chunkIsRequired: 'Chunk-Struktur ist erforderlich',
chunksInput: 'Stücke',
chunksInputTip: 'Die Eingangsvariable des Wissensbasis-Knotens sind Chunks. Der Variablentyp ist ein Objekt mit einem spezifischen JSON-Schema, das konsistent mit der ausgewählten Chunk-Struktur sein muss.',
embeddingModelIsRequired: 'Ein Einbettungsmodell ist erforderlich',
chunksVariableIsRequired: 'Die Variable \'Chunks\' ist erforderlich',
rerankingModelIsRequired: 'Ein Reranking-Modell ist erforderlich',
},
},
tracing: {

View File

@@ -505,6 +505,7 @@ const translation = {
editModelCredential: 'Editar credencial de modelo',
},
parametersInvalidRemoved: 'Algunos parámetros son inválidos y han sido eliminados',
installDataSourceProvider: 'Instalar proveedores de fuentes de datos',
},
dataSource: {
add: 'Agregar una fuente de datos',

View File

@@ -944,6 +944,9 @@ const translation = {
chunkIsRequired: 'Se requiere una estructura de fragmentos',
chunksInput: 'Trozo',
chunksInputTip: 'La variable de entrada del nodo de la base de conocimientos es Chunks. El tipo de variable es un objeto con un esquema JSON específico que debe ser consistente con la estructura del fragmento seleccionado.',
embeddingModelIsRequired: 'Se requiere un modelo de incrustación',
rerankingModelIsRequired: 'Se requiere un modelo de reordenamiento',
chunksVariableIsRequired: 'La variable Chunks es obligatoria',
},
},
tracing: {

View File

@@ -505,6 +505,7 @@ const translation = {
customModelCredentialsDeleteTip: 'اعتبار در حال استفاده است و قابل حذف نیست',
},
parametersInvalidRemoved: 'برخی پارامترها نامعتبر هستند و حذف شده‌اند',
installDataSourceProvider: 'نصب ارائه‌دهندگان منبع داده',
},
dataSource: {
add: 'افزودن منبع داده',

View File

@@ -943,7 +943,10 @@ const translation = {
chunkIsRequired: 'ساختار تکه ای مورد نیاز است',
chooseChunkStructure: 'یک ساختار تکه ای را انتخاب کنید',
chunksInput: 'تکه‌ها',
chunksInputTip: 'متغیر ورودی گره پایگاه دانش چانکها است. نوع متغیر یک شیء با یک طرح JSON خاص است که باید با ساختار چانک انتخاب شده سازگار باشد.',
chunksInputTip: 'متغیر ورودی گره پایگاه دانش تکهها است. نوع متغیر یک شیء با یک طرح JSON خاص است که باید با ساختار تکه انتخاب شده سازگار باشد.',
embeddingModelIsRequired: 'مدل جاسازی مورد نیاز است',
chunksVariableIsRequired: 'متغیر تکه‌ها الزامی است',
rerankingModelIsRequired: 'مدل رتبه‌بندی مجدد مورد نیاز است',
},
},
tracing: {

View File

@@ -502,6 +502,7 @@ const translation = {
editModelCredential: 'Modifier les informations didentification du modèle',
},
parametersInvalidRemoved: 'Certains paramètres sont invalides et ont été supprimés.',
installDataSourceProvider: 'Installer les fournisseurs de sources de données',
},
dataSource: {
add: 'Ajouter une source de données',

View File

@@ -944,6 +944,9 @@ const translation = {
retrievalSettingIsRequired: 'Le paramètre de récupération est requis',
chunksInput: 'Morceaux',
chunksInputTip: 'La variable d\'entrée du nœud de la base de connaissances est Chunks. Le type de variable est un objet avec un schéma JSON spécifique qui doit être cohérent avec la structure de morceau sélectionnée.',
rerankingModelIsRequired: 'Un modèle de rerankage est requis',
embeddingModelIsRequired: 'Un modèle d\'intégration est requis',
chunksVariableIsRequired: 'La variable Chunks est requise',
},
},
tracing: {

View File

@@ -521,6 +521,7 @@ const translation = {
editModelCredential: 'मॉडल की क्रेडेंशियल संपादित करें',
},
parametersInvalidRemoved: 'कुछ पैरामीटर अमान्य हैं और हटा दिए गए हैं',
installDataSourceProvider: 'डेटा स्रोत प्रदाताओं को स्थापित करें',
},
dataSource: {
add: 'डेटा स्रोत जोड़ें',

View File

@@ -963,7 +963,10 @@ const translation = {
aboutRetrieval: 'पुनर्प्राप्ति विधि के बारे में।',
chooseChunkStructure: 'एक चंक संरचना चुनें',
chunksInput: 'टुकड़े',
chunksInputTip: 'ज्ञान आधार नोड का इनपुट वेरिएबल चंक्स है। वेरिएबल प्रकार एक ऑब्जेक्ट है जिसमें एक विशेष JSON स्कीमा है जो चयनित चंक संरचना के साथ सुसंगत होना चाहिए।',
chunksInputTip: 'ज्ञान आधार नोड का इनपुट वेरिएबल टुकड़े है। वेरिएबल प्रकार एक ऑब्जेक्ट है जिसमें एक विशेष JSON स्कीमा है जो चयनित चंक संरचना के साथ सुसंगत होना चाहिए।',
chunksVariableIsRequired: 'टुकड़े चर आवश्यक है',
embeddingModelIsRequired: 'एम्बेडिंग मॉडल आवश्यक है',
rerankingModelIsRequired: 'पुनः क्रमांकन मॉडल की आवश्यकता है',
},
},
tracing: {

View File

@@ -501,6 +501,7 @@ const translation = {
callTimes: 'Waktu panggilan',
getFreeTokens: 'Dapatkan Token gratis',
parametersInvalidRemoved: 'Beberapa parameter tidak valid dan telah dihapus',
installDataSourceProvider: 'Pasang penyedia sumber data',
},
dataSource: {
notion: {

View File

@@ -919,6 +919,9 @@ const translation = {
chunkStructure: 'Struktur Potongan',
chunksInput: 'Potongan',
chunksInputTip: 'Variabel input dari node basis pengetahuan adalah Chunks. Tipe variabel adalah objek dengan Skema JSON tertentu yang harus konsisten dengan struktur chunk yang dipilih.',
chunksVariableIsRequired: 'Variabel Chunks diperlukan',
rerankingModelIsRequired: 'Model reranking diperlukan',
embeddingModelIsRequired: 'Model embedding diperlukan',
},
},
tracing: {},

View File

@@ -527,6 +527,7 @@ const translation = {
editModelCredential: 'Modificare le credenziali del modello',
},
parametersInvalidRemoved: 'Alcuni parametri non sono validi e sono stati rimossi.',
installDataSourceProvider: 'Installa i fornitori di sorgenti dati',
},
dataSource: {
add: 'Aggiungi una fonte di dati',

View File

@@ -970,6 +970,9 @@ const translation = {
retrievalSettingIsRequired: 'È richiesta l\'impostazione di recupero',
chunksInputTip: 'La variabile di input del nodo della base di conoscenza è Chunks. Il tipo di variabile è un oggetto con uno specifico schema JSON che deve essere coerente con la struttura del chunk selezionato.',
chunksInput: 'Pezzetti',
chunksVariableIsRequired: 'La variabile Chunks è richiesta',
rerankingModelIsRequired: 'È richiesto un modello di riordinamento',
embeddingModelIsRequired: 'È necessario un modello di embedding',
},
},
tracing: {

View File

@@ -956,6 +956,9 @@ const translation = {
indexMethodIsRequired: 'インデックスメソッドが必要です',
chunksInput: 'チャンク',
chunksInputTip: '知識ベースードの入力変数はチャンクです。変数のタイプは、選択されたチャンク構造と一貫性のある特定のJSONスキーマを持つオブジェクトです。',
chunksVariableIsRequired: 'Chunks変数は必須です',
embeddingModelIsRequired: '埋め込みモデルが必要です',
rerankingModelIsRequired: '再ランキングモデルが必要です',
},
},
tracing: {

View File

@@ -497,6 +497,7 @@ const translation = {
customModelCredentialsDeleteTip: '자격 증명이 사용 중이며 삭제할 수 없습니다.',
},
parametersInvalidRemoved: '일부 매개변수가 유효하지 않아 제거되었습니다.',
installDataSourceProvider: '데이터 소스 공급자 설치',
},
dataSource: {
add: '데이터 소스 추가하기',

View File

@@ -992,6 +992,9 @@ const translation = {
retrievalSettingIsRequired: '검색 설정이 필요합니다.',
chunksInput: '청크',
chunksInputTip: '지식 기반 노드의 입력 변수는 Chunks입니다. 변수 유형은 선택된 청크 구조와 일치해야 하는 특정 JSON 스키마를 가진 객체입니다.',
chunksVariableIsRequired: 'Chunks 변수는 필수입니다',
embeddingModelIsRequired: '임베딩 모델이 필요합니다',
rerankingModelIsRequired: '재순위 모델이 필요합니다',
},
},
tracing: {

View File

@@ -514,6 +514,7 @@ const translation = {
editModelCredential: 'Edytowanie poświadczeń modelu',
},
parametersInvalidRemoved: 'Niektóre parametry są nieprawidłowe i zostały usunięte.',
installDataSourceProvider: 'Zainstaluj dostawców źródeł danych',
},
dataSource: {
add: 'Dodaj źródło danych',

View File

@@ -944,6 +944,9 @@ const translation = {
chunkIsRequired: 'Wymagana jest struktura porcji',
chunksInput: 'Kawałki',
chunksInputTip: 'Zmienna wejściowa węzła bazy wiedzy to Chunks. Typ zmiennej to obiekt z określonym schematem JSON, który musi być zgodny z wybraną strukturą chunk.',
embeddingModelIsRequired: 'Wymagany jest model osadzania',
chunksVariableIsRequired: 'Wymagana jest zmienna Chunks',
rerankingModelIsRequired: 'Wymagany jest model ponownego rankingu',
},
},
tracing: {

View File

@@ -501,6 +501,7 @@ const translation = {
addNewModelCredential: 'Adicionar nova credencial de modelo',
},
parametersInvalidRemoved: 'Alguns parâmetros são inválidos e foram removidos',
installDataSourceProvider: 'Instalar provedores de fontes de dados',
},
dataSource: {
add: 'Adicionar uma fonte de dados',

View File

@@ -944,6 +944,9 @@ const translation = {
indexMethodIsRequired: 'O método de índice é necessário',
chunksInput: 'Pedaços',
chunksInputTip: 'A variável de entrada do nó da base de conhecimento é Chunks. O tipo da variável é um objeto com um esquema JSON específico que deve ser consistente com a estrutura de chunk selecionada.',
chunksVariableIsRequired: 'A variável \'chunks\' é obrigatória',
embeddingModelIsRequired: 'Modelo de incorporação é necessário',
rerankingModelIsRequired: 'Um modelo de reclassificação é necessário',
},
},
tracing: {

View File

@@ -501,6 +501,7 @@ const translation = {
customModelCredentialsDeleteTip: 'Acreditarea este în uz și nu poate fi ștearsă',
},
parametersInvalidRemoved: 'Unele parametrii sunt invalizi și au fost eliminați.',
installDataSourceProvider: 'Instalați furnizorii de surse de date',
},
dataSource: {
add: 'Adăugați o sursă de date',

View File

@@ -944,6 +944,9 @@ const translation = {
changeChunkStructure: 'Modificați structura bucății',
chunksInput: 'Bucăți',
chunksInputTip: 'Variabila de intrare a nodului bazei de cunoștințe este Chunks. Tipul variabilei este un obiect cu un Șchema JSON specific care trebuie să fie coerent cu structura de chunk selectată.',
chunksVariableIsRequired: 'Variabila Chunks este obligatorie',
embeddingModelIsRequired: 'Este necesar un model de încorporare',
rerankingModelIsRequired: 'Este necesar un model de reordonare',
},
},
tracing: {

View File

@@ -505,6 +505,7 @@ const translation = {
customModelCredentialsDeleteTip: 'Учетные данные используются и не могут быть удалены',
},
parametersInvalidRemoved: 'Некоторые параметры недействительны и были удалены',
installDataSourceProvider: 'Установить поставщиков источников данных',
},
dataSource: {
add: 'Добавить источник данных',

View File

@@ -944,6 +944,9 @@ const translation = {
retrievalSettingIsRequired: 'Настройка извлечения обязательна',
chunksInput: 'Куски',
chunksInputTip: 'Входная переменная узла базы знаний - это Чанки. Тип переменной является объектом с определенной схемой JSON, которая должна соответствовать выбранной структуре чанка.',
chunksVariableIsRequired: 'Переменная chunks обязательна',
embeddingModelIsRequired: 'Требуется модель встраивания',
rerankingModelIsRequired: 'Требуется модель перераспределения рангов',
},
},
tracing: {

View File

@@ -586,6 +586,7 @@ const translation = {
customModelCredentials: 'Poverilnice modela po meri',
},
parametersInvalidRemoved: 'Nekateri parametri so neveljavni in so bili odstranjeni.',
installDataSourceProvider: 'Namestite ponudnike podatkovnih virov',
},
dataSource: {
notion: {

View File

@@ -951,6 +951,9 @@ const translation = {
aboutRetrieval: 'o metodi iskanja.',
chunksInput: 'Kosi',
chunksInputTip: 'Vhodna spremenljivka vozlišča podatkovne baze je Chunks. Tip spremenljivke je objekt s specifično JSON shemo, ki mora biti skladna z izbrano strukturo kosov.',
chunksVariableIsRequired: 'Spremenljivka Chunks je obvezna',
embeddingModelIsRequired: 'Zahteva se vgrajevalni model',
rerankingModelIsRequired: 'Potreben je model za ponovno razvrščanje',
},
},
tracing: {

View File

@@ -500,6 +500,7 @@ const translation = {
addNewModelCredential: 'เพิ่มข้อมูลประจําตัวของโมเดลใหม่',
},
parametersInvalidRemoved: 'บางพารามิเตอร์ไม่ถูกต้องและถูกนำออก',
installDataSourceProvider: 'ติดตั้งผู้ให้บริการแหล่งข้อมูล',
},
dataSource: {
add: 'เพิ่มแหล่งข้อมูล',

View File

@@ -944,6 +944,9 @@ const translation = {
chunkIsRequired: 'จําเป็นต้องมีโครงสร้างก้อน',
chunksInput: 'ชิ้นส่วน',
chunksInputTip: 'ตัวแปรนำเข้าของโหนดฐานความรู้คือ Chunks ตัวแปรประเภทเป็นอ็อบเจ็กต์ที่มี JSON Schema เฉพาะซึ่งต้องสอดคล้องกับโครงสร้างชิ้นส่วนที่เลือกไว้.',
chunksVariableIsRequired: 'ตัวแปร Chunks เป็นสิ่งจำเป็น',
embeddingModelIsRequired: 'จำเป็นต้องใช้โมเดลฝัง',
rerankingModelIsRequired: 'จำเป็นต้องมีโมเดลการจัดอันดับใหม่',
},
},
tracing: {

View File

@@ -505,6 +505,7 @@ const translation = {
customModelCredentialsDeleteTip: 'Kimlik bilgisi kullanımda ve silinemiyor',
},
parametersInvalidRemoved: 'Bazı parametreler geçersizdir ve kaldırılmıştır.',
installDataSourceProvider: 'Veri kaynağı sağlayıcılarını yükle',
},
dataSource: {
add: 'Bir veri kaynağı ekle',

View File

@@ -945,6 +945,9 @@ const translation = {
changeChunkStructure: 'Yığın Yapısını Değiştir',
chunksInput: 'Parçalar',
chunksInputTip: 'Bilgi tabanı düğümünün girdi değişkeni \'Chunks\'tır. Değişkenin tipi, seçilen parça yapısıyla tutarlı olması gereken belirli bir JSON Şemasına sahip bir nesnedir.',
embeddingModelIsRequired: 'Gömme modeli gereklidir',
chunksVariableIsRequired: 'Chunks değişkeni gereklidir',
rerankingModelIsRequired: 'Yeniden sıralama modeli gereklidir',
},
},
tracing: {

View File

@@ -502,6 +502,7 @@ const translation = {
customModelCredentialsDeleteTip: 'Облікові дані використовуються і не можуть бути видалені',
},
parametersInvalidRemoved: 'Деякі параметри є недійсними і були видалені',
installDataSourceProvider: 'Встановіть постачальників джерел даних',
},
dataSource: {
add: 'Додати джерело даних',

View File

@@ -944,6 +944,9 @@ const translation = {
retrievalSettingIsRequired: 'Потрібне налаштування для отримання',
chunksInput: 'Частини',
chunksInputTip: 'Вхідна змінна вузла бази знань - це Частини. Тип змінної - об\'єкт з певною JSON-схемою, яка повинна відповідати вибраній структурі частин.',
chunksVariableIsRequired: 'Змінна chunks є обов\'язковою',
embeddingModelIsRequired: 'Потрібна модель вбудовування',
rerankingModelIsRequired: 'Потрібна модель повторного ранжування',
},
},
tracing: {

View File

@@ -501,6 +501,7 @@ const translation = {
selectModelCredential: 'Chọn thông tin xác thực mô hình',
},
parametersInvalidRemoved: 'Một số tham số không hợp lệ và đã được loại bỏ',
installDataSourceProvider: 'Cài đặt các nhà cung cấp nguồn dữ liệu',
},
dataSource: {
add: 'Thêm nguồn dữ liệu',

View File

@@ -944,6 +944,9 @@ const translation = {
indexMethodIsRequired: 'Phương pháp chỉ mục là bắt buộc',
chunksInput: 'Mảnh',
chunksInputTip: 'Biến đầu vào của nút cơ sở tri thức là Chunks. Loại biến là một đối tượng với một JSON Schema cụ thể mà phải nhất quán với cấu trúc chunk đã chọn.',
chunksVariableIsRequired: 'Biến Chunks là bắt buộc',
embeddingModelIsRequired: 'Cần có mô hình nhúng',
rerankingModelIsRequired: 'Cần có mô hình sắp xếp lại',
},
},
tracing: {

View File

@@ -501,6 +501,7 @@ const translation = {
selectModelCredential: '選取模型認證',
},
parametersInvalidRemoved: '一些參數無效,已被移除',
installDataSourceProvider: '安裝資料來源提供者',
},
dataSource: {
add: '新增資料來源',

View File

@@ -944,6 +944,9 @@ const translation = {
retrievalSettingIsRequired: '需要檢索設定',
chunksInput: '區塊',
chunksInputTip: '知識庫節點的輸入變數是 Chunks。該變數類型是一個物件具有特定的 JSON Schema必須與所選的塊結構一致。',
rerankingModelIsRequired: '需要重新排序模型',
chunksVariableIsRequired: 'Chunks 變數是必需的',
embeddingModelIsRequired: '需要嵌入模型',
},
},
tracing: {

View File

@@ -2,7 +2,7 @@
"name": "dify-web",
"version": "1.9.1",
"private": true,
"packageManager": "pnpm@10.16.0",
"packageManager": "pnpm@10.17.1",
"engines": {
"node": ">=v22.11.0"
},
@@ -39,13 +39,12 @@
"storybook": "storybook dev -p 6006",
"build-storybook": "storybook build",
"preinstall": "npx only-allow pnpm",
"analyze": "ANALYZE=true pnpm build"
"analyze": "ANALYZE=true pnpm build",
"knip": "knip"
},
"dependencies": {
"@babel/runtime": "^7.22.3",
"@dagrejs/dagre": "^1.1.4",
"@emoji-mart/data": "^1.2.1",
"@eslint/compat": "^1.2.4",
"@floating-ui/react": "^0.26.25",
"@formatjs/intl-localematcher": "^0.5.6",
"@headlessui/react": "2.2.1",
@@ -63,7 +62,6 @@
"@octokit/request-error": "^6.1.5",
"@remixicon/react": "^4.5.0",
"@sentry/react": "^8.54.0",
"@sentry/utils": "^8.54.0",
"@svgdotjs/svg.js": "^3.2.4",
"@tailwindcss/typography": "^0.5.15",
"@tanstack/react-form": "^1.3.3",
@@ -75,7 +73,6 @@
"classnames": "^2.5.1",
"cmdk": "^1.1.1",
"copy-to-clipboard": "^3.3.3",
"crypto-js": "^4.2.0",
"dayjs": "^1.11.13",
"decimal.js": "^10.4.3",
"dompurify": "^3.2.4",
@@ -91,7 +88,6 @@
"js-audio-recorder": "^1.0.7",
"js-cookie": "^3.0.5",
"jsonschema": "^1.5.0",
"jwt-decode": "^4.0.0",
"katex": "^0.16.21",
"ky": "^1.7.2",
"lamejs": "^1.2.1",
@@ -112,12 +108,9 @@
"react-18-input-autosize": "^3.0.0",
"react-dom": "19.1.1",
"react-easy-crop": "^5.1.0",
"react-error-boundary": "^4.1.2",
"react-headless-pagination": "^1.1.6",
"react-hook-form": "^7.53.1",
"react-hotkeys-hook": "^4.6.1",
"react-i18next": "^15.1.0",
"react-infinite-scroll-component": "^6.1.0",
"react-markdown": "^9.0.1",
"react-multi-email": "^1.0.25",
"react-papaparse": "^4.4.0",
@@ -126,11 +119,8 @@
"react-sortablejs": "^6.1.4",
"react-syntax-highlighter": "^15.6.1",
"react-textarea-autosize": "^8.5.8",
"react-tooltip": "5.8.3",
"react-window": "^1.8.10",
"react-window-infinite-loader": "^1.0.9",
"reactflow": "^11.11.3",
"recordrtc": "^5.6.2",
"rehype-katex": "^7.0.1",
"rehype-raw": "^7.0.0",
"remark-breaks": "^4.0.0",
@@ -138,9 +128,7 @@
"remark-math": "^6.0.0",
"scheduler": "^0.26.0",
"semver": "^7.6.3",
"server-only": "^0.0.1",
"sharp": "^0.33.2",
"shave": "^5.0.4",
"sortablejs": "^1.15.0",
"swr": "^2.3.0",
"tailwind-merge": "^2.5.4",
@@ -153,13 +141,8 @@
},
"devDependencies": {
"@antfu/eslint-config": "^5.0.0",
"@babel/core": "^7.28.3",
"@babel/preset-env": "^7.28.3",
"@chromatic-com/storybook": "^3.1.0",
"@eslint-react/eslint-plugin": "^1.15.0",
"@eslint/eslintrc": "^3.1.0",
"@eslint/js": "^9.36.0",
"@faker-js/faker": "^9.0.3",
"@happy-dom/jest-environment": "^17.4.4",
"@mdx-js/loader": "^3.1.0",
"@mdx-js/react": "^3.1.0",
@@ -172,14 +155,13 @@
"@storybook/addon-links": "8.5.0",
"@storybook/addon-onboarding": "8.5.0",
"@storybook/addon-themes": "8.5.0",
"@storybook/blocks": "8.5.0",
"@storybook/nextjs": "8.5.0",
"@storybook/react": "8.5.0",
"@storybook/test": "8.5.0",
"@testing-library/dom": "^10.4.0",
"@testing-library/jest-dom": "^6.8.0",
"@testing-library/react": "^16.0.1",
"@types/crypto-js": "^4.2.2",
"@babel/core": "^7.28.3",
"@types/dagre": "^0.7.52",
"@types/jest": "^29.5.13",
"@types/js-cookie": "^3.0.6",
@@ -192,18 +174,14 @@
"@types/react-slider": "^1.3.6",
"@types/react-syntax-highlighter": "^15.5.13",
"@types/react-window": "^1.8.8",
"@types/react-window-infinite-loader": "^1.0.9",
"@types/recordrtc": "^5.6.14",
"@types/semver": "^7.5.8",
"@types/sortablejs": "^1.15.1",
"@types/uuid": "^10.0.0",
"autoprefixer": "^10.4.20",
"babel-loader": "^10.0.0",
"bing-translate-api": "^4.0.2",
"code-inspector-plugin": "1.2.9",
"cross-env": "^7.0.3",
"eslint": "^9.35.0",
"eslint-config-next": "15.5.0",
"eslint-plugin-oxlint": "^1.6.0",
"eslint-plugin-react-hooks": "^5.1.0",
"eslint-plugin-react-refresh": "^0.4.19",
@@ -213,6 +191,7 @@
"globals": "^15.11.0",
"husky": "^9.1.6",
"jest": "^29.7.0",
"knip": "^5.64.1",
"lint-staged": "^15.2.10",
"lodash": "^4.17.21",
"magicast": "^0.3.4",
@@ -220,10 +199,9 @@
"sass": "^1.92.1",
"storybook": "8.5.0",
"tailwindcss": "^3.4.14",
"ts-node": "^10.9.2",
"typescript": "^5.8.3",
"typescript-eslint": "^8.38.0",
"uglify-js": "^3.19.3"
"uglify-js": "^3.19.3",
"babel-loader": "^9.2.1"
},
"resolutions": {
"@types/react": "19.1.11",

web/pnpm-lock.yaml (generated, 1660 changed lines)

File diff suppressed because it is too large.