From c2473d85dcc5bcb4660d17860d109e98c0c1fabf Mon Sep 17 00:00:00 2001 From: FFXN <31929997+FFXN@users.noreply.github.com> Date: Thu, 29 Jan 2026 13:47:35 +0800 Subject: [PATCH 01/15] feat: Add summary index for knowledge. (#31625) Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> Co-authored-by: Jyong <76649700+JohnJyong@users.noreply.github.com> Co-authored-by: zxhlyh Co-authored-by: Yansong Zhang <916125788@qq.com> Co-authored-by: hj24 Co-authored-by: CodingOnStar Co-authored-by: CodingOnStar Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- api/.importlinter | 3 + api/controllers/console/datasets/datasets.py | 10 +- .../console/datasets/datasets_document.py | 149 ++ .../console/datasets/datasets_segments.py | 37 +- .../console/datasets/hit_testing.py | 43 +- .../service_api/dataset/dataset.py | 2 + .../service_api/dataset/document.py | 21 + .../base_app_generate_response_converter.py | 1 + api/core/entities/knowledge_entities.py | 1 + api/core/indexing_runner.py | 10 + api/core/llm_generator/prompts.py | 17 + api/core/rag/datasource/retrieval_service.py | 140 +- api/core/rag/embedding/retrieval.py | 1 + api/core/rag/entities/citation_metadata.py | 1 + .../index_processor/index_processor_base.py | 12 + .../processor/paragraph_index_processor.py | 369 ++++- .../processor/parent_child_index_processor.py | 118 ++ .../processor/qa_index_processor.py | 41 +- api/core/rag/retrieval/dataset_retrieval.py | 29 +- .../dataset_retriever_tool.py | 29 +- .../nodes/knowledge_index/entities.py | 2 + .../knowledge_index/knowledge_index_node.py | 328 +++- .../knowledge_retrieval_node.py | 3 + api/core/workflow/nodes/llm/node.py | 3 + api/extensions/ext_celery.py | 2 + api/fields/dataset_fields.py | 9 + api/fields/document_fields.py | 9 + api/fields/hit_testing_fields.py | 1 + api/fields/message_fields.py | 1 + api/fields/segment_fields.py | 1 + ...-788d3099ae3a_add_summary_index_feature.py | 107 ++ api/models/dataset.py | 35 + api/services/dataset_service.py | 272 ++++ .../knowledge_entities/knowledge_entities.py | 2 + .../rag_pipeline_entities.py | 2 + .../rag_pipeline/rag_pipeline_dsl_service.py | 6 + api/services/summary_index_service.py | 1432 +++++++++++++++++ api/tasks/add_document_to_index_task.py | 13 + api/tasks/batch_clean_document_task.py | 4 +- api/tasks/clean_document_task.py | 4 +- api/tasks/clean_notion_document_task.py | 4 +- api/tasks/delete_segment_from_index_task.py | 2 + api/tasks/disable_segment_from_index_task.py | 12 + api/tasks/disable_segments_from_index_task.py | 15 + api/tasks/document_indexing_task.py | 73 + api/tasks/enable_segment_to_index_task.py | 11 + api/tasks/enable_segments_to_index_task.py | 12 + api/tasks/generate_summary_index_task.py | 119 ++ api/tasks/regenerate_summary_index_task.py | 315 ++++ api/tasks/remove_document_from_index_task.py | 15 + .../test_dataset_service_update_dataset.py | 9 + 51 files changed, 3797 insertions(+), 60 deletions(-) create mode 100644 api/migrations/versions/2026_01_27_1815-788d3099ae3a_add_summary_index_feature.py create mode 100644 api/services/summary_index_service.py create mode 100644 api/tasks/generate_summary_index_task.py create mode 100644 api/tasks/regenerate_summary_index_task.py diff --git a/api/.importlinter b/api/.importlinter index 2b4a3a5bd6..ff0577222e 100644 --- a/api/.importlinter +++ b/api/.importlinter @@ -227,6 +227,9 @@ ignore_imports = core.workflow.nodes.knowledge_index.entities -> core.rag.retrieval.retrieval_methods 
core.workflow.nodes.knowledge_index.knowledge_index_node -> core.rag.retrieval.retrieval_methods core.workflow.nodes.knowledge_index.knowledge_index_node -> models.dataset + core.workflow.nodes.knowledge_index.knowledge_index_node -> services.summary_index_service + core.workflow.nodes.knowledge_index.knowledge_index_node -> tasks.generate_summary_index_task + core.workflow.nodes.knowledge_index.knowledge_index_node -> core.rag.index_processor.processor.paragraph_index_processor core.workflow.nodes.knowledge_retrieval.knowledge_retrieval_node -> core.rag.retrieval.retrieval_methods core.workflow.nodes.llm.node -> models.dataset core.workflow.nodes.agent.agent_node -> core.tools.utils.message_transformer diff --git a/api/controllers/console/datasets/datasets.py b/api/controllers/console/datasets/datasets.py index 8fbbc51e21..30e4ed1119 100644 --- a/api/controllers/console/datasets/datasets.py +++ b/api/controllers/console/datasets/datasets.py @@ -148,6 +148,7 @@ class DatasetUpdatePayload(BaseModel): embedding_model: str | None = None embedding_model_provider: str | None = None retrieval_model: dict[str, Any] | None = None + summary_index_setting: dict[str, Any] | None = None partial_member_list: list[dict[str, str]] | None = None external_retrieval_model: dict[str, Any] | None = None external_knowledge_id: str | None = None @@ -288,7 +289,14 @@ class DatasetListApi(Resource): @enterprise_license_required def get(self): current_user, current_tenant_id = current_account_with_tenant() - query = ConsoleDatasetListQuery.model_validate(request.args.to_dict()) + # Convert query parameters to dict, handling list parameters correctly + query_params: dict[str, str | list[str]] = dict(request.args.to_dict()) + # Handle ids and tag_ids as lists (Flask request.args.getlist returns list even for single value) + if "ids" in request.args: + query_params["ids"] = request.args.getlist("ids") + if "tag_ids" in request.args: + query_params["tag_ids"] = request.args.getlist("tag_ids") + query = ConsoleDatasetListQuery.model_validate(query_params) # provider = request.args.get("provider", default="vendor") if query.ids: datasets, total = DatasetService.get_datasets_by_ids(query.ids, current_tenant_id) diff --git a/api/controllers/console/datasets/datasets_document.py b/api/controllers/console/datasets/datasets_document.py index 57fb9abf29..6e3c0db8a3 100644 --- a/api/controllers/console/datasets/datasets_document.py +++ b/api/controllers/console/datasets/datasets_document.py @@ -45,6 +45,7 @@ from models.dataset import DocumentPipelineExecutionLog from services.dataset_service import DatasetService, DocumentService from services.entities.knowledge_entities.knowledge_entities import KnowledgeConfig, ProcessRule, RetrievalModel from services.file_service import FileService +from tasks.generate_summary_index_task import generate_summary_index_task from ..app.error import ( ProviderModelCurrentlyNotSupportError, @@ -103,6 +104,10 @@ class DocumentRenamePayload(BaseModel): name: str +class GenerateSummaryPayload(BaseModel): + document_list: list[str] + + class DocumentBatchDownloadZipPayload(BaseModel): """Request payload for bulk downloading documents as a zip archive.""" @@ -125,6 +130,7 @@ register_schema_models( RetrievalModel, DocumentRetryPayload, DocumentRenamePayload, + GenerateSummaryPayload, DocumentBatchDownloadZipPayload, ) @@ -312,6 +318,13 @@ class DatasetDocumentListApi(Resource): paginated_documents = db.paginate(select=query, page=page, per_page=limit, max_per_page=100, error_out=False) documents = 
paginated_documents.items + + DocumentService.enrich_documents_with_summary_index_status( + documents=documents, + dataset=dataset, + tenant_id=current_tenant_id, + ) + if fetch: for document in documents: completed_segments = ( @@ -797,6 +810,7 @@ class DocumentApi(DocumentResource): "display_status": document.display_status, "doc_form": document.doc_form, "doc_language": document.doc_language, + "need_summary": document.need_summary if document.need_summary is not None else False, } else: dataset_process_rules = DatasetService.get_process_rules(dataset_id) @@ -832,6 +846,7 @@ class DocumentApi(DocumentResource): "display_status": document.display_status, "doc_form": document.doc_form, "doc_language": document.doc_language, + "need_summary": document.need_summary if document.need_summary is not None else False, } return response, 200 @@ -1255,3 +1270,137 @@ class DocumentPipelineExecutionLogApi(DocumentResource): "input_data": log.input_data, "datasource_node_id": log.datasource_node_id, }, 200 + + +@console_ns.route("/datasets//documents/generate-summary") +class DocumentGenerateSummaryApi(Resource): + @console_ns.doc("generate_summary_for_documents") + @console_ns.doc(description="Generate summary index for documents") + @console_ns.doc(params={"dataset_id": "Dataset ID"}) + @console_ns.expect(console_ns.models[GenerateSummaryPayload.__name__]) + @console_ns.response(200, "Summary generation started successfully") + @console_ns.response(400, "Invalid request or dataset configuration") + @console_ns.response(403, "Permission denied") + @console_ns.response(404, "Dataset not found") + @setup_required + @login_required + @account_initialization_required + @cloud_edition_billing_rate_limit_check("knowledge") + def post(self, dataset_id): + """ + Generate summary index for specified documents. + + This endpoint checks if the dataset configuration supports summary generation + (indexing_technique must be 'high_quality' and summary_index_setting.enable must be true), + then asynchronously generates summary indexes for the provided documents. + """ + current_user, _ = current_account_with_tenant() + dataset_id = str(dataset_id) + + # Get dataset + dataset = DatasetService.get_dataset(dataset_id) + if not dataset: + raise NotFound("Dataset not found.") + + # Check permissions + if not current_user.is_dataset_editor: + raise Forbidden() + + try: + DatasetService.check_dataset_permission(dataset, current_user) + except services.errors.account.NoPermissionError as e: + raise Forbidden(str(e)) + + # Validate request payload + payload = GenerateSummaryPayload.model_validate(console_ns.payload or {}) + document_list = payload.document_list + + if not document_list: + from werkzeug.exceptions import BadRequest + + raise BadRequest("document_list cannot be empty.") + + # Check if dataset configuration supports summary generation + if dataset.indexing_technique != "high_quality": + raise ValueError( + f"Summary generation is only available for 'high_quality' indexing technique. " + f"Current indexing technique: {dataset.indexing_technique}" + ) + + summary_index_setting = dataset.summary_index_setting + if not summary_index_setting or not summary_index_setting.get("enable"): + raise ValueError("Summary index is not enabled for this dataset. 
Please enable it in the dataset settings.") + + # Verify all documents exist and belong to the dataset + documents = DocumentService.get_documents_by_ids(dataset_id, document_list) + + if len(documents) != len(document_list): + found_ids = {doc.id for doc in documents} + missing_ids = set(document_list) - found_ids + raise NotFound(f"Some documents not found: {list(missing_ids)}") + + # Dispatch async tasks for each document + for document in documents: + # Skip qa_model documents as they don't generate summaries + if document.doc_form == "qa_model": + logger.info("Skipping summary generation for qa_model document %s", document.id) + continue + + # Dispatch async task + generate_summary_index_task.delay(dataset_id, document.id) + logger.info( + "Dispatched summary generation task for document %s in dataset %s", + document.id, + dataset_id, + ) + + return {"result": "success"}, 200 + + +@console_ns.route("/datasets//documents//summary-status") +class DocumentSummaryStatusApi(DocumentResource): + @console_ns.doc("get_document_summary_status") + @console_ns.doc(description="Get summary index generation status for a document") + @console_ns.doc(params={"dataset_id": "Dataset ID", "document_id": "Document ID"}) + @console_ns.response(200, "Summary status retrieved successfully") + @console_ns.response(404, "Document not found") + @setup_required + @login_required + @account_initialization_required + def get(self, dataset_id, document_id): + """ + Get summary index generation status for a document. + + Returns: + - total_segments: Total number of segments in the document + - summary_status: Dictionary with status counts + - completed: Number of summaries completed + - generating: Number of summaries being generated + - error: Number of summaries with errors + - not_started: Number of segments without summary records + - summaries: List of summary records with status and content preview + """ + current_user, _ = current_account_with_tenant() + dataset_id = str(dataset_id) + document_id = str(document_id) + + # Get dataset + dataset = DatasetService.get_dataset(dataset_id) + if not dataset: + raise NotFound("Dataset not found.") + + # Check permissions + try: + DatasetService.check_dataset_permission(dataset, current_user) + except services.errors.account.NoPermissionError as e: + raise Forbidden(str(e)) + + # Get summary status detail from service + from services.summary_index_service import SummaryIndexService + + result = SummaryIndexService.get_document_summary_status_detail( + document_id=document_id, + dataset_id=dataset_id, + ) + + return result, 200 diff --git a/api/controllers/console/datasets/datasets_segments.py b/api/controllers/console/datasets/datasets_segments.py index 08e1ddd3e0..23a668112d 100644 --- a/api/controllers/console/datasets/datasets_segments.py +++ b/api/controllers/console/datasets/datasets_segments.py @@ -41,6 +41,17 @@ from services.errors.chunk import ChildChunkIndexingError as ChildChunkIndexingS from tasks.batch_create_segment_to_index_task import batch_create_segment_to_index_task +def _get_segment_with_summary(segment, dataset_id): + """Helper function to marshal segment and add summary information.""" + from services.summary_index_service import SummaryIndexService + + segment_dict = dict(marshal(segment, segment_fields)) + # Query summary for this segment (only enabled summaries) + summary = SummaryIndexService.get_segment_summary(segment_id=segment.id, dataset_id=dataset_id) + segment_dict["summary"] = summary.summary_content if summary else None + return 
segment_dict + + class SegmentListQuery(BaseModel): limit: int = Field(default=20, ge=1, le=100) status: list[str] = Field(default_factory=list) @@ -63,6 +74,7 @@ class SegmentUpdatePayload(BaseModel): keywords: list[str] | None = None regenerate_child_chunks: bool = False attachment_ids: list[str] | None = None + summary: str | None = None # Summary content for summary index class BatchImportPayload(BaseModel): @@ -181,8 +193,25 @@ class DatasetDocumentSegmentListApi(Resource): segments = db.paginate(select=query, page=page, per_page=limit, max_per_page=100, error_out=False) + # Query summaries for all segments in this page (batch query for efficiency) + segment_ids = [segment.id for segment in segments.items] + summaries = {} + if segment_ids: + from services.summary_index_service import SummaryIndexService + + summary_records = SummaryIndexService.get_segments_summaries(segment_ids=segment_ids, dataset_id=dataset_id) + # Only include enabled summaries (already filtered by service) + summaries = {chunk_id: summary.summary_content for chunk_id, summary in summary_records.items()} + + # Add summary to each segment + segments_with_summary = [] + for segment in segments.items: + segment_dict = dict(marshal(segment, segment_fields)) + segment_dict["summary"] = summaries.get(segment.id) + segments_with_summary.append(segment_dict) + response = { - "data": marshal(segments.items, segment_fields), + "data": segments_with_summary, "limit": limit, "total": segments.total, "total_pages": segments.pages, @@ -328,7 +357,7 @@ class DatasetDocumentSegmentAddApi(Resource): payload_dict = payload.model_dump(exclude_none=True) SegmentService.segment_create_args_validate(payload_dict, document) segment = SegmentService.create_segment(payload_dict, document, dataset) - return {"data": marshal(segment, segment_fields), "doc_form": document.doc_form}, 200 + return {"data": _get_segment_with_summary(segment, dataset_id), "doc_form": document.doc_form}, 200 @console_ns.route("/datasets//documents//segments/") @@ -390,10 +419,12 @@ class DatasetDocumentSegmentUpdateApi(Resource): payload = SegmentUpdatePayload.model_validate(console_ns.payload or {}) payload_dict = payload.model_dump(exclude_none=True) SegmentService.segment_create_args_validate(payload_dict, document) + + # Update segment (summary update with change detection is handled in SegmentService.update_segment) segment = SegmentService.update_segment( SegmentUpdateArgs.model_validate(payload.model_dump(exclude_none=True)), segment, document, dataset ) - return {"data": marshal(segment, segment_fields), "doc_form": document.doc_form}, 200 + return {"data": _get_segment_with_summary(segment, dataset_id), "doc_form": document.doc_form}, 200 @setup_required @login_required diff --git a/api/controllers/console/datasets/hit_testing.py b/api/controllers/console/datasets/hit_testing.py index 932cb4fcce..e62be13c2f 100644 --- a/api/controllers/console/datasets/hit_testing.py +++ b/api/controllers/console/datasets/hit_testing.py @@ -1,6 +1,13 @@ -from flask_restx import Resource +from flask_restx import Resource, fields from controllers.common.schema import register_schema_model +from fields.hit_testing_fields import ( + child_chunk_fields, + document_fields, + files_fields, + hit_testing_record_fields, + segment_fields, +) from libs.login import login_required from .. 
import console_ns @@ -14,13 +21,45 @@ from ..wraps import ( register_schema_model(console_ns, HitTestingPayload) +def _get_or_create_model(model_name: str, field_def): + """Get or create a flask_restx model to avoid dict type issues in Swagger.""" + existing = console_ns.models.get(model_name) + if existing is None: + existing = console_ns.model(model_name, field_def) + return existing + + +# Register models for flask_restx to avoid dict type issues in Swagger +document_model = _get_or_create_model("HitTestingDocument", document_fields) + +segment_fields_copy = segment_fields.copy() +segment_fields_copy["document"] = fields.Nested(document_model) +segment_model = _get_or_create_model("HitTestingSegment", segment_fields_copy) + +child_chunk_model = _get_or_create_model("HitTestingChildChunk", child_chunk_fields) +files_model = _get_or_create_model("HitTestingFile", files_fields) + +hit_testing_record_fields_copy = hit_testing_record_fields.copy() +hit_testing_record_fields_copy["segment"] = fields.Nested(segment_model) +hit_testing_record_fields_copy["child_chunks"] = fields.List(fields.Nested(child_chunk_model)) +hit_testing_record_fields_copy["files"] = fields.List(fields.Nested(files_model)) +hit_testing_record_model = _get_or_create_model("HitTestingRecord", hit_testing_record_fields_copy) + +# Response model for hit testing API +hit_testing_response_fields = { + "query": fields.String, + "records": fields.List(fields.Nested(hit_testing_record_model)), +} +hit_testing_response_model = _get_or_create_model("HitTestingResponse", hit_testing_response_fields) + + @console_ns.route("/datasets//hit-testing") class HitTestingApi(Resource, DatasetsHitTestingBase): @console_ns.doc("test_dataset_retrieval") @console_ns.doc(description="Test dataset knowledge retrieval") @console_ns.doc(params={"dataset_id": "Dataset ID"}) @console_ns.expect(console_ns.models[HitTestingPayload.__name__]) - @console_ns.response(200, "Hit testing completed successfully") + @console_ns.response(200, "Hit testing completed successfully", model=hit_testing_response_model) @console_ns.response(404, "Dataset not found") @console_ns.response(400, "Invalid parameters") @setup_required diff --git a/api/controllers/service_api/dataset/dataset.py b/api/controllers/service_api/dataset/dataset.py index 28864a140a..c11f64585a 100644 --- a/api/controllers/service_api/dataset/dataset.py +++ b/api/controllers/service_api/dataset/dataset.py @@ -46,6 +46,7 @@ class DatasetCreatePayload(BaseModel): retrieval_model: RetrievalModel | None = None embedding_model: str | None = None embedding_model_provider: str | None = None + summary_index_setting: dict | None = None class DatasetUpdatePayload(BaseModel): @@ -217,6 +218,7 @@ class DatasetListApi(DatasetApiResource): embedding_model_provider=payload.embedding_model_provider, embedding_model_name=payload.embedding_model, retrieval_model=payload.retrieval_model, + summary_index_setting=payload.summary_index_setting, ) except services.errors.dataset.DatasetNameDuplicateError: raise DatasetNameDuplicateError() diff --git a/api/controllers/service_api/dataset/document.py b/api/controllers/service_api/dataset/document.py index c85c1cf81e..a01524f1bc 100644 --- a/api/controllers/service_api/dataset/document.py +++ b/api/controllers/service_api/dataset/document.py @@ -45,6 +45,7 @@ from services.entities.knowledge_entities.knowledge_entities import ( Segmentation, ) from services.file_service import FileService +from services.summary_index_service import SummaryIndexService class 
DocumentTextCreatePayload(BaseModel): @@ -508,6 +509,12 @@ class DocumentListApi(DatasetApiResource): ) documents = paginated_documents.items + DocumentService.enrich_documents_with_summary_index_status( + documents=documents, + dataset=dataset, + tenant_id=tenant_id, + ) + response = { "data": marshal(documents, document_fields), "has_more": len(documents) == query_params.limit, @@ -612,6 +619,16 @@ class DocumentApi(DatasetApiResource): if metadata not in self.METADATA_CHOICES: raise InvalidMetadataError(f"Invalid metadata value: {metadata}") + # Calculate summary_index_status if needed + summary_index_status = None + has_summary_index = dataset.summary_index_setting and dataset.summary_index_setting.get("enable") is True + if has_summary_index and document.need_summary is True: + summary_index_status = SummaryIndexService.get_document_summary_index_status( + document_id=document_id, + dataset_id=dataset_id, + tenant_id=tenant_id, + ) + if metadata == "only": response = {"id": document.id, "doc_type": document.doc_type, "doc_metadata": document.doc_metadata_details} elif metadata == "without": @@ -646,6 +663,8 @@ class DocumentApi(DatasetApiResource): "display_status": document.display_status, "doc_form": document.doc_form, "doc_language": document.doc_language, + "summary_index_status": summary_index_status, + "need_summary": document.need_summary if document.need_summary is not None else False, } else: dataset_process_rules = DatasetService.get_process_rules(dataset_id) @@ -681,6 +700,8 @@ class DocumentApi(DatasetApiResource): "display_status": document.display_status, "doc_form": document.doc_form, "doc_language": document.doc_language, + "summary_index_status": summary_index_status, + "need_summary": document.need_summary if document.need_summary is not None else False, } return response diff --git a/api/core/app/apps/base_app_generate_response_converter.py b/api/core/app/apps/base_app_generate_response_converter.py index 74c6d2eca6..d1e2f16b6f 100644 --- a/api/core/app/apps/base_app_generate_response_converter.py +++ b/api/core/app/apps/base_app_generate_response_converter.py @@ -79,6 +79,7 @@ class AppGenerateResponseConverter(ABC): "document_name": resource["document_name"], "score": resource["score"], "content": resource["content"], + "summary": resource.get("summary"), } ) metadata["retriever_resources"] = updated_resources diff --git a/api/core/entities/knowledge_entities.py b/api/core/entities/knowledge_entities.py index d4093b5245..b1ba3c3e2a 100644 --- a/api/core/entities/knowledge_entities.py +++ b/api/core/entities/knowledge_entities.py @@ -3,6 +3,7 @@ from pydantic import BaseModel, Field, field_validator class PreviewDetail(BaseModel): content: str + summary: str | None = None child_chunks: list[str] | None = None diff --git a/api/core/indexing_runner.py b/api/core/indexing_runner.py index f1b50f360b..e172e88298 100644 --- a/api/core/indexing_runner.py +++ b/api/core/indexing_runner.py @@ -311,14 +311,18 @@ class IndexingRunner: qa_preview_texts: list[QAPreviewDetail] = [] total_segments = 0 + # doc_form represents the segmentation method (general, parent-child, QA) index_type = doc_form index_processor = IndexProcessorFactory(index_type).init_index_processor() + # one extract_setting is one source document for extract_setting in extract_settings: # extract processing_rule = DatasetProcessRule( mode=tmp_processing_rule["mode"], rules=json.dumps(tmp_processing_rule["rules"]) ) + # Extract document content text_docs = index_processor.extract(extract_setting, 
process_rule_mode=tmp_processing_rule["mode"]) + # Cleaning and segmentation documents = index_processor.transform( text_docs, current_user=None, @@ -361,6 +365,12 @@ class IndexingRunner: if doc_form and doc_form == "qa_model": return IndexingEstimate(total_segments=total_segments * 20, qa_preview=qa_preview_texts, preview=[]) + + # Generate summary preview + summary_index_setting = tmp_processing_rule.get("summary_index_setting") + if summary_index_setting and summary_index_setting.get("enable") and preview_texts: + preview_texts = index_processor.generate_summary_preview(tenant_id, preview_texts, summary_index_setting) + return IndexingEstimate(total_segments=total_segments, preview=preview_texts) def _extract( diff --git a/api/core/llm_generator/prompts.py b/api/core/llm_generator/prompts.py index ec2b7f2d44..d46cf049dd 100644 --- a/api/core/llm_generator/prompts.py +++ b/api/core/llm_generator/prompts.py @@ -434,3 +434,20 @@ INSTRUCTION_GENERATE_TEMPLATE_PROMPT = """The output of this prompt is not as ex You should edit the prompt according to the IDEAL OUTPUT.""" INSTRUCTION_GENERATE_TEMPLATE_CODE = """Please fix the errors in the {{#error_message#}}.""" + +DEFAULT_GENERATOR_SUMMARY_PROMPT = ( + """Summarize the following content. Extract only the key information and main points. """ + """Remove redundant details. + +Requirements: +1. Write a concise summary in plain text +2. Use the same language as the input content +3. Focus on important facts, concepts, and details +4. If images are included, describe their key information +5. Do not use words like "好的", "ok", "I understand", "This text discusses", "The content mentions" +6. Write directly without extra words + +Output only the summary text. Start summarizing now: + +""" +) diff --git a/api/core/rag/datasource/retrieval_service.py b/api/core/rag/datasource/retrieval_service.py index 8ec1ce6242..91c16ce079 100644 --- a/api/core/rag/datasource/retrieval_service.py +++ b/api/core/rag/datasource/retrieval_service.py @@ -24,7 +24,13 @@ from core.rag.rerank.rerank_type import RerankMode from core.rag.retrieval.retrieval_methods import RetrievalMethod from core.tools.signature import sign_upload_file from extensions.ext_database import db -from models.dataset import ChildChunk, Dataset, DocumentSegment, SegmentAttachmentBinding +from models.dataset import ( + ChildChunk, + Dataset, + DocumentSegment, + DocumentSegmentSummary, + SegmentAttachmentBinding, +) from models.dataset import Document as DatasetDocument from models.model import UploadFile from services.external_knowledge_service import ExternalDatasetService @@ -389,15 +395,15 @@ class RetrievalService: .all() } - records = [] - include_segment_ids = set() - segment_child_map = {} - valid_dataset_documents = {} image_doc_ids: list[Any] = [] child_index_node_ids = [] index_node_ids = [] doc_to_document_map = {} + summary_segment_ids = set() # Track segments retrieved via summary + summary_score_map: dict[str, float] = {} # Map original_chunk_id to summary score + + # First pass: collect all document IDs and identify summary documents for document in documents: document_id = document.metadata.get("document_id") if document_id not in dataset_documents: @@ -408,16 +414,39 @@ class RetrievalService: continue valid_dataset_documents[document_id] = dataset_document + doc_id = document.metadata.get("doc_id") or "" + doc_to_document_map[doc_id] = document + + # Check if this is a summary document + is_summary = document.metadata.get("is_summary", False) + if is_summary: + # For summary 
documents, find the original chunk via original_chunk_id + original_chunk_id = document.metadata.get("original_chunk_id") + if original_chunk_id: + summary_segment_ids.add(original_chunk_id) + # Save summary's score for later use + summary_score = document.metadata.get("score") + if summary_score is not None: + try: + summary_score_float = float(summary_score) + # If the same segment has multiple summary hits, take the highest score + if original_chunk_id not in summary_score_map: + summary_score_map[original_chunk_id] = summary_score_float + else: + summary_score_map[original_chunk_id] = max( + summary_score_map[original_chunk_id], summary_score_float + ) + except (ValueError, TypeError): + # Skip invalid score values + pass + continue # Skip adding to other lists for summary documents + if dataset_document.doc_form == IndexStructureType.PARENT_CHILD_INDEX: - doc_id = document.metadata.get("doc_id") or "" - doc_to_document_map[doc_id] = document if document.metadata.get("doc_type") == DocType.IMAGE: image_doc_ids.append(doc_id) else: child_index_node_ids.append(doc_id) else: - doc_id = document.metadata.get("doc_id") or "" - doc_to_document_map[doc_id] = document if document.metadata.get("doc_type") == DocType.IMAGE: image_doc_ids.append(doc_id) else: @@ -433,6 +462,7 @@ class RetrievalService: attachment_map: dict[str, list[dict[str, Any]]] = {} child_chunk_map: dict[str, list[ChildChunk]] = {} doc_segment_map: dict[str, list[str]] = {} + segment_summary_map: dict[str, str] = {} # Map segment_id to summary content with session_factory.create_session() as session: attachments = cls.get_segment_attachment_infos(image_doc_ids, session) @@ -447,6 +477,7 @@ class RetrievalService: doc_segment_map[attachment["segment_id"]].append(attachment["attachment_id"]) else: doc_segment_map[attachment["segment_id"]] = [attachment["attachment_id"]] + child_chunk_stmt = select(ChildChunk).where(ChildChunk.index_node_id.in_(child_index_node_ids)) child_index_nodes = session.execute(child_chunk_stmt).scalars().all() @@ -470,6 +501,7 @@ class RetrievalService: index_node_segments = session.execute(document_segment_stmt).scalars().all() # type: ignore for index_node_segment in index_node_segments: doc_segment_map[index_node_segment.id] = [index_node_segment.index_node_id] + if segment_ids: document_segment_stmt = select(DocumentSegment).where( DocumentSegment.enabled == True, @@ -481,6 +513,40 @@ class RetrievalService: if index_node_segments: segments.extend(index_node_segments) + # Handle summary documents: query segments by original_chunk_id + if summary_segment_ids: + summary_segment_ids_list = list(summary_segment_ids) + summary_segment_stmt = select(DocumentSegment).where( + DocumentSegment.enabled == True, + DocumentSegment.status == "completed", + DocumentSegment.id.in_(summary_segment_ids_list), + ) + summary_segments = session.execute(summary_segment_stmt).scalars().all() # type: ignore + segments.extend(summary_segments) + # Add summary segment IDs to segment_ids for summary query + for seg in summary_segments: + if seg.id not in segment_ids: + segment_ids.append(seg.id) + + # Batch query summaries for segments retrieved via summary (only enabled summaries) + if summary_segment_ids: + summaries = ( + session.query(DocumentSegmentSummary) + .filter( + DocumentSegmentSummary.chunk_id.in_(list(summary_segment_ids)), + DocumentSegmentSummary.status == "completed", + DocumentSegmentSummary.enabled == True, # Only retrieve enabled summaries + ) + .all() + ) + for summary in summaries: + if 
summary.summary_content: + segment_summary_map[summary.chunk_id] = summary.summary_content + + include_segment_ids = set() + segment_child_map: dict[str, dict[str, Any]] = {} + records: list[dict[str, Any]] = [] + for segment in segments: child_chunks: list[ChildChunk] = child_chunk_map.get(segment.id, []) attachment_infos: list[dict[str, Any]] = attachment_map.get(segment.id, []) @@ -489,30 +555,44 @@ class RetrievalService: if ds_dataset_document and ds_dataset_document.doc_form == IndexStructureType.PARENT_CHILD_INDEX: if segment.id not in include_segment_ids: include_segment_ids.add(segment.id) + # Check if this segment was retrieved via summary + # Use summary score as base score if available, otherwise 0.0 + max_score = summary_score_map.get(segment.id, 0.0) + if child_chunks or attachment_infos: child_chunk_details = [] - max_score = 0.0 for child_chunk in child_chunks: - document = doc_to_document_map[child_chunk.index_node_id] + child_document: Document | None = doc_to_document_map.get(child_chunk.index_node_id) + if child_document: + child_score = child_document.metadata.get("score", 0.0) + else: + child_score = 0.0 child_chunk_detail = { "id": child_chunk.id, "content": child_chunk.content, "position": child_chunk.position, - "score": document.metadata.get("score", 0.0) if document else 0.0, + "score": child_score, } child_chunk_details.append(child_chunk_detail) - max_score = max(max_score, document.metadata.get("score", 0.0) if document else 0.0) + max_score = max(max_score, child_score) for attachment_info in attachment_infos: - file_document = doc_to_document_map[attachment_info["id"]] - max_score = max( - max_score, file_document.metadata.get("score", 0.0) if file_document else 0.0 - ) + file_document = doc_to_document_map.get(attachment_info["id"]) + if file_document: + max_score = max(max_score, file_document.metadata.get("score", 0.0)) map_detail = { "max_score": max_score, "child_chunks": child_chunk_details, } segment_child_map[segment.id] = map_detail + else: + # No child chunks or attachments, use summary score if available + summary_score = summary_score_map.get(segment.id) + if summary_score is not None: + segment_child_map[segment.id] = { + "max_score": summary_score, + "child_chunks": [], + } record: dict[str, Any] = { "segment": segment, } @@ -520,14 +600,23 @@ class RetrievalService: else: if segment.id not in include_segment_ids: include_segment_ids.add(segment.id) - max_score = 0.0 - segment_document = doc_to_document_map.get(segment.index_node_id) - if segment_document: - max_score = max(max_score, segment_document.metadata.get("score", 0.0)) + + # Check if this segment was retrieved via summary + # Use summary score if available (summary retrieval takes priority) + max_score = summary_score_map.get(segment.id, 0.0) + + # If not retrieved via summary, use original segment's score + if segment.id not in summary_score_map: + segment_document = doc_to_document_map.get(segment.index_node_id) + if segment_document: + max_score = max(max_score, segment_document.metadata.get("score", 0.0)) + + # Also consider attachment scores for attachment_info in attachment_infos: file_doc = doc_to_document_map.get(attachment_info["id"]) if file_doc: max_score = max(max_score, file_doc.metadata.get("score", 0.0)) + record = { "segment": segment, "score": max_score, @@ -576,9 +665,16 @@ class RetrievalService: else None ) + # Extract summary if this segment was retrieved via summary + summary_content = segment_summary_map.get(segment.id) + # Create RetrievalSegments object 
retrieval_segment = RetrievalSegments( - segment=segment, child_chunks=child_chunks_list, score=score, files=files + segment=segment, + child_chunks=child_chunks_list, + score=score, + files=files, + summary=summary_content, ) result.append(retrieval_segment) diff --git a/api/core/rag/embedding/retrieval.py b/api/core/rag/embedding/retrieval.py index b54a37b49e..f6834ab87b 100644 --- a/api/core/rag/embedding/retrieval.py +++ b/api/core/rag/embedding/retrieval.py @@ -20,3 +20,4 @@ class RetrievalSegments(BaseModel): child_chunks: list[RetrievalChildChunk] | None = None score: float | None = None files: list[dict[str, str | int]] | None = None + summary: str | None = None # Summary content if retrieved via summary index diff --git a/api/core/rag/entities/citation_metadata.py b/api/core/rag/entities/citation_metadata.py index 9f66cd9a03..aec5c353f8 100644 --- a/api/core/rag/entities/citation_metadata.py +++ b/api/core/rag/entities/citation_metadata.py @@ -22,3 +22,4 @@ class RetrievalSourceMetadata(BaseModel): doc_metadata: dict[str, Any] | None = None title: str | None = None files: list[dict[str, Any]] | None = None + summary: str | None = None diff --git a/api/core/rag/index_processor/index_processor_base.py b/api/core/rag/index_processor/index_processor_base.py index e36b54eedd..151a3de7d9 100644 --- a/api/core/rag/index_processor/index_processor_base.py +++ b/api/core/rag/index_processor/index_processor_base.py @@ -13,6 +13,7 @@ from urllib.parse import unquote, urlparse import httpx from configs import dify_config +from core.entities.knowledge_entities import PreviewDetail from core.helper import ssrf_proxy from core.rag.extractor.entity.extract_setting import ExtractSetting from core.rag.index_processor.constant.doc_type import DocType @@ -45,6 +46,17 @@ class BaseIndexProcessor(ABC): def transform(self, documents: list[Document], current_user: Account | None = None, **kwargs) -> list[Document]: raise NotImplementedError + @abstractmethod + def generate_summary_preview( + self, tenant_id: str, preview_texts: list[PreviewDetail], summary_index_setting: dict + ) -> list[PreviewDetail]: + """ + For each segment in preview_texts, generate a summary using LLM and attach it to the segment. + The summary can be stored in a new attribute, e.g., summary. + This method should be implemented by subclasses. 
+ """ + raise NotImplementedError + @abstractmethod def load( self, diff --git a/api/core/rag/index_processor/processor/paragraph_index_processor.py b/api/core/rag/index_processor/processor/paragraph_index_processor.py index cf68cff7dc..ab91e29145 100644 --- a/api/core/rag/index_processor/processor/paragraph_index_processor.py +++ b/api/core/rag/index_processor/processor/paragraph_index_processor.py @@ -1,9 +1,27 @@ """Paragraph index processor.""" +import logging +import re import uuid from collections.abc import Mapping -from typing import Any +from typing import Any, cast +logger = logging.getLogger(__name__) + +from core.entities.knowledge_entities import PreviewDetail +from core.file import File, FileTransferMethod, FileType, file_manager +from core.llm_generator.prompts import DEFAULT_GENERATOR_SUMMARY_PROMPT +from core.model_manager import ModelInstance +from core.model_runtime.entities.llm_entities import LLMResult, LLMUsage +from core.model_runtime.entities.message_entities import ( + ImagePromptMessageContent, + PromptMessage, + PromptMessageContentUnionTypes, + TextPromptMessageContent, + UserPromptMessage, +) +from core.model_runtime.entities.model_entities import ModelFeature, ModelType +from core.provider_manager import ProviderManager from core.rag.cleaner.clean_processor import CleanProcessor from core.rag.datasource.keyword.keyword_factory import Keyword from core.rag.datasource.retrieval_service import RetrievalService @@ -17,12 +35,17 @@ from core.rag.index_processor.index_processor_base import BaseIndexProcessor from core.rag.models.document import AttachmentDocument, Document, MultimodalGeneralStructureChunk from core.rag.retrieval.retrieval_methods import RetrievalMethod from core.tools.utils.text_processing_utils import remove_leading_symbols +from core.workflow.nodes.llm import llm_utils +from extensions.ext_database import db +from factories.file_factory import build_from_mapping from libs import helper +from models import UploadFile from models.account import Account -from models.dataset import Dataset, DatasetProcessRule +from models.dataset import Dataset, DatasetProcessRule, DocumentSegment, SegmentAttachmentBinding from models.dataset import Document as DatasetDocument from services.account_service import AccountService from services.entities.knowledge_entities.knowledge_entities import Rule +from services.summary_index_service import SummaryIndexService class ParagraphIndexProcessor(BaseIndexProcessor): @@ -108,6 +131,29 @@ class ParagraphIndexProcessor(BaseIndexProcessor): keyword.add_texts(documents) def clean(self, dataset: Dataset, node_ids: list[str] | None, with_keywords: bool = True, **kwargs): + # Note: Summary indexes are now disabled (not deleted) when segments are disabled. + # This method is called for actual deletion scenarios (e.g., when segment is deleted). + # For disable operations, disable_summaries_for_segments is called directly in the task. 
+ # Only delete summaries if explicitly requested (e.g., when segment is actually deleted) + delete_summaries = kwargs.get("delete_summaries", False) + if delete_summaries: + if node_ids: + # Find segments by index_node_id + segments = ( + db.session.query(DocumentSegment) + .filter( + DocumentSegment.dataset_id == dataset.id, + DocumentSegment.index_node_id.in_(node_ids), + ) + .all() + ) + segment_ids = [segment.id for segment in segments] + if segment_ids: + SummaryIndexService.delete_summaries_for_segments(dataset, segment_ids) + else: + # Delete all summaries for the dataset + SummaryIndexService.delete_summaries_for_segments(dataset, None) + if dataset.indexing_technique == "high_quality": vector = Vector(dataset) if node_ids: @@ -227,3 +273,322 @@ class ParagraphIndexProcessor(BaseIndexProcessor): } else: raise ValueError("Chunks is not a list") + + def generate_summary_preview( + self, tenant_id: str, preview_texts: list[PreviewDetail], summary_index_setting: dict + ) -> list[PreviewDetail]: + """ + For each segment, concurrently call generate_summary to generate a summary + and write it to the summary attribute of PreviewDetail. + In preview mode (indexing-estimate), if any summary generation fails, the method will raise an exception. + """ + import concurrent.futures + + from flask import current_app + + # Capture Flask app context for worker threads + flask_app = None + try: + flask_app = current_app._get_current_object() # type: ignore + except RuntimeError: + logger.warning("No Flask application context available, summary generation may fail") + + def process(preview: PreviewDetail) -> None: + """Generate summary for a single preview item.""" + if flask_app: + # Ensure Flask app context in worker thread + with flask_app.app_context(): + summary, _ = self.generate_summary(tenant_id, preview.content, summary_index_setting) + preview.summary = summary + else: + # Fallback: try without app context (may fail) + summary, _ = self.generate_summary(tenant_id, preview.content, summary_index_setting) + preview.summary = summary + + # Generate summaries concurrently using ThreadPoolExecutor + # Set a reasonable timeout to prevent hanging (60 seconds per chunk, max 5 minutes total) + timeout_seconds = min(300, 60 * len(preview_texts)) + errors: list[Exception] = [] + + with concurrent.futures.ThreadPoolExecutor(max_workers=min(10, len(preview_texts))) as executor: + futures = [executor.submit(process, preview) for preview in preview_texts] + # Wait for all tasks to complete with timeout + done, not_done = concurrent.futures.wait(futures, timeout=timeout_seconds) + + # Cancel tasks that didn't complete in time + if not_done: + timeout_error_msg = ( + f"Summary generation timeout: {len(not_done)} chunks did not complete within {timeout_seconds}s" + ) + logger.warning("%s. 
Cancelling remaining tasks...", timeout_error_msg) + # In preview mode, timeout is also an error + errors.append(TimeoutError(timeout_error_msg)) + for future in not_done: + future.cancel() + # Wait a bit for cancellation to take effect + concurrent.futures.wait(not_done, timeout=5) + + # Collect exceptions from completed futures + for future in done: + try: + future.result() # This will raise any exception that occurred + except Exception as e: + logger.exception("Error in summary generation future") + errors.append(e) + + # In preview mode (indexing-estimate), if there are any errors, fail the request + if errors: + error_messages = [str(e) for e in errors] + error_summary = ( + f"Failed to generate summaries for {len(errors)} chunk(s). " + f"Errors: {'; '.join(error_messages[:3])}" # Show first 3 errors + ) + if len(errors) > 3: + error_summary += f" (and {len(errors) - 3} more)" + logger.error("Summary generation failed in preview mode: %s", error_summary) + raise ValueError(error_summary) + + return preview_texts + + @staticmethod + def generate_summary( + tenant_id: str, + text: str, + summary_index_setting: dict | None = None, + segment_id: str | None = None, + ) -> tuple[str, LLMUsage]: + """ + Generate summary for the given text using ModelInstance.invoke_llm and the default or custom summary prompt, + and supports vision models by including images from the segment attachments or text content. + + Args: + tenant_id: Tenant ID + text: Text content to summarize + summary_index_setting: Summary index configuration + segment_id: Optional segment ID to fetch attachments from SegmentAttachmentBinding table + + Returns: + Tuple of (summary_content, llm_usage) where llm_usage is LLMUsage object + """ + if not summary_index_setting or not summary_index_setting.get("enable"): + raise ValueError("summary_index_setting is required and must be enabled to generate summary.") + + model_name = summary_index_setting.get("model_name") + model_provider_name = summary_index_setting.get("model_provider_name") + summary_prompt = summary_index_setting.get("summary_prompt") + + if not model_name or not model_provider_name: + raise ValueError("model_name and model_provider_name are required in summary_index_setting") + + # Import default summary prompt + if not summary_prompt: + summary_prompt = DEFAULT_GENERATOR_SUMMARY_PROMPT + + provider_manager = ProviderManager() + provider_model_bundle = provider_manager.get_provider_model_bundle( + tenant_id, model_provider_name, ModelType.LLM + ) + model_instance = ModelInstance(provider_model_bundle, model_name) + + # Get model schema to check if vision is supported + model_schema = model_instance.model_type_instance.get_model_schema(model_name, model_instance.credentials) + supports_vision = model_schema and model_schema.features and ModelFeature.VISION in model_schema.features + + # Extract images if model supports vision + image_files = [] + if supports_vision: + # First, try to get images from SegmentAttachmentBinding (preferred method) + if segment_id: + image_files = ParagraphIndexProcessor._extract_images_from_segment_attachments(tenant_id, segment_id) + + # If no images from attachments, fall back to extracting from text + if not image_files: + image_files = ParagraphIndexProcessor._extract_images_from_text(tenant_id, text) + + # Build prompt messages + prompt_messages = [] + + if image_files: + # If we have images, create a UserPromptMessage with both text and images + prompt_message_contents: list[PromptMessageContentUnionTypes] = [] + + # Add images 
first + for file in image_files: + try: + file_content = file_manager.to_prompt_message_content( + file, image_detail_config=ImagePromptMessageContent.DETAIL.LOW + ) + prompt_message_contents.append(file_content) + except Exception as e: + logger.warning("Failed to convert image file to prompt message content: %s", str(e)) + continue + + # Add text content + if prompt_message_contents: # Only add text if we successfully added images + prompt_message_contents.append(TextPromptMessageContent(data=f"{summary_prompt}\n{text}")) + prompt_messages.append(UserPromptMessage(content=prompt_message_contents)) + else: + # If image conversion failed, fall back to text-only + prompt = f"{summary_prompt}\n{text}" + prompt_messages.append(UserPromptMessage(content=prompt)) + else: + # No images, use simple text prompt + prompt = f"{summary_prompt}\n{text}" + prompt_messages.append(UserPromptMessage(content=prompt)) + + result = model_instance.invoke_llm( + prompt_messages=cast(list[PromptMessage], prompt_messages), model_parameters={}, stream=False + ) + + # Type assertion: when stream=False, invoke_llm returns LLMResult, not Generator + if not isinstance(result, LLMResult): + raise ValueError("Expected LLMResult when stream=False") + + summary_content = getattr(result.message, "content", "") + usage = result.usage + + # Deduct quota for summary generation (same as workflow nodes) + try: + llm_utils.deduct_llm_quota(tenant_id=tenant_id, model_instance=model_instance, usage=usage) + except Exception as e: + # Log but don't fail summary generation if quota deduction fails + logger.warning("Failed to deduct quota for summary generation: %s", str(e)) + + return summary_content, usage + + @staticmethod + def _extract_images_from_text(tenant_id: str, text: str) -> list[File]: + """ + Extract images from markdown text and convert them to File objects. + + Args: + tenant_id: Tenant ID + text: Text content that may contain markdown image links + + Returns: + List of File objects representing images found in the text + """ + # Extract markdown images using regex pattern + pattern = r"!\[.*?\]\((.*?)\)" + images = re.findall(pattern, text) + + if not images: + return [] + + upload_file_id_list = [] + + for image in images: + # For data before v0.10.0 + pattern = r"/files/([a-f0-9\-]+)/image-preview(?:\?.*?)?" + match = re.search(pattern, image) + if match: + upload_file_id = match.group(1) + upload_file_id_list.append(upload_file_id) + continue + + # For data after v0.10.0 + pattern = r"/files/([a-f0-9\-]+)/file-preview(?:\?.*?)?" + match = re.search(pattern, image) + if match: + upload_file_id = match.group(1) + upload_file_id_list.append(upload_file_id) + continue + + # For tools directory - direct file formats (e.g., .png, .jpg, etc.) + pattern = r"/files/tools/([a-f0-9\-]+)\.([a-zA-Z0-9]+)(?:\?[^\s\)\"\']*)?" 
+ match = re.search(pattern, image) + if match: + # Tool files are handled differently, skip for now + continue + + if not upload_file_id_list: + return [] + + # Get unique IDs for database query + unique_upload_file_ids = list(set(upload_file_id_list)) + upload_files = ( + db.session.query(UploadFile) + .where(UploadFile.id.in_(unique_upload_file_ids), UploadFile.tenant_id == tenant_id) + .all() + ) + + # Create File objects from UploadFile records + file_objects = [] + for upload_file in upload_files: + # Only process image files + if not upload_file.mime_type or "image" not in upload_file.mime_type: + continue + + mapping = { + "upload_file_id": upload_file.id, + "transfer_method": FileTransferMethod.LOCAL_FILE.value, + "type": FileType.IMAGE.value, + } + + try: + file_obj = build_from_mapping( + mapping=mapping, + tenant_id=tenant_id, + ) + file_objects.append(file_obj) + except Exception as e: + logger.warning("Failed to create File object from UploadFile %s: %s", upload_file.id, str(e)) + continue + + return file_objects + + @staticmethod + def _extract_images_from_segment_attachments(tenant_id: str, segment_id: str) -> list[File]: + """ + Extract images from SegmentAttachmentBinding table (preferred method). + This matches how DatasetRetrieval gets segment attachments. + + Args: + tenant_id: Tenant ID + segment_id: Segment ID to fetch attachments for + + Returns: + List of File objects representing images found in segment attachments + """ + from sqlalchemy import select + + # Query attachments from SegmentAttachmentBinding table + attachments_with_bindings = db.session.execute( + select(SegmentAttachmentBinding, UploadFile) + .join(UploadFile, UploadFile.id == SegmentAttachmentBinding.attachment_id) + .where( + SegmentAttachmentBinding.segment_id == segment_id, + SegmentAttachmentBinding.tenant_id == tenant_id, + ) + ).all() + + if not attachments_with_bindings: + return [] + + file_objects = [] + for _, upload_file in attachments_with_bindings: + # Only process image files + if not upload_file.mime_type or "image" not in upload_file.mime_type: + continue + + try: + # Create File object directly (similar to DatasetRetrieval) + file_obj = File( + id=upload_file.id, + filename=upload_file.name, + extension="." 
+ upload_file.extension, + mime_type=upload_file.mime_type, + tenant_id=tenant_id, + type=FileType.IMAGE, + transfer_method=FileTransferMethod.LOCAL_FILE, + remote_url=upload_file.source_url, + related_id=upload_file.id, + size=upload_file.size, + storage_key=upload_file.key, + ) + file_objects.append(file_obj) + except Exception as e: + logger.warning("Failed to create File object from UploadFile %s: %s", upload_file.id, str(e)) + continue + + return file_objects diff --git a/api/core/rag/index_processor/processor/parent_child_index_processor.py b/api/core/rag/index_processor/processor/parent_child_index_processor.py index 0366f3259f..961df2e50c 100644 --- a/api/core/rag/index_processor/processor/parent_child_index_processor.py +++ b/api/core/rag/index_processor/processor/parent_child_index_processor.py @@ -1,11 +1,14 @@ """Paragraph index processor.""" import json +import logging import uuid from collections.abc import Mapping from typing import Any from configs import dify_config +from core.db.session_factory import session_factory +from core.entities.knowledge_entities import PreviewDetail from core.model_manager import ModelInstance from core.rag.cleaner.clean_processor import CleanProcessor from core.rag.datasource.retrieval_service import RetrievalService @@ -25,6 +28,9 @@ from models.dataset import ChildChunk, Dataset, DatasetProcessRule, DocumentSegm from models.dataset import Document as DatasetDocument from services.account_service import AccountService from services.entities.knowledge_entities.knowledge_entities import ParentMode, Rule +from services.summary_index_service import SummaryIndexService + +logger = logging.getLogger(__name__) class ParentChildIndexProcessor(BaseIndexProcessor): @@ -135,6 +141,30 @@ class ParentChildIndexProcessor(BaseIndexProcessor): def clean(self, dataset: Dataset, node_ids: list[str] | None, with_keywords: bool = True, **kwargs): # node_ids is segment's node_ids + # Note: Summary indexes are now disabled (not deleted) when segments are disabled. + # This method is called for actual deletion scenarios (e.g., when segment is deleted). + # For disable operations, disable_summaries_for_segments is called directly in the task. 
+ # Only delete summaries if explicitly requested (e.g., when segment is actually deleted) + delete_summaries = kwargs.get("delete_summaries", False) + if delete_summaries: + if node_ids: + # Find segments by index_node_id + with session_factory.create_session() as session: + segments = ( + session.query(DocumentSegment) + .filter( + DocumentSegment.dataset_id == dataset.id, + DocumentSegment.index_node_id.in_(node_ids), + ) + .all() + ) + segment_ids = [segment.id for segment in segments] + if segment_ids: + SummaryIndexService.delete_summaries_for_segments(dataset, segment_ids) + else: + # Delete all summaries for the dataset + SummaryIndexService.delete_summaries_for_segments(dataset, None) + if dataset.indexing_technique == "high_quality": delete_child_chunks = kwargs.get("delete_child_chunks") or False precomputed_child_node_ids = kwargs.get("precomputed_child_node_ids") @@ -326,3 +356,91 @@ class ParentChildIndexProcessor(BaseIndexProcessor): "preview": preview, "total_segments": len(parent_childs.parent_child_chunks), } + + def generate_summary_preview( + self, tenant_id: str, preview_texts: list[PreviewDetail], summary_index_setting: dict + ) -> list[PreviewDetail]: + """ + For each parent chunk in preview_texts, concurrently call generate_summary to generate a summary + and write it to the summary attribute of PreviewDetail. + In preview mode (indexing-estimate), if any summary generation fails, the method will raise an exception. + + Note: For parent-child structure, we only generate summaries for parent chunks. + """ + import concurrent.futures + + from flask import current_app + + # Capture Flask app context for worker threads + flask_app = None + try: + flask_app = current_app._get_current_object() # type: ignore + except RuntimeError: + logger.warning("No Flask application context available, summary generation may fail") + + def process(preview: PreviewDetail) -> None: + """Generate summary for a single preview item (parent chunk).""" + from core.rag.index_processor.processor.paragraph_index_processor import ParagraphIndexProcessor + + if flask_app: + # Ensure Flask app context in worker thread + with flask_app.app_context(): + summary, _ = ParagraphIndexProcessor.generate_summary( + tenant_id=tenant_id, + text=preview.content, + summary_index_setting=summary_index_setting, + ) + preview.summary = summary + else: + # Fallback: try without app context (may fail) + summary, _ = ParagraphIndexProcessor.generate_summary( + tenant_id=tenant_id, + text=preview.content, + summary_index_setting=summary_index_setting, + ) + preview.summary = summary + + # Generate summaries concurrently using ThreadPoolExecutor + # Set a reasonable timeout to prevent hanging (60 seconds per chunk, max 5 minutes total) + timeout_seconds = min(300, 60 * len(preview_texts)) + errors: list[Exception] = [] + + with concurrent.futures.ThreadPoolExecutor(max_workers=min(10, len(preview_texts))) as executor: + futures = [executor.submit(process, preview) for preview in preview_texts] + # Wait for all tasks to complete with timeout + done, not_done = concurrent.futures.wait(futures, timeout=timeout_seconds) + + # Cancel tasks that didn't complete in time + if not_done: + timeout_error_msg = ( + f"Summary generation timeout: {len(not_done)} chunks did not complete within {timeout_seconds}s" + ) + logger.warning("%s. 
Cancelling remaining tasks...", timeout_error_msg) + # In preview mode, timeout is also an error + errors.append(TimeoutError(timeout_error_msg)) + for future in not_done: + future.cancel() + # Wait a bit for cancellation to take effect + concurrent.futures.wait(not_done, timeout=5) + + # Collect exceptions from completed futures + for future in done: + try: + future.result() # This will raise any exception that occurred + except Exception as e: + logger.exception("Error in summary generation future") + errors.append(e) + + # In preview mode (indexing-estimate), if there are any errors, fail the request + if errors: + error_messages = [str(e) for e in errors] + error_summary = ( + f"Failed to generate summaries for {len(errors)} chunk(s). " + f"Errors: {'; '.join(error_messages[:3])}" # Show first 3 errors + ) + if len(errors) > 3: + error_summary += f" (and {len(errors) - 3} more)" + logger.error("Summary generation failed in preview mode: %s", error_summary) + raise ValueError(error_summary) + + return preview_texts diff --git a/api/core/rag/index_processor/processor/qa_index_processor.py b/api/core/rag/index_processor/processor/qa_index_processor.py index 1183d5fbd7..272d2ed351 100644 --- a/api/core/rag/index_processor/processor/qa_index_processor.py +++ b/api/core/rag/index_processor/processor/qa_index_processor.py @@ -11,6 +11,8 @@ import pandas as pd from flask import Flask, current_app from werkzeug.datastructures import FileStorage +from core.db.session_factory import session_factory +from core.entities.knowledge_entities import PreviewDetail from core.llm_generator.llm_generator import LLMGenerator from core.rag.cleaner.clean_processor import CleanProcessor from core.rag.datasource.retrieval_service import RetrievalService @@ -25,9 +27,10 @@ from core.rag.retrieval.retrieval_methods import RetrievalMethod from core.tools.utils.text_processing_utils import remove_leading_symbols from libs import helper from models.account import Account -from models.dataset import Dataset +from models.dataset import Dataset, DocumentSegment from models.dataset import Document as DatasetDocument from services.entities.knowledge_entities.knowledge_entities import Rule +from services.summary_index_service import SummaryIndexService logger = logging.getLogger(__name__) @@ -144,6 +147,31 @@ class QAIndexProcessor(BaseIndexProcessor): vector.create_multimodal(multimodal_documents) def clean(self, dataset: Dataset, node_ids: list[str] | None, with_keywords: bool = True, **kwargs): + # Note: Summary indexes are now disabled (not deleted) when segments are disabled. + # This method is called for actual deletion scenarios (e.g., when segment is deleted). + # For disable operations, disable_summaries_for_segments is called directly in the task. 
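As an aside (illustrative sketch, not part of the diff), the contract described in the surrounding notes boils down to two call paths; `index_processor`, `dataset` and `segment` are assumed to be in scope, and the disable helper's signature is inferred by analogy with the delete variant used in this method.

    # Hard-delete path: the segment itself is being removed, so its summary record
    # and summary vector are cleaned up together with the segment index.
    index_processor.clean(
        dataset,
        node_ids=[segment.index_node_id],
        with_keywords=True,
        delete_summaries=True,
    )

    # Disable path: the segment is only toggled off, so the task disables the
    # summary in place instead of deleting it (signature assumed by analogy with
    # SummaryIndexService.delete_summaries_for_segments).
    SummaryIndexService.disable_summaries_for_segments(dataset, [segment.id])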
+ # Note: qa_model doesn't generate summaries, but we clean them for completeness + # Only delete summaries if explicitly requested (e.g., when segment is actually deleted) + delete_summaries = kwargs.get("delete_summaries", False) + if delete_summaries: + if node_ids: + # Find segments by index_node_id + with session_factory.create_session() as session: + segments = ( + session.query(DocumentSegment) + .filter( + DocumentSegment.dataset_id == dataset.id, + DocumentSegment.index_node_id.in_(node_ids), + ) + .all() + ) + segment_ids = [segment.id for segment in segments] + if segment_ids: + SummaryIndexService.delete_summaries_for_segments(dataset, segment_ids) + else: + # Delete all summaries for the dataset + SummaryIndexService.delete_summaries_for_segments(dataset, None) + vector = Vector(dataset) if node_ids: vector.delete_by_ids(node_ids) @@ -212,6 +240,17 @@ class QAIndexProcessor(BaseIndexProcessor): "total_segments": len(qa_chunks.qa_chunks), } + def generate_summary_preview( + self, tenant_id: str, preview_texts: list[PreviewDetail], summary_index_setting: dict + ) -> list[PreviewDetail]: + """ + QA model doesn't generate summaries, so this method returns preview_texts unchanged. + + Note: QA model uses question-answer pairs, which don't require summary generation. + """ + # QA model doesn't generate summaries, return as-is + return preview_texts + def _format_qa_document(self, flask_app: Flask, tenant_id: str, document_node, all_qa_documents, document_language): format_documents = [] if document_node.page_content is None or not document_node.page_content.strip(): diff --git a/api/core/rag/retrieval/dataset_retrieval.py b/api/core/rag/retrieval/dataset_retrieval.py index f8f85d141a..541c241ae5 100644 --- a/api/core/rag/retrieval/dataset_retrieval.py +++ b/api/core/rag/retrieval/dataset_retrieval.py @@ -236,20 +236,24 @@ class DatasetRetrieval: if records: for record in records: segment = record.segment + # Build content: if summary exists, add it before the segment content if segment.answer: - document_context_list.append( - DocumentContext( - content=f"question:{segment.get_sign_content()} answer:{segment.answer}", - score=record.score, - ) - ) + segment_content = f"question:{segment.get_sign_content()} answer:{segment.answer}" else: - document_context_list.append( - DocumentContext( - content=segment.get_sign_content(), - score=record.score, - ) + segment_content = segment.get_sign_content() + + # If summary exists, prepend it to the content + if record.summary: + final_content = f"{record.summary}\n{segment_content}" + else: + final_content = segment_content + + document_context_list.append( + DocumentContext( + content=final_content, + score=record.score, ) + ) if vision_enabled: attachments_with_bindings = db.session.execute( select(SegmentAttachmentBinding, UploadFile) @@ -316,6 +320,9 @@ class DatasetRetrieval: source.content = f"question:{segment.content} \nanswer:{segment.answer}" else: source.content = segment.content + # Add summary if this segment was retrieved via summary + if hasattr(record, "summary") and record.summary: + source.summary = record.summary retrieval_resource_list.append(source) if hit_callback and retrieval_resource_list: retrieval_resource_list = sorted(retrieval_resource_list, key=lambda x: x.score or 0.0, reverse=True) diff --git a/api/core/tools/utils/dataset_retriever/dataset_retriever_tool.py b/api/core/tools/utils/dataset_retriever/dataset_retriever_tool.py index f96510fb45..057ec41f65 100644 --- 
a/api/core/tools/utils/dataset_retriever/dataset_retriever_tool.py +++ b/api/core/tools/utils/dataset_retriever/dataset_retriever_tool.py @@ -169,20 +169,24 @@ class DatasetRetrieverTool(DatasetRetrieverBaseTool): if records: for record in records: segment = record.segment + # Build content: if summary exists, add it before the segment content if segment.answer: - document_context_list.append( - DocumentContext( - content=f"question:{segment.get_sign_content()} answer:{segment.answer}", - score=record.score, - ) - ) + segment_content = f"question:{segment.get_sign_content()} answer:{segment.answer}" else: - document_context_list.append( - DocumentContext( - content=segment.get_sign_content(), - score=record.score, - ) + segment_content = segment.get_sign_content() + + # If summary exists, prepend it to the content + if record.summary: + final_content = f"{record.summary}\n{segment_content}" + else: + final_content = segment_content + + document_context_list.append( + DocumentContext( + content=final_content, + score=record.score, ) + ) if self.return_resource: for record in records: @@ -216,6 +220,9 @@ class DatasetRetrieverTool(DatasetRetrieverBaseTool): source.content = f"question:{segment.content} \nanswer:{segment.answer}" else: source.content = segment.content + # Add summary if this segment was retrieved via summary + if hasattr(record, "summary") and record.summary: + source.summary = record.summary retrieval_resource_list.append(source) if self.return_resource and retrieval_resource_list: diff --git a/api/core/workflow/nodes/knowledge_index/entities.py b/api/core/workflow/nodes/knowledge_index/entities.py index 3daca90b9b..bfeb9b5b79 100644 --- a/api/core/workflow/nodes/knowledge_index/entities.py +++ b/api/core/workflow/nodes/knowledge_index/entities.py @@ -158,3 +158,5 @@ class KnowledgeIndexNodeData(BaseNodeData): type: str = "knowledge-index" chunk_structure: str index_chunk_variable_selector: list[str] + indexing_technique: str | None = None + summary_index_setting: dict | None = None diff --git a/api/core/workflow/nodes/knowledge_index/knowledge_index_node.py b/api/core/workflow/nodes/knowledge_index/knowledge_index_node.py index 17ca4bef7b..b88c2d510f 100644 --- a/api/core/workflow/nodes/knowledge_index/knowledge_index_node.py +++ b/api/core/workflow/nodes/knowledge_index/knowledge_index_node.py @@ -1,9 +1,11 @@ +import concurrent.futures import datetime import logging import time from collections.abc import Mapping from typing import Any +from flask import current_app from sqlalchemy import func, select from core.app.entities.app_invoke_entities import InvokeFrom @@ -16,7 +18,9 @@ from core.workflow.nodes.base.node import Node from core.workflow.nodes.base.template import Template from core.workflow.runtime import VariablePool from extensions.ext_database import db -from models.dataset import Dataset, Document, DocumentSegment +from models.dataset import Dataset, Document, DocumentSegment, DocumentSegmentSummary +from services.summary_index_service import SummaryIndexService +from tasks.generate_summary_index_task import generate_summary_index_task from .entities import KnowledgeIndexNodeData from .exc import ( @@ -67,7 +71,20 @@ class KnowledgeIndexNode(Node[KnowledgeIndexNodeData]): # index knowledge try: if is_preview: - outputs = self._get_preview_output(node_data.chunk_structure, chunks) + # Preview mode: generate summaries for chunks directly without saving to database + # Format preview and generate summaries on-the-fly + # Get indexing_technique and 
summary_index_setting from node_data (workflow graph config) + # or fallback to dataset if not available in node_data + indexing_technique = node_data.indexing_technique or dataset.indexing_technique + summary_index_setting = node_data.summary_index_setting or dataset.summary_index_setting + + outputs = self._get_preview_output_with_summaries( + node_data.chunk_structure, + chunks, + dataset=dataset, + indexing_technique=indexing_technique, + summary_index_setting=summary_index_setting, + ) return NodeRunResult( status=WorkflowNodeExecutionStatus.SUCCEEDED, inputs=variables, @@ -148,6 +165,11 @@ class KnowledgeIndexNode(Node[KnowledgeIndexNodeData]): ) .scalar() ) + # Update need_summary based on dataset's summary_index_setting + if dataset.summary_index_setting and dataset.summary_index_setting.get("enable") is True: + document.need_summary = True + else: + document.need_summary = False db.session.add(document) # update document segment status db.session.query(DocumentSegment).where( @@ -163,6 +185,9 @@ class KnowledgeIndexNode(Node[KnowledgeIndexNodeData]): db.session.commit() + # Generate summary index if enabled + self._handle_summary_index_generation(dataset, document, variable_pool) + return { "dataset_id": ds_id_value, "dataset_name": dataset_name_value, @@ -173,9 +198,304 @@ class KnowledgeIndexNode(Node[KnowledgeIndexNodeData]): "display_status": "completed", } - def _get_preview_output(self, chunk_structure: str, chunks: Any) -> Mapping[str, Any]: + def _handle_summary_index_generation( + self, + dataset: Dataset, + document: Document, + variable_pool: VariablePool, + ) -> None: + """ + Handle summary index generation based on mode (debug/preview or production). + + Args: + dataset: Dataset containing the document + document: Document to generate summaries for + variable_pool: Variable pool to check invoke_from + """ + # Only generate summary index for high_quality indexing technique + if dataset.indexing_technique != "high_quality": + return + + # Check if summary index is enabled + summary_index_setting = dataset.summary_index_setting + if not summary_index_setting or not summary_index_setting.get("enable"): + return + + # Skip qa_model documents + if document.doc_form == "qa_model": + return + + # Determine if in preview/debug mode + invoke_from = variable_pool.get(["sys", SystemVariableKey.INVOKE_FROM]) + is_preview = invoke_from and invoke_from.value == InvokeFrom.DEBUGGER + + if is_preview: + try: + # Query segments that need summary generation + query = db.session.query(DocumentSegment).filter_by( + dataset_id=dataset.id, + document_id=document.id, + status="completed", + enabled=True, + ) + segments = query.all() + + if not segments: + logger.info("No segments found for document %s", document.id) + return + + # Filter segments based on mode + segments_to_process = [] + for segment in segments: + # Skip if summary already exists + existing_summary = ( + db.session.query(DocumentSegmentSummary) + .filter_by(chunk_id=segment.id, dataset_id=dataset.id, status="completed") + .first() + ) + if existing_summary: + continue + + # For parent-child mode, all segments are parent chunks, so process all + segments_to_process.append(segment) + + if not segments_to_process: + logger.info("No segments need summary generation for document %s", document.id) + return + + # Use ThreadPoolExecutor for concurrent generation + flask_app = current_app._get_current_object() # type: ignore + max_workers = min(10, len(segments_to_process)) # Limit to 10 workers + + def process_segment(segment: 
DocumentSegment) -> None: + """Process a single segment in a thread with Flask app context.""" + with flask_app.app_context(): + try: + SummaryIndexService.generate_and_vectorize_summary(segment, dataset, summary_index_setting) + except Exception: + logger.exception( + "Failed to generate summary for segment %s", + segment.id, + ) + # Continue processing other segments + + with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor: + futures = [executor.submit(process_segment, segment) for segment in segments_to_process] + # Wait for all tasks to complete + concurrent.futures.wait(futures) + + logger.info( + "Successfully generated summary index for %s segments in document %s", + len(segments_to_process), + document.id, + ) + except Exception: + logger.exception("Failed to generate summary index for document %s", document.id) + # Don't fail the entire indexing process if summary generation fails + else: + # Production mode: asynchronous generation + logger.info( + "Queuing summary index generation task for document %s (production mode)", + document.id, + ) + try: + generate_summary_index_task.delay(dataset.id, document.id, None) + logger.info("Summary index generation task queued for document %s", document.id) + except Exception: + logger.exception( + "Failed to queue summary index generation task for document %s", + document.id, + ) + # Don't fail the entire indexing process if task queuing fails + + def _get_preview_output_with_summaries( + self, + chunk_structure: str, + chunks: Any, + dataset: Dataset, + indexing_technique: str | None = None, + summary_index_setting: dict | None = None, + ) -> Mapping[str, Any]: + """ + Generate preview output with summaries for chunks in preview mode. + This method generates summaries on-the-fly without saving to database. 
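A minimal, standalone sketch of the Flask app-context pattern these preview paths rely on (illustration only, not part of the diff; `items` and `do_work` are placeholders, and the function is assumed to be called from inside an application context):

    import concurrent.futures

    from flask import current_app


    def run_with_app_context(items, do_work):
        # Capture the real app object while still inside an application context.
        flask_app = current_app._get_current_object()

        def worker(item):
            # Worker threads get their own app context so db/model access works.
            with flask_app.app_context():
                do_work(item)

        with concurrent.futures.ThreadPoolExecutor(max_workers=max(1, min(10, len(items)))) as executor:
            futures = [executor.submit(worker, item) for item in items]
            # Block until every item has been processed (no per-item timeout here).
            concurrent.futures.wait(futures)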
+ + Args: + chunk_structure: Chunk structure type + chunks: Chunks to generate preview for + dataset: Dataset object (for tenant_id) + indexing_technique: Indexing technique from node config or dataset + summary_index_setting: Summary index setting from node config or dataset + """ index_processor = IndexProcessorFactory(chunk_structure).init_index_processor() - return index_processor.format_preview(chunks) + preview_output = index_processor.format_preview(chunks) + + # Check if summary index is enabled + if indexing_technique != "high_quality": + return preview_output + + if not summary_index_setting or not summary_index_setting.get("enable"): + return preview_output + + # Generate summaries for chunks + if "preview" in preview_output and isinstance(preview_output["preview"], list): + chunk_count = len(preview_output["preview"]) + logger.info( + "Generating summaries for %s chunks in preview mode (dataset: %s)", + chunk_count, + dataset.id, + ) + # Use ParagraphIndexProcessor's generate_summary method + from core.rag.index_processor.processor.paragraph_index_processor import ParagraphIndexProcessor + + # Get Flask app for application context in worker threads + flask_app = None + try: + flask_app = current_app._get_current_object() # type: ignore + except RuntimeError: + logger.warning("No Flask application context available, summary generation may fail") + + def generate_summary_for_chunk(preview_item: dict) -> None: + """Generate summary for a single chunk.""" + if "content" in preview_item: + # Set Flask application context in worker thread + if flask_app: + with flask_app.app_context(): + summary, _ = ParagraphIndexProcessor.generate_summary( + tenant_id=dataset.tenant_id, + text=preview_item["content"], + summary_index_setting=summary_index_setting, + ) + if summary: + preview_item["summary"] = summary + else: + # Fallback: try without app context (may fail) + summary, _ = ParagraphIndexProcessor.generate_summary( + tenant_id=dataset.tenant_id, + text=preview_item["content"], + summary_index_setting=summary_index_setting, + ) + if summary: + preview_item["summary"] = summary + + # Generate summaries concurrently using ThreadPoolExecutor + # Set a reasonable timeout to prevent hanging (60 seconds per chunk, max 5 minutes total) + timeout_seconds = min(300, 60 * len(preview_output["preview"])) + errors: list[Exception] = [] + + with concurrent.futures.ThreadPoolExecutor(max_workers=min(10, len(preview_output["preview"]))) as executor: + futures = [ + executor.submit(generate_summary_for_chunk, preview_item) + for preview_item in preview_output["preview"] + ] + # Wait for all tasks to complete with timeout + done, not_done = concurrent.futures.wait(futures, timeout=timeout_seconds) + + # Cancel tasks that didn't complete in time + if not_done: + timeout_error_msg = ( + f"Summary generation timeout: {len(not_done)} chunks did not complete within {timeout_seconds}s" + ) + logger.warning("%s. 
Cancelling remaining tasks...", timeout_error_msg) + # In preview mode, timeout is also an error + errors.append(TimeoutError(timeout_error_msg)) + for future in not_done: + future.cancel() + # Wait a bit for cancellation to take effect + concurrent.futures.wait(not_done, timeout=5) + + # Collect exceptions from completed futures + for future in done: + try: + future.result() # This will raise any exception that occurred + except Exception as e: + logger.exception("Error in summary generation future") + errors.append(e) + + # In preview mode, if there are any errors, fail the request + if errors: + error_messages = [str(e) for e in errors] + error_summary = ( + f"Failed to generate summaries for {len(errors)} chunk(s). " + f"Errors: {'; '.join(error_messages[:3])}" # Show first 3 errors + ) + if len(errors) > 3: + error_summary += f" (and {len(errors) - 3} more)" + logger.error("Summary generation failed in preview mode: %s", error_summary) + raise KnowledgeIndexNodeError(error_summary) + + completed_count = sum(1 for item in preview_output["preview"] if item.get("summary") is not None) + logger.info( + "Completed summary generation for preview chunks: %s/%s succeeded", + completed_count, + len(preview_output["preview"]), + ) + + return preview_output + + def _get_preview_output( + self, + chunk_structure: str, + chunks: Any, + dataset: Dataset | None = None, + variable_pool: VariablePool | None = None, + ) -> Mapping[str, Any]: + index_processor = IndexProcessorFactory(chunk_structure).init_index_processor() + preview_output = index_processor.format_preview(chunks) + + # If dataset is provided, try to enrich preview with summaries + if dataset and variable_pool: + document_id = variable_pool.get(["sys", SystemVariableKey.DOCUMENT_ID]) + if document_id: + document = db.session.query(Document).filter_by(id=document_id.value).first() + if document: + # Query summaries for this document + summaries = ( + db.session.query(DocumentSegmentSummary) + .filter_by( + dataset_id=dataset.id, + document_id=document.id, + status="completed", + enabled=True, + ) + .all() + ) + + if summaries: + # Create a map of segment content to summary for matching + # Use content matching as chunks in preview might not be indexed yet + summary_by_content = {} + for summary in summaries: + segment = ( + db.session.query(DocumentSegment) + .filter_by(id=summary.chunk_id, dataset_id=dataset.id) + .first() + ) + if segment: + # Normalize content for matching (strip whitespace) + normalized_content = segment.content.strip() + summary_by_content[normalized_content] = summary.summary_content + + # Enrich preview with summaries by content matching + if "preview" in preview_output and isinstance(preview_output["preview"], list): + matched_count = 0 + for preview_item in preview_output["preview"]: + if "content" in preview_item: + # Normalize content for matching + normalized_chunk_content = preview_item["content"].strip() + if normalized_chunk_content in summary_by_content: + preview_item["summary"] = summary_by_content[normalized_chunk_content] + matched_count += 1 + + if matched_count > 0: + logger.info( + "Enriched preview with %s existing summaries (dataset: %s, document: %s)", + matched_count, + dataset.id, + document.id, + ) + + return preview_output @classmethod def version(cls) -> str: diff --git a/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py b/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py index 8670a71aa3..3c4850ebac 100644 --- 
a/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py +++ b/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py @@ -419,6 +419,9 @@ class KnowledgeRetrievalNode(LLMUsageTrackingMixin, Node[KnowledgeRetrievalNodeD source["content"] = f"question:{segment.get_sign_content()} \nanswer:{segment.answer}" else: source["content"] = segment.get_sign_content() + # Add summary if available + if record.summary: + source["summary"] = record.summary retrieval_resource_list.append(source) if retrieval_resource_list: retrieval_resource_list = sorted( diff --git a/api/core/workflow/nodes/llm/node.py b/api/core/workflow/nodes/llm/node.py index dfb55dcd80..17d82c2118 100644 --- a/api/core/workflow/nodes/llm/node.py +++ b/api/core/workflow/nodes/llm/node.py @@ -685,6 +685,8 @@ class LLMNode(Node[LLMNodeData]): if "content" not in item: raise InvalidContextStructureError(f"Invalid context structure: {item}") + if item.get("summary"): + context_str += item["summary"] + "\n" context_str += item["content"] + "\n" retriever_resource = self._convert_to_original_retriever_resource(item) @@ -746,6 +748,7 @@ class LLMNode(Node[LLMNodeData]): page=metadata.get("page"), doc_metadata=metadata.get("doc_metadata"), files=context_dict.get("files"), + summary=context_dict.get("summary"), ) return source diff --git a/api/extensions/ext_celery.py b/api/extensions/ext_celery.py index 08cf96c1c1..af983f6d87 100644 --- a/api/extensions/ext_celery.py +++ b/api/extensions/ext_celery.py @@ -102,6 +102,8 @@ def init_app(app: DifyApp) -> Celery: imports = [ "tasks.async_workflow_tasks", # trigger workers "tasks.trigger_processing_tasks", # async trigger processing + "tasks.generate_summary_index_task", # summary index generation + "tasks.regenerate_summary_index_task", # summary index regeneration ] day = dify_config.CELERY_BEAT_SCHEDULER_TIME diff --git a/api/fields/dataset_fields.py b/api/fields/dataset_fields.py index 1e5ec7d200..ff6578098b 100644 --- a/api/fields/dataset_fields.py +++ b/api/fields/dataset_fields.py @@ -39,6 +39,14 @@ dataset_retrieval_model_fields = { "score_threshold_enabled": fields.Boolean, "score_threshold": fields.Float, } + +dataset_summary_index_fields = { + "enable": fields.Boolean, + "model_name": fields.String, + "model_provider_name": fields.String, + "summary_prompt": fields.String, +} + external_retrieval_model_fields = { "top_k": fields.Integer, "score_threshold": fields.Float, @@ -83,6 +91,7 @@ dataset_detail_fields = { "embedding_model_provider": fields.String, "embedding_available": fields.Boolean, "retrieval_model_dict": fields.Nested(dataset_retrieval_model_fields), + "summary_index_setting": fields.Nested(dataset_summary_index_fields), "tags": fields.List(fields.Nested(tag_fields)), "doc_form": fields.String, "external_knowledge_info": fields.Nested(external_knowledge_info_fields), diff --git a/api/fields/document_fields.py b/api/fields/document_fields.py index 9be59f7454..35a2a04f3e 100644 --- a/api/fields/document_fields.py +++ b/api/fields/document_fields.py @@ -33,6 +33,11 @@ document_fields = { "hit_count": fields.Integer, "doc_form": fields.String, "doc_metadata": fields.List(fields.Nested(document_metadata_fields), attribute="doc_metadata_details"), + # Summary index generation status: + # "SUMMARIZING" (when task is queued and generating) + "summary_index_status": fields.String, + # Whether this document needs summary index generation + "need_summary": fields.Boolean, } document_with_segments_fields = { @@ -60,6 +65,10 @@ 
document_with_segments_fields = { "completed_segments": fields.Integer, "total_segments": fields.Integer, "doc_metadata": fields.List(fields.Nested(document_metadata_fields), attribute="doc_metadata_details"), + # Summary index generation status: + # "SUMMARIZING" (when task is queued and generating) + "summary_index_status": fields.String, + "need_summary": fields.Boolean, # Whether this document needs summary index generation } dataset_and_document_fields = { diff --git a/api/fields/hit_testing_fields.py b/api/fields/hit_testing_fields.py index e70f9fa722..0b54992835 100644 --- a/api/fields/hit_testing_fields.py +++ b/api/fields/hit_testing_fields.py @@ -58,4 +58,5 @@ hit_testing_record_fields = { "score": fields.Float, "tsne_position": fields.Raw, "files": fields.List(fields.Nested(files_fields)), + "summary": fields.String, # Summary content if retrieved via summary index } diff --git a/api/fields/message_fields.py b/api/fields/message_fields.py index c81e482f73..e6c3b42f93 100644 --- a/api/fields/message_fields.py +++ b/api/fields/message_fields.py @@ -36,6 +36,7 @@ class RetrieverResource(ResponseModel): segment_position: int | None = None index_node_hash: str | None = None content: str | None = None + summary: str | None = None created_at: int | None = None @field_validator("created_at", mode="before") diff --git a/api/fields/segment_fields.py b/api/fields/segment_fields.py index 56d6b68378..2ce9fb154c 100644 --- a/api/fields/segment_fields.py +++ b/api/fields/segment_fields.py @@ -49,4 +49,5 @@ segment_fields = { "stopped_at": TimestampField, "child_chunks": fields.List(fields.Nested(child_chunk_fields)), "attachments": fields.List(fields.Nested(attachment_fields)), + "summary": fields.String, # Summary content for the segment } diff --git a/api/migrations/versions/2026_01_27_1815-788d3099ae3a_add_summary_index_feature.py b/api/migrations/versions/2026_01_27_1815-788d3099ae3a_add_summary_index_feature.py new file mode 100644 index 0000000000..3c2e0822e1 --- /dev/null +++ b/api/migrations/versions/2026_01_27_1815-788d3099ae3a_add_summary_index_feature.py @@ -0,0 +1,107 @@ +"""add summary index feature + +Revision ID: 788d3099ae3a +Revises: 9d77545f524e +Create Date: 2026-01-27 18:15:45.277928 + +""" +from alembic import op +import models as models +import sqlalchemy as sa + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + +# revision identifiers, used by Alembic. +revision = '788d3099ae3a' +down_revision = '9d77545f524e' +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! 
### + conn = op.get_bind() + if _is_pg(conn): + op.create_table('document_segment_summaries', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('dataset_id', models.types.StringUUID(), nullable=False), + sa.Column('document_id', models.types.StringUUID(), nullable=False), + sa.Column('chunk_id', models.types.StringUUID(), nullable=False), + sa.Column('summary_content', models.types.LongText(), nullable=True), + sa.Column('summary_index_node_id', sa.String(length=255), nullable=True), + sa.Column('summary_index_node_hash', sa.String(length=255), nullable=True), + sa.Column('tokens', sa.Integer(), nullable=True), + sa.Column('status', sa.String(length=32), server_default=sa.text("'generating'"), nullable=False), + sa.Column('error', models.types.LongText(), nullable=True), + sa.Column('enabled', sa.Boolean(), server_default=sa.text('true'), nullable=False), + sa.Column('disabled_at', sa.DateTime(), nullable=True), + sa.Column('disabled_by', models.types.StringUUID(), nullable=True), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), + sa.PrimaryKeyConstraint('id', name='document_segment_summaries_pkey') + ) + with op.batch_alter_table('document_segment_summaries', schema=None) as batch_op: + batch_op.create_index('document_segment_summaries_chunk_id_idx', ['chunk_id'], unique=False) + batch_op.create_index('document_segment_summaries_dataset_id_idx', ['dataset_id'], unique=False) + batch_op.create_index('document_segment_summaries_document_id_idx', ['document_id'], unique=False) + batch_op.create_index('document_segment_summaries_status_idx', ['status'], unique=False) + + with op.batch_alter_table('datasets', schema=None) as batch_op: + batch_op.add_column(sa.Column('summary_index_setting', models.types.AdjustedJSON(), nullable=True)) + + with op.batch_alter_table('documents', schema=None) as batch_op: + batch_op.add_column(sa.Column('need_summary', sa.Boolean(), server_default=sa.text('false'), nullable=True)) + else: + # MySQL: Use compatible syntax + op.create_table( + 'document_segment_summaries', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('dataset_id', models.types.StringUUID(), nullable=False), + sa.Column('document_id', models.types.StringUUID(), nullable=False), + sa.Column('chunk_id', models.types.StringUUID(), nullable=False), + sa.Column('summary_content', models.types.LongText(), nullable=True), + sa.Column('summary_index_node_id', sa.String(length=255), nullable=True), + sa.Column('summary_index_node_hash', sa.String(length=255), nullable=True), + sa.Column('tokens', sa.Integer(), nullable=True), + sa.Column('status', sa.String(length=32), server_default=sa.text("'generating'"), nullable=False), + sa.Column('error', models.types.LongText(), nullable=True), + sa.Column('enabled', sa.Boolean(), server_default=sa.text('true'), nullable=False), + sa.Column('disabled_at', sa.DateTime(), nullable=True), + sa.Column('disabled_by', models.types.StringUUID(), nullable=True), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), + sa.PrimaryKeyConstraint('id', name='document_segment_summaries_pkey'), + ) + with op.batch_alter_table('document_segment_summaries', schema=None) as batch_op: + 
batch_op.create_index('document_segment_summaries_chunk_id_idx', ['chunk_id'], unique=False) + batch_op.create_index('document_segment_summaries_dataset_id_idx', ['dataset_id'], unique=False) + batch_op.create_index('document_segment_summaries_document_id_idx', ['document_id'], unique=False) + batch_op.create_index('document_segment_summaries_status_idx', ['status'], unique=False) + + with op.batch_alter_table('datasets', schema=None) as batch_op: + batch_op.add_column(sa.Column('summary_index_setting', models.types.AdjustedJSON(), nullable=True)) + + with op.batch_alter_table('documents', schema=None) as batch_op: + batch_op.add_column(sa.Column('need_summary', sa.Boolean(), server_default=sa.text('false'), nullable=True)) + + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + + with op.batch_alter_table('documents', schema=None) as batch_op: + batch_op.drop_column('need_summary') + + with op.batch_alter_table('datasets', schema=None) as batch_op: + batch_op.drop_column('summary_index_setting') + + with op.batch_alter_table('document_segment_summaries', schema=None) as batch_op: + batch_op.drop_index('document_segment_summaries_status_idx') + batch_op.drop_index('document_segment_summaries_document_id_idx') + batch_op.drop_index('document_segment_summaries_dataset_id_idx') + batch_op.drop_index('document_segment_summaries_chunk_id_idx') + + op.drop_table('document_segment_summaries') + # ### end Alembic commands ### diff --git a/api/models/dataset.py b/api/models/dataset.py index 62f11b8c72..6ab8f372bf 100644 --- a/api/models/dataset.py +++ b/api/models/dataset.py @@ -72,6 +72,7 @@ class Dataset(Base): keyword_number = mapped_column(sa.Integer, nullable=True, server_default=sa.text("10")) collection_binding_id = mapped_column(StringUUID, nullable=True) retrieval_model = mapped_column(AdjustedJSON, nullable=True) + summary_index_setting = mapped_column(AdjustedJSON, nullable=True) built_in_field_enabled = mapped_column(sa.Boolean, nullable=False, server_default=sa.text("false")) icon_info = mapped_column(AdjustedJSON, nullable=True) runtime_mode = mapped_column(sa.String(255), nullable=True, server_default=sa.text("'general'")) @@ -419,6 +420,7 @@ class Document(Base): doc_metadata = mapped_column(AdjustedJSON, nullable=True) doc_form = mapped_column(String(255), nullable=False, server_default=sa.text("'text_model'")) doc_language = mapped_column(String(255), nullable=True) + need_summary: Mapped[bool | None] = mapped_column(sa.Boolean, nullable=True, server_default=sa.text("false")) DATA_SOURCES = ["upload_file", "notion_import", "website_crawl"] @@ -1575,3 +1577,36 @@ class SegmentAttachmentBinding(Base): segment_id: Mapped[str] = mapped_column(StringUUID, nullable=False) attachment_id: Mapped[str] = mapped_column(StringUUID, nullable=False) created_at: Mapped[datetime] = mapped_column(sa.DateTime, nullable=False, server_default=func.current_timestamp()) + + +class DocumentSegmentSummary(Base): + __tablename__ = "document_segment_summaries" + __table_args__ = ( + sa.PrimaryKeyConstraint("id", name="document_segment_summaries_pkey"), + sa.Index("document_segment_summaries_dataset_id_idx", "dataset_id"), + sa.Index("document_segment_summaries_document_id_idx", "document_id"), + sa.Index("document_segment_summaries_chunk_id_idx", "chunk_id"), + sa.Index("document_segment_summaries_status_idx", "status"), + ) + + id: Mapped[str] = mapped_column(StringUUID, nullable=False, default=lambda: str(uuid4())) + dataset_id: 
Mapped[str] = mapped_column(StringUUID, nullable=False) + document_id: Mapped[str] = mapped_column(StringUUID, nullable=False) + # corresponds to DocumentSegment.id or parent chunk id + chunk_id: Mapped[str] = mapped_column(StringUUID, nullable=False) + summary_content: Mapped[str] = mapped_column(LongText, nullable=True) + summary_index_node_id: Mapped[str] = mapped_column(String(255), nullable=True) + summary_index_node_hash: Mapped[str] = mapped_column(String(255), nullable=True) + tokens: Mapped[int | None] = mapped_column(sa.Integer, nullable=True) + status: Mapped[str] = mapped_column(String(32), nullable=False, server_default=sa.text("'generating'")) + error: Mapped[str] = mapped_column(LongText, nullable=True) + enabled: Mapped[bool] = mapped_column(sa.Boolean, nullable=False, server_default=sa.text("true")) + disabled_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True) + disabled_by = mapped_column(StringUUID, nullable=True) + created_at: Mapped[datetime] = mapped_column(DateTime, nullable=False, server_default=func.current_timestamp()) + updated_at: Mapped[datetime] = mapped_column( + DateTime, nullable=False, server_default=func.current_timestamp(), onupdate=func.current_timestamp() + ) + + def __repr__(self): + return f"" diff --git a/api/services/dataset_service.py b/api/services/dataset_service.py index be9a0e9279..0b3fcbe4ae 100644 --- a/api/services/dataset_service.py +++ b/api/services/dataset_service.py @@ -89,6 +89,7 @@ from tasks.disable_segments_from_index_task import disable_segments_from_index_t from tasks.document_indexing_update_task import document_indexing_update_task from tasks.enable_segments_to_index_task import enable_segments_to_index_task from tasks.recover_document_indexing_task import recover_document_indexing_task +from tasks.regenerate_summary_index_task import regenerate_summary_index_task from tasks.remove_document_from_index_task import remove_document_from_index_task from tasks.retry_document_indexing_task import retry_document_indexing_task from tasks.sync_website_document_indexing_task import sync_website_document_indexing_task @@ -211,6 +212,7 @@ class DatasetService: embedding_model_provider: str | None = None, embedding_model_name: str | None = None, retrieval_model: RetrievalModel | None = None, + summary_index_setting: dict | None = None, ): # check if dataset name already exists if db.session.query(Dataset).filter_by(name=name, tenant_id=tenant_id).first(): @@ -253,6 +255,8 @@ class DatasetService: dataset.retrieval_model = retrieval_model.model_dump() if retrieval_model else None dataset.permission = permission or DatasetPermissionEnum.ONLY_ME dataset.provider = provider + if summary_index_setting is not None: + dataset.summary_index_setting = summary_index_setting db.session.add(dataset) db.session.flush() @@ -476,6 +480,11 @@ class DatasetService: if external_retrieval_model: dataset.retrieval_model = external_retrieval_model + # Update summary index setting if provided + summary_index_setting = data.get("summary_index_setting", None) + if summary_index_setting is not None: + dataset.summary_index_setting = summary_index_setting + # Update basic dataset properties dataset.name = data.get("name", dataset.name) dataset.description = data.get("description", dataset.description) @@ -564,6 +573,9 @@ class DatasetService: # update Retrieval model if data.get("retrieval_model"): filtered_data["retrieval_model"] = data["retrieval_model"] + # update summary index setting + if data.get("summary_index_setting"): + 
filtered_data["summary_index_setting"] = data.get("summary_index_setting") # update icon info if data.get("icon_info"): filtered_data["icon_info"] = data.get("icon_info") @@ -572,12 +584,27 @@ class DatasetService: db.session.query(Dataset).filter_by(id=dataset.id).update(filtered_data) db.session.commit() + # Reload dataset to get updated values + db.session.refresh(dataset) + # update pipeline knowledge base node data DatasetService._update_pipeline_knowledge_base_node_data(dataset, user.id) # Trigger vector index task if indexing technique changed if action: deal_dataset_vector_index_task.delay(dataset.id, action) + # If embedding_model changed, also regenerate summary vectors + if action == "update": + regenerate_summary_index_task.delay( + dataset.id, + regenerate_reason="embedding_model_changed", + regenerate_vectors_only=True, + ) + + # Note: summary_index_setting changes do not trigger automatic regeneration of existing summaries. + # The new setting will only apply to: + # 1. New documents added after the setting change + # 2. Manual summary generation requests return dataset @@ -616,6 +643,7 @@ class DatasetService: knowledge_index_node_data["chunk_structure"] = dataset.chunk_structure knowledge_index_node_data["indexing_technique"] = dataset.indexing_technique # pyright: ignore[reportAttributeAccessIssue] knowledge_index_node_data["keyword_number"] = dataset.keyword_number + knowledge_index_node_data["summary_index_setting"] = dataset.summary_index_setting node["data"] = knowledge_index_node_data updated = True except Exception: @@ -854,6 +882,54 @@ class DatasetService: ) filtered_data["collection_binding_id"] = dataset_collection_binding.id + @staticmethod + def _check_summary_index_setting_model_changed(dataset: Dataset, data: dict[str, Any]) -> bool: + """ + Check if summary_index_setting model (model_name or model_provider_name) has changed. 
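For orientation (illustrative values only, not part of the diff), the summary_index_setting payload this check compares mirrors dataset_summary_index_fields:

    summary_index_setting = {
        "enable": True,
        "model_name": "gpt-4o-mini",         # example model name (assumption)
        "model_provider_name": "openai",     # example provider name (assumption)
        "summary_prompt": "Summarize this chunk in two or three sentences.",  # custom prompt (assumed optional)
    }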
+ + Args: + dataset: Current dataset object + data: Update data dictionary + + Returns: + bool: True if summary model changed, False otherwise + """ + # Check if summary_index_setting is being updated + if "summary_index_setting" not in data or data.get("summary_index_setting") is None: + return False + + new_summary_setting = data.get("summary_index_setting") + old_summary_setting = dataset.summary_index_setting + + # If new setting is disabled, no need to regenerate + if not new_summary_setting or not new_summary_setting.get("enable"): + return False + + # If old setting doesn't exist, no need to regenerate (no existing summaries to regenerate) + # Note: This task only regenerates existing summaries, not generates new ones + if not old_summary_setting: + return False + + # Compare model_name and model_provider_name + old_model_name = old_summary_setting.get("model_name") + old_model_provider = old_summary_setting.get("model_provider_name") + new_model_name = new_summary_setting.get("model_name") + new_model_provider = new_summary_setting.get("model_provider_name") + + # Check if model changed + if old_model_name != new_model_name or old_model_provider != new_model_provider: + logger.info( + "Summary index setting model changed for dataset %s: old=%s/%s, new=%s/%s", + dataset.id, + old_model_provider, + old_model_name, + new_model_provider, + new_model_name, + ) + return True + + return False + @staticmethod def update_rag_pipeline_dataset_settings( session: Session, dataset: Dataset, knowledge_configuration: KnowledgeConfiguration, has_published: bool = False @@ -889,6 +965,9 @@ class DatasetService: else: raise ValueError("Invalid index method") dataset.retrieval_model = knowledge_configuration.retrieval_model.model_dump() + # Update summary_index_setting if provided + if knowledge_configuration.summary_index_setting is not None: + dataset.summary_index_setting = knowledge_configuration.summary_index_setting session.add(dataset) else: if dataset.chunk_structure and dataset.chunk_structure != knowledge_configuration.chunk_structure: @@ -994,6 +1073,9 @@ class DatasetService: if dataset.keyword_number != knowledge_configuration.keyword_number: dataset.keyword_number = knowledge_configuration.keyword_number dataset.retrieval_model = knowledge_configuration.retrieval_model.model_dump() + # Update summary_index_setting if provided + if knowledge_configuration.summary_index_setting is not None: + dataset.summary_index_setting = knowledge_configuration.summary_index_setting session.add(dataset) session.commit() if action: @@ -1314,6 +1396,50 @@ class DocumentService: upload_file = DocumentService._get_upload_file_for_upload_file_document(document) return file_helpers.get_signed_file_url(upload_file_id=upload_file.id, as_attachment=True) + @staticmethod + def enrich_documents_with_summary_index_status( + documents: Sequence[Document], + dataset: Dataset, + tenant_id: str, + ) -> None: + """ + Enrich documents with summary_index_status based on dataset summary index settings. + + This method calculates and sets the summary_index_status for each document that needs summary. + Documents that don't need summary or when summary index is disabled will have status set to None. 
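An illustrative call site (not part of the diff); `documents`, `dataset` and `current_tenant_id` are assumed to come from the surrounding document-list handler:

    DocumentService.enrich_documents_with_summary_index_status(
        documents=documents,
        dataset=dataset,
        tenant_id=current_tenant_id,
    )
    # Each document now carries summary_index_status (e.g. "SUMMARIZING") and
    # need_summary, which document_fields exposes to the console API.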
+ + Args: + documents: List of Document instances to enrich + dataset: Dataset instance containing summary_index_setting + tenant_id: Tenant ID for summary status lookup + """ + # Check if dataset has summary index enabled + has_summary_index = dataset.summary_index_setting and dataset.summary_index_setting.get("enable") is True + + # Filter documents that need summary calculation + documents_need_summary = [doc for doc in documents if doc.need_summary is True] + document_ids_need_summary = [str(doc.id) for doc in documents_need_summary] + + # Calculate summary_index_status for documents that need summary (only if dataset summary index is enabled) + summary_status_map: dict[str, str | None] = {} + if has_summary_index and document_ids_need_summary: + from services.summary_index_service import SummaryIndexService + + summary_status_map = SummaryIndexService.get_documents_summary_index_status( + document_ids=document_ids_need_summary, + dataset_id=dataset.id, + tenant_id=tenant_id, + ) + + # Add summary_index_status to each document + for document in documents: + if has_summary_index and document.need_summary is True: + # Get status from map, default to None (not queued yet) + document.summary_index_status = summary_status_map.get(str(document.id)) # type: ignore[attr-defined] + else: + # Return null if summary index is not enabled or document doesn't need summary + document.summary_index_status = None # type: ignore[attr-defined] + @staticmethod def prepare_document_batch_download_zip( *, @@ -1964,6 +2090,8 @@ class DocumentService: DuplicateDocumentIndexingTaskProxy( dataset.tenant_id, dataset.id, duplicate_document_ids ).delay() + # Note: Summary index generation is triggered in document_indexing_task after indexing completes + # to ensure segments are available. 
See tasks/document_indexing_task.py except LockNotOwnedError: pass @@ -2268,6 +2396,11 @@ class DocumentService: name: str, batch: str, ): + # Set need_summary based on dataset's summary_index_setting + need_summary = False + if dataset.summary_index_setting and dataset.summary_index_setting.get("enable") is True: + need_summary = True + document = Document( tenant_id=dataset.tenant_id, dataset_id=dataset.id, @@ -2281,6 +2414,7 @@ class DocumentService: created_by=account.id, doc_form=document_form, doc_language=document_language, + need_summary=need_summary, ) doc_metadata = {} if dataset.built_in_field_enabled: @@ -2505,6 +2639,7 @@ class DocumentService: embedding_model_provider=knowledge_config.embedding_model_provider, collection_binding_id=dataset_collection_binding_id, retrieval_model=retrieval_model.model_dump() if retrieval_model else None, + summary_index_setting=knowledge_config.summary_index_setting, is_multimodal=knowledge_config.is_multimodal, ) @@ -2686,6 +2821,14 @@ class DocumentService: if not isinstance(args["process_rule"]["rules"]["segmentation"]["max_tokens"], int): raise ValueError("Process rule segmentation max_tokens is invalid") + # valid summary index setting + summary_index_setting = args["process_rule"].get("summary_index_setting") + if summary_index_setting and summary_index_setting.get("enable"): + if "model_name" not in summary_index_setting or not summary_index_setting["model_name"]: + raise ValueError("Summary index model name is required") + if "model_provider_name" not in summary_index_setting or not summary_index_setting["model_provider_name"]: + raise ValueError("Summary index model provider name is required") + @staticmethod def batch_update_document_status( dataset: Dataset, document_ids: list[str], action: Literal["enable", "disable", "archive", "un_archive"], user @@ -3154,6 +3297,35 @@ class SegmentService: if args.enabled or keyword_changed: # update segment vector index VectorService.update_segment_vector(args.keywords, segment, dataset) + # update summary index if summary is provided and has changed + if args.summary is not None: + # When user manually provides summary, allow saving even if summary_index_setting doesn't exist + # summary_index_setting is only needed for LLM generation, not for manual summary vectorization + # Vectorization uses dataset.embedding_model, which doesn't require summary_index_setting + if dataset.indexing_technique == "high_quality": + # Query existing summary from database + from models.dataset import DocumentSegmentSummary + + existing_summary = ( + db.session.query(DocumentSegmentSummary) + .where( + DocumentSegmentSummary.chunk_id == segment.id, + DocumentSegmentSummary.dataset_id == dataset.id, + ) + .first() + ) + + # Check if summary has changed + existing_summary_content = existing_summary.summary_content if existing_summary else None + if existing_summary_content != args.summary: + # Summary has changed, update it + from services.summary_index_service import SummaryIndexService + + try: + SummaryIndexService.update_summary_for_segment(segment, dataset, args.summary) + except Exception: + logger.exception("Failed to update summary for segment %s", segment.id) + # Don't fail the entire update if summary update fails else: segment_hash = helper.generate_text_hash(content) tokens = 0 @@ -3228,6 +3400,73 @@ class SegmentService: elif document.doc_form in (IndexStructureType.PARAGRAPH_INDEX, IndexStructureType.QA_INDEX): # update segment vector index VectorService.update_segment_vector(args.keywords, segment, 
dataset) + # Handle summary index when content changed + if dataset.indexing_technique == "high_quality": + from models.dataset import DocumentSegmentSummary + + existing_summary = ( + db.session.query(DocumentSegmentSummary) + .where( + DocumentSegmentSummary.chunk_id == segment.id, + DocumentSegmentSummary.dataset_id == dataset.id, + ) + .first() + ) + + if args.summary is None: + # User didn't provide summary, auto-regenerate if segment previously had summary + # Auto-regeneration only happens if summary_index_setting exists and enable is True + if ( + existing_summary + and dataset.summary_index_setting + and dataset.summary_index_setting.get("enable") is True + ): + # Segment previously had summary, regenerate it with new content + from services.summary_index_service import SummaryIndexService + + try: + SummaryIndexService.generate_and_vectorize_summary( + segment, dataset, dataset.summary_index_setting + ) + logger.info("Auto-regenerated summary for segment %s after content change", segment.id) + except Exception: + logger.exception("Failed to auto-regenerate summary for segment %s", segment.id) + # Don't fail the entire update if summary regeneration fails + else: + # User provided summary, check if it has changed + # Manual summary updates are allowed even if summary_index_setting doesn't exist + existing_summary_content = existing_summary.summary_content if existing_summary else None + if existing_summary_content != args.summary: + # Summary has changed, use user-provided summary + from services.summary_index_service import SummaryIndexService + + try: + SummaryIndexService.update_summary_for_segment(segment, dataset, args.summary) + logger.info("Updated summary for segment %s with user-provided content", segment.id) + except Exception: + logger.exception("Failed to update summary for segment %s", segment.id) + # Don't fail the entire update if summary update fails + else: + # Summary hasn't changed, regenerate based on new content + # Auto-regeneration only happens if summary_index_setting exists and enable is True + if ( + existing_summary + and dataset.summary_index_setting + and dataset.summary_index_setting.get("enable") is True + ): + from services.summary_index_service import SummaryIndexService + + try: + SummaryIndexService.generate_and_vectorize_summary( + segment, dataset, dataset.summary_index_setting + ) + logger.info( + "Regenerated summary for segment %s after content change (summary unchanged)", + segment.id, + ) + except Exception: + logger.exception("Failed to regenerate summary for segment %s", segment.id) + # Don't fail the entire update if summary regeneration fails # update multimodel vector index VectorService.update_multimodel_vector(segment, args.attachment_ids or [], dataset) except Exception as e: @@ -3616,6 +3855,39 @@ class SegmentService: ) return result if isinstance(result, DocumentSegment) else None + @classmethod + def get_segments_by_document_and_dataset( + cls, + document_id: str, + dataset_id: str, + status: str | None = None, + enabled: bool | None = None, + ) -> Sequence[DocumentSegment]: + """ + Get segments for a document in a dataset with optional filtering. 
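Illustrative usage (not part of the diff), matching how the summary pipeline filters segments elsewhere in this patch; `document` and `dataset` are assumed to be loaded already:

    segments = SegmentService.get_segments_by_document_and_dataset(
        document_id=document.id,
        dataset_id=dataset.id,
        status="completed",
        enabled=True,
    )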
+ + Args: + document_id: Document ID + dataset_id: Dataset ID + status: Optional status filter (e.g., "completed") + enabled: Optional enabled filter (True/False) + + Returns: + Sequence of DocumentSegment instances + """ + query = select(DocumentSegment).where( + DocumentSegment.document_id == document_id, + DocumentSegment.dataset_id == dataset_id, + ) + + if status is not None: + query = query.where(DocumentSegment.status == status) + + if enabled is not None: + query = query.where(DocumentSegment.enabled == enabled) + + return db.session.scalars(query).all() + class DatasetCollectionBindingService: @classmethod diff --git a/api/services/entities/knowledge_entities/knowledge_entities.py b/api/services/entities/knowledge_entities/knowledge_entities.py index 7959734e89..8dc5b93501 100644 --- a/api/services/entities/knowledge_entities/knowledge_entities.py +++ b/api/services/entities/knowledge_entities/knowledge_entities.py @@ -119,6 +119,7 @@ class KnowledgeConfig(BaseModel): data_source: DataSource | None = None process_rule: ProcessRule | None = None retrieval_model: RetrievalModel | None = None + summary_index_setting: dict | None = None doc_form: str = "text_model" doc_language: str = "English" embedding_model: str | None = None @@ -141,6 +142,7 @@ class SegmentUpdateArgs(BaseModel): regenerate_child_chunks: bool = False enabled: bool | None = None attachment_ids: list[str] | None = None + summary: str | None = None # Summary content for summary index class ChildChunkUpdateArgs(BaseModel): diff --git a/api/services/entities/knowledge_entities/rag_pipeline_entities.py b/api/services/entities/knowledge_entities/rag_pipeline_entities.py index cbb0efcc2a..041ae4edba 100644 --- a/api/services/entities/knowledge_entities/rag_pipeline_entities.py +++ b/api/services/entities/knowledge_entities/rag_pipeline_entities.py @@ -116,6 +116,8 @@ class KnowledgeConfiguration(BaseModel): embedding_model: str = "" keyword_number: int | None = 10 retrieval_model: RetrievalSetting + # add summary index setting + summary_index_setting: dict | None = None @field_validator("embedding_model_provider", mode="before") @classmethod diff --git a/api/services/rag_pipeline/rag_pipeline_dsl_service.py b/api/services/rag_pipeline/rag_pipeline_dsl_service.py index c1c6e204fb..be1ce834f6 100644 --- a/api/services/rag_pipeline/rag_pipeline_dsl_service.py +++ b/api/services/rag_pipeline/rag_pipeline_dsl_service.py @@ -343,6 +343,9 @@ class RagPipelineDslService: dataset.embedding_model_provider = knowledge_configuration.embedding_model_provider elif knowledge_configuration.indexing_technique == "economy": dataset.keyword_number = knowledge_configuration.keyword_number + # Update summary_index_setting if provided + if knowledge_configuration.summary_index_setting is not None: + dataset.summary_index_setting = knowledge_configuration.summary_index_setting dataset.pipeline_id = pipeline.id self._session.add(dataset) self._session.commit() @@ -477,6 +480,9 @@ class RagPipelineDslService: dataset.embedding_model_provider = knowledge_configuration.embedding_model_provider elif knowledge_configuration.indexing_technique == "economy": dataset.keyword_number = knowledge_configuration.keyword_number + # Update summary_index_setting if provided + if knowledge_configuration.summary_index_setting is not None: + dataset.summary_index_setting = knowledge_configuration.summary_index_setting dataset.pipeline_id = pipeline.id self._session.add(dataset) self._session.commit() diff --git a/api/services/summary_index_service.py 
b/api/services/summary_index_service.py new file mode 100644 index 0000000000..b8e1f8bc3f --- /dev/null +++ b/api/services/summary_index_service.py @@ -0,0 +1,1432 @@ +"""Summary index service for generating and managing document segment summaries.""" + +import logging +import time +import uuid +from datetime import UTC, datetime +from typing import Any + +from sqlalchemy.orm import Session + +from core.db.session_factory import session_factory +from core.model_manager import ModelManager +from core.model_runtime.entities.llm_entities import LLMUsage +from core.model_runtime.entities.model_entities import ModelType +from core.rag.datasource.vdb.vector_factory import Vector +from core.rag.index_processor.constant.doc_type import DocType +from core.rag.models.document import Document +from libs import helper +from models.dataset import Dataset, DocumentSegment, DocumentSegmentSummary +from models.dataset import Document as DatasetDocument + +logger = logging.getLogger(__name__) + + +class SummaryIndexService: + """Service for generating and managing summary indexes.""" + + @staticmethod + def generate_summary_for_segment( + segment: DocumentSegment, + dataset: Dataset, + summary_index_setting: dict, + ) -> tuple[str, LLMUsage]: + """ + Generate summary for a single segment. + + Args: + segment: DocumentSegment to generate summary for + dataset: Dataset containing the segment + summary_index_setting: Summary index configuration + + Returns: + Tuple of (summary_content, llm_usage) where llm_usage is LLMUsage object + + Raises: + ValueError: If summary_index_setting is invalid or generation fails + """ + # Reuse the existing generate_summary method from ParagraphIndexProcessor + # Use lazy import to avoid circular import + from core.rag.index_processor.processor.paragraph_index_processor import ParagraphIndexProcessor + + summary_content, usage = ParagraphIndexProcessor.generate_summary( + tenant_id=dataset.tenant_id, + text=segment.content, + summary_index_setting=summary_index_setting, + segment_id=segment.id, + ) + + if not summary_content: + raise ValueError("Generated summary is empty") + + return summary_content, usage + + @staticmethod + def create_summary_record( + segment: DocumentSegment, + dataset: Dataset, + summary_content: str, + status: str = "generating", + ) -> DocumentSegmentSummary: + """ + Create or update a DocumentSegmentSummary record. + If a summary record already exists for this segment, it will be updated instead of creating a new one. 
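A sketch of the generate-then-persist flow (not part of the diff); `segment`, `dataset` and `summary_index_setting` are assumed to be in scope, and the status handling is simplified for illustration:

    summary_content, usage = SummaryIndexService.generate_summary_for_segment(
        segment, dataset, summary_index_setting
    )
    # Upsert the summary row; "generating" is the documented default status and is
    # kept here, since the full pipeline updates it after vectorization.
    summary_record = SummaryIndexService.create_summary_record(
        segment, dataset, summary_content, status="generating"
    )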
+ + Args: + segment: DocumentSegment to create summary for + dataset: Dataset containing the segment + summary_content: Generated summary content + status: Summary status (default: "generating") + + Returns: + Created or updated DocumentSegmentSummary instance + """ + with session_factory.create_session() as session: + # Check if summary record already exists + existing_summary = ( + session.query(DocumentSegmentSummary).filter_by(chunk_id=segment.id, dataset_id=dataset.id).first() + ) + + if existing_summary: + # Update existing record + existing_summary.summary_content = summary_content + existing_summary.status = status + existing_summary.error = None # type: ignore[assignment] # Clear any previous errors + # Re-enable if it was disabled + if not existing_summary.enabled: + existing_summary.enabled = True + existing_summary.disabled_at = None + existing_summary.disabled_by = None + session.add(existing_summary) + session.flush() + return existing_summary + else: + # Create new record (enabled by default) + summary_record = DocumentSegmentSummary( + dataset_id=dataset.id, + document_id=segment.document_id, + chunk_id=segment.id, + summary_content=summary_content, + status=status, + enabled=True, # Explicitly set enabled to True + ) + session.add(summary_record) + session.flush() + return summary_record + + @staticmethod + def vectorize_summary( + summary_record: DocumentSegmentSummary, + segment: DocumentSegment, + dataset: Dataset, + session: Session | None = None, + ) -> None: + """ + Vectorize summary and store in vector database. + + Args: + summary_record: DocumentSegmentSummary record + segment: Original DocumentSegment + dataset: Dataset containing the segment + session: Optional SQLAlchemy session. If provided, uses this session instead of creating a new one. + If not provided, creates a new session and commits automatically. 
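+
+        Example (sketch; mirrors the two calling patterns used by this service):
+
+            # Standalone call: the method opens and commits its own session.
+            SummaryIndexService.vectorize_summary(summary_record, segment, dataset)
+
+            # Caller-managed transaction: pass the session and commit afterwards.
+            SummaryIndexService.vectorize_summary(summary_record, segment, dataset, session=session)
+            session.commit()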
+ """ + if dataset.indexing_technique != "high_quality": + logger.warning( + "Summary vectorization skipped for dataset %s: indexing_technique is not high_quality", + dataset.id, + ) + return + + # Get summary_record_id for later session queries + summary_record_id = summary_record.id + # Save the original session parameter for use in error handling + original_session = session + logger.debug( + "Starting vectorization for segment %s, summary_record_id=%s, using_provided_session=%s", + segment.id, + summary_record_id, + original_session is not None, + ) + + # Reuse existing index_node_id if available (like segment does), otherwise generate new one + old_summary_node_id = summary_record.summary_index_node_id + if old_summary_node_id: + # Reuse existing index_node_id (like segment behavior) + summary_index_node_id = old_summary_node_id + logger.debug("Reusing existing index_node_id %s for segment %s", summary_index_node_id, segment.id) + else: + # Generate new index node ID only for new summaries + summary_index_node_id = str(uuid.uuid4()) + logger.debug("Generated new index_node_id %s for segment %s", summary_index_node_id, segment.id) + + # Always regenerate hash (in case summary content changed) + summary_content = summary_record.summary_content + if not summary_content or not summary_content.strip(): + raise ValueError(f"Summary content is empty for segment {segment.id}, cannot vectorize") + summary_hash = helper.generate_text_hash(summary_content) + + # Delete old vector only if we're reusing the same index_node_id (to overwrite) + # If index_node_id changed, the old vector should have been deleted elsewhere + if old_summary_node_id and old_summary_node_id == summary_index_node_id: + try: + vector = Vector(dataset) + vector.delete_by_ids([old_summary_node_id]) + except Exception as e: + logger.warning( + "Failed to delete old summary vector for segment %s: %s. 
Continuing with new vectorization.", + segment.id, + str(e), + ) + + # Calculate embedding tokens for summary (for logging and statistics) + embedding_tokens = 0 + try: + model_manager = ModelManager() + embedding_model = model_manager.get_model_instance( + tenant_id=dataset.tenant_id, + provider=dataset.embedding_model_provider, + model_type=ModelType.TEXT_EMBEDDING, + model=dataset.embedding_model, + ) + if embedding_model: + tokens_list = embedding_model.get_text_embedding_num_tokens([summary_content]) + embedding_tokens = tokens_list[0] if tokens_list else 0 + except Exception as e: + logger.warning("Failed to calculate embedding tokens for summary: %s", str(e)) + + # Create document with summary content and metadata + summary_document = Document( + page_content=summary_content, + metadata={ + "doc_id": summary_index_node_id, + "doc_hash": summary_hash, + "dataset_id": dataset.id, + "document_id": segment.document_id, + "original_chunk_id": segment.id, # Key: link to original chunk + "doc_type": DocType.TEXT, + "is_summary": True, # Identifier for summary documents + }, + ) + + # Vectorize and store with retry mechanism for connection errors + max_retries = 3 + retry_delay = 2.0 + + for attempt in range(max_retries): + try: + logger.debug( + "Attempting to vectorize summary for segment %s (attempt %s/%s)", + segment.id, + attempt + 1, + max_retries, + ) + vector = Vector(dataset) + # Use duplicate_check=False to ensure re-vectorization even if old vector still exists + # The old vector should have been deleted above, but if deletion failed, + # we still want to re-vectorize (upsert will overwrite) + vector.add_texts([summary_document], duplicate_check=False) + logger.debug( + "Successfully added summary vector to database for segment %s (attempt %s/%s)", + segment.id, + attempt + 1, + max_retries, + ) + + # Log embedding token usage + if embedding_tokens > 0: + logger.info( + "Summary embedding for segment %s used %s tokens", + segment.id, + embedding_tokens, + ) + + # Success - update summary record with index node info + # Use provided session if available, otherwise create a new one + use_provided_session = session is not None + if not use_provided_session: + logger.debug("Creating new session for vectorization of segment %s", segment.id) + session_context = session_factory.create_session() + session = session_context.__enter__() + else: + logger.debug("Using provided session for vectorization of segment %s", segment.id) + session_context = None # Don't use context manager for provided session + + # At this point, session is guaranteed to be not None + # Type narrowing: session is definitely not None after the if/else above + if session is None: + raise RuntimeError("Session should not be None at this point") + + try: + # Declare summary_record_in_session variable + summary_record_in_session: DocumentSegmentSummary | None + + # If using provided session, merge the summary_record into it + if use_provided_session: + # Merge the summary_record into the provided session + logger.debug( + "Merging summary_record (id=%s) into provided session for segment %s", + summary_record_id, + segment.id, + ) + summary_record_in_session = session.merge(summary_record) + logger.debug( + "Successfully merged summary_record for segment %s, merged_id=%s", + segment.id, + summary_record_in_session.id, + ) + else: + # Query the summary record in the new session + logger.debug( + "Querying summary_record by id=%s for segment %s in new session", + summary_record_id, + segment.id, + ) + 
summary_record_in_session = ( + session.query(DocumentSegmentSummary).filter_by(id=summary_record_id).first() + ) + + if not summary_record_in_session: + # Record not found - try to find by chunk_id and dataset_id instead + logger.debug( + "Summary record not found by id=%s, trying chunk_id=%s and dataset_id=%s " + "for segment %s", + summary_record_id, + segment.id, + dataset.id, + segment.id, + ) + summary_record_in_session = ( + session.query(DocumentSegmentSummary) + .filter_by(chunk_id=segment.id, dataset_id=dataset.id) + .first() + ) + + if not summary_record_in_session: + # Still not found - create a new one using the parameter data + logger.warning( + "Summary record not found in database for segment %s (id=%s), creating new one. " + "This may indicate a session isolation issue.", + segment.id, + summary_record_id, + ) + summary_record_in_session = DocumentSegmentSummary( + id=summary_record_id, # Use the same ID if available + dataset_id=dataset.id, + document_id=segment.document_id, + chunk_id=segment.id, + summary_content=summary_content, + summary_index_node_id=summary_index_node_id, + summary_index_node_hash=summary_hash, + tokens=embedding_tokens, + status="completed", + enabled=True, + ) + session.add(summary_record_in_session) + logger.info( + "Created new summary record (id=%s) for segment %s after vectorization", + summary_record_id, + segment.id, + ) + else: + # Found by chunk_id - update it + logger.info( + "Found summary record for segment %s by chunk_id " + "(id mismatch: expected %s, found %s). " + "This may indicate the record was created in a different session.", + segment.id, + summary_record_id, + summary_record_in_session.id, + ) + else: + logger.debug( + "Found summary_record (id=%s) for segment %s in new session", + summary_record_id, + segment.id, + ) + + # At this point, summary_record_in_session is guaranteed to be not None + if summary_record_in_session is None: + raise RuntimeError("summary_record_in_session should not be None at this point") + + # Update all fields including summary_content + # Always use the summary_content from the parameter (which is the latest from outer session) + # rather than relying on what's in the database, in case outer session hasn't committed yet + summary_record_in_session.summary_index_node_id = summary_index_node_id + summary_record_in_session.summary_index_node_hash = summary_hash + summary_record_in_session.tokens = embedding_tokens # Save embedding tokens + summary_record_in_session.status = "completed" + # Ensure summary_content is preserved (use the latest from summary_record parameter) + # This is critical: use the parameter value, not the database value + summary_record_in_session.summary_content = summary_content + # Explicitly update updated_at to ensure it's refreshed even if other fields haven't changed + summary_record_in_session.updated_at = datetime.now(UTC).replace(tzinfo=None) + session.add(summary_record_in_session) + + # Only commit if we created the session ourselves + if not use_provided_session: + logger.debug("Committing session for segment %s (self-created session)", segment.id) + session.commit() + logger.debug("Successfully committed session for segment %s", segment.id) + else: + # When using provided session, flush to ensure changes are written to database + # This prevents refresh() from overwriting our changes + logger.debug( + "Flushing session for segment %s (using provided session, caller will commit)", + segment.id, + ) + session.flush() + logger.debug("Successfully flushed session for 
segment %s", segment.id) + # If using provided session, let the caller handle commit + + logger.info( + "Successfully vectorized summary for segment %s, index_node_id=%s, index_node_hash=%s, " + "tokens=%s, summary_record_id=%s, use_provided_session=%s", + segment.id, + summary_index_node_id, + summary_hash, + embedding_tokens, + summary_record_in_session.id, + use_provided_session, + ) + # Update the original object for consistency + summary_record.summary_index_node_id = summary_index_node_id + summary_record.summary_index_node_hash = summary_hash + summary_record.tokens = embedding_tokens + summary_record.status = "completed" + summary_record.summary_content = summary_content + if summary_record_in_session.updated_at: + summary_record.updated_at = summary_record_in_session.updated_at + finally: + # Only close session if we created it ourselves + if not use_provided_session and session_context: + session_context.__exit__(None, None, None) + # Success, exit function + return + + except (ConnectionError, Exception) as e: + error_str = str(e).lower() + # Check if it's a connection-related error that might be transient + is_connection_error = any( + keyword in error_str + for keyword in [ + "connection", + "disconnected", + "timeout", + "network", + "could not connect", + "server disconnected", + "weaviate", + ] + ) + + if is_connection_error and attempt < max_retries - 1: + # Retry for connection errors + wait_time = retry_delay * (2**attempt) # Exponential backoff + logger.warning( + "Vectorization attempt %s/%s failed for segment %s (connection error): %s. " + "Retrying in %.1f seconds...", + attempt + 1, + max_retries, + segment.id, + str(e), + wait_time, + ) + time.sleep(wait_time) + continue + else: + # Final attempt failed or non-connection error - log and update status + logger.error( + "Failed to vectorize summary for segment %s after %s attempts: %s. 
" + "summary_record_id=%s, index_node_id=%s, use_provided_session=%s", + segment.id, + attempt + 1, + str(e), + summary_record_id, + summary_index_node_id, + session is not None, + exc_info=True, + ) + # Update error status in session + # Use the original_session saved at function start (the function parameter) + logger.debug( + "Updating error status for segment %s, summary_record_id=%s, has_original_session=%s", + segment.id, + summary_record_id, + original_session is not None, + ) + # Always create a new session for error handling to avoid issues with closed sessions + # Even if original_session was provided, we create a new one for safety + with session_factory.create_session() as error_session: + # Try to find the record by id first + # Note: Using assignment only (no type annotation) to avoid redeclaration error + summary_record_in_session = ( + error_session.query(DocumentSegmentSummary).filter_by(id=summary_record_id).first() + ) + if not summary_record_in_session: + # Try to find by chunk_id and dataset_id + logger.debug( + "Summary record not found by id=%s, trying chunk_id=%s and dataset_id=%s " + "for segment %s", + summary_record_id, + segment.id, + dataset.id, + segment.id, + ) + summary_record_in_session = ( + error_session.query(DocumentSegmentSummary) + .filter_by(chunk_id=segment.id, dataset_id=dataset.id) + .first() + ) + + if summary_record_in_session: + summary_record_in_session.status = "error" + summary_record_in_session.error = f"Vectorization failed: {str(e)}" + summary_record_in_session.updated_at = datetime.now(UTC).replace(tzinfo=None) + error_session.add(summary_record_in_session) + error_session.commit() + logger.info( + "Updated error status in new session for segment %s, record_id=%s", + segment.id, + summary_record_in_session.id, + ) + # Update the original object for consistency + summary_record.status = "error" + summary_record.error = summary_record_in_session.error + summary_record.updated_at = summary_record_in_session.updated_at + else: + logger.warning( + "Could not update error status: summary record not found for segment %s (id=%s). " + "This may indicate a session isolation issue.", + segment.id, + summary_record_id, + ) + raise + + @staticmethod + def batch_create_summary_records( + segments: list[DocumentSegment], + dataset: Dataset, + status: str = "not_started", + ) -> None: + """ + Batch create summary records for segments with specified status. + If a record already exists, update its status. 
+ + Args: + segments: List of DocumentSegment instances + dataset: Dataset containing the segments + status: Initial status for the records (default: "not_started") + """ + segment_ids = [segment.id for segment in segments] + if not segment_ids: + return + + with session_factory.create_session() as session: + # Query existing summary records + existing_summaries = ( + session.query(DocumentSegmentSummary) + .filter( + DocumentSegmentSummary.chunk_id.in_(segment_ids), + DocumentSegmentSummary.dataset_id == dataset.id, + ) + .all() + ) + existing_summary_map = {summary.chunk_id: summary for summary in existing_summaries} + + # Create or update records + for segment in segments: + existing_summary = existing_summary_map.get(segment.id) + if existing_summary: + # Update existing record + existing_summary.status = status + existing_summary.error = None # type: ignore[assignment] # Clear any previous errors + if not existing_summary.enabled: + existing_summary.enabled = True + existing_summary.disabled_at = None + existing_summary.disabled_by = None + session.add(existing_summary) + else: + # Create new record + summary_record = DocumentSegmentSummary( + dataset_id=dataset.id, + document_id=segment.document_id, + chunk_id=segment.id, + summary_content=None, # Will be filled later + status=status, + enabled=True, + ) + session.add(summary_record) + + @staticmethod + def update_summary_record_error( + segment: DocumentSegment, + dataset: Dataset, + error: str, + ) -> None: + """ + Update summary record with error status. + + Args: + segment: DocumentSegment + dataset: Dataset containing the segment + error: Error message + """ + with session_factory.create_session() as session: + summary_record = ( + session.query(DocumentSegmentSummary).filter_by(chunk_id=segment.id, dataset_id=dataset.id).first() + ) + + if summary_record: + summary_record.status = "error" + summary_record.error = error + session.add(summary_record) + session.commit() + else: + logger.warning("Summary record not found for segment %s when updating error", segment.id) + + @staticmethod + def generate_and_vectorize_summary( + segment: DocumentSegment, + dataset: Dataset, + summary_index_setting: dict, + ) -> DocumentSegmentSummary: + """ + Generate summary for a segment and vectorize it. + Assumes summary record already exists (created by batch_create_summary_records). 
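+
+        Example (sketch; within this service only the "enable" key of
+        summary_index_setting is inspected, the remaining keys are passed through
+        to the LLM summary generator):
+
+            summary_index_setting = dataset.summary_index_setting  # e.g. {"enable": True, ...}
+            record = SummaryIndexService.generate_and_vectorize_summary(segment, dataset, summary_index_setting)
+            # On success the record is committed with status "completed"; failures raise.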
+ + Args: + segment: DocumentSegment to generate summary for + dataset: Dataset containing the segment + summary_index_setting: Summary index configuration + + Returns: + Created DocumentSegmentSummary instance + + Raises: + ValueError: If summary generation fails + """ + with session_factory.create_session() as session: + try: + # Get or refresh summary record in this session + summary_record_in_session = ( + session.query(DocumentSegmentSummary).filter_by(chunk_id=segment.id, dataset_id=dataset.id).first() + ) + + if not summary_record_in_session: + # If not found, create one + logger.warning("Summary record not found for segment %s, creating one", segment.id) + summary_record_in_session = DocumentSegmentSummary( + dataset_id=dataset.id, + document_id=segment.document_id, + chunk_id=segment.id, + summary_content="", + status="generating", + enabled=True, + ) + session.add(summary_record_in_session) + session.flush() + + # Update status to "generating" + summary_record_in_session.status = "generating" + summary_record_in_session.error = None # type: ignore[assignment] + session.add(summary_record_in_session) + # Don't flush here - wait until after vectorization succeeds + + # Generate summary (returns summary_content and llm_usage) + summary_content, llm_usage = SummaryIndexService.generate_summary_for_segment( + segment, dataset, summary_index_setting + ) + + # Update summary content + summary_record_in_session.summary_content = summary_content + session.add(summary_record_in_session) + # Flush to ensure summary_content is saved before vectorize_summary queries it + session.flush() + + # Log LLM usage for summary generation + if llm_usage and llm_usage.total_tokens > 0: + logger.info( + "Summary generation for segment %s used %s tokens (prompt: %s, completion: %s)", + segment.id, + llm_usage.total_tokens, + llm_usage.prompt_tokens, + llm_usage.completion_tokens, + ) + + # Vectorize summary (will delete old vector if exists before creating new one) + # Pass the session-managed record to vectorize_summary + # vectorize_summary will update status to "completed" and tokens in its own session + # vectorize_summary will also ensure summary_content is preserved + try: + # Pass the session to vectorize_summary to avoid session isolation issues + SummaryIndexService.vectorize_summary(summary_record_in_session, segment, dataset, session=session) + # Refresh the object from database to get the updated status and tokens from vectorize_summary + session.refresh(summary_record_in_session) + # Commit the session + # (summary_record_in_session should have status="completed" and tokens from refresh) + session.commit() + logger.info("Successfully generated and vectorized summary for segment %s", segment.id) + return summary_record_in_session + except Exception as vectorize_error: + # If vectorization fails, update status to error in current session + logger.exception("Failed to vectorize summary for segment %s", segment.id) + summary_record_in_session.status = "error" + summary_record_in_session.error = f"Vectorization failed: {str(vectorize_error)}" + session.add(summary_record_in_session) + session.commit() + raise + + except Exception as e: + logger.exception("Failed to generate summary for segment %s", segment.id) + # Update summary record with error status + summary_record_in_session = ( + session.query(DocumentSegmentSummary).filter_by(chunk_id=segment.id, dataset_id=dataset.id).first() + ) + if summary_record_in_session: + summary_record_in_session.status = "error" + summary_record_in_session.error 
= str(e) + session.add(summary_record_in_session) + session.commit() + raise + + @staticmethod + def generate_summaries_for_document( + dataset: Dataset, + document: DatasetDocument, + summary_index_setting: dict, + segment_ids: list[str] | None = None, + only_parent_chunks: bool = False, + ) -> list[DocumentSegmentSummary]: + """ + Generate summaries for all segments in a document including vectorization. + + Args: + dataset: Dataset containing the document + document: DatasetDocument to generate summaries for + summary_index_setting: Summary index configuration + segment_ids: Optional list of specific segment IDs to process + only_parent_chunks: If True, only process parent chunks (for parent-child mode) + + Returns: + List of created DocumentSegmentSummary instances + """ + # Only generate summary index for high_quality indexing technique + if dataset.indexing_technique != "high_quality": + logger.info( + "Skipping summary generation for dataset %s: indexing_technique is %s, not 'high_quality'", + dataset.id, + dataset.indexing_technique, + ) + return [] + + if not summary_index_setting or not summary_index_setting.get("enable"): + logger.info("Summary index is disabled for dataset %s", dataset.id) + return [] + + # Skip qa_model documents + if document.doc_form == "qa_model": + logger.info("Skipping summary generation for qa_model document %s", document.id) + return [] + + logger.info( + "Starting summary generation for document %s in dataset %s, segment_ids: %s, only_parent_chunks: %s", + document.id, + dataset.id, + len(segment_ids) if segment_ids else "all", + only_parent_chunks, + ) + + with session_factory.create_session() as session: + # Query segments (only enabled segments) + query = session.query(DocumentSegment).filter_by( + dataset_id=dataset.id, + document_id=document.id, + status="completed", + enabled=True, # Only generate summaries for enabled segments + ) + + if segment_ids: + query = query.filter(DocumentSegment.id.in_(segment_ids)) + + segments = query.all() + + if not segments: + logger.info("No segments found for document %s", document.id) + return [] + + # Batch create summary records with "not_started" status before processing + # This ensures all records exist upfront, allowing status tracking + SummaryIndexService.batch_create_summary_records( + segments=segments, + dataset=dataset, + status="not_started", + ) + session.commit() # Commit initial records + + summary_records = [] + + for segment in segments: + # For parent-child mode, only process parent chunks + # In parent-child mode, all DocumentSegments are parent chunks, + # so we process all of them. Child chunks are stored in ChildChunk table + # and are not DocumentSegments, so they won't be in the segments list. + # This check is mainly for clarity and future-proofing. 
+ if only_parent_chunks: + # In parent-child mode, all segments in the query are parent chunks + # Child chunks are not DocumentSegments, so they won't appear here + # We can process all segments + pass + + try: + summary_record = SummaryIndexService.generate_and_vectorize_summary( + segment, dataset, summary_index_setting + ) + summary_records.append(summary_record) + except Exception as e: + logger.exception("Failed to generate summary for segment %s", segment.id) + # Update summary record with error status + SummaryIndexService.update_summary_record_error( + segment=segment, + dataset=dataset, + error=str(e), + ) + # Continue with other segments + continue + + logger.info( + "Completed summary generation for document %s: %s summaries generated and vectorized", + document.id, + len(summary_records), + ) + return summary_records + + @staticmethod + def disable_summaries_for_segments( + dataset: Dataset, + segment_ids: list[str] | None = None, + disabled_by: str | None = None, + ) -> None: + """ + Disable summary records and remove vectors from vector database for segments. + Unlike delete, this preserves the summary records but marks them as disabled. + + Args: + dataset: Dataset containing the segments + segment_ids: List of segment IDs to disable summaries for. If None, disable all. + disabled_by: User ID who disabled the summaries + """ + from libs.datetime_utils import naive_utc_now + + with session_factory.create_session() as session: + query = session.query(DocumentSegmentSummary).filter_by( + dataset_id=dataset.id, + enabled=True, # Only disable enabled summaries + ) + + if segment_ids: + query = query.filter(DocumentSegmentSummary.chunk_id.in_(segment_ids)) + + summaries = query.all() + + if not summaries: + return + + logger.info( + "Disabling %s summary records for dataset %s, segment_ids: %s", + len(summaries), + dataset.id, + len(segment_ids) if segment_ids else "all", + ) + + # Remove from vector database (but keep records) + if dataset.indexing_technique == "high_quality": + summary_node_ids = [s.summary_index_node_id for s in summaries if s.summary_index_node_id] + if summary_node_ids: + try: + vector = Vector(dataset) + vector.delete_by_ids(summary_node_ids) + except Exception as e: + logger.warning("Failed to remove summary vectors: %s", str(e)) + + # Disable summary records (don't delete) + now = naive_utc_now() + for summary in summaries: + summary.enabled = False + summary.disabled_at = now + summary.disabled_by = disabled_by + session.add(summary) + + session.commit() + logger.info("Disabled %s summary records for dataset %s", len(summaries), dataset.id) + + @staticmethod + def enable_summaries_for_segments( + dataset: Dataset, + segment_ids: list[str] | None = None, + ) -> None: + """ + Enable summary records and re-add vectors to vector database for segments. + + Note: This method enables summaries based on chunk status, not summary_index_setting.enable. + The summary_index_setting.enable flag only controls automatic generation, + not whether existing summaries can be used. + Summary.enabled should always be kept in sync with chunk.enabled. + + Args: + dataset: Dataset containing the segments + segment_ids: List of segment IDs to enable summaries for. If None, enable all. 
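+
+        Example (sketch of the disable/enable pairing used by the index tasks;
+        segment_ids and user_id are assumed to be in scope):
+
+            SummaryIndexService.disable_summaries_for_segments(
+                dataset=dataset, segment_ids=segment_ids, disabled_by=user_id
+            )
+            # ... later, when the chunks are re-enabled ...
+            SummaryIndexService.enable_summaries_for_segments(dataset=dataset, segment_ids=segment_ids)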
+ """ + # Only enable summary index for high_quality indexing technique + if dataset.indexing_technique != "high_quality": + return + + with session_factory.create_session() as session: + query = session.query(DocumentSegmentSummary).filter_by( + dataset_id=dataset.id, + enabled=False, # Only enable disabled summaries + ) + + if segment_ids: + query = query.filter(DocumentSegmentSummary.chunk_id.in_(segment_ids)) + + summaries = query.all() + + if not summaries: + return + + logger.info( + "Enabling %s summary records for dataset %s, segment_ids: %s", + len(summaries), + dataset.id, + len(segment_ids) if segment_ids else "all", + ) + + # Re-vectorize and re-add to vector database + enabled_count = 0 + for summary in summaries: + # Get the original segment + segment = ( + session.query(DocumentSegment) + .filter_by( + id=summary.chunk_id, + dataset_id=dataset.id, + ) + .first() + ) + + # Summary.enabled stays in sync with chunk.enabled, + # only enable summary if the associated chunk is enabled. + if not segment or not segment.enabled or segment.status != "completed": + continue + + if not summary.summary_content: + continue + + try: + # Re-vectorize summary (this will update status and tokens in its own session) + # Pass the session to vectorize_summary to avoid session isolation issues + SummaryIndexService.vectorize_summary(summary, segment, dataset, session=session) + + # Refresh the object from database to get the updated status and tokens from vectorize_summary + session.refresh(summary) + + # Enable summary record + summary.enabled = True + summary.disabled_at = None + summary.disabled_by = None + session.add(summary) + enabled_count += 1 + except Exception: + logger.exception("Failed to re-vectorize summary %s", summary.id) + # Keep it disabled if vectorization fails + continue + + session.commit() + logger.info("Enabled %s summary records for dataset %s", enabled_count, dataset.id) + + @staticmethod + def delete_summaries_for_segments( + dataset: Dataset, + segment_ids: list[str] | None = None, + ) -> None: + """ + Delete summary records and vectors for segments (used only for actual deletion scenarios). + For disable/enable operations, use disable_summaries_for_segments/enable_summaries_for_segments. + + Args: + dataset: Dataset containing the segments + segment_ids: List of segment IDs to delete summaries for. If None, delete all. + """ + with session_factory.create_session() as session: + query = session.query(DocumentSegmentSummary).filter_by(dataset_id=dataset.id) + + if segment_ids: + query = query.filter(DocumentSegmentSummary.chunk_id.in_(segment_ids)) + + summaries = query.all() + + if not summaries: + return + + # Delete from vector database + if dataset.indexing_technique == "high_quality": + summary_node_ids = [s.summary_index_node_id for s in summaries if s.summary_index_node_id] + if summary_node_ids: + vector = Vector(dataset) + vector.delete_by_ids(summary_node_ids) + + # Delete summary records + for summary in summaries: + session.delete(summary) + + session.commit() + logger.info("Deleted %s summary records for dataset %s", len(summaries), dataset.id) + + @staticmethod + def update_summary_for_segment( + segment: DocumentSegment, + dataset: Dataset, + summary_content: str, + ) -> DocumentSegmentSummary | None: + """ + Update summary for a segment and re-vectorize it. 
+ + Args: + segment: DocumentSegment to update summary for + dataset: Dataset containing the segment + summary_content: New summary content + + Returns: + Updated DocumentSegmentSummary instance, or None if indexing technique is not high_quality + """ + # Only update summary index for high_quality indexing technique + if dataset.indexing_technique != "high_quality": + return None + + # When user manually provides summary, allow saving even if summary_index_setting doesn't exist + # summary_index_setting is only needed for LLM generation, not for manual summary vectorization + # Vectorization uses dataset.embedding_model, which doesn't require summary_index_setting + + # Skip qa_model documents + if segment.document and segment.document.doc_form == "qa_model": + return None + + with session_factory.create_session() as session: + try: + # Check if summary_content is empty (whitespace-only strings are considered empty) + if not summary_content or not summary_content.strip(): + # If summary is empty, only delete existing summary vector and record + summary_record = ( + session.query(DocumentSegmentSummary) + .filter_by(chunk_id=segment.id, dataset_id=dataset.id) + .first() + ) + + if summary_record: + # Delete old vector if exists + old_summary_node_id = summary_record.summary_index_node_id + if old_summary_node_id: + try: + vector = Vector(dataset) + vector.delete_by_ids([old_summary_node_id]) + except Exception as e: + logger.warning( + "Failed to delete old summary vector for segment %s: %s", + segment.id, + str(e), + ) + + # Delete summary record since summary is empty + session.delete(summary_record) + session.commit() + logger.info("Deleted summary for segment %s (empty content provided)", segment.id) + return None + else: + # No existing summary record, nothing to do + logger.info("No summary record found for segment %s, nothing to delete", segment.id) + return None + + # Find existing summary record + summary_record = ( + session.query(DocumentSegmentSummary).filter_by(chunk_id=segment.id, dataset_id=dataset.id).first() + ) + + if summary_record: + # Update existing summary + old_summary_node_id = summary_record.summary_index_node_id + + # Update summary content + summary_record.summary_content = summary_content + summary_record.status = "generating" + summary_record.error = None # type: ignore[assignment] # Clear any previous errors + session.add(summary_record) + # Flush to ensure summary_content is saved before vectorize_summary queries it + session.flush() + + # Delete old vector if exists (before vectorization) + if old_summary_node_id: + try: + vector = Vector(dataset) + vector.delete_by_ids([old_summary_node_id]) + except Exception as e: + logger.warning( + "Failed to delete old summary vector for segment %s: %s", + segment.id, + str(e), + ) + + # Re-vectorize summary (this will update status to "completed" and tokens in its own session) + # vectorize_summary will also ensure summary_content is preserved + # Note: vectorize_summary may take time due to embedding API calls, but we need to complete it + # to ensure the summary is properly indexed + try: + # Pass the session to vectorize_summary to avoid session isolation issues + SummaryIndexService.vectorize_summary(summary_record, segment, dataset, session=session) + # Refresh the object from database to get the updated status and tokens from vectorize_summary + session.refresh(summary_record) + # Now commit the session (summary_record should have status="completed" and tokens from refresh) + session.commit() + 
logger.info("Successfully updated and re-vectorized summary for segment %s", segment.id) + return summary_record + except Exception as e: + # If vectorization fails, update status to error in current session + # Don't raise the exception - just log it and return the record with error status + # This allows the segment update to complete even if vectorization fails + summary_record.status = "error" + summary_record.error = f"Vectorization failed: {str(e)}" + session.commit() + logger.exception("Failed to vectorize summary for segment %s", segment.id) + # Return the record with error status instead of raising + # The caller can check the status if needed + return summary_record + else: + # Create new summary record if doesn't exist + summary_record = SummaryIndexService.create_summary_record( + segment, dataset, summary_content, status="generating" + ) + # Re-vectorize summary (this will update status to "completed" and tokens in its own session) + # Note: summary_record was created in a different session, + # so we need to merge it into current session + try: + # Merge the record into current session first (since it was created in a different session) + summary_record = session.merge(summary_record) + # Pass the session to vectorize_summary - it will update the merged record + SummaryIndexService.vectorize_summary(summary_record, segment, dataset, session=session) + # Refresh to get updated status and tokens from database + session.refresh(summary_record) + # Commit the session to persist the changes + session.commit() + logger.info("Successfully created and vectorized summary for segment %s", segment.id) + return summary_record + except Exception as e: + # If vectorization fails, update status to error in current session + # Merge the record into current session first + error_record = session.merge(summary_record) + error_record.status = "error" + error_record.error = f"Vectorization failed: {str(e)}" + session.commit() + logger.exception("Failed to vectorize summary for segment %s", segment.id) + # Return the record with error status instead of raising + return error_record + + except Exception as e: + logger.exception("Failed to update summary for segment %s", segment.id) + # Update summary record with error status if it exists + summary_record = ( + session.query(DocumentSegmentSummary).filter_by(chunk_id=segment.id, dataset_id=dataset.id).first() + ) + if summary_record: + summary_record.status = "error" + summary_record.error = str(e) + session.add(summary_record) + session.commit() + raise + + @staticmethod + def get_segment_summary(segment_id: str, dataset_id: str) -> DocumentSegmentSummary | None: + """ + Get summary for a single segment. + + Args: + segment_id: Segment ID (chunk_id) + dataset_id: Dataset ID + + Returns: + DocumentSegmentSummary instance if found, None otherwise + """ + with session_factory.create_session() as session: + return ( + session.query(DocumentSegmentSummary) + .where( + DocumentSegmentSummary.chunk_id == segment_id, + DocumentSegmentSummary.dataset_id == dataset_id, + DocumentSegmentSummary.enabled == True, # Only return enabled summaries + ) + .first() + ) + + @staticmethod + def get_segments_summaries(segment_ids: list[str], dataset_id: str) -> dict[str, DocumentSegmentSummary]: + """ + Get summaries for multiple segments. 
+ + Args: + segment_ids: List of segment IDs (chunk_ids) + dataset_id: Dataset ID + + Returns: + Dictionary mapping segment_id to DocumentSegmentSummary (only enabled summaries) + """ + if not segment_ids: + return {} + + with session_factory.create_session() as session: + summary_records = ( + session.query(DocumentSegmentSummary) + .where( + DocumentSegmentSummary.chunk_id.in_(segment_ids), + DocumentSegmentSummary.dataset_id == dataset_id, + DocumentSegmentSummary.enabled == True, # Only return enabled summaries + ) + .all() + ) + + return {summary.chunk_id: summary for summary in summary_records} + + @staticmethod + def get_document_summaries( + document_id: str, dataset_id: str, segment_ids: list[str] | None = None + ) -> list[DocumentSegmentSummary]: + """ + Get all summary records for a document. + + Args: + document_id: Document ID + dataset_id: Dataset ID + segment_ids: Optional list of segment IDs to filter by + + Returns: + List of DocumentSegmentSummary instances (only enabled summaries) + """ + with session_factory.create_session() as session: + query = session.query(DocumentSegmentSummary).filter( + DocumentSegmentSummary.document_id == document_id, + DocumentSegmentSummary.dataset_id == dataset_id, + DocumentSegmentSummary.enabled == True, # Only return enabled summaries + ) + + if segment_ids: + query = query.filter(DocumentSegmentSummary.chunk_id.in_(segment_ids)) + + return query.all() + + @staticmethod + def get_document_summary_index_status(document_id: str, dataset_id: str, tenant_id: str) -> str | None: + """ + Get summary_index_status for a single document. + + Args: + document_id: Document ID + dataset_id: Dataset ID + tenant_id: Tenant ID + + Returns: + "SUMMARIZING" if there are pending summaries, None otherwise + """ + # Get all segments for this document (excluding qa_model and re_segment) + with session_factory.create_session() as session: + segments = ( + session.query(DocumentSegment.id) + .where( + DocumentSegment.document_id == document_id, + DocumentSegment.status != "re_segment", + DocumentSegment.tenant_id == tenant_id, + ) + .all() + ) + segment_ids = [seg.id for seg in segments] + + if not segment_ids: + return None + + # Get all summary records for these segments + summaries = SummaryIndexService.get_segments_summaries(segment_ids, dataset_id) + summary_status_map = {chunk_id: summary.status for chunk_id, summary in summaries.items()} + + # Check if there are any "not_started" or "generating" status summaries + has_pending_summaries = any( + summary_status_map.get(segment_id) is not None # Ensure summary exists (enabled=True) + and summary_status_map[segment_id] in ("not_started", "generating") + for segment_id in segment_ids + ) + + return "SUMMARIZING" if has_pending_summaries else None + + @staticmethod + def get_documents_summary_index_status( + document_ids: list[str], dataset_id: str, tenant_id: str + ) -> dict[str, str | None]: + """ + Get summary_index_status for multiple documents. 
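+
+        Example (sketch of the returned mapping; document_ids, dataset and
+        tenant_id are assumed to be in scope):
+
+            status_map = SummaryIndexService.get_documents_summary_index_status(
+                document_ids=document_ids, dataset_id=dataset.id, tenant_id=tenant_id
+            )
+            # e.g. {"<doc-id-1>": "SUMMARIZING", "<doc-id-2>": None}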
+ + Args: + document_ids: List of document IDs + dataset_id: Dataset ID + tenant_id: Tenant ID + + Returns: + Dictionary mapping document_id to summary_index_status ("SUMMARIZING" or None) + """ + if not document_ids: + return {} + + # Get all segments for these documents (excluding qa_model and re_segment) + with session_factory.create_session() as session: + segments = ( + session.query(DocumentSegment.id, DocumentSegment.document_id) + .where( + DocumentSegment.document_id.in_(document_ids), + DocumentSegment.status != "re_segment", + DocumentSegment.tenant_id == tenant_id, + ) + .all() + ) + + # Group segments by document_id + document_segments_map: dict[str, list[str]] = {} + for segment in segments: + doc_id = str(segment.document_id) + if doc_id not in document_segments_map: + document_segments_map[doc_id] = [] + document_segments_map[doc_id].append(segment.id) + + # Get all summary records for these segments + all_segment_ids = [seg.id for seg in segments] + summaries = SummaryIndexService.get_segments_summaries(all_segment_ids, dataset_id) + summary_status_map = {chunk_id: summary.status for chunk_id, summary in summaries.items()} + + # Calculate summary_index_status for each document + result: dict[str, str | None] = {} + for doc_id in document_ids: + segment_ids = document_segments_map.get(doc_id, []) + if not segment_ids: + # No segments, status is None (not started) + result[doc_id] = None + continue + + # Check if there are any "not_started" or "generating" status summaries + # Only check enabled=True summaries (already filtered in query) + # If segment has no summary record (summary_status_map.get returns None), + # it means the summary is disabled (enabled=False) or not created yet, ignore it + has_pending_summaries = any( + summary_status_map.get(segment_id) is not None # Ensure summary exists (enabled=True) + and summary_status_map[segment_id] in ("not_started", "generating") + for segment_id in segment_ids + ) + + if has_pending_summaries: + # Task is still running (not started or generating) + result[doc_id] = "SUMMARIZING" + else: + # All enabled=True summaries are "completed" or "error", task finished + # Or no enabled=True summaries exist (all disabled) + result[doc_id] = None + + return result + + @staticmethod + def get_document_summary_status_detail( + document_id: str, + dataset_id: str, + ) -> dict[str, Any]: + """ + Get detailed summary status for a document. 
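+
+        Example return value (illustrative shape only; see Returns below):
+
+            {
+                "total_segments": 42,
+                "summary_status": {"completed": 40, "generating": 1, "error": 1, "not_started": 0},
+                "summaries": [{"segment_id": "...", "segment_position": 1, "status": "completed", ...}, ...],
+            }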
+ + Args: + document_id: Document ID + dataset_id: Dataset ID + + Returns: + Dictionary containing: + - total_segments: Total number of segments in the document + - summary_status: Dictionary with status counts + - completed: Number of summaries completed + - generating: Number of summaries being generated + - error: Number of summaries with errors + - not_started: Number of segments without summary records + - summaries: List of summary records with status and content preview + """ + from services.dataset_service import SegmentService + + # Get all segments for this document + segments = SegmentService.get_segments_by_document_and_dataset( + document_id=document_id, + dataset_id=dataset_id, + status="completed", + enabled=True, + ) + + total_segments = len(segments) + + # Get all summary records for these segments + segment_ids = [segment.id for segment in segments] + summaries = [] + if segment_ids: + summaries = SummaryIndexService.get_document_summaries( + document_id=document_id, + dataset_id=dataset_id, + segment_ids=segment_ids, + ) + + # Create a mapping of chunk_id to summary + summary_map = {summary.chunk_id: summary for summary in summaries} + + # Count statuses + status_counts = { + "completed": 0, + "generating": 0, + "error": 0, + "not_started": 0, + } + + summary_list = [] + for segment in segments: + summary = summary_map.get(segment.id) + if summary: + status = summary.status + status_counts[status] = status_counts.get(status, 0) + 1 + summary_list.append( + { + "segment_id": segment.id, + "segment_position": segment.position, + "status": summary.status, + "summary_preview": ( + summary.summary_content[:100] + "..." + if summary.summary_content and len(summary.summary_content) > 100 + else summary.summary_content + ), + "error": summary.error, + "created_at": int(summary.created_at.timestamp()) if summary.created_at else None, + "updated_at": int(summary.updated_at.timestamp()) if summary.updated_at else None, + } + ) + else: + status_counts["not_started"] += 1 + summary_list.append( + { + "segment_id": segment.id, + "segment_position": segment.position, + "status": "not_started", + "summary_preview": None, + "error": None, + "created_at": None, + "updated_at": None, + } + ) + + return { + "total_segments": total_segments, + "summary_status": status_counts, + "summaries": summary_list, + } diff --git a/api/tasks/add_document_to_index_task.py b/api/tasks/add_document_to_index_task.py index 62e6497e9d..2d3d00cd50 100644 --- a/api/tasks/add_document_to_index_task.py +++ b/api/tasks/add_document_to_index_task.py @@ -118,6 +118,19 @@ def add_document_to_index_task(dataset_document_id: str): ) session.commit() + # Enable summary indexes for all segments in this document + from services.summary_index_service import SummaryIndexService + + segment_ids_list = [segment.id for segment in segments] + if segment_ids_list: + try: + SummaryIndexService.enable_summaries_for_segments( + dataset=dataset, + segment_ids=segment_ids_list, + ) + except Exception as e: + logger.warning("Failed to enable summaries for document %s: %s", dataset_document.id, str(e)) + end_at = time.perf_counter() logger.info( click.style(f"Document added to index: {dataset_document.id} latency: {end_at - start_at}", fg="green") diff --git a/api/tasks/batch_clean_document_task.py b/api/tasks/batch_clean_document_task.py index 74b939e84d..d388284980 100644 --- a/api/tasks/batch_clean_document_task.py +++ b/api/tasks/batch_clean_document_task.py @@ -50,7 +50,9 @@ def batch_clean_document_task(document_ids: list[str], 
dataset_id: str, doc_form if segments: index_node_ids = [segment.index_node_id for segment in segments] index_processor = IndexProcessorFactory(doc_form).init_index_processor() - index_processor.clean(dataset, index_node_ids, with_keywords=True, delete_child_chunks=True) + index_processor.clean( + dataset, index_node_ids, with_keywords=True, delete_child_chunks=True, delete_summaries=True + ) for segment in segments: image_upload_file_ids = get_image_upload_file_ids(segment.content) diff --git a/api/tasks/clean_document_task.py b/api/tasks/clean_document_task.py index 86e7cc7160..91ace6be02 100644 --- a/api/tasks/clean_document_task.py +++ b/api/tasks/clean_document_task.py @@ -51,7 +51,9 @@ def clean_document_task(document_id: str, dataset_id: str, doc_form: str, file_i if segments: index_node_ids = [segment.index_node_id for segment in segments] index_processor = IndexProcessorFactory(doc_form).init_index_processor() - index_processor.clean(dataset, index_node_ids, with_keywords=True, delete_child_chunks=True) + index_processor.clean( + dataset, index_node_ids, with_keywords=True, delete_child_chunks=True, delete_summaries=True + ) for segment in segments: image_upload_file_ids = get_image_upload_file_ids(segment.content) diff --git a/api/tasks/clean_notion_document_task.py b/api/tasks/clean_notion_document_task.py index bcca1bf49f..4214f043e0 100644 --- a/api/tasks/clean_notion_document_task.py +++ b/api/tasks/clean_notion_document_task.py @@ -42,7 +42,9 @@ def clean_notion_document_task(document_ids: list[str], dataset_id: str): ).all() index_node_ids = [segment.index_node_id for segment in segments] - index_processor.clean(dataset, index_node_ids, with_keywords=True, delete_child_chunks=True) + index_processor.clean( + dataset, index_node_ids, with_keywords=True, delete_child_chunks=True, delete_summaries=True + ) segment_ids = [segment.id for segment in segments] segment_delete_stmt = delete(DocumentSegment).where(DocumentSegment.id.in_(segment_ids)) session.execute(segment_delete_stmt) diff --git a/api/tasks/delete_segment_from_index_task.py b/api/tasks/delete_segment_from_index_task.py index bfa709502c..764c635d83 100644 --- a/api/tasks/delete_segment_from_index_task.py +++ b/api/tasks/delete_segment_from_index_task.py @@ -47,6 +47,7 @@ def delete_segment_from_index_task( doc_form = dataset_document.doc_form # Proceed with index cleanup using the index_node_ids directly + # For actual deletion, we should delete summaries (not just disable them) index_processor = IndexProcessorFactory(doc_form).init_index_processor() index_processor.clean( dataset, @@ -54,6 +55,7 @@ def delete_segment_from_index_task( with_keywords=True, delete_child_chunks=True, precomputed_child_node_ids=child_node_ids, + delete_summaries=True, # Actually delete summaries when segment is deleted ) if dataset.is_multimodal: # delete segment attachment binding diff --git a/api/tasks/disable_segment_from_index_task.py b/api/tasks/disable_segment_from_index_task.py index 0ce6429a94..bc45171623 100644 --- a/api/tasks/disable_segment_from_index_task.py +++ b/api/tasks/disable_segment_from_index_task.py @@ -60,6 +60,18 @@ def disable_segment_from_index_task(segment_id: str): index_processor = IndexProcessorFactory(index_type).init_index_processor() index_processor.clean(dataset, [segment.index_node_id]) + # Disable summary index for this segment + from services.summary_index_service import SummaryIndexService + + try: + SummaryIndexService.disable_summaries_for_segments( + dataset=dataset, + segment_ids=[segment.id], + 
disabled_by=segment.disabled_by, + ) + except Exception as e: + logger.warning("Failed to disable summary for segment %s: %s", segment.id, str(e)) + end_at = time.perf_counter() logger.info( click.style( diff --git a/api/tasks/disable_segments_from_index_task.py b/api/tasks/disable_segments_from_index_task.py index 03635902d1..3cc267e821 100644 --- a/api/tasks/disable_segments_from_index_task.py +++ b/api/tasks/disable_segments_from_index_task.py @@ -68,6 +68,21 @@ def disable_segments_from_index_task(segment_ids: list, dataset_id: str, documen index_node_ids.extend(attachment_ids) index_processor.clean(dataset, index_node_ids, with_keywords=True, delete_child_chunks=False) + # Disable summary indexes for these segments + from services.summary_index_service import SummaryIndexService + + segment_ids_list = [segment.id for segment in segments] + try: + # Get disabled_by from first segment (they should all have the same disabled_by) + disabled_by = segments[0].disabled_by if segments else None + SummaryIndexService.disable_summaries_for_segments( + dataset=dataset, + segment_ids=segment_ids_list, + disabled_by=disabled_by, + ) + except Exception as e: + logger.warning("Failed to disable summaries for segments: %s", str(e)) + end_at = time.perf_counter() logger.info(click.style(f"Segments removed from index latency: {end_at - start_at}", fg="green")) except Exception: diff --git a/api/tasks/document_indexing_task.py b/api/tasks/document_indexing_task.py index 3bdff60196..34496e9c6f 100644 --- a/api/tasks/document_indexing_task.py +++ b/api/tasks/document_indexing_task.py @@ -14,6 +14,7 @@ from enums.cloud_plan import CloudPlan from libs.datetime_utils import naive_utc_now from models.dataset import Dataset, Document from services.feature_service import FeatureService +from tasks.generate_summary_index_task import generate_summary_index_task logger = logging.getLogger(__name__) @@ -99,6 +100,78 @@ def _document_indexing(dataset_id: str, document_ids: Sequence[str]): indexing_runner.run(documents) end_at = time.perf_counter() logger.info(click.style(f"Processed dataset: {dataset_id} latency: {end_at - start_at}", fg="green")) + + # Trigger summary index generation for completed documents if enabled + # Only generate for high_quality indexing technique and when summary_index_setting is enabled + # Re-query dataset to get latest summary_index_setting (in case it was updated) + dataset = session.query(Dataset).where(Dataset.id == dataset_id).first() + if not dataset: + logger.warning("Dataset %s not found after indexing", dataset_id) + return + + if dataset.indexing_technique == "high_quality": + summary_index_setting = dataset.summary_index_setting + if summary_index_setting and summary_index_setting.get("enable"): + # expire all session to get latest document's indexing status + session.expire_all() + # Check each document's indexing status and trigger summary generation if completed + for document_id in document_ids: + # Re-query document to get latest status (IndexingRunner may have updated it) + document = ( + session.query(Document) + .where(Document.id == document_id, Document.dataset_id == dataset_id) + .first() + ) + if document: + logger.info( + "Checking document %s for summary generation: status=%s, doc_form=%s, need_summary=%s", + document_id, + document.indexing_status, + document.doc_form, + document.need_summary, + ) + if ( + document.indexing_status == "completed" + and document.doc_form != "qa_model" + and document.need_summary is True + ): + try: + 
generate_summary_index_task.delay(dataset.id, document_id, None) + logger.info( + "Queued summary index generation task for document %s in dataset %s " + "after indexing completed", + document_id, + dataset.id, + ) + except Exception: + logger.exception( + "Failed to queue summary index generation task for document %s", + document_id, + ) + # Don't fail the entire indexing process if summary task queuing fails + else: + logger.info( + "Skipping summary generation for document %s: " + "status=%s, doc_form=%s, need_summary=%s", + document_id, + document.indexing_status, + document.doc_form, + document.need_summary, + ) + else: + logger.warning("Document %s not found after indexing", document_id) + else: + logger.info( + "Summary index generation skipped for dataset %s: summary_index_setting.enable=%s", + dataset.id, + summary_index_setting.get("enable") if summary_index_setting else None, + ) + else: + logger.info( + "Summary index generation skipped for dataset %s: indexing_technique=%s (not 'high_quality')", + dataset.id, + dataset.indexing_technique, + ) except DocumentIsPausedError as ex: logger.info(click.style(str(ex), fg="yellow")) except Exception: diff --git a/api/tasks/enable_segment_to_index_task.py b/api/tasks/enable_segment_to_index_task.py index 1f9f21aa7e..41ebb0b076 100644 --- a/api/tasks/enable_segment_to_index_task.py +++ b/api/tasks/enable_segment_to_index_task.py @@ -106,6 +106,17 @@ def enable_segment_to_index_task(segment_id: str): # save vector index index_processor.load(dataset, [document], multimodal_documents=multimodel_documents) + # Enable summary index for this segment + from services.summary_index_service import SummaryIndexService + + try: + SummaryIndexService.enable_summaries_for_segments( + dataset=dataset, + segment_ids=[segment.id], + ) + except Exception as e: + logger.warning("Failed to enable summary for segment %s: %s", segment.id, str(e)) + end_at = time.perf_counter() logger.info(click.style(f"Segment enabled to index: {segment.id} latency: {end_at - start_at}", fg="green")) except Exception as e: diff --git a/api/tasks/enable_segments_to_index_task.py b/api/tasks/enable_segments_to_index_task.py index 48d3c8e178..d90eb4c39f 100644 --- a/api/tasks/enable_segments_to_index_task.py +++ b/api/tasks/enable_segments_to_index_task.py @@ -106,6 +106,18 @@ def enable_segments_to_index_task(segment_ids: list, dataset_id: str, document_i # save vector index index_processor.load(dataset, documents, multimodal_documents=multimodal_documents) + # Enable summary indexes for these segments + from services.summary_index_service import SummaryIndexService + + segment_ids_list = [segment.id for segment in segments] + try: + SummaryIndexService.enable_summaries_for_segments( + dataset=dataset, + segment_ids=segment_ids_list, + ) + except Exception as e: + logger.warning("Failed to enable summaries for segments: %s", str(e)) + end_at = time.perf_counter() logger.info(click.style(f"Segments enabled to index latency: {end_at - start_at}", fg="green")) except Exception as e: diff --git a/api/tasks/generate_summary_index_task.py b/api/tasks/generate_summary_index_task.py new file mode 100644 index 0000000000..e4273e16b5 --- /dev/null +++ b/api/tasks/generate_summary_index_task.py @@ -0,0 +1,119 @@ +"""Async task for generating summary indexes.""" + +import logging +import time + +import click +from celery import shared_task + +from core.db.session_factory import session_factory +from models.dataset import Dataset, DocumentSegment +from models.dataset import Document as 
DatasetDocument +from services.summary_index_service import SummaryIndexService + +logger = logging.getLogger(__name__) + + +@shared_task(queue="dataset") +def generate_summary_index_task(dataset_id: str, document_id: str, segment_ids: list[str] | None = None): + """ + Async generate summary index for document segments. + + Args: + dataset_id: Dataset ID + document_id: Document ID + segment_ids: Optional list of specific segment IDs to process. If None, process all segments. + + Usage: + generate_summary_index_task.delay(dataset_id, document_id) + generate_summary_index_task.delay(dataset_id, document_id, segment_ids) + """ + logger.info( + click.style( + f"Start generating summary index for document {document_id} in dataset {dataset_id}", + fg="green", + ) + ) + start_at = time.perf_counter() + + try: + with session_factory.create_session() as session: + dataset = session.query(Dataset).where(Dataset.id == dataset_id).first() + if not dataset: + logger.error(click.style(f"Dataset not found: {dataset_id}", fg="red")) + return + + document = session.query(DatasetDocument).where(DatasetDocument.id == document_id).first() + if not document: + logger.error(click.style(f"Document not found: {document_id}", fg="red")) + return + + # Check if document needs summary + if not document.need_summary: + logger.info( + click.style( + f"Skipping summary generation for document {document_id}: need_summary is False", + fg="cyan", + ) + ) + return + + # Only generate summary index for high_quality indexing technique + if dataset.indexing_technique != "high_quality": + logger.info( + click.style( + f"Skipping summary generation for dataset {dataset_id}: " + f"indexing_technique is {dataset.indexing_technique}, not 'high_quality'", + fg="cyan", + ) + ) + return + + # Check if summary index is enabled + summary_index_setting = dataset.summary_index_setting + if not summary_index_setting or not summary_index_setting.get("enable"): + logger.info( + click.style( + f"Summary index is disabled for dataset {dataset_id}", + fg="cyan", + ) + ) + return + + # Determine if only parent chunks should be processed + only_parent_chunks = dataset.chunk_structure == "parent_child_index" + + # Generate summaries + summary_records = SummaryIndexService.generate_summaries_for_document( + dataset=dataset, + document=document, + summary_index_setting=summary_index_setting, + segment_ids=segment_ids, + only_parent_chunks=only_parent_chunks, + ) + + end_at = time.perf_counter() + logger.info( + click.style( + f"Summary index generation completed for document {document_id}: " + f"{len(summary_records)} summaries generated, latency: {end_at - start_at}", + fg="green", + ) + ) + + except Exception as e: + logger.exception("Failed to generate summary index for document %s", document_id) + # Update document segments with error status if needed + if segment_ids: + error_message = f"Summary generation failed: {str(e)}" + with session_factory.create_session() as session: + session.query(DocumentSegment).filter( + DocumentSegment.id.in_(segment_ids), + DocumentSegment.dataset_id == dataset_id, + ).update( + { + DocumentSegment.error: error_message, + }, + synchronize_session=False, + ) + session.commit() diff --git a/api/tasks/regenerate_summary_index_task.py b/api/tasks/regenerate_summary_index_task.py new file mode 100644 index 0000000000..cf8988d13e --- /dev/null +++ b/api/tasks/regenerate_summary_index_task.py @@ -0,0 +1,315 @@ +"""Task for regenerating summary indexes when dataset settings change.""" + +import logging +import time 
+from collections import defaultdict + +import click +from celery import shared_task +from sqlalchemy import or_, select + +from core.db.session_factory import session_factory +from models.dataset import Dataset, DocumentSegment, DocumentSegmentSummary +from models.dataset import Document as DatasetDocument +from services.summary_index_service import SummaryIndexService + +logger = logging.getLogger(__name__) + + +@shared_task(queue="dataset") +def regenerate_summary_index_task( + dataset_id: str, + regenerate_reason: str = "summary_model_changed", + regenerate_vectors_only: bool = False, +): + """ + Regenerate summary indexes for all documents in a dataset. + + This task is triggered when: + 1. summary_index_setting model changes (regenerate_reason="summary_model_changed") + - Regenerates summary content and vectors for all existing summaries + 2. embedding_model changes (regenerate_reason="embedding_model_changed") + - Only regenerates vectors for existing summaries (keeps summary content) + + Args: + dataset_id: Dataset ID + regenerate_reason: Reason for regeneration ("summary_model_changed" or "embedding_model_changed") + regenerate_vectors_only: If True, only regenerate vectors without regenerating summary content + """ + logger.info( + click.style( + f"Start regenerate summary index for dataset {dataset_id}, reason: {regenerate_reason}", + fg="green", + ) + ) + start_at = time.perf_counter() + + try: + with session_factory.create_session() as session: + dataset = session.query(Dataset).filter_by(id=dataset_id).first() + if not dataset: + logger.error(click.style(f"Dataset not found: {dataset_id}", fg="red")) + return + + # Only regenerate summary index for high_quality indexing technique + if dataset.indexing_technique != "high_quality": + logger.info( + click.style( + f"Skipping summary regeneration for dataset {dataset_id}: " + f"indexing_technique is {dataset.indexing_technique}, not 'high_quality'", + fg="cyan", + ) + ) + return + + # Check if summary index is enabled (only for summary_model change) + # For embedding_model change, we still re-vectorize existing summaries even if setting is disabled + summary_index_setting = dataset.summary_index_setting + if not regenerate_vectors_only: + # For summary_model change, require summary_index_setting to be enabled + if not summary_index_setting or not summary_index_setting.get("enable"): + logger.info( + click.style( + f"Summary index is disabled for dataset {dataset_id}", + fg="cyan", + ) + ) + return + + total_segments_processed = 0 + total_segments_failed = 0 + + if regenerate_vectors_only: + # For embedding_model change: directly query all segments with existing summaries + # Don't require document indexing_status == "completed" + # Include summaries with status "completed" or "error" (if they have content) + segments_with_summaries = ( + session.query(DocumentSegment, DocumentSegmentSummary) + .join( + DocumentSegmentSummary, + DocumentSegment.id == DocumentSegmentSummary.chunk_id, + ) + .join( + DatasetDocument, + DocumentSegment.document_id == DatasetDocument.id, + ) + .where( + DocumentSegment.dataset_id == dataset_id, + DocumentSegment.status == "completed", # Segment must be completed + DocumentSegment.enabled == True, + DocumentSegmentSummary.dataset_id == dataset_id, + DocumentSegmentSummary.summary_content.isnot(None), # Must have summary content + # Include completed summaries or error summaries (with content) + or_( + DocumentSegmentSummary.status == "completed", + DocumentSegmentSummary.status == "error", + ), + 
DatasetDocument.enabled == True, # Document must be enabled + DatasetDocument.archived == False, # Document must not be archived + DatasetDocument.doc_form != "qa_model", # Skip qa_model documents + ) + .order_by(DocumentSegment.document_id.asc(), DocumentSegment.position.asc()) + .all() + ) + + if not segments_with_summaries: + logger.info( + click.style( + f"No segments with summaries found for re-vectorization in dataset {dataset_id}", + fg="cyan", + ) + ) + return + + logger.info( + "Found %s segments with summaries for re-vectorization in dataset %s", + len(segments_with_summaries), + dataset_id, + ) + + # Group by document for logging + segments_by_document = defaultdict(list) + for segment, summary_record in segments_with_summaries: + segments_by_document[segment.document_id].append((segment, summary_record)) + + logger.info( + "Segments grouped into %s documents for re-vectorization", + len(segments_by_document), + ) + + for document_id, segment_summary_pairs in segments_by_document.items(): + logger.info( + "Re-vectorizing summaries for %s segments in document %s", + len(segment_summary_pairs), + document_id, + ) + + for segment, summary_record in segment_summary_pairs: + try: + # Delete old vector + if summary_record.summary_index_node_id: + try: + from core.rag.datasource.vdb.vector_factory import Vector + + vector = Vector(dataset) + vector.delete_by_ids([summary_record.summary_index_node_id]) + except Exception as e: + logger.warning( + "Failed to delete old summary vector for segment %s: %s", + segment.id, + str(e), + ) + + # Re-vectorize with new embedding model + SummaryIndexService.vectorize_summary(summary_record, segment, dataset) + session.commit() + total_segments_processed += 1 + + except Exception as e: + logger.error( + "Failed to re-vectorize summary for segment %s: %s", + segment.id, + str(e), + exc_info=True, + ) + total_segments_failed += 1 + # Update summary record with error status + summary_record.status = "error" + summary_record.error = f"Re-vectorization failed: {str(e)}" + session.add(summary_record) + session.commit() + continue + + else: + # For summary_model change: require document indexing_status == "completed" + # Get all documents with completed indexing status + dataset_documents = session.scalars( + select(DatasetDocument).where( + DatasetDocument.dataset_id == dataset_id, + DatasetDocument.indexing_status == "completed", + DatasetDocument.enabled == True, + DatasetDocument.archived == False, + ) + ).all() + + if not dataset_documents: + logger.info( + click.style( + f"No documents found for summary regeneration in dataset {dataset_id}", + fg="cyan", + ) + ) + return + + logger.info( + "Found %s documents for summary regeneration in dataset %s", + len(dataset_documents), + dataset_id, + ) + + for dataset_document in dataset_documents: + # Skip qa_model documents + if dataset_document.doc_form == "qa_model": + continue + + try: + # Get all segments with existing summaries + segments = ( + session.query(DocumentSegment) + .join( + DocumentSegmentSummary, + DocumentSegment.id == DocumentSegmentSummary.chunk_id, + ) + .where( + DocumentSegment.document_id == dataset_document.id, + DocumentSegment.dataset_id == dataset_id, + DocumentSegment.status == "completed", + DocumentSegment.enabled == True, + DocumentSegmentSummary.dataset_id == dataset_id, + ) + .order_by(DocumentSegment.position.asc()) + .all() + ) + + if not segments: + continue + + logger.info( + "Regenerating summaries for %s segments in document %s", + len(segments), + 
dataset_document.id, + ) + + for segment in segments: + summary_record = None + try: + # Get existing summary record + summary_record = ( + session.query(DocumentSegmentSummary) + .filter_by( + chunk_id=segment.id, + dataset_id=dataset_id, + ) + .first() + ) + + if not summary_record: + logger.warning("Summary record not found for segment %s, skipping", segment.id) + continue + + # Regenerate both summary content and vectors (for summary_model change) + SummaryIndexService.generate_and_vectorize_summary( + segment, dataset, summary_index_setting + ) + session.commit() + total_segments_processed += 1 + + except Exception as e: + logger.error( + "Failed to regenerate summary for segment %s: %s", + segment.id, + str(e), + exc_info=True, + ) + total_segments_failed += 1 + # Update summary record with error status + if summary_record: + summary_record.status = "error" + summary_record.error = f"Regeneration failed: {str(e)}" + session.add(summary_record) + session.commit() + continue + + except Exception as e: + logger.error( + "Failed to process document %s for summary regeneration: %s", + dataset_document.id, + str(e), + exc_info=True, + ) + continue + + end_at = time.perf_counter() + if regenerate_vectors_only: + logger.info( + click.style( + f"Summary re-vectorization completed for dataset {dataset_id}: " + f"{total_segments_processed} segments processed successfully, " + f"{total_segments_failed} segments failed, " + f"latency: {end_at - start_at:.2f}s", + fg="green", + ) + ) + else: + logger.info( + click.style( + f"Summary index regeneration completed for dataset {dataset_id}: " + f"{total_segments_processed} segments processed successfully, " + f"{total_segments_failed} segments failed, " + f"latency: {end_at - start_at:.2f}s", + fg="green", + ) + ) + + except Exception: + logger.exception("Regenerate summary index failed for dataset %s", dataset_id) diff --git a/api/tasks/remove_document_from_index_task.py b/api/tasks/remove_document_from_index_task.py index c3c255fb17..55259ab527 100644 --- a/api/tasks/remove_document_from_index_task.py +++ b/api/tasks/remove_document_from_index_task.py @@ -46,6 +46,21 @@ def remove_document_from_index_task(document_id: str): index_processor = IndexProcessorFactory(document.doc_form).init_index_processor() segments = session.scalars(select(DocumentSegment).where(DocumentSegment.document_id == document.id)).all() + + # Disable summary indexes for all segments in this document + from services.summary_index_service import SummaryIndexService + + segment_ids_list = [segment.id for segment in segments] + if segment_ids_list: + try: + SummaryIndexService.disable_summaries_for_segments( + dataset=dataset, + segment_ids=segment_ids_list, + disabled_by=document.disabled_by, + ) + except Exception as e: + logger.warning("Failed to disable summaries for document %s: %s", document.id, str(e)) + index_node_ids = [segment.index_node_id for segment in segments] if index_node_ids: try: diff --git a/api/tests/unit_tests/services/test_dataset_service_update_dataset.py b/api/tests/unit_tests/services/test_dataset_service_update_dataset.py index 0aabe2fc30..08818945e3 100644 --- a/api/tests/unit_tests/services/test_dataset_service_update_dataset.py +++ b/api/tests/unit_tests/services/test_dataset_service_update_dataset.py @@ -138,6 +138,7 @@ class TestDatasetServiceUpdateDataset: "services.dataset_service.DatasetCollectionBindingService.get_dataset_collection_binding" ) as mock_get_binding, patch("services.dataset_service.deal_dataset_vector_index_task") as mock_task, + 
patch("services.dataset_service.regenerate_summary_index_task") as mock_regenerate_task, patch( "services.dataset_service.current_user", create_autospec(Account, instance=True) ) as mock_current_user, @@ -147,6 +148,7 @@ class TestDatasetServiceUpdateDataset: "model_manager": mock_model_manager, "get_binding": mock_get_binding, "task": mock_task, + "regenerate_task": mock_regenerate_task, "current_user": mock_current_user, } @@ -549,6 +551,13 @@ class TestDatasetServiceUpdateDataset: # Verify vector index task was triggered mock_internal_provider_dependencies["task"].delay.assert_called_once_with("dataset-123", "update") + # Verify regenerate summary index task was triggered (when embedding_model changes) + mock_internal_provider_dependencies["regenerate_task"].delay.assert_called_once_with( + "dataset-123", + regenerate_reason="embedding_model_changed", + regenerate_vectors_only=True, + ) + # Verify return value assert result == dataset From dbfc47e8b01919afbb6e83a54fe582483652db51 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=9B=90=E7=B2=92=20Yanli?= Date: Thu, 29 Jan 2026 14:01:21 +0800 Subject: [PATCH 02/15] fix: SSRF in WordExtractor URL download (credit to @EaEa0001 ) (#31678) Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- api/core/file/file_manager.py | 4 ++ api/core/helper/ssrf_proxy.py | 30 ++++++++++----- api/core/rag/extractor/word_extractor.py | 9 +++-- .../core/rag/extractor/test_word_extractor.py | 38 +++++++++++++++++++ 4 files changed, 68 insertions(+), 13 deletions(-) diff --git a/api/core/file/file_manager.py b/api/core/file/file_manager.py index 120fb73cdb..c0fefef3d0 100644 --- a/api/core/file/file_manager.py +++ b/api/core/file/file_manager.py @@ -104,6 +104,8 @@ def download(f: File, /): ): return _download_file_content(f.storage_key) elif f.transfer_method == FileTransferMethod.REMOTE_URL: + if f.remote_url is None: + raise ValueError("Missing file remote_url") response = ssrf_proxy.get(f.remote_url, follow_redirects=True) response.raise_for_status() return response.content @@ -134,6 +136,8 @@ def _download_file_content(path: str, /): def _get_encoded_string(f: File, /): match f.transfer_method: case FileTransferMethod.REMOTE_URL: + if f.remote_url is None: + raise ValueError("Missing file remote_url") response = ssrf_proxy.get(f.remote_url, follow_redirects=True) response.raise_for_status() data = response.content diff --git a/api/core/helper/ssrf_proxy.py b/api/core/helper/ssrf_proxy.py index 128c64ff2c..ddccfbaf45 100644 --- a/api/core/helper/ssrf_proxy.py +++ b/api/core/helper/ssrf_proxy.py @@ -4,8 +4,10 @@ Proxy requests to avoid SSRF import logging import time +from typing import Any, TypeAlias import httpx +from pydantic import TypeAdapter, ValidationError from configs import dify_config from core.helper.http_client_pooling import get_pooled_http_client @@ -18,6 +20,9 @@ SSRF_DEFAULT_MAX_RETRIES = dify_config.SSRF_DEFAULT_MAX_RETRIES BACKOFF_FACTOR = 0.5 STATUS_FORCELIST = [429, 500, 502, 503, 504] +Headers: TypeAlias = dict[str, str] +_HEADERS_ADAPTER = TypeAdapter(Headers) + _SSL_VERIFIED_POOL_KEY = "ssrf:verified" _SSL_UNVERIFIED_POOL_KEY = "ssrf:unverified" _SSRF_CLIENT_LIMITS = httpx.Limits( @@ -76,7 +81,7 @@ def _get_ssrf_client(ssl_verify_enabled: bool) -> httpx.Client: ) -def _get_user_provided_host_header(headers: dict | None) -> str | None: +def _get_user_provided_host_header(headers: Headers | None) -> str | None: """ Extract the user-provided Host header from the headers dict. 
@@ -92,7 +97,7 @@ def _get_user_provided_host_header(headers: dict | None) -> str | None: return None -def _inject_trace_headers(headers: dict | None) -> dict: +def _inject_trace_headers(headers: Headers | None) -> Headers: """ Inject W3C traceparent header for distributed tracing. @@ -125,7 +130,7 @@ def _inject_trace_headers(headers: dict | None) -> dict: return headers -def make_request(method, url, max_retries=SSRF_DEFAULT_MAX_RETRIES, **kwargs): +def make_request(method: str, url: str, max_retries: int = SSRF_DEFAULT_MAX_RETRIES, **kwargs: Any) -> httpx.Response: # Convert requests-style allow_redirects to httpx-style follow_redirects if "allow_redirects" in kwargs: allow_redirects = kwargs.pop("allow_redirects") @@ -142,10 +147,15 @@ def make_request(method, url, max_retries=SSRF_DEFAULT_MAX_RETRIES, **kwargs): # prioritize per-call option, which can be switched on and off inside the HTTP node on the web UI verify_option = kwargs.pop("ssl_verify", dify_config.HTTP_REQUEST_NODE_SSL_VERIFY) + if not isinstance(verify_option, bool): + raise ValueError("ssl_verify must be a boolean") client = _get_ssrf_client(verify_option) # Inject traceparent header for distributed tracing (when OTEL is not enabled) - headers = kwargs.get("headers") or {} + try: + headers: Headers = _HEADERS_ADAPTER.validate_python(kwargs.get("headers") or {}) + except ValidationError as e: + raise ValueError("headers must be a mapping of string keys to string values") from e headers = _inject_trace_headers(headers) kwargs["headers"] = headers @@ -198,25 +208,25 @@ def make_request(method, url, max_retries=SSRF_DEFAULT_MAX_RETRIES, **kwargs): raise MaxRetriesExceededError(f"Reached maximum retries ({max_retries}) for URL {url}") -def get(url, max_retries=SSRF_DEFAULT_MAX_RETRIES, **kwargs): +def get(url: str, max_retries: int = SSRF_DEFAULT_MAX_RETRIES, **kwargs: Any) -> httpx.Response: return make_request("GET", url, max_retries=max_retries, **kwargs) -def post(url, max_retries=SSRF_DEFAULT_MAX_RETRIES, **kwargs): +def post(url: str, max_retries: int = SSRF_DEFAULT_MAX_RETRIES, **kwargs: Any) -> httpx.Response: return make_request("POST", url, max_retries=max_retries, **kwargs) -def put(url, max_retries=SSRF_DEFAULT_MAX_RETRIES, **kwargs): +def put(url: str, max_retries: int = SSRF_DEFAULT_MAX_RETRIES, **kwargs: Any) -> httpx.Response: return make_request("PUT", url, max_retries=max_retries, **kwargs) -def patch(url, max_retries=SSRF_DEFAULT_MAX_RETRIES, **kwargs): +def patch(url: str, max_retries: int = SSRF_DEFAULT_MAX_RETRIES, **kwargs: Any) -> httpx.Response: return make_request("PATCH", url, max_retries=max_retries, **kwargs) -def delete(url, max_retries=SSRF_DEFAULT_MAX_RETRIES, **kwargs): +def delete(url: str, max_retries: int = SSRF_DEFAULT_MAX_RETRIES, **kwargs: Any) -> httpx.Response: return make_request("DELETE", url, max_retries=max_retries, **kwargs) -def head(url, max_retries=SSRF_DEFAULT_MAX_RETRIES, **kwargs): +def head(url: str, max_retries: int = SSRF_DEFAULT_MAX_RETRIES, **kwargs: Any) -> httpx.Response: return make_request("HEAD", url, max_retries=max_retries, **kwargs) diff --git a/api/core/rag/extractor/word_extractor.py b/api/core/rag/extractor/word_extractor.py index 511f5a698d..1ddbfc5864 100644 --- a/api/core/rag/extractor/word_extractor.py +++ b/api/core/rag/extractor/word_extractor.py @@ -1,4 +1,7 @@ -"""Abstract interface for document loader implementations.""" +"""Word (.docx) document extractor used for RAG ingestion. 
+ +Supports local file paths and remote URLs (downloaded via `core.helper.ssrf_proxy`). +""" import logging import mimetypes @@ -8,7 +11,6 @@ import tempfile import uuid from urllib.parse import urlparse -import httpx from docx import Document as DocxDocument from docx.oxml.ns import qn from docx.text.run import Run @@ -44,7 +46,7 @@ class WordExtractor(BaseExtractor): # If the file is a web path, download it to a temporary file, and use that if not os.path.isfile(self.file_path) and self._is_valid_url(self.file_path): - response = httpx.get(self.file_path, timeout=None) + response = ssrf_proxy.get(self.file_path) if response.status_code != 200: response.close() @@ -55,6 +57,7 @@ class WordExtractor(BaseExtractor): self.temp_file = tempfile.NamedTemporaryFile() # noqa SIM115 try: self.temp_file.write(response.content) + self.temp_file.flush() finally: response.close() self.file_path = self.temp_file.name diff --git a/api/tests/unit_tests/core/rag/extractor/test_word_extractor.py b/api/tests/unit_tests/core/rag/extractor/test_word_extractor.py index f9e59a5f05..0792ada194 100644 --- a/api/tests/unit_tests/core/rag/extractor/test_word_extractor.py +++ b/api/tests/unit_tests/core/rag/extractor/test_word_extractor.py @@ -1,7 +1,9 @@ """Primarily used for testing merged cell scenarios""" +import io import os import tempfile +from pathlib import Path from types import SimpleNamespace from docx import Document @@ -56,6 +58,42 @@ def test_parse_row(): assert extractor._parse_row(row, {}, 3) == gt[idx] +def test_init_downloads_via_ssrf_proxy(monkeypatch): + doc = Document() + doc.add_paragraph("hello") + buf = io.BytesIO() + doc.save(buf) + docx_bytes = buf.getvalue() + + calls: list[tuple[str, object]] = [] + + class FakeResponse: + status_code = 200 + content = docx_bytes + + def close(self) -> None: + calls.append(("close", None)) + + def fake_get(url: str, **kwargs): + calls.append(("get", (url, kwargs))) + return FakeResponse() + + monkeypatch.setattr(we, "ssrf_proxy", SimpleNamespace(get=fake_get)) + + extractor = WordExtractor("https://example.com/test.docx", "tenant_id", "user_id") + try: + assert calls + assert calls[0][0] == "get" + url, kwargs = calls[0][1] + assert url == "https://example.com/test.docx" + assert kwargs.get("timeout") is None + assert extractor.web_path == "https://example.com/test.docx" + assert extractor.file_path != extractor.web_path + assert Path(extractor.file_path).read_bytes() == docx_bytes + finally: + extractor.temp_file.close() + + def test_extract_images_from_docx(monkeypatch): external_bytes = b"ext-bytes" internal_bytes = b"int-bytes" From ceb69147931c95ff2df4d31094a3f0dd030cddf2 Mon Sep 17 00:00:00 2001 From: Nie Ronghua Date: Thu, 29 Jan 2026 14:31:15 +0800 Subject: [PATCH 03/15] refactor(model): Refactor plugin model schema cache to be process-global to prevent redundant Daemon API calls (#31689) Signed-off-by: -LAN- Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> Co-authored-by: -LAN- --- api/configs/feature/__init__.py | 5 ++ api/contexts/__init__.py | 7 -- .../model_providers/__base/ai_model.py | 78 +++++++++++++------ .../model_providers/model_provider_factory.py | 73 ++++++++++++----- 4 files changed, 111 insertions(+), 52 deletions(-) diff --git a/api/configs/feature/__init__.py b/api/configs/feature/__init__.py index 786094f295..4343a056dd 100644 --- a/api/configs/feature/__init__.py +++ b/api/configs/feature/__init__.py @@ -243,6 +243,11 @@ class PluginConfig(BaseSettings): default=15728640 * 12, ) + 
PLUGIN_MODEL_SCHEMA_CACHE_TTL: PositiveInt = Field( + description="TTL in seconds for caching plugin model schemas in Redis", + default=24 * 60 * 60, + ) + class MarketplaceConfig(BaseSettings): """ diff --git a/api/contexts/__init__.py b/api/contexts/__init__.py index 7c16bc231f..c52dcf8a57 100644 --- a/api/contexts/__init__.py +++ b/api/contexts/__init__.py @@ -6,7 +6,6 @@ from contexts.wrapper import RecyclableContextVar if TYPE_CHECKING: from core.datasource.__base.datasource_provider import DatasourcePluginProviderController - from core.model_runtime.entities.model_entities import AIModelEntity from core.plugin.entities.plugin_daemon import PluginModelProviderEntity from core.tools.plugin_tool.provider import PluginToolProviderController from core.trigger.provider import PluginTriggerProviderController @@ -29,12 +28,6 @@ plugin_model_providers_lock: RecyclableContextVar[Lock] = RecyclableContextVar( ContextVar("plugin_model_providers_lock") ) -plugin_model_schema_lock: RecyclableContextVar[Lock] = RecyclableContextVar(ContextVar("plugin_model_schema_lock")) - -plugin_model_schemas: RecyclableContextVar[dict[str, "AIModelEntity"]] = RecyclableContextVar( - ContextVar("plugin_model_schemas") -) - datasource_plugin_providers: RecyclableContextVar[dict[str, "DatasourcePluginProviderController"]] = ( RecyclableContextVar(ContextVar("datasource_plugin_providers")) ) diff --git a/api/core/model_runtime/model_providers/__base/ai_model.py b/api/core/model_runtime/model_providers/__base/ai_model.py index 45f0335c2e..c3e50eaddd 100644 --- a/api/core/model_runtime/model_providers/__base/ai_model.py +++ b/api/core/model_runtime/model_providers/__base/ai_model.py @@ -1,10 +1,11 @@ import decimal import hashlib -from threading import Lock +import logging -from pydantic import BaseModel, ConfigDict, Field +from pydantic import BaseModel, ConfigDict, Field, ValidationError +from redis import RedisError -import contexts +from configs import dify_config from core.model_runtime.entities.common_entities import I18nObject from core.model_runtime.entities.defaults import PARAMETER_RULE_TEMPLATE from core.model_runtime.entities.model_entities import ( @@ -24,6 +25,9 @@ from core.model_runtime.errors.invoke import ( InvokeServerUnavailableError, ) from core.plugin.entities.plugin_daemon import PluginModelProviderEntity +from extensions.ext_redis import redis_client + +logger = logging.getLogger(__name__) class AIModel(BaseModel): @@ -144,34 +148,60 @@ class AIModel(BaseModel): plugin_model_manager = PluginModelClient() cache_key = f"{self.tenant_id}:{self.plugin_id}:{self.provider_name}:{self.model_type.value}:{model}" - # sort credentials sorted_credentials = sorted(credentials.items()) if credentials else [] cache_key += ":".join([hashlib.md5(f"{k}:{v}".encode()).hexdigest() for k, v in sorted_credentials]) + cached_schema_json = None try: - contexts.plugin_model_schemas.get() - except LookupError: - contexts.plugin_model_schemas.set({}) - contexts.plugin_model_schema_lock.set(Lock()) - - with contexts.plugin_model_schema_lock.get(): - if cache_key in contexts.plugin_model_schemas.get(): - return contexts.plugin_model_schemas.get()[cache_key] - - schema = plugin_model_manager.get_model_schema( - tenant_id=self.tenant_id, - user_id="unknown", - plugin_id=self.plugin_id, - provider=self.provider_name, - model_type=self.model_type.value, - model=model, - credentials=credentials or {}, + cached_schema_json = redis_client.get(cache_key) + except (RedisError, RuntimeError) as exc: + logger.warning( + "Failed to 
read plugin model schema cache for model %s: %s", + model, + str(exc), + exc_info=True, ) + if cached_schema_json: + try: + return AIModelEntity.model_validate_json(cached_schema_json) + except ValidationError: + logger.warning( + "Failed to validate cached plugin model schema for model %s", + model, + exc_info=True, + ) + try: + redis_client.delete(cache_key) + except (RedisError, RuntimeError) as exc: + logger.warning( + "Failed to delete invalid plugin model schema cache for model %s: %s", + model, + str(exc), + exc_info=True, + ) - if schema: - contexts.plugin_model_schemas.get()[cache_key] = schema + schema = plugin_model_manager.get_model_schema( + tenant_id=self.tenant_id, + user_id="unknown", + plugin_id=self.plugin_id, + provider=self.provider_name, + model_type=self.model_type.value, + model=model, + credentials=credentials or {}, + ) - return schema + if schema: + try: + redis_client.setex(cache_key, dify_config.PLUGIN_MODEL_SCHEMA_CACHE_TTL, schema.model_dump_json()) + except (RedisError, RuntimeError) as exc: + logger.warning( + "Failed to write plugin model schema cache for model %s: %s", + model, + str(exc), + exc_info=True, + ) + + return schema def get_customizable_model_schema_from_credentials(self, model: str, credentials: dict) -> AIModelEntity | None: """ diff --git a/api/core/model_runtime/model_providers/model_provider_factory.py b/api/core/model_runtime/model_providers/model_provider_factory.py index 28f162a928..64538a6779 100644 --- a/api/core/model_runtime/model_providers/model_provider_factory.py +++ b/api/core/model_runtime/model_providers/model_provider_factory.py @@ -5,7 +5,11 @@ import logging from collections.abc import Sequence from threading import Lock +from pydantic import ValidationError +from redis import RedisError + import contexts +from configs import dify_config from core.model_runtime.entities.model_entities import AIModelEntity, ModelType from core.model_runtime.entities.provider_entities import ProviderConfig, ProviderEntity, SimpleProviderEntity from core.model_runtime.model_providers.__base.ai_model import AIModel @@ -18,6 +22,7 @@ from core.model_runtime.model_providers.__base.tts_model import TTSModel from core.model_runtime.schema_validators.model_credential_schema_validator import ModelCredentialSchemaValidator from core.model_runtime.schema_validators.provider_credential_schema_validator import ProviderCredentialSchemaValidator from core.plugin.entities.plugin_daemon import PluginModelProviderEntity +from extensions.ext_redis import redis_client from models.provider_ids import ModelProviderID logger = logging.getLogger(__name__) @@ -175,34 +180,60 @@ class ModelProviderFactory: """ plugin_id, provider_name = self.get_plugin_id_and_provider_name_from_provider(provider) cache_key = f"{self.tenant_id}:{plugin_id}:{provider_name}:{model_type.value}:{model}" - # sort credentials sorted_credentials = sorted(credentials.items()) if credentials else [] cache_key += ":".join([hashlib.md5(f"{k}:{v}".encode()).hexdigest() for k, v in sorted_credentials]) + cached_schema_json = None try: - contexts.plugin_model_schemas.get() - except LookupError: - contexts.plugin_model_schemas.set({}) - contexts.plugin_model_schema_lock.set(Lock()) - - with contexts.plugin_model_schema_lock.get(): - if cache_key in contexts.plugin_model_schemas.get(): - return contexts.plugin_model_schemas.get()[cache_key] - - schema = self.plugin_model_manager.get_model_schema( - tenant_id=self.tenant_id, - user_id="unknown", - plugin_id=plugin_id, - provider=provider_name, - 
model_type=model_type.value, - model=model, - credentials=credentials or {}, + cached_schema_json = redis_client.get(cache_key) + except (RedisError, RuntimeError) as exc: + logger.warning( + "Failed to read plugin model schema cache for model %s: %s", + model, + str(exc), + exc_info=True, ) + if cached_schema_json: + try: + return AIModelEntity.model_validate_json(cached_schema_json) + except ValidationError: + logger.warning( + "Failed to validate cached plugin model schema for model %s", + model, + exc_info=True, + ) + try: + redis_client.delete(cache_key) + except (RedisError, RuntimeError) as exc: + logger.warning( + "Failed to delete invalid plugin model schema cache for model %s: %s", + model, + str(exc), + exc_info=True, + ) - if schema: - contexts.plugin_model_schemas.get()[cache_key] = schema + schema = self.plugin_model_manager.get_model_schema( + tenant_id=self.tenant_id, + user_id="unknown", + plugin_id=plugin_id, + provider=provider_name, + model_type=model_type.value, + model=model, + credentials=credentials or {}, + ) - return schema + if schema: + try: + redis_client.setex(cache_key, dify_config.PLUGIN_MODEL_SCHEMA_CACHE_TTL, schema.model_dump_json()) + except (RedisError, RuntimeError) as exc: + logger.warning( + "Failed to write plugin model schema cache for model %s: %s", + model, + str(exc), + exc_info=True, + ) + + return schema def get_models( self, From 3bcfb4031ac3a1e97238ab6e5c68a986f73f6a95 Mon Sep 17 00:00:00 2001 From: Asuka Minato Date: Thu, 29 Jan 2026 15:34:14 +0900 Subject: [PATCH 04/15] refactor: ExporleBanner to TypeBase (#31698) --- api/controllers/console/admin.py | 14 ++++++-------- api/models/model.py | 22 ++++++++++++++-------- 2 files changed, 20 insertions(+), 16 deletions(-) diff --git a/api/controllers/console/admin.py b/api/controllers/console/admin.py index e1ee2c24b8..03b602f6e8 100644 --- a/api/controllers/console/admin.py +++ b/api/controllers/console/admin.py @@ -243,15 +243,13 @@ class InsertExploreBannerApi(Resource): def post(self): payload = InsertExploreBannerPayload.model_validate(console_ns.payload) - content = { - "category": payload.category, - "title": payload.title, - "description": payload.description, - "img-src": payload.img_src, - } - banner = ExporleBanner( - content=content, + content={ + "category": payload.category, + "title": payload.title, + "description": payload.description, + "img-src": payload.img_src, + }, link=payload.link, sort=payload.sort, language=payload.language, diff --git a/api/models/model.py b/api/models/model.py index be0cfd58a7..c1c6e04ce9 100644 --- a/api/models/model.py +++ b/api/models/model.py @@ -657,16 +657,22 @@ class AccountTrialAppRecord(Base): return user -class ExporleBanner(Base): +class ExporleBanner(TypeBase): __tablename__ = "exporle_banners" __table_args__ = (sa.PrimaryKeyConstraint("id", name="exporler_banner_pkey"),) - id = mapped_column(StringUUID, server_default=sa.text("uuid_generate_v4()")) - content = mapped_column(sa.JSON, nullable=False) - link = mapped_column(String(255), nullable=False) - sort = mapped_column(sa.Integer, nullable=False) - status = mapped_column(sa.String(255), nullable=False, server_default=sa.text("'enabled'::character varying")) - created_at = mapped_column(sa.DateTime, nullable=False, server_default=func.current_timestamp()) - language = mapped_column(String(255), nullable=False, server_default=sa.text("'en-US'::character varying")) + id: Mapped[str] = mapped_column(StringUUID, server_default=sa.text("uuid_generate_v4()"), init=False) + content: 
Mapped[dict[str, Any]] = mapped_column(sa.JSON, nullable=False) + link: Mapped[str] = mapped_column(String(255), nullable=False) + sort: Mapped[int] = mapped_column(sa.Integer, nullable=False) + status: Mapped[str] = mapped_column( + sa.String(255), nullable=False, server_default=sa.text("'enabled'::character varying"), default="enabled" + ) + created_at: Mapped[datetime] = mapped_column( + sa.DateTime, nullable=False, server_default=func.current_timestamp(), init=False + ) + language: Mapped[str] = mapped_column( + String(255), nullable=False, server_default=sa.text("'en-US'::character varying"), default="en-US" + ) class OAuthProviderApp(TypeBase): From 0934b89da9401ee3068b7c723b3fc2b24ef38a7d Mon Sep 17 00:00:00 2001 From: -LAN- Date: Thu, 29 Jan 2026 15:06:40 +0800 Subject: [PATCH 05/15] chore(import-linter): add a rule to make model_runtime isolate (#31706) --- api/.importlinter | 52 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/api/.importlinter b/api/.importlinter index ff0577222e..9dad254560 100644 --- a/api/.importlinter +++ b/api/.importlinter @@ -303,6 +303,58 @@ ignore_imports = core.workflow.nodes.agent.agent_node -> services core.workflow.nodes.tool.tool_node -> services +[importlinter:contract:model-runtime-no-internal-imports] +name = Model Runtime Internal Imports +type = forbidden +source_modules = + core.model_runtime +forbidden_modules = + configs + controllers + extensions + models + services + tasks + core.agent + core.app + core.base + core.callback_handler + core.datasource + core.db + core.entities + core.errors + core.extension + core.external_data_tool + core.file + core.helper + core.hosting_configuration + core.indexing_runner + core.llm_generator + core.logging + core.mcp + core.memory + core.model_manager + core.moderation + core.ops + core.plugin + core.prompt + core.provider_manager + core.rag + core.repositories + core.schemas + core.tools + core.trigger + core.variables + core.workflow +ignore_imports = + core.model_runtime.model_providers.__base.ai_model -> configs + core.model_runtime.model_providers.__base.ai_model -> extensions.ext_redis + core.model_runtime.model_providers.__base.large_language_model -> configs + core.model_runtime.model_providers.__base.text_embedding_model -> core.entities.embedding_type + core.model_runtime.model_providers.model_provider_factory -> configs + core.model_runtime.model_providers.model_provider_factory -> extensions.ext_redis + core.model_runtime.model_providers.model_provider_factory -> models.provider_ids + [importlinter:contract:rsc] name = RSC type = layers From 4f2cd4049856115dab18464b93601c7aa1ea18b1 Mon Sep 17 00:00:00 2001 From: JQSevenMiao <141806521+JQSevenMiao@users.noreply.github.com> Date: Thu, 29 Jan 2026 15:37:37 +0800 Subject: [PATCH 06/15] fix: convert HTTP method to lowercase when parsing cURL commands (#31704) Co-authored-by: jiasiqi --- .../components/workflow/nodes/http/components/curl-panel.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/web/app/components/workflow/nodes/http/components/curl-panel.tsx b/web/app/components/workflow/nodes/http/components/curl-panel.tsx index aa67a2a0ae..6c809c310f 100644 --- a/web/app/components/workflow/nodes/http/components/curl-panel.tsx +++ b/web/app/components/workflow/nodes/http/components/curl-panel.tsx @@ -41,7 +41,7 @@ const parseCurl = (curlCommand: string): { node: HttpNodeType | null, error: str case '--request': if (i + 1 >= args.length) return { node: null, error: 'Missing HTTP 
method after -X or --request.' } - node.method = (args[++i].replace(/^['"]|['"]$/g, '') as Method) || Method.get + node.method = (args[++i].replace(/^['"]|['"]$/g, '').toLowerCase() as Method) || Method.get hasData = true break case '-H': From 74cfe776744c7aa4ca5c820f0a60cb852a650700 Mon Sep 17 00:00:00 2001 From: Seokrin Taron Sung Date: Thu, 29 Jan 2026 16:51:51 +0900 Subject: [PATCH 07/15] fix(web): remove unwanted border on sticky elements in dark mode (#31699) --- web/app/styles/monaco-sticky-fix.css | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/web/app/styles/monaco-sticky-fix.css b/web/app/styles/monaco-sticky-fix.css index 66bb5921ce..ac928cf246 100644 --- a/web/app/styles/monaco-sticky-fix.css +++ b/web/app/styles/monaco-sticky-fix.css @@ -9,8 +9,7 @@ html[data-theme="dark"] .monaco-editor .sticky-line-content:hover { background-color: var(--color-components-sticky-header-bg-hover) !important; } -/* Fallback: any app sticky header using input-bg variables should use the sticky header bg when sticky */ -html[data-theme="dark"] .sticky, html[data-theme="dark"] .is-sticky { +/* Monaco editor specific sticky scroll styles in dark mode */ +html[data-theme="dark"] .monaco-editor .sticky-line-root { background-color: var(--color-components-sticky-header-bg) !important; - border-bottom: 1px solid var(--color-components-sticky-header-border) !important; } \ No newline at end of file From b9ac7af9c5246f84bf86efd87207ac0e26ae8a9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=9B=90=E7=B2=92=20Yanli?= Date: Thu, 29 Jan 2026 16:02:49 +0800 Subject: [PATCH 08/15] refactor(web): consolidate download helpers (#31664) --- web/app/components/app-sidebar/app-info.tsx | 8 +- .../app-sidebar/dataset-info/dropdown.tsx | 8 +- .../app/annotation/header-opts/index.tsx | 16 ++-- .../configuration/config-var/index.spec.tsx | 6 +- web/app/components/apps/app-card.tsx | 10 +-- .../file-uploader-in-attachment/file-item.tsx | 4 +- .../file-image-item.tsx | 4 +- .../file-uploader-in-chat-input/file-item.tsx | 4 +- .../base/file-uploader/utils.spec.ts | 72 ------------------ .../components/base/file-uploader/utils.ts | 12 --- .../base/image-uploader/image-preview.tsx | 36 +++------ web/app/components/base/qrcode/index.tsx | 10 +-- .../index-failed.spec.tsx | 11 ++- .../list/template-card/index.spec.tsx | 13 ++-- .../list/template-card/index.tsx | 7 +- .../create/website/watercrawl/index.tsx | 20 ++++- .../hooks/use-dataset-card-state.ts | 8 +- .../header/account-dropdown/compliance.tsx | 5 +- .../components/rag-pipeline/hooks/use-DSL.ts | 8 +- .../components/workflow-app/hooks/use-DSL.ts | 8 +- .../market-place-plugin/action.tsx | 4 +- .../workflow/operator/more-actions.tsx | 23 ++---- web/eslint-suppressions.json | 9 +-- web/utils/download.spec.ts | 75 +++++++++++++++++++ web/utils/format.spec.ts | 45 +---------- web/utils/format.ts | 11 --- 26 files changed, 167 insertions(+), 270 deletions(-) create mode 100644 web/utils/download.spec.ts diff --git a/web/app/components/app-sidebar/app-info.tsx b/web/app/components/app-sidebar/app-info.tsx index 255feaccdf..aa31f0201f 100644 --- a/web/app/components/app-sidebar/app-info.tsx +++ b/web/app/components/app-sidebar/app-info.tsx @@ -31,6 +31,7 @@ import { fetchWorkflowDraft } from '@/service/workflow' import { AppModeEnum } from '@/types/app' import { getRedirection } from '@/utils/app-redirection' import { cn } from '@/utils/classnames' +import { downloadBlob } from '@/utils/download' import AppIcon from '../base/app-icon' import AppOperations 
from './app-operations' @@ -145,13 +146,8 @@ const AppInfo = ({ expand, onlyShowDetail = false, openState = false, onDetailEx appID: appDetail.id, include, }) - const a = document.createElement('a') const file = new Blob([data], { type: 'application/yaml' }) - const url = URL.createObjectURL(file) - a.href = url - a.download = `${appDetail.name}.yml` - a.click() - URL.revokeObjectURL(url) + downloadBlob({ data: file, fileName: `${appDetail.name}.yml` }) } catch { notify({ type: 'error', message: t('exportFailed', { ns: 'app' }) }) diff --git a/web/app/components/app-sidebar/dataset-info/dropdown.tsx b/web/app/components/app-sidebar/dataset-info/dropdown.tsx index 4d7c832e04..96127c4210 100644 --- a/web/app/components/app-sidebar/dataset-info/dropdown.tsx +++ b/web/app/components/app-sidebar/dataset-info/dropdown.tsx @@ -11,6 +11,7 @@ import { datasetDetailQueryKeyPrefix, useInvalidDatasetList } from '@/service/kn import { useInvalid } from '@/service/use-base' import { useExportPipelineDSL } from '@/service/use-pipeline' import { cn } from '@/utils/classnames' +import { downloadBlob } from '@/utils/download' import ActionButton from '../../base/action-button' import Confirm from '../../base/confirm' import { PortalToFollowElem, PortalToFollowElemContent, PortalToFollowElemTrigger } from '../../base/portal-to-follow-elem' @@ -64,13 +65,8 @@ const DropDown = ({ pipelineId: pipeline_id, include, }) - const a = document.createElement('a') const file = new Blob([data], { type: 'application/yaml' }) - const url = URL.createObjectURL(file) - a.href = url - a.download = `${name}.pipeline` - a.click() - URL.revokeObjectURL(url) + downloadBlob({ data: file, fileName: `${name}.pipeline` }) } catch { Toast.notify({ type: 'error', message: t('exportFailed', { ns: 'app' }) }) diff --git a/web/app/components/app/annotation/header-opts/index.tsx b/web/app/components/app/annotation/header-opts/index.tsx index 5add1aed32..4fc1e26007 100644 --- a/web/app/components/app/annotation/header-opts/index.tsx +++ b/web/app/components/app/annotation/header-opts/index.tsx @@ -21,6 +21,7 @@ import { LanguagesSupported } from '@/i18n-config/language' import { clearAllAnnotations, fetchExportAnnotationList } from '@/service/annotation' import { cn } from '@/utils/classnames' +import { downloadBlob } from '@/utils/download' import Button from '../../../base/button' import AddAnnotationModal from '../add-annotation-modal' import BatchAddModal from '../batch-add-annotation-modal' @@ -56,28 +57,23 @@ const HeaderOptions: FC = ({ ) const JSONLOutput = () => { - const a = document.createElement('a') const content = listTransformer(list).join('\n') const file = new Blob([content], { type: 'application/jsonl' }) - const url = URL.createObjectURL(file) - a.href = url - a.download = `annotations-${locale}.jsonl` - a.click() - URL.revokeObjectURL(url) + downloadBlob({ data: file, fileName: `annotations-${locale}.jsonl` }) } - const fetchList = async () => { + const fetchList = React.useCallback(async () => { const { data }: any = await fetchExportAnnotationList(appId) setList(data as AnnotationItemBasic[]) - } + }, [appId]) useEffect(() => { fetchList() - }, []) + }, [fetchList]) useEffect(() => { if (controlUpdateList) fetchList() - }, [controlUpdateList]) + }, [controlUpdateList, fetchList]) const [showBulkImportModal, setShowBulkImportModal] = useState(false) const [showClearConfirm, setShowClearConfirm] = useState(false) diff --git a/web/app/components/app/configuration/config-var/index.spec.tsx 
b/web/app/components/app/configuration/config-var/index.spec.tsx index b5015ed079..490d7b4410 100644 --- a/web/app/components/app/configuration/config-var/index.spec.tsx +++ b/web/app/components/app/configuration/config-var/index.spec.tsx @@ -2,7 +2,7 @@ import type { ReactNode } from 'react' import type { IConfigVarProps } from './index' import type { ExternalDataTool } from '@/models/common' import type { PromptVariable } from '@/models/debug' -import { act, fireEvent, render, screen } from '@testing-library/react' +import { act, fireEvent, render, screen, waitFor } from '@testing-library/react' import * as React from 'react' import { vi } from 'vitest' import Toast from '@/app/components/base/toast' @@ -240,7 +240,9 @@ describe('ConfigVar', () => { const saveButton = await screen.findByRole('button', { name: 'common.operation.save' }) fireEvent.click(saveButton) - expect(onPromptVariablesChange).toHaveBeenCalledTimes(1) + await waitFor(() => { + expect(onPromptVariablesChange).toHaveBeenCalledTimes(1) + }) }) it('should show error when variable key is duplicated', async () => { diff --git a/web/app/components/apps/app-card.tsx b/web/app/components/apps/app-card.tsx index f1eadb9d05..730a39b68d 100644 --- a/web/app/components/apps/app-card.tsx +++ b/web/app/components/apps/app-card.tsx @@ -33,6 +33,7 @@ import { fetchWorkflowDraft } from '@/service/workflow' import { AppModeEnum } from '@/types/app' import { getRedirection } from '@/utils/app-redirection' import { cn } from '@/utils/classnames' +import { downloadBlob } from '@/utils/download' import { formatTime } from '@/utils/time' import { basePath } from '@/utils/var' @@ -161,13 +162,8 @@ const AppCard = ({ app, onRefresh }: AppCardProps) => { appID: app.id, include, }) - const a = document.createElement('a') const file = new Blob([data], { type: 'application/yaml' }) - const url = URL.createObjectURL(file) - a.href = url - a.download = `${app.name}.yml` - a.click() - URL.revokeObjectURL(url) + downloadBlob({ data: file, fileName: `${app.name}.yml` }) } catch { notify({ type: 'error', message: t('exportFailed', { ns: 'app' }) }) @@ -346,7 +342,7 @@ const AppCard = ({ app, onRefresh }: AppCardProps) => { dateFormat: `${t('segment.dateTimeFormat', { ns: 'datasetDocuments' })}`, }) return `${t('segment.editedAt', { ns: 'datasetDocuments' })} ${timeText}` - }, [app.updated_at, app.created_at]) + }, [app.updated_at, app.created_at, t]) return ( <> diff --git a/web/app/components/base/file-uploader/file-uploader-in-attachment/file-item.tsx b/web/app/components/base/file-uploader/file-uploader-in-attachment/file-item.tsx index 6ef5bcb308..f8015aa7c7 100644 --- a/web/app/components/base/file-uploader/file-uploader-in-attachment/file-item.tsx +++ b/web/app/components/base/file-uploader/file-uploader-in-attachment/file-item.tsx @@ -15,11 +15,11 @@ import ImagePreview from '@/app/components/base/image-uploader/image-preview' import ProgressCircle from '@/app/components/base/progress-bar/progress-circle' import { SupportUploadFileTypes } from '@/app/components/workflow/types' import { cn } from '@/utils/classnames' +import { downloadUrl } from '@/utils/download' import { formatFileSize } from '@/utils/format' import FileImageRender from '../file-image-render' import FileTypeIcon from '../file-type-icon' import { - downloadFile, fileIsUploaded, getFileAppearanceType, getFileExtension, @@ -140,7 +140,7 @@ const FileInAttachmentItem = ({ showDownloadAction && ( { e.stopPropagation() - downloadFile(url || base64Url || '', name) + downloadUrl({ url: 
url || base64Url || '', fileName: name, target: '_blank' }) }} > diff --git a/web/app/components/base/file-uploader/file-uploader-in-chat-input/file-image-item.tsx b/web/app/components/base/file-uploader/file-uploader-in-chat-input/file-image-item.tsx index 77dc3e35b8..d9118aac4f 100644 --- a/web/app/components/base/file-uploader/file-uploader-in-chat-input/file-image-item.tsx +++ b/web/app/components/base/file-uploader/file-uploader-in-chat-input/file-image-item.tsx @@ -8,9 +8,9 @@ import Button from '@/app/components/base/button' import { ReplayLine } from '@/app/components/base/icons/src/vender/other' import ImagePreview from '@/app/components/base/image-uploader/image-preview' import ProgressCircle from '@/app/components/base/progress-bar/progress-circle' +import { downloadUrl } from '@/utils/download' import FileImageRender from '../file-image-render' import { - downloadFile, fileIsUploaded, } from '../utils' @@ -85,7 +85,7 @@ const FileImageItem = ({ className="absolute bottom-0.5 right-0.5 flex h-6 w-6 items-center justify-center rounded-lg bg-components-actionbar-bg shadow-md" onClick={(e) => { e.stopPropagation() - downloadFile(download_url || '', name) + downloadUrl({ url: download_url || '', fileName: name, target: '_blank' }) }} > diff --git a/web/app/components/base/file-uploader/file-uploader-in-chat-input/file-item.tsx b/web/app/components/base/file-uploader/file-uploader-in-chat-input/file-item.tsx index 828864239a..af32f917b9 100644 --- a/web/app/components/base/file-uploader/file-uploader-in-chat-input/file-item.tsx +++ b/web/app/components/base/file-uploader/file-uploader-in-chat-input/file-item.tsx @@ -12,10 +12,10 @@ import VideoPreview from '@/app/components/base/file-uploader/video-preview' import { ReplayLine } from '@/app/components/base/icons/src/vender/other' import ProgressCircle from '@/app/components/base/progress-bar/progress-circle' import { cn } from '@/utils/classnames' +import { downloadUrl } from '@/utils/download' import { formatFileSize } from '@/utils/format' import FileTypeIcon from '../file-type-icon' import { - downloadFile, fileIsUploaded, getFileAppearanceType, getFileExtension, @@ -100,7 +100,7 @@ const FileItem = ({ className="absolute -right-1 -top-1 hidden group-hover/file-item:flex" onClick={(e) => { e.stopPropagation() - downloadFile(download_url || '', name) + downloadUrl({ url: download_url || '', fileName: name, target: '_blank' }) }} > diff --git a/web/app/components/base/file-uploader/utils.spec.ts b/web/app/components/base/file-uploader/utils.spec.ts index de167a8c25..f69b3c27f5 100644 --- a/web/app/components/base/file-uploader/utils.spec.ts +++ b/web/app/components/base/file-uploader/utils.spec.ts @@ -1,4 +1,3 @@ -import type { MockInstance } from 'vitest' import mime from 'mime' import { SupportUploadFileTypes } from '@/app/components/workflow/types' import { upload } from '@/service/base' @@ -6,7 +5,6 @@ import { TransferMethod } from '@/types/app' import { FILE_EXTS } from '../prompt-editor/constants' import { FileAppearanceTypeEnum } from './types' import { - downloadFile, fileIsUploaded, fileUpload, getFileAppearanceType, @@ -782,74 +780,4 @@ describe('file-uploader utils', () => { } as any)).toBe(true) }) }) - - describe('downloadFile', () => { - let mockAnchor: HTMLAnchorElement - let createElementMock: MockInstance - let appendChildMock: MockInstance - let removeChildMock: MockInstance - - beforeEach(() => { - // Mock createElement and appendChild - mockAnchor = { - href: '', - download: '', - style: { display: '' }, - 
target: '', - title: '', - click: vi.fn(), - } as unknown as HTMLAnchorElement - - createElementMock = vi.spyOn(document, 'createElement').mockReturnValue(mockAnchor as any) - appendChildMock = vi.spyOn(document.body, 'appendChild').mockImplementation((node: Node) => { - return node - }) - removeChildMock = vi.spyOn(document.body, 'removeChild').mockImplementation((node: Node) => { - return node - }) - }) - - afterEach(() => { - vi.resetAllMocks() - }) - - it('should create and trigger download with correct attributes', () => { - const url = 'https://example.com/test.pdf' - const filename = 'test.pdf' - - downloadFile(url, filename) - - // Verify anchor element was created with correct properties - expect(createElementMock).toHaveBeenCalledWith('a') - expect(mockAnchor.href).toBe(url) - expect(mockAnchor.download).toBe(filename) - expect(mockAnchor.style.display).toBe('none') - expect(mockAnchor.target).toBe('_blank') - expect(mockAnchor.title).toBe(filename) - - // Verify DOM operations - expect(appendChildMock).toHaveBeenCalledWith(mockAnchor) - expect(mockAnchor.click).toHaveBeenCalled() - expect(removeChildMock).toHaveBeenCalledWith(mockAnchor) - }) - - it('should handle empty filename', () => { - const url = 'https://example.com/test.pdf' - const filename = '' - - downloadFile(url, filename) - - expect(mockAnchor.download).toBe('') - expect(mockAnchor.title).toBe('') - }) - - it('should handle empty url', () => { - const url = '' - const filename = 'test.pdf' - - downloadFile(url, filename) - - expect(mockAnchor.href).toBe('') - }) - }) }) diff --git a/web/app/components/base/file-uploader/utils.ts b/web/app/components/base/file-uploader/utils.ts index 5d5754b8fe..23e460db51 100644 --- a/web/app/components/base/file-uploader/utils.ts +++ b/web/app/components/base/file-uploader/utils.ts @@ -249,15 +249,3 @@ export const fileIsUploaded = (file: FileEntity) => { if (file.transferMethod === TransferMethod.remote_url && file.progress === 100) return true } - -export const downloadFile = (url: string, filename: string) => { - const anchor = document.createElement('a') - anchor.href = url - anchor.download = filename - anchor.style.display = 'none' - anchor.target = '_blank' - anchor.title = filename - document.body.appendChild(anchor) - anchor.click() - document.body.removeChild(anchor) -} diff --git a/web/app/components/base/image-uploader/image-preview.tsx b/web/app/components/base/image-uploader/image-preview.tsx index b6a07c60aa..0641af3d79 100644 --- a/web/app/components/base/image-uploader/image-preview.tsx +++ b/web/app/components/base/image-uploader/image-preview.tsx @@ -8,6 +8,7 @@ import { createPortal } from 'react-dom' import { useHotkeys } from 'react-hotkeys-hook' import Toast from '@/app/components/base/toast' import Tooltip from '@/app/components/base/tooltip' +import { downloadUrl } from '@/utils/download' type ImagePreviewProps = { url: string @@ -60,27 +61,14 @@ const ImagePreview: FC = ({ const downloadImage = () => { // Open in a new window, considering the case when the page is inside an iframe - if (url.startsWith('http') || url.startsWith('https')) { - const a = document.createElement('a') - a.href = url - a.target = '_blank' - a.download = title - a.click() - } - else if (url.startsWith('data:image')) { - // Base64 image - const a = document.createElement('a') - a.href = url - a.target = '_blank' - a.download = title - a.click() - } - else { - Toast.notify({ - type: 'error', - message: `Unable to open image: ${url}`, - }) + if (url.startsWith('http') || 
url.startsWith('https') || url.startsWith('data:image')) { + downloadUrl({ url, fileName: title, target: '_blank' }) + return } + Toast.notify({ + type: 'error', + message: `Unable to open image: ${url}`, + }) } const zoomIn = () => { @@ -135,12 +123,7 @@ const ImagePreview: FC = ({ catch (err) { console.error('Failed to copy image:', err) - const link = document.createElement('a') - link.href = url - link.download = `${title}.png` - document.body.appendChild(link) - link.click() - document.body.removeChild(link) + downloadUrl({ url, fileName: `${title}.png` }) Toast.notify({ type: 'info', @@ -215,6 +198,7 @@ const ImagePreview: FC = ({ tabIndex={-1} > { } + {/* eslint-disable-next-line next/no-img-element */} {title} { }, [isShow]) const downloadQR = () => { - const canvas = document.getElementsByTagName('canvas')[0] - const link = document.createElement('a') - link.download = 'qrcode.png' - link.href = canvas.toDataURL() - link.click() + const canvas = qrCodeRef.current?.querySelector('canvas') + if (!(canvas instanceof HTMLCanvasElement)) + return + downloadUrl({ url: canvas.toDataURL(), fileName: 'qrcode.png' }) } const handlePanelClick = (event: React.MouseEvent) => { diff --git a/web/app/components/datasets/common/document-status-with-action/index-failed.spec.tsx b/web/app/components/datasets/common/document-status-with-action/index-failed.spec.tsx index 43255ce908..27070aaaed 100644 --- a/web/app/components/datasets/common/document-status-with-action/index-failed.spec.tsx +++ b/web/app/components/datasets/common/document-status-with-action/index-failed.spec.tsx @@ -179,8 +179,10 @@ describe('RetryButton (IndexFailed)', () => { }, false), ) - // Delay the response to test loading state - mockRetryErrorDocs.mockImplementation(() => new Promise(resolve => setTimeout(() => resolve({ result: 'success' }), 100))) + let resolveRetry: ((value: { result: 'success' }) => void) | undefined + mockRetryErrorDocs.mockImplementation(() => new Promise((resolve) => { + resolveRetry = resolve + })) render() @@ -193,6 +195,11 @@ describe('RetryButton (IndexFailed)', () => { expect(button).toHaveClass('cursor-not-allowed') expect(button).toHaveClass('text-text-disabled') }) + + resolveRetry?.({ result: 'success' }) + await waitFor(() => { + expect(mockRefetch).toHaveBeenCalled() + }) }) }) diff --git a/web/app/components/datasets/create-from-pipeline/list/template-card/index.spec.tsx b/web/app/components/datasets/create-from-pipeline/list/template-card/index.spec.tsx index 290f7af99b..036370abd3 100644 --- a/web/app/components/datasets/create-from-pipeline/list/template-card/index.spec.tsx +++ b/web/app/components/datasets/create-from-pipeline/list/template-card/index.spec.tsx @@ -23,9 +23,10 @@ vi.mock('@/app/components/base/toast', () => ({ }, })) -// Mock downloadFile utility -vi.mock('@/utils/format', () => ({ - downloadFile: vi.fn(), +// Mock download utilities +vi.mock('@/utils/download', () => ({ + downloadBlob: vi.fn(), + downloadUrl: vi.fn(), })) // Capture Confirm callbacks @@ -502,8 +503,8 @@ describe('TemplateCard', () => { }) }) - it('should call downloadFile on successful export', async () => { - const { downloadFile } = await import('@/utils/format') + it('should call downloadBlob on successful export', async () => { + const { downloadBlob } = await import('@/utils/download') mockExportPipelineDSL.mockImplementation((_id, callbacks) => { callbacks.onSuccess({ data: 'yaml_content' }) return Promise.resolve() @@ -514,7 +515,7 @@ describe('TemplateCard', () => { 
fireEvent.click(exportButton) await waitFor(() => { - expect(downloadFile).toHaveBeenCalledWith(expect.objectContaining({ + expect(downloadBlob).toHaveBeenCalledWith(expect.objectContaining({ fileName: 'Test Pipeline.pipeline', })) }) diff --git a/web/app/components/datasets/create-from-pipeline/list/template-card/index.tsx b/web/app/components/datasets/create-from-pipeline/list/template-card/index.tsx index 662ca72080..b3395a83d5 100644 --- a/web/app/components/datasets/create-from-pipeline/list/template-card/index.tsx +++ b/web/app/components/datasets/create-from-pipeline/list/template-card/index.tsx @@ -16,7 +16,7 @@ import { useInvalidCustomizedTemplateList, usePipelineTemplateById, } from '@/service/use-pipeline' -import { downloadFile } from '@/utils/format' +import { downloadBlob } from '@/utils/download' import Actions from './actions' import Content from './content' import Details from './details' @@ -108,10 +108,7 @@ const TemplateCard = ({ await exportPipelineDSL(pipeline.id, { onSuccess: (res) => { const blob = new Blob([res.data], { type: 'application/yaml' }) - downloadFile({ - data: blob, - fileName: `${pipeline.name}.pipeline`, - }) + downloadBlob({ data: blob, fileName: `${pipeline.name}.pipeline` }) Toast.notify({ type: 'success', message: t('exportDSL.successTip', { ns: 'datasetPipeline' }), diff --git a/web/app/components/datasets/create/website/watercrawl/index.tsx b/web/app/components/datasets/create/website/watercrawl/index.tsx index 0df2dbe8a1..e68a89ae5a 100644 --- a/web/app/components/datasets/create/website/watercrawl/index.tsx +++ b/web/app/components/datasets/create/website/watercrawl/index.tsx @@ -125,11 +125,25 @@ const WaterCrawl: FC = ({ await sleep(2500) return await waitForCrawlFinished(jobId) } - catch (e: any) { - const errorBody = await e.json() + catch (error: unknown) { + let errorMessage = '' + + const maybeErrorWithJson = error as { json?: () => Promise, message?: unknown } | null + if (maybeErrorWithJson?.json) { + try { + const errorBody = await maybeErrorWithJson.json() as { message?: unknown } | null + if (typeof errorBody?.message === 'string') + errorMessage = errorBody.message + } + catch {} + } + + if (!errorMessage && typeof maybeErrorWithJson?.message === 'string') + errorMessage = maybeErrorWithJson.message + return { isError: true, - errorMessage: errorBody.message, + errorMessage, data: { data: [], }, diff --git a/web/app/components/datasets/list/dataset-card/hooks/use-dataset-card-state.ts b/web/app/components/datasets/list/dataset-card/hooks/use-dataset-card-state.ts index ad68a1df1c..4bd8357f1c 100644 --- a/web/app/components/datasets/list/dataset-card/hooks/use-dataset-card-state.ts +++ b/web/app/components/datasets/list/dataset-card/hooks/use-dataset-card-state.ts @@ -5,6 +5,7 @@ import { useTranslation } from 'react-i18next' import Toast from '@/app/components/base/toast' import { useCheckDatasetUsage, useDeleteDataset } from '@/service/use-dataset-card' import { useExportPipelineDSL } from '@/service/use-pipeline' +import { downloadBlob } from '@/utils/download' type ModalState = { showRenameModal: boolean @@ -65,13 +66,8 @@ export const useDatasetCardState = ({ dataset, onSuccess }: UseDatasetCardStateO pipelineId: pipeline_id, include, }) - const a = document.createElement('a') const file = new Blob([data], { type: 'application/yaml' }) - const url = URL.createObjectURL(file) - a.href = url - a.download = `${name}.pipeline` - a.click() - URL.revokeObjectURL(url) + downloadBlob({ data: file, fileName: `${name}.pipeline` }) } 
catch { Toast.notify({ type: 'error', message: t('exportFailed', { ns: 'app' }) }) diff --git a/web/app/components/header/account-dropdown/compliance.tsx b/web/app/components/header/account-dropdown/compliance.tsx index 562914dd07..6bc5b5c3f1 100644 --- a/web/app/components/header/account-dropdown/compliance.tsx +++ b/web/app/components/header/account-dropdown/compliance.tsx @@ -10,6 +10,7 @@ import { useModalContext } from '@/context/modal-context' import { useProviderContext } from '@/context/provider-context' import { getDocDownloadUrl } from '@/service/common' import { cn } from '@/utils/classnames' +import { downloadUrl } from '@/utils/download' import Button from '../../base/button' import Gdpr from '../../base/icons/src/public/common/Gdpr' import Iso from '../../base/icons/src/public/common/Iso' @@ -47,9 +48,7 @@ const UpgradeOrDownload: FC = ({ doc_name }) => { mutationFn: async () => { try { const ret = await getDocDownloadUrl(doc_name) - const a = document.createElement('a') - a.href = ret.url - a.click() + downloadUrl({ url: ret.url }) Toast.notify({ type: 'success', message: t('operation.downloadSuccess', { ns: 'common' }), diff --git a/web/app/components/rag-pipeline/hooks/use-DSL.ts b/web/app/components/rag-pipeline/hooks/use-DSL.ts index 1660d555eb..5c0f9def1c 100644 --- a/web/app/components/rag-pipeline/hooks/use-DSL.ts +++ b/web/app/components/rag-pipeline/hooks/use-DSL.ts @@ -11,6 +11,7 @@ import { useWorkflowStore } from '@/app/components/workflow/store' import { useEventEmitterContextContext } from '@/context/event-emitter' import { useExportPipelineDSL } from '@/service/use-pipeline' import { fetchWorkflowDraft } from '@/service/workflow' +import { downloadBlob } from '@/utils/download' import { useNodesSyncDraft } from './use-nodes-sync-draft' export const useDSL = () => { @@ -37,13 +38,8 @@ export const useDSL = () => { pipelineId, include, }) - const a = document.createElement('a') const file = new Blob([data], { type: 'application/yaml' }) - const url = URL.createObjectURL(file) - a.href = url - a.download = `${knowledgeName}.pipeline` - a.click() - URL.revokeObjectURL(url) + downloadBlob({ data: file, fileName: `${knowledgeName}.pipeline` }) } catch { notify({ type: 'error', message: t('exportFailed', { ns: 'app' }) }) diff --git a/web/app/components/workflow-app/hooks/use-DSL.ts b/web/app/components/workflow-app/hooks/use-DSL.ts index 6c01509bc5..939e43b554 100644 --- a/web/app/components/workflow-app/hooks/use-DSL.ts +++ b/web/app/components/workflow-app/hooks/use-DSL.ts @@ -11,6 +11,7 @@ import { import { useEventEmitterContextContext } from '@/context/event-emitter' import { exportAppConfig } from '@/service/apps' import { fetchWorkflowDraft } from '@/service/workflow' +import { downloadBlob } from '@/utils/download' import { useNodesSyncDraft } from './use-nodes-sync-draft' export const useDSL = () => { @@ -37,13 +38,8 @@ export const useDSL = () => { include, workflowID: workflowId, }) - const a = document.createElement('a') const file = new Blob([data], { type: 'application/yaml' }) - const url = URL.createObjectURL(file) - a.href = url - a.download = `${appDetail.name}.yml` - a.click() - URL.revokeObjectURL(url) + downloadBlob({ data: file, fileName: `${appDetail.name}.yml` }) } catch { notify({ type: 'error', message: t('exportFailed', { ns: 'app' }) }) diff --git a/web/app/components/workflow/block-selector/market-place-plugin/action.tsx b/web/app/components/workflow/block-selector/market-place-plugin/action.tsx index b8300d6f2b..abdbae1b4c 100644 --- 
a/web/app/components/workflow/block-selector/market-place-plugin/action.tsx +++ b/web/app/components/workflow/block-selector/market-place-plugin/action.tsx @@ -15,7 +15,7 @@ import { } from '@/app/components/base/portal-to-follow-elem' import { useDownloadPlugin } from '@/service/use-plugins' import { cn } from '@/utils/classnames' -import { downloadFile } from '@/utils/format' +import { downloadBlob } from '@/utils/download' import { getMarketplaceUrl } from '@/utils/var' type Props = { @@ -67,7 +67,7 @@ const OperationDropdown: FC = ({ if (!needDownload || !blob) return const fileName = `${author}-${name}_${version}.zip` - downloadFile({ data: blob, fileName }) + downloadBlob({ data: blob, fileName }) setNeedDownload(false) queryClient.removeQueries({ queryKey: ['plugins', 'downloadPlugin', downloadInfo], diff --git a/web/app/components/workflow/operator/more-actions.tsx b/web/app/components/workflow/operator/more-actions.tsx index e9fc1ea87d..7e6617e84b 100644 --- a/web/app/components/workflow/operator/more-actions.tsx +++ b/web/app/components/workflow/operator/more-actions.tsx @@ -19,6 +19,7 @@ import { } from '@/app/components/base/portal-to-follow-elem' import { useStore } from '@/app/components/workflow/store' import { cn } from '@/utils/classnames' +import { downloadUrl } from '@/utils/download' import { useNodesReadOnly } from '../hooks' import TipPopup from './tip-popup' @@ -146,26 +147,14 @@ const MoreActions: FC = () => { } } + const fileName = `${filename}.${type}` + if (currentWorkflow) { setPreviewUrl(dataUrl) - setPreviewTitle(`${filename}.${type}`) + setPreviewTitle(fileName) + } - const link = document.createElement('a') - link.href = dataUrl - link.download = `${filename}.${type}` - document.body.appendChild(link) - link.click() - document.body.removeChild(link) - } - else { - // For current view, just download - const link = document.createElement('a') - link.href = dataUrl - link.download = `${filename}.${type}` - document.body.appendChild(link) - link.click() - document.body.removeChild(link) - } + downloadUrl({ url: dataUrl, fileName }) } catch (error) { console.error('Export image failed:', error) diff --git a/web/eslint-suppressions.json b/web/eslint-suppressions.json index abee200f66..6193a8ad4e 100644 --- a/web/eslint-suppressions.json +++ b/web/eslint-suppressions.json @@ -994,7 +994,7 @@ "count": 1 }, "ts/no-explicit-any": { - "count": 3 + "count": 2 } }, "app/components/base/file-uploader/utils.ts": { @@ -1661,7 +1661,7 @@ "count": 1 }, "ts/no-explicit-any": { - "count": 5 + "count": 4 } }, "app/components/datasets/create/website/watercrawl/options.tsx": { @@ -4376,11 +4376,6 @@ "count": 1 } }, - "utils/format.spec.ts": { - "ts/no-explicit-any": { - "count": 1 - } - }, "utils/get-icon.spec.ts": { "ts/no-explicit-any": { "count": 2 diff --git a/web/utils/download.spec.ts b/web/utils/download.spec.ts new file mode 100644 index 0000000000..ff41ddfff7 --- /dev/null +++ b/web/utils/download.spec.ts @@ -0,0 +1,75 @@ +import { downloadBlob, downloadUrl } from './download' + +describe('downloadUrl', () => { + let mockAnchor: HTMLAnchorElement + + beforeEach(() => { + mockAnchor = { + href: '', + download: '', + rel: '', + target: '', + style: { display: '' }, + click: vi.fn(), + remove: vi.fn(), + } as unknown as HTMLAnchorElement + + vi.spyOn(document, 'createElement').mockReturnValue(mockAnchor) + vi.spyOn(document.body, 'appendChild').mockImplementation((node: Node) => node) + }) + + afterEach(() => { + vi.restoreAllMocks() + }) + + it('should create a link and 
trigger a download correctly', () => { + downloadUrl({ url: 'https://example.com/file.txt', fileName: 'file.txt', target: '_blank' }) + + expect(mockAnchor.href).toBe('https://example.com/file.txt') + expect(mockAnchor.download).toBe('file.txt') + expect(mockAnchor.rel).toBe('noopener noreferrer') + expect(mockAnchor.target).toBe('_blank') + expect(mockAnchor.style.display).toBe('none') + expect(mockAnchor.click).toHaveBeenCalled() + expect(mockAnchor.remove).toHaveBeenCalled() + }) + + it('should skip when url is empty', () => { + downloadUrl({ url: '' }) + expect(document.createElement).not.toHaveBeenCalled() + }) +}) + +describe('downloadBlob', () => { + it('should create a blob url, trigger download, and revoke url', () => { + const blob = new Blob(['test'], { type: 'text/plain' }) + const mockUrl = 'blob:mock-url' + const createObjectURLMock = vi.spyOn(window.URL, 'createObjectURL').mockReturnValue(mockUrl) + const revokeObjectURLMock = vi.spyOn(window.URL, 'revokeObjectURL').mockImplementation(() => {}) + + const mockAnchor = { + href: '', + download: '', + rel: '', + target: '', + style: { display: '' }, + click: vi.fn(), + remove: vi.fn(), + } as unknown as HTMLAnchorElement + + vi.spyOn(document, 'createElement').mockReturnValue(mockAnchor) + vi.spyOn(document.body, 'appendChild').mockImplementation((node: Node) => node) + + downloadBlob({ data: blob, fileName: 'file.txt' }) + + expect(createObjectURLMock).toHaveBeenCalledWith(blob) + expect(mockAnchor.href).toBe(mockUrl) + expect(mockAnchor.download).toBe('file.txt') + expect(mockAnchor.rel).toBe('noopener noreferrer') + expect(mockAnchor.click).toHaveBeenCalled() + expect(mockAnchor.remove).toHaveBeenCalled() + expect(revokeObjectURLMock).toHaveBeenCalledWith(mockUrl) + + vi.restoreAllMocks() + }) +}) diff --git a/web/utils/format.spec.ts b/web/utils/format.spec.ts index 3a1709dbdc..2796854e34 100644 --- a/web/utils/format.spec.ts +++ b/web/utils/format.spec.ts @@ -1,4 +1,4 @@ -import { downloadFile, formatFileSize, formatNumber, formatNumberAbbreviated, formatTime } from './format' +import { formatFileSize, formatNumber, formatNumberAbbreviated, formatTime } from './format' describe('formatNumber', () => { it('should correctly format integers', () => { @@ -82,49 +82,6 @@ describe('formatTime', () => { expect(formatTime(7200)).toBe('2.00 h') }) }) -describe('downloadFile', () => { - it('should create a link and trigger a download correctly', () => { - // Mock data - const blob = new Blob(['test content'], { type: 'text/plain' }) - const fileName = 'test-file.txt' - const mockUrl = 'blob:mockUrl' - - // Mock URL.createObjectURL - const createObjectURLMock = vi.fn().mockReturnValue(mockUrl) - const revokeObjectURLMock = vi.fn() - Object.defineProperty(window.URL, 'createObjectURL', { value: createObjectURLMock }) - Object.defineProperty(window.URL, 'revokeObjectURL', { value: revokeObjectURLMock }) - - // Mock createElement and appendChild - const mockLink = { - href: '', - download: '', - click: vi.fn(), - remove: vi.fn(), - } - const createElementMock = vi.spyOn(document, 'createElement').mockReturnValue(mockLink as any) - const appendChildMock = vi.spyOn(document.body, 'appendChild').mockImplementation((node: Node) => { - return node - }) - - // Call the function - downloadFile({ data: blob, fileName }) - - // Assertions - expect(createObjectURLMock).toHaveBeenCalledWith(blob) - expect(createElementMock).toHaveBeenCalledWith('a') - expect(mockLink.href).toBe(mockUrl) - expect(mockLink.download).toBe(fileName) - 
expect(appendChildMock).toHaveBeenCalledWith(mockLink) - expect(mockLink.click).toHaveBeenCalled() - expect(mockLink.remove).toHaveBeenCalled() - expect(revokeObjectURLMock).toHaveBeenCalledWith(mockUrl) - - // Clean up mocks - vi.restoreAllMocks() - }) -}) - describe('formatNumberAbbreviated', () => { it('should return number as string when less than 1000', () => { expect(formatNumberAbbreviated(0)).toBe('0') diff --git a/web/utils/format.ts b/web/utils/format.ts index ce813d3999..d6968e0ef1 100644 --- a/web/utils/format.ts +++ b/web/utils/format.ts @@ -100,17 +100,6 @@ export const formatTime = (seconds: number) => { return `${seconds.toFixed(2)} ${units[index]}` } -export const downloadFile = ({ data, fileName }: { data: Blob, fileName: string }) => { - const url = window.URL.createObjectURL(data) - const a = document.createElement('a') - a.href = url - a.download = fileName - document.body.appendChild(a) - a.click() - a.remove() - window.URL.revokeObjectURL(url) -} - /** * Formats a number into a readable string using "k", "M", or "B" suffix. * @example From 2626e773d90c5dd71b705014187fdcd0e592325b Mon Sep 17 00:00:00 2001 From: -LAN- Date: Thu, 29 Jan 2026 16:41:09 +0800 Subject: [PATCH 09/15] chore: Set plugin schema cache TTL to 1h (#31708) --- api/.env.example | 2 +- api/configs/feature/__init__.py | 2 +- docker/.env.example | 1 + docker/docker-compose.yaml | 1 + 4 files changed, 4 insertions(+), 2 deletions(-) diff --git a/api/.env.example b/api/.env.example index c3b1474549..8bd2c706c1 100644 --- a/api/.env.example +++ b/api/.env.example @@ -617,6 +617,7 @@ PLUGIN_DAEMON_URL=http://127.0.0.1:5002 PLUGIN_REMOTE_INSTALL_PORT=5003 PLUGIN_REMOTE_INSTALL_HOST=localhost PLUGIN_MAX_PACKAGE_SIZE=15728640 +PLUGIN_MODEL_SCHEMA_CACHE_TTL=3600 INNER_API_KEY_FOR_PLUGIN=QaHbTe77CtuXmsfyhR7+vRjI/+XbV1AaFy691iy+kGDv2Jvy0/eAh8Y1 # Marketplace configuration @@ -716,4 +717,3 @@ SANDBOX_EXPIRED_RECORDS_CLEAN_GRACEFUL_PERIOD=21 SANDBOX_EXPIRED_RECORDS_CLEAN_BATCH_SIZE=1000 SANDBOX_EXPIRED_RECORDS_RETENTION_DAYS=30 SANDBOX_EXPIRED_RECORDS_CLEAN_TASK_LOCK_TTL=90000 - diff --git a/api/configs/feature/__init__.py b/api/configs/feature/__init__.py index 4343a056dd..d97e9a0440 100644 --- a/api/configs/feature/__init__.py +++ b/api/configs/feature/__init__.py @@ -245,7 +245,7 @@ class PluginConfig(BaseSettings): PLUGIN_MODEL_SCHEMA_CACHE_TTL: PositiveInt = Field( description="TTL in seconds for caching plugin model schemas in Redis", - default=24 * 60 * 60, + default=60 * 60, ) diff --git a/docker/.env.example b/docker/.env.example index b6c04fdb77..41a0205bf5 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -1375,6 +1375,7 @@ PLUGIN_DAEMON_PORT=5002 PLUGIN_DAEMON_KEY=lYkiYYT6owG+71oLerGzA7GXCgOT++6ovaezWAjpCjf+Sjc3ZtU+qUEi PLUGIN_DAEMON_URL=http://plugin_daemon:5002 PLUGIN_MAX_PACKAGE_SIZE=52428800 +PLUGIN_MODEL_SCHEMA_CACHE_TTL=3600 PLUGIN_PPROF_ENABLED=false PLUGIN_DEBUGGING_HOST=0.0.0.0 diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml index 902ca3103c..2e97891a60 100644 --- a/docker/docker-compose.yaml +++ b/docker/docker-compose.yaml @@ -589,6 +589,7 @@ x-shared-env: &shared-api-worker-env PLUGIN_DAEMON_KEY: ${PLUGIN_DAEMON_KEY:-lYkiYYT6owG+71oLerGzA7GXCgOT++6ovaezWAjpCjf+Sjc3ZtU+qUEi} PLUGIN_DAEMON_URL: ${PLUGIN_DAEMON_URL:-http://plugin_daemon:5002} PLUGIN_MAX_PACKAGE_SIZE: ${PLUGIN_MAX_PACKAGE_SIZE:-52428800} + PLUGIN_MODEL_SCHEMA_CACHE_TTL: ${PLUGIN_MODEL_SCHEMA_CACHE_TTL:-3600} PLUGIN_PPROF_ENABLED: ${PLUGIN_PPROF_ENABLED:-false} PLUGIN_DEBUGGING_HOST: 
${PLUGIN_DEBUGGING_HOST:-0.0.0.0} PLUGIN_DEBUGGING_PORT: ${PLUGIN_DEBUGGING_PORT:-5003} From 62f46fc55c434862c1418d0f86e378a1d6adf344 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=9B=90=E7=B2=92=20Yanli?= Date: Thu, 29 Jan 2026 16:45:07 +0800 Subject: [PATCH 10/15] chore(ty): Bootstrap ty type checking for api (#31681) --- .github/workflows/style.yml | 8 ++------ Makefile | 10 ++++++---- api/ty.toml | 24 +++++++++++++++++++++++- 3 files changed, 31 insertions(+), 11 deletions(-) diff --git a/.github/workflows/style.yml b/.github/workflows/style.yml index fdc05d1d65..cbd6edf94b 100644 --- a/.github/workflows/style.yml +++ b/.github/workflows/style.yml @@ -47,13 +47,9 @@ jobs: if: steps.changed-files.outputs.any_changed == 'true' run: uv run --directory api --dev lint-imports - - name: Run Basedpyright Checks + - name: Run Type Checks if: steps.changed-files.outputs.any_changed == 'true' - run: dev/basedpyright-check - - - name: Run Mypy Type Checks - if: steps.changed-files.outputs.any_changed == 'true' - run: uv --directory api run mypy --exclude-gitignore --exclude 'tests/' --exclude 'migrations/' --check-untyped-defs --disable-error-code=import-untyped . + run: make type-check - name: Dotenv check if: steps.changed-files.outputs.any_changed == 'true' diff --git a/Makefile b/Makefile index e92a7b1314..20cede9a5e 100644 --- a/Makefile +++ b/Makefile @@ -68,9 +68,11 @@ lint: @echo "✅ Linting complete" type-check: - @echo "📝 Running type check with basedpyright..." - @uv run --directory api --dev basedpyright - @echo "✅ Type check complete" + @echo "📝 Running type checks (basedpyright + mypy + ty)..." + @./dev/basedpyright-check $(PATH_TO_CHECK) + @uv --directory api run mypy --exclude-gitignore --exclude 'tests/' --exclude 'migrations/' --check-untyped-defs --disable-error-code=import-untyped . + @cd api && uv run ty check + @echo "✅ Type checks complete" test: @echo "🧪 Running backend unit tests..." 
@@ -130,7 +132,7 @@ help: @echo " make format - Format code with ruff" @echo " make check - Check code with ruff" @echo " make lint - Format, fix, and lint code (ruff, imports, dotenv)" - @echo " make type-check - Run type checking with basedpyright" + @echo " make type-check - Run type checks (basedpyright, mypy, ty)" @echo " make test - Run backend unit tests (or TARGET_TESTS=./api/tests/)" @echo "" @echo "Docker Build Targets:" diff --git a/api/ty.toml b/api/ty.toml index bb4ff5bbcf..640ed6cdee 100644 --- a/api/ty.toml +++ b/api/ty.toml @@ -1,11 +1,33 @@ [src] exclude = [ - # TODO: enable when violations fixed + # deps groups (A1/A2/B/C/D/E) + # A1: foundational runtime typing / provider plumbing + "core/mcp/session", + "core/model_runtime/model_providers", + "core/workflow/nodes/protocols.py", + "libs/gmpy2_pkcs10aep_cipher.py", + # A2: workflow engine/nodes + "core/workflow", + "core/app/workflow", + "core/helper/code_executor", + # B: app runner + prompt + "core/prompt", + "core/app/apps/base_app_runner.py", "core/app/apps/workflow_app_runner.py", + # C: services/controllers/fields/libs + "services", "controllers/console/app", "controllers/console/explore", "controllers/console/datasets", "controllers/console/workspace", + "controllers/service_api/wraps.py", + "fields/conversation_fields.py", + "libs/external_api.py", + # D: observability + integrations + "core/ops", + "extensions", + # E: vector DB integrations + "core/rag/datasource/vdb", # non-producition or generated code "migrations", "tests", From 7d1ad7e03ad008c810dfa6851729d0d9b0046ca8 Mon Sep 17 00:00:00 2001 From: CrabSAMA <40541269+CrabSAMA@users.noreply.github.com> Date: Thu, 29 Jan 2026 17:57:46 +0800 Subject: [PATCH 11/15] refactor: unified shortcut keys display using component (#31713) --- .../components/app-sidebar/toggle-button.tsx | 15 ++------------- web/app/components/app/app-publisher/index.tsx | 11 +++-------- .../components/app/create-app-modal/index.tsx | 8 +++----- .../app/create-from-dsl-modal/index.tsx | 8 +++----- .../detail/completed/common/action-buttons.tsx | 10 ++++------ .../explore/create-app-modal/index.tsx | 8 +++----- web/app/components/goto-anything/index.tsx | 12 +++--------- .../rag-pipeline-header/publisher/popup.tsx | 11 +++-------- .../components/rag-pipeline-header/run-mode.tsx | 11 ++--------- .../workflow-onboarding-modal/index.tsx | 5 ++--- web/app/components/workflow/header/run-mode.tsx | 11 ++--------- .../workflow/header/version-history-button.tsx | 14 +++----------- .../edit-card/advanced-actions.tsx | 17 +++-------------- web/app/components/workflow/shortcuts-name.tsx | 6 +++++- 14 files changed, 41 insertions(+), 106 deletions(-) diff --git a/web/app/components/app-sidebar/toggle-button.tsx b/web/app/components/app-sidebar/toggle-button.tsx index a6bdee4f78..cbfbeee452 100644 --- a/web/app/components/app-sidebar/toggle-button.tsx +++ b/web/app/components/app-sidebar/toggle-button.tsx @@ -4,7 +4,7 @@ import { useTranslation } from 'react-i18next' import { cn } from '@/utils/classnames' import Button from '../base/button' import Tooltip from '../base/tooltip' -import { getKeyboardKeyNameBySystem } from '../workflow/utils' +import ShortcutsName from '../workflow/shortcuts-name' type TooltipContentProps = { expand: boolean @@ -20,18 +20,7 @@ const TooltipContent = ({ return (
{expand ? t('sidebar.collapseSidebar', { ns: 'layout' }) : t('sidebar.expandSidebar', { ns: 'layout' })} -
- { - TOGGLE_SHORTCUT.map(key => ( - - {getKeyboardKeyNameBySystem(key)} - - )) - } -
+
) } diff --git a/web/app/components/app/app-publisher/index.tsx b/web/app/components/app/app-publisher/index.tsx index 0a026a680b..0fc364cb7e 100644 --- a/web/app/components/app/app-publisher/index.tsx +++ b/web/app/components/app/app-publisher/index.tsx @@ -49,7 +49,8 @@ import Divider from '../../base/divider' import Loading from '../../base/loading' import Toast from '../../base/toast' import Tooltip from '../../base/tooltip' -import { getKeyboardKeyCodeBySystem, getKeyboardKeyNameBySystem } from '../../workflow/utils' +import ShortcutsName from '../../workflow/shortcuts-name' +import { getKeyboardKeyCodeBySystem } from '../../workflow/utils' import AccessControl from '../app-access-control' import PublishWithMultipleModel from './publish-with-multiple-model' import SuggestedAction from './suggested-action' @@ -345,13 +346,7 @@ const AppPublisher = ({ : (
{t('common.publishUpdate', { ns: 'workflow' })} -
- {PUBLISH_SHORTCUT.map(key => ( - - {getKeyboardKeyNameBySystem(key)} - - ))} -
+
) } diff --git a/web/app/components/app/create-app-modal/index.tsx b/web/app/components/app/create-app-modal/index.tsx index e2b50cf030..66c7bce80c 100644 --- a/web/app/components/app/create-app-modal/index.tsx +++ b/web/app/components/app/create-app-modal/index.tsx @@ -1,7 +1,7 @@ 'use client' import type { AppIconSelection } from '../../base/app-icon-picker' -import { RiArrowRightLine, RiArrowRightSLine, RiCommandLine, RiCornerDownLeftLine, RiExchange2Fill } from '@remixicon/react' +import { RiArrowRightLine, RiArrowRightSLine, RiExchange2Fill } from '@remixicon/react' import { useDebounceFn, useKeyPress } from 'ahooks' import Image from 'next/image' @@ -29,6 +29,7 @@ import { getRedirection } from '@/utils/app-redirection' import { cn } from '@/utils/classnames' import { basePath } from '@/utils/var' import AppIconPicker from '../../base/app-icon-picker' +import ShortcutsName from '../../workflow/shortcuts-name' type CreateAppProps = { onSuccess: () => void @@ -269,10 +270,7 @@ function CreateApp({ onClose, onSuccess, onCreateFromTemplate, defaultAppMode }: diff --git a/web/app/components/app/create-from-dsl-modal/index.tsx b/web/app/components/app/create-from-dsl-modal/index.tsx index 838e9cc03f..04d8b1e754 100644 --- a/web/app/components/app/create-from-dsl-modal/index.tsx +++ b/web/app/components/app/create-from-dsl-modal/index.tsx @@ -1,7 +1,7 @@ 'use client' import type { MouseEventHandler } from 'react' -import { RiCloseLine, RiCommandLine, RiCornerDownLeftLine } from '@remixicon/react' +import { RiCloseLine } from '@remixicon/react' import { useDebounceFn, useKeyPress } from 'ahooks' import { noop } from 'es-toolkit/function' import { useRouter } from 'next/navigation' @@ -28,6 +28,7 @@ import { } from '@/service/apps' import { getRedirection } from '@/utils/app-redirection' import { cn } from '@/utils/classnames' +import ShortcutsName from '../../workflow/shortcuts-name' import Uploader from './uploader' type CreateFromDSLModalProps = { @@ -298,10 +299,7 @@ const CreateFromDSLModal = ({ show, onSuccess, onClose, activeTab = CreateFromDS className="gap-1" > {t('newApp.Create', { ns: 'app' })} -
- - -
+ diff --git a/web/app/components/datasets/documents/detail/completed/common/action-buttons.tsx b/web/app/components/datasets/documents/detail/completed/common/action-buttons.tsx index efb9848494..a0cbfea147 100644 --- a/web/app/components/datasets/documents/detail/completed/common/action-buttons.tsx +++ b/web/app/components/datasets/documents/detail/completed/common/action-buttons.tsx @@ -4,7 +4,8 @@ import * as React from 'react' import { useMemo } from 'react' import { useTranslation } from 'react-i18next' import Button from '@/app/components/base/button' -import { getKeyboardKeyCodeBySystem, getKeyboardKeyNameBySystem } from '@/app/components/workflow/utils' +import ShortcutsName from '@/app/components/workflow/shortcuts-name' +import { getKeyboardKeyCodeBySystem } from '@/app/components/workflow/utils' import { ChunkingMode } from '@/models/datasets' import { useDocumentContext } from '../../context' @@ -54,7 +55,7 @@ const ActionButtons: FC = ({ >
{t('operation.cancel', { ns: 'common' })} - ESC +
{(isParentChildParagraphMode && actionType === 'edit' && !isChildChunk && showRegenerationButton) @@ -76,10 +77,7 @@ const ActionButtons: FC = ({ >
{t('operation.save', { ns: 'common' })} -
- {getKeyboardKeyNameBySystem('ctrl')} - S -
+
diff --git a/web/app/components/explore/create-app-modal/index.tsx b/web/app/components/explore/create-app-modal/index.tsx index 9bffcc6c69..cfe59fb7f3 100644 --- a/web/app/components/explore/create-app-modal/index.tsx +++ b/web/app/components/explore/create-app-modal/index.tsx @@ -1,6 +1,6 @@ 'use client' import type { AppIconType } from '@/types/app' -import { RiCloseLine, RiCommandLine, RiCornerDownLeftLine } from '@remixicon/react' +import { RiCloseLine } from '@remixicon/react' import { useDebounceFn, useKeyPress } from 'ahooks' import { noop } from 'es-toolkit/function' import * as React from 'react' @@ -17,6 +17,7 @@ import AppsFull from '@/app/components/billing/apps-full-in-dialog' import { useProviderContext } from '@/context/provider-context' import { AppModeEnum } from '@/types/app' import AppIconPicker from '../../base/app-icon-picker' +import ShortcutsName from '../../workflow/shortcuts-name' export type CreateAppModalProps = { show: boolean @@ -198,10 +199,7 @@ const CreateAppModal = ({ onClick={handleSubmit} > {!isEditModal ? t('operation.create', { ns: 'common' }) : t('operation.save', { ns: 'common' })} -
- - -
+ diff --git a/web/app/components/goto-anything/index.tsx b/web/app/components/goto-anything/index.tsx index d34176e4c7..733e1d3162 100644 --- a/web/app/components/goto-anything/index.tsx +++ b/web/app/components/goto-anything/index.tsx @@ -12,7 +12,8 @@ import { useCallback, useEffect, useMemo, useRef, useState } from 'react' import { useTranslation } from 'react-i18next' import Input from '@/app/components/base/input' import Modal from '@/app/components/base/modal' -import { getKeyboardKeyCodeBySystem, isEventTargetInputArea, isMac } from '@/app/components/workflow/utils/common' +import ShortcutsName from '@/app/components/workflow/shortcuts-name' +import { getKeyboardKeyCodeBySystem, isEventTargetInputArea } from '@/app/components/workflow/utils/common' import { selectWorkflowNode } from '@/app/components/workflow/utils/node-navigation' import { useGetLanguage } from '@/context/i18n' import InstallFromMarketplace from '../plugins/install-plugin/install-from-marketplace' @@ -356,14 +357,7 @@ const GotoAnything: FC = ({ )} -
- - {isMac() ? '⌘' : 'Ctrl'} - - - K - -
+ diff --git a/web/app/components/rag-pipeline/components/rag-pipeline-header/publisher/popup.tsx b/web/app/components/rag-pipeline/components/rag-pipeline-header/publisher/popup.tsx index 0cdc9a0327..c66b293d8a 100644 --- a/web/app/components/rag-pipeline/components/rag-pipeline-header/publisher/popup.tsx +++ b/web/app/components/rag-pipeline/components/rag-pipeline-header/publisher/popup.tsx @@ -28,11 +28,12 @@ import { useToastContext } from '@/app/components/base/toast' import { useChecklistBeforePublish, } from '@/app/components/workflow/hooks' +import ShortcutsName from '@/app/components/workflow/shortcuts-name' import { useStore, useWorkflowStore, } from '@/app/components/workflow/store' -import { getKeyboardKeyCodeBySystem, getKeyboardKeyNameBySystem } from '@/app/components/workflow/utils' +import { getKeyboardKeyCodeBySystem } from '@/app/components/workflow/utils' import { useDatasetDetailContextWithSelector } from '@/context/dataset-detail' import { useDocLink } from '@/context/i18n' import { useModalContextSelector } from '@/context/modal-context' @@ -261,13 +262,7 @@ const Popup = () => { : (
{t('common.publishUpdate', { ns: 'workflow' })} -
- {PUBLISH_SHORTCUT.map(key => ( - - {getKeyboardKeyNameBySystem(key)} - - ))} -
+
) } diff --git a/web/app/components/rag-pipeline/components/rag-pipeline-header/run-mode.tsx b/web/app/components/rag-pipeline/components/rag-pipeline-header/run-mode.tsx index 00c531004f..81389e51b4 100644 --- a/web/app/components/rag-pipeline/components/rag-pipeline-header/run-mode.tsx +++ b/web/app/components/rag-pipeline/components/rag-pipeline-header/run-mode.tsx @@ -4,9 +4,9 @@ import { useCallback } from 'react' import { useTranslation } from 'react-i18next' import { StopCircle } from '@/app/components/base/icons/src/vender/line/mediaAndDevices' import { useWorkflowRun, useWorkflowStartRun } from '@/app/components/workflow/hooks' +import ShortcutsName from '@/app/components/workflow/shortcuts-name' import { useStore, useWorkflowStore } from '@/app/components/workflow/store' import { WorkflowRunningStatus } from '@/app/components/workflow/types' -import { getKeyboardKeyNameBySystem } from '@/app/components/workflow/utils' import { EVENT_WORKFLOW_STOP } from '@/app/components/workflow/variable-inspect/types' import { useEventEmitterContextContext } from '@/context/event-emitter' import { cn } from '@/utils/classnames' @@ -78,14 +78,7 @@ const RunMode = ({ )} { !isDisabled && ( -
-
- {getKeyboardKeyNameBySystem('alt')} -
-
- R -
-
+ ) } diff --git a/web/app/components/workflow-app/components/workflow-onboarding-modal/index.tsx b/web/app/components/workflow-app/components/workflow-onboarding-modal/index.tsx index c483abfb0b..16bae51246 100644 --- a/web/app/components/workflow-app/components/workflow-onboarding-modal/index.tsx +++ b/web/app/components/workflow-app/components/workflow-onboarding-modal/index.tsx @@ -7,6 +7,7 @@ import { } from 'react' import { useTranslation } from 'react-i18next' import Modal from '@/app/components/base/modal' +import ShortcutsName from '@/app/components/workflow/shortcuts-name' import { BlockEnum } from '@/app/components/workflow/types' import StartNodeSelectionPanel from './start-node-selection-panel' @@ -75,9 +76,7 @@ const WorkflowOnboardingModal: FC = ({ {isShow && (
{t('onboarding.escTip.press', { ns: 'workflow' })} - - {t('onboarding.escTip.key', { ns: 'workflow' })} - + {t('onboarding.escTip.toDismiss', { ns: 'workflow' })}
)} diff --git a/web/app/components/workflow/header/run-mode.tsx b/web/app/components/workflow/header/run-mode.tsx index 1a101bc6d2..74bc5bc80a 100644 --- a/web/app/components/workflow/header/run-mode.tsx +++ b/web/app/components/workflow/header/run-mode.tsx @@ -7,9 +7,9 @@ import { trackEvent } from '@/app/components/base/amplitude' import { StopCircle } from '@/app/components/base/icons/src/vender/line/mediaAndDevices' import { useToastContext } from '@/app/components/base/toast' import { useWorkflowRun, useWorkflowRunValidation, useWorkflowStartRun } from '@/app/components/workflow/hooks' +import ShortcutsName from '@/app/components/workflow/shortcuts-name' import { useStore } from '@/app/components/workflow/store' import { WorkflowRunningStatus } from '@/app/components/workflow/types' -import { getKeyboardKeyNameBySystem } from '@/app/components/workflow/utils' import { EVENT_WORKFLOW_STOP } from '@/app/components/workflow/variable-inspect/types' import { useEventEmitterContextContext } from '@/context/event-emitter' import { cn } from '@/utils/classnames' @@ -143,14 +143,7 @@ const RunMode = ({ > {text ?? t('common.run', { ns: 'workflow' })} -
-
- {getKeyboardKeyNameBySystem('alt')} -
-
- R -
-
+ ) diff --git a/web/app/components/workflow/header/version-history-button.tsx b/web/app/components/workflow/header/version-history-button.tsx index 32e72dc184..b98dfeea76 100644 --- a/web/app/components/workflow/header/version-history-button.tsx +++ b/web/app/components/workflow/header/version-history-button.tsx @@ -8,7 +8,8 @@ import useTheme from '@/hooks/use-theme' import { cn } from '@/utils/classnames' import Button from '../../base/button' import Tooltip from '../../base/tooltip' -import { getKeyboardKeyCodeBySystem, getKeyboardKeyNameBySystem } from '../utils' +import ShortcutsName from '../shortcuts-name' +import { getKeyboardKeyCodeBySystem } from '../utils' type VersionHistoryButtonProps = { onClick: () => Promise | unknown @@ -23,16 +24,7 @@ const PopupContent = React.memo(() => {
{t('common.versionHistory', { ns: 'workflow' })}
-
- {VERSION_HISTORY_SHORTCUT.map(key => ( - - {getKeyboardKeyNameBySystem(key)} - - ))} -
+ ) }) diff --git a/web/app/components/workflow/nodes/llm/components/json-schema-config-modal/visual-editor/edit-card/advanced-actions.tsx b/web/app/components/workflow/nodes/llm/components/json-schema-config-modal/visual-editor/edit-card/advanced-actions.tsx index 536277b9e2..8aad824008 100644 --- a/web/app/components/workflow/nodes/llm/components/json-schema-config-modal/visual-editor/edit-card/advanced-actions.tsx +++ b/web/app/components/workflow/nodes/llm/components/json-schema-config-modal/visual-editor/edit-card/advanced-actions.tsx @@ -3,7 +3,8 @@ import { useKeyPress } from 'ahooks' import * as React from 'react' import { useTranslation } from 'react-i18next' import Button from '@/app/components/base/button' -import { getKeyboardKeyCodeBySystem, getKeyboardKeyNameBySystem } from '@/app/components/workflow/utils' +import ShortcutsName from '@/app/components/workflow/shortcuts-name' +import { getKeyboardKeyCodeBySystem } from '@/app/components/workflow/utils' type AdvancedActionsProps = { isConfirmDisabled: boolean @@ -11,15 +12,6 @@ type AdvancedActionsProps = { onConfirm: () => void } -const Key = (props: { keyName: string }) => { - const { keyName } = props - return ( - - {keyName} - - ) -} - const AdvancedActions: FC = ({ isConfirmDisabled, onCancel, @@ -48,10 +40,7 @@ const AdvancedActions: FC = ({ onClick={onConfirm} > {t('operation.confirm', { ns: 'common' })} -
- - -
+ ) diff --git a/web/app/components/workflow/shortcuts-name.tsx b/web/app/components/workflow/shortcuts-name.tsx index d0ce007f61..3d21cff316 100644 --- a/web/app/components/workflow/shortcuts-name.tsx +++ b/web/app/components/workflow/shortcuts-name.tsx @@ -6,11 +6,13 @@ type ShortcutsNameProps = { keys: string[] className?: string textColor?: 'default' | 'secondary' + bgColor?: 'gray' | 'white' } const ShortcutsName = ({ keys, className, textColor = 'default', + bgColor = 'gray', }: ShortcutsNameProps) => { return (
From 25ac69afc5ac9324079be5f0d02b2a2b03dcc784 Mon Sep 17 00:00:00 2001 From: Stephen Zhou <38493346+hyoban@users.noreply.github.com> Date: Thu, 29 Jan 2026 17:58:10 +0800 Subject: [PATCH 12/15] docs: relocate frontend docs for agents and human (#31714) Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- .agents/skills/component-refactoring/SKILL.md | 2 +- .agents/skills/frontend-testing/SKILL.md | 4 +- .../frontend-testing/references/workflow.md | 2 +- AGENTS.md | 33 +----------- CONTRIBUTING.md | 2 +- web/AGENTS.md | 6 ++- web/README.md | 2 + web/docs/lint.md | 51 +++++++++++++++++++ web/{testing/testing.md => docs/test.md} | 4 +- web/eslint-suppressions.json | 5 -- web/scripts/analyze-component.js | 4 +- 11 files changed, 69 insertions(+), 46 deletions(-) create mode 100644 web/docs/lint.md rename web/{testing/testing.md => docs/test.md} (99%) diff --git a/.agents/skills/component-refactoring/SKILL.md b/.agents/skills/component-refactoring/SKILL.md index 7006c382c8..140e0ef434 100644 --- a/.agents/skills/component-refactoring/SKILL.md +++ b/.agents/skills/component-refactoring/SKILL.md @@ -480,4 +480,4 @@ const useButtonState = () => { ### Related Skills - `frontend-testing` - For testing refactored components -- `web/testing/testing.md` - Testing specification +- `web/docs/test.md` - Testing specification diff --git a/.agents/skills/frontend-testing/SKILL.md b/.agents/skills/frontend-testing/SKILL.md index 0716c81ef7..280fcb6341 100644 --- a/.agents/skills/frontend-testing/SKILL.md +++ b/.agents/skills/frontend-testing/SKILL.md @@ -7,7 +7,7 @@ description: Generate Vitest + React Testing Library tests for Dify frontend com This skill enables Claude to generate high-quality, comprehensive frontend tests for the Dify project following established conventions and best practices. -> **⚠️ Authoritative Source**: This skill is derived from `web/testing/testing.md`. Use Vitest mock/timer APIs (`vi.*`). +> **⚠️ Authoritative Source**: This skill is derived from `web/docs/test.md`. Use Vitest mock/timer APIs (`vi.*`). ## When to Apply This Skill @@ -309,7 +309,7 @@ For more detailed information, refer to: ### Primary Specification (MUST follow) -- **`web/testing/testing.md`** - The canonical testing specification. This skill is derived from this document. +- **`web/docs/test.md`** - The canonical testing specification. This skill is derived from this document. ### Reference Examples in Codebase diff --git a/.agents/skills/frontend-testing/references/workflow.md b/.agents/skills/frontend-testing/references/workflow.md index 009c3e013b..bc4ed8285a 100644 --- a/.agents/skills/frontend-testing/references/workflow.md +++ b/.agents/skills/frontend-testing/references/workflow.md @@ -4,7 +4,7 @@ This guide defines the workflow for generating tests, especially for complex com ## Scope Clarification -This guide addresses **multi-file workflow** (how to process multiple test files). For coverage requirements within a single test file, see `web/testing/testing.md` § Coverage Goals. +This guide addresses **multi-file workflow** (how to process multiple test files). For coverage requirements within a single test file, see `web/docs/test.md` § Coverage Goals. 
| Scope | Rule | |-------|------| diff --git a/AGENTS.md b/AGENTS.md index 7d96ac3a6d..51fa6e4527 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -7,7 +7,7 @@ Dify is an open-source platform for developing LLM applications with an intuitiv The codebase is split into: - **Backend API** (`/api`): Python Flask application organized with Domain-Driven Design -- **Frontend Web** (`/web`): Next.js 15 application using TypeScript and React 19 +- **Frontend Web** (`/web`): Next.js application using TypeScript and React - **Docker deployment** (`/docker`): Containerized deployment configurations ## Backend Workflow @@ -18,36 +18,7 @@ The codebase is split into: ## Frontend Workflow -```bash -cd web -pnpm lint:fix -pnpm type-check:tsgo -pnpm test -``` - -### Frontend Linting - -ESLint is used for frontend code quality. Available commands: - -```bash -# Lint all files (report only) -pnpm lint - -# Lint and auto-fix issues -pnpm lint:fix - -# Lint specific files or directories -pnpm lint:fix app/components/base/button/ -pnpm lint:fix app/components/base/button/index.tsx - -# Lint quietly (errors only, no warnings) -pnpm lint:quiet - -# Check code complexity -pnpm lint:complexity -``` - -**Important**: Always run `pnpm lint:fix` before committing. The pre-commit hook runs `lint-staged` which only lints staged files. +- Read `web/AGENTS.md` for details ## Testing & Quality Practices diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 20a7d6c6f6..d7f007af67 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -77,7 +77,7 @@ How we prioritize: For setting up the frontend service, please refer to our comprehensive [guide](https://github.com/langgenius/dify/blob/main/web/README.md) in the `web/README.md` file. This document provides detailed instructions to help you set up the frontend environment properly. -**Testing**: All React components must have comprehensive test coverage. See [web/testing/testing.md](https://github.com/langgenius/dify/blob/main/web/testing/testing.md) for the canonical frontend testing guidelines and follow every requirement described there. +**Testing**: All React components must have comprehensive test coverage. See [web/docs/test.md](https://github.com/langgenius/dify/blob/main/web/docs/test.md) for the canonical frontend testing guidelines and follow every requirement described there. #### Backend diff --git a/web/AGENTS.md b/web/AGENTS.md index 7362cd51db..5dd41b8a3c 100644 --- a/web/AGENTS.md +++ b/web/AGENTS.md @@ -1,5 +1,9 @@ +## Frontend Workflow + +- Refer to the `./docs/test.md` and `./docs/lint.md` for detailed frontend workflow instructions. + ## Automated Test Generation -- Use `web/testing/testing.md` as the canonical instruction set for generating frontend automated tests. +- Use `./docs/test.md` as the canonical instruction set for generating frontend automated tests. - When proposing or saving tests, re-read that document and follow every requirement. - All frontend tests MUST also comply with the `frontend-testing` skill. Treat the skill as a mandatory constraint, not optional guidance. diff --git a/web/README.md b/web/README.md index 9c731a081a..64039709dc 100644 --- a/web/README.md +++ b/web/README.md @@ -107,6 +107,8 @@ Open [http://localhost:6006](http://localhost:6006) with your browser to see the If your IDE is VSCode, rename `web/.vscode/settings.example.json` to `web/.vscode/settings.json` for lint code setting. +Then follow the [Lint Documentation](./docs/lint.md) to lint the code. 
+ ## Test We use [Vitest](https://vitest.dev/) and [React Testing Library](https://testing-library.com/docs/react-testing-library/intro/) for Unit Testing. diff --git a/web/docs/lint.md b/web/docs/lint.md new file mode 100644 index 0000000000..051f9e6ecd --- /dev/null +++ b/web/docs/lint.md @@ -0,0 +1,51 @@ +# Lint Guide + +We use ESLint and Typescript to maintain code quality and consistency across the project. + +## ESLint + +### Common Flags + +**File/folder targeting**: Append paths to lint specific files or directories. + +```sh +pnpm eslint [options] file.js [file.js] [dir] +``` + +**`--cache`**: Caches lint results for faster subsequent runs. Keep this enabled by default; only disable when you encounter unexpected lint results. + +**`--concurrency`**: Enables multi-threaded linting. Use `--concurrency=auto` or experiment with specific numbers to find the optimal setting for your machine. Keep this enabled when linting multiple files. + +- [ESLint multi-thread linting blog post](https://eslint.org/blog/2025/08/multithread-linting/) + +**`--fix`**: Automatically fixes auto-fixable rule violations. Always review the diff before committing to ensure no unintended changes. + +**`--quiet`**: Suppresses warnings and only shows errors. Useful when you want to reduce noise from existing issues. + +**`--suppress-all`**: Temporarily suppresses error-level violations and records them, allowing CI to pass. Treat this as an escape hatch—fix these errors when time permits. + +**`--prune-suppressions`**: Removes outdated suppressions after you've fixed the underlying errors. + +- [ESLint bulk suppressions blog post](https://eslint.org/blog/2025/04/introducing-bulk-suppressions/) + +### Type-Aware Linting + +Some ESLint rules require type information, such as [no-leaked-conditional-rendering](https://www.eslint-react.xyz/docs/rules/no-leaked-conditional-rendering). However, [typed linting via typescript-eslint](https://typescript-eslint.io/getting-started/typed-linting) is too slow for practical use, so we use [TSSLint](https://github.com/johnsoncodehk/tsslint) instead. + +```sh +pnpm lint:tss +``` + +This command lints the entire project and is intended for final verification before committing or pushing changes. + +## Type Check + +You should be able to see suggestions from TypeScript in your editor for all open files. + +However, it can be useful to run the TypeScript 7 command-line (tsgo) to type check all files: + +```sh +pnpm type-check:tsgo +``` + +Prefer using `tsgo` for type checking as it is significantly faster than the standard TypeScript compiler. Only fall back to `pnpm type-check` (which uses `tsc`) if you encounter unexpected results. diff --git a/web/testing/testing.md b/web/docs/test.md similarity index 99% rename from web/testing/testing.md rename to web/docs/test.md index 47341e445e..cac0e0e351 100644 --- a/web/testing/testing.md +++ b/web/docs/test.md @@ -360,11 +360,11 @@ describe('ComponentName', () => { let mockPortalOpenState = false vi.mock('@/app/components/base/portal-to-follow-elem', () => ({ - PortalToFollowElem: ({ children, open, ...props }: any) => { + PortalToFollowElem: ({ children, open, ...props }) => { mockPortalOpenState = open || false // Update shared state return
{children}
}, - PortalToFollowElemContent: ({ children }: any) => { + PortalToFollowElemContent: ({ children }) => { // ✅ Matches actual: returns null when open is false if (!mockPortalOpenState) return null diff --git a/web/eslint-suppressions.json b/web/eslint-suppressions.json index 6193a8ad4e..63f10d238c 100644 --- a/web/eslint-suppressions.json +++ b/web/eslint-suppressions.json @@ -4318,11 +4318,6 @@ "count": 10 } }, - "testing/testing.md": { - "ts/no-explicit-any": { - "count": 2 - } - }, "types/app.ts": { "ts/no-explicit-any": { "count": 1 diff --git a/web/scripts/analyze-component.js b/web/scripts/analyze-component.js index b09301503c..2fdff2f3d0 100755 --- a/web/scripts/analyze-component.js +++ b/web/scripts/analyze-component.js @@ -337,7 +337,7 @@ Test file under review: ${testPath} Checklist (ensure every item is addressed in your review): -- Confirm the tests satisfy all requirements listed above and in web/testing/TESTING.md. +- Confirm the tests satisfy all requirements listed above and in web/docs/test.md. - Verify Arrange → Act → Assert structure, mocks, and cleanup follow project conventions. - Ensure all detected component features (state, effects, routing, API, events, etc.) are exercised, including edge cases and error paths. - Check coverage of prop variations, null/undefined inputs, and high-priority workflows implied by usage score. @@ -382,7 +382,7 @@ Examples: # Review existing test pnpm analyze-component app/components/base/button/index.tsx --review -For complete testing guidelines, see: web/testing/testing.md +For complete testing guidelines, see: web/docs/test.md `) } From 8aeef36e2d16c9b9ba41088aee937d0348b5cbec Mon Sep 17 00:00:00 2001 From: yihong Date: Thu, 29 Jan 2026 18:17:40 +0800 Subject: [PATCH 13/15] feat: use xdist to make make test faster (#30824) Signed-off-by: yihong0618 --- .github/workflows/api-tests.yml | 1 + Makefile | 2 +- api/pyproject.toml | 1 + api/tests/unit_tests/conftest.py | 17 +++++++++++++ .../console/app/test_app_response_models.py | 7 ++++++ api/uv.lock | 24 +++++++++++++++++++ dev/pytest/pytest_unit_tests.sh | 10 ++++++-- 7 files changed, 59 insertions(+), 3 deletions(-) diff --git a/.github/workflows/api-tests.yml b/.github/workflows/api-tests.yml index 190e00d9fe..52e3272f99 100644 --- a/.github/workflows/api-tests.yml +++ b/.github/workflows/api-tests.yml @@ -72,6 +72,7 @@ jobs: OPENDAL_FS_ROOT: /tmp/dify-storage run: | uv run --project api pytest \ + -n auto \ --timeout "${PYTEST_TIMEOUT:-180}" \ api/tests/integration_tests/workflow \ api/tests/integration_tests/tools \ diff --git a/Makefile b/Makefile index 20cede9a5e..984e8676ee 100644 --- a/Makefile +++ b/Makefile @@ -80,7 +80,7 @@ test: echo "Target: $(TARGET_TESTS)"; \ uv run --project api --dev pytest $(TARGET_TESTS); \ else \ - uv run --project api --dev dev/pytest/pytest_unit_tests.sh; \ + PYTEST_XDIST_ARGS="-n auto" uv run --project api --dev dev/pytest/pytest_unit_tests.sh; \ fi @echo "✅ Tests complete" diff --git a/api/pyproject.toml b/api/pyproject.toml index 575c1434c5..af2dba6fac 100644 --- a/api/pyproject.toml +++ b/api/pyproject.toml @@ -175,6 +175,7 @@ dev = [ # "locust>=2.40.4", # Temporarily removed due to compatibility issues. Uncomment when resolved. 
"sseclient-py>=1.8.0", "pytest-timeout>=2.4.0", + "pytest-xdist>=3.8.0", ] ############################################################ diff --git a/api/tests/unit_tests/conftest.py b/api/tests/unit_tests/conftest.py index c5e1576186..e3c1a617f7 100644 --- a/api/tests/unit_tests/conftest.py +++ b/api/tests/unit_tests/conftest.py @@ -3,6 +3,7 @@ from unittest.mock import MagicMock, patch import pytest from flask import Flask +from sqlalchemy import create_engine # Getting the absolute path of the current file's directory ABS_PATH = os.path.dirname(os.path.abspath(__file__)) @@ -36,6 +37,7 @@ import sys sys.path.insert(0, PROJECT_DIR) +from core.db.session_factory import configure_session_factory, session_factory from extensions import ext_redis @@ -102,3 +104,18 @@ def reset_secret_key(): yield finally: dify_config.SECRET_KEY = original + + +@pytest.fixture(scope="session") +def _unit_test_engine(): + engine = create_engine("sqlite:///:memory:") + yield engine + engine.dispose() + + +@pytest.fixture(autouse=True) +def _configure_session_factory(_unit_test_engine): + try: + session_factory.get_session_maker() + except RuntimeError: + configure_session_factory(_unit_test_engine, expire_on_commit=False) diff --git a/api/tests/unit_tests/controllers/console/app/test_app_response_models.py b/api/tests/unit_tests/controllers/console/app/test_app_response_models.py index 40eb59a8f4..c557605916 100644 --- a/api/tests/unit_tests/controllers/console/app/test_app_response_models.py +++ b/api/tests/unit_tests/controllers/console/app/test_app_response_models.py @@ -31,6 +31,13 @@ def _load_app_module(): def schema_model(self, name, schema): self.models[name] = schema + return schema + + def model(self, name, model_dict=None, **kwargs): + """Register a model with the namespace (flask-restx compatibility).""" + if model_dict is not None: + self.models[name] = model_dict + return model_dict def _decorator(self, obj): return obj diff --git a/api/uv.lock b/api/uv.lock index 7808c16a8c..a3ad292168 100644 --- a/api/uv.lock +++ b/api/uv.lock @@ -1479,6 +1479,7 @@ dev = [ { name = "pytest-env" }, { name = "pytest-mock" }, { name = "pytest-timeout" }, + { name = "pytest-xdist" }, { name = "ruff" }, { name = "scipy-stubs" }, { name = "sseclient-py" }, @@ -1678,6 +1679,7 @@ dev = [ { name = "pytest-env", specifier = "~=1.1.3" }, { name = "pytest-mock", specifier = "~=3.14.0" }, { name = "pytest-timeout", specifier = ">=2.4.0" }, + { name = "pytest-xdist", specifier = ">=3.8.0" }, { name = "ruff", specifier = "~=0.14.0" }, { name = "scipy-stubs", specifier = ">=1.15.3.0" }, { name = "sseclient-py", specifier = ">=1.8.0" }, @@ -1896,6 +1898,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/19/d8/2a1c638d9e0aa7e269269a1a1bf423ddd94267f1a01bbe3ad03432b67dd4/eval_type_backport-0.3.0-py3-none-any.whl", hash = "sha256:975a10a0fe333c8b6260d7fdb637698c9a16c3a9e3b6eb943fee6a6f67a37fe8", size = 6061, upload-time = "2025-11-13T20:56:49.499Z" }, ] +[[package]] +name = "execnet" +version = "2.1.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/bf/89/780e11f9588d9e7128a3f87788354c7946a9cbb1401ad38a48c4db9a4f07/execnet-2.1.2.tar.gz", hash = "sha256:63d83bfdd9a23e35b9c6a3261412324f964c2ec8dcd8d3c6916ee9373e0befcd", size = 166622, upload-time = "2025-11-12T09:56:37.75Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ab/84/02fc1827e8cdded4aa65baef11296a9bbe595c474f0d6d758af082d849fd/execnet-2.1.2-py3-none-any.whl", hash = 
"sha256:67fba928dd5a544b783f6056f449e5e3931a5c378b128bc18501f7ea79e296ec", size = 40708, upload-time = "2025-11-12T09:56:36.333Z" }, +] + [[package]] name = "faker" version = "38.2.0" @@ -5141,6 +5152,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fa/b6/3127540ecdf1464a00e5a01ee60a1b09175f6913f0644ac748494d9c4b21/pytest_timeout-2.4.0-py3-none-any.whl", hash = "sha256:c42667e5cdadb151aeb5b26d114aff6bdf5a907f176a007a30b940d3d865b5c2", size = 14382, upload-time = "2025-05-05T19:44:33.502Z" }, ] +[[package]] +name = "pytest-xdist" +version = "3.8.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "execnet" }, + { name = "pytest" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/78/b4/439b179d1ff526791eb921115fca8e44e596a13efeda518b9d845a619450/pytest_xdist-3.8.0.tar.gz", hash = "sha256:7e578125ec9bc6050861aa93f2d59f1d8d085595d6551c2c90b6f4fad8d3a9f1", size = 88069, upload-time = "2025-07-01T13:30:59.346Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ca/31/d4e37e9e550c2b92a9cbc2e4d0b7420a27224968580b5a447f420847c975/pytest_xdist-3.8.0-py3-none-any.whl", hash = "sha256:202ca578cfeb7370784a8c33d6d05bc6e13b4f25b5053c30a152269fd10f0b88", size = 46396, upload-time = "2025-07-01T13:30:56.632Z" }, +] + [[package]] name = "python-calamine" version = "0.5.4" diff --git a/dev/pytest/pytest_unit_tests.sh b/dev/pytest/pytest_unit_tests.sh index 496cb40952..7c39a48bf4 100755 --- a/dev/pytest/pytest_unit_tests.sh +++ b/dev/pytest/pytest_unit_tests.sh @@ -5,6 +5,12 @@ SCRIPT_DIR="$(dirname "$(realpath "$0")")" cd "$SCRIPT_DIR/../.." PYTEST_TIMEOUT="${PYTEST_TIMEOUT:-20}" +PYTEST_XDIST_ARGS="${PYTEST_XDIST_ARGS:--n auto}" -# libs -pytest --timeout "${PYTEST_TIMEOUT}" api/tests/unit_tests +# Run most tests in parallel (excluding controllers which have import conflicts with xdist) +# Controller tests have module-level side effects (Flask route registration) that cause +# race conditions when imported concurrently by multiple pytest-xdist workers. +pytest --timeout "${PYTEST_TIMEOUT}" ${PYTEST_XDIST_ARGS} api/tests/unit_tests --ignore=api/tests/unit_tests/controllers + +# Run controller tests sequentially to avoid import race conditions +pytest --timeout "${PYTEST_TIMEOUT}" api/tests/unit_tests/controllers From c27df884170b318b05ddd19b5c55a959f2649c53 Mon Sep 17 00:00:00 2001 From: Joel Date: Thu, 29 Jan 2026 19:40:47 +0800 Subject: [PATCH 14/15] feat: try app support review (#31716) --- web/app/components/apps/index.tsx | 1 + web/app/components/explore/app-card/index.tsx | 12 +++++------- web/app/components/explore/app-list/index.tsx | 1 + web/app/components/explore/try-app/index.tsx | 16 +++++++++++++++- web/app/components/explore/try-app/tab.tsx | 4 +++- web/i18n/en-US/explore.json | 2 +- 6 files changed, 26 insertions(+), 10 deletions(-) diff --git a/web/app/components/apps/index.tsx b/web/app/components/apps/index.tsx index 255bfbf9c5..3be8492489 100644 --- a/web/app/components/apps/index.tsx +++ b/web/app/components/apps/index.tsx @@ -105,6 +105,7 @@ const Apps = () => { {isShowTryAppPanel && ( {isExplore && (canCreate || isTrialApp) && (