Merge remote-tracking branch 'origin/main' into feat/ee-admin-dsl

2026-05-26 13:00:51 -04:00 · 2026-03-25 12:57:33 +08:00
parent e57ee4bcbe 5f82ccc750
commit cb8a3ca196
1564 changed files with 83983 additions and 24857 deletions
--- a/.agents/skills/frontend-testing/SKILL.md
+++ b/.agents/skills/frontend-testing/SKILL.md
@@ -63,7 +63,8 @@ pnpm analyze-component <path> --review

 ### File Naming

- Test files: `ComponentName.spec.tsx` (same directory as component)
+- Test files: `ComponentName.spec.tsx` inside a same-level `__tests__/` directory
+- Placement rule: Component, hook, and utility tests must live in a sibling `__tests__/` folder at the same level as the source under test. For example, `foo/index.tsx` maps to `foo/__tests__/index.spec.tsx`, and `foo/bar.ts` maps to `foo/__tests__/bar.spec.ts`.
 - Integration tests: `web/__tests__/` directory

 ## Test Structure Template
--- a/.agents/skills/frontend-testing/assets/component-test.template.tsx
+++ b/.agents/skills/frontend-testing/assets/component-test.template.tsx
@@ -41,7 +41,7 @@ import userEvent from '@testing-library/user-event'
 // Router (if component uses useRouter, usePathname, useSearchParams)
 // WHY: Isolates tests from Next.js routing, enables testing navigation behavior
 // const mockPush = vi.fn()
-// vi.mock('next/navigation', () => ({
+// vi.mock('@/next/navigation', () => ({
 //   useRouter: () => ({ push: mockPush }),
 //   usePathname: () => '/test-path',
 // }))
--- a/.gemini/config.yaml
+++ b/.gemini/config.yaml
@@ -0,0 +1,13 @@
+have_fun: false
+memory_config:
+  disabled: false
+code_review:
+  disable: true
+  comment_severity_threshold: MEDIUM
+  max_review_comments: -1
+  pull_request_opened:
+    help: false
+    summary: false
+    code_review: false
+    include_drafts: false
+ignore_patterns: []
--- a/.github/actions/setup-web/action.yml
+++ b/.github/actions/setup-web/action.yml
@@ -4,10 +4,9 @@ runs:
  using: composite
  steps:
    - name: Setup Vite+
-      uses: voidzero-dev/setup-vp@b5d848f5a62488f3d3d920f8aa6ac318a60c5f07 # v1
+      uses: voidzero-dev/setup-vp@20553a7a7429c429a74894104a2835d7fed28a72 # v1.3.0
      with:
-        node-version-file: "./web/.nvmrc"
+        working-directory: web
+        node-version-file: .nvmrc
        cache: true
-        run-install: |
-          - cwd: ./web
-            args: ['--frozen-lockfile']
+        run-install: true
--- a/.github/workflows/anti-slop.yml
+++ b/.github/workflows/anti-slop.yml
@@ -12,7 +12,7 @@ jobs:
  anti-slop:
    runs-on: ubuntu-latest
    steps:
-      - uses: peakoss/anti-slop@v0
+      - uses: peakoss/anti-slop@85daca1880e9e1af197fc06ea03349daf08f4202 # v0.2.1
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          close-pr: false
--- a/.github/workflows/api-tests.yml
+++ b/.github/workflows/api-tests.yml
@@ -2,6 +2,12 @@ name: Run Pytest

 on:
  workflow_call:
+    secrets:
+      CODECOV_TOKEN:
+        required: false
+
+permissions:
+  contents: read

 concurrency:
  group: api-tests-${{ github.head_ref || github.run_id }}
@@ -11,6 +17,8 @@ jobs:
  test:
    name: API Tests
    runs-on: ubuntu-latest
+    env:
+      CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
    defaults:
      run:
        shell: bash
@@ -24,10 +32,11 @@ jobs:
      - name: Checkout code
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
+          fetch-depth: 0
          persist-credentials: false

      - name: Setup UV and Python
-        uses: astral-sh/setup-uv@e06108dd0aef18192324c70427afc47652e63a82 # v7.5.0
+        uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # v7.6.0
        with:
          enable-cache: true
          python-version: ${{ matrix.python-version }}
@@ -79,21 +88,12 @@ jobs:
            api/tests/test_containers_integration_tests \
            api/tests/unit_tests

-      - name: Coverage Summary
-        run: |
-          set -x
-          # Extract coverage percentage and create a summary
-          TOTAL_COVERAGE=$(python -c 'import json; print(json.load(open("coverage.json"))["totals"]["percent_covered_display"])')
-
-          # Create a detailed coverage summary
-          echo "### Test Coverage Summary :test_tube:" >> $GITHUB_STEP_SUMMARY
-          echo "Total Coverage: ${TOTAL_COVERAGE}%" >> $GITHUB_STEP_SUMMARY
-          {
-            echo ""
-            echo "<details><summary>File-level coverage (click to expand)</summary>"
-            echo ""
-            echo '```'
-            uv run --project api coverage report -m
-            echo '```'
-            echo "</details>"
-          } >> $GITHUB_STEP_SUMMARY
+      - name: Report coverage
+        if: ${{ env.CODECOV_TOKEN != '' && matrix.python-version == '3.12' }}
+        uses: codecov/codecov-action@1af58845a975a7985b0beb0cbe6fbbb71a41dbad # v5.5.3
+        with:
+          files: ./coverage.xml
+          disable_search: true
+          flags: api
+        env:
+          CODECOV_TOKEN: ${{ env.CODECOV_TOKEN }}
--- a/.github/workflows/autofix.yml
+++ b/.github/workflows/autofix.yml
@@ -39,7 +39,7 @@ jobs:
        with:
          python-version: "3.11"

-      - uses: astral-sh/setup-uv@e06108dd0aef18192324c70427afc47652e63a82 # v7.5.0
+      - uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # v7.6.0

      - name: Generate Docker Compose
        if: steps.docker-compose-changes.outputs.any_changed == 'true'
@@ -94,11 +94,6 @@ jobs:
          find . -name "*.py" -type f -exec sed -i.bak -E 's/"([^"]+)" \| None/Optional["\1"]/g; s/'"'"'([^'"'"']+)'"'"' \| None/Optional['"'"'\1'"'"']/g' {} \;
          find . -name "*.py.bak" -type f -delete

-      # mdformat breaks YAML front matter in markdown files. Add --exclude for directories containing YAML front matter.
-      - name: mdformat
-        run: |
-          uvx --python 3.13 mdformat . --exclude ".agents/skills/**"
-
      - name: Setup web environment
        if: steps.web-changes.outputs.any_changed == 'true'
        uses: ./.github/actions/setup-web
--- a/.github/workflows/db-migration-test.yml
+++ b/.github/workflows/db-migration-test.yml
@@ -19,7 +19,7 @@ jobs:
          persist-credentials: false

      - name: Setup UV and Python
-        uses: astral-sh/setup-uv@e06108dd0aef18192324c70427afc47652e63a82 # v7.5.0
+        uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # v7.6.0
        with:
          enable-cache: true
          python-version: "3.12"
@@ -69,7 +69,7 @@ jobs:
          persist-credentials: false

      - name: Setup UV and Python
-        uses: astral-sh/setup-uv@e06108dd0aef18192324c70427afc47652e63a82 # v7.5.0
+        uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # v7.6.0
        with:
          enable-cache: true
          python-version: "3.12"
--- a/.github/workflows/main-ci.yml
+++ b/.github/workflows/main-ci.yml
@@ -56,16 +56,14 @@ jobs:
    needs: check-changes
    if: needs.check-changes.outputs.api-changed == 'true'
    uses: ./.github/workflows/api-tests.yml
+    secrets: inherit

  web-tests:
    name: Web Tests
    needs: check-changes
    if: needs.check-changes.outputs.web-changed == 'true'
    uses: ./.github/workflows/web-tests.yml
-    with:
-      base_sha: ${{ github.event.before || github.event.pull_request.base.sha }}
-      diff_range_mode: ${{ github.event.before && 'exact' || 'merge-base' }}
-      head_sha: ${{ github.event.after || github.event.pull_request.head.sha || github.sha }}
+    secrets: inherit

  style-check:
    name: Style Check
--- a/.github/workflows/pyrefly-diff.yml
+++ b/.github/workflows/pyrefly-diff.yml
@@ -22,7 +22,7 @@ jobs:
          fetch-depth: 0

      - name: Setup Python & UV
-        uses: astral-sh/setup-uv@e06108dd0aef18192324c70427afc47652e63a82 # v7.5.0
+        uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # v7.6.0
        with:
          enable-cache: true

--- a/.github/workflows/style.yml
+++ b/.github/workflows/style.yml
@@ -33,7 +33,7 @@ jobs:

      - name: Setup UV and Python
        if: steps.changed-files.outputs.any_changed == 'true'
-        uses: astral-sh/setup-uv@e06108dd0aef18192324c70427afc47652e63a82 # v7.5.0
+        uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # v7.6.0
        with:
          enable-cache: false
          python-version: "3.12"
@@ -84,20 +84,20 @@ jobs:
        if: steps.changed-files.outputs.any_changed == 'true'
        uses: ./.github/actions/setup-web

+      - name: Restore ESLint cache
+        if: steps.changed-files.outputs.any_changed == 'true'
+        id: eslint-cache-restore
+        uses: actions/cache/restore@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4
+        with:
+          path: web/.eslintcache
+          key: ${{ runner.os }}-web-eslint-${{ hashFiles('web/package.json', 'web/pnpm-lock.yaml', 'web/eslint.config.mjs', 'web/eslint.constants.mjs', 'web/plugins/eslint/**') }}-${{ github.sha }}
+          restore-keys: |
+            ${{ runner.os }}-web-eslint-${{ hashFiles('web/package.json', 'web/pnpm-lock.yaml', 'web/eslint.config.mjs', 'web/eslint.constants.mjs', 'web/plugins/eslint/**') }}-
+
      - name: Web style check
        if: steps.changed-files.outputs.any_changed == 'true'
        working-directory: ./web
-        run: |
-          vp run lint:ci
-        # pnpm run lint:report
-        # continue-on-error: true
-
-      # - name: Annotate Code
-      #   if: steps.changed-files.outputs.any_changed == 'true' && github.event_name == 'pull_request'
-      #   uses: DerLev/eslint-annotations@51347b3a0abfb503fc8734d5ae31c4b151297fae
-      #   with:
-      #     eslint-report: web/eslint_report.json
-      #     github-token: ${{ secrets.GITHUB_TOKEN }}
+        run: vp run lint:ci

      - name: Web tsslint
        if: steps.changed-files.outputs.any_changed == 'true'
@@ -114,6 +114,13 @@ jobs:
        working-directory: ./web
        run: vp run knip

+      - name: Save ESLint cache
+        if: steps.changed-files.outputs.any_changed == 'true' && success() && steps.eslint-cache-restore.outputs.cache-hit != 'true'
+        uses: actions/cache/save@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4
+        with:
+          path: web/.eslintcache
+          key: ${{ steps.eslint-cache-restore.outputs.cache-primary-key }}
+
  superlinter:
    name: SuperLinter
    runs-on: ubuntu-latest
--- a/.github/workflows/translate-i18n-claude.yml
+++ b/.github/workflows/translate-i18n-claude.yml
@@ -120,7 +120,7 @@ jobs:

      - name: Run Claude Code for Translation Sync
        if: steps.detect_changes.outputs.CHANGED_FILES != ''
-        uses: anthropics/claude-code-action@cd77b50d2b0808657f8e6774085c8bf54484351c # v1.0.72
+        uses: anthropics/claude-code-action@ff9acae5886d41a99ed4ec14b7dc147d55834722 # v1.0.77
        with:
          anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}
          github_token: ${{ secrets.GITHUB_TOKEN }}
--- a/.github/workflows/vdb-tests.yml
+++ b/.github/workflows/vdb-tests.yml
@@ -31,7 +31,7 @@ jobs:
          remove_tool_cache: true

      - name: Setup UV and Python
-        uses: astral-sh/setup-uv@e06108dd0aef18192324c70427afc47652e63a82 # v7.5.0
+        uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # v7.6.0
        with:
          enable-cache: true
          python-version: ${{ matrix.python-version }}
--- a/.github/workflows/web-tests.yml
+++ b/.github/workflows/web-tests.yml
@@ -2,16 +2,9 @@ name: Web Tests

 on:
  workflow_call:
-    inputs:
-      base_sha:
+    secrets:
+      CODECOV_TOKEN:
        required: false
-        type: string
-      diff_range_mode:
-        required: false
-        type: string
-      head_sha:
-        required: false
-        type: string

 permissions:
  contents: read
@@ -63,7 +56,7 @@ jobs:
    needs: [test]
    runs-on: ubuntu-latest
    env:
-      VITEST_COVERAGE_SCOPE: app-components
+      CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
    defaults:
      run:
        shell: bash
@@ -87,52 +80,16 @@ jobs:
          merge-multiple: true

      - name: Merge reports
-        run: vp test --merge-reports --reporter=json --reporter=agent --coverage
+        run: vp test --merge-reports --coverage --silent=passed-only

-      - name: Report app/components baseline coverage
-        run: node ./scripts/report-components-coverage-baseline.mjs
-
-      - name: Report app/components test touch
-        env:
-          BASE_SHA: ${{ inputs.base_sha }}
-          DIFF_RANGE_MODE: ${{ inputs.diff_range_mode }}
-          HEAD_SHA: ${{ inputs.head_sha }}
-        run: node ./scripts/report-components-test-touch.mjs
-
-      - name: Check app/components pure diff coverage
-        env:
-          BASE_SHA: ${{ inputs.base_sha }}
-          DIFF_RANGE_MODE: ${{ inputs.diff_range_mode }}
-          HEAD_SHA: ${{ inputs.head_sha }}
-        run: node ./scripts/check-components-diff-coverage.mjs
-
-      - name: Check Coverage Summary
-        if: always()
-        id: coverage-summary
-        run: |
-          set -eo pipefail
-
-          COVERAGE_FILE="coverage/coverage-final.json"
-          COVERAGE_SUMMARY_FILE="coverage/coverage-summary.json"
-
-          if [ -f "$COVERAGE_FILE" ] || [ -f "$COVERAGE_SUMMARY_FILE" ]; then
-            echo "has_coverage=true" >> "$GITHUB_OUTPUT"
-            exit 0
-          fi
-
-          echo "has_coverage=false" >> "$GITHUB_OUTPUT"
-          echo "### 🚨 app/components Diff Coverage" >> "$GITHUB_STEP_SUMMARY"
-          echo "" >> "$GITHUB_STEP_SUMMARY"
-          echo "Coverage artifacts not found. Ensure Vitest merge reports ran with coverage enabled." >> "$GITHUB_STEP_SUMMARY"
-
-      - name: Upload Coverage Artifact
-        if: steps.coverage-summary.outputs.has_coverage == 'true'
-        uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
+      - name: Report coverage
+        if: ${{ env.CODECOV_TOKEN != '' }}
+        uses: codecov/codecov-action@1af58845a975a7985b0beb0cbe6fbbb71a41dbad # v5.5.3
        with:
-          name: web-coverage-report
-          path: web/coverage
-          retention-days: 30
-          if-no-files-found: error
+          directory: web/coverage
+          flags: web
+        env:
+          CODECOV_TOKEN: ${{ env.CODECOV_TOKEN }}

  web-build:
    name: Web Build
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -97,3 +97,8 @@ Feel free to reach out if you encounter any issues during the setup process.
 ## Getting Help

 If you ever get stuck or get a burning question while contributing, simply shoot your queries our way via the related GitHub issue, or hop onto our [Discord](https://discord.gg/8Tpq4AcN9c) for a quick chat.
+
+## Automated Agent Contributions
+
+> [!NOTE]
+> If you are an automated agent, just add 🤖🤖🤖 to the end of the PR title to opt in.
--- a/api/.env.example
+++ b/api/.env.example
@@ -353,6 +353,9 @@ BAIDU_VECTOR_DB_SHARD=1
 BAIDU_VECTOR_DB_REPLICAS=3
 BAIDU_VECTOR_DB_INVERTED_INDEX_ANALYZER=DEFAULT_ANALYZER
 BAIDU_VECTOR_DB_INVERTED_INDEX_PARSER_MODE=COARSE_MODE
+BAIDU_VECTOR_DB_AUTO_BUILD_ROW_COUNT_INCREMENT=500
+BAIDU_VECTOR_DB_AUTO_BUILD_ROW_COUNT_INCREMENT_RATIO=0.05
+BAIDU_VECTOR_DB_REBUILD_INDEX_TIMEOUT_IN_SECONDS=300

 # Upstash configuration
 UPSTASH_VECTOR_URL=your-server-url
--- a/api/commands/plugin.py
+++ b/api/commands/plugin.py
@@ -1,9 +1,11 @@
 import json
 import logging
-from typing import Any
+from typing import Any, cast

 import click
 from pydantic import TypeAdapter
+from sqlalchemy import delete, select
+from sqlalchemy.engine import CursorResult

 from configs import dify_config
 from core.helper import encrypter
@@ -48,14 +50,15 @@ def setup_system_tool_oauth_client(provider, client_params):
        click.echo(click.style(f"Error parsing client params: {str(e)}", fg="red"))
        return

-    deleted_count = (
-        db.session.query(ToolOAuthSystemClient)
-        .filter_by(
-            provider=provider_name,
-            plugin_id=plugin_id,
-        )
-        .delete()
-    )
+    deleted_count = cast(
+        CursorResult,
+        db.session.execute(
+            delete(ToolOAuthSystemClient).where(
+                ToolOAuthSystemClient.provider == provider_name,
+                ToolOAuthSystemClient.plugin_id == plugin_id,
+            )
+        ),
+    ).rowcount
    if deleted_count > 0:
        click.echo(click.style(f"Deleted {deleted_count} existing oauth client params.", fg="yellow"))

@@ -97,14 +100,15 @@ def setup_system_trigger_oauth_client(provider, client_params):
        click.echo(click.style(f"Error parsing client params: {str(e)}", fg="red"))
        return

-    deleted_count = (
-        db.session.query(TriggerOAuthSystemClient)
-        .filter_by(
-            provider=provider_name,
-            plugin_id=plugin_id,
-        )
-        .delete()
-    )
+    deleted_count = cast(
+        CursorResult,
+        db.session.execute(
+            delete(TriggerOAuthSystemClient).where(
+                TriggerOAuthSystemClient.provider == provider_name,
+                TriggerOAuthSystemClient.plugin_id == plugin_id,
+            )
+        ),
+    ).rowcount
    if deleted_count > 0:
        click.echo(click.style(f"Deleted {deleted_count} existing oauth client params.", fg="yellow"))

@@ -139,14 +143,15 @@ def setup_datasource_oauth_client(provider, client_params):
        return

    click.echo(click.style(f"Ready to delete existing oauth client params: {provider_name}", fg="yellow"))
-    deleted_count = (
-        db.session.query(DatasourceOauthParamConfig)
-        .filter_by(
-            provider=provider_name,
-            plugin_id=plugin_id,
-        )
-        .delete()
-    )
+    deleted_count = cast(
+        CursorResult,
+        db.session.execute(
+            delete(DatasourceOauthParamConfig).where(
+                DatasourceOauthParamConfig.provider == provider_name,
+                DatasourceOauthParamConfig.plugin_id == plugin_id,
+            )
+        ),
+    ).rowcount
    if deleted_count > 0:
        click.echo(click.style(f"Deleted {deleted_count} existing oauth client params.", fg="yellow"))

@@ -192,7 +197,9 @@ def transform_datasource_credentials(environment: str):

        # deal notion credentials
        deal_notion_count = 0
-        notion_credentials = db.session.query(DataSourceOauthBinding).filter_by(provider="notion").all()
+        notion_credentials = db.session.scalars(
+            select(DataSourceOauthBinding).where(DataSourceOauthBinding.provider == "notion")
+        ).all()
        if notion_credentials:
            notion_credentials_tenant_mapping: dict[str, list[DataSourceOauthBinding]] = {}
            for notion_credential in notion_credentials:
@@ -201,7 +208,7 @@ def transform_datasource_credentials(environment: str):
                    notion_credentials_tenant_mapping[tenant_id] = []
                notion_credentials_tenant_mapping[tenant_id].append(notion_credential)
            for tenant_id, notion_tenant_credentials in notion_credentials_tenant_mapping.items():
-                tenant = db.session.query(Tenant).filter_by(id=tenant_id).first()
+                tenant = db.session.scalar(select(Tenant).where(Tenant.id == tenant_id))
                if not tenant:
                    continue
                try:
@@ -250,7 +257,9 @@ def transform_datasource_credentials(environment: str):
                db.session.commit()
        # deal firecrawl credentials
        deal_firecrawl_count = 0
-        firecrawl_credentials = db.session.query(DataSourceApiKeyAuthBinding).filter_by(provider="firecrawl").all()
+        firecrawl_credentials = db.session.scalars(
+            select(DataSourceApiKeyAuthBinding).where(DataSourceApiKeyAuthBinding.provider == "firecrawl")
+        ).all()
        if firecrawl_credentials:
            firecrawl_credentials_tenant_mapping: dict[str, list[DataSourceApiKeyAuthBinding]] = {}
            for firecrawl_credential in firecrawl_credentials:
@@ -259,7 +268,7 @@ def transform_datasource_credentials(environment: str):
                    firecrawl_credentials_tenant_mapping[tenant_id] = []
                firecrawl_credentials_tenant_mapping[tenant_id].append(firecrawl_credential)
            for tenant_id, firecrawl_tenant_credentials in firecrawl_credentials_tenant_mapping.items():
-                tenant = db.session.query(Tenant).filter_by(id=tenant_id).first()
+                tenant = db.session.scalar(select(Tenant).where(Tenant.id == tenant_id))
                if not tenant:
                    continue
                try:
@@ -312,7 +321,9 @@ def transform_datasource_credentials(environment: str):
                db.session.commit()
        # deal jina credentials
        deal_jina_count = 0
-        jina_credentials = db.session.query(DataSourceApiKeyAuthBinding).filter_by(provider="jinareader").all()
+        jina_credentials = db.session.scalars(
+            select(DataSourceApiKeyAuthBinding).where(DataSourceApiKeyAuthBinding.provider == "jinareader")
+        ).all()
        if jina_credentials:
            jina_credentials_tenant_mapping: dict[str, list[DataSourceApiKeyAuthBinding]] = {}
            for jina_credential in jina_credentials:
@@ -321,7 +332,7 @@ def transform_datasource_credentials(environment: str):
                    jina_credentials_tenant_mapping[tenant_id] = []
                jina_credentials_tenant_mapping[tenant_id].append(jina_credential)
            for tenant_id, jina_tenant_credentials in jina_credentials_tenant_mapping.items():
-                tenant = db.session.query(Tenant).filter_by(id=tenant_id).first()
+                tenant = db.session.scalar(select(Tenant).where(Tenant.id == tenant_id))
                if not tenant:
                    continue
                try:
--- a/api/commands/storage.py
+++ b/api/commands/storage.py
@@ -1,7 +1,10 @@
 import json
+from typing import cast

 import click
 import sqlalchemy as sa
+from sqlalchemy import update
+from sqlalchemy.engine import CursorResult

 from configs import dify_config
 from extensions.ext_database import db
@@ -740,14 +743,17 @@ def migrate_oss(
        else:
            try:
                source_storage_type = StorageType.LOCAL if is_source_local else StorageType.OPENDAL
-                updated = (
-                    db.session.query(UploadFile)
-                    .where(
-                        UploadFile.storage_type == source_storage_type,
-                        UploadFile.key.in_(copied_upload_file_keys),
-                    )
-                    .update({UploadFile.storage_type: dify_config.STORAGE_TYPE}, synchronize_session=False)
-                )
+                updated = cast(
+                    CursorResult,
+                    db.session.execute(
+                        update(UploadFile)
+                        .where(
+                            UploadFile.storage_type == source_storage_type,
+                            UploadFile.key.in_(copied_upload_file_keys),
+                        )
+                        .values(storage_type=dify_config.STORAGE_TYPE)
+                    ),
+                ).rowcount
                db.session.commit()
                click.echo(click.style(f"Updated storage_type for {updated} upload_files records.", fg="green"))
            except Exception as e:
--- a/api/commands/system.py
+++ b/api/commands/system.py
@@ -2,6 +2,7 @@ import logging

 import click
 import sqlalchemy as sa
+from sqlalchemy import delete, select, update
 from sqlalchemy.orm import sessionmaker

 from configs import dify_config
@@ -41,7 +42,7 @@ def reset_encrypt_key_pair():
        click.echo(click.style("This command is only for SELF_HOSTED installations.", fg="red"))
        return
    with sessionmaker(db.engine, expire_on_commit=False).begin() as session:
-        tenants = session.query(Tenant).all()
+        tenants = session.scalars(select(Tenant)).all()
        for tenant in tenants:
            if not tenant:
                click.echo(click.style("No workspaces found. Run /install first.", fg="red"))
@@ -49,8 +50,8 @@ def reset_encrypt_key_pair():

            tenant.encrypt_public_key = generate_key_pair(tenant.id)

-            session.query(Provider).where(Provider.provider_type == "custom", Provider.tenant_id == tenant.id).delete()
-            session.query(ProviderModel).where(ProviderModel.tenant_id == tenant.id).delete()
+            session.execute(delete(Provider).where(Provider.provider_type == "custom", Provider.tenant_id == tenant.id))
+            session.execute(delete(ProviderModel).where(ProviderModel.tenant_id == tenant.id))

            click.echo(
                click.style(
@@ -93,7 +94,7 @@ def convert_to_agent_apps():
                app_id = str(i.id)
                if app_id not in proceeded_app_ids:
                    proceeded_app_ids.append(app_id)
-                    app = db.session.query(App).where(App.id == app_id).first()
+                    app = db.session.scalar(select(App).where(App.id == app_id))
                    if app is not None:
                        apps.append(app)

@@ -108,8 +109,8 @@ def convert_to_agent_apps():
                db.session.commit()

                # update conversation mode to agent
-                db.session.query(Conversation).where(Conversation.app_id == app.id).update(
-                    {Conversation.mode: AppMode.AGENT_CHAT}
+                db.session.execute(
+                    update(Conversation).where(Conversation.app_id == app.id).values(mode=AppMode.AGENT_CHAT)
                )

                db.session.commit()
@@ -177,7 +178,7 @@ where sites.id is null limit 1000"""
                    continue

                try:
-                    app = db.session.query(App).where(App.id == app_id).first()
+                    app = db.session.scalar(select(App).where(App.id == app_id))
                    if not app:
                        logger.info("App %s not found", app_id)
                        continue
--- a/api/commands/vector.py
+++ b/api/commands/vector.py
@@ -10,6 +10,7 @@ from configs import dify_config
 from core.rag.datasource.vdb.vector_factory import Vector
 from core.rag.datasource.vdb.vector_type import VectorType
 from core.rag.index_processor.constant.built_in_field import BuiltInField
+from core.rag.index_processor.constant.index_type import IndexStructureType, IndexTechniqueType
 from core.rag.models.document import ChildDocument, Document
 from extensions.ext_database import db
 from models.dataset import Dataset, DatasetCollectionBinding, DatasetMetadata, DatasetMetadataBinding, DocumentSegment
@@ -41,14 +42,13 @@ def migrate_annotation_vector_database():
            # get apps info
            per_page = 50
            with sessionmaker(db.engine, expire_on_commit=False).begin() as session:
-                apps = (
-                    session.query(App)
+                apps = session.scalars(
+                    select(App)
                    .where(App.status == "normal")
                    .order_by(App.created_at.desc())
                    .limit(per_page)
                    .offset((page - 1) * per_page)
-                    .all()
-                )
+                ).all()
            if not apps:
                break
        except SQLAlchemyError:
@@ -63,8 +63,8 @@ def migrate_annotation_vector_database():
            try:
                click.echo(f"Creating app annotation index: {app.id}")
                with sessionmaker(db.engine, expire_on_commit=False).begin() as session:
-                    app_annotation_setting = (
-                        session.query(AppAnnotationSetting).where(AppAnnotationSetting.app_id == app.id).first()
+                    app_annotation_setting = session.scalar(
+                        select(AppAnnotationSetting).where(AppAnnotationSetting.app_id == app.id).limit(1)
                    )

                    if not app_annotation_setting:
@@ -72,10 +72,10 @@ def migrate_annotation_vector_database():
                        click.echo(f"App annotation setting disabled: {app.id}")
                        continue
                    # get dataset_collection_binding info
-                    dataset_collection_binding = (
-                        session.query(DatasetCollectionBinding)
-                        .where(DatasetCollectionBinding.id == app_annotation_setting.collection_binding_id)
-                        .first()
+                    dataset_collection_binding = session.scalar(
+                        select(DatasetCollectionBinding).where(
+                            DatasetCollectionBinding.id == app_annotation_setting.collection_binding_id
+                        )
                    )
                    if not dataset_collection_binding:
                        click.echo(f"App annotation collection binding not found: {app.id}")
@@ -86,7 +86,7 @@ def migrate_annotation_vector_database():
                dataset = Dataset(
                    id=app.id,
                    tenant_id=app.tenant_id,
-                    indexing_technique="high_quality",
+                    indexing_technique=IndexTechniqueType.HIGH_QUALITY,
                    embedding_model_provider=dataset_collection_binding.provider_name,
                    embedding_model=dataset_collection_binding.model_name,
                    collection_binding_id=dataset_collection_binding.id,
@@ -178,7 +178,9 @@ def migrate_knowledge_vector_database():
    while True:
        try:
            stmt = (
-                select(Dataset).where(Dataset.indexing_technique == "high_quality").order_by(Dataset.created_at.desc())
+                select(Dataset)
+                .where(Dataset.indexing_technique == IndexTechniqueType.HIGH_QUALITY)
+                .order_by(Dataset.created_at.desc())
            )

            datasets = db.paginate(select=stmt, page=page, per_page=50, max_per_page=50, error_out=False)
@@ -205,11 +207,11 @@ def migrate_knowledge_vector_database():
                    collection_name = Dataset.gen_collection_name_by_id(dataset_id)
                elif vector_type == VectorType.QDRANT:
                    if dataset.collection_binding_id:
-                        dataset_collection_binding = (
-                            db.session.query(DatasetCollectionBinding)
-                            .where(DatasetCollectionBinding.id == dataset.collection_binding_id)
-                            .one_or_none()
-                        )
+                        dataset_collection_binding = db.session.execute(
+                            select(DatasetCollectionBinding).where(
+                                DatasetCollectionBinding.id == dataset.collection_binding_id
+                            )
+                        ).scalar_one_or_none()
                        if dataset_collection_binding:
                            collection_name = dataset_collection_binding.collection_name
                        else:
@@ -270,7 +272,7 @@ def migrate_knowledge_vector_database():
                                "dataset_id": segment.dataset_id,
                            },
                        )
-                        if dataset_document.doc_form == "hierarchical_model":
+                        if dataset_document.doc_form == IndexStructureType.PARENT_CHILD_INDEX:
                            child_chunks = segment.get_child_chunks()
                            if child_chunks:
                                child_documents = []
@@ -334,7 +336,7 @@ def add_qdrant_index(field: str):
    create_count = 0

    try:
-        bindings = db.session.query(DatasetCollectionBinding).all()
+        bindings = db.session.scalars(select(DatasetCollectionBinding)).all()
        if not bindings:
            click.echo(click.style("No dataset collection bindings found.", fg="red"))
            return
@@ -421,10 +423,10 @@ def old_metadata_migration():
                        if field.value == key:
                            break
                    else:
-                        dataset_metadata = (
-                            db.session.query(DatasetMetadata)
+                        dataset_metadata = db.session.scalar(
+                            select(DatasetMetadata)
                            .where(DatasetMetadata.dataset_id == document.dataset_id, DatasetMetadata.name == key)
-                            .first()
+                            .limit(1)
                        )
                        if not dataset_metadata:
                            dataset_metadata = DatasetMetadata(
@@ -436,7 +438,7 @@ def old_metadata_migration():
                            )
                            db.session.add(dataset_metadata)
                            db.session.flush()
-                            dataset_metadata_binding = DatasetMetadataBinding(
+                            dataset_metadata_binding: DatasetMetadataBinding | None = DatasetMetadataBinding(
                                tenant_id=document.tenant_id,
                                dataset_id=document.dataset_id,
                                metadata_id=dataset_metadata.id,
@@ -445,14 +447,14 @@ def old_metadata_migration():
                            )
                            db.session.add(dataset_metadata_binding)
                        else:
-                            dataset_metadata_binding = (
-                                db.session.query(DatasetMetadataBinding)  # type: ignore
+                            dataset_metadata_binding = db.session.scalar(
+                                select(DatasetMetadataBinding)
                                .where(
                                    DatasetMetadataBinding.dataset_id == document.dataset_id,
                                    DatasetMetadataBinding.document_id == document.id,
                                    DatasetMetadataBinding.metadata_id == dataset_metadata.id,
                                )
-                                .first()
+                                .limit(1)
                            )
                            if not dataset_metadata_binding:
                                dataset_metadata_binding = DatasetMetadataBinding(
--- a/api/configs/middleware/vdb/baidu_vector_config.py
+++ b/api/configs/middleware/vdb/baidu_vector_config.py
@@ -51,3 +51,18 @@ class BaiduVectorDBConfig(BaseSettings):
        description="Parser mode for inverted index in Baidu Vector Database (default is COARSE_MODE)",
        default="COARSE_MODE",
    )
+
+    BAIDU_VECTOR_DB_AUTO_BUILD_ROW_COUNT_INCREMENT: int = Field(
+        description="Auto build row count increment threshold (default is 500)",
+        default=500,
+    )
+
+    BAIDU_VECTOR_DB_AUTO_BUILD_ROW_COUNT_INCREMENT_RATIO: float = Field(
+        description="Auto build row count increment ratio threshold (default is 0.05)",
+        default=0.05,
+    )
+
+    BAIDU_VECTOR_DB_REBUILD_INDEX_TIMEOUT_IN_SECONDS: int = Field(
+        description="Timeout in seconds for rebuilding the index in Baidu Vector Database (default is 300 seconds)",
+        default=300,
+    )
--- a/api/controllers/console/apikey.py
+++ b/api/controllers/console/apikey.py
@@ -1,7 +1,7 @@
 import flask_restx
 from flask_restx import Resource, fields, marshal_with
 from flask_restx._http import HTTPStatus
-from sqlalchemy import select
+from sqlalchemy import delete, func, select
 from sqlalchemy.orm import Session
 from werkzeug.exceptions import Forbidden

@@ -9,6 +9,7 @@ from extensions.ext_database import db
 from libs.helper import TimestampField
 from libs.login import current_account_with_tenant, login_required
 from models.dataset import Dataset
+from models.enums import ApiTokenType
 from models.model import ApiToken, App
 from services.api_token_service import ApiTokenCache

@@ -33,16 +34,10 @@ api_key_list_model = console_ns.model(


 def _get_resource(resource_id, tenant_id, resource_model):
-    if resource_model == App:
-        with Session(db.engine) as session:
-            resource = session.execute(
-                select(resource_model).filter_by(id=resource_id, tenant_id=tenant_id)
-            ).scalar_one_or_none()
-    else:
-        with Session(db.engine) as session:
-            resource = session.execute(
-                select(resource_model).filter_by(id=resource_id, tenant_id=tenant_id)
-            ).scalar_one_or_none()
+    with Session(db.engine) as session:
+        resource = session.execute(
+            select(resource_model).filter_by(id=resource_id, tenant_id=tenant_id)
+        ).scalar_one_or_none()

    if resource is None:
        flask_restx.abort(HTTPStatus.NOT_FOUND, message=f"{resource_model.__name__} not found.")
@@ -53,7 +48,7 @@ def _get_resource(resource_id, tenant_id, resource_model):
 class BaseApiKeyListResource(Resource):
    method_decorators = [account_initialization_required, login_required, setup_required]

-    resource_type: str | None = None
+    resource_type: ApiTokenType | None = None
    resource_model: type | None = None
    resource_id_field: str | None = None
    token_prefix: str | None = None
@@ -80,10 +75,13 @@ class BaseApiKeyListResource(Resource):
        resource_id = str(resource_id)
        _, current_tenant_id = current_account_with_tenant()
        _get_resource(resource_id, current_tenant_id, self.resource_model)
-        current_key_count = (
-            db.session.query(ApiToken)
-            .where(ApiToken.type == self.resource_type, getattr(ApiToken, self.resource_id_field) == resource_id)
-            .count()
+        current_key_count: int = (
+            db.session.scalar(
+                select(func.count(ApiToken.id)).where(
+                    ApiToken.type == self.resource_type, getattr(ApiToken, self.resource_id_field) == resource_id
+                )
+            )
+            or 0
        )

        if current_key_count >= self.max_keys:
@@ -94,6 +92,7 @@ class BaseApiKeyListResource(Resource):
            )

        key = ApiToken.generate_api_key(self.token_prefix or "", 24)
+        assert self.resource_type is not None, "resource_type must be set"
        api_token = ApiToken()
        setattr(api_token, self.resource_id_field, resource_id)
        api_token.tenant_id = current_tenant_id
@@ -107,7 +106,7 @@ class BaseApiKeyListResource(Resource):
 class BaseApiKeyResource(Resource):
    method_decorators = [account_initialization_required, login_required, setup_required]

-    resource_type: str | None = None
+    resource_type: ApiTokenType | None = None
    resource_model: type | None = None
    resource_id_field: str | None = None

@@ -119,14 +118,14 @@ class BaseApiKeyResource(Resource):
        if not current_user.is_admin_or_owner:
            raise Forbidden()

-        key = (
-            db.session.query(ApiToken)
+        key = db.session.scalar(
+            select(ApiToken)
            .where(
                getattr(ApiToken, self.resource_id_field) == resource_id,
                ApiToken.type == self.resource_type,
                ApiToken.id == api_key_id,
            )
-            .first()
+            .limit(1)
        )

        if key is None:
@@ -137,7 +136,7 @@ class BaseApiKeyResource(Resource):
        assert key is not None  # nosec - for type checker only
        ApiTokenCache.delete(key.token, key.type)

-        db.session.query(ApiToken).where(ApiToken.id == api_key_id).delete()
+        db.session.execute(delete(ApiToken).where(ApiToken.id == api_key_id))
        db.session.commit()

        return {"result": "success"}, 204
@@ -162,7 +161,7 @@ class AppApiKeyListResource(BaseApiKeyListResource):
        """Create a new API key for an app"""
        return super().post(resource_id)

-    resource_type = "app"
+    resource_type = ApiTokenType.APP
    resource_model = App
    resource_id_field = "app_id"
    token_prefix = "app-"
@@ -178,7 +177,7 @@ class AppApiKeyResource(BaseApiKeyResource):
        """Delete an API key for an app"""
        return super().delete(resource_id, api_key_id)

-    resource_type = "app"
+    resource_type = ApiTokenType.APP
    resource_model = App
    resource_id_field = "app_id"

@@ -202,7 +201,7 @@ class DatasetApiKeyListResource(BaseApiKeyListResource):
        """Create a new API key for a dataset"""
        return super().post(resource_id)

-    resource_type = "dataset"
+    resource_type = ApiTokenType.DATASET
    resource_model = Dataset
    resource_id_field = "dataset_id"
    token_prefix = "ds-"
@@ -218,6 +217,6 @@ class DatasetApiKeyResource(BaseApiKeyResource):
        """Delete an API key for a dataset"""
        return super().delete(resource_id, api_key_id)

-    resource_type = "dataset"
+    resource_type = ApiTokenType.DATASET
    resource_model = Dataset
    resource_id_field = "dataset_id"
--- a/api/controllers/console/app/app.py
+++ b/api/controllers/console/app/app.py
@@ -95,7 +95,7 @@ class CreateAppPayload(BaseModel):
    name: str = Field(..., min_length=1, description="App name")
    description: str | None = Field(default=None, description="App description (max 400 chars)", max_length=400)
    mode: Literal["chat", "agent-chat", "advanced-chat", "workflow", "completion"] = Field(..., description="App mode")
-    icon_type: str | None = Field(default=None, description="Icon type")
+    icon_type: IconType | None = Field(default=None, description="Icon type")
    icon: str | None = Field(default=None, description="Icon")
    icon_background: str | None = Field(default=None, description="Icon background color")

@@ -103,7 +103,7 @@ class CreateAppPayload(BaseModel):
 class UpdateAppPayload(BaseModel):
    name: str = Field(..., min_length=1, description="App name")
    description: str | None = Field(default=None, description="App description (max 400 chars)", max_length=400)
-    icon_type: str | None = Field(default=None, description="Icon type")
+    icon_type: IconType | None = Field(default=None, description="Icon type")
    icon: str | None = Field(default=None, description="Icon")
    icon_background: str | None = Field(default=None, description="Icon background color")
    use_icon_as_answer_icon: bool | None = Field(default=None, description="Use icon as answer icon")
@@ -113,7 +113,7 @@ class UpdateAppPayload(BaseModel):
 class CopyAppPayload(BaseModel):
    name: str | None = Field(default=None, description="Name for the copied app")
    description: str | None = Field(default=None, description="Description for the copied app", max_length=400)
-    icon_type: str | None = Field(default=None, description="Icon type")
+    icon_type: IconType | None = Field(default=None, description="Icon type")
    icon: str | None = Field(default=None, description="Icon")
    icon_background: str | None = Field(default=None, description="Icon background color")

@@ -594,7 +594,7 @@ class AppApi(Resource):
        args_dict: AppService.ArgsDict = {
            "name": args.name,
            "description": args.description or "",
-            "icon_type": args.icon_type or "",
+            "icon_type": args.icon_type,
            "icon": args.icon or "",
            "icon_background": args.icon_background or "",
            "use_icon_as_answer_icon": args.use_icon_as_answer_icon or False,
--- a/api/controllers/console/app/conversation.py
+++ b/api/controllers/console/app/conversation.py
@@ -5,7 +5,7 @@ from flask import abort, request
 from flask_restx import Resource, fields, marshal_with
 from pydantic import BaseModel, Field, field_validator
 from sqlalchemy import func, or_
-from sqlalchemy.orm import joinedload
+from sqlalchemy.orm import selectinload
 from werkzeug.exceptions import NotFound

 from controllers.console import console_ns
@@ -376,8 +376,12 @@ class CompletionConversationApi(Resource):

        # FIXME, the type ignore in this file
        if args.annotation_status == "annotated":
-            query = query.options(joinedload(Conversation.message_annotations)).join(  # type: ignore
-                MessageAnnotation, MessageAnnotation.conversation_id == Conversation.id
+            query = (
+                query.options(selectinload(Conversation.message_annotations))  # type: ignore[arg-type]
+                .join(  # type: ignore
+                    MessageAnnotation, MessageAnnotation.conversation_id == Conversation.id
+                )
+                .distinct()
            )
        elif args.annotation_status == "not_annotated":
            query = (
@@ -454,9 +458,7 @@ class ChatConversationApi(Resource):
        args = ChatConversationQuery.model_validate(request.args.to_dict(flat=True))  # type: ignore

        subquery = (
-            db.session.query(
-                Conversation.id.label("conversation_id"), EndUser.session_id.label("from_end_user_session_id")
-            )
+            sa.select(Conversation.id.label("conversation_id"), EndUser.session_id.label("from_end_user_session_id"))
            .outerjoin(EndUser, Conversation.from_end_user_id == EndUser.id)
            .subquery()
        )
@@ -511,8 +513,12 @@ class ChatConversationApi(Resource):

        match args.annotation_status:
            case "annotated":
-                query = query.options(joinedload(Conversation.message_annotations)).join(  # type: ignore
-                    MessageAnnotation, MessageAnnotation.conversation_id == Conversation.id
+                query = (
+                    query.options(selectinload(Conversation.message_annotations))  # type: ignore[arg-type]
+                    .join(  # type: ignore
+                        MessageAnnotation, MessageAnnotation.conversation_id == Conversation.id
+                    )
+                    .distinct()
                )
            case "not_annotated":
                query = (
@@ -587,10 +593,8 @@ class ChatConversationDetailApi(Resource):

 def _get_conversation(app_model, conversation_id):
    current_user, _ = current_account_with_tenant()
-    conversation = (
-        db.session.query(Conversation)
-        .where(Conversation.id == conversation_id, Conversation.app_id == app_model.id)
-        .first()
+    conversation = db.session.scalar(
+        sa.select(Conversation).where(Conversation.id == conversation_id, Conversation.app_id == app_model.id).limit(1)
    )

    if not conversation:
--- a/api/controllers/console/app/generator.py
+++ b/api/controllers/console/app/generator.py
@@ -168,7 +168,7 @@ class InstructionGenerateApi(Resource):
        try:
            # Generate from nothing for a workflow node
            if (args.current in (code_template, "")) and args.node_id != "":
-                app = db.session.query(App).where(App.id == args.flow_id).first()
+                app = db.session.get(App, args.flow_id)
                if not app:
                    return {"error": f"app {args.flow_id} not found"}, 400
                workflow = WorkflowService().get_draft_workflow(app_model=app)
--- a/api/controllers/console/app/mcp_server.py
+++ b/api/controllers/console/app/mcp_server.py
@@ -2,6 +2,7 @@ import json

 from flask_restx import Resource, marshal_with
 from pydantic import BaseModel, Field
+from sqlalchemy import select
 from werkzeug.exceptions import NotFound

 from controllers.console import console_ns
@@ -47,7 +48,7 @@ class AppMCPServerController(Resource):
    @get_app_model
    @marshal_with(app_server_model)
    def get(self, app_model):
-        server = db.session.query(AppMCPServer).where(AppMCPServer.app_id == app_model.id).first()
+        server = db.session.scalar(select(AppMCPServer).where(AppMCPServer.app_id == app_model.id).limit(1))
        return server

    @console_ns.doc("create_app_mcp_server")
@@ -98,18 +99,18 @@ class AppMCPServerController(Resource):
    @edit_permission_required
    def put(self, app_model):
        payload = MCPServerUpdatePayload.model_validate(console_ns.payload or {})
-        server = db.session.query(AppMCPServer).where(AppMCPServer.id == payload.id).first()
+        server = db.session.get(AppMCPServer, payload.id)
        if not server:
            raise NotFound()

        description = payload.description
-        if description is None:
-            pass
-        elif not description:
+        if description is None or not description:
            server.description = app_model.description or ""
        else:
            server.description = description

+        server.name = app_model.name
+
        server.parameters = json.dumps(payload.parameters, ensure_ascii=False)
        if payload.status:
            try:
@@ -135,11 +136,10 @@ class AppMCPServerRefreshController(Resource):
    @edit_permission_required
    def get(self, server_id):
        _, current_tenant_id = current_account_with_tenant()
-        server = (
-            db.session.query(AppMCPServer)
-            .where(AppMCPServer.id == server_id)
-            .where(AppMCPServer.tenant_id == current_tenant_id)
-            .first()
+        server = db.session.scalar(
+            select(AppMCPServer)
+            .where(AppMCPServer.id == server_id, AppMCPServer.tenant_id == current_tenant_id)
+            .limit(1)
        )
        if not server:
            raise NotFound()
--- a/api/controllers/console/app/message.py
+++ b/api/controllers/console/app/message.py
@@ -4,7 +4,7 @@ from typing import Literal
 from flask import request
 from flask_restx import Resource, fields, marshal_with
 from pydantic import BaseModel, Field, field_validator
-from sqlalchemy import exists, select
+from sqlalchemy import exists, func, select
 from werkzeug.exceptions import InternalServerError, NotFound

 from controllers.common.schema import register_schema_models
@@ -30,6 +30,7 @@ from fields.raws import FilesContainedField
 from libs.helper import TimestampField, uuid_value
 from libs.infinite_scroll_pagination import InfiniteScrollPagination
 from libs.login import current_account_with_tenant, login_required
+from models.enums import FeedbackFromSource, FeedbackRating
 from models.model import AppMode, Conversation, Message, MessageAnnotation, MessageFeedback
 from services.errors.conversation import ConversationNotExistsError
 from services.errors.message import MessageNotExistsError, SuggestedQuestionsAfterAnswerDisabledError
@@ -243,27 +244,25 @@ class ChatMessageListApi(Resource):
    def get(self, app_model):
        args = ChatMessagesQuery.model_validate(request.args.to_dict())

-        conversation = (
-            db.session.query(Conversation)
+        conversation = db.session.scalar(
+            select(Conversation)
            .where(Conversation.id == args.conversation_id, Conversation.app_id == app_model.id)
-            .first()
+            .limit(1)
        )

        if not conversation:
            raise NotFound("Conversation Not Exists.")

        if args.first_id:
-            first_message = (
-                db.session.query(Message)
-                .where(Message.conversation_id == conversation.id, Message.id == args.first_id)
-                .first()
+            first_message = db.session.scalar(
+                select(Message).where(Message.conversation_id == conversation.id, Message.id == args.first_id).limit(1)
            )

            if not first_message:
                raise NotFound("First message not found")

-            history_messages = (
-                db.session.query(Message)
+            history_messages = db.session.scalars(
+                select(Message)
                .where(
                    Message.conversation_id == conversation.id,
                    Message.created_at < first_message.created_at,
@@ -271,16 +270,14 @@ class ChatMessageListApi(Resource):
                )
                .order_by(Message.created_at.desc())
                .limit(args.limit)
-                .all()
-            )
+            ).all()
        else:
-            history_messages = (
-                db.session.query(Message)
+            history_messages = db.session.scalars(
+                select(Message)
                .where(Message.conversation_id == conversation.id)
                .order_by(Message.created_at.desc())
                .limit(args.limit)
-                .all()
-            )
+            ).all()

        # Initialize has_more based on whether we have a full page
        if len(history_messages) == args.limit:
@@ -325,7 +322,9 @@ class MessageFeedbackApi(Resource):

        message_id = str(args.message_id)

-        message = db.session.query(Message).where(Message.id == message_id, Message.app_id == app_model.id).first()
+        message = db.session.scalar(
+            select(Message).where(Message.id == message_id, Message.app_id == app_model.id).limit(1)
+        )

        if not message:
            raise NotFound("Message Not Exists.")
@@ -335,7 +334,7 @@ class MessageFeedbackApi(Resource):
        if not args.rating and feedback:
            db.session.delete(feedback)
        elif args.rating and feedback:
-            feedback.rating = args.rating
+            feedback.rating = FeedbackRating(args.rating)
            feedback.content = args.content
        elif not args.rating and not feedback:
            raise ValueError("rating cannot be None when feedback not exists")
@@ -347,9 +346,9 @@ class MessageFeedbackApi(Resource):
                app_id=app_model.id,
                conversation_id=message.conversation_id,
                message_id=message.id,
-                rating=rating_value,
+                rating=FeedbackRating(rating_value),
                content=args.content,
-                from_source="admin",
+                from_source=FeedbackFromSource.ADMIN,
                from_account_id=current_user.id,
            )
            db.session.add(feedback)
@@ -374,7 +373,9 @@ class MessageAnnotationCountApi(Resource):
    @login_required
    @account_initialization_required
    def get(self, app_model):
-        count = db.session.query(MessageAnnotation).where(MessageAnnotation.app_id == app_model.id).count()
+        count = db.session.scalar(
+            select(func.count(MessageAnnotation.id)).where(MessageAnnotation.app_id == app_model.id)
+        )

        return {"count": count}

@@ -478,7 +479,9 @@ class MessageApi(Resource):
    def get(self, app_model, message_id: str):
        message_id = str(message_id)

-        message = db.session.query(Message).where(Message.id == message_id, Message.app_id == app_model.id).first()
+        message = db.session.scalar(
+            select(Message).where(Message.id == message_id, Message.app_id == app_model.id).limit(1)
+        )

        if not message:
            raise NotFound("Message Not Exists.")
--- a/api/controllers/console/app/model_config.py
+++ b/api/controllers/console/app/model_config.py
@@ -69,9 +69,7 @@ class ModelConfigResource(Resource):

        if app_model.mode == AppMode.AGENT_CHAT or app_model.is_agent:
            # get original app model config
-            original_app_model_config = (
-                db.session.query(AppModelConfig).where(AppModelConfig.id == app_model.app_model_config_id).first()
-            )
+            original_app_model_config = db.session.get(AppModelConfig, app_model.app_model_config_id)
            if original_app_model_config is None:
                raise ValueError("Original app model config not found")
            agent_mode = original_app_model_config.agent_mode_dict
--- a/api/controllers/console/app/site.py
+++ b/api/controllers/console/app/site.py
@@ -2,6 +2,7 @@ from typing import Literal

 from flask_restx import Resource, marshal_with
 from pydantic import BaseModel, Field, field_validator
+from sqlalchemy import select
 from werkzeug.exceptions import NotFound

 from constants.languages import supported_language
@@ -75,7 +76,7 @@ class AppSite(Resource):
    def post(self, app_model):
        args = AppSiteUpdatePayload.model_validate(console_ns.payload or {})
        current_user, _ = current_account_with_tenant()
-        site = db.session.query(Site).where(Site.app_id == app_model.id).first()
+        site = db.session.scalar(select(Site).where(Site.app_id == app_model.id).limit(1))
        if not site:
            raise NotFound

@@ -124,7 +125,7 @@ class AppSiteAccessTokenReset(Resource):
    @marshal_with(app_site_model)
    def post(self, app_model):
        current_user, _ = current_account_with_tenant()
-        site = db.session.query(Site).where(Site.app_id == app_model.id).first()
+        site = db.session.scalar(select(Site).where(Site.app_id == app_model.id).limit(1))

        if not site:
            raise NotFound
--- a/api/controllers/console/app/workflow.py
+++ b/api/controllers/console/app/workflow.py
@@ -7,7 +7,7 @@ from flask import abort, request
 from flask_restx import Resource, fields, marshal_with
 from pydantic import BaseModel, Field, field_validator
 from sqlalchemy.orm import Session
-from werkzeug.exceptions import Forbidden, InternalServerError, NotFound
+from werkzeug.exceptions import BadRequest, Forbidden, InternalServerError, NotFound

 import services
 from controllers.console import console_ns
@@ -46,13 +46,14 @@ from models import App
 from models.model import AppMode
 from models.workflow import Workflow
 from services.app_generate_service import AppGenerateService
-from services.errors.app import WorkflowHashNotEqualError
+from services.errors.app import IsDraftWorkflowError, WorkflowHashNotEqualError, WorkflowNotFoundError
 from services.errors.llm import InvokeRateLimitError
 from services.workflow_service import DraftWorkflowDeletionError, WorkflowInUseError, WorkflowService

 logger = logging.getLogger(__name__)
 LISTENING_RETRY_IN = 2000
 DEFAULT_REF_TEMPLATE_SWAGGER_2_0 = "#/definitions/{model}"
+RESTORE_SOURCE_WORKFLOW_MUST_BE_PUBLISHED_MESSAGE = "source workflow must be published"

 # Register models for flask_restx to avoid dict type issues in Swagger
 # Register in dependency order: base models first, then dependent models
@@ -284,7 +285,9 @@ class DraftWorkflowApi(Resource):
        workflow_service = WorkflowService()

        try:
-            environment_variables_list = args.get("environment_variables") or []
+            environment_variables_list = Workflow.normalize_environment_variable_mappings(
+                args.get("environment_variables") or [],
+            )
            environment_variables = [
                variable_factory.build_environment_variable_from_mapping(obj) for obj in environment_variables_list
            ]
@@ -994,6 +997,43 @@ class PublishedAllWorkflowApi(Resource):
            }


+@console_ns.route("/apps/<uuid:app_id>/workflows/<string:workflow_id>/restore")
+class DraftWorkflowRestoreApi(Resource):
+    @console_ns.doc("restore_workflow_to_draft")
+    @console_ns.doc(description="Restore a published workflow version into the draft workflow")
+    @console_ns.doc(params={"app_id": "Application ID", "workflow_id": "Published workflow ID"})
+    @console_ns.response(200, "Workflow restored successfully")
+    @console_ns.response(400, "Source workflow must be published")
+    @console_ns.response(404, "Workflow not found")
+    @setup_required
+    @login_required
+    @account_initialization_required
+    @get_app_model(mode=[AppMode.ADVANCED_CHAT, AppMode.WORKFLOW])
+    @edit_permission_required
+    def post(self, app_model: App, workflow_id: str):
+        current_user, _ = current_account_with_tenant()
+        workflow_service = WorkflowService()
+
+        try:
+            workflow = workflow_service.restore_published_workflow_to_draft(
+                app_model=app_model,
+                workflow_id=workflow_id,
+                account=current_user,
+            )
+        except IsDraftWorkflowError as exc:
+            raise BadRequest(RESTORE_SOURCE_WORKFLOW_MUST_BE_PUBLISHED_MESSAGE) from exc
+        except WorkflowNotFoundError as exc:
+            raise NotFound(str(exc)) from exc
+        except ValueError as exc:
+            raise BadRequest(str(exc)) from exc
+
+        return {
+            "result": "success",
+            "hash": workflow.unique_hash,
+            "updated_at": TimestampField().format(workflow.updated_at or workflow.created_at),
+        }
+
+
 @console_ns.route("/apps/<uuid:app_id>/workflows/<string:workflow_id>")
 class WorkflowByIdApi(Resource):
    @console_ns.doc("update_workflow_by_id")
--- a/api/controllers/console/app/wraps.py
+++ b/api/controllers/console/app/wraps.py
@@ -2,6 +2,8 @@ from collections.abc import Callable
 from functools import wraps
 from typing import ParamSpec, TypeVar, Union

+from sqlalchemy import select
+
 from controllers.console.app.error import AppNotFoundError
 from extensions.ext_database import db
 from libs.login import current_account_with_tenant
@@ -15,16 +17,14 @@ R1 = TypeVar("R1")

 def _load_app_model(app_id: str) -> App | None:
    _, current_tenant_id = current_account_with_tenant()
-    app_model = (
-        db.session.query(App)
-        .where(App.id == app_id, App.tenant_id == current_tenant_id, App.status == "normal")
-        .first()
+    app_model = db.session.scalar(
+        select(App).where(App.id == app_id, App.tenant_id == current_tenant_id, App.status == "normal").limit(1)
    )
    return app_model


 def _load_app_model_with_trial(app_id: str) -> App | None:
-    app_model = db.session.query(App).where(App.id == app_id, App.status == "normal").first()
+    app_model = db.session.scalar(select(App).where(App.id == app_id, App.status == "normal").limit(1))
    return app_model


--- a/api/controllers/console/auth/email_register.py
+++ b/api/controllers/console/auth/email_register.py
@@ -1,7 +1,7 @@
 from flask import request
 from flask_restx import Resource
 from pydantic import BaseModel, Field, field_validator
-from sqlalchemy.orm import Session
+from sqlalchemy.orm import sessionmaker

 from configs import dify_config
 from constants.languages import languages
@@ -73,7 +73,7 @@ class EmailRegisterSendEmailApi(Resource):
        if dify_config.BILLING_ENABLED and BillingService.is_email_in_freeze(normalized_email):
            raise AccountInFreezeError()

-        with Session(db.engine) as session:
+        with sessionmaker(db.engine).begin() as session:
            account = AccountService.get_account_by_email_with_case_fallback(args.email, session=session)
        token = AccountService.send_email_register_email(email=normalized_email, account=account, language=language)
        return {"result": "success", "data": token}
@@ -145,7 +145,7 @@ class EmailRegisterResetApi(Resource):
        email = register_data.get("email", "")
        normalized_email = email.lower()

-        with Session(db.engine) as session:
+        with sessionmaker(db.engine).begin() as session:
            account = AccountService.get_account_by_email_with_case_fallback(email, session=session)

            if account:
--- a/api/controllers/console/auth/forgot_password.py
+++ b/api/controllers/console/auth/forgot_password.py
@@ -4,7 +4,7 @@ import secrets
 from flask import request
 from flask_restx import Resource
 from pydantic import BaseModel, Field, field_validator
-from sqlalchemy.orm import Session
+from sqlalchemy.orm import sessionmaker

 from controllers.common.schema import register_schema_models
 from controllers.console import console_ns
@@ -102,7 +102,7 @@ class ForgotPasswordSendEmailApi(Resource):
        else:
            language = "en-US"

-        with Session(db.engine) as session:
+        with sessionmaker(db.engine).begin() as session:
            account = AccountService.get_account_by_email_with_case_fallback(args.email, session=session)

        token = AccountService.send_reset_password_email(
@@ -201,7 +201,7 @@ class ForgotPasswordResetApi(Resource):
        password_hashed = hash_password(args.new_password, salt)

        email = reset_data.get("email", "")
-        with Session(db.engine) as session:
+        with sessionmaker(db.engine).begin() as session:
            account = AccountService.get_account_by_email_with_case_fallback(email, session=session)

            if account:
@@ -215,7 +215,6 @@ class ForgotPasswordResetApi(Resource):
        # Update existing account credentials
        account.password = base64.b64encode(password_hashed).decode()
        account.password_salt = base64.b64encode(salt).decode()
-        session.commit()

        # Create workspace if needed
        if (
--- a/api/controllers/console/auth/oauth.py
+++ b/api/controllers/console/auth/oauth.py
@@ -1,9 +1,10 @@
 import logging
+import urllib.parse

 import httpx
 from flask import current_app, redirect, request
 from flask_restx import Resource
-from sqlalchemy.orm import Session
+from sqlalchemy.orm import sessionmaker
 from werkzeug.exceptions import Unauthorized

 from configs import dify_config
@@ -112,6 +113,9 @@ class OAuthCallback(Resource):
                error_text = e.response.text
            logger.exception("An error occurred during the OAuth process with %s: %s", provider, error_text)
            return {"error": "OAuth process failed"}, 400
+        except ValueError as e:
+            logger.warning("OAuth error with %s", provider, exc_info=True)
+            return redirect(f"{dify_config.CONSOLE_WEB_URL}/signin?message={urllib.parse.quote(str(e))}")

        if invite_token and RegisterService.is_valid_invite_token(invite_token):
            invitation = RegisterService.get_invitation_by_token(token=invite_token)
@@ -176,7 +180,7 @@ def _get_account_by_openid_or_email(provider: str, user_info: OAuthUserInfo) ->
    account: Account | None = Account.get_by_openid(provider, user_info.id)

    if not account:
-        with Session(db.engine) as session:
+        with sessionmaker(db.engine).begin() as session:
            account = AccountService.get_account_by_email_with_case_fallback(user_info.email, session=session)

    return account
--- a/api/controllers/console/datasets/datasets.py
+++ b/api/controllers/console/datasets/datasets.py
@@ -3,7 +3,7 @@ from typing import Any, cast
 from flask import request
 from flask_restx import Resource, fields, marshal, marshal_with
 from pydantic import BaseModel, Field, field_validator
-from sqlalchemy import select
+from sqlalchemy import func, select
 from werkzeug.exceptions import Forbidden, NotFound

 import services
@@ -29,6 +29,7 @@ from core.provider_manager import ProviderManager
 from core.rag.datasource.vdb.vector_type import VectorType
 from core.rag.extractor.entity.datasource_type import DatasourceType
 from core.rag.extractor.entity.extract_setting import ExtractSetting, NotionInfo, WebsiteInfo
+from core.rag.index_processor.constant.index_type import IndexTechniqueType
 from core.rag.retrieval.retrieval_methods import RetrievalMethod
 from dify_graph.model_runtime.entities.model_entities import ModelType
 from extensions.ext_database import db
@@ -54,7 +55,7 @@ from fields.document_fields import document_status_fields
 from libs.login import current_account_with_tenant, login_required
 from models import ApiToken, Dataset, Document, DocumentSegment, UploadFile
 from models.dataset import DatasetPermission, DatasetPermissionEnum
-from models.enums import SegmentStatus
+from models.enums import ApiTokenType, SegmentStatus
 from models.provider_ids import ModelProviderID
 from services.api_token_service import ApiTokenCache
 from services.dataset_service import DatasetPermissionService, DatasetService, DocumentService
@@ -355,7 +356,7 @@ class DatasetListApi(Resource):

        for item in data:
            # convert embedding_model_provider to plugin standard format
-            if item["indexing_technique"] == "high_quality" and item["embedding_model_provider"]:
+            if item["indexing_technique"] == IndexTechniqueType.HIGH_QUALITY and item["embedding_model_provider"]:
                item["embedding_model_provider"] = str(ModelProviderID(item["embedding_model_provider"]))
                item_model = f"{item['embedding_model']}:{item['embedding_model_provider']}"
                if item_model in model_names:
@@ -436,7 +437,7 @@ class DatasetApi(Resource):
        except services.errors.account.NoPermissionError as e:
            raise Forbidden(str(e))
        data = cast(dict[str, Any], marshal(dataset, dataset_detail_fields))
-        if dataset.indexing_technique == "high_quality":
+        if dataset.indexing_technique == IndexTechniqueType.HIGH_QUALITY:
            if dataset.embedding_model_provider:
                provider_id = ModelProviderID(dataset.embedding_model_provider)
                data["embedding_model_provider"] = str(provider_id)
@@ -454,7 +455,7 @@ class DatasetApi(Resource):
        for embedding_model in embedding_models:
            model_names.append(f"{embedding_model.model}:{embedding_model.provider.provider}")

-        if data["indexing_technique"] == "high_quality":
+        if data["indexing_technique"] == IndexTechniqueType.HIGH_QUALITY:
            item_model = f"{data['embedding_model']}:{data['embedding_model_provider']}"
            if item_model in model_names:
                data["embedding_available"] = True
@@ -485,7 +486,7 @@ class DatasetApi(Resource):
        current_user, current_tenant_id = current_account_with_tenant()
        # check embedding model setting
        if (
-            payload.indexing_technique == "high_quality"
+            payload.indexing_technique == IndexTechniqueType.HIGH_QUALITY
            and payload.embedding_model_provider is not None
            and payload.embedding_model is not None
        ):
@@ -738,20 +739,23 @@ class DatasetIndexingStatusApi(Resource):
        documents_status = []
        for document in documents:
            completed_segments = (
-                db.session.query(DocumentSegment)
-                .where(
-                    DocumentSegment.completed_at.isnot(None),
-                    DocumentSegment.document_id == str(document.id),
-                    DocumentSegment.status != SegmentStatus.RE_SEGMENT,
+                db.session.scalar(
+                    select(func.count(DocumentSegment.id)).where(
+                        DocumentSegment.completed_at.isnot(None),
+                        DocumentSegment.document_id == str(document.id),
+                        DocumentSegment.status != SegmentStatus.RE_SEGMENT,
+                    )
                )
-                .count()
+                or 0
            )
            total_segments = (
-                db.session.query(DocumentSegment)
-                .where(
-                    DocumentSegment.document_id == str(document.id), DocumentSegment.status != SegmentStatus.RE_SEGMENT
+                db.session.scalar(
+                    select(func.count(DocumentSegment.id)).where(
+                        DocumentSegment.document_id == str(document.id),
+                        DocumentSegment.status != SegmentStatus.RE_SEGMENT,
+                    )
                )
-                .count()
+                or 0
            )
            # Create a dictionary with document attributes and additional fields
            document_dict = {
@@ -777,7 +781,7 @@ class DatasetIndexingStatusApi(Resource):
 class DatasetApiKeyApi(Resource):
    max_keys = 10
    token_prefix = "dataset-"
-    resource_type = "dataset"
+    resource_type = ApiTokenType.DATASET

    @console_ns.doc("get_dataset_api_keys")
    @console_ns.doc(description="Get dataset API keys")
@@ -802,9 +806,12 @@ class DatasetApiKeyApi(Resource):
        _, current_tenant_id = current_account_with_tenant()

        current_key_count = (
-            db.session.query(ApiToken)
-            .where(ApiToken.type == self.resource_type, ApiToken.tenant_id == current_tenant_id)
-            .count()
+            db.session.scalar(
+                select(func.count(ApiToken.id)).where(
+                    ApiToken.type == self.resource_type, ApiToken.tenant_id == current_tenant_id
+                )
+            )
+            or 0
        )

        if current_key_count >= self.max_keys:
@@ -826,7 +833,7 @@ class DatasetApiKeyApi(Resource):

@console_ns.route("/datasets/api-keys/<uuid:api_key_id>")
 class DatasetApiDeleteApi(Resource):
-    resource_type = "dataset"
+    resource_type = ApiTokenType.DATASET

    @console_ns.doc("delete_dataset_api_key")
    @console_ns.doc(description="Delete dataset API key")
@@ -839,14 +846,14 @@ class DatasetApiDeleteApi(Resource):
    def delete(self, api_key_id):
        _, current_tenant_id = current_account_with_tenant()
        api_key_id = str(api_key_id)
-        key = (
-            db.session.query(ApiToken)
+        key = db.session.scalar(
+            select(ApiToken)
            .where(
                ApiToken.tenant_id == current_tenant_id,
                ApiToken.type == self.resource_type,
                ApiToken.id == api_key_id,
            )
-            .first()
+            .limit(1)
        )

        if key is None:
@@ -857,7 +864,7 @@ class DatasetApiDeleteApi(Resource):
        assert key is not None  # nosec - for type checker only
        ApiTokenCache.delete(key.token, key.type)

-        db.session.query(ApiToken).where(ApiToken.id == api_key_id).delete()
+        db.session.delete(key)
        db.session.commit()

        return {"result": "success"}, 204
--- a/api/controllers/console/datasets/datasets_document.py
+++ b/api/controllers/console/datasets/datasets_document.py
@@ -10,7 +10,7 @@ import sqlalchemy as sa
 from flask import request, send_file
 from flask_restx import Resource, fields, marshal, marshal_with
 from pydantic import BaseModel, Field
-from sqlalchemy import asc, desc, select
+from sqlalchemy import asc, desc, func, select
 from werkzeug.exceptions import Forbidden, NotFound

 import services
@@ -27,6 +27,7 @@ from core.model_manager import ModelManager
 from core.plugin.impl.exc import PluginDaemonClientSideError
 from core.rag.extractor.entity.datasource_type import DatasourceType
 from core.rag.extractor.entity.extract_setting import ExtractSetting, NotionInfo, WebsiteInfo
+from core.rag.index_processor.constant.index_type import IndexTechniqueType
 from dify_graph.model_runtime.entities.model_entities import ModelType
 from dify_graph.model_runtime.errors.invoke import InvokeAuthorizationError
 from extensions.ext_database import db
@@ -211,12 +212,11 @@ class GetProcessRuleApi(Resource):
                raise Forbidden(str(e))

            # get the latest process rule
-            dataset_process_rule = (
-                db.session.query(DatasetProcessRule)
+            dataset_process_rule = db.session.scalar(
+                select(DatasetProcessRule)
                .where(DatasetProcessRule.dataset_id == document.dataset_id)
                .order_by(DatasetProcessRule.created_at.desc())
                .limit(1)
-                .one_or_none()
            )
            if dataset_process_rule:
                mode = dataset_process_rule.mode
@@ -298,6 +298,7 @@ class DatasetDocumentListApi(Resource):
        if sort == "hit_count":
            sub_query = (
                sa.select(DocumentSegment.document_id, sa.func.sum(DocumentSegment.hit_count).label("total_hit_count"))
+                .where(DocumentSegment.dataset_id == str(dataset_id))
                .group_by(DocumentSegment.document_id)
                .subquery()
            )
@@ -329,21 +330,23 @@ class DatasetDocumentListApi(Resource):
        if fetch:
            for document in documents:
                completed_segments = (
-                    db.session.query(DocumentSegment)
-                    .where(
-                        DocumentSegment.completed_at.isnot(None),
-                        DocumentSegment.document_id == str(document.id),
-                        DocumentSegment.status != SegmentStatus.RE_SEGMENT,
+                    db.session.scalar(
+                        select(func.count(DocumentSegment.id)).where(
+                            DocumentSegment.completed_at.isnot(None),
+                            DocumentSegment.document_id == str(document.id),
+                            DocumentSegment.status != SegmentStatus.RE_SEGMENT,
+                        )
                    )
-                    .count()
+                    or 0
                )
                total_segments = (
-                    db.session.query(DocumentSegment)
-                    .where(
-                        DocumentSegment.document_id == str(document.id),
-                        DocumentSegment.status != SegmentStatus.RE_SEGMENT,
+                    db.session.scalar(
+                        select(func.count(DocumentSegment.id)).where(
+                            DocumentSegment.document_id == str(document.id),
+                            DocumentSegment.status != SegmentStatus.RE_SEGMENT,
+                        )
                    )
-                    .count()
+                    or 0
                )
                document.completed_segments = completed_segments
                document.total_segments = total_segments
@@ -447,7 +450,7 @@ class DatasetInitApi(Resource):
            raise Forbidden()

        knowledge_config = KnowledgeConfig.model_validate(console_ns.payload or {})
-        if knowledge_config.indexing_technique == "high_quality":
+        if knowledge_config.indexing_technique == IndexTechniqueType.HIGH_QUALITY:
            if knowledge_config.embedding_model is None or knowledge_config.embedding_model_provider is None:
                raise ValueError("embedding model and embedding model provider are required for high quality indexing.")
            try:
@@ -461,7 +464,7 @@ class DatasetInitApi(Resource):
                is_multimodal = DatasetService.check_is_multimodal_model(
                    current_tenant_id, knowledge_config.embedding_model_provider, knowledge_config.embedding_model
                )
-                knowledge_config.is_multimodal = is_multimodal
+                knowledge_config.is_multimodal = is_multimodal  # pyrefly: ignore[bad-assignment]
            except InvokeAuthorizationError:
                raise ProviderNotInitializeError(
                    "No Embedding Model available. Please configure a valid provider in the Settings -> Model Provider."
@@ -520,10 +523,10 @@ class DocumentIndexingEstimateApi(DocumentResource):
            if data_source_info and "upload_file_id" in data_source_info:
                file_id = data_source_info["upload_file_id"]

-                file = (
-                    db.session.query(UploadFile)
+                file = db.session.scalar(
+                    select(UploadFile)
                    .where(UploadFile.tenant_id == document.tenant_id, UploadFile.id == file_id)
-                    .first()
+                    .limit(1)
                )

                # raise error if file not found
@@ -585,10 +588,10 @@ class DocumentBatchIndexingEstimateApi(DocumentResource):
                    if not data_source_info:
                        continue
                    file_id = data_source_info["upload_file_id"]
-                    file_detail = (
-                        db.session.query(UploadFile)
+                    file_detail = db.session.scalar(
+                        select(UploadFile)
                        .where(UploadFile.tenant_id == current_tenant_id, UploadFile.id == file_id)
-                        .first()
+                        .limit(1)
                    )

                    if file_detail is None:
@@ -671,20 +674,23 @@ class DocumentBatchIndexingStatusApi(DocumentResource):
        documents_status = []
        for document in documents:
            completed_segments = (
-                db.session.query(DocumentSegment)
-                .where(
-                    DocumentSegment.completed_at.isnot(None),
-                    DocumentSegment.document_id == str(document.id),
-                    DocumentSegment.status != SegmentStatus.RE_SEGMENT,
+                db.session.scalar(
+                    select(func.count(DocumentSegment.id)).where(
+                        DocumentSegment.completed_at.isnot(None),
+                        DocumentSegment.document_id == str(document.id),
+                        DocumentSegment.status != SegmentStatus.RE_SEGMENT,
+                    )
                )
-                .count()
+                or 0
            )
            total_segments = (
-                db.session.query(DocumentSegment)
-                .where(
-                    DocumentSegment.document_id == str(document.id), DocumentSegment.status != SegmentStatus.RE_SEGMENT
+                db.session.scalar(
+                    select(func.count(DocumentSegment.id)).where(
+                        DocumentSegment.document_id == str(document.id),
+                        DocumentSegment.status != SegmentStatus.RE_SEGMENT,
+                    )
                )
-                .count()
+                or 0
            )
            # Create a dictionary with document attributes and additional fields
            document_dict = {
@@ -722,18 +728,23 @@ class DocumentIndexingStatusApi(DocumentResource):
        document = self.get_document(dataset_id, document_id)

        completed_segments = (
-            db.session.query(DocumentSegment)
-            .where(
-                DocumentSegment.completed_at.isnot(None),
-                DocumentSegment.document_id == str(document_id),
-                DocumentSegment.status != SegmentStatus.RE_SEGMENT,
+            db.session.scalar(
+                select(func.count(DocumentSegment.id)).where(
+                    DocumentSegment.completed_at.isnot(None),
+                    DocumentSegment.document_id == str(document_id),
+                    DocumentSegment.status != SegmentStatus.RE_SEGMENT,
+                )
            )
-            .count()
+            or 0
        )
        total_segments = (
-            db.session.query(DocumentSegment)
-            .where(DocumentSegment.document_id == str(document_id), DocumentSegment.status != SegmentStatus.RE_SEGMENT)
-            .count()
+            db.session.scalar(
+                select(func.count(DocumentSegment.id)).where(
+                    DocumentSegment.document_id == str(document_id),
+                    DocumentSegment.status != SegmentStatus.RE_SEGMENT,
+                )
+            )
+            or 0
        )

        # Create a dictionary with document attributes and additional fields
@@ -1257,11 +1268,11 @@ class DocumentPipelineExecutionLogApi(DocumentResource):
        document = DocumentService.get_document(dataset.id, document_id)
        if not document:
            raise NotFound("Document not found.")
-        log = (
-            db.session.query(DocumentPipelineExecutionLog)
-            .filter_by(document_id=document_id)
+        log = db.session.scalar(
+            select(DocumentPipelineExecutionLog)
+            .where(DocumentPipelineExecutionLog.document_id == document_id)
            .order_by(DocumentPipelineExecutionLog.created_at.desc())
-            .first()
+            .limit(1)
        )
        if not log:
            return {
@@ -1327,7 +1338,7 @@ class DocumentGenerateSummaryApi(Resource):
            raise BadRequest("document_list cannot be empty.")

        # Check if dataset configuration supports summary generation
-        if dataset.indexing_technique != "high_quality":
+        if dataset.indexing_technique != IndexTechniqueType.HIGH_QUALITY:
            raise ValueError(
                f"Summary generation is only available for 'high_quality' indexing technique. "
                f"Current indexing technique: {dataset.indexing_technique}"
--- a/api/controllers/console/datasets/datasets_segments.py
+++ b/api/controllers/console/datasets/datasets_segments.py
@@ -26,6 +26,7 @@ from controllers.console.wraps import (
 )
 from core.errors.error import LLMBadRequestError, ProviderTokenNotInitError
 from core.model_manager import ModelManager
+from core.rag.index_processor.constant.index_type import IndexTechniqueType
 from dify_graph.model_runtime.entities.model_entities import ModelType
 from extensions.ext_database import db
 from extensions.ext_redis import redis_client
@@ -45,7 +46,7 @@ def _get_segment_with_summary(segment, dataset_id):
    """Helper function to marshal segment and add summary information."""
    from services.summary_index_service import SummaryIndexService

-    segment_dict = dict(marshal(segment, segment_fields))
+    segment_dict = dict(marshal(segment, segment_fields))  # type: ignore
    # Query summary for this segment (only enabled summaries)
    summary = SummaryIndexService.get_segment_summary(segment_id=segment.id, dataset_id=dataset_id)
    segment_dict["summary"] = summary.summary_content if summary else None
@@ -206,7 +207,7 @@ class DatasetDocumentSegmentListApi(Resource):
        # Add summary to each segment
        segments_with_summary = []
        for segment in segments.items:
-            segment_dict = dict(marshal(segment, segment_fields))
+            segment_dict = dict(marshal(segment, segment_fields))  # type: ignore
            segment_dict["summary"] = summaries.get(segment.id)
            segments_with_summary.append(segment_dict)

@@ -279,7 +280,7 @@ class DatasetDocumentSegmentApi(Resource):
            DatasetService.check_dataset_permission(dataset, current_user)
        except services.errors.account.NoPermissionError as e:
            raise Forbidden(str(e))
-        if dataset.indexing_technique == "high_quality":
+        if dataset.indexing_technique == IndexTechniqueType.HIGH_QUALITY:
            # check embedding model setting
            try:
                model_manager = ModelManager()
@@ -333,7 +334,7 @@ class DatasetDocumentSegmentAddApi(Resource):
        if not current_user.is_dataset_editor:
            raise Forbidden()
        # check embedding model setting
-        if dataset.indexing_technique == "high_quality":
+        if dataset.indexing_technique == IndexTechniqueType.HIGH_QUALITY:
            try:
                model_manager = ModelManager()
                model_manager.get_model_instance(
@@ -383,7 +384,7 @@ class DatasetDocumentSegmentUpdateApi(Resource):
        document = DocumentService.get_document(dataset_id, document_id)
        if not document:
            raise NotFound("Document not found.")
-        if dataset.indexing_technique == "high_quality":
+        if dataset.indexing_technique == IndexTechniqueType.HIGH_QUALITY:
            # check embedding model setting
            try:
                model_manager = ModelManager()
@@ -401,10 +402,10 @@ class DatasetDocumentSegmentUpdateApi(Resource):
                raise ProviderNotInitializeError(ex.description)
            # check segment
        segment_id = str(segment_id)
-        segment = (
-            db.session.query(DocumentSegment)
+        segment = db.session.scalar(
+            select(DocumentSegment)
            .where(DocumentSegment.id == str(segment_id), DocumentSegment.tenant_id == current_tenant_id)
-            .first()
+            .limit(1)
        )
        if not segment:
            raise NotFound("Segment not found.")
@@ -447,10 +448,10 @@ class DatasetDocumentSegmentUpdateApi(Resource):
            raise NotFound("Document not found.")
        # check segment
        segment_id = str(segment_id)
-        segment = (
-            db.session.query(DocumentSegment)
+        segment = db.session.scalar(
+            select(DocumentSegment)
            .where(DocumentSegment.id == str(segment_id), DocumentSegment.tenant_id == current_tenant_id)
-            .first()
+            .limit(1)
        )
        if not segment:
            raise NotFound("Segment not found.")
@@ -494,7 +495,7 @@ class DatasetDocumentSegmentBatchImportApi(Resource):
        payload = BatchImportPayload.model_validate(console_ns.payload or {})
        upload_file_id = payload.upload_file_id

-        upload_file = db.session.query(UploadFile).where(UploadFile.id == upload_file_id).first()
+        upload_file = db.session.scalar(select(UploadFile).where(UploadFile.id == upload_file_id).limit(1))
        if not upload_file:
            raise NotFound("UploadFile not found.")

@@ -559,17 +560,17 @@ class ChildChunkAddApi(Resource):
            raise NotFound("Document not found.")
        # check segment
        segment_id = str(segment_id)
-        segment = (
-            db.session.query(DocumentSegment)
+        segment = db.session.scalar(
+            select(DocumentSegment)
            .where(DocumentSegment.id == str(segment_id), DocumentSegment.tenant_id == current_tenant_id)
-            .first()
+            .limit(1)
        )
        if not segment:
            raise NotFound("Segment not found.")
        if not current_user.is_dataset_editor:
            raise Forbidden()
        # check embedding model setting
-        if dataset.indexing_technique == "high_quality":
+        if dataset.indexing_technique == IndexTechniqueType.HIGH_QUALITY:
            try:
                model_manager = ModelManager()
                model_manager.get_model_instance(
@@ -616,10 +617,10 @@ class ChildChunkAddApi(Resource):
            raise NotFound("Document not found.")
        # check segment
        segment_id = str(segment_id)
-        segment = (
-            db.session.query(DocumentSegment)
+        segment = db.session.scalar(
+            select(DocumentSegment)
            .where(DocumentSegment.id == str(segment_id), DocumentSegment.tenant_id == current_tenant_id)
-            .first()
+            .limit(1)
        )
        if not segment:
            raise NotFound("Segment not found.")
@@ -666,10 +667,10 @@ class ChildChunkAddApi(Resource):
            raise NotFound("Document not found.")
            # check segment
        segment_id = str(segment_id)
-        segment = (
-            db.session.query(DocumentSegment)
+        segment = db.session.scalar(
+            select(DocumentSegment)
            .where(DocumentSegment.id == str(segment_id), DocumentSegment.tenant_id == current_tenant_id)
-            .first()
+            .limit(1)
        )
        if not segment:
            raise NotFound("Segment not found.")
@@ -714,24 +715,24 @@ class ChildChunkUpdateApi(Resource):
            raise NotFound("Document not found.")
        # check segment
        segment_id = str(segment_id)
-        segment = (
-            db.session.query(DocumentSegment)
+        segment = db.session.scalar(
+            select(DocumentSegment)
            .where(DocumentSegment.id == str(segment_id), DocumentSegment.tenant_id == current_tenant_id)
-            .first()
+            .limit(1)
        )
        if not segment:
            raise NotFound("Segment not found.")
        # check child chunk
        child_chunk_id = str(child_chunk_id)
-        child_chunk = (
-            db.session.query(ChildChunk)
+        child_chunk = db.session.scalar(
+            select(ChildChunk)
            .where(
                ChildChunk.id == str(child_chunk_id),
                ChildChunk.tenant_id == current_tenant_id,
                ChildChunk.segment_id == segment.id,
                ChildChunk.document_id == document_id,
            )
-            .first()
+            .limit(1)
        )
        if not child_chunk:
            raise NotFound("Child chunk not found.")
@@ -771,24 +772,24 @@ class ChildChunkUpdateApi(Resource):
            raise NotFound("Document not found.")
            # check segment
        segment_id = str(segment_id)
-        segment = (
-            db.session.query(DocumentSegment)
+        segment = db.session.scalar(
+            select(DocumentSegment)
            .where(DocumentSegment.id == str(segment_id), DocumentSegment.tenant_id == current_tenant_id)
-            .first()
+            .limit(1)
        )
        if not segment:
            raise NotFound("Segment not found.")
        # check child chunk
        child_chunk_id = str(child_chunk_id)
-        child_chunk = (
-            db.session.query(ChildChunk)
+        child_chunk = db.session.scalar(
+            select(ChildChunk)
            .where(
                ChildChunk.id == str(child_chunk_id),
                ChildChunk.tenant_id == current_tenant_id,
                ChildChunk.segment_id == segment.id,
                ChildChunk.document_id == document_id,
            )
-            .first()
+            .limit(1)
        )
        if not child_chunk:
            raise NotFound("Child chunk not found.")
--- a/api/controllers/console/datasets/hit_testing_base.py
+++ b/api/controllers/console/datasets/hit_testing_base.py
@@ -24,6 +24,7 @@ from fields.hit_testing_fields import hit_testing_record_fields
 from libs.login import current_user
 from models.account import Account
 from services.dataset_service import DatasetService
+from services.entities.knowledge_entities.knowledge_entities import RetrievalModel
 from services.hit_testing_service import HitTestingService

 logger = logging.getLogger(__name__)
@@ -31,7 +32,7 @@ logger = logging.getLogger(__name__)

 class HitTestingPayload(BaseModel):
    query: str = Field(max_length=250)
-    retrieval_model: dict[str, Any] | None = None
+    retrieval_model: RetrievalModel | None = None
    external_retrieval_model: dict[str, Any] | None = None
    attachment_ids: list[str] | None = None

--- a/api/controllers/console/datasets/rag_pipeline/rag_pipeline.py
+++ b/api/controllers/console/datasets/rag_pipeline/rag_pipeline.py
@@ -46,6 +46,8 @@ class PipelineTemplateDetailApi(Resource):
        type = request.args.get("type", default="built-in", type=str)
        rag_pipeline_service = RagPipelineService()
        pipeline_template = rag_pipeline_service.get_pipeline_template_detail(template_id, type)
+        if pipeline_template is None:
+            return {"error": "Pipeline template not found from upstream service."}, 404
        return pipeline_template, 200


--- a/api/controllers/console/datasets/rag_pipeline/rag_pipeline_workflow.py
+++ b/api/controllers/console/datasets/rag_pipeline/rag_pipeline_workflow.py
@@ -6,7 +6,7 @@ from flask import abort, request
 from flask_restx import Resource, marshal_with  # type: ignore
 from pydantic import BaseModel, Field
 from sqlalchemy.orm import Session
-from werkzeug.exceptions import Forbidden, InternalServerError, NotFound
+from werkzeug.exceptions import BadRequest, Forbidden, InternalServerError, NotFound

 import services
 from controllers.common.schema import register_schema_models
@@ -16,7 +16,11 @@ from controllers.console.app.error import (
    DraftWorkflowNotExist,
    DraftWorkflowNotSync,
 )
-from controllers.console.app.workflow import workflow_model, workflow_pagination_model
+from controllers.console.app.workflow import (
+    RESTORE_SOURCE_WORKFLOW_MUST_BE_PUBLISHED_MESSAGE,
+    workflow_model,
+    workflow_pagination_model,
+)
 from controllers.console.app.workflow_run import (
    workflow_run_detail_model,
    workflow_run_node_execution_list_model,
@@ -42,7 +46,8 @@ from libs.login import current_account_with_tenant, current_user, login_required
 from models import Account
 from models.dataset import Pipeline
 from models.model import EndUser
-from services.errors.app import WorkflowHashNotEqualError
+from models.workflow import Workflow
+from services.errors.app import IsDraftWorkflowError, WorkflowHashNotEqualError, WorkflowNotFoundError
 from services.errors.llm import InvokeRateLimitError
 from services.rag_pipeline.pipeline_generate_service import PipelineGenerateService
 from services.rag_pipeline.rag_pipeline import RagPipelineService
@@ -203,9 +208,12 @@ class DraftRagPipelineApi(Resource):
            abort(415)

        payload = DraftWorkflowSyncPayload.model_validate(payload_dict)
+        rag_pipeline_service = RagPipelineService()

        try:
-            environment_variables_list = payload.environment_variables or []
+            environment_variables_list = Workflow.normalize_environment_variable_mappings(
+                payload.environment_variables or [],
+            )
            environment_variables = [
                variable_factory.build_environment_variable_from_mapping(obj) for obj in environment_variables_list
            ]
@@ -213,7 +221,6 @@ class DraftRagPipelineApi(Resource):
            conversation_variables = [
                variable_factory.build_conversation_variable_from_mapping(obj) for obj in conversation_variables_list
            ]
-            rag_pipeline_service = RagPipelineService()
            workflow = rag_pipeline_service.sync_draft_workflow(
                pipeline=pipeline,
                graph=payload.graph,
@@ -705,6 +712,36 @@ class PublishedAllRagPipelineApi(Resource):
            }


+@console_ns.route("/rag/pipelines/<uuid:pipeline_id>/workflows/<string:workflow_id>/restore")
+class RagPipelineDraftWorkflowRestoreApi(Resource):
+    @setup_required
+    @login_required
+    @account_initialization_required
+    @edit_permission_required
+    @get_rag_pipeline
+    def post(self, pipeline: Pipeline, workflow_id: str):
+        current_user, _ = current_account_with_tenant()
+        rag_pipeline_service = RagPipelineService()
+
+        try:
+            workflow = rag_pipeline_service.restore_published_workflow_to_draft(
+                pipeline=pipeline,
+                workflow_id=workflow_id,
+                account=current_user,
+            )
+        except IsDraftWorkflowError as exc:
+            # Use a stable, predefined message to keep the 400 response consistent
+            raise BadRequest(RESTORE_SOURCE_WORKFLOW_MUST_BE_PUBLISHED_MESSAGE) from exc
+        except WorkflowNotFoundError as exc:
+            raise NotFound(str(exc)) from exc
+
+        return {
+            "result": "success",
+            "hash": workflow.unique_hash,
+            "updated_at": TimestampField().format(workflow.updated_at or workflow.created_at),
+        }
+
+
@console_ns.route("/rag/pipelines/<uuid:pipeline_id>/workflows/<string:workflow_id>")
 class RagPipelineByIdApi(Resource):
    @setup_required
--- a/api/controllers/console/datasets/wraps.py
+++ b/api/controllers/console/datasets/wraps.py
@@ -2,6 +2,8 @@ from collections.abc import Callable
 from functools import wraps
 from typing import ParamSpec, TypeVar

+from sqlalchemy import select
+
 from controllers.console.datasets.error import PipelineNotFoundError
 from extensions.ext_database import db
 from libs.login import current_account_with_tenant
@@ -24,10 +26,8 @@ def get_rag_pipeline(view_func: Callable[P, R]):

        del kwargs["pipeline_id"]

-        pipeline = (
-            db.session.query(Pipeline)
-            .where(Pipeline.id == pipeline_id, Pipeline.tenant_id == current_tenant_id)
-            .first()
+        pipeline = db.session.scalar(
+            select(Pipeline).where(Pipeline.id == pipeline_id, Pipeline.tenant_id == current_tenant_id).limit(1)
        )

        if not pipeline:
--- a/api/controllers/console/explore/banner.py
+++ b/api/controllers/console/explore/banner.py
@@ -1,9 +1,11 @@
 from flask import request
 from flask_restx import Resource
+from sqlalchemy import select

 from controllers.console import api
 from controllers.console.explore.wraps import explore_banner_enabled
 from extensions.ext_database import db
+from models.enums import BannerStatus
 from models.model import ExporleBanner


@@ -16,14 +18,18 @@ class BannerApi(Resource):
        language = request.args.get("language", "en-US")

        # Build base query for enabled banners
-        base_query = db.session.query(ExporleBanner).where(ExporleBanner.status == "enabled")
+        base_query = select(ExporleBanner).where(ExporleBanner.status == BannerStatus.ENABLED)

        # Try to get banners in the requested language
-        banners = base_query.where(ExporleBanner.language == language).order_by(ExporleBanner.sort).all()
+        banners = db.session.scalars(
+            base_query.where(ExporleBanner.language == language).order_by(ExporleBanner.sort)
+        ).all()

        # Fallback to en-US if no banners found and language is not en-US
        if not banners and language != "en-US":
-            banners = base_query.where(ExporleBanner.language == "en-US").order_by(ExporleBanner.sort).all()
+            banners = db.session.scalars(
+                base_query.where(ExporleBanner.language == "en-US").order_by(ExporleBanner.sort)
+            ).all()
        # Convert banners to serializable format
        result = []
        for banner in banners:
--- a/api/controllers/console/explore/installed_app.py
+++ b/api/controllers/console/explore/installed_app.py
@@ -133,13 +133,15 @@ class InstalledAppsListApi(Resource):
    def post(self):
        payload = InstalledAppCreatePayload.model_validate(console_ns.payload or {})

-        recommended_app = db.session.query(RecommendedApp).where(RecommendedApp.app_id == payload.app_id).first()
+        recommended_app = db.session.scalar(
+            select(RecommendedApp).where(RecommendedApp.app_id == payload.app_id).limit(1)
+        )
        if recommended_app is None:
            raise NotFound("Recommended app not found")

        _, current_tenant_id = current_account_with_tenant()

-        app = db.session.query(App).where(App.id == payload.app_id).first()
+        app = db.session.get(App, payload.app_id)

        if app is None:
            raise NotFound("App entity not found")
@@ -147,10 +149,10 @@ class InstalledAppsListApi(Resource):
        if not app.is_public:
            raise Forbidden("You can't install a non-public app")

-        installed_app = (
-            db.session.query(InstalledApp)
+        installed_app = db.session.scalar(
+            select(InstalledApp)
            .where(and_(InstalledApp.app_id == payload.app_id, InstalledApp.tenant_id == current_tenant_id))
-            .first()
+            .limit(1)
        )

        if installed_app is None:
--- a/api/controllers/console/explore/message.py
+++ b/api/controllers/console/explore/message.py
@@ -27,6 +27,7 @@ from fields.message_fields import MessageInfiniteScrollPagination, MessageListIt
 from libs import helper
 from libs.helper import UUIDStrOrEmpty
 from libs.login import current_account_with_tenant
+from models.enums import FeedbackRating
 from models.model import AppMode
 from services.app_generate_service import AppGenerateService
 from services.errors.app import MoreLikeThisDisabledError
@@ -116,7 +117,7 @@ class MessageFeedbackApi(InstalledAppResource):
                app_model=app_model,
                message_id=message_id,
                user=current_user,
-                rating=payload.rating,
+                rating=FeedbackRating(payload.rating) if payload.rating else None,
                content=payload.content,
            )
        except MessageNotExistsError:
--- a/api/controllers/console/explore/trial.py
+++ b/api/controllers/console/explore/trial.py
@@ -4,6 +4,7 @@ from typing import Any, Literal, cast
 from flask import request
 from flask_restx import Resource, fields, marshal, marshal_with
 from pydantic import BaseModel
+from sqlalchemy import select
 from werkzeug.exceptions import Forbidden, InternalServerError, NotFound

 import services
@@ -476,7 +477,7 @@ class TrialSitApi(Resource):

        Returns the site configuration for the application including theme, icons, and text.
        """
-        site = db.session.query(Site).where(Site.app_id == app_model.id).first()
+        site = db.session.scalar(select(Site).where(Site.app_id == app_model.id).limit(1))

        if not site:
            raise Forbidden()
@@ -541,13 +542,7 @@ class AppWorkflowApi(Resource):
        if not app_model.workflow_id:
            raise AppUnavailableError()

-        workflow = (
-            db.session.query(Workflow)
-            .where(
-                Workflow.id == app_model.workflow_id,
-            )
-            .first()
-        )
+        workflow = db.session.get(Workflow, app_model.workflow_id)
        return workflow


--- a/api/controllers/console/explore/wraps.py
+++ b/api/controllers/console/explore/wraps.py
@@ -4,6 +4,7 @@ from typing import Concatenate, ParamSpec, TypeVar

 from flask import abort
 from flask_restx import Resource
+from sqlalchemy import select
 from werkzeug.exceptions import NotFound

 from controllers.console.explore.error import AppAccessDeniedError, TrialAppLimitExceeded, TrialAppNotAllowed
@@ -24,10 +25,10 @@ def installed_app_required(view: Callable[Concatenate[InstalledApp, P], R] | Non
        @wraps(view)
        def decorated(installed_app_id: str, *args: P.args, **kwargs: P.kwargs):
            _, current_tenant_id = current_account_with_tenant()
-            installed_app = (
-                db.session.query(InstalledApp)
+            installed_app = db.session.scalar(
+                select(InstalledApp)
                .where(InstalledApp.id == str(installed_app_id), InstalledApp.tenant_id == current_tenant_id)
-                .first()
+                .limit(1)
            )

            if installed_app is None:
@@ -78,7 +79,7 @@ def trial_app_required(view: Callable[Concatenate[App, P], R] | None = None):
        def decorated(app_id: str, *args: P.args, **kwargs: P.kwargs):
            current_user, _ = current_account_with_tenant()

-            trial_app = db.session.query(TrialApp).where(TrialApp.app_id == str(app_id)).first()
+            trial_app = db.session.scalar(select(TrialApp).where(TrialApp.app_id == str(app_id)).limit(1))

            if trial_app is None:
                raise TrialAppNotAllowed()
@@ -87,10 +88,10 @@ def trial_app_required(view: Callable[Concatenate[App, P], R] | None = None):
            if app is None:
                raise TrialAppNotAllowed()

-            account_trial_app_record = (
-                db.session.query(AccountTrialAppRecord)
+            account_trial_app_record = db.session.scalar(
+                select(AccountTrialAppRecord)
                .where(AccountTrialAppRecord.account_id == current_user.id, AccountTrialAppRecord.app_id == app_id)
-                .first()
+                .limit(1)
            )
            if account_trial_app_record:
                if account_trial_app_record.count >= trial_app.trial_limit:
--- a/api/controllers/console/setup.py
+++ b/api/controllers/console/setup.py
@@ -2,6 +2,7 @@ from typing import Literal

 from flask import request
 from pydantic import BaseModel, Field, field_validator
+from sqlalchemy import select

 from configs import dify_config
 from controllers.fastopenapi import console_router
@@ -100,6 +101,6 @@ def setup_system(payload: SetupRequestPayload) -> SetupResponse:

 def get_setup_status() -> DifySetup | bool | None:
    if dify_config.EDITION == "SELF_HOSTED":
-        return db.session.query(DifySetup).first()
+        return db.session.scalar(select(DifySetup).limit(1))

    return True
--- a/api/controllers/console/workspace/account.py
+++ b/api/controllers/console/workspace/account.py
@@ -212,13 +212,13 @@ class AccountInitApi(Resource):
                raise ValueError("invitation_code is required")

            # check invitation code
-            invitation_code = (
-                db.session.query(InvitationCode)
+            invitation_code = db.session.scalar(
+                select(InvitationCode)
                .where(
                    InvitationCode.code == args.invitation_code,
                    InvitationCode.status == InvitationCodeStatus.UNUSED,
                )
-                .first()
+                .limit(1)
            )

            if not invitation_code:
--- a/api/controllers/console/workspace/members.py
+++ b/api/controllers/console/workspace/members.py
@@ -171,7 +171,7 @@ class MemberCancelInviteApi(Resource):
        current_user, _ = current_account_with_tenant()
        if not current_user.current_tenant:
            raise ValueError("No current tenant")
-        member = db.session.query(Account).where(Account.id == str(member_id)).first()
+        member = db.session.get(Account, str(member_id))
        if member is None:
            abort(404)
        else:
--- a/api/controllers/console/workspace/workspace.py
+++ b/api/controllers/console/workspace/workspace.py
@@ -7,6 +7,7 @@ from sqlalchemy import select
 from werkzeug.exceptions import Unauthorized

 import services
+from configs import dify_config
 from controllers.common.errors import (
    FilenameNotExistsError,
    FileTooLargeError,
@@ -29,6 +30,7 @@ from libs.helper import TimestampField
 from libs.login import current_account_with_tenant, login_required
 from models.account import Tenant, TenantStatus
 from services.account_service import TenantService
+from services.billing_service import BillingService, SubscriptionPlan
 from services.enterprise.enterprise_service import EnterpriseService
 from services.feature_service import FeatureService
 from services.file_service import FileService
@@ -108,9 +110,29 @@ class TenantListApi(Resource):
        current_user, current_tenant_id = current_account_with_tenant()
        tenants = TenantService.get_join_tenants(current_user)
        tenant_dicts = []
+        is_enterprise_only = dify_config.ENTERPRISE_ENABLED and not dify_config.BILLING_ENABLED
+        is_saas = dify_config.EDITION == "CLOUD" and dify_config.BILLING_ENABLED
+        tenant_plans: dict[str, SubscriptionPlan] = {}
+
+        if is_saas:
+            tenant_ids = [tenant.id for tenant in tenants]
+            if tenant_ids:
+                tenant_plans = BillingService.get_plan_bulk(tenant_ids)
+                if not tenant_plans:
+                    logger.warning("get_plan_bulk returned empty result, falling back to legacy feature path")

        for tenant in tenants:
-            features = FeatureService.get_features(tenant.id)
+            plan: str = CloudPlan.SANDBOX
+            if is_saas:
+                tenant_plan = tenant_plans.get(tenant.id)
+                if tenant_plan:
+                    plan = tenant_plan["plan"] or CloudPlan.SANDBOX
+                else:
+                    features = FeatureService.get_features(tenant.id)
+                    plan = features.billing.subscription.plan or CloudPlan.SANDBOX
+            elif not is_enterprise_only:
+                features = FeatureService.get_features(tenant.id)
+                plan = features.billing.subscription.plan or CloudPlan.SANDBOX

            # Create a dictionary with tenant attributes
            tenant_dict = {
@@ -118,7 +140,7 @@ class TenantListApi(Resource):
                "name": tenant.name,
                "status": tenant.status,
                "created_at": tenant.created_at,
-                "plan": features.billing.subscription.plan if features.billing.enabled else CloudPlan.SANDBOX,
+                "plan": plan,
                "current": tenant.id == current_tenant_id if current_tenant_id else False,
            }

@@ -198,7 +220,7 @@ class SwitchWorkspaceApi(Resource):
        except Exception:
            raise AccountNotLinkTenantError("Account not link tenant")

-        new_tenant = db.session.query(Tenant).get(args.tenant_id)  # Get new tenant
+        new_tenant = db.session.get(Tenant, args.tenant_id)  # Get new tenant
        if new_tenant is None:
            raise ValueError("Tenant not found")

--- a/api/controllers/console/wraps.py
+++ b/api/controllers/console/wraps.py
@@ -7,6 +7,7 @@ from functools import wraps
 from typing import ParamSpec, TypeVar

 from flask import abort, request
+from sqlalchemy import select

 from configs import dify_config
 from controllers.console.auth.error import AuthenticationFailedError, EmailCodeError
@@ -218,13 +219,9 @@ def setup_required(view: Callable[P, R]) -> Callable[P, R]:
    @wraps(view)
    def decorated(*args: P.args, **kwargs: P.kwargs) -> R:
        # check setup
-        if (
-            dify_config.EDITION == "SELF_HOSTED"
-            and os.environ.get("INIT_PASSWORD")
-            and not db.session.query(DifySetup).first()
-        ):
-            raise NotInitValidateError()
-        elif dify_config.EDITION == "SELF_HOSTED" and not db.session.query(DifySetup).first():
+        if dify_config.EDITION == "SELF_HOSTED" and not db.session.scalar(select(DifySetup).limit(1)):
+            if os.environ.get("INIT_PASSWORD"):
+                raise NotInitValidateError()
            raise NotSetupError()

        return view(*args, **kwargs)
--- a/api/controllers/inner_api/plugin/wraps.py
+++ b/api/controllers/inner_api/plugin/wraps.py
@@ -5,6 +5,7 @@ from typing import ParamSpec, TypeVar
 from flask import current_app, request
 from flask_login import user_logged_in
 from pydantic import BaseModel
+from sqlalchemy import select
 from sqlalchemy.orm import Session

 from extensions.ext_database import db
@@ -36,23 +37,16 @@ def get_user(tenant_id: str, user_id: str | None) -> EndUser:
            user_model = None

            if is_anonymous:
-                user_model = (
-                    session.query(EndUser)
+                user_model = session.scalar(
+                    select(EndUser)
                    .where(
                        EndUser.session_id == user_id,
                        EndUser.tenant_id == tenant_id,
                    )
-                    .first()
+                    .limit(1)
                )
            else:
-                user_model = (
-                    session.query(EndUser)
-                    .where(
-                        EndUser.id == user_id,
-                        EndUser.tenant_id == tenant_id,
-                    )
-                    .first()
-                )
+                user_model = session.get(EndUser, user_id)

            if not user_model:
                user_model = EndUser(
@@ -85,16 +79,7 @@ def get_user_tenant(view_func: Callable[P, R]):
        if not user_id:
            user_id = DefaultEndUserSessionID.DEFAULT_SESSION_ID

-        try:
-            tenant_model = (
-                db.session.query(Tenant)
-                .where(
-                    Tenant.id == tenant_id,
-                )
-                .first()
-            )
-        except Exception:
-            raise ValueError("tenant not found")
+        tenant_model = db.session.get(Tenant, tenant_id)

        if not tenant_model:
            raise ValueError("tenant not found")
--- a/api/controllers/inner_api/workspace/workspace.py
+++ b/api/controllers/inner_api/workspace/workspace.py
@@ -2,6 +2,7 @@ import json

 from flask_restx import Resource
 from pydantic import BaseModel
+from sqlalchemy import select

 from controllers.common.schema import register_schema_models
 from controllers.console.wraps import setup_required
@@ -42,7 +43,7 @@ class EnterpriseWorkspace(Resource):
    def post(self):
        args = WorkspaceCreatePayload.model_validate(inner_api_ns.payload or {})

-        account = db.session.query(Account).filter_by(email=args.owner_email).first()
+        account = db.session.scalar(select(Account).where(Account.email == args.owner_email).limit(1))
        if account is None:
            return {"message": "owner account not found."}, 404

--- a/api/controllers/inner_api/wraps.py
+++ b/api/controllers/inner_api/wraps.py
@@ -75,7 +75,7 @@ def enterprise_inner_api_user_auth(view: Callable[P, R]):
        if signature_base64 != token:
            return view(*args, **kwargs)

-        kwargs["user"] = db.session.query(EndUser).where(EndUser.id == user_id).first()
+        kwargs["user"] = db.session.get(EndUser, user_id)

        return view(*args, **kwargs)

--- a/api/controllers/service_api/app/message.py
+++ b/api/controllers/service_api/app/message.py
@@ -15,6 +15,7 @@ from core.app.entities.app_invoke_entities import InvokeFrom
 from fields.conversation_fields import ResultResponse
 from fields.message_fields import MessageInfiniteScrollPagination, MessageListItem
 from libs.helper import UUIDStrOrEmpty
+from models.enums import FeedbackRating
 from models.model import App, AppMode, EndUser
 from services.errors.message import (
    FirstMessageNotExistsError,
@@ -116,7 +117,7 @@ class MessageFeedbackApi(Resource):
                app_model=app_model,
                message_id=message_id,
                user=end_user,
-                rating=payload.rating,
+                rating=FeedbackRating(payload.rating) if payload.rating else None,
                content=payload.content,
            )
        except MessageNotExistsError:
--- a/api/controllers/service_api/dataset/dataset.py
+++ b/api/controllers/service_api/dataset/dataset.py
@@ -15,6 +15,7 @@ from controllers.service_api.wraps import (
    cloud_edition_billing_rate_limit_check,
 )
 from core.provider_manager import ProviderManager
+from core.rag.index_processor.constant.index_type import IndexTechniqueType
 from dify_graph.model_runtime.entities.model_entities import ModelType
 from fields.dataset_fields import dataset_detail_fields
 from fields.tag_fields import DataSetTag
@@ -153,15 +154,20 @@ class DatasetListApi(DatasetApiResource):

        data = marshal(datasets, dataset_detail_fields)
        for item in data:
-            if item["indexing_technique"] == "high_quality" and item["embedding_model_provider"]:
-                item["embedding_model_provider"] = str(ModelProviderID(item["embedding_model_provider"]))
-                item_model = f"{item['embedding_model']}:{item['embedding_model_provider']}"
+            if (
+                item["indexing_technique"] == IndexTechniqueType.HIGH_QUALITY  # pyrefly: ignore[bad-index]
+                and item["embedding_model_provider"]  # pyrefly: ignore[bad-index]
+            ):
+                item["embedding_model_provider"] = str(  # pyrefly: ignore[unsupported-operation]
+                    ModelProviderID(item["embedding_model_provider"])  # pyrefly: ignore[bad-index]
+                )
+                item_model = f"{item['embedding_model']}:{item['embedding_model_provider']}"  # pyrefly: ignore[bad-index]
                if item_model in model_names:
-                    item["embedding_available"] = True
+                    item["embedding_available"] = True  # type: ignore
                else:
-                    item["embedding_available"] = False
+                    item["embedding_available"] = False  # type: ignore
            else:
-                item["embedding_available"] = True
+                item["embedding_available"] = True  # type: ignore
        response = {
            "data": data,
            "has_more": len(datasets) == query.limit,
@@ -265,7 +271,7 @@ class DatasetApi(DatasetApiResource):
        for embedding_model in embedding_models:
            model_names.append(f"{embedding_model.model}:{embedding_model.provider.provider}")

-        if data.get("indexing_technique") == "high_quality":
+        if data.get("indexing_technique") == IndexTechniqueType.HIGH_QUALITY:
            item_model = f"{data.get('embedding_model')}:{data.get('embedding_model_provider')}"
            if item_model in model_names:
                data["embedding_available"] = True
@@ -315,7 +321,7 @@ class DatasetApi(DatasetApiResource):
        # check embedding model setting
        embedding_model_provider = payload.embedding_model_provider
        embedding_model = payload.embedding_model
-        if payload.indexing_technique == "high_quality" or embedding_model_provider:
+        if payload.indexing_technique == IndexTechniqueType.HIGH_QUALITY or embedding_model_provider:
            if embedding_model_provider and embedding_model:
                DatasetService.check_embedding_model_setting(
                    dataset.tenant_id, embedding_model_provider, embedding_model
--- a/api/controllers/service_api/dataset/segment.py
+++ b/api/controllers/service_api/dataset/segment.py
@@ -17,6 +17,7 @@ from controllers.service_api.wraps import (
 )
 from core.errors.error import LLMBadRequestError, ProviderTokenNotInitError
 from core.model_manager import ModelManager
+from core.rag.index_processor.constant.index_type import IndexTechniqueType
 from dify_graph.model_runtime.entities.model_entities import ModelType
 from extensions.ext_database import db
 from fields.segment_fields import child_chunk_fields, segment_fields
@@ -103,7 +104,7 @@ class SegmentApi(DatasetApiResource):
        if not document.enabled:
            raise NotFound("Document is disabled.")
        # check embedding model setting
-        if dataset.indexing_technique == "high_quality":
+        if dataset.indexing_technique == IndexTechniqueType.HIGH_QUALITY:
            try:
                model_manager = ModelManager()
                model_manager.get_model_instance(
@@ -157,7 +158,7 @@ class SegmentApi(DatasetApiResource):
        if not document:
            raise NotFound("Document not found.")
        # check embedding model setting
-        if dataset.indexing_technique == "high_quality":
+        if dataset.indexing_technique == IndexTechniqueType.HIGH_QUALITY:
            try:
                model_manager = ModelManager()
                model_manager.get_model_instance(
@@ -262,7 +263,7 @@ class DatasetSegmentApi(DatasetApiResource):
        document = DocumentService.get_document(dataset_id, document_id)
        if not document:
            raise NotFound("Document not found.")
-        if dataset.indexing_technique == "high_quality":
+        if dataset.indexing_technique == IndexTechniqueType.HIGH_QUALITY:
            # check embedding model setting
            try:
                model_manager = ModelManager()
@@ -358,7 +359,7 @@ class ChildChunkApi(DatasetApiResource):
            raise NotFound("Segment not found.")

        # check embedding model setting
-        if dataset.indexing_technique == "high_quality":
+        if dataset.indexing_technique == IndexTechniqueType.HIGH_QUALITY:
            try:
                model_manager = ModelManager()
                model_manager.get_model_instance(
--- a/api/controllers/trigger/webhook.py
+++ b/api/controllers/trigger/webhook.py
@@ -70,7 +70,14 @@ def handle_webhook(webhook_id: str):

@bp.route("/webhook-debug/<string:webhook_id>", methods=["GET", "POST", "PUT", "PATCH", "DELETE", "HEAD", "OPTIONS"])
 def handle_webhook_debug(webhook_id: str):
-    """Handle webhook debug calls without triggering production workflow execution."""
+    """Handle webhook debug calls without triggering production workflow execution.
+
+    The debug webhook endpoint is only for draft inspection flows. It never enqueues
+    Celery work for the published workflow; instead it dispatches an in-memory debug
+    event to an active Variable Inspector listener. Returning a clear error when no
+    listener is registered prevents a misleading 200 response for requests that are
+    effectively dropped.
+    """
    try:
        webhook_trigger, _, node_config, webhook_data, error = _prepare_webhook_execution(webhook_id, is_debug=True)
        if error:
@@ -94,11 +101,32 @@ def handle_webhook_debug(webhook_id: str):
                "method": webhook_data.get("method"),
            },
        )
-        TriggerDebugEventBus.dispatch(
+        dispatch_count = TriggerDebugEventBus.dispatch(
            tenant_id=webhook_trigger.tenant_id,
            event=event,
            pool_key=pool_key,
        )
+        if dispatch_count == 0:
+            logger.warning(
+                "Webhook debug request dropped without an active listener for webhook %s (tenant=%s, app=%s, node=%s)",
+                webhook_trigger.webhook_id,
+                webhook_trigger.tenant_id,
+                webhook_trigger.app_id,
+                webhook_trigger.node_id,
+            )
+            return (
+                jsonify(
+                    {
+                        "error": "No active debug listener",
+                        "message": (
+                            "The webhook debug URL only works while the Variable Inspector is listening. "
+                            "Use the published webhook URL to execute the workflow in Celery."
+                        ),
+                        "execution_url": webhook_trigger.webhook_url,
+                    }
+                ),
+                409,
+            )
        response_data, status_code = WebhookService.generate_webhook_response(node_config)
        return jsonify(response_data), status_code

--- a/api/controllers/web/human_input_form.py
+++ b/api/controllers/web/human_input_form.py
@@ -8,6 +8,7 @@ from datetime import datetime

 from flask import Response, request
 from flask_restx import Resource, reqparse
+from sqlalchemy import select
 from werkzeug.exceptions import Forbidden

 from configs import dify_config
@@ -147,11 +148,11 @@ class HumanInputFormApi(Resource):

 def _get_app_site_from_form(form: Form) -> tuple[App, Site]:
    """Resolve App/Site for the form's app and validate tenant status."""
-    app_model = db.session.query(App).where(App.id == form.app_id).first()
+    app_model = db.session.get(App, form.app_id)
    if app_model is None or app_model.tenant_id != form.tenant_id:
        raise NotFoundError("Form not found")

-    site = db.session.query(Site).where(Site.app_id == app_model.id).first()
+    site = db.session.scalar(select(Site).where(Site.app_id == app_model.id).limit(1))
    if site is None:
        raise Forbidden()

--- a/api/controllers/web/message.py
+++ b/api/controllers/web/message.py
@@ -25,6 +25,7 @@ from fields.conversation_fields import ResultResponse
 from fields.message_fields import SuggestedQuestionsResponse, WebMessageInfiniteScrollPagination, WebMessageListItem
 from libs import helper
 from libs.helper import uuid_value
+from models.enums import FeedbackRating
 from models.model import AppMode
 from services.app_generate_service import AppGenerateService
 from services.errors.app import MoreLikeThisDisabledError
@@ -157,7 +158,7 @@ class MessageFeedbackApi(WebApiResource):
                app_model=app_model,
                message_id=message_id,
                user=end_user,
-                rating=payload.rating,
+                rating=FeedbackRating(payload.rating) if payload.rating else None,
                content=payload.content,
            )
        except MessageNotExistsError:
--- a/api/controllers/web/site.py
+++ b/api/controllers/web/site.py
@@ -1,6 +1,7 @@
 from typing import cast

 from flask_restx import fields, marshal, marshal_with
+from sqlalchemy import select
 from werkzeug.exceptions import Forbidden

 from configs import dify_config
@@ -72,7 +73,7 @@ class AppSiteApi(WebApiResource):
    def get(self, app_model, end_user):
        """Retrieve app site info."""
        # get site
-        site = db.session.query(Site).where(Site.app_id == app_model.id).first()
+        site = db.session.scalar(select(Site).where(Site.app_id == app_model.id).limit(1))

        if not site:
            raise Forbidden()
--- a/api/core/app/app_config/easy_ui_based_app/dataset/manager.py
+++ b/api/core/app/app_config/easy_ui_based_app/dataset/manager.py
@@ -8,6 +8,7 @@ from core.app.app_config.entities import (
    ModelConfig,
 )
 from core.entities.agent_entities import PlanningStrategy
+from core.rag.data_post_processor.data_post_processor import RerankingModelDict, WeightsDict
 from models.model import AppMode, AppModelConfigDict
 from services.dataset_service import DatasetService

@@ -117,8 +118,10 @@ class DatasetConfigManager:
                    score_threshold=float(score_threshold_val)
                    if dataset_configs.get("score_threshold_enabled", False) and score_threshold_val is not None
                    else None,
-                    reranking_model=reranking_model_val if isinstance(reranking_model_val, dict) else None,
-                    weights=weights_val if isinstance(weights_val, dict) else None,
+                    reranking_model=cast(RerankingModelDict, reranking_model_val)
+                    if isinstance(reranking_model_val, dict)
+                    else None,
+                    weights=cast(WeightsDict, weights_val) if isinstance(weights_val, dict) else None,
                    reranking_enabled=bool(dataset_configs.get("reranking_enabled", True)),
                    rerank_mode=dataset_configs.get("reranking_mode", "reranking_model"),
                    metadata_filtering_mode=cast(
--- a/api/core/app/app_config/entities.py
+++ b/api/core/app/app_config/entities.py
@@ -4,6 +4,7 @@ from typing import Any, Literal

 from pydantic import BaseModel, Field

+from core.rag.data_post_processor.data_post_processor import RerankingModelDict, WeightsDict
 from dify_graph.file import FileUploadConfig
 from dify_graph.model_runtime.entities.llm_entities import LLMMode
 from dify_graph.model_runtime.entities.message_entities import PromptMessageRole
@@ -194,8 +195,8 @@ class DatasetRetrieveConfigEntity(BaseModel):
    top_k: int | None = None
    score_threshold: float | None = 0.0
    rerank_mode: str | None = "reranking_model"
-    reranking_model: dict | None = None
-    weights: dict | None = None
+    reranking_model: RerankingModelDict | None = None
+    weights: WeightsDict | None = None
    reranking_enabled: bool | None = True
    metadata_filtering_mode: Literal["disabled", "automatic", "manual"] | None = "disabled"
    metadata_model_config: ModelConfig | None = None
--- a/api/core/app/apps/advanced_chat/generate_task_pipeline.py
+++ b/api/core/app/apps/advanced_chat/generate_task_pipeline.py
@@ -76,7 +76,7 @@ from dify_graph.system_variable import SystemVariable
 from extensions.ext_database import db
 from libs.datetime_utils import naive_utc_now
 from models import Account, Conversation, EndUser, Message, MessageFile
-from models.enums import CreatorUserRole, MessageStatus
+from models.enums import CreatorUserRole, MessageFileBelongsTo, MessageStatus
 from models.execution_extra_content import HumanInputContent
 from models.workflow import Workflow

@@ -939,7 +939,7 @@ class AdvancedChatAppGenerateTaskPipeline(GraphRuntimeStateSupport):
                type=file["type"],
                transfer_method=file["transfer_method"],
                url=file["remote_url"],
-                belongs_to="assistant",
+                belongs_to=MessageFileBelongsTo.ASSISTANT,
                upload_file_id=file["related_id"],
                created_by_role=CreatorUserRole.ACCOUNT
                if message.invoke_from in {InvokeFrom.EXPLORE, InvokeFrom.DEBUGGER}
--- a/api/core/app/apps/base_app_generate_response_converter.py
+++ b/api/core/app/apps/base_app_generate_response_converter.py
@@ -74,11 +74,22 @@ class AppGenerateResponseConverter(ABC):
            for resource in metadata["retriever_resources"]:
                updated_resources.append(
                    {
+                        "dataset_id": resource.get("dataset_id"),
+                        "dataset_name": resource.get("dataset_name"),
+                        "document_id": resource.get("document_id"),
                        "segment_id": resource.get("segment_id", ""),
                        "position": resource["position"],
+                        "data_source_type": resource.get("data_source_type"),
                        "document_name": resource["document_name"],
                        "score": resource["score"],
+                        "hit_count": resource.get("hit_count"),
+                        "word_count": resource.get("word_count"),
+                        "segment_position": resource.get("segment_position"),
+                        "index_node_hash": resource.get("index_node_hash"),
                        "content": resource["content"],
+                        "page": resource.get("page"),
+                        "title": resource.get("title"),
+                        "files": resource.get("files"),
                        "summary": resource.get("summary"),
                    }
                )
--- a/api/core/app/apps/base_app_runner.py
+++ b/api/core/app/apps/base_app_runner.py
@@ -40,7 +40,7 @@ from dify_graph.model_runtime.entities.message_entities import (
 from dify_graph.model_runtime.entities.model_entities import ModelPropertyKey
 from dify_graph.model_runtime.errors.invoke import InvokeBadRequestError
 from extensions.ext_database import db
-from models.enums import CreatorUserRole
+from models.enums import CreatorUserRole, MessageFileBelongsTo
 from models.model import App, AppMode, Message, MessageAnnotation, MessageFile

 if TYPE_CHECKING:
@@ -419,7 +419,7 @@ class AppRunner:
            message_id=message_id,
            type=FileType.IMAGE,
            transfer_method=FileTransferMethod.TOOL_FILE,
-            belongs_to="assistant",
+            belongs_to=MessageFileBelongsTo.ASSISTANT,
            url=f"/files/tools/{tool_file.id}",
            upload_file_id=tool_file.id,
            created_by_role=(
--- a/api/core/app/apps/common/workflow_response_converter.py
+++ b/api/core/app/apps/common/workflow_response_converter.py
@@ -517,7 +517,7 @@ class WorkflowResponseConverter:
        snapshot = self._pop_snapshot(event.node_execution_id)

        start_at = snapshot.start_at if snapshot else event.start_at
-        finished_at = naive_utc_now()
+        finished_at = event.finished_at or naive_utc_now()
        elapsed_time = (finished_at - start_at).total_seconds()

        inputs, inputs_truncated = self._truncate_mapping(event.inputs)
--- a/api/core/app/apps/message_based_app_generator.py
+++ b/api/core/app/apps/message_based_app_generator.py
@@ -33,7 +33,7 @@ from extensions.ext_redis import get_pubsub_broadcast_channel
 from libs.broadcast_channel.channel import Topic
 from libs.datetime_utils import naive_utc_now
 from models import Account
-from models.enums import CreatorUserRole
+from models.enums import ConversationFromSource, CreatorUserRole, MessageFileBelongsTo
 from models.model import App, AppMode, AppModelConfig, Conversation, EndUser, Message, MessageFile
 from services.errors.app_model_config import AppModelConfigBrokenError
 from services.errors.conversation import ConversationNotExistsError
@@ -130,10 +130,10 @@ class MessageBasedAppGenerator(BaseAppGenerator):
        end_user_id = None
        account_id = None
        if application_generate_entity.invoke_from in {InvokeFrom.WEB_APP, InvokeFrom.SERVICE_API}:
-            from_source = "api"
+            from_source = ConversationFromSource.API
            end_user_id = application_generate_entity.user_id
        else:
-            from_source = "console"
+            from_source = ConversationFromSource.CONSOLE
            account_id = application_generate_entity.user_id

        if isinstance(application_generate_entity, AdvancedChatAppGenerateEntity):
@@ -225,7 +225,7 @@ class MessageBasedAppGenerator(BaseAppGenerator):
                    message_id=message.id,
                    type=file.type,
                    transfer_method=file.transfer_method,
-                    belongs_to="user",
+                    belongs_to=MessageFileBelongsTo.USER,
                    url=file.remote_url,
                    upload_file_id=file.related_id,
                    created_by_role=(CreatorUserRole.ACCOUNT if account_id else CreatorUserRole.END_USER),
--- a/api/core/app/apps/workflow/generate_task_pipeline.py
+++ b/api/core/app/apps/workflow/generate_task_pipeline.py
@@ -705,7 +705,7 @@ class WorkflowAppGenerateTaskPipeline(GraphRuntimeStateSupport):
            app_id=self._application_generate_entity.app_config.app_id,
            workflow_id=self._workflow.id,
            workflow_run_id=workflow_run_id,
-            created_from=created_from.value,
+            created_from=created_from,
            created_by_role=self._created_by_role,
            created_by=self._user_id,
        )
--- a/api/core/app/apps/workflow_app_runner.py
+++ b/api/core/app/apps/workflow_app_runner.py
@@ -456,6 +456,7 @@ class WorkflowBasedAppRunner:
                    node_id=event.node_id,
                    node_type=event.node_type,
                    start_at=event.start_at,
+                    finished_at=event.finished_at,
                    inputs=inputs,
                    process_data=process_data,
                    outputs=outputs,
@@ -471,6 +472,7 @@ class WorkflowBasedAppRunner:
                    node_id=event.node_id,
                    node_type=event.node_type,
                    start_at=event.start_at,
+                    finished_at=event.finished_at,
                    inputs=event.node_run_result.inputs,
                    process_data=event.node_run_result.process_data,
                    outputs=event.node_run_result.outputs,
@@ -487,6 +489,7 @@ class WorkflowBasedAppRunner:
                    node_id=event.node_id,
                    node_type=event.node_type,
                    start_at=event.start_at,
+                    finished_at=event.finished_at,
                    inputs=event.node_run_result.inputs,
                    process_data=event.node_run_result.process_data,
                    outputs=event.node_run_result.outputs,
--- a/api/core/app/entities/queue_entities.py
+++ b/api/core/app/entities/queue_entities.py
@@ -335,6 +335,7 @@ class QueueNodeSucceededEvent(AppQueueEvent):
    in_loop_id: str | None = None
    """loop id if node is in loop"""
    start_at: datetime
+    finished_at: datetime | None = None

    inputs: Mapping[str, object] = Field(default_factory=dict)
    process_data: Mapping[str, object] = Field(default_factory=dict)
@@ -390,6 +391,7 @@ class QueueNodeExceptionEvent(AppQueueEvent):
    in_loop_id: str | None = None
    """loop id if node is in loop"""
    start_at: datetime
+    finished_at: datetime | None = None

    inputs: Mapping[str, object] = Field(default_factory=dict)
    process_data: Mapping[str, object] = Field(default_factory=dict)
@@ -414,6 +416,7 @@ class QueueNodeFailedEvent(AppQueueEvent):
    in_loop_id: str | None = None
    """loop id if node is in loop"""
    start_at: datetime
+    finished_at: datetime | None = None

    inputs: Mapping[str, object] = Field(default_factory=dict)
    process_data: Mapping[str, object] = Field(default_factory=dict)
--- a/api/core/app/features/annotation_reply/annotation_reply.py
+++ b/api/core/app/features/annotation_reply/annotation_reply.py
@@ -4,9 +4,10 @@ from sqlalchemy import select

 from core.app.entities.app_invoke_entities import InvokeFrom
 from core.rag.datasource.vdb.vector_factory import Vector
+from core.rag.index_processor.constant.index_type import IndexTechniqueType
 from extensions.ext_database import db
 from models.dataset import Dataset
-from models.enums import CollectionBindingType
+from models.enums import CollectionBindingType, ConversationFromSource
 from models.model import App, AppAnnotationSetting, Message, MessageAnnotation
 from services.annotation_service import AppAnnotationService
 from services.dataset_service import DatasetCollectionBindingService
@@ -50,7 +51,7 @@ class AnnotationReplyFeature:
            dataset = Dataset(
                id=app_record.id,
                tenant_id=app_record.tenant_id,
-                indexing_technique="high_quality",
+                indexing_technique=IndexTechniqueType.HIGH_QUALITY,
                embedding_model_provider=embedding_provider_name,
                embedding_model=embedding_model_name,
                collection_binding_id=dataset_collection_binding.id,
@@ -68,9 +69,9 @@ class AnnotationReplyFeature:
                annotation = AppAnnotationService.get_annotation_by_id(annotation_id)
                if annotation:
                    if invoke_from in {InvokeFrom.SERVICE_API, InvokeFrom.WEB_APP}:
-                        from_source = "api"
+                        from_source = ConversationFromSource.API
                    else:
-                        from_source = "console"
+                        from_source = ConversationFromSource.CONSOLE

                    # insert annotation history
                    AppAnnotationService.add_annotation_history(
--- a/api/core/app/features/rate_limiting/rate_limit.py
+++ b/api/core/app/features/rate_limiting/rate_limit.py
@@ -19,6 +19,7 @@ class RateLimit:
    _REQUEST_MAX_ALIVE_TIME = 10 * 60  # 10 minutes
    _ACTIVE_REQUESTS_COUNT_FLUSH_INTERVAL = 5 * 60  # recalculate request_count from request_detail every 5 minutes
    _instance_dict: dict[str, "RateLimit"] = {}
+    max_active_requests: int

    def __new__(cls, client_id: str, max_active_requests: int):
        if client_id not in cls._instance_dict:
@@ -27,7 +28,13 @@ class RateLimit:
        return cls._instance_dict[client_id]

    def __init__(self, client_id: str, max_active_requests: int):
+        flush_cache = hasattr(self, "max_active_requests") and self.max_active_requests != max_active_requests
        self.max_active_requests = max_active_requests
+        # Only flush here if this instance has already been fully initialized,
+        # i.e. the Redis key attributes exist. Otherwise, rely on the flush at
+        # the end of initialization below.
+        if flush_cache and hasattr(self, "active_requests_key") and hasattr(self, "max_active_requests_key"):
+            self.flush_cache(use_local_value=True)
        # must be called after max_active_requests is set
        if self.disabled():
            return
@@ -41,8 +48,6 @@ class RateLimit:
        self.flush_cache(use_local_value=True)

    def flush_cache(self, use_local_value=False):
-        if self.disabled():
-            return
        self.last_recalculate_time = time.time()
        # flush max active requests
        if use_local_value or not redis_client.exists(self.max_active_requests_key):
@@ -50,7 +55,8 @@ class RateLimit:
        else:
            self.max_active_requests = int(redis_client.get(self.max_active_requests_key).decode("utf-8"))
            redis_client.expire(self.max_active_requests_key, timedelta(days=1))
-
+        if self.disabled():
+            return
        # flush max active requests (in-transit request list)
        if not redis_client.exists(self.active_requests_key):
            return
--- a/api/core/app/layers/suspend_layer.py
+++ b/api/core/app/layers/suspend_layer.py
@@ -6,16 +6,23 @@ from dify_graph.graph_events.graph import GraphRunPausedEvent
 class SuspendLayer(GraphEngineLayer):
    """ """

+    def __init__(self) -> None:
+        super().__init__()
+        self._paused = False
+
    def on_graph_start(self):
-        pass
+        self._paused = False

    def on_event(self, event: GraphEngineEvent):
        """
        Handle the paused event, stash runtime state into storage and wait for resume.
        """
        if isinstance(event, GraphRunPausedEvent):
-            pass
+            self._paused = True

    def on_graph_end(self, error: Exception | None):
        """ """
-        pass
+        self._paused = False
+
+    def is_paused(self) -> bool:
+        return self._paused
--- a/api/core/app/task_pipeline/message_cycle_manager.py
+++ b/api/core/app/task_pipeline/message_cycle_manager.py
@@ -34,6 +34,7 @@ from core.llm_generator.llm_generator import LLMGenerator
 from core.tools.signature import sign_tool_file
 from extensions.ext_database import db
 from extensions.ext_redis import redis_client
+from models.enums import MessageFileBelongsTo
 from models.model import AppMode, Conversation, MessageAnnotation, MessageFile
 from services.annotation_service import AppAnnotationService

@@ -233,7 +234,7 @@ class MessageCycleManager:
                task_id=self._application_generate_entity.task_id,
                id=message_file.id,
                type=message_file.type,
-                belongs_to=message_file.belongs_to or "user",
+                belongs_to=message_file.belongs_to or MessageFileBelongsTo.USER,
                url=url,
            )

--- a/api/core/app/workflow/layers/persistence.py
+++ b/api/core/app/workflow/layers/persistence.py
@@ -128,14 +128,14 @@ class WorkflowPersistenceLayer(GraphEngineLayer):
            self._handle_graph_run_paused(event)
            return

-        if isinstance(event, NodeRunStartedEvent):
-            self._handle_node_started(event)
-            return
-
        if isinstance(event, NodeRunRetryEvent):
            self._handle_node_retry(event)
            return

+        if isinstance(event, NodeRunStartedEvent):
+            self._handle_node_started(event)
+            return
+
        if isinstance(event, NodeRunSucceededEvent):
            self._handle_node_succeeded(event)
            return
@@ -268,7 +268,12 @@ class WorkflowPersistenceLayer(GraphEngineLayer):

    def _handle_node_succeeded(self, event: NodeRunSucceededEvent) -> None:
        domain_execution = self._get_node_execution(event.id)
-        self._update_node_execution(domain_execution, event.node_run_result, WorkflowNodeExecutionStatus.SUCCEEDED)
+        self._update_node_execution(
+            domain_execution,
+            event.node_run_result,
+            WorkflowNodeExecutionStatus.SUCCEEDED,
+            finished_at=event.finished_at,
+        )

    def _handle_node_failed(self, event: NodeRunFailedEvent) -> None:
        domain_execution = self._get_node_execution(event.id)
@@ -277,6 +282,7 @@ class WorkflowPersistenceLayer(GraphEngineLayer):
            event.node_run_result,
            WorkflowNodeExecutionStatus.FAILED,
            error=event.error,
+            finished_at=event.finished_at,
        )

    def _handle_node_exception(self, event: NodeRunExceptionEvent) -> None:
@@ -286,6 +292,7 @@ class WorkflowPersistenceLayer(GraphEngineLayer):
            event.node_run_result,
            WorkflowNodeExecutionStatus.EXCEPTION,
            error=event.error,
+            finished_at=event.finished_at,
        )

    def _handle_node_pause_requested(self, event: NodeRunPauseRequestedEvent) -> None:
@@ -352,13 +359,14 @@ class WorkflowPersistenceLayer(GraphEngineLayer):
        *,
        error: str | None = None,
        update_outputs: bool = True,
+        finished_at: datetime | None = None,
    ) -> None:
-        finished_at = naive_utc_now()
+        actual_finished_at = finished_at or naive_utc_now()
        snapshot = self._node_snapshots.get(domain_execution.id)
        start_at = snapshot.created_at if snapshot else domain_execution.created_at
        domain_execution.status = status
-        domain_execution.finished_at = finished_at
-        domain_execution.elapsed_time = max((finished_at - start_at).total_seconds(), 0.0)
+        domain_execution.finished_at = actual_finished_at
+        domain_execution.elapsed_time = max((actual_finished_at - start_at).total_seconds(), 0.0)

        if error:
            domain_execution.error = error
--- a/api/core/datasource/datasource_file_manager.py
+++ b/api/core/datasource/datasource_file_manager.py
@@ -15,6 +15,7 @@ from configs import dify_config
 from core.helper import ssrf_proxy
 from extensions.ext_database import db
 from extensions.ext_storage import storage
+from extensions.storage.storage_type import StorageType
 from models.enums import CreatorUserRole
 from models.model import MessageFile, UploadFile
 from models.tools import ToolFile
@@ -81,7 +82,7 @@ class DatasourceFileManager:

        upload_file = UploadFile(
            tenant_id=tenant_id,
-            storage_type=dify_config.STORAGE_TYPE,
+            storage_type=StorageType(dify_config.STORAGE_TYPE),
            key=filepath,
            name=present_filename,
            size=len(file_binary),
--- a/api/core/entities/provider_configuration.py
+++ b/api/core/entities/provider_configuration.py
@@ -30,6 +30,7 @@ from dify_graph.model_runtime.model_providers.__base.ai_model import AIModel
 from dify_graph.model_runtime.model_providers.model_provider_factory import ModelProviderFactory
 from libs.datetime_utils import naive_utc_now
 from models.engine import db
+from models.enums import CredentialSourceType
 from models.provider import (
    LoadBalancingModelConfig,
    Provider,
@@ -546,7 +547,7 @@ class ProviderConfiguration(BaseModel):
                self._update_load_balancing_configs_with_credential(
                    credential_id=credential_id,
                    credential_record=credential_record,
-                    credential_source="provider",
+                    credential_source=CredentialSourceType.PROVIDER,
                    session=session,
                )
            except Exception:
@@ -623,7 +624,7 @@ class ProviderConfiguration(BaseModel):
                LoadBalancingModelConfig.tenant_id == self.tenant_id,
                LoadBalancingModelConfig.provider_name.in_(self._get_provider_names()),
                LoadBalancingModelConfig.credential_id == credential_id,
-                LoadBalancingModelConfig.credential_source_type == "provider",
+                LoadBalancingModelConfig.credential_source_type == CredentialSourceType.PROVIDER,
            )
            lb_configs_using_credential = session.execute(lb_stmt).scalars().all()
            try:
@@ -1043,7 +1044,7 @@ class ProviderConfiguration(BaseModel):
                self._update_load_balancing_configs_with_credential(
                    credential_id=credential_id,
                    credential_record=credential_record,
-                    credential_source="custom_model",
+                    credential_source=CredentialSourceType.CUSTOM_MODEL,
                    session=session,
                )
            except Exception:
@@ -1073,7 +1074,7 @@ class ProviderConfiguration(BaseModel):
                LoadBalancingModelConfig.tenant_id == self.tenant_id,
                LoadBalancingModelConfig.provider_name.in_(self._get_provider_names()),
                LoadBalancingModelConfig.credential_id == credential_id,
-                LoadBalancingModelConfig.credential_source_type == "custom_model",
+                LoadBalancingModelConfig.credential_source_type == CredentialSourceType.CUSTOM_MODEL,
            )
            lb_configs_using_credential = session.execute(lb_stmt).scalars().all()

@@ -1421,12 +1422,12 @@ class ProviderConfiguration(BaseModel):
            preferred_model_provider = s.execute(stmt).scalars().first()

            if preferred_model_provider:
-                preferred_model_provider.preferred_provider_type = provider_type.value
+                preferred_model_provider.preferred_provider_type = provider_type
            else:
                preferred_model_provider = TenantPreferredModelProvider(
                    tenant_id=self.tenant_id,
                    provider_name=self.provider.provider,
-                    preferred_provider_type=provider_type.value,
+                    preferred_provider_type=provider_type,
                )
                s.add(preferred_model_provider)
            s.commit()
@@ -1711,7 +1712,7 @@ class ProviderConfiguration(BaseModel):
                    provider_model_lb_configs = [
                        config
                        for config in model_setting.load_balancing_configs
-                        if config.credential_source_type != "custom_model"
+                        if config.credential_source_type != CredentialSourceType.CUSTOM_MODEL
                    ]

                    load_balancing_enabled = model_setting.load_balancing_enabled
@@ -1769,7 +1770,7 @@ class ProviderConfiguration(BaseModel):
                custom_model_lb_configs = [
                    config
                    for config in model_setting.load_balancing_configs
-                    if config.credential_source_type != "provider"
+                    if config.credential_source_type != CredentialSourceType.PROVIDER
                ]

                load_balancing_enabled = model_setting.load_balancing_enabled
--- a/api/core/indexing_runner.py
+++ b/api/core/indexing_runner.py
@@ -21,7 +21,7 @@ from core.rag.datasource.keyword.keyword_factory import Keyword
 from core.rag.docstore.dataset_docstore import DatasetDocumentStore
 from core.rag.extractor.entity.datasource_type import DatasourceType
 from core.rag.extractor.entity.extract_setting import ExtractSetting, NotionInfo, WebsiteInfo
-from core.rag.index_processor.constant.index_type import IndexStructureType
+from core.rag.index_processor.constant.index_type import IndexStructureType, IndexTechniqueType
 from core.rag.index_processor.index_processor_base import BaseIndexProcessor
 from core.rag.index_processor.index_processor_factory import IndexProcessorFactory
 from core.rag.models.document import ChildDocument, Document
@@ -271,7 +271,7 @@ class IndexingRunner:
        doc_form: str | None = None,
        doc_language: str = "English",
        dataset_id: str | None = None,
-        indexing_technique: str = "economy",
+        indexing_technique: str = IndexTechniqueType.ECONOMY,
    ) -> IndexingEstimate:
        """
        Estimate the indexing for the document.
@@ -289,7 +289,7 @@ class IndexingRunner:
            dataset = db.session.query(Dataset).filter_by(id=dataset_id).first()
            if not dataset:
                raise ValueError("Dataset not found.")
-            if dataset.indexing_technique == "high_quality" or indexing_technique == "high_quality":
+            if IndexTechniqueType.HIGH_QUALITY in {dataset.indexing_technique, indexing_technique}:
                if dataset.embedding_model_provider:
                    embedding_model_instance = self.model_manager.get_model_instance(
                        tenant_id=tenant_id,
@@ -303,7 +303,7 @@ class IndexingRunner:
                        model_type=ModelType.TEXT_EMBEDDING,
                    )
        else:
-            if indexing_technique == "high_quality":
+            if indexing_technique == IndexTechniqueType.HIGH_QUALITY:
                embedding_model_instance = self.model_manager.get_default_model_instance(
                    tenant_id=tenant_id,
                    model_type=ModelType.TEXT_EMBEDDING,
@@ -573,7 +573,7 @@ class IndexingRunner:
        """

        embedding_model_instance = None
-        if dataset.indexing_technique == "high_quality":
+        if dataset.indexing_technique == IndexTechniqueType.HIGH_QUALITY:
            embedding_model_instance = self.model_manager.get_model_instance(
                tenant_id=dataset.tenant_id,
                provider=dataset.embedding_model_provider,
@@ -587,7 +587,7 @@ class IndexingRunner:
        create_keyword_thread = None
        if (
            dataset_document.doc_form != IndexStructureType.PARENT_CHILD_INDEX
-            and dataset.indexing_technique == "economy"
+            and dataset.indexing_technique == IndexTechniqueType.ECONOMY
        ):
            # create keyword index
            create_keyword_thread = threading.Thread(
@@ -597,7 +597,7 @@ class IndexingRunner:
            create_keyword_thread.start()

        max_workers = 10
-        if dataset.indexing_technique == "high_quality":
+        if dataset.indexing_technique == IndexTechniqueType.HIGH_QUALITY:
            with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
                futures = []

@@ -628,7 +628,7 @@ class IndexingRunner:
                    tokens += future.result()
        if (
            dataset_document.doc_form != IndexStructureType.PARENT_CHILD_INDEX
-            and dataset.indexing_technique == "economy"
+            and dataset.indexing_technique == IndexTechniqueType.ECONOMY
            and create_keyword_thread is not None
        ):
            create_keyword_thread.join()
@@ -654,7 +654,7 @@ class IndexingRunner:
                raise ValueError("no dataset found")
            keyword = Keyword(dataset)
            keyword.create(documents)
-            if dataset.indexing_technique != "high_quality":
+            if dataset.indexing_technique != IndexTechniqueType.HIGH_QUALITY:
                document_ids = [document.metadata["doc_id"] for document in documents]
                db.session.query(DocumentSegment).where(
                    DocumentSegment.document_id == document_id,
@@ -764,7 +764,7 @@ class IndexingRunner:
    ) -> list[Document]:
        # get embedding model instance
        embedding_model_instance = None
-        if dataset.indexing_technique == "high_quality":
+        if dataset.indexing_technique == IndexTechniqueType.HIGH_QUALITY:
            if dataset.embedding_model_provider:
                embedding_model_instance = self.model_manager.get_model_instance(
                    tenant_id=dataset.tenant_id,
--- a/api/core/ops/arize_phoenix_trace/arize_phoenix_trace.py
+++ b/api/core/ops/arize_phoenix_trace/arize_phoenix_trace.py
@@ -181,10 +181,6 @@ class ArizePhoenixDataTrace(BaseTraceInstance):
        arize_phoenix_config: ArizeConfig | PhoenixConfig,
    ):
        super().__init__(arize_phoenix_config)
-        import logging
-
-        logging.basicConfig()
-        logging.getLogger().setLevel(logging.DEBUG)
        self.arize_phoenix_config = arize_phoenix_config
        self.tracer, self.processor = setup_tracer(arize_phoenix_config)
        self.project = arize_phoenix_config.project
--- a/api/core/ops/weave_trace/entities/weave_trace_entity.py
+++ b/api/core/ops/weave_trace/entities/weave_trace_entity.py
@@ -67,7 +67,8 @@ class WeaveTraceModel(WeaveTokenUsage, WeaveMultiModel):
                if field_name == "inputs":
                    data = {
                        "messages": [
-                            dict(msg, **{"usage_metadata": usage_metadata, "file_list": file_list}) for msg in v
+                            dict(msg, **{"usage_metadata": usage_metadata, "file_list": file_list})  # type: ignore
+                            for msg in v
                        ]
                        if isinstance(v, list)
                        else v,
--- a/api/core/plugin/impl/plugin.py
+++ b/api/core/plugin/impl/plugin.py
@@ -209,8 +209,7 @@ class PluginInstaller(BasePluginClient):
            "GET",
            f"plugin/{tenant_id}/management/decode/from_identifier",
            PluginDecodeResponse,
-            data={"plugin_unique_identifier": plugin_unique_identifier},
-            headers={"Content-Type": "application/json"},
+            params={"plugin_unique_identifier": plugin_unique_identifier},
        )

    def fetch_plugin_installation_by_ids(
--- a/api/core/provider_manager.py
+++ b/api/core/provider_manager.py
@@ -195,7 +195,7 @@ class ProviderManager:
            preferred_provider_type_record = provider_name_to_preferred_model_provider_records_dict.get(provider_name)

            if preferred_provider_type_record:
-                preferred_provider_type = ProviderType.value_of(preferred_provider_type_record.preferred_provider_type)
+                preferred_provider_type = preferred_provider_type_record.preferred_provider_type
            elif dify_config.EDITION == "CLOUD" and system_configuration.enabled:
                preferred_provider_type = ProviderType.SYSTEM
            elif custom_configuration.provider or custom_configuration.models:
@@ -918,11 +918,11 @@ class ProviderManager:

            trail_pool = CreditPoolService.get_pool(
                tenant_id=tenant_id,
-                pool_type=ProviderQuotaType.TRIAL.value,
+                pool_type=ProviderQuotaType.TRIAL,
            )
            paid_pool = CreditPoolService.get_pool(
                tenant_id=tenant_id,
-                pool_type=ProviderQuotaType.PAID.value,
+                pool_type=ProviderQuotaType.PAID,
            )
        else:
            trail_pool = None
--- a/api/core/rag/cleaner/clean_processor.py
+++ b/api/core/rag/cleaner/clean_processor.py
@@ -1,9 +1,10 @@
 import re
+from typing import Any


 class CleanProcessor:
    @classmethod
-    def clean(cls, text: str, process_rule: dict) -> str:
+    def clean(cls, text: str, process_rule: dict[str, Any] | None) -> str:
        # default clean
        # remove invalid symbol
        text = re.sub(r"<\|", "<", text)
--- a/api/core/rag/data_post_processor/data_post_processor.py
+++ b/api/core/rag/data_post_processor/data_post_processor.py
@@ -1,3 +1,5 @@
+from typing_extensions import TypedDict
+
 from core.model_manager import ModelInstance, ModelManager
 from core.rag.data_post_processor.reorder import ReorderRunner
 from core.rag.index_processor.constant.query_type import QueryType
@@ -10,6 +12,26 @@ from dify_graph.model_runtime.entities.model_entities import ModelType
 from dify_graph.model_runtime.errors.invoke import InvokeAuthorizationError


+class RerankingModelDict(TypedDict):  # rerank model identity; a falsy provider or model name yields no instance
+    reranking_provider_name: str
+    reranking_model_name: str
+
+
+class VectorSettingDict(TypedDict):
+    vector_weight: float
+    embedding_provider_name: str
+    embedding_model_name: str
+
+
+class KeywordSettingDict(TypedDict):
+    keyword_weight: float
+
+
+class WeightsDict(TypedDict):  # consulted when reranking_mode is RerankMode.WEIGHTED_SCORE
+    vector_setting: VectorSettingDict
+    keyword_setting: KeywordSettingDict
+
+
 class DataPostProcessor:
    """Interface for data post-processing document."""

@@ -17,8 +39,8 @@ class DataPostProcessor:
        self,
        tenant_id: str,
        reranking_mode: str,
-        reranking_model: dict | None = None,
-        weights: dict | None = None,
+        reranking_model: RerankingModelDict | None = None,
+        weights: WeightsDict | None = None,
        reorder_enabled: bool = False,
    ):
        self.rerank_runner = self._get_rerank_runner(reranking_mode, tenant_id, reranking_model, weights)
@@ -45,8 +67,8 @@ class DataPostProcessor:
        self,
        reranking_mode: str,
        tenant_id: str,
-        reranking_model: dict | None = None,
-        weights: dict | None = None,
+        reranking_model: RerankingModelDict | None = None,
+        weights: WeightsDict | None = None,
    ) -> BaseRerankRunner | None:
        if reranking_mode == RerankMode.WEIGHTED_SCORE and weights:
            runner = RerankRunnerFactory.create_rerank_runner(
@@ -79,12 +101,14 @@ class DataPostProcessor:
            return ReorderRunner()
        return None

-    def _get_rerank_model_instance(self, tenant_id: str, reranking_model: dict | None) -> ModelInstance | None:
+    def _get_rerank_model_instance(
+        self, tenant_id: str, reranking_model: RerankingModelDict | None
+    ) -> ModelInstance | None:
        if reranking_model:
            try:
                model_manager = ModelManager()
-                reranking_provider_name = reranking_model.get("reranking_provider_name")
-                reranking_model_name = reranking_model.get("reranking_model_name")
+                reranking_provider_name = reranking_model["reranking_provider_name"]
+                reranking_model_name = reranking_model["reranking_model_name"]
                if not reranking_provider_name or not reranking_model_name:
                    return None
                rerank_model_instance = model_manager.get_model_instance(
--- a/api/core/rag/datasource/keyword/jieba/jieba.py
+++ b/api/core/rag/datasource/keyword/jieba/jieba.py
@@ -4,6 +4,7 @@ from typing import Any
 import orjson
 from pydantic import BaseModel
 from sqlalchemy import select
+from typing_extensions import TypedDict

 from configs import dify_config
 from core.rag.datasource.keyword.jieba.jieba_keyword_table_handler import JiebaKeywordTableHandler
@@ -15,6 +16,11 @@ from extensions.ext_storage import storage
 from models.dataset import Dataset, DatasetKeywordTable, DocumentSegment


+class PreSegmentData(TypedDict):  # one entry of the list passed to Jieba.multi_create_segment_keywords
+    segment: DocumentSegment
+    keywords: list[str]
+
+
 class KeywordTableConfig(BaseModel):
    max_keywords_per_chunk: int = 10

@@ -128,7 +134,7 @@ class Jieba(BaseKeyword):
                    file_key = "keyword_files/" + self.dataset.tenant_id + "/" + self.dataset.id + ".txt"
                    storage.delete(file_key)

-    def _save_dataset_keyword_table(self, keyword_table):
+    def _save_dataset_keyword_table(self, keyword_table: dict[str, set[str]] | None):
        keyword_table_dict = {
            "__type__": "keyword_table",
            "__data__": {"index_id": self.dataset.id, "summary": None, "table": keyword_table},
@@ -144,7 +150,7 @@ class Jieba(BaseKeyword):
                storage.delete(file_key)
            storage.save(file_key, dumps_with_sets(keyword_table_dict).encode("utf-8"))

-    def _get_dataset_keyword_table(self) -> dict | None:
+    def _get_dataset_keyword_table(self) -> dict[str, set[str]] | None:
        dataset_keyword_table = self.dataset.dataset_keyword_table
        if dataset_keyword_table:
            keyword_table_dict = dataset_keyword_table.keyword_table_dict
@@ -169,14 +175,16 @@ class Jieba(BaseKeyword):

        return {}

-    def _add_text_to_keyword_table(self, keyword_table: dict, id: str, keywords: list[str]):
+    def _add_text_to_keyword_table(
+        self, keyword_table: dict[str, set[str]], id: str, keywords: list[str]
+    ) -> dict[str, set[str]]:
        for keyword in keywords:
            if keyword not in keyword_table:
                keyword_table[keyword] = set()
            keyword_table[keyword].add(id)
        return keyword_table

-    def _delete_ids_from_keyword_table(self, keyword_table: dict, ids: list[str]):
+    def _delete_ids_from_keyword_table(self, keyword_table: dict[str, set[str]], ids: list[str]) -> dict[str, set[str]]:
        # get set of ids that correspond to node
        node_idxs_to_delete = set(ids)

@@ -193,7 +201,7 @@ class Jieba(BaseKeyword):

        return keyword_table

-    def _retrieve_ids_by_query(self, keyword_table: dict, query: str, k: int = 4):
+    def _retrieve_ids_by_query(self, keyword_table: dict[str, set[str]], query: str, k: int = 4) -> list[str]:
        keyword_table_handler = JiebaKeywordTableHandler()
        keywords = keyword_table_handler.extract_keywords(query)

@@ -228,7 +236,7 @@ class Jieba(BaseKeyword):
        keyword_table = self._add_text_to_keyword_table(keyword_table or {}, node_id, keywords)
        self._save_dataset_keyword_table(keyword_table)

-    def multi_create_segment_keywords(self, pre_segment_data_list: list):
+    def multi_create_segment_keywords(self, pre_segment_data_list: list[PreSegmentData]):
        keyword_table_handler = JiebaKeywordTableHandler()
        keyword_table = self._get_dataset_keyword_table()
        for pre_segment_data in pre_segment_data_list:
--- a/api/core/rag/datasource/retrieval_service.py
+++ b/api/core/rag/datasource/retrieval_service.py
@@ -1,19 +1,20 @@
 import concurrent.futures
 import logging
 from concurrent.futures import ThreadPoolExecutor
-from typing import Any
+from typing import Any, NotRequired

 from flask import Flask, current_app
 from sqlalchemy import select
 from sqlalchemy.orm import Session, load_only
+from typing_extensions import TypedDict

 from configs import dify_config
 from core.db.session_factory import session_factory
 from core.model_manager import ModelManager
-from core.rag.data_post_processor.data_post_processor import DataPostProcessor
+from core.rag.data_post_processor.data_post_processor import DataPostProcessor, RerankingModelDict, WeightsDict
 from core.rag.datasource.keyword.keyword_factory import Keyword
 from core.rag.datasource.vdb.vector_factory import Vector
-from core.rag.embedding.retrieval import RetrievalChildChunk, RetrievalSegments
+from core.rag.embedding.retrieval import AttachmentInfoDict, RetrievalChildChunk, RetrievalSegments
 from core.rag.entities.metadata_entities import MetadataCondition
 from core.rag.index_processor.constant.doc_type import DocType
 from core.rag.index_processor.constant.index_type import IndexStructureType
@@ -35,7 +36,49 @@ from models.dataset import Document as DatasetDocument
 from models.model import UploadFile
 from services.external_knowledge_service import ExternalDatasetService

-default_retrieval_model = {
+
+class SegmentAttachmentResult(TypedDict):  # return shape of RetrievalService.get_segment_attachment_info
+    attachment_info: AttachmentInfoDict
+    segment_id: str
+
+
+class SegmentAttachmentInfoResult(TypedDict):  # element shape of RetrievalService.get_segment_attachment_infos
+    attachment_id: str  # taken from the attachment binding, not from the upload file
+    attachment_info: AttachmentInfoDict
+    segment_id: str
+
+
+class ChildChunkDetail(TypedDict):  # per-child-chunk entry collected while building segment records
+    id: str
+    content: str
+    position: int
+    score: float  # presumably child_score: the child Document's metadata "score", else 0.0 - TODO confirm
+
+
+class SegmentChildMapDetail(TypedDict):  # value type of segment_child_map, keyed by segment id
+    max_score: float  # copied into the segment's record as record["score"]
+    child_chunks: list[ChildChunkDetail]
+
+
+class SegmentRecord(TypedDict):  # may start as just {"segment": ...}; NotRequired keys can be added afterwards
+    segment: DocumentSegment
+    score: NotRequired[float]
+    child_chunks: NotRequired[list[ChildChunkDetail]]
+    files: NotRequired[list[AttachmentInfoDict]]  # set when the segment id is present in attachment_map
+
+
+class DefaultRetrievalModelDict(TypedDict):  # shape of the module-level default_retrieval_model constant
+    search_method: RetrievalMethod
+    reranking_enable: bool
+    reranking_model: RerankingModelDict
+    reranking_mode: NotRequired[str]
+    weights: NotRequired[WeightsDict | None]
+    score_threshold: NotRequired[float]
+    top_k: int
+    score_threshold_enabled: bool
+
+
+default_retrieval_model: DefaultRetrievalModelDict = {
    "search_method": RetrievalMethod.SEMANTIC_SEARCH,
    "reranking_enable": False,
    "reranking_model": {"reranking_provider_name": "", "reranking_model_name": ""},
@@ -56,11 +99,11 @@ class RetrievalService:
        query: str,
        top_k: int = 4,
        score_threshold: float | None = 0.0,
-        reranking_model: dict | None = None,
+        reranking_model: RerankingModelDict | None = None,
        reranking_mode: str = "reranking_model",
-        weights: dict | None = None,
+        weights: WeightsDict | None = None,
        document_ids_filter: list[str] | None = None,
-        attachment_ids: list | None = None,
+        attachment_ids: list[str] | None = None,
    ):
        if not query and not attachment_ids:
            return []
@@ -207,8 +250,8 @@ class RetrievalService:
        dataset_id: str,
        query: str,
        top_k: int,
-        all_documents: list,
-        exceptions: list,
+        all_documents: list[Document],
+        exceptions: list[str],
        document_ids_filter: list[str] | None = None,
    ):
        with flask_app.app_context():
@@ -235,10 +278,10 @@ class RetrievalService:
        query: str,
        top_k: int,
        score_threshold: float | None,
-        reranking_model: dict | None,
-        all_documents: list,
+        reranking_model: RerankingModelDict | None,
+        all_documents: list[Document],
        retrieval_method: RetrievalMethod,
-        exceptions: list,
+        exceptions: list[str],
        document_ids_filter: list[str] | None = None,
        query_type: QueryType = QueryType.TEXT_QUERY,
    ):
@@ -277,8 +320,8 @@ class RetrievalService:
                if documents:
                    if (
                        reranking_model
-                        and reranking_model.get("reranking_model_name")
-                        and reranking_model.get("reranking_provider_name")
+                        and reranking_model["reranking_model_name"]
+                        and reranking_model["reranking_provider_name"]
                        and retrieval_method == RetrievalMethod.SEMANTIC_SEARCH
                    ):
                        data_post_processor = DataPostProcessor(
@@ -288,8 +331,8 @@ class RetrievalService:
                            model_manager = ModelManager()
                            is_support_vision = model_manager.check_model_support_vision(
                                tenant_id=dataset.tenant_id,
-                                provider=reranking_model.get("reranking_provider_name") or "",
-                                model=reranking_model.get("reranking_model_name") or "",
+                                provider=reranking_model["reranking_provider_name"],
+                                model=reranking_model["reranking_model_name"],
                                model_type=ModelType.RERANK,
                            )
                            if is_support_vision:
@@ -329,10 +372,10 @@ class RetrievalService:
        query: str,
        top_k: int,
        score_threshold: float | None,
-        reranking_model: dict | None,
-        all_documents: list,
+        reranking_model: RerankingModelDict | None,
+        all_documents: list[Document],
        retrieval_method: str,
-        exceptions: list,
+        exceptions: list[str],
        document_ids_filter: list[str] | None = None,
    ):
        with flask_app.app_context():
@@ -349,8 +392,8 @@ class RetrievalService:
                if documents:
                    if (
                        reranking_model
-                        and reranking_model.get("reranking_model_name")
-                        and reranking_model.get("reranking_provider_name")
+                        and reranking_model["reranking_model_name"]
+                        and reranking_model["reranking_provider_name"]
                        and retrieval_method == RetrievalMethod.FULL_TEXT_SEARCH
                    ):
                        data_post_processor = DataPostProcessor(
@@ -459,7 +502,7 @@ class RetrievalService:
            segment_ids: list[str] = []
            index_node_segments: list[DocumentSegment] = []
            segments: list[DocumentSegment] = []
-            attachment_map: dict[str, list[dict[str, Any]]] = {}
+            attachment_map: dict[str, list[AttachmentInfoDict]] = {}
            child_chunk_map: dict[str, list[ChildChunk]] = {}
            doc_segment_map: dict[str, list[str]] = {}
            segment_summary_map: dict[str, str] = {}  # Map segment_id to summary content
@@ -544,12 +587,12 @@ class RetrievalService:
                            segment_summary_map[summary.chunk_id] = summary.summary_content

            include_segment_ids = set()
-            segment_child_map: dict[str, dict[str, Any]] = {}
-            records: list[dict[str, Any]] = []
+            segment_child_map: dict[str, SegmentChildMapDetail] = {}
+            records: list[SegmentRecord] = []

            for segment in segments:
                child_chunks: list[ChildChunk] = child_chunk_map.get(segment.id, [])
-                attachment_infos: list[dict[str, Any]] = attachment_map.get(segment.id, [])
+                attachment_infos: list[AttachmentInfoDict] = attachment_map.get(segment.id, [])
                ds_dataset_document: DatasetDocument | None = valid_dataset_documents.get(segment.document_id)

                if ds_dataset_document and ds_dataset_document.doc_form == IndexStructureType.PARENT_CHILD_INDEX:
@@ -560,14 +603,14 @@ class RetrievalService:
                        max_score = summary_score_map.get(segment.id, 0.0)

                        if child_chunks or attachment_infos:
-                            child_chunk_details = []
+                            child_chunk_details: list[ChildChunkDetail] = []
                            for child_chunk in child_chunks:
                                child_document: Document | None = doc_to_document_map.get(child_chunk.index_node_id)
                                if child_document:
                                    child_score = child_document.metadata.get("score", 0.0)
                                else:
                                    child_score = 0.0
-                                child_chunk_detail = {
+                                child_chunk_detail: ChildChunkDetail = {
                                    "id": child_chunk.id,
                                    "content": child_chunk.content,
                                    "position": child_chunk.position,
@@ -580,7 +623,7 @@ class RetrievalService:
                                if file_document:
                                    max_score = max(max_score, file_document.metadata.get("score", 0.0))

-                            map_detail = {
+                            map_detail: SegmentChildMapDetail = {
                                "max_score": max_score,
                                "child_chunks": child_chunk_details,
                            }
@@ -593,7 +636,7 @@ class RetrievalService:
                                    "max_score": summary_score,
                                    "child_chunks": [],
                                }
-                        record: dict[str, Any] = {
+                        record: SegmentRecord = {
                            "segment": segment,
                        }
                        records.append(record)
@@ -617,19 +660,19 @@ class RetrievalService:
                            if file_doc:
                                max_score = max(max_score, file_doc.metadata.get("score", 0.0))

-                        record = {
+                        another_record: SegmentRecord = {
                            "segment": segment,
                            "score": max_score,
                        }
-                        records.append(record)
+                        records.append(another_record)

            # Add child chunks information to records
            for record in records:
                if record["segment"].id in segment_child_map:
-                    record["child_chunks"] = segment_child_map[record["segment"].id].get("child_chunks")  # type: ignore
-                    record["score"] = segment_child_map[record["segment"].id]["max_score"]  # type: ignore
+                    record["child_chunks"] = segment_child_map[record["segment"].id]["child_chunks"]
+                    record["score"] = segment_child_map[record["segment"].id]["max_score"]
                if record["segment"].id in attachment_map:
-                    record["files"] = attachment_map[record["segment"].id]  # type: ignore[assignment]
+                    record["files"] = attachment_map[record["segment"].id]

            result: list[RetrievalSegments] = []
            for record in records:
@@ -693,9 +736,9 @@ class RetrievalService:
        query: str | None = None,
        top_k: int = 4,
        score_threshold: float | None = 0.0,
-        reranking_model: dict | None = None,
+        reranking_model: RerankingModelDict | None = None,
        reranking_mode: str = "reranking_model",
-        weights: dict | None = None,
+        weights: WeightsDict | None = None,
        document_ids_filter: list[str] | None = None,
        attachment_id: str | None = None,
    ):
@@ -807,7 +850,7 @@ class RetrievalService:
    @classmethod
    def get_segment_attachment_info(
        cls, dataset_id: str, tenant_id: str, attachment_id: str, session: Session
-    ) -> dict[str, Any] | None:
+    ) -> SegmentAttachmentResult | None:
        upload_file = session.query(UploadFile).where(UploadFile.id == attachment_id).first()
        if upload_file:
            attachment_binding = (
@@ -816,7 +859,7 @@ class RetrievalService:
                .first()
            )
            if attachment_binding:
-                attachment_info = {
+                attachment_info: AttachmentInfoDict = {
                    "id": upload_file.id,
                    "name": upload_file.name,
                    "extension": "." + upload_file.extension,
@@ -828,8 +871,10 @@ class RetrievalService:
        return None

    @classmethod
-    def get_segment_attachment_infos(cls, attachment_ids: list[str], session: Session) -> list[dict[str, Any]]:
-        attachment_infos = []
+    def get_segment_attachment_infos(
+        cls, attachment_ids: list[str], session: Session
+    ) -> list[SegmentAttachmentInfoResult]:
+        attachment_infos: list[SegmentAttachmentInfoResult] = []
        upload_files = session.query(UploadFile).where(UploadFile.id.in_(attachment_ids)).all()
        if upload_files:
            upload_file_ids = [upload_file.id for upload_file in upload_files]
@@ -843,7 +888,7 @@ class RetrievalService:
            if attachment_bindings:
                for upload_file in upload_files:
                    attachment_binding = attachment_binding_map.get(upload_file.id)
-                    attachment_info = {
+                    info: AttachmentInfoDict = {
                        "id": upload_file.id,
                        "name": upload_file.name,
                        "extension": "." + upload_file.extension,
@@ -855,7 +900,7 @@ class RetrievalService:
                        attachment_infos.append(
                            {
                                "attachment_id": attachment_binding.attachment_id,
-                                "attachment_info": attachment_info,
+                                "attachment_info": info,
                                "segment_id": attachment_binding.segment_id,
                            }
                        )
--- a/api/core/rag/datasource/vdb/baidu/baidu_vector.py
+++ b/api/core/rag/datasource/vdb/baidu/baidu_vector.py
@@ -13,6 +13,7 @@ from pymochow.exception import ServerError  # type: ignore
 from pymochow.model.database import Database
 from pymochow.model.enum import FieldType, IndexState, IndexType, MetricType, ServerErrCode, TableState  # type: ignore
 from pymochow.model.schema import (
+    AutoBuildRowCountIncrement,
    Field,
    FilteringIndex,
    HNSWParams,
@@ -51,6 +52,9 @@ class BaiduConfig(BaseModel):
    replicas: int = 3
    inverted_index_analyzer: str = "DEFAULT_ANALYZER"
    inverted_index_parser_mode: str = "COARSE_MODE"
+    auto_build_row_count_increment: int = 500
+    auto_build_row_count_increment_ratio: float = 0.05
+    rebuild_index_timeout_in_seconds: int = 300

    @model_validator(mode="before")
    @classmethod
@@ -107,18 +111,6 @@ class BaiduVector(BaseVector):
                rows.append(row)
            table.upsert(rows=rows)

-        # rebuild vector index after upsert finished
-        table.rebuild_index(self.vector_index)
-        timeout = 3600  # 1 hour timeout
-        start_time = time.time()
-        while True:
-            time.sleep(1)
-            index = table.describe_index(self.vector_index)
-            if index.state == IndexState.NORMAL:
-                break
-            if time.time() - start_time > timeout:
-                raise TimeoutError(f"Index rebuild timeout after {timeout} seconds")
-
    def text_exists(self, id: str) -> bool:
        res = self._db.table(self._collection_name).query(primary_key={VDBField.PRIMARY_KEY: id})
        if res and res.code == 0:
@@ -232,8 +224,14 @@ class BaiduVector(BaseVector):
            return self._client.database(self._client_config.database)

    def _table_existed(self) -> bool:
-        tables = self._db.list_table()
-        return any(table.table_name == self._collection_name for table in tables)
+        """Return True if the collection's table exists, False when the server reports TABLE_NOT_EXIST."""
+        try:
+            self._db.table(self._collection_name)
+        except ServerError as e:
+            if e.code == ServerErrCode.TABLE_NOT_EXIST:
+                return False
+            raise  # any other server error propagates to the caller
+        return True

    def _create_table(self, dimension: int):
        # Try to grab distributed lock and create table
@@ -287,6 +285,11 @@ class BaiduVector(BaseVector):
                    field=VDBField.VECTOR,
                    metric_type=metric_type,
                    params=HNSWParams(m=16, efconstruction=200),
+                    auto_build=True,
+                    auto_build_index_policy=AutoBuildRowCountIncrement(
+                        row_count_increment=self._client_config.auto_build_row_count_increment,
+                        row_count_increment_ratio=self._client_config.auto_build_row_count_increment_ratio,
+                    ),
                )
            )

@@ -335,7 +338,7 @@ class BaiduVector(BaseVector):
            )

            # Wait for table created
-            timeout = 300  # 5 minutes timeout
+            timeout = self._client_config.rebuild_index_timeout_in_seconds  # default 5 minutes timeout
            start_time = time.time()
            while True:
                time.sleep(1)
@@ -345,6 +348,20 @@ class BaiduVector(BaseVector):
                if time.time() - start_time > timeout:
                    raise TimeoutError(f"Table creation timeout after {timeout} seconds")
            redis_client.set(table_exist_cache_key, 1, ex=3600)
+            # rebuild vector index immediately after table created, make sure index is ready
+            table.rebuild_index(self.vector_index)
+            timeout = 3600  # 1 hour timeout
+            self._wait_for_index_ready(table, timeout)
+
+    def _wait_for_index_ready(self, table, timeout: int = 3600):
+        """Poll the vector index (1s sleep before each check) until NORMAL; raise TimeoutError past `timeout` seconds."""
+        began_at = time.time()
+        index_ready = False
+        while not index_ready:
+            time.sleep(1)
+            index_ready = table.describe_index(self.vector_index).state == IndexState.NORMAL
+            if not index_ready and time.time() - began_at > timeout:
+                raise TimeoutError(f"Index rebuild timeout after {timeout} seconds")


 class BaiduVectorFactory(AbstractVectorFactory):
@@ -369,5 +386,8 @@ class BaiduVectorFactory(AbstractVectorFactory):
                replicas=dify_config.BAIDU_VECTOR_DB_REPLICAS,
                inverted_index_analyzer=dify_config.BAIDU_VECTOR_DB_INVERTED_INDEX_ANALYZER,
                inverted_index_parser_mode=dify_config.BAIDU_VECTOR_DB_INVERTED_INDEX_PARSER_MODE,
+                auto_build_row_count_increment=dify_config.BAIDU_VECTOR_DB_AUTO_BUILD_ROW_COUNT_INCREMENT,
+                auto_build_row_count_increment_ratio=dify_config.BAIDU_VECTOR_DB_AUTO_BUILD_ROW_COUNT_INCREMENT_RATIO,
+                rebuild_index_timeout_in_seconds=dify_config.BAIDU_VECTOR_DB_REBUILD_INDEX_TIMEOUT_IN_SECONDS,
            ),
        )
--- a/api/core/rag/datasource/vdb/huawei/huawei_cloud_vector.py
+++ b/api/core/rag/datasource/vdb/huawei/huawei_cloud_vector.py
@@ -124,13 +124,13 @@ class HuaweiCloudVector(BaseVector):
                )
            )

+        score_threshold = float(kwargs.get("score_threshold") or 0.0)
        docs = []
        for doc, score in docs_and_scores:
-            score_threshold = float(kwargs.get("score_threshold") or 0.0)
            if score >= score_threshold:
                if doc.metadata is not None:
                    doc.metadata["score"] = score
-            docs.append(doc)
+                docs.append(doc)  # keep every hit that passes the threshold, with or without metadata

        return docs

--- a/api/core/rag/datasource/vdb/tidb_on_qdrant/tidb_on_qdrant_vector.py
+++ b/api/core/rag/datasource/vdb/tidb_on_qdrant/tidb_on_qdrant_vector.py
@@ -33,6 +33,7 @@ from core.rag.models.document import Document
 from extensions.ext_database import db
 from extensions.ext_redis import redis_client
 from models.dataset import Dataset, TidbAuthBinding
+from models.enums import TidbAuthBindingStatus

 if TYPE_CHECKING:
    from qdrant_client import grpc  # noqa
@@ -284,27 +285,29 @@ class TidbOnQdrantVector(BaseVector):
        from qdrant_client.http import models
        from qdrant_client.http.exceptions import UnexpectedResponse

-        for node_id in ids:
-            try:
-                filter = models.Filter(
-                    must=[
-                        models.FieldCondition(
-                            key="metadata.doc_id",
-                            match=models.MatchValue(value=node_id),
-                        ),
-                    ],
-                )
-                self._client.delete(
-                    collection_name=self._collection_name,
-                    points_selector=FilterSelector(filter=filter),
-                )
-            except UnexpectedResponse as e:
-                # Collection does not exist, so return
-                if e.status_code == 404:
-                    return
-                # Some other error occurred, so re-raise the exception
-                else:
-                    raise e
+        if not ids:
+            return
+
+        try:
+            filter = models.Filter(
+                must=[
+                    models.FieldCondition(
+                        key="metadata.doc_id",
+                        match=models.MatchAny(any=ids),
+                    ),
+                ],
+            )
+            self._client.delete(
+                collection_name=self._collection_name,
+                points_selector=FilterSelector(filter=filter),
+            )
+        except UnexpectedResponse as e:
+            # Collection does not exist, so return
+            if e.status_code == 404:
+                return
+            # Some other error occurred, so re-raise the exception
+            else:
+                raise e

    def text_exists(self, id: str) -> bool:
        all_collection_name = []
@@ -450,7 +453,7 @@ class TidbOnQdrantVectorFactory(AbstractVectorFactory):
                            password=new_cluster["password"],
                            tenant_id=dataset.tenant_id,
                            active=True,
-                            status="ACTIVE",
+                            status=TidbAuthBindingStatus.ACTIVE,
                        )
                        db.session.add(new_tidb_auth_binding)
                        db.session.commit()
--- a/api/core/rag/datasource/vdb/tidb_on_qdrant/tidb_service.py
+++ b/api/core/rag/datasource/vdb/tidb_on_qdrant/tidb_service.py
@@ -9,6 +9,7 @@ from configs import dify_config
 from extensions.ext_database import db
 from extensions.ext_redis import redis_client
 from models.dataset import TidbAuthBinding
+from models.enums import TidbAuthBindingStatus


 class TidbService:
@@ -170,7 +171,7 @@ class TidbService:
                userPrefix = item["userPrefix"]
                if state == "ACTIVE" and len(userPrefix) > 0:
                    cluster_info = tidb_serverless_list_map[item["clusterId"]]
-                    cluster_info.status = "ACTIVE"
+                    cluster_info.status = TidbAuthBindingStatus.ACTIVE
                    cluster_info.account = f"{userPrefix}.root"
                    db.session.add(cluster_info)
            db.session.commit()
--- a/api/core/rag/datasource/vdb/weaviate/weaviate_vector.py
+++ b/api/core/rag/datasource/vdb/weaviate/weaviate_vector.py
@@ -5,6 +5,7 @@ This module provides integration with Weaviate vector database for storing and r
 document embeddings used in retrieval-augmented generation workflows.
 """

+import atexit
 import datetime
 import json
 import logging
@@ -37,6 +38,32 @@ _weaviate_client: weaviate.WeaviateClient | None = None
 _weaviate_client_lock = threading.Lock()


+def _shutdown_weaviate_client() -> None:
+    """
+    Close the shared module-level Weaviate client, if one was created.
+
+    Registered with atexit so the client's HTTP/gRPC resources are released
+    when the interpreter exits. Errors are logged, never raised.
+    """
+    global _weaviate_client
+
+    # Detach the shared instance under the lock; close it after releasing the lock.
+    with _weaviate_client_lock:
+        stale_client, _weaviate_client = _weaviate_client, None
+
+    if stale_client is None:
+        return
+    try:
+        stale_client.close()
+    except Exception:
+        # Best-effort cleanup: record the failure at debug level and carry on.
+        logger.debug("Failed to close Weaviate client during shutdown", exc_info=True)
+
+
+# Register the shutdown hook once per process.
+atexit.register(_shutdown_weaviate_client)
+
+
 class WeaviateConfig(BaseModel):
    """
    Configuration model for Weaviate connection settings.
@@ -85,18 +112,6 @@ class WeaviateVector(BaseVector):
        self._client = self._init_client(config)
        self._attributes = attributes

-    def __del__(self):
-        """
-        Destructor to properly close the Weaviate client connection.
-        Prevents connection leaks and resource warnings.
-        """
-        if hasattr(self, "_client") and self._client is not None:
-            try:
-                self._client.close()
-            except Exception as e:
-                # Ignore errors during cleanup as object is being destroyed
-                logger.warning("Error closing Weaviate client %s", e, exc_info=True)
-
    def _init_client(self, config: WeaviateConfig) -> weaviate.WeaviateClient:
        """
        Initializes and returns a connected Weaviate client.
--- a/api/core/rag/docstore/dataset_docstore.py
+++ b/api/core/rag/docstore/dataset_docstore.py
@@ -6,6 +6,7 @@ from typing import Any
 from sqlalchemy import func, select

 from core.model_manager import ModelManager
+from core.rag.index_processor.constant.index_type import IndexTechniqueType
 from core.rag.models.document import AttachmentDocument, Document
 from dify_graph.model_runtime.entities.model_entities import ModelType
 from extensions.ext_database import db
@@ -71,7 +72,7 @@ class DatasetDocumentStore:
        if max_position is None:
            max_position = 0
        embedding_model = None
-        if self._dataset.indexing_technique == "high_quality":
+        if self._dataset.indexing_technique == IndexTechniqueType.HIGH_QUALITY:
            model_manager = ModelManager()
            embedding_model = model_manager.get_model_instance(
                tenant_id=self._dataset.tenant_id,
--- a/api/core/rag/embedding/retrieval.py
+++ b/api/core/rag/embedding/retrieval.py
@@ -1,8 +1,18 @@
 from pydantic import BaseModel
+from typing_extensions import TypedDict

 from models.dataset import DocumentSegment


+class AttachmentInfoDict(TypedDict):  # element type of RetrievalSegments.files
+    id: str
+    name: str
+    extension: str
+    mime_type: str
+    source_url: str
+    size: int
+
+
 class RetrievalChildChunk(BaseModel):
    """Retrieval segments."""

@@ -19,5 +29,5 @@ class RetrievalSegments(BaseModel):
    segment: DocumentSegment
    child_chunks: list[RetrievalChildChunk] | None = None
    score: float | None = None
-    files: list[dict[str, str | int]] | None = None
+    files: list[AttachmentInfoDict] | None = None
    summary: str | None = None  # Summary content if retrieved via summary index
--- a/api/core/rag/extractor/firecrawl/firecrawl_app.py
+++ b/api/core/rag/extractor/firecrawl/firecrawl_app.py
@@ -1,12 +1,38 @@
 import json
 import time
-from typing import Any, cast
+from typing import Any, NotRequired, cast

 import httpx
+from typing_extensions import TypedDict

 from extensions.ext_storage import storage


+class FirecrawlDocumentData(TypedDict):  # return shape of FirecrawlApp._extract_common_fields
+    title: str | None  # item["metadata"]["title"]; None when absent
+    description: str | None  # item["metadata"]["description"]; None when absent
+    source_url: str | None
+    markdown: str | None  # item["markdown"]; None when absent
+
+
+class CrawlStatusResponse(TypedDict):  # return shape of FirecrawlApp._format_crawl_status_response
+    status: str
+    total: int | None  # copied from the API response; None when the key is missing
+    current: int | None
+    data: list[FirecrawlDocumentData]
+
+
+class MapResponse(TypedDict):  # JSON body of a 200 response, as cast in FirecrawlApp.map
+    success: bool
+    links: list[str]
+
+
+class SearchResponse(TypedDict):  # JSON body of a 200 response in FirecrawlApp.search
+    success: bool  # search() raises when this is falsy
+    data: list[dict[str, Any]]
+    warning: NotRequired[str]  # used as the error text when success is falsy
+
+
 class FirecrawlApp:
    def __init__(self, api_key=None, base_url=None):
        self.api_key = api_key
@@ -14,7 +40,7 @@ class FirecrawlApp:
        if self.api_key is None and self.base_url == "https://api.firecrawl.dev":
            raise ValueError("No API key provided")

-    def scrape_url(self, url, params=None) -> dict[str, Any]:
+    def scrape_url(self, url, params=None) -> FirecrawlDocumentData:
        # Documentation: https://docs.firecrawl.dev/api-reference/endpoint/scrape
        headers = self._prepare_headers()
        json_data = {
@@ -32,9 +58,7 @@ class FirecrawlApp:
            return self._extract_common_fields(data)
        elif response.status_code in {402, 409, 500, 429, 408}:
            self._handle_error(response, "scrape URL")
-            return {}  # Avoid additional exception after handling error
-        else:
-            raise Exception(f"Failed to scrape URL. Status code: {response.status_code}")
+        raise Exception(f"Failed to scrape URL. Status code: {response.status_code}")

    def crawl_url(self, url, params=None) -> str:
        # Documentation: https://docs.firecrawl.dev/api-reference/endpoint/crawl-post
@@ -51,7 +75,7 @@ class FirecrawlApp:
            self._handle_error(response, "start crawl job")
            return ""  # unreachable

-    def map(self, url: str, params: dict[str, Any] | None = None) -> dict[str, Any]:
+    def map(self, url: str, params: dict[str, Any] | None = None) -> MapResponse:
        # Documentation: https://docs.firecrawl.dev/api-reference/endpoint/map
        headers = self._prepare_headers()
        json_data: dict[str, Any] = {"url": url, "integration": "dify"}
@@ -60,28 +84,22 @@ class FirecrawlApp:
            json_data.update(params)
        response = self._post_request(self._build_url("v2/map"), json_data, headers)
        if response.status_code == 200:
-            return cast(dict[str, Any], response.json())
+            return cast(MapResponse, response.json())
        elif response.status_code in {402, 409, 500, 429, 408}:
            self._handle_error(response, "start map job")
-            return {}
-        else:
-            raise Exception(f"Failed to start map job. Status code: {response.status_code}")
+        raise Exception(f"Failed to start map job. Status code: {response.status_code}")

-    def check_crawl_status(self, job_id) -> dict[str, Any]:
+    def check_crawl_status(self, job_id) -> CrawlStatusResponse:
        headers = self._prepare_headers()
        response = self._get_request(self._build_url(f"v2/crawl/{job_id}"), headers)
        if response.status_code == 200:
            crawl_status_response = response.json()
            if crawl_status_response.get("status") == "completed":
-                total = crawl_status_response.get("total", 0)
-                if total == 0:
+                # Normalize to avoid None bypassing the zero-guard when the API returns null.
+                total = crawl_status_response.get("total") or 0
+                if total <= 0:
                    raise Exception("Failed to check crawl status. Error: No page found")
-                data = crawl_status_response.get("data", [])
-                url_data_list = []
-                for item in data:
-                    if isinstance(item, dict) and "metadata" in item and "markdown" in item:
-                        url_data = self._extract_common_fields(item)
-                        url_data_list.append(url_data)
+                url_data_list = self._collect_all_crawl_pages(crawl_status_response, headers)
                if url_data_list:
                    file_key = "website_files/" + job_id + ".txt"
                    try:
@@ -95,13 +113,45 @@ class FirecrawlApp:
                return self._format_crawl_status_response(
                    crawl_status_response.get("status"), crawl_status_response, []
                )
-        else:
-            self._handle_error(response, "check crawl status")
-            return {}  # unreachable
+        self._handle_error(response, "check crawl status")
+        raise RuntimeError("unreachable: _handle_error always raises")
+
+    def _collect_all_crawl_pages(
+        self, first_page: dict[str, Any], headers: dict[str, str]
+    ) -> list[FirecrawlDocumentData]:
+        """Collect crawl results from ``first_page`` and every page reached via ``next``.
+
+        A null or missing ``data`` field counts as an empty page. At most ``total`` responses
+        are processed, bounding the loop if the server keeps returning a ``next`` URL.
+
+        :param first_page: decoded crawl-status response (``total``, ``data``, ``next``).
+        :param headers: request headers reused for each follow-up GET.
+        :return: common fields of each dict item having ``metadata`` and ``markdown``.
+        :raises Exception: when a follow-up request does not return status 200.
+        """
+        total: int = first_page.get("total") or 0
+        url_data_list: list[FirecrawlDocumentData] = []
+        current_page = first_page
+        pages_processed = 0
+        while True:
+            for item in current_page.get("data") or []:
+                if isinstance(item, dict) and "metadata" in item and "markdown" in item:
+                    url_data_list.append(self._extract_common_fields(item))
+            next_url: str | None = current_page.get("next")
+            pages_processed += 1
+            if not next_url or pages_processed >= total:
+                return url_data_list
+            response = self._get_request(next_url, headers)
+            if response.status_code != 200:
+                self._handle_error(response, "fetch next crawl page")
+            current_page = response.json()

    def _format_crawl_status_response(
-        self, status: str, crawl_status_response: dict[str, Any], url_data_list: list[dict[str, Any]]
-    ) -> dict[str, Any]:
+        self,
+        status: str,
+        crawl_status_response: dict[str, Any],
+        url_data_list: list[FirecrawlDocumentData],
+    ) -> CrawlStatusResponse:
        return {
            "status": status,
            "total": crawl_status_response.get("total"),
@@ -109,7 +159,7 @@ class FirecrawlApp:
            "data": url_data_list,
        }

-    def _extract_common_fields(self, item: dict[str, Any]) -> dict[str, Any]:
+    def _extract_common_fields(self, item: dict[str, Any]) -> FirecrawlDocumentData:
        return {
            "title": item.get("metadata", {}).get("title"),
            "description": item.get("metadata", {}).get("description"),
@@ -117,7 +167,7 @@ class FirecrawlApp:
            "markdown": item.get("markdown"),
        }

-    def _prepare_headers(self) -> dict[str, Any]:
+    def _prepare_headers(self) -> dict[str, str]:
        return {"Content-Type": "application/json", "Authorization": f"Bearer {self.api_key}"}

    def _build_url(self, path: str) -> str:
@@ -150,10 +200,10 @@ class FirecrawlApp:
            error_message = response.text or "Unknown error occurred"
        raise Exception(f"Failed to {action}. Status code: {response.status_code}. Error: {error_message}")  # type: ignore[return]

-    def search(self, query: str, params: dict[str, Any] | None = None) -> dict[str, Any]:
+    def search(self, query: str, params: dict[str, Any] | None = None) -> SearchResponse:
        # Documentation: https://docs.firecrawl.dev/api-reference/endpoint/search
        headers = self._prepare_headers()
-        json_data = {
+        json_data: dict[str, Any] = {
            "query": query,
            "limit": 5,
            "lang": "en",
@@ -170,12 +220,10 @@ class FirecrawlApp:
            json_data.update(params)
        response = self._post_request(self._build_url("v2/search"), json_data, headers)
        if response.status_code == 200:
-            response_data = response.json()
+            response_data: SearchResponse = response.json()
            if not response_data.get("success"):
                raise Exception(f"Search failed. Error: {response_data.get('warning', 'Unknown error')}")
-            return cast(dict[str, Any], response_data)
+            return response_data
        elif response.status_code in {402, 409, 500, 429, 408}:
            self._handle_error(response, "perform search")
-            return {}  # Avoid additional exception after handling error
-        else:
-            raise Exception(f"Failed to perform search. Status code: {response.status_code}")
+        raise Exception(f"Failed to perform search. Status code: {response.status_code}")
--- a/api/core/rag/extractor/pdf_extractor.py
+++ b/api/core/rag/extractor/pdf_extractor.py
@@ -15,6 +15,7 @@ from core.rag.extractor.extractor_base import BaseExtractor
 from core.rag.models.document import Document
 from extensions.ext_database import db
 from extensions.ext_storage import storage
+from extensions.storage.storage_type import StorageType
 from libs.datetime_utils import naive_utc_now
 from models.enums import CreatorUserRole
 from models.model import UploadFile
@@ -150,7 +151,7 @@ class PdfExtractor(BaseExtractor):
                    # save file to db
                    upload_file = UploadFile(
                        tenant_id=self._tenant_id,
-                        storage_type=dify_config.STORAGE_TYPE,
+                        storage_type=StorageType(dify_config.STORAGE_TYPE),
                        key=file_key,
                        name=file_key,
                        size=len(img_bytes),
--- a/api/core/rag/extractor/watercrawl/client.py
+++ b/api/core/rag/extractor/watercrawl/client.py
@@ -1,10 +1,11 @@
 import json
 from collections.abc import Generator
-from typing import Union
+from typing import Any, Union
 from urllib.parse import urljoin

 import httpx
 from httpx import Response
+from typing_extensions import TypedDict

 from core.rag.extractor.watercrawl.exceptions import (
    WaterCrawlAuthenticationError,
@@ -13,6 +14,27 @@ from core.rag.extractor.watercrawl.exceptions import (
 )


+class SpiderOptions(TypedDict):  # spider_options argument of WaterCrawlAPIClient.create_crawl_request
+    max_depth: int
+    page_limit: int
+    allowed_domains: list[str]
+    exclude_paths: list[str]
+    include_paths: list[str]
+
+
+class PageOptions(TypedDict):  # page_options argument of create_crawl_request and scrape_url
+    exclude_tags: list[str]
+    include_tags: list[str]
+    wait_time: int
+    include_html: bool
+    only_main_content: bool
+    include_links: bool
+    timeout: int
+    accept_cookies_selector: str
+    locale: str
+    actions: list[Any]
+
+
 class BaseAPIClient:
    def __init__(self, api_key, base_url):
        self.api_key = api_key
@@ -121,9 +143,9 @@ class WaterCrawlAPIClient(BaseAPIClient):
    def create_crawl_request(
        self,
        url: Union[list, str] | None = None,
-        spider_options: dict | None = None,
-        page_options: dict | None = None,
-        plugin_options: dict | None = None,
+        spider_options: SpiderOptions | None = None,
+        page_options: PageOptions | None = None,
+        plugin_options: dict[str, Any] | None = None,
    ):
        data = {
            # 'urls': url if isinstance(url, list) else [url],
@@ -176,8 +198,8 @@ class WaterCrawlAPIClient(BaseAPIClient):
    def scrape_url(
        self,
        url: str,
-        page_options: dict | None = None,
-        plugin_options: dict | None = None,
+        page_options: PageOptions | None = None,
+        plugin_options: dict[str, Any] | None = None,
        sync: bool = True,
        prefetched: bool = True,
    ):
--- a/api/core/rag/extractor/watercrawl/provider.py
+++ b/api/core/rag/extractor/watercrawl/provider.py
@@ -2,16 +2,39 @@ from collections.abc import Generator
 from datetime import datetime
 from typing import Any

-from core.rag.extractor.watercrawl.client import WaterCrawlAPIClient
+from typing_extensions import TypedDict
+
+from core.rag.extractor.watercrawl.client import PageOptions, SpiderOptions, WaterCrawlAPIClient
+
+
+class WatercrawlDocumentData(TypedDict):  # return shape of WaterCrawlProvider._structure_data
+    title: str | None
+    description: str | None
+    source_url: str | None
+    markdown: str | None  # result_object["result"]["markdown"]; None when absent
+
+
+class CrawlJobResponse(TypedDict):  # return shape of WaterCrawlProvider.crawl_url
+    status: str  # crawl_url always sets this to "active"
+    job_id: str | None  # the "uuid" key of the crawl result; None when absent
+
+
+class WatercrawlCrawlStatusResponse(TypedDict):  # return shape of WaterCrawlProvider.get_crawl_status
+    status: str  # "active" while the underlying request status is "new" or "running"
+    job_id: str | None
+    total: int
+    current: int
+    data: list[WatercrawlDocumentData]
+    time_consuming: float


 class WaterCrawlProvider:
    def __init__(self, api_key, base_url: str | None = None):
        self.client = WaterCrawlAPIClient(api_key, base_url)

-    def crawl_url(self, url, options: dict | Any | None = None):
+    def crawl_url(self, url: str, options: dict[str, Any] | None = None) -> CrawlJobResponse:
        options = options or {}
-        spider_options = {
+        spider_options: SpiderOptions = {
            "max_depth": 1,
            "page_limit": 1,
            "allowed_domains": [],
@@ -25,7 +48,7 @@ class WaterCrawlProvider:
            spider_options["exclude_paths"] = options.get("excludes", "").split(",") if options.get("excludes") else []

        wait_time = options.get("wait_time", 1000)
-        page_options = {
+        page_options: PageOptions = {
            "exclude_tags": options.get("exclude_tags", "").split(",") if options.get("exclude_tags") else [],
            "include_tags": options.get("include_tags", "").split(",") if options.get("include_tags") else [],
            "wait_time": max(1000, wait_time),  # minimum wait time is 1 second
@@ -41,9 +64,9 @@ class WaterCrawlProvider:

        return {"status": "active", "job_id": result.get("uuid")}

-    def get_crawl_status(self, crawl_request_id):
+    def get_crawl_status(self, crawl_request_id: str) -> WatercrawlCrawlStatusResponse:
        response = self.client.get_crawl_request(crawl_request_id)
-        data = []
+        data: list[WatercrawlDocumentData] = []
        if response["status"] in ["new", "running"]:
            status = "active"
        else:
@@ -67,7 +90,7 @@ class WaterCrawlProvider:
            "time_consuming": time_consuming,
        }

-    def get_crawl_url_data(self, job_id, url) -> dict | None:
+    def get_crawl_url_data(self, job_id: str, url: str) -> WatercrawlDocumentData | None:
        if not job_id:
            return self.scrape_url(url)

@@ -82,11 +105,11 @@ class WaterCrawlProvider:

        return None

-    def scrape_url(self, url: str):
+    def scrape_url(self, url: str) -> WatercrawlDocumentData:
        response = self.client.scrape_url(url=url, sync=True, prefetched=True)
        return self._structure_data(response)

-    def _structure_data(self, result_object: dict):
+    def _structure_data(self, result_object: dict[str, Any]) -> WatercrawlDocumentData:
        if isinstance(result_object.get("result", {}), str):
            raise ValueError("Invalid result object. Expected a dictionary.")

@@ -98,7 +121,9 @@ class WaterCrawlProvider:
            "markdown": result_object.get("result", {}).get("markdown"),
        }

-    def _get_results(self, crawl_request_id: str, query_params: dict | None = None) -> Generator[dict, None, None]:
+    def _get_results(
+        self, crawl_request_id: str, query_params: dict | None = None
+    ) -> Generator[WatercrawlDocumentData, None, None]:
        page = 0
        page_size = 100

--- a/api/core/rag/extractor/word_extractor.py
+++ b/api/core/rag/extractor/word_extractor.py
@@ -21,6 +21,7 @@ from core.rag.extractor.extractor_base import BaseExtractor
 from core.rag.models.document import Document
 from extensions.ext_database import db
 from extensions.ext_storage import storage
+from extensions.storage.storage_type import StorageType
 from libs.datetime_utils import naive_utc_now
 from models.enums import CreatorUserRole
 from models.model import UploadFile
@@ -112,7 +113,7 @@ class WordExtractor(BaseExtractor):
                        # save file to db
                        upload_file = UploadFile(
                            tenant_id=self.tenant_id,
-                            storage_type=dify_config.STORAGE_TYPE,
+                            storage_type=StorageType(dify_config.STORAGE_TYPE),
                            key=file_key,
                            name=file_key,
                            size=0,
@@ -140,7 +141,7 @@ class WordExtractor(BaseExtractor):
                    # save file to db
                    upload_file = UploadFile(
                        tenant_id=self.tenant_id,
-                        storage_type=dify_config.STORAGE_TYPE,
+                        storage_type=StorageType(dify_config.STORAGE_TYPE),
                        key=file_key,
                        name=file_key,
                        size=0,
@@ -365,7 +366,7 @@ class WordExtractor(BaseExtractor):
            paragraph_content = []
            # State for legacy HYPERLINK fields
            hyperlink_field_url = None
-            hyperlink_field_text_parts: list = []
+            hyperlink_field_text_parts: list[str] = []
            is_collecting_field_text = False
            # Iterate through paragraph elements in document order
            for child in paragraph._element:
--- a/api/core/rag/index_processor/index_processor.py
+++ b/api/core/rag/index_processor/index_processor.py
@@ -9,6 +9,8 @@ from flask import current_app
 from sqlalchemy import delete, func, select

 from core.db.session_factory import session_factory
+from core.rag.index_processor.constant.index_type import IndexTechniqueType
+from core.rag.index_processor.index_processor_base import SummaryIndexSettingDict
 from core.workflow.nodes.knowledge_index.exc import KnowledgeIndexNodeError
 from core.workflow.nodes.knowledge_index.protocols import Preview, PreviewItem, QaPreview
 from models.dataset import Dataset, Document, DocumentSegment
@@ -51,7 +53,7 @@ class IndexProcessor:
        original_document_id: str,
        chunks: Mapping[str, Any],
        batch: Any,
-        summary_index_setting: dict | None = None,
+        summary_index_setting: SummaryIndexSettingDict | None = None,
    ):
        with session_factory.create_session() as session:
            document = session.query(Document).filter_by(id=document_id).first()
@@ -131,7 +133,12 @@ class IndexProcessor:
        }

    def get_preview_output(
-        self, chunks: Any, dataset_id: str, document_id: str, chunk_structure: str, summary_index_setting: dict | None
+        self,
+        chunks: Any,
+        dataset_id: str,
+        document_id: str,
+        chunk_structure: str,
+        summary_index_setting: SummaryIndexSettingDict | None,
    ) -> Preview:
        doc_language = None
        with session_factory.create_session() as session:
@@ -153,7 +160,7 @@ class IndexProcessor:
            tenant_id = dataset.tenant_id

        preview_output = self.format_preview(chunk_structure, chunks)
-        if indexing_technique != "high_quality":
+        if indexing_technique != IndexTechniqueType.HIGH_QUALITY:
            return preview_output

        if not summary_index_setting or not summary_index_setting.get("enable"):
--- a/Show More
+++ b/Show More