feat: knowledge pipeline (#25360)

Signed-off-by: -LAN- <laipz8200@outlook.com> Co-authored-by: twwu <twwu@dify.ai> Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com> Co-authored-by: jyong <718720800@qq.com> Co-authored-by: Wu Tianwei <30284043+WTW0313@users.noreply.github.com> Co-authored-by: QuantumGhost <obelisk.reg+git@gmail.com> Co-authored-by: lyzno1 <yuanyouhuilyz@gmail.com> Co-authored-by: quicksand <quicksandzn@gmail.com> Co-authored-by: Jyong <76649700+JohnJyong@users.noreply.github.com> Co-authored-by: lyzno1 <92089059+lyzno1@users.noreply.github.com> Co-authored-by: zxhlyh <jasonapring2015@outlook.com> Co-authored-by: Yongtao Huang <yongtaoh2022@gmail.com> Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> Co-authored-by: Joel <iamjoel007@gmail.com> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Co-authored-by: nite-knite <nkCoding@gmail.com> Co-authored-by: Hanqing Zhao <sherry9277@gmail.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> Co-authored-by: Harry <xh001x@hotmail.com>
2026-03-02 04:01:14 -05:00 · 2025-09-18 12:49:10 +08:00
parent 7dadb33003
commit 85cda47c70
1772 changed files with 102407 additions and 31710 deletions
--- a/api/core/workflow/nodes/code/code_node.py
+++ b/api/core/workflow/nodes/code/code_node.py
@@ -1,6 +1,6 @@
 from collections.abc import Mapping, Sequence
 from decimal import Decimal
-from typing import Any
+from typing import Any, cast

 from configs import dify_config
 from core.helper.code_executor.code_executor import CodeExecutionError, CodeExecutor, CodeLanguage
@@ -9,12 +9,11 @@ from core.helper.code_executor.javascript.javascript_code_provider import Javasc
 from core.helper.code_executor.python3.python3_code_provider import Python3CodeProvider
 from core.variables.segments import ArrayFileSegment
 from core.variables.types import SegmentType
-from core.workflow.entities.node_entities import NodeRunResult
-from core.workflow.entities.workflow_node_execution import WorkflowNodeExecutionStatus
-from core.workflow.nodes.base import BaseNode
+from core.workflow.enums import ErrorStrategy, NodeType, WorkflowNodeExecutionStatus
+from core.workflow.node_events import NodeRunResult
 from core.workflow.nodes.base.entities import BaseNodeData, RetryConfig
+from core.workflow.nodes.base.node import Node
 from core.workflow.nodes.code.entities import CodeNodeData
-from core.workflow.nodes.enums import ErrorStrategy, NodeType

 from .exc import (
    CodeNodeError,
@@ -23,8 +22,8 @@ from .exc import (
 )


-class CodeNode(BaseNode):
-    _node_type = NodeType.CODE
+class CodeNode(Node):
+    node_type = NodeType.CODE

    _node_data: CodeNodeData

@@ -50,7 +49,7 @@ class CodeNode(BaseNode):
        return self._node_data

    @classmethod
-    def get_default_config(cls, filters: dict | None = None):
+    def get_default_config(cls, filters: Mapping[str, object] | None = None) -> Mapping[str, object]:
        """
        Get default config of node.
        :param filters: filter by node config parameters.
@@ -58,7 +57,7 @@ class CodeNode(BaseNode):
        """
        code_language = CodeLanguage.PYTHON3
        if filters:
-            code_language = filters.get("code_language", CodeLanguage.PYTHON3)
+            code_language = cast(CodeLanguage, filters.get("code_language", CodeLanguage.PYTHON3))

        providers: list[type[CodeNodeProvider]] = [Python3CodeProvider, JavascriptCodeProvider]
        code_provider: type[CodeNodeProvider] = next(p for p in providers if p.is_accept_language(code_language))
@@ -109,8 +108,6 @@ class CodeNode(BaseNode):
        """
        if value is None:
            return None
-        if not isinstance(value, str):
-            raise OutputValidationError(f"Output variable `{variable}` must be a string")

        if len(value) > dify_config.CODE_MAX_STRING_LENGTH:
            raise OutputValidationError(
@@ -123,8 +120,6 @@ class CodeNode(BaseNode):
    def _check_boolean(self, value: bool | None, variable: str) -> bool | None:
        if value is None:
            return None
-        if not isinstance(value, bool):
-            raise OutputValidationError(f"Output variable `{variable}` must be a boolean")

        return value

@@ -137,8 +132,6 @@ class CodeNode(BaseNode):
        """
        if value is None:
            return None
-        if not isinstance(value, int | float):
-            raise OutputValidationError(f"Output variable `{variable}` must be a number")

        if value > dify_config.CODE_MAX_NUMBER or value < dify_config.CODE_MIN_NUMBER:
            raise OutputValidationError(
@@ -262,7 +255,13 @@ class CodeNode(BaseNode):
                    )
            elif output_config.type == SegmentType.NUMBER:
                # check if number available
-                checked = self._check_number(value=result[output_name], variable=f"{prefix}{dot}{output_name}")
+                value = result.get(output_name)
+                if value is not None and not isinstance(value, (int, float)):
+                    raise OutputValidationError(
+                        f"Output {prefix}{dot}{output_name} is not a number,"
+                        f" got {type(result.get(output_name))} instead."
+                    )
+                checked = self._check_number(value=value, variable=f"{prefix}{dot}{output_name}")
                # If the output is a boolean and the output schema specifies a NUMBER type,
                # convert the boolean value to an integer.
                #
@@ -272,8 +271,13 @@ class CodeNode(BaseNode):

            elif output_config.type == SegmentType.STRING:
                # check if string available
+                value = result.get(output_name)
+                if value is not None and not isinstance(value, str):
+                    raise OutputValidationError(
+                        f"Output {prefix}{dot}{output_name} must be a string, got {type(value).__name__} instead"
+                    )
                transformed_result[output_name] = self._check_string(
-                    value=result[output_name],
+                    value=value,
                    variable=f"{prefix}{dot}{output_name}",
                )
            elif output_config.type == SegmentType.BOOLEAN:
@@ -283,31 +287,36 @@ class CodeNode(BaseNode):
                )
            elif output_config.type == SegmentType.ARRAY_NUMBER:
                # check if array of number available
-                if not isinstance(result[output_name], list):
-                    if result[output_name] is None:
+                value = result[output_name]
+                if not isinstance(value, list):
+                    if value is None:
                        transformed_result[output_name] = None
                    else:
                        raise OutputValidationError(
-                            f"Output {prefix}{dot}{output_name} is not an array,"
-                            f" got {type(result.get(output_name))} instead."
+                            f"Output {prefix}{dot}{output_name} is not an array, got {type(value)} instead."
                        )
                else:
-                    if len(result[output_name]) > dify_config.CODE_MAX_NUMBER_ARRAY_LENGTH:
+                    if len(value) > dify_config.CODE_MAX_NUMBER_ARRAY_LENGTH:
                        raise OutputValidationError(
                            f"The length of output variable `{prefix}{dot}{output_name}` must be"
                            f" less than {dify_config.CODE_MAX_NUMBER_ARRAY_LENGTH} elements."
                        )

+                    for i, inner_value in enumerate(value):
+                        if not isinstance(inner_value, (int, float)):
+                            raise OutputValidationError(
+                                f"The element at index {i} of output variable `{prefix}{dot}{output_name}` must be"
+                                f" a number."
+                            )
+                        _ = self._check_number(value=inner_value, variable=f"{prefix}{dot}{output_name}[{i}]")
                    transformed_result[output_name] = [
                        # If the element is a boolean and the output schema specifies a `array[number]` type,
                        # convert the boolean value to an integer.
                        #
                        # This ensures compatibility with existing workflows that may use
                        # `True` and `False` as values for NUMBER type outputs.
-                        self._convert_boolean_to_int(
-                            self._check_number(value=value, variable=f"{prefix}{dot}{output_name}[{i}]"),
-                        )
-                        for i, value in enumerate(result[output_name])
+                        self._convert_boolean_to_int(v)
+                        for v in value
                    ]
            elif output_config.type == SegmentType.ARRAY_STRING:
                # check if array of string available
@@ -370,8 +379,9 @@ class CodeNode(BaseNode):
                    ]
            elif output_config.type == SegmentType.ARRAY_BOOLEAN:
                # check if array of object available
-                if not isinstance(result[output_name], list):
-                    if result[output_name] is None:
+                value = result[output_name]
+                if not isinstance(value, list):
+                    if value is None:
                        transformed_result[output_name] = None
                    else:
                        raise OutputValidationError(
@@ -379,10 +389,14 @@ class CodeNode(BaseNode):
                            f" got {type(result.get(output_name))} instead."
                        )
                else:
-                    transformed_result[output_name] = [
-                        self._check_boolean(value=value, variable=f"{prefix}{dot}{output_name}[{i}]")
-                        for i, value in enumerate(result[output_name])
-                    ]
+                    for i, inner_value in enumerate(value):
+                        if inner_value is not None and not isinstance(inner_value, bool):
+                            raise OutputValidationError(
+                                f"Output {prefix}{dot}{output_name}[{i}] is not a boolean,"
+                                f" got {type(inner_value)} instead."
+                            )
+                        _ = self._check_boolean(value=inner_value, variable=f"{prefix}{dot}{output_name}[{i}]")
+                    transformed_result[output_name] = value

            else:
                raise OutputValidationError(f"Output type {output_config.type} is not supported.")
@@ -403,6 +417,7 @@ class CodeNode(BaseNode):
        node_id: str,
        node_data: Mapping[str, Any],
    ) -> Mapping[str, Sequence[str]]:
+        _ = graph_config  # Explicitly mark as unused
        # Create typed NodeData from dict
        typed_node_data = CodeNodeData.model_validate(node_data)

@@ -411,10 +426,6 @@ class CodeNode(BaseNode):
            for variable_selector in typed_node_data.variables
        }

-    @property
-    def continue_on_error(self) -> bool:
-        return self._node_data.error_strategy is not None
-
    @property
    def retry(self) -> bool:
        return self._node_data.retry_config.retry_enabled
--- a/api/core/workflow/nodes/code/entities.py
+++ b/api/core/workflow/nodes/code/entities.py
@@ -1,11 +1,11 @@
-from typing import Annotated, Literal
+from typing import Annotated, Literal, Self

 from pydantic import AfterValidator, BaseModel

 from core.helper.code_executor.code_executor import CodeLanguage
 from core.variables.types import SegmentType
-from core.workflow.entities.variable_entities import VariableSelector
 from core.workflow.nodes.base import BaseNodeData
+from core.workflow.nodes.base.entities import VariableSelector

 _ALLOWED_OUTPUT_FROM_CODE = frozenset(
    [
@@ -34,7 +34,7 @@ class CodeNodeData(BaseNodeData):

    class Output(BaseModel):
        type: Annotated[SegmentType, AfterValidator(_validate_type)]
-        children: dict[str, "CodeNodeData.Output"] | None = None
+        children: dict[str, Self] | None = None

    class Dependency(BaseModel):
        name: str