feat: knowledge pipeline (#25360)

Signed-off-by: -LAN- <laipz8200@outlook.com>
Co-authored-by: twwu <twwu@dify.ai>
Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com>
Co-authored-by: jyong <718720800@qq.com>
Co-authored-by: Wu Tianwei <30284043+WTW0313@users.noreply.github.com>
Co-authored-by: QuantumGhost <obelisk.reg+git@gmail.com>
Co-authored-by: lyzno1 <yuanyouhuilyz@gmail.com>
Co-authored-by: quicksand <quicksandzn@gmail.com>
Co-authored-by: Jyong <76649700+JohnJyong@users.noreply.github.com>
Co-authored-by: lyzno1 <92089059+lyzno1@users.noreply.github.com>
Co-authored-by: zxhlyh <jasonapring2015@outlook.com>
Co-authored-by: Yongtao Huang <yongtaoh2022@gmail.com>
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
Co-authored-by: Joel <iamjoel007@gmail.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Co-authored-by: nite-knite <nkCoding@gmail.com>
Co-authored-by: Hanqing Zhao <sherry9277@gmail.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Harry <xh001x@hotmail.com>
2026-05-10 06:00:51 -04:00 · 2025-09-18 12:49:10 +08:00
parent 7dadb33003
commit 85cda47c70
1772 changed files with 102407 additions and 31710 deletions
--- a/api/core/workflow/utils/condition/processor.py
+++ b/api/core/workflow/utils/condition/processor.py
@@ -1,16 +1,16 @@
 import json
-from collections.abc import Sequence
-from typing import Any, Literal, Union
+from collections.abc import Mapping, Sequence
+from typing import Literal, NamedTuple

 from core.file import FileAttribute, file_manager
 from core.variables import ArrayFileSegment
 from core.variables.segments import ArrayBooleanSegment, BooleanSegment
-from core.workflow.entities.variable_pool import VariablePool
+from core.workflow.entities import VariablePool

 from .entities import Condition, SubCondition, SupportedComparisonOperator


-def _convert_to_bool(value: Any) -> bool:
+def _convert_to_bool(value: object) -> bool:
    if isinstance(value, int):
        return bool(value)

@@ -22,6 +22,12 @@ def _convert_to_bool(value: Any) -> bool:
    raise TypeError(f"unexpected value: type={type(value)}, value={value}")


+class ConditionCheckResult(NamedTuple):
+    inputs: Sequence[Mapping[str, object]]
+    group_results: Sequence[bool]
+    final_result: bool
+
+
 class ConditionProcessor:
    def process_conditions(
        self,
@@ -29,9 +35,9 @@ class ConditionProcessor:
        variable_pool: VariablePool,
        conditions: Sequence[Condition],
        operator: Literal["and", "or"],
-    ):
-        input_conditions = []
-        group_results = []
+    ) -> ConditionCheckResult:
+        input_conditions: list[Mapping[str, object]] = []
+        group_results: list[bool] = []

        for condition in conditions:
            variable = variable_pool.get(condition.variable_selector)
@@ -88,17 +94,17 @@ class ConditionProcessor:
            # Implemented short-circuit evaluation for logical conditions
            if (operator == "and" and not result) or (operator == "or" and result):
                final_result = result
-                return input_conditions, group_results, final_result
+                return ConditionCheckResult(input_conditions, group_results, final_result)

        final_result = all(group_results) if operator == "and" else any(group_results)
-        return input_conditions, group_results, final_result
+        return ConditionCheckResult(input_conditions, group_results, final_result)


 def _evaluate_condition(
    *,
    operator: SupportedComparisonOperator,
-    value: Any,
-    expected: Union[str, Sequence[str], bool | Sequence[bool], None],
+    value: object,
+    expected: str | Sequence[str] | bool | Sequence[bool] | None,
 ) -> bool:
    match operator:
        case "contains":
@@ -138,7 +144,17 @@ def _evaluate_condition(
        case "not in":
            return _assert_not_in(value=value, expected=expected)
        case "all of" if isinstance(expected, list):
-            return _assert_all_of(value=value, expected=expected)
+            # Type narrowing: at this point expected is a list, could be list[str] or list[bool]
+            if all(isinstance(item, str) for item in expected):
+                # Create a new typed list to satisfy type checker
+                str_list: list[str] = [item for item in expected if isinstance(item, str)]
+                return _assert_all_of(value=value, expected=str_list)
+            elif all(isinstance(item, bool) for item in expected):
+                # Create a new typed list to satisfy type checker
+                bool_list: list[bool] = [item for item in expected if isinstance(item, bool)]
+                return _assert_all_of_bool(value=value, expected=bool_list)
+            else:
+                raise ValueError("all of operator expects homogeneous list of strings or booleans")
        case "exists":
            return _assert_exists(value=value)
        case "not exists":
@@ -147,55 +163,73 @@ def _evaluate_condition(
            raise ValueError(f"Unsupported operator: {operator}")


-def _assert_contains(*, value: Any, expected: Any) -> bool:
+def _assert_contains(*, value: object, expected: object) -> bool:
    if not value:
        return False

    if not isinstance(value, (str, list)):
        raise ValueError("Invalid actual value type: string or array")

-    if expected not in value:
-        return False
+    # Type checking ensures value is str or list at this point
+    if isinstance(value, str):
+        if not isinstance(expected, str):
+            expected = str(expected)
+        if expected not in value:
+            return False
+    else:  # value is list
+        if expected not in value:
+            return False
    return True


-def _assert_not_contains(*, value: Any, expected: Any) -> bool:
+def _assert_not_contains(*, value: object, expected: object) -> bool:
    if not value:
        return True

    if not isinstance(value, (str, list)):
        raise ValueError("Invalid actual value type: string or array")

-    if expected in value:
-        return False
+    # Type checking ensures value is str or list at this point
+    if isinstance(value, str):
+        if not isinstance(expected, str):
+            expected = str(expected)
+        if expected in value:
+            return False
+    else:  # value is list
+        if expected in value:
+            return False
    return True


-def _assert_start_with(*, value: Any, expected: Any) -> bool:
+def _assert_start_with(*, value: object, expected: object) -> bool:
    if not value:
        return False

    if not isinstance(value, str):
        raise ValueError("Invalid actual value type: string")

+    if not isinstance(expected, str):
+        raise ValueError("Expected value must be a string for startswith")
    if not value.startswith(expected):
        return False
    return True


-def _assert_end_with(*, value: Any, expected: Any) -> bool:
+def _assert_end_with(*, value: object, expected: object) -> bool:
    if not value:
        return False

    if not isinstance(value, str):
        raise ValueError("Invalid actual value type: string")

+    if not isinstance(expected, str):
+        raise ValueError("Expected value must be a string for endswith")
    if not value.endswith(expected):
        return False
    return True


-def _assert_is(*, value: Any, expected: Any) -> bool:
+def _assert_is(*, value: object, expected: object) -> bool:
    if value is None:
        return False

@@ -207,7 +241,7 @@ def _assert_is(*, value: Any, expected: Any) -> bool:
    return True


-def _assert_is_not(*, value: Any, expected: Any) -> bool:
+def _assert_is_not(*, value: object, expected: object) -> bool:
    if value is None:
        return False

@@ -219,19 +253,19 @@ def _assert_is_not(*, value: Any, expected: Any) -> bool:
    return True


-def _assert_empty(*, value: Any) -> bool:
+def _assert_empty(*, value: object) -> bool:
    if not value:
        return True
    return False


-def _assert_not_empty(*, value: Any) -> bool:
+def _assert_not_empty(*, value: object) -> bool:
    if value:
        return True
    return False


-def _assert_equal(*, value: Any, expected: Any) -> bool:
+def _assert_equal(*, value: object, expected: object) -> bool:
    if value is None:
        return False

@@ -240,10 +274,16 @@ def _assert_equal(*, value: Any, expected: Any) -> bool:

    # Handle boolean comparison
    if isinstance(value, bool):
+        if not isinstance(expected, (bool, int, str)):
+            raise ValueError(f"Cannot convert {type(expected)} to bool")
        expected = bool(expected)
    elif isinstance(value, int):
+        if not isinstance(expected, (int, float, str)):
+            raise ValueError(f"Cannot convert {type(expected)} to int")
        expected = int(expected)
    else:
+        if not isinstance(expected, (int, float, str)):
+            raise ValueError(f"Cannot convert {type(expected)} to float")
        expected = float(expected)

    if value != expected:
@@ -251,7 +291,7 @@ def _assert_equal(*, value: Any, expected: Any) -> bool:
    return True


-def _assert_not_equal(*, value: Any, expected: Any) -> bool:
+def _assert_not_equal(*, value: object, expected: object) -> bool:
    if value is None:
        return False

@@ -260,10 +300,16 @@ def _assert_not_equal(*, value: Any, expected: Any) -> bool:

    # Handle boolean comparison
    if isinstance(value, bool):
+        if not isinstance(expected, (bool, int, str)):
+            raise ValueError(f"Cannot convert {type(expected)} to bool")
        expected = bool(expected)
    elif isinstance(value, int):
+        if not isinstance(expected, (int, float, str)):
+            raise ValueError(f"Cannot convert {type(expected)} to int")
        expected = int(expected)
    else:
+        if not isinstance(expected, (int, float, str)):
+            raise ValueError(f"Cannot convert {type(expected)} to float")
        expected = float(expected)

    if value == expected:
@@ -271,7 +317,7 @@ def _assert_not_equal(*, value: Any, expected: Any) -> bool:
    return True


-def _assert_greater_than(*, value: Any, expected: Any) -> bool:
+def _assert_greater_than(*, value: object, expected: object) -> bool:
    if value is None:
        return False

@@ -279,8 +325,12 @@ def _assert_greater_than(*, value: Any, expected: Any) -> bool:
        raise ValueError("Invalid actual value type: number")

    if isinstance(value, int):
+        if not isinstance(expected, (int, float, str)):
+            raise ValueError(f"Cannot convert {type(expected)} to int")
        expected = int(expected)
    else:
+        if not isinstance(expected, (int, float, str)):
+            raise ValueError(f"Cannot convert {type(expected)} to float")
        expected = float(expected)

    if value <= expected:
@@ -288,7 +338,7 @@ def _assert_greater_than(*, value: Any, expected: Any) -> bool:
    return True


-def _assert_less_than(*, value: Any, expected: Any) -> bool:
+def _assert_less_than(*, value: object, expected: object) -> bool:
    if value is None:
        return False

@@ -296,8 +346,12 @@ def _assert_less_than(*, value: Any, expected: Any) -> bool:
        raise ValueError("Invalid actual value type: number")

    if isinstance(value, int):
+        if not isinstance(expected, (int, float, str)):
+            raise ValueError(f"Cannot convert {type(expected)} to int")
        expected = int(expected)
    else:
+        if not isinstance(expected, (int, float, str)):
+            raise ValueError(f"Cannot convert {type(expected)} to float")
        expected = float(expected)

    if value >= expected:
@@ -305,7 +359,7 @@ def _assert_less_than(*, value: Any, expected: Any) -> bool:
    return True


-def _assert_greater_than_or_equal(*, value: Any, expected: Any) -> bool:
+def _assert_greater_than_or_equal(*, value: object, expected: object) -> bool:
    if value is None:
        return False

@@ -313,8 +367,12 @@ def _assert_greater_than_or_equal(*, value: Any, expected: Any) -> bool:
        raise ValueError("Invalid actual value type: number")

    if isinstance(value, int):
+        if not isinstance(expected, (int, float, str)):
+            raise ValueError(f"Cannot convert {type(expected)} to int")
        expected = int(expected)
    else:
+        if not isinstance(expected, (int, float, str)):
+            raise ValueError(f"Cannot convert {type(expected)} to float")
        expected = float(expected)

    if value < expected:
@@ -322,7 +380,7 @@ def _assert_greater_than_or_equal(*, value: Any, expected: Any) -> bool:
    return True


-def _assert_less_than_or_equal(*, value: Any, expected: Any) -> bool:
+def _assert_less_than_or_equal(*, value: object, expected: object) -> bool:
    if value is None:
        return False

@@ -330,8 +388,12 @@ def _assert_less_than_or_equal(*, value: Any, expected: Any) -> bool:
        raise ValueError("Invalid actual value type: number")

    if isinstance(value, int):
+        if not isinstance(expected, (int, float, str)):
+            raise ValueError(f"Cannot convert {type(expected)} to int")
        expected = int(expected)
    else:
+        if not isinstance(expected, (int, float, str)):
+            raise ValueError(f"Cannot convert {type(expected)} to float")
        expected = float(expected)

    if value > expected:
@@ -339,19 +401,19 @@ def _assert_less_than_or_equal(*, value: Any, expected: Any) -> bool:
    return True


-def _assert_null(*, value: Any) -> bool:
+def _assert_null(*, value: object) -> bool:
    if value is None:
        return True
    return False


-def _assert_not_null(*, value: Any) -> bool:
+def _assert_not_null(*, value: object) -> bool:
    if value is not None:
        return True
    return False


-def _assert_in(*, value: Any, expected: Any) -> bool:
+def _assert_in(*, value: object, expected: object) -> bool:
    if not value:
        return False

@@ -363,7 +425,7 @@ def _assert_in(*, value: Any, expected: Any) -> bool:
    return True


-def _assert_not_in(*, value: Any, expected: Any) -> bool:
+def _assert_not_in(*, value: object, expected: object) -> bool:
    if not value:
        return True

@@ -375,20 +437,33 @@ def _assert_not_in(*, value: Any, expected: Any) -> bool:
    return True


-def _assert_all_of(*, value: Any, expected: Sequence[str]) -> bool:
+def _assert_all_of(*, value: object, expected: Sequence[str]) -> bool:
    if not value:
        return False

-    if not all(item in value for item in expected):
+    # Ensure value is a container that supports 'in' operator
+    if not isinstance(value, (list, tuple, set, str)):
        return False
-    return True
+
+    return all(item in value for item in expected)


-def _assert_exists(*, value: Any) -> bool:
+def _assert_all_of_bool(*, value: object, expected: Sequence[bool]) -> bool:
+    if not value:
+        return False
+
+    # Ensure value is a container that supports 'in' operator
+    if not isinstance(value, (list, tuple, set)):
+        return False
+
+    return all(item in value for item in expected)
+
+
+def _assert_exists(*, value: object) -> bool:
    return value is not None


-def _assert_not_exists(*, value: Any) -> bool:
+def _assert_not_exists(*, value: object) -> bool:
    return value is None


@@ -398,7 +473,7 @@ def _process_sub_conditions(
    operator: Literal["and", "or"],
 ) -> bool:
    files = variable.value
-    group_results = []
+    group_results: list[bool] = []
    for condition in sub_conditions:
        key = FileAttribute(condition.key)
        values = [file_manager.get_attr(file=file, attr=key) for file in files]
@@ -409,14 +484,14 @@ def _process_sub_conditions(
            if expected_value and not expected_value.startswith("."):
                expected_value = "." + expected_value

-            normalized_values = []
+            normalized_values: list[object] = []
            for value in values:
                if value and isinstance(value, str):
                    if not value.startswith("."):
                        value = "." + value
                normalized_values.append(value)
            values = normalized_values
-        sub_group_results = [
+        sub_group_results: list[bool] = [
            _evaluate_condition(
                value=value,
                operator=condition.comparison_operator,
--- a/api/core/workflow/utils/variable_template_parser.py
+++ b/api/core/workflow/utils/variable_template_parser.py
@@ -1,130 +0,0 @@
-import re
-from collections.abc import Mapping, Sequence
-from typing import Any
-
-from core.workflow.entities.variable_entities import VariableSelector
-
-REGEX = re.compile(r"\{\{(#[a-zA-Z0-9_]{1,50}(\.[a-zA-Z_][a-zA-Z0-9_]{0,29}){1,10}#)\}\}")
-
-SELECTOR_PATTERN = re.compile(r"\{\{(#[a-zA-Z0-9_]{1,50}(?:\.[a-zA-Z_][a-zA-Z0-9_]{0,29}){1,10}#)\}\}")
-
-
-def extract_selectors_from_template(template: str, /) -> Sequence[VariableSelector]:
-    parts = SELECTOR_PATTERN.split(template)
-    selectors = []
-    for part in filter(lambda x: x, parts):
-        if "." in part and part[0] == "#" and part[-1] == "#":
-            selectors.append(VariableSelector(variable=f"{part}", value_selector=part[1:-1].split(".")))
-    return selectors
-
-
-class VariableTemplateParser:
-    """
-    !NOTE: Consider to use the new `segments` module instead of this class.
-
-    A class for parsing and manipulating template variables in a string.
-
-    Rules:
-
-    1. Template variables must be enclosed in `{{}}`.
-    2. The template variable Key can only be: #node_id.var1.var2#.
-    3. The template variable Key cannot contain new lines or spaces, and must comply with rule 2.
-
-    Example usage:
-
-    template = "Hello, {{#node_id.query.name#}}! Your age is {{#node_id.query.age#}}."
-    parser = VariableTemplateParser(template)
-
-    # Extract template variable keys
-    variable_keys = parser.extract()
-    print(variable_keys)
-    # Output: ['#node_id.query.name#', '#node_id.query.age#']
-
-    # Extract variable selectors
-    variable_selectors = parser.extract_variable_selectors()
-    print(variable_selectors)
-    # Output: [VariableSelector(variable='#node_id.query.name#', value_selector=['node_id', 'query', 'name']),
-    #          VariableSelector(variable='#node_id.query.age#', value_selector=['node_id', 'query', 'age'])]
-
-    # Format the template string
-    inputs = {'#node_id.query.name#': 'John', '#node_id.query.age#': 25}}
-    formatted_string = parser.format(inputs)
-    print(formatted_string)
-    # Output: "Hello, John! Your age is 25."
-    """
-
-    def __init__(self, template: str):
-        self.template = template
-        self.variable_keys = self.extract()
-
-    def extract(self):
-        """
-        Extracts all the template variable keys from the template string.
-
-        Returns:
-            A list of template variable keys.
-        """
-        # Regular expression to match the template rules
-        matches = re.findall(REGEX, self.template)
-
-        first_group_matches = [match[0] for match in matches]
-
-        return list(set(first_group_matches))
-
-    def extract_variable_selectors(self) -> list[VariableSelector]:
-        """
-        Extracts the variable selectors from the template variable keys.
-
-        Returns:
-            A list of VariableSelector objects representing the variable selectors.
-        """
-        variable_selectors = []
-        for variable_key in self.variable_keys:
-            remove_hash = variable_key.replace("#", "")
-            split_result = remove_hash.split(".")
-            if len(split_result) < 2:
-                continue
-
-            variable_selectors.append(VariableSelector(variable=variable_key, value_selector=split_result))
-
-        return variable_selectors
-
-    def format(self, inputs: Mapping[str, Any]) -> str:
-        """
-        Formats the template string by replacing the template variables with their corresponding values.
-
-        Args:
-            inputs: A dictionary containing the values for the template variables.
-
-        Returns:
-            The formatted string with template variables replaced by their values.
-        """
-
-        def replacer(match):
-            key = match.group(1)
-            value = inputs.get(key, match.group(0))  # return original matched string if key not found
-
-            if value is None:
-                value = ""
-            # convert the value to string
-            if isinstance(value, list | dict | bool | int | float):
-                value = str(value)
-
-            # remove template variables if required
-            return VariableTemplateParser.remove_template_variables(value)
-
-        prompt = re.sub(REGEX, replacer, self.template)
-        return re.sub(r"<\|.*?\|>", "", prompt)
-
-    @classmethod
-    def remove_template_variables(cls, text: str):
-        """
-        Removes the template variables from the given text.
-
-        Args:
-            text: The text from which to remove the template variables.
-
-        Returns:
-            The text with template variables removed.
-        """
-        return re.sub(REGEX, r"{\1}", text)