feat: knowledge pipeline (#25360)

Signed-off-by: -LAN- <laipz8200@outlook.com>
Co-authored-by: twwu <twwu@dify.ai>
Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com>
Co-authored-by: jyong <718720800@qq.com>
Co-authored-by: Wu Tianwei <30284043+WTW0313@users.noreply.github.com>
Co-authored-by: QuantumGhost <obelisk.reg+git@gmail.com>
Co-authored-by: lyzno1 <yuanyouhuilyz@gmail.com>
Co-authored-by: quicksand <quicksandzn@gmail.com>
Co-authored-by: Jyong <76649700+JohnJyong@users.noreply.github.com>
Co-authored-by: lyzno1 <92089059+lyzno1@users.noreply.github.com>
Co-authored-by: zxhlyh <jasonapring2015@outlook.com>
Co-authored-by: Yongtao Huang <yongtaoh2022@gmail.com>
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
Co-authored-by: Joel <iamjoel007@gmail.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Co-authored-by: nite-knite <nkCoding@gmail.com>
Co-authored-by: Hanqing Zhao <sherry9277@gmail.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Harry <xh001x@hotmail.com>
This commit is contained in:
-LAN-
2025-09-18 12:49:10 +08:00
committed by GitHub
parent 7dadb33003
commit 85cda47c70
1772 changed files with 102407 additions and 31710 deletions

View File

@@ -1,16 +1,16 @@
import json
from collections.abc import Sequence
from typing import Any, Literal, Union
from collections.abc import Mapping, Sequence
from typing import Literal, NamedTuple
from core.file import FileAttribute, file_manager
from core.variables import ArrayFileSegment
from core.variables.segments import ArrayBooleanSegment, BooleanSegment
from core.workflow.entities.variable_pool import VariablePool
from core.workflow.entities import VariablePool
from .entities import Condition, SubCondition, SupportedComparisonOperator
def _convert_to_bool(value: Any) -> bool:
def _convert_to_bool(value: object) -> bool:
if isinstance(value, int):
return bool(value)
@@ -22,6 +22,12 @@ def _convert_to_bool(value: Any) -> bool:
raise TypeError(f"unexpected value: type={type(value)}, value={value}")
class ConditionCheckResult(NamedTuple):
    """Result bundle returned by ``ConditionProcessor.process_conditions``.

    Being a named tuple, it still unpacks positionally as
    ``(inputs, group_results, final_result)``, matching the bare tuple that
    ``process_conditions`` returned before this type was introduced.
    """

    # Mappings collected while the conditions were evaluated.
    inputs: Sequence[Mapping[str, object]]
    # Boolean outcomes gathered for the conditions that were evaluated.
    group_results: Sequence[bool]
    # Overall verdict after combining the outcomes with "and" / "or".
    final_result: bool
class ConditionProcessor:
def process_conditions(
self,
@@ -29,9 +35,9 @@ class ConditionProcessor:
variable_pool: VariablePool,
conditions: Sequence[Condition],
operator: Literal["and", "or"],
):
input_conditions = []
group_results = []
) -> ConditionCheckResult:
input_conditions: list[Mapping[str, object]] = []
group_results: list[bool] = []
for condition in conditions:
variable = variable_pool.get(condition.variable_selector)
@@ -88,17 +94,17 @@ class ConditionProcessor:
# Implemented short-circuit evaluation for logical conditions
if (operator == "and" and not result) or (operator == "or" and result):
final_result = result
return input_conditions, group_results, final_result
return ConditionCheckResult(input_conditions, group_results, final_result)
final_result = all(group_results) if operator == "and" else any(group_results)
return input_conditions, group_results, final_result
return ConditionCheckResult(input_conditions, group_results, final_result)
def _evaluate_condition(
*,
operator: SupportedComparisonOperator,
value: Any,
expected: Union[str, Sequence[str], bool | Sequence[bool], None],
value: object,
expected: str | Sequence[str] | bool | Sequence[bool] | None,
) -> bool:
match operator:
case "contains":
@@ -138,7 +144,17 @@ def _evaluate_condition(
case "not in":
return _assert_not_in(value=value, expected=expected)
case "all of" if isinstance(expected, list):
return _assert_all_of(value=value, expected=expected)
# Type narrowing: at this point expected is a list, could be list[str] or list[bool]
if all(isinstance(item, str) for item in expected):
# Create a new typed list to satisfy type checker
str_list: list[str] = [item for item in expected if isinstance(item, str)]
return _assert_all_of(value=value, expected=str_list)
elif all(isinstance(item, bool) for item in expected):
# Create a new typed list to satisfy type checker
bool_list: list[bool] = [item for item in expected if isinstance(item, bool)]
return _assert_all_of_bool(value=value, expected=bool_list)
else:
raise ValueError("all of operator expects homogeneous list of strings or booleans")
case "exists":
return _assert_exists(value=value)
case "not exists":
@@ -147,55 +163,73 @@ def _evaluate_condition(
raise ValueError(f"Unsupported operator: {operator}")
def _assert_contains(*, value: Any, expected: Any) -> bool:
def _assert_contains(*, value: object, expected: object) -> bool:
if not value:
return False
if not isinstance(value, (str, list)):
raise ValueError("Invalid actual value type: string or array")
if expected not in value:
return False
# Type checking ensures value is str or list at this point
if isinstance(value, str):
if not isinstance(expected, str):
expected = str(expected)
if expected not in value:
return False
else: # value is list
if expected not in value:
return False
return True
def _assert_not_contains(*, value: Any, expected: Any) -> bool:
def _assert_not_contains(*, value: object, expected: object) -> bool:
if not value:
return True
if not isinstance(value, (str, list)):
raise ValueError("Invalid actual value type: string or array")
if expected in value:
return False
# Type checking ensures value is str or list at this point
if isinstance(value, str):
if not isinstance(expected, str):
expected = str(expected)
if expected in value:
return False
else: # value is list
if expected in value:
return False
return True
def _assert_start_with(*, value: Any, expected: Any) -> bool:
def _assert_start_with(*, value: object, expected: object) -> bool:
if not value:
return False
if not isinstance(value, str):
raise ValueError("Invalid actual value type: string")
if not isinstance(expected, str):
raise ValueError("Expected value must be a string for startswith")
if not value.startswith(expected):
return False
return True
def _assert_end_with(*, value: Any, expected: Any) -> bool:
def _assert_end_with(*, value: object, expected: object) -> bool:
if not value:
return False
if not isinstance(value, str):
raise ValueError("Invalid actual value type: string")
if not isinstance(expected, str):
raise ValueError("Expected value must be a string for endswith")
if not value.endswith(expected):
return False
return True
def _assert_is(*, value: Any, expected: Any) -> bool:
def _assert_is(*, value: object, expected: object) -> bool:
if value is None:
return False
@@ -207,7 +241,7 @@ def _assert_is(*, value: Any, expected: Any) -> bool:
return True
def _assert_is_not(*, value: Any, expected: Any) -> bool:
def _assert_is_not(*, value: object, expected: object) -> bool:
if value is None:
return False
@@ -219,19 +253,19 @@ def _assert_is_not(*, value: Any, expected: Any) -> bool:
return True
def _assert_empty(*, value: Any) -> bool:
def _assert_empty(*, value: object) -> bool:
if not value:
return True
return False
def _assert_not_empty(*, value: Any) -> bool:
def _assert_not_empty(*, value: object) -> bool:
if value:
return True
return False
def _assert_equal(*, value: Any, expected: Any) -> bool:
def _assert_equal(*, value: object, expected: object) -> bool:
if value is None:
return False
@@ -240,10 +274,16 @@ def _assert_equal(*, value: Any, expected: Any) -> bool:
# Handle boolean comparison
if isinstance(value, bool):
if not isinstance(expected, (bool, int, str)):
raise ValueError(f"Cannot convert {type(expected)} to bool")
expected = bool(expected)
elif isinstance(value, int):
if not isinstance(expected, (int, float, str)):
raise ValueError(f"Cannot convert {type(expected)} to int")
expected = int(expected)
else:
if not isinstance(expected, (int, float, str)):
raise ValueError(f"Cannot convert {type(expected)} to float")
expected = float(expected)
if value != expected:
@@ -251,7 +291,7 @@ def _assert_equal(*, value: Any, expected: Any) -> bool:
return True
def _assert_not_equal(*, value: Any, expected: Any) -> bool:
def _assert_not_equal(*, value: object, expected: object) -> bool:
if value is None:
return False
@@ -260,10 +300,16 @@ def _assert_not_equal(*, value: Any, expected: Any) -> bool:
# Handle boolean comparison
if isinstance(value, bool):
if not isinstance(expected, (bool, int, str)):
raise ValueError(f"Cannot convert {type(expected)} to bool")
expected = bool(expected)
elif isinstance(value, int):
if not isinstance(expected, (int, float, str)):
raise ValueError(f"Cannot convert {type(expected)} to int")
expected = int(expected)
else:
if not isinstance(expected, (int, float, str)):
raise ValueError(f"Cannot convert {type(expected)} to float")
expected = float(expected)
if value == expected:
@@ -271,7 +317,7 @@ def _assert_not_equal(*, value: Any, expected: Any) -> bool:
return True
def _assert_greater_than(*, value: Any, expected: Any) -> bool:
def _assert_greater_than(*, value: object, expected: object) -> bool:
if value is None:
return False
@@ -279,8 +325,12 @@ def _assert_greater_than(*, value: Any, expected: Any) -> bool:
raise ValueError("Invalid actual value type: number")
if isinstance(value, int):
if not isinstance(expected, (int, float, str)):
raise ValueError(f"Cannot convert {type(expected)} to int")
expected = int(expected)
else:
if not isinstance(expected, (int, float, str)):
raise ValueError(f"Cannot convert {type(expected)} to float")
expected = float(expected)
if value <= expected:
@@ -288,7 +338,7 @@ def _assert_greater_than(*, value: Any, expected: Any) -> bool:
return True
def _assert_less_than(*, value: Any, expected: Any) -> bool:
def _assert_less_than(*, value: object, expected: object) -> bool:
if value is None:
return False
@@ -296,8 +346,12 @@ def _assert_less_than(*, value: Any, expected: Any) -> bool:
raise ValueError("Invalid actual value type: number")
if isinstance(value, int):
if not isinstance(expected, (int, float, str)):
raise ValueError(f"Cannot convert {type(expected)} to int")
expected = int(expected)
else:
if not isinstance(expected, (int, float, str)):
raise ValueError(f"Cannot convert {type(expected)} to float")
expected = float(expected)
if value >= expected:
@@ -305,7 +359,7 @@ def _assert_less_than(*, value: Any, expected: Any) -> bool:
return True
def _assert_greater_than_or_equal(*, value: Any, expected: Any) -> bool:
def _assert_greater_than_or_equal(*, value: object, expected: object) -> bool:
if value is None:
return False
@@ -313,8 +367,12 @@ def _assert_greater_than_or_equal(*, value: Any, expected: Any) -> bool:
raise ValueError("Invalid actual value type: number")
if isinstance(value, int):
if not isinstance(expected, (int, float, str)):
raise ValueError(f"Cannot convert {type(expected)} to int")
expected = int(expected)
else:
if not isinstance(expected, (int, float, str)):
raise ValueError(f"Cannot convert {type(expected)} to float")
expected = float(expected)
if value < expected:
@@ -322,7 +380,7 @@ def _assert_greater_than_or_equal(*, value: Any, expected: Any) -> bool:
return True
def _assert_less_than_or_equal(*, value: Any, expected: Any) -> bool:
def _assert_less_than_or_equal(*, value: object, expected: object) -> bool:
if value is None:
return False
@@ -330,8 +388,12 @@ def _assert_less_than_or_equal(*, value: Any, expected: Any) -> bool:
raise ValueError("Invalid actual value type: number")
if isinstance(value, int):
if not isinstance(expected, (int, float, str)):
raise ValueError(f"Cannot convert {type(expected)} to int")
expected = int(expected)
else:
if not isinstance(expected, (int, float, str)):
raise ValueError(f"Cannot convert {type(expected)} to float")
expected = float(expected)
if value > expected:
@@ -339,19 +401,19 @@ def _assert_less_than_or_equal(*, value: Any, expected: Any) -> bool:
return True
def _assert_null(*, value: Any) -> bool:
def _assert_null(*, value: object) -> bool:
if value is None:
return True
return False
def _assert_not_null(*, value: Any) -> bool:
def _assert_not_null(*, value: object) -> bool:
if value is not None:
return True
return False
def _assert_in(*, value: Any, expected: Any) -> bool:
def _assert_in(*, value: object, expected: object) -> bool:
if not value:
return False
@@ -363,7 +425,7 @@ def _assert_in(*, value: Any, expected: Any) -> bool:
return True
def _assert_not_in(*, value: Any, expected: Any) -> bool:
def _assert_not_in(*, value: object, expected: object) -> bool:
if not value:
return True
@@ -375,20 +437,33 @@ def _assert_not_in(*, value: Any, expected: Any) -> bool:
return True
def _assert_all_of(*, value: Any, expected: Sequence[str]) -> bool:
def _assert_all_of(*, value: object, expected: Sequence[str]) -> bool:
if not value:
return False
if not all(item in value for item in expected):
# Ensure value is a container that supports 'in' operator
if not isinstance(value, (list, tuple, set, str)):
return False
return True
return all(item in value for item in expected)
def _assert_exists(*, value: Any) -> bool:
def _assert_all_of_bool(*, value: object, expected: Sequence[bool]) -> bool:
if not value:
return False
# Ensure value is a container that supports 'in' operator
if not isinstance(value, (list, tuple, set)):
return False
return all(item in value for item in expected)
def _assert_exists(*, value: object) -> bool:
return value is not None
def _assert_not_exists(*, value: Any) -> bool:
def _assert_not_exists(*, value: object) -> bool:
return value is None
@@ -398,7 +473,7 @@ def _process_sub_conditions(
operator: Literal["and", "or"],
) -> bool:
files = variable.value
group_results = []
group_results: list[bool] = []
for condition in sub_conditions:
key = FileAttribute(condition.key)
values = [file_manager.get_attr(file=file, attr=key) for file in files]
@@ -409,14 +484,14 @@ def _process_sub_conditions(
if expected_value and not expected_value.startswith("."):
expected_value = "." + expected_value
normalized_values = []
normalized_values: list[object] = []
for value in values:
if value and isinstance(value, str):
if not value.startswith("."):
value = "." + value
normalized_values.append(value)
values = normalized_values
sub_group_results = [
sub_group_results: list[bool] = [
_evaluate_condition(
value=value,
operator=condition.comparison_operator,

View File

@@ -1,130 +0,0 @@
import re
from collections.abc import Mapping, Sequence
from typing import Any
from core.workflow.entities.variable_entities import VariableSelector
# Matches a ``{{#node_id.key[.key...]#}}`` placeholder. Group 1 is the key
# including its surrounding ``#``; the inner ``(\.…)`` is a second capturing
# group, so ``re.findall`` with this pattern yields tuples, not strings.
REGEX = re.compile(r"\{\{(#[a-zA-Z0-9_]{1,50}(\.[a-zA-Z_][a-zA-Z0-9_]{0,29}){1,10}#)\}\}")
# Same pattern with the inner group made non-capturing (``(?:…)``), leaving
# the ``#…#`` key as the only capturing group.
SELECTOR_PATTERN = re.compile(r"\{\{(#[a-zA-Z0-9_]{1,50}(?:\.[a-zA-Z_][a-zA-Z0-9_]{0,29}){1,10}#)\}\}")
def extract_selectors_from_template(template: str, /) -> Sequence[VariableSelector]:
    """Extract a ``VariableSelector`` for every ``{{#a.b.c#}}`` placeholder in ``template``.

    Selectors are returned in order of appearance; repeated placeholders
    produce repeated selectors.

    Only text actually matched by ``SELECTOR_PATTERN`` is considered. The
    previous ``split()``-based implementation also inspected the literal text
    between placeholders, so a fragment that merely started and ended with
    ``#`` and contained a ``.`` (for example a bare ``#a.b#`` without braces)
    was wrongly reported as a selector.

    Args:
        template: The template text to scan.

    Returns:
        One selector per placeholder; ``variable`` is the key including its
        ``#`` delimiters and ``value_selector`` is the dotted path split on ``.``.
    """
    # SELECTOR_PATTERN has a single capturing group, so findall() yields the
    # ``#...#`` keys directly as strings.
    return [
        VariableSelector(variable=key, value_selector=key[1:-1].split("."))
        for key in SELECTOR_PATTERN.findall(template)
    ]
class VariableTemplateParser:
    """
    !NOTE: Consider to use the new `segments` module instead of this class.

    A class for parsing and manipulating template variables in a string.

    Rules:

    1. Template variables must be enclosed in `{{}}`.
    2. The template variable Key can only be: #node_id.var1.var2#.
    3. The template variable Key cannot contain new lines or spaces, and must comply with rule 2.

    Example usage:

        template = "Hello, {{#node_id.query.name#}}! Your age is {{#node_id.query.age#}}."
        parser = VariableTemplateParser(template)

        # Extract template variable keys
        variable_keys = parser.extract()
        print(variable_keys)
        # Output: ['#node_id.query.name#', '#node_id.query.age#']

        # Extract variable selectors
        variable_selectors = parser.extract_variable_selectors()
        print(variable_selectors)
        # Output: [VariableSelector(variable='#node_id.query.name#', value_selector=['node_id', 'query', 'name']),
        #          VariableSelector(variable='#node_id.query.age#', value_selector=['node_id', 'query', 'age'])]

        # Format the template string
        inputs = {'#node_id.query.name#': 'John', '#node_id.query.age#': 25}
        formatted_string = parser.format(inputs)
        print(formatted_string)
        # Output: "Hello, John! Your age is 25."
    """

    def __init__(self, template: str):
        self.template = template
        self.variable_keys = self.extract()

    def extract(self) -> list[str]:
        """
        Extracts all the template variable keys from the template string.

        Returns:
            A list of unique template variable keys (including the surrounding
            `#`), in order of first appearance.
        """
        # REGEX has two capturing groups, so each findall() hit is a tuple;
        # element 0 is the full `#...#` key.
        keys = (match[0] for match in re.findall(REGEX, self.template))
        # dict.fromkeys() removes duplicates while keeping first-seen order;
        # a plain set() would make the order nondeterministic.
        return list(dict.fromkeys(keys))

    def extract_variable_selectors(self) -> list[VariableSelector]:
        """
        Extracts the variable selectors from the template variable keys.

        Returns:
            A list of VariableSelector objects representing the variable selectors.
        """
        variable_selectors = []
        for variable_key in self.variable_keys:
            path = variable_key.replace("#", "").split(".")
            # A selector needs at least a node id and one variable name.
            if len(path) < 2:
                continue

            variable_selectors.append(VariableSelector(variable=variable_key, value_selector=path))

        return variable_selectors

    def format(self, inputs: Mapping[str, Any]) -> str:
        """
        Formats the template string by replacing the template variables with their corresponding values.

        Placeholders whose key is missing from `inputs` are kept, but reduced
        from `{{#key#}}` to `{#key#}`, because every substituted value is
        passed through `remove_template_variables`. `None` values become an
        empty string. After substitution, any `<|...|>` span is stripped from
        the result.

        Args:
            inputs: A dictionary containing the values for the template variables.

        Returns:
            The formatted string with template variables replaced by their values.
        """

        def replacer(match: re.Match[str]) -> str:
            key = match.group(1)
            # Fall back to the original matched text if the key is not found.
            value = inputs.get(key, match.group(0))

            if value is None:
                value = ""
            # convert the value to string
            if isinstance(value, list | dict | bool | int | float):
                value = str(value)

            # Neutralise placeholders inside the value so they are not left
            # in double-brace form in the output.
            return VariableTemplateParser.remove_template_variables(value)

        prompt = re.sub(REGEX, replacer, self.template)
        return re.sub(r"<\|.*?\|>", "", prompt)

    @classmethod
    def remove_template_variables(cls, text: str) -> str:
        """
        Removes the template variables from the given text.

        Each `{{#key#}}` is rewritten to `{#key#}`, so it no longer matches
        the template pattern.

        Args:
            text: The text from which to remove the template variables.

        Returns:
            The text with template variables removed.
        """
        return re.sub(REGEX, r"{\1}", text)