feat: Human Input Node (#32060)

The frontend and backend implementation for the human input node.

Co-authored-by: twwu <twwu@dify.ai>
Co-authored-by: JzoNg <jzongcode@gmail.com>
Co-authored-by: yyh <92089059+lyzno1@users.noreply.github.com>
Co-authored-by: zhsama <torvalds@linux.do>
QuantumGhost
2026-02-09 14:57:23 +08:00
committed by GitHub
parent 56e3a55023
commit a1fc280102
474 changed files with 32667 additions and 2050 deletions

View File

@@ -0,0 +1,15 @@
import time
def get_timestamp() -> float:
"""Retrieve a timestamp as a float point numer representing the number of seconds
since the Unix epoch.
This function is primarily used to measure the execution time of the workflow engine.
Since workflow execution may be paused and resumed on a different machine,
`time.perf_counter` cannot be used as it is inconsistent across machines.
To address this, the function uses the wall clock as the time source.
However, it assumes that the clocks of all servers are properly synchronized.
"""
return time.time()
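
A minimal usage sketch (not part of the commit): two wall-clock readings stay comparable even if the work between them is paused and resumed on another machine, provided the server clocks are synchronized. The local `get_timestamp` below is just a stand-in so the snippet runs on its own.

import time

def get_timestamp() -> float:  # stand-in matching the helper above
    return time.time()

start = get_timestamp()
time.sleep(0.25)  # stands in for node work that may pause and resume elsewhere
elapsed = get_timestamp() - start  # meaningful because both readings use the wall clock
print(f"elapsed: {elapsed:.3f}s")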

View File

@@ -2,12 +2,14 @@
GraphEngine configuration models.
"""
from pydantic import BaseModel
from pydantic import BaseModel, ConfigDict
class GraphEngineConfig(BaseModel):
"""Configuration for GraphEngine worker pool scaling."""
model_config = ConfigDict(frozen=True)
min_workers: int = 1
max_workers: int = 5
scale_up_threshold: int = 3
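
A short sketch of what `ConfigDict(frozen=True)` adds here: instances reject mutation after construction (and become hashable). The field names mirror the diff; everything else is illustrative.

from pydantic import BaseModel, ConfigDict

class FrozenEngineConfig(BaseModel):
    model_config = ConfigDict(frozen=True)
    min_workers: int = 1
    max_workers: int = 5
    scale_up_threshold: int = 3

cfg = FrozenEngineConfig(max_workers=10)
try:
    cfg.max_workers = 20  # frozen models raise on attribute assignment
except Exception as exc:  # pydantic raises a ValidationError here
    print(type(exc).__name__)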

View File

@@ -10,6 +10,7 @@ from pydantic import BaseModel, Field
from core.workflow.entities.pause_reason import PauseReason
from core.workflow.enums import NodeState
from core.workflow.runtime.graph_runtime_state import GraphExecutionProtocol
from .node_execution import NodeExecution
@@ -236,3 +237,6 @@ class GraphExecution:
def record_node_failure(self) -> None:
"""Increment the count of node failures encountered during execution."""
self.exceptions_count += 1
_: GraphExecutionProtocol = GraphExecution(workflow_id="")
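
The trailing assignment is a structural-typing conformance check: binding a concrete instance to a Protocol-annotated throwaway name makes mypy/pyright verify that `GraphExecution` still satisfies `GraphExecutionProtocol`. A generic sketch of the pattern (the names below are illustrative, not from the codebase):

from typing import Protocol

class Greeter(Protocol):
    def greet(self) -> str: ...

class EnglishGreeter:
    def greet(self) -> str:
        return "hello"

# the type checker flags this line if EnglishGreeter stops satisfying Greeter
_: Greeter = EnglishGreeter()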

View File

@@ -192,9 +192,13 @@ class EventHandler:
self._event_collector.collect(edge_event)
# Enqueue ready nodes
for node_id in ready_nodes:
self._state_manager.enqueue_node(node_id)
self._state_manager.start_execution(node_id)
if self._graph_execution.is_paused:
for node_id in ready_nodes:
self._graph_runtime_state.register_deferred_node(node_id)
else:
for node_id in ready_nodes:
self._state_manager.enqueue_node(node_id)
self._state_manager.start_execution(node_id)
# Update execution tracking
self._state_manager.finish_execution(event.node_id)
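
`register_deferred_node` and `consume_deferred_nodes` are only named, not shown, in this diff; a rough sketch of the bookkeeping they imply (class and field names here are illustrative, the real GraphRuntimeState holds much more state):

class DeferredNodeStore:
    """Collects node ids that became ready while the run was paused."""

    def __init__(self) -> None:
        self._deferred: list[str] = []

    def register_deferred_node(self, node_id: str) -> None:
        # called by the event handler instead of enqueueing while paused
        self._deferred.append(node_id)

    def consume_deferred_nodes(self) -> list[str]:
        # handed back exactly once on resume so the engine can enqueue them
        pending, self._deferred = self._deferred, []
        return pending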

View File

@@ -14,6 +14,7 @@ from collections.abc import Generator
from typing import TYPE_CHECKING, cast, final
from core.workflow.context import capture_current_context
from core.workflow.entities.workflow_start_reason import WorkflowStartReason
from core.workflow.enums import NodeExecutionType
from core.workflow.graph import Graph
from core.workflow.graph_events import (
@@ -55,6 +56,9 @@ if TYPE_CHECKING:
logger = logging.getLogger(__name__)
_DEFAULT_CONFIG = GraphEngineConfig()
@final
class GraphEngine:
"""
@@ -70,7 +74,7 @@ class GraphEngine:
graph: Graph,
graph_runtime_state: GraphRuntimeState,
command_channel: CommandChannel,
config: GraphEngineConfig,
config: GraphEngineConfig = _DEFAULT_CONFIG,
) -> None:
"""Initialize the graph engine with all subsystems and dependencies."""
# stop event
@@ -234,7 +238,9 @@ class GraphEngine:
self._graph_execution.paused = False
self._graph_execution.pause_reasons = []
start_event = GraphRunStartedEvent()
start_event = GraphRunStartedEvent(
reason=WorkflowStartReason.RESUMPTION if is_resume else WorkflowStartReason.INITIAL,
)
self._event_manager.notify_layers(start_event)
yield start_event
@@ -303,15 +309,17 @@ class GraphEngine:
for layer in self._layers:
try:
layer.on_graph_start()
except Exception as e:
logger.warning("Layer %s failed on_graph_start: %s", layer.__class__.__name__, e)
except Exception:
logger.exception("Layer %s failed on_graph_start", layer.__class__.__name__)
def _start_execution(self, *, resume: bool = False) -> None:
"""Start execution subsystems."""
self._stop_event.clear()
paused_nodes: list[str] = []
deferred_nodes: list[str] = []
if resume:
paused_nodes = self._graph_runtime_state.consume_paused_nodes()
deferred_nodes = self._graph_runtime_state.consume_deferred_nodes()
# Start worker pool (it calculates initial workers internally)
self._worker_pool.start()
@@ -327,7 +335,11 @@ class GraphEngine:
self._state_manager.enqueue_node(root_node.id)
self._state_manager.start_execution(root_node.id)
else:
for node_id in paused_nodes:
seen_nodes: set[str] = set()
for node_id in paused_nodes + deferred_nodes:
if node_id in seen_nodes:
continue
seen_nodes.add(node_id)
self._state_manager.enqueue_node(node_id)
self._state_manager.start_execution(node_id)
@@ -345,8 +357,8 @@ class GraphEngine:
for layer in self._layers:
try:
layer.on_graph_end(self._graph_execution.error)
except Exception as e:
logger.warning("Layer %s failed on_graph_end: %s", layer.__class__.__name__, e)
except Exception:
logger.exception("Layer %s failed on_graph_end", layer.__class__.__name__)
# Public property accessors for attributes that need external access
@property
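
The `seen_nodes` loop above re-enqueues each node at most once while keeping the paused-then-deferred order. A tiny equivalent sketch with made-up node ids (`dict.fromkeys` is an alternative way to express the same ordered de-duplication):

paused_nodes = ["node_a", "node_b"]
deferred_nodes = ["node_b", "node_c"]

# first occurrence wins, order preserved, duplicates dropped
unique_ready = list(dict.fromkeys(paused_nodes + deferred_nodes))
assert unique_ready == ["node_a", "node_b", "node_c"]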

View File

@@ -224,6 +224,8 @@ class GraphStateManager:
Returns:
Number of executing nodes
"""
# This count is a best-effort snapshot and can change concurrently.
# Only use it for pause-drain checks where scheduling is already frozen.
with self._lock:
return len(self._executing_nodes)

View File

@@ -83,12 +83,12 @@ class Dispatcher:
"""Main dispatcher loop."""
try:
self._process_commands()
paused = False
while not self._stop_event.is_set():
if (
self._execution_coordinator.aborted
or self._execution_coordinator.paused
or self._execution_coordinator.execution_complete
):
if self._execution_coordinator.aborted or self._execution_coordinator.execution_complete:
break
if self._execution_coordinator.paused:
paused = True
break
self._execution_coordinator.check_scaling()
@@ -101,13 +101,10 @@ class Dispatcher:
time.sleep(0.1)
self._process_commands()
while True:
try:
event = self._event_queue.get(block=False)
self._event_handler.dispatch(event)
self._event_queue.task_done()
except queue.Empty:
break
if paused:
self._drain_events_until_idle()
else:
self._drain_event_queue()
except Exception as e:
logger.exception("Dispatcher error")
@@ -122,3 +119,24 @@ class Dispatcher:
def _process_commands(self, event: GraphNodeEventBase | None = None):
if event is None or isinstance(event, self._COMMAND_TRIGGER_EVENTS):
self._execution_coordinator.process_commands()
def _drain_event_queue(self) -> None:
while True:
try:
event = self._event_queue.get(block=False)
self._event_handler.dispatch(event)
self._event_queue.task_done()
except queue.Empty:
break
def _drain_events_until_idle(self) -> None:
while not self._stop_event.is_set():
try:
event = self._event_queue.get(timeout=0.1)
self._event_handler.dispatch(event)
self._event_queue.task_done()
self._process_commands(event)
except queue.Empty:
if not self._execution_coordinator.has_executing_nodes():
break
self._drain_event_queue()
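
A stripped-down model of the two drain strategies (not the real dispatcher, which also processes commands and honours its stop event): a non-blocking drain for normal completion, and a blocking drain that keeps waiting while already-running nodes may still emit events.

import queue
from collections.abc import Callable

def drain_once(q: queue.Queue, handle: Callable[[object], None]) -> None:
    # non-blocking: stop as soon as the queue is empty
    while True:
        try:
            event = q.get(block=False)
        except queue.Empty:
            break
        handle(event)
        q.task_done()

def drain_until_idle(
    q: queue.Queue,
    handle: Callable[[object], None],
    has_executing_nodes: Callable[[], bool],
) -> None:
    # blocking: wait for stragglers until no node is still executing
    while True:
        try:
            event = q.get(timeout=0.1)
        except queue.Empty:
            if not has_executing_nodes():
                break
            continue
        handle(event)
        q.task_done()
    drain_once(q, handle)  # sweep anything enqueued after the last idle check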

View File

@@ -94,3 +94,11 @@ class ExecutionCoordinator:
self._worker_pool.stop()
self._state_manager.clear_executing()
def has_executing_nodes(self) -> bool:
"""Return True if any nodes are currently marked as executing."""
# This check is only safe once execution has already paused.
# Before pause, executing state can change concurrently, which makes the result unreliable.
if not self._graph_execution.is_paused:
raise AssertionError("has_executing_nodes should only be called after execution is paused")
return self._state_manager.get_executing_count() > 0