chore(api): upgrade graphon to v0.3.0 (#35469)

Signed-off-by: -LAN- <laipz8200@outlook.com> Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> Co-authored-by: WH-2099 <wh2099@pm.me>
2026-05-10 15:01:36 -04:00 · 2026-05-09 15:30:03 +08:00
parent f3eb3ab4dd
commit 19476109da
80 changed files with 2526 additions and 673 deletions
--- a/api/core/app/workflow/layers/llm_quota.py
+++ b/api/core/app/workflow/layers/llm_quota.py
@@ -1,36 +1,48 @@
 """
 LLM quota deduction layer for GraphEngine.

-This layer centralizes model-quota deduction outside node implementations.
+This layer centralizes model-quota handling outside node implementations.
+
+Graphon LLM-backed nodes expose provider/model identity through public node
+configuration and, after execution, through ``node_run_result.inputs``. Resolve
+quota billing from that public identity instead of depending on
+``ModelInstance`` reconstruction inside the workflow layer. Missing identity on
+quota-tracked nodes is treated as a workflow bug and aborts execution so quota
+handling is never silently skipped.
 """

 import logging
-from typing import TYPE_CHECKING, cast, final, override
+from typing import final, override

-from core.app.entities.app_invoke_entities import DIFY_RUN_CONTEXT_KEY, DifyRunContext
-from core.app.llm import deduct_llm_quota, ensure_llm_quota_available
+from core.app.llm import deduct_llm_quota_for_model, ensure_llm_quota_available_for_model
 from core.errors.error import QuotaExceededError
-from core.model_manager import ModelInstance
-from graphon.enums import BuiltinNodeTypes
+from graphon.enums import BuiltinNodeTypes, WorkflowNodeExecutionStatus
 from graphon.graph_engine.entities.commands import AbortCommand, CommandType
 from graphon.graph_engine.layers import GraphEngineLayer
 from graphon.graph_events import GraphEngineEvent, GraphNodeEventBase, NodeRunSucceededEvent
+from graphon.node_events import NodeRunResult
 from graphon.nodes.base.node import Node

-if TYPE_CHECKING:
-    from graphon.nodes.llm.node import LLMNode
-    from graphon.nodes.parameter_extractor.parameter_extractor_node import ParameterExtractorNode
-    from graphon.nodes.question_classifier.question_classifier_node import QuestionClassifierNode
-
 logger = logging.getLogger(__name__)
+_QUOTA_NODE_TYPES = frozenset(
+    [
+        BuiltinNodeTypes.LLM,
+        BuiltinNodeTypes.PARAMETER_EXTRACTOR,
+        BuiltinNodeTypes.QUESTION_CLASSIFIER,
+    ]
+)


@final
 class LLMQuotaLayer(GraphEngineLayer):
-    """Graph layer that applies LLM quota deduction after node execution."""
+    """Graph layer that applies tenant-scoped quota checks to LLM-backed nodes."""

-    def __init__(self) -> None:
+    tenant_id: str
+    _abort_sent: bool
+
+    def __init__(self, tenant_id: str) -> None:
        super().__init__()
+        self.tenant_id = tenant_id
        self._abort_sent = False

    @override
@@ -50,33 +62,49 @@ class LLMQuotaLayer(GraphEngineLayer):
        if self._abort_sent:
            return

-        model_instance = self._extract_model_instance(node)
-        if model_instance is None:
+        if not self._supports_quota(node):
            return

+        model_identity = self._extract_model_identity_from_node(node)
+        if model_identity is None:
+            reason = "LLM quota check requires public node model identity before execution."
+            self._abort_before_node_run(node=node, reason=reason, error_type="LLMQuotaIdentityError")
+            logger.error("LLM quota handling aborted, node_id=%s, reason=%s", node.id, reason)
+            return
+
+        provider, model_name = model_identity
        try:
-            ensure_llm_quota_available(model_instance=model_instance)
+            ensure_llm_quota_available_for_model(
+                tenant_id=self.tenant_id,
+                provider=provider,
+                model=model_name,
+            )
        except QuotaExceededError as exc:
-            self._set_stop_event(node)
-            self._send_abort_command(reason=str(exc))
+            self._abort_before_node_run(node=node, reason=str(exc), error_type=QuotaExceededError.__name__)
            logger.warning("LLM quota check failed, node_id=%s, error=%s", node.id, exc)

    @override
    def on_node_run_end(
        self, node: Node, error: Exception | None, result_event: GraphNodeEventBase | None = None
    ) -> None:
-        if error is not None or not isinstance(result_event, NodeRunSucceededEvent):
+        if error is not None or not isinstance(result_event, NodeRunSucceededEvent) or not self._supports_quota(node):
            return

-        model_instance = self._extract_model_instance(node)
-        if model_instance is None:
+        model_identity = self._extract_model_identity_from_result_event(result_event)
+        if model_identity is None:
+            self._abort_for_missing_model_identity(
+                node=node,
+                reason="LLM quota deduction requires model identity in the node result event.",
+            )
            return

+        provider, model_name = model_identity
+
        try:
-            dify_ctx = DifyRunContext.model_validate(node.require_run_context_value(DIFY_RUN_CONTEXT_KEY))
-            deduct_llm_quota(
-                tenant_id=dify_ctx.tenant_id,
-                model_instance=model_instance,
+            deduct_llm_quota_for_model(
+                tenant_id=self.tenant_id,
+                provider=provider,
+                model=model_name,
                usage=result_event.node_run_result.llm_usage,
            )
        except QuotaExceededError as exc:
@@ -92,6 +120,27 @@ class LLMQuotaLayer(GraphEngineLayer):
        if stop_event is not None:
            stop_event.set()

+    def _abort_before_node_run(self, *, node: Node, reason: str, error_type: str) -> None:
+        self._set_stop_event(node)
+        node.node_data.error_strategy = None
+        node.node_data.retry_config.retry_enabled = False
+
+        def quota_aborted_run() -> NodeRunResult:
+            return NodeRunResult(
+                status=WorkflowNodeExecutionStatus.FAILED,
+                error=reason,
+                error_type=error_type,
+            )
+
+        # TODO: Push Graphon to expose a public pre-run failure/skip hook, then replace this private _run override.
+        node._run = quota_aborted_run  # type: ignore[method-assign]
+        self._send_abort_command(reason=reason)
+
+    def _abort_for_missing_model_identity(self, *, node: Node, reason: str) -> None:
+        self._set_stop_event(node)
+        self._send_abort_command(reason=reason)
+        logger.error("LLM quota handling aborted, node_id=%s, reason=%s", node.id, reason)
+
    def _send_abort_command(self, *, reason: str) -> None:
        if not self.command_channel or self._abort_sent:
            return
@@ -108,29 +157,38 @@ class LLMQuotaLayer(GraphEngineLayer):
            logger.exception("Failed to send quota abort command")

    @staticmethod
-    def _extract_model_instance(node: Node) -> ModelInstance | None:
-        try:
-            match node.node_type:
-                case BuiltinNodeTypes.LLM:
-                    model_instance = cast("LLMNode", node).model_instance
-                case BuiltinNodeTypes.PARAMETER_EXTRACTOR:
-                    model_instance = cast("ParameterExtractorNode", node).model_instance
-                case BuiltinNodeTypes.QUESTION_CLASSIFIER:
-                    model_instance = cast("QuestionClassifierNode", node).model_instance
-                case _:
-                    return None
-        except AttributeError:
+    def _supports_quota(node: Node) -> bool:
+        return node.node_type in _QUOTA_NODE_TYPES
+
+    @staticmethod
+    def _extract_model_identity_from_result_event(result_event: NodeRunSucceededEvent) -> tuple[str, str] | None:
+        provider = result_event.node_run_result.inputs.get("model_provider")
+        model_name = result_event.node_run_result.inputs.get("model_name")
+        if isinstance(provider, str) and provider and isinstance(model_name, str) and model_name:
+            return provider, model_name
+        return None
+
+    @staticmethod
+    def _extract_model_identity_from_node(node: Node) -> tuple[str, str] | None:
+        node_data = getattr(node, "node_data", None)
+        if node_data is None:
+            node_data = getattr(node, "data", None)
+
+        model_config = getattr(node_data, "model", None)
+        if model_config is None:
            logger.warning(
-                "LLMQuotaLayer skipped quota deduction because node does not expose a model instance, node_id=%s",
+                "LLMQuotaLayer skipped quota handling because node model config is missing, node_id=%s",
                node.id,
            )
            return None

-        if isinstance(model_instance, ModelInstance):
-            return model_instance
-
-        raw_model_instance = getattr(model_instance, "_model_instance", None)
-        if isinstance(raw_model_instance, ModelInstance):
-            return raw_model_instance
+        provider = getattr(model_config, "provider", None)
+        model_name = getattr(model_config, "name", None)
+        if isinstance(provider, str) and provider and isinstance(model_name, str) and model_name:
+            return provider, model_name

+        logger.warning(
+            "LLMQuotaLayer skipped quota handling because node model identity is invalid, node_id=%s",
+            node.id,
+        )
        return None