chore(api): upgrade graphon to v0.3.0 (#35469)

Signed-off-by: -LAN- <laipz8200@outlook.com>
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
Co-authored-by: WH-2099 <wh2099@pm.me>
This commit is contained in:
-LAN-
2026-05-09 15:30:03 +08:00
committed by GitHub
parent f3eb3ab4dd
commit 19476109da
80 changed files with 2526 additions and 673 deletions

View File

@@ -1,36 +1,48 @@
"""
LLM quota deduction layer for GraphEngine.
This layer centralizes model-quota deduction outside node implementations.
This layer centralizes model-quota handling outside node implementations.
Graphon LLM-backed nodes expose provider/model identity through public node
configuration and, after execution, through ``node_run_result.inputs``. Resolve
quota billing from that public identity instead of depending on
``ModelInstance`` reconstruction inside the workflow layer. Missing identity on
quota-tracked nodes is treated as a workflow bug and aborts execution so quota
handling is never silently skipped.
"""
import logging
from typing import TYPE_CHECKING, cast, final, override
from typing import final, override
from core.app.entities.app_invoke_entities import DIFY_RUN_CONTEXT_KEY, DifyRunContext
from core.app.llm import deduct_llm_quota, ensure_llm_quota_available
from core.app.llm import deduct_llm_quota_for_model, ensure_llm_quota_available_for_model
from core.errors.error import QuotaExceededError
from core.model_manager import ModelInstance
from graphon.enums import BuiltinNodeTypes
from graphon.enums import BuiltinNodeTypes, WorkflowNodeExecutionStatus
from graphon.graph_engine.entities.commands import AbortCommand, CommandType
from graphon.graph_engine.layers import GraphEngineLayer
from graphon.graph_events import GraphEngineEvent, GraphNodeEventBase, NodeRunSucceededEvent
from graphon.node_events import NodeRunResult
from graphon.nodes.base.node import Node
if TYPE_CHECKING:
from graphon.nodes.llm.node import LLMNode
from graphon.nodes.parameter_extractor.parameter_extractor_node import ParameterExtractorNode
from graphon.nodes.question_classifier.question_classifier_node import QuestionClassifierNode
logger = logging.getLogger(__name__)
_QUOTA_NODE_TYPES = frozenset(
[
BuiltinNodeTypes.LLM,
BuiltinNodeTypes.PARAMETER_EXTRACTOR,
BuiltinNodeTypes.QUESTION_CLASSIFIER,
]
)
@final
class LLMQuotaLayer(GraphEngineLayer):
"""Graph layer that applies LLM quota deduction after node execution."""
"""Graph layer that applies tenant-scoped quota checks to LLM-backed nodes."""
def __init__(self) -> None:
tenant_id: str
_abort_sent: bool
def __init__(self, tenant_id: str) -> None:
super().__init__()
self.tenant_id = tenant_id
self._abort_sent = False
@override
@@ -50,33 +62,49 @@ class LLMQuotaLayer(GraphEngineLayer):
if self._abort_sent:
return
model_instance = self._extract_model_instance(node)
if model_instance is None:
if not self._supports_quota(node):
return
model_identity = self._extract_model_identity_from_node(node)
if model_identity is None:
reason = "LLM quota check requires public node model identity before execution."
self._abort_before_node_run(node=node, reason=reason, error_type="LLMQuotaIdentityError")
logger.error("LLM quota handling aborted, node_id=%s, reason=%s", node.id, reason)
return
provider, model_name = model_identity
try:
ensure_llm_quota_available(model_instance=model_instance)
ensure_llm_quota_available_for_model(
tenant_id=self.tenant_id,
provider=provider,
model=model_name,
)
except QuotaExceededError as exc:
self._set_stop_event(node)
self._send_abort_command(reason=str(exc))
self._abort_before_node_run(node=node, reason=str(exc), error_type=QuotaExceededError.__name__)
logger.warning("LLM quota check failed, node_id=%s, error=%s", node.id, exc)
@override
def on_node_run_end(
self, node: Node, error: Exception | None, result_event: GraphNodeEventBase | None = None
) -> None:
if error is not None or not isinstance(result_event, NodeRunSucceededEvent):
if error is not None or not isinstance(result_event, NodeRunSucceededEvent) or not self._supports_quota(node):
return
model_instance = self._extract_model_instance(node)
if model_instance is None:
model_identity = self._extract_model_identity_from_result_event(result_event)
if model_identity is None:
self._abort_for_missing_model_identity(
node=node,
reason="LLM quota deduction requires model identity in the node result event.",
)
return
provider, model_name = model_identity
try:
dify_ctx = DifyRunContext.model_validate(node.require_run_context_value(DIFY_RUN_CONTEXT_KEY))
deduct_llm_quota(
tenant_id=dify_ctx.tenant_id,
model_instance=model_instance,
deduct_llm_quota_for_model(
tenant_id=self.tenant_id,
provider=provider,
model=model_name,
usage=result_event.node_run_result.llm_usage,
)
except QuotaExceededError as exc:
@@ -92,6 +120,27 @@ class LLMQuotaLayer(GraphEngineLayer):
if stop_event is not None:
stop_event.set()
def _abort_before_node_run(self, *, node: Node, reason: str, error_type: str) -> None:
self._set_stop_event(node)
node.node_data.error_strategy = None
node.node_data.retry_config.retry_enabled = False
def quota_aborted_run() -> NodeRunResult:
return NodeRunResult(
status=WorkflowNodeExecutionStatus.FAILED,
error=reason,
error_type=error_type,
)
# TODO: Push Graphon to expose a public pre-run failure/skip hook, then replace this private _run override.
node._run = quota_aborted_run # type: ignore[method-assign]
self._send_abort_command(reason=reason)
def _abort_for_missing_model_identity(self, *, node: Node, reason: str) -> None:
self._set_stop_event(node)
self._send_abort_command(reason=reason)
logger.error("LLM quota handling aborted, node_id=%s, reason=%s", node.id, reason)
def _send_abort_command(self, *, reason: str) -> None:
if not self.command_channel or self._abort_sent:
return
@@ -108,29 +157,38 @@ class LLMQuotaLayer(GraphEngineLayer):
logger.exception("Failed to send quota abort command")
@staticmethod
def _extract_model_instance(node: Node) -> ModelInstance | None:
try:
match node.node_type:
case BuiltinNodeTypes.LLM:
model_instance = cast("LLMNode", node).model_instance
case BuiltinNodeTypes.PARAMETER_EXTRACTOR:
model_instance = cast("ParameterExtractorNode", node).model_instance
case BuiltinNodeTypes.QUESTION_CLASSIFIER:
model_instance = cast("QuestionClassifierNode", node).model_instance
case _:
return None
except AttributeError:
def _supports_quota(node: Node) -> bool:
return node.node_type in _QUOTA_NODE_TYPES
@staticmethod
def _extract_model_identity_from_result_event(result_event: NodeRunSucceededEvent) -> tuple[str, str] | None:
provider = result_event.node_run_result.inputs.get("model_provider")
model_name = result_event.node_run_result.inputs.get("model_name")
if isinstance(provider, str) and provider and isinstance(model_name, str) and model_name:
return provider, model_name
return None
@staticmethod
def _extract_model_identity_from_node(node: Node) -> tuple[str, str] | None:
node_data = getattr(node, "node_data", None)
if node_data is None:
node_data = getattr(node, "data", None)
model_config = getattr(node_data, "model", None)
if model_config is None:
logger.warning(
"LLMQuotaLayer skipped quota deduction because node does not expose a model instance, node_id=%s",
"LLMQuotaLayer skipped quota handling because node model config is missing, node_id=%s",
node.id,
)
return None
if isinstance(model_instance, ModelInstance):
return model_instance
raw_model_instance = getattr(model_instance, "_model_instance", None)
if isinstance(raw_model_instance, ModelInstance):
return raw_model_instance
provider = getattr(model_config, "provider", None)
model_name = getattr(model_config, "name", None)
if isinstance(provider, str) and provider and isinstance(model_name, str) and model_name:
return provider, model_name
logger.warning(
"LLMQuotaLayer skipped quota handling because node model identity is invalid, node_id=%s",
node.id,
)
return None