mirror of
https://github.com/langgenius/dify.git
synced 2026-05-10 15:01:36 -04:00
chore(api): upgrade graphon to v0.3.0 (#35469)
Signed-off-by: -LAN- <laipz8200@outlook.com> Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> Co-authored-by: WH-2099 <wh2099@pm.me>
This commit is contained in:
@@ -1,36 +1,48 @@
|
||||
"""
|
||||
LLM quota deduction layer for GraphEngine.
|
||||
|
||||
This layer centralizes model-quota deduction outside node implementations.
|
||||
This layer centralizes model-quota handling outside node implementations.
|
||||
|
||||
Graphon LLM-backed nodes expose provider/model identity through public node
|
||||
configuration and, after execution, through ``node_run_result.inputs``. Resolve
|
||||
quota billing from that public identity instead of depending on
|
||||
``ModelInstance`` reconstruction inside the workflow layer. Missing identity on
|
||||
quota-tracked nodes is treated as a workflow bug and aborts execution so quota
|
||||
handling is never silently skipped.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import TYPE_CHECKING, cast, final, override
|
||||
from typing import final, override
|
||||
|
||||
from core.app.entities.app_invoke_entities import DIFY_RUN_CONTEXT_KEY, DifyRunContext
|
||||
from core.app.llm import deduct_llm_quota, ensure_llm_quota_available
|
||||
from core.app.llm import deduct_llm_quota_for_model, ensure_llm_quota_available_for_model
|
||||
from core.errors.error import QuotaExceededError
|
||||
from core.model_manager import ModelInstance
|
||||
from graphon.enums import BuiltinNodeTypes
|
||||
from graphon.enums import BuiltinNodeTypes, WorkflowNodeExecutionStatus
|
||||
from graphon.graph_engine.entities.commands import AbortCommand, CommandType
|
||||
from graphon.graph_engine.layers import GraphEngineLayer
|
||||
from graphon.graph_events import GraphEngineEvent, GraphNodeEventBase, NodeRunSucceededEvent
|
||||
from graphon.node_events import NodeRunResult
|
||||
from graphon.nodes.base.node import Node
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from graphon.nodes.llm.node import LLMNode
|
||||
from graphon.nodes.parameter_extractor.parameter_extractor_node import ParameterExtractorNode
|
||||
from graphon.nodes.question_classifier.question_classifier_node import QuestionClassifierNode
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
_QUOTA_NODE_TYPES = frozenset(
|
||||
[
|
||||
BuiltinNodeTypes.LLM,
|
||||
BuiltinNodeTypes.PARAMETER_EXTRACTOR,
|
||||
BuiltinNodeTypes.QUESTION_CLASSIFIER,
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
@final
|
||||
class LLMQuotaLayer(GraphEngineLayer):
|
||||
"""Graph layer that applies LLM quota deduction after node execution."""
|
||||
"""Graph layer that applies tenant-scoped quota checks to LLM-backed nodes."""
|
||||
|
||||
def __init__(self) -> None:
|
||||
tenant_id: str
|
||||
_abort_sent: bool
|
||||
|
||||
def __init__(self, tenant_id: str) -> None:
|
||||
super().__init__()
|
||||
self.tenant_id = tenant_id
|
||||
self._abort_sent = False
|
||||
|
||||
@override
|
||||
@@ -50,33 +62,49 @@ class LLMQuotaLayer(GraphEngineLayer):
|
||||
if self._abort_sent:
|
||||
return
|
||||
|
||||
model_instance = self._extract_model_instance(node)
|
||||
if model_instance is None:
|
||||
if not self._supports_quota(node):
|
||||
return
|
||||
|
||||
model_identity = self._extract_model_identity_from_node(node)
|
||||
if model_identity is None:
|
||||
reason = "LLM quota check requires public node model identity before execution."
|
||||
self._abort_before_node_run(node=node, reason=reason, error_type="LLMQuotaIdentityError")
|
||||
logger.error("LLM quota handling aborted, node_id=%s, reason=%s", node.id, reason)
|
||||
return
|
||||
|
||||
provider, model_name = model_identity
|
||||
try:
|
||||
ensure_llm_quota_available(model_instance=model_instance)
|
||||
ensure_llm_quota_available_for_model(
|
||||
tenant_id=self.tenant_id,
|
||||
provider=provider,
|
||||
model=model_name,
|
||||
)
|
||||
except QuotaExceededError as exc:
|
||||
self._set_stop_event(node)
|
||||
self._send_abort_command(reason=str(exc))
|
||||
self._abort_before_node_run(node=node, reason=str(exc), error_type=QuotaExceededError.__name__)
|
||||
logger.warning("LLM quota check failed, node_id=%s, error=%s", node.id, exc)
|
||||
|
||||
@override
|
||||
def on_node_run_end(
|
||||
self, node: Node, error: Exception | None, result_event: GraphNodeEventBase | None = None
|
||||
) -> None:
|
||||
if error is not None or not isinstance(result_event, NodeRunSucceededEvent):
|
||||
if error is not None or not isinstance(result_event, NodeRunSucceededEvent) or not self._supports_quota(node):
|
||||
return
|
||||
|
||||
model_instance = self._extract_model_instance(node)
|
||||
if model_instance is None:
|
||||
model_identity = self._extract_model_identity_from_result_event(result_event)
|
||||
if model_identity is None:
|
||||
self._abort_for_missing_model_identity(
|
||||
node=node,
|
||||
reason="LLM quota deduction requires model identity in the node result event.",
|
||||
)
|
||||
return
|
||||
|
||||
provider, model_name = model_identity
|
||||
|
||||
try:
|
||||
dify_ctx = DifyRunContext.model_validate(node.require_run_context_value(DIFY_RUN_CONTEXT_KEY))
|
||||
deduct_llm_quota(
|
||||
tenant_id=dify_ctx.tenant_id,
|
||||
model_instance=model_instance,
|
||||
deduct_llm_quota_for_model(
|
||||
tenant_id=self.tenant_id,
|
||||
provider=provider,
|
||||
model=model_name,
|
||||
usage=result_event.node_run_result.llm_usage,
|
||||
)
|
||||
except QuotaExceededError as exc:
|
||||
@@ -92,6 +120,27 @@ class LLMQuotaLayer(GraphEngineLayer):
|
||||
if stop_event is not None:
|
||||
stop_event.set()
|
||||
|
||||
def _abort_before_node_run(self, *, node: Node, reason: str, error_type: str) -> None:
|
||||
self._set_stop_event(node)
|
||||
node.node_data.error_strategy = None
|
||||
node.node_data.retry_config.retry_enabled = False
|
||||
|
||||
def quota_aborted_run() -> NodeRunResult:
|
||||
return NodeRunResult(
|
||||
status=WorkflowNodeExecutionStatus.FAILED,
|
||||
error=reason,
|
||||
error_type=error_type,
|
||||
)
|
||||
|
||||
# TODO: Push Graphon to expose a public pre-run failure/skip hook, then replace this private _run override.
|
||||
node._run = quota_aborted_run # type: ignore[method-assign]
|
||||
self._send_abort_command(reason=reason)
|
||||
|
||||
def _abort_for_missing_model_identity(self, *, node: Node, reason: str) -> None:
|
||||
self._set_stop_event(node)
|
||||
self._send_abort_command(reason=reason)
|
||||
logger.error("LLM quota handling aborted, node_id=%s, reason=%s", node.id, reason)
|
||||
|
||||
def _send_abort_command(self, *, reason: str) -> None:
|
||||
if not self.command_channel or self._abort_sent:
|
||||
return
|
||||
@@ -108,29 +157,38 @@ class LLMQuotaLayer(GraphEngineLayer):
|
||||
logger.exception("Failed to send quota abort command")
|
||||
|
||||
@staticmethod
|
||||
def _extract_model_instance(node: Node) -> ModelInstance | None:
|
||||
try:
|
||||
match node.node_type:
|
||||
case BuiltinNodeTypes.LLM:
|
||||
model_instance = cast("LLMNode", node).model_instance
|
||||
case BuiltinNodeTypes.PARAMETER_EXTRACTOR:
|
||||
model_instance = cast("ParameterExtractorNode", node).model_instance
|
||||
case BuiltinNodeTypes.QUESTION_CLASSIFIER:
|
||||
model_instance = cast("QuestionClassifierNode", node).model_instance
|
||||
case _:
|
||||
return None
|
||||
except AttributeError:
|
||||
def _supports_quota(node: Node) -> bool:
|
||||
return node.node_type in _QUOTA_NODE_TYPES
|
||||
|
||||
@staticmethod
|
||||
def _extract_model_identity_from_result_event(result_event: NodeRunSucceededEvent) -> tuple[str, str] | None:
|
||||
provider = result_event.node_run_result.inputs.get("model_provider")
|
||||
model_name = result_event.node_run_result.inputs.get("model_name")
|
||||
if isinstance(provider, str) and provider and isinstance(model_name, str) and model_name:
|
||||
return provider, model_name
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def _extract_model_identity_from_node(node: Node) -> tuple[str, str] | None:
|
||||
node_data = getattr(node, "node_data", None)
|
||||
if node_data is None:
|
||||
node_data = getattr(node, "data", None)
|
||||
|
||||
model_config = getattr(node_data, "model", None)
|
||||
if model_config is None:
|
||||
logger.warning(
|
||||
"LLMQuotaLayer skipped quota deduction because node does not expose a model instance, node_id=%s",
|
||||
"LLMQuotaLayer skipped quota handling because node model config is missing, node_id=%s",
|
||||
node.id,
|
||||
)
|
||||
return None
|
||||
|
||||
if isinstance(model_instance, ModelInstance):
|
||||
return model_instance
|
||||
|
||||
raw_model_instance = getattr(model_instance, "_model_instance", None)
|
||||
if isinstance(raw_model_instance, ModelInstance):
|
||||
return raw_model_instance
|
||||
provider = getattr(model_config, "provider", None)
|
||||
model_name = getattr(model_config, "name", None)
|
||||
if isinstance(provider, str) and provider and isinstance(model_name, str) and model_name:
|
||||
return provider, model_name
|
||||
|
||||
logger.warning(
|
||||
"LLMQuotaLayer skipped quota handling because node model identity is invalid, node_id=%s",
|
||||
node.id,
|
||||
)
|
||||
return None
|
||||
|
||||
Reference in New Issue
Block a user