dify/api/core/evaluation/entities/judgment_entity.py

"""Judgment condition entities for evaluation metric assessment.

Key concepts:
  - **value_source**: Where the comparison target comes from.
    - "constant": a literal value supplied by the user (e.g. threshold "0.8").
    - "variable": a named field from the evaluation target's runtime data
      (inputs, actual_output, expected_output). The ``value`` field holds the
      variable key; the actual comparison value is resolved at evaluation time.
  - **condition_type**: Determines operator semantics and type coercion.
    - "string": string operators (contains, is, start with, …).
    - "number": numeric operators (>, <, =, ≠, ≥, ≤).
    - "datetime": temporal operators (before, after).

Typical usage:
    judgment_config = JudgmentConfig(
        logical_operator="and",
        conditions=[
            JudgmentCondition(
                metric_name="faithfulness",
                comparison_operator=">",
                value="0.8",
                condition_type="number",
            ),
            JudgmentCondition(
                metric_name="output",
                comparison_operator="contains",
                value="expected_output",
                value_source="variable",
                condition_type="string",
            ),
        ],
    )
"""

from collections.abc import Sequence
from enum import StrEnum
from typing import Any, Literal

from pydantic import BaseModel, Field


class JudgmentValueSource(StrEnum):
    """Where the comparison target value comes from."""

    CONSTANT = "constant"
    VARIABLE = "variable"


class JudgmentConditionType(StrEnum):
    """Category of the condition, controls operator semantics and type coercion."""

    STRING = "string"
    NUMBER = "number"
    DATETIME = "datetime"


# Supported comparison operators for judgment conditions.
JudgmentComparisonOperator = Literal[
    # string
    "contains",
    "not contains",
    "start with",
    "end with",
    "is",
    "is not",
    "empty",
    "not empty",
    "in",
    "not in",
    # number
    "=",
    "≠",
    ">",
    "<",
    "≥",
    "≤",
    # datetime
    "before",
    "after",
    # universal
    "null",
    "not null",
]


class JudgmentCondition(BaseModel):
    """A single judgment condition that checks one metric value.

    Attributes:
        metric_name: The name of the evaluation metric to check (left side).
            Must match an EvaluationMetric.name in the results.
        comparison_operator: The comparison operator to apply.
        value: The comparison target (right side).
            - When value_source is "constant": the literal threshold/expected value.
            - When value_source is "variable": the variable key name to look up
              from the evaluation target's runtime data.
            For unary operators (empty, null, etc.), this can be None.
        value_source: Where the comparison value comes from.
            "constant" (default) for user-supplied literals,
            "variable" for references to evaluation target data.
        condition_type: Controls type coercion and which operators are valid.
            "string" (default), "number", or "datetime".
    """

    metric_name: str
    comparison_operator: JudgmentComparisonOperator
    value: str | Sequence[str] | None = None
    value_source: JudgmentValueSource = JudgmentValueSource.CONSTANT
    condition_type: JudgmentConditionType = JudgmentConditionType.STRING


class JudgmentConfig(BaseModel):
    """A group of judgment conditions combined with a logical operator.

    Attributes:
        logical_operator: How to combine condition results — "and" requires
            all conditions to pass, "or" requires at least one.
        conditions: The list of individual conditions to evaluate.
    """

    logical_operator: Literal["and", "or"] = "and"
    conditions: list[JudgmentCondition] = Field(default_factory=list)


class JudgmentConditionResult(BaseModel):
    """Result of evaluating a single judgment condition.

    Attributes:
        metric_name: Which metric was checked.
        comparison_operator: The operator that was applied.
        expected_value: The resolved comparison value (after variable resolution).
        actual_value: The actual metric value that was evaluated.
        passed: Whether this individual condition passed.
        error: Error message if the condition evaluation failed.
    """

    metric_name: str
    comparison_operator: str
    expected_value: Any = None
    actual_value: Any = None
    passed: bool = False
    error: str | None = None


class JudgmentResult(BaseModel):
    """Overall result of evaluating all judgment conditions for one item.

    Attributes:
        passed: Whether the overall judgment passed (based on logical_operator).
        logical_operator: The logical operator used to combine conditions.
        condition_results: Detailed result for each individual condition.
    """

    passed: bool = False
    logical_operator: Literal["and", "or"] = "and"
    condition_results: list[JudgmentConditionResult] = Field(default_factory=list)