mirror of
https://github.com/langgenius/dify.git
synced 2026-03-07 18:01:12 -05:00
145 lines
4.8 KiB
Python
145 lines
4.8 KiB
Python
"""Judgment condition processor for evaluation metrics.
|
|
|
|
Evaluates pass/fail judgment conditions against evaluation metric values.
|
|
Reuses the core comparison engine from the workflow condition system
|
|
(core.workflow.utils.condition.processor._evaluate_condition) to ensure
|
|
consistent operator semantics across the platform.
|
|
|
|
"""
|
|
|
|
import logging
|
|
from typing import Any
|
|
|
|
from core.evaluation.entities.judgment_entity import (
|
|
JudgmentCondition,
|
|
JudgmentConditionResult,
|
|
JudgmentConfig,
|
|
JudgmentResult,
|
|
)
|
|
from core.workflow.utils.condition.processor import _evaluate_condition
|
|
|
|
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class JudgmentProcessor:
    """Stateless helper that turns metric values plus a judgment config into a verdict.

    Both methods are static; the class only serves as a namespace.
    """

    # Operators for which a ``None`` metric value is still handed to the
    # comparison engine instead of being reported as "metric not found".
    _NONE_TOLERANT_OPERATORS = (
        "null",
        "not null",
        "empty",
        "not empty",
        "exists",
        "not exists",
    )

    @staticmethod
    def evaluate(
        metric_values: dict[str, Any],
        config: JudgmentConfig,
    ) -> JudgmentResult:
        """Run every configured condition and combine the outcomes.

        Evaluation short-circuits: under ``"and"`` the first failing condition
        ends the run, under ``"or"`` the first passing one does. In both cases
        the returned ``condition_results`` only contains the conditions that
        were actually evaluated up to that point.

        Args:
            metric_values: Mapping of metric name to its value
                (e.g. {"faithfulness": 0.85, "status": "success"}).
            config: Judgment configuration providing ``logical_operator``
                and ``conditions``.

        Returns:
            JudgmentResult carrying the overall verdict, the logical operator
            that was applied, and the per-condition results. A config without
            conditions always passes.
        """
        combinator = config.logical_operator
        outcomes: list[JudgmentConditionResult] = []

        def _verdict(overall: bool) -> JudgmentResult:
            return JudgmentResult(
                passed=overall,
                logical_operator=combinator,
                condition_results=outcomes,
            )

        # Nothing to check: pass regardless of the logical operator.
        if not config.conditions:
            return _verdict(True)

        for rule in config.conditions:
            outcome = JudgmentProcessor._evaluate_single_condition(metric_values, rule)
            outcomes.append(outcome)

            if combinator == "and":
                if not outcome.passed:
                    return _verdict(False)
            elif combinator == "or" and outcome.passed:
                return _verdict(True)

        # No short-circuit happened; any operator other than "and" is
        # combined with "any" semantics.
        reducer = all if combinator == "and" else any
        return _verdict(reducer(item.passed for item in outcomes))

    @staticmethod
    def _evaluate_single_condition(
        metric_values: dict[str, Any],
        condition: JudgmentCondition,
    ) -> JudgmentConditionResult:
        """Check one condition against the metric it names.

        The metric is fetched with ``dict.get``, so a missing key and an
        explicit ``None`` value are treated alike: unless the operator is one
        of the null/empty/exists family, the condition fails with a
        "not found" error. Otherwise the comparison is delegated to the
        workflow condition engine, and any exception raised there is logged
        and converted into a failed result.

        Args:
            metric_values: Mapping of metric name to its value.
            condition: The condition to evaluate.

        Returns:
            JudgmentConditionResult with the pass/fail flag, the expected and
            actual values, and an error message when evaluation was not
            possible.
        """
        name = condition.metric_name
        operator = condition.comparison_operator
        observed = metric_values.get(name)

        # Fields shared by every result built below.
        shared: dict[str, Any] = {
            "metric_name": name,
            "comparison_operator": operator,
            "expected_value": condition.value,
        }

        if observed is None and operator not in JudgmentProcessor._NONE_TOLERANT_OPERATORS:
            return JudgmentConditionResult(
                **shared,
                actual_value=None,
                passed=False,
                error=f"Metric '{name}' not found in evaluation results",
            )

        try:
            verdict = _evaluate_condition(
                operator=operator,
                value=observed,
                expected=condition.value,
            )
            # Built inside the try so a failure while constructing the result
            # is handled the same way as a failed comparison.
            return JudgmentConditionResult(
                **shared,
                actual_value=observed,
                passed=verdict,
            )
        except Exception as exc:
            reason = str(exc)
            logger.warning(
                "Judgment condition evaluation failed for metric '%s': %s",
                name,
                reason,
            )
            return JudgmentConditionResult(
                **shared,
                actual_value=observed,
                passed=False,
                error=reason,
            )
|