Files
dify/api/models/evaluation.py

195 lines
7.5 KiB
Python

from __future__ import annotations
import json
from datetime import datetime
from enum import StrEnum
from typing import Any
import sqlalchemy as sa
from sqlalchemy import DateTime, Float, Integer, String, Text, func
from sqlalchemy.orm import Mapped, mapped_column
from libs.uuid_utils import uuidv7
from .base import Base
from .types import LongText, StringUUID
class EvaluationRunStatus(StrEnum):
PENDING = "pending"
RUNNING = "running"
COMPLETED = "completed"
FAILED = "failed"
CANCELLED = "cancelled"
class EvaluationTargetType(StrEnum):
APP = "app"
SNIPPETS = "snippets"
class EvaluationConfiguration(Base):
"""Stores evaluation configuration for each target (App or Snippet)."""
__tablename__ = "evaluation_configurations"
__table_args__ = (
sa.PrimaryKeyConstraint("id", name="evaluation_configuration_pkey"),
sa.Index("evaluation_configuration_target_idx", "tenant_id", "target_type", "target_id"),
sa.UniqueConstraint("tenant_id", "target_type", "target_id", name="evaluation_configuration_unique"),
)
id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuidv7()))
tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False)
target_type: Mapped[str] = mapped_column(String(20), nullable=False)
target_id: Mapped[str] = mapped_column(StringUUID, nullable=False)
evaluation_model_provider: Mapped[str | None] = mapped_column(String(255), nullable=True)
evaluation_model: Mapped[str | None] = mapped_column(String(255), nullable=True)
metrics_config: Mapped[str | None] = mapped_column(LongText, nullable=True)
judgement_conditions: Mapped[str | None] = mapped_column(LongText, nullable=True)
created_by: Mapped[str] = mapped_column(StringUUID, nullable=False)
updated_by: Mapped[str] = mapped_column(StringUUID, nullable=False)
created_at: Mapped[datetime] = mapped_column(
DateTime, nullable=False, server_default=func.current_timestamp()
)
updated_at: Mapped[datetime] = mapped_column(
DateTime, nullable=False, server_default=func.current_timestamp(), onupdate=func.current_timestamp()
)
@property
def metrics_config_dict(self) -> dict[str, Any]:
if self.metrics_config:
return json.loads(self.metrics_config)
return {}
@metrics_config_dict.setter
def metrics_config_dict(self, value: dict[str, Any]) -> None:
self.metrics_config = json.dumps(value)
@property
def judgement_conditions_dict(self) -> dict[str, Any]:
if self.judgement_conditions:
return json.loads(self.judgement_conditions)
return {}
@judgement_conditions_dict.setter
def judgement_conditions_dict(self, value: dict[str, Any]) -> None:
self.judgement_conditions = json.dumps(value)
def __repr__(self) -> str:
return f"<EvaluationConfiguration(id={self.id}, target={self.target_type}:{self.target_id})>"
class EvaluationRun(Base):
"""Stores each evaluation run record."""
__tablename__ = "evaluation_runs"
__table_args__ = (
sa.PrimaryKeyConstraint("id", name="evaluation_run_pkey"),
sa.Index("evaluation_run_target_idx", "tenant_id", "target_type", "target_id"),
sa.Index("evaluation_run_status_idx", "tenant_id", "status"),
)
id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuidv7()))
tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False)
target_type: Mapped[str] = mapped_column(String(20), nullable=False)
target_id: Mapped[str] = mapped_column(StringUUID, nullable=False)
evaluation_config_id: Mapped[str] = mapped_column(StringUUID, nullable=False)
status: Mapped[str] = mapped_column(
String(20), nullable=False, default=EvaluationRunStatus.PENDING
)
dataset_file_id: Mapped[str | None] = mapped_column(StringUUID, nullable=True)
result_file_id: Mapped[str | None] = mapped_column(StringUUID, nullable=True)
total_items: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
completed_items: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
failed_items: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
metrics_summary: Mapped[str | None] = mapped_column(LongText, nullable=True)
error: Mapped[str | None] = mapped_column(Text, nullable=True)
celery_task_id: Mapped[str | None] = mapped_column(String(255), nullable=True)
created_by: Mapped[str] = mapped_column(StringUUID, nullable=False)
started_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
completed_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
created_at: Mapped[datetime] = mapped_column(
DateTime, nullable=False, server_default=func.current_timestamp()
)
updated_at: Mapped[datetime] = mapped_column(
DateTime, nullable=False, server_default=func.current_timestamp(), onupdate=func.current_timestamp()
)
@property
def metrics_summary_dict(self) -> dict[str, Any]:
if self.metrics_summary:
return json.loads(self.metrics_summary)
return {}
@metrics_summary_dict.setter
def metrics_summary_dict(self, value: dict[str, Any]) -> None:
self.metrics_summary = json.dumps(value)
@property
def progress(self) -> float:
if self.total_items == 0:
return 0.0
return (self.completed_items + self.failed_items) / self.total_items
def __repr__(self) -> str:
return f"<EvaluationRun(id={self.id}, status={self.status})>"
class EvaluationRunItem(Base):
"""Stores per-row evaluation results."""
__tablename__ = "evaluation_run_items"
__table_args__ = (
sa.PrimaryKeyConstraint("id", name="evaluation_run_item_pkey"),
sa.Index("evaluation_run_item_run_idx", "evaluation_run_id"),
sa.Index("evaluation_run_item_index_idx", "evaluation_run_id", "item_index"),
)
id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuidv7()))
evaluation_run_id: Mapped[str] = mapped_column(StringUUID, nullable=False)
item_index: Mapped[int] = mapped_column(Integer, nullable=False)
inputs: Mapped[str | None] = mapped_column(LongText, nullable=True)
expected_output: Mapped[str | None] = mapped_column(LongText, nullable=True)
context: Mapped[str | None] = mapped_column(LongText, nullable=True)
actual_output: Mapped[str | None] = mapped_column(LongText, nullable=True)
metrics: Mapped[str | None] = mapped_column(LongText, nullable=True)
judgment: Mapped[str | None] = mapped_column(LongText, nullable=True)
metadata_json: Mapped[str | None] = mapped_column(LongText, nullable=True)
error: Mapped[str | None] = mapped_column(Text, nullable=True)
overall_score: Mapped[float | None] = mapped_column(Float, nullable=True)
created_at: Mapped[datetime] = mapped_column(
DateTime, nullable=False, server_default=func.current_timestamp()
)
@property
def inputs_dict(self) -> dict[str, Any]:
if self.inputs:
return json.loads(self.inputs)
return {}
@property
def metrics_list(self) -> list[dict[str, Any]]:
if self.metrics:
return json.loads(self.metrics)
return []
@property
def metadata_dict(self) -> dict[str, Any]:
if self.metadata_json:
return json.loads(self.metadata_json)
return {}
def __repr__(self) -> str:
return f"<EvaluationRunItem(id={self.id}, run={self.evaluation_run_id}, index={self.item_index})>"