from __future__ import annotations

import json
from datetime import datetime
from enum import StrEnum
from typing import Any

import sqlalchemy as sa
from sqlalchemy import DateTime, Float, Integer, String, Text, func
from sqlalchemy.orm import Mapped, mapped_column

from libs.uuid_utils import uuidv7

from .base import Base
from .types import LongText, StringUUID


class EvaluationRunStatus(StrEnum):
    """String values used for the ``status`` column of :class:`EvaluationRun`."""

    PENDING = "pending"
    RUNNING = "running"
    COMPLETED = "completed"
    FAILED = "failed"
    CANCELLED = "cancelled"


class EvaluationTargetType(StrEnum):
    """String values naming the kind of target an evaluation is attached to."""

    APP = "app"
    SNIPPETS = "snippets"


class EvaluationConfiguration(Base):
    """Stores evaluation configuration for each target (App or Snippet).

    The unique constraint allows at most one row per
    ``(tenant_id, target_type, target_id)`` combination.
    """

    __tablename__ = "evaluation_configurations"
    __table_args__ = (
        sa.PrimaryKeyConstraint("id", name="evaluation_configuration_pkey"),
        sa.Index("evaluation_configuration_target_idx", "tenant_id", "target_type", "target_id"),
        sa.UniqueConstraint("tenant_id", "target_type", "target_id", name="evaluation_configuration_unique"),
    )

    # Primary key; generated client-side as a UUIDv7 string when not supplied.
    id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuidv7()))
    tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False)
    # NOTE(review): presumably holds an EvaluationTargetType value; the column
    # itself is a plain String(20) and does not enforce that.
    target_type: Mapped[str] = mapped_column(String(20), nullable=False)
    target_id: Mapped[str] = mapped_column(StringUUID, nullable=False)

    evaluation_model_provider: Mapped[str | None] = mapped_column(String(255), nullable=True)
    evaluation_model: Mapped[str | None] = mapped_column(String(255), nullable=True)
    # JSON-encoded text; read/write through ``metrics_config_dict``.
    metrics_config: Mapped[str | None] = mapped_column(LongText, nullable=True)
    # JSON-encoded text; read/write through ``judgement_conditions_dict``.
    judgement_conditions: Mapped[str | None] = mapped_column(LongText, nullable=True)

    created_by: Mapped[str] = mapped_column(StringUUID, nullable=False)
    updated_by: Mapped[str] = mapped_column(StringUUID, nullable=False)
    created_at: Mapped[datetime] = mapped_column(
        DateTime, nullable=False, server_default=func.current_timestamp()
    )
    updated_at: Mapped[datetime] = mapped_column(
        DateTime,
        nullable=False,
        server_default=func.current_timestamp(),
        onupdate=func.current_timestamp(),
    )

    @property
    def metrics_config_dict(self) -> dict[str, Any]:
        """Return ``metrics_config`` decoded from JSON, or ``{}`` when it is empty or None."""
        if self.metrics_config:
            return json.loads(self.metrics_config)
        return {}

    @metrics_config_dict.setter
    def metrics_config_dict(self, value: dict[str, Any]) -> None:
        """Store ``value`` in ``metrics_config`` as a JSON string."""
        self.metrics_config = json.dumps(value)

    @property
    def judgement_conditions_dict(self) -> dict[str, Any]:
        """Return ``judgement_conditions`` decoded from JSON, or ``{}`` when it is empty or None."""
        if self.judgement_conditions:
            return json.loads(self.judgement_conditions)
        return {}

    @judgement_conditions_dict.setter
    def judgement_conditions_dict(self, value: dict[str, Any]) -> None:
        """Store ``value`` in ``judgement_conditions`` as a JSON string."""
        self.judgement_conditions = json.dumps(value)

    def __repr__(self) -> str:
        """Return a short identifying representation for logs and debugging."""
        return f"<EvaluationConfiguration(id={self.id}, target={self.target_type}:{self.target_id})>"


class EvaluationRun(Base):
    """Stores each evaluation run record."""

    __tablename__ = "evaluation_runs"
    __table_args__ = (
        sa.PrimaryKeyConstraint("id", name="evaluation_run_pkey"),
        sa.Index("evaluation_run_target_idx", "tenant_id", "target_type", "target_id"),
        sa.Index("evaluation_run_status_idx", "tenant_id", "status"),
    )

    # Primary key; generated client-side as a UUIDv7 string when not supplied.
    id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuidv7()))
    tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False)
    target_type: Mapped[str] = mapped_column(String(20), nullable=False)
    target_id: Mapped[str] = mapped_column(StringUUID, nullable=False)
    # NOTE(review): presumably references EvaluationConfiguration.id; no
    # foreign key is declared here.
    evaluation_config_id: Mapped[str] = mapped_column(StringUUID, nullable=False)

    # New rows start out as "pending" unless a status is given explicitly.
    status: Mapped[str] = mapped_column(
        String(20), nullable=False, default=EvaluationRunStatus.PENDING
    )
    dataset_file_id: Mapped[str | None] = mapped_column(StringUUID, nullable=True)
    result_file_id: Mapped[str | None] = mapped_column(StringUUID, nullable=True)

    # Counters consumed by the ``progress`` property.
    total_items: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
    completed_items: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
    failed_items: Mapped[int] = mapped_column(Integer, nullable=False, default=0)

    # JSON-encoded text; read/write through ``metrics_summary_dict``.
    metrics_summary: Mapped[str | None] = mapped_column(LongText, nullable=True)
    error: Mapped[str | None] = mapped_column(Text, nullable=True)
    celery_task_id: Mapped[str | None] = mapped_column(String(255), nullable=True)

    created_by: Mapped[str] = mapped_column(StringUUID, nullable=False)
    started_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
    completed_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
    created_at: Mapped[datetime] = mapped_column(
        DateTime, nullable=False, server_default=func.current_timestamp()
    )
    updated_at: Mapped[datetime] = mapped_column(
        DateTime,
        nullable=False,
        server_default=func.current_timestamp(),
        onupdate=func.current_timestamp(),
    )

    @property
    def metrics_summary_dict(self) -> dict[str, Any]:
        """Return ``metrics_summary`` decoded from JSON, or ``{}`` when it is empty or None."""
        if self.metrics_summary:
            return json.loads(self.metrics_summary)
        return {}

    @metrics_summary_dict.setter
    def metrics_summary_dict(self, value: dict[str, Any]) -> None:
        """Store ``value`` in ``metrics_summary`` as a JSON string."""
        self.metrics_summary = json.dumps(value)

    @property
    def progress(self) -> float:
        """Return the fraction of items processed (completed plus failed) over the total.

        Returns ``0.0`` when ``total_items`` is zero so the division is never
        attempted on an empty run.
        """
        if self.total_items == 0:
            return 0.0
        return (self.completed_items + self.failed_items) / self.total_items

    def __repr__(self) -> str:
        """Return a short identifying representation for logs and debugging."""
        return f"<EvaluationRun(id={self.id}, status={self.status})>"


class EvaluationRunItem(Base):
    """Stores per-row evaluation results."""

    __tablename__ = "evaluation_run_items"
    __table_args__ = (
        sa.PrimaryKeyConstraint("id", name="evaluation_run_item_pkey"),
        sa.Index("evaluation_run_item_run_idx", "evaluation_run_id"),
        sa.Index("evaluation_run_item_index_idx", "evaluation_run_id", "item_index"),
    )

    # Primary key; generated client-side as a UUIDv7 string when not supplied.
    id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuidv7()))
    # NOTE(review): presumably references EvaluationRun.id; no foreign key is
    # declared here.
    evaluation_run_id: Mapped[str] = mapped_column(StringUUID, nullable=False)

    item_index: Mapped[int] = mapped_column(Integer, nullable=False)
    # JSON-encoded text; decoded by ``inputs_dict``.
    inputs: Mapped[str | None] = mapped_column(LongText, nullable=True)
    expected_output: Mapped[str | None] = mapped_column(LongText, nullable=True)
    context: Mapped[str | None] = mapped_column(LongText, nullable=True)
    actual_output: Mapped[str | None] = mapped_column(LongText, nullable=True)

    # JSON-encoded text; decoded by ``metrics_list``.
    metrics: Mapped[str | None] = mapped_column(LongText, nullable=True)
    judgment: Mapped[str | None] = mapped_column(LongText, nullable=True)
    # JSON-encoded text; decoded by ``metadata_dict``.
    metadata_json: Mapped[str | None] = mapped_column(LongText, nullable=True)
    error: Mapped[str | None] = mapped_column(Text, nullable=True)

    overall_score: Mapped[float | None] = mapped_column(Float, nullable=True)

    created_at: Mapped[datetime] = mapped_column(
        DateTime, nullable=False, server_default=func.current_timestamp()
    )

    @property
    def inputs_dict(self) -> dict[str, Any]:
        """Return ``inputs`` decoded from JSON, or ``{}`` when it is empty or None."""
        if self.inputs:
            return json.loads(self.inputs)
        return {}

    @property
    def metrics_list(self) -> list[dict[str, Any]]:
        """Return ``metrics`` decoded from JSON, or ``[]`` when it is empty or None."""
        if self.metrics:
            return json.loads(self.metrics)
        return []

    @property
    def metadata_dict(self) -> dict[str, Any]:
        """Return ``metadata_json`` decoded from JSON, or ``{}`` when it is empty or None."""
        if self.metadata_json:
            return json.loads(self.metadata_json)
        return {}

    def __repr__(self) -> str:
        """Return a short identifying representation for logs and debugging."""
        return (
            f"<EvaluationRunItem(id={self.id}, run={self.evaluation_run_id}, "
            f"index={self.item_index})>"
        )