From e52dbd49beae877e98d47ae2dbb76107c5617be3 Mon Sep 17 00:00:00 2001
From: JzoNg <jzongcode@gmail.com>
Date: Thu, 9 Apr 2026 15:34:59 +0800
Subject: [PATCH] feat(web): dataset evaluation configure

---
 .../evaluation/__tests__/index.spec.tsx       | 24 ++++++++-
 .../evaluation/__tests__/store.spec.ts        |  1 +
 .../components/layout/pipeline-evaluation.tsx | 33 ++++++++----
 .../pipeline/pipeline-metric-item.tsx         | 51 +++++++++++++++----
 web/app/components/evaluation/store-utils.ts  |  9 +++-
 web/app/components/evaluation/store.ts        | 12 +++++
 web/app/components/evaluation/types.ts        |  1 +
 web/i18n/en-US/evaluation.json                |  1 +
 8 files changed, 111 insertions(+), 21 deletions(-)
diff --git a/web/app/components/evaluation/__tests__/index.spec.tsx b/web/app/components/evaluation/__tests__/index.spec.tsx
index 302df31c9d..e2da9b196f 100644
--- a/web/app/components/evaluation/__tests__/index.spec.tsx
+++ b/web/app/components/evaluation/__tests__/index.spec.tsx
@@ -54,7 +54,7 @@ describe('Evaluation', () => {
 
     mockUseAvailableEvaluationMetrics.mockReturnValue({
       data: {
-        metrics: ['answer-correctness', 'faithfulness'],
+        metrics: ['answer-correctness', 'faithfulness', 'context-precision', 'context-recall', 'context-relevance'],
       },
       isLoading: false,
     })
@@ -240,12 +240,34 @@ describe('Evaluation', () => {
     expect(screen.getByRole('button', { name: 'evaluation.pipeline.uploadAndRun' })).toBeDisabled()
   })
 
+  it('should render selected pipeline metrics from config with the default threshold input', () => {
+    mockUseEvaluationConfig.mockReturnValue({ // saved evaluation config served to the component for this render
+      data: {
+        evaluation_model: null,
+        evaluation_model_provider: null,
+        metrics_config: {
+          default_metrics: [{
+            metric: 'context-precision', // no threshold is supplied alongside the metric id
+          }],
+          customized_metrics: null,
+        },
+        judgement_conditions: null,
+      },
+    })
+
+    render(<Evaluation resourceType="datasets" resourceId="dataset-2" />)
+
+    expect(screen.getByText('Context Precision')).toBeInTheDocument() // the configured metric is listed
+    expect(screen.getByDisplayValue('0.85')).toBeInTheDocument() // its threshold input shows the 0.85 default
+  })
+
   it('should enable pipeline batch actions after selecting a judge model and metric', () => {
     render(<Evaluation resourceType="datasets" resourceId="dataset-2" />)
 
     fireEvent.click(screen.getByRole('button', { name: 'select-model' }))
     fireEvent.click(screen.getByRole('button', { name: /Context Precision/i }))
 
+    expect(screen.getByDisplayValue('0.85')).toBeInTheDocument()
     expect(screen.getByRole('button', { name: 'evaluation.batch.downloadTemplate' })).toBeEnabled()
     expect(screen.getByRole('button', { name: 'evaluation.pipeline.uploadAndRun' })).toBeEnabled()
   })
diff --git a/web/app/components/evaluation/__tests__/store.spec.ts b/web/app/components/evaluation/__tests__/store.spec.ts
index 9415e8430a..3814b15219 100644
--- a/web/app/components/evaluation/__tests__/store.spec.ts
+++ b/web/app/components/evaluation/__tests__/store.spec.ts
@@ -181,6 +181,7 @@ describe('evaluation store', () => {
     expect(hydratedState.judgeModelId).toBe('openai::gpt-4o-mini')
     expect(hydratedState.metrics).toHaveLength(2)
     expect(hydratedState.metrics[0].optionId).toBe('faithfulness')
+    expect(hydratedState.metrics[0].threshold).toBe(0.85)
     expect(hydratedState.metrics[0].nodeInfoList).toEqual([
       { node_id: 'node-1', title: 'Retriever', type: 'retriever' },
     ])
diff --git a/web/app/components/evaluation/components/layout/pipeline-evaluation.tsx b/web/app/components/evaluation/components/layout/pipeline-evaluation.tsx
index c605c53e17..32254e1e6d 100644
--- a/web/app/components/evaluation/components/layout/pipeline-evaluation.tsx
+++ b/web/app/components/evaluation/components/layout/pipeline-evaluation.tsx
@@ -6,6 +6,7 @@ import { useTranslation } from 'react-i18next'
 import Button from '@/app/components/base/button'
 import { toast } from '@/app/components/base/ui/toast'
 import { useDocLink } from '@/context/i18n'
+import { useAvailableEvaluationMetrics } from '@/service/use-evaluation'
 import { getEvaluationMockConfig } from '../../mock'
 import { isEvaluationRunnable, useEvaluationResource, useEvaluationStore } from '../../store'
 import JudgeModelSelector from '../judge-model-selector'
@@ -24,8 +25,10 @@ const PipelineEvaluation = ({
   const ensureResource = useEvaluationStore(state => state.ensureResource)
   const addBuiltinMetric = useEvaluationStore(state => state.addBuiltinMetric)
   const removeMetric = useEvaluationStore(state => state.removeMetric)
+  const updateMetricThreshold = useEvaluationStore(state => state.updateMetricThreshold)
   const setUploadedFileName = useEvaluationStore(state => state.setUploadedFileName)
   const runBatchTest = useEvaluationStore(state => state.runBatchTest)
+  const { data: availableMetricsData } = useAvailableEvaluationMetrics()
   const resource = useEvaluationResource(resourceType, resourceId)
   const fileInputRef = useRef<HTMLInputElement>(null)
   const config = getEvaluationMockConfig(resourceType)
@@ -34,6 +37,12 @@ const PipelineEvaluation = ({
       .filter(metric => metric.kind === 'builtin')
       .map(metric => [metric.optionId, metric]),
   ), [resource.metrics])
+  const availableMetricIds = useMemo(() => new Set(availableMetricsData?.metrics ?? []), [availableMetricsData?.metrics])
+  const availableBuiltinMetrics = useMemo(() => {
+    return config.builtinMetrics.filter(metric =>
+      availableMetricIds.has(metric.id) || builtinMetricMap.has(metric.id),
+    )
+  }, [availableMetricIds, builtinMetricMap, config.builtinMetrics])
   const isConfigReady = !!resource.judgeModelId && builtinMetricMap.size > 0
   const isRunnable = isEvaluationRunnable(resource)
 
@@ -107,15 +116,21 @@ const PipelineEvaluation = ({
             <section>
               <InlineSectionHeader title={t('metrics.title')} tooltip={t('metrics.description')} />
               <div className="mt-1 space-y-0.5">
-                {config.builtinMetrics.map(metric => (
-                  <PipelineMetricItem
-                    key={metric.id}
-                    metric={metric}
-                    selected={builtinMetricMap.has(metric.id)}
-                    disabledCondition
-                    onToggle={() => handleToggleMetric(metric.id)}
-                  />
-                ))}
+                {availableBuiltinMetrics.map((metric) => {
+                  const selectedMetric = builtinMetricMap.get(metric.id) // the resource's builtin metric for this option; undefined when none exists
+
+                  return (
+                    <PipelineMetricItem
+                      key={metric.id}
+                      metric={metric}
+                      selected={!!selectedMetric}
+                      threshold={selectedMetric?.threshold}
+                      disabledCondition
+                      onToggle={() => handleToggleMetric(metric.id)}
+                      onThresholdChange={value => updateMetricThreshold(resourceType, resourceId, selectedMetric?.id ?? '', value)}
+                    />
+                  )
+                })}
               </div>
             </section>
 
diff --git a/web/app/components/evaluation/components/pipeline/pipeline-metric-item.tsx b/web/app/components/evaluation/components/pipeline/pipeline-metric-item.tsx
index 14c4a3c726..e535da2204 100644
--- a/web/app/components/evaluation/components/pipeline/pipeline-metric-item.tsx
+++ b/web/app/components/evaluation/components/pipeline/pipeline-metric-item.tsx
@@ -1,15 +1,20 @@
 'use client'
 
 import type { MetricOption } from '../../types'
+import { useTranslation } from 'react-i18next'
 import Checkbox from '@/app/components/base/checkbox'
+import Input from '@/app/components/base/input'
 import { Tooltip, TooltipContent, TooltipTrigger } from '@/app/components/base/ui/tooltip'
 import { cn } from '@/utils/classnames'
+import { DEFAULT_PIPELINE_METRIC_THRESHOLD } from '../../store-utils'
 
 type PipelineMetricItemProps = {
   metric: MetricOption
   selected: boolean
   onToggle: () => void
   disabledCondition: boolean
+  threshold?: number
+  onThresholdChange: (value: number) => void
 }
 
 const PipelineMetricItem = ({
@@ -17,7 +22,11 @@ const PipelineMetricItem = ({
   selected,
   onToggle,
   disabledCondition,
+  threshold = DEFAULT_PIPELINE_METRIC_THRESHOLD,
+  onThresholdChange,
 }: PipelineMetricItemProps) => {
+  const { t } = useTranslation('evaluation')
+
   return (
     <div className="flex items-center justify-between gap-3 px-1 py-1">
       <button
@@ -41,16 +50,38 @@ const PipelineMetricItem = ({
         </Tooltip>
       </button>
 
-      <button
-        type="button"
-        disabled={disabledCondition}
-        className={cn(
-          'system-xs-medium text-text-tertiary',
-          disabledCondition && 'cursor-not-allowed text-components-button-secondary-accent-text-disabled',
-        )}
-      >
-        + Condition
-      </button>
+      {selected
+        ? (
+            <div className="flex items-center gap-2">
+              <span className="system-xs-medium text-text-accent">{t('pipeline.passIf')}</span>
+              <div className="w-[52px]">
+                <Input
+                  value={String(threshold)}
+                  type="number"
+                  min={0}
+                  max={1}
+                  step={0.01}
+                  onChange={(event) => {
+                    const parsedValue = Number.parseFloat(event.target.value) // '' (cleared/invalid field) parses to NaN here, whereas Number('') is 0
+                    if (Number.isFinite(parsedValue)) // typed values are not limited by min/max, so clamp into [0, 1]
+                      onThresholdChange(Math.min(1, Math.max(0, parsedValue)))
+                  }}
+                />
+              </div>
+            </div>
+          )
+        : (
+            <button
+              type="button"
+              disabled={disabledCondition}
+              className={cn(
+                'system-xs-medium text-text-tertiary',
+                disabledCondition && 'cursor-not-allowed text-components-button-secondary-accent-text-disabled',
+              )}
+            >
+              + Condition
+            </button>
+          )}
     </div>
   )
 }
diff --git a/web/app/components/evaluation/store-utils.ts b/web/app/components/evaluation/store-utils.ts
index c1acc1300c..b1c6baa4de 100644
--- a/web/app/components/evaluation/store-utils.ts
+++ b/web/app/components/evaluation/store-utils.ts
@@ -24,6 +24,8 @@ import { encodeModelSelection } from './utils'
 
 type EvaluationStoreResources = Record<string, EvaluationResourceState>
 
+export const DEFAULT_PIPELINE_METRIC_THRESHOLD = 0.85 // Threshold used for a builtin pipeline metric when no explicit value is given.
+
 const createId = (prefix: string) => `${prefix}-${Math.random().toString(36).slice(2, 10)}`
 
 const humanizeMetricId = (metricId: string) => {
@@ -213,7 +215,11 @@ export function getConditionValue(
   return typeof previousValue === 'string' ? previousValue : null
 }
 
-export function createBuiltinMetric(metric: MetricOption, nodeInfoList: NodeInfo[] = []): EvaluationMetric {
+export function createBuiltinMetric(
+  metric: MetricOption,
+  nodeInfoList: NodeInfo[] = [],
+  threshold = DEFAULT_PIPELINE_METRIC_THRESHOLD,
+): EvaluationMetric {
   return {
     id: createId('metric'),
     optionId: metric.id,
@@ -221,6 +227,7 @@ export function createBuiltinMetric(metric: MetricOption, nodeInfoList: NodeInfo
     label: metric.label,
     description: metric.description,
     badges: metric.badges,
+    threshold,
     nodeInfoList,
   }
 }
diff --git a/web/app/components/evaluation/store.ts b/web/app/components/evaluation/store.ts
index 61e1f773bb..5774dd5934 100644
--- a/web/app/components/evaluation/store.ts
+++ b/web/app/components/evaluation/store.ts
@@ -32,6 +32,7 @@ type EvaluationStore = {
   hydrateResource: (resourceType: EvaluationResourceType, resourceId: string, config: EvaluationConfig) => void
   setJudgeModel: (resourceType: EvaluationResourceType, resourceId: string, judgeModelId: string) => void
   addBuiltinMetric: (resourceType: EvaluationResourceType, resourceId: string, optionId: string, nodeInfoList?: NodeInfo[]) => void
+  updateMetricThreshold: (resourceType: EvaluationResourceType, resourceId: string, metricId: string, threshold: number) => void
   addCustomMetric: (resourceType: EvaluationResourceType, resourceId: string) => void
   removeMetric: (resourceType: EvaluationResourceType, resourceId: string, metricId: string) => void
   setCustomMetricWorkflow: (
@@ -126,6 +127,17 @@ export const useEvaluationStore = create<EvaluationStore>((set, get) => ({
       }
     })
   },
+  // Writes `threshold` onto the metric addressed by `metricId`; locating that metric is delegated to updateMetric.
+  updateMetricThreshold: (resourceType, resourceId, metricId, threshold) => {
+    set((state) => {
+      const resources = updateResourceState(state.resources, resourceType, resourceId, (resource) => {
+        const metrics = updateMetric(resource.metrics, metricId, metric => ({ ...metric, threshold }))
+        return { ...resource, metrics }
+      })
+
+      return { resources }
+    })
+  },
   addCustomMetric: (resourceType, resourceId) => {
     set(state => ({
       resources: updateResourceState(state.resources, resourceType, resourceId, resource => ({
diff --git a/web/app/components/evaluation/types.ts b/web/app/components/evaluation/types.ts
index 936c47365e..411c28d540 100644
--- a/web/app/components/evaluation/types.ts
+++ b/web/app/components/evaluation/types.ts
@@ -82,6 +82,7 @@ export type EvaluationMetric = {
   label: string
   description: string
   badges: string[]
+  threshold?: number
   nodeInfoList?: NodeInfo[]
   customConfig?: CustomMetricConfig
 }
diff --git a/web/i18n/en-US/evaluation.json b/web/i18n/en-US/evaluation.json
index 41dff238e3..0ba7fe44cd 100644
--- a/web/i18n/en-US/evaluation.json
+++ b/web/i18n/en-US/evaluation.json
@@ -92,6 +92,7 @@
   "metrics.showMore": "Show more",
   "metrics.title": "Metrics",
   "metrics.update": "Update",
+  "pipeline.passIf": "Pass if \u2265",
   "pipeline.uploadAndRun": "Upload & Run Test",
   "results.empty": "No evaluation results yet.",
   "title": "Evaluation"