feat(web): save configuration

2026-04-12 00:00:14 -04:00 · 2026-04-10 13:49:05 +08:00
parent 2df79c0404
commit f96e63460e
7 changed files with 277 additions and 27 deletions
--- a/web/app/components/evaluation/tests/index.spec.tsx
+++ b/web/app/components/evaluation/tests/index.spec.tsx
@@ -1,3 +1,5 @@
+import type { ReactNode } from 'react'
+import { QueryClient, QueryClientProvider } from '@tanstack/react-query'
 import { act, fireEvent, render, screen } from '@testing-library/react'
 import Evaluation from '..'
 import ConditionsSection from '../components/conditions-section'
@@ -6,6 +8,8 @@ import { useEvaluationStore } from '../store'
 const mockUseAvailableEvaluationMetrics = vi.hoisted(() => vi.fn())
 const mockUseEvaluationConfig = vi.hoisted(() => vi.fn())
 const mockUseEvaluationNodeInfoMutation = vi.hoisted(() => vi.fn())
+const mockUseSaveEvaluationConfigMutation = vi.hoisted(() => vi.fn())
+const mockUseStartEvaluationRunMutation = vi.hoisted(() => vi.fn())

 vi.mock('@/app/components/header/account-setting/model-provider-page/hooks', () => ({
  useModelList: () => ({
@@ -42,8 +46,71 @@ vi.mock('@/service/use-evaluation', () => ({
  useEvaluationConfig: (...args: unknown[]) => mockUseEvaluationConfig(...args),
  useAvailableEvaluationMetrics: (...args: unknown[]) => mockUseAvailableEvaluationMetrics(...args),
  useEvaluationNodeInfoMutation: (...args: unknown[]) => mockUseEvaluationNodeInfoMutation(...args),
+  useSaveEvaluationConfigMutation: (...args: unknown[]) => mockUseSaveEvaluationConfigMutation(...args),
+  useStartEvaluationRunMutation: (...args: unknown[]) => mockUseStartEvaluationRunMutation(...args),
 }))

+// Stub the app workflow query with one start node exposing a single text-input variable, `query`.
+vi.mock('@/service/use-workflow', () => {
+  const useAppWorkflow = () => {
+    const queryVariable = { variable: 'query', type: 'text-input' }
+    const startNode = {
+      id: 'start',
+      data: {
+        type: 'start',
+        variables: [queryVariable],
+      },
+    }
+
+    return {
+      data: { graph: { nodes: [startNode] } },
+      isLoading: false,
+    }
+  }
+  return { useAppWorkflow }
+})
+
+// Stub the published snippet workflow query with one start node exposing a single text-input variable, `query`.
+vi.mock('@/service/use-snippet-workflows', () => {
+  const useSnippetPublishedWorkflow = () => {
+    const queryVariable = { variable: 'query', type: 'text-input' }
+    const startNode = {
+      id: 'start',
+      data: {
+        type: 'start',
+        variables: [queryVariable],
+      },
+    }
+
+    return {
+      data: { graph: { nodes: [startNode] } },
+      isLoading: false,
+    }
+  }
+  return { useSnippetPublishedWorkflow }
+})
+
+/**
+ * Renders `ui` inside a QueryClientProvider backed by a fresh QueryClient
+ * whose queries and mutations both have retries disabled.
+ */
+const renderWithQueryClient = (ui: ReactNode) => {
+  const client = new QueryClient({
+    defaultOptions: {
+      queries: { retry: false },
+      mutations: { retry: false },
+    },
+  })
+
+  const Providers = ({ children }: { children: ReactNode }) => (
+    <QueryClientProvider client={client}>
+      {children}
+    </QueryClientProvider>
+  )
+
+  return render(ui, { wrapper: Providers })
+}
+
 describe('Evaluation', () => {
  beforeEach(() => {
    useEvaluationStore.setState({ resources: {} })
@@ -72,12 +139,24 @@ describe('Evaluation', () => {
        })
      },
    })
+    mockUseSaveEvaluationConfigMutation.mockReturnValue({
+      isPending: false,
+      mutate: vi.fn(),
+    })
+    mockUseStartEvaluationRunMutation.mockReturnValue({
+      isPending: false,
+      mutate: vi.fn(),
+    })
  })

-  it('should search, select metric nodes, and create a batch history record', async () => {
-    vi.useFakeTimers()
+  it('should search, select metric nodes, and save evaluation config', () => {
+    const saveConfig = vi.fn()
+    mockUseSaveEvaluationConfigMutation.mockReturnValue({
+      isPending: false,
+      mutate: saveConfig,
+    })

-    render(<Evaluation resourceType="apps" resourceId="app-1" />)
+    renderWithQueryClient(<Evaluation resourceType="apps" resourceId="app-1" />)

    expect(screen.getByTestId('evaluation-model-selector')).toHaveTextContent('openai:gpt-4o-mini')

@@ -104,17 +183,39 @@ describe('Evaluation', () => {
    fireEvent.click(screen.getByTestId('evaluation-metric-node-answer-correctness-node-answer'))
    expect(screen.getAllByText('Answer Correctness').length).toBeGreaterThan(0)

-    fireEvent.click(screen.getByRole('button', { name: 'evaluation.batch.run' }))
-    expect(screen.getByText('evaluation.batch.status.running')).toBeInTheDocument()
+    fireEvent.click(screen.getByRole('button', { name: 'common.operation.save' }))

-    await act(async () => {
-      vi.advanceTimersByTime(1300)
+    expect(saveConfig).toHaveBeenCalledWith({
+      params: {
+        targetType: 'apps',
+        targetId: 'app-1',
+      },
+      body: {
+        evaluation_model: 'gpt-4o-mini',
+        evaluation_model_provider: 'openai',
+        default_metrics: [
+          {
+            metric: 'faithfulness',
+            value_type: 'number',
+            node_info_list: [
+              { node_id: 'node-faithfulness', title: 'Retriever Node', type: 'retriever' },
+            ],
+          },
+          {
+            metric: 'answer-correctness',
+            value_type: 'number',
+            node_info_list: [
+              { node_id: 'node-answer', title: 'Answer Node', type: 'llm' },
+            ],
+          },
+        ],
+        customized_metrics: null,
+        judgment_config: null,
+      },
+    }, {
+      onSuccess: expect.any(Function),
+      onError: expect.any(Function),
    })
-
-    expect(screen.getByText('evaluation.batch.status.success')).toBeInTheDocument()
-    expect(screen.getByText('Workflow evaluation batch')).toBeInTheDocument()
-
-    vi.useRealTimers()
  })

  it('should hide the value row for empty operators', () => {
@@ -138,7 +239,7 @@ describe('Evaluation', () => {

    let rerender: ReturnType<typeof render>['rerender']
    act(() => {
-      ({ rerender } = render(<Evaluation resourceType={resourceType} resourceId={resourceId} />))
+      ({ rerender } = renderWithQueryClient(<Evaluation resourceType={resourceType} resourceId={resourceId} />))
    })

    expect(screen.getByPlaceholderText('evaluation.conditions.valuePlaceholder')).toBeInTheDocument()
@@ -212,7 +313,7 @@ describe('Evaluation', () => {
      },
    })

-    render(<Evaluation resourceType="apps" resourceId="app-3" />)
+    renderWithQueryClient(<Evaluation resourceType="apps" resourceId="app-3" />)

    fireEvent.click(screen.getByRole('button', { name: 'evaluation.metrics.add' }))

@@ -227,7 +328,7 @@ describe('Evaluation', () => {
      isLoading: false,
    })

-    render(<Evaluation resourceType="apps" resourceId="app-4" />)
+    renderWithQueryClient(<Evaluation resourceType="apps" resourceId="app-4" />)

    fireEvent.click(screen.getByRole('button', { name: 'evaluation.metrics.add' }))

@@ -256,7 +357,7 @@ describe('Evaluation', () => {
      },
    })

-    render(<Evaluation resourceType="apps" resourceId="app-5" />)
+    renderWithQueryClient(<Evaluation resourceType="apps" resourceId="app-5" />)

    fireEvent.click(screen.getByRole('button', { name: 'evaluation.metrics.add' }))

@@ -270,7 +371,7 @@ describe('Evaluation', () => {
  })

  it('should render the pipeline-specific layout without auto-selecting a judge model', () => {
-    render(<Evaluation resourceType="datasets" resourceId="dataset-1" />)
+    renderWithQueryClient(<Evaluation resourceType="datasets" resourceId="dataset-1" />)

    expect(screen.getByTestId('evaluation-model-selector')).toHaveTextContent('empty')
    expect(screen.getByText('evaluation.history.title')).toBeInTheDocument()
@@ -294,14 +395,14 @@ describe('Evaluation', () => {
      },
    })

-    render(<Evaluation resourceType="datasets" resourceId="dataset-2" />)
+    renderWithQueryClient(<Evaluation resourceType="datasets" resourceId="dataset-2" />)

    expect(screen.getByText('Context Precision')).toBeInTheDocument()
    expect(screen.getByDisplayValue('0.85')).toBeInTheDocument()
  })

  it('should enable pipeline batch actions after selecting a judge model and metric', () => {
-    render(<Evaluation resourceType="datasets" resourceId="dataset-2" />)
+    renderWithQueryClient(<Evaluation resourceType="datasets" resourceId="dataset-2" />)

    fireEvent.click(screen.getByRole('button', { name: 'select-model' }))
    fireEvent.click(screen.getByRole('button', { name: /Context Precision/i }))
--- a/web/app/components/evaluation/tests/store.spec.ts
+++ b/web/app/components/evaluation/tests/store.spec.ts
@@ -6,6 +6,7 @@ import {
  requiresConditionValue,
  useEvaluationStore,
 } from '../store'
+import { buildEvaluationConfigPayload, buildEvaluationRunRequest } from '../store-utils'

 describe('evaluation store', () => {
  beforeEach(() => {
@@ -271,4 +272,76 @@ describe('evaluation store', () => {
    expect(hydratedState.uploadedFileName).toBe('batch.csv')
    expect(hydratedState.batchRecords).toHaveLength(1)
  })
+
+  it('should build an evaluation config save payload from resource state', () => { // exercises buildEvaluationConfigPayload and buildEvaluationRunRequest
+    const resourceType = 'apps'
+    const resourceId = 'app-save-config' // the resource is read back below under the 'apps:app-save-config' key
+    const store = useEvaluationStore.getState()
+
+    store.ensureResource(resourceType, resourceId)
+    store.setJudgeModel(resourceType, resourceId, 'openai::gpt-4o-mini') // expected below as provider 'openai' and model 'gpt-4o-mini'
+    store.addBuiltinMetric(resourceType, resourceId, 'faithfulness', [
+      { node_id: 'node-faithfulness', title: 'Retriever Node', type: 'retriever' },
+    ])
+    store.addCustomMetric(resourceType, resourceId) // no id is passed in; the created metric is located by kind on the next statement
+
+    const customMetric = useEvaluationStore.getState().resources['apps:app-save-config'].metrics.find(metric => metric.kind === 'custom-workflow')!
+    store.setCustomMetricWorkflow(resourceType, resourceId, customMetric.id, {
+      workflowId: 'workflow-precision-review',
+      workflowAppId: 'evaluation-workflow-app-id',
+      workflowName: 'Precision Review',
+    })
+    store.syncCustomMetricMappings(resourceType, resourceId, customMetric.id, ['query'])
+    store.syncCustomMetricOutputs(resourceType, resourceId, customMetric.id, [{
+      id: 'score',
+      valueType: 'number',
+    }])
+
+    const syncedMetric = useEvaluationStore.getState().resources['apps:app-save-config'].metrics.find(metric => metric.id === customMetric.id)! // re-read so the synced mappings are visible
+    store.updateCustomMetricMapping(resourceType, resourceId, customMetric.id, syncedMetric.customConfig!.mappings[0].id, {
+      outputVariableId: '{{#node-answer.output#}}',
+    })
+    store.addCondition(resourceType, resourceId, ['workflow-precision-review', 'score']) // NOTE(review): added under the workflow id, but the payload below expects the workflow app id — confirm
+
+    const condition = useEvaluationStore.getState().resources['apps:app-save-config'].judgmentConfig.conditions[0]
+    store.updateConditionOperator(resourceType, resourceId, condition.id, '≥')
+    store.updateConditionValue(resourceType, resourceId, condition.id, '0.8')
+
+    const resource = useEvaluationStore.getState().resources['apps:app-save-config'] // final state snapshot handed to both builders
+    const expectedPayload = {
+      evaluation_model: 'gpt-4o-mini',
+      evaluation_model_provider: 'openai',
+      default_metrics: [{
+        metric: 'faithfulness',
+        value_type: 'number',
+        node_info_list: [
+          { node_id: 'node-faithfulness', title: 'Retriever Node', type: 'retriever' },
+        ],
+      }],
+      customized_metrics: {
+        evaluation_workflow_id: 'evaluation-workflow-app-id',
+        input_fields: {
+          query: '{{#node-answer.output#}}',
+        },
+        output_fields: [{
+          variable: 'score',
+          value_type: 'number',
+        }],
+      },
+      judgment_config: {
+        logical_operator: 'and',
+        conditions: [{
+          variable_selector: ['evaluation-workflow-app-id', 'score'], // presumably remapped from the workflow id to the app id by the store or payload builder — TODO confirm
+          comparison_operator: '≥',
+          value: '0.8',
+        }],
+      },
+    }
+
+    expect(buildEvaluationConfigPayload(resource)).toEqual(expectedPayload)
+    expect(buildEvaluationRunRequest(resource, 'file-1')).toEqual({ // the run request is asserted to be the config payload plus file_id
+      ...expectedPayload,
+      file_id: 'file-1',
+    })
+  })
 })
--- a/web/app/components/evaluation/components/batch-test-panel/index.tsx
+++ b/web/app/components/evaluation/components/batch-test-panel/index.tsx
@@ -2,8 +2,12 @@

 import type { BatchTestTab, EvaluationResourceProps } from '../../types'
 import { useTranslation } from 'react-i18next'
+import Button from '@/app/components/base/button'
+import { toast } from '@/app/components/base/ui/toast'
+import { useSaveEvaluationConfigMutation } from '@/service/use-evaluation'
 import { cn } from '@/utils/classnames'
 import { isEvaluationRunnable, useEvaluationResource, useEvaluationStore } from '../../store'
+import { buildEvaluationConfigPayload } from '../../store-utils'
 import { TAB_CLASS_NAME } from '../../utils'
 import HistoryTab from './history-tab'
 import InputFieldsTab from './input-fields-tab'
@@ -15,20 +19,64 @@ const BatchTestPanel = ({
  resourceId,
 }: EvaluationResourceProps) => {
  const { t } = useTranslation('evaluation')
+  const { t: tCommon } = useTranslation('common')
  const tabLabels: Record<BatchTestTab, string> = {
    'input-fields': t('batch.tabs.input-fields'),
    'history': t('batch.tabs.history'),
  }
  const resource = useEvaluationResource(resourceType, resourceId)
  const setBatchTab = useEvaluationStore(state => state.setBatchTab)
+  const saveConfigMutation = useSaveEvaluationConfigMutation()
  const isRunnable = isEvaluationRunnable(resource)
  const isPanelReady = !!resource.judgeModelId && resource.metrics.length > 0

+  /**
+   * Saves the current evaluation configuration for this resource.
+   * Shows the validation warning and stops when the resource is not runnable
+   * or when no payload can be built from it.
+   */
+  const handleSave = () => {
+    const payload = isRunnable ? buildEvaluationConfigPayload(resource) : null
+
+    if (!payload) {
+      toast.warning(t('batch.validation'))
+      return
+    }
+
+    const request = {
+      params: { targetType: resourceType, targetId: resourceId },
+      body: payload,
+    }
+
+    // Per-call callbacks surface the outcome to the user via toasts.
+    saveConfigMutation.mutate(request, {
+      onSuccess: () => {
+        toast.success(tCommon('api.saved'))
+      },
+      onError: () => {
+        toast.error(t('config.saveFailed'))
+      },
+    })
+  }
+
  return (
    <div className="flex h-full min-h-0 flex-col bg-background-default">
      <div className="px-6 py-4">
-        <div className="system-xl-semibold text-text-primary">{t('batch.title')}</div>
-        <div className="mt-1 system-sm-regular text-text-tertiary">{t('batch.description')}</div>
+        <div className="flex items-start justify-between gap-3">
+          <div className="min-w-0">
+            <div className="system-xl-semibold text-text-primary">{t('batch.title')}</div>
+            <div className="mt-1 system-sm-regular text-text-tertiary">{t('batch.description')}</div>
+          </div>
+          <Button
+            className="shrink-0"
+            variant="primary"
+            disabled={!isRunnable}
+            loading={saveConfigMutation.isPending}
+            onClick={handleSave}
+          >
+            {tCommon('operation.save')}
+          </Button>
+        </div>
        <div className="mt-4 rounded-xl border border-divider-subtle bg-components-card-bg p-3">
          <div className="flex items-start gap-3">
            <span aria-hidden="true" className="mt-0.5 i-ri-alert-fill h-4 w-4 shrink-0 text-text-warning" />
--- a/web/app/components/evaluation/store-utils.ts
+++ b/web/app/components/evaluation/store-utils.ts
@@ -11,6 +11,7 @@ import type {
 } from './types'
 import type {
  EvaluationConfig,
+  EvaluationConfigData,
  EvaluationCustomizedMetric,
  EvaluationDefaultMetric,
  EvaluationJudgmentCondition,
@@ -432,7 +433,7 @@ const getCustomMetricScopeId = (metric: EvaluationMetric) => {
  return metric.customConfig?.workflowAppId ?? metric.customConfig?.workflowId ?? null
 }

-const buildCustomizedMetricsPayload = (metrics: EvaluationMetric[]): EvaluationRunRequest['customized_metrics'] => {
+const buildCustomizedMetricsPayload = (metrics: EvaluationMetric[]): EvaluationConfigData['customized_metrics'] => {
  const customMetric = metrics.find(metric => metric.kind === 'custom-workflow')
  const customConfig = customMetric?.customConfig
  const evaluationWorkflowId = customMetric ? getCustomMetricScopeId(customMetric) : null
@@ -456,7 +457,7 @@ const buildCustomizedMetricsPayload = (metrics: EvaluationMetric[]): EvaluationR
  }
 }

-const buildJudgmentConfigPayload = (resource: EvaluationResourceState): EvaluationRunRequest['judgment_config'] => {
+const buildJudgmentConfigPayload = (resource: EvaluationResourceState): EvaluationConfigData['judgment_config'] => {
  const conditions = resource.judgmentConfig.conditions
    .filter(condition => !!condition.variableSelector)
    .map((condition) => {
@@ -484,17 +485,15 @@ const buildJudgmentConfigPayload = (resource: EvaluationResourceState): Evaluati
  }
 }

-export const buildEvaluationRunRequest = (
+export const buildEvaluationConfigPayload = (
  resource: EvaluationResourceState,
-  fileId: string,
-): EvaluationRunRequest | null => {
+): EvaluationConfigData | null => {
  const selectedModel = decodeModelSelection(resource.judgeModelId)

  if (!selectedModel)
    return null

  return {
-    file_id: fileId,
    evaluation_model: selectedModel.model,
    evaluation_model_provider: selectedModel.provider,
    default_metrics: resource.metrics
@@ -509,6 +508,21 @@ export const buildEvaluationRunRequest = (
  }
 }

+/**
+ * Builds the evaluation run request by adding `file_id` to the config payload.
+ * Returns null when no config payload can be built for the resource.
+ */
+export const buildEvaluationRunRequest = (
+  resource: EvaluationResourceState,
+  fileId: string,
+): EvaluationRunRequest | null => {
+  const baseConfig = buildEvaluationConfigPayload(resource)
+
+  return baseConfig
+    ? { ...baseConfig, file_id: fileId }
+    : null
+}
+
 const getResourceState = (
  resources: EvaluationStoreResources,
  resourceType: EvaluationResourceType,
--- a/web/i18n/en-US/evaluation.json
+++ b/web/i18n/en-US/evaluation.json
@@ -61,6 +61,7 @@
  "conditions.valueTypes.boolean": "Boolean",
  "conditions.valueTypes.number": "Number",
  "conditions.valueTypes.string": "String",
+  "config.saveFailed": "Failed to save evaluation configuration.",
  "description": "Configure automated testing to grade your application's performance.",
  "history.actions.downloadResultFile": "Download result",
  "history.actions.downloadTestFile": "Download test file",
--- a/web/i18n/zh-Hans/evaluation.json
+++ b/web/i18n/zh-Hans/evaluation.json
@@ -61,6 +61,7 @@
  "conditions.valueTypes.boolean": "布尔",
  "conditions.valueTypes.number": "数值",
  "conditions.valueTypes.string": "文本",
+  "config.saveFailed": "保存评测配置失败。",
  "description": "配置自动化测试，对应用表现进行评分。",
  "history.actions.downloadResultFile": "下载结果文件",
  "history.actions.downloadTestFile": "下载测试文件",
--- a/web/service/use-evaluation.ts
+++ b/web/service/use-evaluation.ts
@@ -63,6 +63,18 @@ export const useEvaluationNodeInfoMutation = () => {
  return useMutation(consoleQuery.evaluation.nodeInfo.mutationOptions())
 }

+/** Mutation hook for saving an evaluation config; invalidates the evaluation config query on success. */
+export const useSaveEvaluationConfigMutation = () => {
+  const queryClient = useQueryClient()
+  const invalidateConfig = () => {
+    queryClient.invalidateQueries({ queryKey: consoleQuery.evaluation.config.key() })
+  }
+
+  return useMutation(
+    consoleQuery.evaluation.saveConfig.mutationOptions({ onSuccess: invalidateConfig }),
+  )
+}
+
 export const useStartEvaluationRunMutation = () => {
  const queryClient = useQueryClient()