feat(web): save configuration

This commit is contained in:
JzoNg
2026-04-10 13:49:05 +08:00
parent 2df79c0404
commit f96e63460e
7 changed files with 277 additions and 27 deletions

View File

@@ -1,3 +1,5 @@
import type { ReactNode } from 'react'
import { QueryClient, QueryClientProvider } from '@tanstack/react-query'
import { act, fireEvent, render, screen } from '@testing-library/react'
import Evaluation from '..'
import ConditionsSection from '../components/conditions-section'
@@ -6,6 +8,8 @@ import { useEvaluationStore } from '../store'
const mockUseAvailableEvaluationMetrics = vi.hoisted(() => vi.fn())
const mockUseEvaluationConfig = vi.hoisted(() => vi.fn())
const mockUseEvaluationNodeInfoMutation = vi.hoisted(() => vi.fn())
const mockUseSaveEvaluationConfigMutation = vi.hoisted(() => vi.fn())
const mockUseStartEvaluationRunMutation = vi.hoisted(() => vi.fn())
vi.mock('@/app/components/header/account-setting/model-provider-page/hooks', () => ({
useModelList: () => ({
@@ -42,8 +46,71 @@ vi.mock('@/service/use-evaluation', () => ({
useEvaluationConfig: (...args: unknown[]) => mockUseEvaluationConfig(...args),
useAvailableEvaluationMetrics: (...args: unknown[]) => mockUseAvailableEvaluationMetrics(...args),
useEvaluationNodeInfoMutation: (...args: unknown[]) => mockUseEvaluationNodeInfoMutation(...args),
useSaveEvaluationConfigMutation: (...args: unknown[]) => mockUseSaveEvaluationConfigMutation(...args),
useStartEvaluationRunMutation: (...args: unknown[]) => mockUseStartEvaluationRunMutation(...args),
}))
// Stub the app-workflow service with a minimal published graph: a single
// start node exposing one text input named 'query'.
vi.mock('@/service/use-workflow', () => {
  const makeStartNode = () => ({
    id: 'start',
    data: {
      type: 'start',
      variables: [{ variable: 'query', type: 'text-input' }],
    },
  })
  return {
    useAppWorkflow: () => ({
      data: { graph: { nodes: [makeStartNode()] } },
      isLoading: false,
    }),
  }
})
// Stub the snippet-workflow service with the same minimal graph shape as
// the app workflow mock: one start node with a single 'query' text input.
vi.mock('@/service/use-snippet-workflows', () => {
  const makeStartNode = () => ({
    id: 'start',
    data: {
      type: 'start',
      variables: [{ variable: 'query', type: 'text-input' }],
    },
  })
  return {
    useSnippetPublishedWorkflow: () => ({
      data: { graph: { nodes: [makeStartNode()] } },
      isLoading: false,
    }),
  }
})
// Render a component under a fresh QueryClientProvider. Retries are
// disabled for both queries and mutations so failing requests surface
// immediately instead of being retried during the test run.
const renderWithQueryClient = (ui: ReactNode) => {
  const client = new QueryClient({
    defaultOptions: {
      queries: { retry: false },
      mutations: { retry: false },
    },
  })
  const Wrapper = ({ children }: { children: ReactNode }) => (
    <QueryClientProvider client={client}>{children}</QueryClientProvider>
  )
  return render(ui, { wrapper: Wrapper })
}
describe('Evaluation', () => {
beforeEach(() => {
useEvaluationStore.setState({ resources: {} })
@@ -72,12 +139,24 @@ describe('Evaluation', () => {
})
},
})
mockUseSaveEvaluationConfigMutation.mockReturnValue({
isPending: false,
mutate: vi.fn(),
})
mockUseStartEvaluationRunMutation.mockReturnValue({
isPending: false,
mutate: vi.fn(),
})
})
it('should search, select metric nodes, and create a batch history record', async () => {
vi.useFakeTimers()
it('should search, select metric nodes, and save evaluation config', () => {
const saveConfig = vi.fn()
mockUseSaveEvaluationConfigMutation.mockReturnValue({
isPending: false,
mutate: saveConfig,
})
render(<Evaluation resourceType="apps" resourceId="app-1" />)
renderWithQueryClient(<Evaluation resourceType="apps" resourceId="app-1" />)
expect(screen.getByTestId('evaluation-model-selector')).toHaveTextContent('openai:gpt-4o-mini')
@@ -104,17 +183,39 @@ describe('Evaluation', () => {
fireEvent.click(screen.getByTestId('evaluation-metric-node-answer-correctness-node-answer'))
expect(screen.getAllByText('Answer Correctness').length).toBeGreaterThan(0)
fireEvent.click(screen.getByRole('button', { name: 'evaluation.batch.run' }))
expect(screen.getByText('evaluation.batch.status.running')).toBeInTheDocument()
fireEvent.click(screen.getByRole('button', { name: 'common.operation.save' }))
await act(async () => {
vi.advanceTimersByTime(1300)
expect(saveConfig).toHaveBeenCalledWith({
params: {
targetType: 'apps',
targetId: 'app-1',
},
body: {
evaluation_model: 'gpt-4o-mini',
evaluation_model_provider: 'openai',
default_metrics: [
{
metric: 'faithfulness',
value_type: 'number',
node_info_list: [
{ node_id: 'node-faithfulness', title: 'Retriever Node', type: 'retriever' },
],
},
{
metric: 'answer-correctness',
value_type: 'number',
node_info_list: [
{ node_id: 'node-answer', title: 'Answer Node', type: 'llm' },
],
},
],
customized_metrics: null,
judgment_config: null,
},
}, {
onSuccess: expect.any(Function),
onError: expect.any(Function),
})
expect(screen.getByText('evaluation.batch.status.success')).toBeInTheDocument()
expect(screen.getByText('Workflow evaluation batch')).toBeInTheDocument()
vi.useRealTimers()
})
it('should hide the value row for empty operators', () => {
@@ -138,7 +239,7 @@ describe('Evaluation', () => {
let rerender: ReturnType<typeof render>['rerender']
act(() => {
({ rerender } = render(<Evaluation resourceType={resourceType} resourceId={resourceId} />))
({ rerender } = renderWithQueryClient(<Evaluation resourceType={resourceType} resourceId={resourceId} />))
})
expect(screen.getByPlaceholderText('evaluation.conditions.valuePlaceholder')).toBeInTheDocument()
@@ -212,7 +313,7 @@ describe('Evaluation', () => {
},
})
render(<Evaluation resourceType="apps" resourceId="app-3" />)
renderWithQueryClient(<Evaluation resourceType="apps" resourceId="app-3" />)
fireEvent.click(screen.getByRole('button', { name: 'evaluation.metrics.add' }))
@@ -227,7 +328,7 @@ describe('Evaluation', () => {
isLoading: false,
})
render(<Evaluation resourceType="apps" resourceId="app-4" />)
renderWithQueryClient(<Evaluation resourceType="apps" resourceId="app-4" />)
fireEvent.click(screen.getByRole('button', { name: 'evaluation.metrics.add' }))
@@ -256,7 +357,7 @@ describe('Evaluation', () => {
},
})
render(<Evaluation resourceType="apps" resourceId="app-5" />)
renderWithQueryClient(<Evaluation resourceType="apps" resourceId="app-5" />)
fireEvent.click(screen.getByRole('button', { name: 'evaluation.metrics.add' }))
@@ -270,7 +371,7 @@ describe('Evaluation', () => {
})
it('should render the pipeline-specific layout without auto-selecting a judge model', () => {
render(<Evaluation resourceType="datasets" resourceId="dataset-1" />)
renderWithQueryClient(<Evaluation resourceType="datasets" resourceId="dataset-1" />)
expect(screen.getByTestId('evaluation-model-selector')).toHaveTextContent('empty')
expect(screen.getByText('evaluation.history.title')).toBeInTheDocument()
@@ -294,14 +395,14 @@ describe('Evaluation', () => {
},
})
render(<Evaluation resourceType="datasets" resourceId="dataset-2" />)
renderWithQueryClient(<Evaluation resourceType="datasets" resourceId="dataset-2" />)
expect(screen.getByText('Context Precision')).toBeInTheDocument()
expect(screen.getByDisplayValue('0.85')).toBeInTheDocument()
})
it('should enable pipeline batch actions after selecting a judge model and metric', () => {
render(<Evaluation resourceType="datasets" resourceId="dataset-2" />)
renderWithQueryClient(<Evaluation resourceType="datasets" resourceId="dataset-2" />)
fireEvent.click(screen.getByRole('button', { name: 'select-model' }))
fireEvent.click(screen.getByRole('button', { name: /Context Precision/i }))

View File

@@ -6,6 +6,7 @@ import {
requiresConditionValue,
useEvaluationStore,
} from '../store'
import { buildEvaluationConfigPayload, buildEvaluationRunRequest } from '../store-utils'
describe('evaluation store', () => {
beforeEach(() => {
@@ -271,4 +272,76 @@ describe('evaluation store', () => {
expect(hydratedState.uploadedFileName).toBe('batch.csv')
expect(hydratedState.batchRecords).toHaveLength(1)
})
it('should build an evaluation config save payload from resource state', () => {
  // Arrange: populate a fresh resource with a judge model, one builtin
  // metric, one custom-workflow metric, and a judgment condition, then
  // assert the serialized payloads mirror that state.
  const resourceType = 'apps'
  const resourceId = 'app-save-config'
  const store = useEvaluationStore.getState()
  store.ensureResource(resourceType, resourceId)
  // Judge model ids are encoded as '<provider>::<model>' (split into
  // evaluation_model_provider / evaluation_model in the payload below).
  store.setJudgeModel(resourceType, resourceId, 'openai::gpt-4o-mini')
  store.addBuiltinMetric(resourceType, resourceId, 'faithfulness', [
    { node_id: 'node-faithfulness', title: 'Retriever Node', type: 'retriever' },
  ])
  store.addCustomMetric(resourceType, resourceId)
  // Re-read the latest snapshot after mutating the store to pick up the
  // generated custom metric.
  const customMetric = useEvaluationStore.getState().resources['apps:app-save-config'].metrics.find(metric => metric.kind === 'custom-workflow')!
  store.setCustomMetricWorkflow(resourceType, resourceId, customMetric.id, {
    workflowId: 'workflow-precision-review',
    workflowAppId: 'evaluation-workflow-app-id',
    workflowName: 'Precision Review',
  })
  store.syncCustomMetricMappings(resourceType, resourceId, customMetric.id, ['query'])
  store.syncCustomMetricOutputs(resourceType, resourceId, customMetric.id, [{
    id: 'score',
    valueType: 'number',
  }])
  // Re-read again so the synced mappings are visible before updating one.
  const syncedMetric = useEvaluationStore.getState().resources['apps:app-save-config'].metrics.find(metric => metric.id === customMetric.id)!
  store.updateCustomMetricMapping(resourceType, resourceId, customMetric.id, syncedMetric.customConfig!.mappings[0].id, {
    outputVariableId: '{{#node-answer.output#}}',
  })
  store.addCondition(resourceType, resourceId, ['workflow-precision-review', 'score'])
  const condition = useEvaluationStore.getState().resources['apps:app-save-config'].judgmentConfig.conditions[0]
  store.updateConditionOperator(resourceType, resourceId, condition.id, '≥')
  store.updateConditionValue(resourceType, resourceId, condition.id, '0.8')
  const resource = useEvaluationStore.getState().resources['apps:app-save-config']
  const expectedPayload = {
    evaluation_model: 'gpt-4o-mini',
    evaluation_model_provider: 'openai',
    default_metrics: [{
      metric: 'faithfulness',
      value_type: 'number',
      node_info_list: [
        { node_id: 'node-faithfulness', title: 'Retriever Node', type: 'retriever' },
      ],
    }],
    customized_metrics: {
      evaluation_workflow_id: 'evaluation-workflow-app-id',
      input_fields: {
        query: '{{#node-answer.output#}}',
      },
      output_fields: [{
        variable: 'score',
        value_type: 'number',
      }],
    },
    judgment_config: {
      logical_operator: 'and',
      conditions: [{
        // NOTE(review): the condition was added with the raw workflow id
        // ('workflow-precision-review'); the payload builder appears to
        // remap it to the workflow app id — confirm this is intentional.
        variable_selector: ['evaluation-workflow-app-id', 'score'],
        comparison_operator: '≥',
        value: '0.8',
      }],
    },
  }
  expect(buildEvaluationConfigPayload(resource)).toEqual(expectedPayload)
  // The run request is the saved-config payload plus the uploaded file id.
  expect(buildEvaluationRunRequest(resource, 'file-1')).toEqual({
    ...expectedPayload,
    file_id: 'file-1',
  })
})
})

View File

@@ -2,8 +2,12 @@
import type { BatchTestTab, EvaluationResourceProps } from '../../types'
import { useTranslation } from 'react-i18next'
import Button from '@/app/components/base/button'
import { toast } from '@/app/components/base/ui/toast'
import { useSaveEvaluationConfigMutation } from '@/service/use-evaluation'
import { cn } from '@/utils/classnames'
import { isEvaluationRunnable, useEvaluationResource, useEvaluationStore } from '../../store'
import { buildEvaluationConfigPayload } from '../../store-utils'
import { TAB_CLASS_NAME } from '../../utils'
import HistoryTab from './history-tab'
import InputFieldsTab from './input-fields-tab'
@@ -15,20 +19,64 @@ const BatchTestPanel = ({
resourceId,
}: EvaluationResourceProps) => {
const { t } = useTranslation('evaluation')
const { t: tCommon } = useTranslation('common')
const tabLabels: Record<BatchTestTab, string> = {
'input-fields': t('batch.tabs.input-fields'),
'history': t('batch.tabs.history'),
}
const resource = useEvaluationResource(resourceType, resourceId)
const setBatchTab = useEvaluationStore(state => state.setBatchTab)
const saveConfigMutation = useSaveEvaluationConfigMutation()
const isRunnable = isEvaluationRunnable(resource)
const isPanelReady = !!resource.judgeModelId && resource.metrics.length > 0
// Persist the current evaluation configuration. Validation happens in two
// steps: the resource must be runnable, and a payload must be derivable
// from it; either failure surfaces the same validation warning.
const handleSave = () => {
  // Short-circuit: only build the payload for a runnable resource.
  const body = isRunnable ? buildEvaluationConfigPayload(resource) : null
  if (!body) {
    toast.warning(t('batch.validation'))
    return
  }
  saveConfigMutation.mutate(
    {
      params: { targetType: resourceType, targetId: resourceId },
      body,
    },
    {
      onSuccess: () => {
        toast.success(tCommon('api.saved'))
      },
      onError: () => {
        toast.error(t('config.saveFailed'))
      },
    },
  )
}
return (
<div className="flex h-full min-h-0 flex-col bg-background-default">
<div className="px-6 py-4">
<div className="system-xl-semibold text-text-primary">{t('batch.title')}</div>
<div className="mt-1 system-sm-regular text-text-tertiary">{t('batch.description')}</div>
<div className="flex items-start justify-between gap-3">
<div className="min-w-0">
<div className="system-xl-semibold text-text-primary">{t('batch.title')}</div>
<div className="mt-1 system-sm-regular text-text-tertiary">{t('batch.description')}</div>
</div>
<Button
className="shrink-0"
variant="primary"
disabled={!isRunnable}
loading={saveConfigMutation.isPending}
onClick={handleSave}
>
{tCommon('operation.save')}
</Button>
</div>
<div className="mt-4 rounded-xl border border-divider-subtle bg-components-card-bg p-3">
<div className="flex items-start gap-3">
<span aria-hidden="true" className="mt-0.5 i-ri-alert-fill h-4 w-4 shrink-0 text-text-warning" />

View File

@@ -11,6 +11,7 @@ import type {
} from './types'
import type {
EvaluationConfig,
EvaluationConfigData,
EvaluationCustomizedMetric,
EvaluationDefaultMetric,
EvaluationJudgmentCondition,
@@ -432,7 +433,7 @@ const getCustomMetricScopeId = (metric: EvaluationMetric) => {
return metric.customConfig?.workflowAppId ?? metric.customConfig?.workflowId ?? null
}
const buildCustomizedMetricsPayload = (metrics: EvaluationMetric[]): EvaluationRunRequest['customized_metrics'] => {
const buildCustomizedMetricsPayload = (metrics: EvaluationMetric[]): EvaluationConfigData['customized_metrics'] => {
const customMetric = metrics.find(metric => metric.kind === 'custom-workflow')
const customConfig = customMetric?.customConfig
const evaluationWorkflowId = customMetric ? getCustomMetricScopeId(customMetric) : null
@@ -456,7 +457,7 @@ const buildCustomizedMetricsPayload = (metrics: EvaluationMetric[]): EvaluationR
}
}
const buildJudgmentConfigPayload = (resource: EvaluationResourceState): EvaluationRunRequest['judgment_config'] => {
const buildJudgmentConfigPayload = (resource: EvaluationResourceState): EvaluationConfigData['judgment_config'] => {
const conditions = resource.judgmentConfig.conditions
.filter(condition => !!condition.variableSelector)
.map((condition) => {
@@ -484,17 +485,15 @@ const buildJudgmentConfigPayload = (resource: EvaluationResourceState): Evaluati
}
}
export const buildEvaluationRunRequest = (
export const buildEvaluationConfigPayload = (
resource: EvaluationResourceState,
fileId: string,
): EvaluationRunRequest | null => {
): EvaluationConfigData | null => {
const selectedModel = decodeModelSelection(resource.judgeModelId)
if (!selectedModel)
return null
return {
file_id: fileId,
evaluation_model: selectedModel.model,
evaluation_model_provider: selectedModel.provider,
default_metrics: resource.metrics
@@ -509,6 +508,21 @@ export const buildEvaluationRunRequest = (
}
}
/**
 * Build the request body for starting an evaluation run: the persisted
 * config payload extended with the uploaded test file's id.
 *
 * Returns null when no valid config payload can be derived from the
 * resource state (e.g. no judge model is selected).
 */
export const buildEvaluationRunRequest = (
  resource: EvaluationResourceState,
  fileId: string,
): EvaluationRunRequest | null => {
  const config = buildEvaluationConfigPayload(resource)
  return config === null ? null : { ...config, file_id: fileId }
}
const getResourceState = (
resources: EvaluationStoreResources,
resourceType: EvaluationResourceType,

View File

@@ -61,6 +61,7 @@
"conditions.valueTypes.boolean": "Boolean",
"conditions.valueTypes.number": "Number",
"conditions.valueTypes.string": "String",
"config.saveFailed": "Failed to save evaluation configuration.",
"description": "Configure automated testing to grade your application's performance.",
"history.actions.downloadResultFile": "Download result",
"history.actions.downloadTestFile": "Download test file",

View File

@@ -61,6 +61,7 @@
"conditions.valueTypes.boolean": "布尔",
"conditions.valueTypes.number": "数值",
"conditions.valueTypes.string": "文本",
"config.saveFailed": "保存评测配置失败。",
"description": "配置自动化测试,对应用表现进行评分。",
"history.actions.downloadResultFile": "下载结果文件",
"history.actions.downloadTestFile": "下载测试文件",

View File

@@ -63,6 +63,18 @@ export const useEvaluationNodeInfoMutation = () => {
return useMutation(consoleQuery.evaluation.nodeInfo.mutationOptions())
}
// Mutation hook for persisting the evaluation configuration. On success it
// invalidates the cached config query so consumers refetch server state.
export const useSaveEvaluationConfigMutation = () => {
  const queryClient = useQueryClient()
  const options = consoleQuery.evaluation.saveConfig.mutationOptions({
    onSuccess: () => {
      queryClient.invalidateQueries({ queryKey: consoleQuery.evaluation.config.key() })
    },
  })
  return useMutation(options)
}
export const useStartEvaluationRunMutation = () => {
const queryClient = useQueryClient()