mirror of
https://github.com/langgenius/dify.git
synced 2026-04-12 00:00:14 -04:00
feat(web): save configuration
This commit is contained in:
@@ -1,3 +1,5 @@
|
||||
import type { ReactNode } from 'react'
|
||||
import { QueryClient, QueryClientProvider } from '@tanstack/react-query'
|
||||
import { act, fireEvent, render, screen } from '@testing-library/react'
|
||||
import Evaluation from '..'
|
||||
import ConditionsSection from '../components/conditions-section'
|
||||
@@ -6,6 +8,8 @@ import { useEvaluationStore } from '../store'
|
||||
const mockUseAvailableEvaluationMetrics = vi.hoisted(() => vi.fn())
|
||||
const mockUseEvaluationConfig = vi.hoisted(() => vi.fn())
|
||||
const mockUseEvaluationNodeInfoMutation = vi.hoisted(() => vi.fn())
|
||||
const mockUseSaveEvaluationConfigMutation = vi.hoisted(() => vi.fn())
|
||||
const mockUseStartEvaluationRunMutation = vi.hoisted(() => vi.fn())
|
||||
|
||||
vi.mock('@/app/components/header/account-setting/model-provider-page/hooks', () => ({
|
||||
useModelList: () => ({
|
||||
@@ -42,8 +46,71 @@ vi.mock('@/service/use-evaluation', () => ({
|
||||
useEvaluationConfig: (...args: unknown[]) => mockUseEvaluationConfig(...args),
|
||||
useAvailableEvaluationMetrics: (...args: unknown[]) => mockUseAvailableEvaluationMetrics(...args),
|
||||
useEvaluationNodeInfoMutation: (...args: unknown[]) => mockUseEvaluationNodeInfoMutation(...args),
|
||||
useSaveEvaluationConfigMutation: (...args: unknown[]) => mockUseSaveEvaluationConfigMutation(...args),
|
||||
useStartEvaluationRunMutation: (...args: unknown[]) => mockUseStartEvaluationRunMutation(...args),
|
||||
}))
|
||||
|
||||
// Replaces the app workflow service with a stub whose hook reports a loaded
// workflow: one node with id 'start' exposing a single 'query' variable of
// type 'text-input'.
vi.mock('@/service/use-workflow', () => {
  // Everything is built inside the hook so each call hands back fresh objects.
  const useAppWorkflow = () => {
    const queryVariable = {
      variable: 'query',
      type: 'text-input',
    }
    const startNode = {
      id: 'start',
      data: {
        type: 'start',
        variables: [queryVariable],
      },
    }

    return {
      data: {
        graph: {
          nodes: [startNode],
        },
      },
      isLoading: false,
    }
  }

  return { useAppWorkflow }
})
|
||||
|
||||
// Replaces the snippet workflow service with a stub whose hook reports a
// loaded published workflow: one node with id 'start' exposing a single
// 'query' variable of type 'text-input'.
vi.mock('@/service/use-snippet-workflows', () => {
  // Everything is built inside the hook so each call hands back fresh objects.
  const useSnippetPublishedWorkflow = () => {
    const queryVariable = {
      variable: 'query',
      type: 'text-input',
    }
    const startNode = {
      id: 'start',
      data: {
        type: 'start',
        variables: [queryVariable],
      },
    }

    return {
      data: {
        graph: {
          nodes: [startNode],
        },
      },
      isLoading: false,
    }
  }

  return { useSnippetPublishedWorkflow }
})
|
||||
|
||||
/**
 * Renders `ui` wrapped in a `QueryClientProvider`.
 *
 * A new `QueryClient` is created on every call, with retries disabled for
 * both queries and mutations.
 *
 * @param ui - The element tree to render.
 * @returns Whatever `render` returns for the wrapped tree.
 */
const renderWithQueryClient = (ui: ReactNode) => {
  const queryClient = new QueryClient({
    defaultOptions: {
      queries: { retry: false },
      mutations: { retry: false },
    },
  })

  // Provider wrapper handed to `render` so `ui` itself stays untouched.
  const Wrapper = ({ children }: { children: ReactNode }) => (
    <QueryClientProvider client={queryClient}>{children}</QueryClientProvider>
  )

  return render(ui, { wrapper: Wrapper })
}
|
||||
|
||||
describe('Evaluation', () => {
|
||||
beforeEach(() => {
|
||||
useEvaluationStore.setState({ resources: {} })
|
||||
@@ -72,12 +139,24 @@ describe('Evaluation', () => {
|
||||
})
|
||||
},
|
||||
})
|
||||
mockUseSaveEvaluationConfigMutation.mockReturnValue({
|
||||
isPending: false,
|
||||
mutate: vi.fn(),
|
||||
})
|
||||
mockUseStartEvaluationRunMutation.mockReturnValue({
|
||||
isPending: false,
|
||||
mutate: vi.fn(),
|
||||
})
|
||||
})
|
||||
|
||||
it('should search, select metric nodes, and create a batch history record', async () => {
|
||||
vi.useFakeTimers()
|
||||
it('should search, select metric nodes, and save evaluation config', () => {
|
||||
const saveConfig = vi.fn()
|
||||
mockUseSaveEvaluationConfigMutation.mockReturnValue({
|
||||
isPending: false,
|
||||
mutate: saveConfig,
|
||||
})
|
||||
|
||||
render(<Evaluation resourceType="apps" resourceId="app-1" />)
|
||||
renderWithQueryClient(<Evaluation resourceType="apps" resourceId="app-1" />)
|
||||
|
||||
expect(screen.getByTestId('evaluation-model-selector')).toHaveTextContent('openai:gpt-4o-mini')
|
||||
|
||||
@@ -104,17 +183,39 @@ describe('Evaluation', () => {
|
||||
fireEvent.click(screen.getByTestId('evaluation-metric-node-answer-correctness-node-answer'))
|
||||
expect(screen.getAllByText('Answer Correctness').length).toBeGreaterThan(0)
|
||||
|
||||
fireEvent.click(screen.getByRole('button', { name: 'evaluation.batch.run' }))
|
||||
expect(screen.getByText('evaluation.batch.status.running')).toBeInTheDocument()
|
||||
fireEvent.click(screen.getByRole('button', { name: 'common.operation.save' }))
|
||||
|
||||
await act(async () => {
|
||||
vi.advanceTimersByTime(1300)
|
||||
expect(saveConfig).toHaveBeenCalledWith({
|
||||
params: {
|
||||
targetType: 'apps',
|
||||
targetId: 'app-1',
|
||||
},
|
||||
body: {
|
||||
evaluation_model: 'gpt-4o-mini',
|
||||
evaluation_model_provider: 'openai',
|
||||
default_metrics: [
|
||||
{
|
||||
metric: 'faithfulness',
|
||||
value_type: 'number',
|
||||
node_info_list: [
|
||||
{ node_id: 'node-faithfulness', title: 'Retriever Node', type: 'retriever' },
|
||||
],
|
||||
},
|
||||
{
|
||||
metric: 'answer-correctness',
|
||||
value_type: 'number',
|
||||
node_info_list: [
|
||||
{ node_id: 'node-answer', title: 'Answer Node', type: 'llm' },
|
||||
],
|
||||
},
|
||||
],
|
||||
customized_metrics: null,
|
||||
judgment_config: null,
|
||||
},
|
||||
}, {
|
||||
onSuccess: expect.any(Function),
|
||||
onError: expect.any(Function),
|
||||
})
|
||||
|
||||
expect(screen.getByText('evaluation.batch.status.success')).toBeInTheDocument()
|
||||
expect(screen.getByText('Workflow evaluation batch')).toBeInTheDocument()
|
||||
|
||||
vi.useRealTimers()
|
||||
})
|
||||
|
||||
it('should hide the value row for empty operators', () => {
|
||||
@@ -138,7 +239,7 @@ describe('Evaluation', () => {
|
||||
|
||||
let rerender: ReturnType<typeof render>['rerender']
|
||||
act(() => {
|
||||
({ rerender } = render(<Evaluation resourceType={resourceType} resourceId={resourceId} />))
|
||||
({ rerender } = renderWithQueryClient(<Evaluation resourceType={resourceType} resourceId={resourceId} />))
|
||||
})
|
||||
|
||||
expect(screen.getByPlaceholderText('evaluation.conditions.valuePlaceholder')).toBeInTheDocument()
|
||||
@@ -212,7 +313,7 @@ describe('Evaluation', () => {
|
||||
},
|
||||
})
|
||||
|
||||
render(<Evaluation resourceType="apps" resourceId="app-3" />)
|
||||
renderWithQueryClient(<Evaluation resourceType="apps" resourceId="app-3" />)
|
||||
|
||||
fireEvent.click(screen.getByRole('button', { name: 'evaluation.metrics.add' }))
|
||||
|
||||
@@ -227,7 +328,7 @@ describe('Evaluation', () => {
|
||||
isLoading: false,
|
||||
})
|
||||
|
||||
render(<Evaluation resourceType="apps" resourceId="app-4" />)
|
||||
renderWithQueryClient(<Evaluation resourceType="apps" resourceId="app-4" />)
|
||||
|
||||
fireEvent.click(screen.getByRole('button', { name: 'evaluation.metrics.add' }))
|
||||
|
||||
@@ -256,7 +357,7 @@ describe('Evaluation', () => {
|
||||
},
|
||||
})
|
||||
|
||||
render(<Evaluation resourceType="apps" resourceId="app-5" />)
|
||||
renderWithQueryClient(<Evaluation resourceType="apps" resourceId="app-5" />)
|
||||
|
||||
fireEvent.click(screen.getByRole('button', { name: 'evaluation.metrics.add' }))
|
||||
|
||||
@@ -270,7 +371,7 @@ describe('Evaluation', () => {
|
||||
})
|
||||
|
||||
it('should render the pipeline-specific layout without auto-selecting a judge model', () => {
|
||||
render(<Evaluation resourceType="datasets" resourceId="dataset-1" />)
|
||||
renderWithQueryClient(<Evaluation resourceType="datasets" resourceId="dataset-1" />)
|
||||
|
||||
expect(screen.getByTestId('evaluation-model-selector')).toHaveTextContent('empty')
|
||||
expect(screen.getByText('evaluation.history.title')).toBeInTheDocument()
|
||||
@@ -294,14 +395,14 @@ describe('Evaluation', () => {
|
||||
},
|
||||
})
|
||||
|
||||
render(<Evaluation resourceType="datasets" resourceId="dataset-2" />)
|
||||
renderWithQueryClient(<Evaluation resourceType="datasets" resourceId="dataset-2" />)
|
||||
|
||||
expect(screen.getByText('Context Precision')).toBeInTheDocument()
|
||||
expect(screen.getByDisplayValue('0.85')).toBeInTheDocument()
|
||||
})
|
||||
|
||||
it('should enable pipeline batch actions after selecting a judge model and metric', () => {
|
||||
render(<Evaluation resourceType="datasets" resourceId="dataset-2" />)
|
||||
renderWithQueryClient(<Evaluation resourceType="datasets" resourceId="dataset-2" />)
|
||||
|
||||
fireEvent.click(screen.getByRole('button', { name: 'select-model' }))
|
||||
fireEvent.click(screen.getByRole('button', { name: /Context Precision/i }))
|
||||
|
||||
@@ -6,6 +6,7 @@ import {
|
||||
requiresConditionValue,
|
||||
useEvaluationStore,
|
||||
} from '../store'
|
||||
import { buildEvaluationConfigPayload, buildEvaluationRunRequest } from '../store-utils'
|
||||
|
||||
describe('evaluation store', () => {
|
||||
beforeEach(() => {
|
||||
@@ -271,4 +272,76 @@ describe('evaluation store', () => {
|
||||
expect(hydratedState.uploadedFileName).toBe('batch.csv')
|
||||
expect(hydratedState.batchRecords).toHaveLength(1)
|
||||
})
|
||||
|
||||
// Drives the store through a judge model, one builtin metric, one custom
// workflow metric and one judgment condition, then checks that both payload
// builders serialize that state (the run request adds only `file_id`).
it('should build an evaluation config save payload from resource state', () => {
  const resourceType = 'apps'
  const resourceId = 'app-save-config'
  // Reads the latest snapshot of the resource under test from the store.
  const readResource = () => useEvaluationStore.getState().resources['apps:app-save-config']
  const store = useEvaluationStore.getState()

  store.ensureResource(resourceType, resourceId)
  store.setJudgeModel(resourceType, resourceId, 'openai::gpt-4o-mini')
  store.addBuiltinMetric(resourceType, resourceId, 'faithfulness', [
    { node_id: 'node-faithfulness', title: 'Retriever Node', type: 'retriever' },
  ])
  store.addCustomMetric(resourceType, resourceId)

  const createdMetric = readResource().metrics.find(metric => metric.kind === 'custom-workflow')!
  const metricId = createdMetric.id
  store.setCustomMetricWorkflow(resourceType, resourceId, metricId, {
    workflowId: 'workflow-precision-review',
    workflowAppId: 'evaluation-workflow-app-id',
    workflowName: 'Precision Review',
  })
  store.syncCustomMetricMappings(resourceType, resourceId, metricId, ['query'])
  store.syncCustomMetricOutputs(resourceType, resourceId, metricId, [
    { id: 'score', valueType: 'number' },
  ])

  // Re-read the metric: the mapping id only exists after the sync above.
  const metricAfterSync = readResource().metrics.find(metric => metric.id === metricId)!
  const firstMappingId = metricAfterSync.customConfig!.mappings[0].id
  store.updateCustomMetricMapping(resourceType, resourceId, metricId, firstMappingId, {
    outputVariableId: '{{#node-answer.output#}}',
  })
  store.addCondition(resourceType, resourceId, ['workflow-precision-review', 'score'])

  const firstCondition = readResource().judgmentConfig.conditions[0]
  store.updateConditionOperator(resourceType, resourceId, firstCondition.id, '≥')
  store.updateConditionValue(resourceType, resourceId, firstCondition.id, '0.8')

  const finalResource = readResource()
  const expectedConfig = {
    evaluation_model: 'gpt-4o-mini',
    evaluation_model_provider: 'openai',
    default_metrics: [
      {
        metric: 'faithfulness',
        value_type: 'number',
        node_info_list: [
          { node_id: 'node-faithfulness', title: 'Retriever Node', type: 'retriever' },
        ],
      },
    ],
    customized_metrics: {
      evaluation_workflow_id: 'evaluation-workflow-app-id',
      input_fields: {
        query: '{{#node-answer.output#}}',
      },
      output_fields: [
        { variable: 'score', value_type: 'number' },
      ],
    },
    judgment_config: {
      logical_operator: 'and',
      conditions: [
        {
          variable_selector: ['evaluation-workflow-app-id', 'score'],
          comparison_operator: '≥',
          value: '0.8',
        },
      ],
    },
  }

  expect(buildEvaluationConfigPayload(finalResource)).toEqual(expectedConfig)
  expect(buildEvaluationRunRequest(finalResource, 'file-1')).toEqual({
    ...expectedConfig,
    file_id: 'file-1',
  })
})
|
||||
})
|
||||
|
||||
@@ -2,8 +2,12 @@
|
||||
|
||||
import type { BatchTestTab, EvaluationResourceProps } from '../../types'
|
||||
import { useTranslation } from 'react-i18next'
|
||||
import Button from '@/app/components/base/button'
|
||||
import { toast } from '@/app/components/base/ui/toast'
|
||||
import { useSaveEvaluationConfigMutation } from '@/service/use-evaluation'
|
||||
import { cn } from '@/utils/classnames'
|
||||
import { isEvaluationRunnable, useEvaluationResource, useEvaluationStore } from '../../store'
|
||||
import { buildEvaluationConfigPayload } from '../../store-utils'
|
||||
import { TAB_CLASS_NAME } from '../../utils'
|
||||
import HistoryTab from './history-tab'
|
||||
import InputFieldsTab from './input-fields-tab'
|
||||
@@ -15,20 +19,64 @@ const BatchTestPanel = ({
|
||||
resourceId,
|
||||
}: EvaluationResourceProps) => {
|
||||
const { t } = useTranslation('evaluation')
|
||||
const { t: tCommon } = useTranslation('common')
|
||||
const tabLabels: Record<BatchTestTab, string> = {
|
||||
'input-fields': t('batch.tabs.input-fields'),
|
||||
'history': t('batch.tabs.history'),
|
||||
}
|
||||
const resource = useEvaluationResource(resourceType, resourceId)
|
||||
const setBatchTab = useEvaluationStore(state => state.setBatchTab)
|
||||
const saveConfigMutation = useSaveEvaluationConfigMutation()
|
||||
const isRunnable = isEvaluationRunnable(resource)
|
||||
const isPanelReady = !!resource.judgeModelId && resource.metrics.length > 0
|
||||
|
||||
/**
 * Saves the current evaluation configuration for this resource.
 *
 * Shows the `batch.validation` warning and stops when the resource is not
 * runnable or when no payload can be built from it. Otherwise it fires the
 * save mutation and reports the outcome with a success or error toast.
 */
const handleSave = () => {
  // The payload is only built for a runnable resource; both failure modes
  // surface the same validation warning.
  const body = isRunnable ? buildEvaluationConfigPayload(resource) : null

  if (!body) {
    toast.warning(t('batch.validation'))
    return
  }

  const params = {
    targetType: resourceType,
    targetId: resourceId,
  }

  saveConfigMutation.mutate({ params, body }, {
    onSuccess: () => {
      toast.success(tCommon('api.saved'))
    },
    onError: () => {
      toast.error(t('config.saveFailed'))
    },
  })
}
|
||||
|
||||
return (
|
||||
<div className="flex h-full min-h-0 flex-col bg-background-default">
|
||||
<div className="px-6 py-4">
|
||||
<div className="system-xl-semibold text-text-primary">{t('batch.title')}</div>
|
||||
<div className="mt-1 system-sm-regular text-text-tertiary">{t('batch.description')}</div>
|
||||
<div className="flex items-start justify-between gap-3">
|
||||
<div className="min-w-0">
|
||||
<div className="system-xl-semibold text-text-primary">{t('batch.title')}</div>
|
||||
<div className="mt-1 system-sm-regular text-text-tertiary">{t('batch.description')}</div>
|
||||
</div>
|
||||
<Button
|
||||
className="shrink-0"
|
||||
variant="primary"
|
||||
disabled={!isRunnable}
|
||||
loading={saveConfigMutation.isPending}
|
||||
onClick={handleSave}
|
||||
>
|
||||
{tCommon('operation.save')}
|
||||
</Button>
|
||||
</div>
|
||||
<div className="mt-4 rounded-xl border border-divider-subtle bg-components-card-bg p-3">
|
||||
<div className="flex items-start gap-3">
|
||||
<span aria-hidden="true" className="mt-0.5 i-ri-alert-fill h-4 w-4 shrink-0 text-text-warning" />
|
||||
|
||||
@@ -11,6 +11,7 @@ import type {
|
||||
} from './types'
|
||||
import type {
|
||||
EvaluationConfig,
|
||||
EvaluationConfigData,
|
||||
EvaluationCustomizedMetric,
|
||||
EvaluationDefaultMetric,
|
||||
EvaluationJudgmentCondition,
|
||||
@@ -432,7 +433,7 @@ const getCustomMetricScopeId = (metric: EvaluationMetric) => {
|
||||
return metric.customConfig?.workflowAppId ?? metric.customConfig?.workflowId ?? null
|
||||
}
|
||||
|
||||
const buildCustomizedMetricsPayload = (metrics: EvaluationMetric[]): EvaluationRunRequest['customized_metrics'] => {
|
||||
const buildCustomizedMetricsPayload = (metrics: EvaluationMetric[]): EvaluationConfigData['customized_metrics'] => {
|
||||
const customMetric = metrics.find(metric => metric.kind === 'custom-workflow')
|
||||
const customConfig = customMetric?.customConfig
|
||||
const evaluationWorkflowId = customMetric ? getCustomMetricScopeId(customMetric) : null
|
||||
@@ -456,7 +457,7 @@ const buildCustomizedMetricsPayload = (metrics: EvaluationMetric[]): EvaluationR
|
||||
}
|
||||
}
|
||||
|
||||
const buildJudgmentConfigPayload = (resource: EvaluationResourceState): EvaluationRunRequest['judgment_config'] => {
|
||||
const buildJudgmentConfigPayload = (resource: EvaluationResourceState): EvaluationConfigData['judgment_config'] => {
|
||||
const conditions = resource.judgmentConfig.conditions
|
||||
.filter(condition => !!condition.variableSelector)
|
||||
.map((condition) => {
|
||||
@@ -484,17 +485,15 @@ const buildJudgmentConfigPayload = (resource: EvaluationResourceState): Evaluati
|
||||
}
|
||||
}
|
||||
|
||||
export const buildEvaluationRunRequest = (
|
||||
export const buildEvaluationConfigPayload = (
|
||||
resource: EvaluationResourceState,
|
||||
fileId: string,
|
||||
): EvaluationRunRequest | null => {
|
||||
): EvaluationConfigData | null => {
|
||||
const selectedModel = decodeModelSelection(resource.judgeModelId)
|
||||
|
||||
if (!selectedModel)
|
||||
return null
|
||||
|
||||
return {
|
||||
file_id: fileId,
|
||||
evaluation_model: selectedModel.model,
|
||||
evaluation_model_provider: selectedModel.provider,
|
||||
default_metrics: resource.metrics
|
||||
@@ -509,6 +508,21 @@ export const buildEvaluationRunRequest = (
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Builds the request body for starting an evaluation run.
 *
 * The body is the config payload from `buildEvaluationConfigPayload` with
 * `file_id` added.
 *
 * @param resource - Evaluation state to serialize.
 * @param fileId - Value stored under `file_id` in the request.
 * @returns The run request, or `null` when no config payload can be built.
 */
export const buildEvaluationRunRequest = (
  resource: EvaluationResourceState,
  fileId: string,
): EvaluationRunRequest | null => {
  const config = buildEvaluationConfigPayload(resource)

  return config
    ? { ...config, file_id: fileId }
    : null
}
|
||||
|
||||
const getResourceState = (
|
||||
resources: EvaluationStoreResources,
|
||||
resourceType: EvaluationResourceType,
|
||||
|
||||
@@ -61,6 +61,7 @@
|
||||
"conditions.valueTypes.boolean": "Boolean",
|
||||
"conditions.valueTypes.number": "Number",
|
||||
"conditions.valueTypes.string": "String",
|
||||
"config.saveFailed": "Failed to save evaluation configuration.",
|
||||
"description": "Configure automated testing to grade your application's performance.",
|
||||
"history.actions.downloadResultFile": "Download result",
|
||||
"history.actions.downloadTestFile": "Download test file",
|
||||
|
||||
@@ -61,6 +61,7 @@
|
||||
"conditions.valueTypes.boolean": "布尔",
|
||||
"conditions.valueTypes.number": "数值",
|
||||
"conditions.valueTypes.string": "文本",
|
||||
"config.saveFailed": "保存评测配置失败。",
|
||||
"description": "配置自动化测试,对应用表现进行评分。",
|
||||
"history.actions.downloadResultFile": "下载结果文件",
|
||||
"history.actions.downloadTestFile": "下载测试文件",
|
||||
|
||||
@@ -63,6 +63,18 @@ export const useEvaluationNodeInfoMutation = () => {
|
||||
return useMutation(consoleQuery.evaluation.nodeInfo.mutationOptions())
|
||||
}
|
||||
|
||||
/**
 * Mutation hook for saving an evaluation configuration.
 *
 * On success it invalidates the queries under
 * `consoleQuery.evaluation.config.key()`.
 */
export const useSaveEvaluationConfigMutation = () => {
  const queryClient = useQueryClient()

  // Success handler: mark the evaluation config queries as stale.
  const refreshConfigQueries = () => {
    queryClient.invalidateQueries({
      queryKey: consoleQuery.evaluation.config.key(),
    })
  }

  const options = consoleQuery.evaluation.saveConfig.mutationOptions({
    onSuccess: refreshConfigQueries,
  })

  return useMutation(options)
}
|
||||
|
||||
export const useStartEvaluationRunMutation = () => {
|
||||
const queryClient = useQueryClient()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user