diff --git a/web/app/components/evaluation/__tests__/index.spec.tsx b/web/app/components/evaluation/__tests__/index.spec.tsx index fca39a5e5d..a4c0232d5e 100644 --- a/web/app/components/evaluation/__tests__/index.spec.tsx +++ b/web/app/components/evaluation/__tests__/index.spec.tsx @@ -1,15 +1,25 @@ import type { ReactNode } from 'react' import { QueryClient, QueryClientProvider } from '@tanstack/react-query' -import { act, fireEvent, render, screen } from '@testing-library/react' +import { act, fireEvent, render, screen, waitFor } from '@testing-library/react' import Evaluation from '..' import ConditionsSection from '../components/conditions-section' import { useEvaluationStore } from '../store' +const mockUpload = vi.hoisted(() => vi.fn()) const mockUseAvailableEvaluationMetrics = vi.hoisted(() => vi.fn()) const mockUseEvaluationConfig = vi.hoisted(() => vi.fn()) const mockUseEvaluationNodeInfoMutation = vi.hoisted(() => vi.fn()) const mockUseSaveEvaluationConfigMutation = vi.hoisted(() => vi.fn()) const mockUseStartEvaluationRunMutation = vi.hoisted(() => vi.fn()) +const mockUsePublishedPipelineInfo = vi.hoisted(() => vi.fn()) + +vi.mock('@/context/dataset-detail', () => ({ + useDatasetDetailContextWithSelector: (selector: (state: { dataset?: { pipeline_id?: string } }) => unknown) => selector({ + dataset: { + pipeline_id: 'pipeline-1', + }, + }), +})) vi.mock('@/app/components/header/account-setting/model-provider-page/hooks', () => ({ useModelList: () => ({ @@ -42,6 +52,10 @@ vi.mock('@/app/components/header/account-setting/model-provider-page/model-selec ), })) +vi.mock('@/service/base', () => ({ + upload: (...args: unknown[]) => mockUpload(...args), +})) + vi.mock('@/service/use-evaluation', () => ({ useEvaluationConfig: (...args: unknown[]) => mockUseEvaluationConfig(...args), useAvailableEvaluationMetrics: (...args: unknown[]) => mockUseAvailableEvaluationMetrics(...args), @@ -50,6 +64,10 @@ vi.mock('@/service/use-evaluation', () => ({ useStartEvaluationRunMutation: (...args: unknown[]) => mockUseStartEvaluationRunMutation(...args), })) +vi.mock('@/service/use-pipeline', () => ({ + usePublishedPipelineInfo: (...args: unknown[]) => mockUsePublishedPipelineInfo(...args), +})) + vi.mock('@/service/use-workflow', () => ({ useAppWorkflow: () => ({ data: { @@ -147,6 +165,28 @@ describe('Evaluation', () => { isPending: false, mutate: vi.fn(), }) + mockUsePublishedPipelineInfo.mockReturnValue({ + data: { + rag_pipeline_variables: [{ + belong_to_node_id: 'shared', + type: 'text-input', + label: 'Question', + variable: 'question', + required: true, + }, { + belong_to_node_id: 'shared', + type: 'number', + label: 'Top K', + variable: 'top_k', + required: false, + }], + }, + isLoading: false, + }) + mockUpload.mockResolvedValue({ + id: 'uploaded-file-id', + name: 'evaluation.csv', + }) }) it('should search, select metric nodes, and save evaluation config', () => { @@ -411,4 +451,67 @@ describe('Evaluation', () => { expect(screen.getByRole('button', { name: 'evaluation.batch.downloadTemplate' })).toBeEnabled() expect(screen.getByRole('button', { name: 'evaluation.pipeline.uploadAndRun' })).toBeEnabled() }) + + it('should upload and start a pipeline evaluation run', async () => { + const startRun = vi.fn() + mockUseStartEvaluationRunMutation.mockReturnValue({ + isPending: false, + mutate: startRun, + }) + mockUpload.mockResolvedValue({ + id: 'file-1', + name: 'pipeline-evaluation.csv', + }) + + renderWithQueryClient() + + fireEvent.click(screen.getByRole('button', { name: 'select-model' })) + fireEvent.click(screen.getByRole('button', { name: /Context Precision/i })) + fireEvent.click(screen.getByRole('button', { name: 'evaluation.pipeline.uploadAndRun' })) + + expect(screen.getAllByText('question').length).toBeGreaterThan(0) + expect(screen.getAllByText('top_k').length).toBeGreaterThan(0) + + const fileInput = document.querySelector('input[type="file"][accept=".csv,.xlsx"]') + expect(fileInput).toBeInTheDocument() + + fireEvent.change(fileInput!, { + target: { + files: [new File(['case_id,input,expected'], 'pipeline-evaluation.csv', { type: 'text/csv' })], + }, + }) + + await waitFor(() => { + expect(mockUpload).toHaveBeenCalledWith({ + xhr: expect.any(XMLHttpRequest), + data: expect.any(FormData), + }) + }) + + fireEvent.click(screen.getByRole('button', { name: 'evaluation.batch.run' })) + + await waitFor(() => { + expect(startRun).toHaveBeenCalledWith({ + params: { + targetType: 'datasets', + targetId: 'dataset-run', + }, + body: { + evaluation_model: 'gpt-4o-mini', + evaluation_model_provider: 'openai', + default_metrics: [{ + metric: 'context-precision', + value_type: 'number', + node_info_list: [], + }], + customized_metrics: null, + judgment_config: null, + file_id: 'file-1', + }, + }, { + onSuccess: expect.any(Function), + onError: expect.any(Function), + }) + }) + }) }) diff --git a/web/app/components/evaluation/components/batch-test-panel/input-fields/input-fields-utils.ts b/web/app/components/evaluation/components/batch-test-panel/input-fields/input-fields-utils.ts index 5a71b81d06..2a12548a21 100644 --- a/web/app/components/evaluation/components/batch-test-panel/input-fields/input-fields-utils.ts +++ b/web/app/components/evaluation/components/batch-test-panel/input-fields/input-fields-utils.ts @@ -1,6 +1,8 @@ import type { StartNodeType } from '@/app/components/workflow/nodes/start/types' import type { InputVar, Node } from '@/app/components/workflow/types' +import type { RAGPipelineVariables } from '@/models/pipeline' import { inputVarTypeToVarType } from '@/app/components/workflow/nodes/_base/components/variable/utils' +import { inputVarTypeToVarType as pipelineInputVarTypeToVarType } from '@/app/components/workflow/nodes/data-source/utils' import { BlockEnum, InputVarType } from '@/app/components/workflow/types' export type InputField = { @@ -27,6 +29,18 @@ export const getStartNodeInputFields = (nodes?: Node[]): InputField[] => { })) } +export const getRagPipelineInputFields = (variables?: RAGPipelineVariables): InputField[] => { + if (!Array.isArray(variables)) + return [] + + return variables + .filter(variable => typeof variable.variable === 'string' && !!variable.variable) + .map(variable => ({ + name: variable.variable, + type: pipelineInputVarTypeToVarType(variable.type), + })) +} + const escapeCsvCell = (value: string) => { if (!/[",\n\r]/.test(value)) return value diff --git a/web/app/components/evaluation/components/batch-test-panel/input-fields/upload-run-popover.tsx b/web/app/components/evaluation/components/batch-test-panel/input-fields/upload-run-popover.tsx index 01c9d84770..3a54d9af0a 100644 --- a/web/app/components/evaluation/components/batch-test-panel/input-fields/upload-run-popover.tsx +++ b/web/app/components/evaluation/components/batch-test-panel/input-fields/upload-run-popover.tsx @@ -15,6 +15,7 @@ type UploadRunPopoverProps = { open: boolean onOpenChange: (open: boolean) => void triggerDisabled: boolean + triggerLabel?: string inputFields: InputField[] currentFileName: string | null | undefined currentFileExtension: string @@ -32,6 +33,7 @@ const UploadRunPopover = ({ open, onOpenChange, triggerDisabled, + triggerLabel, inputFields, currentFileName, currentFileExtension, @@ -65,7 +67,7 @@ const UploadRunPopover = ({ - {t('batch.uploadAndRun')} + {triggerLabel ?? t('batch.uploadAndRun')} )} /> diff --git a/web/app/components/evaluation/components/batch-test-panel/input-fields/use-published-input-fields.ts b/web/app/components/evaluation/components/batch-test-panel/input-fields/use-published-input-fields.ts index a319603026..3c31f1577c 100644 --- a/web/app/components/evaluation/components/batch-test-panel/input-fields/use-published-input-fields.ts +++ b/web/app/components/evaluation/components/batch-test-panel/input-fields/use-published-input-fields.ts @@ -1,8 +1,10 @@ import type { EvaluationResourceType } from '../../../types' import { useMemo } from 'react' +import { useDatasetDetailContextWithSelector } from '@/context/dataset-detail' +import { usePublishedPipelineInfo } from '@/service/use-pipeline' import { useSnippetPublishedWorkflow } from '@/service/use-snippet-workflows' import { useAppWorkflow } from '@/service/use-workflow' -import { getGraphNodes, getStartNodeInputFields } from './input-fields-utils' +import { getGraphNodes, getRagPipelineInputFields, getStartNodeInputFields } from './input-fields-utils' export const usePublishedInputFields = ( resourceType: EvaluationResourceType, @@ -10,6 +12,8 @@ export const usePublishedInputFields = ( ) => { const { data: currentAppWorkflow, isLoading: isAppWorkflowLoading } = useAppWorkflow(resourceType === 'apps' ? resourceId : '') const { data: currentSnippetWorkflow, isLoading: isSnippetWorkflowLoading } = useSnippetPublishedWorkflow(resourceType === 'snippets' ? resourceId : '') + const pipelineId = useDatasetDetailContextWithSelector(state => state.dataset?.pipeline_id) + const { data: currentPipelineWorkflow, isLoading: isPipelineWorkflowLoading } = usePublishedPipelineInfo(resourceType === 'datasets' ? (pipelineId ?? '') : '') const inputFields = useMemo(() => { if (resourceType === 'apps') @@ -18,12 +22,16 @@ export const usePublishedInputFields = ( if (resourceType === 'snippets') return getStartNodeInputFields(getGraphNodes(currentSnippetWorkflow?.graph)) + if (resourceType === 'datasets') + return getRagPipelineInputFields(currentPipelineWorkflow?.rag_pipeline_variables) + return [] - }, [currentAppWorkflow?.graph.nodes, currentSnippetWorkflow?.graph, resourceType]) + }, [currentAppWorkflow?.graph.nodes, currentPipelineWorkflow?.rag_pipeline_variables, currentSnippetWorkflow?.graph, resourceType]) return { inputFields, isInputFieldsLoading: (resourceType === 'apps' && isAppWorkflowLoading) - || (resourceType === 'snippets' && isSnippetWorkflowLoading), + || (resourceType === 'snippets' && isSnippetWorkflowLoading) + || (resourceType === 'datasets' && isPipelineWorkflowLoading), } } diff --git a/web/app/components/evaluation/components/layout/pipeline-evaluation.tsx b/web/app/components/evaluation/components/layout/pipeline-evaluation.tsx index 32254e1e6d..5f4854139b 100644 --- a/web/app/components/evaluation/components/layout/pipeline-evaluation.tsx +++ b/web/app/components/evaluation/components/layout/pipeline-evaluation.tsx @@ -1,14 +1,16 @@ 'use client' import type { EvaluationResourceProps } from '../../types' -import { useEffect, useMemo, useRef } from 'react' +import { useEffect, useMemo } from 'react' import { useTranslation } from 'react-i18next' import Button from '@/app/components/base/button' -import { toast } from '@/app/components/base/ui/toast' import { useDocLink } from '@/context/i18n' import { useAvailableEvaluationMetrics } from '@/service/use-evaluation' import { getEvaluationMockConfig } from '../../mock' import { isEvaluationRunnable, useEvaluationResource, useEvaluationStore } from '../../store' +import UploadRunPopover from '../batch-test-panel/input-fields/upload-run-popover' +import { useInputFieldsActions } from '../batch-test-panel/input-fields/use-input-fields-actions' +import { usePublishedInputFields } from '../batch-test-panel/input-fields/use-published-input-fields' import JudgeModelSelector from '../judge-model-selector' import PipelineHistoryTable from '../pipeline/pipeline-history-table' import PipelineMetricItem from '../pipeline/pipeline-metric-item' @@ -26,11 +28,8 @@ const PipelineEvaluation = ({ const addBuiltinMetric = useEvaluationStore(state => state.addBuiltinMetric) const removeMetric = useEvaluationStore(state => state.removeMetric) const updateMetricThreshold = useEvaluationStore(state => state.updateMetricThreshold) - const setUploadedFileName = useEvaluationStore(state => state.setUploadedFileName) - const runBatchTest = useEvaluationStore(state => state.runBatchTest) const { data: availableMetricsData } = useAvailableEvaluationMetrics() const resource = useEvaluationResource(resourceType, resourceId) - const fileInputRef = useRef(null) const config = getEvaluationMockConfig(resourceType) const builtinMetricMap = useMemo(() => new Map( resource.metrics @@ -45,6 +44,16 @@ const PipelineEvaluation = ({ }, [availableMetricIds, builtinMetricMap, config.builtinMetrics]) const isConfigReady = !!resource.judgeModelId && builtinMetricMap.size > 0 const isRunnable = isEvaluationRunnable(resource) + const { inputFields, isInputFieldsLoading } = usePublishedInputFields(resourceType, resourceId) + const actions = useInputFieldsActions({ + resourceType, + resourceId, + inputFields, + isInputFieldsLoading, + isPanelReady: isConfigReady, + isRunnable, + templateFileName: config.templateFileName, + }) useEffect(() => { ensureResource(resourceType, resourceId) @@ -60,23 +69,6 @@ const PipelineEvaluation = ({ addBuiltinMetric(resourceType, resourceId, metricId) } - const handleDownloadTemplate = () => { - const content = ['case_id,input,expected', '1,Example input,Example output'].join('\n') - const link = document.createElement('a') - link.href = `data:text/csv;charset=utf-8,${encodeURIComponent(content)}` - link.download = config.templateFileName - link.click() - } - - const handleUploadAndRun = () => { - if (!isRunnable) { - toast.warning(t('batch.validation')) - return - } - - fileInputRef.current?.click() - } - return (
@@ -138,37 +130,32 @@ const PipelineEvaluation = ({ - +
+ +
- - { - const file = event.target.files?.[0] - if (!file) - return - - setUploadedFileName(resourceType, resourceId, file.name) - runBatchTest(resourceType, resourceId) - event.target.value = '' - }} - />