mirror of
https://github.com/langgenius/dify.git
synced 2026-04-12 18:00:24 -04:00
feat(web): dataset evaluation layout
This commit is contained in:
@@ -16,9 +16,23 @@ vi.mock('@/app/components/header/account-setting/model-provider-page/hooks', ()
|
||||
}))
|
||||
|
||||
vi.mock('@/app/components/header/account-setting/model-provider-page/model-selector', () => ({
|
||||
default: ({ defaultModel }: { defaultModel?: { provider: string, model: string } }) => (
|
||||
<div data-testid="evaluation-model-selector">
|
||||
{defaultModel ? `${defaultModel.provider}:${defaultModel.model}` : 'empty'}
|
||||
default: ({
|
||||
defaultModel,
|
||||
onSelect,
|
||||
}: {
|
||||
defaultModel?: { provider: string, model: string }
|
||||
onSelect: (model: { provider: string, model: string }) => void
|
||||
}) => (
|
||||
<div>
|
||||
<div data-testid="evaluation-model-selector">
|
||||
{defaultModel ? `${defaultModel.provider}:${defaultModel.model}` : 'empty'}
|
||||
</div>
|
||||
<button
|
||||
type="button"
|
||||
onClick={() => onSelect({ provider: 'openai', model: 'gpt-4o-mini' })}
|
||||
>
|
||||
select-model
|
||||
</button>
|
||||
</div>
|
||||
),
|
||||
}))
|
||||
@@ -208,4 +222,26 @@ describe('Evaluation', () => {
|
||||
expect(screen.getByText('LLM 4')).toBeInTheDocument()
|
||||
expect(screen.getByRole('button', { name: 'evaluation.metrics.showLess' })).toBeInTheDocument()
|
||||
})
|
||||
|
||||
// Pipeline resources render the dedicated layout and must not preselect a judge model.
it('should render the pipeline-specific layout without auto-selecting a judge model', () => {
  render(<Evaluation resourceType="pipeline" resourceId="dataset-1" />)

  // The mocked model selector prints "empty" when it receives no default model.
  const modelSelector = screen.getByTestId('evaluation-model-selector')
  expect(modelSelector).toHaveTextContent('empty')

  // History title, the three pipeline metrics, and the empty results placeholder.
  const expectedTexts = [
    'evaluation.history.title',
    'Context Precision',
    'Context Recall',
    'Context Relevance',
    'evaluation.results.empty',
  ]
  for (const text of expectedTexts)
    expect(screen.getByText(text)).toBeInTheDocument()

  // Nothing is configured yet, so the run action stays disabled.
  const uploadAndRunButton = screen.getByRole('button', { name: 'evaluation.pipeline.uploadAndRun' })
  expect(uploadAndRunButton).toBeDisabled()
})
|
||||
|
||||
// Picking a judge model and one metric unlocks both batch actions.
it('should enable pipeline batch actions after selecting a judge model and metric', () => {
  render(<Evaluation resourceType="pipeline" resourceId="dataset-2" />)

  const clickButton = (name: string | RegExp) =>
    fireEvent.click(screen.getByRole('button', { name }))

  // "select-model" is the button exposed by the mocked model selector.
  clickButton('select-model')
  clickButton(/Context Precision/i)

  const batchActionNames = [
    'evaluation.batch.downloadTemplate',
    'evaluation.pipeline.uploadAndRun',
  ]
  for (const name of batchActionNames)
    expect(screen.getByRole('button', { name })).toBeEnabled()
})
|
||||
})
|
||||
|
||||
@@ -8,17 +8,22 @@ import ModelSelector from '@/app/components/header/account-setting/model-provide
|
||||
import { useEvaluationResource, useEvaluationStore } from '../store'
|
||||
import { decodeModelSelection, encodeModelSelection } from '../utils'
|
||||
|
||||
type JudgeModelSelectorProps = EvaluationResourceProps & {
|
||||
autoSelectFirst?: boolean
|
||||
}
|
||||
|
||||
const JudgeModelSelector = ({
|
||||
resourceType,
|
||||
resourceId,
|
||||
}: EvaluationResourceProps) => {
|
||||
autoSelectFirst = true,
|
||||
}: JudgeModelSelectorProps) => {
|
||||
const { data: modelList } = useModelList(ModelTypeEnum.textGeneration)
|
||||
const resource = useEvaluationResource(resourceType, resourceId)
|
||||
const setJudgeModel = useEvaluationStore(state => state.setJudgeModel)
|
||||
const selectedModel = decodeModelSelection(resource.judgeModelId)
|
||||
|
||||
useEffect(() => {
|
||||
if (resource.judgeModelId || !modelList.length)
|
||||
if (!autoSelectFirst || resource.judgeModelId || !modelList.length)
|
||||
return
|
||||
|
||||
const firstProvider = modelList[0]
|
||||
@@ -27,7 +32,7 @@ const JudgeModelSelector = ({
|
||||
return
|
||||
|
||||
setJudgeModel(resourceType, resourceId, encodeModelSelection(firstProvider.provider, firstModel.model))
|
||||
}, [modelList, resource.judgeModelId, resourceId, resourceType, setJudgeModel])
|
||||
}, [autoSelectFirst, modelList, resource.judgeModelId, resourceId, resourceType, setJudgeModel])
|
||||
|
||||
return (
|
||||
<ModelSelector
|
||||
|
||||
@@ -0,0 +1,62 @@
|
||||
'use client'
|
||||
|
||||
import type { EvaluationResourceProps } from '../types'
|
||||
import { useTranslation } from 'react-i18next'
|
||||
import { useDocLink } from '@/context/i18n'
|
||||
import BatchTestPanel from './batch-test-panel'
|
||||
import ConditionsSection from './conditions-section'
|
||||
import JudgeModelSelector from './judge-model-selector'
|
||||
import MetricSection from './metric-section'
|
||||
import SectionHeader, { InlineSectionHeader } from './section-header'
|
||||
|
||||
/**
 * Evaluation layout for every resource type other than `pipeline`.
 *
 * Renders a scrollable configuration column (header, judge model selector,
 * metric section, conditions section) next to the batch test panel. The two
 * areas stack vertically and switch to a row at the `xl` breakpoint.
 *
 * @param props.resourceType - Resource kind forwarded to every child section.
 * @param props.resourceId - Resource identifier forwarded to every child section.
 */
const NonPipelineEvaluation = (props: EvaluationResourceProps) => {
  const { resourceType, resourceId } = props
  const { t } = useTranslation('evaluation')
  const { t: tCommon } = useTranslation('common')
  const docLink = useDocLink()

  // Every child section is keyed by the same resource pair.
  const resourceProps = { resourceType, resourceId }

  // Header description: translated copy followed by a "learn more" docs link.
  const headerDescription = (
    <>
      {t('description')}
      {' '}
      <a
        className="text-text-accent"
        href={docLink()}
        target="_blank"
        rel="noopener noreferrer"
      >
        {tCommon('operation.learnMore')}
      </a>
    </>
  )

  return (
    <div className="flex h-full min-h-0 flex-col bg-background-default xl:flex-row">
      <div className="min-h-0 flex-1 overflow-y-auto">
        <div className="flex min-h-full max-w-[748px] flex-col px-6 py-4">
          <SectionHeader
            title={t('title')}
            description={headerDescription}
            descriptionClassName="max-w-[700px]"
          />
          <section className="max-w-[700px] py-4">
            <InlineSectionHeader title={t('judgeModel.title')} tooltip={t('judgeModel.description')} />
            <div className="mt-1.5">
              <JudgeModelSelector {...resourceProps} />
            </div>
          </section>
          <div className="max-w-[700px] border-b border-divider-subtle" />
          <MetricSection {...resourceProps} />
          <div className="max-w-[700px] border-b border-divider-subtle" />
          <ConditionsSection {...resourceProps} />
        </div>
      </div>

      <div className="h-[420px] shrink-0 border-t border-divider-subtle xl:h-auto xl:w-[450px] xl:border-t-0 xl:border-l">
        <BatchTestPanel {...resourceProps} />
      </div>
    </div>
  )
}
|
||||
|
||||
export default NonPipelineEvaluation
|
||||
346
web/app/components/evaluation/components/pipeline-evaluation.tsx
Normal file
346
web/app/components/evaluation/components/pipeline-evaluation.tsx
Normal file
@@ -0,0 +1,346 @@
|
||||
'use client'
|
||||
|
||||
import type { EvaluationResourceProps, MetricOption } from '../types'
|
||||
import { useEffect, useMemo, useRef, useState } from 'react'
|
||||
import { useTranslation } from 'react-i18next'
|
||||
import Badge from '@/app/components/base/badge'
|
||||
import Button from '@/app/components/base/button'
|
||||
import Checkbox from '@/app/components/base/checkbox'
|
||||
import Input from '@/app/components/base/input'
|
||||
import { toast } from '@/app/components/base/ui/toast'
|
||||
import { Tooltip, TooltipContent, TooltipTrigger } from '@/app/components/base/ui/tooltip'
|
||||
import { useDocLink } from '@/context/i18n'
|
||||
import { cn } from '@/utils/classnames'
|
||||
import { getEvaluationMockConfig } from '../mock'
|
||||
import { isEvaluationRunnable, useEvaluationResource, useEvaluationStore } from '../store'
|
||||
import JudgeModelSelector from './judge-model-selector'
|
||||
import SectionHeader, { InlineSectionHeader } from './section-header'
|
||||
|
||||
/** Props for one selectable metric row in the pipeline evaluation layout. */
type PipelineMetricItemProps = {
  /** Metric option whose label and description are rendered in the row. */
  metric: MetricOption
  /** Whether the row's checkbox is rendered as checked. */
  selected: boolean
  /** When true, the row's "+ Condition" button is disabled. */
  disabledCondition: boolean
  /** Invoked when the checkbox/label button is clicked. */
  onToggle: () => void
}
|
||||
|
||||
/**
 * One row of the pipeline metric list: a toggle button (checkbox, label and a
 * help tooltip showing the metric description) plus a "+ Condition" button.
 *
 * The "+ Condition" button has no click handler here; it only reflects the
 * `disabledCondition` flag.
 */
const PipelineMetricItem = (props: PipelineMetricItemProps) => {
  const { metric, selected, onToggle, disabledCondition } = props

  const conditionButtonClassName = cn(
    'system-xs-medium text-text-tertiary',
    disabledCondition && 'cursor-not-allowed text-components-button-secondary-accent-text-disabled',
  )

  // Question-mark icon used as the tooltip trigger element.
  const helpIcon = (
    <span className="flex h-4 w-4 items-center justify-center text-text-quaternary">
      <span aria-hidden="true" className="i-ri-question-line h-3.5 w-3.5" />
    </span>
  )

  return (
    <div className="flex items-center justify-between gap-3 px-1 py-1">
      <button
        type="button"
        className="flex min-w-0 items-center gap-2 text-left"
        onClick={onToggle}
      >
        <Checkbox checked={selected} />
        <span className="truncate system-sm-medium text-text-secondary">{metric.label}</span>
        <Tooltip>
          <TooltipTrigger render={helpIcon} />
          <TooltipContent>{metric.description}</TooltipContent>
        </Tooltip>
      </button>

      <button
        type="button"
        disabled={disabledCondition}
        className={conditionButtonClassName}
      >
        + Condition
      </button>
    </div>
  )
}
|
||||
|
||||
/**
 * Searchable "test history" table for one evaluation resource.
 *
 * Lists the resource's batch records, filtered by a case-insensitive keyword
 * matched against each record's file name and summary. The creator and
 * version columns render fixed translated labels for every row. When no
 * record matches (or none exist) an empty-state placeholder is shown.
 *
 * @param props.resourceType - Resource kind used to look up the evaluation resource.
 * @param props.resourceId - Resource identifier used to look up the evaluation resource.
 */
const PipelineHistoryTable = (props: EvaluationResourceProps) => {
  const { resourceType, resourceId } = props
  const { t } = useTranslation('evaluation')
  const resource = useEvaluationResource(resourceType, resourceId)
  const [query, setQuery] = useState('')

  const statusLabels = {
    running: t('batch.status.running'),
    success: t('batch.status.success'),
    failed: t('batch.status.failed'),
  }

  const filteredRecords = useMemo(() => {
    const keyword = query.trim().toLowerCase()
    // A blank query shows every record untouched.
    if (keyword === '')
      return resource.batchRecords

    return resource.batchRecords.filter(record =>
      [record.fileName, record.summary].some(text => text.toLowerCase().includes(keyword)),
    )
  }, [query, resource.batchRecords])

  const hasRecords = filteredRecords.length > 0

  // Header and body rows share the same column template so cells line up.
  const gridColumns = 'grid grid-cols-[minmax(0,1.8fr)_80px_80px_80px_40px]'
  const rowCellClassName = 'truncate px-2 system-sm-regular text-text-secondary'

  return (
    <div className="flex min-h-0 flex-1 flex-col">
      <div className="flex items-center justify-between gap-3 px-6 pt-4 pb-2">
        <div className="system-xl-semibold text-text-primary">{t('history.title')}</div>
        <div className="w-[160px] shrink-0 sm:w-[200px]">
          <Input
            value={query}
            showLeftIcon
            placeholder={t('history.searchPlaceholder')}
            onChange={event => setQuery(event.target.value)}
          />
        </div>
      </div>

      <div className="min-h-0 flex-1 px-4 pb-4">
        <div className="flex h-full min-h-0 flex-col overflow-hidden rounded-lg border border-effects-highlight bg-background-default">
          <div className={`${gridColumns} rounded-t-lg bg-background-section px-2 py-1`}>
            <div className="flex items-center gap-1 px-2 system-xs-medium-uppercase text-text-tertiary">
              <span>{t('history.columns.time')}</span>
              <span aria-hidden="true" className="i-ri-arrow-down-line h-3 w-3" />
            </div>
            <div className="px-2 system-xs-medium-uppercase text-text-tertiary">{t('history.columns.creator')}</div>
            <div className="px-2 system-xs-medium-uppercase text-text-tertiary">{t('history.columns.version')}</div>
            <div className="px-2 text-center system-xs-medium-uppercase text-text-tertiary">{t('history.columns.status')}</div>
            <div />
          </div>

          <div className="min-h-0 flex-1 overflow-y-auto">
            {hasRecords && (
              <div className="divide-y divide-divider-subtle">
                {filteredRecords.map((record) => {
                  const badgeClassName = cn(
                    record.status === 'failed' && 'badge-warning',
                    record.status === 'success' && 'badge-accent',
                  )
                  // Running records get a spinner in front of the label.
                  const statusContent = record.status === 'running'
                    ? (
                        <span className="flex items-center gap-1">
                          <span aria-hidden="true" className="i-ri-loader-4-line h-3 w-3 animate-spin" />
                          {statusLabels.running}
                        </span>
                      )
                    : statusLabels[record.status]

                  return (
                    <div
                      key={record.id}
                      className={`${gridColumns} items-center px-2 py-2`}
                    >
                      <div className={rowCellClassName}>{record.startedAt}</div>
                      <div className={rowCellClassName}>{t('history.creatorYou')}</div>
                      <div className={rowCellClassName}>{t('history.latestVersion')}</div>
                      <div className="flex justify-center px-2">
                        <Badge className={badgeClassName}>
                          {statusContent}
                        </Badge>
                      </div>
                      <div className="flex justify-center">
                        <button
                          type="button"
                          className="flex h-6 w-6 items-center justify-center rounded-md text-text-quaternary hover:bg-state-base-hover"
                          aria-label={record.summary}
                        >
                          <span aria-hidden="true" className="i-ri-more-2-line h-4 w-4" />
                        </button>
                      </div>
                    </div>
                  )
                })}
              </div>
            )}

            {!hasRecords && (
              <div className="flex h-full min-h-[321px] flex-col items-center justify-center gap-2 px-4 text-center">
                <span aria-hidden="true" className="i-ri-history-line h-5 w-5 text-text-quaternary" />
                <div className="system-sm-medium text-text-quaternary">{t('history.empty')}</div>
              </div>
            )}
          </div>
        </div>
      </div>
    </div>
  )
}
|
||||
|
||||
/**
 * Results area of the pipeline layout. It currently renders only a centered
 * empty-state placeholder (icon plus the translated `results.empty` message).
 */
const PipelineResultsPanel = () => {
  const { t } = useTranslation('evaluation')
  const emptyMessage = t('results.empty')

  return (
    <div className="flex min-h-[360px] flex-1 items-center justify-center xl:min-h-0">
      <div className="flex flex-col items-center gap-4 px-4 text-center">
        <span aria-hidden="true" className="i-ri-file-list-3-line h-12 w-12 text-text-quaternary" />
        <div className="system-md-medium text-text-quaternary">{emptyMessage}</div>
      </div>
    </div>
  )
}
|
||||
|
||||
/**
 * Evaluation layout for `pipeline` resources.
 *
 * Left column: header, judge model selector (rendered with
 * `autoSelectFirst={false}`), a checklist of the built-in metrics from the
 * mock config, the "download template" / "upload & run" actions, and the test
 * history table. Right column: the results panel.
 *
 * Both action buttons are disabled until a judge model is set and at least
 * one built-in metric is selected.
 *
 * @param props.resourceType - Resource kind used for store lookups and mock config.
 * @param props.resourceId - Resource identifier used for store lookups.
 */
const PipelineEvaluation = (props: EvaluationResourceProps) => {
  const { resourceType, resourceId } = props
  const { t } = useTranslation('evaluation')
  const { t: tCommon } = useTranslation('common')
  const docLink = useDocLink()
  const ensureResource = useEvaluationStore(state => state.ensureResource)
  const addBuiltinMetric = useEvaluationStore(state => state.addBuiltinMetric)
  const removeMetric = useEvaluationStore(state => state.removeMetric)
  const setUploadedFileName = useEvaluationStore(state => state.setUploadedFileName)
  const runBatchTest = useEvaluationStore(state => state.runBatchTest)
  const resource = useEvaluationResource(resourceType, resourceId)
  const fileInputRef = useRef<HTMLInputElement>(null)
  const config = getEvaluationMockConfig(resourceType)

  // Selected built-in metrics, indexed by the option they were created from.
  const builtinMetricMap = useMemo(() => {
    const selectedBuiltins = resource.metrics.filter(metric => metric.kind === 'builtin')
    return new Map(selectedBuiltins.map(metric => [metric.optionId, metric] as const))
  }, [resource.metrics])

  const isConfigReady = Boolean(resource.judgeModelId) && builtinMetricMap.size > 0
  const isRunnable = isEvaluationRunnable(resource)

  useEffect(() => {
    ensureResource(resourceType, resourceId)
  }, [ensureResource, resourceId, resourceType])

  // Removes the metric when its option is already selected, otherwise adds it.
  const handleToggleMetric = (metricId: string) => {
    const existingMetric = builtinMetricMap.get(metricId)
    if (existingMetric)
      removeMetric(resourceType, resourceId, existingMetric.id)
    else
      addBuiltinMetric(resourceType, resourceId, metricId)
  }

  // Triggers a client-side download of a two-line sample CSV via a data URL.
  const handleDownloadTemplate = () => {
    const csv = 'case_id,input,expected\n1,Example input,Example output'
    const anchor = document.createElement('a')
    anchor.download = config.templateFileName
    anchor.href = `data:text/csv;charset=utf-8,${encodeURIComponent(csv)}`
    anchor.click()
  }

  // Opens the hidden file picker, or warns when the resource is not runnable.
  const handleUploadAndRun = () => {
    if (isRunnable) {
      fileInputRef.current?.click()
      return
    }

    toast.warning(t('batch.validation'))
  }

  // Header description: translated copy followed by a "learn more" docs link.
  const headerDescription = (
    <>
      {t('description')}
      {' '}
      <a
        className="text-text-accent"
        href={docLink()}
        target="_blank"
        rel="noopener noreferrer"
      >
        {tCommon('operation.learnMore')}
      </a>
    </>
  )

  return (
    <div className="flex h-full min-h-0 flex-col bg-background-default xl:flex-row">
      <div className="flex min-h-0 flex-col border-b border-divider-subtle bg-background-default xl:w-[450px] xl:shrink-0 xl:border-r xl:border-b-0">
        <div className="px-6 pt-4 pb-2">
          <SectionHeader
            title={t('title')}
            description={headerDescription}
          />
        </div>

        <div className="px-6 pt-3 pb-4">
          <div className="space-y-3">
            <section>
              <InlineSectionHeader title={t('judgeModel.title')} tooltip={t('judgeModel.description')} />
              <div className="mt-1">
                <JudgeModelSelector
                  resourceType={resourceType}
                  resourceId={resourceId}
                  autoSelectFirst={false}
                />
              </div>
            </section>

            <section>
              <InlineSectionHeader title={t('metrics.title')} tooltip={t('metrics.description')} />
              <div className="mt-1 space-y-0.5">
                {config.builtinMetrics.map((metric) => {
                  return (
                    <PipelineMetricItem
                      key={metric.id}
                      metric={metric}
                      selected={builtinMetricMap.has(metric.id)}
                      disabledCondition
                      onToggle={() => handleToggleMetric(metric.id)}
                    />
                  )
                })}
              </div>
            </section>

            <div className="flex gap-2 pt-2">
              <Button
                className="flex-1 justify-center"
                variant="secondary"
                disabled={!isConfigReady}
                onClick={handleDownloadTemplate}
              >
                <span aria-hidden="true" className="mr-1 i-ri-file-excel-2-line h-4 w-4" />
                {t('batch.downloadTemplate')}
              </Button>
              <Button
                className="flex-1 justify-center"
                variant="primary"
                disabled={!isConfigReady}
                onClick={handleUploadAndRun}
              >
                {t('pipeline.uploadAndRun')}
              </Button>
            </div>

            <input
              ref={fileInputRef}
              hidden
              type="file"
              accept=".csv,.xlsx"
              onChange={(event) => {
                const selectedFile = event.target.files?.[0]
                if (!selectedFile)
                  return

                setUploadedFileName(resourceType, resourceId, selectedFile.name)
                runBatchTest(resourceType, resourceId)
                // Clear the input so picking the same file again fires onChange.
                event.target.value = ''
              }}
            />
          </div>
        </div>

        <div className="border-t border-divider-subtle" />

        <PipelineHistoryTable
          resourceType={resourceType}
          resourceId={resourceId}
        />
      </div>

      <div className="min-h-0 flex-1 bg-background-default">
        <PipelineResultsPanel />
      </div>
    </div>
  )
}
|
||||
|
||||
export default PipelineEvaluation
|
||||
@@ -2,67 +2,34 @@
|
||||
|
||||
import type { EvaluationResourceProps } from './types'
|
||||
import { useEffect } from 'react'
|
||||
import { useTranslation } from 'react-i18next'
|
||||
import { useDocLink } from '@/context/i18n'
|
||||
import BatchTestPanel from './components/batch-test-panel'
|
||||
import ConditionsSection from './components/conditions-section'
|
||||
import JudgeModelSelector from './components/judge-model-selector'
|
||||
import MetricSection from './components/metric-section'
|
||||
import SectionHeader, { InlineSectionHeader } from './components/section-header'
|
||||
import NonPipelineEvaluation from './components/non-pipeline-evaluation'
|
||||
import PipelineEvaluation from './components/pipeline-evaluation'
|
||||
import { useEvaluationStore } from './store'
|
||||
|
||||
const Evaluation = ({
|
||||
resourceType,
|
||||
resourceId,
|
||||
}: EvaluationResourceProps) => {
|
||||
const { t } = useTranslation('evaluation')
|
||||
const { t: tCommon } = useTranslation('common')
|
||||
const docLink = useDocLink()
|
||||
const ensureResource = useEvaluationStore(state => state.ensureResource)
|
||||
|
||||
useEffect(() => {
|
||||
ensureResource(resourceType, resourceId)
|
||||
}, [ensureResource, resourceId, resourceType])
|
||||
|
||||
return (
|
||||
<div className="flex h-full min-h-0 flex-col bg-background-default xl:flex-row">
|
||||
<div className="min-h-0 flex-1 overflow-y-auto">
|
||||
<div className="flex min-h-full max-w-[748px] flex-col px-6 py-4">
|
||||
<SectionHeader
|
||||
title={t('title')}
|
||||
description={(
|
||||
<>
|
||||
{t('description')}
|
||||
{' '}
|
||||
<a
|
||||
className="text-text-accent"
|
||||
href={docLink()}
|
||||
target="_blank"
|
||||
rel="noopener noreferrer"
|
||||
>
|
||||
{tCommon('operation.learnMore')}
|
||||
</a>
|
||||
</>
|
||||
)}
|
||||
descriptionClassName="max-w-[700px]"
|
||||
/>
|
||||
<section className="max-w-[700px] py-4">
|
||||
<InlineSectionHeader title={t('judgeModel.title')} tooltip={t('judgeModel.description')} />
|
||||
<div className="mt-1.5">
|
||||
<JudgeModelSelector resourceType={resourceType} resourceId={resourceId} />
|
||||
</div>
|
||||
</section>
|
||||
<div className="max-w-[700px] border-b border-divider-subtle" />
|
||||
<MetricSection resourceType={resourceType} resourceId={resourceId} />
|
||||
<div className="max-w-[700px] border-b border-divider-subtle" />
|
||||
<ConditionsSection resourceType={resourceType} resourceId={resourceId} />
|
||||
</div>
|
||||
</div>
|
||||
if (resourceType === 'pipeline') {
|
||||
return (
|
||||
<PipelineEvaluation
|
||||
resourceType={resourceType}
|
||||
resourceId={resourceId}
|
||||
/>
|
||||
)
|
||||
}
|
||||
|
||||
<div className="h-[420px] shrink-0 border-t border-divider-subtle xl:h-auto xl:w-[450px] xl:border-t-0 xl:border-l">
|
||||
<BatchTestPanel resourceType={resourceType} resourceId={resourceId} />
|
||||
</div>
|
||||
</div>
|
||||
return (
|
||||
<NonPipelineEvaluation
|
||||
resourceType={resourceType}
|
||||
resourceId={resourceId}
|
||||
/>
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
@@ -69,6 +69,30 @@ const builtinMetrics: MetricOption[] = [
|
||||
},
|
||||
]
|
||||
|
||||
/**
 * Built-in metric options for pipeline resources. All three share the
 * `quality` group and a single `Retrieval` badge; only id, label and
 * description differ, so they are generated from compact tuples.
 */
const pipelineBuiltinMetrics: MetricOption[] = ([
  [
    'context-precision',
    'Context Precision',
    'Measures whether retrieved chunks stay tightly aligned to the request.',
  ],
  [
    'context-recall',
    'Context Recall',
    'Checks whether the retrieval result includes the evidence needed to answer.',
  ],
  [
    'context-relevance',
    'Context Relevance',
    'Scores how useful the retrieved context is for downstream generation.',
  ],
] as const).map(([id, label, description]): MetricOption => ({
  id,
  label,
  description,
  group: 'quality',
  badges: ['Retrieval'],
}))
|
||||
|
||||
const workflowOptions = [
|
||||
{
|
||||
id: 'workflow-precision-review',
|
||||
@@ -139,7 +163,7 @@ export const getEvaluationMockConfig = (resourceType: EvaluationResourceType): E
|
||||
if (resourceType === 'pipeline') {
|
||||
return {
|
||||
judgeModels,
|
||||
builtinMetrics,
|
||||
builtinMetrics: pipelineBuiltinMetrics,
|
||||
workflowOptions,
|
||||
fieldOptions: pipelineFields,
|
||||
templateFileName: 'pipeline-evaluation-template.csv',
|
||||
|
||||
@@ -47,6 +47,15 @@
|
||||
"conditions.title": "Judgment Conditions",
|
||||
"conditions.valuePlaceholder": "Enter a value",
|
||||
"description": "Configure automated testing to grade your application's performance.",
|
||||
"history.columns.creator": "Creator",
|
||||
"history.columns.status": "Status",
|
||||
"history.columns.time": "Time",
|
||||
"history.columns.version": "Version",
|
||||
"history.creatorYou": "You",
|
||||
"history.empty": "No test history yet",
|
||||
"history.latestVersion": "Latest",
|
||||
"history.searchPlaceholder": "Search",
|
||||
"history.title": "Test History",
|
||||
"judgeModel.description": "Choose the model used to score your evaluation results.",
|
||||
"judgeModel.title": "Judge Model",
|
||||
"metrics.add": "Add Metric",
|
||||
@@ -83,5 +92,7 @@
|
||||
"metrics.showMore": "Show more",
|
||||
"metrics.title": "Metrics",
|
||||
"metrics.update": "Update",
|
||||
"pipeline.uploadAndRun": "Upload & Run Test",
|
||||
"results.empty": "No evaluation results yet.",
|
||||
"title": "Evaluation"
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user