diff --git a/src/search/lib/ai-search-proxy.ts b/src/search/lib/ai-search-proxy.ts
index 3eebf7ba3d..54d3d28146 100644
--- a/src/search/lib/ai-search-proxy.ts
+++ b/src/search/lib/ai-search-proxy.ts
@@ -1,12 +1,14 @@
 import { Request, Response } from 'express'
+import statsd from '@/observability/lib/statsd'
 import got from 'got'
 import { getHmacWithEpoch } from '@/search/lib/helpers/get-cse-copilot-auth'
-import { getCSECopilotSource } from '#src/search/lib/helpers/cse-copilot-docs-versions.js'
+import { getCSECopilotSource } from '@/search/lib/helpers/cse-copilot-docs-versions'
 
 const memoryCache = new Map()
 
 export const aiSearchProxy = async (req: Request, res: Response) => {
   const { query, version, language } = req.body
+
   const errors = []
 
   // Validate request body
@@ -34,13 +36,25 @@ export const aiSearchProxy = async (req: Request, res: Response) => {
     return
   }
 
+  const diagnosticTags = [
+    `version:${version}`.slice(0, 200),
+    `language:${language}`.slice(0, 200),
+    `queryLength:${query.length}`.slice(0, 200),
+  ]
+  statsd.increment('ai-search.call', 1, diagnosticTags)
+
+  // TODO: Caching here may cause an issue if the cache grows too large. Additionally, the cache will be inconsistent across pods
   const cacheKey = `${query}:${version}:${language}`
   if (memoryCache.has(cacheKey)) {
+    statsd.increment('ai-search.cache_hit', 1, diagnosticTags)
     res.setHeader('Content-Type', 'application/x-ndjson')
     res.send(memoryCache.get(cacheKey))
     return
   }
 
+  const startTime = Date.now()
+  let totalChars = 0
+
   const body = {
     chat_context: 'docs',
     docs_source: docsSource,
@@ -57,22 +71,19 @@ export const aiSearchProxy = async (req: Request, res: Response) => {
       },
     })
 
-    const chunks: Buffer[] = []
-    stream.on('data', (chunk) => {
-      chunks.push(chunk)
+    // Listen for data events to count characters
+    stream.on('data', (chunk: Buffer | string) => {
+      // Ensure we have a string for proper character count
+      const dataStr = typeof chunk === 'string' ? chunk : chunk.toString()
+      totalChars += dataStr.length
     })
 
     // Handle the upstream response before piping
     stream.on('response', (upstreamResponse) => {
-      // When cse-copilot returns a 204, it means the backend received the request
-      // but was unable to answer the question. So we return a 400 to the client to be handled.
-      if (upstreamResponse.statusCode === 204) {
-        return res
-          .status(400)
-          .json({ errors: [{ message: 'Sorry I am unable to answer this question.' }] })
-      } else if (upstreamResponse.statusCode !== 200) {
+      if (upstreamResponse.statusCode !== 200) {
         const errorMessage = `Upstream server responded with status code ${upstreamResponse.statusCode}`
         console.error(errorMessage)
+        statsd.increment('ai-search.stream_response_error', 1, diagnosticTags)
         res.status(500).json({ errors: [{ message: errorMessage }] })
         stream.destroy()
       } else {
@@ -95,6 +106,8 @@ export const aiSearchProxy = async (req: Request, res: Response) => {
           .json({ errors: [{ message: 'Sorry I am unable to answer this question.' }] })
       }
 
+      statsd.increment('ai-search.stream_error', 1, diagnosticTags)
+
      if (!res.headersSent) {
        res.status(500).json({ errors: [{ message: 'Internal server error' }] })
      } else {
@@ -106,12 +119,19 @@ export const aiSearchProxy = async (req: Request, res: Response) => {
       }
     })
 
-    // Ensure response ends when stream ends
+    // Calculate metrics on stream end
     stream.on('end', () => {
-      memoryCache.set(cacheKey, Buffer.concat(chunks as Uint8Array[]))
+      const totalResponseTime = Date.now() - startTime // in ms
+      const charPerMsRatio = totalResponseTime > 0 ? totalChars / totalResponseTime : 0 // chars per ms
+
+      statsd.gauge('ai-search.total_response_time', totalResponseTime, diagnosticTags)
+      statsd.gauge('ai-search.response_chars_per_ms', charPerMsRatio, diagnosticTags)
+
+      statsd.increment('ai-search.success_stream_end', 1, diagnosticTags)
       res.end()
     })
   } catch (error) {
+    statsd.increment('ai-search.route_error', 1, diagnosticTags)
    console.error('Error posting /answers to cse-copilot:', error)
    res.status(500).json({ errors: [{ message: 'Internal server error' }] })
   }
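The TODO comment introduced above flags two risks with the in-memory cache: unbounded growth and inconsistency across pods. As a minimal sketch (not part of this diff; the class and names below are illustrative), the growth concern could be handled with a size-capped LRU built on `Map`'s insertion ordering. Cross-pod consistency would still need an external store such as Redis, which this sketch does not attempt.

```ts
// Sketch only: a bounded LRU cache that could stand in for the unbounded `new Map()` above.
// Evicts the least recently used entry once `maxEntries` is exceeded.
class BoundedCache<K, V> {
  private map = new Map<K, V>()

  constructor(private maxEntries: number) {}

  has(key: K): boolean {
    return this.map.has(key)
  }

  get(key: K): V | undefined {
    const value = this.map.get(key)
    if (value !== undefined) {
      // Re-insert so the entry becomes the most recently used
      this.map.delete(key)
      this.map.set(key, value)
    }
    return value
  }

  set(key: K, value: V): void {
    if (this.map.has(key)) this.map.delete(key)
    this.map.set(key, value)
    if (this.map.size > this.maxEntries) {
      // Map iterates in insertion order, so the first key is the least recently used
      const oldestKey = this.map.keys().next().value as K
      this.map.delete(oldestKey)
    }
  }
}

// Hypothetical replacement for `const memoryCache = new Map()`; the limit is arbitrary.
const memoryCache = new BoundedCache<string, Buffer>(1000)
```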