From 74a0ff1bebcce2bc3fbaa74c571203baff845118 Mon Sep 17 00:00:00 2001 From: Evan Bonsignori Date: Thu, 3 Jul 2025 11:58:05 -0700 Subject: [PATCH] add `search_client` to search events when the host is not `docs.github.com` (#56458) Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- src/events/lib/schema.ts | 4 + src/events/types.ts | 1 + src/search/lib/ai-search-proxy.ts | 10 ++ .../lib/helpers/external-search-analytics.ts | 95 +++++++++++++++++++ .../lib/routes/combined-search-route.ts | 9 ++ src/search/middleware/search-routes.ts | 9 ++ 6 files changed, 128 insertions(+) create mode 100644 src/search/lib/helpers/external-search-analytics.ts diff --git a/src/events/lib/schema.ts b/src/events/lib/schema.ts index 2c4efa7b92..302997ac73 100644 --- a/src/events/lib/schema.ts +++ b/src/events/lib/schema.ts @@ -371,6 +371,10 @@ const search = { type: 'string', description: 'Any additional search context, such as component searched.', }, + search_client: { + type: 'string', + description: 'The client name identifier when the request is not from docs.github.com.', + }, }, } diff --git a/src/events/types.ts b/src/events/types.ts index 762dbc9a8d..b66f5e3061 100644 --- a/src/events/types.ts +++ b/src/events/types.ts @@ -104,6 +104,7 @@ export type EventPropsByType = { [EventType.search]: { search_query: string search_context?: string + search_client?: string } [EventType.searchResult]: { search_result_query: string diff --git a/src/search/lib/ai-search-proxy.ts b/src/search/lib/ai-search-proxy.ts index 7c2384039a..599ed87dd0 100644 --- a/src/search/lib/ai-search-proxy.ts +++ b/src/search/lib/ai-search-proxy.ts @@ -4,6 +4,7 @@ import got from 'got' import { getHmacWithEpoch } from '@/search/lib/helpers/get-cse-copilot-auth' import { getCSECopilotSource } from '@/search/lib/helpers/cse-copilot-docs-versions' import type { ExtendedRequest } from '@/types' +import { handleExternalSearchAnalytics } from '@/search/lib/helpers/external-search-analytics' export const aiSearchProxy = async (req: ExtendedRequest, res: Response) => { const { query, version } = req.body @@ -29,6 +30,15 @@ export const aiSearchProxy = async (req: ExtendedRequest, res: Response) => { return } + // Handle search analytics and client_name validation + const analyticsError = await handleExternalSearchAnalytics(req, 'ai-search') + if (analyticsError) { + res.status(analyticsError.status).json({ + errors: [{ message: analyticsError.error }], + }) + return + } + const diagnosticTags = [ `version:${version}`.slice(0, 200), `language:${req.language}`.slice(0, 200), diff --git a/src/search/lib/helpers/external-search-analytics.ts b/src/search/lib/helpers/external-search-analytics.ts new file mode 100644 index 0000000000..e6e4754d82 --- /dev/null +++ b/src/search/lib/helpers/external-search-analytics.ts @@ -0,0 +1,95 @@ +import { publish } from '@/events/lib/hydro' +import { hydroNames } from '@/events/lib/schema' + +/** + * Handles search analytics and client_name validation for external requests + * Returns null if the request should continue, or an error response object if validation failed + */ +export async function handleExternalSearchAnalytics( + req: any, + searchContext: string, +): Promise<{ error: string; status: number } | null> { + const host = req.headers['x-host'] || req.headers.host + const normalizedHost = stripPort(host as string) + + // Skip analytics entirely for production and internal staging environments + if ( + normalizedHost === 'docs.github.com' || + normalizedHost.endsWith('.github.net') || + normalizedHost.endsWith('.githubapp.com') + ) { + return null + } + + // For localhost, send analytics but auto-set client_name if not provided + let client_name = req.query.client_name || req.body?.client_name + if (normalizedHost === 'localhost' && !client_name) { + client_name = 'localhost' + } + + // For all other external requests, require explicit client_name + if (!client_name) { + return { + status: 400, + error: "Missing required parameter 'client_name' for external requests", + } + } + + // Send search event with client identifier + try { + await publish({ + schema: hydroNames.search, + value: { + type: 'search', + version: '1.0.0', + context: { + event_id: crypto.randomUUID(), + user: 'server-side', + version: '1.0.0', + created: new Date().toISOString(), + hostname: normalizedHost, + path: '', + search: '', + hash: '', + path_language: 'en', + path_version: '', + path_product: '', + path_article: '', + }, + search_query: 'REDACTED', + search_context: searchContext, + search_client: client_name as string, + }, + }) + } catch (error) { + // Don't fail the request if analytics fails + console.error('Failed to send search analytics:', error) + } + + return null +} + +/** + * Determines if a host should bypass client_name requirement for analytics + * Returns true if the host is docs.github.com or ends with github.net or githubapp.com + * (for production and internal staging environments) + * Note: localhost is NOT included here as it should send analytics with auto-set client_name + */ +export function shouldBypassClientNameRequirement(host: string | undefined): boolean { + if (!host) return false + + const normalizedHost = stripPort(host) + return ( + normalizedHost === 'docs.github.com' || + normalizedHost.endsWith('.github.net') || + normalizedHost.endsWith('.githubapp.com') + ) +} + +/** + * Strips port number from host string + */ +function stripPort(host: string): string { + const [hostname] = host.split(':') + return hostname +} diff --git a/src/search/lib/routes/combined-search-route.ts b/src/search/lib/routes/combined-search-route.ts index 581bf38f25..07302f4c6e 100644 --- a/src/search/lib/routes/combined-search-route.ts +++ b/src/search/lib/routes/combined-search-route.ts @@ -3,6 +3,7 @@ import { getAISearchAutocompleteResults } from '@/search/lib/get-elasticsearch-r import { searchCacheControl } from '@/frame/middleware/cache-control' import { SURROGATE_ENUMS, setFastlySurrogateKey } from '@/frame/middleware/set-fastly-surrogate-key' import { handleGetSearchResultsError } from '@/search/middleware/search-routes' +import { handleExternalSearchAnalytics } from '@/search/lib/helpers/external-search-analytics' import type { Request, Response } from 'express' import type { CombinedSearchResponse, GeneralSearchResponse } from '@/search/types' @@ -35,6 +36,14 @@ export async function combinedSearchRoute(req: Request, res: Response) { return res.status(400).json(combinedValidationErrors[0]) } + // Handle search analytics and client_name validation + const analyticsError = await handleExternalSearchAnalytics(req, 'combined-search') + if (analyticsError) { + return res.status(analyticsError.status).json({ + error: analyticsError.error, + }) + } + try { const autocompletePromise = getAISearchAutocompleteResults({ indexName: aiIndexName, diff --git a/src/search/middleware/search-routes.ts b/src/search/middleware/search-routes.ts index af3af78d12..278ef9d0a2 100644 --- a/src/search/middleware/search-routes.ts +++ b/src/search/middleware/search-routes.ts @@ -17,6 +17,7 @@ import { getAISearchAutocompleteResults } from '@/search/lib/get-elasticsearch-r import { getSearchFromRequestParams } from '@/search/lib/search-request-params/get-search-from-request-params' import { getGeneralSearchResults } from '@/search/lib/get-elasticsearch-results/general-search' import { combinedSearchRoute } from '@/search/lib/routes/combined-search-route' +import { handleExternalSearchAnalytics } from '@/search/lib/helpers/external-search-analytics' const router = express.Router() @@ -36,6 +37,14 @@ router.get( return res.status(400).json(validationErrors[0]) } + // Handle search analytics and client_name validation + const analyticsError = await handleExternalSearchAnalytics(req, 'general-search') + if (analyticsError) { + return res.status(analyticsError.status).json({ + error: analyticsError.error, + }) + } + const getResultOptions = { indexName, searchParams,